// SPDX-License-Identifier: GPL-2.0
/* Copyright(c) 2019 Intel Corporation. All rights rsvd. */
#include <linux/init.h>
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/pci.h>
#include <linux/device.h>
#include <linux/sched/task.h>
#include <linux/intel-svm.h>
#include <linux/io-64-nonatomic-lo-hi.h>
#include <linux/cdev.h>
#include <linux/fs.h>
#include <linux/poll.h>
#include <linux/iommu.h>
#include <uapi/linux/idxd.h>
#include "registers.h"
#include "idxd.h"

struct idxd_cdev_context {
	const char *name;
	dev_t devt;
	struct ida minor_ida;
};
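
/*
 * ictx is an array based off of accelerator types. enum idxd_type
 * is used as index
 */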
static struct idxd_cdev_context ictx[IDXD_TYPE_MAX] = {
	{ .name = "dsa" },
	{ .name = "iax" }
};

struct idxd_user_context {
	struct idxd_wq *wq;
	struct task_struct *task;
	unsigned int pasid;
	unsigned int flags;
	struct iommu_sva *sva;
};
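
/*
 * Release callback for the cdev's struct device: return the minor
 * number to the per-type ida and free the idxd_cdev container.
 */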
static void idxd_cdev_dev_release(struct device *dev)
{
	struct idxd_cdev *idxd_cdev = dev_to_cdev(dev);
	struct idxd_cdev_context *cdev_ctx;
	struct idxd_wq *wq = idxd_cdev->wq;

	cdev_ctx = &ictx[wq->idxd->data->type];
	ida_simple_remove(&cdev_ctx->minor_ida, idxd_cdev->minor);
	kfree(idxd_cdev);
}

static struct device_type idxd_cdev_device_type = {
	.name = "idxd_cdev",
	.release = idxd_cdev_dev_release,
};

static inline struct idxd_cdev *inode_idxd_cdev(struct inode *inode)
{
	struct cdev *cdev = inode->i_cdev;

	return container_of(cdev, struct idxd_cdev, cdev);
}

static inline struct idxd_wq *inode_wq(struct inode *inode)
{
	struct idxd_cdev *idxd_cdev = inode_idxd_cdev(inode);

	return idxd_cdev->wq;
}
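
/*
 * Open a user context on the char device: allocate a per-file context,
 * bind the current process address space via SVA when user PASID is
 * enabled, and take a reference on the wq. A dedicated wq admits only
 * a single opener.
 */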
static int idxd_cdev_open(struct inode *inode, struct file *filp)
{
	struct idxd_user_context *ctx;
	struct idxd_device *idxd;
	struct idxd_wq *wq;
	struct device *dev;
	int rc = 0;
	struct iommu_sva *sva;
	unsigned int pasid;

	wq = inode_wq(inode);
	idxd = wq->idxd;
	dev = &idxd->pdev->dev;

	dev_dbg(dev, "%s called: %d\n", __func__, idxd_wq_refcount(wq));

	ctx = kzalloc(sizeof(*ctx), GFP_KERNEL);
	if (!ctx)
		return -ENOMEM;

	mutex_lock(&wq->wq_lock);

	if (idxd_wq_refcount(wq) > 0 && wq_dedicated(wq)) {
		rc = -EBUSY;
		goto failed;
	}

	ctx->wq = wq;
	filp->private_data = ctx;

	if (device_user_pasid_enabled(idxd)) {
		sva = iommu_sva_bind_device(dev, current->mm, NULL);
		if (IS_ERR(sva)) {
			rc = PTR_ERR(sva);
			dev_err(dev, "pasid allocation failed: %d\n", rc);
			goto failed;
		}

		pasid = iommu_sva_get_pasid(sva);
		if (pasid == IOMMU_PASID_INVALID) {
			iommu_sva_unbind_device(sva);
			rc = -EINVAL;
			goto failed;
		}

		ctx->sva = sva;
		ctx->pasid = pasid;

		if (wq_dedicated(wq)) {
			rc = idxd_wq_set_pasid(wq, pasid);
			if (rc < 0) {
				iommu_sva_unbind_device(sva);
				dev_err(dev, "wq set pasid failed: %d\n", rc);
				goto failed;
			}
		}
	}

	idxd_wq_get(wq);
	mutex_unlock(&wq->wq_lock);
	return 0;

failed:
	mutex_unlock(&wq->wq_lock);
	kfree(ctx);
	return rc;
}

static int idxd_cdev_release(struct inode *node, struct file *filep)
{
	struct idxd_user_context *ctx = filep->private_data;
	struct idxd_wq *wq = ctx->wq;
	struct idxd_device *idxd = wq->idxd;
	struct device *dev = &idxd->pdev->dev;
	int rc;

	dev_dbg(dev, "%s called\n", __func__);
	filep->private_data = NULL;
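
	/* Wait for in-flight operations to complete. */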
	if (wq_shared(wq)) {
		idxd_device_drain_pasid(idxd, ctx->pasid);
	} else {
		if (device_user_pasid_enabled(idxd)) {
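			/* The wq disable in the disable pasid function will drain the wq */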
			rc = idxd_wq_disable_pasid(wq);
			if (rc < 0)
				dev_err(dev, "wq disable pasid failed.\n");
		} else {
			idxd_wq_drain(wq);
		}
	}

	if (ctx->sva)
		iommu_sva_unbind_device(ctx->sva);
	kfree(ctx);
	mutex_lock(&wq->wq_lock);
	idxd_wq_put(wq);
	mutex_unlock(&wq->wq_lock);
	return 0;
}
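
/*
 * The wq portal is a single page; reject any mapping larger than that.
 */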
static int check_vma(struct idxd_wq *wq, struct vm_area_struct *vma,
		     const char *func)
{
	struct device *dev = &wq->idxd->pdev->dev;

	if ((vma->vm_end - vma->vm_start) > PAGE_SIZE) {
		dev_info_ratelimited(dev,
				     "%s: %s: mapping too large: %lu\n",
				     current->comm, func,
				     vma->vm_end - vma->vm_start);
		return -EINVAL;
	}

	return 0;
}
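
/*
 * Map one page of the wq's limited portal into userspace as non-cached
 * MMIO so that work descriptors can be submitted directly from user
 * space.
 */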
static int idxd_cdev_mmap(struct file *filp, struct vm_area_struct *vma)
{
	struct idxd_user_context *ctx = filp->private_data;
	struct idxd_wq *wq = ctx->wq;
	struct idxd_device *idxd = wq->idxd;
	struct pci_dev *pdev = idxd->pdev;
	phys_addr_t base = pci_resource_start(pdev, IDXD_WQ_BAR);
	unsigned long pfn;
	int rc;

	dev_dbg(&pdev->dev, "%s called\n", __func__);
	rc = check_vma(wq, vma, __func__);
	if (rc < 0)
		return rc;

	vma->vm_flags |= VM_DONTCOPY;
	pfn = (base + idxd_get_wq_portal_full_offset(wq->id,
				IDXD_PORTAL_LIMITED)) >> PAGE_SHIFT;
	vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
	vma->vm_private_data = ctx;

	return io_remap_pfn_range(vma, vma->vm_start, pfn, PAGE_SIZE,
			vma->vm_page_prot);
}
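
/*
 * Report the wq as readable when the device has a valid software error
 * record pending; userspace waits on the wq's error queue.
 */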
static __poll_t idxd_cdev_poll(struct file *filp,
			       struct poll_table_struct *wait)
{
	struct idxd_user_context *ctx = filp->private_data;
	struct idxd_wq *wq = ctx->wq;
	struct idxd_device *idxd = wq->idxd;
	__poll_t out = 0;

	poll_wait(filp, &wq->err_queue, wait);
	spin_lock(&idxd->dev_lock);
	if (idxd->sw_err.valid)
		out = EPOLLIN | EPOLLRDNORM;
	spin_unlock(&idxd->dev_lock);

	return out;
}

static const struct file_operations idxd_cdev_fops = {
	.owner = THIS_MODULE,
	.open = idxd_cdev_open,
	.release = idxd_cdev_release,
	.mmap = idxd_cdev_mmap,
	.poll = idxd_cdev_poll,
};

int idxd_cdev_get_major(struct idxd_device *idxd)
{
	return MAJOR(ictx[idxd->data->type].devt);
}
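
/*
 * Allocate a minor number, set up the cdev's struct device under the
 * wq's config device on the dsa bus, and register the char device node.
 */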
int idxd_wq_add_cdev(struct idxd_wq *wq)
{
	struct idxd_device *idxd = wq->idxd;
	struct idxd_cdev *idxd_cdev;
	struct cdev *cdev;
	struct device *dev;
	struct idxd_cdev_context *cdev_ctx;
	int rc, minor;

	idxd_cdev = kzalloc(sizeof(*idxd_cdev), GFP_KERNEL);
	if (!idxd_cdev)
		return -ENOMEM;

	idxd_cdev->idxd_dev.type = IDXD_DEV_CDEV;
	idxd_cdev->wq = wq;
	cdev = &idxd_cdev->cdev;
	dev = cdev_dev(idxd_cdev);
	cdev_ctx = &ictx[wq->idxd->data->type];
	minor = ida_simple_get(&cdev_ctx->minor_ida, 0, MINORMASK, GFP_KERNEL);
	if (minor < 0) {
		kfree(idxd_cdev);
		return minor;
	}
	idxd_cdev->minor = minor;

	device_initialize(dev);
	dev->parent = wq_confdev(wq);
	dev->bus = &dsa_bus_type;
	dev->type = &idxd_cdev_device_type;
	dev->devt = MKDEV(MAJOR(cdev_ctx->devt), minor);

	rc = dev_set_name(dev, "%s/wq%u.%u", idxd->data->name_prefix, idxd->id, wq->id);
	if (rc < 0)
		goto err;

	wq->idxd_cdev = idxd_cdev;
	cdev_init(cdev, &idxd_cdev_fops);
	rc = cdev_device_add(cdev, dev);
	if (rc) {
		dev_dbg(&wq->idxd->pdev->dev, "cdev_add failed: %d\n", rc);
		goto err;
	}

	return 0;

err:
	put_device(dev);
	wq->idxd_cdev = NULL;
	return rc;
}
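
/*
 * Tear down the char device created by idxd_wq_add_cdev(); the final
 * put_device() drops the reference taken at device_initialize() and
 * triggers idxd_cdev_dev_release().
 */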
void idxd_wq_del_cdev(struct idxd_wq *wq)
{
	struct idxd_cdev *idxd_cdev;

	idxd_cdev = wq->idxd_cdev;
	wq->idxd_cdev = NULL;
	cdev_device_del(&idxd_cdev->cdev, cdev_dev(idxd_cdev));
	put_device(cdev_dev(idxd_cdev));
}
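
/*
 * Driver probe for the "user" wq type: enable the wq and expose it to
 * userspace through a char device.
 */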
static int idxd_user_drv_probe(struct idxd_dev *idxd_dev)
{
	struct idxd_wq *wq = idxd_dev_to_wq(idxd_dev);
	struct idxd_device *idxd = wq->idxd;
	int rc;

	if (idxd->state != IDXD_DEV_ENABLED)
		return -ENXIO;

	mutex_lock(&wq->wq_lock);
	wq->type = IDXD_WQT_USER;
	rc = drv_enable_wq(wq);
	if (rc < 0)
		goto err;

	rc = idxd_wq_add_cdev(wq);
	if (rc < 0) {
		idxd->cmd_status = IDXD_SCMD_CDEV_ERR;
		goto err_cdev;
	}

	idxd->cmd_status = 0;
	mutex_unlock(&wq->wq_lock);
	return 0;

err_cdev:
	drv_disable_wq(wq);
err:
	wq->type = IDXD_WQT_NONE;
	mutex_unlock(&wq->wq_lock);
	return rc;
}

static void idxd_user_drv_remove(struct idxd_dev *idxd_dev)
{
	struct idxd_wq *wq = idxd_dev_to_wq(idxd_dev);

	mutex_lock(&wq->wq_lock);
	idxd_wq_del_cdev(wq);
	drv_disable_wq(wq);
	wq->type = IDXD_WQT_NONE;
	mutex_unlock(&wq->wq_lock);
}

static enum idxd_dev_type dev_types[] = {
	IDXD_DEV_WQ,
	IDXD_DEV_NONE,
};

struct idxd_device_driver idxd_user_drv = {
	.probe = idxd_user_drv_probe,
	.remove = idxd_user_drv_remove,
	.name = "user",
	.type = dev_types,
};
EXPORT_SYMBOL_GPL(idxd_user_drv);
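
/*
 * Module-level registration of one char device region per accelerator
 * type, each with its own minor ida; unwound on failure and by
 * idxd_cdev_remove() at module exit.
 */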
int idxd_cdev_register(void)
{
	int rc, i;

	for (i = 0; i < IDXD_TYPE_MAX; i++) {
		ida_init(&ictx[i].minor_ida);
		rc = alloc_chrdev_region(&ictx[i].devt, 0, MINORMASK,
					 ictx[i].name);
		if (rc)
			goto err_free_chrdev_region;
	}

	return 0;

err_free_chrdev_region:
	for (i--; i >= 0; i--)
		unregister_chrdev_region(ictx[i].devt, MINORMASK);

	return rc;
}

void idxd_cdev_remove(void)
{
	int i;

	for (i = 0; i < IDXD_TYPE_MAX; i++) {
		unregister_chrdev_region(ictx[i].devt, MINORMASK);
		ida_destroy(&ictx[i].minor_ida);
	}
}