// SPDX-License-Identifier: GPL-2.0-only
/*
 * VFIO core
 *
 * Copyright (C) 2012 Red Hat, Inc.  All rights reserved.
 *     Author: Alex Williamson <alex.williamson@redhat.com>
 *
 * Derived from original vfio:
 * Copyright 2010 Cisco Systems, Inc.  All rights reserved.
 * Author: Tom Lyon, pugs@cisco.com
 */
#include <linux/cdev.h>
#include <linux/compat.h>
#include <linux/device.h>
#include <linux/file.h>
#include <linux/anon_inodes.h>
#include <linux/fs.h>
#include <linux/idr.h>
#include <linux/iommu.h>
#include <linux/list.h>
#include <linux/miscdevice.h>
#include <linux/module.h>
#include <linux/mutex.h>
#include <linux/pci.h>
#include <linux/rwsem.h>
#include <linux/sched.h>
#include <linux/slab.h>
#include <linux/stat.h>
#include <linux/string.h>
#include <linux/uaccess.h>
#include <linux/vfio.h>
#include <linux/wait.h>
#include <linux/sched/signal.h>
#include "vfio.h"

#define DRIVER_VERSION	"0.3"
#define DRIVER_AUTHOR	"Alex Williamson <alex.williamson@redhat.com>"
#define DRIVER_DESC	"VFIO - User Level meta-driver"

static struct vfio {
	struct class			*class;
	struct list_head		iommu_drivers_list;
	struct mutex			iommu_drivers_lock;
	struct list_head		group_list;
	struct mutex			group_lock;
	struct ida			group_ida;
	dev_t				group_devt;
} vfio;

struct vfio_iommu_driver {
	const struct vfio_iommu_driver_ops	*ops;
	struct list_head			vfio_next;
};

struct vfio_container {
	struct kref			kref;
	struct list_head		group_list;
	struct rw_semaphore		group_lock;
	struct vfio_iommu_driver	*iommu_driver;
	void				*iommu_data;
	bool				noiommu;
};

struct vfio_group {
	struct device			dev;
	struct cdev			cdev;
	refcount_t			users;
	unsigned int			container_users;
	struct iommu_group		*iommu_group;
	struct vfio_container		*container;
	struct list_head		device_list;
	struct mutex			device_lock;
	struct list_head		vfio_next;
	struct list_head		container_next;
	enum vfio_group_type		type;
	unsigned int			dev_counter;
	struct rw_semaphore		group_rwsem;
	struct kvm			*kvm;
	struct file			*opened_file;
	struct blocking_notifier_head	notifier;
};

#ifdef CONFIG_VFIO_NOIOMMU
static bool noiommu __read_mostly;
module_param_named(enable_unsafe_noiommu_mode,
		   noiommu, bool, S_IRUGO | S_IWUSR);
MODULE_PARM_DESC(enable_unsafe_noiommu_mode, "Enable UNSAFE, no-IOMMU mode.  This mode provides no device isolation, no DMA translation, no host kernel protection, cannot be used for device assignment to virtual machines, requires RAWIO permissions, and will taint the kernel.  If you do not know what this is for, step away. (default: false)");
#endif

static DEFINE_XARRAY(vfio_device_set_xa);
static const struct file_operations vfio_group_fops;

int vfio_assign_device_set(struct vfio_device *device, void *set_id)
{
	unsigned long idx = (unsigned long)set_id;
	struct vfio_device_set *new_dev_set;
	struct vfio_device_set *dev_set;

	if (WARN_ON(!set_id))
		return -EINVAL;

	/*
	 * Atomically acquire a singleton object in the xarray for this set_id
	 */
	xa_lock(&vfio_device_set_xa);
	dev_set = xa_load(&vfio_device_set_xa, idx);
	if (dev_set)
		goto found_get_ref;
	xa_unlock(&vfio_device_set_xa);

	new_dev_set = kzalloc(sizeof(*new_dev_set), GFP_KERNEL);
	if (!new_dev_set)
		return -ENOMEM;
	mutex_init(&new_dev_set->lock);
	INIT_LIST_HEAD(&new_dev_set->device_list);
	new_dev_set->set_id = set_id;

	xa_lock(&vfio_device_set_xa);
	dev_set = __xa_cmpxchg(&vfio_device_set_xa, idx, NULL, new_dev_set,
			       GFP_KERNEL);
	if (!dev_set) {
		dev_set = new_dev_set;
		goto found_get_ref;
	}

	kfree(new_dev_set);
	if (xa_is_err(dev_set)) {
		xa_unlock(&vfio_device_set_xa);
		return xa_err(dev_set);
	}

found_get_ref:
	dev_set->device_count++;
	xa_unlock(&vfio_device_set_xa);
	mutex_lock(&dev_set->lock);
	device->dev_set = dev_set;
	list_add_tail(&device->dev_set_list, &dev_set->device_list);
	mutex_unlock(&dev_set->lock);
	return 0;
}
EXPORT_SYMBOL_GPL(vfio_assign_device_set);
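
/*
 * Illustrative sketch (not part of this file): a driver that wants several
 * devices to share one dev_set, e.g. because they share a reset domain, can
 * key the set by any common object such as the parent device.  "mdev" is a
 * hypothetical driver structure embedding a struct vfio_device named vdev:
 *
 *	ret = vfio_assign_device_set(&mdev->vdev, dev->parent);
 *	if (ret)
 *		return ret;
 *
 * Devices registered with the same set_id pointer then share one
 * dev_set->lock and appear together on dev_set->device_list, which drivers
 * use to serialize operations like multi-device resets.
 */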

static void vfio_release_device_set(struct vfio_device *device)
{
	struct vfio_device_set *dev_set = device->dev_set;

	if (!dev_set)
		return;

	mutex_lock(&dev_set->lock);
	list_del(&device->dev_set_list);
	mutex_unlock(&dev_set->lock);

	xa_lock(&vfio_device_set_xa);
	if (!--dev_set->device_count) {
		__xa_erase(&vfio_device_set_xa,
			   (unsigned long)dev_set->set_id);
		mutex_destroy(&dev_set->lock);
		kfree(dev_set);
	}
	xa_unlock(&vfio_device_set_xa);
}

#ifdef CONFIG_VFIO_NOIOMMU
static void *vfio_noiommu_open(unsigned long arg)
{
	if (arg != VFIO_NOIOMMU_IOMMU)
		return ERR_PTR(-EINVAL);
	if (!capable(CAP_SYS_RAWIO))
		return ERR_PTR(-EPERM);

	return NULL;
}

static void vfio_noiommu_release(void *iommu_data)
{
}

static long vfio_noiommu_ioctl(void *iommu_data,
			       unsigned int cmd, unsigned long arg)
{
	if (cmd == VFIO_CHECK_EXTENSION)
		return noiommu && (arg == VFIO_NOIOMMU_IOMMU) ? 1 : 0;

	return -ENOTTY;
}

static int vfio_noiommu_attach_group(void *iommu_data,
		struct iommu_group *iommu_group, enum vfio_group_type type)
{
	return 0;
}

static void vfio_noiommu_detach_group(void *iommu_data,
				      struct iommu_group *iommu_group)
{
}

static const struct vfio_iommu_driver_ops vfio_noiommu_ops = {
	.name = "vfio-noiommu",
	.owner = THIS_MODULE,
	.open = vfio_noiommu_open,
	.release = vfio_noiommu_release,
	.ioctl = vfio_noiommu_ioctl,
	.attach_group = vfio_noiommu_attach_group,
	.detach_group = vfio_noiommu_detach_group,
};

/*
 * Only noiommu containers can use vfio-noiommu and noiommu containers can
 * only use vfio-noiommu.
 */
static inline bool vfio_iommu_driver_allowed(struct vfio_container *container,
		const struct vfio_iommu_driver *driver)
{
	return container->noiommu == (driver->ops == &vfio_noiommu_ops);
}
#else
static inline bool vfio_iommu_driver_allowed(struct vfio_container *container,
		const struct vfio_iommu_driver *driver)
{
	return true;
}
#endif /* CONFIG_VFIO_NOIOMMU */

/*
 * IOMMU driver registration
 */
int vfio_register_iommu_driver(const struct vfio_iommu_driver_ops *ops)
{
	struct vfio_iommu_driver *driver, *tmp;

	if (WARN_ON(!ops->register_device != !ops->unregister_device))
		return -EINVAL;

	driver = kzalloc(sizeof(*driver), GFP_KERNEL);
	if (!driver)
		return -ENOMEM;

	driver->ops = ops;

	mutex_lock(&vfio.iommu_drivers_lock);

	/* Check for duplicates */
	list_for_each_entry(tmp, &vfio.iommu_drivers_list, vfio_next) {
		if (tmp->ops == ops) {
			mutex_unlock(&vfio.iommu_drivers_lock);
			kfree(driver);
			return -EINVAL;
		}
	}

	list_add(&driver->vfio_next, &vfio.iommu_drivers_list);

	mutex_unlock(&vfio.iommu_drivers_lock);

	return 0;
}
EXPORT_SYMBOL_GPL(vfio_register_iommu_driver);

void vfio_unregister_iommu_driver(const struct vfio_iommu_driver_ops *ops)
{
	struct vfio_iommu_driver *driver;

	mutex_lock(&vfio.iommu_drivers_lock);
	list_for_each_entry(driver, &vfio.iommu_drivers_list, vfio_next) {
		if (driver->ops == ops) {
			list_del(&driver->vfio_next);
			mutex_unlock(&vfio.iommu_drivers_lock);
			kfree(driver);
			return;
		}
	}
	mutex_unlock(&vfio.iommu_drivers_lock);
}
EXPORT_SYMBOL_GPL(vfio_unregister_iommu_driver);
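
/*
 * Illustrative sketch (not part of this file): an IOMMU backend such as
 * vfio_iommu_type1 registers its ops table at module init and removes it on
 * exit.  "my_iommu_ops" is a hypothetical vfio_iommu_driver_ops instance:
 *
 *	static int __init my_backend_init(void)
 *	{
 *		return vfio_register_iommu_driver(&my_iommu_ops);
 *	}
 *
 *	static void __exit my_backend_exit(void)
 *	{
 *		vfio_unregister_iommu_driver(&my_iommu_ops);
 *	}
 *
 * Registration only adds the ops to vfio.iommu_drivers_list; the backend is
 * not used until a container selects it via VFIO_SET_IOMMU.
 */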

static void vfio_group_get(struct vfio_group *group);

/*
 * Container objects - containers are created when /dev/vfio/vfio is
 * opened, but their lifecycle extends until the last user is done, so
 * it's freed via kref.  Must support container/group/device being
 * closed in any order.
 */
static void vfio_container_get(struct vfio_container *container)
{
	kref_get(&container->kref);
}

static void vfio_container_release(struct kref *kref)
{
	struct vfio_container *container;
	container = container_of(kref, struct vfio_container, kref);

	kfree(container);
}

static void vfio_container_put(struct vfio_container *container)
{
	kref_put(&container->kref, vfio_container_release);
}

/*
 * Group objects - create, release, get, put, search
 */
static struct vfio_group *
__vfio_group_get_from_iommu(struct iommu_group *iommu_group)
{
	struct vfio_group *group;

	list_for_each_entry(group, &vfio.group_list, vfio_next) {
		if (group->iommu_group == iommu_group) {
			vfio_group_get(group);
			return group;
		}
	}
	return NULL;
}

static struct vfio_group *
vfio_group_get_from_iommu(struct iommu_group *iommu_group)
{
	struct vfio_group *group;

	mutex_lock(&vfio.group_lock);
	group = __vfio_group_get_from_iommu(iommu_group);
	mutex_unlock(&vfio.group_lock);
	return group;
}

static void vfio_group_release(struct device *dev)
{
	struct vfio_group *group = container_of(dev, struct vfio_group, dev);

	mutex_destroy(&group->device_lock);
	iommu_group_put(group->iommu_group);
	ida_free(&vfio.group_ida, MINOR(group->dev.devt));
	kfree(group);
}

static struct vfio_group *vfio_group_alloc(struct iommu_group *iommu_group,
					   enum vfio_group_type type)
{
	struct vfio_group *group;
	int minor;

	group = kzalloc(sizeof(*group), GFP_KERNEL);
	if (!group)
		return ERR_PTR(-ENOMEM);

	minor = ida_alloc_max(&vfio.group_ida, MINORMASK, GFP_KERNEL);
	if (minor < 0) {
		kfree(group);
		return ERR_PTR(minor);
	}

	device_initialize(&group->dev);
	group->dev.devt = MKDEV(MAJOR(vfio.group_devt), minor);
	group->dev.class = vfio.class;
	group->dev.release = vfio_group_release;
	cdev_init(&group->cdev, &vfio_group_fops);
	group->cdev.owner = THIS_MODULE;

	refcount_set(&group->users, 1);
	init_rwsem(&group->group_rwsem);
	INIT_LIST_HEAD(&group->device_list);
	mutex_init(&group->device_lock);
	group->iommu_group = iommu_group;
	/* put in vfio_group_release() */
	iommu_group_ref_get(iommu_group);
	group->type = type;
	BLOCKING_INIT_NOTIFIER_HEAD(&group->notifier);

	return group;
}

static struct vfio_group *vfio_create_group(struct iommu_group *iommu_group,
		enum vfio_group_type type)
{
	struct vfio_group *group;
	struct vfio_group *ret;
	int err;

	group = vfio_group_alloc(iommu_group, type);
	if (IS_ERR(group))
		return group;

	err = dev_set_name(&group->dev, "%s%d",
			   group->type == VFIO_NO_IOMMU ? "noiommu-" : "",
			   iommu_group_id(iommu_group));
	if (err) {
		ret = ERR_PTR(err);
		goto err_put;
	}

	mutex_lock(&vfio.group_lock);

	/* Did we race creating this group? */
	ret = __vfio_group_get_from_iommu(iommu_group);
	if (ret)
		goto err_unlock;

	err = cdev_device_add(&group->cdev, &group->dev);
	if (err) {
		ret = ERR_PTR(err);
		goto err_unlock;
	}

	list_add(&group->vfio_next, &vfio.group_list);

	mutex_unlock(&vfio.group_lock);
	return group;

err_unlock:
	mutex_unlock(&vfio.group_lock);
err_put:
	put_device(&group->dev);
	return ret;
}

static void vfio_group_put(struct vfio_group *group)
{
	if (!refcount_dec_and_mutex_lock(&group->users, &vfio.group_lock))
		return;

	/*
	 * These data structures all have paired operations that can only be
	 * undone when the caller holds a live reference on the group.  Since
	 * all pairs must be undone these WARN_ON's indicate some caller did
	 * not properly hold the group reference.
	 */
	WARN_ON(!list_empty(&group->device_list));
	WARN_ON(group->container || group->container_users);
	WARN_ON(group->notifier.head);

	list_del(&group->vfio_next);
	cdev_device_del(&group->cdev, &group->dev);
	mutex_unlock(&vfio.group_lock);

	put_device(&group->dev);
}

static void vfio_group_get(struct vfio_group *group)
{
	refcount_inc(&group->users);
}

/*
 * Device objects - create, release, get, put, search
 */
/* Device reference always implies a group reference */
static void vfio_device_put(struct vfio_device *device)
{
	if (refcount_dec_and_test(&device->refcount))
		complete(&device->comp);
}

static bool vfio_device_try_get(struct vfio_device *device)
{
	return refcount_inc_not_zero(&device->refcount);
}

static struct vfio_device *vfio_group_get_device(struct vfio_group *group,
						 struct device *dev)
{
	struct vfio_device *device;

	mutex_lock(&group->device_lock);
	list_for_each_entry(device, &group->device_list, group_next) {
		if (device->dev == dev && vfio_device_try_get(device)) {
			mutex_unlock(&group->device_lock);
			return device;
		}
	}
	mutex_unlock(&group->device_lock);
	return NULL;
}

/*
 * VFIO driver API
 */
void vfio_init_group_dev(struct vfio_device *device, struct device *dev,
			 const struct vfio_device_ops *ops)
{
	init_completion(&device->comp);
	device->dev = dev;
	device->ops = ops;
}
EXPORT_SYMBOL_GPL(vfio_init_group_dev);

void vfio_uninit_group_dev(struct vfio_device *device)
{
	vfio_release_device_set(device);
}
EXPORT_SYMBOL_GPL(vfio_uninit_group_dev);
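
/*
 * Illustrative sketch (not part of this file): drivers embed a struct
 * vfio_device in their own per-device structure and recover it inside the
 * ops callbacks with container_of().  "my_device" and "my_vfio_ops" are
 * hypothetical names:
 *
 *	struct my_device {
 *		struct vfio_device vdev;
 *		void __iomem *regs;
 *	};
 *
 *	mdev = kzalloc(sizeof(*mdev), GFP_KERNEL);
 *	vfio_init_group_dev(&mdev->vdev, dev, &my_vfio_ops);
 *	...
 *	vfio_uninit_group_dev(&mdev->vdev);
 *	kfree(mdev);
 */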

static struct vfio_group *vfio_noiommu_group_alloc(struct device *dev,
		enum vfio_group_type type)
{
	struct iommu_group *iommu_group;
	struct vfio_group *group;
	int ret;

	iommu_group = iommu_group_alloc();
	if (IS_ERR(iommu_group))
		return ERR_CAST(iommu_group);

	ret = iommu_group_set_name(iommu_group, "vfio-noiommu");
	if (ret)
		goto out_put_group;
	ret = iommu_group_add_device(iommu_group, dev);
	if (ret)
		goto out_put_group;

	group = vfio_create_group(iommu_group, type);
	if (IS_ERR(group)) {
		ret = PTR_ERR(group);
		goto out_remove_device;
	}
	iommu_group_put(iommu_group);
	return group;

out_remove_device:
	iommu_group_remove_device(dev);
out_put_group:
	iommu_group_put(iommu_group);
	return ERR_PTR(ret);
}

static struct vfio_group *vfio_group_find_or_alloc(struct device *dev)
{
	struct iommu_group *iommu_group;
	struct vfio_group *group;

	iommu_group = iommu_group_get(dev);
#ifdef CONFIG_VFIO_NOIOMMU
	if (!iommu_group && noiommu) {
		/*
		 * With noiommu enabled, create an IOMMU group for devices
		 * that don't already have one, implying no IOMMU hardware or
		 * driver exists.  Taint the kernel because we're about to
		 * give a DMA capable device to a user without IOMMU
		 * protection.
		 */
		group = vfio_noiommu_group_alloc(dev, VFIO_NO_IOMMU);
		if (!IS_ERR(group)) {
			add_taint(TAINT_USER, LOCKDEP_STILL_OK);
			dev_warn(dev, "Adding kernel taint for vfio-noiommu group on device\n");
		}
		return group;
	}
#endif
	if (!iommu_group)
		return ERR_PTR(-EINVAL);

	/*
	 * VFIO always sets IOMMU_CACHE because we offer no way for userspace
	 * to restore cache coherency.  It has to be checked here because it
	 * is only valid for cases where we are using iommu groups.
	 */
	if (!device_iommu_capable(dev, IOMMU_CAP_CACHE_COHERENCY)) {
		iommu_group_put(iommu_group);
		return ERR_PTR(-EINVAL);
	}

	group = vfio_group_get_from_iommu(iommu_group);
	if (!group)
		group = vfio_create_group(iommu_group, VFIO_IOMMU);

	/* The vfio_group holds a reference to the iommu_group */
	iommu_group_put(iommu_group);
	return group;
}

static int __vfio_register_dev(struct vfio_device *device,
		struct vfio_group *group)
{
	struct vfio_device *existing_device;

	if (IS_ERR(group))
		return PTR_ERR(group);

	/*
	 * If the driver doesn't specify a set then the device is added to a
	 * singleton set just for itself.
	 */
	if (!device->dev_set)
		vfio_assign_device_set(device, device);

	existing_device = vfio_group_get_device(group, device->dev);
	if (existing_device) {
		dev_WARN(device->dev, "Device already exists on group %d\n",
			 iommu_group_id(group->iommu_group));
		vfio_device_put(existing_device);
		if (group->type == VFIO_NO_IOMMU ||
		    group->type == VFIO_EMULATED_IOMMU)
			iommu_group_remove_device(device->dev);
		vfio_group_put(group);
		return -EBUSY;
	}

	/* Our reference on group is moved to the device */
	device->group = group;

	/* Refcounting can't start until the driver calls register */
	refcount_set(&device->refcount, 1);

	mutex_lock(&group->device_lock);
	list_add(&device->group_next, &group->device_list);
	group->dev_counter++;
	mutex_unlock(&group->device_lock);

	return 0;
}

int vfio_register_group_dev(struct vfio_device *device)
{
	return __vfio_register_dev(device,
		vfio_group_find_or_alloc(device->dev));
}
EXPORT_SYMBOL_GPL(vfio_register_group_dev);

/*
 * Register a virtual device without IOMMU backing.  The user of this
 * device must not be able to directly trigger unmediated DMA.
 */
int vfio_register_emulated_iommu_dev(struct vfio_device *device)
{
	return __vfio_register_dev(device,
		vfio_noiommu_group_alloc(device->dev, VFIO_EMULATED_IOMMU));
}
EXPORT_SYMBOL_GPL(vfio_register_emulated_iommu_dev);
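
/*
 * Illustrative sketch (not part of this file): after vfio_init_group_dev()
 * a probe path registers the device, and the remove path undoes it in
 * reverse.  Physical, IOMMU-backed devices use vfio_register_group_dev();
 * mediated devices whose DMA is always translated by the parent driver use
 * vfio_register_emulated_iommu_dev():
 *
 *	ret = vfio_register_group_dev(&mdev->vdev);
 *	if (ret)
 *		goto err_uninit;
 *	...
 *	vfio_unregister_group_dev(&mdev->vdev);
 *	vfio_uninit_group_dev(&mdev->vdev);
 */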

static struct vfio_device *vfio_device_get_from_name(struct vfio_group *group,
						     char *buf)
{
	struct vfio_device *it, *device = ERR_PTR(-ENODEV);

	mutex_lock(&group->device_lock);
	list_for_each_entry(it, &group->device_list, group_next) {
		int ret;

		if (it->ops->match) {
			ret = it->ops->match(it, buf);
			if (ret < 0) {
				device = ERR_PTR(ret);
				break;
			}
		} else {
			ret = !strcmp(dev_name(it->dev), buf);
		}

		if (ret && vfio_device_try_get(it)) {
			device = it;
			break;
		}
	}
	mutex_unlock(&group->device_lock);

	return device;
}

/*
 * Decrement the device reference count and wait for the device to be
 * removed.  Open file descriptors for the device keep it busy until then.
 */
void vfio_unregister_group_dev(struct vfio_device *device)
{
	struct vfio_group *group = device->group;
	unsigned int i = 0;
	bool interrupted = false;
	long rc;

	vfio_device_put(device);
	rc = try_wait_for_completion(&device->comp);
	while (rc <= 0) {
		if (device->ops->request)
			device->ops->request(device, i++);

		if (interrupted) {
			rc = wait_for_completion_timeout(&device->comp,
							 HZ * 10);
		} else {
			rc = wait_for_completion_interruptible_timeout(
				&device->comp, HZ * 10);
			if (rc < 0) {
				interrupted = true;
				dev_warn(device->dev,
					 "Device is currently in use, task"
					 " \"%s\" (%d) "
					 "blocked until device is released",
					 current->comm, task_pid_nr(current));
			}
		}
	}

	mutex_lock(&group->device_lock);
	list_del(&device->group_next);
	group->dev_counter--;
	mutex_unlock(&group->device_lock);

	if (group->type == VFIO_NO_IOMMU || group->type == VFIO_EMULATED_IOMMU)
		iommu_group_remove_device(device->dev);

	/* Matches the get in vfio_register_group_dev() */
	vfio_group_put(group);
}
EXPORT_SYMBOL_GPL(vfio_unregister_group_dev);

/*
 * VFIO base fd, /dev/vfio/vfio
 */
static long vfio_ioctl_check_extension(struct vfio_container *container,
				       unsigned long arg)
{
	struct vfio_iommu_driver *driver;
	long ret = 0;

	down_read(&container->group_lock);

	driver = container->iommu_driver;

	switch (arg) {
		/* No base extensions yet */
	default:
		/*
		 * If no driver is set, poll all registered drivers for
		 * extensions and return the first positive result.  If
		 * a driver is already set, further queries will be passed
		 * only to that driver.
		 */
		if (!driver) {
			mutex_lock(&vfio.iommu_drivers_lock);
			list_for_each_entry(driver, &vfio.iommu_drivers_list,
					    vfio_next) {

				if (!list_empty(&container->group_list) &&
				    !vfio_iommu_driver_allowed(container,
							       driver))
					continue;
				if (!try_module_get(driver->ops->owner))
					continue;

				ret = driver->ops->ioctl(NULL,
							 VFIO_CHECK_EXTENSION,
							 arg);
				module_put(driver->ops->owner);
				if (ret > 0)
					break;
			}
			mutex_unlock(&vfio.iommu_drivers_lock);
		} else
			ret = driver->ops->ioctl(container->iommu_data,
						 VFIO_CHECK_EXTENSION, arg);
	}

	up_read(&container->group_lock);

	return ret;
}

/* hold write lock on container->group_lock */
static int __vfio_container_attach_groups(struct vfio_container *container,
					  struct vfio_iommu_driver *driver,
					  void *data)
{
	struct vfio_group *group;
	int ret = -ENODEV;

	list_for_each_entry(group, &container->group_list, container_next) {
		ret = driver->ops->attach_group(data, group->iommu_group,
						group->type);
		if (ret)
			goto unwind;
	}

	return ret;

unwind:
	list_for_each_entry_continue_reverse(group, &container->group_list,
					     container_next) {
		driver->ops->detach_group(data, group->iommu_group);
	}

	return ret;
}

static long vfio_ioctl_set_iommu(struct vfio_container *container,
				 unsigned long arg)
{
	struct vfio_iommu_driver *driver;
	long ret = -ENODEV;

	down_write(&container->group_lock);

	/*
	 * The container is designed to be an unprivileged interface while
	 * the group can be assigned to specific users.  Therefore, only by
	 * adding a group to a container does the user get the privilege of
	 * a more secure interface and only once a group is added to a
	 * container can the container support an IOMMU driver.  Therefore,
	 * only with a group added to the container is it valid to set the
	 * iommu here.
	 */
	if (list_empty(&container->group_list) || container->iommu_driver) {
		up_write(&container->group_lock);
		return -EINVAL;
	}

	mutex_lock(&vfio.iommu_drivers_lock);
	list_for_each_entry(driver, &vfio.iommu_drivers_list, vfio_next) {
		void *data;

		if (!vfio_iommu_driver_allowed(container, driver))
			continue;
		if (!try_module_get(driver->ops->owner))
			continue;

		/*
		 * The arg magic for SET_IOMMU is the same as CHECK_EXTENSION,
		 * so test which iommu driver reported support for this
		 * extension and call open on them.  We also pass them the
		 * magic, allowing a single driver to support multiple
		 * interfaces if they'd like.
		 */
		if (driver->ops->ioctl(NULL, VFIO_CHECK_EXTENSION, arg) <= 0) {
			module_put(driver->ops->owner);
			continue;
		}

		data = driver->ops->open(arg);
		if (IS_ERR(data)) {
			ret = PTR_ERR(data);
			module_put(driver->ops->owner);
			continue;
		}

		ret = __vfio_container_attach_groups(container, driver, data);
		if (ret) {
			driver->ops->release(data);
			module_put(driver->ops->owner);
			continue;
		}

		container->iommu_driver = driver;
		container->iommu_data = data;
		break;
	}

	mutex_unlock(&vfio.iommu_drivers_lock);
	up_write(&container->group_lock);

	return ret;
}

static long vfio_fops_unl_ioctl(struct file *filep,
				unsigned int cmd, unsigned long arg)
{
	struct vfio_container *container = filep->private_data;
	struct vfio_iommu_driver *driver;
	void *data;
	long ret = -EINVAL;

	if (!container)
		return ret;

	switch (cmd) {
	case VFIO_GET_API_VERSION:
		ret = VFIO_API_VERSION;
		break;
	case VFIO_CHECK_EXTENSION:
		ret = vfio_ioctl_check_extension(container, arg);
		break;
	case VFIO_SET_IOMMU:
		ret = vfio_ioctl_set_iommu(container, arg);
		break;
	default:
		driver = container->iommu_driver;
		data = container->iommu_data;

		/* Pass through all unrecognized ioctls to the IOMMU driver */
		if (driver)
			ret = driver->ops->ioctl(data, cmd, arg);
	}

	return ret;
}

static int vfio_fops_open(struct inode *inode, struct file *filep)
{
	struct vfio_container *container;

	container = kzalloc(sizeof(*container), GFP_KERNEL);
	if (!container)
		return -ENOMEM;

	INIT_LIST_HEAD(&container->group_list);
	init_rwsem(&container->group_lock);
	kref_init(&container->kref);

	filep->private_data = container;

	return 0;
}

static int vfio_fops_release(struct inode *inode, struct file *filep)
{
	struct vfio_container *container = filep->private_data;
	struct vfio_iommu_driver *driver = container->iommu_driver;

	if (driver && driver->ops->notify)
		driver->ops->notify(container->iommu_data,
				    VFIO_IOMMU_CONTAINER_CLOSE);

	filep->private_data = NULL;

	vfio_container_put(container);

	return 0;
}

static const struct file_operations vfio_fops = {
	.owner		= THIS_MODULE,
	.open		= vfio_fops_open,
	.release	= vfio_fops_release,
	.unlocked_ioctl	= vfio_fops_unl_ioctl,
	.compat_ioctl	= compat_ptr_ioctl,
};

/*
 * VFIO Group fd, /dev/vfio/$GROUP
 */
static void __vfio_group_unset_container(struct vfio_group *group)
{
	struct vfio_container *container = group->container;
	struct vfio_iommu_driver *driver;

	lockdep_assert_held_write(&group->group_rwsem);

	down_write(&container->group_lock);

	driver = container->iommu_driver;
	if (driver)
		driver->ops->detach_group(container->iommu_data,
					  group->iommu_group);

	if (group->type == VFIO_IOMMU)
		iommu_group_release_dma_owner(group->iommu_group);

	group->container = NULL;
	group->container_users = 0;
	list_del(&group->container_next);

	/* Detaching the last group deprivileges a container, remove iommu */
	if (driver && list_empty(&container->group_list)) {
		driver->ops->release(container->iommu_data);
		module_put(driver->ops->owner);
		container->iommu_driver = NULL;
		container->iommu_data = NULL;
	}

	up_write(&container->group_lock);

	vfio_container_put(container);
}

/*
 * VFIO_GROUP_UNSET_CONTAINER should fail if there are other users or
 * if there was no container to unset.  Since the ioctl is called on
 * the group, we know that still exists, therefore the only valid
 * transition here is 1->0.
 */
static int vfio_group_unset_container(struct vfio_group *group)
{
	lockdep_assert_held_write(&group->group_rwsem);

	if (!group->container)
		return -EINVAL;
	if (group->container_users != 1)
		return -EBUSY;
	__vfio_group_unset_container(group);
	return 0;
}

static int vfio_group_set_container(struct vfio_group *group, int container_fd)
{
	struct fd f;
	struct vfio_container *container;
	struct vfio_iommu_driver *driver;
	int ret = 0;

	lockdep_assert_held_write(&group->group_rwsem);

	if (group->container || WARN_ON(group->container_users))
		return -EINVAL;

	if (group->type == VFIO_NO_IOMMU && !capable(CAP_SYS_RAWIO))
		return -EPERM;

	f = fdget(container_fd);
	if (!f.file)
		return -EBADF;

	/* Sanity check, is this really our fd? */
	if (f.file->f_op != &vfio_fops) {
		fdput(f);
		return -EINVAL;
	}

	container = f.file->private_data;
	WARN_ON(!container); /* fget ensures we don't race vfio_release */

	down_write(&container->group_lock);

	/* Real groups and fake groups cannot mix */
	if (!list_empty(&container->group_list) &&
	    container->noiommu != (group->type == VFIO_NO_IOMMU)) {
		ret = -EPERM;
		goto unlock_out;
	}

	if (group->type == VFIO_IOMMU) {
		ret = iommu_group_claim_dma_owner(group->iommu_group, f.file);
		if (ret)
			goto unlock_out;
	}

	driver = container->iommu_driver;
	if (driver) {
		ret = driver->ops->attach_group(container->iommu_data,
						group->iommu_group,
						group->type);
		if (ret) {
			if (group->type == VFIO_IOMMU)
				iommu_group_release_dma_owner(
					group->iommu_group);
			goto unlock_out;
		}
	}

	group->container = container;
	group->container_users = 1;
	container->noiommu = (group->type == VFIO_NO_IOMMU);
	list_add(&group->container_next, &container->group_list);

	/* Get a reference on the container and mark a user within the group */
	vfio_container_get(container);

unlock_out:
	up_write(&container->group_lock);
	fdput(f);
	return ret;
}

static const struct file_operations vfio_device_fops;

/* true if the vfio_device has open_device() called but not close_device() */
static bool vfio_assert_device_open(struct vfio_device *device)
{
	return !WARN_ON_ONCE(!READ_ONCE(device->open_count));
}

static int vfio_device_assign_container(struct vfio_device *device)
{
	struct vfio_group *group = device->group;

	lockdep_assert_held_write(&group->group_rwsem);

	if (!group->container || !group->container->iommu_driver ||
	    WARN_ON(!group->container_users))
		return -EINVAL;

	if (group->type == VFIO_NO_IOMMU && !capable(CAP_SYS_RAWIO))
		return -EPERM;

	get_file(group->opened_file);
	group->container_users++;
	return 0;
}

static void vfio_device_unassign_container(struct vfio_device *device)
{
	down_write(&device->group->group_rwsem);
	WARN_ON(device->group->container_users <= 1);
	device->group->container_users--;
	fput(device->group->opened_file);
	up_write(&device->group->group_rwsem);
}

static struct file *vfio_device_open(struct vfio_device *device)
{
	struct vfio_iommu_driver *iommu_driver;
	struct file *filep;
	int ret;

	down_write(&device->group->group_rwsem);
	ret = vfio_device_assign_container(device);
	up_write(&device->group->group_rwsem);
	if (ret)
		return ERR_PTR(ret);

	if (!try_module_get(device->dev->driver->owner)) {
		ret = -ENODEV;
		goto err_unassign_container;
	}

	mutex_lock(&device->dev_set->lock);
	device->open_count++;
	if (device->open_count == 1) {
		/*
		 * Here we pass the KVM pointer with the group under the read
		 * lock.  If the device driver will use it, it must obtain a
		 * reference and release it during close_device.
		 */
		down_read(&device->group->group_rwsem);
		device->kvm = device->group->kvm;

		if (device->ops->open_device) {
			ret = device->ops->open_device(device);
			if (ret)
				goto err_undo_count;
		}

		iommu_driver = device->group->container->iommu_driver;
		if (iommu_driver && iommu_driver->ops->register_device)
			iommu_driver->ops->register_device(
				device->group->container->iommu_data, device);

		up_read(&device->group->group_rwsem);
	}
	mutex_unlock(&device->dev_set->lock);

	/*
	 * We can't use anon_inode_getfd() because we need to modify
	 * the f_mode flags directly to allow more than just ioctls
	 */
	filep = anon_inode_getfile("[vfio-device]", &vfio_device_fops,
				   device, O_RDWR);
	if (IS_ERR(filep)) {
		ret = PTR_ERR(filep);
		goto err_close_device;
	}

	/*
	 * TODO: add an anon_inode interface to do this.
	 * Appears to be missing by lack of need rather than
	 * explicitly prevented.  Now there's need.
	 */
	filep->f_mode |= (FMODE_PREAD | FMODE_PWRITE);

	if (device->group->type == VFIO_NO_IOMMU)
		dev_warn(device->dev, "vfio-noiommu device opened by user "
			 "(%s:%d)\n", current->comm, task_pid_nr(current));
	/*
	 * On success the ref of device is moved to the file and
	 * put in vfio_device_fops_release()
	 */
	return filep;

err_close_device:
	mutex_lock(&device->dev_set->lock);
	down_read(&device->group->group_rwsem);
	if (device->open_count == 1 && device->ops->close_device) {
		device->ops->close_device(device);

		iommu_driver = device->group->container->iommu_driver;
		if (iommu_driver && iommu_driver->ops->unregister_device)
			iommu_driver->ops->unregister_device(
				device->group->container->iommu_data, device);
	}
err_undo_count:
	up_read(&device->group->group_rwsem);
	device->open_count--;
	if (device->open_count == 0 && device->kvm)
		device->kvm = NULL;
	mutex_unlock(&device->dev_set->lock);
	module_put(device->dev->driver->owner);
err_unassign_container:
	vfio_device_unassign_container(device);
	return ERR_PTR(ret);
}

static int vfio_group_get_device_fd(struct vfio_group *group, char *buf)
{
	struct vfio_device *device;
	struct file *filep;
	int fdno;
	int ret;

	device = vfio_device_get_from_name(group, buf);
	if (IS_ERR(device))
		return PTR_ERR(device);

	fdno = get_unused_fd_flags(O_CLOEXEC);
	if (fdno < 0) {
		ret = fdno;
		goto err_put_device;
	}

	filep = vfio_device_open(device);
	if (IS_ERR(filep)) {
		ret = PTR_ERR(filep);
		goto err_put_fdno;
	}

	fd_install(fdno, filep);
	return fdno;

err_put_fdno:
	put_unused_fd(fdno);
err_put_device:
	vfio_device_put(device);
	return ret;
}

static long vfio_group_fops_unl_ioctl(struct file *filep,
				      unsigned int cmd, unsigned long arg)
{
	struct vfio_group *group = filep->private_data;
	long ret = -ENOTTY;

	switch (cmd) {
	case VFIO_GROUP_GET_STATUS:
	{
		struct vfio_group_status status;
		unsigned long minsz;

		minsz = offsetofend(struct vfio_group_status, flags);

		if (copy_from_user(&status, (void __user *)arg, minsz))
			return -EFAULT;

		if (status.argsz < minsz)
			return -EINVAL;

		status.flags = 0;

		down_read(&group->group_rwsem);
		if (group->container)
			status.flags |= VFIO_GROUP_FLAGS_CONTAINER_SET |
					VFIO_GROUP_FLAGS_VIABLE;
		else if (!iommu_group_dma_owner_claimed(group->iommu_group))
			status.flags |= VFIO_GROUP_FLAGS_VIABLE;
		up_read(&group->group_rwsem);

		if (copy_to_user((void __user *)arg, &status, minsz))
			return -EFAULT;

		ret = 0;
		break;
	}
	case VFIO_GROUP_SET_CONTAINER:
	{
		int fd;

		if (get_user(fd, (int __user *)arg))
			return -EFAULT;

		if (fd < 0)
			return -EINVAL;

		down_write(&group->group_rwsem);
		ret = vfio_group_set_container(group, fd);
		up_write(&group->group_rwsem);
		break;
	}
	case VFIO_GROUP_UNSET_CONTAINER:
		down_write(&group->group_rwsem);
		ret = vfio_group_unset_container(group);
		up_write(&group->group_rwsem);
		break;
	case VFIO_GROUP_GET_DEVICE_FD:
	{
		char *buf;

		buf = strndup_user((const char __user *)arg, PAGE_SIZE);
		if (IS_ERR(buf))
			return PTR_ERR(buf);

		ret = vfio_group_get_device_fd(group, buf);
		kfree(buf);
		break;
	}
	}

	return ret;
}

static int vfio_group_fops_open(struct inode *inode, struct file *filep)
{
	struct vfio_group *group =
		container_of(inode->i_cdev, struct vfio_group, cdev);
	int ret;

	down_write(&group->group_rwsem);

	/* users can be zero if this races with vfio_group_put() */
	if (!refcount_inc_not_zero(&group->users)) {
		ret = -ENODEV;
		goto err_unlock;
	}

	if (group->type == VFIO_NO_IOMMU && !capable(CAP_SYS_RAWIO)) {
		ret = -EPERM;
		goto err_put;
	}

	/*
	 * Do we need multiple instances of the group open?  Seems not.
	 */
	if (group->opened_file) {
		ret = -EBUSY;
		goto err_put;
	}
	group->opened_file = filep;
	filep->private_data = group;

	up_write(&group->group_rwsem);
	return 0;
err_put:
	vfio_group_put(group);
err_unlock:
	up_write(&group->group_rwsem);
	return ret;
}

static int vfio_group_fops_release(struct inode *inode, struct file *filep)
{
	struct vfio_group *group = filep->private_data;

	filep->private_data = NULL;

	down_write(&group->group_rwsem);

	/*
	 * Device FDs hold a group file reference, therefore the group release
	 * is only called when there are no open devices.
	 */
	WARN_ON(group->notifier.head);
	if (group->container) {
		WARN_ON(group->container_users != 1);
		__vfio_group_unset_container(group);
	}
	group->opened_file = NULL;
	up_write(&group->group_rwsem);

	vfio_group_put(group);

	return 0;
}

static const struct file_operations vfio_group_fops = {
	.owner		= THIS_MODULE,
	.unlocked_ioctl	= vfio_group_fops_unl_ioctl,
	.compat_ioctl	= compat_ptr_ioctl,
	.open		= vfio_group_fops_open,
	.release	= vfio_group_fops_release,
};
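
/*
 * Illustrative userspace flow over the three fds (container, group,
 * device), following Documentation/driver-api/vfio.rst; the group number
 * and PCI address are examples:
 *
 *	int container = open("/dev/vfio/vfio", O_RDWR);
 *	if (ioctl(container, VFIO_GET_API_VERSION) != VFIO_API_VERSION)
 *		return -1;
 *
 *	int group = open("/dev/vfio/26", O_RDWR);
 *	struct vfio_group_status status = { .argsz = sizeof(status) };
 *	ioctl(group, VFIO_GROUP_GET_STATUS, &status);
 *	if (!(status.flags & VFIO_GROUP_FLAGS_VIABLE))
 *		return -1;
 *
 *	ioctl(group, VFIO_GROUP_SET_CONTAINER, &container);
 *	ioctl(container, VFIO_SET_IOMMU, VFIO_TYPE1_IOMMU);
 *	int device = ioctl(group, VFIO_GROUP_GET_DEVICE_FD, "0000:06:0d.0");
 */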

/*
 * VFIO Device fd
 */
static int vfio_device_fops_release(struct inode *inode, struct file *filep)
{
	struct vfio_device *device = filep->private_data;
	struct vfio_iommu_driver *iommu_driver;

	mutex_lock(&device->dev_set->lock);
	vfio_assert_device_open(device);
	down_read(&device->group->group_rwsem);
	if (device->open_count == 1 && device->ops->close_device)
		device->ops->close_device(device);

	iommu_driver = device->group->container->iommu_driver;
	if (iommu_driver && iommu_driver->ops->unregister_device)
		iommu_driver->ops->unregister_device(
			device->group->container->iommu_data, device);
	up_read(&device->group->group_rwsem);
	device->open_count--;
	if (device->open_count == 0)
		device->kvm = NULL;
	mutex_unlock(&device->dev_set->lock);

	module_put(device->dev->driver->owner);

	vfio_device_unassign_container(device);

	vfio_device_put(device);

	return 0;
}

/**
 * vfio_mig_get_next_state - Compute the next step in the FSM
 * @device: The vfio_device
 * @cur_fsm: The current state the device is in
 * @new_fsm: The target state to reach
 * @next_fsm: Pointer to the next step to get to new_fsm
 *
 * Return 0 upon success, otherwise -errno.  Upon success the next step
 * toward new_fsm is returned in next_fsm; drivers loop over this function,
 * executing one arc at a time, until *next_fsm == new_fsm.
 */
int vfio_mig_get_next_state(struct vfio_device *device,
			    enum vfio_device_mig_state cur_fsm,
			    enum vfio_device_mig_state new_fsm,
			    enum vfio_device_mig_state *next_fsm)
{
	enum { VFIO_DEVICE_NUM_STATES = VFIO_DEVICE_STATE_RUNNING_P2P + 1 };
	/*
	 * The coding in this table requires the driver to implement the
	 * following FSM arcs:
	 *         RESUMING -> STOP
	 *         STOP -> RESUMING
	 *         STOP -> STOP_COPY
	 *         STOP_COPY -> STOP
	 *
	 * If P2P is supported then the driver must also implement these FSM
	 * arcs:
	 *         RUNNING -> RUNNING_P2P
	 *         RUNNING_P2P -> RUNNING
	 *         RUNNING_P2P -> STOP
	 *         STOP -> RUNNING_P2P
	 * Without P2P the driver must implement:
	 *         RUNNING -> STOP
	 *         STOP -> RUNNING
	 *
	 * The coding will step through multiple states for some combination
	 * transitions; if all optional features are supported, this means
	 * the following ones:
	 *         RESUMING -> STOP -> RUNNING_P2P
	 *         RESUMING -> STOP -> RUNNING_P2P -> RUNNING
	 *         RESUMING -> STOP -> STOP_COPY
	 *         RUNNING -> RUNNING_P2P -> STOP
	 *         RUNNING -> RUNNING_P2P -> STOP -> RESUMING
	 *         RUNNING -> RUNNING_P2P -> STOP -> STOP_COPY
	 *         RUNNING_P2P -> STOP -> RESUMING
	 *         RUNNING_P2P -> STOP -> STOP_COPY
	 *         STOP -> RUNNING_P2P -> RUNNING
	 *         STOP_COPY -> STOP -> RESUMING
	 *         STOP_COPY -> STOP -> RUNNING_P2P
	 *         STOP_COPY -> STOP -> RUNNING_P2P -> RUNNING
	 */
	static const u8 vfio_from_fsm_table[VFIO_DEVICE_NUM_STATES][VFIO_DEVICE_NUM_STATES] = {
		[VFIO_DEVICE_STATE_STOP] = {
			[VFIO_DEVICE_STATE_STOP] = VFIO_DEVICE_STATE_STOP,
			[VFIO_DEVICE_STATE_RUNNING] = VFIO_DEVICE_STATE_RUNNING_P2P,
			[VFIO_DEVICE_STATE_STOP_COPY] = VFIO_DEVICE_STATE_STOP_COPY,
			[VFIO_DEVICE_STATE_RESUMING] = VFIO_DEVICE_STATE_RESUMING,
			[VFIO_DEVICE_STATE_RUNNING_P2P] = VFIO_DEVICE_STATE_RUNNING_P2P,
			[VFIO_DEVICE_STATE_ERROR] = VFIO_DEVICE_STATE_ERROR,
		},
		[VFIO_DEVICE_STATE_RUNNING] = {
			[VFIO_DEVICE_STATE_STOP] = VFIO_DEVICE_STATE_RUNNING_P2P,
			[VFIO_DEVICE_STATE_RUNNING] = VFIO_DEVICE_STATE_RUNNING,
			[VFIO_DEVICE_STATE_STOP_COPY] = VFIO_DEVICE_STATE_RUNNING_P2P,
			[VFIO_DEVICE_STATE_RESUMING] = VFIO_DEVICE_STATE_RUNNING_P2P,
			[VFIO_DEVICE_STATE_RUNNING_P2P] = VFIO_DEVICE_STATE_RUNNING_P2P,
			[VFIO_DEVICE_STATE_ERROR] = VFIO_DEVICE_STATE_ERROR,
		},
		[VFIO_DEVICE_STATE_STOP_COPY] = {
			[VFIO_DEVICE_STATE_STOP] = VFIO_DEVICE_STATE_STOP,
			[VFIO_DEVICE_STATE_RUNNING] = VFIO_DEVICE_STATE_STOP,
			[VFIO_DEVICE_STATE_STOP_COPY] = VFIO_DEVICE_STATE_STOP_COPY,
			[VFIO_DEVICE_STATE_RESUMING] = VFIO_DEVICE_STATE_STOP,
			[VFIO_DEVICE_STATE_RUNNING_P2P] = VFIO_DEVICE_STATE_STOP,
			[VFIO_DEVICE_STATE_ERROR] = VFIO_DEVICE_STATE_ERROR,
		},
		[VFIO_DEVICE_STATE_RESUMING] = {
			[VFIO_DEVICE_STATE_STOP] = VFIO_DEVICE_STATE_STOP,
			[VFIO_DEVICE_STATE_RUNNING] = VFIO_DEVICE_STATE_STOP,
			[VFIO_DEVICE_STATE_STOP_COPY] = VFIO_DEVICE_STATE_STOP,
			[VFIO_DEVICE_STATE_RESUMING] = VFIO_DEVICE_STATE_RESUMING,
			[VFIO_DEVICE_STATE_RUNNING_P2P] = VFIO_DEVICE_STATE_STOP,
			[VFIO_DEVICE_STATE_ERROR] = VFIO_DEVICE_STATE_ERROR,
		},
		[VFIO_DEVICE_STATE_RUNNING_P2P] = {
			[VFIO_DEVICE_STATE_STOP] = VFIO_DEVICE_STATE_STOP,
			[VFIO_DEVICE_STATE_RUNNING] = VFIO_DEVICE_STATE_RUNNING,
			[VFIO_DEVICE_STATE_STOP_COPY] = VFIO_DEVICE_STATE_STOP,
			[VFIO_DEVICE_STATE_RESUMING] = VFIO_DEVICE_STATE_STOP,
			[VFIO_DEVICE_STATE_RUNNING_P2P] = VFIO_DEVICE_STATE_RUNNING_P2P,
			[VFIO_DEVICE_STATE_ERROR] = VFIO_DEVICE_STATE_ERROR,
		},
		[VFIO_DEVICE_STATE_ERROR] = {
			[VFIO_DEVICE_STATE_STOP] = VFIO_DEVICE_STATE_ERROR,
			[VFIO_DEVICE_STATE_RUNNING] = VFIO_DEVICE_STATE_ERROR,
			[VFIO_DEVICE_STATE_STOP_COPY] = VFIO_DEVICE_STATE_ERROR,
			[VFIO_DEVICE_STATE_RESUMING] = VFIO_DEVICE_STATE_ERROR,
			[VFIO_DEVICE_STATE_RUNNING_P2P] = VFIO_DEVICE_STATE_ERROR,
			[VFIO_DEVICE_STATE_ERROR] = VFIO_DEVICE_STATE_ERROR,
		},
	};

	static const unsigned int state_flags_table[VFIO_DEVICE_NUM_STATES] = {
		[VFIO_DEVICE_STATE_STOP] = VFIO_MIGRATION_STOP_COPY,
		[VFIO_DEVICE_STATE_RUNNING] = VFIO_MIGRATION_STOP_COPY,
		[VFIO_DEVICE_STATE_STOP_COPY] = VFIO_MIGRATION_STOP_COPY,
		[VFIO_DEVICE_STATE_RESUMING] = VFIO_MIGRATION_STOP_COPY,
		[VFIO_DEVICE_STATE_RUNNING_P2P] =
			VFIO_MIGRATION_STOP_COPY | VFIO_MIGRATION_P2P,
		[VFIO_DEVICE_STATE_ERROR] = ~0U,
	};

	if (WARN_ON(cur_fsm >= ARRAY_SIZE(vfio_from_fsm_table) ||
		    (state_flags_table[cur_fsm] & device->migration_flags) !=
			state_flags_table[cur_fsm]))
		return -EINVAL;

	if (new_fsm >= ARRAY_SIZE(vfio_from_fsm_table) ||
	    (state_flags_table[new_fsm] & device->migration_flags) !=
			state_flags_table[new_fsm])
		return -EINVAL;

	/*
	 * Arcs touching optional and unsupported states are skipped over.
	 * The driver will instead see an arc from the original state to the
	 * next logical state, as per the above comment.
	 */
	*next_fsm = vfio_from_fsm_table[cur_fsm][new_fsm];
	while ((state_flags_table[*next_fsm] & device->migration_flags) !=
			state_flags_table[*next_fsm])
		*next_fsm = vfio_from_fsm_table[*next_fsm][new_fsm];

	return (*next_fsm != VFIO_DEVICE_STATE_ERROR) ? 0 : -EINVAL;
}
EXPORT_SYMBOL_GPL(vfio_mig_get_next_state);
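
/*
 * Typical driver use of vfio_mig_get_next_state(): loop until the target
 * state is reached, executing one supported arc at a time.  "my_issue_arc"
 * is a hypothetical driver callback:
 *
 *	while (cur_fsm != new_fsm) {
 *		ret = vfio_mig_get_next_state(vdev, cur_fsm, new_fsm,
 *					      &next_fsm);
 *		if (ret)
 *			break;
 *		ret = my_issue_arc(vdev, cur_fsm, next_fsm);
 *		if (ret)
 *			break;
 *		cur_fsm = next_fsm;
 *	}
 */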

/*
 * Convert the driver's struct file into a FD number and return it to
 * userspace.
 */
static int vfio_ioct_mig_return_fd(struct file *filp, void __user *arg,
				   struct vfio_device_feature_mig_state *mig)
{
	int ret;
	int fd;

	fd = get_unused_fd_flags(O_CLOEXEC);
	if (fd < 0) {
		ret = fd;
		goto out_fput;
	}

	mig->data_fd = fd;
	if (copy_to_user(arg, mig, sizeof(*mig))) {
		ret = -EFAULT;
		goto out_put_unused;
	}
	fd_install(fd, filp);
	return 0;

out_put_unused:
	put_unused_fd(fd);
out_fput:
	fput(filp);
	return ret;
}

static int
vfio_ioctl_device_feature_mig_device_state(struct vfio_device *device,
					   u32 flags, void __user *arg,
					   size_t argsz)
{
	size_t minsz =
		offsetofend(struct vfio_device_feature_mig_state, data_fd);
	struct vfio_device_feature_mig_state mig;
	struct file *filp = NULL;
	int ret;

	if (!device->mig_ops)
		return -ENOTTY;

	ret = vfio_check_feature(flags, argsz,
				 VFIO_DEVICE_FEATURE_SET |
				 VFIO_DEVICE_FEATURE_GET,
				 sizeof(mig));
	if (ret != 1)
		return ret;

	if (copy_from_user(&mig, arg, minsz))
		return -EFAULT;

	if (flags & VFIO_DEVICE_FEATURE_GET) {
		enum vfio_device_mig_state curr_state;

		ret = device->mig_ops->migration_get_state(device,
							   &curr_state);
		if (ret)
			return ret;
		mig.device_state = curr_state;
		goto out_copy;
	}

	/* Handle the VFIO_DEVICE_FEATURE_SET */
	filp = device->mig_ops->migration_set_state(device, mig.device_state);
	if (IS_ERR(filp) || !filp)
		goto out_copy;

	return vfio_ioct_mig_return_fd(filp, arg, &mig);
out_copy:
	mig.data_fd = -1;
	if (copy_to_user(arg, &mig, sizeof(mig)))
		return -EFAULT;
	if (IS_ERR(filp))
		return PTR_ERR(filp);
	return 0;
}

static int vfio_ioctl_device_feature_migration(struct vfio_device *device,
					       u32 flags, void __user *arg,
					       size_t argsz)
{
	struct vfio_device_feature_migration mig = {
		.flags = device->migration_flags,
	};
	int ret;

	if (!device->mig_ops)
		return -ENOTTY;

	ret = vfio_check_feature(flags, argsz, VFIO_DEVICE_FEATURE_GET,
				 sizeof(mig));
	if (ret != 1)
		return ret;
	if (copy_to_user(arg, &mig, sizeof(mig)))
		return -EFAULT;
	return 0;
}

static int vfio_ioctl_device_feature(struct vfio_device *device,
				     struct vfio_device_feature __user *arg)
{
	size_t minsz = offsetofend(struct vfio_device_feature, flags);
	struct vfio_device_feature feature;

	if (copy_from_user(&feature, arg, minsz))
		return -EFAULT;

	if (feature.argsz < minsz)
		return -EINVAL;

	/* Check unknown flags */
	if (feature.flags &
	    ~(VFIO_DEVICE_FEATURE_MASK | VFIO_DEVICE_FEATURE_SET |
	      VFIO_DEVICE_FEATURE_GET | VFIO_DEVICE_FEATURE_PROBE))
		return -EINVAL;

	/* GET & SET are mutually exclusive except with PROBE */
	if (!(feature.flags & VFIO_DEVICE_FEATURE_PROBE) &&
	    (feature.flags & VFIO_DEVICE_FEATURE_SET) &&
	    (feature.flags & VFIO_DEVICE_FEATURE_GET))
		return -EINVAL;

	switch (feature.flags & VFIO_DEVICE_FEATURE_MASK) {
	case VFIO_DEVICE_FEATURE_MIGRATION:
		return vfio_ioctl_device_feature_migration(
			device, feature.flags, arg->data,
			feature.argsz - minsz);
	case VFIO_DEVICE_FEATURE_MIG_DEVICE_STATE:
		return vfio_ioctl_device_feature_mig_device_state(
			device, feature.flags, arg->data,
			feature.argsz - minsz);
	default:
		if (unlikely(!device->ops->device_feature))
			return -EINVAL;
		return device->ops->device_feature(device, feature.flags,
						   arg->data,
						   feature.argsz - minsz);
	}
}
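
/*
 * Illustrative userspace call (a sketch, not a full ABI example): the
 * variable-length feature payload directly follows the header and argsz
 * covers both, so querying the migration state can look like:
 *
 *	struct {
 *		struct vfio_device_feature hdr;
 *		struct vfio_device_feature_mig_state mig;
 *	} q = {
 *		.hdr.argsz = sizeof(q),
 *		.hdr.flags = VFIO_DEVICE_FEATURE_GET |
 *			     VFIO_DEVICE_FEATURE_MIG_DEVICE_STATE,
 *	};
 *	ioctl(device_fd, VFIO_DEVICE_FEATURE, &q);
 *	printf("state %u\n", q.mig.device_state);
 */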

static long vfio_device_fops_unl_ioctl(struct file *filep,
				       unsigned int cmd, unsigned long arg)
{
	struct vfio_device *device = filep->private_data;

	switch (cmd) {
	case VFIO_DEVICE_FEATURE:
		return vfio_ioctl_device_feature(device, (void __user *)arg);
	default:
		if (unlikely(!device->ops->ioctl))
			return -EINVAL;
		return device->ops->ioctl(device, cmd, arg);
	}
}

static ssize_t vfio_device_fops_read(struct file *filep, char __user *buf,
				     size_t count, loff_t *ppos)
{
	struct vfio_device *device = filep->private_data;

	if (unlikely(!device->ops->read))
		return -EINVAL;

	return device->ops->read(device, buf, count, ppos);
}

static ssize_t vfio_device_fops_write(struct file *filep,
				      const char __user *buf,
				      size_t count, loff_t *ppos)
{
	struct vfio_device *device = filep->private_data;

	if (unlikely(!device->ops->write))
		return -EINVAL;

	return device->ops->write(device, buf, count, ppos);
}

static int vfio_device_fops_mmap(struct file *filep, struct vm_area_struct *vma)
{
	struct vfio_device *device = filep->private_data;

	if (unlikely(!device->ops->mmap))
		return -EINVAL;

	return device->ops->mmap(device, vma);
}

static const struct file_operations vfio_device_fops = {
	.owner		= THIS_MODULE,
	.release	= vfio_device_fops_release,
	.read		= vfio_device_fops_read,
	.write		= vfio_device_fops_write,
	.unlocked_ioctl	= vfio_device_fops_unl_ioctl,
	.compat_ioctl	= compat_ptr_ioctl,
	.mmap		= vfio_device_fops_mmap,
};

/**
 * vfio_file_iommu_group - Return the struct iommu_group for the vfio group file
 * @file: VFIO group file
 *
 * The returned iommu_group is valid as long as a ref is held on the file.
 */
struct iommu_group *vfio_file_iommu_group(struct file *file)
{
	struct vfio_group *group = file->private_data;

	if (file->f_op != &vfio_group_fops)
		return NULL;
	return group->iommu_group;
}
EXPORT_SYMBOL_GPL(vfio_file_iommu_group);

/**
 * vfio_file_enforced_coherent - True if the DMA associated with the VFIO file
 *        is always CPU cache coherent
 * @file: VFIO group file
 *
 * Enforced coherency means that the IOMMU ignores things like the PCIe
 * no-snoop bit in DMA transactions.  A return of false indicates that the
 * user has rights to access additional instructions such as wbinvd on x86.
 */
bool vfio_file_enforced_coherent(struct file *file)
{
	struct vfio_group *group = file->private_data;
	bool ret;

	if (file->f_op != &vfio_group_fops)
		return true;

	down_read(&group->group_rwsem);
	if (group->container) {
		ret = vfio_ioctl_check_extension(group->container,
						 VFIO_DMA_CC_IOMMU);
	} else {
		/*
		 * Since the coherency state is determined only once a
		 * container is attached the user must do so before they can
		 * prove they have permission.
		 */
		ret = true;
	}
	up_read(&group->group_rwsem);
	return ret;
}
EXPORT_SYMBOL_GPL(vfio_file_enforced_coherent);

/**
 * vfio_file_set_kvm - Link a kvm with VFIO drivers
 * @file: VFIO group file
 * @kvm: KVM to link
 *
 * When a VFIO device is first opened the KVM will be available in
 * device->kvm if one was associated with the group.
 */
void vfio_file_set_kvm(struct file *file, struct kvm *kvm)
{
	struct vfio_group *group = file->private_data;

	if (file->f_op != &vfio_group_fops)
		return;

	down_write(&group->group_rwsem);
	group->kvm = kvm;
	up_write(&group->group_rwsem);
}
EXPORT_SYMBOL_GPL(vfio_file_set_kvm);

/**
 * vfio_file_has_dev - True if the VFIO file is a handle for device
 * @file: VFIO file to check
 * @device: Device that must be part of the file
 *
 * Returns true if given file has permission to manipulate the given device.
 */
bool vfio_file_has_dev(struct file *file, struct vfio_device *device)
{
	struct vfio_group *group = file->private_data;

	if (file->f_op != &vfio_group_fops)
		return false;

	return group == device->group;
}
EXPORT_SYMBOL_GPL(vfio_file_has_dev);
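
/*
 * Illustrative sketch of a kernel consumer (modeled loosely on
 * virt/kvm/vfio.c and simplified; real users hold a long-lived file
 * reference rather than dropping it immediately): given a group fd from
 * userspace, validate it and link a kvm:
 *
 *	struct fd f = fdget(group_fd);
 *
 *	if (!f.file || !vfio_file_iommu_group(f.file))
 *		return -EINVAL;
 *	vfio_file_set_kvm(f.file, kvm);
 *	if (!vfio_file_enforced_coherent(f.file))
 *		allow_noncoherent_dma_quirks();
 *	fdput(f);
 */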

/*
 * Sub-module support
 */
/*
 * Helper for managing a buffer of info chain capabilities, allocate or
 * reallocate a buffer with additional @size, filling in @id and @version
 * of the capability.  A pointer to the new capability is returned.
 *
 * NB. The chain is based at the head of the buffer, so new entries are
 * added to the tail, vfio_info_cap_add() should be called after completely
 * filling in a previous entry.
 */
struct vfio_info_cap_header *vfio_info_cap_add(struct vfio_info_cap *caps,
					       size_t size, u16 id,
					       u16 version)
{
	void *buf;
	struct vfio_info_cap_header *header, *tmp;

	buf = krealloc(caps->buf, caps->size + size, GFP_KERNEL);
	if (!buf) {
		kfree(caps->buf);
		caps->buf = NULL;
		caps->size = 0;
		return ERR_PTR(-ENOMEM);
	}

	caps->buf = buf;
	header = buf + caps->size;

	/* Eventually copied to user buffer, zero */
	memset(header, 0, size);

	header->id = id;
	header->version = version;

	/* Add to the end of the capability chain */
	for (tmp = buf; tmp->next; tmp = buf + tmp->next)
		; /* nothing */

	tmp->next = caps->size;
	caps->size += size;

	return header;
}
EXPORT_SYMBOL_GPL(vfio_info_cap_add);

void vfio_info_cap_shift(struct vfio_info_cap *caps, size_t offset)
{
	struct vfio_info_cap_header *tmp;
	void *buf = (void *)caps->buf;

	for (tmp = buf; tmp->next; tmp = buf + tmp->next - offset)
		tmp->next += offset;
}
EXPORT_SYMBOL(vfio_info_cap_shift);
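
/*
 * Illustrative sketch (modeled on how vfio-pci builds region info chains):
 * capabilities are accumulated in a local vfio_info_cap and, if the user
 * buffer is large enough, their next offsets are shifted by the base struct
 * size before copying out:
 *
 *	struct vfio_info_cap caps = { .buf = NULL, .size = 0 };
 *
 *	header = vfio_info_cap_add(&caps, size,
 *				   VFIO_REGION_INFO_CAP_TYPE, 1);
 *	...
 *	if (info.argsz >= sizeof(info) + caps.size) {
 *		info.flags |= VFIO_REGION_INFO_FLAG_CAPS;
 *		info.cap_offset = sizeof(info);
 *		vfio_info_cap_shift(&caps, sizeof(info));
 *		if (copy_to_user((void __user *)(arg + sizeof(info)),
 *				 caps.buf, caps.size))
 *			ret = -EFAULT;
 *	}
 *	kfree(caps.buf);
 */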

int vfio_info_add_capability(struct vfio_info_cap *caps,
			     struct vfio_info_cap_header *cap, size_t size)
{
	struct vfio_info_cap_header *header;

	header = vfio_info_cap_add(caps, size, cap->id, cap->version);
	if (IS_ERR(header))
		return PTR_ERR(header);

	memcpy(header + 1, cap + 1, size - sizeof(*header));

	return 0;
}
EXPORT_SYMBOL(vfio_info_add_capability);

int vfio_set_irqs_validate_and_prepare(struct vfio_irq_set *hdr, int num_irqs,
				       int max_irq_type, size_t *data_size)
{
	unsigned long minsz;
	size_t size;

	minsz = offsetofend(struct vfio_irq_set, count);

	if ((hdr->argsz < minsz) || (hdr->index >= max_irq_type) ||
	    (hdr->count >= (U32_MAX - hdr->start)) ||
	    (hdr->flags & ~(VFIO_IRQ_SET_DATA_TYPE_MASK |
			    VFIO_IRQ_SET_ACTION_TYPE_MASK)))
		return -EINVAL;

	if (data_size)
		*data_size = 0;

	if (hdr->start >= num_irqs || hdr->start + hdr->count > num_irqs)
		return -EINVAL;

	switch (hdr->flags & VFIO_IRQ_SET_DATA_TYPE_MASK) {
	case VFIO_IRQ_SET_DATA_NONE:
		size = 0;
		break;
	case VFIO_IRQ_SET_DATA_BOOL:
		size = sizeof(uint8_t);
		break;
	case VFIO_IRQ_SET_DATA_EVENTFD:
		size = sizeof(int32_t);
		break;
	default:
		return -EINVAL;
	}

	if (size) {
		if (hdr->argsz - minsz < hdr->count * size)
			return -EINVAL;

		if (!data_size)
			return -EINVAL;

		*data_size = hdr->count * size;
	}

	return 0;
}
EXPORT_SYMBOL(vfio_set_irqs_validate_and_prepare);
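
/*
 * Illustrative sketch of the expected caller pattern (modeled on vfio-pci's
 * VFIO_DEVICE_SET_IRQS handler); "max_irqs" is the per-index IRQ count the
 * driver computed:
 *
 *	struct vfio_irq_set hdr;
 *	size_t data_size = 0;
 *	u8 *data = NULL;
 *
 *	if (copy_from_user(&hdr, (void __user *)arg, minsz))
 *		return -EFAULT;
 *	ret = vfio_set_irqs_validate_and_prepare(&hdr, max_irqs,
 *						 VFIO_PCI_NUM_IRQS,
 *						 &data_size);
 *	if (ret)
 *		return ret;
 *	if (data_size) {
 *		data = memdup_user((void __user *)(arg + minsz), data_size);
 *		if (IS_ERR(data))
 *			return PTR_ERR(data);
 *	}
 */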

/*
 * Pin contiguous user pages and return their associated host pages for local
 * domain only.
 * @device [in]  : device
 * @iova [in]    : starting IOVA of user pages to be pinned.
 * @npage [in]   : count of pages to be pinned.  This count should not
 *		   be greater than VFIO_PIN_PAGES_MAX_ENTRIES.
 * @prot [in]    : protection flags
 * @pages [out]  : array of host pages
 * Return error or number of pages pinned.
 */
int vfio_pin_pages(struct vfio_device *device, dma_addr_t iova,
		   int npage, int prot, struct page **pages)
{
	struct vfio_container *container;
	struct vfio_group *group = device->group;
	struct vfio_iommu_driver *driver;
	int ret;

	if (!pages || !npage || !vfio_assert_device_open(device))
		return -EINVAL;

	if (npage > VFIO_PIN_PAGES_MAX_ENTRIES)
		return -E2BIG;

	if (group->dev_counter > 1)
		return -EINVAL;

	/* group->container cannot change while a vfio device is open */
	container = group->container;
	driver = container->iommu_driver;
	if (likely(driver && driver->ops->pin_pages))
		ret = driver->ops->pin_pages(container->iommu_data,
					     group->iommu_group, iova,
					     npage, prot, pages);
	else
		ret = -ENOTTY;

	return ret;
}
EXPORT_SYMBOL(vfio_pin_pages);

/*
 * Unpin contiguous host pages for local domain only.
 * @device [in]  : device
 * @iova [in]    : starting address of user pages to be unpinned.
 * @npage [in]   : count of pages to be unpinned.  This count should not
 *                 be greater than VFIO_PIN_PAGES_MAX_ENTRIES.
 */
void vfio_unpin_pages(struct vfio_device *device, dma_addr_t iova, int npage)
{
	struct vfio_container *container;
	struct vfio_iommu_driver *driver;

	if (WARN_ON(npage <= 0 || npage > VFIO_PIN_PAGES_MAX_ENTRIES))
		return;

	if (WARN_ON(!vfio_assert_device_open(device)))
		return;

	/* group->container cannot change while a vfio device is open */
	container = device->group->container;
	driver = container->iommu_driver;

	driver->ops->unpin_pages(container->iommu_data, iova, npage);
}
EXPORT_SYMBOL(vfio_unpin_pages);
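
/*
 * Illustrative sketch (not part of this file): a mediated driver pins one
 * page for CPU access and unpins it when done.  The return value is the
 * number of pages pinned or -errno:
 *
 *	struct page *page;
 *
 *	ret = vfio_pin_pages(vdev, iova, 1, IOMMU_READ | IOMMU_WRITE, &page);
 *	if (ret == 1) {
 *		void *va = kmap_local_page(page);
 *		...
 *		kunmap_local(va);
 *		vfio_unpin_pages(vdev, iova, 1);
 *	}
 */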

/*
 * This interface allows the CPUs to perform some sort of virtual DMA on
 * behalf of the device.
 *
 * CPUs read/write from/into a range of IOVAs pointing to user space memory
 * into/from a kernel buffer.
 *
 * As the read/write of user space memory is conducted via the CPUs and is
 * not a real device DMA, it is not necessary to pin the user space memory.
 *
 * @device [in]		: VFIO device
 * @iova [in]		: base IOVA of a user space buffer
 * @data [in]		: pointer to kernel buffer
 * @len [in]		: kernel buffer length
 * @write		: indicate read or write
 * Return error code on failure or 0 on success.
 */
int vfio_dma_rw(struct vfio_device *device, dma_addr_t iova, void *data,
		size_t len, bool write)
{
	struct vfio_container *container;
	struct vfio_iommu_driver *driver;
	int ret = 0;

	if (!data || len <= 0 || !vfio_assert_device_open(device))
		return -EINVAL;

	/* group->container cannot change while a vfio device is open */
	container = device->group->container;
	driver = container->iommu_driver;

	if (likely(driver && driver->ops->dma_rw))
		ret = driver->ops->dma_rw(container->iommu_data,
					  iova, data, len, write);
	else
		ret = -ENOTTY;
	return ret;
}
EXPORT_SYMBOL(vfio_dma_rw);
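
/*
 * Illustrative sketch: read then update a guest-resident descriptor through
 * the container's IOVA space without pinning.  "my_desc", "MY_DESC_DONE"
 * and "iova" are hypothetical:
 *
 *	struct my_desc desc;
 *
 *	ret = vfio_dma_rw(vdev, iova, &desc, sizeof(desc), false);
 *	if (!ret) {
 *		desc.status = MY_DESC_DONE;
 *		ret = vfio_dma_rw(vdev, iova, &desc, sizeof(desc), true);
 *	}
 */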

/*
 * Module/class support
 */
static char *vfio_devnode(struct device *dev, umode_t *mode)
{
	return kasprintf(GFP_KERNEL, "vfio/%s", dev_name(dev));
}

static struct miscdevice vfio_dev = {
	.minor = VFIO_MINOR,
	.name = "vfio",
	.fops = &vfio_fops,
	.nodename = "vfio/vfio",
	.mode = S_IRUGO | S_IWUGO,
};

static int __init vfio_init(void)
{
	int ret;

	ida_init(&vfio.group_ida);
	mutex_init(&vfio.group_lock);
	mutex_init(&vfio.iommu_drivers_lock);
	INIT_LIST_HEAD(&vfio.group_list);
	INIT_LIST_HEAD(&vfio.iommu_drivers_list);

	ret = misc_register(&vfio_dev);
	if (ret) {
		pr_err("vfio: misc device register failed\n");
		return ret;
	}

	/* /dev/vfio/$GROUP */
	vfio.class = class_create(THIS_MODULE, "vfio");
	if (IS_ERR(vfio.class)) {
		ret = PTR_ERR(vfio.class);
		goto err_class;
	}

	vfio.class->devnode = vfio_devnode;

	ret = alloc_chrdev_region(&vfio.group_devt, 0, MINORMASK + 1, "vfio");
	if (ret)
		goto err_alloc_chrdev;

#ifdef CONFIG_VFIO_NOIOMMU
	ret = vfio_register_iommu_driver(&vfio_noiommu_ops);
#endif
	if (ret)
		goto err_driver_register;

	pr_info(DRIVER_DESC " version: " DRIVER_VERSION "\n");
	return 0;

err_driver_register:
	unregister_chrdev_region(vfio.group_devt, MINORMASK + 1);
err_alloc_chrdev:
	class_destroy(vfio.class);
	vfio.class = NULL;
err_class:
	misc_deregister(&vfio_dev);
	return ret;
}

static void __exit vfio_cleanup(void)
{
	WARN_ON(!list_empty(&vfio.group_list));

#ifdef CONFIG_VFIO_NOIOMMU
	vfio_unregister_iommu_driver(&vfio_noiommu_ops);
#endif
	ida_destroy(&vfio.group_ida);
	unregister_chrdev_region(vfio.group_devt, MINORMASK + 1);
	class_destroy(vfio.class);
	vfio.class = NULL;
	misc_deregister(&vfio_dev);
	xa_destroy(&vfio_device_set_xa);
}

module_init(vfio_init);
module_exit(vfio_cleanup);

MODULE_VERSION(DRIVER_VERSION);
MODULE_LICENSE("GPL v2");
MODULE_AUTHOR(DRIVER_AUTHOR);
MODULE_DESCRIPTION(DRIVER_DESC);
MODULE_ALIAS_MISCDEV(VFIO_MINOR);
MODULE_ALIAS("devname:vfio/vfio");
MODULE_SOFTDEP("post: vfio_iommu_type1 vfio_iommu_spapr_tce");