// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright (C) 2012 Red Hat, Inc.  All rights reserved.
 *     Author: Alex Williamson <alex.williamson@redhat.com>
 *
 * Derived from original vfio:
 * Copyright 2010 Cisco Systems, Inc.  All rights reserved.
 * Author: Tom Lyon, pugs@cisco.com
 */

0011 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
0012
0013 #include <linux/aperture.h>
0014 #include <linux/device.h>
0015 #include <linux/eventfd.h>
0016 #include <linux/file.h>
0017 #include <linux/interrupt.h>
0018 #include <linux/iommu.h>
0019 #include <linux/module.h>
0020 #include <linux/mutex.h>
0021 #include <linux/notifier.h>
0022 #include <linux/pci.h>
0023 #include <linux/pm_runtime.h>
0024 #include <linux/slab.h>
0025 #include <linux/types.h>
0026 #include <linux/uaccess.h>
0027 #include <linux/vgaarb.h>
0028 #include <linux/nospec.h>
0029 #include <linux/sched/mm.h>
0030
0031 #include <linux/vfio_pci_core.h>
0032
0033 #define DRIVER_AUTHOR "Alex Williamson <alex.williamson@redhat.com>"
0034 #define DRIVER_DESC "core driver for VFIO based PCI devices"
0035
0036 static bool nointxmask;
0037 static bool disable_vga;
0038 static bool disable_idle_d3;
0039
/* List of PF's that vfio_pci_core_sriov_configure() has been called on */
0041 static DEFINE_MUTEX(vfio_pci_sriov_pfs_mutex);
0042 static LIST_HEAD(vfio_pci_sriov_pfs);
0043
0044 static inline bool vfio_vga_disabled(void)
0045 {
0046 #ifdef CONFIG_VFIO_PCI_VGA
0047 return disable_vga;
0048 #else
0049 return true;
0050 #endif
0051 }
0052
/*
 * Our VGA arbiter participation is limited since we don't know anything
 * about the device itself.  However, if the device is the only VGA device
 * downstream of a bridge and VFIO VGA support is disabled, then we can
 * safely return legacy VGA IO and memory as not decoded since the user
 * has no way to get to it and routing can be disabled externally at the
 * bridge.
 */
0061 static unsigned int vfio_pci_set_decode(struct pci_dev *pdev, bool single_vga)
0062 {
0063 struct pci_dev *tmp = NULL;
0064 unsigned char max_busnr;
0065 unsigned int decodes;
0066
0067 if (single_vga || !vfio_vga_disabled() || pci_is_root_bus(pdev->bus))
0068 return VGA_RSRC_NORMAL_IO | VGA_RSRC_NORMAL_MEM |
0069 VGA_RSRC_LEGACY_IO | VGA_RSRC_LEGACY_MEM;
0070
0071 max_busnr = pci_bus_max_busnr(pdev->bus);
0072 decodes = VGA_RSRC_NORMAL_IO | VGA_RSRC_NORMAL_MEM;
0073
0074 while ((tmp = pci_get_class(PCI_CLASS_DISPLAY_VGA << 8, tmp)) != NULL) {
0075 if (tmp == pdev ||
0076 pci_domain_nr(tmp->bus) != pci_domain_nr(pdev->bus) ||
0077 pci_is_root_bus(tmp->bus))
0078 continue;
0079
0080 if (tmp->bus->number >= pdev->bus->number &&
0081 tmp->bus->number <= max_busnr) {
0082 pci_dev_put(tmp);
0083 decodes |= VGA_RSRC_LEGACY_IO | VGA_RSRC_LEGACY_MEM;
0084 break;
0085 }
0086 }
0087
0088 return decodes;
0089 }
0090
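/*
 * Determine which standard BARs the user may mmap.  Only MEM BARs are
 * eligible: BARs of at least PAGE_SIZE are always allowed, and sub-page
 * BARs that start page-aligned are allowed after reserving the remainder
 * of the page with a dummy resource.
 */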
0091 static void vfio_pci_probe_mmaps(struct vfio_pci_core_device *vdev)
0092 {
0093 struct resource *res;
0094 int i;
0095 struct vfio_pci_dummy_resource *dummy_res;
0096
0097 for (i = 0; i < PCI_STD_NUM_BARS; i++) {
0098 int bar = i + PCI_STD_RESOURCES;
0099
0100 res = &vdev->pdev->resource[bar];
0101
0102 if (!IS_ENABLED(CONFIG_VFIO_PCI_MMAP))
0103 goto no_mmap;
0104
0105 if (!(res->flags & IORESOURCE_MEM))
0106 goto no_mmap;
0107
/*
 * The PCI core shouldn't set up a disabled/unassigned BAR, but a
 * zero-sized BAR has nothing to map in any case, so skip it.
 */
0113 if (!resource_size(res))
0114 goto no_mmap;
0115
0116 if (resource_size(res) >= PAGE_SIZE) {
0117 vdev->bar_mmap_supported[bar] = true;
0118 continue;
0119 }
0120
0121 if (!(res->start & ~PAGE_MASK)) {
/*
 * Add a dummy resource to reserve the remainder of the exclusive
 * page in case that hot-add device's bar is assigned into it.
 */
0127 dummy_res = kzalloc(sizeof(*dummy_res), GFP_KERNEL);
0128 if (dummy_res == NULL)
0129 goto no_mmap;
0130
0131 dummy_res->resource.name = "vfio sub-page reserved";
0132 dummy_res->resource.start = res->end + 1;
0133 dummy_res->resource.end = res->start + PAGE_SIZE - 1;
0134 dummy_res->resource.flags = res->flags;
0135 if (request_resource(res->parent,
0136 &dummy_res->resource)) {
0137 kfree(dummy_res);
0138 goto no_mmap;
0139 }
0140 dummy_res->index = bar;
0141 list_add(&dummy_res->res_next,
0142 &vdev->dummy_resources_list);
0143 vdev->bar_mmap_supported[bar] = true;
0144 continue;
0145 }
0146
/*
 * Here we don't handle the case when the BAR is not page aligned
 * because we can't expect the BAR will be assigned into the same
 * location in a page in guest when we passthrough the BAR.  And it's
 * hard to access this BAR in hardware because we just mmap the page
 * without real address.
 */
0154 no_mmap:
0155 vdev->bar_mmap_supported[bar] = false;
0156 }
0157 }
0158
0159 struct vfio_pci_group_info;
0160 static void vfio_pci_dev_set_try_reset(struct vfio_device_set *dev_set);
0161 static int vfio_pci_dev_set_hot_reset(struct vfio_device_set *dev_set,
0162 struct vfio_pci_group_info *groups);
0163
/*
 * INTx masking via PCI_COMMAND_INTX_DISABLE is known to be broken on
 * these devices.  Rather than offer the user an interrupt that cannot
 * be reliably masked, hide INTx support entirely and leave INTx
 * disabled at the device.
 */
0173 static bool vfio_pci_nointx(struct pci_dev *pdev)
0174 {
0175 switch (pdev->vendor) {
0176 case PCI_VENDOR_ID_INTEL:
0177 switch (pdev->device) {
/* All i40e (XL710/X710/XXV710) 10/20/25/40GbE NICs */
0179 case 0x1572:
0180 case 0x1574:
0181 case 0x1580 ... 0x1581:
0182 case 0x1583 ... 0x158b:
0183 case 0x37d0 ... 0x37d2:
/* X550 */
0185 case 0x1563:
0186 return true;
0187 default:
0188 return false;
0189 }
0190 }
0191
0192 return false;
0193 }
0194
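/*
 * A device that does not advertise No_Soft_Reset in its PM control register
 * may lose configuration state over a D3hot->D0 transition, so record
 * whether we must save and restore state around power state changes.
 */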
0195 static void vfio_pci_probe_power_state(struct vfio_pci_core_device *vdev)
0196 {
0197 struct pci_dev *pdev = vdev->pdev;
0198 u16 pmcsr;
0199
0200 if (!pdev->pm_cap)
0201 return;
0202
0203 pci_read_config_word(pdev, pdev->pm_cap + PCI_PM_CTRL, &pmcsr);
0204
0205 vdev->needs_pm_restore = !(pmcsr & PCI_PM_CTRL_NO_SOFT_RESET);
0206 }
0207
/*
 * pci_set_power_state() wrapper handling devices which perform a soft reset
 * on D3->D0 transition.  Save state prior to D0/1/2->D3, stash it on the
 * device, restore when returned to D0.
 */
0215 int vfio_pci_set_power_state(struct vfio_pci_core_device *vdev, pci_power_t state)
0216 {
0217 struct pci_dev *pdev = vdev->pdev;
0218 bool needs_restore = false, needs_save = false;
0219 int ret;
0220
/* A PF must remain in D0 while any of its SR-IOV VFs are enabled */
0222 if (pci_num_vf(pdev) && state > PCI_D0)
0223 return -EBUSY;
0224
0225 if (vdev->needs_pm_restore) {
0226 if (pdev->current_state < PCI_D3hot && state >= PCI_D3hot) {
0227 pci_save_state(pdev);
0228 needs_save = true;
0229 }
0230
0231 if (pdev->current_state >= PCI_D3hot && state <= PCI_D0)
0232 needs_restore = true;
0233 }
0234
0235 ret = pci_set_power_state(pdev, state);
0236
0237 if (!ret) {
/* D3 might be unsupported via quirk, skip unless in D3 */
0239 if (needs_save && pdev->current_state >= PCI_D3hot) {
/*
 * The current PCI state will be saved locally in 'pm_save' during the
 * D3hot transition.  When the device state is changed to D0 again with
 * the current function, then pci_load_and_free_saved_state() will
 * restore the state and will free the memory pointed to by 'pm_save'.
 * There are few cases where the PCI power state can be changed to D0
 * without the involvement of the driver.  For these cases, free the
 * earlier allocated memory first before overwriting 'pm_save' to
 * prevent the memory leak.
 */
0252 kfree(vdev->pm_save);
0253 vdev->pm_save = pci_store_saved_state(pdev);
0254 } else if (needs_restore) {
0255 pci_load_and_free_saved_state(pdev, &vdev->pm_save);
0256 pci_restore_state(pdev);
0257 }
0258 }
0259
0260 return ret;
0261 }
0262
/*
 * The dev_pm_ops needs to be provided to make pci-driver runtime PM working,
 * so use a structure without any callbacks.
 *
 * The pci-driver core runtime PM routines always save the device state
 * before going into suspended state.  If the device is going into low power
 * state with only runtime PM ops, then no explicit handling is needed for
 * the devices which have NoSoftRst-.
 */
0272 static const struct dev_pm_ops vfio_pci_core_pm_ops = { };
0273
0274 int vfio_pci_core_enable(struct vfio_pci_core_device *vdev)
0275 {
0276 struct pci_dev *pdev = vdev->pdev;
0277 int ret;
0278 u16 cmd;
0279 u8 msix_pos;
0280
0281 if (!disable_idle_d3) {
0282 ret = pm_runtime_resume_and_get(&pdev->dev);
0283 if (ret < 0)
0284 return ret;
0285 }
0286
/* Don't allow our initial saved state to include busmaster */
0288 pci_clear_master(pdev);
0289
0290 ret = pci_enable_device(pdev);
0291 if (ret)
0292 goto out_power;
0293
/* If reset fails because of the device lock, fail this path entirely */
0295 ret = pci_try_reset_function(pdev);
0296 if (ret == -EAGAIN)
0297 goto out_disable_device;
0298
0299 vdev->reset_works = !ret;
0300 pci_save_state(pdev);
0301 vdev->pci_saved_state = pci_store_saved_state(pdev);
0302 if (!vdev->pci_saved_state)
0303 pci_dbg(pdev, "%s: Couldn't store saved state\n", __func__);
0304
0305 if (likely(!nointxmask)) {
0306 if (vfio_pci_nointx(pdev)) {
0307 pci_info(pdev, "Masking broken INTx support\n");
0308 vdev->nointx = true;
0309 pci_intx(pdev, 0);
0310 } else
0311 vdev->pci_2_3 = pci_intx_mask_supported(pdev);
0312 }
0313
0314 pci_read_config_word(pdev, PCI_COMMAND, &cmd);
0315 if (vdev->pci_2_3 && (cmd & PCI_COMMAND_INTX_DISABLE)) {
0316 cmd &= ~PCI_COMMAND_INTX_DISABLE;
0317 pci_write_config_word(pdev, PCI_COMMAND, cmd);
0318 }
0319
0320 ret = vfio_pci_zdev_open_device(vdev);
0321 if (ret)
0322 goto out_free_state;
0323
0324 ret = vfio_config_init(vdev);
0325 if (ret)
0326 goto out_free_zdev;
0327
0328 msix_pos = pdev->msix_cap;
0329 if (msix_pos) {
0330 u16 flags;
0331 u32 table;
0332
0333 pci_read_config_word(pdev, msix_pos + PCI_MSIX_FLAGS, &flags);
0334 pci_read_config_dword(pdev, msix_pos + PCI_MSIX_TABLE, &table);
0335
0336 vdev->msix_bar = table & PCI_MSIX_TABLE_BIR;
0337 vdev->msix_offset = table & PCI_MSIX_TABLE_OFFSET;
0338 vdev->msix_size = ((flags & PCI_MSIX_FLAGS_QSIZE) + 1) * 16;
0339 } else
0340 vdev->msix_bar = 0xFF;
0341
0342 if (!vfio_vga_disabled() && vfio_pci_is_vga(pdev))
0343 vdev->has_vga = true;
0344
0345
0346 return 0;
0347
0348 out_free_zdev:
0349 vfio_pci_zdev_close_device(vdev);
0350 out_free_state:
0351 kfree(vdev->pci_saved_state);
0352 vdev->pci_saved_state = NULL;
0353 out_disable_device:
0354 pci_disable_device(pdev);
0355 out_power:
0356 if (!disable_idle_d3)
0357 pm_runtime_put(&pdev->dev);
0358 return ret;
0359 }
0360 EXPORT_SYMBOL_GPL(vfio_pci_core_enable);
0361
0362 void vfio_pci_core_disable(struct vfio_pci_core_device *vdev)
0363 {
0364 struct pci_dev *pdev = vdev->pdev;
0365 struct vfio_pci_dummy_resource *dummy_res, *tmp;
0366 struct vfio_pci_ioeventfd *ioeventfd, *ioeventfd_tmp;
0367 int i, bar;
0368
/* For needs_reset */
0370 lockdep_assert_held(&vdev->vdev.dev_set->lock);
0371
/*
 * This function can be invoked while the power state is non-D0.  This
 * function calls __pci_reset_function_locked() which internally can use
 * pci_pm_reset() for the function reset.  pci_pm_reset() will fail if the
 * power state is non-D0.  Also, for the devices which have NoSoftRst-,
 * the reset function can cause the PCI config space reset without
 * restoring the original state (saved locally in 'vdev->pm_save').
 */
0381 vfio_pci_set_power_state(vdev, PCI_D0);
0382
/* Stop the device from further DMA */
0384 pci_clear_master(pdev);
0385
0386 vfio_pci_set_irqs_ioctl(vdev, VFIO_IRQ_SET_DATA_NONE |
0387 VFIO_IRQ_SET_ACTION_TRIGGER,
0388 vdev->irq_type, 0, 0, NULL);
0389
/* Device closed, don't need mutex here */
0391 list_for_each_entry_safe(ioeventfd, ioeventfd_tmp,
0392 &vdev->ioeventfds_list, next) {
0393 vfio_virqfd_disable(&ioeventfd->virqfd);
0394 list_del(&ioeventfd->next);
0395 kfree(ioeventfd);
0396 }
0397 vdev->ioeventfds_nr = 0;
0398
0399 vdev->virq_disabled = false;
0400
0401 for (i = 0; i < vdev->num_regions; i++)
0402 vdev->region[i].ops->release(vdev, &vdev->region[i]);
0403
0404 vdev->num_regions = 0;
0405 kfree(vdev->region);
0406 vdev->region = NULL;
0407
0408 vfio_config_free(vdev);
0409
0410 for (i = 0; i < PCI_STD_NUM_BARS; i++) {
0411 bar = i + PCI_STD_RESOURCES;
0412 if (!vdev->barmap[bar])
0413 continue;
0414 pci_iounmap(pdev, vdev->barmap[bar]);
0415 pci_release_selected_regions(pdev, 1 << bar);
0416 vdev->barmap[bar] = NULL;
0417 }
0418
0419 list_for_each_entry_safe(dummy_res, tmp,
0420 &vdev->dummy_resources_list, res_next) {
0421 list_del(&dummy_res->res_next);
0422 release_resource(&dummy_res->resource);
0423 kfree(dummy_res);
0424 }
0425
0426 vdev->needs_reset = true;
0427
0428 vfio_pci_zdev_close_device(vdev);
0429
/*
 * If we have saved state, restore it.  If we can reset the device,
 * even better.  Resetting with current state seems better than
 * nothing, but saving and restoring current state without reset
 * is just busy work.
 */
0436 if (pci_load_and_free_saved_state(pdev, &vdev->pci_saved_state)) {
0437 pci_info(pdev, "%s: Couldn't reload saved state\n", __func__);
0438
0439 if (!vdev->reset_works)
0440 goto out;
0441
0442 pci_save_state(pdev);
0443 }
0444
/*
 * Disable INTx and MSI, presumably to avoid spurious interrupts
 * during reset.  Stolen from pci_reset_function()
 */
0449 pci_write_config_word(pdev, PCI_COMMAND, PCI_COMMAND_INTX_DISABLE);
0450
/*
 * Try to get the locks ourselves to prevent a deadlock.  The
 * success of this is dependent on being able to lock the device,
 * which is not always possible.
 * We can not use the "try" reset interface here, which will
 * overwrite the previously restored configuration information.
 */
0458 if (vdev->reset_works && pci_dev_trylock(pdev)) {
0459 if (!__pci_reset_function_locked(pdev))
0460 vdev->needs_reset = false;
0461 pci_dev_unlock(pdev);
0462 }
0463
0464 pci_restore_state(pdev);
0465 out:
0466 pci_disable_device(pdev);
0467
0468 vfio_pci_dev_set_try_reset(vdev->vdev.dev_set);
0469
/* Put the pm-runtime usage counter acquired during enable */
0471 if (!disable_idle_d3)
0472 pm_runtime_put(&pdev->dev);
0473 }
0474 EXPORT_SYMBOL_GPL(vfio_pci_core_disable);
0475
0476 void vfio_pci_core_close_device(struct vfio_device *core_vdev)
0477 {
0478 struct vfio_pci_core_device *vdev =
0479 container_of(core_vdev, struct vfio_pci_core_device, vdev);
0480
0481 if (vdev->sriov_pf_core_dev) {
0482 mutex_lock(&vdev->sriov_pf_core_dev->vf_token->lock);
0483 WARN_ON(!vdev->sriov_pf_core_dev->vf_token->users);
0484 vdev->sriov_pf_core_dev->vf_token->users--;
0485 mutex_unlock(&vdev->sriov_pf_core_dev->vf_token->lock);
0486 }
0487 vfio_spapr_pci_eeh_release(vdev->pdev);
0488 vfio_pci_core_disable(vdev);
0489
0490 mutex_lock(&vdev->igate);
0491 if (vdev->err_trigger) {
0492 eventfd_ctx_put(vdev->err_trigger);
0493 vdev->err_trigger = NULL;
0494 }
0495 if (vdev->req_trigger) {
0496 eventfd_ctx_put(vdev->req_trigger);
0497 vdev->req_trigger = NULL;
0498 }
0499 mutex_unlock(&vdev->igate);
0500 }
0501 EXPORT_SYMBOL_GPL(vfio_pci_core_close_device);
0502
0503 void vfio_pci_core_finish_enable(struct vfio_pci_core_device *vdev)
0504 {
0505 vfio_pci_probe_mmaps(vdev);
0506 vfio_spapr_pci_eeh_open(vdev->pdev);
0507
0508 if (vdev->sriov_pf_core_dev) {
0509 mutex_lock(&vdev->sriov_pf_core_dev->vf_token->lock);
0510 vdev->sriov_pf_core_dev->vf_token->users++;
0511 mutex_unlock(&vdev->sriov_pf_core_dev->vf_token->lock);
0512 }
0513 }
0514 EXPORT_SYMBOL_GPL(vfio_pci_core_finish_enable);
0515
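/*
 * Number of interrupts the device can deliver for the given VFIO IRQ index:
 * 1 for INTx (when a pin is routed and INTx isn't hidden), the MSI/MSI-X
 * vector count from the respective capability, and 1 each for ERR (PCIe
 * only) and REQ.  Returns 0 when the index is unsupported.
 */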
0516 static int vfio_pci_get_irq_count(struct vfio_pci_core_device *vdev, int irq_type)
0517 {
0518 if (irq_type == VFIO_PCI_INTX_IRQ_INDEX) {
0519 u8 pin;
0520
0521 if (!IS_ENABLED(CONFIG_VFIO_PCI_INTX) ||
0522 vdev->nointx || vdev->pdev->is_virtfn)
0523 return 0;
0524
0525 pci_read_config_byte(vdev->pdev, PCI_INTERRUPT_PIN, &pin);
0526
0527 return pin ? 1 : 0;
0528 } else if (irq_type == VFIO_PCI_MSI_IRQ_INDEX) {
0529 u8 pos;
0530 u16 flags;
0531
0532 pos = vdev->pdev->msi_cap;
0533 if (pos) {
0534 pci_read_config_word(vdev->pdev,
0535 pos + PCI_MSI_FLAGS, &flags);
0536 return 1 << ((flags & PCI_MSI_FLAGS_QMASK) >> 1);
0537 }
0538 } else if (irq_type == VFIO_PCI_MSIX_IRQ_INDEX) {
0539 u8 pos;
0540 u16 flags;
0541
0542 pos = vdev->pdev->msix_cap;
0543 if (pos) {
0544 pci_read_config_word(vdev->pdev,
0545 pos + PCI_MSIX_FLAGS, &flags);
0546
0547 return (flags & PCI_MSIX_FLAGS_QSIZE) + 1;
0548 }
0549 } else if (irq_type == VFIO_PCI_ERR_IRQ_INDEX) {
0550 if (pci_is_pcie(vdev->pdev))
0551 return 1;
0552 } else if (irq_type == VFIO_PCI_REQ_IRQ_INDEX) {
0553 return 1;
0554 }
0555
0556 return 0;
0557 }
0558
0559 static int vfio_pci_count_devs(struct pci_dev *pdev, void *data)
0560 {
0561 (*(int *)data)++;
0562 return 0;
0563 }
0564
0565 struct vfio_pci_fill_info {
0566 int max;
0567 int cur;
0568 struct vfio_pci_dependent_device *devices;
0569 };
0570
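/*
 * pci_walk_bus() callback for VFIO_DEVICE_GET_PCI_HOT_RESET_INFO: record the
 * IOMMU group and segment/bus/devfn of each device affected by the reset.
 * Returns -EAGAIN if the preallocated array turns out to be too small.
 */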
0571 static int vfio_pci_fill_devs(struct pci_dev *pdev, void *data)
0572 {
0573 struct vfio_pci_fill_info *fill = data;
0574 struct iommu_group *iommu_group;
0575
0576 if (fill->cur == fill->max)
0577 return -EAGAIN;
0578
0579 iommu_group = iommu_group_get(&pdev->dev);
0580 if (!iommu_group)
0581 return -EPERM;
0582
0583 fill->devices[fill->cur].group_id = iommu_group_id(iommu_group);
0584 fill->devices[fill->cur].segment = pci_domain_nr(pdev->bus);
0585 fill->devices[fill->cur].bus = pdev->bus->number;
0586 fill->devices[fill->cur].devfn = pdev->devfn;
0587 fill->cur++;
0588 iommu_group_put(iommu_group);
0589 return 0;
0590 }
0591
0592 struct vfio_pci_group_info {
0593 int count;
0594 struct file **files;
0595 };
0596
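/* Walk upstream from pdev to test whether it sits below the given slot. */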
0597 static bool vfio_pci_dev_below_slot(struct pci_dev *pdev, struct pci_slot *slot)
0598 {
0599 for (; pdev; pdev = pdev->bus->self)
0600 if (pdev->bus == slot->bus)
0601 return (pdev->slot == slot);
0602 return false;
0603 }
0604
0605 struct vfio_pci_walk_info {
0606 int (*fn)(struct pci_dev *pdev, void *data);
0607 void *data;
0608 struct pci_dev *pdev;
0609 bool slot;
0610 int ret;
0611 };
0612
0613 static int vfio_pci_walk_wrapper(struct pci_dev *pdev, void *data)
0614 {
0615 struct vfio_pci_walk_info *walk = data;
0616
0617 if (!walk->slot || vfio_pci_dev_below_slot(pdev, walk->pdev->slot))
0618 walk->ret = walk->fn(pdev, walk->data);
0619
0620 return walk->ret;
0621 }
0622
0623 static int vfio_pci_for_each_slot_or_bus(struct pci_dev *pdev,
0624 int (*fn)(struct pci_dev *,
0625 void *data), void *data,
0626 bool slot)
0627 {
0628 struct vfio_pci_walk_info walk = {
0629 .fn = fn, .data = data, .pdev = pdev, .slot = slot, .ret = 0,
0630 };
0631
0632 pci_walk_bus(pdev->bus, vfio_pci_walk_wrapper, &walk);
0633
0634 return walk.ret;
0635 }
0636
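/*
 * Region info capability advertising that the BAR containing the MSI-X
 * table may be mmapped by the user.
 */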
0637 static int msix_mmappable_cap(struct vfio_pci_core_device *vdev,
0638 struct vfio_info_cap *caps)
0639 {
0640 struct vfio_info_cap_header header = {
0641 .id = VFIO_REGION_INFO_CAP_MSIX_MAPPABLE,
0642 .version = 1
0643 };
0644
0645 return vfio_info_add_capability(caps, &header, sizeof(header));
0646 }
0647
0648 int vfio_pci_register_dev_region(struct vfio_pci_core_device *vdev,
0649 unsigned int type, unsigned int subtype,
0650 const struct vfio_pci_regops *ops,
0651 size_t size, u32 flags, void *data)
0652 {
0653 struct vfio_pci_region *region;
0654
0655 region = krealloc(vdev->region,
0656 (vdev->num_regions + 1) * sizeof(*region),
0657 GFP_KERNEL);
0658 if (!region)
0659 return -ENOMEM;
0660
0661 vdev->region = region;
0662 vdev->region[vdev->num_regions].type = type;
0663 vdev->region[vdev->num_regions].subtype = subtype;
0664 vdev->region[vdev->num_regions].ops = ops;
0665 vdev->region[vdev->num_regions].size = size;
0666 vdev->region[vdev->num_regions].flags = flags;
0667 vdev->region[vdev->num_regions].data = data;
0668
0669 vdev->num_regions++;
0670
0671 return 0;
0672 }
0673 EXPORT_SYMBOL_GPL(vfio_pci_register_dev_region);
0674
0675 long vfio_pci_core_ioctl(struct vfio_device *core_vdev, unsigned int cmd,
0676 unsigned long arg)
0677 {
0678 struct vfio_pci_core_device *vdev =
0679 container_of(core_vdev, struct vfio_pci_core_device, vdev);
0680 unsigned long minsz;
0681
0682 if (cmd == VFIO_DEVICE_GET_INFO) {
0683 struct vfio_device_info info;
0684 struct vfio_info_cap caps = { .buf = NULL, .size = 0 };
0685 unsigned long capsz;
0686 int ret;
0687
0688 minsz = offsetofend(struct vfio_device_info, num_irqs);
0689
/* For backward compatibility, cannot require this */
0691 capsz = offsetofend(struct vfio_iommu_type1_info, cap_offset);
0692
0693 if (copy_from_user(&info, (void __user *)arg, minsz))
0694 return -EFAULT;
0695
0696 if (info.argsz < minsz)
0697 return -EINVAL;
0698
0699 if (info.argsz >= capsz) {
0700 minsz = capsz;
0701 info.cap_offset = 0;
0702 }
0703
0704 info.flags = VFIO_DEVICE_FLAGS_PCI;
0705
0706 if (vdev->reset_works)
0707 info.flags |= VFIO_DEVICE_FLAGS_RESET;
0708
0709 info.num_regions = VFIO_PCI_NUM_REGIONS + vdev->num_regions;
0710 info.num_irqs = VFIO_PCI_NUM_IRQS;
0711
0712 ret = vfio_pci_info_zdev_add_caps(vdev, &caps);
0713 if (ret && ret != -ENODEV) {
0714 pci_warn(vdev->pdev, "Failed to setup zPCI info capabilities\n");
0715 return ret;
0716 }
0717
0718 if (caps.size) {
0719 info.flags |= VFIO_DEVICE_FLAGS_CAPS;
0720 if (info.argsz < sizeof(info) + caps.size) {
0721 info.argsz = sizeof(info) + caps.size;
0722 } else {
0723 vfio_info_cap_shift(&caps, sizeof(info));
0724 if (copy_to_user((void __user *)arg +
0725 sizeof(info), caps.buf,
0726 caps.size)) {
0727 kfree(caps.buf);
0728 return -EFAULT;
0729 }
0730 info.cap_offset = sizeof(info);
0731 }
0732
0733 kfree(caps.buf);
0734 }
0735
0736 return copy_to_user((void __user *)arg, &info, minsz) ?
0737 -EFAULT : 0;
0738
0739 } else if (cmd == VFIO_DEVICE_GET_REGION_INFO) {
0740 struct pci_dev *pdev = vdev->pdev;
0741 struct vfio_region_info info;
0742 struct vfio_info_cap caps = { .buf = NULL, .size = 0 };
0743 int i, ret;
0744
0745 minsz = offsetofend(struct vfio_region_info, offset);
0746
0747 if (copy_from_user(&info, (void __user *)arg, minsz))
0748 return -EFAULT;
0749
0750 if (info.argsz < minsz)
0751 return -EINVAL;
0752
0753 switch (info.index) {
0754 case VFIO_PCI_CONFIG_REGION_INDEX:
0755 info.offset = VFIO_PCI_INDEX_TO_OFFSET(info.index);
0756 info.size = pdev->cfg_size;
0757 info.flags = VFIO_REGION_INFO_FLAG_READ |
0758 VFIO_REGION_INFO_FLAG_WRITE;
0759 break;
0760 case VFIO_PCI_BAR0_REGION_INDEX ... VFIO_PCI_BAR5_REGION_INDEX:
0761 info.offset = VFIO_PCI_INDEX_TO_OFFSET(info.index);
0762 info.size = pci_resource_len(pdev, info.index);
0763 if (!info.size) {
0764 info.flags = 0;
0765 break;
0766 }
0767
0768 info.flags = VFIO_REGION_INFO_FLAG_READ |
0769 VFIO_REGION_INFO_FLAG_WRITE;
0770 if (vdev->bar_mmap_supported[info.index]) {
0771 info.flags |= VFIO_REGION_INFO_FLAG_MMAP;
0772 if (info.index == vdev->msix_bar) {
0773 ret = msix_mmappable_cap(vdev, &caps);
0774 if (ret)
0775 return ret;
0776 }
0777 }
0778
0779 break;
0780 case VFIO_PCI_ROM_REGION_INDEX:
0781 {
0782 void __iomem *io;
0783 size_t size;
0784 u16 cmd;
0785
0786 info.offset = VFIO_PCI_INDEX_TO_OFFSET(info.index);
0787 info.flags = 0;
0788
/* Report the BAR size, not the ROM size */
0790 info.size = pci_resource_len(pdev, info.index);
0791 if (!info.size) {
/* Shadow ROMs appear as PCI option ROMs */
0793 if (pdev->resource[PCI_ROM_RESOURCE].flags &
0794 IORESOURCE_ROM_SHADOW)
0795 info.size = 0x20000;
0796 else
0797 break;
0798 }
0799
/*
 * Is it really there?  Enable memory decode for implicit access
 * in pci_map_rom().
 */
0804 cmd = vfio_pci_memory_lock_and_enable(vdev);
0805 io = pci_map_rom(pdev, &size);
0806 if (io) {
0807 info.flags = VFIO_REGION_INFO_FLAG_READ;
0808 pci_unmap_rom(pdev, io);
0809 } else {
0810 info.size = 0;
0811 }
0812 vfio_pci_memory_unlock_and_restore(vdev, cmd);
0813
0814 break;
0815 }
0816 case VFIO_PCI_VGA_REGION_INDEX:
0817 if (!vdev->has_vga)
0818 return -EINVAL;
0819
0820 info.offset = VFIO_PCI_INDEX_TO_OFFSET(info.index);
0821 info.size = 0xc0000;
0822 info.flags = VFIO_REGION_INFO_FLAG_READ |
0823 VFIO_REGION_INFO_FLAG_WRITE;
0824
0825 break;
0826 default:
0827 {
0828 struct vfio_region_info_cap_type cap_type = {
0829 .header.id = VFIO_REGION_INFO_CAP_TYPE,
0830 .header.version = 1 };
0831
0832 if (info.index >=
0833 VFIO_PCI_NUM_REGIONS + vdev->num_regions)
0834 return -EINVAL;
0835 info.index = array_index_nospec(info.index,
0836 VFIO_PCI_NUM_REGIONS +
0837 vdev->num_regions);
0838
0839 i = info.index - VFIO_PCI_NUM_REGIONS;
0840
0841 info.offset = VFIO_PCI_INDEX_TO_OFFSET(info.index);
0842 info.size = vdev->region[i].size;
0843 info.flags = vdev->region[i].flags;
0844
0845 cap_type.type = vdev->region[i].type;
0846 cap_type.subtype = vdev->region[i].subtype;
0847
0848 ret = vfio_info_add_capability(&caps, &cap_type.header,
0849 sizeof(cap_type));
0850 if (ret)
0851 return ret;
0852
0853 if (vdev->region[i].ops->add_capability) {
0854 ret = vdev->region[i].ops->add_capability(vdev,
0855 &vdev->region[i], &caps);
0856 if (ret)
0857 return ret;
0858 }
0859 }
0860 }
0861
0862 if (caps.size) {
0863 info.flags |= VFIO_REGION_INFO_FLAG_CAPS;
0864 if (info.argsz < sizeof(info) + caps.size) {
0865 info.argsz = sizeof(info) + caps.size;
0866 info.cap_offset = 0;
0867 } else {
0868 vfio_info_cap_shift(&caps, sizeof(info));
0869 if (copy_to_user((void __user *)arg +
0870 sizeof(info), caps.buf,
0871 caps.size)) {
0872 kfree(caps.buf);
0873 return -EFAULT;
0874 }
0875 info.cap_offset = sizeof(info);
0876 }
0877
0878 kfree(caps.buf);
0879 }
0880
0881 return copy_to_user((void __user *)arg, &info, minsz) ?
0882 -EFAULT : 0;
0883
0884 } else if (cmd == VFIO_DEVICE_GET_IRQ_INFO) {
0885 struct vfio_irq_info info;
0886
0887 minsz = offsetofend(struct vfio_irq_info, count);
0888
0889 if (copy_from_user(&info, (void __user *)arg, minsz))
0890 return -EFAULT;
0891
0892 if (info.argsz < minsz || info.index >= VFIO_PCI_NUM_IRQS)
0893 return -EINVAL;
0894
0895 switch (info.index) {
0896 case VFIO_PCI_INTX_IRQ_INDEX ... VFIO_PCI_MSIX_IRQ_INDEX:
0897 case VFIO_PCI_REQ_IRQ_INDEX:
0898 break;
0899 case VFIO_PCI_ERR_IRQ_INDEX:
0900 if (pci_is_pcie(vdev->pdev))
0901 break;
0902 fallthrough;
0903 default:
0904 return -EINVAL;
0905 }
0906
0907 info.flags = VFIO_IRQ_INFO_EVENTFD;
0908
0909 info.count = vfio_pci_get_irq_count(vdev, info.index);
0910
0911 if (info.index == VFIO_PCI_INTX_IRQ_INDEX)
0912 info.flags |= (VFIO_IRQ_INFO_MASKABLE |
0913 VFIO_IRQ_INFO_AUTOMASKED);
0914 else
0915 info.flags |= VFIO_IRQ_INFO_NORESIZE;
0916
0917 return copy_to_user((void __user *)arg, &info, minsz) ?
0918 -EFAULT : 0;
0919
0920 } else if (cmd == VFIO_DEVICE_SET_IRQS) {
0921 struct vfio_irq_set hdr;
0922 u8 *data = NULL;
0923 int max, ret = 0;
0924 size_t data_size = 0;
0925
0926 minsz = offsetofend(struct vfio_irq_set, count);
0927
0928 if (copy_from_user(&hdr, (void __user *)arg, minsz))
0929 return -EFAULT;
0930
0931 max = vfio_pci_get_irq_count(vdev, hdr.index);
0932
0933 ret = vfio_set_irqs_validate_and_prepare(&hdr, max,
0934 VFIO_PCI_NUM_IRQS, &data_size);
0935 if (ret)
0936 return ret;
0937
0938 if (data_size) {
0939 data = memdup_user((void __user *)(arg + minsz),
0940 data_size);
0941 if (IS_ERR(data))
0942 return PTR_ERR(data);
0943 }
0944
0945 mutex_lock(&vdev->igate);
0946
0947 ret = vfio_pci_set_irqs_ioctl(vdev, hdr.flags, hdr.index,
0948 hdr.start, hdr.count, data);
0949
0950 mutex_unlock(&vdev->igate);
0951 kfree(data);
0952
0953 return ret;
0954
0955 } else if (cmd == VFIO_DEVICE_RESET) {
0956 int ret;
0957
0958 if (!vdev->reset_works)
0959 return -EINVAL;
0960
0961 vfio_pci_zap_and_down_write_memory_lock(vdev);
0962
/*
 * This function can be invoked while the power state is non-D0.  If
 * pci_try_reset_function() has been called while the power state is
 * non-D0, then pci_try_reset_function() will internally set the power
 * state to D0 without vfio driver involvement.  For the devices which
 * have NoSoftRst-, the reset function can cause the PCI config space
 * reset without restoring the original state (saved locally in
 * 'vdev->pm_save').
 */
0973 vfio_pci_set_power_state(vdev, PCI_D0);
0974
0975 ret = pci_try_reset_function(vdev->pdev);
0976 up_write(&vdev->memory_lock);
0977
0978 return ret;
0979
0980 } else if (cmd == VFIO_DEVICE_GET_PCI_HOT_RESET_INFO) {
0981 struct vfio_pci_hot_reset_info hdr;
0982 struct vfio_pci_fill_info fill = { 0 };
0983 struct vfio_pci_dependent_device *devices = NULL;
0984 bool slot = false;
0985 int ret = 0;
0986
0987 minsz = offsetofend(struct vfio_pci_hot_reset_info, count);
0988
0989 if (copy_from_user(&hdr, (void __user *)arg, minsz))
0990 return -EFAULT;
0991
0992 if (hdr.argsz < minsz)
0993 return -EINVAL;
0994
0995 hdr.flags = 0;
0996
/* Can we do a slot or bus reset or neither? */
0998 if (!pci_probe_reset_slot(vdev->pdev->slot))
0999 slot = true;
1000 else if (pci_probe_reset_bus(vdev->pdev->bus))
1001 return -ENODEV;
1002
/* How many devices are affected? */
1004 ret = vfio_pci_for_each_slot_or_bus(vdev->pdev,
1005 vfio_pci_count_devs,
1006 &fill.max, slot);
1007 if (ret)
1008 return ret;
1009
1010 WARN_ON(!fill.max);
1011
/*
 * If there's enough space, fill it now, otherwise return -ENOSPC and
 * the number of devices affected.
 */
1016 if (hdr.argsz < sizeof(hdr) + (fill.max * sizeof(*devices))) {
1017 ret = -ENOSPC;
1018 hdr.count = fill.max;
1019 goto reset_info_exit;
1020 }
1021
1022 devices = kcalloc(fill.max, sizeof(*devices), GFP_KERNEL);
1023 if (!devices)
1024 return -ENOMEM;
1025
1026 fill.devices = devices;
1027
1028 ret = vfio_pci_for_each_slot_or_bus(vdev->pdev,
1029 vfio_pci_fill_devs,
1030 &fill, slot);
1031
/*
 * If a device was removed between counting and filling, we may come up
 * short of fill.max.  If a device was added, we'll have a return of
 * -EAGAIN above.
 */
1037 if (!ret)
1038 hdr.count = fill.cur;
1039
1040 reset_info_exit:
1041 if (copy_to_user((void __user *)arg, &hdr, minsz))
1042 ret = -EFAULT;
1043
1044 if (!ret) {
1045 if (copy_to_user((void __user *)(arg + minsz), devices,
1046 hdr.count * sizeof(*devices)))
1047 ret = -EFAULT;
1048 }
1049
1050 kfree(devices);
1051 return ret;
1052
1053 } else if (cmd == VFIO_DEVICE_PCI_HOT_RESET) {
1054 struct vfio_pci_hot_reset hdr;
1055 int32_t *group_fds;
1056 struct file **files;
1057 struct vfio_pci_group_info info;
1058 bool slot = false;
1059 int file_idx, count = 0, ret = 0;
1060
1061 minsz = offsetofend(struct vfio_pci_hot_reset, count);
1062
1063 if (copy_from_user(&hdr, (void __user *)arg, minsz))
1064 return -EFAULT;
1065
1066 if (hdr.argsz < minsz || hdr.flags)
1067 return -EINVAL;
1068
/* Can we do a slot or bus reset or neither? */
1070 if (!pci_probe_reset_slot(vdev->pdev->slot))
1071 slot = true;
1072 else if (pci_probe_reset_bus(vdev->pdev->bus))
1073 return -ENODEV;
1074
/*
 * We can't let userspace give us an arbitrarily large buffer to copy,
 * so verify how many we think there could be.  Note groups can have
 * multiple devices so one group per device is the max.
 */
1081 ret = vfio_pci_for_each_slot_or_bus(vdev->pdev,
1082 vfio_pci_count_devs,
1083 &count, slot);
1084 if (ret)
1085 return ret;
1086
/* Somewhere between 1 and count is OK */
1088 if (!hdr.count || hdr.count > count)
1089 return -EINVAL;
1090
1091 group_fds = kcalloc(hdr.count, sizeof(*group_fds), GFP_KERNEL);
1092 files = kcalloc(hdr.count, sizeof(*files), GFP_KERNEL);
1093 if (!group_fds || !files) {
1094 kfree(group_fds);
1095 kfree(files);
1096 return -ENOMEM;
1097 }
1098
1099 if (copy_from_user(group_fds, (void __user *)(arg + minsz),
1100 hdr.count * sizeof(*group_fds))) {
1101 kfree(group_fds);
1102 kfree(files);
1103 return -EFAULT;
1104 }
1105
/*
 * Collect the group file for each fd; holding the file reference
 * ensures the group cannot be released for the duration of the reset.
 */
1111 for (file_idx = 0; file_idx < hdr.count; file_idx++) {
1112 struct file *file = fget(group_fds[file_idx]);
1113
1114 if (!file) {
1115 ret = -EBADF;
1116 break;
1117 }
1118
/* Ensure the FD is a vfio group FD */
1120 if (!vfio_file_iommu_group(file)) {
1121 fput(file);
1122 ret = -EINVAL;
1123 break;
1124 }
1125
1126 files[file_idx] = file;
1127 }
1128
1129 kfree(group_fds);
1130
/* Bail on any failure collecting group files */
1132 if (ret)
1133 goto hot_reset_release;
1134
1135 info.count = hdr.count;
1136 info.files = files;
1137
1138 ret = vfio_pci_dev_set_hot_reset(vdev->vdev.dev_set, &info);
1139
1140 hot_reset_release:
1141 for (file_idx--; file_idx >= 0; file_idx--)
1142 fput(files[file_idx]);
1143
1144 kfree(files);
1145 return ret;
1146 } else if (cmd == VFIO_DEVICE_IOEVENTFD) {
1147 struct vfio_device_ioeventfd ioeventfd;
1148 int count;
1149
1150 minsz = offsetofend(struct vfio_device_ioeventfd, fd);
1151
1152 if (copy_from_user(&ioeventfd, (void __user *)arg, minsz))
1153 return -EFAULT;
1154
1155 if (ioeventfd.argsz < minsz)
1156 return -EINVAL;
1157
1158 if (ioeventfd.flags & ~VFIO_DEVICE_IOEVENTFD_SIZE_MASK)
1159 return -EINVAL;
1160
1161 count = ioeventfd.flags & VFIO_DEVICE_IOEVENTFD_SIZE_MASK;
1162
1163 if (hweight8(count) != 1 || ioeventfd.fd < -1)
1164 return -EINVAL;
1165
1166 return vfio_pci_ioeventfd(vdev, ioeventfd.offset,
1167 ioeventfd.data, count, ioeventfd.fd);
1168 }
1169 return -ENOTTY;
1170 }
1171 EXPORT_SYMBOL_GPL(vfio_pci_core_ioctl);
1172
1173 static int vfio_pci_core_feature_token(struct vfio_device *device, u32 flags,
1174 void __user *arg, size_t argsz)
1175 {
1176 struct vfio_pci_core_device *vdev =
1177 container_of(device, struct vfio_pci_core_device, vdev);
1178 uuid_t uuid;
1179 int ret;
1180
1181 if (!vdev->vf_token)
1182 return -ENOTTY;
1183
/*
 * We do not support GET of the VF Token UUID as this could
 * expose the token of the previous device user.
 */
1187 ret = vfio_check_feature(flags, argsz, VFIO_DEVICE_FEATURE_SET,
1188 sizeof(uuid));
1189 if (ret != 1)
1190 return ret;
1191
1192 if (copy_from_user(&uuid, arg, sizeof(uuid)))
1193 return -EFAULT;
1194
1195 mutex_lock(&vdev->vf_token->lock);
1196 uuid_copy(&vdev->vf_token->uuid, &uuid);
1197 mutex_unlock(&vdev->vf_token->lock);
1198 return 0;
1199 }
1200
1201 int vfio_pci_core_ioctl_feature(struct vfio_device *device, u32 flags,
1202 void __user *arg, size_t argsz)
1203 {
1204 switch (flags & VFIO_DEVICE_FEATURE_MASK) {
1205 case VFIO_DEVICE_FEATURE_PCI_VF_TOKEN:
1206 return vfio_pci_core_feature_token(device, flags, arg, argsz);
1207 default:
1208 return -ENOTTY;
1209 }
1210 }
1211 EXPORT_SYMBOL_GPL(vfio_pci_core_ioctl_feature);
1212
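/*
 * Dispatch a read or write to the handler for the region selected by the
 * offset: config space, BARs, ROM, VGA, or a device-specific region.
 */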
1213 static ssize_t vfio_pci_rw(struct vfio_pci_core_device *vdev, char __user *buf,
1214 size_t count, loff_t *ppos, bool iswrite)
1215 {
1216 unsigned int index = VFIO_PCI_OFFSET_TO_INDEX(*ppos);
1217
1218 if (index >= VFIO_PCI_NUM_REGIONS + vdev->num_regions)
1219 return -EINVAL;
1220
1221 switch (index) {
1222 case VFIO_PCI_CONFIG_REGION_INDEX:
1223 return vfio_pci_config_rw(vdev, buf, count, ppos, iswrite);
1224
1225 case VFIO_PCI_ROM_REGION_INDEX:
1226 if (iswrite)
1227 return -EINVAL;
1228 return vfio_pci_bar_rw(vdev, buf, count, ppos, false);
1229
1230 case VFIO_PCI_BAR0_REGION_INDEX ... VFIO_PCI_BAR5_REGION_INDEX:
1231 return vfio_pci_bar_rw(vdev, buf, count, ppos, iswrite);
1232
1233 case VFIO_PCI_VGA_REGION_INDEX:
1234 return vfio_pci_vga_rw(vdev, buf, count, ppos, iswrite);
1235 default:
1236 index -= VFIO_PCI_NUM_REGIONS;
1237 return vdev->region[index].ops->rw(vdev, buf,
1238 count, ppos, iswrite);
1239 }
1240
1241 return -EINVAL;
1242 }
1243
1244 ssize_t vfio_pci_core_read(struct vfio_device *core_vdev, char __user *buf,
1245 size_t count, loff_t *ppos)
1246 {
1247 struct vfio_pci_core_device *vdev =
1248 container_of(core_vdev, struct vfio_pci_core_device, vdev);
1249
1250 if (!count)
1251 return 0;
1252
1253 return vfio_pci_rw(vdev, buf, count, ppos, false);
1254 }
1255 EXPORT_SYMBOL_GPL(vfio_pci_core_read);
1256
1257 ssize_t vfio_pci_core_write(struct vfio_device *core_vdev, const char __user *buf,
1258 size_t count, loff_t *ppos)
1259 {
1260 struct vfio_pci_core_device *vdev =
1261 container_of(core_vdev, struct vfio_pci_core_device, vdev);
1262
1263 if (!count)
1264 return 0;
1265
1266 return vfio_pci_rw(vdev, (char __user *)buf, count, ppos, true);
1267 }
1268 EXPORT_SYMBOL_GPL(vfio_pci_core_write);
1269
/* Return 1 on zap and vma_lock acquired, 0 on contention (only with @try) */
1271 static int vfio_pci_zap_and_vma_lock(struct vfio_pci_core_device *vdev, bool try)
1272 {
1273 struct vfio_pci_mmap_vma *mmap_vma, *tmp;
1274
/*
 * Lock ordering:
 * vma_lock is nested under mmap_lock for vm_ops callback paths.
 * The memory_lock semaphore is used by both code paths calling
 * into this function to zap vmas and the vm_ops.fault callback
 * to protect the memory enable state of the device.
 *
 * When zapping vmas we need to maintain the mmap_lock => vma_lock
 * ordering, which requires using vma_lock to walk vma_list to
 * acquire an mm, then dropping vma_lock to get the mmap_lock and
 * reacquiring vma_lock.  This logic is derived from similar
 * requirements in uverbs_user_mmap_disassociate().
 *
 * mmap_lock must always be the top-level lock when it is taken.
 * Therefore we can only hold the memory_lock write lock when
 * vma_list is empty, as we'd need to take mmap_lock to clear
 * entries.  vma_list can only be guaranteed empty when holding
 * vma_lock, thus memory_lock is nested under vma_lock.
 *
 * This enables the vm_ops.fault callback to acquire vma_lock,
 * followed by memory_lock read lock, while already holding
 * mmap_lock without risk of deadlock.
 */
1298 while (1) {
1299 struct mm_struct *mm = NULL;
1300
1301 if (try) {
1302 if (!mutex_trylock(&vdev->vma_lock))
1303 return 0;
1304 } else {
1305 mutex_lock(&vdev->vma_lock);
1306 }
1307 while (!list_empty(&vdev->vma_list)) {
1308 mmap_vma = list_first_entry(&vdev->vma_list,
1309 struct vfio_pci_mmap_vma,
1310 vma_next);
1311 mm = mmap_vma->vma->vm_mm;
1312 if (mmget_not_zero(mm))
1313 break;
1314
1315 list_del(&mmap_vma->vma_next);
1316 kfree(mmap_vma);
1317 mm = NULL;
1318 }
1319 if (!mm)
1320 return 1;
1321 mutex_unlock(&vdev->vma_lock);
1322
1323 if (try) {
1324 if (!mmap_read_trylock(mm)) {
1325 mmput(mm);
1326 return 0;
1327 }
1328 } else {
1329 mmap_read_lock(mm);
1330 }
1331 if (try) {
1332 if (!mutex_trylock(&vdev->vma_lock)) {
1333 mmap_read_unlock(mm);
1334 mmput(mm);
1335 return 0;
1336 }
1337 } else {
1338 mutex_lock(&vdev->vma_lock);
1339 }
1340 list_for_each_entry_safe(mmap_vma, tmp,
1341 &vdev->vma_list, vma_next) {
1342 struct vm_area_struct *vma = mmap_vma->vma;
1343
1344 if (vma->vm_mm != mm)
1345 continue;
1346
1347 list_del(&mmap_vma->vma_next);
1348 kfree(mmap_vma);
1349
1350 zap_vma_ptes(vma, vma->vm_start,
1351 vma->vm_end - vma->vm_start);
1352 }
1353 mutex_unlock(&vdev->vma_lock);
1354 mmap_read_unlock(mm);
1355 mmput(mm);
1356 }
1357 }
1358
1359 void vfio_pci_zap_and_down_write_memory_lock(struct vfio_pci_core_device *vdev)
1360 {
1361 vfio_pci_zap_and_vma_lock(vdev, false);
1362 down_write(&vdev->memory_lock);
1363 mutex_unlock(&vdev->vma_lock);
1364 }
1365
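/*
 * Acquire memory_lock for write and make sure memory decode is enabled so
 * BAR accesses cannot fault; returns the original PCI_COMMAND value for
 * vfio_pci_memory_unlock_and_restore() to put back.
 */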
1366 u16 vfio_pci_memory_lock_and_enable(struct vfio_pci_core_device *vdev)
1367 {
1368 u16 cmd;
1369
1370 down_write(&vdev->memory_lock);
1371 pci_read_config_word(vdev->pdev, PCI_COMMAND, &cmd);
1372 if (!(cmd & PCI_COMMAND_MEMORY))
1373 pci_write_config_word(vdev->pdev, PCI_COMMAND,
1374 cmd | PCI_COMMAND_MEMORY);
1375
1376 return cmd;
1377 }
1378
1379 void vfio_pci_memory_unlock_and_restore(struct vfio_pci_core_device *vdev, u16 cmd)
1380 {
1381 pci_write_config_word(vdev->pdev, PCI_COMMAND, cmd);
1382 up_write(&vdev->memory_lock);
1383 }
1384
/* Caller holds vma_lock */
1386 static int __vfio_pci_add_vma(struct vfio_pci_core_device *vdev,
1387 struct vm_area_struct *vma)
1388 {
1389 struct vfio_pci_mmap_vma *mmap_vma;
1390
1391 mmap_vma = kmalloc(sizeof(*mmap_vma), GFP_KERNEL);
1392 if (!mmap_vma)
1393 return -ENOMEM;
1394
1395 mmap_vma->vma = vma;
1396 list_add(&mmap_vma->vma_next, &vdev->vma_list);
1397
1398 return 0;
1399 }
1400
/*
 * Zap mmaps on open so that we can fault them in on access and therefore
 * our vma_list only tracks mappings accessed since last zap.
 */
1405 static void vfio_pci_mmap_open(struct vm_area_struct *vma)
1406 {
1407 zap_vma_ptes(vma, vma->vm_start, vma->vm_end - vma->vm_start);
1408 }
1409
1410 static void vfio_pci_mmap_close(struct vm_area_struct *vma)
1411 {
1412 struct vfio_pci_core_device *vdev = vma->vm_private_data;
1413 struct vfio_pci_mmap_vma *mmap_vma;
1414
1415 mutex_lock(&vdev->vma_lock);
1416 list_for_each_entry(mmap_vma, &vdev->vma_list, vma_next) {
1417 if (mmap_vma->vma == vma) {
1418 list_del(&mmap_vma->vma_next);
1419 kfree(mmap_vma);
1420 break;
1421 }
1422 }
1423 mutex_unlock(&vdev->vma_lock);
1424 }
1425
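/*
 * Fault handler for user BAR mappings: with vma_lock and memory_lock held,
 * refuse the access (SIGBUS) if memory decode is disabled, otherwise insert
 * the whole BAR mapping and track the vma so it can be zapped later.
 */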
1426 static vm_fault_t vfio_pci_mmap_fault(struct vm_fault *vmf)
1427 {
1428 struct vm_area_struct *vma = vmf->vma;
1429 struct vfio_pci_core_device *vdev = vma->vm_private_data;
1430 struct vfio_pci_mmap_vma *mmap_vma;
1431 vm_fault_t ret = VM_FAULT_NOPAGE;
1432
1433 mutex_lock(&vdev->vma_lock);
1434 down_read(&vdev->memory_lock);
1435
1436 if (!__vfio_pci_memory_enabled(vdev)) {
1437 ret = VM_FAULT_SIGBUS;
1438 goto up_out;
1439 }
1440
/*
 * We populate the whole vma on fault, so we need to test whether
 * the vma has already been mapped, such as for concurrent faults
 * to the same vma.  io_remap_pfn_range() will trigger a BUG_ON if
 * we'd re-populate the same mapping.
 */
1447 list_for_each_entry(mmap_vma, &vdev->vma_list, vma_next) {
1448 if (mmap_vma->vma == vma)
1449 goto up_out;
1450 }
1451
1452 if (io_remap_pfn_range(vma, vma->vm_start, vma->vm_pgoff,
1453 vma->vm_end - vma->vm_start,
1454 vma->vm_page_prot)) {
1455 ret = VM_FAULT_SIGBUS;
1456 zap_vma_ptes(vma, vma->vm_start, vma->vm_end - vma->vm_start);
1457 goto up_out;
1458 }
1459
1460 if (__vfio_pci_add_vma(vdev, vma)) {
1461 ret = VM_FAULT_OOM;
1462 zap_vma_ptes(vma, vma->vm_start, vma->vm_end - vma->vm_start);
1463 }
1464
1465 up_out:
1466 up_read(&vdev->memory_lock);
1467 mutex_unlock(&vdev->vma_lock);
1468 return ret;
1469 }
1470
1471 static const struct vm_operations_struct vfio_pci_mmap_ops = {
1472 .open = vfio_pci_mmap_open,
1473 .close = vfio_pci_mmap_close,
1474 .fault = vfio_pci_mmap_fault,
1475 };
1476
1477 int vfio_pci_core_mmap(struct vfio_device *core_vdev, struct vm_area_struct *vma)
1478 {
1479 struct vfio_pci_core_device *vdev =
1480 container_of(core_vdev, struct vfio_pci_core_device, vdev);
1481 struct pci_dev *pdev = vdev->pdev;
1482 unsigned int index;
1483 u64 phys_len, req_len, pgoff, req_start;
1484 int ret;
1485
1486 index = vma->vm_pgoff >> (VFIO_PCI_OFFSET_SHIFT - PAGE_SHIFT);
1487
1488 if (index >= VFIO_PCI_NUM_REGIONS + vdev->num_regions)
1489 return -EINVAL;
1490 if (vma->vm_end < vma->vm_start)
1491 return -EINVAL;
1492 if ((vma->vm_flags & VM_SHARED) == 0)
1493 return -EINVAL;
1494 if (index >= VFIO_PCI_NUM_REGIONS) {
1495 int regnum = index - VFIO_PCI_NUM_REGIONS;
1496 struct vfio_pci_region *region = vdev->region + regnum;
1497
1498 if (region->ops && region->ops->mmap &&
1499 (region->flags & VFIO_REGION_INFO_FLAG_MMAP))
1500 return region->ops->mmap(vdev, region, vma);
1501 return -EINVAL;
1502 }
1503 if (index >= VFIO_PCI_ROM_REGION_INDEX)
1504 return -EINVAL;
1505 if (!vdev->bar_mmap_supported[index])
1506 return -EINVAL;
1507
1508 phys_len = PAGE_ALIGN(pci_resource_len(pdev, index));
1509 req_len = vma->vm_end - vma->vm_start;
1510 pgoff = vma->vm_pgoff &
1511 ((1U << (VFIO_PCI_OFFSET_SHIFT - PAGE_SHIFT)) - 1);
1512 req_start = pgoff << PAGE_SHIFT;
1513
1514 if (req_start + req_len > phys_len)
1515 return -EINVAL;
1516
/*
 * Even though we don't make use of the barmap for the mmap,
 * we need to request the region and the barmap tracks that.
 */
1521 if (!vdev->barmap[index]) {
1522 ret = pci_request_selected_regions(pdev,
1523 1 << index, "vfio-pci");
1524 if (ret)
1525 return ret;
1526
1527 vdev->barmap[index] = pci_iomap(pdev, index, 0);
1528 if (!vdev->barmap[index]) {
1529 pci_release_selected_regions(pdev, 1 << index);
1530 return -ENOMEM;
1531 }
1532 }
1533
1534 vma->vm_private_data = vdev;
1535 vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
1536 vma->vm_pgoff = (pci_resource_start(pdev, index) >> PAGE_SHIFT) + pgoff;
1537
/*
 * See remap_pfn_range(), called from vfio_pci_mmap_fault() but we can't
 * change vm_flags within the fault handler.  Set them now.
 */
1542 vma->vm_flags |= VM_IO | VM_PFNMAP | VM_DONTEXPAND | VM_DONTDUMP;
1543 vma->vm_ops = &vfio_pci_mmap_ops;
1544
1545 return 0;
1546 }
1547 EXPORT_SYMBOL_GPL(vfio_pci_core_mmap);
1548
1549 void vfio_pci_core_request(struct vfio_device *core_vdev, unsigned int count)
1550 {
1551 struct vfio_pci_core_device *vdev =
1552 container_of(core_vdev, struct vfio_pci_core_device, vdev);
1553 struct pci_dev *pdev = vdev->pdev;
1554
1555 mutex_lock(&vdev->igate);
1556
1557 if (vdev->req_trigger) {
1558 if (!(count % 10))
1559 pci_notice_ratelimited(pdev,
1560 "Relaying device request to user (#%u)\n",
1561 count);
1562 eventfd_signal(vdev->req_trigger, 1);
1563 } else if (count == 0) {
1564 pci_warn(pdev,
1565 "No device request channel registered, blocked until released by user\n");
1566 }
1567
1568 mutex_unlock(&vdev->igate);
1569 }
1570 EXPORT_SYMBOL_GPL(vfio_pci_core_request);
1571
1572 static int vfio_pci_validate_vf_token(struct vfio_pci_core_device *vdev,
1573 bool vf_token, uuid_t *uuid)
1574 {
/*
 * There's always some degree of trust or collaboration between SR-IOV
 * PF and VFs, even if just that the PF hosts the SR-IOV capability and
 * can disrupt VFs with a reset, but often the PF has more explicit
 * access to deny service to the VF or access data passed through the
 * VF.  We therefore require an opt-in via a shared VF token (UUID) to
 * represent this trust.  This both prevents that a VF driver might
 * assume the PF driver is a trusted, in-kernel driver, and also that
 * a PF driver might be replaced with a rogue driver, unknown to in-use
 * VF drivers.
 *
 * Therefore when presented with a VF, if the PF is a vfio device and
 * it is bound to the vfio-pci driver, the user needs to provide a VF
 * token to access the device, in the form of appending a vf_token to
 * the device name, for example:
 *
 * "0000:04:10.0 vf_token=<uuid>"
 *
 * When presented with a PF which has VFs in use, the user must also
 * provide the current VF token to prove collaboration with existing
 * VF users.  If VFs are not in use, the VF token provided for the PF
 * device will act to set the VF token.
 *
 * If the VF token is provided but unused, an error is generated.
 */
1600 if (vdev->pdev->is_virtfn) {
1601 struct vfio_pci_core_device *pf_vdev = vdev->sriov_pf_core_dev;
1602 bool match;
1603
1604 if (!pf_vdev) {
1605 if (!vf_token)
1606 return 0;
1607
1608 pci_info_ratelimited(vdev->pdev,
1609 "VF token incorrectly provided, PF not bound to vfio-pci\n");
1610 return -EINVAL;
1611 }
1612
1613 if (!vf_token) {
1614 pci_info_ratelimited(vdev->pdev,
1615 "VF token required to access device\n");
1616 return -EACCES;
1617 }
1618
1619 mutex_lock(&pf_vdev->vf_token->lock);
1620 match = uuid_equal(uuid, &pf_vdev->vf_token->uuid);
1621 mutex_unlock(&pf_vdev->vf_token->lock);
1622
1623 if (!match) {
1624 pci_info_ratelimited(vdev->pdev,
1625 "Incorrect VF token provided for device\n");
1626 return -EACCES;
1627 }
1628 } else if (vdev->vf_token) {
1629 mutex_lock(&vdev->vf_token->lock);
1630 if (vdev->vf_token->users) {
1631 if (!vf_token) {
1632 mutex_unlock(&vdev->vf_token->lock);
1633 pci_info_ratelimited(vdev->pdev,
1634 "VF token required to access device\n");
1635 return -EACCES;
1636 }
1637
1638 if (!uuid_equal(uuid, &vdev->vf_token->uuid)) {
1639 mutex_unlock(&vdev->vf_token->lock);
1640 pci_info_ratelimited(vdev->pdev,
1641 "Incorrect VF token provided for device\n");
1642 return -EACCES;
1643 }
1644 } else if (vf_token) {
1645 uuid_copy(&vdev->vf_token->uuid, uuid);
1646 }
1647
1648 mutex_unlock(&vdev->vf_token->lock);
1649 } else if (vf_token) {
1650 pci_info_ratelimited(vdev->pdev,
1651 "VF token incorrectly provided, not a PF or VF\n");
1652 return -EINVAL;
1653 }
1654
1655 return 0;
1656 }
1657
1658 #define VF_TOKEN_ARG "vf_token="
1659
1660 int vfio_pci_core_match(struct vfio_device *core_vdev, char *buf)
1661 {
1662 struct vfio_pci_core_device *vdev =
1663 container_of(core_vdev, struct vfio_pci_core_device, vdev);
1664 bool vf_token = false;
1665 uuid_t uuid;
1666 int ret;
1667
1668 if (strncmp(pci_name(vdev->pdev), buf, strlen(pci_name(vdev->pdev))))
1669 return 0;
1670
1671 if (strlen(buf) > strlen(pci_name(vdev->pdev))) {
1672 buf += strlen(pci_name(vdev->pdev));
1673
1674 if (*buf != ' ')
1675 return 0;
1676
1677 while (*buf) {
1678 if (*buf == ' ') {
1679 buf++;
1680 continue;
1681 }
1682
1683 if (!vf_token && !strncmp(buf, VF_TOKEN_ARG,
1684 strlen(VF_TOKEN_ARG))) {
1685 buf += strlen(VF_TOKEN_ARG);
1686
1687 if (strlen(buf) < UUID_STRING_LEN)
1688 return -EINVAL;
1689
1690 ret = uuid_parse(buf, &uuid);
1691 if (ret)
1692 return ret;
1693
1694 vf_token = true;
1695 buf += UUID_STRING_LEN;
1696 } else {
/* Unknown/duplicate option */
1698 return -EINVAL;
1699 }
1700 }
1701 }
1702
1703 ret = vfio_pci_validate_vf_token(vdev, vf_token, &uuid);
1704 if (ret)
1705 return ret;
1706
1707 return 1;
1708 }
1709 EXPORT_SYMBOL_GPL(vfio_pci_core_match);
1710
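/*
 * Bus notifier used on PFs: when one of our VFs is added, set its
 * driver_override to the vfio driver bound to the PF so a host driver
 * doesn't silently claim it, and warn if a VF ends up bound to a
 * different driver anyway.
 */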
1711 static int vfio_pci_bus_notifier(struct notifier_block *nb,
1712 unsigned long action, void *data)
1713 {
1714 struct vfio_pci_core_device *vdev = container_of(nb,
1715 struct vfio_pci_core_device, nb);
1716 struct device *dev = data;
1717 struct pci_dev *pdev = to_pci_dev(dev);
1718 struct pci_dev *physfn = pci_physfn(pdev);
1719
1720 if (action == BUS_NOTIFY_ADD_DEVICE &&
1721 pdev->is_virtfn && physfn == vdev->pdev) {
1722 pci_info(vdev->pdev, "Captured SR-IOV VF %s driver_override\n",
1723 pci_name(pdev));
1724 pdev->driver_override = kasprintf(GFP_KERNEL, "%s",
1725 vdev->vdev.ops->name);
1726 } else if (action == BUS_NOTIFY_BOUND_DRIVER &&
1727 pdev->is_virtfn && physfn == vdev->pdev) {
1728 struct pci_driver *drv = pci_dev_driver(pdev);
1729
1730 if (drv && drv != pci_dev_driver(vdev->pdev))
1731 pci_warn(vdev->pdev,
1732 "VF %s bound to driver %s while PF bound to driver %s\n",
1733 pci_name(pdev), drv->name,
1734 pci_dev_driver(vdev->pdev)->name);
1735 }
1736
1737 return 0;
1738 }
1739
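/*
 * SR-IOV setup at device registration: a VF is linked to its PF's
 * vfio_pci_core_device (if the PF is on the vfio_pci_sriov_pfs list), while
 * a PF allocates its VF token and registers the bus notifier above.
 */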
1740 static int vfio_pci_vf_init(struct vfio_pci_core_device *vdev)
1741 {
1742 struct pci_dev *pdev = vdev->pdev;
1743 struct vfio_pci_core_device *cur;
1744 struct pci_dev *physfn;
1745 int ret;
1746
1747 if (pdev->is_virtfn) {
/*
 * If the PF of this VF is bound to a vfio-pci-core based driver, it
 * will be on the vfio_pci_sriov_pfs list; link the VF to its PF so
 * the PF's VF token can be validated when this VF is opened.
 */
1754 physfn = pci_physfn(vdev->pdev);
1755 mutex_lock(&vfio_pci_sriov_pfs_mutex);
1756 list_for_each_entry(cur, &vfio_pci_sriov_pfs, sriov_pfs_item) {
1757 if (cur->pdev == physfn) {
1758 vdev->sriov_pf_core_dev = cur;
1759 break;
1760 }
1761 }
1762 mutex_unlock(&vfio_pci_sriov_pfs_mutex);
1763 return 0;
1764 }
1765
/* Not a SRIOV PF */
1767 if (!pdev->is_physfn)
1768 return 0;
1769
1770 vdev->vf_token = kzalloc(sizeof(*vdev->vf_token), GFP_KERNEL);
1771 if (!vdev->vf_token)
1772 return -ENOMEM;
1773
1774 mutex_init(&vdev->vf_token->lock);
1775 uuid_gen(&vdev->vf_token->uuid);
1776
1777 vdev->nb.notifier_call = vfio_pci_bus_notifier;
1778 ret = bus_register_notifier(&pci_bus_type, &vdev->nb);
1779 if (ret) {
1780 kfree(vdev->vf_token);
1781 return ret;
1782 }
1783 return 0;
1784 }
1785
1786 static void vfio_pci_vf_uninit(struct vfio_pci_core_device *vdev)
1787 {
1788 if (!vdev->vf_token)
1789 return;
1790
1791 bus_unregister_notifier(&pci_bus_type, &vdev->nb);
1792 WARN_ON(vdev->vf_token->users);
1793 mutex_destroy(&vdev->vf_token->lock);
1794 kfree(vdev->vf_token);
1795 }
1796
1797 static int vfio_pci_vga_init(struct vfio_pci_core_device *vdev)
1798 {
1799 struct pci_dev *pdev = vdev->pdev;
1800 int ret;
1801
1802 if (!vfio_pci_is_vga(pdev))
1803 return 0;
1804
1805 ret = aperture_remove_conflicting_pci_devices(pdev, vdev->vdev.ops->name);
1806 if (ret)
1807 return ret;
1808
1809 ret = vga_client_register(pdev, vfio_pci_set_decode);
1810 if (ret)
1811 return ret;
1812 vga_set_legacy_decoding(pdev, vfio_pci_set_decode(pdev, false));
1813 return 0;
1814 }
1815
1816 static void vfio_pci_vga_uninit(struct vfio_pci_core_device *vdev)
1817 {
1818 struct pci_dev *pdev = vdev->pdev;
1819
1820 if (!vfio_pci_is_vga(pdev))
1821 return;
1822 vga_client_unregister(pdev);
1823 vga_set_legacy_decoding(pdev, VGA_RSRC_NORMAL_IO | VGA_RSRC_NORMAL_MEM |
1824 VGA_RSRC_LEGACY_IO |
1825 VGA_RSRC_LEGACY_MEM);
1826 }
1827
1828 void vfio_pci_core_init_device(struct vfio_pci_core_device *vdev,
1829 struct pci_dev *pdev,
1830 const struct vfio_device_ops *vfio_pci_ops)
1831 {
1832 vfio_init_group_dev(&vdev->vdev, &pdev->dev, vfio_pci_ops);
1833 vdev->pdev = pdev;
1834 vdev->irq_type = VFIO_PCI_NUM_IRQS;
1835 mutex_init(&vdev->igate);
1836 spin_lock_init(&vdev->irqlock);
1837 mutex_init(&vdev->ioeventfds_lock);
1838 INIT_LIST_HEAD(&vdev->dummy_resources_list);
1839 INIT_LIST_HEAD(&vdev->ioeventfds_list);
1840 mutex_init(&vdev->vma_lock);
1841 INIT_LIST_HEAD(&vdev->vma_list);
1842 INIT_LIST_HEAD(&vdev->sriov_pfs_item);
1843 init_rwsem(&vdev->memory_lock);
1844 }
1845 EXPORT_SYMBOL_GPL(vfio_pci_core_init_device);
1846
1847 void vfio_pci_core_uninit_device(struct vfio_pci_core_device *vdev)
1848 {
1849 mutex_destroy(&vdev->igate);
1850 mutex_destroy(&vdev->ioeventfds_lock);
1851 mutex_destroy(&vdev->vma_lock);
1852 vfio_uninit_group_dev(&vdev->vdev);
1853 kfree(vdev->region);
1854 kfree(vdev->pm_save);
1855 }
1856 EXPORT_SYMBOL_GPL(vfio_pci_core_uninit_device);
1857
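/*
 * A minimal sketch of how a vendor driver built on vfio-pci-core is
 * expected to probe (error handling omitted, the driver's own structure
 * and ops names are illustrative):
 *
 *	vdev = kzalloc(sizeof(*vdev), GFP_KERNEL);
 *	vfio_pci_core_init_device(vdev, pdev, &my_vfio_pci_ops);
 *	dev_set_drvdata(&pdev->dev, vdev);
 *	ret = vfio_pci_core_register_device(vdev);
 *
 * Note dev_set_drvdata() must point at the vfio_pci_core_device before
 * registration; vfio_pci_core_register_device() checks this below.
 */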
1858 int vfio_pci_core_register_device(struct vfio_pci_core_device *vdev)
1859 {
1860 struct pci_dev *pdev = vdev->pdev;
1861 struct device *dev = &pdev->dev;
1862 int ret;
1863
/* Drivers must set the vfio_pci_core_device to their drvdata */
1865 if (WARN_ON(vdev != dev_get_drvdata(dev)))
1866 return -EINVAL;
1867
1868 if (pdev->hdr_type != PCI_HEADER_TYPE_NORMAL)
1869 return -EINVAL;
1870
1871 if (vdev->vdev.mig_ops) {
1872 if (!(vdev->vdev.mig_ops->migration_get_state &&
1873 vdev->vdev.mig_ops->migration_set_state) ||
1874 !(vdev->vdev.migration_flags & VFIO_MIGRATION_STOP_COPY))
1875 return -EINVAL;
1876 }
1877
/*
 * Prevent binding to PFs with VFs enabled, the VFs might be in use
 * by the host or other users.  We cannot capture the VFs if they
 * already exist, nor can we track VF users.  Disabling SR-IOV here
 * would initiate removing the VFs, which would unbind the driver,
 * which is prone to blocking if that VF is also in use by vfio-pci.
 * Just reject these PFs and let the user sort it out.
 */
1886 if (pci_num_vf(pdev)) {
1887 pci_warn(pdev, "Cannot bind to PF with SR-IOV enabled\n");
1888 return -EBUSY;
1889 }
1890
1891 if (pci_is_root_bus(pdev->bus)) {
1892 ret = vfio_assign_device_set(&vdev->vdev, vdev);
1893 } else if (!pci_probe_reset_slot(pdev->slot)) {
1894 ret = vfio_assign_device_set(&vdev->vdev, pdev->slot);
1895 } else {
/*
 * If there is no slot reset support for this device, the whole
 * bus needs to be grouped together to support bus-wide resets.
 */
1900 ret = vfio_assign_device_set(&vdev->vdev, pdev->bus);
1901 }
1902
1903 if (ret)
1904 return ret;
1905 ret = vfio_pci_vf_init(vdev);
1906 if (ret)
1907 return ret;
1908 ret = vfio_pci_vga_init(vdev);
1909 if (ret)
1910 goto out_vf;
1911
1912 vfio_pci_probe_power_state(vdev);
1913
/*
 * pci-core sets the device power state to an unknown value at
 * bootup and after being removed from a driver.  The only
 * transition it allows from this unknown state is to D0, which
 * typically happens when a driver calls pci_enable_device().
 * We're not ready to enable the device yet, but we do want to
 * be able to get to D3.  Therefore first do a D0 transition
 * before enabling runtime PM.
 */
1923 vfio_pci_set_power_state(vdev, PCI_D0);
1924
1925 dev->driver->pm = &vfio_pci_core_pm_ops;
1926 pm_runtime_allow(dev);
1927 if (!disable_idle_d3)
1928 pm_runtime_put(dev);
1929
1930 ret = vfio_register_group_dev(&vdev->vdev);
1931 if (ret)
1932 goto out_power;
1933 return 0;
1934
1935 out_power:
1936 if (!disable_idle_d3)
1937 pm_runtime_get_noresume(dev);
1938
1939 pm_runtime_forbid(dev);
1940 out_vf:
1941 vfio_pci_vf_uninit(vdev);
1942 return ret;
1943 }
1944 EXPORT_SYMBOL_GPL(vfio_pci_core_register_device);
1945
1946 void vfio_pci_core_unregister_device(struct vfio_pci_core_device *vdev)
1947 {
1948 vfio_pci_core_sriov_configure(vdev, 0);
1949
1950 vfio_unregister_group_dev(&vdev->vdev);
1951
1952 vfio_pci_vf_uninit(vdev);
1953 vfio_pci_vga_uninit(vdev);
1954
1955 if (!disable_idle_d3)
1956 pm_runtime_get_noresume(&vdev->pdev->dev);
1957
1958 pm_runtime_forbid(&vdev->pdev->dev);
1959 }
1960 EXPORT_SYMBOL_GPL(vfio_pci_core_unregister_device);
1961
1962 pci_ers_result_t vfio_pci_core_aer_err_detected(struct pci_dev *pdev,
1963 pci_channel_state_t state)
1964 {
1965 struct vfio_pci_core_device *vdev = dev_get_drvdata(&pdev->dev);
1966
1967 mutex_lock(&vdev->igate);
1968
1969 if (vdev->err_trigger)
1970 eventfd_signal(vdev->err_trigger, 1);
1971
1972 mutex_unlock(&vdev->igate);
1973
1974 return PCI_ERS_RESULT_CAN_RECOVER;
1975 }
1976 EXPORT_SYMBOL_GPL(vfio_pci_core_aer_err_detected);
1977
1978 int vfio_pci_core_sriov_configure(struct vfio_pci_core_device *vdev,
1979 int nr_virtfn)
1980 {
1981 struct pci_dev *pdev = vdev->pdev;
1982 int ret = 0;
1983
1984 device_lock_assert(&pdev->dev);
1985
1986 if (nr_virtfn) {
1987 mutex_lock(&vfio_pci_sriov_pfs_mutex);
/*
 * The thread that adds the vdev to the list is the only thread
 * that gets to call pci_enable_sriov() and we will only allow it
 * to be called once without going through pci_disable_sriov()
 */
1994 if (!list_empty(&vdev->sriov_pfs_item)) {
1995 ret = -EINVAL;
1996 goto out_unlock;
1997 }
1998 list_add_tail(&vdev->sriov_pfs_item, &vfio_pci_sriov_pfs);
1999 mutex_unlock(&vfio_pci_sriov_pfs_mutex);
2000
/*
 * The PF power state should always be higher than the VF power
 * state.  The PF can be in low power state either with runtime
 * power management (when there is no user) or PCI_PM_CTRL
 * register write by the user.  If PF is in the low power state,
 * then change the power state to D0 first before enabling
 * SR-IOV.  Also, this function can be called at any time, and
 * userspace PCI_PM_CTRL write can race against this code path,
 * so protect the same with 'memory_lock'.
 */
2011 ret = pm_runtime_resume_and_get(&pdev->dev);
2012 if (ret)
2013 goto out_del;
2014
2015 down_write(&vdev->memory_lock);
2016 vfio_pci_set_power_state(vdev, PCI_D0);
2017 ret = pci_enable_sriov(pdev, nr_virtfn);
2018 up_write(&vdev->memory_lock);
2019 if (ret) {
2020 pm_runtime_put(&pdev->dev);
2021 goto out_del;
2022 }
2023 return nr_virtfn;
2024 }
2025
2026 if (pci_num_vf(pdev)) {
2027 pci_disable_sriov(pdev);
2028 pm_runtime_put(&pdev->dev);
2029 }
2030
2031 out_del:
2032 mutex_lock(&vfio_pci_sriov_pfs_mutex);
2033 list_del_init(&vdev->sriov_pfs_item);
2034 out_unlock:
2035 mutex_unlock(&vfio_pci_sriov_pfs_mutex);
2036 return ret;
2037 }
2038 EXPORT_SYMBOL_GPL(vfio_pci_core_sriov_configure);
2039
2040 const struct pci_error_handlers vfio_pci_core_err_handlers = {
2041 .error_detected = vfio_pci_core_aer_err_detected,
2042 };
2043 EXPORT_SYMBOL_GPL(vfio_pci_core_err_handlers);
2044
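/* Test whether the device is owned by one of the user-supplied group files. */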
2045 static bool vfio_dev_in_groups(struct vfio_pci_core_device *vdev,
2046 struct vfio_pci_group_info *groups)
2047 {
2048 unsigned int i;
2049
2050 for (i = 0; i < groups->count; i++)
2051 if (vfio_file_has_dev(groups->files[i], &vdev->vdev))
2052 return true;
2053 return false;
2054 }
2055
2056 static int vfio_pci_is_device_in_set(struct pci_dev *pdev, void *data)
2057 {
2058 struct vfio_device_set *dev_set = data;
2059 struct vfio_device *cur;
2060
2061 list_for_each_entry(cur, &dev_set->device_list, dev_set_list)
2062 if (cur->dev == &pdev->dev)
2063 return 0;
2064 return -EBUSY;
2065 }
2066
/*
 * Return the pci_dev to use for a bus or slot reset of this dev_set, or
 * NULL if no such reset is possible.  A bus/slot reset affects every
 * device it reaches, so it can only be used when all affected devices
 * are members of this dev_set; otherwise we would reset devices owned
 * by someone else.
 */
2077 static struct pci_dev *
2078 vfio_pci_dev_set_resettable(struct vfio_device_set *dev_set)
2079 {
2080 struct pci_dev *pdev;
2081
2082 lockdep_assert_held(&dev_set->lock);
2083
/*
 * By definition all PCI devices in the dev_set share the same PCI
 * reset, so any pci_dev will have the same outcomes for
 * pci_probe_reset_*() and pci_reset_bus().
 */
2089 pdev = list_first_entry(&dev_set->device_list,
2090 struct vfio_pci_core_device,
2091 vdev.dev_set_list)->pdev;
2092
/* Bail if neither a slot nor a bus reset is possible */
2094 if (pci_probe_reset_slot(pdev->slot) && pci_probe_reset_bus(pdev->bus))
2095 return NULL;
2096
2097 if (vfio_pci_for_each_slot_or_bus(pdev, vfio_pci_is_device_in_set,
2098 dev_set,
2099 !pci_probe_reset_slot(pdev->slot)))
2100 return NULL;
2101 return pdev;
2102 }
2103
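/*
 * Runtime-resume every device in the dev_set; on failure, drop the usage
 * counts already taken and return the error.
 */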
2104 static int vfio_pci_dev_set_pm_runtime_get(struct vfio_device_set *dev_set)
2105 {
2106 struct vfio_pci_core_device *cur;
2107 int ret;
2108
2109 list_for_each_entry(cur, &dev_set->device_list, vdev.dev_set_list) {
2110 ret = pm_runtime_resume_and_get(&cur->pdev->dev);
2111 if (ret)
2112 goto unwind;
2113 }
2114
2115 return 0;
2116
2117 unwind:
2118 list_for_each_entry_continue_reverse(cur, &dev_set->device_list,
2119 vdev.dev_set_list)
2120 pm_runtime_put(&cur->pdev->dev);
2121
2122 return ret;
2123 }
2124
/*
 * We need to get memory_lock for each device, but devices can share
 * mmap_lock, therefore we need to zap and hold the vma_lock for each
 * device, and only then get each memory_lock.
 */
2130 static int vfio_pci_dev_set_hot_reset(struct vfio_device_set *dev_set,
2131 struct vfio_pci_group_info *groups)
2132 {
2133 struct vfio_pci_core_device *cur_mem;
2134 struct vfio_pci_core_device *cur_vma;
2135 struct vfio_pci_core_device *cur;
2136 struct pci_dev *pdev;
2137 bool is_mem = true;
2138 int ret;
2139
2140 mutex_lock(&dev_set->lock);
2141 cur_mem = list_first_entry(&dev_set->device_list,
2142 struct vfio_pci_core_device,
2143 vdev.dev_set_list);
2144
2145 pdev = vfio_pci_dev_set_resettable(dev_set);
2146 if (!pdev) {
2147 ret = -EINVAL;
2148 goto err_unlock;
2149 }
2150
2151 list_for_each_entry(cur_vma, &dev_set->device_list, vdev.dev_set_list) {
/*
 * Test whether all the affected devices are contained by the
 * set of groups provided by the user.
 */
2156 if (!vfio_dev_in_groups(cur_vma, groups)) {
2157 ret = -EINVAL;
2158 goto err_undo;
2159 }
2160
/*
 * Locking multiple devices is prone to deadlock, runaway and
 * unwind if we hit contention.
 */
2165 if (!vfio_pci_zap_and_vma_lock(cur_vma, true)) {
2166 ret = -EBUSY;
2167 goto err_undo;
2168 }
2169 }
2170 cur_vma = NULL;
2171
2172 list_for_each_entry(cur_mem, &dev_set->device_list, vdev.dev_set_list) {
2173 if (!down_write_trylock(&cur_mem->memory_lock)) {
2174 ret = -EBUSY;
2175 goto err_undo;
2176 }
2177 mutex_unlock(&cur_mem->vma_lock);
2178 }
2179 cur_mem = NULL;
2180
/*
 * The pci_reset_bus() will reset all the devices in the bus.  The power
 * state can be non-D0 for some of the devices in the bus.  For these
 * devices, the pci_reset_bus() will internally set the power state to
 * D0 without vfio driver involvement.  For the devices which have
 * NoSoftRst-, the reset function can cause the PCI config space reset
 * without restoring the original state (saved locally in
 * 'vdev->pm_save').
 */
2190 list_for_each_entry(cur, &dev_set->device_list, vdev.dev_set_list)
2191 vfio_pci_set_power_state(cur, PCI_D0);
2192
2193 ret = pci_reset_bus(pdev);
2194
2195 err_undo:
2196 list_for_each_entry(cur, &dev_set->device_list, vdev.dev_set_list) {
2197 if (cur == cur_mem)
2198 is_mem = false;
2199 if (cur == cur_vma)
2200 break;
2201 if (is_mem)
2202 up_write(&cur->memory_lock);
2203 else
2204 mutex_unlock(&cur->vma_lock);
2205 }
2206 err_unlock:
2207 mutex_unlock(&dev_set->lock);
2208 return ret;
2209 }
2210
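/*
 * A deferred reset is wanted only when no device in the set is open and at
 * least one device is flagged needs_reset.
 */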
2211 static bool vfio_pci_dev_set_needs_reset(struct vfio_device_set *dev_set)
2212 {
2213 struct vfio_pci_core_device *cur;
2214 bool needs_reset = false;
2215
2216 list_for_each_entry(cur, &dev_set->device_list, vdev.dev_set_list) {
/* Reset is only allowed when no device in the set is open */
2218 if (cur->vdev.open_count)
2219 return false;
2220 needs_reset |= cur->needs_reset;
2221 }
2222 return needs_reset;
2223 }
2224
/*
 * If a bus or slot reset is available for the provided dev_set and:
 *  - All of the devices affected by that bus or slot reset are unused
 *  - At least one of the affected devices is marked dirty via
 *    needs_reset (such as by lack of FLR support)
 * Then attempt the reset to put the devices back into a known state.
 */
2232 static void vfio_pci_dev_set_try_reset(struct vfio_device_set *dev_set)
2233 {
2234 struct vfio_pci_core_device *cur;
2235 struct pci_dev *pdev;
2236 bool reset_done = false;
2237
2238 if (!vfio_pci_dev_set_needs_reset(dev_set))
2239 return;
2240
2241 pdev = vfio_pci_dev_set_resettable(dev_set);
2242 if (!pdev)
2243 return;
2244
/*
 * Some of the devices in the dev_set can be in the runtime suspended
 * state.  Increment the usage count for all the devices in the dev_set
 * before reset and decrement the same after reset.
 */
2250 if (!disable_idle_d3 && vfio_pci_dev_set_pm_runtime_get(dev_set))
2251 return;
2252
2253 if (!pci_reset_bus(pdev))
2254 reset_done = true;
2255
2256 list_for_each_entry(cur, &dev_set->device_list, vdev.dev_set_list) {
2257 if (reset_done)
2258 cur->needs_reset = false;
2259
2260 if (!disable_idle_d3)
2261 pm_runtime_put(&cur->pdev->dev);
2262 }
2263 }
2264
2265 void vfio_pci_core_set_params(bool is_nointxmask, bool is_disable_vga,
2266 bool is_disable_idle_d3)
2267 {
2268 nointxmask = is_nointxmask;
2269 disable_vga = is_disable_vga;
2270 disable_idle_d3 = is_disable_idle_d3;
2271 }
2272 EXPORT_SYMBOL_GPL(vfio_pci_core_set_params);
2273
2274 static void vfio_pci_core_cleanup(void)
2275 {
2276 vfio_pci_uninit_perm_bits();
2277 }
2278
2279 static int __init vfio_pci_core_init(void)
2280 {
/* Allocate shared config space permission data used by all devices */
2282 return vfio_pci_init_perm_bits();
2283 }
2284
2285 module_init(vfio_pci_core_init);
2286 module_exit(vfio_pci_core_cleanup);
2287
2288 MODULE_LICENSE("GPL v2");
2289 MODULE_AUTHOR(DRIVER_AUTHOR);
2290 MODULE_DESCRIPTION(DRIVER_DESC);