// SPDX-License-Identifier: GPL-2.0-only
/*
 * VFIO: IOMMU DMA mapping support for Type1 IOMMU
 *
 * Copyright (C) 2012 Red Hat, Inc.  All rights reserved.
 *     Author: Alex Williamson <alex.williamson@redhat.com>
 *
 * Derived from original vfio:
 * Copyright 2010 Cisco Systems, Inc.  All rights reserved.
 * Author: Tom Lyon, pugs@cisco.com
 */
0024 #include <linux/compat.h>
0025 #include <linux/device.h>
0026 #include <linux/fs.h>
0027 #include <linux/highmem.h>
0028 #include <linux/iommu.h>
0029 #include <linux/module.h>
0030 #include <linux/mm.h>
0031 #include <linux/kthread.h>
0032 #include <linux/rbtree.h>
0033 #include <linux/sched/signal.h>
0034 #include <linux/sched/mm.h>
0035 #include <linux/slab.h>
0036 #include <linux/uaccess.h>
0037 #include <linux/vfio.h>
0038 #include <linux/workqueue.h>
0039 #include <linux/notifier.h>
0040 #include <linux/dma-iommu.h>
0041 #include <linux/irqdomain.h>
0042 #include "vfio.h"
0043
0044 #define DRIVER_VERSION "0.2"
0045 #define DRIVER_AUTHOR "Alex Williamson <alex.williamson@redhat.com>"
0046 #define DRIVER_DESC "Type1 IOMMU driver for VFIO"
0047
0048 static bool allow_unsafe_interrupts;
0049 module_param_named(allow_unsafe_interrupts,
0050 allow_unsafe_interrupts, bool, S_IRUGO | S_IWUSR);
0051 MODULE_PARM_DESC(allow_unsafe_interrupts,
0052 "Enable VFIO IOMMU support for on platforms without interrupt remapping support.");
0053
0054 static bool disable_hugepages;
0055 module_param_named(disable_hugepages,
0056 disable_hugepages, bool, S_IRUGO | S_IWUSR);
0057 MODULE_PARM_DESC(disable_hugepages,
0058 "Disable VFIO IOMMU support for IOMMU hugepages.");
0059
0060 static unsigned int dma_entry_limit __read_mostly = U16_MAX;
0061 module_param_named(dma_entry_limit, dma_entry_limit, uint, 0644);
0062 MODULE_PARM_DESC(dma_entry_limit,
0063 "Maximum number of user DMA mappings per container (65535).");
0064
0065 struct vfio_iommu {
0066 struct list_head domain_list;
0067 struct list_head iova_list;
0068 struct mutex lock;
0069 struct rb_root dma_list;
0070 struct list_head device_list;
0071 struct mutex device_list_lock;
0072 unsigned int dma_avail;
0073 unsigned int vaddr_invalid_count;
0074 uint64_t pgsize_bitmap;
0075 uint64_t num_non_pinned_groups;
0076 wait_queue_head_t vaddr_wait;
0077 bool v2;
0078 bool nesting;
0079 bool dirty_page_tracking;
0080 bool container_open;
0081 struct list_head emulated_iommu_groups;
0082 };
0083
0084 struct vfio_domain {
0085 struct iommu_domain *domain;
0086 struct list_head next;
0087 struct list_head group_list;
bool fgsp : 1; /* Fine-grained super pages */
0089 bool enforce_cache_coherency : 1;
0090 };
0091
struct vfio_dma {
struct rb_node node;
dma_addr_t iova; /* Device address */
unsigned long vaddr; /* Process virtual addr */
size_t size; /* Map size (bytes) */
int prot; /* IOMMU_READ/WRITE */
bool iommu_mapped;
bool lock_cap; /* capable(CAP_IPC_LOCK) */
bool vaddr_invalid;
struct task_struct *task; /* Task that created the mapping */
struct rb_root pfn_list; /* Externally pinned pfns */
unsigned long *bitmap; /* Dirty page bitmap */
};
0105
struct vfio_batch {
struct page **pages; /* for pin_user_pages_remote */
struct page *fallback_page; /* if pages alloc fails */
int capacity; /* length of pages array */
int size; /* of batch currently */
int offset; /* of next entry in pages */
};
0113
0114 struct vfio_iommu_group {
0115 struct iommu_group *iommu_group;
0116 struct list_head next;
0117 bool pinned_page_dirty_scope;
0118 };
0119
0120 struct vfio_iova {
0121 struct list_head list;
0122 dma_addr_t start;
0123 dma_addr_t end;
0124 };
0125
/*
 * A host pfn pinned on behalf of an external (e.g. mdev) driver, tracked in
 * a vfio_dma's pfn_list and reference counted per iova.
 */
0129 struct vfio_pfn {
0130 struct rb_node node;
0131 dma_addr_t iova;
0132 unsigned long pfn;
0133 unsigned int ref_count;
0134 };
0135
0136 struct vfio_regions {
0137 struct list_head list;
0138 dma_addr_t iova;
0139 phys_addr_t phys;
0140 size_t len;
0141 };
0142
0143 #define DIRTY_BITMAP_BYTES(n) (ALIGN(n, BITS_PER_TYPE(u64)) / BITS_PER_BYTE)
0144
/*
 * The bit count passed to bitmap_set() ends up as a signed integer in
 * __bitmap_set(), so at most INT_MAX pages can be tracked per vfio_dma.
 * With 4K pages that corresponds to 2^43 bytes (8TB) of guest memory.
 */
0153 #define DIRTY_BITMAP_PAGES_MAX ((u64)INT_MAX)
0154 #define DIRTY_BITMAP_SIZE_MAX DIRTY_BITMAP_BYTES(DIRTY_BITMAP_PAGES_MAX)
0155
0156 #define WAITED 1
0157
0158 static int put_pfn(unsigned long pfn, int prot);
0159
0160 static struct vfio_iommu_group*
0161 vfio_iommu_find_iommu_group(struct vfio_iommu *iommu,
0162 struct iommu_group *iommu_group);
0163
/*
 * This code handles mapping and unmapping of user data buffers into
 * DMA'ble space using the IOMMU.  Mappings are tracked in a per-container
 * rb-tree (iommu->dma_list) keyed by IOVA.
 */
0169 static struct vfio_dma *vfio_find_dma(struct vfio_iommu *iommu,
0170 dma_addr_t start, size_t size)
0171 {
0172 struct rb_node *node = iommu->dma_list.rb_node;
0173
0174 while (node) {
0175 struct vfio_dma *dma = rb_entry(node, struct vfio_dma, node);
0176
0177 if (start + size <= dma->iova)
0178 node = node->rb_left;
0179 else if (start >= dma->iova + dma->size)
0180 node = node->rb_right;
0181 else
0182 return dma;
0183 }
0184
0185 return NULL;
0186 }
0187
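/*
 * Return the first (lowest-iova) dma_list node that intersects
 * [start, start + size), or NULL if nothing falls in that range.
 */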
0188 static struct rb_node *vfio_find_dma_first_node(struct vfio_iommu *iommu,
0189 dma_addr_t start, u64 size)
0190 {
0191 struct rb_node *res = NULL;
0192 struct rb_node *node = iommu->dma_list.rb_node;
0193 struct vfio_dma *dma_res = NULL;
0194
0195 while (node) {
0196 struct vfio_dma *dma = rb_entry(node, struct vfio_dma, node);
0197
0198 if (start < dma->iova + dma->size) {
0199 res = node;
0200 dma_res = dma;
0201 if (start >= dma->iova)
0202 break;
0203 node = node->rb_left;
0204 } else {
0205 node = node->rb_right;
0206 }
0207 }
0208 if (res && size && dma_res->iova >= start + size)
0209 res = NULL;
0210 return res;
0211 }
0212
0213 static void vfio_link_dma(struct vfio_iommu *iommu, struct vfio_dma *new)
0214 {
0215 struct rb_node **link = &iommu->dma_list.rb_node, *parent = NULL;
0216 struct vfio_dma *dma;
0217
0218 while (*link) {
0219 parent = *link;
0220 dma = rb_entry(parent, struct vfio_dma, node);
0221
0222 if (new->iova + new->size <= dma->iova)
0223 link = &(*link)->rb_left;
0224 else
0225 link = &(*link)->rb_right;
0226 }
0227
0228 rb_link_node(&new->node, parent, link);
0229 rb_insert_color(&new->node, &iommu->dma_list);
0230 }
0231
0232 static void vfio_unlink_dma(struct vfio_iommu *iommu, struct vfio_dma *old)
0233 {
0234 rb_erase(&old->node, &iommu->dma_list);
0235 }
0236
0237
0238 static int vfio_dma_bitmap_alloc(struct vfio_dma *dma, size_t pgsize)
0239 {
0240 uint64_t npages = dma->size / pgsize;
0241
0242 if (npages > DIRTY_BITMAP_PAGES_MAX)
0243 return -EINVAL;
0244
/*
 * Allocate one extra u64 beyond the bitmap itself: update_user_bitmap()
 * may shift the whole bitmap left by up to 63 bits (bitmap_shift_left()
 * over nbits + shift bits) when the reported range is not u64-aligned
 * with the user's buffer, and needs that headroom.
 */
0250 dma->bitmap = kvzalloc(DIRTY_BITMAP_BYTES(npages) + sizeof(u64),
0251 GFP_KERNEL);
0252 if (!dma->bitmap)
0253 return -ENOMEM;
0254
0255 return 0;
0256 }
0257
0258 static void vfio_dma_bitmap_free(struct vfio_dma *dma)
0259 {
0260 kvfree(dma->bitmap);
0261 dma->bitmap = NULL;
0262 }
0263
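/* Seed the dirty bitmap with all pages that are currently externally pinned. */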
0264 static void vfio_dma_populate_bitmap(struct vfio_dma *dma, size_t pgsize)
0265 {
0266 struct rb_node *p;
0267 unsigned long pgshift = __ffs(pgsize);
0268
0269 for (p = rb_first(&dma->pfn_list); p; p = rb_next(p)) {
0270 struct vfio_pfn *vpfn = rb_entry(p, struct vfio_pfn, node);
0271
0272 bitmap_set(dma->bitmap, (vpfn->iova - dma->iova) >> pgshift, 1);
0273 }
0274 }
0275
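/*
 * Mark every page of every mapping dirty, used when per-page precision has
 * been lost (e.g. a group without pinned-page dirty scope was present).
 */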
0276 static void vfio_iommu_populate_bitmap_full(struct vfio_iommu *iommu)
0277 {
0278 struct rb_node *n;
0279 unsigned long pgshift = __ffs(iommu->pgsize_bitmap);
0280
0281 for (n = rb_first(&iommu->dma_list); n; n = rb_next(n)) {
0282 struct vfio_dma *dma = rb_entry(n, struct vfio_dma, node);
0283
0284 bitmap_set(dma->bitmap, 0, dma->size >> pgshift);
0285 }
0286 }
0287
0288 static int vfio_dma_bitmap_alloc_all(struct vfio_iommu *iommu, size_t pgsize)
0289 {
0290 struct rb_node *n;
0291
0292 for (n = rb_first(&iommu->dma_list); n; n = rb_next(n)) {
0293 struct vfio_dma *dma = rb_entry(n, struct vfio_dma, node);
0294 int ret;
0295
0296 ret = vfio_dma_bitmap_alloc(dma, pgsize);
0297 if (ret) {
0298 struct rb_node *p;
0299
0300 for (p = rb_prev(n); p; p = rb_prev(p)) {
struct vfio_dma *dma = rb_entry(p,
0302 struct vfio_dma, node);
0303
0304 vfio_dma_bitmap_free(dma);
0305 }
0306 return ret;
0307 }
0308 vfio_dma_populate_bitmap(dma, pgsize);
0309 }
0310 return 0;
0311 }
0312
0313 static void vfio_dma_bitmap_free_all(struct vfio_iommu *iommu)
0314 {
0315 struct rb_node *n;
0316
0317 for (n = rb_first(&iommu->dma_list); n; n = rb_next(n)) {
0318 struct vfio_dma *dma = rb_entry(n, struct vfio_dma, node);
0319
0320 vfio_dma_bitmap_free(dma);
0321 }
0322 }
0323
/*
 * Helper functions for the per-vfio_dma iova -> pinned-pfn rb-tree
 * (dma->pfn_list), used for pages pinned via the external pin API.
 */
0327 static struct vfio_pfn *vfio_find_vpfn(struct vfio_dma *dma, dma_addr_t iova)
0328 {
0329 struct vfio_pfn *vpfn;
0330 struct rb_node *node = dma->pfn_list.rb_node;
0331
0332 while (node) {
0333 vpfn = rb_entry(node, struct vfio_pfn, node);
0334
0335 if (iova < vpfn->iova)
0336 node = node->rb_left;
0337 else if (iova > vpfn->iova)
0338 node = node->rb_right;
0339 else
0340 return vpfn;
0341 }
0342 return NULL;
0343 }
0344
0345 static void vfio_link_pfn(struct vfio_dma *dma,
0346 struct vfio_pfn *new)
0347 {
0348 struct rb_node **link, *parent = NULL;
0349 struct vfio_pfn *vpfn;
0350
0351 link = &dma->pfn_list.rb_node;
0352 while (*link) {
0353 parent = *link;
0354 vpfn = rb_entry(parent, struct vfio_pfn, node);
0355
0356 if (new->iova < vpfn->iova)
0357 link = &(*link)->rb_left;
0358 else
0359 link = &(*link)->rb_right;
0360 }
0361
0362 rb_link_node(&new->node, parent, link);
0363 rb_insert_color(&new->node, &dma->pfn_list);
0364 }
0365
0366 static void vfio_unlink_pfn(struct vfio_dma *dma, struct vfio_pfn *old)
0367 {
0368 rb_erase(&old->node, &dma->pfn_list);
0369 }
0370
0371 static int vfio_add_to_pfn_list(struct vfio_dma *dma, dma_addr_t iova,
0372 unsigned long pfn)
0373 {
0374 struct vfio_pfn *vpfn;
0375
0376 vpfn = kzalloc(sizeof(*vpfn), GFP_KERNEL);
0377 if (!vpfn)
0378 return -ENOMEM;
0379
0380 vpfn->iova = iova;
0381 vpfn->pfn = pfn;
0382 vpfn->ref_count = 1;
0383 vfio_link_pfn(dma, vpfn);
0384 return 0;
0385 }
0386
0387 static void vfio_remove_from_pfn_list(struct vfio_dma *dma,
0388 struct vfio_pfn *vpfn)
0389 {
0390 vfio_unlink_pfn(dma, vpfn);
0391 kfree(vpfn);
0392 }
0393
0394 static struct vfio_pfn *vfio_iova_get_vfio_pfn(struct vfio_dma *dma,
0395 unsigned long iova)
0396 {
0397 struct vfio_pfn *vpfn = vfio_find_vpfn(dma, iova);
0398
0399 if (vpfn)
0400 vpfn->ref_count++;
0401 return vpfn;
0402 }
0403
0404 static int vfio_iova_put_vfio_pfn(struct vfio_dma *dma, struct vfio_pfn *vpfn)
0405 {
0406 int ret = 0;
0407
0408 vpfn->ref_count--;
0409 if (!vpfn->ref_count) {
0410 ret = put_pfn(vpfn->pfn, dma->prot);
0411 vfio_remove_from_pfn_list(dma, vpfn);
0412 }
0413 return ret;
0414 }
0415
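/*
 * Adjust the RLIMIT_MEMLOCK accounting of the task that created the mapping.
 * @async indicates the caller may not be running in dma->task's context, so
 * take a reference on its mm via get_task_mm() instead of using the mm
 * pointer directly.
 */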
0416 static int vfio_lock_acct(struct vfio_dma *dma, long npage, bool async)
0417 {
0418 struct mm_struct *mm;
0419 int ret;
0420
0421 if (!npage)
0422 return 0;
0423
0424 mm = async ? get_task_mm(dma->task) : dma->task->mm;
0425 if (!mm)
0426 return -ESRCH;
0427
0428 ret = mmap_write_lock_killable(mm);
0429 if (!ret) {
0430 ret = __account_locked_vm(mm, abs(npage), npage > 0, dma->task,
0431 dma->lock_cap);
0432 mmap_write_unlock(mm);
0433 }
0434
0435 if (async)
0436 mmput(mm);
0437
0438 return ret;
0439 }
0440
/*
 * Some mappings aren't backed by a struct page, for example an mmap'd
 * MMIO range for our own or another device.  These use a different
 * pfn conversion and shouldn't be tracked as locked pages.
 */
0448 static bool is_invalid_reserved_pfn(unsigned long pfn)
0449 {
0450 if (pfn_valid(pfn))
0451 return PageReserved(pfn_to_page(pfn));
0452
0453 return true;
0454 }
0455
0456 static int put_pfn(unsigned long pfn, int prot)
0457 {
0458 if (!is_invalid_reserved_pfn(pfn)) {
0459 struct page *page = pfn_to_page(pfn);
0460
0461 unpin_user_pages_dirty_lock(&page, 1, prot & IOMMU_WRITE);
0462 return 1;
0463 }
0464 return 0;
0465 }
0466
0467 #define VFIO_BATCH_MAX_CAPACITY (PAGE_SIZE / sizeof(struct page *))
0468
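/*
 * Use a whole page to batch struct page pointers for pinning; fall back to a
 * single-entry batch if the allocation fails or hugepages are disabled.
 */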
0469 static void vfio_batch_init(struct vfio_batch *batch)
0470 {
0471 batch->size = 0;
0472 batch->offset = 0;
0473
0474 if (unlikely(disable_hugepages))
0475 goto fallback;
0476
0477 batch->pages = (struct page **) __get_free_page(GFP_KERNEL);
0478 if (!batch->pages)
0479 goto fallback;
0480
0481 batch->capacity = VFIO_BATCH_MAX_CAPACITY;
0482 return;
0483
0484 fallback:
0485 batch->pages = &batch->fallback_page;
0486 batch->capacity = 1;
0487 }
0488
0489 static void vfio_batch_unpin(struct vfio_batch *batch, struct vfio_dma *dma)
0490 {
0491 while (batch->size) {
0492 unsigned long pfn = page_to_pfn(batch->pages[batch->offset]);
0493
0494 put_pfn(pfn, dma->prot);
0495 batch->offset++;
0496 batch->size--;
0497 }
0498 }
0499
0500 static void vfio_batch_fini(struct vfio_batch *batch)
0501 {
0502 if (batch->capacity == VFIO_BATCH_MAX_CAPACITY)
0503 free_page((unsigned long)batch->pages);
0504 }
0505
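/*
 * Look up (and if necessary fault in) the pfn behind a VM_PFNMAP vma address.
 * Returns -EAGAIN if the mmap lock was dropped during the fault and the vma
 * must be looked up again by the caller.
 */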
0506 static int follow_fault_pfn(struct vm_area_struct *vma, struct mm_struct *mm,
0507 unsigned long vaddr, unsigned long *pfn,
0508 bool write_fault)
0509 {
0510 pte_t *ptep;
0511 spinlock_t *ptl;
0512 int ret;
0513
0514 ret = follow_pte(vma->vm_mm, vaddr, &ptep, &ptl);
0515 if (ret) {
0516 bool unlocked = false;
0517
0518 ret = fixup_user_fault(mm, vaddr,
0519 FAULT_FLAG_REMOTE |
0520 (write_fault ? FAULT_FLAG_WRITE : 0),
0521 &unlocked);
0522 if (unlocked)
0523 return -EAGAIN;
0524
0525 if (ret)
0526 return ret;
0527
0528 ret = follow_pte(vma->vm_mm, vaddr, &ptep, &ptl);
0529 if (ret)
0530 return ret;
0531 }
0532
0533 if (write_fault && !pte_write(*ptep))
0534 ret = -EFAULT;
0535 else
0536 *pfn = pte_pfn(*ptep);
0537
0538 pte_unmap_unlock(ptep, ptl);
0539 return ret;
0540 }
0541
/*
 * Returns the positive number of pfns successfully obtained or a negative
 * error code.  The requested pages are pinned into @pages, except for
 * VM_PFNMAP pfns, which have no struct page and are only returned in *pfn.
 */
0546 static int vaddr_get_pfns(struct mm_struct *mm, unsigned long vaddr,
0547 long npages, int prot, unsigned long *pfn,
0548 struct page **pages)
0549 {
0550 struct vm_area_struct *vma;
0551 unsigned int flags = 0;
0552 int ret;
0553
0554 if (prot & IOMMU_WRITE)
0555 flags |= FOLL_WRITE;
0556
0557 mmap_read_lock(mm);
0558 ret = pin_user_pages_remote(mm, vaddr, npages, flags | FOLL_LONGTERM,
0559 pages, NULL, NULL);
0560 if (ret > 0) {
0561 int i;
0562
/*
 * The zero page is always resident and never needs a long-term pin; it
 * also falls into our invalid/reserved test, so put_pfn() will not unpin
 * it.  Drop the pin taken above for any zero pages in the batch.
 */
0568 for (i = 0 ; i < ret; i++) {
0569 if (unlikely(is_zero_pfn(page_to_pfn(pages[i]))))
0570 unpin_user_page(pages[i]);
0571 }
0572
0573 *pfn = page_to_pfn(pages[0]);
0574 goto done;
0575 }
0576
0577 vaddr = untagged_addr(vaddr);
0578
0579 retry:
0580 vma = vma_lookup(mm, vaddr);
0581
0582 if (vma && vma->vm_flags & VM_PFNMAP) {
0583 ret = follow_fault_pfn(vma, mm, vaddr, pfn, prot & IOMMU_WRITE);
0584 if (ret == -EAGAIN)
0585 goto retry;
0586
0587 if (!ret) {
0588 if (is_invalid_reserved_pfn(*pfn))
0589 ret = 1;
0590 else
0591 ret = -EFAULT;
0592 }
0593 }
0594 done:
0595 mmap_read_unlock(mm);
0596 return ret;
0597 }
0598
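/*
 * Sleep until another thread updates an invalidated vaddr (vaddr_wait is
 * woken from VFIO_DMA_MAP_FLAG_VADDR updates and from dma removal).
 * iommu->lock is dropped while sleeping and re-taken before returning
 * WAITED or -EFAULT.
 */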
0599 static int vfio_wait(struct vfio_iommu *iommu)
0600 {
0601 DEFINE_WAIT(wait);
0602
0603 prepare_to_wait(&iommu->vaddr_wait, &wait, TASK_KILLABLE);
0604 mutex_unlock(&iommu->lock);
0605 schedule();
0606 mutex_lock(&iommu->lock);
0607 finish_wait(&iommu->vaddr_wait, &wait);
0608 if (kthread_should_stop() || !iommu->container_open ||
0609 fatal_signal_pending(current)) {
0610 return -EFAULT;
0611 }
0612 return WAITED;
0613 }
0614
/*
 * Find the vfio_dma covering [start, start + size) and wait for its vaddr
 * to become valid.  The iommu lock is dropped while waiting and re-taken
 * before returning.  Returns 0 on success without waiting, WAITED if the
 * caller slept (and must revalidate), or -errno on failure.
 */
0621 static int vfio_find_dma_valid(struct vfio_iommu *iommu, dma_addr_t start,
0622 size_t size, struct vfio_dma **dma_p)
0623 {
0624 int ret = 0;
0625
0626 do {
0627 *dma_p = vfio_find_dma(iommu, start, size);
0628 if (!*dma_p)
0629 return -EINVAL;
0630 else if (!(*dma_p)->vaddr_invalid)
0631 return ret;
0632 else
0633 ret = vfio_wait(iommu);
0634 } while (ret == WAITED);
0635
0636 return ret;
0637 }
0638
/*
 * Wait for all vaddrs in the dma_list to become valid.  The iommu lock is
 * dropped while waiting and re-taken before returning.  Returns a
 * non-negative value on success, -errno on failure.
 */
0644 static int vfio_wait_all_valid(struct vfio_iommu *iommu)
0645 {
0646 int ret = 0;
0647
0648 while (iommu->vaddr_invalid_count && ret >= 0)
0649 ret = vfio_wait(iommu);
0650
0651 return ret;
0652 }
0653
/*
 * Attempt to pin pages.  We really don't want to track all the pfns and
 * the iommu can only map chunks of consecutive pfns anyway, so get the
 * first page and all consecutive pages with the same locking.
 */
0659 static long vfio_pin_pages_remote(struct vfio_dma *dma, unsigned long vaddr,
0660 long npage, unsigned long *pfn_base,
0661 unsigned long limit, struct vfio_batch *batch)
0662 {
0663 unsigned long pfn;
0664 struct mm_struct *mm = current->mm;
0665 long ret, pinned = 0, lock_acct = 0;
0666 bool rsvd;
0667 dma_addr_t iova = vaddr - dma->vaddr + dma->iova;
0668
0669
0670 if (!mm)
0671 return -ENODEV;
0672
0673 if (batch->size) {
0674
0675 *pfn_base = page_to_pfn(batch->pages[batch->offset]);
0676 pfn = *pfn_base;
0677 rsvd = is_invalid_reserved_pfn(*pfn_base);
0678 } else {
0679 *pfn_base = 0;
0680 }
0681
0682 while (npage) {
0683 if (!batch->size) {
0684
0685 long req_pages = min_t(long, npage, batch->capacity);
0686
0687 ret = vaddr_get_pfns(mm, vaddr, req_pages, dma->prot,
0688 &pfn, batch->pages);
0689 if (ret < 0)
0690 goto unpin_out;
0691
0692 batch->size = ret;
0693 batch->offset = 0;
0694
0695 if (!*pfn_base) {
0696 *pfn_base = pfn;
0697 rsvd = is_invalid_reserved_pfn(*pfn_base);
0698 }
0699 }
0700
0701
0702
0703
0704
0705
0706
0707
0708
0709 while (true) {
0710 if (pfn != *pfn_base + pinned ||
0711 rsvd != is_invalid_reserved_pfn(pfn))
0712 goto out;
0713
/*
 * Reserved pages aren't counted against the user, and pages already
 * pinned via the external pin API were accounted when first pinned.
 */
0719 if (!rsvd && !vfio_find_vpfn(dma, iova)) {
0720 if (!dma->lock_cap &&
0721 mm->locked_vm + lock_acct + 1 > limit) {
0722 pr_warn("%s: RLIMIT_MEMLOCK (%ld) exceeded\n",
0723 __func__, limit << PAGE_SHIFT);
0724 ret = -ENOMEM;
0725 goto unpin_out;
0726 }
0727 lock_acct++;
0728 }
0729
0730 pinned++;
0731 npage--;
0732 vaddr += PAGE_SIZE;
0733 iova += PAGE_SIZE;
0734 batch->offset++;
0735 batch->size--;
0736
0737 if (!batch->size)
0738 break;
0739
0740 pfn = page_to_pfn(batch->pages[batch->offset]);
0741 }
0742
0743 if (unlikely(disable_hugepages))
0744 break;
0745 }
0746
0747 out:
0748 ret = vfio_lock_acct(dma, lock_acct, false);
0749
0750 unpin_out:
0751 if (batch->size == 1 && !batch->offset) {
0752
0753 put_pfn(pfn, dma->prot);
0754 batch->size = 0;
0755 }
0756
0757 if (ret < 0) {
0758 if (pinned && !rsvd) {
0759 for (pfn = *pfn_base ; pinned ; pfn++, pinned--)
0760 put_pfn(pfn, dma->prot);
0761 }
0762 vfio_batch_unpin(batch, dma);
0763
0764 return ret;
0765 }
0766
0767 return pinned;
0768 }
0769
0770 static long vfio_unpin_pages_remote(struct vfio_dma *dma, dma_addr_t iova,
0771 unsigned long pfn, long npage,
0772 bool do_accounting)
0773 {
0774 long unlocked = 0, locked = 0;
0775 long i;
0776
0777 for (i = 0; i < npage; i++, iova += PAGE_SIZE) {
0778 if (put_pfn(pfn++, dma->prot)) {
0779 unlocked++;
0780 if (vfio_find_vpfn(dma, iova))
0781 locked++;
0782 }
0783 }
0784
0785 if (do_accounting)
0786 vfio_lock_acct(dma, locked - unlocked, true);
0787
0788 return unlocked;
0789 }
0790
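/*
 * Pin a single page on behalf of an external (emulated IOMMU) driver, using
 * the mm of the task that created the mapping rather than current->mm.
 */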
0791 static int vfio_pin_page_external(struct vfio_dma *dma, unsigned long vaddr,
0792 unsigned long *pfn_base, bool do_accounting)
0793 {
0794 struct page *pages[1];
0795 struct mm_struct *mm;
0796 int ret;
0797
0798 mm = get_task_mm(dma->task);
0799 if (!mm)
0800 return -ENODEV;
0801
0802 ret = vaddr_get_pfns(mm, vaddr, 1, dma->prot, pfn_base, pages);
0803 if (ret != 1)
0804 goto out;
0805
0806 ret = 0;
0807
0808 if (do_accounting && !is_invalid_reserved_pfn(*pfn_base)) {
0809 ret = vfio_lock_acct(dma, 1, true);
0810 if (ret) {
0811 put_pfn(*pfn_base, dma->prot);
0812 if (ret == -ENOMEM)
0813 pr_warn("%s: Task %s (%d) RLIMIT_MEMLOCK "
0814 "(%ld) exceeded\n", __func__,
0815 dma->task->comm, task_pid_nr(dma->task),
0816 task_rlimit(dma->task, RLIMIT_MEMLOCK));
0817 }
0818 }
0819
0820 out:
0821 mmput(mm);
0822 return ret;
0823 }
0824
0825 static int vfio_unpin_page_external(struct vfio_dma *dma, dma_addr_t iova,
0826 bool do_accounting)
0827 {
0828 int unlocked;
0829 struct vfio_pfn *vpfn = vfio_find_vpfn(dma, iova);
0830
0831 if (!vpfn)
0832 return 0;
0833
0834 unlocked = vfio_iova_put_vfio_pfn(dma, vpfn);
0835
0836 if (do_accounting)
0837 vfio_lock_acct(dma, -unlocked, true);
0838
0839 return unlocked;
0840 }
0841
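/*
 * The pin_pages backend for type1: pin @npage pages starting at @user_iova
 * on behalf of an emulated IOMMU device, record them in the per-dma
 * pfn_list, and mark them dirty if dirty tracking is enabled.  On success
 * the group is promoted to pinned-page dirty scope.
 */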
0842 static int vfio_iommu_type1_pin_pages(void *iommu_data,
0843 struct iommu_group *iommu_group,
0844 dma_addr_t user_iova,
0845 int npage, int prot,
0846 struct page **pages)
0847 {
0848 struct vfio_iommu *iommu = iommu_data;
0849 struct vfio_iommu_group *group;
0850 int i, j, ret;
0851 unsigned long remote_vaddr;
0852 struct vfio_dma *dma;
0853 bool do_accounting;
0854 dma_addr_t iova;
0855
0856 if (!iommu || !pages)
0857 return -EINVAL;
0858
0859
0860 if (!iommu->v2)
0861 return -EACCES;
0862
0863 mutex_lock(&iommu->lock);
0864
0865
0866
0867
0868
0869 again:
0870 if (iommu->vaddr_invalid_count) {
0871 for (i = 0; i < npage; i++) {
0872 iova = user_iova + PAGE_SIZE * i;
0873 ret = vfio_find_dma_valid(iommu, iova, PAGE_SIZE, &dma);
0874 if (ret < 0)
0875 goto pin_done;
0876 if (ret == WAITED)
0877 goto again;
0878 }
0879 }
0880
0881
0882 if (list_empty(&iommu->device_list)) {
0883 ret = -EINVAL;
0884 goto pin_done;
0885 }
0886
/*
 * If an iommu-backed domain exists in the container, all pages of each
 * mapping are already pinned and accounted, so don't account externally
 * pinned pages a second time here.
 */
0892 do_accounting = list_empty(&iommu->domain_list);
0893
0894 for (i = 0; i < npage; i++) {
0895 unsigned long phys_pfn;
0896 struct vfio_pfn *vpfn;
0897
0898 iova = user_iova + PAGE_SIZE * i;
0899 dma = vfio_find_dma(iommu, iova, PAGE_SIZE);
0900 if (!dma) {
0901 ret = -EINVAL;
0902 goto pin_unwind;
0903 }
0904
0905 if ((dma->prot & prot) != prot) {
0906 ret = -EPERM;
0907 goto pin_unwind;
0908 }
0909
0910 vpfn = vfio_iova_get_vfio_pfn(dma, iova);
0911 if (vpfn) {
0912 pages[i] = pfn_to_page(vpfn->pfn);
0913 continue;
0914 }
0915
0916 remote_vaddr = dma->vaddr + (iova - dma->iova);
0917 ret = vfio_pin_page_external(dma, remote_vaddr, &phys_pfn,
0918 do_accounting);
0919 if (ret)
0920 goto pin_unwind;
0921
0922 ret = vfio_add_to_pfn_list(dma, iova, phys_pfn);
0923 if (ret) {
0924 if (put_pfn(phys_pfn, dma->prot) && do_accounting)
0925 vfio_lock_acct(dma, -1, true);
0926 goto pin_unwind;
0927 }
0928
0929 pages[i] = pfn_to_page(phys_pfn);
0930
0931 if (iommu->dirty_page_tracking) {
0932 unsigned long pgshift = __ffs(iommu->pgsize_bitmap);
0933
0934
0935
0936
0937
0938 bitmap_set(dma->bitmap,
0939 (iova - dma->iova) >> pgshift, 1);
0940 }
0941 }
0942 ret = i;
0943
0944 group = vfio_iommu_find_iommu_group(iommu, iommu_group);
0945 if (!group->pinned_page_dirty_scope) {
0946 group->pinned_page_dirty_scope = true;
0947 iommu->num_non_pinned_groups--;
0948 }
0949
0950 goto pin_done;
0951
0952 pin_unwind:
0953 pages[i] = NULL;
0954 for (j = 0; j < i; j++) {
0955 dma_addr_t iova;
0956
0957 iova = user_iova + PAGE_SIZE * j;
0958 dma = vfio_find_dma(iommu, iova, PAGE_SIZE);
0959 vfio_unpin_page_external(dma, iova, do_accounting);
0960 pages[j] = NULL;
0961 }
0962 pin_done:
0963 mutex_unlock(&iommu->lock);
0964 return ret;
0965 }
0966
0967 static void vfio_iommu_type1_unpin_pages(void *iommu_data,
0968 dma_addr_t user_iova, int npage)
0969 {
0970 struct vfio_iommu *iommu = iommu_data;
0971 bool do_accounting;
0972 int i;
0973
0974
0975 if (WARN_ON(!iommu->v2))
0976 return;
0977
0978 mutex_lock(&iommu->lock);
0979
0980 do_accounting = list_empty(&iommu->domain_list);
0981 for (i = 0; i < npage; i++) {
0982 dma_addr_t iova = user_iova + PAGE_SIZE * i;
0983 struct vfio_dma *dma;
0984
0985 dma = vfio_find_dma(iommu, iova, PAGE_SIZE);
0986 if (!dma)
0987 break;
0988
0989 vfio_unpin_page_external(dma, iova, do_accounting);
0990 }
0991
0992 mutex_unlock(&iommu->lock);
0993
0994 WARN_ON(i != npage);
0995 }
0996
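/*
 * Flush the gathered IOTLB invalidations, then unpin and free the regions
 * that were unmapped with iommu_unmap_fast().
 */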
0997 static long vfio_sync_unpin(struct vfio_dma *dma, struct vfio_domain *domain,
0998 struct list_head *regions,
0999 struct iommu_iotlb_gather *iotlb_gather)
1000 {
1001 long unlocked = 0;
1002 struct vfio_regions *entry, *next;
1003
1004 iommu_iotlb_sync(domain->domain, iotlb_gather);
1005
1006 list_for_each_entry_safe(entry, next, regions, list) {
1007 unlocked += vfio_unpin_pages_remote(dma,
1008 entry->iova,
1009 entry->phys >> PAGE_SHIFT,
1010 entry->len >> PAGE_SHIFT,
1011 false);
1012 list_del(&entry->list);
1013 kfree(entry);
1014 }
1015
1016 cond_resched();
1017
1018 return unlocked;
1019 }
1020
/*
 * When deferring IOTLB flushes with iommu_unmap_fast(), the unmapped but
 * not yet unpinned regions must be remembered until the next sync.  Cap
 * the number of deferred regions per sync to bound that bookkeeping.
 */
1028 #define VFIO_IOMMU_TLB_SYNC_MAX 512
1029
1030 static size_t unmap_unpin_fast(struct vfio_domain *domain,
1031 struct vfio_dma *dma, dma_addr_t *iova,
1032 size_t len, phys_addr_t phys, long *unlocked,
1033 struct list_head *unmapped_list,
1034 int *unmapped_cnt,
1035 struct iommu_iotlb_gather *iotlb_gather)
1036 {
1037 size_t unmapped = 0;
1038 struct vfio_regions *entry = kzalloc(sizeof(*entry), GFP_KERNEL);
1039
1040 if (entry) {
1041 unmapped = iommu_unmap_fast(domain->domain, *iova, len,
1042 iotlb_gather);
1043
1044 if (!unmapped) {
1045 kfree(entry);
1046 } else {
1047 entry->iova = *iova;
1048 entry->phys = phys;
1049 entry->len = unmapped;
1050 list_add_tail(&entry->list, unmapped_list);
1051
1052 *iova += unmapped;
1053 (*unmapped_cnt)++;
1054 }
1055 }
1056
/*
 * Sync (flush the gathered IOTLB invalidations and unpin) once the number
 * of deferred regions reaches the limit, or immediately on unmap failure.
 */
1061 if (*unmapped_cnt >= VFIO_IOMMU_TLB_SYNC_MAX || !unmapped) {
1062 *unlocked += vfio_sync_unpin(dma, domain, unmapped_list,
1063 iotlb_gather);
1064 *unmapped_cnt = 0;
1065 }
1066
1067 return unmapped;
1068 }
1069
1070 static size_t unmap_unpin_slow(struct vfio_domain *domain,
1071 struct vfio_dma *dma, dma_addr_t *iova,
1072 size_t len, phys_addr_t phys,
1073 long *unlocked)
1074 {
1075 size_t unmapped = iommu_unmap(domain->domain, *iova, len);
1076
1077 if (unmapped) {
1078 *unlocked += vfio_unpin_pages_remote(dma, *iova,
1079 phys >> PAGE_SHIFT,
1080 unmapped >> PAGE_SHIFT,
1081 false);
1082 *iova += unmapped;
1083 cond_resched();
1084 }
1085 return unmapped;
1086 }
1087
1088 static long vfio_unmap_unpin(struct vfio_iommu *iommu, struct vfio_dma *dma,
1089 bool do_accounting)
1090 {
1091 dma_addr_t iova = dma->iova, end = dma->iova + dma->size;
1092 struct vfio_domain *domain, *d;
1093 LIST_HEAD(unmapped_region_list);
1094 struct iommu_iotlb_gather iotlb_gather;
1095 int unmapped_region_cnt = 0;
1096 long unlocked = 0;
1097
1098 if (!dma->size)
1099 return 0;
1100
1101 if (list_empty(&iommu->domain_list))
1102 return 0;
1103
/*
 * We use the IOMMU to track the physical addresses, otherwise we'd need a
 * much more complicated tracking system.  Unfortunately that means we need
 * to use one of the iommu domains to figure out the pfns to unpin.  The
 * rest need to be unmapped in advance so we have no iommu translations
 * remaining when the pages are unpinned.
 */
1111 domain = d = list_first_entry(&iommu->domain_list,
1112 struct vfio_domain, next);
1113
1114 list_for_each_entry_continue(d, &iommu->domain_list, next) {
1115 iommu_unmap(d->domain, dma->iova, dma->size);
1116 cond_resched();
1117 }
1118
1119 iommu_iotlb_gather_init(&iotlb_gather);
1120 while (iova < end) {
1121 size_t unmapped, len;
1122 phys_addr_t phys, next;
1123
1124 phys = iommu_iova_to_phys(domain->domain, iova);
1125 if (WARN_ON(!phys)) {
1126 iova += PAGE_SIZE;
1127 continue;
1128 }
1129
/*
 * To optimize for fewer iommu_unmap() calls, each of which can be
 * expensive, try to group physically contiguous regions (unless the
 * domain uses fine-grained superpages, where this doesn't help).
 */
1135 for (len = PAGE_SIZE;
1136 !domain->fgsp && iova + len < end; len += PAGE_SIZE) {
1137 next = iommu_iova_to_phys(domain->domain, iova + len);
1138 if (next != phys + len)
1139 break;
1140 }
1141
/*
 * First try the fast unmap/unpin path with deferred IOTLB flushing;
 * fall back to the slow path on failure.
 */
1146 unmapped = unmap_unpin_fast(domain, dma, &iova, len, phys,
1147 &unlocked, &unmapped_region_list,
1148 &unmapped_region_cnt,
1149 &iotlb_gather);
1150 if (!unmapped) {
1151 unmapped = unmap_unpin_slow(domain, dma, &iova, len,
1152 phys, &unlocked);
1153 if (WARN_ON(!unmapped))
1154 break;
1155 }
1156 }
1157
1158 dma->iommu_mapped = false;
1159
1160 if (unmapped_region_cnt) {
1161 unlocked += vfio_sync_unpin(dma, domain, &unmapped_region_list,
1162 &iotlb_gather);
1163 }
1164
1165 if (do_accounting) {
1166 vfio_lock_acct(dma, -unlocked, true);
1167 return 0;
1168 }
1169 return unlocked;
1170 }
1171
1172 static void vfio_remove_dma(struct vfio_iommu *iommu, struct vfio_dma *dma)
1173 {
1174 WARN_ON(!RB_EMPTY_ROOT(&dma->pfn_list));
1175 vfio_unmap_unpin(iommu, dma, true);
1176 vfio_unlink_dma(iommu, dma);
1177 put_task_struct(dma->task);
1178 vfio_dma_bitmap_free(dma);
1179 if (dma->vaddr_invalid) {
1180 iommu->vaddr_invalid_count--;
1181 wake_up_all(&iommu->vaddr_wait);
1182 }
1183 kfree(dma);
1184 iommu->dma_avail++;
1185 }
1186
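/*
 * Recompute the container's supported page-size bitmap as the intersection
 * of all attached domains' page-size bitmaps.
 */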
1187 static void vfio_update_pgsize_bitmap(struct vfio_iommu *iommu)
1188 {
1189 struct vfio_domain *domain;
1190
1191 iommu->pgsize_bitmap = ULONG_MAX;
1192
1193 list_for_each_entry(domain, &iommu->domain_list, next)
1194 iommu->pgsize_bitmap &= domain->domain->pgsize_bitmap;
1195
/*
 * If the IOMMU supports page sizes smaller than PAGE_SIZE, pretend that
 * only PAGE_SIZE and larger are supported.  Users can then map/unmap
 * buffers aligned to PAGE_SIZE while the pinning code keeps working at
 * that granularity.
 */
1204 if (iommu->pgsize_bitmap & ~PAGE_MASK) {
1205 iommu->pgsize_bitmap &= PAGE_MASK;
1206 iommu->pgsize_bitmap |= PAGE_SIZE;
1207 }
1208 }
1209
1210 static int update_user_bitmap(u64 __user *bitmap, struct vfio_iommu *iommu,
1211 struct vfio_dma *dma, dma_addr_t base_iova,
1212 size_t pgsize)
1213 {
1214 unsigned long pgshift = __ffs(pgsize);
1215 unsigned long nbits = dma->size >> pgshift;
1216 unsigned long bit_offset = (dma->iova - base_iova) >> pgshift;
1217 unsigned long copy_offset = bit_offset / BITS_PER_LONG;
1218 unsigned long shift = bit_offset % BITS_PER_LONG;
1219 unsigned long leftover;
1220
/*
 * Mark all pages dirty if any IOMMU-capable group cannot report dirty
 * pages and this range is pinned and iommu-mapped.
 */
1225 if (iommu->num_non_pinned_groups && dma->iommu_mapped)
1226 bitmap_set(dma->bitmap, 0, nbits);
1227
1228 if (shift) {
1229 bitmap_shift_left(dma->bitmap, dma->bitmap, shift,
1230 nbits + shift);
1231
1232 if (copy_from_user(&leftover,
1233 (void __user *)(bitmap + copy_offset),
1234 sizeof(leftover)))
1235 return -EFAULT;
1236
1237 bitmap_or(dma->bitmap, dma->bitmap, &leftover, shift);
1238 }
1239
1240 if (copy_to_user((void __user *)(bitmap + copy_offset), dma->bitmap,
1241 DIRTY_BITMAP_BYTES(nbits + shift)))
1242 return -EFAULT;
1243
1244 return 0;
1245 }
1246
1247 static int vfio_iova_dirty_bitmap(u64 __user *bitmap, struct vfio_iommu *iommu,
1248 dma_addr_t iova, size_t size, size_t pgsize)
1249 {
1250 struct vfio_dma *dma;
1251 struct rb_node *n;
1252 unsigned long pgshift = __ffs(pgsize);
1253 int ret;
1254
/*
 * A GET_BITMAP request must fully cover vfio_dma mappings.  Multiple
 * mappings may be covered by a single request as long as its range starts
 * and ends exactly on mapping boundaries.
 */
1261 dma = vfio_find_dma(iommu, iova, 1);
1262 if (dma && dma->iova != iova)
1263 return -EINVAL;
1264
1265 dma = vfio_find_dma(iommu, iova + size - 1, 0);
1266 if (dma && dma->iova + dma->size != iova + size)
1267 return -EINVAL;
1268
1269 for (n = rb_first(&iommu->dma_list); n; n = rb_next(n)) {
1270 struct vfio_dma *dma = rb_entry(n, struct vfio_dma, node);
1271
1272 if (dma->iova < iova)
1273 continue;
1274
1275 if (dma->iova > iova + size - 1)
1276 break;
1277
1278 ret = update_user_bitmap(bitmap, iommu, dma, iova, pgsize);
1279 if (ret)
1280 return ret;
1281
1282
1283
1284
1285
1286
1287 bitmap_clear(dma->bitmap, 0, dma->size >> pgshift);
1288 vfio_dma_populate_bitmap(dma, pgsize);
1289 }
1290 return 0;
1291 }
1292
1293 static int verify_bitmap_size(uint64_t npages, uint64_t bitmap_size)
1294 {
1295 if (!npages || !bitmap_size || (bitmap_size > DIRTY_BITMAP_SIZE_MAX) ||
1296 (bitmap_size < DIRTY_BITMAP_BYTES(npages)))
1297 return -EINVAL;
1298
1299 return 0;
1300 }
1301
/*
 * Notify VFIO drivers using vfio_register_emulated_iommu_dev() to invalidate
 * and unmap iovas within the range we're about to unmap.  Drivers MUST unpin
 * pages in the range before returning from the callback.
 */
1307 static void vfio_notify_dma_unmap(struct vfio_iommu *iommu,
1308 struct vfio_dma *dma)
1309 {
1310 struct vfio_device *device;
1311
1312 if (list_empty(&iommu->device_list))
1313 return;
1314
/*
 * The device is expected to call vfio_unpin_pages() for any IOVA it has
 * pinned within the range.  Since vfio_unpin_pages() eventually calls back
 * into this code and tries to take iommu->lock, we must drop it here.
 */
1321 mutex_lock(&iommu->device_list_lock);
1322 mutex_unlock(&iommu->lock);
1323
1324 list_for_each_entry(device, &iommu->device_list, iommu_entry)
1325 device->ops->dma_unmap(device, dma->iova, dma->size);
1326
1327 mutex_unlock(&iommu->device_list_lock);
1328 mutex_lock(&iommu->lock);
1329 }
1330
1331 static int vfio_dma_do_unmap(struct vfio_iommu *iommu,
1332 struct vfio_iommu_type1_dma_unmap *unmap,
1333 struct vfio_bitmap *bitmap)
1334 {
1335 struct vfio_dma *dma, *dma_last = NULL;
1336 size_t unmapped = 0, pgsize;
1337 int ret = -EINVAL, retries = 0;
1338 unsigned long pgshift;
1339 dma_addr_t iova = unmap->iova;
1340 u64 size = unmap->size;
1341 bool unmap_all = unmap->flags & VFIO_DMA_UNMAP_FLAG_ALL;
1342 bool invalidate_vaddr = unmap->flags & VFIO_DMA_UNMAP_FLAG_VADDR;
1343 struct rb_node *n, *first_n;
1344
1345 mutex_lock(&iommu->lock);
1346
1347 pgshift = __ffs(iommu->pgsize_bitmap);
1348 pgsize = (size_t)1 << pgshift;
1349
1350 if (iova & (pgsize - 1))
1351 goto unlock;
1352
1353 if (unmap_all) {
1354 if (iova || size)
1355 goto unlock;
1356 size = U64_MAX;
1357 } else if (!size || size & (pgsize - 1) ||
1358 iova + size - 1 < iova || size > SIZE_MAX) {
1359 goto unlock;
1360 }
1361
1362
1363 if ((unmap->flags & VFIO_DMA_UNMAP_FLAG_GET_DIRTY_BITMAP) &&
1364 (!iommu->dirty_page_tracking || (bitmap->pgsize != pgsize))) {
1365 goto unlock;
1366 }
1367
1368 WARN_ON((pgsize - 1) & PAGE_MASK);
1369 again:
/*
 * vfio-iommu-type1 (v1) coalesced user mappings, so the granularity of the
 * original mapping was lost and the user was allowed to attempt to unmap
 * any range; whether that worked depended on physical contiguity and the
 * page sizes supported by the IOMMU.
 *
 * In v2 the unmap granularity is the mapping granularity: unless
 * VFIO_DMA_UNMAP_FLAG_ALL is used, an unmap request must start and end
 * exactly on vfio_dma boundaries, which is what the checks below enforce.
 * Mappings fully contained in the range are removed; requests that would
 * bisect a mapping are rejected.
 */
1401 if (iommu->v2 && !unmap_all) {
1402 dma = vfio_find_dma(iommu, iova, 1);
1403 if (dma && dma->iova != iova)
1404 goto unlock;
1405
1406 dma = vfio_find_dma(iommu, iova + size - 1, 0);
1407 if (dma && dma->iova + dma->size != iova + size)
1408 goto unlock;
1409 }
1410
1411 ret = 0;
1412 n = first_n = vfio_find_dma_first_node(iommu, iova, size);
1413
1414 while (n) {
1415 dma = rb_entry(n, struct vfio_dma, node);
1416 if (dma->iova >= iova + size)
1417 break;
1418
1419 if (!iommu->v2 && iova > dma->iova)
1420 break;
1421
1422 if (invalidate_vaddr) {
1423 if (dma->vaddr_invalid) {
1424 struct rb_node *last_n = n;
1425
1426 for (n = first_n; n != last_n; n = rb_next(n)) {
1427 dma = rb_entry(n,
1428 struct vfio_dma, node);
1429 dma->vaddr_invalid = false;
1430 iommu->vaddr_invalid_count--;
1431 }
1432 ret = -EINVAL;
1433 unmapped = 0;
1434 break;
1435 }
1436 dma->vaddr_invalid = true;
1437 iommu->vaddr_invalid_count++;
1438 unmapped += dma->size;
1439 n = rb_next(n);
1440 continue;
1441 }
1442
1443 if (!RB_EMPTY_ROOT(&dma->pfn_list)) {
1444 if (dma_last == dma) {
1445 BUG_ON(++retries > 10);
1446 } else {
1447 dma_last = dma;
1448 retries = 0;
1449 }
1450
1451 vfio_notify_dma_unmap(iommu, dma);
1452 goto again;
1453 }
1454
1455 if (unmap->flags & VFIO_DMA_UNMAP_FLAG_GET_DIRTY_BITMAP) {
1456 ret = update_user_bitmap(bitmap->data, iommu, dma,
1457 iova, pgsize);
1458 if (ret)
1459 break;
1460 }
1461
1462 unmapped += dma->size;
1463 n = rb_next(n);
1464 vfio_remove_dma(iommu, dma);
1465 }
1466
1467 unlock:
1468 mutex_unlock(&iommu->lock);
1469
1470
1471 unmap->size = unmapped;
1472
1473 return ret;
1474 }
1475
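/*
 * Map the pinned pfn range into every domain in the container, unwinding all
 * previously mapped domains on failure.
 */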
1476 static int vfio_iommu_map(struct vfio_iommu *iommu, dma_addr_t iova,
1477 unsigned long pfn, long npage, int prot)
1478 {
1479 struct vfio_domain *d;
1480 int ret;
1481
1482 list_for_each_entry(d, &iommu->domain_list, next) {
1483 ret = iommu_map(d->domain, iova, (phys_addr_t)pfn << PAGE_SHIFT,
1484 npage << PAGE_SHIFT, prot | IOMMU_CACHE);
1485 if (ret)
1486 goto unwind;
1487
1488 cond_resched();
1489 }
1490
1491 return 0;
1492
1493 unwind:
1494 list_for_each_entry_continue_reverse(d, &iommu->domain_list, next) {
1495 iommu_unmap(d->domain, iova, npage << PAGE_SHIFT);
1496 cond_resched();
1497 }
1498
1499 return ret;
1500 }
1501
1502 static int vfio_pin_map_dma(struct vfio_iommu *iommu, struct vfio_dma *dma,
1503 size_t map_size)
1504 {
1505 dma_addr_t iova = dma->iova;
1506 unsigned long vaddr = dma->vaddr;
1507 struct vfio_batch batch;
1508 size_t size = map_size;
1509 long npage;
1510 unsigned long pfn, limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT;
1511 int ret = 0;
1512
1513 vfio_batch_init(&batch);
1514
1515 while (size) {
1516
1517 npage = vfio_pin_pages_remote(dma, vaddr + dma->size,
1518 size >> PAGE_SHIFT, &pfn, limit,
1519 &batch);
1520 if (npage <= 0) {
1521 WARN_ON(!npage);
1522 ret = (int)npage;
1523 break;
1524 }
1525
1526
1527 ret = vfio_iommu_map(iommu, iova + dma->size, pfn, npage,
1528 dma->prot);
1529 if (ret) {
1530 vfio_unpin_pages_remote(dma, iova + dma->size, pfn,
1531 npage, true);
1532 vfio_batch_unpin(&batch, dma);
1533 break;
1534 }
1535
1536 size -= npage << PAGE_SHIFT;
1537 dma->size += npage << PAGE_SHIFT;
1538 }
1539
1540 vfio_batch_fini(&batch);
1541 dma->iommu_mapped = true;
1542
1543 if (ret)
1544 vfio_remove_dma(iommu, dma);
1545
1546 return ret;
1547 }
1548
/*
 * Check that the requested dma map range falls within a valid iova range.
 */
1552 static bool vfio_iommu_iova_dma_valid(struct vfio_iommu *iommu,
1553 dma_addr_t start, dma_addr_t end)
1554 {
1555 struct list_head *iova = &iommu->iova_list;
1556 struct vfio_iova *node;
1557
1558 list_for_each_entry(node, iova, list) {
1559 if (start >= node->start && end <= node->end)
1560 return true;
1561 }
1562
/*
 * Also allow the mapping when the iova list is empty, which is the case
 * for a container holding only emulated (mdev) groups with no
 * iommu-backed domain to impose an aperture.
 */
1567 return list_empty(iova);
1568 }
1569
1570 static int vfio_dma_do_map(struct vfio_iommu *iommu,
1571 struct vfio_iommu_type1_dma_map *map)
1572 {
1573 bool set_vaddr = map->flags & VFIO_DMA_MAP_FLAG_VADDR;
1574 dma_addr_t iova = map->iova;
1575 unsigned long vaddr = map->vaddr;
1576 size_t size = map->size;
1577 int ret = 0, prot = 0;
1578 size_t pgsize;
1579 struct vfio_dma *dma;
1580
1581
1582 if (map->size != size || map->vaddr != vaddr || map->iova != iova)
1583 return -EINVAL;
1584
1585
1586 if (map->flags & VFIO_DMA_MAP_FLAG_WRITE)
1587 prot |= IOMMU_WRITE;
1588 if (map->flags & VFIO_DMA_MAP_FLAG_READ)
1589 prot |= IOMMU_READ;
1590
1591 if ((prot && set_vaddr) || (!prot && !set_vaddr))
1592 return -EINVAL;
1593
1594 mutex_lock(&iommu->lock);
1595
1596 pgsize = (size_t)1 << __ffs(iommu->pgsize_bitmap);
1597
1598 WARN_ON((pgsize - 1) & PAGE_MASK);
1599
1600 if (!size || (size | iova | vaddr) & (pgsize - 1)) {
1601 ret = -EINVAL;
1602 goto out_unlock;
1603 }
1604
1605
1606 if (iova + size - 1 < iova || vaddr + size - 1 < vaddr) {
1607 ret = -EINVAL;
1608 goto out_unlock;
1609 }
1610
1611 dma = vfio_find_dma(iommu, iova, size);
1612 if (set_vaddr) {
1613 if (!dma) {
1614 ret = -ENOENT;
1615 } else if (!dma->vaddr_invalid || dma->iova != iova ||
1616 dma->size != size) {
1617 ret = -EINVAL;
1618 } else {
1619 dma->vaddr = vaddr;
1620 dma->vaddr_invalid = false;
1621 iommu->vaddr_invalid_count--;
1622 wake_up_all(&iommu->vaddr_wait);
1623 }
1624 goto out_unlock;
1625 } else if (dma) {
1626 ret = -EEXIST;
1627 goto out_unlock;
1628 }
1629
1630 if (!iommu->dma_avail) {
1631 ret = -ENOSPC;
1632 goto out_unlock;
1633 }
1634
1635 if (!vfio_iommu_iova_dma_valid(iommu, iova, iova + size - 1)) {
1636 ret = -EINVAL;
1637 goto out_unlock;
1638 }
1639
1640 dma = kzalloc(sizeof(*dma), GFP_KERNEL);
1641 if (!dma) {
1642 ret = -ENOMEM;
1643 goto out_unlock;
1644 }
1645
1646 iommu->dma_avail--;
1647 dma->iova = iova;
1648 dma->vaddr = vaddr;
1649 dma->prot = prot;
1650
/*
 * Locked-memory accounting is charged to the process that created the
 * mapping: take a reference on the group leader's task here and use it
 * (and a snapshot of its CAP_IPC_LOCK) for all later accounting, including
 * pinning done on its behalf from other contexts, e.g. by an mdev vendor
 * driver.  This keeps accounting consistent for the lifetime of the
 * mapping regardless of which task triggers the pin or unpin.
 */
1676 get_task_struct(current->group_leader);
1677 dma->task = current->group_leader;
1678 dma->lock_cap = capable(CAP_IPC_LOCK);
1679
1680 dma->pfn_list = RB_ROOT;
1681
1682
1683 vfio_link_dma(iommu, dma);
1684
1685
1686 if (list_empty(&iommu->domain_list))
1687 dma->size = size;
1688 else
1689 ret = vfio_pin_map_dma(iommu, dma, size);
1690
1691 if (!ret && iommu->dirty_page_tracking) {
1692 ret = vfio_dma_bitmap_alloc(dma, pgsize);
1693 if (ret)
1694 vfio_remove_dma(iommu, dma);
1695 }
1696
1697 out_unlock:
1698 mutex_unlock(&iommu->lock);
1699 return ret;
1700 }
1701
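/*
 * Replay all existing DMA mappings into a newly attached domain.  Ranges that
 * are already iommu-mapped are translated via an existing domain; otherwise
 * the pages are pinned here first.
 */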
1702 static int vfio_iommu_replay(struct vfio_iommu *iommu,
1703 struct vfio_domain *domain)
1704 {
1705 struct vfio_batch batch;
1706 struct vfio_domain *d = NULL;
1707 struct rb_node *n;
1708 unsigned long limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT;
1709 int ret;
1710
1711 ret = vfio_wait_all_valid(iommu);
1712 if (ret < 0)
1713 return ret;
1714
1715
1716 if (!list_empty(&iommu->domain_list))
1717 d = list_first_entry(&iommu->domain_list,
1718 struct vfio_domain, next);
1719
1720 vfio_batch_init(&batch);
1721
1722 n = rb_first(&iommu->dma_list);
1723
1724 for (; n; n = rb_next(n)) {
1725 struct vfio_dma *dma;
1726 dma_addr_t iova;
1727
1728 dma = rb_entry(n, struct vfio_dma, node);
1729 iova = dma->iova;
1730
1731 while (iova < dma->iova + dma->size) {
1732 phys_addr_t phys;
1733 size_t size;
1734
1735 if (dma->iommu_mapped) {
1736 phys_addr_t p;
1737 dma_addr_t i;
1738
1739 if (WARN_ON(!d)) {
1740 ret = -EINVAL;
1741 goto unwind;
1742 }
1743
1744 phys = iommu_iova_to_phys(d->domain, iova);
1745
1746 if (WARN_ON(!phys)) {
1747 iova += PAGE_SIZE;
1748 continue;
1749 }
1750
1751 size = PAGE_SIZE;
1752 p = phys + size;
1753 i = iova + size;
1754 while (i < dma->iova + dma->size &&
1755 p == iommu_iova_to_phys(d->domain, i)) {
1756 size += PAGE_SIZE;
1757 p += PAGE_SIZE;
1758 i += PAGE_SIZE;
1759 }
1760 } else {
1761 unsigned long pfn;
1762 unsigned long vaddr = dma->vaddr +
1763 (iova - dma->iova);
1764 size_t n = dma->iova + dma->size - iova;
1765 long npage;
1766
1767 npage = vfio_pin_pages_remote(dma, vaddr,
1768 n >> PAGE_SHIFT,
1769 &pfn, limit,
1770 &batch);
1771 if (npage <= 0) {
1772 WARN_ON(!npage);
1773 ret = (int)npage;
1774 goto unwind;
1775 }
1776
1777 phys = pfn << PAGE_SHIFT;
1778 size = npage << PAGE_SHIFT;
1779 }
1780
1781 ret = iommu_map(domain->domain, iova, phys,
1782 size, dma->prot | IOMMU_CACHE);
1783 if (ret) {
1784 if (!dma->iommu_mapped) {
1785 vfio_unpin_pages_remote(dma, iova,
1786 phys >> PAGE_SHIFT,
1787 size >> PAGE_SHIFT,
1788 true);
1789 vfio_batch_unpin(&batch, dma);
1790 }
1791 goto unwind;
1792 }
1793
1794 iova += size;
1795 }
1796 }
1797
1798
1799 for (n = rb_first(&iommu->dma_list); n; n = rb_next(n)) {
1800 struct vfio_dma *dma = rb_entry(n, struct vfio_dma, node);
1801
1802 dma->iommu_mapped = true;
1803 }
1804
1805 vfio_batch_fini(&batch);
1806 return 0;
1807
1808 unwind:
1809 for (; n; n = rb_prev(n)) {
1810 struct vfio_dma *dma = rb_entry(n, struct vfio_dma, node);
1811 dma_addr_t iova;
1812
1813 if (dma->iommu_mapped) {
1814 iommu_unmap(domain->domain, dma->iova, dma->size);
1815 continue;
1816 }
1817
1818 iova = dma->iova;
1819 while (iova < dma->iova + dma->size) {
1820 phys_addr_t phys, p;
1821 size_t size;
1822 dma_addr_t i;
1823
1824 phys = iommu_iova_to_phys(domain->domain, iova);
1825 if (!phys) {
1826 iova += PAGE_SIZE;
1827 continue;
1828 }
1829
1830 size = PAGE_SIZE;
1831 p = phys + size;
1832 i = iova + size;
1833 while (i < dma->iova + dma->size &&
1834 p == iommu_iova_to_phys(domain->domain, i)) {
1835 size += PAGE_SIZE;
1836 p += PAGE_SIZE;
1837 i += PAGE_SIZE;
1838 }
1839
1840 iommu_unmap(domain->domain, iova, size);
1841 vfio_unpin_pages_remote(dma, iova, phys >> PAGE_SHIFT,
1842 size >> PAGE_SHIFT, true);
1843 }
1844 }
1845
1846 vfio_batch_fini(&batch);
1847 return ret;
1848 }
1849
/*
 * We change our unmap behavior slightly depending on whether the IOMMU
 * supports fine-grained superpages.  IOMMUs like AMD-Vi will use a
 * superpage for practically any contiguous power-of-two mapping we give
 * it, so we don't need to look for contiguous chunks ourselves to make
 * unmapping more efficient.  On IOMMUs with coarse-grained superpages,
 * like Intel VT-d with discrete 2M/1G/512G/1T superpages, identifying
 * contiguous chunks significantly boosts non-hugetlbfs mappings and
 * doesn't seem to hurt when hugetlbfs is in use.
 */
1860 static void vfio_test_domain_fgsp(struct vfio_domain *domain)
1861 {
1862 struct page *pages;
1863 int ret, order = get_order(PAGE_SIZE * 2);
1864
1865 pages = alloc_pages(GFP_KERNEL | __GFP_ZERO, order);
1866 if (!pages)
1867 return;
1868
1869 ret = iommu_map(domain->domain, 0, page_to_phys(pages), PAGE_SIZE * 2,
1870 IOMMU_READ | IOMMU_WRITE | IOMMU_CACHE);
1871 if (!ret) {
1872 size_t unmapped = iommu_unmap(domain->domain, 0, PAGE_SIZE);
1873
1874 if (unmapped == PAGE_SIZE)
1875 iommu_unmap(domain->domain, PAGE_SIZE, PAGE_SIZE);
1876 else
1877 domain->fgsp = true;
1878 }
1879
1880 __free_pages(pages, order);
1881 }
1882
1883 static struct vfio_iommu_group *find_iommu_group(struct vfio_domain *domain,
1884 struct iommu_group *iommu_group)
1885 {
1886 struct vfio_iommu_group *g;
1887
1888 list_for_each_entry(g, &domain->group_list, next) {
1889 if (g->iommu_group == iommu_group)
1890 return g;
1891 }
1892
1893 return NULL;
1894 }
1895
1896 static struct vfio_iommu_group*
1897 vfio_iommu_find_iommu_group(struct vfio_iommu *iommu,
1898 struct iommu_group *iommu_group)
1899 {
1900 struct vfio_iommu_group *group;
1901 struct vfio_domain *domain;
1902
1903 list_for_each_entry(domain, &iommu->domain_list, next) {
1904 group = find_iommu_group(domain, iommu_group);
1905 if (group)
1906 return group;
1907 }
1908
1909 list_for_each_entry(group, &iommu->emulated_iommu_groups, next)
1910 if (group->iommu_group == iommu_group)
1911 return group;
1912 return NULL;
1913 }
1914
1915 static bool vfio_iommu_has_sw_msi(struct list_head *group_resv_regions,
1916 phys_addr_t *base)
1917 {
1918 struct iommu_resv_region *region;
1919 bool ret = false;
1920
1921 list_for_each_entry(region, group_resv_regions, list) {
/*
 * The presence of any 'real' MSI region should take precedence over the
 * software-managed one if the IOMMU driver happens to advertise both
 * types.
 */
1927 if (region->type == IOMMU_RESV_MSI) {
1928 ret = false;
1929 break;
1930 }
1931
1932 if (region->type == IOMMU_RESV_SW_MSI) {
1933 *base = region->start;
1934 ret = true;
1935 }
1936 }
1937
1938 return ret;
1939 }
1940
/*
 * Helper to insert an address range into the iova list.  The list starts
 * out as a single entry covering the IOMMU domain geometry to which the
 * first group is attached, and is narrowed as further domains are added
 * and reserved regions are excluded, or recomputed when groups detach.
 */
1950 static int vfio_iommu_iova_insert(struct list_head *head,
1951 dma_addr_t start, dma_addr_t end)
1952 {
1953 struct vfio_iova *region;
1954
1955 region = kmalloc(sizeof(*region), GFP_KERNEL);
1956 if (!region)
1957 return -ENOMEM;
1958
INIT_LIST_HEAD(&region->list);
1960 region->start = start;
1961 region->end = end;
1962
list_add_tail(&region->list, head);
1964 return 0;
1965 }
1966
/*
 * Check whether the new aperture conflicts with any existing dma mappings.
 */
1971 static bool vfio_iommu_aper_conflict(struct vfio_iommu *iommu,
1972 dma_addr_t start, dma_addr_t end)
1973 {
1974 struct vfio_iova *first, *last;
1975 struct list_head *iova = &iommu->iova_list;
1976
1977 if (list_empty(iova))
1978 return false;
1979
1980
1981 first = list_first_entry(iova, struct vfio_iova, list);
1982 last = list_last_entry(iova, struct vfio_iova, list);
1983 if (start > last->end || end < first->start)
1984 return true;
1985
1986
1987 if (start > first->start) {
1988 if (vfio_find_dma(iommu, first->start, start - first->start))
1989 return true;
1990 }
1991
1992
1993 if (end < last->end) {
1994 if (vfio_find_dma(iommu, end + 1, last->end - end))
1995 return true;
1996 }
1997
1998 return false;
1999 }
2000
/*
 * Resize the iommu iova aperture window, deleting or trimming ranges that
 * fall outside the new [start, end].
 */
2005 static int vfio_iommu_aper_resize(struct list_head *iova,
2006 dma_addr_t start, dma_addr_t end)
2007 {
2008 struct vfio_iova *node, *next;
2009
2010 if (list_empty(iova))
2011 return vfio_iommu_iova_insert(iova, start, end);
2012
2013
2014 list_for_each_entry_safe(node, next, iova, list) {
2015 if (start < node->start)
2016 break;
2017 if (start >= node->start && start < node->end) {
2018 node->start = start;
2019 break;
2020 }
2021
2022 list_del(&node->list);
2023 kfree(node);
2024 }
2025
2026
2027 list_for_each_entry_safe(node, next, iova, list) {
2028 if (end > node->end)
2029 continue;
2030 if (end > node->start && end <= node->end) {
2031 node->end = end;
2032 continue;
2033 }
2034
2035 list_del(&node->list);
2036 kfree(node);
2037 }
2038
2039 return 0;
2040 }
2041
/*
 * Check whether any device reserved region conflicts with existing dma
 * mappings.
 */
2045 static bool vfio_iommu_resv_conflict(struct vfio_iommu *iommu,
2046 struct list_head *resv_regions)
2047 {
2048 struct iommu_resv_region *region;
2049
2050
2051 list_for_each_entry(region, resv_regions, list) {
2052 if (region->type == IOMMU_RESV_DIRECT_RELAXABLE)
2053 continue;
2054
2055 if (vfio_find_dma(iommu, region->start, region->length))
2056 return true;
2057 }
2058
2059 return false;
2060 }
2061
/*
 * Punch holes in the iova list for the reserved regions, splitting ranges
 * where necessary.
 */
2066 static int vfio_iommu_resv_exclude(struct list_head *iova,
2067 struct list_head *resv_regions)
2068 {
2069 struct iommu_resv_region *resv;
2070 struct vfio_iova *n, *next;
2071
2072 list_for_each_entry(resv, resv_regions, list) {
2073 phys_addr_t start, end;
2074
2075 if (resv->type == IOMMU_RESV_DIRECT_RELAXABLE)
2076 continue;
2077
2078 start = resv->start;
2079 end = resv->start + resv->length - 1;
2080
2081 list_for_each_entry_safe(n, next, iova, list) {
2082 int ret = 0;
2083
2084
2085 if (start > n->end || end < n->start)
2086 continue;
2087
2088
2089
2090
2091
2092
2093
2094 if (start > n->start)
2095 ret = vfio_iommu_iova_insert(&n->list, n->start,
2096 start - 1);
2097 if (!ret && end < n->end)
2098 ret = vfio_iommu_iova_insert(&n->list, end + 1,
2099 n->end);
2100 if (ret)
2101 return ret;
2102
2103 list_del(&n->list);
2104 kfree(n);
2105 }
2106 }
2107
2108 if (list_empty(iova))
2109 return -EINVAL;
2110
2111 return 0;
2112 }
2113
2114 static void vfio_iommu_resv_free(struct list_head *resv_regions)
2115 {
2116 struct iommu_resv_region *n, *next;
2117
2118 list_for_each_entry_safe(n, next, resv_regions, list) {
2119 list_del(&n->list);
2120 kfree(n);
2121 }
2122 }
2123
2124 static void vfio_iommu_iova_free(struct list_head *iova)
2125 {
2126 struct vfio_iova *n, *next;
2127
2128 list_for_each_entry_safe(n, next, iova, list) {
2129 list_del(&n->list);
2130 kfree(n);
2131 }
2132 }
2133
2134 static int vfio_iommu_iova_get_copy(struct vfio_iommu *iommu,
2135 struct list_head *iova_copy)
2136 {
2137 struct list_head *iova = &iommu->iova_list;
2138 struct vfio_iova *n;
2139 int ret;
2140
2141 list_for_each_entry(n, iova, list) {
2142 ret = vfio_iommu_iova_insert(iova_copy, n->start, n->end);
2143 if (ret)
2144 goto out_free;
2145 }
2146
2147 return 0;
2148
2149 out_free:
2150 vfio_iommu_iova_free(iova_copy);
2151 return ret;
2152 }
2153
2154 static void vfio_iommu_iova_insert_copy(struct vfio_iommu *iommu,
2155 struct list_head *iova_copy)
2156 {
2157 struct list_head *iova = &iommu->iova_list;
2158
2159 vfio_iommu_iova_free(iova);
2160
2161 list_splice_tail(iova_copy, iova);
2162 }
2163
2164
2165 static int vfio_iommu_device_capable(struct device *dev, void *data)
2166 {
2167 return device_iommu_capable(dev, (enum iommu_cap)data);
2168 }
2169
2170 static int vfio_iommu_domain_alloc(struct device *dev, void *data)
2171 {
2172 struct iommu_domain **domain = data;
2173
2174 *domain = iommu_domain_alloc(dev->bus);
2175 return 1;
2176 }
2177
2178 static int vfio_iommu_type1_attach_group(void *iommu_data,
2179 struct iommu_group *iommu_group, enum vfio_group_type type)
2180 {
2181 struct vfio_iommu *iommu = iommu_data;
2182 struct vfio_iommu_group *group;
2183 struct vfio_domain *domain, *d;
2184 bool resv_msi, msi_remap;
2185 phys_addr_t resv_msi_base = 0;
2186 struct iommu_domain_geometry *geo;
2187 LIST_HEAD(iova_copy);
2188 LIST_HEAD(group_resv_regions);
2189 int ret = -EINVAL;
2190
2191 mutex_lock(&iommu->lock);
2192
2193
2194 if (vfio_iommu_find_iommu_group(iommu, iommu_group))
2195 goto out_unlock;
2196
2197 ret = -ENOMEM;
2198 group = kzalloc(sizeof(*group), GFP_KERNEL);
2199 if (!group)
2200 goto out_unlock;
2201 group->iommu_group = iommu_group;
2202
2203 if (type == VFIO_EMULATED_IOMMU) {
2204 list_add(&group->next, &iommu->emulated_iommu_groups);
/*
 * An emulated IOMMU group cannot dirty memory directly; it can only use
 * its pinned pages, so it starts out with pinned-page dirty scope and
 * does not demote the container's dirty reporting.
 */
2211 group->pinned_page_dirty_scope = true;
2212 ret = 0;
2213 goto out_unlock;
2214 }
2215
2216 ret = -ENOMEM;
2217 domain = kzalloc(sizeof(*domain), GFP_KERNEL);
2218 if (!domain)
2219 goto out_free_group;
2220
/*
 * Going via the iommu_group iterator avoids races and trivially gives us a
 * representative device for the IOMMU API call.  We don't actually want to
 * iterate beyond the first device (if any).
 */
2226 ret = -EIO;
2227 iommu_group_for_each_dev(iommu_group, &domain->domain,
2228 vfio_iommu_domain_alloc);
2229 if (!domain->domain)
2230 goto out_free_domain;
2231
2232 if (iommu->nesting) {
2233 ret = iommu_enable_nesting(domain->domain);
2234 if (ret)
2235 goto out_domain;
2236 }
2237
2238 ret = iommu_attach_group(domain->domain, group->iommu_group);
2239 if (ret)
2240 goto out_domain;
2241
2242
2243 geo = &domain->domain->geometry;
2244 if (vfio_iommu_aper_conflict(iommu, geo->aperture_start,
2245 geo->aperture_end)) {
2246 ret = -EINVAL;
2247 goto out_detach;
2248 }
2249
2250 ret = iommu_get_group_resv_regions(iommu_group, &group_resv_regions);
2251 if (ret)
2252 goto out_detach;
2253
2254 if (vfio_iommu_resv_conflict(iommu, &group_resv_regions)) {
2255 ret = -EINVAL;
2256 goto out_detach;
2257 }
2258
2259
2260
2261
2262
2263
2264 ret = vfio_iommu_iova_get_copy(iommu, &iova_copy);
2265 if (ret)
2266 goto out_detach;
2267
2268 ret = vfio_iommu_aper_resize(&iova_copy, geo->aperture_start,
2269 geo->aperture_end);
2270 if (ret)
2271 goto out_detach;
2272
2273 ret = vfio_iommu_resv_exclude(&iova_copy, &group_resv_regions);
2274 if (ret)
2275 goto out_detach;
2276
2277 resv_msi = vfio_iommu_has_sw_msi(&group_resv_regions, &resv_msi_base);
2278
2279 INIT_LIST_HEAD(&domain->group_list);
2280 list_add(&group->next, &domain->group_list);
2281
2282 msi_remap = irq_domain_check_msi_remap() ||
2283 iommu_group_for_each_dev(iommu_group, (void *)IOMMU_CAP_INTR_REMAP,
2284 vfio_iommu_device_capable);
2285
2286 if (!allow_unsafe_interrupts && !msi_remap) {
2287 pr_warn("%s: No interrupt remapping support. Use the module param \"allow_unsafe_interrupts\" to enable VFIO IOMMU support on this platform\n",
2288 __func__);
2289 ret = -EPERM;
2290 goto out_detach;
2291 }
2292
/*
 * If the IOMMU can block non-coherent operations (i.e. PCIe TLPs with
 * no-snoop set) then VFIO always turns this feature on, because on Intel
 * platforms it allows KVM to disable wbinvd emulation.
 */
2298 if (domain->domain->ops->enforce_cache_coherency)
2299 domain->enforce_cache_coherency =
2300 domain->domain->ops->enforce_cache_coherency(
2301 domain->domain);
2302
/*
 * Try to match an existing compatible domain.  We don't want to preclude
 * an IOMMU driver supporting multiple bus_types and being able to include
 * different bus_types in the same IOMMU domain, so we test whether the
 * domains use the same iommu_ops rather than testing if they're on the
 * same bus_type.
 */
2310 list_for_each_entry(d, &iommu->domain_list, next) {
2311 if (d->domain->ops == domain->domain->ops &&
2312 d->enforce_cache_coherency ==
2313 domain->enforce_cache_coherency) {
2314 iommu_detach_group(domain->domain, group->iommu_group);
2315 if (!iommu_attach_group(d->domain,
2316 group->iommu_group)) {
2317 list_add(&group->next, &d->group_list);
2318 iommu_domain_free(domain->domain);
2319 kfree(domain);
2320 goto done;
2321 }
2322
2323 ret = iommu_attach_group(domain->domain,
2324 group->iommu_group);
2325 if (ret)
2326 goto out_domain;
2327 }
2328 }
2329
2330 vfio_test_domain_fgsp(domain);
2331
2332
2333 ret = vfio_iommu_replay(iommu, domain);
2334 if (ret)
2335 goto out_detach;
2336
2337 if (resv_msi) {
2338 ret = iommu_get_msi_cookie(domain->domain, resv_msi_base);
2339 if (ret && ret != -ENODEV)
2340 goto out_detach;
2341 }
2342
2343 list_add(&domain->next, &iommu->domain_list);
2344 vfio_update_pgsize_bitmap(iommu);
2345 done:
2346
2347 vfio_iommu_iova_insert_copy(iommu, &iova_copy);
2348
/*
 * An iommu-backed group can dirty memory directly and therefore demotes
 * the iommu scope until it declares itself dirty-tracking capable via the
 * page pinning interface.
 */
2354 iommu->num_non_pinned_groups++;
2355 mutex_unlock(&iommu->lock);
2356 vfio_iommu_resv_free(&group_resv_regions);
2357
2358 return 0;
2359
2360 out_detach:
2361 iommu_detach_group(domain->domain, group->iommu_group);
2362 out_domain:
2363 iommu_domain_free(domain->domain);
2364 vfio_iommu_iova_free(&iova_copy);
2365 vfio_iommu_resv_free(&group_resv_regions);
2366 out_free_domain:
2367 kfree(domain);
2368 out_free_group:
2369 kfree(group);
2370 out_unlock:
2371 mutex_unlock(&iommu->lock);
2372 return ret;
2373 }
2374
2375 static void vfio_iommu_unmap_unpin_all(struct vfio_iommu *iommu)
2376 {
2377 struct rb_node *node;
2378
2379 while ((node = rb_first(&iommu->dma_list)))
2380 vfio_remove_dma(iommu, rb_entry(node, struct vfio_dma, node));
2381 }
2382
2383 static void vfio_iommu_unmap_unpin_reaccount(struct vfio_iommu *iommu)
2384 {
2385 struct rb_node *n, *p;
2386
2387 n = rb_first(&iommu->dma_list);
2388 for (; n; n = rb_next(n)) {
2389 struct vfio_dma *dma;
2390 long locked = 0, unlocked = 0;
2391
2392 dma = rb_entry(n, struct vfio_dma, node);
2393 unlocked += vfio_unmap_unpin(iommu, dma, false);
2394 p = rb_first(&dma->pfn_list);
2395 for (; p; p = rb_next(p)) {
2396 struct vfio_pfn *vpfn = rb_entry(p, struct vfio_pfn,
2397 node);
2398
2399 if (!is_invalid_reserved_pfn(vpfn->pfn))
2400 locked++;
2401 }
2402 vfio_lock_acct(dma, locked - unlocked, true);
2403 }
2404 }
2405
/*
 * Called when an iommu domain is removed: widen the copy of the iova list
 * back out to the intersection of the remaining domains' apertures.
 */
2411 static void vfio_iommu_aper_expand(struct vfio_iommu *iommu,
2412 struct list_head *iova_copy)
2413 {
2414 struct vfio_domain *domain;
2415 struct vfio_iova *node;
2416 dma_addr_t start = 0;
2417 dma_addr_t end = (dma_addr_t)~0;
2418
2419 if (list_empty(iova_copy))
2420 return;
2421
2422 list_for_each_entry(domain, &iommu->domain_list, next) {
2423 struct iommu_domain_geometry *geo = &domain->domain->geometry;
2424
2425 if (geo->aperture_start > start)
2426 start = geo->aperture_start;
2427 if (geo->aperture_end < end)
2428 end = geo->aperture_end;
2429 }
2430
2431
2432 node = list_first_entry(iova_copy, struct vfio_iova, list);
2433 node->start = start;
2434 node = list_last_entry(iova_copy, struct vfio_iova, list);
2435 node->end = end;
2436 }
2437
/*
 * Called when a group is detached: rebuild the iova list from the current
 * aperture and re-exclude the reserved regions of all remaining groups.
 */
2444 static int vfio_iommu_resv_refresh(struct vfio_iommu *iommu,
2445 struct list_head *iova_copy)
2446 {
2447 struct vfio_domain *d;
2448 struct vfio_iommu_group *g;
2449 struct vfio_iova *node;
2450 dma_addr_t start, end;
2451 LIST_HEAD(resv_regions);
2452 int ret;
2453
2454 if (list_empty(iova_copy))
2455 return -EINVAL;
2456
2457 list_for_each_entry(d, &iommu->domain_list, next) {
2458 list_for_each_entry(g, &d->group_list, next) {
2459 ret = iommu_get_group_resv_regions(g->iommu_group,
2460 &resv_regions);
2461 if (ret)
2462 goto done;
2463 }
2464 }
2465
2466 node = list_first_entry(iova_copy, struct vfio_iova, list);
2467 start = node->start;
2468 node = list_last_entry(iova_copy, struct vfio_iova, list);
2469 end = node->end;
2470
2471
2472 vfio_iommu_iova_free(iova_copy);
2473
2474 ret = vfio_iommu_aper_resize(iova_copy, start, end);
2475 if (ret)
2476 goto done;
2477
2478
2479 ret = vfio_iommu_resv_exclude(iova_copy, &resv_regions);
2480 done:
2481 vfio_iommu_resv_free(&resv_regions);
2482 return ret;
2483 }
2484
2485 static void vfio_iommu_type1_detach_group(void *iommu_data,
2486 struct iommu_group *iommu_group)
2487 {
2488 struct vfio_iommu *iommu = iommu_data;
2489 struct vfio_domain *domain;
2490 struct vfio_iommu_group *group;
2491 bool update_dirty_scope = false;
2492 LIST_HEAD(iova_copy);
2493
2494 mutex_lock(&iommu->lock);
2495 list_for_each_entry(group, &iommu->emulated_iommu_groups, next) {
2496 if (group->iommu_group != iommu_group)
2497 continue;
2498 update_dirty_scope = !group->pinned_page_dirty_scope;
2499 list_del(&group->next);
2500 kfree(group);
2501
2502 if (list_empty(&iommu->emulated_iommu_groups) &&
2503 list_empty(&iommu->domain_list)) {
2504 WARN_ON(!list_empty(&iommu->device_list));
2505 vfio_iommu_unmap_unpin_all(iommu);
2506 }
2507 goto detach_group_done;
2508 }
2509
/*
 * Work on a copy of the iova list; it is only committed back to the
 * container if the refresh below succeeds.
 */
2515 vfio_iommu_iova_get_copy(iommu, &iova_copy);
2516
2517 list_for_each_entry(domain, &iommu->domain_list, next) {
2518 group = find_iommu_group(domain, iommu_group);
2519 if (!group)
2520 continue;
2521
2522 iommu_detach_group(domain->domain, group->iommu_group);
2523 update_dirty_scope = !group->pinned_page_dirty_scope;
2524 list_del(&group->next);
2525 kfree(group);
2526
/*
 * Group ownership provides privilege; if the group list is empty, the
 * domain goes away.  If it's the last iommu-backed domain, then either all
 * mappings go away too (no emulated groups left) or the pinned-page
 * accounting is redone for the remaining emulated groups.
 */
2533 if (list_empty(&domain->group_list)) {
2534 if (list_is_singular(&iommu->domain_list)) {
2535 if (list_empty(&iommu->emulated_iommu_groups)) {
2536 WARN_ON(!list_empty(
2537 &iommu->device_list));
2538 vfio_iommu_unmap_unpin_all(iommu);
2539 } else {
2540 vfio_iommu_unmap_unpin_reaccount(iommu);
2541 }
2542 }
2543 iommu_domain_free(domain->domain);
2544 list_del(&domain->next);
2545 kfree(domain);
2546 vfio_iommu_aper_expand(iommu, &iova_copy);
2547 vfio_update_pgsize_bitmap(iommu);
2548 }
2549 break;
2550 }
2551
2552 if (!vfio_iommu_resv_refresh(iommu, &iova_copy))
2553 vfio_iommu_iova_insert_copy(iommu, &iova_copy);
2554 else
2555 vfio_iommu_iova_free(&iova_copy);
2556
2557 detach_group_done:
/*
 * Removal of a group without dirty tracking may allow the iommu scope
 * to be promoted.
 */
2562 if (update_dirty_scope) {
2563 iommu->num_non_pinned_groups--;
2564 if (iommu->dirty_page_tracking)
2565 vfio_iommu_populate_bitmap_full(iommu);
2566 }
2567 mutex_unlock(&iommu->lock);
2568 }
2569
2570 static void *vfio_iommu_type1_open(unsigned long arg)
2571 {
2572 struct vfio_iommu *iommu;
2573
2574 iommu = kzalloc(sizeof(*iommu), GFP_KERNEL);
2575 if (!iommu)
2576 return ERR_PTR(-ENOMEM);
2577
2578 switch (arg) {
2579 case VFIO_TYPE1_IOMMU:
2580 break;
2581 case VFIO_TYPE1_NESTING_IOMMU:
2582 iommu->nesting = true;
2583 fallthrough;
2584 case VFIO_TYPE1v2_IOMMU:
2585 iommu->v2 = true;
2586 break;
2587 default:
2588 kfree(iommu);
2589 return ERR_PTR(-EINVAL);
2590 }
2591
2592 INIT_LIST_HEAD(&iommu->domain_list);
2593 INIT_LIST_HEAD(&iommu->iova_list);
2594 iommu->dma_list = RB_ROOT;
2595 iommu->dma_avail = dma_entry_limit;
2596 iommu->container_open = true;
2597 mutex_init(&iommu->lock);
2598 mutex_init(&iommu->device_list_lock);
2599 INIT_LIST_HEAD(&iommu->device_list);
2600 init_waitqueue_head(&iommu->vaddr_wait);
2601 iommu->pgsize_bitmap = PAGE_MASK;
2602 INIT_LIST_HEAD(&iommu->emulated_iommu_groups);
2603
2604 return iommu;
2605 }
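/*
 * Userspace reaches vfio_iommu_type1_open() indirectly, by asking the VFIO
 * container to use this backend. A minimal userspace-side sketch (not part of
 * this driver; it assumes a container fd opened from /dev/vfio/vfio with at
 * least one group already added):
 *
 *	if (ioctl(container_fd, VFIO_CHECK_EXTENSION, VFIO_TYPE1v2_IOMMU) == 1)
 *		ioctl(container_fd, VFIO_SET_IOMMU, VFIO_TYPE1v2_IOMMU);
 *
 * The argument to VFIO_SET_IOMMU is what arrives here as 'arg'.
 */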
2606
2607 static void vfio_release_domain(struct vfio_domain *domain)
2608 {
2609 struct vfio_iommu_group *group, *group_tmp;
2610
2611 list_for_each_entry_safe(group, group_tmp,
2612 &domain->group_list, next) {
2613 iommu_detach_group(domain->domain, group->iommu_group);
2614 list_del(&group->next);
2615 kfree(group);
2616 }
2617
2618 iommu_domain_free(domain->domain);
2619 }
2620
2621 static void vfio_iommu_type1_release(void *iommu_data)
2622 {
2623 struct vfio_iommu *iommu = iommu_data;
2624 struct vfio_domain *domain, *domain_tmp;
2625 struct vfio_iommu_group *group, *next_group;
2626
2627 list_for_each_entry_safe(group, next_group,
2628 &iommu->emulated_iommu_groups, next) {
2629 list_del(&group->next);
2630 kfree(group);
2631 }
2632
2633 vfio_iommu_unmap_unpin_all(iommu);
2634
2635 list_for_each_entry_safe(domain, domain_tmp,
2636 &iommu->domain_list, next) {
2637 vfio_release_domain(domain);
2638 list_del(&domain->next);
2639 kfree(domain);
2640 }
2641
2642 vfio_iommu_iova_free(&iommu->iova_list);
2643
2644 kfree(iommu);
2645 }
2646
2647 static int vfio_domains_have_enforce_cache_coherency(struct vfio_iommu *iommu)
2648 {
2649 struct vfio_domain *domain;
2650 int ret = 1;
2651
2652 mutex_lock(&iommu->lock);
2653 list_for_each_entry(domain, &iommu->domain_list, next) {
2654 if (!(domain->enforce_cache_coherency)) {
2655 ret = 0;
2656 break;
2657 }
2658 }
2659 mutex_unlock(&iommu->lock);
2660
2661 return ret;
2662 }
2663
2664 static int vfio_iommu_type1_check_extension(struct vfio_iommu *iommu,
2665 unsigned long arg)
2666 {
2667 switch (arg) {
2668 case VFIO_TYPE1_IOMMU:
2669 case VFIO_TYPE1v2_IOMMU:
2670 case VFIO_TYPE1_NESTING_IOMMU:
2671 case VFIO_UNMAP_ALL:
2672 case VFIO_UPDATE_VADDR:
2673 return 1;
2674 case VFIO_DMA_CC_IOMMU:
2675 if (!iommu)
2676 return 0;
2677 return vfio_domains_have_enforce_cache_coherency(iommu);
2678 default:
2679 return 0;
2680 }
2681 }
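/*
 * Example of probing an extension from userspace (illustrative sketch only,
 * not part of this driver): a return value of 1 means the feature is
 * available. For VFIO_DMA_CC_IOMMU that means every attached domain enforces
 * cache coherency.
 *
 *	int coherent = ioctl(container_fd, VFIO_CHECK_EXTENSION,
 *			     VFIO_DMA_CC_IOMMU);
 */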
2682
2683 static int vfio_iommu_iova_add_cap(struct vfio_info_cap *caps,
2684 struct vfio_iommu_type1_info_cap_iova_range *cap_iovas,
2685 size_t size)
2686 {
2687 struct vfio_info_cap_header *header;
2688 struct vfio_iommu_type1_info_cap_iova_range *iova_cap;
2689
2690 header = vfio_info_cap_add(caps, size,
2691 VFIO_IOMMU_TYPE1_INFO_CAP_IOVA_RANGE, 1);
2692 if (IS_ERR(header))
2693 return PTR_ERR(header);
2694
2695 iova_cap = container_of(header,
2696 struct vfio_iommu_type1_info_cap_iova_range,
2697 header);
2698 iova_cap->nr_iovas = cap_iovas->nr_iovas;
2699 memcpy(iova_cap->iova_ranges, cap_iovas->iova_ranges,
2700 cap_iovas->nr_iovas * sizeof(*cap_iovas->iova_ranges));
2701 return 0;
2702 }
2703
2704 static int vfio_iommu_iova_build_caps(struct vfio_iommu *iommu,
2705 struct vfio_info_cap *caps)
2706 {
2707 struct vfio_iommu_type1_info_cap_iova_range *cap_iovas;
2708 struct vfio_iova *iova;
2709 size_t size;
2710 int iovas = 0, i = 0, ret;
2711
2712 list_for_each_entry(iova, &iommu->iova_list, list)
2713 iovas++;
2714
2715 if (!iovas) {
/*
 * Return 0: a container with only emulated (mdev) devices will have an
 * empty iova list.
 */
2720 return 0;
2721 }
2722
2723 size = struct_size(cap_iovas, iova_ranges, iovas);
2724
2725 cap_iovas = kzalloc(size, GFP_KERNEL);
2726 if (!cap_iovas)
2727 return -ENOMEM;
2728
2729 cap_iovas->nr_iovas = iovas;
2730
2731 list_for_each_entry(iova, &iommu->iova_list, list) {
2732 cap_iovas->iova_ranges[i].start = iova->start;
2733 cap_iovas->iova_ranges[i].end = iova->end;
2734 i++;
2735 }
2736
2737 ret = vfio_iommu_iova_add_cap(caps, cap_iovas, size);
2738
2739 kfree(cap_iovas);
2740 return ret;
2741 }
2742
2743 static int vfio_iommu_migration_build_caps(struct vfio_iommu *iommu,
2744 struct vfio_info_cap *caps)
2745 {
2746 struct vfio_iommu_type1_info_cap_migration cap_mig;
2747
2748 cap_mig.header.id = VFIO_IOMMU_TYPE1_INFO_CAP_MIGRATION;
2749 cap_mig.header.version = 1;
2750
2751 cap_mig.flags = 0;
/* Advertise only the minimum supported page size for dirty tracking. */
2753 cap_mig.pgsize_bitmap = (size_t)1 << __ffs(iommu->pgsize_bitmap);
2754 cap_mig.max_dirty_bitmap_size = DIRTY_BITMAP_SIZE_MAX;
2755
2756 return vfio_info_add_capability(caps, &cap_mig.header, sizeof(cap_mig));
2757 }
2758
2759 static int vfio_iommu_dma_avail_build_caps(struct vfio_iommu *iommu,
2760 struct vfio_info_cap *caps)
2761 {
2762 struct vfio_iommu_type1_info_dma_avail cap_dma_avail;
2763
2764 cap_dma_avail.header.id = VFIO_IOMMU_TYPE1_INFO_DMA_AVAIL;
2765 cap_dma_avail.header.version = 1;
2766
2767 cap_dma_avail.avail = iommu->dma_avail;
2768
2769 return vfio_info_add_capability(caps, &cap_dma_avail.header,
2770 sizeof(cap_dma_avail));
2771 }
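/*
 * Userspace sketch for consuming this capability (illustrative only): after a
 * successful VFIO_IOMMU_GET_INFO, walk the capability chain starting at
 * info->cap_offset and match header->id. Offsets are relative to the start of
 * the info buffer.
 *
 *	struct vfio_info_cap_header *hdr;
 *	__u32 off = info->cap_offset;
 *
 *	while (off) {
 *		hdr = (void *)((char *)info + off);
 *		if (hdr->id == VFIO_IOMMU_TYPE1_INFO_DMA_AVAIL)
 *			break;
 *		off = hdr->next;
 *	}
 */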
2772
2773 static int vfio_iommu_type1_get_info(struct vfio_iommu *iommu,
2774 unsigned long arg)
2775 {
2776 struct vfio_iommu_type1_info info;
2777 unsigned long minsz;
2778 struct vfio_info_cap caps = { .buf = NULL, .size = 0 };
2779 unsigned long capsz;
2780 int ret;
2781
2782 minsz = offsetofend(struct vfio_iommu_type1_info, iova_pgsizes);
2783
/* For backward compatibility, cap_offset cannot be required of callers. */
2785 capsz = offsetofend(struct vfio_iommu_type1_info, cap_offset);
2786
2787 if (copy_from_user(&info, (void __user *)arg, minsz))
2788 return -EFAULT;
2789
2790 if (info.argsz < minsz)
2791 return -EINVAL;
2792
2793 if (info.argsz >= capsz) {
2794 minsz = capsz;
2795 info.cap_offset = 0;
2796 }
2797
2798 mutex_lock(&iommu->lock);
2799 info.flags = VFIO_IOMMU_INFO_PGSIZES;
2800
2801 info.iova_pgsizes = iommu->pgsize_bitmap;
2802
2803 ret = vfio_iommu_migration_build_caps(iommu, &caps);
2804
2805 if (!ret)
2806 ret = vfio_iommu_dma_avail_build_caps(iommu, &caps);
2807
2808 if (!ret)
2809 ret = vfio_iommu_iova_build_caps(iommu, &caps);
2810
2811 mutex_unlock(&iommu->lock);
2812
2813 if (ret)
2814 return ret;
2815
2816 if (caps.size) {
2817 info.flags |= VFIO_IOMMU_INFO_CAPS;
2818
2819 if (info.argsz < sizeof(info) + caps.size) {
2820 info.argsz = sizeof(info) + caps.size;
2821 } else {
2822 vfio_info_cap_shift(&caps, sizeof(info));
2823 if (copy_to_user((void __user *)arg +
2824 sizeof(info), caps.buf,
2825 caps.size)) {
2826 kfree(caps.buf);
2827 return -EFAULT;
2828 }
2829 info.cap_offset = sizeof(info);
2830 }
2831
2832 kfree(caps.buf);
2833 }
2834
2835 return copy_to_user((void __user *)arg, &info, minsz) ?
2836 -EFAULT : 0;
2837 }
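/*
 * Userspace sketch for VFIO_IOMMU_GET_INFO (illustrative only): call once to
 * learn the required argsz, then again with a buffer large enough for the
 * capability chain.
 *
 *	struct vfio_iommu_type1_info hdr = { .argsz = sizeof(hdr) };
 *	struct vfio_iommu_type1_info *info;
 *
 *	ioctl(container_fd, VFIO_IOMMU_GET_INFO, &hdr);
 *	info = calloc(1, hdr.argsz);
 *	info->argsz = hdr.argsz;
 *	ioctl(container_fd, VFIO_IOMMU_GET_INFO, info);
 *
 * When VFIO_IOMMU_INFO_CAPS is set in info->flags, the capability chain
 * starts at (char *)info + info->cap_offset.
 */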
2838
2839 static int vfio_iommu_type1_map_dma(struct vfio_iommu *iommu,
2840 unsigned long arg)
2841 {
2842 struct vfio_iommu_type1_dma_map map;
2843 unsigned long minsz;
2844 uint32_t mask = VFIO_DMA_MAP_FLAG_READ | VFIO_DMA_MAP_FLAG_WRITE |
2845 VFIO_DMA_MAP_FLAG_VADDR;
2846
2847 minsz = offsetofend(struct vfio_iommu_type1_dma_map, size);
2848
2849 if (copy_from_user(&map, (void __user *)arg, minsz))
2850 return -EFAULT;
2851
2852 if (map.argsz < minsz || map.flags & ~mask)
2853 return -EINVAL;
2854
2855 return vfio_dma_do_map(iommu, &map);
2856 }
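/*
 * Userspace sketch for VFIO_IOMMU_MAP_DMA (illustrative only): 'buf' is an
 * assumed page-aligned allocation that the caller is allowed to pin, and the
 * iova/size values are placeholders.
 *
 *	struct vfio_iommu_type1_dma_map map = {
 *		.argsz = sizeof(map),
 *		.flags = VFIO_DMA_MAP_FLAG_READ | VFIO_DMA_MAP_FLAG_WRITE,
 *		.vaddr = (__u64)(uintptr_t)buf,
 *		.iova  = 0x100000,
 *		.size  = 0x100000,
 *	};
 *
 *	ioctl(container_fd, VFIO_IOMMU_MAP_DMA, &map);
 */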
2857
2858 static int vfio_iommu_type1_unmap_dma(struct vfio_iommu *iommu,
2859 unsigned long arg)
2860 {
2861 struct vfio_iommu_type1_dma_unmap unmap;
2862 struct vfio_bitmap bitmap = { 0 };
2863 uint32_t mask = VFIO_DMA_UNMAP_FLAG_GET_DIRTY_BITMAP |
2864 VFIO_DMA_UNMAP_FLAG_VADDR |
2865 VFIO_DMA_UNMAP_FLAG_ALL;
2866 unsigned long minsz;
2867 int ret;
2868
2869 minsz = offsetofend(struct vfio_iommu_type1_dma_unmap, size);
2870
2871 if (copy_from_user(&unmap, (void __user *)arg, minsz))
2872 return -EFAULT;
2873
2874 if (unmap.argsz < minsz || unmap.flags & ~mask)
2875 return -EINVAL;
2876
2877 if ((unmap.flags & VFIO_DMA_UNMAP_FLAG_GET_DIRTY_BITMAP) &&
2878 (unmap.flags & (VFIO_DMA_UNMAP_FLAG_ALL |
2879 VFIO_DMA_UNMAP_FLAG_VADDR)))
2880 return -EINVAL;
2881
2882 if (unmap.flags & VFIO_DMA_UNMAP_FLAG_GET_DIRTY_BITMAP) {
2883 unsigned long pgshift;
2884
2885 if (unmap.argsz < (minsz + sizeof(bitmap)))
2886 return -EINVAL;
2887
2888 if (copy_from_user(&bitmap,
2889 (void __user *)(arg + minsz),
2890 sizeof(bitmap)))
2891 return -EFAULT;
2892
2893 if (!access_ok((void __user *)bitmap.data, bitmap.size))
2894 return -EINVAL;
2895
2896 pgshift = __ffs(bitmap.pgsize);
2897 ret = verify_bitmap_size(unmap.size >> pgshift,
2898 bitmap.size);
2899 if (ret)
2900 return ret;
2901 }
2902
2903 ret = vfio_dma_do_unmap(iommu, &unmap, &bitmap);
2904 if (ret)
2905 return ret;
2906
2907 return copy_to_user((void __user *)arg, &unmap, minsz) ?
2908 -EFAULT : 0;
2909 }
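/*
 * Userspace sketch for VFIO_IOMMU_UNMAP_DMA (illustrative only). On return
 * the kernel updates unmap.size to the number of bytes actually unmapped.
 *
 *	struct vfio_iommu_type1_dma_unmap unmap = {
 *		.argsz = sizeof(unmap),
 *		.iova  = 0x100000,
 *		.size  = 0x100000,
 *	};
 *
 *	ioctl(container_fd, VFIO_IOMMU_UNMAP_DMA, &unmap);
 */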
2910
2911 static int vfio_iommu_type1_dirty_pages(struct vfio_iommu *iommu,
2912 unsigned long arg)
2913 {
2914 struct vfio_iommu_type1_dirty_bitmap dirty;
2915 uint32_t mask = VFIO_IOMMU_DIRTY_PAGES_FLAG_START |
2916 VFIO_IOMMU_DIRTY_PAGES_FLAG_STOP |
2917 VFIO_IOMMU_DIRTY_PAGES_FLAG_GET_BITMAP;
2918 unsigned long minsz;
2919 int ret = 0;
2920
2921 if (!iommu->v2)
2922 return -EACCES;
2923
2924 minsz = offsetofend(struct vfio_iommu_type1_dirty_bitmap, flags);
2925
2926 if (copy_from_user(&dirty, (void __user *)arg, minsz))
2927 return -EFAULT;
2928
2929 if (dirty.argsz < minsz || dirty.flags & ~mask)
2930 return -EINVAL;
2931
/* Only one flag may be set at a time. */
2933 if (__ffs(dirty.flags) != __fls(dirty.flags))
2934 return -EINVAL;
2935
2936 if (dirty.flags & VFIO_IOMMU_DIRTY_PAGES_FLAG_START) {
2937 size_t pgsize;
2938
2939 mutex_lock(&iommu->lock);
2940 pgsize = 1 << __ffs(iommu->pgsize_bitmap);
2941 if (!iommu->dirty_page_tracking) {
2942 ret = vfio_dma_bitmap_alloc_all(iommu, pgsize);
2943 if (!ret)
2944 iommu->dirty_page_tracking = true;
2945 }
2946 mutex_unlock(&iommu->lock);
2947 return ret;
2948 } else if (dirty.flags & VFIO_IOMMU_DIRTY_PAGES_FLAG_STOP) {
2949 mutex_lock(&iommu->lock);
2950 if (iommu->dirty_page_tracking) {
2951 iommu->dirty_page_tracking = false;
2952 vfio_dma_bitmap_free_all(iommu);
2953 }
2954 mutex_unlock(&iommu->lock);
2955 return 0;
2956 } else if (dirty.flags & VFIO_IOMMU_DIRTY_PAGES_FLAG_GET_BITMAP) {
2957 struct vfio_iommu_type1_dirty_bitmap_get range;
2958 unsigned long pgshift;
2959 size_t data_size = dirty.argsz - minsz;
2960 size_t iommu_pgsize;
2961
2962 if (!data_size || data_size < sizeof(range))
2963 return -EINVAL;
2964
2965 if (copy_from_user(&range, (void __user *)(arg + minsz),
2966 sizeof(range)))
2967 return -EFAULT;
2968
2969 if (range.iova + range.size < range.iova)
2970 return -EINVAL;
2971 if (!access_ok((void __user *)range.bitmap.data,
2972 range.bitmap.size))
2973 return -EINVAL;
2974
2975 pgshift = __ffs(range.bitmap.pgsize);
2976 ret = verify_bitmap_size(range.size >> pgshift,
2977 range.bitmap.size);
2978 if (ret)
2979 return ret;
2980
2981 mutex_lock(&iommu->lock);
2982
2983 iommu_pgsize = (size_t)1 << __ffs(iommu->pgsize_bitmap);
2984
/* Allow only the smallest supported page size. */
2986 if (range.bitmap.pgsize != iommu_pgsize) {
2987 ret = -EINVAL;
2988 goto out_unlock;
2989 }
2990 if (range.iova & (iommu_pgsize - 1)) {
2991 ret = -EINVAL;
2992 goto out_unlock;
2993 }
2994 if (!range.size || range.size & (iommu_pgsize - 1)) {
2995 ret = -EINVAL;
2996 goto out_unlock;
2997 }
2998
2999 if (iommu->dirty_page_tracking)
3000 ret = vfio_iova_dirty_bitmap(range.bitmap.data,
3001 iommu, range.iova,
3002 range.size,
3003 range.bitmap.pgsize);
3004 else
3005 ret = -EINVAL;
3006 out_unlock:
3007 mutex_unlock(&iommu->lock);
3008
3009 return ret;
3010 }
3011
3012 return -EINVAL;
3013 }
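/*
 * Userspace sketch for VFIO_IOMMU_DIRTY_PAGES (illustrative only): start
 * tracking, then fetch bitmaps for IOVA ranges using the minimum page size
 * reported by VFIO_IOMMU_GET_INFO, then stop. With a 4 KiB page size, one
 * __u64 of bitmap covers 64 pages, so bitmap.size must be at least
 * ALIGN(size / 4096, 64) / 8 bytes (see verify_bitmap_size() above).
 *
 *	struct vfio_iommu_type1_dirty_bitmap start = {
 *		.argsz = sizeof(start),
 *		.flags = VFIO_IOMMU_DIRTY_PAGES_FLAG_START,
 *	};
 *
 *	ioctl(container_fd, VFIO_IOMMU_DIRTY_PAGES, &start);
 *
 * The GET_BITMAP variant appends a struct vfio_iommu_type1_dirty_bitmap_get
 * (iova, size, bitmap) after the header, with argsz covering both.
 */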
3014
3015 static long vfio_iommu_type1_ioctl(void *iommu_data,
3016 unsigned int cmd, unsigned long arg)
3017 {
3018 struct vfio_iommu *iommu = iommu_data;
3019
3020 switch (cmd) {
3021 case VFIO_CHECK_EXTENSION:
3022 return vfio_iommu_type1_check_extension(iommu, arg);
3023 case VFIO_IOMMU_GET_INFO:
3024 return vfio_iommu_type1_get_info(iommu, arg);
3025 case VFIO_IOMMU_MAP_DMA:
3026 return vfio_iommu_type1_map_dma(iommu, arg);
3027 case VFIO_IOMMU_UNMAP_DMA:
3028 return vfio_iommu_type1_unmap_dma(iommu, arg);
3029 case VFIO_IOMMU_DIRTY_PAGES:
3030 return vfio_iommu_type1_dirty_pages(iommu, arg);
3031 default:
3032 return -ENOTTY;
3033 }
3034 }
3035
3036 static void vfio_iommu_type1_register_device(void *iommu_data,
3037 struct vfio_device *vdev)
3038 {
3039 struct vfio_iommu *iommu = iommu_data;
3040
3041 if (!vdev->ops->dma_unmap)
3042 return;
3043
/*
 * The device_list is iterated under device_list_lock, while emptiness
 * checks elsewhere (see the WARN_ONs in the detach paths) are done
 * under iommu->lock. Take both locks around the list update so that
 * additions stay coherent with either user and the lock ordering
 * matches the readers.
 */
3050 mutex_lock(&iommu->lock);
3051 mutex_lock(&iommu->device_list_lock);
3052 list_add(&vdev->iommu_entry, &iommu->device_list);
3053 mutex_unlock(&iommu->device_list_lock);
3054 mutex_unlock(&iommu->lock);
3055 }
3056
3057 static void vfio_iommu_type1_unregister_device(void *iommu_data,
3058 struct vfio_device *vdev)
3059 {
3060 struct vfio_iommu *iommu = iommu_data;
3061
3062 if (!vdev->ops->dma_unmap)
3063 return;
3064
3065 mutex_lock(&iommu->lock);
3066 mutex_lock(&iommu->device_list_lock);
3067 list_del(&vdev->iommu_entry);
3068 mutex_unlock(&iommu->device_list_lock);
3069 mutex_unlock(&iommu->lock);
3070 }
3071
3072 static int vfio_iommu_type1_dma_rw_chunk(struct vfio_iommu *iommu,
3073 dma_addr_t user_iova, void *data,
3074 size_t count, bool write,
3075 size_t *copied)
3076 {
3077 struct mm_struct *mm;
3078 unsigned long vaddr;
3079 struct vfio_dma *dma;
3080 bool kthread = current->mm == NULL;
3081 size_t offset;
3082 int ret;
3083
3084 *copied = 0;
3085
3086 ret = vfio_find_dma_valid(iommu, user_iova, 1, &dma);
3087 if (ret < 0)
3088 return ret;
3089
3090 if ((write && !(dma->prot & IOMMU_WRITE)) ||
3091 !(dma->prot & IOMMU_READ))
3092 return -EPERM;
3093
3094 mm = get_task_mm(dma->task);
3095
3096 if (!mm)
3097 return -EPERM;
3098
3099 if (kthread)
3100 kthread_use_mm(mm);
3101 else if (current->mm != mm)
3102 goto out;
3103
3104 offset = user_iova - dma->iova;
3105
3106 if (count > dma->size - offset)
3107 count = dma->size - offset;
3108
3109 vaddr = dma->vaddr + offset;
3110
3111 if (write) {
3112 *copied = copy_to_user((void __user *)vaddr, data,
3113 count) ? 0 : count;
3114 if (*copied && iommu->dirty_page_tracking) {
3115 unsigned long pgshift = __ffs(iommu->pgsize_bitmap);
3116
/*
 * The dirty bitmap is populated at the smallest supported page size.
 */
3120 bitmap_set(dma->bitmap, offset >> pgshift,
3121 ((offset + *copied - 1) >> pgshift) -
3122 (offset >> pgshift) + 1);
3123 }
3124 } else
3125 *copied = copy_from_user(data, (void __user *)vaddr,
3126 count) ? 0 : count;
3127 if (kthread)
3128 kthread_unuse_mm(mm);
3129 out:
3130 mmput(mm);
3131 return *copied ? 0 : -EFAULT;
3132 }
3133
3134 static int vfio_iommu_type1_dma_rw(void *iommu_data, dma_addr_t user_iova,
3135 void *data, size_t count, bool write)
3136 {
3137 struct vfio_iommu *iommu = iommu_data;
3138 int ret = 0;
3139 size_t done;
3140
3141 mutex_lock(&iommu->lock);
3142 while (count > 0) {
3143 ret = vfio_iommu_type1_dma_rw_chunk(iommu, user_iova, data,
3144 count, write, &done);
3145 if (ret)
3146 break;
3147
3148 count -= done;
3149 data += done;
3150 user_iova += done;
3151 }
3152
3153 mutex_unlock(&iommu->lock);
3154 return ret;
3155 }
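/*
 * In-kernel users do not call this entry point directly; a vendor driver for
 * an emulated device goes through the VFIO core instead. A minimal sketch,
 * assuming the core's vfio_dma_rw() helper with this era's signature and a
 * struct vfio_device *vdev that userspace has already opened; 'iova' is a
 * placeholder guest IOVA:
 *
 *	u32 val;
 *	int ret = vfio_dma_rw(vdev, iova, &val, sizeof(val), false);
 *
 * On success 'val' holds 4 bytes read from the mapping backing 'iova'; a
 * write is the same call with the last argument set to true.
 */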
3156
3157 static struct iommu_domain *
3158 vfio_iommu_type1_group_iommu_domain(void *iommu_data,
3159 struct iommu_group *iommu_group)
3160 {
3161 struct iommu_domain *domain = ERR_PTR(-ENODEV);
3162 struct vfio_iommu *iommu = iommu_data;
3163 struct vfio_domain *d;
3164
3165 if (!iommu || !iommu_group)
3166 return ERR_PTR(-EINVAL);
3167
3168 mutex_lock(&iommu->lock);
3169 list_for_each_entry(d, &iommu->domain_list, next) {
3170 if (find_iommu_group(d, iommu_group)) {
3171 domain = d->domain;
3172 break;
3173 }
3174 }
3175 mutex_unlock(&iommu->lock);
3176
3177 return domain;
3178 }
3179
3180 static void vfio_iommu_type1_notify(void *iommu_data,
3181 enum vfio_iommu_notify_type event)
3182 {
3183 struct vfio_iommu *iommu = iommu_data;
3184
3185 if (event != VFIO_IOMMU_CONTAINER_CLOSE)
3186 return;
3187 mutex_lock(&iommu->lock);
3188 iommu->container_open = false;
3189 mutex_unlock(&iommu->lock);
3190 wake_up_all(&iommu->vaddr_wait);
3191 }
3192
3193 static const struct vfio_iommu_driver_ops vfio_iommu_driver_ops_type1 = {
3194 .name = "vfio-iommu-type1",
3195 .owner = THIS_MODULE,
3196 .open = vfio_iommu_type1_open,
3197 .release = vfio_iommu_type1_release,
3198 .ioctl = vfio_iommu_type1_ioctl,
3199 .attach_group = vfio_iommu_type1_attach_group,
3200 .detach_group = vfio_iommu_type1_detach_group,
3201 .pin_pages = vfio_iommu_type1_pin_pages,
3202 .unpin_pages = vfio_iommu_type1_unpin_pages,
3203 .register_device = vfio_iommu_type1_register_device,
3204 .unregister_device = vfio_iommu_type1_unregister_device,
3205 .dma_rw = vfio_iommu_type1_dma_rw,
3206 .group_iommu_domain = vfio_iommu_type1_group_iommu_domain,
3207 .notify = vfio_iommu_type1_notify,
3208 };
3209
3210 static int __init vfio_iommu_type1_init(void)
3211 {
3212 return vfio_register_iommu_driver(&vfio_iommu_driver_ops_type1);
3213 }
3214
3215 static void __exit vfio_iommu_type1_cleanup(void)
3216 {
3217 vfio_unregister_iommu_driver(&vfio_iommu_driver_ops_type1);
3218 }
3219
3220 module_init(vfio_iommu_type1_init);
3221 module_exit(vfio_iommu_type1_cleanup);
3222
3223 MODULE_VERSION(DRIVER_VERSION);
3224 MODULE_LICENSE("GPL v2");
3225 MODULE_AUTHOR(DRIVER_AUTHOR);
3226 MODULE_DESCRIPTION(DRIVER_DESC);