// SPDX-License-Identifier: GPL-2.0-only
/*
 * VFIO: IOMMU DMA mapping support for TCE on POWER
 *
 * Copyright (C) 2013 IBM Corp.  All rights reserved.
 *     Author: Alexey Kardashevskiy <aik@ozlabs.ru>
 *
 * Derived from original vfio_iommu_type1.c:
 * Copyright (C) 2012 Red Hat, Inc.  All rights reserved.
 *     Author: Alex Williamson <alex.williamson@redhat.com>
 */
0013 #include <linux/module.h>
0014 #include <linux/pci.h>
0015 #include <linux/slab.h>
0016 #include <linux/uaccess.h>
0017 #include <linux/err.h>
0018 #include <linux/vfio.h>
0019 #include <linux/vmalloc.h>
0020 #include <linux/sched/mm.h>
0021 #include <linux/sched/signal.h>
0022 #include <linux/mm.h>
0023 #include "vfio.h"
0024
0025 #include <asm/iommu.h>
0026 #include <asm/tce.h>
0027 #include <asm/mmu_context.h>
0028
0029 #define DRIVER_VERSION "0.1"
0030 #define DRIVER_AUTHOR "aik@ozlabs.ru"
0031 #define DRIVER_DESC "VFIO IOMMU SPAPR TCE"
0032
0033 static void tce_iommu_detach_group(void *iommu_data,
0034 struct iommu_group *iommu_group);
0035
/*
 * VFIO IOMMU fd for SPAPR_TCE IOMMU implementation
 *
 * This code handles mapping and unmapping of user data buffers
 * into DMA'ble space using the IOMMU
 */
0043 struct tce_iommu_group {
0044 struct list_head next;
0045 struct iommu_group *grp;
0046 };
0047
/*
 * A container needs to remember which preregistered regions it has
 * referenced in order to do proper cleanup at userspace process exit.
 */
0052 struct tce_iommu_prereg {
0053 struct list_head next;
0054 struct mm_iommu_table_group_mem_t *mem;
0055 };
0056
/*
 * The container descriptor. Groups attached to the container share the
 * same DMA windows; the container is bound to a single mm once mappings
 * or memory preregistrations have been made.
 */
0062 struct tce_container {
0063 struct mutex lock;
0064 bool enabled;
0065 bool v2;
0066 bool def_window_pending;
0067 unsigned long locked_pages;
0068 struct mm_struct *mm;
0069 struct iommu_table *tables[IOMMU_TABLE_GROUP_MAX_TABLES];
0070 struct list_head group_list;
0071 struct list_head prereg_list;
0072 };
0073
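/*
 * Binds the container to the memory context of the calling process.
 * Once set, only that mm may perform mapping ioctls on the container.
 */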
0074 static long tce_iommu_mm_set(struct tce_container *container)
0075 {
0076 if (container->mm) {
0077 if (container->mm == current->mm)
0078 return 0;
0079 return -EPERM;
0080 }
0081 BUG_ON(!current->mm);
0082 container->mm = current->mm;
0083 mmgrab(container->mm);
0084
0085 return 0;
0086 }
0087
0088 static long tce_iommu_prereg_free(struct tce_container *container,
0089 struct tce_iommu_prereg *tcemem)
0090 {
0091 long ret;
0092
0093 ret = mm_iommu_put(container->mm, tcemem->mem);
0094 if (ret)
0095 return ret;
0096
0097 list_del(&tcemem->next);
0098 kfree(tcemem);
0099
0100 return 0;
0101 }
0102
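/*
 * Undoes a previous memory preregistration: finds the region on the
 * prereg_list and drops the reference taken by tce_iommu_register_pages().
 */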
0103 static long tce_iommu_unregister_pages(struct tce_container *container,
0104 __u64 vaddr, __u64 size)
0105 {
0106 struct mm_iommu_table_group_mem_t *mem;
0107 struct tce_iommu_prereg *tcemem;
0108 bool found = false;
0109 long ret;
0110
0111 if ((vaddr & ~PAGE_MASK) || (size & ~PAGE_MASK))
0112 return -EINVAL;
0113
0114 mem = mm_iommu_get(container->mm, vaddr, size >> PAGE_SHIFT);
0115 if (!mem)
0116 return -ENOENT;
0117
0118 list_for_each_entry(tcemem, &container->prereg_list, next) {
0119 if (tcemem->mem == mem) {
0120 found = true;
0121 break;
0122 }
0123 }
0124
0125 if (!found)
0126 ret = -ENOENT;
0127 else
0128 ret = tce_iommu_prereg_free(container, tcemem);
0129
0130 mm_iommu_put(container->mm, mem);
0131
0132 return ret;
0133 }
0134
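/*
 * Preregisters a chunk of userspace memory for DMA: the pages are pinned
 * via mm_iommu_new()/mm_iommu_get() and remembered on the container's
 * prereg_list so they can be released at cleanup time.
 */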
0135 static long tce_iommu_register_pages(struct tce_container *container,
0136 __u64 vaddr, __u64 size)
0137 {
0138 long ret = 0;
0139 struct mm_iommu_table_group_mem_t *mem = NULL;
0140 struct tce_iommu_prereg *tcemem;
0141 unsigned long entries = size >> PAGE_SHIFT;
0142
0143 if ((vaddr & ~PAGE_MASK) || (size & ~PAGE_MASK) ||
0144 ((vaddr + size) < vaddr))
0145 return -EINVAL;
0146
0147 mem = mm_iommu_get(container->mm, vaddr, entries);
0148 if (mem) {
0149 list_for_each_entry(tcemem, &container->prereg_list, next) {
0150 if (tcemem->mem == mem) {
0151 ret = -EBUSY;
0152 goto put_exit;
0153 }
0154 }
0155 } else {
0156 ret = mm_iommu_new(container->mm, vaddr, entries, &mem);
0157 if (ret)
0158 return ret;
0159 }
0160
0161 tcemem = kzalloc(sizeof(*tcemem), GFP_KERNEL);
0162 if (!tcemem) {
0163 ret = -ENOMEM;
0164 goto put_exit;
0165 }
0166
0167 tcemem->mem = mem;
0168 list_add(&tcemem->next, &container->prereg_list);
0169
0170 container->enabled = true;
0171
0172 return 0;
0173
0174 put_exit:
0175 mm_iommu_put(container->mm, mem);
0176 return ret;
0177 }
0178
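/*
 * Checks that the IOMMU page size does not exceed the size of the host
 * page backing @hpa, i.e. the whole TCE fits within a single host page.
 */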
0179 static bool tce_page_is_contained(struct mm_struct *mm, unsigned long hpa,
0180 unsigned int it_page_shift)
0181 {
0182 struct page *page;
0183 unsigned long size = 0;
0184
0185 if (mm_iommu_is_devmem(mm, hpa, it_page_shift, &size))
0186 return size == (1UL << it_page_shift);
0187
0188 page = pfn_to_page(hpa >> PAGE_SHIFT);
/*
 * Check that the TCE table granularity is not bigger than the size of
 * the page we have just found, otherwise the device would get access to
 * a larger chunk of memory than intended.
 */
0194 return page_shift(compound_head(page)) >= it_page_shift;
0195 }
0196
0197 static inline bool tce_groups_attached(struct tce_container *container)
0198 {
0199 return !list_empty(&container->group_list);
0200 }
0201
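/*
 * Finds the container's DMA window which covers @ioba and returns its
 * index, or -1 if no window covers the address.
 */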
0202 static long tce_iommu_find_table(struct tce_container *container,
0203 phys_addr_t ioba, struct iommu_table **ptbl)
0204 {
0205 long i;
0206
0207 for (i = 0; i < IOMMU_TABLE_GROUP_MAX_TABLES; ++i) {
0208 struct iommu_table *tbl = container->tables[i];
0209
0210 if (tbl) {
0211 unsigned long entry = ioba >> tbl->it_page_shift;
0212 unsigned long start = tbl->it_offset;
0213 unsigned long end = start + tbl->it_size;
0214
0215 if ((start <= entry) && (entry < end)) {
0216 *ptbl = tbl;
0217 return i;
0218 }
0219 }
0220 }
0221
0222 return -1;
0223 }
0224
0225 static int tce_iommu_find_free_table(struct tce_container *container)
0226 {
0227 int i;
0228
0229 for (i = 0; i < IOMMU_TABLE_GROUP_MAX_TABLES; ++i) {
0230 if (!container->tables[i])
0231 return i;
0232 }
0233
0234 return -ENOSPC;
0235 }
0236
0237 static int tce_iommu_enable(struct tce_container *container)
0238 {
0239 int ret = 0;
0240 unsigned long locked;
0241 struct iommu_table_group *table_group;
0242 struct tce_iommu_group *tcegrp;
0243
0244 if (container->enabled)
0245 return -EBUSY;
/*
 * When userspace pages are mapped into the IOMMU they are effectively
 * locked memory, so the locked_vm accounting has to be updated. Doing
 * this precisely on every map and unmap would be prohibitively
 * expensive on the hot POWER map/unmap paths (and hard to do in real
 * mode), so instead a worst case is accounted once when the container
 * is enabled: the size of the entire 32-bit DMA window, which is
 * usually small compared to total memory.
 *
 * This also means a container cannot be enabled without a group
 * attached, as without a group there is no window size to account.
 */
0276 if (!tce_groups_attached(container))
0277 return -ENODEV;
0278
0279 tcegrp = list_first_entry(&container->group_list,
0280 struct tce_iommu_group, next);
0281 table_group = iommu_group_get_iommudata(tcegrp->grp);
0282 if (!table_group)
0283 return -ENODEV;
0284
0285 if (!table_group->tce32_size)
0286 return -EPERM;
0287
0288 ret = tce_iommu_mm_set(container);
0289 if (ret)
0290 return ret;
0291
0292 locked = table_group->tce32_size >> PAGE_SHIFT;
0293 ret = account_locked_vm(container->mm, locked, true);
0294 if (ret)
0295 return ret;
0296
0297 container->locked_pages = locked;
0298
0299 container->enabled = true;
0300
0301 return ret;
0302 }
0303
0304 static void tce_iommu_disable(struct tce_container *container)
0305 {
0306 if (!container->enabled)
0307 return;
0308
0309 container->enabled = false;
0310
0311 BUG_ON(!container->mm);
0312 account_locked_vm(container->mm, container->locked_pages, false);
0313 }
0314
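/*
 * Creates a new container when userspace sets VFIO_SPAPR_TCE_IOMMU or
 * VFIO_SPAPR_TCE_v2_IOMMU on the VFIO fd.
 */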
0315 static void *tce_iommu_open(unsigned long arg)
0316 {
0317 struct tce_container *container;
0318
0319 if ((arg != VFIO_SPAPR_TCE_IOMMU) && (arg != VFIO_SPAPR_TCE_v2_IOMMU)) {
0320 pr_err("tce_vfio: Wrong IOMMU type\n");
0321 return ERR_PTR(-EINVAL);
0322 }
0323
0324 container = kzalloc(sizeof(*container), GFP_KERNEL);
0325 if (!container)
0326 return ERR_PTR(-ENOMEM);
0327
0328 mutex_init(&container->lock);
0329 INIT_LIST_HEAD_RCU(&container->group_list);
0330 INIT_LIST_HEAD_RCU(&container->prereg_list);
0331
0332 container->v2 = arg == VFIO_SPAPR_TCE_v2_IOMMU;
0333
0334 return container;
0335 }
0336
0337 static int tce_iommu_clear(struct tce_container *container,
0338 struct iommu_table *tbl,
0339 unsigned long entry, unsigned long pages);
0340 static void tce_iommu_free_table(struct tce_container *container,
0341 struct iommu_table *tbl);
0342
0343 static void tce_iommu_release(void *iommu_data)
0344 {
0345 struct tce_container *container = iommu_data;
0346 struct tce_iommu_group *tcegrp;
0347 struct tce_iommu_prereg *tcemem, *tmtmp;
0348 long i;
0349
0350 while (tce_groups_attached(container)) {
0351 tcegrp = list_first_entry(&container->group_list,
0352 struct tce_iommu_group, next);
0353 tce_iommu_detach_group(iommu_data, tcegrp->grp);
0354 }
0355
/*
 * If VFIO created a table, it was not disposed of by
 * tce_iommu_detach_group(), so do it now.
 */
0360 for (i = 0; i < IOMMU_TABLE_GROUP_MAX_TABLES; ++i) {
0361 struct iommu_table *tbl = container->tables[i];
0362
0363 if (!tbl)
0364 continue;
0365
0366 tce_iommu_clear(container, tbl, tbl->it_offset, tbl->it_size);
0367 tce_iommu_free_table(container, tbl);
0368 }
0369
0370 list_for_each_entry_safe(tcemem, tmtmp, &container->prereg_list, next)
0371 WARN_ON(tce_iommu_prereg_free(container, tcemem));
0372
0373 tce_iommu_disable(container);
0374 if (container->mm)
0375 mmdrop(container->mm);
0376 mutex_destroy(&container->lock);
0377
0378 kfree(container);
0379 }
0380
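/* Drops the page pin taken by tce_iommu_use_page() */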
0381 static void tce_iommu_unuse_page(unsigned long hpa)
0382 {
0383 struct page *page;
0384
0385 page = pfn_to_page(hpa >> PAGE_SHIFT);
0386 unpin_user_page(page);
0387 }
0388
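/*
 * Translates a userspace address into a host physical address using the
 * memory preregistered with VFIO_IOMMU_SPAPR_REGISTER_MEMORY.
 */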
0389 static int tce_iommu_prereg_ua_to_hpa(struct tce_container *container,
0390 unsigned long tce, unsigned long shift,
0391 unsigned long *phpa, struct mm_iommu_table_group_mem_t **pmem)
0392 {
0393 long ret = 0;
0394 struct mm_iommu_table_group_mem_t *mem;
0395
0396 mem = mm_iommu_lookup(container->mm, tce, 1ULL << shift);
0397 if (!mem)
0398 return -EINVAL;
0399
0400 ret = mm_iommu_ua_to_hpa(mem, tce, shift, phpa);
0401 if (ret)
0402 return -EINVAL;
0403
0404 *pmem = mem;
0405
0406 return 0;
0407 }
0408
0409 static void tce_iommu_unuse_page_v2(struct tce_container *container,
0410 struct iommu_table *tbl, unsigned long entry)
0411 {
0412 struct mm_iommu_table_group_mem_t *mem = NULL;
0413 int ret;
0414 unsigned long hpa = 0;
0415 __be64 *pua = IOMMU_TABLE_USERSPACE_ENTRY_RO(tbl, entry);
0416
0417 if (!pua)
0418 return;
0419
0420 ret = tce_iommu_prereg_ua_to_hpa(container, be64_to_cpu(*pua),
0421 tbl->it_page_shift, &hpa, &mem);
0422 if (ret)
0423 pr_debug("%s: tce %llx at #%lx was not cached, ret=%d\n",
0424 __func__, be64_to_cpu(*pua), entry, ret);
0425 if (mem)
0426 mm_iommu_mapped_dec(mem);
0427
0428 *pua = cpu_to_be64(0);
0429 }
0430
0431 static int tce_iommu_clear(struct tce_container *container,
0432 struct iommu_table *tbl,
0433 unsigned long entry, unsigned long pages)
0434 {
0435 unsigned long oldhpa;
0436 long ret;
0437 enum dma_data_direction direction;
0438 unsigned long lastentry = entry + pages, firstentry = entry;
0439
0440 for ( ; entry < lastentry; ++entry) {
0441 if (tbl->it_indirect_levels && tbl->it_userspace) {
/*
 * For multilevel tables we can take a shortcut here: the userspace
 * address cache mirrors the hardware table, so if an indirect level
 * is missing from the cache it is missing from the table too and the
 * whole level can be skipped.
 */
0450 __be64 *pua = IOMMU_TABLE_USERSPACE_ENTRY_RO(tbl,
0451 entry);
0452 if (!pua) {
/* Align to the level size, which is a power of two */
0454 entry |= tbl->it_level_size - 1;
0455 continue;
0456 }
0457 }
0458
0459 cond_resched();
0460
0461 direction = DMA_NONE;
0462 oldhpa = 0;
0463 ret = iommu_tce_xchg_no_kill(container->mm, tbl, entry, &oldhpa,
0464 &direction);
0465 if (ret)
0466 continue;
0467
0468 if (direction == DMA_NONE)
0469 continue;
0470
0471 if (container->v2) {
0472 tce_iommu_unuse_page_v2(container, tbl, entry);
0473 continue;
0474 }
0475
0476 tce_iommu_unuse_page(oldhpa);
0477 }
0478
0479 iommu_tce_kill(tbl, firstentry, pages);
0480
0481 return 0;
0482 }
0483
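/* Pins a single userspace page and returns its host physical address */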
0484 static int tce_iommu_use_page(unsigned long tce, unsigned long *hpa)
0485 {
0486 struct page *page = NULL;
0487 enum dma_data_direction direction = iommu_tce_direction(tce);
0488
0489 if (pin_user_pages_fast(tce & PAGE_MASK, 1,
0490 direction != DMA_TO_DEVICE ? FOLL_WRITE : 0,
0491 &page) != 1)
0492 return -EFAULT;
0493
0494 *hpa = __pa((unsigned long) page_address(page));
0495
0496 return 0;
0497 }
0498
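/*
 * Maps @pages TCEs starting at @entry (the v1 path): every userspace page
 * is pinned individually and programmed into the table; on failure the
 * entries mapped so far are cleared again.
 */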
0499 static long tce_iommu_build(struct tce_container *container,
0500 struct iommu_table *tbl,
0501 unsigned long entry, unsigned long tce, unsigned long pages,
0502 enum dma_data_direction direction)
0503 {
0504 long i, ret = 0;
0505 unsigned long hpa;
0506 enum dma_data_direction dirtmp;
0507
0508 for (i = 0; i < pages; ++i) {
0509 unsigned long offset = tce & IOMMU_PAGE_MASK(tbl) & ~PAGE_MASK;
0510
0511 ret = tce_iommu_use_page(tce, &hpa);
0512 if (ret)
0513 break;
0514
0515 if (!tce_page_is_contained(container->mm, hpa,
0516 tbl->it_page_shift)) {
0517 ret = -EPERM;
0518 break;
0519 }
0520
0521 hpa |= offset;
0522 dirtmp = direction;
0523 ret = iommu_tce_xchg_no_kill(container->mm, tbl, entry + i,
0524 &hpa, &dirtmp);
0525 if (ret) {
0526 tce_iommu_unuse_page(hpa);
0527 pr_err("iommu_tce: %s failed ioba=%lx, tce=%lx, ret=%ld\n",
0528 __func__, entry << tbl->it_page_shift,
0529 tce, ret);
0530 break;
0531 }
0532
0533 if (dirtmp != DMA_NONE)
0534 tce_iommu_unuse_page(hpa);
0535
0536 tce += IOMMU_PAGE_SIZE(tbl);
0537 }
0538
0539 if (ret)
0540 tce_iommu_clear(container, tbl, entry, i);
0541 else
0542 iommu_tce_kill(tbl, entry, pages);
0543
0544 return ret;
0545 }
0546
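/*
 * Maps TCEs on the v2 path: the pages must have been preregistered, so only
 * the userspace-to-physical translation and a mapped-reference increment are
 * needed here; the userspace address is cached in tbl->it_userspace.
 */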
0547 static long tce_iommu_build_v2(struct tce_container *container,
0548 struct iommu_table *tbl,
0549 unsigned long entry, unsigned long tce, unsigned long pages,
0550 enum dma_data_direction direction)
0551 {
0552 long i, ret = 0;
0553 unsigned long hpa;
0554 enum dma_data_direction dirtmp;
0555
0556 for (i = 0; i < pages; ++i) {
0557 struct mm_iommu_table_group_mem_t *mem = NULL;
0558 __be64 *pua = IOMMU_TABLE_USERSPACE_ENTRY(tbl, entry + i);
0559
0560 ret = tce_iommu_prereg_ua_to_hpa(container,
0561 tce, tbl->it_page_shift, &hpa, &mem);
0562 if (ret)
0563 break;
0564
0565 if (!tce_page_is_contained(container->mm, hpa,
0566 tbl->it_page_shift)) {
0567 ret = -EPERM;
0568 break;
0569 }
0570
/* Preserve offset within IOMMU page */
0572 hpa |= tce & IOMMU_PAGE_MASK(tbl) & ~PAGE_MASK;
0573 dirtmp = direction;
0574
/* mm_iommu_mapped_inc() fails if the region is being unregistered */
0576 if (mm_iommu_mapped_inc(mem))
0577 break;
0578
0579 ret = iommu_tce_xchg_no_kill(container->mm, tbl, entry + i,
0580 &hpa, &dirtmp);
0581 if (ret) {
/* dirtmp cannot be DMA_NONE here */
0583 tce_iommu_unuse_page_v2(container, tbl, entry + i);
0584 pr_err("iommu_tce: %s failed ioba=%lx, tce=%lx, ret=%ld\n",
0585 __func__, entry << tbl->it_page_shift,
0586 tce, ret);
0587 break;
0588 }
0589
0590 if (dirtmp != DMA_NONE)
0591 tce_iommu_unuse_page_v2(container, tbl, entry + i);
0592
0593 *pua = cpu_to_be64(tce);
0594
0595 tce += IOMMU_PAGE_SIZE(tbl);
0596 }
0597
0598 if (ret)
0599 tce_iommu_clear(container, tbl, entry, i);
0600 else
0601 iommu_tce_kill(tbl, entry, pages);
0602
0603 return ret;
0604 }
0605
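/*
 * Accounts the memory needed for the table against the locked_vm limit
 * and then asks the platform to allocate the TCE table.
 */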
0606 static long tce_iommu_create_table(struct tce_container *container,
0607 struct iommu_table_group *table_group,
0608 int num,
0609 __u32 page_shift,
0610 __u64 window_size,
0611 __u32 levels,
0612 struct iommu_table **ptbl)
0613 {
0614 long ret, table_size;
0615
0616 table_size = table_group->ops->get_table_size(page_shift, window_size,
0617 levels);
0618 if (!table_size)
0619 return -EINVAL;
0620
0621 ret = account_locked_vm(container->mm, table_size >> PAGE_SHIFT, true);
0622 if (ret)
0623 return ret;
0624
0625 ret = table_group->ops->create_table(table_group, num,
0626 page_shift, window_size, levels, ptbl);
0627
0628 WARN_ON(!ret && !(*ptbl)->it_ops->free);
0629 WARN_ON(!ret && ((*ptbl)->it_allocated_size > table_size));
0630
0631 return ret;
0632 }
0633
0634 static void tce_iommu_free_table(struct tce_container *container,
0635 struct iommu_table *tbl)
0636 {
0637 unsigned long pages = tbl->it_allocated_size >> PAGE_SHIFT;
0638
0639 iommu_tce_table_put(tbl);
0640 account_locked_vm(container->mm, pages, false);
0641 }
0642
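/*
 * Creates an additional DMA window and programs it into every attached
 * group; returns the bus address of the new window via @start_addr.
 */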
0643 static long tce_iommu_create_window(struct tce_container *container,
0644 __u32 page_shift, __u64 window_size, __u32 levels,
0645 __u64 *start_addr)
0646 {
0647 struct tce_iommu_group *tcegrp;
0648 struct iommu_table_group *table_group;
0649 struct iommu_table *tbl = NULL;
0650 long ret, num;
0651
0652 num = tce_iommu_find_free_table(container);
0653 if (num < 0)
0654 return num;
0655
/* The first attached group defines the table_group ops and page sizes */
0657 tcegrp = list_first_entry(&container->group_list,
0658 struct tce_iommu_group, next);
0659 table_group = iommu_group_get_iommudata(tcegrp->grp);
0660 if (!table_group)
0661 return -EFAULT;
0662
0663 if (!(table_group->pgsizes & (1ULL << page_shift)))
0664 return -EINVAL;
0665
0666 if (!table_group->ops->set_window || !table_group->ops->unset_window ||
0667 !table_group->ops->get_table_size ||
0668 !table_group->ops->create_table)
0669 return -EPERM;
0670
/* Create TCE table */
0672 ret = tce_iommu_create_table(container, table_group, num,
0673 page_shift, window_size, levels, &tbl);
0674 if (ret)
0675 return ret;
0676
0677 BUG_ON(!tbl->it_ops->free);
0678
/*
 * Program the table into every attached group. The groups have been
 * checked for compatibility at attach time.
 */
0683 list_for_each_entry(tcegrp, &container->group_list, next) {
0684 table_group = iommu_group_get_iommudata(tcegrp->grp);
0685
0686 ret = table_group->ops->set_window(table_group, num, tbl);
0687 if (ret)
0688 goto unset_exit;
0689 }
0690
0691 container->tables[num] = tbl;
0692
/* Return the start address assigned by the platform in create_table() */
0694 *start_addr = tbl->it_offset << tbl->it_page_shift;
0695
0696 return 0;
0697
0698 unset_exit:
0699 list_for_each_entry(tcegrp, &container->group_list, next) {
0700 table_group = iommu_group_get_iommudata(tcegrp->grp);
0701 table_group->ops->unset_window(table_group, num);
0702 }
0703 tce_iommu_free_table(container, tbl);
0704
0705 return ret;
0706 }
0707
0708 static long tce_iommu_remove_window(struct tce_container *container,
0709 __u64 start_addr)
0710 {
0711 struct iommu_table_group *table_group = NULL;
0712 struct iommu_table *tbl;
0713 struct tce_iommu_group *tcegrp;
0714 int num;
0715
0716 num = tce_iommu_find_table(container, start_addr, &tbl);
0717 if (num < 0)
0718 return -EINVAL;
0719
0720 BUG_ON(!tbl->it_size);
0721
/* Detach the window from every attached group */
0723 list_for_each_entry(tcegrp, &container->group_list, next) {
0724 table_group = iommu_group_get_iommudata(tcegrp->grp);
0725
/*
 * SPAPR TCE IOMMU exposes the default DMA window to the guest via
 * dma32_window_start/size of VFIO_IOMMU_SPAPR_TCE_GET_INFO. Some
 * platforms allow userspace to remove this window, some do not, so
 * ask the platform whether the window can be removed.
 */
0733 if (!table_group->ops || !table_group->ops->unset_window)
0734 return -EPERM;
0735
0736 table_group->ops->unset_window(table_group, num);
0737 }
0738
/* Free table */
0740 tce_iommu_clear(container, tbl, tbl->it_offset, tbl->it_size);
0741 tce_iommu_free_table(container, tbl);
0742 container->tables[num] = NULL;
0743
0744 return 0;
0745 }
0746
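/*
 * Creates the default 32-bit DMA window which was postponed at group
 * attach time; called on the first mapping request or explicit window
 * creation/removal.
 */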
0747 static long tce_iommu_create_default_window(struct tce_container *container)
0748 {
0749 long ret;
0750 __u64 start_addr = 0;
0751 struct tce_iommu_group *tcegrp;
0752 struct iommu_table_group *table_group;
0753
0754 if (!container->def_window_pending)
0755 return 0;
0756
0757 if (!tce_groups_attached(container))
0758 return -ENODEV;
0759
0760 tcegrp = list_first_entry(&container->group_list,
0761 struct tce_iommu_group, next);
0762 table_group = iommu_group_get_iommudata(tcegrp->grp);
0763 if (!table_group)
0764 return -ENODEV;
0765
0766 ret = tce_iommu_create_window(container, IOMMU_PAGE_SHIFT_4K,
0767 table_group->tce32_size, 1, &start_addr);
0768 WARN_ON_ONCE(!ret && start_addr);
0769
0770 if (!ret)
0771 container->def_window_pending = false;
0772
0773 return ret;
0774 }
0775
0776 static long tce_iommu_ioctl(void *iommu_data,
0777 unsigned int cmd, unsigned long arg)
0778 {
0779 struct tce_container *container = iommu_data;
0780 unsigned long minsz, ddwsz;
0781 long ret;
0782
0783 switch (cmd) {
0784 case VFIO_CHECK_EXTENSION:
0785 switch (arg) {
0786 case VFIO_SPAPR_TCE_IOMMU:
0787 case VFIO_SPAPR_TCE_v2_IOMMU:
0788 ret = 1;
0789 break;
0790 default:
0791 ret = vfio_spapr_iommu_eeh_ioctl(NULL, cmd, arg);
0792 break;
0793 }
0794
0795 return (ret < 0) ? 0 : ret;
0796 }
0797
/*
 * Sanity check to prevent one userspace process from manipulating
 * another process's mm.
 */
0802 BUG_ON(!container);
0803 if (container->mm && container->mm != current->mm)
0804 return -EPERM;
0805
0806 switch (cmd) {
0807 case VFIO_IOMMU_SPAPR_TCE_GET_INFO: {
0808 struct vfio_iommu_spapr_tce_info info;
0809 struct tce_iommu_group *tcegrp;
0810 struct iommu_table_group *table_group;
0811
0812 if (!tce_groups_attached(container))
0813 return -ENXIO;
0814
0815 tcegrp = list_first_entry(&container->group_list,
0816 struct tce_iommu_group, next);
0817 table_group = iommu_group_get_iommudata(tcegrp->grp);
0818
0819 if (!table_group)
0820 return -ENXIO;
0821
0822 minsz = offsetofend(struct vfio_iommu_spapr_tce_info,
0823 dma32_window_size);
0824
0825 if (copy_from_user(&info, (void __user *)arg, minsz))
0826 return -EFAULT;
0827
0828 if (info.argsz < minsz)
0829 return -EINVAL;
0830
0831 info.dma32_window_start = table_group->tce32_start;
0832 info.dma32_window_size = table_group->tce32_size;
0833 info.flags = 0;
0834 memset(&info.ddw, 0, sizeof(info.ddw));
0835
0836 if (table_group->max_dynamic_windows_supported &&
0837 container->v2) {
0838 info.flags |= VFIO_IOMMU_SPAPR_INFO_DDW;
0839 info.ddw.pgsizes = table_group->pgsizes;
0840 info.ddw.max_dynamic_windows_supported =
0841 table_group->max_dynamic_windows_supported;
0842 info.ddw.levels = table_group->max_levels;
0843 }
0844
0845 ddwsz = offsetofend(struct vfio_iommu_spapr_tce_info, ddw);
0846
0847 if (info.argsz >= ddwsz)
0848 minsz = ddwsz;
0849
0850 if (copy_to_user((void __user *)arg, &info, minsz))
0851 return -EFAULT;
0852
0853 return 0;
0854 }
0855 case VFIO_IOMMU_MAP_DMA: {
0856 struct vfio_iommu_type1_dma_map param;
0857 struct iommu_table *tbl = NULL;
0858 long num;
0859 enum dma_data_direction direction;
0860
0861 if (!container->enabled)
0862 return -EPERM;
0863
0864 minsz = offsetofend(struct vfio_iommu_type1_dma_map, size);
0865
if (copy_from_user(&param, (void __user *)arg, minsz))
0867 return -EFAULT;
0868
0869 if (param.argsz < minsz)
0870 return -EINVAL;
0871
0872 if (param.flags & ~(VFIO_DMA_MAP_FLAG_READ |
0873 VFIO_DMA_MAP_FLAG_WRITE))
0874 return -EINVAL;
0875
0876 ret = tce_iommu_create_default_window(container);
0877 if (ret)
0878 return ret;
0879
0880 num = tce_iommu_find_table(container, param.iova, &tbl);
0881 if (num < 0)
0882 return -ENXIO;
0883
0884 if ((param.size & ~IOMMU_PAGE_MASK(tbl)) ||
0885 (param.vaddr & ~IOMMU_PAGE_MASK(tbl)))
0886 return -EINVAL;
0887
/* iova is checked by the IOMMU API */
0889 if (param.flags & VFIO_DMA_MAP_FLAG_READ) {
0890 if (param.flags & VFIO_DMA_MAP_FLAG_WRITE)
0891 direction = DMA_BIDIRECTIONAL;
0892 else
0893 direction = DMA_TO_DEVICE;
0894 } else {
0895 if (param.flags & VFIO_DMA_MAP_FLAG_WRITE)
0896 direction = DMA_FROM_DEVICE;
0897 else
0898 return -EINVAL;
0899 }
0900
0901 ret = iommu_tce_put_param_check(tbl, param.iova, param.vaddr);
0902 if (ret)
0903 return ret;
0904
0905 if (container->v2)
0906 ret = tce_iommu_build_v2(container, tbl,
0907 param.iova >> tbl->it_page_shift,
0908 param.vaddr,
0909 param.size >> tbl->it_page_shift,
0910 direction);
0911 else
0912 ret = tce_iommu_build(container, tbl,
0913 param.iova >> tbl->it_page_shift,
0914 param.vaddr,
0915 param.size >> tbl->it_page_shift,
0916 direction);
0917
0918 iommu_flush_tce(tbl);
0919
0920 return ret;
0921 }
0922 case VFIO_IOMMU_UNMAP_DMA: {
0923 struct vfio_iommu_type1_dma_unmap param;
0924 struct iommu_table *tbl = NULL;
0925 long num;
0926
0927 if (!container->enabled)
0928 return -EPERM;
0929
0930 minsz = offsetofend(struct vfio_iommu_type1_dma_unmap,
0931 size);
0932
if (copy_from_user(&param, (void __user *)arg, minsz))
0934 return -EFAULT;
0935
0936 if (param.argsz < minsz)
0937 return -EINVAL;
0938
/* No flags are supported for now */
0940 if (param.flags)
0941 return -EINVAL;
0942
0943 ret = tce_iommu_create_default_window(container);
0944 if (ret)
0945 return ret;
0946
0947 num = tce_iommu_find_table(container, param.iova, &tbl);
0948 if (num < 0)
0949 return -ENXIO;
0950
0951 if (param.size & ~IOMMU_PAGE_MASK(tbl))
0952 return -EINVAL;
0953
0954 ret = iommu_tce_clear_param_check(tbl, param.iova, 0,
0955 param.size >> tbl->it_page_shift);
0956 if (ret)
0957 return ret;
0958
0959 ret = tce_iommu_clear(container, tbl,
0960 param.iova >> tbl->it_page_shift,
0961 param.size >> tbl->it_page_shift);
0962 iommu_flush_tce(tbl);
0963
0964 return ret;
0965 }
0966 case VFIO_IOMMU_SPAPR_REGISTER_MEMORY: {
0967 struct vfio_iommu_spapr_register_memory param;
0968
0969 if (!container->v2)
0970 break;
0971
0972 minsz = offsetofend(struct vfio_iommu_spapr_register_memory,
0973 size);
0974
0975 ret = tce_iommu_mm_set(container);
0976 if (ret)
0977 return ret;
0978
if (copy_from_user(&param, (void __user *)arg, minsz))
0980 return -EFAULT;
0981
0982 if (param.argsz < minsz)
0983 return -EINVAL;
0984
/* No flags are supported for now */
0986 if (param.flags)
0987 return -EINVAL;
0988
0989 mutex_lock(&container->lock);
0990 ret = tce_iommu_register_pages(container, param.vaddr,
0991 param.size);
0992 mutex_unlock(&container->lock);
0993
0994 return ret;
0995 }
0996 case VFIO_IOMMU_SPAPR_UNREGISTER_MEMORY: {
0997 struct vfio_iommu_spapr_register_memory param;
0998
0999 if (!container->v2)
1000 break;
1001
1002 if (!container->mm)
1003 return -EPERM;
1004
1005 minsz = offsetofend(struct vfio_iommu_spapr_register_memory,
1006 size);
1007
if (copy_from_user(&param, (void __user *)arg, minsz))
1009 return -EFAULT;
1010
1011 if (param.argsz < minsz)
1012 return -EINVAL;
1013
/* No flags are supported for now */
1015 if (param.flags)
1016 return -EINVAL;
1017
1018 mutex_lock(&container->lock);
1019 ret = tce_iommu_unregister_pages(container, param.vaddr,
1020 param.size);
1021 mutex_unlock(&container->lock);
1022
1023 return ret;
1024 }
1025 case VFIO_IOMMU_ENABLE:
1026 if (container->v2)
1027 break;
1028
1029 mutex_lock(&container->lock);
1030 ret = tce_iommu_enable(container);
1031 mutex_unlock(&container->lock);
1032 return ret;
1033
1034
1035 case VFIO_IOMMU_DISABLE:
1036 if (container->v2)
1037 break;
1038
1039 mutex_lock(&container->lock);
1040 tce_iommu_disable(container);
1041 mutex_unlock(&container->lock);
1042 return 0;
1043
1044 case VFIO_EEH_PE_OP: {
1045 struct tce_iommu_group *tcegrp;
1046
1047 ret = 0;
1048 list_for_each_entry(tcegrp, &container->group_list, next) {
1049 ret = vfio_spapr_iommu_eeh_ioctl(tcegrp->grp,
1050 cmd, arg);
1051 if (ret)
1052 return ret;
1053 }
1054 return ret;
1055 }
1056
1057 case VFIO_IOMMU_SPAPR_TCE_CREATE: {
1058 struct vfio_iommu_spapr_tce_create create;
1059
1060 if (!container->v2)
1061 break;
1062
1063 ret = tce_iommu_mm_set(container);
1064 if (ret)
1065 return ret;
1066
1067 if (!tce_groups_attached(container))
1068 return -ENXIO;
1069
1070 minsz = offsetofend(struct vfio_iommu_spapr_tce_create,
1071 start_addr);
1072
1073 if (copy_from_user(&create, (void __user *)arg, minsz))
1074 return -EFAULT;
1075
1076 if (create.argsz < minsz)
1077 return -EINVAL;
1078
1079 if (create.flags)
1080 return -EINVAL;
1081
1082 mutex_lock(&container->lock);
1083
1084 ret = tce_iommu_create_default_window(container);
1085 if (!ret)
1086 ret = tce_iommu_create_window(container,
1087 create.page_shift,
1088 create.window_size, create.levels,
1089 &create.start_addr);
1090
1091 mutex_unlock(&container->lock);
1092
1093 if (!ret && copy_to_user((void __user *)arg, &create, minsz))
1094 ret = -EFAULT;
1095
1096 return ret;
1097 }
1098 case VFIO_IOMMU_SPAPR_TCE_REMOVE: {
1099 struct vfio_iommu_spapr_tce_remove remove;
1100
1101 if (!container->v2)
1102 break;
1103
1104 ret = tce_iommu_mm_set(container);
1105 if (ret)
1106 return ret;
1107
1108 if (!tce_groups_attached(container))
1109 return -ENXIO;
1110
1111 minsz = offsetofend(struct vfio_iommu_spapr_tce_remove,
1112 start_addr);
1113
1114 if (copy_from_user(&remove, (void __user *)arg, minsz))
1115 return -EFAULT;
1116
1117 if (remove.argsz < minsz)
1118 return -EINVAL;
1119
1120 if (remove.flags)
1121 return -EINVAL;
1122
1123 if (container->def_window_pending && !remove.start_addr) {
1124 container->def_window_pending = false;
1125 return 0;
1126 }
1127
1128 mutex_lock(&container->lock);
1129
1130 ret = tce_iommu_remove_window(container, remove.start_addr);
1131
1132 mutex_unlock(&container->lock);
1133
1134 return ret;
1135 }
1136 }
1137
1138 return -ENOTTY;
1139 }
1140
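/*
 * Clears the container's tables and returns ownership to the platform
 * (the non-DDW path, the counterpart of tce_iommu_take_ownership()).
 */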
1141 static void tce_iommu_release_ownership(struct tce_container *container,
1142 struct iommu_table_group *table_group)
1143 {
1144 int i;
1145
1146 for (i = 0; i < IOMMU_TABLE_GROUP_MAX_TABLES; ++i) {
1147 struct iommu_table *tbl = container->tables[i];
1148
1149 if (!tbl)
1150 continue;
1151
1152 tce_iommu_clear(container, tbl, tbl->it_offset, tbl->it_size);
1153 if (tbl->it_map)
1154 iommu_release_ownership(tbl);
1155
1156 container->tables[i] = NULL;
1157 }
1158 }
1159
1160 static int tce_iommu_take_ownership(struct tce_container *container,
1161 struct iommu_table_group *table_group)
1162 {
1163 int i, j, rc = 0;
1164
1165 for (i = 0; i < IOMMU_TABLE_GROUP_MAX_TABLES; ++i) {
1166 struct iommu_table *tbl = table_group->tables[i];
1167
1168 if (!tbl || !tbl->it_map)
1169 continue;
1170
1171 rc = iommu_take_ownership(tbl);
1172 if (rc) {
1173 for (j = 0; j < i; ++j)
1174 iommu_release_ownership(
1175 table_group->tables[j]);
1176
1177 return rc;
1178 }
1179 }
1180
1181 for (i = 0; i < IOMMU_TABLE_GROUP_MAX_TABLES; ++i)
1182 container->tables[i] = table_group->tables[i];
1183
1184 return 0;
1185 }
1186
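/*
 * The DDW counterpart of tce_iommu_release_ownership(): unsets all windows
 * and lets the platform restore its default DMA setup.
 */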
1187 static void tce_iommu_release_ownership_ddw(struct tce_container *container,
1188 struct iommu_table_group *table_group)
1189 {
1190 long i;
1191
1192 if (!table_group->ops->unset_window) {
1193 WARN_ON_ONCE(1);
1194 return;
1195 }
1196
1197 for (i = 0; i < IOMMU_TABLE_GROUP_MAX_TABLES; ++i)
1198 if (container->tables[i])
1199 table_group->ops->unset_window(table_group, i);
1200
1201 table_group->ops->release_ownership(table_group);
1202 }
1203
1204 static long tce_iommu_take_ownership_ddw(struct tce_container *container,
1205 struct iommu_table_group *table_group)
1206 {
1207 long i, ret = 0;
1208
1209 if (!table_group->ops->create_table || !table_group->ops->set_window ||
1210 !table_group->ops->release_ownership) {
1211 WARN_ON_ONCE(1);
1212 return -EFAULT;
1213 }
1214
1215 table_group->ops->take_ownership(table_group);
1216
/* Set all the container's windows on the new group */
1218 for (i = 0; i < IOMMU_TABLE_GROUP_MAX_TABLES; ++i) {
1219 struct iommu_table *tbl = container->tables[i];
1220
1221 if (!tbl)
1222 continue;
1223
1224 ret = table_group->ops->set_window(table_group, i, tbl);
1225 if (ret)
1226 goto release_exit;
1227 }
1228
1229 return 0;
1230
1231 release_exit:
1232 for (i = 0; i < IOMMU_TABLE_GROUP_MAX_TABLES; ++i)
1233 table_group->ops->unset_window(table_group, i);
1234
1235 table_group->ops->release_ownership(table_group);
1236
1237 return ret;
1238 }
1239
1240 static int tce_iommu_attach_group(void *iommu_data,
1241 struct iommu_group *iommu_group, enum vfio_group_type type)
1242 {
1243 int ret = 0;
1244 struct tce_container *container = iommu_data;
1245 struct iommu_table_group *table_group;
1246 struct tce_iommu_group *tcegrp = NULL;
1247
1248 if (type == VFIO_EMULATED_IOMMU)
1249 return -EINVAL;
1250
1251 mutex_lock(&container->lock);
1252
1255 table_group = iommu_group_get_iommudata(iommu_group);
1256 if (!table_group) {
1257 ret = -ENODEV;
1258 goto unlock_exit;
1259 }
1260
1261 if (tce_groups_attached(container) && (!table_group->ops ||
1262 !table_group->ops->take_ownership ||
1263 !table_group->ops->release_ownership)) {
1264 ret = -EBUSY;
1265 goto unlock_exit;
1266 }
1267
/*
 * Check that the new group is compatible with the groups already
 * attached, i.e. that it uses the same create_table ops.
 */
1272 list_for_each_entry(tcegrp, &container->group_list, next) {
1273 struct iommu_table_group *table_group_tmp;
1274
1275 if (tcegrp->grp == iommu_group) {
1276 pr_warn("tce_vfio: Group %d is already attached\n",
1277 iommu_group_id(iommu_group));
1278 ret = -EBUSY;
1279 goto unlock_exit;
1280 }
1281 table_group_tmp = iommu_group_get_iommudata(tcegrp->grp);
1282 if (table_group_tmp->ops->create_table !=
1283 table_group->ops->create_table) {
1284 pr_warn("tce_vfio: Group %d is incompatible with group %d\n",
1285 iommu_group_id(iommu_group),
1286 iommu_group_id(tcegrp->grp));
1287 ret = -EPERM;
1288 goto unlock_exit;
1289 }
1290 }
1291
1292 tcegrp = kzalloc(sizeof(*tcegrp), GFP_KERNEL);
1293 if (!tcegrp) {
1294 ret = -ENOMEM;
1295 goto unlock_exit;
1296 }
1297
1298 if (!table_group->ops || !table_group->ops->take_ownership ||
1299 !table_group->ops->release_ownership) {
1300 if (container->v2) {
1301 ret = -EPERM;
1302 goto free_exit;
1303 }
1304 ret = tce_iommu_take_ownership(container, table_group);
1305 } else {
1306 if (!container->v2) {
1307 ret = -EPERM;
1308 goto free_exit;
1309 }
1310 ret = tce_iommu_take_ownership_ddw(container, table_group);
1311 if (!tce_groups_attached(container) && !container->tables[0])
1312 container->def_window_pending = true;
1313 }
1314
1315 if (!ret) {
1316 tcegrp->grp = iommu_group;
1317 list_add(&tcegrp->next, &container->group_list);
1318 }
1319
1320 free_exit:
1321 if (ret && tcegrp)
1322 kfree(tcegrp);
1323
1324 unlock_exit:
1325 mutex_unlock(&container->lock);
1326
1327 return ret;
1328 }
1329
1330 static void tce_iommu_detach_group(void *iommu_data,
1331 struct iommu_group *iommu_group)
1332 {
1333 struct tce_container *container = iommu_data;
1334 struct iommu_table_group *table_group;
1335 bool found = false;
1336 struct tce_iommu_group *tcegrp;
1337
1338 mutex_lock(&container->lock);
1339
1340 list_for_each_entry(tcegrp, &container->group_list, next) {
1341 if (tcegrp->grp == iommu_group) {
1342 found = true;
1343 break;
1344 }
1345 }
1346
1347 if (!found) {
1348 pr_warn("tce_vfio: detaching unattached group #%u\n",
1349 iommu_group_id(iommu_group));
1350 goto unlock_exit;
1351 }
1352
1353 list_del(&tcegrp->next);
1354 kfree(tcegrp);
1355
1356 table_group = iommu_group_get_iommudata(iommu_group);
1357 BUG_ON(!table_group);
1358
1359 if (!table_group->ops || !table_group->ops->release_ownership)
1360 tce_iommu_release_ownership(container, table_group);
1361 else
1362 tce_iommu_release_ownership_ddw(container, table_group);
1363
1364 unlock_exit:
1365 mutex_unlock(&container->lock);
1366 }
1367
1368 static const struct vfio_iommu_driver_ops tce_iommu_driver_ops = {
1369 .name = "iommu-vfio-powerpc",
1370 .owner = THIS_MODULE,
1371 .open = tce_iommu_open,
1372 .release = tce_iommu_release,
1373 .ioctl = tce_iommu_ioctl,
1374 .attach_group = tce_iommu_attach_group,
1375 .detach_group = tce_iommu_detach_group,
1376 };
1377
1378 static int __init tce_iommu_init(void)
1379 {
1380 return vfio_register_iommu_driver(&tce_iommu_driver_ops);
1381 }
1382
1383 static void __exit tce_iommu_cleanup(void)
1384 {
1385 vfio_unregister_iommu_driver(&tce_iommu_driver_ops);
1386 }
1387
1388 module_init(tce_iommu_init);
1389 module_exit(tce_iommu_cleanup);
1390
1391 MODULE_VERSION(DRIVER_VERSION);
1392 MODULE_LICENSE("GPL v2");
1393 MODULE_AUTHOR(DRIVER_AUTHOR);
1394 MODULE_DESCRIPTION(DRIVER_DESC);
1395