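/*
 * Dynamic DMA mapping support, bus-independent parts.
 *
 * Manages powerpc IOMMU (TCE) tables: bitmap-based range allocation split
 * across per-CPU pools, scatter/gather mapping with optional virtual
 * merging, coherent allocations, and the IOMMU API hooks used when a table
 * is handed over to an external user.
 */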
#include <linux/init.h>
#include <linux/types.h>
#include <linux/slab.h>
#include <linux/mm.h>
#include <linux/spinlock.h>
#include <linux/string.h>
#include <linux/dma-mapping.h>
#include <linux/bitmap.h>
#include <linux/iommu-helper.h>
#include <linux/crash_dump.h>
#include <linux/hash.h>
#include <linux/fault-inject.h>
#include <linux/pci.h>
#include <linux/iommu.h>
#include <linux/sched.h>
#include <linux/debugfs.h>
#include <asm/io.h>
#include <asm/iommu.h>
#include <asm/pci-bridge.h>
#include <asm/machdep.h>
#include <asm/kdump.h>
#include <asm/fadump.h>
#include <asm/vio.h>
#include <asm/tce.h>
#include <asm/mmu_context.h>

#define DBG(...)

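/*
 * Per-table debugfs entries: expose the allocation bitmap weight and the
 * basic geometry of each TCE table under a directory named after its LIOBN.
 */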
#ifdef CONFIG_IOMMU_DEBUGFS
static int iommu_debugfs_weight_get(void *data, u64 *val)
{
	struct iommu_table *tbl = data;

	*val = bitmap_weight(tbl->it_map, tbl->it_size);
	return 0;
}
DEFINE_DEBUGFS_ATTRIBUTE(iommu_debugfs_fops_weight, iommu_debugfs_weight_get, NULL, "%llu\n");

static void iommu_debugfs_add(struct iommu_table *tbl)
{
	char name[10];
	struct dentry *liobn_entry;

	snprintf(name, sizeof(name), "%08lx", tbl->it_index);
	liobn_entry = debugfs_create_dir(name, iommu_debugfs_dir);

	debugfs_create_file_unsafe("weight", 0400, liobn_entry, tbl, &iommu_debugfs_fops_weight);
	debugfs_create_ulong("it_size", 0400, liobn_entry, &tbl->it_size);
	debugfs_create_ulong("it_page_shift", 0400, liobn_entry, &tbl->it_page_shift);
	debugfs_create_ulong("it_reserved_start", 0400, liobn_entry, &tbl->it_reserved_start);
	debugfs_create_ulong("it_reserved_end", 0400, liobn_entry, &tbl->it_reserved_end);
	debugfs_create_ulong("it_indirect_levels", 0400, liobn_entry, &tbl->it_indirect_levels);
	debugfs_create_ulong("it_level_size", 0400, liobn_entry, &tbl->it_level_size);
}

static void iommu_debugfs_del(struct iommu_table *tbl)
{
	char name[10];
	struct dentry *liobn_entry;

	snprintf(name, sizeof(name), "%08lx", tbl->it_index);
	liobn_entry = debugfs_lookup(name, iommu_debugfs_dir);
	debugfs_remove(liobn_entry);
}
#else
static void iommu_debugfs_add(struct iommu_table *tbl) {}
static void iommu_debugfs_del(struct iommu_table *tbl) {}
#endif
static int novmerge;

static void __iommu_free(struct iommu_table *, dma_addr_t, unsigned int);

static int __init setup_iommu(char *str)
{
	if (!strcmp(str, "novmerge"))
		novmerge = 1;
	else if (!strcmp(str, "vmerge"))
		novmerge = 0;
	return 1;
}

__setup("iommu=", setup_iommu);

static DEFINE_PER_CPU(unsigned int, iommu_pool_hash);

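/*
 * Precompute a per-CPU hash so that concurrent allocations from different
 * CPUs tend to land in different pools of the same table, spreading lock
 * contention across the pools.
 */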
static int __init setup_iommu_pool_hash(void)
{
	unsigned int i;

	for_each_possible_cpu(i)
		per_cpu(iommu_pool_hash, i) = hash_32(i, IOMMU_POOL_HASHBITS);

	return 0;
}
subsys_initcall(setup_iommu_pool_hash);

#ifdef CONFIG_FAIL_IOMMU

static DECLARE_FAULT_ATTR(fail_iommu);

static int __init setup_fail_iommu(char *str)
{
	return setup_fault_attr(&fail_iommu, str);
}
__setup("fail_iommu=", setup_fail_iommu);

static bool should_fail_iommu(struct device *dev)
{
	return dev->archdata.fail_iommu && should_fail(&fail_iommu, 1);
}

static int __init fail_iommu_debugfs(void)
{
	struct dentry *dir = fault_create_debugfs_attr("fail_iommu",
						       NULL, &fail_iommu);

	return PTR_ERR_OR_ZERO(dir);
}
late_initcall(fail_iommu_debugfs);

static ssize_t fail_iommu_show(struct device *dev,
			       struct device_attribute *attr, char *buf)
{
	return sprintf(buf, "%d\n", dev->archdata.fail_iommu);
}

static ssize_t fail_iommu_store(struct device *dev,
				struct device_attribute *attr, const char *buf,
				size_t count)
{
	int i;

	if (count > 0 && sscanf(buf, "%d", &i) > 0)
		dev->archdata.fail_iommu = (i == 0) ? 0 : 1;

	return count;
}

static DEVICE_ATTR_RW(fail_iommu);

static int fail_iommu_bus_notify(struct notifier_block *nb,
				 unsigned long action, void *data)
{
	struct device *dev = data;

	if (action == BUS_NOTIFY_ADD_DEVICE) {
		if (device_create_file(dev, &dev_attr_fail_iommu))
			pr_warn("Unable to create IOMMU fault injection sysfs entries\n");
	} else if (action == BUS_NOTIFY_DEL_DEVICE) {
		device_remove_file(dev, &dev_attr_fail_iommu);
	}

	return 0;
}

static struct notifier_block fail_iommu_bus_notifier = {
	.notifier_call = fail_iommu_bus_notify
};

static int __init fail_iommu_setup(void)
{
#ifdef CONFIG_PCI
	bus_register_notifier(&pci_bus_type, &fail_iommu_bus_notifier);
#endif
#ifdef CONFIG_IBMVIO
	bus_register_notifier(&vio_bus_type, &fail_iommu_bus_notifier);
#endif

	return 0;
}

/*
 * Register the notifiers after the PCI and VIO bus types are set up but
 * before devices are probed.
 */
arch_initcall(fail_iommu_setup);
#else
static inline bool should_fail_iommu(struct device *dev)
{
	return false;
}
#endif

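/*
 * Allocate a range of @npages IOMMU pages from one of the table's pools,
 * honouring the device's DMA mask, segment boundary and alignment. Returns
 * the table entry index, or DMA_MAPPING_ERROR if the table is exhausted.
 */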
static unsigned long iommu_range_alloc(struct device *dev,
				       struct iommu_table *tbl,
				       unsigned long npages,
				       unsigned long *handle,
				       unsigned long mask,
				       unsigned int align_order)
{
	unsigned long n, end, start;
	unsigned long limit;
	int largealloc = npages > 15;
	int pass = 0;
	unsigned long align_mask;
	unsigned long flags;
	unsigned int pool_nr;
	struct iommu_pool *pool;

	align_mask = (1ull << align_order) - 1;

	/* Sanity check */
	if (unlikely(npages == 0)) {
		if (printk_ratelimit())
			WARN_ON(1);
		return DMA_MAPPING_ERROR;
	}

	if (should_fail_iommu(dev))
		return DMA_MAPPING_ERROR;

	/*
	 * We don't need to disable preemption here because any CPU can
	 * safely use any IOMMU pool.
	 */
	pool_nr = raw_cpu_read(iommu_pool_hash) & (tbl->nr_pools - 1);

	if (largealloc)
		pool = &(tbl->large_pool);
	else
		pool = &(tbl->pools[pool_nr]);

	spin_lock_irqsave(&(pool->lock), flags);

again:
	if ((pass == 0) && handle && *handle &&
	    (*handle >= pool->start) && (*handle < pool->end))
		start = *handle;
	else
		start = pool->hint;

	limit = pool->end;

	/*
	 * The hint can end up at or past the end of the pool when the
	 * previous allocation finished at the very top of the available
	 * space. If so, go back to the start of the pool.
	 */
	if (start >= limit)
		start = pool->start;

	if (limit + tbl->it_offset > mask) {
		limit = mask - tbl->it_offset + 1;

		/*
		 * If we're constrained on address range, first try at the
		 * masked hint to avoid an O(n) search; on later passes,
		 * fall back to pool 0 and start from its beginning.
		 */
		if ((start & mask) >= limit || pass > 0) {
			spin_unlock(&(pool->lock));
			pool = &(tbl->pools[0]);
			spin_lock(&(pool->lock));
			start = pool->start;
		} else {
			start &= mask;
		}
	}

	n = iommu_area_alloc(tbl->it_map, limit, start, npages, tbl->it_offset,
			     dma_get_seg_boundary_nr_pages(dev, tbl->it_page_shift),
			     align_mask);
	if (n == -1) {
		if (likely(pass == 0)) {
			/* First, retry the same pool from its start */
			pool->hint = pool->start;
			pass++;
			goto again;

		} else if (pass <= tbl->nr_pools) {
			/* Then scan all the other pools */
			spin_unlock(&(pool->lock));
			pool_nr = (pool_nr + 1) & (tbl->nr_pools - 1);
			pool = &tbl->pools[pool_nr];
			spin_lock(&(pool->lock));
			pool->hint = pool->start;
			pass++;
			goto again;

		} else if (pass == tbl->nr_pools + 1) {
			/* Last resort: try the large pool */
			spin_unlock(&pool->lock);
			pool = &tbl->large_pool;
			spin_lock(&pool->lock);
			pool->hint = pool->start;
			pass++;
			goto again;

		} else {
			/* Give up */
			spin_unlock_irqrestore(&(pool->lock), flags);
			return DMA_MAPPING_ERROR;
		}
	}

	end = n + npages;

	if (largealloc) {
		/* Large allocations: keep the hint right after this range */
		pool->hint = end;
	} else {
		/* Small allocations: round the hint up to the next block */
		pool->hint = (end + tbl->it_blocksize - 1) &
			     ~(tbl->it_blocksize - 1);
	}

	/* Update handle for SG allocations */
	if (handle)
		*handle = end;

	spin_unlock_irqrestore(&(pool->lock), flags);

	return n;
}

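/*
 * Allocate a DMA range and program the hardware TCEs for @npages pages
 * starting at @page. Returns the resulting bus address, or
 * DMA_MAPPING_ERROR on failure.
 */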
static dma_addr_t iommu_alloc(struct device *dev, struct iommu_table *tbl,
			      void *page, unsigned int npages,
			      enum dma_data_direction direction,
			      unsigned long mask, unsigned int align_order,
			      unsigned long attrs)
{
	unsigned long entry;
	dma_addr_t ret = DMA_MAPPING_ERROR;
	int build_fail;

	entry = iommu_range_alloc(dev, tbl, npages, NULL, mask, align_order);

	if (unlikely(entry == DMA_MAPPING_ERROR))
		return DMA_MAPPING_ERROR;

	entry += tbl->it_offset;	/* Offset into real TCE table */
	ret = entry << tbl->it_page_shift;	/* Set the return dma address */

	/* Put the TCEs in the HW table */
	build_fail = tbl->it_ops->set(tbl, entry, npages,
				      (unsigned long)page &
				      IOMMU_PAGE_MASK(tbl), direction, attrs);

	/*
	 * If the hardware update failed, release the range we just
	 * reserved and report a mapping error.
	 */
	if (unlikely(build_fail)) {
		__iommu_free(tbl, ret, npages);
		return DMA_MAPPING_ERROR;
	}

	/* Flush/invalidate TLB caches if necessary */
	if (tbl->it_ops->flush)
		tbl->it_ops->flush(tbl);

	/* Make sure updates are seen by hardware */
	mb();

	return ret;
}

static bool iommu_free_check(struct iommu_table *tbl, dma_addr_t dma_addr,
			     unsigned int npages)
{
	unsigned long entry, free_entry;

	entry = dma_addr >> tbl->it_page_shift;
	free_entry = entry - tbl->it_offset;

	if (((free_entry + npages) > tbl->it_size) ||
	    (entry < tbl->it_offset)) {
		if (printk_ratelimit()) {
			printk(KERN_INFO "iommu_free: invalid entry\n");
			printk(KERN_INFO "\tentry = 0x%lx\n", entry);
			printk(KERN_INFO "\tdma_addr = 0x%llx\n", (u64)dma_addr);
			printk(KERN_INFO "\tTable = 0x%llx\n", (u64)tbl);
			printk(KERN_INFO "\tbus# = 0x%llx\n", (u64)tbl->it_busno);
			printk(KERN_INFO "\tsize = 0x%llx\n", (u64)tbl->it_size);
			printk(KERN_INFO "\tstartOff = 0x%llx\n", (u64)tbl->it_offset);
			printk(KERN_INFO "\tindex = 0x%llx\n", (u64)tbl->it_index);
			WARN_ON(1);
		}

		return false;
	}

	return true;
}

static struct iommu_pool *get_pool(struct iommu_table *tbl,
				   unsigned long entry)
{
	struct iommu_pool *p;
	unsigned long largepool_start = tbl->large_pool.start;

	/* The large pool is the last pool, at the top of the table */
	if (entry >= largepool_start) {
		p = &tbl->large_pool;
	} else {
		unsigned int pool_nr = entry / tbl->poolsize;

		BUG_ON(pool_nr > tbl->nr_pools);
		p = &tbl->pools[pool_nr];
	}

	return p;
}

static void __iommu_free(struct iommu_table *tbl, dma_addr_t dma_addr,
			 unsigned int npages)
{
	unsigned long entry, free_entry;
	unsigned long flags;
	struct iommu_pool *pool;

	entry = dma_addr >> tbl->it_page_shift;
	free_entry = entry - tbl->it_offset;

	pool = get_pool(tbl, free_entry);

	if (!iommu_free_check(tbl, dma_addr, npages))
		return;

	tbl->it_ops->clear(tbl, entry, npages);

	spin_lock_irqsave(&(pool->lock), flags);
	bitmap_clear(tbl->it_map, free_entry, npages);
	spin_unlock_irqrestore(&(pool->lock), flags);
}

static void iommu_free(struct iommu_table *tbl, dma_addr_t dma_addr,
		       unsigned int npages)
{
	__iommu_free(tbl, dma_addr, npages);

	/* Flush/invalidate the TLB caches if the hardware requires it */
	if (tbl->it_ops->flush)
		tbl->it_ops->flush(tbl);
}

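/*
 * Map a scatter/gather list through the IOMMU table. Adjacent entries are
 * merged into a single DMA segment when their bus addresses turn out to be
 * contiguous (unless "iommu=novmerge" was given). Returns the number of
 * DMA segments produced, or a negative errno.
 */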
int ppc_iommu_map_sg(struct device *dev, struct iommu_table *tbl,
		     struct scatterlist *sglist, int nelems,
		     unsigned long mask, enum dma_data_direction direction,
		     unsigned long attrs)
{
	dma_addr_t dma_next = 0, dma_addr;
	struct scatterlist *s, *outs, *segstart;
	int outcount, incount, i, build_fail = 0;
	unsigned int align;
	unsigned long handle;
	unsigned int max_seg_size;

	BUG_ON(direction == DMA_NONE);

	if ((nelems == 0) || !tbl)
		return -EINVAL;

	outs = s = segstart = &sglist[0];
	outcount = 1;
	incount = nelems;
	handle = 0;

	/* Init first segment length for backout at failure */
	outs->dma_length = 0;

	DBG("sg mapping %d elements:\n", nelems);

	max_seg_size = dma_get_max_seg_size(dev);
	for_each_sg(sglist, s, nelems, i) {
		unsigned long vaddr, npages, entry, slen;

		slen = s->length;

		/* Sanity check */
		if (slen == 0) {
			dma_next = 0;
			continue;
		}

		/* Allocate iommu entries for that segment */
		vaddr = (unsigned long) sg_virt(s);
		npages = iommu_num_pages(vaddr, slen, IOMMU_PAGE_SIZE(tbl));
		align = 0;
		if (tbl->it_page_shift < PAGE_SHIFT && slen >= PAGE_SIZE &&
		    (vaddr & ~PAGE_MASK) == 0)
			align = PAGE_SHIFT - tbl->it_page_shift;
		entry = iommu_range_alloc(dev, tbl, npages, &handle,
					  mask >> tbl->it_page_shift, align);

		DBG(" - vaddr: %lx, size: %lx\n", vaddr, slen);

		/* Handle failure */
		if (unlikely(entry == DMA_MAPPING_ERROR)) {
			if (!(attrs & DMA_ATTR_NO_WARN) &&
			    printk_ratelimit())
				dev_info(dev, "iommu_alloc failed, tbl %p vaddr %lx npages %lu\n",
					 tbl, vaddr, npages);
			goto failure;
		}

		/* Convert the entry to a dma address */
		entry += tbl->it_offset;
		dma_addr = entry << tbl->it_page_shift;
		dma_addr |= (s->offset & ~IOMMU_PAGE_MASK(tbl));

		DBG(" - %lu pages, entry: %lx, dma_addr: %lx\n",
		    npages, entry, dma_addr);

		/* Insert into HW table */
		build_fail = tbl->it_ops->set(tbl, entry, npages,
					      vaddr & IOMMU_PAGE_MASK(tbl),
					      direction, attrs);
		if (unlikely(build_fail))
			goto failure;

		/* If we are in an open segment, try merging */
		if (segstart != s) {
			DBG(" - trying merge...\n");
			/*
			 * We cannot merge if the new range is not
			 * bus-contiguous with the previous one, or if the
			 * merged segment would exceed the device's maximum
			 * segment size.
			 */
			if (novmerge || (dma_addr != dma_next) ||
			    (outs->dma_length + s->length > max_seg_size)) {
				/* Can't merge: finish the segment, open a new one */
				segstart = s;
				outcount++;
				outs = sg_next(outs);
				DBG(" can't merge, new segment.\n");
			} else {
				outs->dma_length += s->length;
				DBG(" merged, new len: %ux\n", outs->dma_length);
			}
		}

		if (segstart == s) {
			/* This is a new segment, fill its entries */
			DBG(" - filling new segment.\n");
			outs->dma_address = dma_addr;
			outs->dma_length = slen;
		}

		/* Calculate the next bus address for the contiguity check */
		dma_next = dma_addr + slen;

		DBG(" - dma next is: %lx\n", dma_next);
	}

	/* Flush/invalidate TLB caches if necessary */
	if (tbl->it_ops->flush)
		tbl->it_ops->flush(tbl);

	DBG("mapped %d elements:\n", outcount);

	/*
	 * For the sake of ppc_iommu_unmap_sg, clear the length of the next
	 * sglist entry if we didn't fill the list completely.
	 */
	if (outcount < incount) {
		outs = sg_next(outs);
		outs->dma_length = 0;
	}

	/* Make sure updates are seen by hardware */
	mb();

	return outcount;

failure:
	for_each_sg(sglist, s, nelems, i) {
		if (s->dma_length != 0) {
			unsigned long vaddr, npages;

			vaddr = s->dma_address & IOMMU_PAGE_MASK(tbl);
			npages = iommu_num_pages(s->dma_address, s->dma_length,
						 IOMMU_PAGE_SIZE(tbl));
			__iommu_free(tbl, vaddr, npages);
			s->dma_length = 0;
		}
		if (s == outs)
			break;
	}
	return -EIO;
}

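/*
 * Unmap a scatter/gather list previously mapped with ppc_iommu_map_sg(),
 * walking the list until the terminating zero-length segment.
 */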
void ppc_iommu_unmap_sg(struct iommu_table *tbl, struct scatterlist *sglist,
			int nelems, enum dma_data_direction direction,
			unsigned long attrs)
{
	struct scatterlist *sg;

	BUG_ON(direction == DMA_NONE);

	if (!tbl)
		return;

	sg = sglist;
	while (nelems--) {
		unsigned int npages;
		dma_addr_t dma_handle = sg->dma_address;

		if (sg->dma_length == 0)
			break;
		npages = iommu_num_pages(dma_handle, sg->dma_length,
					 IOMMU_PAGE_SIZE(tbl));
		__iommu_free(tbl, dma_handle, npages);
		sg = sg_next(sg);
	}

	/* Flush/invalidate TLB caches if necessary */
	if (tbl->it_ops->flush)
		tbl->it_ops->flush(tbl);
}

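/*
 * Initialise the table contents at boot: clear every TCE in a regular
 * kernel; in a kdump kernel, keep the TCEs left over from the crashed
 * kernel and mark them in the allocation bitmap.
 */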
static void iommu_table_clear(struct iommu_table *tbl)
{
	/*
	 * With firmware-assisted dump the system goes through a clean
	 * reboot before the dump is captured, so the TCEs can be cleared
	 * just like on a normal boot.
	 */
	if (!is_kdump_kernel() || is_fadump_active()) {
		/* Clear the table in case firmware left allocations in it */
		tbl->it_ops->clear(tbl, tbl->it_offset, tbl->it_size);
		return;
	}

#ifdef CONFIG_CRASH_DUMP
	if (tbl->it_ops->get) {
		unsigned long index, tceval, tcecount = 0;

		/* Reserve the existing mappings left by the first kernel */
		for (index = 0; index < tbl->it_size; index++) {
			tceval = tbl->it_ops->get(tbl, index + tbl->it_offset);
			/*
			 * Some firmware marks freed TCEs with
			 * 0x7fffffffffffffff rather than 0, so treat that
			 * value as free too.
			 */
			if (tceval && (tceval != 0x7fffffffffffffffUL)) {
				__set_bit(index, tbl->it_map);
				tcecount++;
			}
		}

		if ((tbl->it_size - tcecount) < KDUMP_MIN_TCE_ENTRIES) {
			printk(KERN_WARNING "TCE table is full; freeing ");
			printk(KERN_WARNING "%d entries for the kdump boot\n",
			       KDUMP_MIN_TCE_ENTRIES);
			for (index = tbl->it_size - KDUMP_MIN_TCE_ENTRIES;
			     index < tbl->it_size; index++)
				__clear_bit(index, tbl->it_map);
		}
	}
#endif
}

static void iommu_table_reserve_pages(struct iommu_table *tbl,
		unsigned long res_start, unsigned long res_end)
{
	int i;

	WARN_ON_ONCE(res_end < res_start);

	/*
	 * Reserve page 0 so it is never handed out: a zero DMA address is
	 * treated as invalid by some drivers.
	 */
	if (tbl->it_offset == 0)
		set_bit(0, tbl->it_map);

	if (res_start < tbl->it_offset)
		res_start = tbl->it_offset;

	if (res_end > (tbl->it_offset + tbl->it_size))
		res_end = tbl->it_offset + tbl->it_size;

	/* If the clamped window is empty, record an empty reservation */
	if (res_start >= res_end) {
		tbl->it_reserved_start = tbl->it_offset;
		tbl->it_reserved_end = tbl->it_offset;
		return;
	}

	tbl->it_reserved_start = res_start;
	tbl->it_reserved_end = res_end;

	for (i = tbl->it_reserved_start; i < tbl->it_reserved_end; ++i)
		set_bit(i - tbl->it_offset, tbl->it_map);
}

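/*
 * Build an iommu_table structure. This allocates the bitmap used to manage
 * allocation of the TCE space and carves the table into pools.
 */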
struct iommu_table *iommu_init_table(struct iommu_table *tbl, int nid,
		unsigned long res_start, unsigned long res_end)
{
	unsigned long sz;
	static int welcomed = 0;
	unsigned int i;
	struct iommu_pool *p;

	BUG_ON(!tbl->it_ops);

	/* Number of bytes needed for the bitmap */
	sz = BITS_TO_LONGS(tbl->it_size) * sizeof(unsigned long);

	tbl->it_map = vzalloc_node(sz, nid);
	if (!tbl->it_map) {
		pr_err("%s: Can't allocate %ld bytes\n", __func__, sz);
		return NULL;
	}

	iommu_table_reserve_pages(tbl, res_start, res_end);

	/* Only split into multiple pools if the DMA window is at least 1GB */
	if ((tbl->it_size << tbl->it_page_shift) >= (1UL * 1024 * 1024 * 1024))
		tbl->nr_pools = IOMMU_NR_POOLS;
	else
		tbl->nr_pools = 1;

	/* The bottom 3/4 of the table is split into small pools */
	tbl->poolsize = (tbl->it_size * 3 / 4) / tbl->nr_pools;

	for (i = 0; i < tbl->nr_pools; i++) {
		p = &tbl->pools[i];
		spin_lock_init(&(p->lock));
		p->start = tbl->poolsize * i;
		p->hint = p->start;
		p->end = p->start + tbl->poolsize;
	}

	/* The top 1/4 is the large pool */
	p = &tbl->large_pool;
	spin_lock_init(&(p->lock));
	p->start = tbl->poolsize * i;
	p->hint = p->start;
	p->end = tbl->it_size;

	iommu_table_clear(tbl);

	if (!welcomed) {
		printk(KERN_INFO "IOMMU table initialized, virtual merging %s\n",
		       novmerge ? "disabled" : "enabled");
		welcomed = 1;
	}

	iommu_debugfs_add(tbl);

	return tbl;
}

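/* Return true if any entry outside the reserved region is still mapped. */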
bool iommu_table_in_use(struct iommu_table *tbl)
{
	unsigned long start = 0, end;

	/* Ignore the always-reserved bit 0 */
	if (tbl->it_offset == 0)
		start = 1;

	/* Simple case with no reserved area */
	if (!tbl->it_reserved_start && !tbl->it_reserved_end)
		return find_next_bit(tbl->it_map, tbl->it_size, start) != tbl->it_size;

	end = tbl->it_reserved_start - tbl->it_offset;
	if (find_next_bit(tbl->it_map, end, start) != end)
		return true;

	start = tbl->it_reserved_end - tbl->it_offset;
	end = tbl->it_size;
	return find_next_bit(tbl->it_map, end, start) != end;
}

static void iommu_table_free(struct kref *kref)
{
	struct iommu_table *tbl;

	tbl = container_of(kref, struct iommu_table, it_kref);

	if (tbl->it_ops->free)
		tbl->it_ops->free(tbl);

	if (!tbl->it_map) {
		kfree(tbl);
		return;
	}

	iommu_debugfs_del(tbl);

	/* Verify that the table contains no entries */
	if (iommu_table_in_use(tbl))
		pr_warn("%s: Unexpected TCEs\n", __func__);

	/* Free the bitmap */
	vfree(tbl->it_map);

	/* Free the table */
	kfree(tbl);
}

struct iommu_table *iommu_tce_table_get(struct iommu_table *tbl)
{
	if (kref_get_unless_zero(&tbl->it_kref))
		return tbl;

	return NULL;
}
EXPORT_SYMBOL_GPL(iommu_tce_table_get);

int iommu_tce_table_put(struct iommu_table *tbl)
{
	if (WARN_ON(!tbl))
		return 0;

	return kref_put(&tbl->it_kref, iommu_table_free);
}
EXPORT_SYMBOL_GPL(iommu_tce_table_put);

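/*
 * Create TCEs for a kernel buffer described by @page + @offset. The buffer
 * must be contiguous real storage (not vmalloc). The returned dma_addr_t
 * points at the same byte within the page that was passed in.
 */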
dma_addr_t iommu_map_page(struct device *dev, struct iommu_table *tbl,
			  struct page *page, unsigned long offset, size_t size,
			  unsigned long mask, enum dma_data_direction direction,
			  unsigned long attrs)
{
	dma_addr_t dma_handle = DMA_MAPPING_ERROR;
	void *vaddr;
	unsigned long uaddr;
	unsigned int npages, align;

	BUG_ON(direction == DMA_NONE);

	vaddr = page_address(page) + offset;
	uaddr = (unsigned long)vaddr;

	if (tbl) {
		npages = iommu_num_pages(uaddr, size, IOMMU_PAGE_SIZE(tbl));
		align = 0;
		if (tbl->it_page_shift < PAGE_SHIFT && size >= PAGE_SIZE &&
		    ((unsigned long)vaddr & ~PAGE_MASK) == 0)
			align = PAGE_SHIFT - tbl->it_page_shift;

		dma_handle = iommu_alloc(dev, tbl, vaddr, npages, direction,
					 mask >> tbl->it_page_shift, align,
					 attrs);
		if (dma_handle == DMA_MAPPING_ERROR) {
			if (!(attrs & DMA_ATTR_NO_WARN) &&
			    printk_ratelimit()) {
				dev_info(dev, "iommu_alloc failed, tbl %p vaddr %p npages %d\n",
					 tbl, vaddr, npages);
			}
		} else
			dma_handle |= (uaddr & ~IOMMU_PAGE_MASK(tbl));
	}

	return dma_handle;
}

void iommu_unmap_page(struct iommu_table *tbl, dma_addr_t dma_handle,
		      size_t size, enum dma_data_direction direction,
		      unsigned long attrs)
{
	unsigned int npages;

	BUG_ON(direction == DMA_NONE);

	if (tbl) {
		npages = iommu_num_pages(dma_handle, size,
					 IOMMU_PAGE_SIZE(tbl));
		iommu_free(tbl, dma_handle, npages);
	}
}

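/*
 * Allocate a contiguous real buffer and create IOMMU mappings over it.
 * Returns the virtual address of the buffer and sets *dma_handle to the
 * bus address of the first page.
 */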
void *iommu_alloc_coherent(struct device *dev, struct iommu_table *tbl,
			   size_t size, dma_addr_t *dma_handle,
			   unsigned long mask, gfp_t flag, int node)
{
	void *ret = NULL;
	dma_addr_t mapping;
	unsigned int order;
	unsigned int nio_pages, io_order;
	struct page *page;

	size = PAGE_ALIGN(size);
	order = get_order(size);

	/*
	 * Refuse absurdly large requests up front; it is easier to debug
	 * here than deep inside the TCE code.
	 */
	if (order >= IOMAP_MAX_ORDER) {
		dev_info(dev, "iommu_alloc_consistent size too large: 0x%lx\n",
			 size);
		return NULL;
	}

	if (!tbl)
		return NULL;

	/* Alloc enough pages (and possibly more) */
	page = alloc_pages_node(node, flag, order);
	if (!page)
		return NULL;
	ret = page_address(page);
	memset(ret, 0, size);

	/* Set up TCEs to cover the allocated range */
	nio_pages = size >> tbl->it_page_shift;
	io_order = get_iommu_order(size, tbl);
	mapping = iommu_alloc(dev, tbl, ret, nio_pages, DMA_BIDIRECTIONAL,
			      mask >> tbl->it_page_shift, io_order, 0);
	if (mapping == DMA_MAPPING_ERROR) {
		free_pages((unsigned long)ret, order);
		return NULL;
	}
	*dma_handle = mapping;
	return ret;
}

void iommu_free_coherent(struct iommu_table *tbl, size_t size,
			 void *vaddr, dma_addr_t dma_handle)
{
	if (tbl) {
		unsigned int nio_pages;

		size = PAGE_ALIGN(size);
		nio_pages = size >> tbl->it_page_shift;
		iommu_free(tbl, dma_handle, nio_pages);
		free_pages((unsigned long)vaddr, get_order(size));
	}
}

unsigned long iommu_direction_to_tce_perm(enum dma_data_direction dir)
{
	switch (dir) {
	case DMA_BIDIRECTIONAL:
		return TCE_PCI_READ | TCE_PCI_WRITE;
	case DMA_FROM_DEVICE:
		return TCE_PCI_WRITE;
	case DMA_TO_DEVICE:
		return TCE_PCI_READ;
	default:
		return 0;
	}
}
EXPORT_SYMBOL_GPL(iommu_direction_to_tce_perm);

#ifdef CONFIG_IOMMU_API
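/*
 * SPAPR TCE API: helpers used when a table group is handed over to an
 * IOMMU API consumer such as VFIO.
 */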
static void group_release(void *iommu_data)
{
	struct iommu_table_group *table_group = iommu_data;

	table_group->group = NULL;
}

void iommu_register_group(struct iommu_table_group *table_group,
		int pci_domain_number, unsigned long pe_num)
{
	struct iommu_group *grp;
	char *name;

	grp = iommu_group_alloc();
	if (IS_ERR(grp)) {
		pr_warn("powerpc iommu api: cannot create new group, err=%ld\n",
			PTR_ERR(grp));
		return;
	}
	table_group->group = grp;
	iommu_group_set_iommudata(grp, table_group, group_release);
	name = kasprintf(GFP_KERNEL, "domain%d-pe%lx",
			 pci_domain_number, pe_num);
	if (!name)
		return;
	iommu_group_set_name(grp, name);
	kfree(name);
}

enum dma_data_direction iommu_tce_direction(unsigned long tce)
{
	if ((tce & TCE_PCI_READ) && (tce & TCE_PCI_WRITE))
		return DMA_BIDIRECTIONAL;
	else if (tce & TCE_PCI_READ)
		return DMA_TO_DEVICE;
	else if (tce & TCE_PCI_WRITE)
		return DMA_FROM_DEVICE;
	else
		return DMA_NONE;
}
EXPORT_SYMBOL_GPL(iommu_tce_direction);

void iommu_flush_tce(struct iommu_table *tbl)
{
	/* Flush/invalidate TLB caches if necessary */
	if (tbl->it_ops->flush)
		tbl->it_ops->flush(tbl);

	/* Make sure updates are seen by hardware */
	mb();
}
EXPORT_SYMBOL_GPL(iommu_flush_tce);

int iommu_tce_check_ioba(unsigned long page_shift,
		unsigned long offset, unsigned long size,
		unsigned long ioba, unsigned long npages)
{
	unsigned long mask = (1UL << page_shift) - 1;

	if (ioba & mask)
		return -EINVAL;

	ioba >>= page_shift;
	if (ioba < offset)
		return -EINVAL;

	if ((ioba + 1) > (offset + size))
		return -EINVAL;

	return 0;
}
EXPORT_SYMBOL_GPL(iommu_tce_check_ioba);

int iommu_tce_check_gpa(unsigned long page_shift, unsigned long gpa)
{
	unsigned long mask = (1UL << page_shift) - 1;

	if (gpa & mask)
		return -EINVAL;

	return 0;
}
EXPORT_SYMBOL_GPL(iommu_tce_check_gpa);

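/*
 * Exchange a TCE with a new value without flushing the TCE cache; the
 * caller is expected to call iommu_tce_kill() once the whole batch is
 * done. Pages released from DMA_FROM_DEVICE/DMA_BIDIRECTIONAL mappings
 * are marked dirty.
 */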
long iommu_tce_xchg_no_kill(struct mm_struct *mm,
			    struct iommu_table *tbl,
			    unsigned long entry, unsigned long *hpa,
			    enum dma_data_direction *direction)
{
	long ret;
	unsigned long size = 0;

	ret = tbl->it_ops->xchg_no_kill(tbl, entry, hpa, direction);
	if (!ret && ((*direction == DMA_FROM_DEVICE) ||
		     (*direction == DMA_BIDIRECTIONAL)) &&
	    !mm_iommu_is_devmem(mm, *hpa, tbl->it_page_shift,
				&size))
		SetPageDirty(pfn_to_page(*hpa >> PAGE_SHIFT));

	return ret;
}
EXPORT_SYMBOL_GPL(iommu_tce_xchg_no_kill);

void iommu_tce_kill(struct iommu_table *tbl,
		unsigned long entry, unsigned long pages)
{
	if (tbl->it_ops->tce_kill)
		tbl->it_ops->tce_kill(tbl, entry, pages);
}
EXPORT_SYMBOL_GPL(iommu_tce_kill);

int iommu_take_ownership(struct iommu_table *tbl)
{
	unsigned long flags, i, sz = (tbl->it_size + 7) >> 3;
	int ret = 0;

	/*
	 * Taking ownership hands TCE control to an external user (e.g.
	 * VFIO), which may overwrite live entries; that only works if the
	 * table implements xchg_no_kill() so that old pages can be
	 * released safely.
	 */
	if (!tbl->it_ops->xchg_no_kill)
		return -EINVAL;

	spin_lock_irqsave(&tbl->large_pool.lock, flags);
	for (i = 0; i < tbl->nr_pools; i++)
		spin_lock_nest_lock(&tbl->pools[i].lock, &tbl->large_pool.lock);

	if (iommu_table_in_use(tbl)) {
		pr_err("iommu_tce: it_map is not empty\n");
		ret = -EBUSY;
	} else {
		memset(tbl->it_map, 0xff, sz);
	}

	for (i = 0; i < tbl->nr_pools; i++)
		spin_unlock(&tbl->pools[i].lock);
	spin_unlock_irqrestore(&tbl->large_pool.lock, flags);

	return ret;
}
EXPORT_SYMBOL_GPL(iommu_take_ownership);

void iommu_release_ownership(struct iommu_table *tbl)
{
	unsigned long flags, i, sz = (tbl->it_size + 7) >> 3;

	spin_lock_irqsave(&tbl->large_pool.lock, flags);
	for (i = 0; i < tbl->nr_pools; i++)
		spin_lock_nest_lock(&tbl->pools[i].lock, &tbl->large_pool.lock);

	memset(tbl->it_map, 0, sz);

	iommu_table_reserve_pages(tbl, tbl->it_reserved_start,
			tbl->it_reserved_end);

	for (i = 0; i < tbl->nr_pools; i++)
		spin_unlock(&tbl->pools[i].lock);
	spin_unlock_irqrestore(&tbl->large_pool.lock, flags);
}
EXPORT_SYMBOL_GPL(iommu_release_ownership);

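/*
 * Add a device to its PE's IOMMU group, skipping devices that are already
 * in a group.
 */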
int iommu_add_device(struct iommu_table_group *table_group, struct device *dev)
{
	/*
	 * The sysfs entries should be populated before invoking this
	 * function. If they are not, this function skips the device.
	 */
	if (!device_is_registered(dev))
		return -ENOENT;

	if (device_iommu_mapped(dev)) {
		pr_debug("%s: Skipping device %s with iommu group %d\n",
			 __func__, dev_name(dev),
			 iommu_group_id(dev->iommu_group));
		return -EBUSY;
	}

	pr_debug("%s: Adding %s to iommu group %d\n",
		 __func__, dev_name(dev), iommu_group_id(table_group->group));

	return iommu_group_add_device(table_group->group, dev);
}
EXPORT_SYMBOL_GPL(iommu_add_device);

void iommu_del_device(struct device *dev)
{
	/*
	 * Some devices are never added to an IOMMU group, e.g. those
	 * without a DMA window; in that case there is nothing to remove.
	 */
	if (!device_iommu_mapped(dev)) {
		pr_debug("iommu_tce: skipping device %s with no tbl\n",
			 dev_name(dev));
		return;
	}

	iommu_group_remove_device(dev);
}
EXPORT_SYMBOL_GPL(iommu_del_device);
#endif