0001 // SPDX-License-Identifier: GPL-2.0-only
0002 /*
0003  * A fairly generic DMA-API to IOMMU-API glue layer.
0004  *
0005  * Copyright (C) 2014-2015 ARM Ltd.
0006  *
0007  * based in part on arch/arm/mm/dma-mapping.c:
0008  * Copyright (C) 2000-2004 Russell King
0009  */
0010 
0011 #include <linux/acpi_iort.h>
0012 #include <linux/atomic.h>
0013 #include <linux/crash_dump.h>
0014 #include <linux/device.h>
0015 #include <linux/dma-direct.h>
0016 #include <linux/dma-iommu.h>
0017 #include <linux/dma-map-ops.h>
0018 #include <linux/gfp.h>
0019 #include <linux/huge_mm.h>
0020 #include <linux/iommu.h>
0021 #include <linux/iova.h>
0022 #include <linux/irq.h>
0023 #include <linux/list_sort.h>
0024 #include <linux/memremap.h>
0025 #include <linux/mm.h>
0026 #include <linux/mutex.h>
0027 #include <linux/pci.h>
0028 #include <linux/scatterlist.h>
0029 #include <linux/spinlock.h>
0030 #include <linux/swiotlb.h>
0031 #include <linux/vmalloc.h>
0032 
0033 struct iommu_dma_msi_page {
0034     struct list_head    list;
0035     dma_addr_t      iova;
0036     phys_addr_t     phys;
0037 };
0038 
0039 enum iommu_dma_cookie_type {
0040     IOMMU_DMA_IOVA_COOKIE,
0041     IOMMU_DMA_MSI_COOKIE,
0042 };
0043 
0044 struct iommu_dma_cookie {
0045     enum iommu_dma_cookie_type  type;
0046     union {
0047         /* Full allocator for IOMMU_DMA_IOVA_COOKIE */
0048         struct {
0049             struct iova_domain  iovad;
0050 
0051             struct iova_fq __percpu *fq;    /* Flush queue */
0052             /* Number of TLB flushes that have been started */
0053             atomic64_t      fq_flush_start_cnt;
0054             /* Number of TLB flushes that have been finished */
0055             atomic64_t      fq_flush_finish_cnt;
0056             /* Timer to regularly empty the flush queues */
0057             struct timer_list   fq_timer;
0058             /* 1 when timer is active, 0 when not */
0059             atomic_t        fq_timer_on;
0060         };
0061         /* Trivial linear page allocator for IOMMU_DMA_MSI_COOKIE */
0062         dma_addr_t      msi_iova;
0063     };
0064     struct list_head        msi_page_list;
0065 
0066     /* Domain for flush queue callback; NULL if flush queue not in use */
0067     struct iommu_domain     *fq_domain;
0068     struct mutex            mutex;
0069 };
0070 
0071 static DEFINE_STATIC_KEY_FALSE(iommu_deferred_attach_enabled);
0072 bool iommu_dma_forcedac __read_mostly;
0073 
0074 static int __init iommu_dma_forcedac_setup(char *str)
0075 {
0076     int ret = kstrtobool(str, &iommu_dma_forcedac);
0077 
0078     if (!ret && iommu_dma_forcedac)
0079         pr_info("Forcing DAC for PCI devices\n");
0080     return ret;
0081 }
0082 early_param("iommu.forcedac", iommu_dma_forcedac_setup);
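/*
 * Editor's note (illustrative, not part of the original file): the option
 * above is a boot-time parameter set from the kernel command line, e.g.:
 *
 *	iommu.forcedac=1
 *
 * kstrtobool() also accepts forms such as "y" or "on". With the option set,
 * PCI devices are no longer steered towards 32-bit (SAC) IOVAs in
 * iommu_dma_alloc_iova() below.
 */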
0083 
0084 /* Number of entries per flush queue */
0085 #define IOVA_FQ_SIZE    256
0086 
0087 /* Timeout (in ms) after which entries are flushed from the queue */
0088 #define IOVA_FQ_TIMEOUT 10
0089 
0090 /* Flush queue entry for deferred flushing */
0091 struct iova_fq_entry {
0092     unsigned long iova_pfn;
0093     unsigned long pages;
0094     struct list_head freelist;
0095     u64 counter; /* Flush counter when this entry was added */
0096 };
0097 
0098 /* Per-CPU flush queue structure */
0099 struct iova_fq {
0100     struct iova_fq_entry entries[IOVA_FQ_SIZE];
0101     unsigned int head, tail;
0102     spinlock_t lock;
0103 };
0104 
0105 #define fq_ring_for_each(i, fq) \
0106     for ((i) = (fq)->head; (i) != (fq)->tail; (i) = ((i) + 1) % IOVA_FQ_SIZE)
0107 
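/*
 * Editor's worked example (not in the original file): the ring indices wrap
 * modulo IOVA_FQ_SIZE, so with IOVA_FQ_SIZE == 256 the queue holds at most
 * 255 entries. head == tail means the queue is empty, while e.g.
 * head == 10, tail == 9 makes fq_full() below return true.
 */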
0108 static inline bool fq_full(struct iova_fq *fq)
0109 {
0110     assert_spin_locked(&fq->lock);
0111     return (((fq->tail + 1) % IOVA_FQ_SIZE) == fq->head);
0112 }
0113 
0114 static inline unsigned int fq_ring_add(struct iova_fq *fq)
0115 {
0116     unsigned int idx = fq->tail;
0117 
0118     assert_spin_locked(&fq->lock);
0119 
0120     fq->tail = (idx + 1) % IOVA_FQ_SIZE;
0121 
0122     return idx;
0123 }
0124 
0125 static void fq_ring_free(struct iommu_dma_cookie *cookie, struct iova_fq *fq)
0126 {
0127     u64 counter = atomic64_read(&cookie->fq_flush_finish_cnt);
0128     unsigned int idx;
0129 
0130     assert_spin_locked(&fq->lock);
0131 
0132     fq_ring_for_each(idx, fq) {
0133 
0134         if (fq->entries[idx].counter >= counter)
0135             break;
0136 
0137         put_pages_list(&fq->entries[idx].freelist);
0138         free_iova_fast(&cookie->iovad,
0139                    fq->entries[idx].iova_pfn,
0140                    fq->entries[idx].pages);
0141 
0142         fq->head = (fq->head + 1) % IOVA_FQ_SIZE;
0143     }
0144 }
0145 
0146 static void fq_flush_iotlb(struct iommu_dma_cookie *cookie)
0147 {
0148     atomic64_inc(&cookie->fq_flush_start_cnt);
0149     cookie->fq_domain->ops->flush_iotlb_all(cookie->fq_domain);
0150     atomic64_inc(&cookie->fq_flush_finish_cnt);
0151 }
0152 
0153 static void fq_flush_timeout(struct timer_list *t)
0154 {
0155     struct iommu_dma_cookie *cookie = from_timer(cookie, t, fq_timer);
0156     int cpu;
0157 
0158     atomic_set(&cookie->fq_timer_on, 0);
0159     fq_flush_iotlb(cookie);
0160 
0161     for_each_possible_cpu(cpu) {
0162         unsigned long flags;
0163         struct iova_fq *fq;
0164 
0165         fq = per_cpu_ptr(cookie->fq, cpu);
0166         spin_lock_irqsave(&fq->lock, flags);
0167         fq_ring_free(cookie, fq);
0168         spin_unlock_irqrestore(&fq->lock, flags);
0169     }
0170 }
0171 
0172 static void queue_iova(struct iommu_dma_cookie *cookie,
0173         unsigned long pfn, unsigned long pages,
0174         struct list_head *freelist)
0175 {
0176     struct iova_fq *fq;
0177     unsigned long flags;
0178     unsigned int idx;
0179 
0180     /*
0181      * Order against the IOMMU driver's pagetable update from unmapping
0182      * @pte, to guarantee that fq_flush_iotlb() observes that if called
0183      * from a different CPU before we release the lock below. Full barrier
0184      * so it also pairs with iommu_dma_init_fq() to avoid seeing partially
0185      * written fq state here.
0186      */
0187     smp_mb();
0188 
0189     fq = raw_cpu_ptr(cookie->fq);
0190     spin_lock_irqsave(&fq->lock, flags);
0191 
0192     /*
0193      * First remove all entries from the flush queue that have already been
0194      * flushed out on another CPU. This makes the fq_full() check below less
0195      * likely to be true.
0196      */
0197     fq_ring_free(cookie, fq);
0198 
0199     if (fq_full(fq)) {
0200         fq_flush_iotlb(cookie);
0201         fq_ring_free(cookie, fq);
0202     }
0203 
0204     idx = fq_ring_add(fq);
0205 
0206     fq->entries[idx].iova_pfn = pfn;
0207     fq->entries[idx].pages    = pages;
0208     fq->entries[idx].counter  = atomic64_read(&cookie->fq_flush_start_cnt);
0209     list_splice(freelist, &fq->entries[idx].freelist);
0210 
0211     spin_unlock_irqrestore(&fq->lock, flags);
0212 
0213     /* Avoid false sharing as much as possible. */
0214     if (!atomic_read(&cookie->fq_timer_on) &&
0215         !atomic_xchg(&cookie->fq_timer_on, 1))
0216         mod_timer(&cookie->fq_timer,
0217               jiffies + msecs_to_jiffies(IOVA_FQ_TIMEOUT));
0218 }
0219 
0220 static void iommu_dma_free_fq(struct iommu_dma_cookie *cookie)
0221 {
0222     int cpu, idx;
0223 
0224     if (!cookie->fq)
0225         return;
0226 
0227     del_timer_sync(&cookie->fq_timer);
0228     /* The IOVAs will be torn down separately, so just free our queued pages */
0229     for_each_possible_cpu(cpu) {
0230         struct iova_fq *fq = per_cpu_ptr(cookie->fq, cpu);
0231 
0232         fq_ring_for_each(idx, fq)
0233             put_pages_list(&fq->entries[idx].freelist);
0234     }
0235 
0236     free_percpu(cookie->fq);
0237 }
0238 
0239 /* sysfs updates are serialised by the mutex of the group owning @domain */
0240 int iommu_dma_init_fq(struct iommu_domain *domain)
0241 {
0242     struct iommu_dma_cookie *cookie = domain->iova_cookie;
0243     struct iova_fq __percpu *queue;
0244     int i, cpu;
0245 
0246     if (cookie->fq_domain)
0247         return 0;
0248 
0249     atomic64_set(&cookie->fq_flush_start_cnt,  0);
0250     atomic64_set(&cookie->fq_flush_finish_cnt, 0);
0251 
0252     queue = alloc_percpu(struct iova_fq);
0253     if (!queue) {
0254         pr_warn("iova flush queue initialization failed\n");
0255         return -ENOMEM;
0256     }
0257 
0258     for_each_possible_cpu(cpu) {
0259         struct iova_fq *fq = per_cpu_ptr(queue, cpu);
0260 
0261         fq->head = 0;
0262         fq->tail = 0;
0263 
0264         spin_lock_init(&fq->lock);
0265 
0266         for (i = 0; i < IOVA_FQ_SIZE; i++)
0267             INIT_LIST_HEAD(&fq->entries[i].freelist);
0268     }
0269 
0270     cookie->fq = queue;
0271 
0272     timer_setup(&cookie->fq_timer, fq_flush_timeout, 0);
0273     atomic_set(&cookie->fq_timer_on, 0);
0274     /*
0275      * Prevent incomplete fq state being observable. Pairs with path from
0276      * __iommu_dma_unmap() through iommu_dma_free_iova() to queue_iova()
0277      */
0278     smp_wmb();
0279     WRITE_ONCE(cookie->fq_domain, domain);
0280     return 0;
0281 }
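/*
 * Editor's note: besides the IOMMU_DOMAIN_DMA_FQ path in
 * iommu_dma_init_domain(), this is reached when userspace switches a
 * group's domain type through sysfs, e.g. (path per the documented
 * iommu_groups ABI):
 *
 *	echo DMA-FQ > /sys/kernel/iommu_groups/<N>/type
 */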
0282 
0283 static inline size_t cookie_msi_granule(struct iommu_dma_cookie *cookie)
0284 {
0285     if (cookie->type == IOMMU_DMA_IOVA_COOKIE)
0286         return cookie->iovad.granule;
0287     return PAGE_SIZE;
0288 }
0289 
0290 static struct iommu_dma_cookie *cookie_alloc(enum iommu_dma_cookie_type type)
0291 {
0292     struct iommu_dma_cookie *cookie;
0293 
0294     cookie = kzalloc(sizeof(*cookie), GFP_KERNEL);
0295     if (cookie) {
0296         INIT_LIST_HEAD(&cookie->msi_page_list);
0297         cookie->type = type;
0298     }
0299     return cookie;
0300 }
0301 
0302 /**
0303  * iommu_get_dma_cookie - Acquire DMA-API resources for a domain
0304  * @domain: IOMMU domain to prepare for DMA-API usage
0305  */
0306 int iommu_get_dma_cookie(struct iommu_domain *domain)
0307 {
0308     if (domain->iova_cookie)
0309         return -EEXIST;
0310 
0311     domain->iova_cookie = cookie_alloc(IOMMU_DMA_IOVA_COOKIE);
0312     if (!domain->iova_cookie)
0313         return -ENOMEM;
0314 
0315     mutex_init(&domain->iova_cookie->mutex);
0316     return 0;
0317 }
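/*
 * Editor's sketch (a hedged illustration of the caller, not code from this
 * file): in this layout the cookie is managed by the IOMMU core rather than
 * individual drivers; a DMA-API-capable default domain acquires its cookie
 * at allocation time and releases it via iommu_put_dma_cookie() when the
 * domain is freed, roughly:
 *
 *	if (type == IOMMU_DOMAIN_DMA && iommu_get_dma_cookie(dom)) {
 *		iommu_domain_free(dom);
 *		return NULL;
 *	}
 */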
0318 
0319 /**
0320  * iommu_get_msi_cookie - Acquire just MSI remapping resources
0321  * @domain: IOMMU domain to prepare
0322  * @base: Start address of IOVA region for MSI mappings
0323  *
0324  * Users who manage their own IOVA allocation and do not want DMA API support,
0325  * but would still like to take advantage of automatic MSI remapping, can use
0326  * this to initialise their own domain appropriately. Users should reserve a
0327  * contiguous IOVA region, starting at @base, large enough to accommodate the
0328  * number of PAGE_SIZE mappings necessary to cover every MSI doorbell address
0329  * used by the devices attached to @domain.
0330  */
0331 int iommu_get_msi_cookie(struct iommu_domain *domain, dma_addr_t base)
0332 {
0333     struct iommu_dma_cookie *cookie;
0334 
0335     if (domain->type != IOMMU_DOMAIN_UNMANAGED)
0336         return -EINVAL;
0337 
0338     if (domain->iova_cookie)
0339         return -EEXIST;
0340 
0341     cookie = cookie_alloc(IOMMU_DMA_MSI_COOKIE);
0342     if (!cookie)
0343         return -ENOMEM;
0344 
0345     cookie->msi_iova = base;
0346     domain->iova_cookie = cookie;
0347     return 0;
0348 }
0349 EXPORT_SYMBOL(iommu_get_msi_cookie);
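/*
 * Editor's sketch: a user of an UNMANAGED domain that does its own IOVA
 * management (VFIO-style) might reserve a software MSI window and then
 * call, for example:
 *
 *	if (iommu_get_msi_cookie(domain, resv_msi_base))
 *		goto out_detach;
 *
 * where "resv_msi_base" is an assumed name for the start of an
 * IOMMU_RESV_SW_MSI region discovered via iommu_get_resv_regions().
 */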
0350 
0351 /**
0352  * iommu_put_dma_cookie - Release a domain's DMA mapping resources
0353  * @domain: IOMMU domain previously prepared by iommu_get_dma_cookie() or
0354  *          iommu_get_msi_cookie()
0355  */
0356 void iommu_put_dma_cookie(struct iommu_domain *domain)
0357 {
0358     struct iommu_dma_cookie *cookie = domain->iova_cookie;
0359     struct iommu_dma_msi_page *msi, *tmp;
0360 
0361     if (!cookie)
0362         return;
0363 
0364     if (cookie->type == IOMMU_DMA_IOVA_COOKIE && cookie->iovad.granule) {
0365         iommu_dma_free_fq(cookie);
0366         put_iova_domain(&cookie->iovad);
0367     }
0368 
0369     list_for_each_entry_safe(msi, tmp, &cookie->msi_page_list, list) {
0370         list_del(&msi->list);
0371         kfree(msi);
0372     }
0373     kfree(cookie);
0374     domain->iova_cookie = NULL;
0375 }
0376 
0377 /**
0378  * iommu_dma_get_resv_regions - Reserved region driver helper
0379  * @dev: Device from iommu_get_resv_regions()
0380  * @list: Reserved region list from iommu_get_resv_regions()
0381  *
0382  * IOMMU drivers can use this to implement their .get_resv_regions callback
0383  * for general non-IOMMU-specific reservations. Currently, this covers GICv3
0384  * ITS region reservation on ACPI based ARM platforms that may require HW MSI
0385  * reservation.
0386  */
0387 void iommu_dma_get_resv_regions(struct device *dev, struct list_head *list)
0388 {
0389 
0390     if (!is_of_node(dev_iommu_fwspec_get(dev)->iommu_fwnode))
0391         iort_iommu_get_resv_regions(dev, list);
0392 
0393 }
0394 EXPORT_SYMBOL(iommu_dma_get_resv_regions);
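/*
 * Editor's sketch (hypothetical driver names): an IOMMU driver typically
 * chains this helper from its own .get_resv_regions callback after adding
 * any driver-specific regions, along the lines of:
 *
 *	static void my_iommu_get_resv_regions(struct device *dev,
 *					      struct list_head *head)
 *	{
 *		... add driver-specific regions (e.g. an SW_MSI window) ...
 *		iommu_dma_get_resv_regions(dev, head);
 *	}
 */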
0395 
0396 static int cookie_init_hw_msi_region(struct iommu_dma_cookie *cookie,
0397         phys_addr_t start, phys_addr_t end)
0398 {
0399     struct iova_domain *iovad = &cookie->iovad;
0400     struct iommu_dma_msi_page *msi_page;
0401     int i, num_pages;
0402 
0403     start -= iova_offset(iovad, start);
0404     num_pages = iova_align(iovad, end - start) >> iova_shift(iovad);
0405 
0406     for (i = 0; i < num_pages; i++) {
0407         msi_page = kmalloc(sizeof(*msi_page), GFP_KERNEL);
0408         if (!msi_page)
0409             return -ENOMEM;
0410 
0411         msi_page->phys = start;
0412         msi_page->iova = start;
0413         INIT_LIST_HEAD(&msi_page->list);
0414         list_add(&msi_page->list, &cookie->msi_page_list);
0415         start += iovad->granule;
0416     }
0417 
0418     return 0;
0419 }
0420 
0421 static int iommu_dma_ranges_sort(void *priv, const struct list_head *a,
0422         const struct list_head *b)
0423 {
0424     struct resource_entry *res_a = list_entry(a, typeof(*res_a), node);
0425     struct resource_entry *res_b = list_entry(b, typeof(*res_b), node);
0426 
0427     return res_a->res->start > res_b->res->start;
0428 }
0429 
0430 static int iova_reserve_pci_windows(struct pci_dev *dev,
0431         struct iova_domain *iovad)
0432 {
0433     struct pci_host_bridge *bridge = pci_find_host_bridge(dev->bus);
0434     struct resource_entry *window;
0435     unsigned long lo, hi;
0436     phys_addr_t start = 0, end;
0437 
0438     resource_list_for_each_entry(window, &bridge->windows) {
0439         if (resource_type(window->res) != IORESOURCE_MEM)
0440             continue;
0441 
0442         lo = iova_pfn(iovad, window->res->start - window->offset);
0443         hi = iova_pfn(iovad, window->res->end - window->offset);
0444         reserve_iova(iovad, lo, hi);
0445     }
0446 
0447     /* Get reserved DMA windows from host bridge */
0448     list_sort(NULL, &bridge->dma_ranges, iommu_dma_ranges_sort);
0449     resource_list_for_each_entry(window, &bridge->dma_ranges) {
0450         end = window->res->start - window->offset;
0451 resv_iova:
0452         if (end > start) {
0453             lo = iova_pfn(iovad, start);
0454             hi = iova_pfn(iovad, end);
0455             reserve_iova(iovad, lo, hi);
0456         } else if (end < start) {
0457             /* DMA ranges should be non-overlapping */
0458             dev_err(&dev->dev,
0459                 "Failed to reserve IOVA [%pa-%pa]\n",
0460                 &start, &end);
0461             return -EINVAL;
0462         }
0463 
0464         start = window->res->end - window->offset + 1;
0465         /* If window is last entry */
0466         if (window->node.next == &bridge->dma_ranges &&
0467             end != ~(phys_addr_t)0) {
0468             end = ~(phys_addr_t)0;
0469             goto resv_iova;
0470         }
0471     }
0472 
0473     return 0;
0474 }
0475 
0476 static int iova_reserve_iommu_regions(struct device *dev,
0477         struct iommu_domain *domain)
0478 {
0479     struct iommu_dma_cookie *cookie = domain->iova_cookie;
0480     struct iova_domain *iovad = &cookie->iovad;
0481     struct iommu_resv_region *region;
0482     LIST_HEAD(resv_regions);
0483     int ret = 0;
0484 
0485     if (dev_is_pci(dev)) {
0486         ret = iova_reserve_pci_windows(to_pci_dev(dev), iovad);
0487         if (ret)
0488             return ret;
0489     }
0490 
0491     iommu_get_resv_regions(dev, &resv_regions);
0492     list_for_each_entry(region, &resv_regions, list) {
0493         unsigned long lo, hi;
0494 
0495         /* We ARE the software that manages these! */
0496         if (region->type == IOMMU_RESV_SW_MSI)
0497             continue;
0498 
0499         lo = iova_pfn(iovad, region->start);
0500         hi = iova_pfn(iovad, region->start + region->length - 1);
0501         reserve_iova(iovad, lo, hi);
0502 
0503         if (region->type == IOMMU_RESV_MSI)
0504             ret = cookie_init_hw_msi_region(cookie, region->start,
0505                     region->start + region->length);
0506         if (ret)
0507             break;
0508     }
0509     iommu_put_resv_regions(dev, &resv_regions);
0510 
0511     return ret;
0512 }
0513 
0514 static bool dev_is_untrusted(struct device *dev)
0515 {
0516     return dev_is_pci(dev) && to_pci_dev(dev)->untrusted;
0517 }
0518 
0519 static bool dev_use_swiotlb(struct device *dev)
0520 {
0521     return IS_ENABLED(CONFIG_SWIOTLB) && dev_is_untrusted(dev);
0522 }
0523 
0524 /**
0525  * iommu_dma_init_domain - Initialise a DMA mapping domain
0526  * @domain: IOMMU domain previously prepared by iommu_get_dma_cookie()
0527  * @base: IOVA at which the mappable address space starts
0528  * @limit: Last address of the IOVA space
0529  * @dev: Device the domain is being initialised for
0530  *
0531  * @base and @limit + 1 should be exact multiples of IOMMU page granularity to
0532  * avoid rounding surprises. If necessary, we reserve the page at address 0
0533  * to ensure it is an invalid IOVA. It is safe to reinitialise a domain, but
0534  * any change which could make prior IOVAs invalid will fail.
0535  */
0536 static int iommu_dma_init_domain(struct iommu_domain *domain, dma_addr_t base,
0537                  dma_addr_t limit, struct device *dev)
0538 {
0539     struct iommu_dma_cookie *cookie = domain->iova_cookie;
0540     unsigned long order, base_pfn;
0541     struct iova_domain *iovad;
0542     int ret;
0543 
0544     if (!cookie || cookie->type != IOMMU_DMA_IOVA_COOKIE)
0545         return -EINVAL;
0546 
0547     iovad = &cookie->iovad;
0548 
0549     /* Use the smallest supported page size for IOVA granularity */
0550     order = __ffs(domain->pgsize_bitmap);
0551     base_pfn = max_t(unsigned long, 1, base >> order);
0552 
0553     /* Check the domain allows at least some access to the device... */
0554     if (domain->geometry.force_aperture) {
0555         if (base > domain->geometry.aperture_end ||
0556             limit < domain->geometry.aperture_start) {
0557             pr_warn("specified DMA range outside IOMMU capability\n");
0558             return -EFAULT;
0559         }
0560         /* ...then finally give it a kicking to make sure it fits */
0561         base_pfn = max_t(unsigned long, base_pfn,
0562                 domain->geometry.aperture_start >> order);
0563     }
0564 
0565     /* start_pfn is always nonzero for an already-initialised domain */
0566     mutex_lock(&cookie->mutex);
0567     if (iovad->start_pfn) {
0568         if (1UL << order != iovad->granule ||
0569             base_pfn != iovad->start_pfn) {
0570             pr_warn("Incompatible range for DMA domain\n");
0571             ret = -EFAULT;
0572             goto done_unlock;
0573         }
0574 
0575         ret = 0;
0576         goto done_unlock;
0577     }
0578 
0579     init_iova_domain(iovad, 1UL << order, base_pfn);
0580     ret = iova_domain_init_rcaches(iovad);
0581     if (ret)
0582         goto done_unlock;
0583 
0584     /* If the FQ fails we can simply fall back to strict mode */
0585     if (domain->type == IOMMU_DOMAIN_DMA_FQ && iommu_dma_init_fq(domain))
0586         domain->type = IOMMU_DOMAIN_DMA;
0587 
0588     ret = iova_reserve_iommu_regions(dev, domain);
0589 
0590 done_unlock:
0591     mutex_unlock(&cookie->mutex);
0592     return ret;
0593 }
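/*
 * Editor's worked example: with a 4K IOMMU granule, base = 0 and
 * limit = 0xffffffff satisfy the "@base and @limit + 1" alignment rule in
 * the comment above, and base_pfn is clamped to at least 1 so that IOVA 0
 * is never handed out as a valid DMA address.
 */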
0594 
0595 /**
0596  * dma_info_to_prot - Translate DMA API directions and attributes to IOMMU API
0597  *                    page flags.
0598  * @dir: Direction of DMA transfer
0599  * @coherent: Is the DMA master cache-coherent?
0600  * @attrs: DMA attributes for the mapping
0601  *
0602  * Return: corresponding IOMMU API page protection flags
0603  */
0604 static int dma_info_to_prot(enum dma_data_direction dir, bool coherent,
0605              unsigned long attrs)
0606 {
0607     int prot = coherent ? IOMMU_CACHE : 0;
0608 
0609     if (attrs & DMA_ATTR_PRIVILEGED)
0610         prot |= IOMMU_PRIV;
0611 
0612     switch (dir) {
0613     case DMA_BIDIRECTIONAL:
0614         return prot | IOMMU_READ | IOMMU_WRITE;
0615     case DMA_TO_DEVICE:
0616         return prot | IOMMU_READ;
0617     case DMA_FROM_DEVICE:
0618         return prot | IOMMU_WRITE;
0619     default:
0620         return 0;
0621     }
0622 }
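/*
 * Editor's worked example: a coherent DMA_BIDIRECTIONAL mapping yields
 * IOMMU_READ | IOMMU_WRITE | IOMMU_CACHE, while a non-coherent
 * DMA_FROM_DEVICE mapping with DMA_ATTR_PRIVILEGED yields
 * IOMMU_WRITE | IOMMU_PRIV.
 */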
0623 
0624 static dma_addr_t iommu_dma_alloc_iova(struct iommu_domain *domain,
0625         size_t size, u64 dma_limit, struct device *dev)
0626 {
0627     struct iommu_dma_cookie *cookie = domain->iova_cookie;
0628     struct iova_domain *iovad = &cookie->iovad;
0629     unsigned long shift, iova_len, iova = 0;
0630 
0631     if (cookie->type == IOMMU_DMA_MSI_COOKIE) {
0632         cookie->msi_iova += size;
0633         return cookie->msi_iova - size;
0634     }
0635 
0636     shift = iova_shift(iovad);
0637     iova_len = size >> shift;
0638 
0639     dma_limit = min_not_zero(dma_limit, dev->bus_dma_limit);
0640 
0641     if (domain->geometry.force_aperture)
0642         dma_limit = min(dma_limit, (u64)domain->geometry.aperture_end);
0643 
0644     /* Try to get PCI devices a SAC address */
0645     if (dma_limit > DMA_BIT_MASK(32) && !iommu_dma_forcedac && dev_is_pci(dev))
0646         iova = alloc_iova_fast(iovad, iova_len,
0647                        DMA_BIT_MASK(32) >> shift, false);
0648 
0649     if (!iova)
0650         iova = alloc_iova_fast(iovad, iova_len, dma_limit >> shift,
0651                        true);
0652 
0653     return (dma_addr_t)iova << shift;
0654 }
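/*
 * Editor's worked example: for a PCI device with a 64-bit DMA mask and
 * iommu.forcedac unset, the first alloc_iova_fast() call above is limited
 * to DMA_BIT_MASK(32) >> shift, i.e. IOVAs below 4 GiB (a SAC address);
 * only if that allocation fails is the full dma_limit retried.
 */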
0655 
0656 static void iommu_dma_free_iova(struct iommu_dma_cookie *cookie,
0657         dma_addr_t iova, size_t size, struct iommu_iotlb_gather *gather)
0658 {
0659     struct iova_domain *iovad = &cookie->iovad;
0660 
0661     /* The MSI case is only ever cleaning up its most recent allocation */
0662     if (cookie->type == IOMMU_DMA_MSI_COOKIE)
0663         cookie->msi_iova -= size;
0664     else if (gather && gather->queued)
0665         queue_iova(cookie, iova_pfn(iovad, iova),
0666                 size >> iova_shift(iovad),
0667                 &gather->freelist);
0668     else
0669         free_iova_fast(iovad, iova_pfn(iovad, iova),
0670                 size >> iova_shift(iovad));
0671 }
0672 
0673 static void __iommu_dma_unmap(struct device *dev, dma_addr_t dma_addr,
0674         size_t size)
0675 {
0676     struct iommu_domain *domain = iommu_get_dma_domain(dev);
0677     struct iommu_dma_cookie *cookie = domain->iova_cookie;
0678     struct iova_domain *iovad = &cookie->iovad;
0679     size_t iova_off = iova_offset(iovad, dma_addr);
0680     struct iommu_iotlb_gather iotlb_gather;
0681     size_t unmapped;
0682 
0683     dma_addr -= iova_off;
0684     size = iova_align(iovad, size + iova_off);
0685     iommu_iotlb_gather_init(&iotlb_gather);
0686     iotlb_gather.queued = READ_ONCE(cookie->fq_domain);
0687 
0688     unmapped = iommu_unmap_fast(domain, dma_addr, size, &iotlb_gather);
0689     WARN_ON(unmapped != size);
0690 
0691     if (!iotlb_gather.queued)
0692         iommu_iotlb_sync(domain, &iotlb_gather);
0693     iommu_dma_free_iova(cookie, dma_addr, size, &iotlb_gather);
0694 }
0695 
0696 static dma_addr_t __iommu_dma_map(struct device *dev, phys_addr_t phys,
0697         size_t size, int prot, u64 dma_mask)
0698 {
0699     struct iommu_domain *domain = iommu_get_dma_domain(dev);
0700     struct iommu_dma_cookie *cookie = domain->iova_cookie;
0701     struct iova_domain *iovad = &cookie->iovad;
0702     size_t iova_off = iova_offset(iovad, phys);
0703     dma_addr_t iova;
0704 
0705     if (static_branch_unlikely(&iommu_deferred_attach_enabled) &&
0706         iommu_deferred_attach(dev, domain))
0707         return DMA_MAPPING_ERROR;
0708 
0709     size = iova_align(iovad, size + iova_off);
0710 
0711     iova = iommu_dma_alloc_iova(domain, size, dma_mask, dev);
0712     if (!iova)
0713         return DMA_MAPPING_ERROR;
0714 
0715     if (iommu_map_atomic(domain, iova, phys - iova_off, size, prot)) {
0716         iommu_dma_free_iova(cookie, iova, size, NULL);
0717         return DMA_MAPPING_ERROR;
0718     }
0719     return iova + iova_off;
0720 }
0721 
0722 static void __iommu_dma_free_pages(struct page **pages, int count)
0723 {
0724     while (count--)
0725         __free_page(pages[count]);
0726     kvfree(pages);
0727 }
0728 
0729 static struct page **__iommu_dma_alloc_pages(struct device *dev,
0730         unsigned int count, unsigned long order_mask, gfp_t gfp)
0731 {
0732     struct page **pages;
0733     unsigned int i = 0, nid = dev_to_node(dev);
0734 
0735     order_mask &= (2U << MAX_ORDER) - 1;
0736     if (!order_mask)
0737         return NULL;
0738 
0739     pages = kvcalloc(count, sizeof(*pages), GFP_KERNEL);
0740     if (!pages)
0741         return NULL;
0742 
0743     /* IOMMU can map any pages, so highmem can also be used here */
0744     gfp |= __GFP_NOWARN | __GFP_HIGHMEM;
0745 
0746     /* It makes no sense to muck about with huge pages */
0747     gfp &= ~__GFP_COMP;
0748 
0749     while (count) {
0750         struct page *page = NULL;
0751         unsigned int order_size;
0752 
0753         /*
0754          * Higher-order allocations are a convenience rather
0755          * than a necessity, hence using __GFP_NORETRY until
0756          * falling back to minimum-order allocations.
0757          */
0758         for (order_mask &= (2U << __fls(count)) - 1;
0759              order_mask; order_mask &= ~order_size) {
0760             unsigned int order = __fls(order_mask);
0761             gfp_t alloc_flags = gfp;
0762 
0763             order_size = 1U << order;
0764             if (order_mask > order_size)
0765                 alloc_flags |= __GFP_NORETRY;
0766             page = alloc_pages_node(nid, alloc_flags, order);
0767             if (!page)
0768                 continue;
0769             if (order)
0770                 split_page(page, order);
0771             break;
0772         }
0773         if (!page) {
0774             __iommu_dma_free_pages(pages, i);
0775             return NULL;
0776         }
0777         count -= order_size;
0778         while (order_size--)
0779             pages[i++] = page++;
0780     }
0781     return pages;
0782 }
0783 
0784 /*
0785  * If size is less than PAGE_SIZE, then a full CPU page will be allocated,
0786  * but an IOMMU which supports smaller pages might not map the whole thing.
0787  */
0788 static struct page **__iommu_dma_alloc_noncontiguous(struct device *dev,
0789         size_t size, struct sg_table *sgt, gfp_t gfp, pgprot_t prot,
0790         unsigned long attrs)
0791 {
0792     struct iommu_domain *domain = iommu_get_dma_domain(dev);
0793     struct iommu_dma_cookie *cookie = domain->iova_cookie;
0794     struct iova_domain *iovad = &cookie->iovad;
0795     bool coherent = dev_is_dma_coherent(dev);
0796     int ioprot = dma_info_to_prot(DMA_BIDIRECTIONAL, coherent, attrs);
0797     unsigned int count, min_size, alloc_sizes = domain->pgsize_bitmap;
0798     struct page **pages;
0799     dma_addr_t iova;
0800     ssize_t ret;
0801 
0802     if (static_branch_unlikely(&iommu_deferred_attach_enabled) &&
0803         iommu_deferred_attach(dev, domain))
0804         return NULL;
0805 
0806     min_size = alloc_sizes & -alloc_sizes;
0807     if (min_size < PAGE_SIZE) {
0808         min_size = PAGE_SIZE;
0809         alloc_sizes |= PAGE_SIZE;
0810     } else {
0811         size = ALIGN(size, min_size);
0812     }
0813     if (attrs & DMA_ATTR_ALLOC_SINGLE_PAGES)
0814         alloc_sizes = min_size;
0815 
0816     count = PAGE_ALIGN(size) >> PAGE_SHIFT;
0817     pages = __iommu_dma_alloc_pages(dev, count, alloc_sizes >> PAGE_SHIFT,
0818                     gfp);
0819     if (!pages)
0820         return NULL;
0821 
0822     size = iova_align(iovad, size);
0823     iova = iommu_dma_alloc_iova(domain, size, dev->coherent_dma_mask, dev);
0824     if (!iova)
0825         goto out_free_pages;
0826 
0827     if (sg_alloc_table_from_pages(sgt, pages, count, 0, size, GFP_KERNEL))
0828         goto out_free_iova;
0829 
0830     if (!(ioprot & IOMMU_CACHE)) {
0831         struct scatterlist *sg;
0832         int i;
0833 
0834         for_each_sg(sgt->sgl, sg, sgt->orig_nents, i)
0835             arch_dma_prep_coherent(sg_page(sg), sg->length);
0836     }
0837 
0838     ret = iommu_map_sg_atomic(domain, iova, sgt->sgl, sgt->orig_nents, ioprot);
0839     if (ret < 0 || ret < size)
0840         goto out_free_sg;
0841 
0842     sgt->sgl->dma_address = iova;
0843     sgt->sgl->dma_length = size;
0844     return pages;
0845 
0846 out_free_sg:
0847     sg_free_table(sgt);
0848 out_free_iova:
0849     iommu_dma_free_iova(cookie, iova, size, NULL);
0850 out_free_pages:
0851     __iommu_dma_free_pages(pages, count);
0852     return NULL;
0853 }
0854 
0855 static void *iommu_dma_alloc_remap(struct device *dev, size_t size,
0856         dma_addr_t *dma_handle, gfp_t gfp, pgprot_t prot,
0857         unsigned long attrs)
0858 {
0859     struct page **pages;
0860     struct sg_table sgt;
0861     void *vaddr;
0862 
0863     pages = __iommu_dma_alloc_noncontiguous(dev, size, &sgt, gfp, prot,
0864                         attrs);
0865     if (!pages)
0866         return NULL;
0867     *dma_handle = sgt.sgl->dma_address;
0868     sg_free_table(&sgt);
0869     vaddr = dma_common_pages_remap(pages, size, prot,
0870             __builtin_return_address(0));
0871     if (!vaddr)
0872         goto out_unmap;
0873     return vaddr;
0874 
0875 out_unmap:
0876     __iommu_dma_unmap(dev, *dma_handle, size);
0877     __iommu_dma_free_pages(pages, PAGE_ALIGN(size) >> PAGE_SHIFT);
0878     return NULL;
0879 }
0880 
0881 static struct sg_table *iommu_dma_alloc_noncontiguous(struct device *dev,
0882         size_t size, enum dma_data_direction dir, gfp_t gfp,
0883         unsigned long attrs)
0884 {
0885     struct dma_sgt_handle *sh;
0886 
0887     sh = kmalloc(sizeof(*sh), gfp);
0888     if (!sh)
0889         return NULL;
0890 
0891     sh->pages = __iommu_dma_alloc_noncontiguous(dev, size, &sh->sgt, gfp,
0892                             PAGE_KERNEL, attrs);
0893     if (!sh->pages) {
0894         kfree(sh);
0895         return NULL;
0896     }
0897     return &sh->sgt;
0898 }
0899 
0900 static void iommu_dma_free_noncontiguous(struct device *dev, size_t size,
0901         struct sg_table *sgt, enum dma_data_direction dir)
0902 {
0903     struct dma_sgt_handle *sh = sgt_handle(sgt);
0904 
0905     __iommu_dma_unmap(dev, sgt->sgl->dma_address, size);
0906     __iommu_dma_free_pages(sh->pages, PAGE_ALIGN(size) >> PAGE_SHIFT);
0907     sg_free_table(&sh->sgt);
0908     kfree(sh);
0909 }
0910 
0911 static void iommu_dma_sync_single_for_cpu(struct device *dev,
0912         dma_addr_t dma_handle, size_t size, enum dma_data_direction dir)
0913 {
0914     phys_addr_t phys;
0915 
0916     if (dev_is_dma_coherent(dev) && !dev_use_swiotlb(dev))
0917         return;
0918 
0919     phys = iommu_iova_to_phys(iommu_get_dma_domain(dev), dma_handle);
0920     if (!dev_is_dma_coherent(dev))
0921         arch_sync_dma_for_cpu(phys, size, dir);
0922 
0923     if (is_swiotlb_buffer(dev, phys))
0924         swiotlb_sync_single_for_cpu(dev, phys, size, dir);
0925 }
0926 
0927 static void iommu_dma_sync_single_for_device(struct device *dev,
0928         dma_addr_t dma_handle, size_t size, enum dma_data_direction dir)
0929 {
0930     phys_addr_t phys;
0931 
0932     if (dev_is_dma_coherent(dev) && !dev_use_swiotlb(dev))
0933         return;
0934 
0935     phys = iommu_iova_to_phys(iommu_get_dma_domain(dev), dma_handle);
0936     if (is_swiotlb_buffer(dev, phys))
0937         swiotlb_sync_single_for_device(dev, phys, size, dir);
0938 
0939     if (!dev_is_dma_coherent(dev))
0940         arch_sync_dma_for_device(phys, size, dir);
0941 }
0942 
0943 static void iommu_dma_sync_sg_for_cpu(struct device *dev,
0944         struct scatterlist *sgl, int nelems,
0945         enum dma_data_direction dir)
0946 {
0947     struct scatterlist *sg;
0948     int i;
0949 
0950     if (dev_use_swiotlb(dev))
0951         for_each_sg(sgl, sg, nelems, i)
0952             iommu_dma_sync_single_for_cpu(dev, sg_dma_address(sg),
0953                               sg->length, dir);
0954     else if (!dev_is_dma_coherent(dev))
0955         for_each_sg(sgl, sg, nelems, i)
0956             arch_sync_dma_for_cpu(sg_phys(sg), sg->length, dir);
0957 }
0958 
0959 static void iommu_dma_sync_sg_for_device(struct device *dev,
0960         struct scatterlist *sgl, int nelems,
0961         enum dma_data_direction dir)
0962 {
0963     struct scatterlist *sg;
0964     int i;
0965 
0966     if (dev_use_swiotlb(dev))
0967         for_each_sg(sgl, sg, nelems, i)
0968             iommu_dma_sync_single_for_device(dev,
0969                              sg_dma_address(sg),
0970                              sg->length, dir);
0971     else if (!dev_is_dma_coherent(dev))
0972         for_each_sg(sgl, sg, nelems, i)
0973             arch_sync_dma_for_device(sg_phys(sg), sg->length, dir);
0974 }
0975 
0976 static dma_addr_t iommu_dma_map_page(struct device *dev, struct page *page,
0977         unsigned long offset, size_t size, enum dma_data_direction dir,
0978         unsigned long attrs)
0979 {
0980     phys_addr_t phys = page_to_phys(page) + offset;
0981     bool coherent = dev_is_dma_coherent(dev);
0982     int prot = dma_info_to_prot(dir, coherent, attrs);
0983     struct iommu_domain *domain = iommu_get_dma_domain(dev);
0984     struct iommu_dma_cookie *cookie = domain->iova_cookie;
0985     struct iova_domain *iovad = &cookie->iovad;
0986     dma_addr_t iova, dma_mask = dma_get_mask(dev);
0987 
0988     /*
0989      * If both the physical buffer start address and size are
0990      * page aligned, we don't need to use a bounce page.
0991      */
0992     if (dev_use_swiotlb(dev) && iova_offset(iovad, phys | size)) {
0993         void *padding_start;
0994         size_t padding_size, aligned_size;
0995 
0996         if (!is_swiotlb_active(dev)) {
0997             dev_warn_once(dev, "DMA bounce buffers are inactive, unable to map unaligned transaction.\n");
0998             return DMA_MAPPING_ERROR;
0999         }
1000 
1001         aligned_size = iova_align(iovad, size);
1002         phys = swiotlb_tbl_map_single(dev, phys, size, aligned_size,
1003                           iova_mask(iovad), dir, attrs);
1004 
1005         if (phys == DMA_MAPPING_ERROR)
1006             return DMA_MAPPING_ERROR;
1007 
1008         /* Cleanup the padding area. */
1009         padding_start = phys_to_virt(phys);
1010         padding_size = aligned_size;
1011 
1012         if (!(attrs & DMA_ATTR_SKIP_CPU_SYNC) &&
1013             (dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL)) {
1014             padding_start += size;
1015             padding_size -= size;
1016         }
1017 
1018         memset(padding_start, 0, padding_size);
1019     }
1020 
1021     if (!coherent && !(attrs & DMA_ATTR_SKIP_CPU_SYNC))
1022         arch_sync_dma_for_device(phys, size, dir);
1023 
1024     iova = __iommu_dma_map(dev, phys, size, prot, dma_mask);
1025     if (iova == DMA_MAPPING_ERROR && is_swiotlb_buffer(dev, phys))
1026         swiotlb_tbl_unmap_single(dev, phys, size, dir, attrs);
1027     return iova;
1028 }
1029 
1030 static void iommu_dma_unmap_page(struct device *dev, dma_addr_t dma_handle,
1031         size_t size, enum dma_data_direction dir, unsigned long attrs)
1032 {
1033     struct iommu_domain *domain = iommu_get_dma_domain(dev);
1034     phys_addr_t phys;
1035 
1036     phys = iommu_iova_to_phys(domain, dma_handle);
1037     if (WARN_ON(!phys))
1038         return;
1039 
1040     if (!(attrs & DMA_ATTR_SKIP_CPU_SYNC) && !dev_is_dma_coherent(dev))
1041         arch_sync_dma_for_cpu(phys, size, dir);
1042 
1043     __iommu_dma_unmap(dev, dma_handle, size);
1044 
1045     if (unlikely(is_swiotlb_buffer(dev, phys)))
1046         swiotlb_tbl_unmap_single(dev, phys, size, dir, attrs);
1047 }
1048 
1049 /*
1050  * Prepare a successfully-mapped scatterlist to give back to the caller.
1051  *
1052  * At this point the segments are already laid out by iommu_dma_map_sg() to
1053  * avoid individually crossing any boundaries, so we merely need to check a
1054  * segment's start address to avoid concatenating across one.
1055  */
1056 static int __finalise_sg(struct device *dev, struct scatterlist *sg, int nents,
1057         dma_addr_t dma_addr)
1058 {
1059     struct scatterlist *s, *cur = sg;
1060     unsigned long seg_mask = dma_get_seg_boundary(dev);
1061     unsigned int cur_len = 0, max_len = dma_get_max_seg_size(dev);
1062     int i, count = 0;
1063 
1064     for_each_sg(sg, s, nents, i) {
1065         /* Restore this segment's original unaligned fields first */
1066         dma_addr_t s_dma_addr = sg_dma_address(s);
1067         unsigned int s_iova_off = sg_dma_address(s);
1068         unsigned int s_length = sg_dma_len(s);
1069         unsigned int s_iova_len = s->length;
1070 
1071         sg_dma_address(s) = DMA_MAPPING_ERROR;
1072         sg_dma_len(s) = 0;
1073 
1074         if (sg_is_dma_bus_address(s)) {
1075             if (i > 0)
1076                 cur = sg_next(cur);
1077 
1078             sg_dma_unmark_bus_address(s);
1079             sg_dma_address(cur) = s_dma_addr;
1080             sg_dma_len(cur) = s_length;
1081             sg_dma_mark_bus_address(cur);
1082             count++;
1083             cur_len = 0;
1084             continue;
1085         }
1086 
1087         s->offset += s_iova_off;
1088         s->length = s_length;
1089 
1090         /*
1091          * Now fill in the real DMA data. If...
1092          * - there is a valid output segment to append to
1093          * - and this segment starts on an IOVA page boundary
1094          * - but doesn't fall at a segment boundary
1095          * - and wouldn't make the resulting output segment too long
1096          */
1097         if (cur_len && !s_iova_off && (dma_addr & seg_mask) &&
1098             (max_len - cur_len >= s_length)) {
1099             /* ...then concatenate it with the previous one */
1100             cur_len += s_length;
1101         } else {
1102             /* Otherwise start the next output segment */
1103             if (i > 0)
1104                 cur = sg_next(cur);
1105             cur_len = s_length;
1106             count++;
1107 
1108             sg_dma_address(cur) = dma_addr + s_iova_off;
1109         }
1110 
1111         sg_dma_len(cur) = cur_len;
1112         dma_addr += s_iova_len;
1113 
1114         if (s_length + s_iova_off < s_iova_len)
1115             cur_len = 0;
1116     }
1117     return count;
1118 }
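/*
 * Editor's worked example (4K IOVA granule assumed): two 0x1000-byte
 * segments laid out back-to-back by iommu_dma_map_sg(), with the
 * allocation starting at dma_addr 0x10000, are concatenated here into one
 * output segment with dma_address 0x10000 and dma_length 0x2000, provided
 * 0x2000 fits the device's maximum segment size and the second segment
 * does not begin exactly on a multiple of the segment boundary mask.
 */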
1119 
1120 /*
1121  * If mapping failed, then just restore the original list,
1122  * but making sure the DMA fields are invalidated.
1123  */
1124 static void __invalidate_sg(struct scatterlist *sg, int nents)
1125 {
1126     struct scatterlist *s;
1127     int i;
1128 
1129     for_each_sg(sg, s, nents, i) {
1130         if (sg_is_dma_bus_address(s)) {
1131             sg_dma_unmark_bus_address(s);
1132         } else {
1133             if (sg_dma_address(s) != DMA_MAPPING_ERROR)
1134                 s->offset += sg_dma_address(s);
1135             if (sg_dma_len(s))
1136                 s->length = sg_dma_len(s);
1137         }
1138         sg_dma_address(s) = DMA_MAPPING_ERROR;
1139         sg_dma_len(s) = 0;
1140     }
1141 }
1142 
1143 static void iommu_dma_unmap_sg_swiotlb(struct device *dev, struct scatterlist *sg,
1144         int nents, enum dma_data_direction dir, unsigned long attrs)
1145 {
1146     struct scatterlist *s;
1147     int i;
1148 
1149     for_each_sg(sg, s, nents, i)
1150         iommu_dma_unmap_page(dev, sg_dma_address(s),
1151                 sg_dma_len(s), dir, attrs);
1152 }
1153 
1154 static int iommu_dma_map_sg_swiotlb(struct device *dev, struct scatterlist *sg,
1155         int nents, enum dma_data_direction dir, unsigned long attrs)
1156 {
1157     struct scatterlist *s;
1158     int i;
1159 
1160     for_each_sg(sg, s, nents, i) {
1161         sg_dma_address(s) = iommu_dma_map_page(dev, sg_page(s),
1162                 s->offset, s->length, dir, attrs);
1163         if (sg_dma_address(s) == DMA_MAPPING_ERROR)
1164             goto out_unmap;
1165         sg_dma_len(s) = s->length;
1166     }
1167 
1168     return nents;
1169 
1170 out_unmap:
1171     iommu_dma_unmap_sg_swiotlb(dev, sg, i, dir, attrs | DMA_ATTR_SKIP_CPU_SYNC);
1172     return -EIO;
1173 }
1174 
1175 /*
1176  * The DMA API client is passing in a scatterlist which could describe
1177  * any old buffer layout, but the IOMMU API requires everything to be
1178  * aligned to IOMMU pages. Hence the need for this complicated bit of
1179  * impedance-matching, to be able to hand off a suitably-aligned list,
1180  * but still preserve the original offsets and sizes for the caller.
1181  */
1182 static int iommu_dma_map_sg(struct device *dev, struct scatterlist *sg,
1183         int nents, enum dma_data_direction dir, unsigned long attrs)
1184 {
1185     struct iommu_domain *domain = iommu_get_dma_domain(dev);
1186     struct iommu_dma_cookie *cookie = domain->iova_cookie;
1187     struct iova_domain *iovad = &cookie->iovad;
1188     struct scatterlist *s, *prev = NULL;
1189     int prot = dma_info_to_prot(dir, dev_is_dma_coherent(dev), attrs);
1190     struct pci_p2pdma_map_state p2pdma_state = {};
1191     enum pci_p2pdma_map_type map;
1192     dma_addr_t iova;
1193     size_t iova_len = 0;
1194     unsigned long mask = dma_get_seg_boundary(dev);
1195     ssize_t ret;
1196     int i;
1197 
1198     if (static_branch_unlikely(&iommu_deferred_attach_enabled)) {
1199         ret = iommu_deferred_attach(dev, domain);
1200         if (ret)
1201             goto out;
1202     }
1203 
1204     if (dev_use_swiotlb(dev))
1205         return iommu_dma_map_sg_swiotlb(dev, sg, nents, dir, attrs);
1206 
1207     if (!(attrs & DMA_ATTR_SKIP_CPU_SYNC))
1208         iommu_dma_sync_sg_for_device(dev, sg, nents, dir);
1209 
1210     /*
1211      * Work out how much IOVA space we need, and align the segments to
1212      * IOVA granules for the IOMMU driver to handle. With some clever
1213      * trickery we can modify the list in-place, but reversibly, by
1214      * stashing the unaligned parts in the as-yet-unused DMA fields.
1215      */
1216     for_each_sg(sg, s, nents, i) {
1217         size_t s_iova_off = iova_offset(iovad, s->offset);
1218         size_t s_length = s->length;
1219         size_t pad_len = (mask - iova_len + 1) & mask;
1220 
1221         if (is_pci_p2pdma_page(sg_page(s))) {
1222             map = pci_p2pdma_map_segment(&p2pdma_state, dev, s);
1223             switch (map) {
1224             case PCI_P2PDMA_MAP_BUS_ADDR:
1225                 /*
1226                  * iommu_map_sg() will skip this segment as
1227                  * it is marked as a bus address,
1228                  * __finalise_sg() will copy the dma address
1229                  * into the output segment.
1230                  */
1231                 continue;
1232             case PCI_P2PDMA_MAP_THRU_HOST_BRIDGE:
1233                 /*
1234                  * Mapping through host bridge should be
1235                  * mapped with regular IOVAs, thus we
1236                  * do nothing here and continue below.
1237                  */
1238                 break;
1239             default:
1240                 ret = -EREMOTEIO;
1241                 goto out_restore_sg;
1242             }
1243         }
1244 
1245         sg_dma_address(s) = s_iova_off;
1246         sg_dma_len(s) = s_length;
1247         s->offset -= s_iova_off;
1248         s_length = iova_align(iovad, s_length + s_iova_off);
1249         s->length = s_length;
1250 
1251         /*
1252          * Due to the alignment of our single IOVA allocation, we can
1253          * depend on these assumptions about the segment boundary mask:
1254          * - If mask size >= IOVA size, then the IOVA range cannot
1255          *   possibly fall across a boundary, so we don't care.
1256          * - If mask size < IOVA size, then the IOVA range must start
1257          *   exactly on a boundary, therefore we can lay things out
1258          *   based purely on segment lengths without needing to know
1259          *   the actual addresses beforehand.
1260          * - The mask must be a power of 2, so pad_len == 0 if
1261          *   iova_len == 0, thus we cannot dereference prev the first
1262          *   time through here (i.e. before it has a meaningful value).
1263          */
1264         if (pad_len && pad_len < s_length - 1) {
1265             prev->length += pad_len;
1266             iova_len += pad_len;
1267         }
1268 
1269         iova_len += s_length;
1270         prev = s;
1271     }
1272 
1273     if (!iova_len)
1274         return __finalise_sg(dev, sg, nents, 0);
1275 
1276     iova = iommu_dma_alloc_iova(domain, iova_len, dma_get_mask(dev), dev);
1277     if (!iova) {
1278         ret = -ENOMEM;
1279         goto out_restore_sg;
1280     }
1281 
1282     /*
1283      * We'll leave any physical concatenation to the IOMMU driver's
1284      * implementation - it knows better than we do.
1285      */
1286     ret = iommu_map_sg_atomic(domain, iova, sg, nents, prot);
1287     if (ret < 0 || ret < iova_len)
1288         goto out_free_iova;
1289 
1290     return __finalise_sg(dev, sg, nents, iova);
1291 
1292 out_free_iova:
1293     iommu_dma_free_iova(cookie, iova, iova_len, NULL);
1294 out_restore_sg:
1295     __invalidate_sg(sg, nents);
1296 out:
1297     if (ret != -ENOMEM && ret != -EREMOTEIO)
1298         return -EINVAL;
1299     return ret;
1300 }
1301 
1302 static void iommu_dma_unmap_sg(struct device *dev, struct scatterlist *sg,
1303         int nents, enum dma_data_direction dir, unsigned long attrs)
1304 {
1305     dma_addr_t end = 0, start;
1306     struct scatterlist *tmp;
1307     int i;
1308 
1309     if (dev_use_swiotlb(dev)) {
1310         iommu_dma_unmap_sg_swiotlb(dev, sg, nents, dir, attrs);
1311         return;
1312     }
1313 
1314     if (!(attrs & DMA_ATTR_SKIP_CPU_SYNC))
1315         iommu_dma_sync_sg_for_cpu(dev, sg, nents, dir);
1316 
1317     /*
1318      * The scatterlist segments are mapped into a single
1319      * contiguous IOVA allocation, the start and end points
1320      * just have to be determined.
1321      */
1322     for_each_sg(sg, tmp, nents, i) {
1323         if (sg_is_dma_bus_address(tmp)) {
1324             sg_dma_unmark_bus_address(tmp);
1325             continue;
1326         }
1327 
1328         if (sg_dma_len(tmp) == 0)
1329             break;
1330 
1331         start = sg_dma_address(tmp);
1332         break;
1333     }
1334 
1335     nents -= i;
1336     for_each_sg(tmp, tmp, nents, i) {
1337         if (sg_is_dma_bus_address(tmp)) {
1338             sg_dma_unmark_bus_address(tmp);
1339             continue;
1340         }
1341 
1342         if (sg_dma_len(tmp) == 0)
1343             break;
1344 
1345         end = sg_dma_address(tmp) + sg_dma_len(tmp);
1346     }
1347 
1348     if (end)
1349         __iommu_dma_unmap(dev, start, end - start);
1350 }
1351 
1352 static dma_addr_t iommu_dma_map_resource(struct device *dev, phys_addr_t phys,
1353         size_t size, enum dma_data_direction dir, unsigned long attrs)
1354 {
1355     return __iommu_dma_map(dev, phys, size,
1356             dma_info_to_prot(dir, false, attrs) | IOMMU_MMIO,
1357             dma_get_mask(dev));
1358 }
1359 
1360 static void iommu_dma_unmap_resource(struct device *dev, dma_addr_t handle,
1361         size_t size, enum dma_data_direction dir, unsigned long attrs)
1362 {
1363     __iommu_dma_unmap(dev, handle, size);
1364 }
1365 
1366 static void __iommu_dma_free(struct device *dev, size_t size, void *cpu_addr)
1367 {
1368     size_t alloc_size = PAGE_ALIGN(size);
1369     int count = alloc_size >> PAGE_SHIFT;
1370     struct page *page = NULL, **pages = NULL;
1371 
1372     /* Non-coherent atomic allocation? Easy */
1373     if (IS_ENABLED(CONFIG_DMA_DIRECT_REMAP) &&
1374         dma_free_from_pool(dev, cpu_addr, alloc_size))
1375         return;
1376 
1377     if (is_vmalloc_addr(cpu_addr)) {
1378         /*
0379          * If the address is remapped, then it's either non-coherent
1380          * or highmem CMA, or an iommu_dma_alloc_remap() construction.
1381          */
1382         pages = dma_common_find_pages(cpu_addr);
1383         if (!pages)
1384             page = vmalloc_to_page(cpu_addr);
1385         dma_common_free_remap(cpu_addr, alloc_size);
1386     } else {
1387         /* Lowmem means a coherent atomic or CMA allocation */
1388         page = virt_to_page(cpu_addr);
1389     }
1390 
1391     if (pages)
1392         __iommu_dma_free_pages(pages, count);
1393     if (page)
1394         dma_free_contiguous(dev, page, alloc_size);
1395 }
1396 
1397 static void iommu_dma_free(struct device *dev, size_t size, void *cpu_addr,
1398         dma_addr_t handle, unsigned long attrs)
1399 {
1400     __iommu_dma_unmap(dev, handle, size);
1401     __iommu_dma_free(dev, size, cpu_addr);
1402 }
1403 
1404 static void *iommu_dma_alloc_pages(struct device *dev, size_t size,
1405         struct page **pagep, gfp_t gfp, unsigned long attrs)
1406 {
1407     bool coherent = dev_is_dma_coherent(dev);
1408     size_t alloc_size = PAGE_ALIGN(size);
1409     int node = dev_to_node(dev);
1410     struct page *page = NULL;
1411     void *cpu_addr;
1412 
1413     page = dma_alloc_contiguous(dev, alloc_size, gfp);
1414     if (!page)
1415         page = alloc_pages_node(node, gfp, get_order(alloc_size));
1416     if (!page)
1417         return NULL;
1418 
1419     if (!coherent || PageHighMem(page)) {
1420         pgprot_t prot = dma_pgprot(dev, PAGE_KERNEL, attrs);
1421 
1422         cpu_addr = dma_common_contiguous_remap(page, alloc_size,
1423                 prot, __builtin_return_address(0));
1424         if (!cpu_addr)
1425             goto out_free_pages;
1426 
1427         if (!coherent)
1428             arch_dma_prep_coherent(page, size);
1429     } else {
1430         cpu_addr = page_address(page);
1431     }
1432 
1433     *pagep = page;
1434     memset(cpu_addr, 0, alloc_size);
1435     return cpu_addr;
1436 out_free_pages:
1437     dma_free_contiguous(dev, page, alloc_size);
1438     return NULL;
1439 }
1440 
1441 static void *iommu_dma_alloc(struct device *dev, size_t size,
1442         dma_addr_t *handle, gfp_t gfp, unsigned long attrs)
1443 {
1444     bool coherent = dev_is_dma_coherent(dev);
1445     int ioprot = dma_info_to_prot(DMA_BIDIRECTIONAL, coherent, attrs);
1446     struct page *page = NULL;
1447     void *cpu_addr;
1448 
1449     gfp |= __GFP_ZERO;
1450 
1451     if (gfpflags_allow_blocking(gfp) &&
1452         !(attrs & DMA_ATTR_FORCE_CONTIGUOUS)) {
1453         return iommu_dma_alloc_remap(dev, size, handle, gfp,
1454                 dma_pgprot(dev, PAGE_KERNEL, attrs), attrs);
1455     }
1456 
1457     if (IS_ENABLED(CONFIG_DMA_DIRECT_REMAP) &&
1458         !gfpflags_allow_blocking(gfp) && !coherent)
1459         page = dma_alloc_from_pool(dev, PAGE_ALIGN(size), &cpu_addr,
1460                            gfp, NULL);
1461     else
1462         cpu_addr = iommu_dma_alloc_pages(dev, size, &page, gfp, attrs);
1463     if (!cpu_addr)
1464         return NULL;
1465 
1466     *handle = __iommu_dma_map(dev, page_to_phys(page), size, ioprot,
1467             dev->coherent_dma_mask);
1468     if (*handle == DMA_MAPPING_ERROR) {
1469         __iommu_dma_free(dev, size, cpu_addr);
1470         return NULL;
1471     }
1472 
1473     return cpu_addr;
1474 }
1475 
1476 static int iommu_dma_mmap(struct device *dev, struct vm_area_struct *vma,
1477         void *cpu_addr, dma_addr_t dma_addr, size_t size,
1478         unsigned long attrs)
1479 {
1480     unsigned long nr_pages = PAGE_ALIGN(size) >> PAGE_SHIFT;
1481     unsigned long pfn, off = vma->vm_pgoff;
1482     int ret;
1483 
1484     vma->vm_page_prot = dma_pgprot(dev, vma->vm_page_prot, attrs);
1485 
1486     if (dma_mmap_from_dev_coherent(dev, vma, cpu_addr, size, &ret))
1487         return ret;
1488 
1489     if (off >= nr_pages || vma_pages(vma) > nr_pages - off)
1490         return -ENXIO;
1491 
1492     if (is_vmalloc_addr(cpu_addr)) {
1493         struct page **pages = dma_common_find_pages(cpu_addr);
1494 
1495         if (pages)
1496             return vm_map_pages(vma, pages, nr_pages);
1497         pfn = vmalloc_to_pfn(cpu_addr);
1498     } else {
1499         pfn = page_to_pfn(virt_to_page(cpu_addr));
1500     }
1501 
1502     return remap_pfn_range(vma, vma->vm_start, pfn + off,
1503                    vma->vm_end - vma->vm_start,
1504                    vma->vm_page_prot);
1505 }
1506 
1507 static int iommu_dma_get_sgtable(struct device *dev, struct sg_table *sgt,
1508         void *cpu_addr, dma_addr_t dma_addr, size_t size,
1509         unsigned long attrs)
1510 {
1511     struct page *page;
1512     int ret;
1513 
1514     if (is_vmalloc_addr(cpu_addr)) {
1515         struct page **pages = dma_common_find_pages(cpu_addr);
1516 
1517         if (pages) {
1518             return sg_alloc_table_from_pages(sgt, pages,
1519                     PAGE_ALIGN(size) >> PAGE_SHIFT,
1520                     0, size, GFP_KERNEL);
1521         }
1522 
1523         page = vmalloc_to_page(cpu_addr);
1524     } else {
1525         page = virt_to_page(cpu_addr);
1526     }
1527 
1528     ret = sg_alloc_table(sgt, 1, GFP_KERNEL);
1529     if (!ret)
1530         sg_set_page(sgt->sgl, page, PAGE_ALIGN(size), 0);
1531     return ret;
1532 }
1533 
1534 static unsigned long iommu_dma_get_merge_boundary(struct device *dev)
1535 {
1536     struct iommu_domain *domain = iommu_get_dma_domain(dev);
1537 
1538     return (1UL << __ffs(domain->pgsize_bitmap)) - 1;
1539 }
1540 
1541 static size_t iommu_dma_opt_mapping_size(void)
1542 {
1543     return iova_rcache_range();
1544 }
1545 
1546 static const struct dma_map_ops iommu_dma_ops = {
1547     .flags          = DMA_F_PCI_P2PDMA_SUPPORTED,
1548     .alloc          = iommu_dma_alloc,
1549     .free           = iommu_dma_free,
1550     .alloc_pages        = dma_common_alloc_pages,
1551     .free_pages     = dma_common_free_pages,
1552     .alloc_noncontiguous    = iommu_dma_alloc_noncontiguous,
1553     .free_noncontiguous = iommu_dma_free_noncontiguous,
1554     .mmap           = iommu_dma_mmap,
1555     .get_sgtable        = iommu_dma_get_sgtable,
1556     .map_page       = iommu_dma_map_page,
1557     .unmap_page     = iommu_dma_unmap_page,
1558     .map_sg         = iommu_dma_map_sg,
1559     .unmap_sg       = iommu_dma_unmap_sg,
1560     .sync_single_for_cpu    = iommu_dma_sync_single_for_cpu,
1561     .sync_single_for_device = iommu_dma_sync_single_for_device,
1562     .sync_sg_for_cpu    = iommu_dma_sync_sg_for_cpu,
1563     .sync_sg_for_device = iommu_dma_sync_sg_for_device,
1564     .map_resource       = iommu_dma_map_resource,
1565     .unmap_resource     = iommu_dma_unmap_resource,
1566     .get_merge_boundary = iommu_dma_get_merge_boundary,
1567     .opt_mapping_size   = iommu_dma_opt_mapping_size,
1568 };
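/*
 * Editor's note: these ops are never called directly by drivers; once
 * iommu_setup_dma_ops() below installs them in dev->dma_ops, generic DMA
 * API calls such as dma_map_page() or dma_map_sg() are dispatched here.
 */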
1569 
1570 /*
1571  * The IOMMU core code allocates the default DMA domain, which the underlying
1572  * IOMMU driver needs to support via the dma-iommu layer.
1573  */
1574 void iommu_setup_dma_ops(struct device *dev, u64 dma_base, u64 dma_limit)
1575 {
1576     struct iommu_domain *domain = iommu_get_domain_for_dev(dev);
1577 
1578     if (!domain)
1579         goto out_err;
1580 
1581     /*
1582      * The IOMMU core code allocates the default DMA domain, which the
1583      * underlying IOMMU driver needs to support via the dma-iommu layer.
1584      */
1585     if (iommu_is_dma_domain(domain)) {
1586         if (iommu_dma_init_domain(domain, dma_base, dma_limit, dev))
1587             goto out_err;
1588         dev->dma_ops = &iommu_dma_ops;
1589     }
1590 
1591     return;
1592 out_err:
1593      pr_warn("Failed to set up IOMMU for device %s; retaining platform DMA ops\n",
1594          dev_name(dev));
1595 }
1596 EXPORT_SYMBOL_GPL(iommu_setup_dma_ops);
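/*
 * Editor's sketch (simplified from architecture code such as arm64's
 * arch_setup_dma_ops(); treat the exact call site as an assumption):
 * architectures call this once a device is known to sit behind an IOMMU,
 * roughly:
 *
 *	if (device_iommu_mapped(dev))
 *		iommu_setup_dma_ops(dev, dma_base, dma_base + size - 1);
 */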
1597 
1598 static struct iommu_dma_msi_page *iommu_dma_get_msi_page(struct device *dev,
1599         phys_addr_t msi_addr, struct iommu_domain *domain)
1600 {
1601     struct iommu_dma_cookie *cookie = domain->iova_cookie;
1602     struct iommu_dma_msi_page *msi_page;
1603     dma_addr_t iova;
1604     int prot = IOMMU_WRITE | IOMMU_NOEXEC | IOMMU_MMIO;
1605     size_t size = cookie_msi_granule(cookie);
1606 
1607     msi_addr &= ~(phys_addr_t)(size - 1);
1608     list_for_each_entry(msi_page, &cookie->msi_page_list, list)
1609         if (msi_page->phys == msi_addr)
1610             return msi_page;
1611 
1612     msi_page = kzalloc(sizeof(*msi_page), GFP_KERNEL);
1613     if (!msi_page)
1614         return NULL;
1615 
1616     iova = iommu_dma_alloc_iova(domain, size, dma_get_mask(dev), dev);
1617     if (!iova)
1618         goto out_free_page;
1619 
1620     if (iommu_map(domain, iova, msi_addr, size, prot))
1621         goto out_free_iova;
1622 
1623     INIT_LIST_HEAD(&msi_page->list);
1624     msi_page->phys = msi_addr;
1625     msi_page->iova = iova;
1626     list_add(&msi_page->list, &cookie->msi_page_list);
1627     return msi_page;
1628 
1629 out_free_iova:
1630     iommu_dma_free_iova(cookie, iova, size, NULL);
1631 out_free_page:
1632     kfree(msi_page);
1633     return NULL;
1634 }
1635 
1636 int iommu_dma_prepare_msi(struct msi_desc *desc, phys_addr_t msi_addr)
1637 {
1638     struct device *dev = msi_desc_to_dev(desc);
1639     struct iommu_domain *domain = iommu_get_domain_for_dev(dev);
1640     struct iommu_dma_msi_page *msi_page;
1641     static DEFINE_MUTEX(msi_prepare_lock); /* see below */
1642 
1643     if (!domain || !domain->iova_cookie) {
1644         desc->iommu_cookie = NULL;
1645         return 0;
1646     }
1647 
1648     /*
1649      * In fact the whole prepare operation should already be serialised by
1650      * irq_domain_mutex further up the callchain, but that's pretty subtle
1651      * on its own, so consider this locking as failsafe documentation...
1652      */
1653     mutex_lock(&msi_prepare_lock);
1654     msi_page = iommu_dma_get_msi_page(dev, msi_addr, domain);
1655     mutex_unlock(&msi_prepare_lock);
1656 
1657     msi_desc_set_iommu_cookie(desc, msi_page);
1658 
1659     if (!msi_page)
1660         return -ENOMEM;
1661     return 0;
1662 }
1663 
1664 void iommu_dma_compose_msi_msg(struct msi_desc *desc,
1665                    struct msi_msg *msg)
1666 {
1667     struct device *dev = msi_desc_to_dev(desc);
1668     const struct iommu_domain *domain = iommu_get_domain_for_dev(dev);
1669     const struct iommu_dma_msi_page *msi_page;
1670 
1671     msi_page = msi_desc_get_iommu_cookie(desc);
1672 
1673     if (!domain || !domain->iova_cookie || WARN_ON(!msi_page))
1674         return;
1675 
1676     msg->address_hi = upper_32_bits(msi_page->iova);
1677     msg->address_lo &= cookie_msi_granule(domain->iova_cookie) - 1;
1678     msg->address_lo += lower_32_bits(msi_page->iova);
1679 }
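/*
 * Editor's note: MSI controller drivers (the GICv3 ITS, for example) call
 * iommu_dma_prepare_msi() when an MSI is allocated and
 * iommu_dma_compose_msi_msg() when the message is written, so that the
 * doorbell address programmed into the device is the remapped IOVA rather
 * than the physical doorbell address.
 */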
1680 
1681 static int iommu_dma_init(void)
1682 {
1683     if (is_kdump_kernel())
1684         static_branch_enable(&iommu_deferred_attach_enabled);
1685 
1686     return iova_cache_get();
1687 }
1688 arch_initcall(iommu_dma_init);