0001 // SPDX-License-Identifier: GPL-2.0-only
0002 /*
0003  * CPU-agnostic ARM page table allocator.
0004  *
0005  * Copyright (C) 2014 ARM Limited
0006  *
0007  * Author: Will Deacon <will.deacon@arm.com>
0008  */
0009 
0010 #define pr_fmt(fmt) "arm-lpae io-pgtable: " fmt
0011 
0012 #include <linux/atomic.h>
0013 #include <linux/bitops.h>
0014 #include <linux/io-pgtable.h>
0015 #include <linux/kernel.h>
0016 #include <linux/sizes.h>
0017 #include <linux/slab.h>
0018 #include <linux/types.h>
0019 #include <linux/dma-mapping.h>
0020 
0021 #include <asm/barrier.h>
0022 
0023 #include "io-pgtable-arm.h"
0024 
0025 #define ARM_LPAE_MAX_ADDR_BITS      52
0026 #define ARM_LPAE_S2_MAX_CONCAT_PAGES    16
0027 #define ARM_LPAE_MAX_LEVELS     4
0028 
0029 /* Struct accessors */
0030 #define io_pgtable_to_data(x)                       \
0031     container_of((x), struct arm_lpae_io_pgtable, iop)
0032 
0033 #define io_pgtable_ops_to_data(x)                   \
0034     io_pgtable_to_data(io_pgtable_ops_to_pgtable(x))
0035 
0036 /*
0037  * Calculate the right shift amount to get to the portion describing level l
0038  * in a virtual address mapped by the pagetable in d.
0039  */
0040 #define ARM_LPAE_LVL_SHIFT(l,d)                     \
0041     (((ARM_LPAE_MAX_LEVELS - (l)) * (d)->bits_per_level) +      \
0042     ilog2(sizeof(arm_lpae_iopte)))
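/*
 * For example, with a 4K granule: sizeof(arm_lpae_iopte) is 8, so
 * bits_per_level = 12 - 3 = 9 and the shifts work out to 39/30/21/12
 * for levels 0-3, i.e. 512GB/1GB/2MB/4KB worth of VA per entry.
 */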
0043 
0044 #define ARM_LPAE_GRANULE(d)                     \
0045     (sizeof(arm_lpae_iopte) << (d)->bits_per_level)
0046 #define ARM_LPAE_PGD_SIZE(d)                        \
0047     (sizeof(arm_lpae_iopte) << (d)->pgd_bits)
0048 
0049 #define ARM_LPAE_PTES_PER_TABLE(d)                  \
0050     (ARM_LPAE_GRANULE(d) >> ilog2(sizeof(arm_lpae_iopte)))
0051 
0052 /*
0053  * Calculate the index at level l used to map virtual address a using the
0054  * pagetable in d.
0055  */
0056 #define ARM_LPAE_PGD_IDX(l,d)                       \
0057     ((l) == (d)->start_level ? (d)->pgd_bits - (d)->bits_per_level : 0)
0058 
0059 #define ARM_LPAE_LVL_IDX(a,l,d)                     \
0060     (((u64)(a) >> ARM_LPAE_LVL_SHIFT(l,d)) &            \
0061      ((1 << ((d)->bits_per_level + ARM_LPAE_PGD_IDX(l,d))) - 1))
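/*
 * At the start level the index may cover more than bits_per_level bits,
 * because the top-level table (the pgd) can be larger than one granule,
 * e.g. for concatenated stage-2 tables; ARM_LPAE_PGD_IDX() supplies
 * those extra bits.
 */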
0062 
0063 /* Calculate the block/page mapping size at level l for pagetable in d. */
0064 #define ARM_LPAE_BLOCK_SIZE(l,d)    (1ULL << ARM_LPAE_LVL_SHIFT(l,d))
0065 
0066 /* Page table bits */
0067 #define ARM_LPAE_PTE_TYPE_SHIFT     0
0068 #define ARM_LPAE_PTE_TYPE_MASK      0x3
0069 
0070 #define ARM_LPAE_PTE_TYPE_BLOCK     1
0071 #define ARM_LPAE_PTE_TYPE_TABLE     3
0072 #define ARM_LPAE_PTE_TYPE_PAGE      3
0073 
0074 #define ARM_LPAE_PTE_ADDR_MASK      GENMASK_ULL(47,12)
0075 
0076 #define ARM_LPAE_PTE_NSTABLE        (((arm_lpae_iopte)1) << 63)
0077 #define ARM_LPAE_PTE_XN         (((arm_lpae_iopte)3) << 53)
0078 #define ARM_LPAE_PTE_AF         (((arm_lpae_iopte)1) << 10)
0079 #define ARM_LPAE_PTE_SH_NS      (((arm_lpae_iopte)0) << 8)
0080 #define ARM_LPAE_PTE_SH_OS      (((arm_lpae_iopte)2) << 8)
0081 #define ARM_LPAE_PTE_SH_IS      (((arm_lpae_iopte)3) << 8)
0082 #define ARM_LPAE_PTE_NS         (((arm_lpae_iopte)1) << 5)
0083 #define ARM_LPAE_PTE_VALID      (((arm_lpae_iopte)1) << 0)
0084 
0085 #define ARM_LPAE_PTE_ATTR_LO_MASK   (((arm_lpae_iopte)0x3ff) << 2)
0086 /* Ignore the contiguous bit for block splitting */
0087 #define ARM_LPAE_PTE_ATTR_HI_MASK   (((arm_lpae_iopte)6) << 52)
0088 #define ARM_LPAE_PTE_ATTR_MASK      (ARM_LPAE_PTE_ATTR_LO_MASK |    \
0089                      ARM_LPAE_PTE_ATTR_HI_MASK)
0090 /* Software bit for solving coherency races */
0091 #define ARM_LPAE_PTE_SW_SYNC        (((arm_lpae_iopte)1) << 55)
0092 
0093 /* Stage-1 PTE */
0094 #define ARM_LPAE_PTE_AP_UNPRIV      (((arm_lpae_iopte)1) << 6)
0095 #define ARM_LPAE_PTE_AP_RDONLY      (((arm_lpae_iopte)2) << 6)
0096 #define ARM_LPAE_PTE_ATTRINDX_SHIFT 2
0097 #define ARM_LPAE_PTE_nG         (((arm_lpae_iopte)1) << 11)
0098 
0099 /* Stage-2 PTE */
0100 #define ARM_LPAE_PTE_HAP_FAULT      (((arm_lpae_iopte)0) << 6)
0101 #define ARM_LPAE_PTE_HAP_READ       (((arm_lpae_iopte)1) << 6)
0102 #define ARM_LPAE_PTE_HAP_WRITE      (((arm_lpae_iopte)2) << 6)
0103 #define ARM_LPAE_PTE_MEMATTR_OIWB   (((arm_lpae_iopte)0xf) << 2)
0104 #define ARM_LPAE_PTE_MEMATTR_NC     (((arm_lpae_iopte)0x5) << 2)
0105 #define ARM_LPAE_PTE_MEMATTR_DEV    (((arm_lpae_iopte)0x1) << 2)
0106 
0107 /* Register bits */
0108 #define ARM_LPAE_VTCR_SL0_MASK      0x3
0109 
0110 #define ARM_LPAE_TCR_T0SZ_SHIFT     0
0111 
0112 #define ARM_LPAE_VTCR_PS_SHIFT      16
0113 #define ARM_LPAE_VTCR_PS_MASK       0x7
0114 
0115 #define ARM_LPAE_MAIR_ATTR_SHIFT(n) ((n) << 3)
0116 #define ARM_LPAE_MAIR_ATTR_MASK     0xff
0117 #define ARM_LPAE_MAIR_ATTR_DEVICE   0x04
0118 #define ARM_LPAE_MAIR_ATTR_NC       0x44
0119 #define ARM_LPAE_MAIR_ATTR_INC_OWBRWA   0xf4
0120 #define ARM_LPAE_MAIR_ATTR_WBRWA    0xff
0121 #define ARM_LPAE_MAIR_ATTR_IDX_NC   0
0122 #define ARM_LPAE_MAIR_ATTR_IDX_CACHE    1
0123 #define ARM_LPAE_MAIR_ATTR_IDX_DEV  2
0124 #define ARM_LPAE_MAIR_ATTR_IDX_INC_OCACHE   3
0125 
0126 #define ARM_MALI_LPAE_TTBR_ADRMODE_TABLE (3u << 0)
0127 #define ARM_MALI_LPAE_TTBR_READ_INNER   BIT(2)
0128 #define ARM_MALI_LPAE_TTBR_SHARE_OUTER  BIT(4)
0129 
0130 #define ARM_MALI_LPAE_MEMATTR_IMP_DEF   0x88ULL
0131 #define ARM_MALI_LPAE_MEMATTR_WRITE_ALLOC 0x8DULL
0132 
0133 #define APPLE_DART_PTE_PROT_NO_WRITE (1<<7)
0134 #define APPLE_DART_PTE_PROT_NO_READ (1<<8)
0135 
0136 /* IOPTE accessors */
0137 #define iopte_deref(pte,d) __va(iopte_to_paddr(pte, d))
0138 
0139 #define iopte_type(pte)                 \
0140     (((pte) >> ARM_LPAE_PTE_TYPE_SHIFT) & ARM_LPAE_PTE_TYPE_MASK)
0141 
0142 #define iopte_prot(pte) ((pte) & ARM_LPAE_PTE_ATTR_MASK)
0143 
0144 struct arm_lpae_io_pgtable {
0145     struct io_pgtable   iop;
0146 
0147     int         pgd_bits;
0148     int         start_level;
0149     int         bits_per_level;
0150 
0151     void            *pgd;
0152 };
0153 
0154 typedef u64 arm_lpae_iopte;
0155 
0156 static inline bool iopte_leaf(arm_lpae_iopte pte, int lvl,
0157                   enum io_pgtable_fmt fmt)
0158 {
0159     if (lvl == (ARM_LPAE_MAX_LEVELS - 1) && fmt != ARM_MALI_LPAE)
0160         return iopte_type(pte) == ARM_LPAE_PTE_TYPE_PAGE;
0161 
0162     return iopte_type(pte) == ARM_LPAE_PTE_TYPE_BLOCK;
0163 }
0164 
0165 static arm_lpae_iopte paddr_to_iopte(phys_addr_t paddr,
0166                      struct arm_lpae_io_pgtable *data)
0167 {
0168     arm_lpae_iopte pte = paddr;
0169 
0170     /* Of the bits which overlap, either 51:48 or 15:12 are always RES0 */
0171     return (pte | (pte >> (48 - 12))) & ARM_LPAE_PTE_ADDR_MASK;
0172 }
0173 
0174 static phys_addr_t iopte_to_paddr(arm_lpae_iopte pte,
0175                   struct arm_lpae_io_pgtable *data)
0176 {
0177     u64 paddr = pte & ARM_LPAE_PTE_ADDR_MASK;
0178 
0179     if (ARM_LPAE_GRANULE(data) < SZ_64K)
0180         return paddr;
0181 
0182     /* Rotate the packed high-order bits back to the top */
0183     return (paddr | (paddr << (48 - 12))) & (ARM_LPAE_PTE_ADDR_MASK << 4);
0184 }
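/*
 * With a 64K granule and a 52-bit output address space, PA bits 51:48
 * live in PTE bits 15:12 (which are otherwise RES0 for 64K-aligned
 * addresses). The two helpers above rotate those four bits between the
 * two positions with a 36-bit (48 - 12) shift.
 */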
0185 
0186 static bool selftest_running = false;
0187 
0188 static dma_addr_t __arm_lpae_dma_addr(void *pages)
0189 {
0190     return (dma_addr_t)virt_to_phys(pages);
0191 }
0192 
0193 static void *__arm_lpae_alloc_pages(size_t size, gfp_t gfp,
0194                     struct io_pgtable_cfg *cfg)
0195 {
0196     struct device *dev = cfg->iommu_dev;
0197     int order = get_order(size);
0198     struct page *p;
0199     dma_addr_t dma;
0200     void *pages;
0201 
0202     VM_BUG_ON((gfp & __GFP_HIGHMEM));
0203     p = alloc_pages_node(dev ? dev_to_node(dev) : NUMA_NO_NODE,
0204                  gfp | __GFP_ZERO, order);
0205     if (!p)
0206         return NULL;
0207 
0208     pages = page_address(p);
0209     if (!cfg->coherent_walk) {
0210         dma = dma_map_single(dev, pages, size, DMA_TO_DEVICE);
0211         if (dma_mapping_error(dev, dma))
0212             goto out_free;
0213         /*
0214          * We depend on the IOMMU being able to work with any physical
0215          * address directly, so if the DMA layer suggests otherwise by
0216          * translating or truncating them, that bodes very badly...
0217          */
0218         if (dma != virt_to_phys(pages))
0219             goto out_unmap;
0220     }
0221 
0222     return pages;
0223 
0224 out_unmap:
0225     dev_err(dev, "Cannot accommodate DMA translation for IOMMU page tables\n");
0226     dma_unmap_single(dev, dma, size, DMA_TO_DEVICE);
0227 out_free:
0228     __free_pages(p, order);
0229     return NULL;
0230 }
0231 
0232 static void __arm_lpae_free_pages(void *pages, size_t size,
0233                   struct io_pgtable_cfg *cfg)
0234 {
0235     if (!cfg->coherent_walk)
0236         dma_unmap_single(cfg->iommu_dev, __arm_lpae_dma_addr(pages),
0237                  size, DMA_TO_DEVICE);
0238     free_pages((unsigned long)pages, get_order(size));
0239 }
0240 
0241 static void __arm_lpae_sync_pte(arm_lpae_iopte *ptep, int num_entries,
0242                 struct io_pgtable_cfg *cfg)
0243 {
0244     dma_sync_single_for_device(cfg->iommu_dev, __arm_lpae_dma_addr(ptep),
0245                    sizeof(*ptep) * num_entries, DMA_TO_DEVICE);
0246 }
0247 
0248 static void __arm_lpae_clear_pte(arm_lpae_iopte *ptep, struct io_pgtable_cfg *cfg)
0249 {
0250 
0251     *ptep = 0;
0252 
0253     if (!cfg->coherent_walk)
0254         __arm_lpae_sync_pte(ptep, 1, cfg);
0255 }
0256 
0257 static size_t __arm_lpae_unmap(struct arm_lpae_io_pgtable *data,
0258                    struct iommu_iotlb_gather *gather,
0259                    unsigned long iova, size_t size, size_t pgcount,
0260                    int lvl, arm_lpae_iopte *ptep);
0261 
0262 static void __arm_lpae_init_pte(struct arm_lpae_io_pgtable *data,
0263                 phys_addr_t paddr, arm_lpae_iopte prot,
0264                 int lvl, int num_entries, arm_lpae_iopte *ptep)
0265 {
0266     arm_lpae_iopte pte = prot;
0267     struct io_pgtable_cfg *cfg = &data->iop.cfg;
0268     size_t sz = ARM_LPAE_BLOCK_SIZE(lvl, data);
0269     int i;
0270 
0271     if (data->iop.fmt != ARM_MALI_LPAE && lvl == ARM_LPAE_MAX_LEVELS - 1)
0272         pte |= ARM_LPAE_PTE_TYPE_PAGE;
0273     else
0274         pte |= ARM_LPAE_PTE_TYPE_BLOCK;
0275 
0276     for (i = 0; i < num_entries; i++)
0277         ptep[i] = pte | paddr_to_iopte(paddr + i * sz, data);
0278 
0279     if (!cfg->coherent_walk)
0280         __arm_lpae_sync_pte(ptep, num_entries, cfg);
0281 }
0282 
0283 static int arm_lpae_init_pte(struct arm_lpae_io_pgtable *data,
0284                  unsigned long iova, phys_addr_t paddr,
0285                  arm_lpae_iopte prot, int lvl, int num_entries,
0286                  arm_lpae_iopte *ptep)
0287 {
0288     int i;
0289 
0290     for (i = 0; i < num_entries; i++)
0291         if (iopte_leaf(ptep[i], lvl, data->iop.fmt)) {
0292             /* We require an unmap first */
0293             WARN_ON(!selftest_running);
0294             return -EEXIST;
0295         } else if (iopte_type(ptep[i]) == ARM_LPAE_PTE_TYPE_TABLE) {
0296             /*
0297              * We need to unmap and free the old table before
0298              * overwriting it with a block entry.
0299              */
0300             arm_lpae_iopte *tblp;
0301             size_t sz = ARM_LPAE_BLOCK_SIZE(lvl, data);
0302 
0303             tblp = ptep - ARM_LPAE_LVL_IDX(iova, lvl, data);
0304             if (__arm_lpae_unmap(data, NULL, iova + i * sz, sz, 1,
0305                          lvl, tblp) != sz) {
0306                 WARN_ON(1);
0307                 return -EINVAL;
0308             }
0309         }
0310 
0311     __arm_lpae_init_pte(data, paddr, prot, lvl, num_entries, ptep);
0312     return 0;
0313 }
0314 
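/*
 * Atomically install a next-level table at *ptep, which is expected to
 * currently contain @curr. The value previously found in *ptep is
 * returned: if it differs from @curr, the caller lost the race and
 * remains responsible for freeing the table it tried to install.
 */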
0315 static arm_lpae_iopte arm_lpae_install_table(arm_lpae_iopte *table,
0316                          arm_lpae_iopte *ptep,
0317                          arm_lpae_iopte curr,
0318                          struct arm_lpae_io_pgtable *data)
0319 {
0320     arm_lpae_iopte old, new;
0321     struct io_pgtable_cfg *cfg = &data->iop.cfg;
0322 
0323     new = paddr_to_iopte(__pa(table), data) | ARM_LPAE_PTE_TYPE_TABLE;
0324     if (cfg->quirks & IO_PGTABLE_QUIRK_ARM_NS)
0325         new |= ARM_LPAE_PTE_NSTABLE;
0326 
0327     /*
0328      * Ensure the table itself is visible before its PTE can be.
0329      * Whilst we could get away with cmpxchg64_release below, this
0330      * doesn't have any ordering semantics when !CONFIG_SMP.
0331      */
0332     dma_wmb();
0333 
0334     old = cmpxchg64_relaxed(ptep, curr, new);
0335 
0336     if (cfg->coherent_walk || (old & ARM_LPAE_PTE_SW_SYNC))
0337         return old;
0338 
0339     /* Even if it's not ours, there's no point waiting; just kick it */
0340     __arm_lpae_sync_pte(ptep, 1, cfg);
0341     if (old == curr)
0342         WRITE_ONCE(*ptep, new | ARM_LPAE_PTE_SW_SYNC);
0343 
0344     return old;
0345 }
0346 
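/*
 * Walk down from @lvl, installing a leaf entry once @size matches the
 * block size of the current level. For example, with a 4K granule a
 * 2M mapping terminates as a block entry at level 2, whereas a 4K
 * mapping recurses down to a page entry at level 3, allocating any
 * missing intermediate tables on the way.
 */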
0347 static int __arm_lpae_map(struct arm_lpae_io_pgtable *data, unsigned long iova,
0348               phys_addr_t paddr, size_t size, size_t pgcount,
0349               arm_lpae_iopte prot, int lvl, arm_lpae_iopte *ptep,
0350               gfp_t gfp, size_t *mapped)
0351 {
0352     arm_lpae_iopte *cptep, pte;
0353     size_t block_size = ARM_LPAE_BLOCK_SIZE(lvl, data);
0354     size_t tblsz = ARM_LPAE_GRANULE(data);
0355     struct io_pgtable_cfg *cfg = &data->iop.cfg;
0356     int ret = 0, num_entries, max_entries, map_idx_start;
0357 
0358     /* Find our entry at the current level */
0359     map_idx_start = ARM_LPAE_LVL_IDX(iova, lvl, data);
0360     ptep += map_idx_start;
0361 
0362     /* If we can install a leaf entry at this level, then do so */
0363     if (size == block_size) {
0364         max_entries = ARM_LPAE_PTES_PER_TABLE(data) - map_idx_start;
0365         num_entries = min_t(int, pgcount, max_entries);
0366         ret = arm_lpae_init_pte(data, iova, paddr, prot, lvl, num_entries, ptep);
0367         if (!ret && mapped)
0368             *mapped += num_entries * size;
0369 
0370         return ret;
0371     }
0372 
0373     /* We can't allocate tables at the final level */
0374     if (WARN_ON(lvl >= ARM_LPAE_MAX_LEVELS - 1))
0375         return -EINVAL;
0376 
0377     /* Grab a pointer to the next level */
0378     pte = READ_ONCE(*ptep);
0379     if (!pte) {
0380         cptep = __arm_lpae_alloc_pages(tblsz, gfp, cfg);
0381         if (!cptep)
0382             return -ENOMEM;
0383 
0384         pte = arm_lpae_install_table(cptep, ptep, 0, data);
0385         if (pte)
0386             __arm_lpae_free_pages(cptep, tblsz, cfg);
0387     } else if (!cfg->coherent_walk && !(pte & ARM_LPAE_PTE_SW_SYNC)) {
0388         __arm_lpae_sync_pte(ptep, 1, cfg);
0389     }
0390 
0391     if (pte && !iopte_leaf(pte, lvl, data->iop.fmt)) {
0392         cptep = iopte_deref(pte, data);
0393     } else if (pte) {
0394         /* We require an unmap first */
0395         WARN_ON(!selftest_running);
0396         return -EEXIST;
0397     }
0398 
0399     /* Rinse, repeat */
0400     return __arm_lpae_map(data, iova, paddr, size, pgcount, prot, lvl + 1,
0401                   cptep, gfp, mapped);
0402 }
0403 
0404 static arm_lpae_iopte arm_lpae_prot_to_pte(struct arm_lpae_io_pgtable *data,
0405                        int prot)
0406 {
0407     arm_lpae_iopte pte;
0408 
0409     if (data->iop.fmt == APPLE_DART) {
0410         pte = 0;
0411         if (!(prot & IOMMU_WRITE))
0412             pte |= APPLE_DART_PTE_PROT_NO_WRITE;
0413         if (!(prot & IOMMU_READ))
0414             pte |= APPLE_DART_PTE_PROT_NO_READ;
0415         return pte;
0416     }
0417 
0418     if (data->iop.fmt == ARM_64_LPAE_S1 ||
0419         data->iop.fmt == ARM_32_LPAE_S1) {
0420         pte = ARM_LPAE_PTE_nG;
0421         if (!(prot & IOMMU_WRITE) && (prot & IOMMU_READ))
0422             pte |= ARM_LPAE_PTE_AP_RDONLY;
0423         if (!(prot & IOMMU_PRIV))
0424             pte |= ARM_LPAE_PTE_AP_UNPRIV;
0425     } else {
0426         pte = ARM_LPAE_PTE_HAP_FAULT;
0427         if (prot & IOMMU_READ)
0428             pte |= ARM_LPAE_PTE_HAP_READ;
0429         if (prot & IOMMU_WRITE)
0430             pte |= ARM_LPAE_PTE_HAP_WRITE;
0431     }
0432 
0433     /*
0434      * Note that this logic is structured to accommodate Mali LPAE
0435      * having stage-1-like attributes but stage-2-like permissions.
0436      */
0437     if (data->iop.fmt == ARM_64_LPAE_S2 ||
0438         data->iop.fmt == ARM_32_LPAE_S2) {
0439         if (prot & IOMMU_MMIO)
0440             pte |= ARM_LPAE_PTE_MEMATTR_DEV;
0441         else if (prot & IOMMU_CACHE)
0442             pte |= ARM_LPAE_PTE_MEMATTR_OIWB;
0443         else
0444             pte |= ARM_LPAE_PTE_MEMATTR_NC;
0445     } else {
0446         if (prot & IOMMU_MMIO)
0447             pte |= (ARM_LPAE_MAIR_ATTR_IDX_DEV
0448                 << ARM_LPAE_PTE_ATTRINDX_SHIFT);
0449         else if (prot & IOMMU_CACHE)
0450             pte |= (ARM_LPAE_MAIR_ATTR_IDX_CACHE
0451                 << ARM_LPAE_PTE_ATTRINDX_SHIFT);
0452     }
0453 
0454     /*
0455      * Also Mali has its own notions of shareability wherein its Inner
0456      * domain covers the cores within the GPU, and its Outer domain is
0457      * "outside the GPU" (i.e. either the Inner or System domain in CPU
0458      * terms, depending on coherency).
0459      */
0460     if (prot & IOMMU_CACHE && data->iop.fmt != ARM_MALI_LPAE)
0461         pte |= ARM_LPAE_PTE_SH_IS;
0462     else
0463         pte |= ARM_LPAE_PTE_SH_OS;
0464 
0465     if (prot & IOMMU_NOEXEC)
0466         pte |= ARM_LPAE_PTE_XN;
0467 
0468     if (data->iop.cfg.quirks & IO_PGTABLE_QUIRK_ARM_NS)
0469         pte |= ARM_LPAE_PTE_NS;
0470 
0471     if (data->iop.fmt != ARM_MALI_LPAE)
0472         pte |= ARM_LPAE_PTE_AF;
0473 
0474     return pte;
0475 }
0476 
0477 static int arm_lpae_map_pages(struct io_pgtable_ops *ops, unsigned long iova,
0478                   phys_addr_t paddr, size_t pgsize, size_t pgcount,
0479                   int iommu_prot, gfp_t gfp, size_t *mapped)
0480 {
0481     struct arm_lpae_io_pgtable *data = io_pgtable_ops_to_data(ops);
0482     struct io_pgtable_cfg *cfg = &data->iop.cfg;
0483     arm_lpae_iopte *ptep = data->pgd;
0484     int ret, lvl = data->start_level;
0485     arm_lpae_iopte prot;
0486     long iaext = (s64)iova >> cfg->ias;
0487 
0488     if (WARN_ON(!pgsize || (pgsize & cfg->pgsize_bitmap) != pgsize))
0489         return -EINVAL;
0490 
0491     if (cfg->quirks & IO_PGTABLE_QUIRK_ARM_TTBR1)
0492         iaext = ~iaext;
0493     if (WARN_ON(iaext || paddr >> cfg->oas))
0494         return -ERANGE;
0495 
0496     /* If no access, then nothing to do */
0497     if (!(iommu_prot & (IOMMU_READ | IOMMU_WRITE)))
0498         return 0;
0499 
0500     prot = arm_lpae_prot_to_pte(data, iommu_prot);
0501     ret = __arm_lpae_map(data, iova, paddr, pgsize, pgcount, prot, lvl,
0502                  ptep, gfp, mapped);
0503     /*
0504      * Synchronise all PTE updates for the new mapping before there's
0505      * a chance for anything to kick off a table walk for the new iova.
0506      */
0507     wmb();
0508 
0509     return ret;
0510 }
0511 
0512 static int arm_lpae_map(struct io_pgtable_ops *ops, unsigned long iova,
0513             phys_addr_t paddr, size_t size, int iommu_prot, gfp_t gfp)
0514 {
0515     return arm_lpae_map_pages(ops, iova, paddr, size, 1, iommu_prot, gfp,
0516                   NULL);
0517 }
0518 
0519 static void __arm_lpae_free_pgtable(struct arm_lpae_io_pgtable *data, int lvl,
0520                     arm_lpae_iopte *ptep)
0521 {
0522     arm_lpae_iopte *start, *end;
0523     unsigned long table_size;
0524 
0525     if (lvl == data->start_level)
0526         table_size = ARM_LPAE_PGD_SIZE(data);
0527     else
0528         table_size = ARM_LPAE_GRANULE(data);
0529 
0530     start = ptep;
0531 
0532     /* Only leaf entries at the last level */
0533     if (lvl == ARM_LPAE_MAX_LEVELS - 1)
0534         end = ptep;
0535     else
0536         end = (void *)ptep + table_size;
0537 
0538     while (ptep != end) {
0539         arm_lpae_iopte pte = *ptep++;
0540 
0541         if (!pte || iopte_leaf(pte, lvl, data->iop.fmt))
0542             continue;
0543 
0544         __arm_lpae_free_pgtable(data, lvl + 1, iopte_deref(pte, data));
0545     }
0546 
0547     __arm_lpae_free_pages(start, table_size, &data->iop.cfg);
0548 }
0549 
0550 static void arm_lpae_free_pgtable(struct io_pgtable *iop)
0551 {
0552     struct arm_lpae_io_pgtable *data = io_pgtable_to_data(iop);
0553 
0554     __arm_lpae_free_pgtable(data, data->start_level, data->pgd);
0555     kfree(data);
0556 }
0557 
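/*
 * Unmap a region that is smaller than the block entry currently
 * covering it: populate a new next-level table that replicates the old
 * block minus the range being unmapped, then swap it in with
 * arm_lpae_install_table(). Returns the number of bytes unmapped,
 * or 0 on failure.
 */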
0558 static size_t arm_lpae_split_blk_unmap(struct arm_lpae_io_pgtable *data,
0559                        struct iommu_iotlb_gather *gather,
0560                        unsigned long iova, size_t size,
0561                        arm_lpae_iopte blk_pte, int lvl,
0562                        arm_lpae_iopte *ptep, size_t pgcount)
0563 {
0564     struct io_pgtable_cfg *cfg = &data->iop.cfg;
0565     arm_lpae_iopte pte, *tablep;
0566     phys_addr_t blk_paddr;
0567     size_t tablesz = ARM_LPAE_GRANULE(data);
0568     size_t split_sz = ARM_LPAE_BLOCK_SIZE(lvl, data);
0569     int ptes_per_table = ARM_LPAE_PTES_PER_TABLE(data);
0570     int i, unmap_idx_start = -1, num_entries = 0, max_entries;
0571 
0572     if (WARN_ON(lvl == ARM_LPAE_MAX_LEVELS))
0573         return 0;
0574 
0575     tablep = __arm_lpae_alloc_pages(tablesz, GFP_ATOMIC, cfg);
0576     if (!tablep)
0577         return 0; /* Bytes unmapped */
0578 
0579     if (size == split_sz) {
0580         unmap_idx_start = ARM_LPAE_LVL_IDX(iova, lvl, data);
0581         max_entries = ptes_per_table - unmap_idx_start;
0582         num_entries = min_t(int, pgcount, max_entries);
0583     }
0584 
0585     blk_paddr = iopte_to_paddr(blk_pte, data);
0586     pte = iopte_prot(blk_pte);
0587 
0588     for (i = 0; i < ptes_per_table; i++, blk_paddr += split_sz) {
0589         /* Unmap! */
0590         if (i >= unmap_idx_start && i < (unmap_idx_start + num_entries))
0591             continue;
0592 
0593         __arm_lpae_init_pte(data, blk_paddr, pte, lvl, 1, &tablep[i]);
0594     }
0595 
0596     pte = arm_lpae_install_table(tablep, ptep, blk_pte, data);
0597     if (pte != blk_pte) {
0598         __arm_lpae_free_pages(tablep, tablesz, cfg);
0599         /*
0600          * We may race against someone unmapping another part of this
0601          * block, but anything else is invalid. We can't misinterpret
0602          * a page entry here since we're never at the last level.
0603          */
0604         if (iopte_type(pte) != ARM_LPAE_PTE_TYPE_TABLE)
0605             return 0;
0606 
0607         tablep = iopte_deref(pte, data);
0608     } else if (unmap_idx_start >= 0) {
0609         for (i = 0; i < num_entries; i++)
0610             io_pgtable_tlb_add_page(&data->iop, gather, iova + i * size, size);
0611 
0612         return num_entries * size;
0613     }
0614 
0615     return __arm_lpae_unmap(data, gather, iova, size, pgcount, lvl, tablep);
0616 }
0617 
0618 static size_t __arm_lpae_unmap(struct arm_lpae_io_pgtable *data,
0619                    struct iommu_iotlb_gather *gather,
0620                    unsigned long iova, size_t size, size_t pgcount,
0621                    int lvl, arm_lpae_iopte *ptep)
0622 {
0623     arm_lpae_iopte pte;
0624     struct io_pgtable *iop = &data->iop;
0625     int i = 0, num_entries, max_entries, unmap_idx_start;
0626 
0627     /* Something went horribly wrong and we ran out of page table */
0628     if (WARN_ON(lvl == ARM_LPAE_MAX_LEVELS))
0629         return 0;
0630 
0631     unmap_idx_start = ARM_LPAE_LVL_IDX(iova, lvl, data);
0632     ptep += unmap_idx_start;
0633     pte = READ_ONCE(*ptep);
0634     if (WARN_ON(!pte))
0635         return 0;
0636 
0637     /* If the size matches this level, we're in the right place */
0638     if (size == ARM_LPAE_BLOCK_SIZE(lvl, data)) {
0639         max_entries = ARM_LPAE_PTES_PER_TABLE(data) - unmap_idx_start;
0640         num_entries = min_t(int, pgcount, max_entries);
0641 
0642         while (i < num_entries) {
0643             pte = READ_ONCE(*ptep);
0644             if (WARN_ON(!pte))
0645                 break;
0646 
0647             __arm_lpae_clear_pte(ptep, &iop->cfg);
0648 
0649             if (!iopte_leaf(pte, lvl, iop->fmt)) {
0650                 /* Also flush any partial walks */
0651                 io_pgtable_tlb_flush_walk(iop, iova + i * size, size,
0652                               ARM_LPAE_GRANULE(data));
0653                 __arm_lpae_free_pgtable(data, lvl + 1, iopte_deref(pte, data));
0654             } else if (!iommu_iotlb_gather_queued(gather)) {
0655                 io_pgtable_tlb_add_page(iop, gather, iova + i * size, size);
0656             }
0657 
0658             ptep++;
0659             i++;
0660         }
0661 
0662         return i * size;
0663     } else if (iopte_leaf(pte, lvl, iop->fmt)) {
0664         /*
0665          * Insert a table at the next level to map the old region,
0666          * minus the part we want to unmap
0667          */
0668         return arm_lpae_split_blk_unmap(data, gather, iova, size, pte,
0669                         lvl + 1, ptep, pgcount);
0670     }
0671 
0672     /* Keep on walkin' */
0673     ptep = iopte_deref(pte, data);
0674     return __arm_lpae_unmap(data, gather, iova, size, pgcount, lvl + 1, ptep);
0675 }
0676 
0677 static size_t arm_lpae_unmap_pages(struct io_pgtable_ops *ops, unsigned long iova,
0678                    size_t pgsize, size_t pgcount,
0679                    struct iommu_iotlb_gather *gather)
0680 {
0681     struct arm_lpae_io_pgtable *data = io_pgtable_ops_to_data(ops);
0682     struct io_pgtable_cfg *cfg = &data->iop.cfg;
0683     arm_lpae_iopte *ptep = data->pgd;
0684     long iaext = (s64)iova >> cfg->ias;
0685 
0686     if (WARN_ON(!pgsize || (pgsize & cfg->pgsize_bitmap) != pgsize || !pgcount))
0687         return 0;
0688 
0689     if (cfg->quirks & IO_PGTABLE_QUIRK_ARM_TTBR1)
0690         iaext = ~iaext;
0691     if (WARN_ON(iaext))
0692         return 0;
0693 
0694     return __arm_lpae_unmap(data, gather, iova, pgsize, pgcount,
0695                 data->start_level, ptep);
0696 }
0697 
0698 static size_t arm_lpae_unmap(struct io_pgtable_ops *ops, unsigned long iova,
0699                  size_t size, struct iommu_iotlb_gather *gather)
0700 {
0701     return arm_lpae_unmap_pages(ops, iova, size, 1, gather);
0702 }
0703 
0704 static phys_addr_t arm_lpae_iova_to_phys(struct io_pgtable_ops *ops,
0705                      unsigned long iova)
0706 {
0707     struct arm_lpae_io_pgtable *data = io_pgtable_ops_to_data(ops);
0708     arm_lpae_iopte pte, *ptep = data->pgd;
0709     int lvl = data->start_level;
0710 
0711     do {
0712         /* Valid IOPTE pointer? */
0713         if (!ptep)
0714             return 0;
0715 
0716         /* Grab the IOPTE we're interested in */
0717         ptep += ARM_LPAE_LVL_IDX(iova, lvl, data);
0718         pte = READ_ONCE(*ptep);
0719 
0720         /* Valid entry? */
0721         if (!pte)
0722             return 0;
0723 
0724         /* Leaf entry? */
0725         if (iopte_leaf(pte, lvl, data->iop.fmt))
0726             goto found_translation;
0727 
0728         /* Take it to the next level */
0729         ptep = iopte_deref(pte, data);
0730     } while (++lvl < ARM_LPAE_MAX_LEVELS);
0731 
0732     /* Ran out of page tables to walk */
0733     return 0;
0734 
0735 found_translation:
0736     iova &= (ARM_LPAE_BLOCK_SIZE(lvl, data) - 1);
0737     return iopte_to_paddr(pte, data) | iova;
0738 }
0739 
0740 static void arm_lpae_restrict_pgsizes(struct io_pgtable_cfg *cfg)
0741 {
0742     unsigned long granule, page_sizes;
0743     unsigned int max_addr_bits = 48;
0744 
0745     /*
0746      * We need to restrict the supported page sizes to match the
0747      * translation regime for a particular granule. Aim to match
0748      * the CPU page size if possible, otherwise prefer smaller sizes.
0749      * While we're at it, restrict the block sizes to match the
0750      * chosen granule.
0751      */
0752     if (cfg->pgsize_bitmap & PAGE_SIZE)
0753         granule = PAGE_SIZE;
0754     else if (cfg->pgsize_bitmap & ~PAGE_MASK)
0755         granule = 1UL << __fls(cfg->pgsize_bitmap & ~PAGE_MASK);
0756     else if (cfg->pgsize_bitmap & PAGE_MASK)
0757         granule = 1UL << __ffs(cfg->pgsize_bitmap & PAGE_MASK);
0758     else
0759         granule = 0;
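    /*
     * The chain above prefers the exact CPU page size, then the largest
     * supported size below it, and finally the smallest supported size
     * above it.
     */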
0760 
0761     switch (granule) {
0762     case SZ_4K:
0763         page_sizes = (SZ_4K | SZ_2M | SZ_1G);
0764         break;
0765     case SZ_16K:
0766         page_sizes = (SZ_16K | SZ_32M);
0767         break;
0768     case SZ_64K:
0769         max_addr_bits = 52;
0770         page_sizes = (SZ_64K | SZ_512M);
0771         if (cfg->oas > 48)
0772             page_sizes |= 1ULL << 42; /* 4TB */
0773         break;
0774     default:
0775         page_sizes = 0;
0776     }
0777 
0778     cfg->pgsize_bitmap &= page_sizes;
0779     cfg->ias = min(cfg->ias, max_addr_bits);
0780     cfg->oas = min(cfg->oas, max_addr_bits);
0781 }
0782 
0783 static struct arm_lpae_io_pgtable *
0784 arm_lpae_alloc_pgtable(struct io_pgtable_cfg *cfg)
0785 {
0786     struct arm_lpae_io_pgtable *data;
0787     int levels, va_bits, pg_shift;
0788 
0789     arm_lpae_restrict_pgsizes(cfg);
0790 
0791     if (!(cfg->pgsize_bitmap & (SZ_4K | SZ_16K | SZ_64K)))
0792         return NULL;
0793 
0794     if (cfg->ias > ARM_LPAE_MAX_ADDR_BITS)
0795         return NULL;
0796 
0797     if (cfg->oas > ARM_LPAE_MAX_ADDR_BITS)
0798         return NULL;
0799 
0800     data = kmalloc(sizeof(*data), GFP_KERNEL);
0801     if (!data)
0802         return NULL;
0803 
0804     pg_shift = __ffs(cfg->pgsize_bitmap);
0805     data->bits_per_level = pg_shift - ilog2(sizeof(arm_lpae_iopte));
0806 
0807     va_bits = cfg->ias - pg_shift;
0808     levels = DIV_ROUND_UP(va_bits, data->bits_per_level);
0809     data->start_level = ARM_LPAE_MAX_LEVELS - levels;
0810 
0811     /* Calculate the actual size of our pgd (without concatenation) */
0812     data->pgd_bits = va_bits - (data->bits_per_level * (levels - 1));
0813 
0814     data->iop.ops = (struct io_pgtable_ops) {
0815         .map        = arm_lpae_map,
0816         .map_pages  = arm_lpae_map_pages,
0817         .unmap      = arm_lpae_unmap,
0818         .unmap_pages    = arm_lpae_unmap_pages,
0819         .iova_to_phys   = arm_lpae_iova_to_phys,
0820     };
0821 
0822     return data;
0823 }
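/*
 * Example: ias = 48 with a 4K granule gives pg_shift = 12,
 * bits_per_level = 9, va_bits = 36, and hence levels = 4,
 * start_level = 0 and pgd_bits = 9 (a single 4K pgd).
 */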
0824 
0825 static struct io_pgtable *
0826 arm_64_lpae_alloc_pgtable_s1(struct io_pgtable_cfg *cfg, void *cookie)
0827 {
0828     u64 reg;
0829     struct arm_lpae_io_pgtable *data;
0830     typeof(&cfg->arm_lpae_s1_cfg.tcr) tcr = &cfg->arm_lpae_s1_cfg.tcr;
0831     bool tg1;
0832 
0833     if (cfg->quirks & ~(IO_PGTABLE_QUIRK_ARM_NS |
0834                 IO_PGTABLE_QUIRK_ARM_TTBR1 |
0835                 IO_PGTABLE_QUIRK_ARM_OUTER_WBWA))
0836         return NULL;
0837 
0838     data = arm_lpae_alloc_pgtable(cfg);
0839     if (!data)
0840         return NULL;
0841 
0842     /* TCR */
0843     if (cfg->coherent_walk) {
0844         tcr->sh = ARM_LPAE_TCR_SH_IS;
0845         tcr->irgn = ARM_LPAE_TCR_RGN_WBWA;
0846         tcr->orgn = ARM_LPAE_TCR_RGN_WBWA;
0847         if (cfg->quirks & IO_PGTABLE_QUIRK_ARM_OUTER_WBWA)
0848             goto out_free_data;
0849     } else {
0850         tcr->sh = ARM_LPAE_TCR_SH_OS;
0851         tcr->irgn = ARM_LPAE_TCR_RGN_NC;
0852         if (!(cfg->quirks & IO_PGTABLE_QUIRK_ARM_OUTER_WBWA))
0853             tcr->orgn = ARM_LPAE_TCR_RGN_NC;
0854         else
0855             tcr->orgn = ARM_LPAE_TCR_RGN_WBWA;
0856     }
0857 
0858     tg1 = cfg->quirks & IO_PGTABLE_QUIRK_ARM_TTBR1;
0859     switch (ARM_LPAE_GRANULE(data)) {
0860     case SZ_4K:
0861         tcr->tg = tg1 ? ARM_LPAE_TCR_TG1_4K : ARM_LPAE_TCR_TG0_4K;
0862         break;
0863     case SZ_16K:
0864         tcr->tg = tg1 ? ARM_LPAE_TCR_TG1_16K : ARM_LPAE_TCR_TG0_16K;
0865         break;
0866     case SZ_64K:
0867         tcr->tg = tg1 ? ARM_LPAE_TCR_TG1_64K : ARM_LPAE_TCR_TG0_64K;
0868         break;
0869     }
0870 
0871     switch (cfg->oas) {
0872     case 32:
0873         tcr->ips = ARM_LPAE_TCR_PS_32_BIT;
0874         break;
0875     case 36:
0876         tcr->ips = ARM_LPAE_TCR_PS_36_BIT;
0877         break;
0878     case 40:
0879         tcr->ips = ARM_LPAE_TCR_PS_40_BIT;
0880         break;
0881     case 42:
0882         tcr->ips = ARM_LPAE_TCR_PS_42_BIT;
0883         break;
0884     case 44:
0885         tcr->ips = ARM_LPAE_TCR_PS_44_BIT;
0886         break;
0887     case 48:
0888         tcr->ips = ARM_LPAE_TCR_PS_48_BIT;
0889         break;
0890     case 52:
0891         tcr->ips = ARM_LPAE_TCR_PS_52_BIT;
0892         break;
0893     default:
0894         goto out_free_data;
0895     }
0896 
0897     tcr->tsz = 64ULL - cfg->ias;
0898 
0899     /* MAIRs */
0900     reg = (ARM_LPAE_MAIR_ATTR_NC
0901            << ARM_LPAE_MAIR_ATTR_SHIFT(ARM_LPAE_MAIR_ATTR_IDX_NC)) |
0902           (ARM_LPAE_MAIR_ATTR_WBRWA
0903            << ARM_LPAE_MAIR_ATTR_SHIFT(ARM_LPAE_MAIR_ATTR_IDX_CACHE)) |
0904           (ARM_LPAE_MAIR_ATTR_DEVICE
0905            << ARM_LPAE_MAIR_ATTR_SHIFT(ARM_LPAE_MAIR_ATTR_IDX_DEV)) |
0906           (ARM_LPAE_MAIR_ATTR_INC_OWBRWA
0907            << ARM_LPAE_MAIR_ATTR_SHIFT(ARM_LPAE_MAIR_ATTR_IDX_INC_OCACHE));
0908 
0909     cfg->arm_lpae_s1_cfg.mair = reg;
0910 
0911     /* Looking good; allocate a pgd */
0912     data->pgd = __arm_lpae_alloc_pages(ARM_LPAE_PGD_SIZE(data),
0913                        GFP_KERNEL, cfg);
0914     if (!data->pgd)
0915         goto out_free_data;
0916 
0917     /* Ensure the empty pgd is visible before any actual TTBR write */
0918     wmb();
0919 
0920     /* TTBR */
0921     cfg->arm_lpae_s1_cfg.ttbr = virt_to_phys(data->pgd);
0922     return &data->iop;
0923 
0924 out_free_data:
0925     kfree(data);
0926     return NULL;
0927 }
0928 
0929 static struct io_pgtable *
0930 arm_64_lpae_alloc_pgtable_s2(struct io_pgtable_cfg *cfg, void *cookie)
0931 {
0932     u64 sl;
0933     struct arm_lpae_io_pgtable *data;
0934     typeof(&cfg->arm_lpae_s2_cfg.vtcr) vtcr = &cfg->arm_lpae_s2_cfg.vtcr;
0935 
0936     /* The NS quirk doesn't apply at stage 2 */
0937     if (cfg->quirks)
0938         return NULL;
0939 
0940     data = arm_lpae_alloc_pgtable(cfg);
0941     if (!data)
0942         return NULL;
0943 
0944     /*
0945      * Concatenate PGDs at level 1 if possible in order to reduce
0946      * the depth of the stage-2 walk.
0947      */
0948     if (data->start_level == 0) {
0949         unsigned long pgd_pages;
0950 
0951         pgd_pages = ARM_LPAE_PGD_SIZE(data) / sizeof(arm_lpae_iopte);
0952         if (pgd_pages <= ARM_LPAE_S2_MAX_CONCAT_PAGES) {
0953             data->pgd_bits += data->bits_per_level;
0954             data->start_level++;
0955         }
0956     }
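    /*
     * For instance, a 40-bit IPA space with a 4K granule would otherwise
     * need a two-entry level-0 table; folding that into two concatenated
     * level-1 tables (well under the 16-page limit) starts the walk at
     * level 1 instead.
     */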
0957 
0958     /* VTCR */
0959     if (cfg->coherent_walk) {
0960         vtcr->sh = ARM_LPAE_TCR_SH_IS;
0961         vtcr->irgn = ARM_LPAE_TCR_RGN_WBWA;
0962         vtcr->orgn = ARM_LPAE_TCR_RGN_WBWA;
0963     } else {
0964         vtcr->sh = ARM_LPAE_TCR_SH_OS;
0965         vtcr->irgn = ARM_LPAE_TCR_RGN_NC;
0966         vtcr->orgn = ARM_LPAE_TCR_RGN_NC;
0967     }
0968 
0969     sl = data->start_level;
0970 
0971     switch (ARM_LPAE_GRANULE(data)) {
0972     case SZ_4K:
0973         vtcr->tg = ARM_LPAE_TCR_TG0_4K;
0974         sl++; /* SL0 format is different for 4K granule size */
0975         break;
0976     case SZ_16K:
0977         vtcr->tg = ARM_LPAE_TCR_TG0_16K;
0978         break;
0979     case SZ_64K:
0980         vtcr->tg = ARM_LPAE_TCR_TG0_64K;
0981         break;
0982     }
0983 
0984     switch (cfg->oas) {
0985     case 32:
0986         vtcr->ps = ARM_LPAE_TCR_PS_32_BIT;
0987         break;
0988     case 36:
0989         vtcr->ps = ARM_LPAE_TCR_PS_36_BIT;
0990         break;
0991     case 40:
0992         vtcr->ps = ARM_LPAE_TCR_PS_40_BIT;
0993         break;
0994     case 42:
0995         vtcr->ps = ARM_LPAE_TCR_PS_42_BIT;
0996         break;
0997     case 44:
0998         vtcr->ps = ARM_LPAE_TCR_PS_44_BIT;
0999         break;
1000     case 48:
1001         vtcr->ps = ARM_LPAE_TCR_PS_48_BIT;
1002         break;
1003     case 52:
1004         vtcr->ps = ARM_LPAE_TCR_PS_52_BIT;
1005         break;
1006     default:
1007         goto out_free_data;
1008     }
1009 
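    /*
     * T0SZ encodes the input size as (64 - IA bits). SL0 is an inverted
     * encoding of the starting lookup level for the granules used here,
     * hence the bitwise negation of sl below.
     */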
1010     vtcr->tsz = 64ULL - cfg->ias;
1011     vtcr->sl = ~sl & ARM_LPAE_VTCR_SL0_MASK;
1012 
1013     /* Allocate pgd pages */
1014     data->pgd = __arm_lpae_alloc_pages(ARM_LPAE_PGD_SIZE(data),
1015                        GFP_KERNEL, cfg);
1016     if (!data->pgd)
1017         goto out_free_data;
1018 
1019     /* Ensure the empty pgd is visible before any actual TTBR write */
1020     wmb();
1021 
1022     /* VTTBR */
1023     cfg->arm_lpae_s2_cfg.vttbr = virt_to_phys(data->pgd);
1024     return &data->iop;
1025 
1026 out_free_data:
1027     kfree(data);
1028     return NULL;
1029 }
1030 
1031 static struct io_pgtable *
1032 arm_32_lpae_alloc_pgtable_s1(struct io_pgtable_cfg *cfg, void *cookie)
1033 {
1034     if (cfg->ias > 32 || cfg->oas > 40)
1035         return NULL;
1036 
1037     cfg->pgsize_bitmap &= (SZ_4K | SZ_2M | SZ_1G);
1038     return arm_64_lpae_alloc_pgtable_s1(cfg, cookie);
1039 }
1040 
1041 static struct io_pgtable *
1042 arm_32_lpae_alloc_pgtable_s2(struct io_pgtable_cfg *cfg, void *cookie)
1043 {
1044     if (cfg->ias > 40 || cfg->oas > 40)
1045         return NULL;
1046 
1047     cfg->pgsize_bitmap &= (SZ_4K | SZ_2M | SZ_1G);
1048     return arm_64_lpae_alloc_pgtable_s2(cfg, cookie);
1049 }
1050 
1051 static struct io_pgtable *
1052 arm_mali_lpae_alloc_pgtable(struct io_pgtable_cfg *cfg, void *cookie)
1053 {
1054     struct arm_lpae_io_pgtable *data;
1055 
1056     /* No quirks for Mali (hopefully) */
1057     if (cfg->quirks)
1058         return NULL;
1059 
1060     if (cfg->ias > 48 || cfg->oas > 40)
1061         return NULL;
1062 
1063     cfg->pgsize_bitmap &= (SZ_4K | SZ_2M | SZ_1G);
1064 
1065     data = arm_lpae_alloc_pgtable(cfg);
1066     if (!data)
1067         return NULL;
1068 
1069     /* Mali seems to need a full 4-level table regardless of IAS */
1070     if (data->start_level > 0) {
1071         data->start_level = 0;
1072         data->pgd_bits = 0;
1073     }
1074     /*
1075      * MEMATTR: Mali has no actual notion of a non-cacheable type, so the
1076      * best we can do is mimic the out-of-tree driver and hope that the
1077      * "implementation-defined caching policy" is good enough. Similarly,
1078      * we'll use it for the sake of a valid attribute for our 'device'
1079      * index, although callers should never request that in practice.
1080      */
1081     cfg->arm_mali_lpae_cfg.memattr =
1082         (ARM_MALI_LPAE_MEMATTR_IMP_DEF
1083          << ARM_LPAE_MAIR_ATTR_SHIFT(ARM_LPAE_MAIR_ATTR_IDX_NC)) |
1084         (ARM_MALI_LPAE_MEMATTR_WRITE_ALLOC
1085          << ARM_LPAE_MAIR_ATTR_SHIFT(ARM_LPAE_MAIR_ATTR_IDX_CACHE)) |
1086         (ARM_MALI_LPAE_MEMATTR_IMP_DEF
1087          << ARM_LPAE_MAIR_ATTR_SHIFT(ARM_LPAE_MAIR_ATTR_IDX_DEV));
1088 
1089     data->pgd = __arm_lpae_alloc_pages(ARM_LPAE_PGD_SIZE(data), GFP_KERNEL,
1090                        cfg);
1091     if (!data->pgd)
1092         goto out_free_data;
1093 
1094     /* Ensure the empty pgd is visible before TRANSTAB can be written */
1095     wmb();
1096 
1097     cfg->arm_mali_lpae_cfg.transtab = virt_to_phys(data->pgd) |
1098                       ARM_MALI_LPAE_TTBR_READ_INNER |
1099                       ARM_MALI_LPAE_TTBR_ADRMODE_TABLE;
1100     if (cfg->coherent_walk)
1101         cfg->arm_mali_lpae_cfg.transtab |= ARM_MALI_LPAE_TTBR_SHARE_OUTER;
1102 
1103     return &data->iop;
1104 
1105 out_free_data:
1106     kfree(data);
1107     return NULL;
1108 }
1109 
1110 static struct io_pgtable *
1111 apple_dart_alloc_pgtable(struct io_pgtable_cfg *cfg, void *cookie)
1112 {
1113     struct arm_lpae_io_pgtable *data;
1114     int i;
1115 
1116     if (cfg->oas > 36)
1117         return NULL;
1118 
1119     data = arm_lpae_alloc_pgtable(cfg);
1120     if (!data)
1121         return NULL;
1122 
1123     /*
1124      * The table format itself always uses two levels, but the total VA
1125      * space is mapped by four separate tables, making the MMIO registers
1126      * an effective "level 1". For simplicity, though, we treat this
1127      * equivalently to LPAE stage 2 concatenation at level 2, with the
1128      * additional TTBRs each just pointing at consecutive pages.
1129      */
1130     if (data->start_level < 1)
1131         goto out_free_data;
1132     if (data->start_level == 1 && data->pgd_bits > 2)
1133         goto out_free_data;
1134     if (data->start_level > 1)
1135         data->pgd_bits = 0;
1136     data->start_level = 2;
1137     cfg->apple_dart_cfg.n_ttbrs = 1 << data->pgd_bits;
1138     data->pgd_bits += data->bits_per_level;
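    /*
     * Illustrative numbers (not necessarily a real DART configuration):
     * a 32-bit IAS with a 4K granule yields start_level = 1 and
     * pgd_bits = 2, so four TTBRs, each pointing at one of the four
     * consecutive level-2 tables inside the 16K pgd allocated below.
     */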
1139 
1140     data->pgd = __arm_lpae_alloc_pages(ARM_LPAE_PGD_SIZE(data), GFP_KERNEL,
1141                        cfg);
1142     if (!data->pgd)
1143         goto out_free_data;
1144 
1145     for (i = 0; i < cfg->apple_dart_cfg.n_ttbrs; ++i)
1146         cfg->apple_dart_cfg.ttbr[i] =
1147             virt_to_phys(data->pgd + i * ARM_LPAE_GRANULE(data));
1148 
1149     return &data->iop;
1150 
1151 out_free_data:
1152     kfree(data);
1153     return NULL;
1154 }
1155 
1156 struct io_pgtable_init_fns io_pgtable_arm_64_lpae_s1_init_fns = {
1157     .alloc  = arm_64_lpae_alloc_pgtable_s1,
1158     .free   = arm_lpae_free_pgtable,
1159 };
1160 
1161 struct io_pgtable_init_fns io_pgtable_arm_64_lpae_s2_init_fns = {
1162     .alloc  = arm_64_lpae_alloc_pgtable_s2,
1163     .free   = arm_lpae_free_pgtable,
1164 };
1165 
1166 struct io_pgtable_init_fns io_pgtable_arm_32_lpae_s1_init_fns = {
1167     .alloc  = arm_32_lpae_alloc_pgtable_s1,
1168     .free   = arm_lpae_free_pgtable,
1169 };
1170 
1171 struct io_pgtable_init_fns io_pgtable_arm_32_lpae_s2_init_fns = {
1172     .alloc  = arm_32_lpae_alloc_pgtable_s2,
1173     .free   = arm_lpae_free_pgtable,
1174 };
1175 
1176 struct io_pgtable_init_fns io_pgtable_arm_mali_lpae_init_fns = {
1177     .alloc  = arm_mali_lpae_alloc_pgtable,
1178     .free   = arm_lpae_free_pgtable,
1179 };
1180 
1181 struct io_pgtable_init_fns io_pgtable_apple_dart_init_fns = {
1182     .alloc  = apple_dart_alloc_pgtable,
1183     .free   = arm_lpae_free_pgtable,
1184 };
1185 
1186 #ifdef CONFIG_IOMMU_IO_PGTABLE_LPAE_SELFTEST
1187 
1188 static struct io_pgtable_cfg *cfg_cookie __initdata;
1189 
1190 static void __init dummy_tlb_flush_all(void *cookie)
1191 {
1192     WARN_ON(cookie != cfg_cookie);
1193 }
1194 
1195 static void __init dummy_tlb_flush(unsigned long iova, size_t size,
1196                    size_t granule, void *cookie)
1197 {
1198     WARN_ON(cookie != cfg_cookie);
1199     WARN_ON(!(size & cfg_cookie->pgsize_bitmap));
1200 }
1201 
1202 static void __init dummy_tlb_add_page(struct iommu_iotlb_gather *gather,
1203                       unsigned long iova, size_t granule,
1204                       void *cookie)
1205 {
1206     dummy_tlb_flush(iova, granule, granule, cookie);
1207 }
1208 
1209 static const struct iommu_flush_ops dummy_tlb_ops __initconst = {
1210     .tlb_flush_all  = dummy_tlb_flush_all,
1211     .tlb_flush_walk = dummy_tlb_flush,
1212     .tlb_add_page   = dummy_tlb_add_page,
1213 };
1214 
1215 static void __init arm_lpae_dump_ops(struct io_pgtable_ops *ops)
1216 {
1217     struct arm_lpae_io_pgtable *data = io_pgtable_ops_to_data(ops);
1218     struct io_pgtable_cfg *cfg = &data->iop.cfg;
1219 
1220     pr_err("cfg: pgsize_bitmap 0x%lx, ias %u-bit\n",
1221         cfg->pgsize_bitmap, cfg->ias);
1222     pr_err("data: %d levels, 0x%zx pgd_size, %u pg_shift, %u bits_per_level, pgd @ %p\n",
1223         ARM_LPAE_MAX_LEVELS - data->start_level, ARM_LPAE_PGD_SIZE(data),
1224         ilog2(ARM_LPAE_GRANULE(data)), data->bits_per_level, data->pgd);
1225 }
1226 
1227 #define __FAIL(ops, i)  ({                      \
1228         WARN(1, "selftest: test failed for fmt idx %d\n", (i)); \
1229         arm_lpae_dump_ops(ops);                 \
1230         selftest_running = false;               \
1231         -EFAULT;                        \
1232 })
1233 
1234 static int __init arm_lpae_run_tests(struct io_pgtable_cfg *cfg)
1235 {
1236     static const enum io_pgtable_fmt fmts[] __initconst = {
1237         ARM_64_LPAE_S1,
1238         ARM_64_LPAE_S2,
1239     };
1240 
1241     int i, j;
1242     unsigned long iova;
1243     size_t size;
1244     struct io_pgtable_ops *ops;
1245 
1246     selftest_running = true;
1247 
1248     for (i = 0; i < ARRAY_SIZE(fmts); ++i) {
1249         cfg_cookie = cfg;
1250         ops = alloc_io_pgtable_ops(fmts[i], cfg, cfg);
1251         if (!ops) {
1252             pr_err("selftest: failed to allocate io pgtable ops\n");
1253             return -ENOMEM;
1254         }
1255 
1256         /*
1257          * Initial sanity checks.
1258          * Empty page tables shouldn't provide any translations.
1259          */
1260         if (ops->iova_to_phys(ops, 42))
1261             return __FAIL(ops, i);
1262 
1263         if (ops->iova_to_phys(ops, SZ_1G + 42))
1264             return __FAIL(ops, i);
1265 
1266         if (ops->iova_to_phys(ops, SZ_2G + 42))
1267             return __FAIL(ops, i);
1268 
1269         /*
1270          * Distinct mappings of different granule sizes.
1271          */
1272         iova = 0;
1273         for_each_set_bit(j, &cfg->pgsize_bitmap, BITS_PER_LONG) {
1274             size = 1UL << j;
1275 
1276             if (ops->map(ops, iova, iova, size, IOMMU_READ |
1277                                 IOMMU_WRITE |
1278                                 IOMMU_NOEXEC |
1279                                 IOMMU_CACHE, GFP_KERNEL))
1280                 return __FAIL(ops, i);
1281 
1282             /* Overlapping mappings */
1283             if (!ops->map(ops, iova, iova + size, size,
1284                       IOMMU_READ | IOMMU_NOEXEC, GFP_KERNEL))
1285                 return __FAIL(ops, i);
1286 
1287             if (ops->iova_to_phys(ops, iova + 42) != (iova + 42))
1288                 return __FAIL(ops, i);
1289 
1290             iova += SZ_1G;
1291         }
1292 
1293         /* Partial unmap */
1294         size = 1UL << __ffs(cfg->pgsize_bitmap);
1295         if (ops->unmap(ops, SZ_1G + size, size, NULL) != size)
1296             return __FAIL(ops, i);
1297 
1298         /* Remap of partial unmap */
1299         if (ops->map(ops, SZ_1G + size, size, size, IOMMU_READ, GFP_KERNEL))
1300             return __FAIL(ops, i);
1301 
1302         if (ops->iova_to_phys(ops, SZ_1G + size + 42) != (size + 42))
1303             return __FAIL(ops, i);
1304 
1305         /* Full unmap */
1306         iova = 0;
1307         for_each_set_bit(j, &cfg->pgsize_bitmap, BITS_PER_LONG) {
1308             size = 1UL << j;
1309 
1310             if (ops->unmap(ops, iova, size, NULL) != size)
1311                 return __FAIL(ops, i);
1312 
1313             if (ops->iova_to_phys(ops, iova + 42))
1314                 return __FAIL(ops, i);
1315 
1316             /* Remap full block */
1317             if (ops->map(ops, iova, iova, size, IOMMU_WRITE, GFP_KERNEL))
1318                 return __FAIL(ops, i);
1319 
1320             if (ops->iova_to_phys(ops, iova + 42) != (iova + 42))
1321                 return __FAIL(ops, i);
1322 
1323             iova += SZ_1G;
1324         }
1325 
1326         free_io_pgtable_ops(ops);
1327     }
1328 
1329     selftest_running = false;
1330     return 0;
1331 }
1332 
1333 static int __init arm_lpae_do_selftests(void)
1334 {
1335     static const unsigned long pgsize[] __initconst = {
1336         SZ_4K | SZ_2M | SZ_1G,
1337         SZ_16K | SZ_32M,
1338         SZ_64K | SZ_512M,
1339     };
1340 
1341     static const unsigned int ias[] __initconst = {
1342         32, 36, 40, 42, 44, 48,
1343     };
1344 
1345     int i, j, pass = 0, fail = 0;
1346     struct io_pgtable_cfg cfg = {
1347         .tlb = &dummy_tlb_ops,
1348         .oas = 48,
1349         .coherent_walk = true,
1350     };
1351 
1352     for (i = 0; i < ARRAY_SIZE(pgsize); ++i) {
1353         for (j = 0; j < ARRAY_SIZE(ias); ++j) {
1354             cfg.pgsize_bitmap = pgsize[i];
1355             cfg.ias = ias[j];
1356             pr_info("selftest: pgsize_bitmap 0x%08lx, IAS %u\n",
1357                 pgsize[i], ias[j]);
1358             if (arm_lpae_run_tests(&cfg))
1359                 fail++;
1360             else
1361                 pass++;
1362         }
1363     }
1364 
1365     pr_info("selftest: completed with %d PASS %d FAIL\n", pass, fail);
1366     return fail ? -EFAULT : 0;
1367 }
1368 subsys_initcall(arm_lpae_do_selftests);
1369 #endif