Back to home page

OSCL-LXR

 
 

    


0001 // SPDX-License-Identifier: GPL-2.0
0002 /*
0003  * IOMMU API for ARM architected SMMUv3 implementations.
0004  *
0005  * Copyright (C) 2015 ARM Limited
0006  *
0007  * Author: Will Deacon <will.deacon@arm.com>
0008  *
0009  * This driver is powered by bad coffee and bombay mix.
0010  */
0011 
0012 #include <linux/acpi.h>
0013 #include <linux/acpi_iort.h>
0014 #include <linux/bitops.h>
0015 #include <linux/crash_dump.h>
0016 #include <linux/delay.h>
0017 #include <linux/dma-iommu.h>
0018 #include <linux/err.h>
0019 #include <linux/interrupt.h>
0020 #include <linux/io-pgtable.h>
0021 #include <linux/iopoll.h>
0022 #include <linux/module.h>
0023 #include <linux/msi.h>
0024 #include <linux/of.h>
0025 #include <linux/of_address.h>
0026 #include <linux/of_platform.h>
0027 #include <linux/pci.h>
0028 #include <linux/pci-ats.h>
0029 #include <linux/platform_device.h>
0030 
0031 #include <linux/amba/bus.h>
0032 
0033 #include "arm-smmu-v3.h"
0034 #include "../../iommu-sva-lib.h"
0035 
0036 static bool disable_bypass = true;
0037 module_param(disable_bypass, bool, 0444);
0038 MODULE_PARM_DESC(disable_bypass,
0039     "Disable bypass streams such that incoming transactions from devices that are not attached to an iommu domain will report an abort back to the device and will not be allowed to pass through the SMMU.");
0040 
0041 static bool disable_msipolling;
0042 module_param(disable_msipolling, bool, 0444);
0043 MODULE_PARM_DESC(disable_msipolling,
0044     "Disable MSI-based polling for CMD_SYNC completion.");
0045 
0046 enum arm_smmu_msi_index {
0047     EVTQ_MSI_INDEX,
0048     GERROR_MSI_INDEX,
0049     PRIQ_MSI_INDEX,
0050     ARM_SMMU_MAX_MSIS,
0051 };
0052 
0053 static phys_addr_t arm_smmu_msi_cfg[ARM_SMMU_MAX_MSIS][3] = {
0054     [EVTQ_MSI_INDEX] = {
0055         ARM_SMMU_EVTQ_IRQ_CFG0,
0056         ARM_SMMU_EVTQ_IRQ_CFG1,
0057         ARM_SMMU_EVTQ_IRQ_CFG2,
0058     },
0059     [GERROR_MSI_INDEX] = {
0060         ARM_SMMU_GERROR_IRQ_CFG0,
0061         ARM_SMMU_GERROR_IRQ_CFG1,
0062         ARM_SMMU_GERROR_IRQ_CFG2,
0063     },
0064     [PRIQ_MSI_INDEX] = {
0065         ARM_SMMU_PRIQ_IRQ_CFG0,
0066         ARM_SMMU_PRIQ_IRQ_CFG1,
0067         ARM_SMMU_PRIQ_IRQ_CFG2,
0068     },
0069 };
0070 
0071 struct arm_smmu_option_prop {
0072     u32 opt;
0073     const char *prop;
0074 };
0075 
0076 DEFINE_XARRAY_ALLOC1(arm_smmu_asid_xa);
0077 DEFINE_MUTEX(arm_smmu_asid_lock);
0078 
0079 /*
0080  * Special value used by SVA when a process dies, to quiesce a CD without
0081  * disabling it.
0082  */
0083 struct arm_smmu_ctx_desc quiet_cd = { 0 };
0084 
0085 static struct arm_smmu_option_prop arm_smmu_options[] = {
0086     { ARM_SMMU_OPT_SKIP_PREFETCH, "hisilicon,broken-prefetch-cmd" },
0087     { ARM_SMMU_OPT_PAGE0_REGS_ONLY, "cavium,cn9900-broken-page1-regspace"},
0088     { 0, NULL},
0089 };
0090 
0091 static void parse_driver_options(struct arm_smmu_device *smmu)
0092 {
0093     int i = 0;
0094 
0095     do {
0096         if (of_property_read_bool(smmu->dev->of_node,
0097                         arm_smmu_options[i].prop)) {
0098             smmu->options |= arm_smmu_options[i].opt;
0099             dev_notice(smmu->dev, "option %s\n",
0100                 arm_smmu_options[i].prop);
0101         }
0102     } while (arm_smmu_options[++i].opt);
0103 }
0104 
0105 /* Low-level queue manipulation functions */
0106 static bool queue_has_space(struct arm_smmu_ll_queue *q, u32 n)
0107 {
0108     u32 space, prod, cons;
0109 
0110     prod = Q_IDX(q, q->prod);
0111     cons = Q_IDX(q, q->cons);
0112 
0113     if (Q_WRP(q, q->prod) == Q_WRP(q, q->cons))
0114         space = (1 << q->max_n_shift) - (prod - cons);
0115     else
0116         space = cons - prod;
0117 
0118     return space >= n;
0119 }
0120 
0121 static bool queue_full(struct arm_smmu_ll_queue *q)
0122 {
0123     return Q_IDX(q, q->prod) == Q_IDX(q, q->cons) &&
0124            Q_WRP(q, q->prod) != Q_WRP(q, q->cons);
0125 }
0126 
0127 static bool queue_empty(struct arm_smmu_ll_queue *q)
0128 {
0129     return Q_IDX(q, q->prod) == Q_IDX(q, q->cons) &&
0130            Q_WRP(q, q->prod) == Q_WRP(q, q->cons);
0131 }
0132 
0133 static bool queue_consumed(struct arm_smmu_ll_queue *q, u32 prod)
0134 {
0135     return ((Q_WRP(q, q->cons) == Q_WRP(q, prod)) &&
0136         (Q_IDX(q, q->cons) > Q_IDX(q, prod))) ||
0137            ((Q_WRP(q, q->cons) != Q_WRP(q, prod)) &&
0138         (Q_IDX(q, q->cons) <= Q_IDX(q, prod)));
0139 }
0140 
0141 static void queue_sync_cons_out(struct arm_smmu_queue *q)
0142 {
0143     /*
0144      * Ensure that all CPU accesses (reads and writes) to the queue
0145      * are complete before we update the cons pointer.
0146      */
0147     __iomb();
0148     writel_relaxed(q->llq.cons, q->cons_reg);
0149 }
0150 
0151 static void queue_inc_cons(struct arm_smmu_ll_queue *q)
0152 {
0153     u32 cons = (Q_WRP(q, q->cons) | Q_IDX(q, q->cons)) + 1;
0154     q->cons = Q_OVF(q->cons) | Q_WRP(q, cons) | Q_IDX(q, cons);
0155 }
0156 
0157 static int queue_sync_prod_in(struct arm_smmu_queue *q)
0158 {
0159     u32 prod;
0160     int ret = 0;
0161 
0162     /*
0163      * We can't use the _relaxed() variant here, as we must prevent
0164      * speculative reads of the queue before we have determined that
0165      * prod has indeed moved.
0166      */
0167     prod = readl(q->prod_reg);
0168 
0169     if (Q_OVF(prod) != Q_OVF(q->llq.prod))
0170         ret = -EOVERFLOW;
0171 
0172     q->llq.prod = prod;
0173     return ret;
0174 }
0175 
0176 static u32 queue_inc_prod_n(struct arm_smmu_ll_queue *q, int n)
0177 {
0178     u32 prod = (Q_WRP(q, q->prod) | Q_IDX(q, q->prod)) + n;
0179     return Q_OVF(q->prod) | Q_WRP(q, prod) | Q_IDX(q, prod);
0180 }
0181 
0182 static void queue_poll_init(struct arm_smmu_device *smmu,
0183                 struct arm_smmu_queue_poll *qp)
0184 {
0185     qp->delay = 1;
0186     qp->spin_cnt = 0;
0187     qp->wfe = !!(smmu->features & ARM_SMMU_FEAT_SEV);
0188     qp->timeout = ktime_add_us(ktime_get(), ARM_SMMU_POLL_TIMEOUT_US);
0189 }
0190 
0191 static int queue_poll(struct arm_smmu_queue_poll *qp)
0192 {
0193     if (ktime_compare(ktime_get(), qp->timeout) > 0)
0194         return -ETIMEDOUT;
0195 
0196     if (qp->wfe) {
0197         wfe();
0198     } else if (++qp->spin_cnt < ARM_SMMU_POLL_SPIN_COUNT) {
0199         cpu_relax();
0200     } else {
0201         udelay(qp->delay);
0202         qp->delay *= 2;
0203         qp->spin_cnt = 0;
0204     }
0205 
0206     return 0;
0207 }
0208 
0209 static void queue_write(__le64 *dst, u64 *src, size_t n_dwords)
0210 {
0211     int i;
0212 
0213     for (i = 0; i < n_dwords; ++i)
0214         *dst++ = cpu_to_le64(*src++);
0215 }
0216 
0217 static void queue_read(u64 *dst, __le64 *src, size_t n_dwords)
0218 {
0219     int i;
0220 
0221     for (i = 0; i < n_dwords; ++i)
0222         *dst++ = le64_to_cpu(*src++);
0223 }
0224 
0225 static int queue_remove_raw(struct arm_smmu_queue *q, u64 *ent)
0226 {
0227     if (queue_empty(&q->llq))
0228         return -EAGAIN;
0229 
0230     queue_read(ent, Q_ENT(q, q->llq.cons), q->ent_dwords);
0231     queue_inc_cons(&q->llq);
0232     queue_sync_cons_out(q);
0233     return 0;
0234 }
0235 
0236 /* High-level queue accessors */
0237 static int arm_smmu_cmdq_build_cmd(u64 *cmd, struct arm_smmu_cmdq_ent *ent)
0238 {
0239     memset(cmd, 0, 1 << CMDQ_ENT_SZ_SHIFT);
0240     cmd[0] |= FIELD_PREP(CMDQ_0_OP, ent->opcode);
0241 
0242     switch (ent->opcode) {
0243     case CMDQ_OP_TLBI_EL2_ALL:
0244     case CMDQ_OP_TLBI_NSNH_ALL:
0245         break;
0246     case CMDQ_OP_PREFETCH_CFG:
0247         cmd[0] |= FIELD_PREP(CMDQ_PREFETCH_0_SID, ent->prefetch.sid);
0248         break;
0249     case CMDQ_OP_CFGI_CD:
0250         cmd[0] |= FIELD_PREP(CMDQ_CFGI_0_SSID, ent->cfgi.ssid);
0251         fallthrough;
0252     case CMDQ_OP_CFGI_STE:
0253         cmd[0] |= FIELD_PREP(CMDQ_CFGI_0_SID, ent->cfgi.sid);
0254         cmd[1] |= FIELD_PREP(CMDQ_CFGI_1_LEAF, ent->cfgi.leaf);
0255         break;
0256     case CMDQ_OP_CFGI_CD_ALL:
0257         cmd[0] |= FIELD_PREP(CMDQ_CFGI_0_SID, ent->cfgi.sid);
0258         break;
0259     case CMDQ_OP_CFGI_ALL:
0260         /* Cover the entire SID range */
0261         cmd[1] |= FIELD_PREP(CMDQ_CFGI_1_RANGE, 31);
0262         break;
0263     case CMDQ_OP_TLBI_NH_VA:
0264         cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_VMID, ent->tlbi.vmid);
0265         fallthrough;
0266     case CMDQ_OP_TLBI_EL2_VA:
0267         cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_NUM, ent->tlbi.num);
0268         cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_SCALE, ent->tlbi.scale);
0269         cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_ASID, ent->tlbi.asid);
0270         cmd[1] |= FIELD_PREP(CMDQ_TLBI_1_LEAF, ent->tlbi.leaf);
0271         cmd[1] |= FIELD_PREP(CMDQ_TLBI_1_TTL, ent->tlbi.ttl);
0272         cmd[1] |= FIELD_PREP(CMDQ_TLBI_1_TG, ent->tlbi.tg);
0273         cmd[1] |= ent->tlbi.addr & CMDQ_TLBI_1_VA_MASK;
0274         break;
0275     case CMDQ_OP_TLBI_S2_IPA:
0276         cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_NUM, ent->tlbi.num);
0277         cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_SCALE, ent->tlbi.scale);
0278         cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_VMID, ent->tlbi.vmid);
0279         cmd[1] |= FIELD_PREP(CMDQ_TLBI_1_LEAF, ent->tlbi.leaf);
0280         cmd[1] |= FIELD_PREP(CMDQ_TLBI_1_TTL, ent->tlbi.ttl);
0281         cmd[1] |= FIELD_PREP(CMDQ_TLBI_1_TG, ent->tlbi.tg);
0282         cmd[1] |= ent->tlbi.addr & CMDQ_TLBI_1_IPA_MASK;
0283         break;
0284     case CMDQ_OP_TLBI_NH_ASID:
0285         cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_ASID, ent->tlbi.asid);
0286         fallthrough;
0287     case CMDQ_OP_TLBI_S12_VMALL:
0288         cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_VMID, ent->tlbi.vmid);
0289         break;
0290     case CMDQ_OP_TLBI_EL2_ASID:
0291         cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_ASID, ent->tlbi.asid);
0292         break;
0293     case CMDQ_OP_ATC_INV:
0294         cmd[0] |= FIELD_PREP(CMDQ_0_SSV, ent->substream_valid);
0295         cmd[0] |= FIELD_PREP(CMDQ_ATC_0_GLOBAL, ent->atc.global);
0296         cmd[0] |= FIELD_PREP(CMDQ_ATC_0_SSID, ent->atc.ssid);
0297         cmd[0] |= FIELD_PREP(CMDQ_ATC_0_SID, ent->atc.sid);
0298         cmd[1] |= FIELD_PREP(CMDQ_ATC_1_SIZE, ent->atc.size);
0299         cmd[1] |= ent->atc.addr & CMDQ_ATC_1_ADDR_MASK;
0300         break;
0301     case CMDQ_OP_PRI_RESP:
0302         cmd[0] |= FIELD_PREP(CMDQ_0_SSV, ent->substream_valid);
0303         cmd[0] |= FIELD_PREP(CMDQ_PRI_0_SSID, ent->pri.ssid);
0304         cmd[0] |= FIELD_PREP(CMDQ_PRI_0_SID, ent->pri.sid);
0305         cmd[1] |= FIELD_PREP(CMDQ_PRI_1_GRPID, ent->pri.grpid);
0306         switch (ent->pri.resp) {
0307         case PRI_RESP_DENY:
0308         case PRI_RESP_FAIL:
0309         case PRI_RESP_SUCC:
0310             break;
0311         default:
0312             return -EINVAL;
0313         }
0314         cmd[1] |= FIELD_PREP(CMDQ_PRI_1_RESP, ent->pri.resp);
0315         break;
0316     case CMDQ_OP_RESUME:
0317         cmd[0] |= FIELD_PREP(CMDQ_RESUME_0_SID, ent->resume.sid);
0318         cmd[0] |= FIELD_PREP(CMDQ_RESUME_0_RESP, ent->resume.resp);
0319         cmd[1] |= FIELD_PREP(CMDQ_RESUME_1_STAG, ent->resume.stag);
0320         break;
0321     case CMDQ_OP_CMD_SYNC:
0322         if (ent->sync.msiaddr) {
0323             cmd[0] |= FIELD_PREP(CMDQ_SYNC_0_CS, CMDQ_SYNC_0_CS_IRQ);
0324             cmd[1] |= ent->sync.msiaddr & CMDQ_SYNC_1_MSIADDR_MASK;
0325         } else {
0326             cmd[0] |= FIELD_PREP(CMDQ_SYNC_0_CS, CMDQ_SYNC_0_CS_SEV);
0327         }
0328         cmd[0] |= FIELD_PREP(CMDQ_SYNC_0_MSH, ARM_SMMU_SH_ISH);
0329         cmd[0] |= FIELD_PREP(CMDQ_SYNC_0_MSIATTR, ARM_SMMU_MEMATTR_OIWB);
0330         break;
0331     default:
0332         return -ENOENT;
0333     }
0334 
0335     return 0;
0336 }
0337 
0338 static struct arm_smmu_cmdq *arm_smmu_get_cmdq(struct arm_smmu_device *smmu)
0339 {
0340     return &smmu->cmdq;
0341 }
0342 
0343 static void arm_smmu_cmdq_build_sync_cmd(u64 *cmd, struct arm_smmu_device *smmu,
0344                      struct arm_smmu_queue *q, u32 prod)
0345 {
0346     struct arm_smmu_cmdq_ent ent = {
0347         .opcode = CMDQ_OP_CMD_SYNC,
0348     };
0349 
0350     /*
0351      * Beware that Hi16xx adds an extra 32 bits of goodness to its MSI
0352      * payload, so the write will zero the entire command on that platform.
0353      */
0354     if (smmu->options & ARM_SMMU_OPT_MSIPOLL) {
0355         ent.sync.msiaddr = q->base_dma + Q_IDX(&q->llq, prod) *
0356                    q->ent_dwords * 8;
0357     }
0358 
0359     arm_smmu_cmdq_build_cmd(cmd, &ent);
0360 }
0361 
0362 static void __arm_smmu_cmdq_skip_err(struct arm_smmu_device *smmu,
0363                      struct arm_smmu_queue *q)
0364 {
0365     static const char * const cerror_str[] = {
0366         [CMDQ_ERR_CERROR_NONE_IDX]  = "No error",
0367         [CMDQ_ERR_CERROR_ILL_IDX]   = "Illegal command",
0368         [CMDQ_ERR_CERROR_ABT_IDX]   = "Abort on command fetch",
0369         [CMDQ_ERR_CERROR_ATC_INV_IDX]   = "ATC invalidate timeout",
0370     };
0371 
0372     int i;
0373     u64 cmd[CMDQ_ENT_DWORDS];
0374     u32 cons = readl_relaxed(q->cons_reg);
0375     u32 idx = FIELD_GET(CMDQ_CONS_ERR, cons);
0376     struct arm_smmu_cmdq_ent cmd_sync = {
0377         .opcode = CMDQ_OP_CMD_SYNC,
0378     };
0379 
0380     dev_err(smmu->dev, "CMDQ error (cons 0x%08x): %s\n", cons,
0381         idx < ARRAY_SIZE(cerror_str) ?  cerror_str[idx] : "Unknown");
0382 
0383     switch (idx) {
0384     case CMDQ_ERR_CERROR_ABT_IDX:
0385         dev_err(smmu->dev, "retrying command fetch\n");
0386         return;
0387     case CMDQ_ERR_CERROR_NONE_IDX:
0388         return;
0389     case CMDQ_ERR_CERROR_ATC_INV_IDX:
0390         /*
0391          * ATC Invalidation Completion timeout. CONS is still pointing
0392          * at the CMD_SYNC. Attempt to complete other pending commands
0393          * by repeating the CMD_SYNC, though we might well end up back
0394          * here since the ATC invalidation may still be pending.
0395          */
0396         return;
0397     case CMDQ_ERR_CERROR_ILL_IDX:
0398     default:
0399         break;
0400     }
0401 
0402     /*
0403      * We may have concurrent producers, so we need to be careful
0404      * not to touch any of the shadow cmdq state.
0405      */
0406     queue_read(cmd, Q_ENT(q, cons), q->ent_dwords);
0407     dev_err(smmu->dev, "skipping command in error state:\n");
0408     for (i = 0; i < ARRAY_SIZE(cmd); ++i)
0409         dev_err(smmu->dev, "\t0x%016llx\n", (unsigned long long)cmd[i]);
0410 
0411     /* Convert the erroneous command into a CMD_SYNC */
0412     arm_smmu_cmdq_build_cmd(cmd, &cmd_sync);
0413 
0414     queue_write(Q_ENT(q, cons), cmd, q->ent_dwords);
0415 }
0416 
0417 static void arm_smmu_cmdq_skip_err(struct arm_smmu_device *smmu)
0418 {
0419     __arm_smmu_cmdq_skip_err(smmu, &smmu->cmdq.q);
0420 }
0421 
0422 /*
0423  * Command queue locking.
0424  * This is a form of bastardised rwlock with the following major changes:
0425  *
0426  * - The only LOCK routines are exclusive_trylock() and shared_lock().
0427  *   Neither have barrier semantics, and instead provide only a control
0428  *   dependency.
0429  *
0430  * - The UNLOCK routines are supplemented with shared_tryunlock(), which
0431  *   fails if the caller appears to be the last lock holder (yes, this is
0432  *   racy). All successful UNLOCK routines have RELEASE semantics.
0433  */
0434 static void arm_smmu_cmdq_shared_lock(struct arm_smmu_cmdq *cmdq)
0435 {
0436     int val;
0437 
0438     /*
0439      * We can try to avoid the cmpxchg() loop by simply incrementing the
0440      * lock counter. When held in exclusive state, the lock counter is set
0441      * to INT_MIN so these increments won't hurt as the value will remain
0442      * negative.
0443      */
0444     if (atomic_fetch_inc_relaxed(&cmdq->lock) >= 0)
0445         return;
0446 
0447     do {
0448         val = atomic_cond_read_relaxed(&cmdq->lock, VAL >= 0);
0449     } while (atomic_cmpxchg_relaxed(&cmdq->lock, val, val + 1) != val);
0450 }
0451 
0452 static void arm_smmu_cmdq_shared_unlock(struct arm_smmu_cmdq *cmdq)
0453 {
0454     (void)atomic_dec_return_release(&cmdq->lock);
0455 }
0456 
0457 static bool arm_smmu_cmdq_shared_tryunlock(struct arm_smmu_cmdq *cmdq)
0458 {
0459     if (atomic_read(&cmdq->lock) == 1)
0460         return false;
0461 
0462     arm_smmu_cmdq_shared_unlock(cmdq);
0463     return true;
0464 }
0465 
0466 #define arm_smmu_cmdq_exclusive_trylock_irqsave(cmdq, flags)        \
0467 ({                                  \
0468     bool __ret;                         \
0469     local_irq_save(flags);                      \
0470     __ret = !atomic_cmpxchg_relaxed(&cmdq->lock, 0, INT_MIN);   \
0471     if (!__ret)                         \
0472         local_irq_restore(flags);               \
0473     __ret;                              \
0474 })
0475 
0476 #define arm_smmu_cmdq_exclusive_unlock_irqrestore(cmdq, flags)      \
0477 ({                                  \
0478     atomic_set_release(&cmdq->lock, 0);             \
0479     local_irq_restore(flags);                   \
0480 })
0481 
0482 
0483 /*
0484  * Command queue insertion.
0485  * This is made fiddly by our attempts to achieve some sort of scalability
0486  * since there is one queue shared amongst all of the CPUs in the system.  If
0487  * you like mixed-size concurrency, dependency ordering and relaxed atomics,
0488  * then you'll *love* this monstrosity.
0489  *
0490  * The basic idea is to split the queue up into ranges of commands that are
0491  * owned by a given CPU; the owner may not have written all of the commands
0492  * itself, but is responsible for advancing the hardware prod pointer when
0493  * the time comes. The algorithm is roughly:
0494  *
0495  *  1. Allocate some space in the queue. At this point we also discover
0496  *     whether the head of the queue is currently owned by another CPU,
0497  *     or whether we are the owner.
0498  *
0499  *  2. Write our commands into our allocated slots in the queue.
0500  *
0501  *  3. Mark our slots as valid in arm_smmu_cmdq.valid_map.
0502  *
0503  *  4. If we are an owner:
0504  *      a. Wait for the previous owner to finish.
0505  *      b. Mark the queue head as unowned, which tells us the range
0506  *         that we are responsible for publishing.
0507  *      c. Wait for all commands in our owned range to become valid.
0508  *      d. Advance the hardware prod pointer.
0509  *      e. Tell the next owner we've finished.
0510  *
0511  *  5. If we are inserting a CMD_SYNC (we may or may not have been an
0512  *     owner), then we need to stick around until it has completed:
0513  *      a. If we have MSIs, the SMMU can write back into the CMD_SYNC
0514  *         to clear the first 4 bytes.
0515  *      b. Otherwise, we spin waiting for the hardware cons pointer to
0516  *         advance past our command.
0517  *
0518  * The devil is in the details, particularly the use of locking for handling
0519  * SYNC completion and freeing up space in the queue before we think that it is
0520  * full.
0521  */
0522 static void __arm_smmu_cmdq_poll_set_valid_map(struct arm_smmu_cmdq *cmdq,
0523                            u32 sprod, u32 eprod, bool set)
0524 {
0525     u32 swidx, sbidx, ewidx, ebidx;
0526     struct arm_smmu_ll_queue llq = {
0527         .max_n_shift    = cmdq->q.llq.max_n_shift,
0528         .prod       = sprod,
0529     };
0530 
0531     ewidx = BIT_WORD(Q_IDX(&llq, eprod));
0532     ebidx = Q_IDX(&llq, eprod) % BITS_PER_LONG;
0533 
0534     while (llq.prod != eprod) {
0535         unsigned long mask;
0536         atomic_long_t *ptr;
0537         u32 limit = BITS_PER_LONG;
0538 
0539         swidx = BIT_WORD(Q_IDX(&llq, llq.prod));
0540         sbidx = Q_IDX(&llq, llq.prod) % BITS_PER_LONG;
0541 
0542         ptr = &cmdq->valid_map[swidx];
0543 
0544         if ((swidx == ewidx) && (sbidx < ebidx))
0545             limit = ebidx;
0546 
0547         mask = GENMASK(limit - 1, sbidx);
0548 
0549         /*
0550          * The valid bit is the inverse of the wrap bit. This means
0551          * that a zero-initialised queue is invalid and, after marking
0552          * all entries as valid, they become invalid again when we
0553          * wrap.
0554          */
0555         if (set) {
0556             atomic_long_xor(mask, ptr);
0557         } else { /* Poll */
0558             unsigned long valid;
0559 
0560             valid = (ULONG_MAX + !!Q_WRP(&llq, llq.prod)) & mask;
0561             atomic_long_cond_read_relaxed(ptr, (VAL & mask) == valid);
0562         }
0563 
0564         llq.prod = queue_inc_prod_n(&llq, limit - sbidx);
0565     }
0566 }
0567 
0568 /* Mark all entries in the range [sprod, eprod) as valid */
0569 static void arm_smmu_cmdq_set_valid_map(struct arm_smmu_cmdq *cmdq,
0570                     u32 sprod, u32 eprod)
0571 {
0572     __arm_smmu_cmdq_poll_set_valid_map(cmdq, sprod, eprod, true);
0573 }
0574 
0575 /* Wait for all entries in the range [sprod, eprod) to become valid */
0576 static void arm_smmu_cmdq_poll_valid_map(struct arm_smmu_cmdq *cmdq,
0577                      u32 sprod, u32 eprod)
0578 {
0579     __arm_smmu_cmdq_poll_set_valid_map(cmdq, sprod, eprod, false);
0580 }
0581 
0582 /* Wait for the command queue to become non-full */
0583 static int arm_smmu_cmdq_poll_until_not_full(struct arm_smmu_device *smmu,
0584                          struct arm_smmu_ll_queue *llq)
0585 {
0586     unsigned long flags;
0587     struct arm_smmu_queue_poll qp;
0588     struct arm_smmu_cmdq *cmdq = arm_smmu_get_cmdq(smmu);
0589     int ret = 0;
0590 
0591     /*
0592      * Try to update our copy of cons by grabbing exclusive cmdq access. If
0593      * that fails, spin until somebody else updates it for us.
0594      */
0595     if (arm_smmu_cmdq_exclusive_trylock_irqsave(cmdq, flags)) {
0596         WRITE_ONCE(cmdq->q.llq.cons, readl_relaxed(cmdq->q.cons_reg));
0597         arm_smmu_cmdq_exclusive_unlock_irqrestore(cmdq, flags);
0598         llq->val = READ_ONCE(cmdq->q.llq.val);
0599         return 0;
0600     }
0601 
0602     queue_poll_init(smmu, &qp);
0603     do {
0604         llq->val = READ_ONCE(cmdq->q.llq.val);
0605         if (!queue_full(llq))
0606             break;
0607 
0608         ret = queue_poll(&qp);
0609     } while (!ret);
0610 
0611     return ret;
0612 }
0613 
0614 /*
0615  * Wait until the SMMU signals a CMD_SYNC completion MSI.
0616  * Must be called with the cmdq lock held in some capacity.
0617  */
0618 static int __arm_smmu_cmdq_poll_until_msi(struct arm_smmu_device *smmu,
0619                       struct arm_smmu_ll_queue *llq)
0620 {
0621     int ret = 0;
0622     struct arm_smmu_queue_poll qp;
0623     struct arm_smmu_cmdq *cmdq = arm_smmu_get_cmdq(smmu);
0624     u32 *cmd = (u32 *)(Q_ENT(&cmdq->q, llq->prod));
0625 
0626     queue_poll_init(smmu, &qp);
0627 
0628     /*
0629      * The MSI won't generate an event, since it's being written back
0630      * into the command queue.
0631      */
0632     qp.wfe = false;
0633     smp_cond_load_relaxed(cmd, !VAL || (ret = queue_poll(&qp)));
0634     llq->cons = ret ? llq->prod : queue_inc_prod_n(llq, 1);
0635     return ret;
0636 }
0637 
0638 /*
0639  * Wait until the SMMU cons index passes llq->prod.
0640  * Must be called with the cmdq lock held in some capacity.
0641  */
0642 static int __arm_smmu_cmdq_poll_until_consumed(struct arm_smmu_device *smmu,
0643                            struct arm_smmu_ll_queue *llq)
0644 {
0645     struct arm_smmu_queue_poll qp;
0646     struct arm_smmu_cmdq *cmdq = arm_smmu_get_cmdq(smmu);
0647     u32 prod = llq->prod;
0648     int ret = 0;
0649 
0650     queue_poll_init(smmu, &qp);
0651     llq->val = READ_ONCE(cmdq->q.llq.val);
0652     do {
0653         if (queue_consumed(llq, prod))
0654             break;
0655 
0656         ret = queue_poll(&qp);
0657 
0658         /*
0659          * This needs to be a readl() so that our subsequent call
0660          * to arm_smmu_cmdq_shared_tryunlock() can fail accurately.
0661          *
0662          * Specifically, we need to ensure that we observe all
0663          * shared_lock()s by other CMD_SYNCs that share our owner,
0664          * so that a failing call to tryunlock() means that we're
0665          * the last one out and therefore we can safely advance
0666          * cmdq->q.llq.cons. Roughly speaking:
0667          *
0668          * CPU 0        CPU1            CPU2 (us)
0669          *
0670          * if (sync)
0671          *  shared_lock();
0672          *
0673          * dma_wmb();
0674          * set_valid_map();
0675          *
0676          *          if (owner) {
0677          *              poll_valid_map();
0678          *              <control dependency>
0679          *              writel(prod_reg);
0680          *
0681          *                      readl(cons_reg);
0682          *                      tryunlock();
0683          *
0684          * Requires us to see CPU 0's shared_lock() acquisition.
0685          */
0686         llq->cons = readl(cmdq->q.cons_reg);
0687     } while (!ret);
0688 
0689     return ret;
0690 }
0691 
0692 static int arm_smmu_cmdq_poll_until_sync(struct arm_smmu_device *smmu,
0693                      struct arm_smmu_ll_queue *llq)
0694 {
0695     if (smmu->options & ARM_SMMU_OPT_MSIPOLL)
0696         return __arm_smmu_cmdq_poll_until_msi(smmu, llq);
0697 
0698     return __arm_smmu_cmdq_poll_until_consumed(smmu, llq);
0699 }
0700 
0701 static void arm_smmu_cmdq_write_entries(struct arm_smmu_cmdq *cmdq, u64 *cmds,
0702                     u32 prod, int n)
0703 {
0704     int i;
0705     struct arm_smmu_ll_queue llq = {
0706         .max_n_shift    = cmdq->q.llq.max_n_shift,
0707         .prod       = prod,
0708     };
0709 
0710     for (i = 0; i < n; ++i) {
0711         u64 *cmd = &cmds[i * CMDQ_ENT_DWORDS];
0712 
0713         prod = queue_inc_prod_n(&llq, i);
0714         queue_write(Q_ENT(&cmdq->q, prod), cmd, CMDQ_ENT_DWORDS);
0715     }
0716 }
0717 
0718 /*
0719  * This is the actual insertion function, and provides the following
0720  * ordering guarantees to callers:
0721  *
0722  * - There is a dma_wmb() before publishing any commands to the queue.
0723  *   This can be relied upon to order prior writes to data structures
0724  *   in memory (such as a CD or an STE) before the command.
0725  *
0726  * - On completion of a CMD_SYNC, there is a control dependency.
0727  *   This can be relied upon to order subsequent writes to memory (e.g.
0728  *   freeing an IOVA) after completion of the CMD_SYNC.
0729  *
0730  * - Command insertion is totally ordered, so if two CPUs each race to
0731  *   insert their own list of commands then all of the commands from one
0732  *   CPU will appear before any of the commands from the other CPU.
0733  */
0734 static int arm_smmu_cmdq_issue_cmdlist(struct arm_smmu_device *smmu,
0735                        u64 *cmds, int n, bool sync)
0736 {
0737     u64 cmd_sync[CMDQ_ENT_DWORDS];
0738     u32 prod;
0739     unsigned long flags;
0740     bool owner;
0741     struct arm_smmu_cmdq *cmdq = arm_smmu_get_cmdq(smmu);
0742     struct arm_smmu_ll_queue llq, head;
0743     int ret = 0;
0744 
0745     llq.max_n_shift = cmdq->q.llq.max_n_shift;
0746 
0747     /* 1. Allocate some space in the queue */
0748     local_irq_save(flags);
0749     llq.val = READ_ONCE(cmdq->q.llq.val);
0750     do {
0751         u64 old;
0752 
0753         while (!queue_has_space(&llq, n + sync)) {
0754             local_irq_restore(flags);
0755             if (arm_smmu_cmdq_poll_until_not_full(smmu, &llq))
0756                 dev_err_ratelimited(smmu->dev, "CMDQ timeout\n");
0757             local_irq_save(flags);
0758         }
0759 
0760         head.cons = llq.cons;
0761         head.prod = queue_inc_prod_n(&llq, n + sync) |
0762                          CMDQ_PROD_OWNED_FLAG;
0763 
0764         old = cmpxchg_relaxed(&cmdq->q.llq.val, llq.val, head.val);
0765         if (old == llq.val)
0766             break;
0767 
0768         llq.val = old;
0769     } while (1);
0770     owner = !(llq.prod & CMDQ_PROD_OWNED_FLAG);
0771     head.prod &= ~CMDQ_PROD_OWNED_FLAG;
0772     llq.prod &= ~CMDQ_PROD_OWNED_FLAG;
0773 
0774     /*
0775      * 2. Write our commands into the queue
0776      * Dependency ordering from the cmpxchg() loop above.
0777      */
0778     arm_smmu_cmdq_write_entries(cmdq, cmds, llq.prod, n);
0779     if (sync) {
0780         prod = queue_inc_prod_n(&llq, n);
0781         arm_smmu_cmdq_build_sync_cmd(cmd_sync, smmu, &cmdq->q, prod);
0782         queue_write(Q_ENT(&cmdq->q, prod), cmd_sync, CMDQ_ENT_DWORDS);
0783 
0784         /*
0785          * In order to determine completion of our CMD_SYNC, we must
0786          * ensure that the queue can't wrap twice without us noticing.
0787          * We achieve that by taking the cmdq lock as shared before
0788          * marking our slot as valid.
0789          */
0790         arm_smmu_cmdq_shared_lock(cmdq);
0791     }
0792 
0793     /* 3. Mark our slots as valid, ensuring commands are visible first */
0794     dma_wmb();
0795     arm_smmu_cmdq_set_valid_map(cmdq, llq.prod, head.prod);
0796 
0797     /* 4. If we are the owner, take control of the SMMU hardware */
0798     if (owner) {
0799         /* a. Wait for previous owner to finish */
0800         atomic_cond_read_relaxed(&cmdq->owner_prod, VAL == llq.prod);
0801 
0802         /* b. Stop gathering work by clearing the owned flag */
0803         prod = atomic_fetch_andnot_relaxed(CMDQ_PROD_OWNED_FLAG,
0804                            &cmdq->q.llq.atomic.prod);
0805         prod &= ~CMDQ_PROD_OWNED_FLAG;
0806 
0807         /*
0808          * c. Wait for any gathered work to be written to the queue.
0809          * Note that we read our own entries so that we have the control
0810          * dependency required by (d).
0811          */
0812         arm_smmu_cmdq_poll_valid_map(cmdq, llq.prod, prod);
0813 
0814         /*
0815          * d. Advance the hardware prod pointer
0816          * Control dependency ordering from the entries becoming valid.
0817          */
0818         writel_relaxed(prod, cmdq->q.prod_reg);
0819 
0820         /*
0821          * e. Tell the next owner we're done
0822          * Make sure we've updated the hardware first, so that we don't
0823          * race to update prod and potentially move it backwards.
0824          */
0825         atomic_set_release(&cmdq->owner_prod, prod);
0826     }
0827 
0828     /* 5. If we are inserting a CMD_SYNC, we must wait for it to complete */
0829     if (sync) {
0830         llq.prod = queue_inc_prod_n(&llq, n);
0831         ret = arm_smmu_cmdq_poll_until_sync(smmu, &llq);
0832         if (ret) {
0833             dev_err_ratelimited(smmu->dev,
0834                         "CMD_SYNC timeout at 0x%08x [hwprod 0x%08x, hwcons 0x%08x]\n",
0835                         llq.prod,
0836                         readl_relaxed(cmdq->q.prod_reg),
0837                         readl_relaxed(cmdq->q.cons_reg));
0838         }
0839 
0840         /*
0841          * Try to unlock the cmdq lock. This will fail if we're the last
0842          * reader, in which case we can safely update cmdq->q.llq.cons
0843          */
0844         if (!arm_smmu_cmdq_shared_tryunlock(cmdq)) {
0845             WRITE_ONCE(cmdq->q.llq.cons, llq.cons);
0846             arm_smmu_cmdq_shared_unlock(cmdq);
0847         }
0848     }
0849 
0850     local_irq_restore(flags);
0851     return ret;
0852 }
0853 
0854 static int __arm_smmu_cmdq_issue_cmd(struct arm_smmu_device *smmu,
0855                      struct arm_smmu_cmdq_ent *ent,
0856                      bool sync)
0857 {
0858     u64 cmd[CMDQ_ENT_DWORDS];
0859 
0860     if (unlikely(arm_smmu_cmdq_build_cmd(cmd, ent))) {
0861         dev_warn(smmu->dev, "ignoring unknown CMDQ opcode 0x%x\n",
0862              ent->opcode);
0863         return -EINVAL;
0864     }
0865 
0866     return arm_smmu_cmdq_issue_cmdlist(smmu, cmd, 1, sync);
0867 }
0868 
0869 static int arm_smmu_cmdq_issue_cmd(struct arm_smmu_device *smmu,
0870                    struct arm_smmu_cmdq_ent *ent)
0871 {
0872     return __arm_smmu_cmdq_issue_cmd(smmu, ent, false);
0873 }
0874 
0875 static int arm_smmu_cmdq_issue_cmd_with_sync(struct arm_smmu_device *smmu,
0876                          struct arm_smmu_cmdq_ent *ent)
0877 {
0878     return __arm_smmu_cmdq_issue_cmd(smmu, ent, true);
0879 }
0880 
0881 static void arm_smmu_cmdq_batch_add(struct arm_smmu_device *smmu,
0882                     struct arm_smmu_cmdq_batch *cmds,
0883                     struct arm_smmu_cmdq_ent *cmd)
0884 {
0885     int index;
0886 
0887     if (cmds->num == CMDQ_BATCH_ENTRIES) {
0888         arm_smmu_cmdq_issue_cmdlist(smmu, cmds->cmds, cmds->num, false);
0889         cmds->num = 0;
0890     }
0891 
0892     index = cmds->num * CMDQ_ENT_DWORDS;
0893     if (unlikely(arm_smmu_cmdq_build_cmd(&cmds->cmds[index], cmd))) {
0894         dev_warn(smmu->dev, "ignoring unknown CMDQ opcode 0x%x\n",
0895              cmd->opcode);
0896         return;
0897     }
0898 
0899     cmds->num++;
0900 }
0901 
0902 static int arm_smmu_cmdq_batch_submit(struct arm_smmu_device *smmu,
0903                       struct arm_smmu_cmdq_batch *cmds)
0904 {
0905     return arm_smmu_cmdq_issue_cmdlist(smmu, cmds->cmds, cmds->num, true);
0906 }
0907 
0908 static int arm_smmu_page_response(struct device *dev,
0909                   struct iommu_fault_event *unused,
0910                   struct iommu_page_response *resp)
0911 {
0912     struct arm_smmu_cmdq_ent cmd = {0};
0913     struct arm_smmu_master *master = dev_iommu_priv_get(dev);
0914     int sid = master->streams[0].id;
0915 
0916     if (master->stall_enabled) {
0917         cmd.opcode      = CMDQ_OP_RESUME;
0918         cmd.resume.sid      = sid;
0919         cmd.resume.stag     = resp->grpid;
0920         switch (resp->code) {
0921         case IOMMU_PAGE_RESP_INVALID:
0922         case IOMMU_PAGE_RESP_FAILURE:
0923             cmd.resume.resp = CMDQ_RESUME_0_RESP_ABORT;
0924             break;
0925         case IOMMU_PAGE_RESP_SUCCESS:
0926             cmd.resume.resp = CMDQ_RESUME_0_RESP_RETRY;
0927             break;
0928         default:
0929             return -EINVAL;
0930         }
0931     } else {
0932         return -ENODEV;
0933     }
0934 
0935     arm_smmu_cmdq_issue_cmd(master->smmu, &cmd);
0936     /*
0937      * Don't send a SYNC, it doesn't do anything for RESUME or PRI_RESP.
0938      * RESUME consumption guarantees that the stalled transaction will be
0939      * terminated... at some point in the future. PRI_RESP is fire and
0940      * forget.
0941      */
0942 
0943     return 0;
0944 }
0945 
0946 /* Context descriptor manipulation functions */
0947 void arm_smmu_tlb_inv_asid(struct arm_smmu_device *smmu, u16 asid)
0948 {
0949     struct arm_smmu_cmdq_ent cmd = {
0950         .opcode = smmu->features & ARM_SMMU_FEAT_E2H ?
0951             CMDQ_OP_TLBI_EL2_ASID : CMDQ_OP_TLBI_NH_ASID,
0952         .tlbi.asid = asid,
0953     };
0954 
0955     arm_smmu_cmdq_issue_cmd_with_sync(smmu, &cmd);
0956 }
0957 
0958 static void arm_smmu_sync_cd(struct arm_smmu_domain *smmu_domain,
0959                  int ssid, bool leaf)
0960 {
0961     size_t i;
0962     unsigned long flags;
0963     struct arm_smmu_master *master;
0964     struct arm_smmu_cmdq_batch cmds;
0965     struct arm_smmu_device *smmu = smmu_domain->smmu;
0966     struct arm_smmu_cmdq_ent cmd = {
0967         .opcode = CMDQ_OP_CFGI_CD,
0968         .cfgi   = {
0969             .ssid   = ssid,
0970             .leaf   = leaf,
0971         },
0972     };
0973 
0974     cmds.num = 0;
0975 
0976     spin_lock_irqsave(&smmu_domain->devices_lock, flags);
0977     list_for_each_entry(master, &smmu_domain->devices, domain_head) {
0978         for (i = 0; i < master->num_streams; i++) {
0979             cmd.cfgi.sid = master->streams[i].id;
0980             arm_smmu_cmdq_batch_add(smmu, &cmds, &cmd);
0981         }
0982     }
0983     spin_unlock_irqrestore(&smmu_domain->devices_lock, flags);
0984 
0985     arm_smmu_cmdq_batch_submit(smmu, &cmds);
0986 }
0987 
0988 static int arm_smmu_alloc_cd_leaf_table(struct arm_smmu_device *smmu,
0989                     struct arm_smmu_l1_ctx_desc *l1_desc)
0990 {
0991     size_t size = CTXDESC_L2_ENTRIES * (CTXDESC_CD_DWORDS << 3);
0992 
0993     l1_desc->l2ptr = dmam_alloc_coherent(smmu->dev, size,
0994                          &l1_desc->l2ptr_dma, GFP_KERNEL);
0995     if (!l1_desc->l2ptr) {
0996         dev_warn(smmu->dev,
0997              "failed to allocate context descriptor table\n");
0998         return -ENOMEM;
0999     }
1000     return 0;
1001 }
1002 
1003 static void arm_smmu_write_cd_l1_desc(__le64 *dst,
1004                       struct arm_smmu_l1_ctx_desc *l1_desc)
1005 {
1006     u64 val = (l1_desc->l2ptr_dma & CTXDESC_L1_DESC_L2PTR_MASK) |
1007           CTXDESC_L1_DESC_V;
1008 
1009     /* See comment in arm_smmu_write_ctx_desc() */
1010     WRITE_ONCE(*dst, cpu_to_le64(val));
1011 }
1012 
1013 static __le64 *arm_smmu_get_cd_ptr(struct arm_smmu_domain *smmu_domain,
1014                    u32 ssid)
1015 {
1016     __le64 *l1ptr;
1017     unsigned int idx;
1018     struct arm_smmu_l1_ctx_desc *l1_desc;
1019     struct arm_smmu_device *smmu = smmu_domain->smmu;
1020     struct arm_smmu_ctx_desc_cfg *cdcfg = &smmu_domain->s1_cfg.cdcfg;
1021 
1022     if (smmu_domain->s1_cfg.s1fmt == STRTAB_STE_0_S1FMT_LINEAR)
1023         return cdcfg->cdtab + ssid * CTXDESC_CD_DWORDS;
1024 
1025     idx = ssid >> CTXDESC_SPLIT;
1026     l1_desc = &cdcfg->l1_desc[idx];
1027     if (!l1_desc->l2ptr) {
1028         if (arm_smmu_alloc_cd_leaf_table(smmu, l1_desc))
1029             return NULL;
1030 
1031         l1ptr = cdcfg->cdtab + idx * CTXDESC_L1_DESC_DWORDS;
1032         arm_smmu_write_cd_l1_desc(l1ptr, l1_desc);
1033         /* An invalid L1CD can be cached */
1034         arm_smmu_sync_cd(smmu_domain, ssid, false);
1035     }
1036     idx = ssid & (CTXDESC_L2_ENTRIES - 1);
1037     return l1_desc->l2ptr + idx * CTXDESC_CD_DWORDS;
1038 }
1039 
1040 int arm_smmu_write_ctx_desc(struct arm_smmu_domain *smmu_domain, int ssid,
1041                 struct arm_smmu_ctx_desc *cd)
1042 {
1043     /*
1044      * This function handles the following cases:
1045      *
1046      * (1) Install primary CD, for normal DMA traffic (SSID = 0).
1047      * (2) Install a secondary CD, for SID+SSID traffic.
1048      * (3) Update ASID of a CD. Atomically write the first 64 bits of the
1049      *     CD, then invalidate the old entry and mappings.
1050      * (4) Quiesce the context without clearing the valid bit. Disable
1051      *     translation, and ignore any translation fault.
1052      * (5) Remove a secondary CD.
1053      */
1054     u64 val;
1055     bool cd_live;
1056     __le64 *cdptr;
1057 
1058     if (WARN_ON(ssid >= (1 << smmu_domain->s1_cfg.s1cdmax)))
1059         return -E2BIG;
1060 
1061     cdptr = arm_smmu_get_cd_ptr(smmu_domain, ssid);
1062     if (!cdptr)
1063         return -ENOMEM;
1064 
1065     val = le64_to_cpu(cdptr[0]);
1066     cd_live = !!(val & CTXDESC_CD_0_V);
1067 
1068     if (!cd) { /* (5) */
1069         val = 0;
1070     } else if (cd == &quiet_cd) { /* (4) */
1071         val |= CTXDESC_CD_0_TCR_EPD0;
1072     } else if (cd_live) { /* (3) */
1073         val &= ~CTXDESC_CD_0_ASID;
1074         val |= FIELD_PREP(CTXDESC_CD_0_ASID, cd->asid);
1075         /*
1076          * Until CD+TLB invalidation, both ASIDs may be used for tagging
1077          * this substream's traffic
1078          */
1079     } else { /* (1) and (2) */
1080         cdptr[1] = cpu_to_le64(cd->ttbr & CTXDESC_CD_1_TTB0_MASK);
1081         cdptr[2] = 0;
1082         cdptr[3] = cpu_to_le64(cd->mair);
1083 
1084         /*
1085          * STE is live, and the SMMU might read dwords of this CD in any
1086          * order. Ensure that it observes valid values before reading
1087          * V=1.
1088          */
1089         arm_smmu_sync_cd(smmu_domain, ssid, true);
1090 
1091         val = cd->tcr |
1092 #ifdef __BIG_ENDIAN
1093             CTXDESC_CD_0_ENDI |
1094 #endif
1095             CTXDESC_CD_0_R | CTXDESC_CD_0_A |
1096             (cd->mm ? 0 : CTXDESC_CD_0_ASET) |
1097             CTXDESC_CD_0_AA64 |
1098             FIELD_PREP(CTXDESC_CD_0_ASID, cd->asid) |
1099             CTXDESC_CD_0_V;
1100 
1101         if (smmu_domain->stall_enabled)
1102             val |= CTXDESC_CD_0_S;
1103     }
1104 
1105     /*
1106      * The SMMU accesses 64-bit values atomically. See IHI0070Ca 3.21.3
1107      * "Configuration structures and configuration invalidation completion"
1108      *
1109      *   The size of single-copy atomic reads made by the SMMU is
1110      *   IMPLEMENTATION DEFINED but must be at least 64 bits. Any single
1111      *   field within an aligned 64-bit span of a structure can be altered
1112      *   without first making the structure invalid.
1113      */
1114     WRITE_ONCE(cdptr[0], cpu_to_le64(val));
1115     arm_smmu_sync_cd(smmu_domain, ssid, true);
1116     return 0;
1117 }
1118 
1119 static int arm_smmu_alloc_cd_tables(struct arm_smmu_domain *smmu_domain)
1120 {
1121     int ret;
1122     size_t l1size;
1123     size_t max_contexts;
1124     struct arm_smmu_device *smmu = smmu_domain->smmu;
1125     struct arm_smmu_s1_cfg *cfg = &smmu_domain->s1_cfg;
1126     struct arm_smmu_ctx_desc_cfg *cdcfg = &cfg->cdcfg;
1127 
1128     max_contexts = 1 << cfg->s1cdmax;
1129 
1130     if (!(smmu->features & ARM_SMMU_FEAT_2_LVL_CDTAB) ||
1131         max_contexts <= CTXDESC_L2_ENTRIES) {
1132         cfg->s1fmt = STRTAB_STE_0_S1FMT_LINEAR;
1133         cdcfg->num_l1_ents = max_contexts;
1134 
1135         l1size = max_contexts * (CTXDESC_CD_DWORDS << 3);
1136     } else {
1137         cfg->s1fmt = STRTAB_STE_0_S1FMT_64K_L2;
1138         cdcfg->num_l1_ents = DIV_ROUND_UP(max_contexts,
1139                           CTXDESC_L2_ENTRIES);
1140 
1141         cdcfg->l1_desc = devm_kcalloc(smmu->dev, cdcfg->num_l1_ents,
1142                           sizeof(*cdcfg->l1_desc),
1143                           GFP_KERNEL);
1144         if (!cdcfg->l1_desc)
1145             return -ENOMEM;
1146 
1147         l1size = cdcfg->num_l1_ents * (CTXDESC_L1_DESC_DWORDS << 3);
1148     }
1149 
1150     cdcfg->cdtab = dmam_alloc_coherent(smmu->dev, l1size, &cdcfg->cdtab_dma,
1151                        GFP_KERNEL);
1152     if (!cdcfg->cdtab) {
1153         dev_warn(smmu->dev, "failed to allocate context descriptor\n");
1154         ret = -ENOMEM;
1155         goto err_free_l1;
1156     }
1157 
1158     return 0;
1159 
1160 err_free_l1:
1161     if (cdcfg->l1_desc) {
1162         devm_kfree(smmu->dev, cdcfg->l1_desc);
1163         cdcfg->l1_desc = NULL;
1164     }
1165     return ret;
1166 }
1167 
1168 static void arm_smmu_free_cd_tables(struct arm_smmu_domain *smmu_domain)
1169 {
1170     int i;
1171     size_t size, l1size;
1172     struct arm_smmu_device *smmu = smmu_domain->smmu;
1173     struct arm_smmu_ctx_desc_cfg *cdcfg = &smmu_domain->s1_cfg.cdcfg;
1174 
1175     if (cdcfg->l1_desc) {
1176         size = CTXDESC_L2_ENTRIES * (CTXDESC_CD_DWORDS << 3);
1177 
1178         for (i = 0; i < cdcfg->num_l1_ents; i++) {
1179             if (!cdcfg->l1_desc[i].l2ptr)
1180                 continue;
1181 
1182             dmam_free_coherent(smmu->dev, size,
1183                        cdcfg->l1_desc[i].l2ptr,
1184                        cdcfg->l1_desc[i].l2ptr_dma);
1185         }
1186         devm_kfree(smmu->dev, cdcfg->l1_desc);
1187         cdcfg->l1_desc = NULL;
1188 
1189         l1size = cdcfg->num_l1_ents * (CTXDESC_L1_DESC_DWORDS << 3);
1190     } else {
1191         l1size = cdcfg->num_l1_ents * (CTXDESC_CD_DWORDS << 3);
1192     }
1193 
1194     dmam_free_coherent(smmu->dev, l1size, cdcfg->cdtab, cdcfg->cdtab_dma);
1195     cdcfg->cdtab_dma = 0;
1196     cdcfg->cdtab = NULL;
1197 }
1198 
1199 bool arm_smmu_free_asid(struct arm_smmu_ctx_desc *cd)
1200 {
1201     bool free;
1202     struct arm_smmu_ctx_desc *old_cd;
1203 
1204     if (!cd->asid)
1205         return false;
1206 
1207     free = refcount_dec_and_test(&cd->refs);
1208     if (free) {
1209         old_cd = xa_erase(&arm_smmu_asid_xa, cd->asid);
1210         WARN_ON(old_cd != cd);
1211     }
1212     return free;
1213 }
1214 
1215 /* Stream table manipulation functions */
1216 static void
1217 arm_smmu_write_strtab_l1_desc(__le64 *dst, struct arm_smmu_strtab_l1_desc *desc)
1218 {
1219     u64 val = 0;
1220 
1221     val |= FIELD_PREP(STRTAB_L1_DESC_SPAN, desc->span);
1222     val |= desc->l2ptr_dma & STRTAB_L1_DESC_L2PTR_MASK;
1223 
1224     /* See comment in arm_smmu_write_ctx_desc() */
1225     WRITE_ONCE(*dst, cpu_to_le64(val));
1226 }
1227 
1228 static void arm_smmu_sync_ste_for_sid(struct arm_smmu_device *smmu, u32 sid)
1229 {
1230     struct arm_smmu_cmdq_ent cmd = {
1231         .opcode = CMDQ_OP_CFGI_STE,
1232         .cfgi   = {
1233             .sid    = sid,
1234             .leaf   = true,
1235         },
1236     };
1237 
1238     arm_smmu_cmdq_issue_cmd_with_sync(smmu, &cmd);
1239 }
1240 
1241 static void arm_smmu_write_strtab_ent(struct arm_smmu_master *master, u32 sid,
1242                       __le64 *dst)
1243 {
1244     /*
1245      * This is hideously complicated, but we only really care about
1246      * three cases at the moment:
1247      *
1248      * 1. Invalid (all zero) -> bypass/fault (init)
1249      * 2. Bypass/fault -> translation/bypass (attach)
1250      * 3. Translation/bypass -> bypass/fault (detach)
1251      *
1252      * Given that we can't update the STE atomically and the SMMU
1253      * doesn't read the thing in a defined order, that leaves us
1254      * with the following maintenance requirements:
1255      *
1256      * 1. Update Config, return (init time STEs aren't live)
1257      * 2. Write everything apart from dword 0, sync, write dword 0, sync
1258      * 3. Update Config, sync
1259      */
1260     u64 val = le64_to_cpu(dst[0]);
1261     bool ste_live = false;
1262     struct arm_smmu_device *smmu = NULL;
1263     struct arm_smmu_s1_cfg *s1_cfg = NULL;
1264     struct arm_smmu_s2_cfg *s2_cfg = NULL;
1265     struct arm_smmu_domain *smmu_domain = NULL;
1266     struct arm_smmu_cmdq_ent prefetch_cmd = {
1267         .opcode     = CMDQ_OP_PREFETCH_CFG,
1268         .prefetch   = {
1269             .sid    = sid,
1270         },
1271     };
1272 
1273     if (master) {
1274         smmu_domain = master->domain;
1275         smmu = master->smmu;
1276     }
1277 
1278     if (smmu_domain) {
1279         switch (smmu_domain->stage) {
1280         case ARM_SMMU_DOMAIN_S1:
1281             s1_cfg = &smmu_domain->s1_cfg;
1282             break;
1283         case ARM_SMMU_DOMAIN_S2:
1284         case ARM_SMMU_DOMAIN_NESTED:
1285             s2_cfg = &smmu_domain->s2_cfg;
1286             break;
1287         default:
1288             break;
1289         }
1290     }
1291 
1292     if (val & STRTAB_STE_0_V) {
1293         switch (FIELD_GET(STRTAB_STE_0_CFG, val)) {
1294         case STRTAB_STE_0_CFG_BYPASS:
1295             break;
1296         case STRTAB_STE_0_CFG_S1_TRANS:
1297         case STRTAB_STE_0_CFG_S2_TRANS:
1298             ste_live = true;
1299             break;
1300         case STRTAB_STE_0_CFG_ABORT:
1301             BUG_ON(!disable_bypass);
1302             break;
1303         default:
1304             BUG(); /* STE corruption */
1305         }
1306     }
1307 
1308     /* Nuke the existing STE_0 value, as we're going to rewrite it */
1309     val = STRTAB_STE_0_V;
1310 
1311     /* Bypass/fault */
1312     if (!smmu_domain || !(s1_cfg || s2_cfg)) {
1313         if (!smmu_domain && disable_bypass)
1314             val |= FIELD_PREP(STRTAB_STE_0_CFG, STRTAB_STE_0_CFG_ABORT);
1315         else
1316             val |= FIELD_PREP(STRTAB_STE_0_CFG, STRTAB_STE_0_CFG_BYPASS);
1317 
1318         dst[0] = cpu_to_le64(val);
1319         dst[1] = cpu_to_le64(FIELD_PREP(STRTAB_STE_1_SHCFG,
1320                         STRTAB_STE_1_SHCFG_INCOMING));
1321         dst[2] = 0; /* Nuke the VMID */
1322         /*
1323          * The SMMU can perform negative caching, so we must sync
1324          * the STE regardless of whether the old value was live.
1325          */
1326         if (smmu)
1327             arm_smmu_sync_ste_for_sid(smmu, sid);
1328         return;
1329     }
1330 
1331     if (s1_cfg) {
1332         u64 strw = smmu->features & ARM_SMMU_FEAT_E2H ?
1333             STRTAB_STE_1_STRW_EL2 : STRTAB_STE_1_STRW_NSEL1;
1334 
1335         BUG_ON(ste_live);
1336         dst[1] = cpu_to_le64(
1337              FIELD_PREP(STRTAB_STE_1_S1DSS, STRTAB_STE_1_S1DSS_SSID0) |
1338              FIELD_PREP(STRTAB_STE_1_S1CIR, STRTAB_STE_1_S1C_CACHE_WBRA) |
1339              FIELD_PREP(STRTAB_STE_1_S1COR, STRTAB_STE_1_S1C_CACHE_WBRA) |
1340              FIELD_PREP(STRTAB_STE_1_S1CSH, ARM_SMMU_SH_ISH) |
1341              FIELD_PREP(STRTAB_STE_1_STRW, strw));
1342 
1343         if (smmu->features & ARM_SMMU_FEAT_STALLS &&
1344             !master->stall_enabled)
1345             dst[1] |= cpu_to_le64(STRTAB_STE_1_S1STALLD);
1346 
1347         val |= (s1_cfg->cdcfg.cdtab_dma & STRTAB_STE_0_S1CTXPTR_MASK) |
1348             FIELD_PREP(STRTAB_STE_0_CFG, STRTAB_STE_0_CFG_S1_TRANS) |
1349             FIELD_PREP(STRTAB_STE_0_S1CDMAX, s1_cfg->s1cdmax) |
1350             FIELD_PREP(STRTAB_STE_0_S1FMT, s1_cfg->s1fmt);
1351     }
1352 
1353     if (s2_cfg) {
1354         BUG_ON(ste_live);
1355         dst[2] = cpu_to_le64(
1356              FIELD_PREP(STRTAB_STE_2_S2VMID, s2_cfg->vmid) |
1357              FIELD_PREP(STRTAB_STE_2_VTCR, s2_cfg->vtcr) |
1358 #ifdef __BIG_ENDIAN
1359              STRTAB_STE_2_S2ENDI |
1360 #endif
1361              STRTAB_STE_2_S2PTW | STRTAB_STE_2_S2AA64 |
1362              STRTAB_STE_2_S2R);
1363 
1364         dst[3] = cpu_to_le64(s2_cfg->vttbr & STRTAB_STE_3_S2TTB_MASK);
1365 
1366         val |= FIELD_PREP(STRTAB_STE_0_CFG, STRTAB_STE_0_CFG_S2_TRANS);
1367     }
1368 
1369     if (master->ats_enabled)
1370         dst[1] |= cpu_to_le64(FIELD_PREP(STRTAB_STE_1_EATS,
1371                          STRTAB_STE_1_EATS_TRANS));
1372 
1373     arm_smmu_sync_ste_for_sid(smmu, sid);
1374     /* See comment in arm_smmu_write_ctx_desc() */
1375     WRITE_ONCE(dst[0], cpu_to_le64(val));
1376     arm_smmu_sync_ste_for_sid(smmu, sid);
1377 
1378     /* It's likely that we'll want to use the new STE soon */
1379     if (!(smmu->options & ARM_SMMU_OPT_SKIP_PREFETCH))
1380         arm_smmu_cmdq_issue_cmd(smmu, &prefetch_cmd);
1381 }
1382 
1383 static void arm_smmu_init_bypass_stes(__le64 *strtab, unsigned int nent, bool force)
1384 {
1385     unsigned int i;
1386     u64 val = STRTAB_STE_0_V;
1387 
1388     if (disable_bypass && !force)
1389         val |= FIELD_PREP(STRTAB_STE_0_CFG, STRTAB_STE_0_CFG_ABORT);
1390     else
1391         val |= FIELD_PREP(STRTAB_STE_0_CFG, STRTAB_STE_0_CFG_BYPASS);
1392 
1393     for (i = 0; i < nent; ++i) {
1394         strtab[0] = cpu_to_le64(val);
1395         strtab[1] = cpu_to_le64(FIELD_PREP(STRTAB_STE_1_SHCFG,
1396                            STRTAB_STE_1_SHCFG_INCOMING));
1397         strtab[2] = 0;
1398         strtab += STRTAB_STE_DWORDS;
1399     }
1400 }
1401 
1402 static int arm_smmu_init_l2_strtab(struct arm_smmu_device *smmu, u32 sid)
1403 {
1404     size_t size;
1405     void *strtab;
1406     struct arm_smmu_strtab_cfg *cfg = &smmu->strtab_cfg;
1407     struct arm_smmu_strtab_l1_desc *desc = &cfg->l1_desc[sid >> STRTAB_SPLIT];
1408 
1409     if (desc->l2ptr)
1410         return 0;
1411 
1412     size = 1 << (STRTAB_SPLIT + ilog2(STRTAB_STE_DWORDS) + 3);
1413     strtab = &cfg->strtab[(sid >> STRTAB_SPLIT) * STRTAB_L1_DESC_DWORDS];
1414 
1415     desc->span = STRTAB_SPLIT + 1;
1416     desc->l2ptr = dmam_alloc_coherent(smmu->dev, size, &desc->l2ptr_dma,
1417                       GFP_KERNEL);
1418     if (!desc->l2ptr) {
1419         dev_err(smmu->dev,
1420             "failed to allocate l2 stream table for SID %u\n",
1421             sid);
1422         return -ENOMEM;
1423     }
1424 
1425     arm_smmu_init_bypass_stes(desc->l2ptr, 1 << STRTAB_SPLIT, false);
1426     arm_smmu_write_strtab_l1_desc(strtab, desc);
1427     return 0;
1428 }
1429 
1430 static struct arm_smmu_master *
1431 arm_smmu_find_master(struct arm_smmu_device *smmu, u32 sid)
1432 {
1433     struct rb_node *node;
1434     struct arm_smmu_stream *stream;
1435 
1436     lockdep_assert_held(&smmu->streams_mutex);
1437 
1438     node = smmu->streams.rb_node;
1439     while (node) {
1440         stream = rb_entry(node, struct arm_smmu_stream, node);
1441         if (stream->id < sid)
1442             node = node->rb_right;
1443         else if (stream->id > sid)
1444             node = node->rb_left;
1445         else
1446             return stream->master;
1447     }
1448 
1449     return NULL;
1450 }
1451 
1452 /* IRQ and event handlers */
1453 static int arm_smmu_handle_evt(struct arm_smmu_device *smmu, u64 *evt)
1454 {
1455     int ret;
1456     u32 reason;
1457     u32 perm = 0;
1458     struct arm_smmu_master *master;
1459     bool ssid_valid = evt[0] & EVTQ_0_SSV;
1460     u32 sid = FIELD_GET(EVTQ_0_SID, evt[0]);
1461     struct iommu_fault_event fault_evt = { };
1462     struct iommu_fault *flt = &fault_evt.fault;
1463 
1464     switch (FIELD_GET(EVTQ_0_ID, evt[0])) {
1465     case EVT_ID_TRANSLATION_FAULT:
1466         reason = IOMMU_FAULT_REASON_PTE_FETCH;
1467         break;
1468     case EVT_ID_ADDR_SIZE_FAULT:
1469         reason = IOMMU_FAULT_REASON_OOR_ADDRESS;
1470         break;
1471     case EVT_ID_ACCESS_FAULT:
1472         reason = IOMMU_FAULT_REASON_ACCESS;
1473         break;
1474     case EVT_ID_PERMISSION_FAULT:
1475         reason = IOMMU_FAULT_REASON_PERMISSION;
1476         break;
1477     default:
1478         return -EOPNOTSUPP;
1479     }
1480 
1481     /* Stage-2 is always pinned at the moment */
1482     if (evt[1] & EVTQ_1_S2)
1483         return -EFAULT;
1484 
1485     if (evt[1] & EVTQ_1_RnW)
1486         perm |= IOMMU_FAULT_PERM_READ;
1487     else
1488         perm |= IOMMU_FAULT_PERM_WRITE;
1489 
1490     if (evt[1] & EVTQ_1_InD)
1491         perm |= IOMMU_FAULT_PERM_EXEC;
1492 
1493     if (evt[1] & EVTQ_1_PnU)
1494         perm |= IOMMU_FAULT_PERM_PRIV;
1495 
1496     if (evt[1] & EVTQ_1_STALL) {
1497         flt->type = IOMMU_FAULT_PAGE_REQ;
1498         flt->prm = (struct iommu_fault_page_request) {
1499             .flags = IOMMU_FAULT_PAGE_REQUEST_LAST_PAGE,
1500             .grpid = FIELD_GET(EVTQ_1_STAG, evt[1]),
1501             .perm = perm,
1502             .addr = FIELD_GET(EVTQ_2_ADDR, evt[2]),
1503         };
1504 
1505         if (ssid_valid) {
1506             flt->prm.flags |= IOMMU_FAULT_PAGE_REQUEST_PASID_VALID;
1507             flt->prm.pasid = FIELD_GET(EVTQ_0_SSID, evt[0]);
1508         }
1509     } else {
1510         flt->type = IOMMU_FAULT_DMA_UNRECOV;
1511         flt->event = (struct iommu_fault_unrecoverable) {
1512             .reason = reason,
1513             .flags = IOMMU_FAULT_UNRECOV_ADDR_VALID,
1514             .perm = perm,
1515             .addr = FIELD_GET(EVTQ_2_ADDR, evt[2]),
1516         };
1517 
1518         if (ssid_valid) {
1519             flt->event.flags |= IOMMU_FAULT_UNRECOV_PASID_VALID;
1520             flt->event.pasid = FIELD_GET(EVTQ_0_SSID, evt[0]);
1521         }
1522     }
1523 
1524     mutex_lock(&smmu->streams_mutex);
1525     master = arm_smmu_find_master(smmu, sid);
1526     if (!master) {
1527         ret = -EINVAL;
1528         goto out_unlock;
1529     }
1530 
1531     ret = iommu_report_device_fault(master->dev, &fault_evt);
1532     if (ret && flt->type == IOMMU_FAULT_PAGE_REQ) {
1533         /* Nobody cared, abort the access */
1534         struct iommu_page_response resp = {
1535             .pasid      = flt->prm.pasid,
1536             .grpid      = flt->prm.grpid,
1537             .code       = IOMMU_PAGE_RESP_FAILURE,
1538         };
1539         arm_smmu_page_response(master->dev, &fault_evt, &resp);
1540     }
1541 
1542 out_unlock:
1543     mutex_unlock(&smmu->streams_mutex);
1544     return ret;
1545 }
1546 
1547 static irqreturn_t arm_smmu_evtq_thread(int irq, void *dev)
1548 {
1549     int i, ret;
1550     struct arm_smmu_device *smmu = dev;
1551     struct arm_smmu_queue *q = &smmu->evtq.q;
1552     struct arm_smmu_ll_queue *llq = &q->llq;
1553     static DEFINE_RATELIMIT_STATE(rs, DEFAULT_RATELIMIT_INTERVAL,
1554                       DEFAULT_RATELIMIT_BURST);
1555     u64 evt[EVTQ_ENT_DWORDS];
1556 
1557     do {
1558         while (!queue_remove_raw(q, evt)) {
1559             u8 id = FIELD_GET(EVTQ_0_ID, evt[0]);
1560 
1561             ret = arm_smmu_handle_evt(smmu, evt);
1562             if (!ret || !__ratelimit(&rs))
1563                 continue;
1564 
1565             dev_info(smmu->dev, "event 0x%02x received:\n", id);
1566             for (i = 0; i < ARRAY_SIZE(evt); ++i)
1567                 dev_info(smmu->dev, "\t0x%016llx\n",
1568                      (unsigned long long)evt[i]);
1569 
1570             cond_resched();
1571         }
1572 
1573         /*
1574          * Not much we can do on overflow, so scream and pretend we're
1575          * trying harder.
1576          */
1577         if (queue_sync_prod_in(q) == -EOVERFLOW)
1578             dev_err(smmu->dev, "EVTQ overflow detected -- events lost\n");
1579     } while (!queue_empty(llq));
1580 
1581     /* Sync our overflow flag, as we believe we're up to speed */
1582     llq->cons = Q_OVF(llq->prod) | Q_WRP(llq, llq->cons) |
1583             Q_IDX(llq, llq->cons);
1584     return IRQ_HANDLED;
1585 }
1586 
1587 static void arm_smmu_handle_ppr(struct arm_smmu_device *smmu, u64 *evt)
1588 {
1589     u32 sid, ssid;
1590     u16 grpid;
1591     bool ssv, last;
1592 
1593     sid = FIELD_GET(PRIQ_0_SID, evt[0]);
1594     ssv = FIELD_GET(PRIQ_0_SSID_V, evt[0]);
1595     ssid = ssv ? FIELD_GET(PRIQ_0_SSID, evt[0]) : 0;
1596     last = FIELD_GET(PRIQ_0_PRG_LAST, evt[0]);
1597     grpid = FIELD_GET(PRIQ_1_PRG_IDX, evt[1]);
1598 
1599     dev_info(smmu->dev, "unexpected PRI request received:\n");
1600     dev_info(smmu->dev,
1601          "\tsid 0x%08x.0x%05x: [%u%s] %sprivileged %s%s%s access at iova 0x%016llx\n",
1602          sid, ssid, grpid, last ? "L" : "",
1603          evt[0] & PRIQ_0_PERM_PRIV ? "" : "un",
1604          evt[0] & PRIQ_0_PERM_READ ? "R" : "",
1605          evt[0] & PRIQ_0_PERM_WRITE ? "W" : "",
1606          evt[0] & PRIQ_0_PERM_EXEC ? "X" : "",
1607          evt[1] & PRIQ_1_ADDR_MASK);
1608 
1609     if (last) {
1610         struct arm_smmu_cmdq_ent cmd = {
1611             .opcode         = CMDQ_OP_PRI_RESP,
1612             .substream_valid    = ssv,
1613             .pri            = {
1614                 .sid    = sid,
1615                 .ssid   = ssid,
1616                 .grpid  = grpid,
1617                 .resp   = PRI_RESP_DENY,
1618             },
1619         };
1620 
1621         arm_smmu_cmdq_issue_cmd(smmu, &cmd);
1622     }
1623 }
1624 
1625 static irqreturn_t arm_smmu_priq_thread(int irq, void *dev)
1626 {
1627     struct arm_smmu_device *smmu = dev;
1628     struct arm_smmu_queue *q = &smmu->priq.q;
1629     struct arm_smmu_ll_queue *llq = &q->llq;
1630     u64 evt[PRIQ_ENT_DWORDS];
1631 
1632     do {
1633         while (!queue_remove_raw(q, evt))
1634             arm_smmu_handle_ppr(smmu, evt);
1635 
1636         if (queue_sync_prod_in(q) == -EOVERFLOW)
1637             dev_err(smmu->dev, "PRIQ overflow detected -- requests lost\n");
1638     } while (!queue_empty(llq));
1639 
1640     /* Sync our overflow flag, as we believe we're up to speed */
1641     llq->cons = Q_OVF(llq->prod) | Q_WRP(llq, llq->cons) |
1642               Q_IDX(llq, llq->cons);
1643     queue_sync_cons_out(q);
1644     return IRQ_HANDLED;
1645 }
1646 
1647 static int arm_smmu_device_disable(struct arm_smmu_device *smmu);
1648 
1649 static irqreturn_t arm_smmu_gerror_handler(int irq, void *dev)
1650 {
1651     u32 gerror, gerrorn, active;
1652     struct arm_smmu_device *smmu = dev;
1653 
1654     gerror = readl_relaxed(smmu->base + ARM_SMMU_GERROR);
1655     gerrorn = readl_relaxed(smmu->base + ARM_SMMU_GERRORN);
1656 
1657     active = gerror ^ gerrorn;
1658     if (!(active & GERROR_ERR_MASK))
1659         return IRQ_NONE; /* No errors pending */
1660 
1661     dev_warn(smmu->dev,
1662          "unexpected global error reported (0x%08x), this could be serious\n",
1663          active);
1664 
1665     if (active & GERROR_SFM_ERR) {
1666         dev_err(smmu->dev, "device has entered Service Failure Mode!\n");
1667         arm_smmu_device_disable(smmu);
1668     }
1669 
1670     if (active & GERROR_MSI_GERROR_ABT_ERR)
1671         dev_warn(smmu->dev, "GERROR MSI write aborted\n");
1672 
1673     if (active & GERROR_MSI_PRIQ_ABT_ERR)
1674         dev_warn(smmu->dev, "PRIQ MSI write aborted\n");
1675 
1676     if (active & GERROR_MSI_EVTQ_ABT_ERR)
1677         dev_warn(smmu->dev, "EVTQ MSI write aborted\n");
1678 
1679     if (active & GERROR_MSI_CMDQ_ABT_ERR)
1680         dev_warn(smmu->dev, "CMDQ MSI write aborted\n");
1681 
1682     if (active & GERROR_PRIQ_ABT_ERR)
1683         dev_err(smmu->dev, "PRIQ write aborted -- events may have been lost\n");
1684 
1685     if (active & GERROR_EVTQ_ABT_ERR)
1686         dev_err(smmu->dev, "EVTQ write aborted -- events may have been lost\n");
1687 
1688     if (active & GERROR_CMDQ_ERR)
1689         arm_smmu_cmdq_skip_err(smmu);
1690 
1691     writel(gerror, smmu->base + ARM_SMMU_GERRORN);
1692     return IRQ_HANDLED;
1693 }
1694 
1695 static irqreturn_t arm_smmu_combined_irq_thread(int irq, void *dev)
1696 {
1697     struct arm_smmu_device *smmu = dev;
1698 
1699     arm_smmu_evtq_thread(irq, dev);
1700     if (smmu->features & ARM_SMMU_FEAT_PRI)
1701         arm_smmu_priq_thread(irq, dev);
1702 
1703     return IRQ_HANDLED;
1704 }
1705 
1706 static irqreturn_t arm_smmu_combined_irq_handler(int irq, void *dev)
1707 {
1708     arm_smmu_gerror_handler(irq, dev);
1709     return IRQ_WAKE_THREAD;
1710 }
1711 
1712 static void
1713 arm_smmu_atc_inv_to_cmd(int ssid, unsigned long iova, size_t size,
1714             struct arm_smmu_cmdq_ent *cmd)
1715 {
1716     size_t log2_span;
1717     size_t span_mask;
1718     /* ATC invalidates are always on 4096-bytes pages */
1719     size_t inval_grain_shift = 12;
1720     unsigned long page_start, page_end;
1721 
1722     /*
1723      * ATS and PASID:
1724      *
1725      * If substream_valid is clear, the PCIe TLP is sent without a PASID
1726      * prefix. In that case all ATC entries within the address range are
1727      * invalidated, including those that were requested with a PASID! There
1728      * is no way to invalidate only entries without PASID.
1729      *
1730      * When using STRTAB_STE_1_S1DSS_SSID0 (reserving CD 0 for non-PASID
1731      * traffic), translation requests without PASID create ATC entries
1732      * without PASID, which must be invalidated with substream_valid clear.
1733      * This has the unpleasant side-effect of invalidating all PASID-tagged
1734      * ATC entries within the address range.
1735      */
1736     *cmd = (struct arm_smmu_cmdq_ent) {
1737         .opcode         = CMDQ_OP_ATC_INV,
1738         .substream_valid    = !!ssid,
1739         .atc.ssid       = ssid,
1740     };
1741 
1742     if (!size) {
1743         cmd->atc.size = ATC_INV_SIZE_ALL;
1744         return;
1745     }
1746 
1747     page_start  = iova >> inval_grain_shift;
1748     page_end    = (iova + size - 1) >> inval_grain_shift;
1749 
1750     /*
1751      * In an ATS Invalidate Request, the address must be aligned on the
1752      * range size, which must be a power of two number of page sizes. We
1753      * thus have to choose between grossly over-invalidating the region, or
1754      * splitting the invalidation into multiple commands. For simplicity
1755      * we'll go with the first solution, but should refine it in the future
1756      * if multiple commands are shown to be more efficient.
1757      *
1758      * Find the smallest power of two that covers the range. The most
1759      * significant differing bit between the start and end addresses,
1760      * fls(start ^ end), indicates the required span. For example:
1761      *
1762      * We want to invalidate pages [8; 11]. This is already the ideal range:
1763      *      x = 0b1000 ^ 0b1011 = 0b11
1764      *      span = 1 << fls(x) = 4
1765      *
1766      * To invalidate pages [7; 10], we need to invalidate [0; 15]:
1767      *      x = 0b0111 ^ 0b1010 = 0b1101
1768      *      span = 1 << fls(x) = 16
1769      */
1770     log2_span   = fls_long(page_start ^ page_end);
1771     span_mask   = (1ULL << log2_span) - 1;
1772 
1773     page_start  &= ~span_mask;
1774 
1775     cmd->atc.addr   = page_start << inval_grain_shift;
1776     cmd->atc.size   = log2_span;
1777 }
1778 
1779 static int arm_smmu_atc_inv_master(struct arm_smmu_master *master)
1780 {
1781     int i;
1782     struct arm_smmu_cmdq_ent cmd;
1783     struct arm_smmu_cmdq_batch cmds;
1784 
1785     arm_smmu_atc_inv_to_cmd(0, 0, 0, &cmd);
1786 
1787     cmds.num = 0;
1788     for (i = 0; i < master->num_streams; i++) {
1789         cmd.atc.sid = master->streams[i].id;
1790         arm_smmu_cmdq_batch_add(master->smmu, &cmds, &cmd);
1791     }
1792 
1793     return arm_smmu_cmdq_batch_submit(master->smmu, &cmds);
1794 }
1795 
1796 int arm_smmu_atc_inv_domain(struct arm_smmu_domain *smmu_domain, int ssid,
1797                 unsigned long iova, size_t size)
1798 {
1799     int i;
1800     unsigned long flags;
1801     struct arm_smmu_cmdq_ent cmd;
1802     struct arm_smmu_master *master;
1803     struct arm_smmu_cmdq_batch cmds;
1804 
1805     if (!(smmu_domain->smmu->features & ARM_SMMU_FEAT_ATS))
1806         return 0;
1807 
1808     /*
1809      * Ensure that we've completed prior invalidation of the main TLBs
1810      * before we read 'nr_ats_masters' in case of a concurrent call to
1811      * arm_smmu_enable_ats():
1812      *
1813      *  // unmap()          // arm_smmu_enable_ats()
1814      *  TLBI+SYNC           atomic_inc(&nr_ats_masters);
1815      *  smp_mb();           [...]
1816      *  atomic_read(&nr_ats_masters);   pci_enable_ats() // writel()
1817      *
1818      * Ensures that we always see the incremented 'nr_ats_masters' count if
1819      * ATS was enabled at the PCI device before completion of the TLBI.
1820      */
1821     smp_mb();
1822     if (!atomic_read(&smmu_domain->nr_ats_masters))
1823         return 0;
1824 
1825     arm_smmu_atc_inv_to_cmd(ssid, iova, size, &cmd);
1826 
1827     cmds.num = 0;
1828 
1829     spin_lock_irqsave(&smmu_domain->devices_lock, flags);
1830     list_for_each_entry(master, &smmu_domain->devices, domain_head) {
1831         if (!master->ats_enabled)
1832             continue;
1833 
1834         for (i = 0; i < master->num_streams; i++) {
1835             cmd.atc.sid = master->streams[i].id;
1836             arm_smmu_cmdq_batch_add(smmu_domain->smmu, &cmds, &cmd);
1837         }
1838     }
1839     spin_unlock_irqrestore(&smmu_domain->devices_lock, flags);
1840 
1841     return arm_smmu_cmdq_batch_submit(smmu_domain->smmu, &cmds);
1842 }
1843 
1844 /* IO_PGTABLE API */
1845 static void arm_smmu_tlb_inv_context(void *cookie)
1846 {
1847     struct arm_smmu_domain *smmu_domain = cookie;
1848     struct arm_smmu_device *smmu = smmu_domain->smmu;
1849     struct arm_smmu_cmdq_ent cmd;
1850 
1851     /*
1852      * NOTE: when io-pgtable is in non-strict mode, we may get here with
1853      * PTEs previously cleared by unmaps on the current CPU not yet visible
1854      * to the SMMU. We are relying on the dma_wmb() implicit during cmd
1855      * insertion to guarantee those are observed before the TLBI. Do be
1856      * careful, 007.
1857      */
1858     if (smmu_domain->stage == ARM_SMMU_DOMAIN_S1) {
1859         arm_smmu_tlb_inv_asid(smmu, smmu_domain->s1_cfg.cd.asid);
1860     } else {
1861         cmd.opcode  = CMDQ_OP_TLBI_S12_VMALL;
1862         cmd.tlbi.vmid   = smmu_domain->s2_cfg.vmid;
1863         arm_smmu_cmdq_issue_cmd_with_sync(smmu, &cmd);
1864     }
1865     arm_smmu_atc_inv_domain(smmu_domain, 0, 0, 0);
1866 }
1867 
1868 static void __arm_smmu_tlb_inv_range(struct arm_smmu_cmdq_ent *cmd,
1869                      unsigned long iova, size_t size,
1870                      size_t granule,
1871                      struct arm_smmu_domain *smmu_domain)
1872 {
1873     struct arm_smmu_device *smmu = smmu_domain->smmu;
1874     unsigned long end = iova + size, num_pages = 0, tg = 0;
1875     size_t inv_range = granule;
1876     struct arm_smmu_cmdq_batch cmds;
1877 
1878     if (!size)
1879         return;
1880 
1881     if (smmu->features & ARM_SMMU_FEAT_RANGE_INV) {
1882         /* Get the leaf page size */
1883         tg = __ffs(smmu_domain->domain.pgsize_bitmap);
1884 
1885         /* Convert page size of 12,14,16 (log2) to 1,2,3 */
1886         cmd->tlbi.tg = (tg - 10) / 2;
1887 
1888         /* Determine what level the granule is at */
1889         cmd->tlbi.ttl = 4 - ((ilog2(granule) - 3) / (tg - 3));
1890 
1891         num_pages = size >> tg;
1892     }
1893 
1894     cmds.num = 0;
1895 
1896     while (iova < end) {
1897         if (smmu->features & ARM_SMMU_FEAT_RANGE_INV) {
1898             /*
1899              * On each iteration of the loop, the range is 5 bits
1900              * worth of the aligned size remaining.
1901              * The range in pages is:
1902              *
1903              * range = (num_pages & (0x1f << __ffs(num_pages)))
1904              */
1905             unsigned long scale, num;
1906 
1907             /* Determine the power of 2 multiple number of pages */
1908             scale = __ffs(num_pages);
1909             cmd->tlbi.scale = scale;
1910 
1911             /* Determine how many chunks of 2^scale size we have */
1912             num = (num_pages >> scale) & CMDQ_TLBI_RANGE_NUM_MAX;
1913             cmd->tlbi.num = num - 1;
1914 
1915             /* range is num * 2^scale * pgsize */
1916             inv_range = num << (scale + tg);
1917 
1918             /* Clear out the lower order bits for the next iteration */
1919             num_pages -= num << scale;
1920         }
1921 
1922         cmd->tlbi.addr = iova;
1923         arm_smmu_cmdq_batch_add(smmu, &cmds, cmd);
1924         iova += inv_range;
1925     }
1926     arm_smmu_cmdq_batch_submit(smmu, &cmds);
1927 }
1928 
1929 static void arm_smmu_tlb_inv_range_domain(unsigned long iova, size_t size,
1930                       size_t granule, bool leaf,
1931                       struct arm_smmu_domain *smmu_domain)
1932 {
1933     struct arm_smmu_cmdq_ent cmd = {
1934         .tlbi = {
1935             .leaf   = leaf,
1936         },
1937     };
1938 
1939     if (smmu_domain->stage == ARM_SMMU_DOMAIN_S1) {
1940         cmd.opcode  = smmu_domain->smmu->features & ARM_SMMU_FEAT_E2H ?
1941                   CMDQ_OP_TLBI_EL2_VA : CMDQ_OP_TLBI_NH_VA;
1942         cmd.tlbi.asid   = smmu_domain->s1_cfg.cd.asid;
1943     } else {
1944         cmd.opcode  = CMDQ_OP_TLBI_S2_IPA;
1945         cmd.tlbi.vmid   = smmu_domain->s2_cfg.vmid;
1946     }
1947     __arm_smmu_tlb_inv_range(&cmd, iova, size, granule, smmu_domain);
1948 
1949     /*
1950      * Unfortunately, this can't be leaf-only since we may have
1951      * zapped an entire table.
1952      */
1953     arm_smmu_atc_inv_domain(smmu_domain, 0, iova, size);
1954 }
1955 
1956 void arm_smmu_tlb_inv_range_asid(unsigned long iova, size_t size, int asid,
1957                  size_t granule, bool leaf,
1958                  struct arm_smmu_domain *smmu_domain)
1959 {
1960     struct arm_smmu_cmdq_ent cmd = {
1961         .opcode = smmu_domain->smmu->features & ARM_SMMU_FEAT_E2H ?
1962               CMDQ_OP_TLBI_EL2_VA : CMDQ_OP_TLBI_NH_VA,
1963         .tlbi = {
1964             .asid   = asid,
1965             .leaf   = leaf,
1966         },
1967     };
1968 
1969     __arm_smmu_tlb_inv_range(&cmd, iova, size, granule, smmu_domain);
1970 }
1971 
1972 static void arm_smmu_tlb_inv_page_nosync(struct iommu_iotlb_gather *gather,
1973                      unsigned long iova, size_t granule,
1974                      void *cookie)
1975 {
1976     struct arm_smmu_domain *smmu_domain = cookie;
1977     struct iommu_domain *domain = &smmu_domain->domain;
1978 
1979     iommu_iotlb_gather_add_page(domain, gather, iova, granule);
1980 }
1981 
1982 static void arm_smmu_tlb_inv_walk(unsigned long iova, size_t size,
1983                   size_t granule, void *cookie)
1984 {
1985     arm_smmu_tlb_inv_range_domain(iova, size, granule, false, cookie);
1986 }
1987 
1988 static const struct iommu_flush_ops arm_smmu_flush_ops = {
1989     .tlb_flush_all  = arm_smmu_tlb_inv_context,
1990     .tlb_flush_walk = arm_smmu_tlb_inv_walk,
1991     .tlb_add_page   = arm_smmu_tlb_inv_page_nosync,
1992 };
1993 
1994 /* IOMMU API */
1995 static bool arm_smmu_capable(enum iommu_cap cap)
1996 {
1997     switch (cap) {
1998     case IOMMU_CAP_CACHE_COHERENCY:
1999         return true;
2000     case IOMMU_CAP_NOEXEC:
2001         return true;
2002     default:
2003         return false;
2004     }
2005 }
2006 
2007 static struct iommu_domain *arm_smmu_domain_alloc(unsigned type)
2008 {
2009     struct arm_smmu_domain *smmu_domain;
2010 
2011     if (type != IOMMU_DOMAIN_UNMANAGED &&
2012         type != IOMMU_DOMAIN_DMA &&
2013         type != IOMMU_DOMAIN_DMA_FQ &&
2014         type != IOMMU_DOMAIN_IDENTITY)
2015         return NULL;
2016 
2017     /*
2018      * Allocate the domain and initialise some of its data structures.
2019      * We can't really do anything meaningful until we've added a
2020      * master.
2021      */
2022     smmu_domain = kzalloc(sizeof(*smmu_domain), GFP_KERNEL);
2023     if (!smmu_domain)
2024         return NULL;
2025 
2026     mutex_init(&smmu_domain->init_mutex);
2027     INIT_LIST_HEAD(&smmu_domain->devices);
2028     spin_lock_init(&smmu_domain->devices_lock);
2029     INIT_LIST_HEAD(&smmu_domain->mmu_notifiers);
2030 
2031     return &smmu_domain->domain;
2032 }
2033 
2034 static int arm_smmu_bitmap_alloc(unsigned long *map, int span)
2035 {
2036     int idx, size = 1 << span;
2037 
2038     do {
2039         idx = find_first_zero_bit(map, size);
2040         if (idx == size)
2041             return -ENOSPC;
2042     } while (test_and_set_bit(idx, map));
2043 
2044     return idx;
2045 }
2046 
2047 static void arm_smmu_bitmap_free(unsigned long *map, int idx)
2048 {
2049     clear_bit(idx, map);
2050 }
2051 
2052 static void arm_smmu_domain_free(struct iommu_domain *domain)
2053 {
2054     struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
2055     struct arm_smmu_device *smmu = smmu_domain->smmu;
2056 
2057     free_io_pgtable_ops(smmu_domain->pgtbl_ops);
2058 
2059     /* Free the CD and ASID, if we allocated them */
2060     if (smmu_domain->stage == ARM_SMMU_DOMAIN_S1) {
2061         struct arm_smmu_s1_cfg *cfg = &smmu_domain->s1_cfg;
2062 
2063         /* Prevent SVA from touching the CD while we're freeing it */
2064         mutex_lock(&arm_smmu_asid_lock);
2065         if (cfg->cdcfg.cdtab)
2066             arm_smmu_free_cd_tables(smmu_domain);
2067         arm_smmu_free_asid(&cfg->cd);
2068         mutex_unlock(&arm_smmu_asid_lock);
2069     } else {
2070         struct arm_smmu_s2_cfg *cfg = &smmu_domain->s2_cfg;
2071         if (cfg->vmid)
2072             arm_smmu_bitmap_free(smmu->vmid_map, cfg->vmid);
2073     }
2074 
2075     kfree(smmu_domain);
2076 }
2077 
2078 static int arm_smmu_domain_finalise_s1(struct arm_smmu_domain *smmu_domain,
2079                        struct arm_smmu_master *master,
2080                        struct io_pgtable_cfg *pgtbl_cfg)
2081 {
2082     int ret;
2083     u32 asid;
2084     struct arm_smmu_device *smmu = smmu_domain->smmu;
2085     struct arm_smmu_s1_cfg *cfg = &smmu_domain->s1_cfg;
2086     typeof(&pgtbl_cfg->arm_lpae_s1_cfg.tcr) tcr = &pgtbl_cfg->arm_lpae_s1_cfg.tcr;
2087 
2088     refcount_set(&cfg->cd.refs, 1);
2089 
2090     /* Prevent SVA from modifying the ASID until it is written to the CD */
2091     mutex_lock(&arm_smmu_asid_lock);
2092     ret = xa_alloc(&arm_smmu_asid_xa, &asid, &cfg->cd,
2093                XA_LIMIT(1, (1 << smmu->asid_bits) - 1), GFP_KERNEL);
2094     if (ret)
2095         goto out_unlock;
2096 
2097     cfg->s1cdmax = master->ssid_bits;
2098 
2099     smmu_domain->stall_enabled = master->stall_enabled;
2100 
2101     ret = arm_smmu_alloc_cd_tables(smmu_domain);
2102     if (ret)
2103         goto out_free_asid;
2104 
2105     cfg->cd.asid    = (u16)asid;
2106     cfg->cd.ttbr    = pgtbl_cfg->arm_lpae_s1_cfg.ttbr;
2107     cfg->cd.tcr = FIELD_PREP(CTXDESC_CD_0_TCR_T0SZ, tcr->tsz) |
2108               FIELD_PREP(CTXDESC_CD_0_TCR_TG0, tcr->tg) |
2109               FIELD_PREP(CTXDESC_CD_0_TCR_IRGN0, tcr->irgn) |
2110               FIELD_PREP(CTXDESC_CD_0_TCR_ORGN0, tcr->orgn) |
2111               FIELD_PREP(CTXDESC_CD_0_TCR_SH0, tcr->sh) |
2112               FIELD_PREP(CTXDESC_CD_0_TCR_IPS, tcr->ips) |
2113               CTXDESC_CD_0_TCR_EPD1 | CTXDESC_CD_0_AA64;
2114     cfg->cd.mair    = pgtbl_cfg->arm_lpae_s1_cfg.mair;
2115 
2116     /*
2117      * Note that this will end up calling arm_smmu_sync_cd() before
2118      * the master has been added to the devices list for this domain.
2119      * This isn't an issue because the STE hasn't been installed yet.
2120      */
2121     ret = arm_smmu_write_ctx_desc(smmu_domain, 0, &cfg->cd);
2122     if (ret)
2123         goto out_free_cd_tables;
2124 
2125     mutex_unlock(&arm_smmu_asid_lock);
2126     return 0;
2127 
2128 out_free_cd_tables:
2129     arm_smmu_free_cd_tables(smmu_domain);
2130 out_free_asid:
2131     arm_smmu_free_asid(&cfg->cd);
2132 out_unlock:
2133     mutex_unlock(&arm_smmu_asid_lock);
2134     return ret;
2135 }
2136 
2137 static int arm_smmu_domain_finalise_s2(struct arm_smmu_domain *smmu_domain,
2138                        struct arm_smmu_master *master,
2139                        struct io_pgtable_cfg *pgtbl_cfg)
2140 {
2141     int vmid;
2142     struct arm_smmu_device *smmu = smmu_domain->smmu;
2143     struct arm_smmu_s2_cfg *cfg = &smmu_domain->s2_cfg;
2144     typeof(&pgtbl_cfg->arm_lpae_s2_cfg.vtcr) vtcr;
2145 
2146     vmid = arm_smmu_bitmap_alloc(smmu->vmid_map, smmu->vmid_bits);
2147     if (vmid < 0)
2148         return vmid;
2149 
2150     vtcr = &pgtbl_cfg->arm_lpae_s2_cfg.vtcr;
2151     cfg->vmid   = (u16)vmid;
2152     cfg->vttbr  = pgtbl_cfg->arm_lpae_s2_cfg.vttbr;
2153     cfg->vtcr   = FIELD_PREP(STRTAB_STE_2_VTCR_S2T0SZ, vtcr->tsz) |
2154               FIELD_PREP(STRTAB_STE_2_VTCR_S2SL0, vtcr->sl) |
2155               FIELD_PREP(STRTAB_STE_2_VTCR_S2IR0, vtcr->irgn) |
2156               FIELD_PREP(STRTAB_STE_2_VTCR_S2OR0, vtcr->orgn) |
2157               FIELD_PREP(STRTAB_STE_2_VTCR_S2SH0, vtcr->sh) |
2158               FIELD_PREP(STRTAB_STE_2_VTCR_S2TG, vtcr->tg) |
2159               FIELD_PREP(STRTAB_STE_2_VTCR_S2PS, vtcr->ps);
2160     return 0;
2161 }
2162 
2163 static int arm_smmu_domain_finalise(struct iommu_domain *domain,
2164                     struct arm_smmu_master *master)
2165 {
2166     int ret;
2167     unsigned long ias, oas;
2168     enum io_pgtable_fmt fmt;
2169     struct io_pgtable_cfg pgtbl_cfg;
2170     struct io_pgtable_ops *pgtbl_ops;
2171     int (*finalise_stage_fn)(struct arm_smmu_domain *,
2172                  struct arm_smmu_master *,
2173                  struct io_pgtable_cfg *);
2174     struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
2175     struct arm_smmu_device *smmu = smmu_domain->smmu;
2176 
2177     if (domain->type == IOMMU_DOMAIN_IDENTITY) {
2178         smmu_domain->stage = ARM_SMMU_DOMAIN_BYPASS;
2179         return 0;
2180     }
2181 
2182     /* Restrict the stage to what we can actually support */
2183     if (!(smmu->features & ARM_SMMU_FEAT_TRANS_S1))
2184         smmu_domain->stage = ARM_SMMU_DOMAIN_S2;
2185     if (!(smmu->features & ARM_SMMU_FEAT_TRANS_S2))
2186         smmu_domain->stage = ARM_SMMU_DOMAIN_S1;
2187 
2188     switch (smmu_domain->stage) {
2189     case ARM_SMMU_DOMAIN_S1:
2190         ias = (smmu->features & ARM_SMMU_FEAT_VAX) ? 52 : 48;
2191         ias = min_t(unsigned long, ias, VA_BITS);
2192         oas = smmu->ias;
2193         fmt = ARM_64_LPAE_S1;
2194         finalise_stage_fn = arm_smmu_domain_finalise_s1;
2195         break;
2196     case ARM_SMMU_DOMAIN_NESTED:
2197     case ARM_SMMU_DOMAIN_S2:
2198         ias = smmu->ias;
2199         oas = smmu->oas;
2200         fmt = ARM_64_LPAE_S2;
2201         finalise_stage_fn = arm_smmu_domain_finalise_s2;
2202         break;
2203     default:
2204         return -EINVAL;
2205     }
2206 
2207     pgtbl_cfg = (struct io_pgtable_cfg) {
2208         .pgsize_bitmap  = smmu->pgsize_bitmap,
2209         .ias        = ias,
2210         .oas        = oas,
2211         .coherent_walk  = smmu->features & ARM_SMMU_FEAT_COHERENCY,
2212         .tlb        = &arm_smmu_flush_ops,
2213         .iommu_dev  = smmu->dev,
2214     };
2215 
2216     pgtbl_ops = alloc_io_pgtable_ops(fmt, &pgtbl_cfg, smmu_domain);
2217     if (!pgtbl_ops)
2218         return -ENOMEM;
2219 
2220     domain->pgsize_bitmap = pgtbl_cfg.pgsize_bitmap;
2221     domain->geometry.aperture_end = (1UL << pgtbl_cfg.ias) - 1;
2222     domain->geometry.force_aperture = true;
2223 
2224     ret = finalise_stage_fn(smmu_domain, master, &pgtbl_cfg);
2225     if (ret < 0) {
2226         free_io_pgtable_ops(pgtbl_ops);
2227         return ret;
2228     }
2229 
2230     smmu_domain->pgtbl_ops = pgtbl_ops;
2231     return 0;
2232 }
2233 
2234 static __le64 *arm_smmu_get_step_for_sid(struct arm_smmu_device *smmu, u32 sid)
2235 {
2236     __le64 *step;
2237     struct arm_smmu_strtab_cfg *cfg = &smmu->strtab_cfg;
2238 
2239     if (smmu->features & ARM_SMMU_FEAT_2_LVL_STRTAB) {
2240         struct arm_smmu_strtab_l1_desc *l1_desc;
2241         int idx;
2242 
2243         /* Two-level walk */
2244         idx = (sid >> STRTAB_SPLIT) * STRTAB_L1_DESC_DWORDS;
2245         l1_desc = &cfg->l1_desc[idx];
2246         idx = (sid & ((1 << STRTAB_SPLIT) - 1)) * STRTAB_STE_DWORDS;
2247         step = &l1_desc->l2ptr[idx];
2248     } else {
2249         /* Simple linear lookup */
2250         step = &cfg->strtab[sid * STRTAB_STE_DWORDS];
2251     }
2252 
2253     return step;
2254 }
2255 
2256 static void arm_smmu_install_ste_for_dev(struct arm_smmu_master *master)
2257 {
2258     int i, j;
2259     struct arm_smmu_device *smmu = master->smmu;
2260 
2261     for (i = 0; i < master->num_streams; ++i) {
2262         u32 sid = master->streams[i].id;
2263         __le64 *step = arm_smmu_get_step_for_sid(smmu, sid);
2264 
2265         /* Bridged PCI devices may end up with duplicated IDs */
2266         for (j = 0; j < i; j++)
2267             if (master->streams[j].id == sid)
2268                 break;
2269         if (j < i)
2270             continue;
2271 
2272         arm_smmu_write_strtab_ent(master, sid, step);
2273     }
2274 }
2275 
2276 static bool arm_smmu_ats_supported(struct arm_smmu_master *master)
2277 {
2278     struct device *dev = master->dev;
2279     struct arm_smmu_device *smmu = master->smmu;
2280     struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
2281 
2282     if (!(smmu->features & ARM_SMMU_FEAT_ATS))
2283         return false;
2284 
2285     if (!(fwspec->flags & IOMMU_FWSPEC_PCI_RC_ATS))
2286         return false;
2287 
2288     return dev_is_pci(dev) && pci_ats_supported(to_pci_dev(dev));
2289 }
2290 
2291 static void arm_smmu_enable_ats(struct arm_smmu_master *master)
2292 {
2293     size_t stu;
2294     struct pci_dev *pdev;
2295     struct arm_smmu_device *smmu = master->smmu;
2296     struct arm_smmu_domain *smmu_domain = master->domain;
2297 
2298     /* Don't enable ATS at the endpoint if it's not enabled in the STE */
2299     if (!master->ats_enabled)
2300         return;
2301 
2302     /* Smallest Translation Unit: log2 of the smallest supported granule */
2303     stu = __ffs(smmu->pgsize_bitmap);
2304     pdev = to_pci_dev(master->dev);
2305 
2306     atomic_inc(&smmu_domain->nr_ats_masters);
2307     arm_smmu_atc_inv_domain(smmu_domain, 0, 0, 0);
2308     if (pci_enable_ats(pdev, stu))
2309         dev_err(master->dev, "Failed to enable ATS (STU %zu)\n", stu);
2310 }
2311 
2312 static void arm_smmu_disable_ats(struct arm_smmu_master *master)
2313 {
2314     struct arm_smmu_domain *smmu_domain = master->domain;
2315 
2316     if (!master->ats_enabled)
2317         return;
2318 
2319     pci_disable_ats(to_pci_dev(master->dev));
2320     /*
2321      * Ensure ATS is disabled at the endpoint before we issue the
2322      * ATC invalidation via the SMMU.
2323      */
2324     wmb();
2325     arm_smmu_atc_inv_master(master);
2326     atomic_dec(&smmu_domain->nr_ats_masters);
2327 }
2328 
2329 static int arm_smmu_enable_pasid(struct arm_smmu_master *master)
2330 {
2331     int ret;
2332     int features;
2333     int num_pasids;
2334     struct pci_dev *pdev;
2335 
2336     if (!dev_is_pci(master->dev))
2337         return -ENODEV;
2338 
2339     pdev = to_pci_dev(master->dev);
2340 
2341     features = pci_pasid_features(pdev);
2342     if (features < 0)
2343         return features;
2344 
2345     num_pasids = pci_max_pasids(pdev);
2346     if (num_pasids <= 0)
2347         return num_pasids;
2348 
2349     ret = pci_enable_pasid(pdev, features);
2350     if (ret) {
2351         dev_err(&pdev->dev, "Failed to enable PASID\n");
2352         return ret;
2353     }
2354 
2355     master->ssid_bits = min_t(u8, ilog2(num_pasids),
2356                   master->smmu->ssid_bits);
2357     return 0;
2358 }
2359 
2360 static void arm_smmu_disable_pasid(struct arm_smmu_master *master)
2361 {
2362     struct pci_dev *pdev;
2363 
2364     if (!dev_is_pci(master->dev))
2365         return;
2366 
2367     pdev = to_pci_dev(master->dev);
2368 
2369     if (!pdev->pasid_enabled)
2370         return;
2371 
2372     master->ssid_bits = 0;
2373     pci_disable_pasid(pdev);
2374 }
2375 
2376 static void arm_smmu_detach_dev(struct arm_smmu_master *master)
2377 {
2378     unsigned long flags;
2379     struct arm_smmu_domain *smmu_domain = master->domain;
2380 
2381     if (!smmu_domain)
2382         return;
2383 
2384     arm_smmu_disable_ats(master);
2385 
2386     spin_lock_irqsave(&smmu_domain->devices_lock, flags);
2387     list_del(&master->domain_head);
2388     spin_unlock_irqrestore(&smmu_domain->devices_lock, flags);
2389 
2390     master->domain = NULL;
2391     master->ats_enabled = false;
2392     arm_smmu_install_ste_for_dev(master);
2393 }
2394 
2395 static int arm_smmu_attach_dev(struct iommu_domain *domain, struct device *dev)
2396 {
2397     int ret = 0;
2398     unsigned long flags;
2399     struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
2400     struct arm_smmu_device *smmu;
2401     struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
2402     struct arm_smmu_master *master;
2403 
2404     if (!fwspec)
2405         return -ENOENT;
2406 
2407     master = dev_iommu_priv_get(dev);
2408     smmu = master->smmu;
2409 
2410     /*
2411      * Checking that SVA is disabled ensures that this device isn't bound to
2412      * any mm, and can be safely detached from its old domain. Bonds cannot
2413      * be removed concurrently since we're holding the group mutex.
2414      */
2415     if (arm_smmu_master_sva_enabled(master)) {
2416         dev_err(dev, "cannot attach - SVA enabled\n");
2417         return -EBUSY;
2418     }
2419 
2420     arm_smmu_detach_dev(master);
2421 
2422     mutex_lock(&smmu_domain->init_mutex);
2423 
2424     if (!smmu_domain->smmu) {
2425         smmu_domain->smmu = smmu;
2426         ret = arm_smmu_domain_finalise(domain, master);
2427         if (ret) {
2428             smmu_domain->smmu = NULL;
2429             goto out_unlock;
2430         }
2431     } else if (smmu_domain->smmu != smmu) {
2432         dev_err(dev,
2433             "cannot attach to SMMU %s (upstream of %s)\n",
2434             dev_name(smmu_domain->smmu->dev),
2435             dev_name(smmu->dev));
2436         ret = -ENXIO;
2437         goto out_unlock;
2438     } else if (smmu_domain->stage == ARM_SMMU_DOMAIN_S1 &&
2439            master->ssid_bits != smmu_domain->s1_cfg.s1cdmax) {
2440         dev_err(dev,
2441             "cannot attach to incompatible domain (%u SSID bits != %u)\n",
2442             smmu_domain->s1_cfg.s1cdmax, master->ssid_bits);
2443         ret = -EINVAL;
2444         goto out_unlock;
2445     } else if (smmu_domain->stage == ARM_SMMU_DOMAIN_S1 &&
2446            smmu_domain->stall_enabled != master->stall_enabled) {
2447         dev_err(dev, "cannot attach to stall-%s domain\n",
2448             smmu_domain->stall_enabled ? "enabled" : "disabled");
2449         ret = -EINVAL;
2450         goto out_unlock;
2451     }
2452 
2453     master->domain = smmu_domain;
2454 
2455     if (smmu_domain->stage != ARM_SMMU_DOMAIN_BYPASS)
2456         master->ats_enabled = arm_smmu_ats_supported(master);
2457 
2458     arm_smmu_install_ste_for_dev(master);
2459 
2460     spin_lock_irqsave(&smmu_domain->devices_lock, flags);
2461     list_add(&master->domain_head, &smmu_domain->devices);
2462     spin_unlock_irqrestore(&smmu_domain->devices_lock, flags);
2463 
2464     arm_smmu_enable_ats(master);
2465 
2466 out_unlock:
2467     mutex_unlock(&smmu_domain->init_mutex);
2468     return ret;
2469 }
2470 
2471 static int arm_smmu_map_pages(struct iommu_domain *domain, unsigned long iova,
2472                   phys_addr_t paddr, size_t pgsize, size_t pgcount,
2473                   int prot, gfp_t gfp, size_t *mapped)
2474 {
2475     struct io_pgtable_ops *ops = to_smmu_domain(domain)->pgtbl_ops;
2476 
2477     if (!ops)
2478         return -ENODEV;
2479 
2480     return ops->map_pages(ops, iova, paddr, pgsize, pgcount, prot, gfp, mapped);
2481 }
2482 
2483 static size_t arm_smmu_unmap_pages(struct iommu_domain *domain, unsigned long iova,
2484                    size_t pgsize, size_t pgcount,
2485                    struct iommu_iotlb_gather *gather)
2486 {
2487     struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
2488     struct io_pgtable_ops *ops = smmu_domain->pgtbl_ops;
2489 
2490     if (!ops)
2491         return 0;
2492 
2493     return ops->unmap_pages(ops, iova, pgsize, pgcount, gather);
2494 }
2495 
2496 static void arm_smmu_flush_iotlb_all(struct iommu_domain *domain)
2497 {
2498     struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
2499 
2500     if (smmu_domain->smmu)
2501         arm_smmu_tlb_inv_context(smmu_domain);
2502 }
2503 
2504 static void arm_smmu_iotlb_sync(struct iommu_domain *domain,
2505                 struct iommu_iotlb_gather *gather)
2506 {
2507     struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
2508 
2509     if (!gather->pgsize)
2510         return;
2511 
2512     arm_smmu_tlb_inv_range_domain(gather->start,
2513                       gather->end - gather->start + 1,
2514                       gather->pgsize, true, smmu_domain);
2515 }
2516 
2517 static phys_addr_t
2518 arm_smmu_iova_to_phys(struct iommu_domain *domain, dma_addr_t iova)
2519 {
2520     struct io_pgtable_ops *ops = to_smmu_domain(domain)->pgtbl_ops;
2521 
2522     if (!ops)
2523         return 0;
2524 
2525     return ops->iova_to_phys(ops, iova);
2526 }
2527 
2528 static struct platform_driver arm_smmu_driver;
2529 
2530 static
2531 struct arm_smmu_device *arm_smmu_get_by_fwnode(struct fwnode_handle *fwnode)
2532 {
2533     struct device *dev = driver_find_device_by_fwnode(&arm_smmu_driver.driver,
2534                               fwnode);
2535     put_device(dev);
2536     return dev ? dev_get_drvdata(dev) : NULL;
2537 }
2538 
2539 static bool arm_smmu_sid_in_range(struct arm_smmu_device *smmu, u32 sid)
2540 {
2541     unsigned long limit = smmu->strtab_cfg.num_l1_ents;
2542 
2543     if (smmu->features & ARM_SMMU_FEAT_2_LVL_STRTAB)
2544         limit *= 1UL << STRTAB_SPLIT;
2545 
2546     return sid < limit;
2547 }
2548 
2549 static int arm_smmu_init_sid_strtab(struct arm_smmu_device *smmu, u32 sid)
2550 {
2551     /* Check the SIDs are in range of the SMMU and our stream table */
2552     if (!arm_smmu_sid_in_range(smmu, sid))
2553         return -ERANGE;
2554 
2555     /* Ensure l2 strtab is initialised */
2556     if (smmu->features & ARM_SMMU_FEAT_2_LVL_STRTAB)
2557         return arm_smmu_init_l2_strtab(smmu, sid);
2558 
2559     return 0;
2560 }
2561 
2562 static int arm_smmu_insert_master(struct arm_smmu_device *smmu,
2563                   struct arm_smmu_master *master)
2564 {
2565     int i;
2566     int ret = 0;
2567     struct arm_smmu_stream *new_stream, *cur_stream;
2568     struct rb_node **new_node, *parent_node = NULL;
2569     struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(master->dev);
2570 
2571     master->streams = kcalloc(fwspec->num_ids, sizeof(*master->streams),
2572                   GFP_KERNEL);
2573     if (!master->streams)
2574         return -ENOMEM;
2575     master->num_streams = fwspec->num_ids;
2576 
2577     mutex_lock(&smmu->streams_mutex);
2578     for (i = 0; i < fwspec->num_ids; i++) {
2579         u32 sid = fwspec->ids[i];
2580 
2581         new_stream = &master->streams[i];
2582         new_stream->id = sid;
2583         new_stream->master = master;
2584 
2585         ret = arm_smmu_init_sid_strtab(smmu, sid);
2586         if (ret)
2587             break;
2588 
2589         /* Insert into SID tree */
2590         new_node = &(smmu->streams.rb_node);
2591         while (*new_node) {
2592             cur_stream = rb_entry(*new_node, struct arm_smmu_stream,
2593                           node);
2594             parent_node = *new_node;
2595             if (cur_stream->id > new_stream->id) {
2596                 new_node = &((*new_node)->rb_left);
2597             } else if (cur_stream->id < new_stream->id) {
2598                 new_node = &((*new_node)->rb_right);
2599             } else {
2600                 dev_warn(master->dev,
2601                      "stream %u already in tree\n",
2602                      cur_stream->id);
2603                 ret = -EINVAL;
2604                 break;
2605             }
2606         }
2607         if (ret)
2608             break;
2609 
2610         rb_link_node(&new_stream->node, parent_node, new_node);
2611         rb_insert_color(&new_stream->node, &smmu->streams);
2612     }
2613 
2614     if (ret) {
2615         for (i--; i >= 0; i--)
2616             rb_erase(&master->streams[i].node, &smmu->streams);
2617         kfree(master->streams);
2618     }
2619     mutex_unlock(&smmu->streams_mutex);
2620 
2621     return ret;
2622 }
2623 
2624 static void arm_smmu_remove_master(struct arm_smmu_master *master)
2625 {
2626     int i;
2627     struct arm_smmu_device *smmu = master->smmu;
2628     struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(master->dev);
2629 
2630     if (!smmu || !master->streams)
2631         return;
2632 
2633     mutex_lock(&smmu->streams_mutex);
2634     for (i = 0; i < fwspec->num_ids; i++)
2635         rb_erase(&master->streams[i].node, &smmu->streams);
2636     mutex_unlock(&smmu->streams_mutex);
2637 
2638     kfree(master->streams);
2639 }
2640 
2641 static struct iommu_ops arm_smmu_ops;
2642 
2643 static struct iommu_device *arm_smmu_probe_device(struct device *dev)
2644 {
2645     int ret;
2646     struct arm_smmu_device *smmu;
2647     struct arm_smmu_master *master;
2648     struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
2649 
2650     if (!fwspec || fwspec->ops != &arm_smmu_ops)
2651         return ERR_PTR(-ENODEV);
2652 
2653     if (WARN_ON_ONCE(dev_iommu_priv_get(dev)))
2654         return ERR_PTR(-EBUSY);
2655 
2656     smmu = arm_smmu_get_by_fwnode(fwspec->iommu_fwnode);
2657     if (!smmu)
2658         return ERR_PTR(-ENODEV);
2659 
2660     master = kzalloc(sizeof(*master), GFP_KERNEL);
2661     if (!master)
2662         return ERR_PTR(-ENOMEM);
2663 
2664     master->dev = dev;
2665     master->smmu = smmu;
2666     INIT_LIST_HEAD(&master->bonds);
2667     dev_iommu_priv_set(dev, master);
2668 
2669     ret = arm_smmu_insert_master(smmu, master);
2670     if (ret)
2671         goto err_free_master;
2672 
2673     device_property_read_u32(dev, "pasid-num-bits", &master->ssid_bits);
2674     master->ssid_bits = min(smmu->ssid_bits, master->ssid_bits);
2675 
2676     /*
2677      * Note that PASID must be enabled before, and disabled after ATS:
2678      * PCI Express Base 4.0r1.0 - 10.5.1.3 ATS Control Register
2679      *
2680      *   Behavior is undefined if this bit is Set and the value of the PASID
2681      *   Enable, Execute Requested Enable, or Privileged Mode Requested bits
2682      *   are changed.
2683      */
2684     arm_smmu_enable_pasid(master);
2685 
2686     if (!(smmu->features & ARM_SMMU_FEAT_2_LVL_CDTAB))
2687         master->ssid_bits = min_t(u8, master->ssid_bits,
2688                       CTXDESC_LINEAR_CDMAX);
2689 
2690     if ((smmu->features & ARM_SMMU_FEAT_STALLS &&
2691          device_property_read_bool(dev, "dma-can-stall")) ||
2692         smmu->features & ARM_SMMU_FEAT_STALL_FORCE)
2693         master->stall_enabled = true;
2694 
2695     return &smmu->iommu;
2696 
2697 err_free_master:
2698     kfree(master);
2699     dev_iommu_priv_set(dev, NULL);
2700     return ERR_PTR(ret);
2701 }
2702 
2703 static void arm_smmu_release_device(struct device *dev)
2704 {
2705     struct arm_smmu_master *master = dev_iommu_priv_get(dev);
2706 
2707     if (WARN_ON(arm_smmu_master_sva_enabled(master)))
2708         iopf_queue_remove_device(master->smmu->evtq.iopf, dev);
2709     arm_smmu_detach_dev(master);
2710     arm_smmu_disable_pasid(master);
2711     arm_smmu_remove_master(master);
2712     kfree(master);
2713 }
2714 
2715 static struct iommu_group *arm_smmu_device_group(struct device *dev)
2716 {
2717     struct iommu_group *group;
2718 
2719     /*
2720      * We don't support devices sharing stream IDs other than PCI RID
2721      * aliases, since the necessary ID-to-device lookup becomes rather
2722      * impractical given a potential sparse 32-bit stream ID space.
2723      */
2724     if (dev_is_pci(dev))
2725         group = pci_device_group(dev);
2726     else
2727         group = generic_device_group(dev);
2728 
2729     return group;
2730 }
2731 
2732 static int arm_smmu_enable_nesting(struct iommu_domain *domain)
2733 {
2734     struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
2735     int ret = 0;
2736 
2737     mutex_lock(&smmu_domain->init_mutex);
2738     if (smmu_domain->smmu)
2739         ret = -EPERM;
2740     else
2741         smmu_domain->stage = ARM_SMMU_DOMAIN_NESTED;
2742     mutex_unlock(&smmu_domain->init_mutex);
2743 
2744     return ret;
2745 }
2746 
2747 static int arm_smmu_of_xlate(struct device *dev, struct of_phandle_args *args)
2748 {
2749     return iommu_fwspec_add_ids(dev, args->args, 1);
2750 }
2751 
2752 static void arm_smmu_get_resv_regions(struct device *dev,
2753                       struct list_head *head)
2754 {
2755     struct iommu_resv_region *region;
2756     int prot = IOMMU_WRITE | IOMMU_NOEXEC | IOMMU_MMIO;
2757 
2758     region = iommu_alloc_resv_region(MSI_IOVA_BASE, MSI_IOVA_LENGTH,
2759                      prot, IOMMU_RESV_SW_MSI);
2760     if (!region)
2761         return;
2762 
2763     list_add_tail(&region->list, head);
2764 
2765     iommu_dma_get_resv_regions(dev, head);
2766 }
2767 
2768 static int arm_smmu_dev_enable_feature(struct device *dev,
2769                        enum iommu_dev_features feat)
2770 {
2771     struct arm_smmu_master *master = dev_iommu_priv_get(dev);
2772 
2773     if (!master)
2774         return -ENODEV;
2775 
2776     switch (feat) {
2777     case IOMMU_DEV_FEAT_IOPF:
2778         if (!arm_smmu_master_iopf_supported(master))
2779             return -EINVAL;
2780         if (master->iopf_enabled)
2781             return -EBUSY;
2782         master->iopf_enabled = true;
2783         return 0;
2784     case IOMMU_DEV_FEAT_SVA:
2785         if (!arm_smmu_master_sva_supported(master))
2786             return -EINVAL;
2787         if (arm_smmu_master_sva_enabled(master))
2788             return -EBUSY;
2789         return arm_smmu_master_enable_sva(master);
2790     default:
2791         return -EINVAL;
2792     }
2793 }
2794 
2795 static int arm_smmu_dev_disable_feature(struct device *dev,
2796                     enum iommu_dev_features feat)
2797 {
2798     struct arm_smmu_master *master = dev_iommu_priv_get(dev);
2799 
2800     if (!master)
2801         return -EINVAL;
2802 
2803     switch (feat) {
2804     case IOMMU_DEV_FEAT_IOPF:
2805         if (!master->iopf_enabled)
2806             return -EINVAL;
2807         if (master->sva_enabled)
2808             return -EBUSY;
2809         master->iopf_enabled = false;
2810         return 0;
2811     case IOMMU_DEV_FEAT_SVA:
2812         if (!arm_smmu_master_sva_enabled(master))
2813             return -EINVAL;
2814         return arm_smmu_master_disable_sva(master);
2815     default:
2816         return -EINVAL;
2817     }
2818 }
2819 
2820 static struct iommu_ops arm_smmu_ops = {
2821     .capable        = arm_smmu_capable,
2822     .domain_alloc       = arm_smmu_domain_alloc,
2823     .probe_device       = arm_smmu_probe_device,
2824     .release_device     = arm_smmu_release_device,
2825     .device_group       = arm_smmu_device_group,
2826     .of_xlate       = arm_smmu_of_xlate,
2827     .get_resv_regions   = arm_smmu_get_resv_regions,
2828     .dev_enable_feat    = arm_smmu_dev_enable_feature,
2829     .dev_disable_feat   = arm_smmu_dev_disable_feature,
2830     .sva_bind       = arm_smmu_sva_bind,
2831     .sva_unbind     = arm_smmu_sva_unbind,
2832     .sva_get_pasid      = arm_smmu_sva_get_pasid,
2833     .page_response      = arm_smmu_page_response,
2834     .pgsize_bitmap      = -1UL, /* Restricted during device attach */
2835     .owner          = THIS_MODULE,
2836     .default_domain_ops = &(const struct iommu_domain_ops) {
2837         .attach_dev     = arm_smmu_attach_dev,
2838         .map_pages      = arm_smmu_map_pages,
2839         .unmap_pages        = arm_smmu_unmap_pages,
2840         .flush_iotlb_all    = arm_smmu_flush_iotlb_all,
2841         .iotlb_sync     = arm_smmu_iotlb_sync,
2842         .iova_to_phys       = arm_smmu_iova_to_phys,
2843         .enable_nesting     = arm_smmu_enable_nesting,
2844         .free           = arm_smmu_domain_free,
2845     }
2846 };
2847 
2848 /* Probing and initialisation functions */
2849 static int arm_smmu_init_one_queue(struct arm_smmu_device *smmu,
2850                    struct arm_smmu_queue *q,
2851                    void __iomem *page,
2852                    unsigned long prod_off,
2853                    unsigned long cons_off,
2854                    size_t dwords, const char *name)
2855 {
2856     size_t qsz;
2857 
2858     do {
2859         qsz = ((1 << q->llq.max_n_shift) * dwords) << 3;
2860         q->base = dmam_alloc_coherent(smmu->dev, qsz, &q->base_dma,
2861                           GFP_KERNEL);
2862         if (q->base || qsz < PAGE_SIZE)
2863             break;
2864 
2865         q->llq.max_n_shift--;
2866     } while (1);
2867 
2868     if (!q->base) {
2869         dev_err(smmu->dev,
2870             "failed to allocate queue (0x%zx bytes) for %s\n",
2871             qsz, name);
2872         return -ENOMEM;
2873     }
2874 
2875     if (!WARN_ON(q->base_dma & (qsz - 1))) {
2876         dev_info(smmu->dev, "allocated %u entries for %s\n",
2877              1 << q->llq.max_n_shift, name);
2878     }
2879 
2880     q->prod_reg = page + prod_off;
2881     q->cons_reg = page + cons_off;
2882     q->ent_dwords   = dwords;
2883 
2884     q->q_base  = Q_BASE_RWA;
2885     q->q_base |= q->base_dma & Q_BASE_ADDR_MASK;
2886     q->q_base |= FIELD_PREP(Q_BASE_LOG2SIZE, q->llq.max_n_shift);
2887 
2888     q->llq.prod = q->llq.cons = 0;
2889     return 0;
2890 }
2891 
2892 static int arm_smmu_cmdq_init(struct arm_smmu_device *smmu)
2893 {
2894     struct arm_smmu_cmdq *cmdq = &smmu->cmdq;
2895     unsigned int nents = 1 << cmdq->q.llq.max_n_shift;
2896 
2897     atomic_set(&cmdq->owner_prod, 0);
2898     atomic_set(&cmdq->lock, 0);
2899 
2900     cmdq->valid_map = (atomic_long_t *)devm_bitmap_zalloc(smmu->dev, nents,
2901                                   GFP_KERNEL);
2902     if (!cmdq->valid_map)
2903         return -ENOMEM;
2904 
2905     return 0;
2906 }
2907 
2908 static int arm_smmu_init_queues(struct arm_smmu_device *smmu)
2909 {
2910     int ret;
2911 
2912     /* cmdq */
2913     ret = arm_smmu_init_one_queue(smmu, &smmu->cmdq.q, smmu->base,
2914                       ARM_SMMU_CMDQ_PROD, ARM_SMMU_CMDQ_CONS,
2915                       CMDQ_ENT_DWORDS, "cmdq");
2916     if (ret)
2917         return ret;
2918 
2919     ret = arm_smmu_cmdq_init(smmu);
2920     if (ret)
2921         return ret;
2922 
2923     /* evtq */
2924     ret = arm_smmu_init_one_queue(smmu, &smmu->evtq.q, smmu->page1,
2925                       ARM_SMMU_EVTQ_PROD, ARM_SMMU_EVTQ_CONS,
2926                       EVTQ_ENT_DWORDS, "evtq");
2927     if (ret)
2928         return ret;
2929 
2930     if ((smmu->features & ARM_SMMU_FEAT_SVA) &&
2931         (smmu->features & ARM_SMMU_FEAT_STALLS)) {
2932         smmu->evtq.iopf = iopf_queue_alloc(dev_name(smmu->dev));
2933         if (!smmu->evtq.iopf)
2934             return -ENOMEM;
2935     }
2936 
2937     /* priq */
2938     if (!(smmu->features & ARM_SMMU_FEAT_PRI))
2939         return 0;
2940 
2941     return arm_smmu_init_one_queue(smmu, &smmu->priq.q, smmu->page1,
2942                        ARM_SMMU_PRIQ_PROD, ARM_SMMU_PRIQ_CONS,
2943                        PRIQ_ENT_DWORDS, "priq");
2944 }
2945 
2946 static int arm_smmu_init_l1_strtab(struct arm_smmu_device *smmu)
2947 {
2948     unsigned int i;
2949     struct arm_smmu_strtab_cfg *cfg = &smmu->strtab_cfg;
2950     void *strtab = smmu->strtab_cfg.strtab;
2951 
2952     cfg->l1_desc = devm_kcalloc(smmu->dev, cfg->num_l1_ents,
2953                     sizeof(*cfg->l1_desc), GFP_KERNEL);
2954     if (!cfg->l1_desc)
2955         return -ENOMEM;
2956 
2957     for (i = 0; i < cfg->num_l1_ents; ++i) {
2958         arm_smmu_write_strtab_l1_desc(strtab, &cfg->l1_desc[i]);
2959         strtab += STRTAB_L1_DESC_DWORDS << 3;
2960     }
2961 
2962     return 0;
2963 }
2964 
2965 static int arm_smmu_init_strtab_2lvl(struct arm_smmu_device *smmu)
2966 {
2967     void *strtab;
2968     u64 reg;
2969     u32 size, l1size;
2970     struct arm_smmu_strtab_cfg *cfg = &smmu->strtab_cfg;
2971 
2972     /* Calculate the L1 size, capped to the SIDSIZE. */
2973     size = STRTAB_L1_SZ_SHIFT - (ilog2(STRTAB_L1_DESC_DWORDS) + 3);
2974     size = min(size, smmu->sid_bits - STRTAB_SPLIT);
2975     cfg->num_l1_ents = 1 << size;
2976 
2977     size += STRTAB_SPLIT;
2978     if (size < smmu->sid_bits)
2979         dev_warn(smmu->dev,
2980              "2-level strtab only covers %u/%u bits of SID\n",
2981              size, smmu->sid_bits);
2982 
2983     l1size = cfg->num_l1_ents * (STRTAB_L1_DESC_DWORDS << 3);
2984     strtab = dmam_alloc_coherent(smmu->dev, l1size, &cfg->strtab_dma,
2985                      GFP_KERNEL);
2986     if (!strtab) {
2987         dev_err(smmu->dev,
2988             "failed to allocate l1 stream table (%u bytes)\n",
2989             l1size);
2990         return -ENOMEM;
2991     }
2992     cfg->strtab = strtab;
2993 
2994     /* Configure strtab_base_cfg for 2 levels */
2995     reg  = FIELD_PREP(STRTAB_BASE_CFG_FMT, STRTAB_BASE_CFG_FMT_2LVL);
2996     reg |= FIELD_PREP(STRTAB_BASE_CFG_LOG2SIZE, size);
2997     reg |= FIELD_PREP(STRTAB_BASE_CFG_SPLIT, STRTAB_SPLIT);
2998     cfg->strtab_base_cfg = reg;
2999 
3000     return arm_smmu_init_l1_strtab(smmu);
3001 }
3002 
3003 static int arm_smmu_init_strtab_linear(struct arm_smmu_device *smmu)
3004 {
3005     void *strtab;
3006     u64 reg;
3007     u32 size;
3008     struct arm_smmu_strtab_cfg *cfg = &smmu->strtab_cfg;
3009 
3010     size = (1 << smmu->sid_bits) * (STRTAB_STE_DWORDS << 3);
3011     strtab = dmam_alloc_coherent(smmu->dev, size, &cfg->strtab_dma,
3012                      GFP_KERNEL);
3013     if (!strtab) {
3014         dev_err(smmu->dev,
3015             "failed to allocate linear stream table (%u bytes)\n",
3016             size);
3017         return -ENOMEM;
3018     }
3019     cfg->strtab = strtab;
3020     cfg->num_l1_ents = 1 << smmu->sid_bits;
3021 
3022     /* Configure strtab_base_cfg for a linear table covering all SIDs */
3023     reg  = FIELD_PREP(STRTAB_BASE_CFG_FMT, STRTAB_BASE_CFG_FMT_LINEAR);
3024     reg |= FIELD_PREP(STRTAB_BASE_CFG_LOG2SIZE, smmu->sid_bits);
3025     cfg->strtab_base_cfg = reg;
3026 
3027     arm_smmu_init_bypass_stes(strtab, cfg->num_l1_ents, false);
3028     return 0;
3029 }
3030 
3031 static int arm_smmu_init_strtab(struct arm_smmu_device *smmu)
3032 {
3033     u64 reg;
3034     int ret;
3035 
3036     if (smmu->features & ARM_SMMU_FEAT_2_LVL_STRTAB)
3037         ret = arm_smmu_init_strtab_2lvl(smmu);
3038     else
3039         ret = arm_smmu_init_strtab_linear(smmu);
3040 
3041     if (ret)
3042         return ret;
3043 
3044     /* Set the strtab base address */
3045     reg  = smmu->strtab_cfg.strtab_dma & STRTAB_BASE_ADDR_MASK;
3046     reg |= STRTAB_BASE_RA;
3047     smmu->strtab_cfg.strtab_base = reg;
3048 
3049     /* Allocate the first VMID for stage-2 bypass STEs */
3050     set_bit(0, smmu->vmid_map);
3051     return 0;
3052 }
3053 
3054 static int arm_smmu_init_structures(struct arm_smmu_device *smmu)
3055 {
3056     int ret;
3057 
3058     mutex_init(&smmu->streams_mutex);
3059     smmu->streams = RB_ROOT;
3060 
3061     ret = arm_smmu_init_queues(smmu);
3062     if (ret)
3063         return ret;
3064 
3065     return arm_smmu_init_strtab(smmu);
3066 }
3067 
3068 static int arm_smmu_write_reg_sync(struct arm_smmu_device *smmu, u32 val,
3069                    unsigned int reg_off, unsigned int ack_off)
3070 {
3071     u32 reg;
3072 
3073     writel_relaxed(val, smmu->base + reg_off);
3074     return readl_relaxed_poll_timeout(smmu->base + ack_off, reg, reg == val,
3075                       1, ARM_SMMU_POLL_TIMEOUT_US);
3076 }
3077 
3078 /* GBPA is "special" */
3079 static int arm_smmu_update_gbpa(struct arm_smmu_device *smmu, u32 set, u32 clr)
3080 {
3081     int ret;
3082     u32 reg, __iomem *gbpa = smmu->base + ARM_SMMU_GBPA;
3083 
3084     ret = readl_relaxed_poll_timeout(gbpa, reg, !(reg & GBPA_UPDATE),
3085                      1, ARM_SMMU_POLL_TIMEOUT_US);
3086     if (ret)
3087         return ret;
3088 
3089     reg &= ~clr;
3090     reg |= set;
3091     writel_relaxed(reg | GBPA_UPDATE, gbpa);
3092     ret = readl_relaxed_poll_timeout(gbpa, reg, !(reg & GBPA_UPDATE),
3093                      1, ARM_SMMU_POLL_TIMEOUT_US);
3094 
3095     if (ret)
3096         dev_err(smmu->dev, "GBPA not responding to update\n");
3097     return ret;
3098 }
3099 
3100 static void arm_smmu_free_msis(void *data)
3101 {
3102     struct device *dev = data;
3103     platform_msi_domain_free_irqs(dev);
3104 }
3105 
3106 static void arm_smmu_write_msi_msg(struct msi_desc *desc, struct msi_msg *msg)
3107 {
3108     phys_addr_t doorbell;
3109     struct device *dev = msi_desc_to_dev(desc);
3110     struct arm_smmu_device *smmu = dev_get_drvdata(dev);
3111     phys_addr_t *cfg = arm_smmu_msi_cfg[desc->msi_index];
3112 
3113     doorbell = (((u64)msg->address_hi) << 32) | msg->address_lo;
3114     doorbell &= MSI_CFG0_ADDR_MASK;
3115 
3116     writeq_relaxed(doorbell, smmu->base + cfg[0]);
3117     writel_relaxed(msg->data, smmu->base + cfg[1]);
3118     writel_relaxed(ARM_SMMU_MEMATTR_DEVICE_nGnRE, smmu->base + cfg[2]);
3119 }
3120 
3121 static void arm_smmu_setup_msis(struct arm_smmu_device *smmu)
3122 {
3123     int ret, nvec = ARM_SMMU_MAX_MSIS;
3124     struct device *dev = smmu->dev;
3125 
3126     /* Clear the MSI address regs */
3127     writeq_relaxed(0, smmu->base + ARM_SMMU_GERROR_IRQ_CFG0);
3128     writeq_relaxed(0, smmu->base + ARM_SMMU_EVTQ_IRQ_CFG0);
3129 
3130     if (smmu->features & ARM_SMMU_FEAT_PRI)
3131         writeq_relaxed(0, smmu->base + ARM_SMMU_PRIQ_IRQ_CFG0);
3132     else
3133         nvec--;
3134 
3135     if (!(smmu->features & ARM_SMMU_FEAT_MSI))
3136         return;
3137 
3138     if (!dev->msi.domain) {
3139         dev_info(smmu->dev, "msi_domain absent - falling back to wired irqs\n");
3140         return;
3141     }
3142 
3143     /* Allocate MSIs for evtq, gerror and priq. Ignore cmdq */
3144     ret = platform_msi_domain_alloc_irqs(dev, nvec, arm_smmu_write_msi_msg);
3145     if (ret) {
3146         dev_warn(dev, "failed to allocate MSIs - falling back to wired irqs\n");
3147         return;
3148     }
3149 
3150     smmu->evtq.q.irq = msi_get_virq(dev, EVTQ_MSI_INDEX);
3151     smmu->gerr_irq = msi_get_virq(dev, GERROR_MSI_INDEX);
3152     smmu->priq.q.irq = msi_get_virq(dev, PRIQ_MSI_INDEX);
3153 
3154     /* Add callback to free MSIs on teardown */
3155     devm_add_action(dev, arm_smmu_free_msis, dev);
3156 }
3157 
3158 static void arm_smmu_setup_unique_irqs(struct arm_smmu_device *smmu)
3159 {
3160     int irq, ret;
3161 
3162     arm_smmu_setup_msis(smmu);
3163 
3164     /* Request interrupt lines */
3165     irq = smmu->evtq.q.irq;
3166     if (irq) {
3167         ret = devm_request_threaded_irq(smmu->dev, irq, NULL,
3168                         arm_smmu_evtq_thread,
3169                         IRQF_ONESHOT,
3170                         "arm-smmu-v3-evtq", smmu);
3171         if (ret < 0)
3172             dev_warn(smmu->dev, "failed to enable evtq irq\n");
3173     } else {
3174         dev_warn(smmu->dev, "no evtq irq - events will not be reported!\n");
3175     }
3176 
3177     irq = smmu->gerr_irq;
3178     if (irq) {
3179         ret = devm_request_irq(smmu->dev, irq, arm_smmu_gerror_handler,
3180                        0, "arm-smmu-v3-gerror", smmu);
3181         if (ret < 0)
3182             dev_warn(smmu->dev, "failed to enable gerror irq\n");
3183     } else {
3184         dev_warn(smmu->dev, "no gerr irq - errors will not be reported!\n");
3185     }
3186 
3187     if (smmu->features & ARM_SMMU_FEAT_PRI) {
3188         irq = smmu->priq.q.irq;
3189         if (irq) {
3190             ret = devm_request_threaded_irq(smmu->dev, irq, NULL,
3191                             arm_smmu_priq_thread,
3192                             IRQF_ONESHOT,
3193                             "arm-smmu-v3-priq",
3194                             smmu);
3195             if (ret < 0)
3196                 dev_warn(smmu->dev,
3197                      "failed to enable priq irq\n");
3198         } else {
3199             dev_warn(smmu->dev, "no priq irq - PRI will be broken\n");
3200         }
3201     }
3202 }
3203 
3204 static int arm_smmu_setup_irqs(struct arm_smmu_device *smmu)
3205 {
3206     int ret, irq;
3207     u32 irqen_flags = IRQ_CTRL_EVTQ_IRQEN | IRQ_CTRL_GERROR_IRQEN;
3208 
3209     /* Disable IRQs first */
3210     ret = arm_smmu_write_reg_sync(smmu, 0, ARM_SMMU_IRQ_CTRL,
3211                       ARM_SMMU_IRQ_CTRLACK);
3212     if (ret) {
3213         dev_err(smmu->dev, "failed to disable irqs\n");
3214         return ret;
3215     }
3216 
3217     irq = smmu->combined_irq;
3218     if (irq) {
3219         /*
3220          * Cavium ThunderX2 implementation doesn't support unique irq
3221          * lines. Use a single irq line for all the SMMUv3 interrupts.
3222          */
3223         ret = devm_request_threaded_irq(smmu->dev, irq,
3224                     arm_smmu_combined_irq_handler,
3225                     arm_smmu_combined_irq_thread,
3226                     IRQF_ONESHOT,
3227                     "arm-smmu-v3-combined-irq", smmu);
3228         if (ret < 0)
3229             dev_warn(smmu->dev, "failed to enable combined irq\n");
3230     } else
3231         arm_smmu_setup_unique_irqs(smmu);
3232 
3233     if (smmu->features & ARM_SMMU_FEAT_PRI)
3234         irqen_flags |= IRQ_CTRL_PRIQ_IRQEN;
3235 
3236     /* Enable interrupt generation on the SMMU */
3237     ret = arm_smmu_write_reg_sync(smmu, irqen_flags,
3238                       ARM_SMMU_IRQ_CTRL, ARM_SMMU_IRQ_CTRLACK);
3239     if (ret)
3240         dev_warn(smmu->dev, "failed to enable irqs\n");
3241 
3242     return 0;
3243 }
3244 
3245 static int arm_smmu_device_disable(struct arm_smmu_device *smmu)
3246 {
3247     int ret;
3248 
3249     ret = arm_smmu_write_reg_sync(smmu, 0, ARM_SMMU_CR0, ARM_SMMU_CR0ACK);
3250     if (ret)
3251         dev_err(smmu->dev, "failed to clear cr0\n");
3252 
3253     return ret;
3254 }
3255 
3256 static int arm_smmu_device_reset(struct arm_smmu_device *smmu, bool bypass)
3257 {
3258     int ret;
3259     u32 reg, enables;
3260     struct arm_smmu_cmdq_ent cmd;
3261 
3262     /* Clear CR0 and sync (disables SMMU and queue processing) */
3263     reg = readl_relaxed(smmu->base + ARM_SMMU_CR0);
3264     if (reg & CR0_SMMUEN) {
3265         dev_warn(smmu->dev, "SMMU currently enabled! Resetting...\n");
3266         WARN_ON(is_kdump_kernel() && !disable_bypass);
3267         arm_smmu_update_gbpa(smmu, GBPA_ABORT, 0);
3268     }
3269 
3270     ret = arm_smmu_device_disable(smmu);
3271     if (ret)
3272         return ret;
3273 
3274     /* CR1 (table and queue memory attributes) */
3275     reg = FIELD_PREP(CR1_TABLE_SH, ARM_SMMU_SH_ISH) |
3276           FIELD_PREP(CR1_TABLE_OC, CR1_CACHE_WB) |
3277           FIELD_PREP(CR1_TABLE_IC, CR1_CACHE_WB) |
3278           FIELD_PREP(CR1_QUEUE_SH, ARM_SMMU_SH_ISH) |
3279           FIELD_PREP(CR1_QUEUE_OC, CR1_CACHE_WB) |
3280           FIELD_PREP(CR1_QUEUE_IC, CR1_CACHE_WB);
3281     writel_relaxed(reg, smmu->base + ARM_SMMU_CR1);
3282 
3283     /* CR2 (random crap) */
3284     reg = CR2_PTM | CR2_RECINVSID;
3285 
3286     if (smmu->features & ARM_SMMU_FEAT_E2H)
3287         reg |= CR2_E2H;
3288 
3289     writel_relaxed(reg, smmu->base + ARM_SMMU_CR2);
3290 
3291     /* Stream table */
3292     writeq_relaxed(smmu->strtab_cfg.strtab_base,
3293                smmu->base + ARM_SMMU_STRTAB_BASE);
3294     writel_relaxed(smmu->strtab_cfg.strtab_base_cfg,
3295                smmu->base + ARM_SMMU_STRTAB_BASE_CFG);
3296 
3297     /* Command queue */
3298     writeq_relaxed(smmu->cmdq.q.q_base, smmu->base + ARM_SMMU_CMDQ_BASE);
3299     writel_relaxed(smmu->cmdq.q.llq.prod, smmu->base + ARM_SMMU_CMDQ_PROD);
3300     writel_relaxed(smmu->cmdq.q.llq.cons, smmu->base + ARM_SMMU_CMDQ_CONS);
3301 
3302     enables = CR0_CMDQEN;
3303     ret = arm_smmu_write_reg_sync(smmu, enables, ARM_SMMU_CR0,
3304                       ARM_SMMU_CR0ACK);
3305     if (ret) {
3306         dev_err(smmu->dev, "failed to enable command queue\n");
3307         return ret;
3308     }
3309 
3310     /* Invalidate any cached configuration */
3311     cmd.opcode = CMDQ_OP_CFGI_ALL;
3312     arm_smmu_cmdq_issue_cmd_with_sync(smmu, &cmd);
3313 
3314     /* Invalidate any stale TLB entries */
3315     if (smmu->features & ARM_SMMU_FEAT_HYP) {
3316         cmd.opcode = CMDQ_OP_TLBI_EL2_ALL;
3317         arm_smmu_cmdq_issue_cmd_with_sync(smmu, &cmd);
3318     }
3319 
3320     cmd.opcode = CMDQ_OP_TLBI_NSNH_ALL;
3321     arm_smmu_cmdq_issue_cmd_with_sync(smmu, &cmd);
3322 
3323     /* Event queue */
3324     writeq_relaxed(smmu->evtq.q.q_base, smmu->base + ARM_SMMU_EVTQ_BASE);
3325     writel_relaxed(smmu->evtq.q.llq.prod, smmu->page1 + ARM_SMMU_EVTQ_PROD);
3326     writel_relaxed(smmu->evtq.q.llq.cons, smmu->page1 + ARM_SMMU_EVTQ_CONS);
3327 
3328     enables |= CR0_EVTQEN;
3329     ret = arm_smmu_write_reg_sync(smmu, enables, ARM_SMMU_CR0,
3330                       ARM_SMMU_CR0ACK);
3331     if (ret) {
3332         dev_err(smmu->dev, "failed to enable event queue\n");
3333         return ret;
3334     }
3335 
3336     /* PRI queue */
3337     if (smmu->features & ARM_SMMU_FEAT_PRI) {
3338         writeq_relaxed(smmu->priq.q.q_base,
3339                    smmu->base + ARM_SMMU_PRIQ_BASE);
3340         writel_relaxed(smmu->priq.q.llq.prod,
3341                    smmu->page1 + ARM_SMMU_PRIQ_PROD);
3342         writel_relaxed(smmu->priq.q.llq.cons,
3343                    smmu->page1 + ARM_SMMU_PRIQ_CONS);
3344 
3345         enables |= CR0_PRIQEN;
3346         ret = arm_smmu_write_reg_sync(smmu, enables, ARM_SMMU_CR0,
3347                           ARM_SMMU_CR0ACK);
3348         if (ret) {
3349             dev_err(smmu->dev, "failed to enable PRI queue\n");
3350             return ret;
3351         }
3352     }
3353 
3354     if (smmu->features & ARM_SMMU_FEAT_ATS) {
3355         enables |= CR0_ATSCHK;
3356         ret = arm_smmu_write_reg_sync(smmu, enables, ARM_SMMU_CR0,
3357                           ARM_SMMU_CR0ACK);
3358         if (ret) {
3359             dev_err(smmu->dev, "failed to enable ATS check\n");
3360             return ret;
3361         }
3362     }
3363 
3364     ret = arm_smmu_setup_irqs(smmu);
3365     if (ret) {
3366         dev_err(smmu->dev, "failed to setup irqs\n");
3367         return ret;
3368     }
3369 
3370     if (is_kdump_kernel())
3371         enables &= ~(CR0_EVTQEN | CR0_PRIQEN);
3372 
3373     /* Enable the SMMU interface, or ensure bypass */
3374     if (!bypass || disable_bypass) {
3375         enables |= CR0_SMMUEN;
3376     } else {
3377         ret = arm_smmu_update_gbpa(smmu, 0, GBPA_ABORT);
3378         if (ret)
3379             return ret;
3380     }
3381     ret = arm_smmu_write_reg_sync(smmu, enables, ARM_SMMU_CR0,
3382                       ARM_SMMU_CR0ACK);
3383     if (ret) {
3384         dev_err(smmu->dev, "failed to enable SMMU interface\n");
3385         return ret;
3386     }
3387 
3388     return 0;
3389 }
3390 
3391 static int arm_smmu_device_hw_probe(struct arm_smmu_device *smmu)
3392 {
3393     u32 reg;
3394     bool coherent = smmu->features & ARM_SMMU_FEAT_COHERENCY;
3395 
3396     /* IDR0 */
3397     reg = readl_relaxed(smmu->base + ARM_SMMU_IDR0);
3398 
3399     /* 2-level structures */
3400     if (FIELD_GET(IDR0_ST_LVL, reg) == IDR0_ST_LVL_2LVL)
3401         smmu->features |= ARM_SMMU_FEAT_2_LVL_STRTAB;
3402 
3403     if (reg & IDR0_CD2L)
3404         smmu->features |= ARM_SMMU_FEAT_2_LVL_CDTAB;
3405 
3406     /*
3407      * Translation table endianness.
3408      * We currently require the same endianness as the CPU, but this
3409      * could be changed later by adding a new IO_PGTABLE_QUIRK.
3410      */
3411     switch (FIELD_GET(IDR0_TTENDIAN, reg)) {
3412     case IDR0_TTENDIAN_MIXED:
3413         smmu->features |= ARM_SMMU_FEAT_TT_LE | ARM_SMMU_FEAT_TT_BE;
3414         break;
3415 #ifdef __BIG_ENDIAN
3416     case IDR0_TTENDIAN_BE:
3417         smmu->features |= ARM_SMMU_FEAT_TT_BE;
3418         break;
3419 #else
3420     case IDR0_TTENDIAN_LE:
3421         smmu->features |= ARM_SMMU_FEAT_TT_LE;
3422         break;
3423 #endif
3424     default:
3425         dev_err(smmu->dev, "unknown/unsupported TT endianness!\n");
3426         return -ENXIO;
3427     }
3428 
3429     /* Boolean feature flags */
3430     if (IS_ENABLED(CONFIG_PCI_PRI) && reg & IDR0_PRI)
3431         smmu->features |= ARM_SMMU_FEAT_PRI;
3432 
3433     if (IS_ENABLED(CONFIG_PCI_ATS) && reg & IDR0_ATS)
3434         smmu->features |= ARM_SMMU_FEAT_ATS;
3435 
3436     if (reg & IDR0_SEV)
3437         smmu->features |= ARM_SMMU_FEAT_SEV;
3438 
3439     if (reg & IDR0_MSI) {
3440         smmu->features |= ARM_SMMU_FEAT_MSI;
3441         if (coherent && !disable_msipolling)
3442             smmu->options |= ARM_SMMU_OPT_MSIPOLL;
3443     }
3444 
3445     if (reg & IDR0_HYP) {
3446         smmu->features |= ARM_SMMU_FEAT_HYP;
3447         if (cpus_have_cap(ARM64_HAS_VIRT_HOST_EXTN))
3448             smmu->features |= ARM_SMMU_FEAT_E2H;
3449     }
3450 
3451     /*
3452      * The coherency feature as set by FW is used in preference to the ID
3453      * register, but warn on mismatch.
3454      */
3455     if (!!(reg & IDR0_COHACC) != coherent)
3456         dev_warn(smmu->dev, "IDR0.COHACC overridden by FW configuration (%s)\n",
3457              coherent ? "true" : "false");
3458 
3459     switch (FIELD_GET(IDR0_STALL_MODEL, reg)) {
3460     case IDR0_STALL_MODEL_FORCE:
3461         smmu->features |= ARM_SMMU_FEAT_STALL_FORCE;
3462         fallthrough;
3463     case IDR0_STALL_MODEL_STALL:
3464         smmu->features |= ARM_SMMU_FEAT_STALLS;
3465     }
3466 
3467     if (reg & IDR0_S1P)
3468         smmu->features |= ARM_SMMU_FEAT_TRANS_S1;
3469 
3470     if (reg & IDR0_S2P)
3471         smmu->features |= ARM_SMMU_FEAT_TRANS_S2;
3472 
3473     if (!(reg & (IDR0_S1P | IDR0_S2P))) {
3474         dev_err(smmu->dev, "no translation support!\n");
3475         return -ENXIO;
3476     }
3477 
3478     /* We only support the AArch64 table format at present */
3479     switch (FIELD_GET(IDR0_TTF, reg)) {
3480     case IDR0_TTF_AARCH32_64:
3481         smmu->ias = 40;
3482         fallthrough;
3483     case IDR0_TTF_AARCH64:
3484         break;
3485     default:
3486         dev_err(smmu->dev, "AArch64 table format not supported!\n");
3487         return -ENXIO;
3488     }
3489 
3490     /* ASID/VMID sizes */
3491     smmu->asid_bits = reg & IDR0_ASID16 ? 16 : 8;
3492     smmu->vmid_bits = reg & IDR0_VMID16 ? 16 : 8;
3493 
3494     /* IDR1 */
3495     reg = readl_relaxed(smmu->base + ARM_SMMU_IDR1);
3496     if (reg & (IDR1_TABLES_PRESET | IDR1_QUEUES_PRESET | IDR1_REL)) {
3497         dev_err(smmu->dev, "embedded implementation not supported\n");
3498         return -ENXIO;
3499     }
3500 
3501     /* Queue sizes, capped to ensure natural alignment */
3502     smmu->cmdq.q.llq.max_n_shift = min_t(u32, CMDQ_MAX_SZ_SHIFT,
3503                          FIELD_GET(IDR1_CMDQS, reg));
3504     if (smmu->cmdq.q.llq.max_n_shift <= ilog2(CMDQ_BATCH_ENTRIES)) {
3505         /*
3506          * We don't support splitting up batches, so one batch of
3507          * commands plus an extra sync needs to fit inside the command
3508          * queue. There's also no way we can handle the weird alignment
3509          * restrictions on the base pointer for a unit-length queue.
3510          */
3511         dev_err(smmu->dev, "command queue size <= %d entries not supported\n",
3512             CMDQ_BATCH_ENTRIES);
3513         return -ENXIO;
3514     }
3515 
3516     smmu->evtq.q.llq.max_n_shift = min_t(u32, EVTQ_MAX_SZ_SHIFT,
3517                          FIELD_GET(IDR1_EVTQS, reg));
3518     smmu->priq.q.llq.max_n_shift = min_t(u32, PRIQ_MAX_SZ_SHIFT,
3519                          FIELD_GET(IDR1_PRIQS, reg));
3520 
3521     /* SID/SSID sizes */
3522     smmu->ssid_bits = FIELD_GET(IDR1_SSIDSIZE, reg);
3523     smmu->sid_bits = FIELD_GET(IDR1_SIDSIZE, reg);
3524 
3525     /*
3526      * If the SMMU supports fewer bits than would fill a single L2 stream
3527      * table, use a linear table instead.
3528      */
3529     if (smmu->sid_bits <= STRTAB_SPLIT)
3530         smmu->features &= ~ARM_SMMU_FEAT_2_LVL_STRTAB;
3531 
3532     /* IDR3 */
3533     reg = readl_relaxed(smmu->base + ARM_SMMU_IDR3);
3534     if (FIELD_GET(IDR3_RIL, reg))
3535         smmu->features |= ARM_SMMU_FEAT_RANGE_INV;
3536 
3537     /* IDR5 */
3538     reg = readl_relaxed(smmu->base + ARM_SMMU_IDR5);
3539 
3540     /* Maximum number of outstanding stalls */
3541     smmu->evtq.max_stalls = FIELD_GET(IDR5_STALL_MAX, reg);
3542 
3543     /* Page sizes */
3544     if (reg & IDR5_GRAN64K)
3545         smmu->pgsize_bitmap |= SZ_64K | SZ_512M;
3546     if (reg & IDR5_GRAN16K)
3547         smmu->pgsize_bitmap |= SZ_16K | SZ_32M;
3548     if (reg & IDR5_GRAN4K)
3549         smmu->pgsize_bitmap |= SZ_4K | SZ_2M | SZ_1G;
3550 
3551     /* Input address size */
3552     if (FIELD_GET(IDR5_VAX, reg) == IDR5_VAX_52_BIT)
3553         smmu->features |= ARM_SMMU_FEAT_VAX;
3554 
3555     /* Output address size */
3556     switch (FIELD_GET(IDR5_OAS, reg)) {
3557     case IDR5_OAS_32_BIT:
3558         smmu->oas = 32;
3559         break;
3560     case IDR5_OAS_36_BIT:
3561         smmu->oas = 36;
3562         break;
3563     case IDR5_OAS_40_BIT:
3564         smmu->oas = 40;
3565         break;
3566     case IDR5_OAS_42_BIT:
3567         smmu->oas = 42;
3568         break;
3569     case IDR5_OAS_44_BIT:
3570         smmu->oas = 44;
3571         break;
3572     case IDR5_OAS_52_BIT:
3573         smmu->oas = 52;
3574         smmu->pgsize_bitmap |= 1ULL << 42; /* 4TB */
3575         break;
3576     default:
3577         dev_info(smmu->dev,
3578             "unknown output address size. Truncating to 48-bit\n");
3579         fallthrough;
3580     case IDR5_OAS_48_BIT:
3581         smmu->oas = 48;
3582     }
3583 
3584     if (arm_smmu_ops.pgsize_bitmap == -1UL)
3585         arm_smmu_ops.pgsize_bitmap = smmu->pgsize_bitmap;
3586     else
3587         arm_smmu_ops.pgsize_bitmap |= smmu->pgsize_bitmap;
3588 
3589     /* Set the DMA mask for our table walker */
3590     if (dma_set_mask_and_coherent(smmu->dev, DMA_BIT_MASK(smmu->oas)))
3591         dev_warn(smmu->dev,
3592              "failed to set DMA mask for table walker\n");
3593 
3594     smmu->ias = max(smmu->ias, smmu->oas);
3595 
3596     if (arm_smmu_sva_supported(smmu))
3597         smmu->features |= ARM_SMMU_FEAT_SVA;
3598 
3599     dev_info(smmu->dev, "ias %lu-bit, oas %lu-bit (features 0x%08x)\n",
3600          smmu->ias, smmu->oas, smmu->features);
3601     return 0;
3602 }
3603 
3604 #ifdef CONFIG_ACPI
3605 static void acpi_smmu_get_options(u32 model, struct arm_smmu_device *smmu)
3606 {
3607     switch (model) {
3608     case ACPI_IORT_SMMU_V3_CAVIUM_CN99XX:
3609         smmu->options |= ARM_SMMU_OPT_PAGE0_REGS_ONLY;
3610         break;
3611     case ACPI_IORT_SMMU_V3_HISILICON_HI161X:
3612         smmu->options |= ARM_SMMU_OPT_SKIP_PREFETCH;
3613         break;
3614     }
3615 
3616     dev_notice(smmu->dev, "option mask 0x%x\n", smmu->options);
3617 }
3618 
3619 static int arm_smmu_device_acpi_probe(struct platform_device *pdev,
3620                       struct arm_smmu_device *smmu)
3621 {
3622     struct acpi_iort_smmu_v3 *iort_smmu;
3623     struct device *dev = smmu->dev;
3624     struct acpi_iort_node *node;
3625 
3626     node = *(struct acpi_iort_node **)dev_get_platdata(dev);
3627 
3628     /* Retrieve SMMUv3 specific data */
3629     iort_smmu = (struct acpi_iort_smmu_v3 *)node->node_data;
3630 
3631     acpi_smmu_get_options(iort_smmu->model, smmu);
3632 
3633     if (iort_smmu->flags & ACPI_IORT_SMMU_V3_COHACC_OVERRIDE)
3634         smmu->features |= ARM_SMMU_FEAT_COHERENCY;
3635 
3636     return 0;
3637 }
3638 #else
3639 static inline int arm_smmu_device_acpi_probe(struct platform_device *pdev,
3640                          struct arm_smmu_device *smmu)
3641 {
3642     return -ENODEV;
3643 }
3644 #endif
3645 
3646 static int arm_smmu_device_dt_probe(struct platform_device *pdev,
3647                     struct arm_smmu_device *smmu)
3648 {
3649     struct device *dev = &pdev->dev;
3650     u32 cells;
3651     int ret = -EINVAL;
3652 
3653     if (of_property_read_u32(dev->of_node, "#iommu-cells", &cells))
3654         dev_err(dev, "missing #iommu-cells property\n");
3655     else if (cells != 1)
3656         dev_err(dev, "invalid #iommu-cells value (%d)\n", cells);
3657     else
3658         ret = 0;
3659 
3660     parse_driver_options(smmu);
3661 
3662     if (of_dma_is_coherent(dev->of_node))
3663         smmu->features |= ARM_SMMU_FEAT_COHERENCY;
3664 
3665     return ret;
3666 }
3667 
3668 static unsigned long arm_smmu_resource_size(struct arm_smmu_device *smmu)
3669 {
3670     if (smmu->options & ARM_SMMU_OPT_PAGE0_REGS_ONLY)
3671         return SZ_64K;
3672     else
3673         return SZ_128K;
3674 }
3675 
3676 static int arm_smmu_set_bus_ops(struct iommu_ops *ops)
3677 {
3678     int err;
3679 
3680 #ifdef CONFIG_PCI
3681     if (pci_bus_type.iommu_ops != ops) {
3682         err = bus_set_iommu(&pci_bus_type, ops);
3683         if (err)
3684             return err;
3685     }
3686 #endif
3687 #ifdef CONFIG_ARM_AMBA
3688     if (amba_bustype.iommu_ops != ops) {
3689         err = bus_set_iommu(&amba_bustype, ops);
3690         if (err)
3691             goto err_reset_pci_ops;
3692     }
3693 #endif
3694     if (platform_bus_type.iommu_ops != ops) {
3695         err = bus_set_iommu(&platform_bus_type, ops);
3696         if (err)
3697             goto err_reset_amba_ops;
3698     }
3699 
3700     return 0;
3701 
3702 err_reset_amba_ops:
3703 #ifdef CONFIG_ARM_AMBA
3704     bus_set_iommu(&amba_bustype, NULL);
3705 #endif
3706 err_reset_pci_ops: __maybe_unused;
3707 #ifdef CONFIG_PCI
3708     bus_set_iommu(&pci_bus_type, NULL);
3709 #endif
3710     return err;
3711 }
3712 
3713 static void __iomem *arm_smmu_ioremap(struct device *dev, resource_size_t start,
3714                       resource_size_t size)
3715 {
3716     struct resource res = DEFINE_RES_MEM(start, size);
3717 
3718     return devm_ioremap_resource(dev, &res);
3719 }
3720 
3721 static void arm_smmu_rmr_install_bypass_ste(struct arm_smmu_device *smmu)
3722 {
3723     struct list_head rmr_list;
3724     struct iommu_resv_region *e;
3725 
3726     INIT_LIST_HEAD(&rmr_list);
3727     iort_get_rmr_sids(dev_fwnode(smmu->dev), &rmr_list);
3728 
3729     list_for_each_entry(e, &rmr_list, list) {
3730         __le64 *step;
3731         struct iommu_iort_rmr_data *rmr;
3732         int ret, i;
3733 
3734         rmr = container_of(e, struct iommu_iort_rmr_data, rr);
3735         for (i = 0; i < rmr->num_sids; i++) {
3736             ret = arm_smmu_init_sid_strtab(smmu, rmr->sids[i]);
3737             if (ret) {
3738                 dev_err(smmu->dev, "RMR SID(0x%x) bypass failed\n",
3739                     rmr->sids[i]);
3740                 continue;
3741             }
3742 
3743             step = arm_smmu_get_step_for_sid(smmu, rmr->sids[i]);
3744             arm_smmu_init_bypass_stes(step, 1, true);
3745         }
3746     }
3747 
3748     iort_put_rmr_sids(dev_fwnode(smmu->dev), &rmr_list);
3749 }
3750 
3751 static int arm_smmu_device_probe(struct platform_device *pdev)
3752 {
3753     int irq, ret;
3754     struct resource *res;
3755     resource_size_t ioaddr;
3756     struct arm_smmu_device *smmu;
3757     struct device *dev = &pdev->dev;
3758     bool bypass;
3759 
3760     smmu = devm_kzalloc(dev, sizeof(*smmu), GFP_KERNEL);
3761     if (!smmu)
3762         return -ENOMEM;
3763     smmu->dev = dev;
3764 
3765     if (dev->of_node) {
3766         ret = arm_smmu_device_dt_probe(pdev, smmu);
3767     } else {
3768         ret = arm_smmu_device_acpi_probe(pdev, smmu);
3769         if (ret == -ENODEV)
3770             return ret;
3771     }
3772 
3773     /* Set bypass mode according to firmware probing result */
3774     bypass = !!ret;
3775 
3776     /* Base address */
3777     res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
3778     if (!res)
3779         return -EINVAL;
3780     if (resource_size(res) < arm_smmu_resource_size(smmu)) {
3781         dev_err(dev, "MMIO region too small (%pr)\n", res);
3782         return -EINVAL;
3783     }
3784     ioaddr = res->start;
3785 
3786     /*
3787      * Don't map the IMPLEMENTATION DEFINED regions, since they may contain
3788      * the PMCG registers which are reserved by the PMU driver.
3789      */
3790     smmu->base = arm_smmu_ioremap(dev, ioaddr, ARM_SMMU_REG_SZ);
3791     if (IS_ERR(smmu->base))
3792         return PTR_ERR(smmu->base);
3793 
3794     if (arm_smmu_resource_size(smmu) > SZ_64K) {
3795         smmu->page1 = arm_smmu_ioremap(dev, ioaddr + SZ_64K,
3796                            ARM_SMMU_REG_SZ);
3797         if (IS_ERR(smmu->page1))
3798             return PTR_ERR(smmu->page1);
3799     } else {
3800         smmu->page1 = smmu->base;
3801     }
3802 
3803     /* Interrupt lines */
3804 
3805     irq = platform_get_irq_byname_optional(pdev, "combined");
3806     if (irq > 0)
3807         smmu->combined_irq = irq;
3808     else {
3809         irq = platform_get_irq_byname_optional(pdev, "eventq");
3810         if (irq > 0)
3811             smmu->evtq.q.irq = irq;
3812 
3813         irq = platform_get_irq_byname_optional(pdev, "priq");
3814         if (irq > 0)
3815             smmu->priq.q.irq = irq;
3816 
3817         irq = platform_get_irq_byname_optional(pdev, "gerror");
3818         if (irq > 0)
3819             smmu->gerr_irq = irq;
3820     }
3821     /* Probe the h/w */
3822     ret = arm_smmu_device_hw_probe(smmu);
3823     if (ret)
3824         return ret;
3825 
3826     /* Initialise in-memory data structures */
3827     ret = arm_smmu_init_structures(smmu);
3828     if (ret)
3829         return ret;
3830 
3831     /* Record our private device structure */
3832     platform_set_drvdata(pdev, smmu);
3833 
3834     /* Check for RMRs and install bypass STEs if any */
3835     arm_smmu_rmr_install_bypass_ste(smmu);
3836 
3837     /* Reset the device */
3838     ret = arm_smmu_device_reset(smmu, bypass);
3839     if (ret)
3840         return ret;
3841 
3842     /* And we're up. Go go go! */
3843     ret = iommu_device_sysfs_add(&smmu->iommu, dev, NULL,
3844                      "smmu3.%pa", &ioaddr);
3845     if (ret)
3846         return ret;
3847 
3848     ret = iommu_device_register(&smmu->iommu, &arm_smmu_ops, dev);
3849     if (ret) {
3850         dev_err(dev, "Failed to register iommu\n");
3851         goto err_sysfs_remove;
3852     }
3853 
3854     ret = arm_smmu_set_bus_ops(&arm_smmu_ops);
3855     if (ret)
3856         goto err_unregister_device;
3857 
3858     return 0;
3859 
3860 err_unregister_device:
3861     iommu_device_unregister(&smmu->iommu);
3862 err_sysfs_remove:
3863     iommu_device_sysfs_remove(&smmu->iommu);
3864     return ret;
3865 }
3866 
3867 static int arm_smmu_device_remove(struct platform_device *pdev)
3868 {
3869     struct arm_smmu_device *smmu = platform_get_drvdata(pdev);
3870 
3871     arm_smmu_set_bus_ops(NULL);
3872     iommu_device_unregister(&smmu->iommu);
3873     iommu_device_sysfs_remove(&smmu->iommu);
3874     arm_smmu_device_disable(smmu);
3875     iopf_queue_free(smmu->evtq.iopf);
3876 
3877     return 0;
3878 }
3879 
3880 static void arm_smmu_device_shutdown(struct platform_device *pdev)
3881 {
3882     arm_smmu_device_remove(pdev);
3883 }
3884 
3885 static const struct of_device_id arm_smmu_of_match[] = {
3886     { .compatible = "arm,smmu-v3", },
3887     { },
3888 };
3889 MODULE_DEVICE_TABLE(of, arm_smmu_of_match);
3890 
3891 static void arm_smmu_driver_unregister(struct platform_driver *drv)
3892 {
3893     arm_smmu_sva_notifier_synchronize();
3894     platform_driver_unregister(drv);
3895 }
3896 
3897 static struct platform_driver arm_smmu_driver = {
3898     .driver = {
3899         .name           = "arm-smmu-v3",
3900         .of_match_table     = arm_smmu_of_match,
3901         .suppress_bind_attrs    = true,
3902     },
3903     .probe  = arm_smmu_device_probe,
3904     .remove = arm_smmu_device_remove,
3905     .shutdown = arm_smmu_device_shutdown,
3906 };
3907 module_driver(arm_smmu_driver, platform_driver_register,
3908           arm_smmu_driver_unregister);
3909 
3910 MODULE_DESCRIPTION("IOMMU API for ARM architected SMMUv3 implementations");
3911 MODULE_AUTHOR("Will Deacon <will@kernel.org>");
3912 MODULE_ALIAS("platform:arm-smmu-v3");
3913 MODULE_LICENSE("GPL v2");