// SPDX-License-Identifier: GPL-2.0-only
// NVIDIA ARM SMMU v2 implementation quirks for Tegra194 & Tegra234
#include <linux/bitfield.h>
#include <linux/delay.h>
#include <linux/of.h>
#include <linux/platform_device.h>
#include <linux/slab.h>

#include <soc/tegra/mc.h>

#include "arm-smmu.h"

/*
 * Tegra194 and Tegra234 have three ARM MMU-500 instances. Two of them are
 * used together and must be programmed identically for interleaved IOVA
 * accesses across them, and they translate accesses from non-isochronous
 * hardware devices. The third instance translates accesses from isochronous
 * hardware devices.
 *
 * In addition, the SMMU driver needs to coordinate with the memory controller
 * driver to ensure that the right SID override is programmed for any given
 * memory client. This is necessary to allow for use of the MC SID override to
 * bypass the SMMU for fast, but less secure, memory access until the SMMU is
 * attached.
 *
 * This implementation supports programming of the two instances that must be
 * programmed identically. The third instance is handled by the standard
 * arm-smmu driver itself and is out of scope of this implementation.
 */
#define MAX_SMMU_INSTANCES 2

struct nvidia_smmu {
	struct arm_smmu_device smmu;
	void __iomem *bases[MAX_SMMU_INSTANCES];
	unsigned int num_instances;
	struct tegra_mc *mc;
};

static inline struct nvidia_smmu *to_nvidia_smmu(struct arm_smmu_device *smmu)
{
	return container_of(smmu, struct nvidia_smmu, smmu);
}

static inline void __iomem *nvidia_smmu_page(struct arm_smmu_device *smmu,
					     unsigned int inst, int page)
{
	struct nvidia_smmu *nvidia_smmu;

	nvidia_smmu = container_of(smmu, struct nvidia_smmu, smmu);
	return nvidia_smmu->bases[inst] + (page << smmu->pgshift);
}

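/*
 * All instances are programmed identically, so it is sufficient to read
 * registers back from instance 0.
 */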
static u32 nvidia_smmu_read_reg(struct arm_smmu_device *smmu,
				int page, int offset)
{
	void __iomem *reg = nvidia_smmu_page(smmu, 0, page) + offset;

	return readl_relaxed(reg);
}

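/*
 * Mirror register writes to every instance so that they stay identically
 * programmed.
 */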
static void nvidia_smmu_write_reg(struct arm_smmu_device *smmu,
				  int page, int offset, u32 val)
{
	struct nvidia_smmu *nvidia = to_nvidia_smmu(smmu);
	unsigned int i;

	for (i = 0; i < nvidia->num_instances; i++) {
		void __iomem *reg = nvidia_smmu_page(smmu, i, page) + offset;

		writel_relaxed(val, reg);
	}
}

static u64 nvidia_smmu_read_reg64(struct arm_smmu_device *smmu,
				  int page, int offset)
{
	void __iomem *reg = nvidia_smmu_page(smmu, 0, page) + offset;

	return readq_relaxed(reg);
}

static void nvidia_smmu_write_reg64(struct arm_smmu_device *smmu,
				    int page, int offset, u64 val)
{
	struct nvidia_smmu *nvidia = to_nvidia_smmu(smmu);
	unsigned int i;

	for (i = 0; i < nvidia->num_instances; i++) {
		void __iomem *reg = nvidia_smmu_page(smmu, i, page) + offset;

		writeq_relaxed(val, reg);
	}
}

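/*
 * The TLB sync is only complete once every instance reports that its sync
 * is no longer active, so poll the status register of each instance.
 */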
static void nvidia_smmu_tlb_sync(struct arm_smmu_device *smmu, int page,
				 int sync, int status)
{
	struct nvidia_smmu *nvidia = to_nvidia_smmu(smmu);
	unsigned int delay;

	arm_smmu_writel(smmu, page, sync, 0);

	for (delay = 1; delay < TLB_LOOP_TIMEOUT; delay *= 2) {
		unsigned int spin_cnt;

		for (spin_cnt = TLB_SPIN_COUNT; spin_cnt > 0; spin_cnt--) {
			u32 val = 0;
			unsigned int i;

			for (i = 0; i < nvidia->num_instances; i++) {
				void __iomem *reg;

				reg = nvidia_smmu_page(smmu, i, page) + status;
				val |= readl_relaxed(reg);
			}

			if (!(val & ARM_SMMU_sTLBGSTATUS_GSACTIVE))
				return;

			cpu_relax();
		}

		udelay(delay);
	}

	dev_err_ratelimited(smmu->dev,
			    "TLB sync timed out -- SMMU may be deadlocked\n");
}

static int nvidia_smmu_reset(struct arm_smmu_device *smmu)
{
	struct nvidia_smmu *nvidia = to_nvidia_smmu(smmu);
	unsigned int i;

	for (i = 0; i < nvidia->num_instances; i++) {
		u32 val;
		void __iomem *reg = nvidia_smmu_page(smmu, i, ARM_SMMU_GR0) +
				    ARM_SMMU_GR0_sGFSR;

		/* clear global FSR */
		val = readl_relaxed(reg);
		writel_relaxed(val, reg);
	}

	return 0;
}

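/* Report and clear a pending global fault on a single instance. */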
static irqreturn_t nvidia_smmu_global_fault_inst(int irq,
						 struct arm_smmu_device *smmu,
						 int inst)
{
	u32 gfsr, gfsynr0, gfsynr1, gfsynr2;
	void __iomem *gr0_base = nvidia_smmu_page(smmu, inst, 0);

	gfsr = readl_relaxed(gr0_base + ARM_SMMU_GR0_sGFSR);
	if (!gfsr)
		return IRQ_NONE;

	gfsynr0 = readl_relaxed(gr0_base + ARM_SMMU_GR0_sGFSYNR0);
	gfsynr1 = readl_relaxed(gr0_base + ARM_SMMU_GR0_sGFSYNR1);
	gfsynr2 = readl_relaxed(gr0_base + ARM_SMMU_GR0_sGFSYNR2);

	dev_err_ratelimited(smmu->dev,
			    "Unexpected global fault, this could be serious\n");
	dev_err_ratelimited(smmu->dev,
			    "\tGFSR 0x%08x, GFSYNR0 0x%08x, GFSYNR1 0x%08x, GFSYNR2 0x%08x\n",
			    gfsr, gfsynr0, gfsynr1, gfsynr2);

	writel_relaxed(gfsr, gr0_base + ARM_SMMU_GR0_sGFSR);
	return IRQ_HANDLED;
}

static irqreturn_t nvidia_smmu_global_fault(int irq, void *dev)
{
	unsigned int inst;
	irqreturn_t ret = IRQ_NONE;
	struct arm_smmu_device *smmu = dev;
	struct nvidia_smmu *nvidia = to_nvidia_smmu(smmu);

	for (inst = 0; inst < nvidia->num_instances; inst++) {
		irqreturn_t irq_ret;

		irq_ret = nvidia_smmu_global_fault_inst(irq, smmu, inst);
		if (irq_ret == IRQ_HANDLED)
			ret = IRQ_HANDLED;
	}

	return ret;
}

static irqreturn_t nvidia_smmu_context_fault_bank(int irq,
						  struct arm_smmu_device *smmu,
						  int idx, int inst)
{
	u32 fsr, fsynr, cbfrsynra;
	unsigned long iova;
	void __iomem *gr1_base = nvidia_smmu_page(smmu, inst, 1);
	void __iomem *cb_base = nvidia_smmu_page(smmu, inst, smmu->numpage + idx);

	fsr = readl_relaxed(cb_base + ARM_SMMU_CB_FSR);
	if (!(fsr & ARM_SMMU_FSR_FAULT))
		return IRQ_NONE;

	fsynr = readl_relaxed(cb_base + ARM_SMMU_CB_FSYNR0);
	iova = readq_relaxed(cb_base + ARM_SMMU_CB_FAR);
	cbfrsynra = readl_relaxed(gr1_base + ARM_SMMU_GR1_CBFRSYNRA(idx));

	dev_err_ratelimited(smmu->dev,
			    "Unhandled context fault: fsr=0x%x, iova=0x%08lx, fsynr=0x%x, cbfrsynra=0x%x, cb=%d\n",
			    fsr, iova, fsynr, cbfrsynra, idx);

	writel_relaxed(fsr, cb_base + ARM_SMMU_CB_FSR);
	return IRQ_HANDLED;
}

static irqreturn_t nvidia_smmu_context_fault(int irq, void *dev)
{
	int idx;
	unsigned int inst;
	irqreturn_t ret = IRQ_NONE;
	struct arm_smmu_device *smmu;
	struct iommu_domain *domain = dev;
	struct arm_smmu_domain *smmu_domain;
	struct nvidia_smmu *nvidia;

	smmu_domain = container_of(domain, struct arm_smmu_domain, domain);
	smmu = smmu_domain->smmu;
	nvidia = to_nvidia_smmu(smmu);

	for (inst = 0; inst < nvidia->num_instances; inst++) {
		irqreturn_t irq_ret;

		/*
		 * Interrupt line is shared between all contexts.
		 * Check for faults across all contexts.
		 */
		for (idx = 0; idx < smmu->num_context_banks; idx++) {
			irq_ret = nvidia_smmu_context_fault_bank(irq, smmu,
								 idx, inst);
			if (irq_ret == IRQ_HANDLED)
				ret = IRQ_HANDLED;
		}
	}

	return ret;
}

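/*
 * Coordinate with the memory controller driver so that it programs the
 * right SID override for this device's memory clients.
 */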
static void nvidia_smmu_probe_finalize(struct arm_smmu_device *smmu, struct device *dev)
{
	struct nvidia_smmu *nvidia = to_nvidia_smmu(smmu);
	int err;

	err = tegra_mc_probe_device(nvidia->mc, dev);
	if (err < 0)
		dev_err(smmu->dev, "memory controller probe failed for %s: %d\n",
			dev_name(dev), err);
}

static int nvidia_smmu_init_context(struct arm_smmu_domain *smmu_domain,
				    struct io_pgtable_cfg *pgtbl_cfg,
				    struct device *dev)
{
	struct arm_smmu_device *smmu = smmu_domain->smmu;
	const struct device_node *np = smmu->dev->of_node;

	/*
	 * Tegra194 and Tegra234 SoCs have an erratum that causes walk cache
	 * entries to not be invalidated correctly: the walk cache index
	 * generated for an IOVA is not the same across translation and
	 * invalidation requests. This leads to page faults when a PMD entry
	 * is released during unmap and populated with a new PTE table during
	 * a subsequent map request, since translations can still see the
	 * stale PMD entry in the walk cache.
	 *
	 * Work around this by limiting the page mappings to PAGE_SIZE on
	 * Tegra194 and Tegra234, which avoids releasing PMD entries.
	 */
	if (of_device_is_compatible(np, "nvidia,tegra234-smmu") ||
	    of_device_is_compatible(np, "nvidia,tegra194-smmu")) {
		smmu->pgsize_bitmap = PAGE_SIZE;
		pgtbl_cfg->pgsize_bitmap = smmu->pgsize_bitmap;
	}

	return 0;
}

static const struct arm_smmu_impl nvidia_smmu_impl = {
	.read_reg = nvidia_smmu_read_reg,
	.write_reg = nvidia_smmu_write_reg,
	.read_reg64 = nvidia_smmu_read_reg64,
	.write_reg64 = nvidia_smmu_write_reg64,
	.reset = nvidia_smmu_reset,
	.tlb_sync = nvidia_smmu_tlb_sync,
	.global_fault = nvidia_smmu_global_fault,
	.context_fault = nvidia_smmu_context_fault,
	.probe_finalize = nvidia_smmu_probe_finalize,
	.init_context = nvidia_smmu_init_context,
};

static const struct arm_smmu_impl nvidia_smmu_single_impl = {
	.probe_finalize = nvidia_smmu_probe_finalize,
	.init_context = nvidia_smmu_init_context,
};

struct arm_smmu_device *nvidia_smmu_impl_init(struct arm_smmu_device *smmu)
{
	struct resource *res;
	struct device *dev = smmu->dev;
	struct nvidia_smmu *nvidia_smmu;
	struct platform_device *pdev = to_platform_device(dev);
	unsigned int i;

	nvidia_smmu = devm_krealloc(dev, smmu, sizeof(*nvidia_smmu), GFP_KERNEL);
	if (!nvidia_smmu)
		return ERR_PTR(-ENOMEM);

	nvidia_smmu->mc = devm_tegra_memory_controller_get(dev);
	if (IS_ERR(nvidia_smmu->mc))
		return ERR_CAST(nvidia_smmu->mc);

	/* Instance 0 is ioremapped by arm-smmu.c. */
	nvidia_smmu->bases[0] = smmu->base;
	nvidia_smmu->num_instances++;

	for (i = 1; i < MAX_SMMU_INSTANCES; i++) {
		res = platform_get_resource(pdev, IORESOURCE_MEM, i);
		if (!res)
			break;

		nvidia_smmu->bases[i] = devm_ioremap_resource(dev, res);
		if (IS_ERR(nvidia_smmu->bases[i]))
			return ERR_CAST(nvidia_smmu->bases[i]);

		nvidia_smmu->num_instances++;
	}

	if (nvidia_smmu->num_instances == 1)
		nvidia_smmu->smmu.impl = &nvidia_smmu_single_impl;
	else
		nvidia_smmu->smmu.impl = &nvidia_smmu_impl;

	return &nvidia_smmu->smmu;
}