// SPDX-License-Identifier: GPL-2.0
/*
 * PASID (Process Address Space ID) table and entry manipulation for
 * Intel VT-d scalable mode.
 */

#define pr_fmt(fmt) "DMAR: " fmt

#include <linux/bitops.h>
#include <linux/cpufeature.h>
#include <linux/dmar.h>
#include <linux/iommu.h>
#include <linux/memory.h>
#include <linux/pci.h>
#include <linux/pci-ats.h>
#include <linux/spinlock.h>

#include "iommu.h"
#include "pasid.h"
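
/*
 * Intel IOMMU system wide PASID name space:
 */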
u32 intel_pasid_max_id = PASID_MAX;
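
/*
 * Allocate a PASID from the hardware name space through the virtual
 * command register. This path is only used when the (typically
 * virtual) IOMMU exposes the virtual command capability.
 */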
int vcmd_alloc_pasid(struct intel_iommu *iommu, u32 *pasid)
{
	unsigned long flags;
	u8 status_code;
	int ret = 0;
	u64 res;

	raw_spin_lock_irqsave(&iommu->register_lock, flags);
	dmar_writeq(iommu->reg + DMAR_VCMD_REG, VCMD_CMD_ALLOC);
	IOMMU_WAIT_OP(iommu, DMAR_VCRSP_REG, dmar_readq,
		      !(res & VCMD_VRSP_IP), res);
	raw_spin_unlock_irqrestore(&iommu->register_lock, flags);

	status_code = VCMD_VRSP_SC(res);
	switch (status_code) {
	case VCMD_VRSP_SC_SUCCESS:
		*pasid = VCMD_VRSP_RESULT_PASID(res);
		break;
	case VCMD_VRSP_SC_NO_PASID_AVAIL:
		pr_info("IOMMU: %s: No PASID available\n", iommu->name);
		ret = -ENOSPC;
		break;
	default:
		ret = -ENODEV;
		pr_warn("IOMMU: %s: Unexpected error code %d\n",
			iommu->name, status_code);
	}

	return ret;
}
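
/*
 * Free a PASID previously allocated with vcmd_alloc_pasid() through
 * the virtual command register.
 */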
void vcmd_free_pasid(struct intel_iommu *iommu, u32 pasid)
{
	unsigned long flags;
	u8 status_code;
	u64 res;

	raw_spin_lock_irqsave(&iommu->register_lock, flags);
	dmar_writeq(iommu->reg + DMAR_VCMD_REG,
		    VCMD_CMD_OPERAND(pasid) | VCMD_CMD_FREE);
	IOMMU_WAIT_OP(iommu, DMAR_VCRSP_REG, dmar_readq,
		      !(res & VCMD_VRSP_IP), res);
	raw_spin_unlock_irqrestore(&iommu->register_lock, flags);

	status_code = VCMD_VRSP_SC(res);
	switch (status_code) {
	case VCMD_VRSP_SC_SUCCESS:
		break;
	case VCMD_VRSP_SC_INVALID_PASID:
		pr_info("IOMMU: %s: Invalid PASID\n", iommu->name);
		break;
	default:
		pr_warn("IOMMU: %s: Unexpected error code %d\n",
			iommu->name, status_code);
	}
}
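
/*
 * Per device pasid table management:
 */

/*
 * Allocate a pasid table for @dev. It should be called in a
 * single-threaded context.
 */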
int intel_pasid_alloc_table(struct device *dev)
{
	struct device_domain_info *info;
	struct pasid_table *pasid_table;
	struct page *pages;
	u32 max_pasid = 0;
	int order, size;

	might_sleep();
	info = dev_iommu_priv_get(dev);
	if (WARN_ON(!info || !dev_is_pci(dev) || info->pasid_table))
		return -EINVAL;

	pasid_table = kzalloc(sizeof(*pasid_table), GFP_KERNEL);
	if (!pasid_table)
		return -ENOMEM;

	if (info->pasid_supported)
		max_pasid = min_t(u32, pci_max_pasids(to_pci_dev(dev)),
				  intel_pasid_max_id);

	size = max_pasid >> (PASID_PDE_SHIFT - 3);
	order = size ? get_order(size) : 0;
	pages = alloc_pages_node(info->iommu->node,
				 GFP_KERNEL | __GFP_ZERO, order);
	if (!pages) {
		kfree(pasid_table);
		return -ENOMEM;
	}

	pasid_table->table = page_address(pages);
	pasid_table->order = order;
	pasid_table->max_pasid = 1 << (order + PAGE_SHIFT + 3);
	info->pasid_table = pasid_table;

	return 0;
}
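
/*
 * Free the pasid table of @dev, including all leaf pasid tables
 * reachable from the pasid directory.
 */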
void intel_pasid_free_table(struct device *dev)
{
	struct device_domain_info *info;
	struct pasid_table *pasid_table;
	struct pasid_dir_entry *dir;
	struct pasid_entry *table;
	int i, max_pde;

	info = dev_iommu_priv_get(dev);
	if (!info || !dev_is_pci(dev) || !info->pasid_table)
		return;

	pasid_table = info->pasid_table;
	info->pasid_table = NULL;

	/* Free scalable mode supported PASID directory tables: */
	dir = pasid_table->table;
	max_pde = pasid_table->max_pasid >> PASID_PDE_SHIFT;
	for (i = 0; i < max_pde; i++) {
		table = get_pasid_table_from_pde(&dir[i]);
		free_pgtable_page(table);
	}

	free_pages((unsigned long)pasid_table->table, pasid_table->order);
	kfree(pasid_table);
}
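
/* Return the pasid table of @dev, or NULL if none has been allocated. */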
struct pasid_table *intel_pasid_get_table(struct device *dev)
{
	struct device_domain_info *info;

	info = dev_iommu_priv_get(dev);
	if (!info)
		return NULL;

	return info->pasid_table;
}
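
/*
 * Return the maximum pasid supported by the pasid table of @dev, or 0
 * if no pasid table is present.
 */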
static int intel_pasid_get_dev_max_id(struct device *dev)
{
	struct device_domain_info *info;

	info = dev_iommu_priv_get(dev);
	if (!info || !info->pasid_table)
		return 0;

	return info->pasid_table->max_pasid;
}
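
/*
 * Return the pasid table entry of @pasid for @dev, allocating the leaf
 * pasid table on demand.
 */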
static struct pasid_entry *intel_pasid_get_entry(struct device *dev, u32 pasid)
{
	struct device_domain_info *info;
	struct pasid_table *pasid_table;
	struct pasid_dir_entry *dir;
	struct pasid_entry *entries;
	int dir_index, index;

	pasid_table = intel_pasid_get_table(dev);
	if (WARN_ON(!pasid_table || pasid >= intel_pasid_get_dev_max_id(dev)))
		return NULL;

	dir = pasid_table->table;
	info = dev_iommu_priv_get(dev);
	dir_index = pasid >> PASID_PDE_SHIFT;
	index = pasid & PASID_PTE_MASK;

retry:
	entries = get_pasid_table_from_pde(&dir[dir_index]);
	if (!entries) {
		entries = alloc_pgtable_page(info->iommu->node);
		if (!entries)
			return NULL;

		/*
		 * The pasid directory table entry won't be freed after
		 * allocation, so there is no race with free and clear.
		 * However, this entry might be populated by others while
		 * we are preparing it. Use theirs with a retry.
		 */
		if (cmpxchg64(&dir[dir_index].val, 0ULL,
			      (u64)virt_to_phys(entries) | PASID_PTE_PRESENT)) {
			free_pgtable_page(entries);
			goto retry;
		}
	}

	return &entries[index];
}
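
/*
 * Interfaces for PASID table entry manipulation:
 */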
static inline void pasid_clear_entry(struct pasid_entry *pe)
{
	WRITE_ONCE(pe->val[0], 0);
	WRITE_ONCE(pe->val[1], 0);
	WRITE_ONCE(pe->val[2], 0);
	WRITE_ONCE(pe->val[3], 0);
	WRITE_ONCE(pe->val[4], 0);
	WRITE_ONCE(pe->val[5], 0);
	WRITE_ONCE(pe->val[6], 0);
	WRITE_ONCE(pe->val[7], 0);
}

static inline void pasid_clear_entry_with_fpd(struct pasid_entry *pe)
{
	WRITE_ONCE(pe->val[0], PASID_PTE_FPD);
	WRITE_ONCE(pe->val[1], 0);
	WRITE_ONCE(pe->val[2], 0);
	WRITE_ONCE(pe->val[3], 0);
	WRITE_ONCE(pe->val[4], 0);
	WRITE_ONCE(pe->val[5], 0);
	WRITE_ONCE(pe->val[6], 0);
	WRITE_ONCE(pe->val[7], 0);
}

static void
intel_pasid_clear_entry(struct device *dev, u32 pasid, bool fault_ignore)
{
	struct pasid_entry *pe;

	pe = intel_pasid_get_entry(dev, pasid);
	if (WARN_ON(!pe))
		return;

	if (fault_ignore && pasid_pte_is_present(pe))
		pasid_clear_entry_with_fpd(pe);
	else
		pasid_clear_entry(pe);
}

static inline void pasid_set_bits(u64 *ptr, u64 mask, u64 bits)
{
	u64 old;

	old = READ_ONCE(*ptr);
	WRITE_ONCE(*ptr, (old & ~mask) | bits);
}
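
/*
 * Setup the DID(Domain Identifier) field (Bit 64~79) of a scalable
 * mode PASID entry.
 */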
static inline void
pasid_set_domain_id(struct pasid_entry *pe, u64 value)
{
	pasid_set_bits(&pe->val[1], GENMASK_ULL(15, 0), value);
}
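
/*
 * Get the DID(Domain Identifier) value of a scalable mode PASID entry.
 */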
static inline u16
pasid_get_domain_id(struct pasid_entry *pe)
{
	return (u16)(READ_ONCE(pe->val[1]) & GENMASK_ULL(15, 0));
}
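
/*
 * Setup the SLPTPTR(Second Level Page Table Pointer) field (Bit 12~63)
 * of a scalable mode PASID entry.
 */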
static inline void
pasid_set_slptr(struct pasid_entry *pe, u64 value)
{
	pasid_set_bits(&pe->val[0], VTD_PAGE_MASK, value);
}
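
/*
 * Setup the AW(Address Width) field (Bit 2~4) of a scalable mode PASID
 * entry.
 */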
static inline void
pasid_set_address_width(struct pasid_entry *pe, u64 value)
{
	pasid_set_bits(&pe->val[0], GENMASK_ULL(4, 2), value << 2);
}
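
/*
 * Setup the PGTT(PASID Granular Translation Type) field (Bit 6~8) of a
 * scalable mode PASID entry.
 */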
static inline void
pasid_set_translation_type(struct pasid_entry *pe, u64 value)
{
	pasid_set_bits(&pe->val[0], GENMASK_ULL(8, 6), value << 6);
}
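
/*
 * Enable fault processing by clearing the FPD(Fault Processing
 * Disable) field (Bit 1) of a scalable mode PASID entry.
 */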
static inline void pasid_set_fault_enable(struct pasid_entry *pe)
{
	pasid_set_bits(&pe->val[0], 1 << 1, 0);
}
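
/*
 * Setup the SRE(Supervisor Request Enable) field (Bit 128) of a
 * scalable mode PASID entry.
 */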
static inline void pasid_set_sre(struct pasid_entry *pe)
{
	pasid_set_bits(&pe->val[2], 1 << 0, 1);
}
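
/*
 * Setup the WPE(Write Protect Enable) field (Bit 132) of a scalable
 * mode PASID entry.
 */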
static inline void pasid_set_wpe(struct pasid_entry *pe)
{
	pasid_set_bits(&pe->val[2], 1 << 4, 1 << 4);
}
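
/*
 * Setup the P(Present) field (Bit 0) of a scalable mode PASID entry.
 */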
static inline void pasid_set_present(struct pasid_entry *pe)
{
	pasid_set_bits(&pe->val[0], 1 << 0, 1);
}
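
/*
 * Setup the PWSNP(Page Walk Snoop) field (Bit 87) of a scalable mode
 * PASID entry.
 */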
static inline void pasid_set_page_snoop(struct pasid_entry *pe, bool value)
{
	pasid_set_bits(&pe->val[1], 1 << 23, value << 23);
}
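
/*
 * Setup the PGSNP(Page Snoop) field (Bit 88) of a scalable mode PASID
 * entry.
 */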
static inline void
pasid_set_pgsnp(struct pasid_entry *pe)
{
	pasid_set_bits(&pe->val[1], 1ULL << 24, 1ULL << 24);
}
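
/*
 * Setup the FLPTPTR(First Level Page Table Pointer) field (Bit 140~191)
 * of a scalable mode PASID entry.
 */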
static inline void
pasid_set_flptr(struct pasid_entry *pe, u64 value)
{
	pasid_set_bits(&pe->val[2], VTD_PAGE_MASK, value);
}
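
/*
 * Setup the FLPM(First Level Paging Mode) field (Bit 130~131) of a
 * scalable mode PASID entry.
 */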
static inline void
pasid_set_flpm(struct pasid_entry *pe, u64 value)
{
	pasid_set_bits(&pe->val[2], GENMASK_ULL(3, 2), value << 2);
}
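
/*
 * Setup the EAFE(Extended Access Flag Enable) field (Bit 135) of a
 * scalable mode PASID entry.
 */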
static inline void
pasid_set_eafe(struct pasid_entry *pe)
{
	pasid_set_bits(&pe->val[2], 1 << 7, 1 << 7);
}
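
/*
 * Invalidate the PASID cache entry of @pasid in domain @did with a
 * PASID-selective invalidation descriptor.
 */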
static void
pasid_cache_invalidation_with_pasid(struct intel_iommu *iommu,
				    u16 did, u32 pasid)
{
	struct qi_desc desc;

	desc.qw0 = QI_PC_DID(did) | QI_PC_GRAN(QI_PC_PASID_SEL) |
		QI_PC_PASID(pasid) | QI_PC_TYPE;
	desc.qw1 = 0;
	desc.qw2 = 0;
	desc.qw3 = 0;

	qi_submit_sync(iommu, &desc, 1, 0);
}

static void
devtlb_invalidation_with_pasid(struct intel_iommu *iommu,
			       struct device *dev, u32 pasid)
{
	struct device_domain_info *info;
	u16 sid, qdep, pfsid;

	info = dev_iommu_priv_get(dev);
	if (!info || !info->ats_enabled)
		return;

	sid = info->bus << 8 | info->devfn;
	qdep = info->ats_qdep;
	pfsid = info->pfsid;

	/*
	 * PASID_RID2PASID represents DMA requests without PASID, so the
	 * non-PASID device TLB flush is used for it. For any other PASID,
	 * the device could do DMA with multiple PASIDs, so it is more
	 * efficient to flush the device TLB for just that PASID.
	 */
	if (pasid == PASID_RID2PASID)
		qi_flush_dev_iotlb(iommu, sid, pfsid, qdep, 0, 64 - VTD_PAGE_SHIFT);
	else
		qi_flush_dev_iotlb_pasid(iommu, sid, pfsid, pasid, qdep, 0, 64 - VTD_PAGE_SHIFT);
}
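
/*
 * Tear down the PASID entry of @pasid for @dev and invalidate the
 * caches derived from it.
 */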
void intel_pasid_tear_down_entry(struct intel_iommu *iommu, struct device *dev,
				 u32 pasid, bool fault_ignore)
{
	struct pasid_entry *pte;
	u16 did, pgtt;

	spin_lock(&iommu->lock);
	pte = intel_pasid_get_entry(dev, pasid);
	if (WARN_ON(!pte) || !pasid_pte_is_present(pte)) {
		spin_unlock(&iommu->lock);
		return;
	}

	did = pasid_get_domain_id(pte);
	pgtt = pasid_pte_get_pgtt(pte);
	intel_pasid_clear_entry(dev, pasid, fault_ignore);
	spin_unlock(&iommu->lock);

	if (!ecap_coherent(iommu->ecap))
		clflush_cache_range(pte, sizeof(*pte));

	pasid_cache_invalidation_with_pasid(iommu, did, pasid);

	if (pgtt == PASID_ENTRY_PGTT_PT || pgtt == PASID_ENTRY_PGTT_FL_ONLY)
		qi_flush_piotlb(iommu, did, pasid, 0, -1, 0);
	else
		iommu->flush.flush_iotlb(iommu, did, 0, 0, DMA_TLB_DSI_FLUSH);

	/* Device IOTLB doesn't need to be flushed in caching mode. */
	if (!cap_caching_mode(iommu->cap))
		devtlb_invalidation_with_pasid(iommu, dev, pasid);
}
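
/*
 * This function flushes the caches for a newly set up pasid table
 * entry. The caller must not modify in-use pasid table entries.
 */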
static void pasid_flush_caches(struct intel_iommu *iommu,
			       struct pasid_entry *pte,
			       u32 pasid, u16 did)
{
	if (!ecap_coherent(iommu->ecap))
		clflush_cache_range(pte, sizeof(*pte));

	if (cap_caching_mode(iommu->cap)) {
		pasid_cache_invalidation_with_pasid(iommu, did, pasid);
		qi_flush_piotlb(iommu, did, pasid, 0, -1, 0);
	} else {
		iommu_flush_write_buffer(iommu);
	}
}
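
/*
 * Set up the scalable mode pasid table entry for first level only
 * translation type.
 */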
int intel_pasid_setup_first_level(struct intel_iommu *iommu,
				  struct device *dev, pgd_t *pgd,
				  u32 pasid, u16 did, int flags)
{
	struct pasid_entry *pte;

	if (!ecap_flts(iommu->ecap)) {
		pr_err("No first level translation support on %s\n",
		       iommu->name);
		return -EINVAL;
	}

	if (flags & PASID_FLAG_SUPERVISOR_MODE) {
#ifdef CONFIG_X86
		unsigned long cr0 = read_cr0();

		/* CR0.WP is normally set but just to clarify */
		if (unlikely(!(cr0 & X86_CR0_WP))) {
			pr_err("No CPU write protect!\n");
			return -EINVAL;
		}
#endif
		if (!ecap_srs(iommu->ecap)) {
			pr_err("No supervisor request support on %s\n",
			       iommu->name);
			return -EINVAL;
		}
	}

	if ((flags & PASID_FLAG_FL5LP) && !cap_5lp_support(iommu->cap)) {
		pr_err("No 5-level paging support for first-level on %s\n",
		       iommu->name);
		return -EINVAL;
	}

	spin_lock(&iommu->lock);
	pte = intel_pasid_get_entry(dev, pasid);
	if (!pte) {
		spin_unlock(&iommu->lock);
		return -ENODEV;
	}

	if (pasid_pte_is_present(pte)) {
		spin_unlock(&iommu->lock);
		return -EBUSY;
	}

	pasid_clear_entry(pte);

	/* Setup the first level page table pointer: */
	pasid_set_flptr(pte, (u64)__pa(pgd));
	if (flags & PASID_FLAG_SUPERVISOR_MODE) {
		pasid_set_sre(pte);
		pasid_set_wpe(pte);
	}

	if (flags & PASID_FLAG_FL5LP)
		pasid_set_flpm(pte, 1);

	if (flags & PASID_FLAG_PAGE_SNOOP)
		pasid_set_pgsnp(pte);

	pasid_set_domain_id(pte, did);
	pasid_set_address_width(pte, iommu->agaw);
	pasid_set_page_snoop(pte, !!ecap_smpwc(iommu->ecap));

	/* Setup Present and PASID Granular Transfer Type: */
	pasid_set_translation_type(pte, PASID_ENTRY_PGTT_FL_ONLY);
	pasid_set_present(pte);
	spin_unlock(&iommu->lock);

	pasid_flush_caches(iommu, pte, pasid, did);

	return 0;
}
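
/*
 * Skip the top levels of the page table for an iommu whose agaw is
 * smaller than the domain's. Unnecessary for PT mode.
 */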
static inline int iommu_skip_agaw(struct dmar_domain *domain,
				  struct intel_iommu *iommu,
				  struct dma_pte **pgd)
{
	int agaw;

	for (agaw = domain->agaw; agaw > iommu->agaw; agaw--) {
		*pgd = phys_to_virt(dma_pte_addr(*pgd));
		if (!dma_pte_present(*pgd))
			return -EINVAL;
	}

	return agaw;
}
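
/*
 * Set up the scalable mode pasid entry for second level only
 * translation type.
 */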
int intel_pasid_setup_second_level(struct intel_iommu *iommu,
				   struct dmar_domain *domain,
				   struct device *dev, u32 pasid)
{
	struct pasid_entry *pte;
	struct dma_pte *pgd;
	u64 pgd_val;
	int agaw;
	u16 did;

	/*
	 * If hardware advertises no support for second level
	 * translation, return directly.
	 */
	if (!ecap_slts(iommu->ecap)) {
		pr_err("No second level translation support on %s\n",
		       iommu->name);
		return -EINVAL;
	}

	pgd = domain->pgd;
	agaw = iommu_skip_agaw(domain, iommu, &pgd);
	if (agaw < 0) {
		dev_err(dev, "Invalid domain page table\n");
		return -EINVAL;
	}

	pgd_val = virt_to_phys(pgd);
	did = domain_id_iommu(domain, iommu);

	spin_lock(&iommu->lock);
	pte = intel_pasid_get_entry(dev, pasid);
	if (!pte) {
		spin_unlock(&iommu->lock);
		return -ENODEV;
	}

	if (pasid_pte_is_present(pte)) {
		spin_unlock(&iommu->lock);
		return -EBUSY;
	}

	pasid_clear_entry(pte);
	pasid_set_domain_id(pte, did);
	pasid_set_slptr(pte, pgd_val);
	pasid_set_address_width(pte, agaw);
	pasid_set_translation_type(pte, PASID_ENTRY_PGTT_SL_ONLY);
	pasid_set_fault_enable(pte);
	pasid_set_page_snoop(pte, !!ecap_smpwc(iommu->ecap));

	/*
	 * Since this is a second level only translation setup, the SRE
	 * bit is set as well (addresses are expected to be GPAs).
	 */
	if (pasid != PASID_RID2PASID)
		pasid_set_sre(pte);
	pasid_set_present(pte);
	spin_unlock(&iommu->lock);

	pasid_flush_caches(iommu, pte, pasid, did);

	return 0;
}
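
/*
 * Set up a scalable mode pasid entry for passthrough translation type.
 */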
int intel_pasid_setup_pass_through(struct intel_iommu *iommu,
				   struct dmar_domain *domain,
				   struct device *dev, u32 pasid)
{
	u16 did = FLPT_DEFAULT_DID;
	struct pasid_entry *pte;

	spin_lock(&iommu->lock);
	pte = intel_pasid_get_entry(dev, pasid);
	if (!pte) {
		spin_unlock(&iommu->lock);
		return -ENODEV;
	}

	if (pasid_pte_is_present(pte)) {
		spin_unlock(&iommu->lock);
		return -EBUSY;
	}

	pasid_clear_entry(pte);
	pasid_set_domain_id(pte, did);
	pasid_set_address_width(pte, iommu->agaw);
	pasid_set_translation_type(pte, PASID_ENTRY_PGTT_PT);
	pasid_set_fault_enable(pte);
	pasid_set_page_snoop(pte, !!ecap_smpwc(iommu->ecap));

	/*
	 * The SRE bit is set as well since the addresses are expected
	 * to be GPAs.
	 */
	pasid_set_sre(pte);
	pasid_set_present(pte);
	spin_unlock(&iommu->lock);

	pasid_flush_caches(iommu, pte, pasid, did);

	return 0;
}
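
/*
 * Set the page snoop control for a pasid entry which has already been
 * set up.
 */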
void intel_pasid_setup_page_snoop_control(struct intel_iommu *iommu,
					  struct device *dev, u32 pasid)
{
	struct pasid_entry *pte;
	u16 did;

	spin_lock(&iommu->lock);
	pte = intel_pasid_get_entry(dev, pasid);
	if (WARN_ON(!pte || !pasid_pte_is_present(pte))) {
		spin_unlock(&iommu->lock);
		return;
	}

	pasid_set_pgsnp(pte);
	did = pasid_get_domain_id(pte);
	spin_unlock(&iommu->lock);

	if (!ecap_coherent(iommu->ecap))
		clflush_cache_range(pte, sizeof(*pte));

	/*
	 * After changing a live pasid entry, invalidate the caches
	 * derived from it:
	 *
	 * - PASID-selective-within-domain PASID-cache invalidation
	 * - PASID-selective PASID-based IOTLB invalidation
	 * - Device-TLB invalidation for the affected functions
	 */
	pasid_cache_invalidation_with_pasid(iommu, did, pasid);
	qi_flush_piotlb(iommu, did, pasid, 0, -1, 0);

	/* Device IOTLB doesn't need to be flushed in caching mode. */
	if (!cap_caching_mode(iommu->cap))
		devtlb_invalidation_with_pasid(iommu, dev, pasid);
}