// SPDX-License-Identifier: GPL-2.0-only
/*
 * CPU-agnostic AMD IO page table allocator.
 */

#define pr_fmt(fmt)	"AMD-Vi: " fmt
#define dev_fmt(fmt)	pr_fmt(fmt)

#include <linux/atomic.h>
#include <linux/bitops.h>
#include <linux/io-pgtable.h>
#include <linux/kernel.h>
#include <linux/sizes.h>
#include <linux/slab.h>
#include <linux/types.h>
#include <linux/dma-mapping.h>

#include <asm/barrier.h>

#include "amd_iommu_types.h"
#include "amd_iommu.h"

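/*
 * The AMD IOMMU driver performs its own IOTLB flushing (see the flush calls
 * in the map path below), so the io-pgtable flush callbacks are no-op stubs.
 */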
static void v1_tlb_flush_all(void *cookie)
{
}

static void v1_tlb_flush_walk(unsigned long iova, size_t size,
			      size_t granule, void *cookie)
{
}

static void v1_tlb_add_page(struct iommu_iotlb_gather *gather,
			    unsigned long iova, size_t granule,
			    void *cookie)
{
}

static const struct iommu_flush_ops v1_flush_ops = {
	.tlb_flush_all	= v1_tlb_flush_all,
	.tlb_flush_walk	= v1_tlb_flush_walk,
	.tlb_add_page	= v1_tlb_add_page,
};
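
/*
 * Helper to get a pointer to the first PTE of a series of contiguous PTEs
 * that make up one large mapping, along with its page size and PTE count.
 */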
static u64 *first_pte_l7(u64 *pte, unsigned long *page_size,
			 unsigned long *count)
{
	unsigned long pte_mask, pg_size, cnt;
	u64 *fpte;

	pg_size = PTE_PAGE_SIZE(*pte);
	cnt = PAGE_SIZE_PTE_COUNT(pg_size);
	pte_mask = ~((cnt << 3) - 1);
	fpte = (u64 *)(((unsigned long)pte) & pte_mask);

	if (page_size)
		*page_size = pg_size;

	if (count)
		*count = cnt;

	return fpte;
}
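
/*
 * Helpers to collect page-table pages on a freelist. The pages are only
 * released (via put_pages_list()) once the IOMMU no longer references them.
 */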
static void free_pt_page(u64 *pt, struct list_head *freelist)
{
	struct page *p = virt_to_page(pt);

	list_add_tail(&p->lru, freelist);
}

static void free_pt_lvl(u64 *pt, struct list_head *freelist, int lvl)
{
	u64 *p;
	int i;

	for (i = 0; i < 512; ++i) {
		/* PTE present? */
		if (!IOMMU_PTE_PRESENT(pt[i]))
			continue;

		/* Large PTE? */
		if (PM_PTE_LEVEL(pt[i]) == 0 ||
		    PM_PTE_LEVEL(pt[i]) == 7)
			continue;

		/*
		 * Free the next level. No need to look at l1 tables here since
		 * they can only contain leaf PTEs; just free them directly.
		 */
		p = IOMMU_PTE_PAGE(pt[i]);
		if (lvl > 2)
			free_pt_lvl(p, freelist, lvl - 1);
		else
			free_pt_page(p, freelist);
	}

	free_pt_page(pt, freelist);
}

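/*
 * Collect all pages of a sub page-table rooted at @root for the given paging
 * mode. Modes NONE and 7_LEVEL have no page-table pages of their own to free.
 */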
static void free_sub_pt(u64 *root, int mode, struct list_head *freelist)
{
	switch (mode) {
	case PAGE_MODE_NONE:
	case PAGE_MODE_7_LEVEL:
		break;
	case PAGE_MODE_1_LEVEL:
		free_pt_page(root, freelist);
		break;
	case PAGE_MODE_2_LEVEL:
	case PAGE_MODE_3_LEVEL:
	case PAGE_MODE_4_LEVEL:
	case PAGE_MODE_5_LEVEL:
	case PAGE_MODE_6_LEVEL:
		free_pt_lvl(root, freelist, mode);
		break;
	default:
		BUG();
	}
}

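/* Publish a new page-table root and paging mode for the domain. */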
void amd_iommu_domain_set_pgtable(struct protection_domain *domain,
				  u64 *root, int mode)
{
	u64 pt_root;

	/* The lowest 3 bits encode the paging mode */
	pt_root = mode & 7;
	pt_root |= (u64)root;

	amd_iommu_domain_set_pt_root(domain, pt_root);
}
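
/*
 * Add another level to an IO page table. Each extra level grows the
 * addressable IOVA space by 9 bits, up to the 6-level maximum.
 */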
static bool increase_address_space(struct protection_domain *domain,
				   unsigned long address,
				   gfp_t gfp)
{
	unsigned long flags;
	bool ret = true;
	u64 *pte;

	pte = (void *)get_zeroed_page(gfp);
	if (!pte)
		return false;

	spin_lock_irqsave(&domain->lock, flags);

	if (address <= PM_LEVEL_SIZE(domain->iop.mode))
		goto out;

	ret = false;
	if (WARN_ON_ONCE(domain->iop.mode == PAGE_MODE_6_LEVEL))
		goto out;

	*pte = PM_LEVEL_PDE(domain->iop.mode, iommu_virt_to_phys(domain->iop.root));

	domain->iop.root = pte;
	domain->iop.mode += 1;
	amd_iommu_update_and_flush_device_table(domain);
	amd_iommu_domain_flush_complete(domain);

	/*
	 * The device table needs to be updated and flushed before the new root
	 * can be published.
	 */
	amd_iommu_domain_set_pgtable(domain, pte, domain->iop.mode);

	pte = NULL;
	ret = true;

out:
	spin_unlock_irqrestore(&domain->lock, flags);
	free_page((unsigned long)pte);

	return ret;
}

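/*
 * Walk the page table for @address down to the level required for
 * @page_size, allocating intermediate page-table pages as needed, and
 * return a pointer to the target PTE. Sets *updated when existing
 * (possibly live) PTEs had to be torn down or replaced along the way.
 */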
static u64 *alloc_pte(struct protection_domain *domain,
		      unsigned long address,
		      unsigned long page_size,
		      u64 **pte_page,
		      gfp_t gfp,
		      bool *updated)
{
	int level, end_lvl;
	u64 *pte, *page;

	BUG_ON(!is_power_of_2(page_size));

	while (address > PM_LEVEL_SIZE(domain->iop.mode)) {
		/*
		 * Return an error if there is no memory to update the
		 * page-table.
		 */
		if (!increase_address_space(domain, address, gfp))
			return NULL;
	}

	level = domain->iop.mode - 1;
	pte = &domain->iop.root[PM_LEVEL_INDEX(level, address)];
	address = PAGE_SIZE_ALIGN(address, page_size);
	end_lvl = PAGE_SIZE_LEVEL(page_size);

	while (level > end_lvl) {
		u64 __pte, __npte;
		int pte_level;

		__pte = *pte;
		pte_level = PM_PTE_LEVEL(__pte);

		/*
		 * If we replace a series of large PTEs, we need
		 * to tear down all of them first.
		 */
		if (IOMMU_PTE_PRESENT(__pte) &&
		    pte_level == PAGE_MODE_7_LEVEL) {
			unsigned long count, i;
			u64 *lpte;

			lpte = first_pte_l7(pte, NULL, &count);

			/*
			 * Unmap the replicated PTEs that still match the
			 * original large mapping.
			 */
			for (i = 0; i < count; ++i)
				cmpxchg64(&lpte[i], __pte, 0ULL);

			*updated = true;
			continue;
		}

		if (!IOMMU_PTE_PRESENT(__pte) ||
		    pte_level == PAGE_MODE_NONE) {
			page = (u64 *)get_zeroed_page(gfp);

			if (!page)
				return NULL;

			__npte = PM_LEVEL_PDE(level, iommu_virt_to_phys(page));

			/* pte could have been changed somewhere. */
			if (!try_cmpxchg64(pte, &__pte, __npte))
				free_page((unsigned long)page);
			else if (IOMMU_PTE_PRESENT(__pte))
				*updated = true;

			continue;
		}

		/* No level skipping support yet */
		if (pte_level != level)
			return NULL;

		level -= 1;

		pte = IOMMU_PTE_PAGE(__pte);

		if (pte_page && level == end_lvl)
			*pte_page = pte;

		pte = &pte[PM_LEVEL_INDEX(level, address)];
	}

	return pte;
}
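
/*
 * Check whether there is a PTE for the given IOVA and, if so, return a
 * pointer to it together with the page size it maps.
 */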
static u64 *fetch_pte(struct amd_io_pgtable *pgtable,
		      unsigned long address,
		      unsigned long *page_size)
{
	int level;
	u64 *pte;

	*page_size = 0;

	if (address > PM_LEVEL_SIZE(pgtable->mode))
		return NULL;

	level = pgtable->mode - 1;
	pte = &pgtable->root[PM_LEVEL_INDEX(level, address)];
	*page_size = PTE_LEVEL_PAGE_SIZE(level);

	while (level > 0) {
		/* Not present? */
		if (!IOMMU_PTE_PRESENT(*pte))
			return NULL;

		/* Large PTE? */
		if (PM_PTE_LEVEL(*pte) == 7 ||
		    PM_PTE_LEVEL(*pte) == 0)
			break;

		/* No level skipping support yet */
		if (PM_PTE_LEVEL(*pte) != level)
			return NULL;

		level -= 1;

		/* Walk to the next level */
		pte = IOMMU_PTE_PAGE(*pte);
		pte = &pte[PM_LEVEL_INDEX(level, address)];
		*page_size = PTE_LEVEL_PAGE_SIZE(level);
	}

	/*
	 * If we have a series of large PTEs, make
	 * sure to return a pointer to the first one.
	 */
	if (PM_PTE_LEVEL(*pte) == PAGE_MODE_7_LEVEL)
		pte = first_pte_l7(pte, page_size, NULL);

	return pte;
}

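/*
 * Clear a PTE and, if it pointed to a sub page-table, queue that table's
 * pages on the freelist for deferred freeing.
 */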
static void free_clear_pte(u64 *pte, u64 pteval, struct list_head *freelist)
{
	u64 *pt;
	int mode;

	while (!try_cmpxchg64(pte, &pteval, 0))
		pr_warn("IOMMU pte changed since we read it\n");

	if (!IOMMU_PTE_PRESENT(pteval))
		return;

	pt = IOMMU_PTE_PAGE(pteval);
	mode = IOMMU_PTE_MODE(pteval);

	free_sub_pt(pt, mode, freelist);
}
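
/*
 * Map a physical address range into the domain's IOVA space, allocating
 * page-table pages as necessary. Any replaced page-table pages are flushed
 * out and freed before the function returns.
 */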
static int iommu_v1_map_page(struct io_pgtable_ops *ops, unsigned long iova,
			     phys_addr_t paddr, size_t size, int prot, gfp_t gfp)
{
	struct protection_domain *dom = io_pgtable_ops_to_domain(ops);
	LIST_HEAD(freelist);
	bool updated = false;
	u64 __pte, *pte;
	int ret, i, count;

	BUG_ON(!IS_ALIGNED(iova, size));
	BUG_ON(!IS_ALIGNED(paddr, size));

	ret = -EINVAL;
	if (!(prot & IOMMU_PROT_MASK))
		goto out;

	count = PAGE_SIZE_PTE_COUNT(size);
	pte = alloc_pte(dom, iova, size, NULL, gfp, &updated);

	ret = -ENOMEM;
	if (!pte)
		goto out;

	for (i = 0; i < count; ++i)
		free_clear_pte(&pte[i], pte[i], &freelist);

	if (!list_empty(&freelist))
		updated = true;

	if (count > 1) {
		__pte = PAGE_SIZE_PTE(__sme_set(paddr), size);
		__pte |= PM_LEVEL_ENC(7) | IOMMU_PTE_PR | IOMMU_PTE_FC;
	} else
		__pte = __sme_set(paddr) | IOMMU_PTE_PR | IOMMU_PTE_FC;

	if (prot & IOMMU_PROT_IR)
		__pte |= IOMMU_PTE_IR;
	if (prot & IOMMU_PROT_IW)
		__pte |= IOMMU_PTE_IW;

	for (i = 0; i < count; ++i)
		pte[i] = __pte;

	ret = 0;

out:
	if (updated) {
		unsigned long flags;

		spin_lock_irqsave(&dom->lock, flags);
		/*
		 * Flush domain TLB(s) and wait for completion. Any device-table
		 * updates and flushing already happened in
		 * increase_address_space().
		 */
		amd_iommu_domain_flush_tlb_pde(dom);
		amd_iommu_domain_flush_complete(dom);
		spin_unlock_irqrestore(&dom->lock, flags);
	}

	/* Everything is flushed out, free pages now */
	put_pages_list(&freelist);

	return ret;
}

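/*
 * Unmap up to @size bytes starting at @iova by clearing the PTEs that back
 * the range. Returns the number of bytes actually unmapped.
 */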
static unsigned long iommu_v1_unmap_page(struct io_pgtable_ops *ops,
					 unsigned long iova,
					 size_t size,
					 struct iommu_iotlb_gather *gather)
{
	struct amd_io_pgtable *pgtable = io_pgtable_ops_to_data(ops);
	unsigned long long unmapped;
	unsigned long unmap_size;
	u64 *pte;

	BUG_ON(!is_power_of_2(size));

	unmapped = 0;

	while (unmapped < size) {
		pte = fetch_pte(pgtable, iova, &unmap_size);
		if (pte) {
			int i, count;

			count = PAGE_SIZE_PTE_COUNT(unmap_size);
			for (i = 0; i < count; i++)
				pte[i] = 0ULL;
		}

		iova = (iova & ~(unmap_size - 1)) + unmap_size;
		unmapped += unmap_size;
	}

	BUG_ON(unmapped && !is_power_of_2(unmapped));

	return unmapped;
}

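/*
 * Translate an IOVA to the physical address it maps to, or return 0 if no
 * mapping exists.
 */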
static phys_addr_t iommu_v1_iova_to_phys(struct io_pgtable_ops *ops, unsigned long iova)
{
	struct amd_io_pgtable *pgtable = io_pgtable_ops_to_data(ops);
	unsigned long offset_mask, pte_pgsize;
	u64 *pte, __pte;

	pte = fetch_pte(pgtable, iova, &pte_pgsize);

	if (!pte || !IOMMU_PTE_PRESENT(*pte))
		return 0;

	offset_mask = pte_pgsize - 1;
	__pte = __sme_clr(*pte & PM_ADDR_MASK);

	return (__pte & ~offset_mask) | (iova & offset_mask);
}
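
/*
 * Tear down a v1 page table: collect its pages, detach it from the domain,
 * make the change visible to the IOMMUs, then release the pages.
 */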
static void v1_free_pgtable(struct io_pgtable *iop)
{
	struct amd_io_pgtable *pgtable = container_of(iop, struct amd_io_pgtable, iop);
	struct protection_domain *dom;
	LIST_HEAD(freelist);

	if (pgtable->mode == PAGE_MODE_NONE)
		return;

	dom = container_of(pgtable, struct protection_domain, iop);

	BUG_ON(pgtable->mode < PAGE_MODE_NONE ||
	       pgtable->mode > PAGE_MODE_6_LEVEL);

	/* Collect all page-table pages of the old table */
	free_sub_pt(pgtable->root, pgtable->mode, &freelist);

	/* Detach the page table from the domain */
	amd_iommu_domain_clr_pt_root(dom);

	/* Make the change visible to the IOMMUs */
	amd_iommu_domain_update(dom);

	put_pages_list(&freelist);
}

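/*
 * Populate the io_pgtable_cfg and install the map/unmap/iova_to_phys
 * callbacks for the AMD IOMMU v1 page-table format.
 */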
static struct io_pgtable *v1_alloc_pgtable(struct io_pgtable_cfg *cfg, void *cookie)
{
	struct amd_io_pgtable *pgtable = io_pgtable_cfg_to_data(cfg);

	cfg->pgsize_bitmap = AMD_IOMMU_PGSIZES;
	cfg->ias = IOMMU_IN_ADDR_BIT_SIZE;
	cfg->oas = IOMMU_OUT_ADDR_BIT_SIZE;
	cfg->tlb = &v1_flush_ops;

	pgtable->iop.ops.map          = iommu_v1_map_page;
	pgtable->iop.ops.unmap        = iommu_v1_unmap_page;
	pgtable->iop.ops.iova_to_phys = iommu_v1_iova_to_phys;

	return &pgtable->iop;
}

struct io_pgtable_init_fns io_pgtable_amd_iommu_v1_init_fns = {
	.alloc	= v1_alloc_pgtable,
	.free	= v1_free_pgtable,
};