/*
 * Intel VT-d (DMAR) IOMMU driver.
 */

0013 #define pr_fmt(fmt) "DMAR: " fmt
0014 #define dev_fmt(fmt) pr_fmt(fmt)
0015
0016 #include <linux/crash_dump.h>
0017 #include <linux/dma-direct.h>
0018 #include <linux/dma-iommu.h>
0019 #include <linux/dmi.h>
0020 #include <linux/intel-svm.h>
0021 #include <linux/memory.h>
0022 #include <linux/pci.h>
0023 #include <linux/pci-ats.h>
0024 #include <linux/spinlock.h>
0025 #include <linux/syscore_ops.h>
0026 #include <linux/tboot.h>
0027
0028 #include "iommu.h"
0029 #include "../irq_remapping.h"
0030 #include "../iommu-sva-lib.h"
0031 #include "pasid.h"
0032 #include "cap_audit.h"
0033
0034 #define ROOT_SIZE VTD_PAGE_SIZE
0035 #define CONTEXT_SIZE VTD_PAGE_SIZE
0036
0037 #define IS_GFX_DEVICE(pdev) ((pdev->class >> 16) == PCI_BASE_CLASS_DISPLAY)
0038 #define IS_USB_DEVICE(pdev) ((pdev->class >> 8) == PCI_CLASS_SERIAL_USB)
0039 #define IS_ISA_DEVICE(pdev) ((pdev->class >> 8) == PCI_CLASS_BRIDGE_ISA)
0040 #define IS_AZALIA(pdev) ((pdev)->vendor == 0x8086 && (pdev)->device == 0x3a3e)
0041
0042 #define IOAPIC_RANGE_START (0xfee00000)
0043 #define IOAPIC_RANGE_END (0xfeefffff)
0044 #define IOVA_START_ADDR (0x1000)
0045
0046 #define DEFAULT_DOMAIN_ADDRESS_WIDTH 57
0047
0048 #define MAX_AGAW_WIDTH 64
0049 #define MAX_AGAW_PFN_WIDTH (MAX_AGAW_WIDTH - VTD_PAGE_SHIFT)
0050
0051 #define __DOMAIN_MAX_PFN(gaw) ((((uint64_t)1) << ((gaw) - VTD_PAGE_SHIFT)) - 1)
0052 #define __DOMAIN_MAX_ADDR(gaw) ((((uint64_t)1) << (gaw)) - 1)
0053
/*
 * We limit DOMAIN_MAX_PFN to fit in an unsigned long, and DOMAIN_MAX_ADDR
 * to match. That way, we can use 'unsigned long' for PFNs with impunity.
 */
0056 #define DOMAIN_MAX_PFN(gaw) ((unsigned long) min_t(uint64_t, \
0057 __DOMAIN_MAX_PFN(gaw), (unsigned long)-1))
0058 #define DOMAIN_MAX_ADDR(gaw) (((uint64_t)__DOMAIN_MAX_PFN(gaw)) << VTD_PAGE_SHIFT)
0059
/* First page frame number used for IOVA allocation. */
0061 #define IOVA_START_PFN (1)
0062
0063 #define IOVA_PFN(addr) ((addr) >> PAGE_SHIFT)
0064
/* page table handling */
0066 #define LEVEL_STRIDE (9)
0067 #define LEVEL_MASK (((u64)1 << LEVEL_STRIDE) - 1)
0068
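/*
 * AGAW-to-page-table-level helpers: an AGAW of n corresponds to an
 * (n + 2)-level page table covering 30 + 9*n address bits (AGAW 1 is
 * 3-level/39-bit, AGAW 2 is 4-level/48-bit, and so on).
 */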
0069 static inline int agaw_to_level(int agaw)
0070 {
0071 return agaw + 2;
0072 }
0073
0074 static inline int agaw_to_width(int agaw)
0075 {
0076 return min_t(int, 30 + agaw * LEVEL_STRIDE, MAX_AGAW_WIDTH);
0077 }
0078
0079 static inline int width_to_agaw(int width)
0080 {
0081 return DIV_ROUND_UP(width - 30, LEVEL_STRIDE);
0082 }
0083
0084 static inline unsigned int level_to_offset_bits(int level)
0085 {
0086 return (level - 1) * LEVEL_STRIDE;
0087 }
0088
0089 static inline int pfn_level_offset(u64 pfn, int level)
0090 {
0091 return (pfn >> level_to_offset_bits(level)) & LEVEL_MASK;
0092 }
0093
0094 static inline u64 level_mask(int level)
0095 {
0096 return -1ULL << level_to_offset_bits(level);
0097 }
0098
0099 static inline u64 level_size(int level)
0100 {
0101 return 1ULL << level_to_offset_bits(level);
0102 }
0103
0104 static inline u64 align_to_level(u64 pfn, int level)
0105 {
0106 return (pfn + level_size(level) - 1) & level_mask(level);
0107 }
0108
0109 static inline unsigned long lvl_to_nr_pages(unsigned int lvl)
0110 {
0111 return 1UL << min_t(int, (lvl - 1) * LEVEL_STRIDE, MAX_AGAW_PFN_WIDTH);
0112 }
0113
/* VT-d pages must always be _smaller_ than MM pages. Otherwise things
   are never going to work. */
0116 static inline unsigned long mm_to_dma_pfn(unsigned long mm_pfn)
0117 {
0118 return mm_pfn << (PAGE_SHIFT - VTD_PAGE_SHIFT);
0119 }
0120 static inline unsigned long page_to_dma_pfn(struct page *pg)
0121 {
0122 return mm_to_dma_pfn(page_to_pfn(pg));
0123 }
0124 static inline unsigned long virt_to_dma_pfn(void *p)
0125 {
0126 return page_to_dma_pfn(virt_to_page(p));
0127 }
0128
0129 static void __init check_tylersburg_isoch(void);
0130 static int rwbf_quirk;
0131
/*
 * set to 1 to panic kernel if can't successfully enable VT-d
 * (used when kernel is launched w/ TXT)
 */
0136 static int force_on = 0;
0137 static int intel_iommu_tboot_noforce;
0138 static int no_platform_optin;
0139
0140 #define ROOT_ENTRY_NR (VTD_PAGE_SIZE/sizeof(struct root_entry))
0141
/*
 * Take a root_entry and return the Lower Context Table Pointer (LCTP)
 * if marked present.
 */
0146 static phys_addr_t root_entry_lctp(struct root_entry *re)
0147 {
0148 if (!(re->lo & 1))
0149 return 0;
0150
0151 return re->lo & VTD_PAGE_MASK;
0152 }
0153
/*
 * Take a root_entry and return the Upper Context Table Pointer (UCTP)
 * if marked present.
 */
0158 static phys_addr_t root_entry_uctp(struct root_entry *re)
0159 {
0160 if (!(re->hi & 1))
0161 return 0;
0162
0163 return re->hi & VTD_PAGE_MASK;
0164 }
0165
0166 static inline void context_set_present(struct context_entry *context)
0167 {
0168 context->lo |= 1;
0169 }
0170
0171 static inline void context_set_fault_enable(struct context_entry *context)
0172 {
0173 context->lo &= (((u64)-1) << 2) | 1;
0174 }
0175
0176 static inline void context_set_translation_type(struct context_entry *context,
0177 unsigned long value)
0178 {
0179 context->lo &= (((u64)-1) << 4) | 3;
0180 context->lo |= (value & 3) << 2;
0181 }
0182
0183 static inline void context_set_address_root(struct context_entry *context,
0184 unsigned long value)
0185 {
0186 context->lo &= ~VTD_PAGE_MASK;
0187 context->lo |= value & VTD_PAGE_MASK;
0188 }
0189
0190 static inline void context_set_address_width(struct context_entry *context,
0191 unsigned long value)
0192 {
0193 context->hi |= value & 7;
0194 }
0195
0196 static inline void context_set_domain_id(struct context_entry *context,
0197 unsigned long value)
0198 {
0199 context->hi |= (value & ((1 << 16) - 1)) << 8;
0200 }
0201
0202 static inline int context_domain_id(struct context_entry *c)
0203 {
0204 return((c->hi >> 8) & 0xffff);
0205 }
0206
0207 static inline void context_clear_entry(struct context_entry *context)
0208 {
0209 context->lo = 0;
0210 context->hi = 0;
0211 }
0212
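/*
 * Helpers to track, per (bus, devfn), which context entries were copied
 * from the tables left programmed by a previous kernel (e.g. kdump).
 */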
0213 static inline bool context_copied(struct intel_iommu *iommu, u8 bus, u8 devfn)
0214 {
0215 if (!iommu->copied_tables)
0216 return false;
0217
0218 return test_bit(((long)bus << 8) | devfn, iommu->copied_tables);
0219 }
0220
0221 static inline void
0222 set_context_copied(struct intel_iommu *iommu, u8 bus, u8 devfn)
0223 {
0224 set_bit(((long)bus << 8) | devfn, iommu->copied_tables);
0225 }
0226
0227 static inline void
0228 clear_context_copied(struct intel_iommu *iommu, u8 bus, u8 devfn)
0229 {
0230 clear_bit(((long)bus << 8) | devfn, iommu->copied_tables);
0231 }
0232
/*
 * This domain is a statically identity mapping domain.
 *	1. This domain creates a static 1:1 mapping to all usable memory.
 *	2. It maps to each iommu if successful.
 *	3. Each iommu maps to this domain if successful.
 */
0239 static struct dmar_domain *si_domain;
0240 static int hw_pass_through = 1;
0241
0242 struct dmar_rmrr_unit {
0243 struct list_head list;
0244 struct acpi_dmar_header *hdr;
0245 u64 base_address;
0246 u64 end_address;
0247 struct dmar_dev_scope *devices;
0248 int devices_cnt;
0249 };
0250
0251 struct dmar_atsr_unit {
0252 struct list_head list;
0253 struct acpi_dmar_header *hdr;
0254 struct dmar_dev_scope *devices;
0255 int devices_cnt;
0256 u8 include_all:1;
0257 };
0258
0259 struct dmar_satc_unit {
0260 struct list_head list;
0261 struct acpi_dmar_header *hdr;
0262 struct dmar_dev_scope *devices;
0263 struct intel_iommu *iommu;
0264 int devices_cnt;
0265 u8 atc_required:1;
0266 };
0267
0268 static LIST_HEAD(dmar_atsr_units);
0269 static LIST_HEAD(dmar_rmrr_units);
0270 static LIST_HEAD(dmar_satc_units);
0271
0272 #define for_each_rmrr_units(rmrr) \
0273 list_for_each_entry(rmrr, &dmar_rmrr_units, list)
0274
0275 static void dmar_remove_one_dev_info(struct device *dev);
0276
0277 int dmar_disabled = !IS_ENABLED(CONFIG_INTEL_IOMMU_DEFAULT_ON);
0278 int intel_iommu_sm = IS_ENABLED(CONFIG_INTEL_IOMMU_SCALABLE_MODE_DEFAULT_ON);
0279
0280 int intel_iommu_enabled = 0;
0281 EXPORT_SYMBOL_GPL(intel_iommu_enabled);
0282
0283 static int dmar_map_gfx = 1;
0284 static int intel_iommu_superpage = 1;
0285 static int iommu_identity_mapping;
0286 static int iommu_skip_te_disable;
0287
0288 #define IDENTMAP_GFX 2
0289 #define IDENTMAP_AZALIA 4
0290
0291 const struct iommu_ops intel_iommu_ops;
0292
0293 static bool translation_pre_enabled(struct intel_iommu *iommu)
0294 {
0295 return (iommu->flags & VTD_FLAG_TRANS_PRE_ENABLED);
0296 }
0297
0298 static void clear_translation_pre_enabled(struct intel_iommu *iommu)
0299 {
0300 iommu->flags &= ~VTD_FLAG_TRANS_PRE_ENABLED;
0301 }
0302
0303 static void init_translation_status(struct intel_iommu *iommu)
0304 {
0305 u32 gsts;
0306
0307 gsts = readl(iommu->reg + DMAR_GSTS_REG);
0308 if (gsts & DMA_GSTS_TES)
0309 iommu->flags |= VTD_FLAG_TRANS_PRE_ENABLED;
0310 }
0311
0312 static int __init intel_iommu_setup(char *str)
0313 {
0314 if (!str)
0315 return -EINVAL;
0316
0317 while (*str) {
0318 if (!strncmp(str, "on", 2)) {
0319 dmar_disabled = 0;
0320 pr_info("IOMMU enabled\n");
0321 } else if (!strncmp(str, "off", 3)) {
0322 dmar_disabled = 1;
0323 no_platform_optin = 1;
0324 pr_info("IOMMU disabled\n");
0325 } else if (!strncmp(str, "igfx_off", 8)) {
0326 dmar_map_gfx = 0;
0327 pr_info("Disable GFX device mapping\n");
0328 } else if (!strncmp(str, "forcedac", 8)) {
0329 pr_warn("intel_iommu=forcedac deprecated; use iommu.forcedac instead\n");
0330 iommu_dma_forcedac = true;
0331 } else if (!strncmp(str, "strict", 6)) {
0332 pr_warn("intel_iommu=strict deprecated; use iommu.strict=1 instead\n");
0333 iommu_set_dma_strict();
0334 } else if (!strncmp(str, "sp_off", 6)) {
0335 pr_info("Disable supported super page\n");
0336 intel_iommu_superpage = 0;
0337 } else if (!strncmp(str, "sm_on", 5)) {
0338 pr_info("Enable scalable mode if hardware supports\n");
0339 intel_iommu_sm = 1;
0340 } else if (!strncmp(str, "sm_off", 6)) {
0341 pr_info("Scalable mode is disallowed\n");
0342 intel_iommu_sm = 0;
0343 } else if (!strncmp(str, "tboot_noforce", 13)) {
0344 pr_info("Intel-IOMMU: not forcing on after tboot. This could expose security risk for tboot\n");
0345 intel_iommu_tboot_noforce = 1;
0346 } else {
0347 pr_notice("Unknown option - '%s'\n", str);
0348 }
0349
0350 str += strcspn(str, ",");
0351 while (*str == ',')
0352 str++;
0353 }
0354
0355 return 1;
0356 }
0357 __setup("intel_iommu=", intel_iommu_setup);
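/*
 * Example usage (comma-separated options, as parsed above): booting with
 * "intel_iommu=on,sm_on" enables the IOMMU and requests scalable mode,
 * while "intel_iommu=off" disables the IOMMU entirely.
 */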
0358
0359 void *alloc_pgtable_page(int node)
0360 {
0361 struct page *page;
0362 void *vaddr = NULL;
0363
0364 page = alloc_pages_node(node, GFP_ATOMIC | __GFP_ZERO, 0);
0365 if (page)
0366 vaddr = page_address(page);
0367 return vaddr;
0368 }
0369
0370 void free_pgtable_page(void *vaddr)
0371 {
0372 free_page((unsigned long)vaddr);
0373 }
0374
0375 static inline int domain_type_is_si(struct dmar_domain *domain)
0376 {
0377 return domain->domain.type == IOMMU_DOMAIN_IDENTITY;
0378 }
0379
0380 static inline bool domain_use_first_level(struct dmar_domain *domain)
0381 {
0382 return domain->flags & DOMAIN_FLAG_USE_FIRST_LEVEL;
0383 }
0384
0385 static inline int domain_pfn_supported(struct dmar_domain *domain,
0386 unsigned long pfn)
0387 {
0388 int addr_width = agaw_to_width(domain->agaw) - VTD_PAGE_SHIFT;
0389
0390 return !(addr_width < BITS_PER_LONG && pfn >> addr_width);
0391 }
0392
/*
 * Calculate the Supported Adjusted Guest Address Widths of an IOMMU.
 * Refer to Section 11.4.2 of the VT-d spec for the encoding of each
 * bit of the returned SAGAW value.
 */
0398 static unsigned long __iommu_calculate_sagaw(struct intel_iommu *iommu)
0399 {
0400 unsigned long fl_sagaw, sl_sagaw;
0401
0402 fl_sagaw = BIT(2) | (cap_5lp_support(iommu->cap) ? BIT(3) : 0);
0403 sl_sagaw = cap_sagaw(iommu->cap);
0404
	/* Second level only. */
0406 if (!sm_supported(iommu) || !ecap_flts(iommu->ecap))
0407 return sl_sagaw;
0408
	/* First level only. */
0410 if (!ecap_slts(iommu->ecap))
0411 return fl_sagaw;
0412
0413 return fl_sagaw & sl_sagaw;
0414 }
0415
0416 static int __iommu_calculate_agaw(struct intel_iommu *iommu, int max_gaw)
0417 {
0418 unsigned long sagaw;
0419 int agaw;
0420
0421 sagaw = __iommu_calculate_sagaw(iommu);
0422 for (agaw = width_to_agaw(max_gaw); agaw >= 0; agaw--) {
0423 if (test_bit(agaw, &sagaw))
0424 break;
0425 }
0426
0427 return agaw;
0428 }
0429
/*
 * Calculate max SAGAW for each iommu.
 */
0433 int iommu_calculate_max_sagaw(struct intel_iommu *iommu)
0434 {
0435 return __iommu_calculate_agaw(iommu, MAX_AGAW_WIDTH);
0436 }
0437
/*
 * Calculate agaw for each iommu.
 * "SAGAW" may be different across iommus, use a default agaw, and
 * get a supported less agaw for iommus that don't support the default agaw.
 */
0443 int iommu_calculate_agaw(struct intel_iommu *iommu)
0444 {
0445 return __iommu_calculate_agaw(iommu, DEFAULT_DOMAIN_ADDRESS_WIDTH);
0446 }
0447
0448 static inline bool iommu_paging_structure_coherency(struct intel_iommu *iommu)
0449 {
0450 return sm_supported(iommu) ?
0451 ecap_smpwc(iommu->ecap) : ecap_coherent(iommu->ecap);
0452 }
0453
0454 static void domain_update_iommu_coherency(struct dmar_domain *domain)
0455 {
0456 struct iommu_domain_info *info;
0457 struct dmar_drhd_unit *drhd;
0458 struct intel_iommu *iommu;
0459 bool found = false;
0460 unsigned long i;
0461
0462 domain->iommu_coherency = true;
0463 xa_for_each(&domain->iommu_array, i, info) {
0464 found = true;
0465 if (!iommu_paging_structure_coherency(info->iommu)) {
0466 domain->iommu_coherency = false;
0467 break;
0468 }
0469 }
0470 if (found)
0471 return;
0472
	/* No hardware attached; use lowest common denominator */
0474 rcu_read_lock();
0475 for_each_active_iommu(iommu, drhd) {
0476 if (!iommu_paging_structure_coherency(iommu)) {
0477 domain->iommu_coherency = false;
0478 break;
0479 }
0480 }
0481 rcu_read_unlock();
0482 }
0483
0484 static int domain_update_iommu_superpage(struct dmar_domain *domain,
0485 struct intel_iommu *skip)
0486 {
0487 struct dmar_drhd_unit *drhd;
0488 struct intel_iommu *iommu;
0489 int mask = 0x3;
0490
0491 if (!intel_iommu_superpage)
0492 return 0;
0493
	/* set iommu_superpage to the smallest common denominator */
0495 rcu_read_lock();
0496 for_each_active_iommu(iommu, drhd) {
0497 if (iommu != skip) {
0498 if (domain && domain_use_first_level(domain)) {
0499 if (!cap_fl1gp_support(iommu->cap))
0500 mask = 0x1;
0501 } else {
0502 mask &= cap_super_page_val(iommu->cap);
0503 }
0504
0505 if (!mask)
0506 break;
0507 }
0508 }
0509 rcu_read_unlock();
0510
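	/* 0: no superpage support, 1: 2MiB pages, 2: 2MiB and 1GiB pages */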
0511 return fls(mask);
0512 }
0513
0514 static int domain_update_device_node(struct dmar_domain *domain)
0515 {
0516 struct device_domain_info *info;
0517 int nid = NUMA_NO_NODE;
0518 unsigned long flags;
0519
0520 spin_lock_irqsave(&domain->lock, flags);
0521 list_for_each_entry(info, &domain->devices, link) {
		/*
		 * There could possibly be multiple device numa nodes as devices
		 * within the same domain may sit behind different IOMMUs. There
		 * isn't perfect answer in such situation, so we select first
		 * come first served policy.
		 */
0528 nid = dev_to_node(info->dev);
0529 if (nid != NUMA_NO_NODE)
0530 break;
0531 }
0532 spin_unlock_irqrestore(&domain->lock, flags);
0533
0534 return nid;
0535 }
0536
0537 static void domain_update_iotlb(struct dmar_domain *domain);
0538
0539
0540 static unsigned long domain_super_pgsize_bitmap(struct dmar_domain *domain)
0541 {
0542 unsigned long bitmap = 0;
0543
	/*
	 * 1-level super page supports page size of 2MiB, 2-level super page
	 * supports page size of both 2MiB and 1GiB.
	 */
0548 if (domain->iommu_superpage == 1)
0549 bitmap |= SZ_2M;
0550 else if (domain->iommu_superpage == 2)
0551 bitmap |= SZ_2M | SZ_1G;
0552
0553 return bitmap;
0554 }
0555
/* Some capabilities may be different across iommus */
0557 static void domain_update_iommu_cap(struct dmar_domain *domain)
0558 {
0559 domain_update_iommu_coherency(domain);
0560 domain->iommu_superpage = domain_update_iommu_superpage(domain, NULL);
0561
	/*
	 * If RHSA is missing, we should default to the device numa domain
	 * as fall back.
	 */
0566 if (domain->nid == NUMA_NO_NODE)
0567 domain->nid = domain_update_device_node(domain);
0568
	/*
	 * First-level translation restricts the input-address to a
	 * canonical address (i.e., address bits 63:N have the same
	 * value as address bit [N-1], where N is 48-bits with 4-level
	 * paging and 57-bits with 5-level paging). Hence, skip bit
	 * [N-1].
	 */
0576 if (domain_use_first_level(domain))
0577 domain->domain.geometry.aperture_end = __DOMAIN_MAX_ADDR(domain->gaw - 1);
0578 else
0579 domain->domain.geometry.aperture_end = __DOMAIN_MAX_ADDR(domain->gaw);
0580
0581 domain->domain.pgsize_bitmap |= domain_super_pgsize_bitmap(domain);
0582 domain_update_iotlb(domain);
0583 }
0584
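/*
 * Return the context entry for (bus, devfn), allocating the context table
 * when @alloc is set. In scalable mode the root entry is split in two
 * halves (devfn < 0x80 uses the low half, devfn >= 0x80 the high half)
 * and each device uses a pair of 128-bit entries, hence the devfn
 * adjustment below.
 */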
0585 struct context_entry *iommu_context_addr(struct intel_iommu *iommu, u8 bus,
0586 u8 devfn, int alloc)
0587 {
0588 struct root_entry *root = &iommu->root_entry[bus];
0589 struct context_entry *context;
0590 u64 *entry;
0591
	/*
	 * Except that the caller requested to allocate a new entry,
	 * returning a copied context entry makes no sense.
	 */
0596 if (!alloc && context_copied(iommu, bus, devfn))
0597 return NULL;
0598
0599 entry = &root->lo;
0600 if (sm_supported(iommu)) {
0601 if (devfn >= 0x80) {
0602 devfn -= 0x80;
0603 entry = &root->hi;
0604 }
0605 devfn *= 2;
0606 }
0607 if (*entry & 1)
0608 context = phys_to_virt(*entry & VTD_PAGE_MASK);
0609 else {
0610 unsigned long phy_addr;
0611 if (!alloc)
0612 return NULL;
0613
0614 context = alloc_pgtable_page(iommu->node);
0615 if (!context)
0616 return NULL;
0617
0618 __iommu_flush_cache(iommu, (void *)context, CONTEXT_SIZE);
0619 phy_addr = virt_to_phys((void *)context);
0620 *entry = phy_addr | 1;
0621 __iommu_flush_cache(iommu, entry, sizeof(*entry));
0622 }
0623 return &context[devfn];
0624 }
0625
/**
 * is_downstream_to_pci_bridge - test if a device belongs to the PCI
 *				 sub-hierarchy of a candidate PCI-PCI bridge
 * @dev: candidate PCI device belonging to @bridge PCI sub-hierarchy
 * @bridge: the candidate PCI-PCI bridge
 *
 * Return: true if @dev belongs to @bridge PCI sub-hierarchy, else false.
 */
0634 static bool
0635 is_downstream_to_pci_bridge(struct device *dev, struct device *bridge)
0636 {
0637 struct pci_dev *pdev, *pbridge;
0638
0639 if (!dev_is_pci(dev) || !dev_is_pci(bridge))
0640 return false;
0641
0642 pdev = to_pci_dev(dev);
0643 pbridge = to_pci_dev(bridge);
0644
0645 if (pbridge->subordinate &&
0646 pbridge->subordinate->number <= pdev->bus->number &&
0647 pbridge->subordinate->busn_res.end >= pdev->bus->number)
0648 return true;
0649
0650 return false;
0651 }
0652
0653 static bool quirk_ioat_snb_local_iommu(struct pci_dev *pdev)
0654 {
0655 struct dmar_drhd_unit *drhd;
0656 u32 vtbar;
0657 int rc;
0658
0659
0660
0661
0662
0663
0664 rc = pci_bus_read_config_dword(pdev->bus, PCI_DEVFN(0, 0), 0xb0, &vtbar);
0665 if (rc) {
0666
0667 dev_info(&pdev->dev, "failed to run vt-d quirk\n");
0668 return false;
0669 }
0670 vtbar &= 0xffff0000;
0671
0672
0673 drhd = dmar_find_matched_drhd_unit(pdev);
0674 if (!drhd || drhd->reg_base_addr - vtbar != 0xa000) {
0675 pr_warn_once(FW_BUG "BIOS assigned incorrect VT-d unit for Intel(R) QuickData Technology device\n");
0676 add_taint(TAINT_FIRMWARE_WORKAROUND, LOCKDEP_STILL_OK);
0677 return true;
0678 }
0679
0680 return false;
0681 }
0682
0683 static bool iommu_is_dummy(struct intel_iommu *iommu, struct device *dev)
0684 {
0685 if (!iommu || iommu->drhd->ignored)
0686 return true;
0687
0688 if (dev_is_pci(dev)) {
0689 struct pci_dev *pdev = to_pci_dev(dev);
0690
0691 if (pdev->vendor == PCI_VENDOR_ID_INTEL &&
0692 pdev->device == PCI_DEVICE_ID_INTEL_IOAT_SNB &&
0693 quirk_ioat_snb_local_iommu(pdev))
0694 return true;
0695 }
0696
0697 return false;
0698 }
0699
0700 struct intel_iommu *device_to_iommu(struct device *dev, u8 *bus, u8 *devfn)
0701 {
0702 struct dmar_drhd_unit *drhd = NULL;
0703 struct pci_dev *pdev = NULL;
0704 struct intel_iommu *iommu;
0705 struct device *tmp;
0706 u16 segment = 0;
0707 int i;
0708
0709 if (!dev)
0710 return NULL;
0711
0712 if (dev_is_pci(dev)) {
0713 struct pci_dev *pf_pdev;
0714
0715 pdev = pci_real_dma_dev(to_pci_dev(dev));
0716
		/* VFs aren't listed in scope tables; we need to look up
		 * the PF instead to find the IOMMU. */
0719 pf_pdev = pci_physfn(pdev);
0720 dev = &pf_pdev->dev;
0721 segment = pci_domain_nr(pdev->bus);
0722 } else if (has_acpi_companion(dev))
0723 dev = &ACPI_COMPANION(dev)->dev;
0724
0725 rcu_read_lock();
0726 for_each_iommu(iommu, drhd) {
0727 if (pdev && segment != drhd->segment)
0728 continue;
0729
0730 for_each_active_dev_scope(drhd->devices,
0731 drhd->devices_cnt, i, tmp) {
0732 if (tmp == dev) {
				/* For a VF use its original BDF# not that of the PF
				 * which we used for the IOMMU lookup. Strictly speaking
				 * we could do this for all PCI devices; we only need to
				 * get the BDF# from the scope table for ACPI matches. */
0737 if (pdev && pdev->is_virtfn)
0738 goto got_pdev;
0739
0740 if (bus && devfn) {
0741 *bus = drhd->devices[i].bus;
0742 *devfn = drhd->devices[i].devfn;
0743 }
0744 goto out;
0745 }
0746
0747 if (is_downstream_to_pci_bridge(dev, tmp))
0748 goto got_pdev;
0749 }
0750
0751 if (pdev && drhd->include_all) {
0752 got_pdev:
0753 if (bus && devfn) {
0754 *bus = pdev->bus->number;
0755 *devfn = pdev->devfn;
0756 }
0757 goto out;
0758 }
0759 }
0760 iommu = NULL;
0761 out:
0762 if (iommu_is_dummy(iommu, dev))
0763 iommu = NULL;
0764
0765 rcu_read_unlock();
0766
0767 return iommu;
0768 }
0769
0770 static void domain_flush_cache(struct dmar_domain *domain,
0771 void *addr, int size)
0772 {
0773 if (!domain->iommu_coherency)
0774 clflush_cache_range(addr, size);
0775 }
0776
0777 static int device_context_mapped(struct intel_iommu *iommu, u8 bus, u8 devfn)
0778 {
0779 struct context_entry *context;
0780 int ret = 0;
0781
0782 spin_lock(&iommu->lock);
0783 context = iommu_context_addr(iommu, bus, devfn, 0);
0784 if (context)
0785 ret = context_present(context);
0786 spin_unlock(&iommu->lock);
0787 return ret;
0788 }
0789
0790 static void free_context_table(struct intel_iommu *iommu)
0791 {
0792 struct context_entry *context;
0793 int i;
0794
0795 if (!iommu->root_entry)
0796 return;
0797
0798 for (i = 0; i < ROOT_ENTRY_NR; i++) {
0799 context = iommu_context_addr(iommu, i, 0, 0);
0800 if (context)
0801 free_pgtable_page(context);
0802
0803 if (!sm_supported(iommu))
0804 continue;
0805
0806 context = iommu_context_addr(iommu, i, 0x80, 0);
0807 if (context)
0808 free_pgtable_page(context);
0809 }
0810
0811 free_pgtable_page(iommu->root_entry);
0812 iommu->root_entry = NULL;
0813 }
0814
0815 #ifdef CONFIG_DMAR_DEBUG
0816 static void pgtable_walk(struct intel_iommu *iommu, unsigned long pfn,
0817 u8 bus, u8 devfn, struct dma_pte *parent, int level)
0818 {
0819 struct dma_pte *pte;
0820 int offset;
0821
0822 while (1) {
0823 offset = pfn_level_offset(pfn, level);
0824 pte = &parent[offset];
0825 if (!pte || (dma_pte_superpage(pte) || !dma_pte_present(pte))) {
0826 pr_info("PTE not present at level %d\n", level);
0827 break;
0828 }
0829
0830 pr_info("pte level: %d, pte value: 0x%016llx\n", level, pte->val);
0831
0832 if (level == 1)
0833 break;
0834
0835 parent = phys_to_virt(dma_pte_addr(pte));
0836 level--;
0837 }
0838 }
0839
0840 void dmar_fault_dump_ptes(struct intel_iommu *iommu, u16 source_id,
0841 unsigned long long addr, u32 pasid)
0842 {
0843 struct pasid_dir_entry *dir, *pde;
0844 struct pasid_entry *entries, *pte;
0845 struct context_entry *ctx_entry;
0846 struct root_entry *rt_entry;
0847 int i, dir_index, index, level;
0848 u8 devfn = source_id & 0xff;
0849 u8 bus = source_id >> 8;
0850 struct dma_pte *pgtable;
0851
0852 pr_info("Dump %s table entries for IOVA 0x%llx\n", iommu->name, addr);
0853
0854
0855 rt_entry = &iommu->root_entry[bus];
0856 if (!rt_entry) {
0857 pr_info("root table entry is not present\n");
0858 return;
0859 }
0860
0861 if (sm_supported(iommu))
0862 pr_info("scalable mode root entry: hi 0x%016llx, low 0x%016llx\n",
0863 rt_entry->hi, rt_entry->lo);
0864 else
		pr_info("root entry: 0x%016llx\n", rt_entry->lo);
0866
0867
0868 ctx_entry = iommu_context_addr(iommu, bus, devfn, 0);
0869 if (!ctx_entry) {
0870 pr_info("context table entry is not present\n");
0871 return;
0872 }
0873
0874 pr_info("context entry: hi 0x%016llx, low 0x%016llx\n",
0875 ctx_entry->hi, ctx_entry->lo);
0876
0877
0878 if (!sm_supported(iommu)) {
0879 level = agaw_to_level(ctx_entry->hi & 7);
0880 pgtable = phys_to_virt(ctx_entry->lo & VTD_PAGE_MASK);
0881 goto pgtable_walk;
0882 }
0883
0884
0885 dir = phys_to_virt(ctx_entry->lo & VTD_PAGE_MASK);
0886 if (!dir) {
0887 pr_info("pasid directory entry is not present\n");
0888 return;
0889 }
0890
0891 if (intel_iommu_sm && pasid == INVALID_IOASID)
0892 pasid = PASID_RID2PASID;
0893
0894 dir_index = pasid >> PASID_PDE_SHIFT;
0895 pde = &dir[dir_index];
0896 pr_info("pasid dir entry: 0x%016llx\n", pde->val);
0897
0898
0899 entries = get_pasid_table_from_pde(pde);
0900 if (!entries) {
0901 pr_info("pasid table entry is not present\n");
0902 return;
0903 }
0904 index = pasid & PASID_PTE_MASK;
0905 pte = &entries[index];
0906 for (i = 0; i < ARRAY_SIZE(pte->val); i++)
0907 pr_info("pasid table entry[%d]: 0x%016llx\n", i, pte->val[i]);
0908
0909 if (pasid_pte_get_pgtt(pte) == PASID_ENTRY_PGTT_FL_ONLY) {
0910 level = pte->val[2] & BIT_ULL(2) ? 5 : 4;
0911 pgtable = phys_to_virt(pte->val[2] & VTD_PAGE_MASK);
0912 } else {
0913 level = agaw_to_level((pte->val[0] >> 2) & 0x7);
0914 pgtable = phys_to_virt(pte->val[0] & VTD_PAGE_MASK);
0915 }
0916
0917 pgtable_walk:
0918 pgtable_walk(iommu, addr >> VTD_PAGE_SHIFT, bus, devfn, pgtable, level);
0919 }
0920 #endif
0921
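/*
 * Walk the page table to the PTE covering @pfn, allocating missing
 * intermediate levels as needed. If *target_level is 0, the walk stops at
 * the first superpage or non-present entry and the level reached is
 * written back through @target_level; otherwise it descends to the
 * requested level.
 */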
0922 static struct dma_pte *pfn_to_dma_pte(struct dmar_domain *domain,
0923 unsigned long pfn, int *target_level)
0924 {
0925 struct dma_pte *parent, *pte;
0926 int level = agaw_to_level(domain->agaw);
0927 int offset;
0928
0929 BUG_ON(!domain->pgd);
0930
0931 if (!domain_pfn_supported(domain, pfn))
		/* Address beyond IOMMU's addressing capabilities. */
0933 return NULL;
0934
0935 parent = domain->pgd;
0936
0937 while (1) {
0938 void *tmp_page;
0939
0940 offset = pfn_level_offset(pfn, level);
0941 pte = &parent[offset];
0942 if (!*target_level && (dma_pte_superpage(pte) || !dma_pte_present(pte)))
0943 break;
0944 if (level == *target_level)
0945 break;
0946
0947 if (!dma_pte_present(pte)) {
0948 uint64_t pteval;
0949
0950 tmp_page = alloc_pgtable_page(domain->nid);
0951
0952 if (!tmp_page)
0953 return NULL;
0954
0955 domain_flush_cache(domain, tmp_page, VTD_PAGE_SIZE);
0956 pteval = ((uint64_t)virt_to_dma_pfn(tmp_page) << VTD_PAGE_SHIFT) | DMA_PTE_READ | DMA_PTE_WRITE;
0957 if (domain_use_first_level(domain)) {
0958 pteval |= DMA_FL_PTE_XD | DMA_FL_PTE_US;
0959 if (iommu_is_dma_domain(&domain->domain))
0960 pteval |= DMA_FL_PTE_ACCESS;
0961 }
0962 if (cmpxchg64(&pte->val, 0ULL, pteval))
			/* Someone else set it while we were thinking; use theirs. */
0964 free_pgtable_page(tmp_page);
0965 else
0966 domain_flush_cache(domain, pte, sizeof(*pte));
0967 }
0968 if (level == 1)
0969 break;
0970
0971 parent = phys_to_virt(dma_pte_addr(pte));
0972 level--;
0973 }
0974
0975 if (!*target_level)
0976 *target_level = level;
0977
0978 return pte;
0979 }
0980
/* return address's pte at specific level */
0982 static struct dma_pte *dma_pfn_level_pte(struct dmar_domain *domain,
0983 unsigned long pfn,
0984 int level, int *large_page)
0985 {
0986 struct dma_pte *parent, *pte;
0987 int total = agaw_to_level(domain->agaw);
0988 int offset;
0989
0990 parent = domain->pgd;
0991 while (level <= total) {
0992 offset = pfn_level_offset(pfn, total);
0993 pte = &parent[offset];
0994 if (level == total)
0995 return pte;
0996
0997 if (!dma_pte_present(pte)) {
0998 *large_page = total;
0999 break;
1000 }
1001
1002 if (dma_pte_superpage(pte)) {
1003 *large_page = total;
1004 return pte;
1005 }
1006
1007 parent = phys_to_virt(dma_pte_addr(pte));
1008 total--;
1009 }
1010 return NULL;
1011 }
1012
/* clear last level pte, a tlb flush should be followed */
1014 static void dma_pte_clear_range(struct dmar_domain *domain,
1015 unsigned long start_pfn,
1016 unsigned long last_pfn)
1017 {
1018 unsigned int large_page;
1019 struct dma_pte *first_pte, *pte;
1020
1021 BUG_ON(!domain_pfn_supported(domain, start_pfn));
1022 BUG_ON(!domain_pfn_supported(domain, last_pfn));
1023 BUG_ON(start_pfn > last_pfn);
1024
1025
1026 do {
1027 large_page = 1;
1028 first_pte = pte = dma_pfn_level_pte(domain, start_pfn, 1, &large_page);
1029 if (!pte) {
1030 start_pfn = align_to_level(start_pfn + 1, large_page + 1);
1031 continue;
1032 }
1033 do {
1034 dma_clear_pte(pte);
1035 start_pfn += lvl_to_nr_pages(large_page);
1036 pte++;
1037 } while (start_pfn <= last_pfn && !first_pte_in_page(pte));
1038
1039 domain_flush_cache(domain, first_pte,
1040 (void *)pte - (void *)first_pte);
1041
1042 } while (start_pfn && start_pfn <= last_pfn);
1043 }
1044
1045 static void dma_pte_free_level(struct dmar_domain *domain, int level,
1046 int retain_level, struct dma_pte *pte,
1047 unsigned long pfn, unsigned long start_pfn,
1048 unsigned long last_pfn)
1049 {
1050 pfn = max(start_pfn, pfn);
1051 pte = &pte[pfn_level_offset(pfn, level)];
1052
1053 do {
1054 unsigned long level_pfn;
1055 struct dma_pte *level_pte;
1056
1057 if (!dma_pte_present(pte) || dma_pte_superpage(pte))
1058 goto next;
1059
1060 level_pfn = pfn & level_mask(level);
1061 level_pte = phys_to_virt(dma_pte_addr(pte));
1062
1063 if (level > 2) {
1064 dma_pte_free_level(domain, level - 1, retain_level,
1065 level_pte, level_pfn, start_pfn,
1066 last_pfn);
1067 }
1068
		/*
		 * Free the page table if we're below the level we want to
		 * retain and the range covers the entire table.
		 */
1073 if (level < retain_level && !(start_pfn > level_pfn ||
1074 last_pfn < level_pfn + level_size(level) - 1)) {
1075 dma_clear_pte(pte);
1076 domain_flush_cache(domain, pte, sizeof(*pte));
1077 free_pgtable_page(level_pte);
1078 }
1079 next:
1080 pfn += level_size(level);
1081 } while (!first_pte_in_page(++pte) && pfn <= last_pfn);
1082 }
1083
/*
 * clear last level (leaf) ptes and free page table pages.
 */
1088 static void dma_pte_free_pagetable(struct dmar_domain *domain,
1089 unsigned long start_pfn,
1090 unsigned long last_pfn,
1091 int retain_level)
1092 {
1093 dma_pte_clear_range(domain, start_pfn, last_pfn);
1094
1095
1096 dma_pte_free_level(domain, agaw_to_level(domain->agaw), retain_level,
1097 domain->pgd, 0, start_pfn, last_pfn);
1098
	/* free pgd */
1100 if (start_pfn == 0 && last_pfn == DOMAIN_MAX_PFN(domain->gaw)) {
1101 free_pgtable_page(domain->pgd);
1102 domain->pgd = NULL;
1103 }
1104 }
1105
1106
/* When a page at a given level is being unlinked from its parent, we don't
   need to *modify* it at all. All we need to do is make a list of all the
   pages which can be freed just as soon as we've flushed the IOTLB and we
   know the hardware page-walk will no longer touch them.
   The 'pte' argument is the *parent* PTE, pointing to the page that is to
   be freed. */
1112 static void dma_pte_list_pagetables(struct dmar_domain *domain,
1113 int level, struct dma_pte *pte,
1114 struct list_head *freelist)
1115 {
1116 struct page *pg;
1117
1118 pg = pfn_to_page(dma_pte_addr(pte) >> PAGE_SHIFT);
1119 list_add_tail(&pg->lru, freelist);
1120
1121 if (level == 1)
1122 return;
1123
1124 pte = page_address(pg);
1125 do {
1126 if (dma_pte_present(pte) && !dma_pte_superpage(pte))
1127 dma_pte_list_pagetables(domain, level - 1, pte, freelist);
1128 pte++;
1129 } while (!first_pte_in_page(pte));
1130 }
1131
1132 static void dma_pte_clear_level(struct dmar_domain *domain, int level,
1133 struct dma_pte *pte, unsigned long pfn,
1134 unsigned long start_pfn, unsigned long last_pfn,
1135 struct list_head *freelist)
1136 {
1137 struct dma_pte *first_pte = NULL, *last_pte = NULL;
1138
1139 pfn = max(start_pfn, pfn);
1140 pte = &pte[pfn_level_offset(pfn, level)];
1141
1142 do {
1143 unsigned long level_pfn = pfn & level_mask(level);
1144
1145 if (!dma_pte_present(pte))
1146 goto next;
1147
1148
1149 if (start_pfn <= level_pfn &&
1150 last_pfn >= level_pfn + level_size(level) - 1) {
1151
1152
1153 if (level > 1 && !dma_pte_superpage(pte))
1154 dma_pte_list_pagetables(domain, level - 1, pte, freelist);
1155
1156 dma_clear_pte(pte);
1157 if (!first_pte)
1158 first_pte = pte;
1159 last_pte = pte;
1160 } else if (level > 1) {
1161
1162 dma_pte_clear_level(domain, level - 1,
1163 phys_to_virt(dma_pte_addr(pte)),
1164 level_pfn, start_pfn, last_pfn,
1165 freelist);
1166 }
1167 next:
1168 pfn = level_pfn + level_size(level);
1169 } while (!first_pte_in_page(++pte) && pfn <= last_pfn);
1170
1171 if (first_pte)
1172 domain_flush_cache(domain, first_pte,
1173 (void *)++last_pte - (void *)first_pte);
1174 }
1175
/* We can't just free the pages because the IOMMU may still be walking
   the page tables, and may have cached the intermediate levels. The
   pages can only be freed after the IOTLB flush has been done. */
1179 static void domain_unmap(struct dmar_domain *domain, unsigned long start_pfn,
1180 unsigned long last_pfn, struct list_head *freelist)
1181 {
1182 BUG_ON(!domain_pfn_supported(domain, start_pfn));
1183 BUG_ON(!domain_pfn_supported(domain, last_pfn));
1184 BUG_ON(start_pfn > last_pfn);
1185
1186
1187 dma_pte_clear_level(domain, agaw_to_level(domain->agaw),
1188 domain->pgd, 0, start_pfn, last_pfn, freelist);
1189
	/* free pgd */
1191 if (start_pfn == 0 && last_pfn == DOMAIN_MAX_PFN(domain->gaw)) {
1192 struct page *pgd_page = virt_to_page(domain->pgd);
1193 list_add_tail(&pgd_page->lru, freelist);
1194 domain->pgd = NULL;
1195 }
1196 }
1197
/* iommu handling */
1199 static int iommu_alloc_root_entry(struct intel_iommu *iommu)
1200 {
1201 struct root_entry *root;
1202
1203 root = (struct root_entry *)alloc_pgtable_page(iommu->node);
1204 if (!root) {
1205 pr_err("Allocating root entry for %s failed\n",
1206 iommu->name);
1207 return -ENOMEM;
1208 }
1209
1210 __iommu_flush_cache(iommu, root, ROOT_SIZE);
1211 iommu->root_entry = root;
1212
1213 return 0;
1214 }
1215
1216 static void iommu_set_root_entry(struct intel_iommu *iommu)
1217 {
1218 u64 addr;
1219 u32 sts;
1220 unsigned long flag;
1221
1222 addr = virt_to_phys(iommu->root_entry);
1223 if (sm_supported(iommu))
1224 addr |= DMA_RTADDR_SMT;
1225
1226 raw_spin_lock_irqsave(&iommu->register_lock, flag);
1227 dmar_writeq(iommu->reg + DMAR_RTADDR_REG, addr);
1228
1229 writel(iommu->gcmd | DMA_GCMD_SRTP, iommu->reg + DMAR_GCMD_REG);
1230
	/* Make sure hardware complete it */
1232 IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
1233 readl, (sts & DMA_GSTS_RTPS), sts);
1234
1235 raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
1236
1237 iommu->flush.flush_context(iommu, 0, 0, 0, DMA_CCMD_GLOBAL_INVL);
1238 if (sm_supported(iommu))
1239 qi_flush_pasid_cache(iommu, 0, QI_PC_GLOBAL, 0);
1240 iommu->flush.flush_iotlb(iommu, 0, 0, 0, DMA_TLB_GLOBAL_FLUSH);
1241 }
1242
1243 void iommu_flush_write_buffer(struct intel_iommu *iommu)
1244 {
1245 u32 val;
1246 unsigned long flag;
1247
1248 if (!rwbf_quirk && !cap_rwbf(iommu->cap))
1249 return;
1250
1251 raw_spin_lock_irqsave(&iommu->register_lock, flag);
1252 writel(iommu->gcmd | DMA_GCMD_WBF, iommu->reg + DMAR_GCMD_REG);
1253
	/* Make sure hardware complete it */
1255 IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
1256 readl, (!(val & DMA_GSTS_WBFS)), val);
1257
1258 raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
1259 }
1260
/* return value determine if we need a write buffer flush */
1262 static void __iommu_flush_context(struct intel_iommu *iommu,
1263 u16 did, u16 source_id, u8 function_mask,
1264 u64 type)
1265 {
1266 u64 val = 0;
1267 unsigned long flag;
1268
1269 switch (type) {
1270 case DMA_CCMD_GLOBAL_INVL:
1271 val = DMA_CCMD_GLOBAL_INVL;
1272 break;
1273 case DMA_CCMD_DOMAIN_INVL:
1274 val = DMA_CCMD_DOMAIN_INVL|DMA_CCMD_DID(did);
1275 break;
1276 case DMA_CCMD_DEVICE_INVL:
1277 val = DMA_CCMD_DEVICE_INVL|DMA_CCMD_DID(did)
1278 | DMA_CCMD_SID(source_id) | DMA_CCMD_FM(function_mask);
1279 break;
1280 default:
1281 BUG();
1282 }
1283 val |= DMA_CCMD_ICC;
1284
1285 raw_spin_lock_irqsave(&iommu->register_lock, flag);
1286 dmar_writeq(iommu->reg + DMAR_CCMD_REG, val);
1287
	/* Make sure hardware complete it */
1289 IOMMU_WAIT_OP(iommu, DMAR_CCMD_REG,
1290 dmar_readq, (!(val & DMA_CCMD_ICC)), val);
1291
1292 raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
1293 }
1294
/* return value determine if we need a write buffer flush */
1296 static void __iommu_flush_iotlb(struct intel_iommu *iommu, u16 did,
1297 u64 addr, unsigned int size_order, u64 type)
1298 {
1299 int tlb_offset = ecap_iotlb_offset(iommu->ecap);
1300 u64 val = 0, val_iva = 0;
1301 unsigned long flag;
1302
1303 switch (type) {
1304 case DMA_TLB_GLOBAL_FLUSH:
1305
1306 val = DMA_TLB_GLOBAL_FLUSH|DMA_TLB_IVT;
1307 break;
1308 case DMA_TLB_DSI_FLUSH:
1309 val = DMA_TLB_DSI_FLUSH|DMA_TLB_IVT|DMA_TLB_DID(did);
1310 break;
1311 case DMA_TLB_PSI_FLUSH:
1312 val = DMA_TLB_PSI_FLUSH|DMA_TLB_IVT|DMA_TLB_DID(did);
		/* IH bit is passed in as part of address */
1314 val_iva = size_order | addr;
1315 break;
1316 default:
1317 BUG();
1318 }
1319
1320 #if 0
1321
1322
1323
1324
1325 if (cap_read_drain(iommu->cap))
1326 val |= DMA_TLB_READ_DRAIN;
1327 #endif
1328 if (cap_write_drain(iommu->cap))
1329 val |= DMA_TLB_WRITE_DRAIN;
1330
1331 raw_spin_lock_irqsave(&iommu->register_lock, flag);
1332
1333 if (val_iva)
1334 dmar_writeq(iommu->reg + tlb_offset, val_iva);
1335 dmar_writeq(iommu->reg + tlb_offset + 8, val);
1336
	/* Make sure hardware complete it */
1338 IOMMU_WAIT_OP(iommu, tlb_offset + 8,
1339 dmar_readq, (!(val & DMA_TLB_IVT)), val);
1340
1341 raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
1342
	/* check IOTLB invalidation granularity */
1344 if (DMA_TLB_IAIG(val) == 0)
1345 pr_err("Flush IOTLB failed\n");
1346 if (DMA_TLB_IAIG(val) != DMA_TLB_IIRG(type))
1347 pr_debug("TLB flush request %Lx, actual %Lx\n",
1348 (unsigned long long)DMA_TLB_IIRG(type),
1349 (unsigned long long)DMA_TLB_IAIG(val));
1350 }
1351
1352 static struct device_domain_info *
1353 iommu_support_dev_iotlb(struct dmar_domain *domain, struct intel_iommu *iommu,
1354 u8 bus, u8 devfn)
1355 {
1356 struct device_domain_info *info;
1357 unsigned long flags;
1358
1359 if (!iommu->qi)
1360 return NULL;
1361
1362 spin_lock_irqsave(&domain->lock, flags);
1363 list_for_each_entry(info, &domain->devices, link) {
1364 if (info->iommu == iommu && info->bus == bus &&
1365 info->devfn == devfn) {
1366 spin_unlock_irqrestore(&domain->lock, flags);
1367 return info->ats_supported ? info : NULL;
1368 }
1369 }
1370 spin_unlock_irqrestore(&domain->lock, flags);
1371
1372 return NULL;
1373 }
1374
1375 static void domain_update_iotlb(struct dmar_domain *domain)
1376 {
1377 struct device_domain_info *info;
1378 bool has_iotlb_device = false;
1379 unsigned long flags;
1380
1381 spin_lock_irqsave(&domain->lock, flags);
1382 list_for_each_entry(info, &domain->devices, link) {
1383 if (info->ats_enabled) {
1384 has_iotlb_device = true;
1385 break;
1386 }
1387 }
1388 domain->has_iotlb_device = has_iotlb_device;
1389 spin_unlock_irqrestore(&domain->lock, flags);
1390 }
1391
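/*
 * Enable ATS (and, when supported and configured, PASID and PRI) on a PCI
 * device so that its device TLB can be used and later invalidated.
 */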
1392 static void iommu_enable_dev_iotlb(struct device_domain_info *info)
1393 {
1394 struct pci_dev *pdev;
1395
1396 if (!info || !dev_is_pci(info->dev))
1397 return;
1398
1399 pdev = to_pci_dev(info->dev);
	/* For IOMMU that supports device IOTLB throttling (DIT), we assign
	 * PFSID to the invalidation desc of a VF such that IOMMU HW can gauge
	 * queue depth at PF level. If DIT is not set, PFSID will be treated as
	 * reserved, which should be set to 0.
	 */
1405 if (!ecap_dit(info->iommu->ecap))
1406 info->pfsid = 0;
1407 else {
1408 struct pci_dev *pf_pdev;
1409
1410
1411 pf_pdev = pci_physfn(pdev);
1412 info->pfsid = pci_dev_id(pf_pdev);
1413 }
1414
1415 #ifdef CONFIG_INTEL_IOMMU_SVM
	/* The PCIe spec, in its wisdom, declares that the behaviour of
	   the device if you enable PASID support after ATS support is
	   undefined. So always enable PASID support on devices which
	   have it, even if we can't yet know if we're ever going to
	   use it. */
1421 if (info->pasid_supported && !pci_enable_pasid(pdev, info->pasid_supported & ~1))
1422 info->pasid_enabled = 1;
1423
1424 if (info->pri_supported &&
1425 (info->pasid_enabled ? pci_prg_resp_pasid_required(pdev) : 1) &&
1426 !pci_reset_pri(pdev) && !pci_enable_pri(pdev, PRQ_DEPTH))
1427 info->pri_enabled = 1;
1428 #endif
1429 if (info->ats_supported && pci_ats_page_aligned(pdev) &&
1430 !pci_enable_ats(pdev, VTD_PAGE_SHIFT)) {
1431 info->ats_enabled = 1;
1432 domain_update_iotlb(info->domain);
1433 info->ats_qdep = pci_ats_queue_depth(pdev);
1434 }
1435 }
1436
1437 static void iommu_disable_dev_iotlb(struct device_domain_info *info)
1438 {
1439 struct pci_dev *pdev;
1440
1441 if (!dev_is_pci(info->dev))
1442 return;
1443
1444 pdev = to_pci_dev(info->dev);
1445
1446 if (info->ats_enabled) {
1447 pci_disable_ats(pdev);
1448 info->ats_enabled = 0;
1449 domain_update_iotlb(info->domain);
1450 }
1451 #ifdef CONFIG_INTEL_IOMMU_SVM
1452 if (info->pri_enabled) {
1453 pci_disable_pri(pdev);
1454 info->pri_enabled = 0;
1455 }
1456 if (info->pasid_enabled) {
1457 pci_disable_pasid(pdev);
1458 info->pasid_enabled = 0;
1459 }
1460 #endif
1461 }
1462
1463 static void __iommu_flush_dev_iotlb(struct device_domain_info *info,
1464 u64 addr, unsigned int mask)
1465 {
1466 u16 sid, qdep;
1467
1468 if (!info || !info->ats_enabled)
1469 return;
1470
1471 sid = info->bus << 8 | info->devfn;
1472 qdep = info->ats_qdep;
1473 qi_flush_dev_iotlb(info->iommu, sid, info->pfsid,
1474 qdep, addr, mask);
1475 }
1476
1477 static void iommu_flush_dev_iotlb(struct dmar_domain *domain,
1478 u64 addr, unsigned mask)
1479 {
1480 struct device_domain_info *info;
1481 unsigned long flags;
1482
1483 if (!domain->has_iotlb_device)
1484 return;
1485
1486 spin_lock_irqsave(&domain->lock, flags);
1487 list_for_each_entry(info, &domain->devices, link)
1488 __iommu_flush_dev_iotlb(info, addr, mask);
1489 spin_unlock_irqrestore(&domain->lock, flags);
1490 }
1491
1492 static void iommu_flush_iotlb_psi(struct intel_iommu *iommu,
1493 struct dmar_domain *domain,
1494 unsigned long pfn, unsigned int pages,
1495 int ih, int map)
1496 {
1497 unsigned int aligned_pages = __roundup_pow_of_two(pages);
1498 unsigned int mask = ilog2(aligned_pages);
1499 uint64_t addr = (uint64_t)pfn << VTD_PAGE_SHIFT;
1500 u16 did = domain_id_iommu(domain, iommu);
1501
1502 BUG_ON(pages == 0);
1503
1504 if (ih)
1505 ih = 1 << 6;
1506
1507 if (domain_use_first_level(domain)) {
1508 qi_flush_piotlb(iommu, did, PASID_RID2PASID, addr, pages, ih);
1509 } else {
1510 unsigned long bitmask = aligned_pages - 1;
1511
		/*
		 * PSI masks the low order bits of the base address. If the
		 * address isn't aligned to the mask, then compute a mask value
		 * needed to ensure the target range is flushed.
		 */
1517 if (unlikely(bitmask & pfn)) {
1518 unsigned long end_pfn = pfn + pages - 1, shared_bits;
1519
			/*
			 * Since end_pfn <= pfn + bitmask, the only way bits
			 * higher than bitmask can differ in pfn and end_pfn is
			 * by carrying. This means after masking out bitmask,
			 * high bits starting with the first set bit in
			 * shared_bits are all equal in both pfn and end_pfn.
			 */
1527 shared_bits = ~(pfn ^ end_pfn) & ~bitmask;
1528 mask = shared_bits ? __ffs(shared_bits) : BITS_PER_LONG;
1529 }
1530
		/*
		 * Fallback to domain selective flush if no PSI support or
		 * the size is too big.
		 */
1535 if (!cap_pgsel_inv(iommu->cap) ||
1536 mask > cap_max_amask_val(iommu->cap))
1537 iommu->flush.flush_iotlb(iommu, did, 0, 0,
1538 DMA_TLB_DSI_FLUSH);
1539 else
1540 iommu->flush.flush_iotlb(iommu, did, addr | ih, mask,
1541 DMA_TLB_PSI_FLUSH);
1542 }
1543
	/*
	 * In caching mode, changes of pages from non-present to present require
	 * flush. However, device IOTLB doesn't need to be flushed in this case.
	 */
1548 if (!cap_caching_mode(iommu->cap) || !map)
1549 iommu_flush_dev_iotlb(domain, addr, mask);
1550 }
1551
1552
1553 static inline void __mapping_notify_one(struct intel_iommu *iommu,
1554 struct dmar_domain *domain,
1555 unsigned long pfn, unsigned int pages)
1556 {
	/*
	 * It's a non-present to present mapping. Only flush if caching mode
	 * and second level.
	 */
1561 if (cap_caching_mode(iommu->cap) && !domain_use_first_level(domain))
1562 iommu_flush_iotlb_psi(iommu, domain, pfn, pages, 0, 1);
1563 else
1564 iommu_flush_write_buffer(iommu);
1565 }
1566
1567 static void intel_flush_iotlb_all(struct iommu_domain *domain)
1568 {
1569 struct dmar_domain *dmar_domain = to_dmar_domain(domain);
1570 struct iommu_domain_info *info;
1571 unsigned long idx;
1572
1573 xa_for_each(&dmar_domain->iommu_array, idx, info) {
1574 struct intel_iommu *iommu = info->iommu;
1575 u16 did = domain_id_iommu(dmar_domain, iommu);
1576
1577 if (domain_use_first_level(dmar_domain))
1578 qi_flush_piotlb(iommu, did, PASID_RID2PASID, 0, -1, 0);
1579 else
1580 iommu->flush.flush_iotlb(iommu, did, 0, 0,
1581 DMA_TLB_DSI_FLUSH);
1582
1583 if (!cap_caching_mode(iommu->cap))
1584 iommu_flush_dev_iotlb(dmar_domain, 0, MAX_AGAW_PFN_WIDTH);
1585 }
1586 }
1587
1588 static void iommu_disable_protect_mem_regions(struct intel_iommu *iommu)
1589 {
1590 u32 pmen;
1591 unsigned long flags;
1592
1593 if (!cap_plmr(iommu->cap) && !cap_phmr(iommu->cap))
1594 return;
1595
1596 raw_spin_lock_irqsave(&iommu->register_lock, flags);
1597 pmen = readl(iommu->reg + DMAR_PMEN_REG);
1598 pmen &= ~DMA_PMEN_EPM;
1599 writel(pmen, iommu->reg + DMAR_PMEN_REG);
1600
	/* wait for the protected region status bit to clear */
1602 IOMMU_WAIT_OP(iommu, DMAR_PMEN_REG,
1603 readl, !(pmen & DMA_PMEN_PRS), pmen);
1604
1605 raw_spin_unlock_irqrestore(&iommu->register_lock, flags);
1606 }
1607
1608 static void iommu_enable_translation(struct intel_iommu *iommu)
1609 {
1610 u32 sts;
1611 unsigned long flags;
1612
1613 raw_spin_lock_irqsave(&iommu->register_lock, flags);
1614 iommu->gcmd |= DMA_GCMD_TE;
1615 writel(iommu->gcmd, iommu->reg + DMAR_GCMD_REG);
1616
	/* Make sure hardware complete it */
1618 IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
1619 readl, (sts & DMA_GSTS_TES), sts);
1620
1621 raw_spin_unlock_irqrestore(&iommu->register_lock, flags);
1622 }
1623
1624 static void iommu_disable_translation(struct intel_iommu *iommu)
1625 {
1626 u32 sts;
1627 unsigned long flag;
1628
1629 if (iommu_skip_te_disable && iommu->drhd->gfx_dedicated &&
1630 (cap_read_drain(iommu->cap) || cap_write_drain(iommu->cap)))
1631 return;
1632
1633 raw_spin_lock_irqsave(&iommu->register_lock, flag);
1634 iommu->gcmd &= ~DMA_GCMD_TE;
1635 writel(iommu->gcmd, iommu->reg + DMAR_GCMD_REG);
1636
	/* Make sure hardware complete it */
1638 IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
1639 readl, (!(sts & DMA_GSTS_TES)), sts);
1640
1641 raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
1642 }
1643
1644 static int iommu_init_domains(struct intel_iommu *iommu)
1645 {
1646 u32 ndomains;
1647
1648 ndomains = cap_ndoms(iommu->cap);
1649 pr_debug("%s: Number of Domains supported <%d>\n",
1650 iommu->name, ndomains);
1651
1652 spin_lock_init(&iommu->lock);
1653
1654 iommu->domain_ids = bitmap_zalloc(ndomains, GFP_KERNEL);
1655 if (!iommu->domain_ids)
1656 return -ENOMEM;
1657
	/*
	 * If Caching mode is set, then invalid translations are tagged
	 * with domain-id 0, hence we need to pre-allocate it. We also
	 * use domain-id 0 as a marker for non-allocated domain-id, so
	 * make sure it is not used for a real domain.
	 */
1664 set_bit(0, iommu->domain_ids);
1665
	/*
	 * Vt-d spec rev3.0 (section 6.2.3.1) requires that each pasid
	 * entry for first-level or pass-through translation modes should
	 * be programmed with a domain id different from those used for
	 * second-level or nested translation. We reserve a domain id for
	 * this purpose.
	 */
1673 if (sm_supported(iommu))
1674 set_bit(FLPT_DEFAULT_DID, iommu->domain_ids);
1675
1676 return 0;
1677 }
1678
1679 static void disable_dmar_iommu(struct intel_iommu *iommu)
1680 {
1681 if (!iommu->domain_ids)
1682 return;
1683
	/*
	 * All iommu domains must have been detached from the devices,
	 * hence there should be no domain IDs in use.
	 */
1688 if (WARN_ON(bitmap_weight(iommu->domain_ids, cap_ndoms(iommu->cap))
1689 > NUM_RESERVED_DID))
1690 return;
1691
1692 if (iommu->gcmd & DMA_GCMD_TE)
1693 iommu_disable_translation(iommu);
1694 }
1695
1696 static void free_dmar_iommu(struct intel_iommu *iommu)
1697 {
1698 if (iommu->domain_ids) {
1699 bitmap_free(iommu->domain_ids);
1700 iommu->domain_ids = NULL;
1701 }
1702
1703 if (iommu->copied_tables) {
1704 bitmap_free(iommu->copied_tables);
1705 iommu->copied_tables = NULL;
1706 }
1707
	/* free context mapping */
1709 free_context_table(iommu);
1710
1711 #ifdef CONFIG_INTEL_IOMMU_SVM
1712 if (pasid_supported(iommu)) {
1713 if (ecap_prs(iommu->ecap))
1714 intel_svm_finish_prq(iommu);
1715 }
1716 if (vccap_pasid(iommu->vccap))
1717 ioasid_unregister_allocator(&iommu->pasid_allocator);
1718
1719 #endif
1720 }
1721
/*
 * Check and return whether first level is used by default for
 * DMA translation.
 */
1726 static bool first_level_by_default(unsigned int type)
1727 {
	/* Only SL is available in legacy mode */
1729 if (!scalable_mode_support())
1730 return false;
1731
	/* Only level (either FL or SL) is available, just use it */
1733 if (intel_cap_flts_sanity() ^ intel_cap_slts_sanity())
1734 return intel_cap_flts_sanity();
1735
	/* Both levels are available, decide it based on domain type */
1737 return type != IOMMU_DOMAIN_UNMANAGED;
1738 }
1739
1740 static struct dmar_domain *alloc_domain(unsigned int type)
1741 {
1742 struct dmar_domain *domain;
1743
1744 domain = kzalloc(sizeof(*domain), GFP_KERNEL);
1745 if (!domain)
1746 return NULL;
1747
1748 domain->nid = NUMA_NO_NODE;
1749 if (first_level_by_default(type))
1750 domain->flags |= DOMAIN_FLAG_USE_FIRST_LEVEL;
1751 domain->has_iotlb_device = false;
1752 INIT_LIST_HEAD(&domain->devices);
1753 spin_lock_init(&domain->lock);
1754 xa_init(&domain->iommu_array);
1755
1756 return domain;
1757 }
1758
1759 static int domain_attach_iommu(struct dmar_domain *domain,
1760 struct intel_iommu *iommu)
1761 {
1762 struct iommu_domain_info *info, *curr;
1763 unsigned long ndomains;
1764 int num, ret = -ENOSPC;
1765
1766 info = kzalloc(sizeof(*info), GFP_KERNEL);
1767 if (!info)
1768 return -ENOMEM;
1769
1770 spin_lock(&iommu->lock);
1771 curr = xa_load(&domain->iommu_array, iommu->seq_id);
1772 if (curr) {
1773 curr->refcnt++;
1774 spin_unlock(&iommu->lock);
1775 kfree(info);
1776 return 0;
1777 }
1778
1779 ndomains = cap_ndoms(iommu->cap);
1780 num = find_first_zero_bit(iommu->domain_ids, ndomains);
1781 if (num >= ndomains) {
1782 pr_err("%s: No free domain ids\n", iommu->name);
1783 goto err_unlock;
1784 }
1785
1786 set_bit(num, iommu->domain_ids);
1787 info->refcnt = 1;
1788 info->did = num;
1789 info->iommu = iommu;
1790 curr = xa_cmpxchg(&domain->iommu_array, iommu->seq_id,
1791 NULL, info, GFP_ATOMIC);
1792 if (curr) {
1793 ret = xa_err(curr) ? : -EBUSY;
1794 goto err_clear;
1795 }
1796 domain_update_iommu_cap(domain);
1797
1798 spin_unlock(&iommu->lock);
1799 return 0;
1800
1801 err_clear:
1802 clear_bit(info->did, iommu->domain_ids);
1803 err_unlock:
1804 spin_unlock(&iommu->lock);
1805 kfree(info);
1806 return ret;
1807 }
1808
1809 static void domain_detach_iommu(struct dmar_domain *domain,
1810 struct intel_iommu *iommu)
1811 {
1812 struct iommu_domain_info *info;
1813
1814 spin_lock(&iommu->lock);
1815 info = xa_load(&domain->iommu_array, iommu->seq_id);
1816 if (--info->refcnt == 0) {
1817 clear_bit(info->did, iommu->domain_ids);
1818 xa_erase(&domain->iommu_array, iommu->seq_id);
1819 domain->nid = NUMA_NO_NODE;
1820 domain_update_iommu_cap(domain);
1821 kfree(info);
1822 }
1823 spin_unlock(&iommu->lock);
1824 }
1825
1826 static inline int guestwidth_to_adjustwidth(int gaw)
1827 {
1828 int agaw;
1829 int r = (gaw - 12) % 9;
1830
1831 if (r == 0)
1832 agaw = gaw;
1833 else
1834 agaw = gaw + 9 - r;
1835 if (agaw > 64)
1836 agaw = 64;
1837 return agaw;
1838 }
1839
1840 static void domain_exit(struct dmar_domain *domain)
1841 {
1842 if (domain->pgd) {
1843 LIST_HEAD(freelist);
1844
1845 domain_unmap(domain, 0, DOMAIN_MAX_PFN(domain->gaw), &freelist);
1846 put_pages_list(&freelist);
1847 }
1848
1849 if (WARN_ON(!list_empty(&domain->devices)))
1850 return;
1851
1852 kfree(domain);
1853 }
1854
/*
 * Get the PASID directory size for scalable mode context entry.
 * Value of X in the PDTS field of a scalable mode context entry
 * indicates PASID directory with 2^(X + 7) entries.
 */
1860 static inline unsigned long context_get_sm_pds(struct pasid_table *table)
1861 {
1862 unsigned long pds, max_pde;
1863
1864 max_pde = table->max_pasid >> PASID_PDE_SHIFT;
1865 pds = find_first_bit(&max_pde, MAX_NR_PASID_BITS);
1866 if (pds < 7)
1867 return 0;
1868
1869 return pds - 7;
1870 }
1871
/*
 * Set the RID_PASID field of a scalable mode context entry. The
 * IOMMU hardware will use the PASID value set in this field for
 * DMA translations of DMA requests without PASID.
 */
1877 static inline void
1878 context_set_sm_rid2pasid(struct context_entry *context, unsigned long pasid)
1879 {
1880 context->hi |= pasid & ((1 << 20) - 1);
1881 }
1882
/*
 * Set the DTE(Device-TLB Enable) field of a scalable mode context
 * entry.
 */
1887 static inline void context_set_sm_dte(struct context_entry *context)
1888 {
1889 context->lo |= (1 << 2);
1890 }
1891
/*
 * Set the PRE(Page Request Enable) field of a scalable mode context
 * entry.
 */
1896 static inline void context_set_sm_pre(struct context_entry *context)
1897 {
1898 context->lo |= (1 << 4);
1899 }
1900
/* Convert value to context PASID directory size field coding. */
1902 #define context_pdts(pds) (((pds) & 0x7) << 9)
1903
1904 static int domain_context_mapping_one(struct dmar_domain *domain,
1905 struct intel_iommu *iommu,
1906 struct pasid_table *table,
1907 u8 bus, u8 devfn)
1908 {
1909 struct device_domain_info *info =
1910 iommu_support_dev_iotlb(domain, iommu, bus, devfn);
1911 u16 did = domain_id_iommu(domain, iommu);
1912 int translation = CONTEXT_TT_MULTI_LEVEL;
1913 struct context_entry *context;
1914 int ret;
1915
1916 WARN_ON(did == 0);
1917
1918 if (hw_pass_through && domain_type_is_si(domain))
1919 translation = CONTEXT_TT_PASS_THROUGH;
1920
1921 pr_debug("Set context mapping for %02x:%02x.%d\n",
1922 bus, PCI_SLOT(devfn), PCI_FUNC(devfn));
1923
1924 BUG_ON(!domain->pgd);
1925
1926 spin_lock(&iommu->lock);
1927 ret = -ENOMEM;
1928 context = iommu_context_addr(iommu, bus, devfn, 1);
1929 if (!context)
1930 goto out_unlock;
1931
1932 ret = 0;
1933 if (context_present(context) && !context_copied(iommu, bus, devfn))
1934 goto out_unlock;
1935
	/*
	 * For kdump cases, old valid entries may be cached due to the
	 * in-flight DMA and copied pgtable, but there is no unmapping
	 * behaviour for them, thus we need an explicit cache flush for
	 * the newly-mapped device. For kdump, at this point, the device
	 * is supposed to finish reset at its driver probe stage, so no
	 * in-flight DMA will exist, and we don't need to worry anymore
	 * hereafter.
	 */
1945 if (context_copied(iommu, bus, devfn)) {
1946 u16 did_old = context_domain_id(context);
1947
1948 if (did_old < cap_ndoms(iommu->cap)) {
1949 iommu->flush.flush_context(iommu, did_old,
1950 (((u16)bus) << 8) | devfn,
1951 DMA_CCMD_MASK_NOBIT,
1952 DMA_CCMD_DEVICE_INVL);
1953 iommu->flush.flush_iotlb(iommu, did_old, 0, 0,
1954 DMA_TLB_DSI_FLUSH);
1955 }
1956
1957 clear_context_copied(iommu, bus, devfn);
1958 }
1959
1960 context_clear_entry(context);
1961
1962 if (sm_supported(iommu)) {
1963 unsigned long pds;
1964
1965 WARN_ON(!table);
1966
		/* Setup the PASID DIR pointer: */
1968 pds = context_get_sm_pds(table);
1969 context->lo = (u64)virt_to_phys(table->table) |
1970 context_pdts(pds);
1971
		/* Setup the RID_PASID field: */
1973 context_set_sm_rid2pasid(context, PASID_RID2PASID);
1974
		/*
		 * Setup the Device-TLB enable bit and Page request
		 * Enable bit:
		 */
1979 if (info && info->ats_supported)
1980 context_set_sm_dte(context);
1981 if (info && info->pri_supported)
1982 context_set_sm_pre(context);
1983 } else {
1984 struct dma_pte *pgd = domain->pgd;
1985 int agaw;
1986
1987 context_set_domain_id(context, did);
1988
1989 if (translation != CONTEXT_TT_PASS_THROUGH) {
			/*
			 * Skip top levels of page tables for iommu which has
			 * less agaw than default. Unnecessary for PT mode.
			 */
1994 for (agaw = domain->agaw; agaw > iommu->agaw; agaw--) {
1995 ret = -ENOMEM;
1996 pgd = phys_to_virt(dma_pte_addr(pgd));
1997 if (!dma_pte_present(pgd))
1998 goto out_unlock;
1999 }
2000
2001 if (info && info->ats_supported)
2002 translation = CONTEXT_TT_DEV_IOTLB;
2003 else
2004 translation = CONTEXT_TT_MULTI_LEVEL;
2005
2006 context_set_address_root(context, virt_to_phys(pgd));
2007 context_set_address_width(context, agaw);
2008 } else {
			/*
			 * In pass through mode, AW must be programmed to
			 * indicate the largest AGAW value supported by
			 * hardware. And ASR is ignored by hardware.
			 */
2014 context_set_address_width(context, iommu->msagaw);
2015 }
2016
2017 context_set_translation_type(context, translation);
2018 }
2019
2020 context_set_fault_enable(context);
2021 context_set_present(context);
2022 if (!ecap_coherent(iommu->ecap))
2023 clflush_cache_range(context, sizeof(*context));
2024
	/*
	 * It's a non-present to present mapping. If hardware doesn't cache
	 * non-present entries, then it only needs to flush the write-buffer.
	 * If hardware does cache non-present entries, then it does so in the
	 * special domain #0, which we have to flush:
	 */
2031 if (cap_caching_mode(iommu->cap)) {
2032 iommu->flush.flush_context(iommu, 0,
2033 (((u16)bus) << 8) | devfn,
2034 DMA_CCMD_MASK_NOBIT,
2035 DMA_CCMD_DEVICE_INVL);
2036 iommu->flush.flush_iotlb(iommu, did, 0, 0, DMA_TLB_DSI_FLUSH);
2037 } else {
2038 iommu_flush_write_buffer(iommu);
2039 }
2040 iommu_enable_dev_iotlb(info);
2041
2042 ret = 0;
2043
2044 out_unlock:
2045 spin_unlock(&iommu->lock);
2046
2047 return ret;
2048 }
2049
2050 struct domain_context_mapping_data {
2051 struct dmar_domain *domain;
2052 struct intel_iommu *iommu;
2053 struct pasid_table *table;
2054 };
2055
2056 static int domain_context_mapping_cb(struct pci_dev *pdev,
2057 u16 alias, void *opaque)
2058 {
2059 struct domain_context_mapping_data *data = opaque;
2060
2061 return domain_context_mapping_one(data->domain, data->iommu,
2062 data->table, PCI_BUS_NUM(alias),
2063 alias & 0xff);
2064 }
2065
2066 static int
2067 domain_context_mapping(struct dmar_domain *domain, struct device *dev)
2068 {
2069 struct domain_context_mapping_data data;
2070 struct pasid_table *table;
2071 struct intel_iommu *iommu;
2072 u8 bus, devfn;
2073
2074 iommu = device_to_iommu(dev, &bus, &devfn);
2075 if (!iommu)
2076 return -ENODEV;
2077
2078 table = intel_pasid_get_table(dev);
2079
2080 if (!dev_is_pci(dev))
2081 return domain_context_mapping_one(domain, iommu, table,
2082 bus, devfn);
2083
2084 data.domain = domain;
2085 data.iommu = iommu;
2086 data.table = table;
2087
2088 return pci_for_each_dma_alias(to_pci_dev(dev),
2089 &domain_context_mapping_cb, &data);
2090 }
2091
2092 static int domain_context_mapped_cb(struct pci_dev *pdev,
2093 u16 alias, void *opaque)
2094 {
2095 struct intel_iommu *iommu = opaque;
2096
2097 return !device_context_mapped(iommu, PCI_BUS_NUM(alias), alias & 0xff);
2098 }
2099
2100 static int domain_context_mapped(struct device *dev)
2101 {
2102 struct intel_iommu *iommu;
2103 u8 bus, devfn;
2104
2105 iommu = device_to_iommu(dev, &bus, &devfn);
2106 if (!iommu)
2107 return -ENODEV;
2108
2109 if (!dev_is_pci(dev))
2110 return device_context_mapped(iommu, bus, devfn);
2111
2112 return !pci_for_each_dma_alias(to_pci_dev(dev),
2113 domain_context_mapped_cb, iommu);
2114 }
2115
/* Returns a number of VTD pages, but aligned to MM page size */
2117 static inline unsigned long aligned_nrpages(unsigned long host_addr,
2118 size_t size)
2119 {
2120 host_addr &= ~PAGE_MASK;
2121 return PAGE_ALIGN(host_addr + size) >> VTD_PAGE_SHIFT;
2122 }
2123
/* Return largest possible superpage level for a given mapping */
2125 static inline int hardware_largepage_caps(struct dmar_domain *domain,
2126 unsigned long iov_pfn,
2127 unsigned long phy_pfn,
2128 unsigned long pages)
2129 {
2130 int support, level = 1;
2131 unsigned long pfnmerge;
2132
2133 support = domain->iommu_superpage;
2134
	/* To use a large page, the virtual *and* physical addresses
	   must be aligned to 2MiB/1GiB/etc. Lower bits set in either
	   will make us think the mapping is already using a large page. */
2139 pfnmerge = iov_pfn | phy_pfn;
2140
2141 while (support && !(pfnmerge & ~VTD_STRIDE_MASK)) {
2142 pages >>= VTD_STRIDE_SHIFT;
2143 if (!pages)
2144 break;
2145 pfnmerge >>= VTD_STRIDE_SHIFT;
2146 level++;
2147 support--;
2148 }
2149 return level;
2150 }
2151
/*
 * Ensure that old small page tables are removed to make room for superpage(s).
 * We're going to add new large pages, so make sure we don't remove their parent
 * tables. The IOTLB/devTLBs should be flushed if any PDE/PTEs are cleared.
 */
2157 static void switch_to_super_page(struct dmar_domain *domain,
2158 unsigned long start_pfn,
2159 unsigned long end_pfn, int level)
2160 {
2161 unsigned long lvl_pages = lvl_to_nr_pages(level);
2162 struct iommu_domain_info *info;
2163 struct dma_pte *pte = NULL;
2164 unsigned long i;
2165
2166 while (start_pfn <= end_pfn) {
2167 if (!pte)
2168 pte = pfn_to_dma_pte(domain, start_pfn, &level);
2169
2170 if (dma_pte_present(pte)) {
2171 dma_pte_free_pagetable(domain, start_pfn,
2172 start_pfn + lvl_pages - 1,
2173 level + 1);
2174
2175 xa_for_each(&domain->iommu_array, i, info)
2176 iommu_flush_iotlb_psi(info->iommu, domain,
2177 start_pfn, lvl_pages,
2178 0, 0);
2179 }
2180
2181 pte++;
2182 start_pfn += lvl_pages;
2183 if (first_pte_in_page(pte))
2184 pte = NULL;
2185 }
2186 }
2187
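/*
 * Map @nr_pages contiguous physical pages starting at @phys_pfn to the IOVA
 * range starting at @iov_pfn, using superpages whenever both addresses and
 * the remaining length are suitably aligned.
 */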
2188 static int
2189 __domain_mapping(struct dmar_domain *domain, unsigned long iov_pfn,
2190 unsigned long phys_pfn, unsigned long nr_pages, int prot)
2191 {
2192 struct dma_pte *first_pte = NULL, *pte = NULL;
2193 unsigned int largepage_lvl = 0;
2194 unsigned long lvl_pages = 0;
2195 phys_addr_t pteval;
2196 u64 attr;
2197
2198 BUG_ON(!domain_pfn_supported(domain, iov_pfn + nr_pages - 1));
2199
2200 if ((prot & (DMA_PTE_READ|DMA_PTE_WRITE)) == 0)
2201 return -EINVAL;
2202
2203 attr = prot & (DMA_PTE_READ | DMA_PTE_WRITE | DMA_PTE_SNP);
2204 attr |= DMA_FL_PTE_PRESENT;
2205 if (domain_use_first_level(domain)) {
2206 attr |= DMA_FL_PTE_XD | DMA_FL_PTE_US | DMA_FL_PTE_ACCESS;
2207 if (prot & DMA_PTE_WRITE)
2208 attr |= DMA_FL_PTE_DIRTY;
2209 }
2210
2211 pteval = ((phys_addr_t)phys_pfn << VTD_PAGE_SHIFT) | attr;
2212
2213 while (nr_pages > 0) {
2214 uint64_t tmp;
2215
2216 if (!pte) {
2217 largepage_lvl = hardware_largepage_caps(domain, iov_pfn,
2218 phys_pfn, nr_pages);
2219
2220 pte = pfn_to_dma_pte(domain, iov_pfn, &largepage_lvl);
2221 if (!pte)
2222 return -ENOMEM;
2223 first_pte = pte;
2224
2225 lvl_pages = lvl_to_nr_pages(largepage_lvl);
2226
2227
2228 if (largepage_lvl > 1) {
2229 unsigned long end_pfn;
2230 unsigned long pages_to_remove;
2231
2232 pteval |= DMA_PTE_LARGE_PAGE;
2233 pages_to_remove = min_t(unsigned long, nr_pages,
2234 nr_pte_to_next_page(pte) * lvl_pages);
2235 end_pfn = iov_pfn + pages_to_remove - 1;
2236 switch_to_super_page(domain, iov_pfn, end_pfn, largepage_lvl);
2237 } else {
2238 pteval &= ~(uint64_t)DMA_PTE_LARGE_PAGE;
2239 }
2240
2241 }
2242
		/* We don't need lock here, nobody else
		 * touches the iova range
		 */
2245 tmp = cmpxchg64_local(&pte->val, 0ULL, pteval);
2246 if (tmp) {
2247 static int dumps = 5;
2248 pr_crit("ERROR: DMA PTE for vPFN 0x%lx already set (to %llx not %llx)\n",
2249 iov_pfn, tmp, (unsigned long long)pteval);
2250 if (dumps) {
2251 dumps--;
2252 debug_dma_dump_mappings(NULL);
2253 }
2254 WARN_ON(1);
2255 }
2256
2257 nr_pages -= lvl_pages;
2258 iov_pfn += lvl_pages;
2259 phys_pfn += lvl_pages;
2260 pteval += lvl_pages * VTD_PAGE_SIZE;
2261
		/* If the next PTE would be the first in a new page, then we
		 * need to flush the cache on the entries we've just written.
		 * And then we'll need to recalculate 'pte', so clear it and
		 * let it get set again in the if (!pte) block above.
		 *
		 * If we're done (!nr_pages) we need to flush the cache too.
		 *
		 * Also if we've been setting superpages, we may need to
		 * recalculate 'pte' and switch back to smaller pages for the
		 * end of the mapping, if the trailing size is not enough to
		 * use another superpage (i.e. nr_pages < lvl_pages).
		 */
2274 pte++;
2275 if (!nr_pages || first_pte_in_page(pte) ||
2276 (largepage_lvl > 1 && nr_pages < lvl_pages)) {
2277 domain_flush_cache(domain, first_pte,
2278 (void *)pte - (void *)first_pte);
2279 pte = NULL;
2280 }
2281 }
2282
2283 return 0;
2284 }
2285
2286 static void domain_context_clear_one(struct device_domain_info *info, u8 bus, u8 devfn)
2287 {
2288 struct intel_iommu *iommu = info->iommu;
2289 struct context_entry *context;
2290 u16 did_old;
2291
2292 if (!iommu)
2293 return;
2294
2295 spin_lock(&iommu->lock);
2296 context = iommu_context_addr(iommu, bus, devfn, 0);
2297 if (!context) {
2298 spin_unlock(&iommu->lock);
2299 return;
2300 }
2301
2302 if (sm_supported(iommu)) {
2303 if (hw_pass_through && domain_type_is_si(info->domain))
2304 did_old = FLPT_DEFAULT_DID;
2305 else
2306 did_old = domain_id_iommu(info->domain, iommu);
2307 } else {
2308 did_old = context_domain_id(context);
2309 }
2310
2311 context_clear_entry(context);
2312 __iommu_flush_cache(iommu, context, sizeof(*context));
2313 spin_unlock(&iommu->lock);
2314 iommu->flush.flush_context(iommu,
2315 did_old,
2316 (((u16)bus) << 8) | devfn,
2317 DMA_CCMD_MASK_NOBIT,
2318 DMA_CCMD_DEVICE_INVL);
2319
2320 if (sm_supported(iommu))
2321 qi_flush_pasid_cache(iommu, did_old, QI_PC_ALL_PASIDS, 0);
2322
2323 iommu->flush.flush_iotlb(iommu,
2324 did_old,
2325 0,
2326 0,
2327 DMA_TLB_DSI_FLUSH);
2328
2329 __iommu_flush_dev_iotlb(info, 0, MAX_AGAW_PFN_WIDTH);
2330 }
2331
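/*
 * Set up a first-level (scalable-mode) translation for @dev/@pasid, using the
 * domain's page table as the first-level table. Only 4- and 5-level tables
 * are valid for first-level translation.
 */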
2332 static int domain_setup_first_level(struct intel_iommu *iommu,
2333 struct dmar_domain *domain,
2334 struct device *dev,
2335 u32 pasid)
2336 {
2337 struct dma_pte *pgd = domain->pgd;
2338 int agaw, level;
2339 int flags = 0;
2340
2341 /*
2342  * Skip top levels of the page table for an IOMMU whose agaw is
2343  * smaller than the domain's default. Unnecessary for PT mode.
2344  */
2345 for (agaw = domain->agaw; agaw > iommu->agaw; agaw--) {
2346 pgd = phys_to_virt(dma_pte_addr(pgd));
2347 if (!dma_pte_present(pgd))
2348 return -ENOMEM;
2349 }
2350
2351 level = agaw_to_level(agaw);
2352 if (level != 4 && level != 5)
2353 return -EINVAL;
2354
2355 if (pasid != PASID_RID2PASID)
2356 flags |= PASID_FLAG_SUPERVISOR_MODE;
2357 if (level == 5)
2358 flags |= PASID_FLAG_FL5LP;
2359
2360 if (domain->force_snooping)
2361 flags |= PASID_FLAG_PAGE_SNOOP;
2362
2363 return intel_pasid_setup_first_level(iommu, dev, (pgd_t *)pgd, pasid,
2364 domain_id_iommu(domain, iommu),
2365 flags);
2366 }
2367
2368 static bool dev_is_real_dma_subdevice(struct device *dev)
2369 {
2370 return dev && dev_is_pci(dev) &&
2371 pci_real_dma_dev(to_pci_dev(dev)) != to_pci_dev(dev);
2372 }
2373
2374 static int iommu_domain_identity_map(struct dmar_domain *domain,
2375 unsigned long first_vpfn,
2376 unsigned long last_vpfn)
2377 {
2378 /*
2379  * The range may already be (partially) mapped, e.g. an RMRR that
2380  * overlaps physical memory, so clear any existing mapping first.
2381  */
2382 dma_pte_clear_range(domain, first_vpfn, last_vpfn);
2383
2384 return __domain_mapping(domain, first_vpfn,
2385 first_vpfn, last_vpfn - first_vpfn + 1,
2386 DMA_PTE_READ|DMA_PTE_WRITE);
2387 }
2388
2389 static int md_domain_init(struct dmar_domain *domain, int guest_width);
2390
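/*
 * Set up the static identity (si) domain used for passthrough. Unless
 * hardware passthrough is in use (in which case no page tables are needed),
 * identity-map every usable physical memory range and the RMRR regions, so
 * devices attached to this domain can DMA with IOVA == physical address.
 */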
2391 static int __init si_domain_init(int hw)
2392 {
2393 struct dmar_rmrr_unit *rmrr;
2394 struct device *dev;
2395 int i, nid, ret;
2396
2397 si_domain = alloc_domain(IOMMU_DOMAIN_IDENTITY);
2398 if (!si_domain)
2399 return -EFAULT;
2400
2401 if (md_domain_init(si_domain, DEFAULT_DOMAIN_ADDRESS_WIDTH)) {
2402 domain_exit(si_domain);
2403 return -EFAULT;
2404 }
2405
2406 if (hw)
2407 return 0;
2408
2409 for_each_online_node(nid) {
2410 unsigned long start_pfn, end_pfn;
2411 int i;
2412
2413 for_each_mem_pfn_range(i, nid, &start_pfn, &end_pfn, NULL) {
2414 ret = iommu_domain_identity_map(si_domain,
2415 mm_to_dma_pfn(start_pfn),
2416 mm_to_dma_pfn(end_pfn));
2417 if (ret)
2418 return ret;
2419 }
2420 }
2421
2422 /*
2423  * Identity map the RMRRs so that devices with RMRRs can also use
2424  * the si_domain.
2425  */
2426 for_each_rmrr_units(rmrr) {
2427 for_each_active_dev_scope(rmrr->devices, rmrr->devices_cnt,
2428 i, dev) {
2429 unsigned long long start = rmrr->base_address;
2430 unsigned long long end = rmrr->end_address;
2431
2432 if (WARN_ON(end < start ||
2433 end >> agaw_to_width(si_domain->agaw)))
2434 continue;
2435
2436 ret = iommu_domain_identity_map(si_domain,
2437 mm_to_dma_pfn(start >> PAGE_SHIFT),
2438 mm_to_dma_pfn(end >> PAGE_SHIFT));
2439 if (ret)
2440 return ret;
2441 }
2442 }
2443
2444 return 0;
2445 }
2446
2447 static int domain_add_dev_info(struct dmar_domain *domain, struct device *dev)
2448 {
2449 struct device_domain_info *info = dev_iommu_priv_get(dev);
2450 struct intel_iommu *iommu;
2451 unsigned long flags;
2452 u8 bus, devfn;
2453 int ret;
2454
2455 iommu = device_to_iommu(dev, &bus, &devfn);
2456 if (!iommu)
2457 return -ENODEV;
2458
2459 ret = domain_attach_iommu(domain, iommu);
2460 if (ret)
2461 return ret;
2462 info->domain = domain;
2463 spin_lock_irqsave(&domain->lock, flags);
2464 list_add(&info->link, &domain->devices);
2465 spin_unlock_irqrestore(&domain->lock, flags);
2466
2467 /* PASID table is mandatory for a PCI device in scalable mode. */
2468 if (sm_supported(iommu) && !dev_is_real_dma_subdevice(dev)) {
2469 ret = intel_pasid_alloc_table(dev);
2470 if (ret) {
2471 dev_err(dev, "PASID table allocation failed\n");
2472 dmar_remove_one_dev_info(dev);
2473 return ret;
2474 }
2475
2476 /* Setup the PASID entry for requests without PASID (RID2PASID): */
2477 if (hw_pass_through && domain_type_is_si(domain))
2478 ret = intel_pasid_setup_pass_through(iommu, domain,
2479 dev, PASID_RID2PASID);
2480 else if (domain_use_first_level(domain))
2481 ret = domain_setup_first_level(iommu, domain, dev,
2482 PASID_RID2PASID);
2483 else
2484 ret = intel_pasid_setup_second_level(iommu, domain,
2485 dev, PASID_RID2PASID);
2486 if (ret) {
2487 dev_err(dev, "Setup RID2PASID failed\n");
2488 dmar_remove_one_dev_info(dev);
2489 return ret;
2490 }
2491 }
2492
2493 ret = domain_context_mapping(domain, dev);
2494 if (ret) {
2495 dev_err(dev, "Domain context map failed\n");
2496 dmar_remove_one_dev_info(dev);
2497 return ret;
2498 }
2499
2500 return 0;
2501 }
2502
2503 static bool device_has_rmrr(struct device *dev)
2504 {
2505 struct dmar_rmrr_unit *rmrr;
2506 struct device *tmp;
2507 int i;
2508
2509 rcu_read_lock();
2510 for_each_rmrr_units(rmrr) {
2511 /*
2512  * Return true if this RMRR covers the device that is passed in,
2513  * either directly or via a PCI bridge it sits behind.
2514  */
2515 for_each_active_dev_scope(rmrr->devices,
2516 rmrr->devices_cnt, i, tmp)
2517 if (tmp == dev ||
2518 is_downstream_to_pci_bridge(dev, tmp)) {
2519 rcu_read_unlock();
2520 return true;
2521 }
2522 }
2523 rcu_read_unlock();
2524 return false;
2525 }
2526
2527 /**
2528  * device_rmrr_is_relaxable - Test whether the RMRR of this device
2529  * is relaxable (ie. is allowed to be not enforced under some conditions)
2530  * @dev: device handle
2531  *
2532  * We assume that PCI USB devices with RMRRs have them largely
2533  * for historical reasons and that the RMRR space is not actively used post
2534  * boot. This limits the fallout of unmapping those devices below.
2535  *
2536  * The same exception is made for graphics devices, with the requirement
2537  * that any use of the RMRR regions will be torn down before assigning the
2538  * device to a guest.
2539  *
2540  * Return: true if the RMRR is relaxable, false otherwise
2541  */
2542 static bool device_rmrr_is_relaxable(struct device *dev)
2543 {
2544 struct pci_dev *pdev;
2545
2546 if (!dev_is_pci(dev))
2547 return false;
2548
2549 pdev = to_pci_dev(dev);
2550 if (IS_USB_DEVICE(pdev) || IS_GFX_DEVICE(pdev))
2551 return true;
2552 else
2553 return false;
2554 }
2555
2556 /*
2557  * There are a couple of cases where we need to restrict the functionality
2558  * of devices associated with RMRRs. The first is when evaluating a device
2559  * for identity mapping, because problems exist when devices are moved in
2560  * and out of domains and their respective RMRR information is lost. This
2561  * means that a device with an associated RMRR is never placed in a
2562  * "passthrough" domain.
2563  *
2564  * The second is use of the device through the IOMMU API. This interface
2565  * expects to have full control of the IOVA space for the device, which
2566  * cannot be reconciled with keeping RMRR access working, and there is no
2567  * way to quiesce the device's use of the RMRR space either. Devices with
2568  * a non-relaxable RMRR are therefore refused attachment to UNMANAGED
2569  * domains, which excludes them from device assignment.
2570  *
2571  * Devices with relaxable RMRRs (see device_rmrr_is_relaxable()) are not affected.
2572  */
2573 static bool device_is_rmrr_locked(struct device *dev)
2574 {
2575 if (!device_has_rmrr(dev))
2576 return false;
2577
2578 if (device_rmrr_is_relaxable(dev))
2579 return false;
2580
2581 return true;
2582 }
2583
2584 /*
2585  * Return the required default domain type for a specific device.
2586  *
2587  * @dev: the device in query
2588  *
2589  * Returns:
2590  *  - IOMMU_DOMAIN_IDENTITY: device requires an identity domain (e.g.
2591  *    Azalia audio or graphics devices covered by the identity-map quirks)
2592  *  - 0: both identity and dynamically-mapped domains work for this
2593  *    device, so the core default is used
2594  */
2595 static int device_def_domain_type(struct device *dev)
2596 {
2597 if (dev_is_pci(dev)) {
2598 struct pci_dev *pdev = to_pci_dev(dev);
2599
2600 if ((iommu_identity_mapping & IDENTMAP_AZALIA) && IS_AZALIA(pdev))
2601 return IOMMU_DOMAIN_IDENTITY;
2602
2603 if ((iommu_identity_mapping & IDENTMAP_GFX) && IS_GFX_DEVICE(pdev))
2604 return IOMMU_DOMAIN_IDENTITY;
2605 }
2606
2607 return 0;
2608 }
2609
2610 static void intel_iommu_init_qi(struct intel_iommu *iommu)
2611 {
2612 /*
2613  * Start from a sane IOMMU hardware state.
2614  * If queued invalidation was already initialized by us (for
2615  * example, while enabling interrupt remapping), then things are
2616  * already rolling from a sane state and nothing needs resetting.
2617  */
2618 if (!iommu->qi) {
2619 /*
2620  * Clear any previous faults.
2621  */
2622 dmar_fault(-1, iommu);
2623 /*
2624  * Disable queued invalidation if supported and already enabled
2625  * before OS handover.
2626  */
2627 dmar_disable_qi(iommu);
2628 }
2629
2630 if (dmar_enable_qi(iommu)) {
2631 /* Queued invalidation could not be enabled, so fall back to
2632  * register based context and IOTLB invalidation.
2633  */
2634 iommu->flush.flush_context = __iommu_flush_context;
2635 iommu->flush.flush_iotlb = __iommu_flush_iotlb;
2636 pr_info("%s: Using Register based invalidation\n",
2637 iommu->name);
2638 } else {
2639 iommu->flush.flush_context = qi_flush_context;
2640 iommu->flush.flush_iotlb = qi_flush_iotlb;
2641 pr_info("%s: Using Queued invalidation\n", iommu->name);
2642 }
2643 }
2644
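/*
 * Copy one bus worth of context entries from the root entry @old_re left
 * behind by the previous kernel. Domain IDs found in the old entries are
 * marked as in use, and each copied entry is recorded so it can be replaced
 * lazily later on.
 */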
2645 static int copy_context_table(struct intel_iommu *iommu,
2646 struct root_entry *old_re,
2647 struct context_entry **tbl,
2648 int bus, bool ext)
2649 {
2650 int tbl_idx, pos = 0, idx, devfn, ret = 0, did;
2651 struct context_entry *new_ce = NULL, ce;
2652 struct context_entry *old_ce = NULL;
2653 struct root_entry re;
2654 phys_addr_t old_ce_phys;
2655
2656 tbl_idx = ext ? bus * 2 : bus;
2657 memcpy(&re, old_re, sizeof(re));
2658
2659 for (devfn = 0; devfn < 256; devfn++) {
2660
2661 idx = (ext ? devfn * 2 : devfn) % 256;
2662
2663 if (idx == 0) {
2664
2665 if (new_ce) {
2666 tbl[tbl_idx] = new_ce;
2667 __iommu_flush_cache(iommu, new_ce,
2668 VTD_PAGE_SIZE);
2669 pos = 1;
2670 }
2671
2672 if (old_ce)
2673 memunmap(old_ce);
2674
2675 ret = 0;
2676 if (devfn < 0x80)
2677 old_ce_phys = root_entry_lctp(&re);
2678 else
2679 old_ce_phys = root_entry_uctp(&re);
2680
2681 if (!old_ce_phys) {
2682 if (ext && devfn == 0) {
2683 /* No LCTP, try the upper context table pointer (devfn >= 0x80) */
2684 devfn = 0x7f;
2685 continue;
2686 } else {
2687 goto out;
2688 }
2689 }
2690
2691 ret = -ENOMEM;
2692 old_ce = memremap(old_ce_phys, PAGE_SIZE,
2693 MEMREMAP_WB);
2694 if (!old_ce)
2695 goto out;
2696
2697 new_ce = alloc_pgtable_page(iommu->node);
2698 if (!new_ce)
2699 goto out_unmap;
2700
2701 ret = 0;
2702 }
2703
2704 /* Now copy the context entry */
2705 memcpy(&ce, old_ce + idx, sizeof(ce));
2706
2707 if (!context_present(&ce))
2708 continue;
2709
2710 did = context_domain_id(&ce);
2711 if (did >= 0 && did < cap_ndoms(iommu->cap))
2712 set_bit(did, iommu->domain_ids);
2713
2714 set_context_copied(iommu, bus, devfn);
2715 new_ce[idx] = ce;
2716 }
2717
2718 tbl[tbl_idx + pos] = new_ce;
2719
2720 __iommu_flush_cache(iommu, new_ce, VTD_PAGE_SIZE);
2721
2722 out_unmap:
2723 memunmap(old_ce);
2724
2725 out:
2726 return ret;
2727 }
2728
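/*
 * Take over the root/context tables programmed by the previous kernel so
 * that in-flight DMA keeps working until devices are given new mappings.
 * Bails out if the old tables use a different (legacy vs. scalable) root
 * table format, since that cannot be changed while translation is live.
 */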
2729 static int copy_translation_tables(struct intel_iommu *iommu)
2730 {
2731 struct context_entry **ctxt_tbls;
2732 struct root_entry *old_rt;
2733 phys_addr_t old_rt_phys;
2734 int ctxt_table_entries;
2735 u64 rtaddr_reg;
2736 int bus, ret;
2737 bool new_ext, ext;
2738
2739 rtaddr_reg = dmar_readq(iommu->reg + DMAR_RTADDR_REG);
2740 ext = !!(rtaddr_reg & DMA_RTADDR_SMT);
2741 new_ext = !!sm_supported(iommu);
2742
2743 /*
2744  * The RTT bit can only be changed when translation is disabled,
2745  * but disabling translation means opening a window for data
2746  * corruption. So bail out and don't copy anything if we would
2747  * have to change the bit.
2748  */
2749 if (new_ext != ext)
2750 return -EINVAL;
2751
2752 iommu->copied_tables = bitmap_zalloc(BIT_ULL(16), GFP_KERNEL);
2753 if (!iommu->copied_tables)
2754 return -ENOMEM;
2755
2756 old_rt_phys = rtaddr_reg & VTD_PAGE_MASK;
2757 if (!old_rt_phys)
2758 return -EINVAL;
2759
2760 old_rt = memremap(old_rt_phys, PAGE_SIZE, MEMREMAP_WB);
2761 if (!old_rt)
2762 return -ENOMEM;
2763
2764 /* This is too big for the stack - allocate it from slab */
2765 ctxt_table_entries = ext ? 512 : 256;
2766 ret = -ENOMEM;
2767 ctxt_tbls = kcalloc(ctxt_table_entries, sizeof(void *), GFP_KERNEL);
2768 if (!ctxt_tbls)
2769 goto out_unmap;
2770
2771 for (bus = 0; bus < 256; bus++) {
2772 ret = copy_context_table(iommu, &old_rt[bus],
2773 ctxt_tbls, bus, ext);
2774 if (ret) {
2775 pr_err("%s: Failed to copy context table for bus %d\n",
2776 iommu->name, bus);
2777 continue;
2778 }
2779 }
2780
2781 spin_lock(&iommu->lock);
2782
2783 /* Context tables are copied, now write them to the root_entry table */
2784 for (bus = 0; bus < 256; bus++) {
2785 int idx = ext ? bus * 2 : bus;
2786 u64 val;
2787
2788 if (ctxt_tbls[idx]) {
2789 val = virt_to_phys(ctxt_tbls[idx]) | 1;
2790 iommu->root_entry[bus].lo = val;
2791 }
2792
2793 if (!ext || !ctxt_tbls[idx + 1])
2794 continue;
2795
2796 val = virt_to_phys(ctxt_tbls[idx + 1]) | 1;
2797 iommu->root_entry[bus].hi = val;
2798 }
2799
2800 spin_unlock(&iommu->lock);
2801
2802 kfree(ctxt_tbls);
2803
2804 __iommu_flush_cache(iommu, iommu->root_entry, PAGE_SIZE);
2805
2806 ret = 0;
2807
2808 out_unmap:
2809 memunmap(old_rt);
2810
2811 return ret;
2812 }
2813
2814 #ifdef CONFIG_INTEL_IOMMU_SVM
2815 static ioasid_t intel_vcmd_ioasid_alloc(ioasid_t min, ioasid_t max, void *data)
2816 {
2817 struct intel_iommu *iommu = data;
2818 ioasid_t ioasid;
2819
2820 if (!iommu)
2821 return INVALID_IOASID;
2822
2823 /*
2824  * The VT-d virtual command interface always uses the full 20 bit PASID
2825  * range; the host may partition it, but that is outside guest control.
2826  */
2827 if (min < PASID_MIN || max > intel_pasid_max_id)
2828 return INVALID_IOASID;
2829
2830 if (vcmd_alloc_pasid(iommu, &ioasid))
2831 return INVALID_IOASID;
2832
2833 return ioasid;
2834 }
2835
2836 static void intel_vcmd_ioasid_free(ioasid_t ioasid, void *data)
2837 {
2838 struct intel_iommu *iommu = data;
2839
2840 if (!iommu)
2841 return;
2842
2843 /* Ownership checks are done at the upper layer (e.g. VFIO). The
2844  * PASID can only be freed here once it is no longer active, i.e.
2845  * all devices have been unbound from it. */
2846 if (ioasid_find(NULL, ioasid, NULL)) {
2847 pr_alert("Cannot free active IOASID %d\n", ioasid);
2848 return;
2849 }
2850 vcmd_free_pasid(iommu, ioasid);
2851 }
2852
2853 static void register_pasid_allocator(struct intel_iommu *iommu)
2854 {
2855 /*
2856  * If we are running in the host, there is no need for a custom
2857  * allocator, since PASIDs are allocated system-wide by the host.
2858  */
2859 if (!cap_caching_mode(iommu->cap))
2860 return;
2861
2862 if (!sm_supported(iommu)) {
2863 pr_warn("VT-d Scalable Mode not enabled, no PASID allocation\n");
2864 return;
2865 }
2866
2867 /*
2868  * Register a custom PASID allocator if we are running in a guest;
2869  * guest PASIDs must be obtained via the virtual command interface.
2870  * There can be multiple vIOMMUs in each guest but only one
2871  * allocator is active. All vIOMMU allocators will eventually end
2872  * up calling the same host allocator.
2873  */
2874 if (!vccap_pasid(iommu->vccap))
2875 return;
2876
2877 pr_info("Register custom PASID allocator\n");
2878 iommu->pasid_allocator.alloc = intel_vcmd_ioasid_alloc;
2879 iommu->pasid_allocator.free = intel_vcmd_ioasid_free;
2880 iommu->pasid_allocator.pdata = (void *)iommu;
2881 if (ioasid_register_allocator(&iommu->pasid_allocator)) {
2882 pr_warn("Custom PASID allocator failed, scalable mode disabled\n");
2883 /*
2884  * Disable scalable mode on this IOMMU if there is no custom
2885  * allocator: mixing scalable-mode capable vIOMMUs with
2886  * non-scalable-mode vIOMMUs is not supported.
2887  */
2888 intel_iommu_sm = 0;
2889 }
2890 }
2891 #endif
2892
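/*
 * One-time initialization of all DMAR units at boot: audit capabilities,
 * set up invalidation and domain-ID/root-entry structures for each IOMMU,
 * copy any pre-enabled translation tables, program the root entries, build
 * the static identity domain, and finally enable the page request queues
 * and fault interrupts.
 */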
2893 static int __init init_dmars(void)
2894 {
2895 struct dmar_drhd_unit *drhd;
2896 struct intel_iommu *iommu;
2897 int ret;
2898
2899 ret = intel_cap_audit(CAP_AUDIT_STATIC_DMAR, NULL);
2900 if (ret)
2901 goto free_iommu;
2902
2903 for_each_iommu(iommu, drhd) {
2904 if (drhd->ignored) {
2905 iommu_disable_translation(iommu);
2906 continue;
2907 }
2908
2909 /*
2910  * Find the max pasid size of all IOMMUs in the system.
2911  * We need to ensure the system pasid table is no bigger
2912  * than the smallest supported.
2913  */
2914 if (pasid_supported(iommu)) {
2915 u32 temp = 2 << ecap_pss(iommu->ecap);
2916
2917 intel_pasid_max_id = min_t(u32, temp,
2918 intel_pasid_max_id);
2919 }
2920
2921 intel_iommu_init_qi(iommu);
2922
2923 ret = iommu_init_domains(iommu);
2924 if (ret)
2925 goto free_iommu;
2926
2927 init_translation_status(iommu);
2928
2929 if (translation_pre_enabled(iommu) && !is_kdump_kernel()) {
2930 iommu_disable_translation(iommu);
2931 clear_translation_pre_enabled(iommu);
2932 pr_warn("Translation was enabled for %s but we are not in kdump mode\n",
2933 iommu->name);
2934 }
2935
2936 /*
2937  * TBD:
2938  * we could share the same root & context tables
2939  * among all IOMMUs. Needs to be split up later.
2940  */
2941 ret = iommu_alloc_root_entry(iommu);
2942 if (ret)
2943 goto free_iommu;
2944
2945 if (translation_pre_enabled(iommu)) {
2946 pr_info("Translation already enabled - trying to copy translation structures\n");
2947
2948 ret = copy_translation_tables(iommu);
2949 if (ret) {
2950 /*
2951  * We found this IOMMU with translation already
2952  * enabled, but failed to copy over the old
2953  * root-entry table. Disable translation and
2954  * clear the pre-enabled flag, then proceed as
2955  * if the unit had not been configured at all,
2956  * with a clean root-entry table.
2957  */
2958
2959 pr_err("Failed to copy translation tables from previous kernel for %s\n",
2960 iommu->name);
2961 iommu_disable_translation(iommu);
2962 clear_translation_pre_enabled(iommu);
2963 } else {
2964 pr_info("Copied translation tables from previous kernel for %s\n",
2965 iommu->name);
2966 }
2967 }
2968
2969 if (!ecap_pass_through(iommu->ecap))
2970 hw_pass_through = 0;
2971 intel_svm_check(iommu);
2972 }
2973
2974 /*
2975  * Now that qi is enabled on all iommus, set the root entry and
2976  * flush caches. This is required on some Intel X58 chipsets,
2977  * otherwise flush_context would loop forever and the boot hangs.
2978  */
2979 for_each_active_iommu(iommu, drhd) {
2980 iommu_flush_write_buffer(iommu);
2981 #ifdef CONFIG_INTEL_IOMMU_SVM
2982 register_pasid_allocator(iommu);
2983 #endif
2984 iommu_set_root_entry(iommu);
2985 }
2986
2987 #ifdef CONFIG_INTEL_IOMMU_BROKEN_GFX_WA
2988 dmar_map_gfx = 0;
2989 #endif
2990
2991 if (!dmar_map_gfx)
2992 iommu_identity_mapping |= IDENTMAP_GFX;
2993
2994 check_tylersburg_isoch();
2995
2996 ret = si_domain_init(hw_pass_through);
2997 if (ret)
2998 goto free_iommu;
2999
3000 /*
3001  * For each DRHD that is not ignored:
3002  *   - flush the write buffer
3003  *   - enable the page request queue when scalable mode and PRS
3004  *     are supported
3005  *   - set up the DMAR fault interrupt
3006  */
3007 for_each_iommu(iommu, drhd) {
3008 if (drhd->ignored) {
3009 /*
3010  * We always have to disable PMRs or DMA may fail on
3011  * this device.
3012  */
3013 if (force_on)
3014 iommu_disable_protect_mem_regions(iommu);
3015 continue;
3016 }
3017
3018 iommu_flush_write_buffer(iommu);
3019
3020 #ifdef CONFIG_INTEL_IOMMU_SVM
3021 if (pasid_supported(iommu) && ecap_prs(iommu->ecap)) {
3022 /*
3023  * Calling dmar_alloc_hwirq() with dmar_global_lock held could
3024  * cause a lock race, so drop the lock around enabling the PRQ.
3025  */
3026 up_write(&dmar_global_lock);
3027 ret = intel_svm_enable_prq(iommu);
3028 down_write(&dmar_global_lock);
3029 if (ret)
3030 goto free_iommu;
3031 }
3032 #endif
3033 ret = dmar_set_interrupt(iommu);
3034 if (ret)
3035 goto free_iommu;
3036 }
3037
3038 return 0;
3039
3040 free_iommu:
3041 for_each_active_iommu(iommu, drhd) {
3042 disable_dmar_iommu(iommu);
3043 free_dmar_iommu(iommu);
3044 }
3045
3046 return ret;
3047 }
3048
3049 static void __init init_no_remapping_devices(void)
3050 {
3051 struct dmar_drhd_unit *drhd;
3052 struct device *dev;
3053 int i;
3054
3055 for_each_drhd_unit(drhd) {
3056 if (!drhd->include_all) {
3057 for_each_active_dev_scope(drhd->devices,
3058 drhd->devices_cnt, i, dev)
3059 break;
3060 /* ignore DMAR unit if no devices exist */
3061 if (i == drhd->devices_cnt)
3062 drhd->ignored = 1;
3063 }
3064 }
3065
3066 for_each_active_drhd_unit(drhd) {
3067 if (drhd->include_all)
3068 continue;
3069
3070 for_each_active_dev_scope(drhd->devices,
3071 drhd->devices_cnt, i, dev)
3072 if (!dev_is_pci(dev) || !IS_GFX_DEVICE(to_pci_dev(dev)))
3073 break;
3074 if (i < drhd->devices_cnt)
3075 continue;
3076
3077 /* This IOMMU has *only* gfx devices. Either bypass it or
3078    set the gfx_dedicated flag, as appropriate. */
3079 drhd->gfx_dedicated = 1;
3080 if (!dmar_map_gfx)
3081 drhd->ignored = 1;
3082 }
3083 }
3084
3085 #ifdef CONFIG_SUSPEND
3086 static int init_iommu_hw(void)
3087 {
3088 struct dmar_drhd_unit *drhd;
3089 struct intel_iommu *iommu = NULL;
3090
3091 for_each_active_iommu(iommu, drhd)
3092 if (iommu->qi)
3093 dmar_reenable_qi(iommu);
3094
3095 for_each_iommu(iommu, drhd) {
3096 if (drhd->ignored) {
3097 /*
3098  * We always have to disable PMRs or DMA may fail on
3099  * this device.
3100  */
3101 if (force_on)
3102 iommu_disable_protect_mem_regions(iommu);
3103 continue;
3104 }
3105
3106 iommu_flush_write_buffer(iommu);
3107 iommu_set_root_entry(iommu);
3108 iommu_enable_translation(iommu);
3109 iommu_disable_protect_mem_regions(iommu);
3110 }
3111
3112 return 0;
3113 }
3114
3115 static void iommu_flush_all(void)
3116 {
3117 struct dmar_drhd_unit *drhd;
3118 struct intel_iommu *iommu;
3119
3120 for_each_active_iommu(iommu, drhd) {
3121 iommu->flush.flush_context(iommu, 0, 0, 0,
3122 DMA_CCMD_GLOBAL_INVL);
3123 iommu->flush.flush_iotlb(iommu, 0, 0, 0,
3124 DMA_TLB_GLOBAL_FLUSH);
3125 }
3126 }
3127
3128 static int iommu_suspend(void)
3129 {
3130 struct dmar_drhd_unit *drhd;
3131 struct intel_iommu *iommu = NULL;
3132 unsigned long flag;
3133
3134 for_each_active_iommu(iommu, drhd) {
3135 iommu->iommu_state = kcalloc(MAX_SR_DMAR_REGS, sizeof(u32),
3136 GFP_KERNEL);
3137 if (!iommu->iommu_state)
3138 goto nomem;
3139 }
3140
3141 iommu_flush_all();
3142
3143 for_each_active_iommu(iommu, drhd) {
3144 iommu_disable_translation(iommu);
3145
3146 raw_spin_lock_irqsave(&iommu->register_lock, flag);
3147
3148 iommu->iommu_state[SR_DMAR_FECTL_REG] =
3149 readl(iommu->reg + DMAR_FECTL_REG);
3150 iommu->iommu_state[SR_DMAR_FEDATA_REG] =
3151 readl(iommu->reg + DMAR_FEDATA_REG);
3152 iommu->iommu_state[SR_DMAR_FEADDR_REG] =
3153 readl(iommu->reg + DMAR_FEADDR_REG);
3154 iommu->iommu_state[SR_DMAR_FEUADDR_REG] =
3155 readl(iommu->reg + DMAR_FEUADDR_REG);
3156
3157 raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
3158 }
3159 return 0;
3160
3161 nomem:
3162 for_each_active_iommu(iommu, drhd)
3163 kfree(iommu->iommu_state);
3164
3165 return -ENOMEM;
3166 }
3167
3168 static void iommu_resume(void)
3169 {
3170 struct dmar_drhd_unit *drhd;
3171 struct intel_iommu *iommu = NULL;
3172 unsigned long flag;
3173
3174 if (init_iommu_hw()) {
3175 if (force_on)
3176 panic("tboot: IOMMU setup failed, DMAR can not resume!\n");
3177 else
3178 WARN(1, "IOMMU setup failed, DMAR can not resume!\n");
3179 return;
3180 }
3181
3182 for_each_active_iommu(iommu, drhd) {
3183
3184 raw_spin_lock_irqsave(&iommu->register_lock, flag);
3185
3186 writel(iommu->iommu_state[SR_DMAR_FECTL_REG],
3187 iommu->reg + DMAR_FECTL_REG);
3188 writel(iommu->iommu_state[SR_DMAR_FEDATA_REG],
3189 iommu->reg + DMAR_FEDATA_REG);
3190 writel(iommu->iommu_state[SR_DMAR_FEADDR_REG],
3191 iommu->reg + DMAR_FEADDR_REG);
3192 writel(iommu->iommu_state[SR_DMAR_FEUADDR_REG],
3193 iommu->reg + DMAR_FEUADDR_REG);
3194
3195 raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
3196 }
3197
3198 for_each_active_iommu(iommu, drhd)
3199 kfree(iommu->iommu_state);
3200 }
3201
3202 static struct syscore_ops iommu_syscore_ops = {
3203 .resume = iommu_resume,
3204 .suspend = iommu_suspend,
3205 };
3206
3207 static void __init init_iommu_pm_ops(void)
3208 {
3209 register_syscore_ops(&iommu_syscore_ops);
3210 }
3211
3212 #else
3213 static inline void init_iommu_pm_ops(void) {}
3214 #endif
3215
3216 static int __init rmrr_sanity_check(struct acpi_dmar_reserved_memory *rmrr)
3217 {
3218 if (!IS_ALIGNED(rmrr->base_address, PAGE_SIZE) ||
3219 !IS_ALIGNED(rmrr->end_address + 1, PAGE_SIZE) ||
3220 rmrr->end_address <= rmrr->base_address ||
3221 arch_rmrr_sanity_check(rmrr))
3222 return -EINVAL;
3223
3224 return 0;
3225 }
3226
3227 int __init dmar_parse_one_rmrr(struct acpi_dmar_header *header, void *arg)
3228 {
3229 struct acpi_dmar_reserved_memory *rmrr;
3230 struct dmar_rmrr_unit *rmrru;
3231
3232 rmrr = (struct acpi_dmar_reserved_memory *)header;
3233 if (rmrr_sanity_check(rmrr)) {
3234 pr_warn(FW_BUG
3235 "Your BIOS is broken; bad RMRR [%#018Lx-%#018Lx]\n"
3236 "BIOS vendor: %s; Ver: %s; Product Version: %s\n",
3237 rmrr->base_address, rmrr->end_address,
3238 dmi_get_system_info(DMI_BIOS_VENDOR),
3239 dmi_get_system_info(DMI_BIOS_VERSION),
3240 dmi_get_system_info(DMI_PRODUCT_VERSION));
3241 add_taint(TAINT_FIRMWARE_WORKAROUND, LOCKDEP_STILL_OK);
3242 }
3243
3244 rmrru = kzalloc(sizeof(*rmrru), GFP_KERNEL);
3245 if (!rmrru)
3246 goto out;
3247
3248 rmrru->hdr = header;
3249
3250 rmrru->base_address = rmrr->base_address;
3251 rmrru->end_address = rmrr->end_address;
3252
3253 rmrru->devices = dmar_alloc_dev_scope((void *)(rmrr + 1),
3254 ((void *)rmrr) + rmrr->header.length,
3255 &rmrru->devices_cnt);
3256 if (rmrru->devices_cnt && rmrru->devices == NULL)
3257 goto free_rmrru;
3258
3259 list_add(&rmrru->list, &dmar_rmrr_units);
3260
3261 return 0;
3262 free_rmrru:
3263 kfree(rmrru);
3264 out:
3265 return -ENOMEM;
3266 }
3267
3268 static struct dmar_atsr_unit *dmar_find_atsr(struct acpi_dmar_atsr *atsr)
3269 {
3270 struct dmar_atsr_unit *atsru;
3271 struct acpi_dmar_atsr *tmp;
3272
3273 list_for_each_entry_rcu(atsru, &dmar_atsr_units, list,
3274 dmar_rcu_check()) {
3275 tmp = (struct acpi_dmar_atsr *)atsru->hdr;
3276 if (atsr->segment != tmp->segment)
3277 continue;
3278 if (atsr->header.length != tmp->header.length)
3279 continue;
3280 if (memcmp(atsr, tmp, atsr->header.length) == 0)
3281 return atsru;
3282 }
3283
3284 return NULL;
3285 }
3286
3287 int dmar_parse_one_atsr(struct acpi_dmar_header *hdr, void *arg)
3288 {
3289 struct acpi_dmar_atsr *atsr;
3290 struct dmar_atsr_unit *atsru;
3291
3292 if (system_state >= SYSTEM_RUNNING && !intel_iommu_enabled)
3293 return 0;
3294
3295 atsr = container_of(hdr, struct acpi_dmar_atsr, header);
3296 atsru = dmar_find_atsr(atsr);
3297 if (atsru)
3298 return 0;
3299
3300 atsru = kzalloc(sizeof(*atsru) + hdr->length, GFP_KERNEL);
3301 if (!atsru)
3302 return -ENOMEM;
3303
3304 /*
3305  * If memory is allocated from slab by the ACPI _DSM method, we
3306  * need to copy the memory content because the memory buffer will
3307  * be freed on return.
3308  */
3309 atsru->hdr = (void *)(atsru + 1);
3310 memcpy(atsru->hdr, hdr, hdr->length);
3311 atsru->include_all = atsr->flags & 0x1;
3312 if (!atsru->include_all) {
3313 atsru->devices = dmar_alloc_dev_scope((void *)(atsr + 1),
3314 (void *)atsr + atsr->header.length,
3315 &atsru->devices_cnt);
3316 if (atsru->devices_cnt && atsru->devices == NULL) {
3317 kfree(atsru);
3318 return -ENOMEM;
3319 }
3320 }
3321
3322 list_add_rcu(&atsru->list, &dmar_atsr_units);
3323
3324 return 0;
3325 }
3326
3327 static void intel_iommu_free_atsr(struct dmar_atsr_unit *atsru)
3328 {
3329 dmar_free_dev_scope(&atsru->devices, &atsru->devices_cnt);
3330 kfree(atsru);
3331 }
3332
3333 int dmar_release_one_atsr(struct acpi_dmar_header *hdr, void *arg)
3334 {
3335 struct acpi_dmar_atsr *atsr;
3336 struct dmar_atsr_unit *atsru;
3337
3338 atsr = container_of(hdr, struct acpi_dmar_atsr, header);
3339 atsru = dmar_find_atsr(atsr);
3340 if (atsru) {
3341 list_del_rcu(&atsru->list);
3342 synchronize_rcu();
3343 intel_iommu_free_atsr(atsru);
3344 }
3345
3346 return 0;
3347 }
3348
3349 int dmar_check_one_atsr(struct acpi_dmar_header *hdr, void *arg)
3350 {
3351 int i;
3352 struct device *dev;
3353 struct acpi_dmar_atsr *atsr;
3354 struct dmar_atsr_unit *atsru;
3355
3356 atsr = container_of(hdr, struct acpi_dmar_atsr, header);
3357 atsru = dmar_find_atsr(atsr);
3358 if (!atsru)
3359 return 0;
3360
3361 if (!atsru->include_all && atsru->devices && atsru->devices_cnt) {
3362 for_each_active_dev_scope(atsru->devices, atsru->devices_cnt,
3363 i, dev)
3364 return -EBUSY;
3365 }
3366
3367 return 0;
3368 }
3369
3370 static struct dmar_satc_unit *dmar_find_satc(struct acpi_dmar_satc *satc)
3371 {
3372 struct dmar_satc_unit *satcu;
3373 struct acpi_dmar_satc *tmp;
3374
3375 list_for_each_entry_rcu(satcu, &dmar_satc_units, list,
3376 dmar_rcu_check()) {
3377 tmp = (struct acpi_dmar_satc *)satcu->hdr;
3378 if (satc->segment != tmp->segment)
3379 continue;
3380 if (satc->header.length != tmp->header.length)
3381 continue;
3382 if (memcmp(satc, tmp, satc->header.length) == 0)
3383 return satcu;
3384 }
3385
3386 return NULL;
3387 }
3388
3389 int dmar_parse_one_satc(struct acpi_dmar_header *hdr, void *arg)
3390 {
3391 struct acpi_dmar_satc *satc;
3392 struct dmar_satc_unit *satcu;
3393
3394 if (system_state >= SYSTEM_RUNNING && !intel_iommu_enabled)
3395 return 0;
3396
3397 satc = container_of(hdr, struct acpi_dmar_satc, header);
3398 satcu = dmar_find_satc(satc);
3399 if (satcu)
3400 return 0;
3401
3402 satcu = kzalloc(sizeof(*satcu) + hdr->length, GFP_KERNEL);
3403 if (!satcu)
3404 return -ENOMEM;
3405
3406 satcu->hdr = (void *)(satcu + 1);
3407 memcpy(satcu->hdr, hdr, hdr->length);
3408 satcu->atc_required = satc->flags & 0x1;
3409 satcu->devices = dmar_alloc_dev_scope((void *)(satc + 1),
3410 (void *)satc + satc->header.length,
3411 &satcu->devices_cnt);
3412 if (satcu->devices_cnt && !satcu->devices) {
3413 kfree(satcu);
3414 return -ENOMEM;
3415 }
3416 list_add_rcu(&satcu->list, &dmar_satc_units);
3417
3418 return 0;
3419 }
3420
3421 static int intel_iommu_add(struct dmar_drhd_unit *dmaru)
3422 {
3423 int sp, ret;
3424 struct intel_iommu *iommu = dmaru->iommu;
3425
3426 ret = intel_cap_audit(CAP_AUDIT_HOTPLUG_DMAR, iommu);
3427 if (ret)
3428 goto out;
3429
3430 if (hw_pass_through && !ecap_pass_through(iommu->ecap)) {
3431 pr_warn("%s: Doesn't support hardware pass through.\n",
3432 iommu->name);
3433 return -ENXIO;
3434 }
3435
3436 sp = domain_update_iommu_superpage(NULL, iommu) - 1;
3437 if (sp >= 0 && !(cap_super_page_val(iommu->cap) & (1 << sp))) {
3438 pr_warn("%s: Doesn't support large page.\n",
3439 iommu->name);
3440 return -ENXIO;
3441 }
3442
3443 /*
3444  * Disable translation if already enabled prior to OS handover.
3445  */
3446 if (iommu->gcmd & DMA_GCMD_TE)
3447 iommu_disable_translation(iommu);
3448
3449 ret = iommu_init_domains(iommu);
3450 if (ret == 0)
3451 ret = iommu_alloc_root_entry(iommu);
3452 if (ret)
3453 goto out;
3454
3455 intel_svm_check(iommu);
3456
3457 if (dmaru->ignored) {
3458 /*
3459  * We always have to disable PMRs or DMA may fail on this device.
3460  */
3461 if (force_on)
3462 iommu_disable_protect_mem_regions(iommu);
3463 return 0;
3464 }
3465
3466 intel_iommu_init_qi(iommu);
3467 iommu_flush_write_buffer(iommu);
3468
3469 #ifdef CONFIG_INTEL_IOMMU_SVM
3470 if (pasid_supported(iommu) && ecap_prs(iommu->ecap)) {
3471 ret = intel_svm_enable_prq(iommu);
3472 if (ret)
3473 goto disable_iommu;
3474 }
3475 #endif
3476 ret = dmar_set_interrupt(iommu);
3477 if (ret)
3478 goto disable_iommu;
3479
3480 iommu_set_root_entry(iommu);
3481 iommu_enable_translation(iommu);
3482
3483 iommu_disable_protect_mem_regions(iommu);
3484 return 0;
3485
3486 disable_iommu:
3487 disable_dmar_iommu(iommu);
3488 out:
3489 free_dmar_iommu(iommu);
3490 return ret;
3491 }
3492
3493 int dmar_iommu_hotplug(struct dmar_drhd_unit *dmaru, bool insert)
3494 {
3495 int ret = 0;
3496 struct intel_iommu *iommu = dmaru->iommu;
3497
3498 if (!intel_iommu_enabled)
3499 return 0;
3500 if (iommu == NULL)
3501 return -EINVAL;
3502
3503 if (insert) {
3504 ret = intel_iommu_add(dmaru);
3505 } else {
3506 disable_dmar_iommu(iommu);
3507 free_dmar_iommu(iommu);
3508 }
3509
3510 return ret;
3511 }
3512
3513 static void intel_iommu_free_dmars(void)
3514 {
3515 struct dmar_rmrr_unit *rmrru, *rmrr_n;
3516 struct dmar_atsr_unit *atsru, *atsr_n;
3517 struct dmar_satc_unit *satcu, *satc_n;
3518
3519 list_for_each_entry_safe(rmrru, rmrr_n, &dmar_rmrr_units, list) {
3520 list_del(&rmrru->list);
3521 dmar_free_dev_scope(&rmrru->devices, &rmrru->devices_cnt);
3522 kfree(rmrru);
3523 }
3524
3525 list_for_each_entry_safe(atsru, atsr_n, &dmar_atsr_units, list) {
3526 list_del(&atsru->list);
3527 intel_iommu_free_atsr(atsru);
3528 }
3529 list_for_each_entry_safe(satcu, satc_n, &dmar_satc_units, list) {
3530 list_del(&satcu->list);
3531 dmar_free_dev_scope(&satcu->devices, &satcu->devices_cnt);
3532 kfree(satcu);
3533 }
3534 }
3535
3536 static struct dmar_satc_unit *dmar_find_matched_satc_unit(struct pci_dev *dev)
3537 {
3538 struct dmar_satc_unit *satcu;
3539 struct acpi_dmar_satc *satc;
3540 struct device *tmp;
3541 int i;
3542
3543 dev = pci_physfn(dev);
3544 rcu_read_lock();
3545
3546 list_for_each_entry_rcu(satcu, &dmar_satc_units, list) {
3547 satc = container_of(satcu->hdr, struct acpi_dmar_satc, header);
3548 if (satc->segment != pci_domain_nr(dev->bus))
3549 continue;
3550 for_each_dev_scope(satcu->devices, satcu->devices_cnt, i, tmp)
3551 if (to_pci_dev(tmp) == dev)
3552 goto out;
3553 }
3554 satcu = NULL;
3555 out:
3556 rcu_read_unlock();
3557 return satcu;
3558 }
3559
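/*
 * Decide whether ATS may be used for @dev under @iommu. Devices listed in a
 * SATC table get their answer from the table; otherwise ATS is allowed only
 * if the device sits below a PCIe root port covered by an ATSR (or an
 * include-all ATSR) and there is no conventional PCI bridge on the path.
 */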
3560 static int dmar_ats_supported(struct pci_dev *dev, struct intel_iommu *iommu)
3561 {
3562 int i, ret = 1;
3563 struct pci_bus *bus;
3564 struct pci_dev *bridge = NULL;
3565 struct device *tmp;
3566 struct acpi_dmar_atsr *atsr;
3567 struct dmar_atsr_unit *atsru;
3568 struct dmar_satc_unit *satcu;
3569
3570 dev = pci_physfn(dev);
3571 satcu = dmar_find_matched_satc_unit(dev);
3572 if (satcu)
3573 /*
3574  * This device supports ATS as it is in the SATC table.
3575  * When the IOMMU is in legacy mode, enabling ATS is done
3576  * automatically by the hardware for devices that require
3577  * it, so the OS should not enable ATS itself to avoid
3578  * duplicated TLB invalidations.
3579  */
3580 return !(satcu->atc_required && !sm_supported(iommu));
3581
3582 for (bus = dev->bus; bus; bus = bus->parent) {
3583 bridge = bus->self;
3584
3585 if (!bridge)
3586 return 1;
3587
3588 if (!pci_is_pcie(bridge) ||
3589 pci_pcie_type(bridge) == PCI_EXP_TYPE_PCI_BRIDGE)
3590 return 0;
3591
3592 if (pci_pcie_type(bridge) == PCI_EXP_TYPE_ROOT_PORT)
3593 break;
3594 }
3595
3596 rcu_read_lock();
3597 list_for_each_entry_rcu(atsru, &dmar_atsr_units, list) {
3598 atsr = container_of(atsru->hdr, struct acpi_dmar_atsr, header);
3599 if (atsr->segment != pci_domain_nr(dev->bus))
3600 continue;
3601
3602 for_each_dev_scope(atsru->devices, atsru->devices_cnt, i, tmp)
3603 if (tmp == &bridge->dev)
3604 goto out;
3605
3606 if (atsru->include_all)
3607 goto out;
3608 }
3609 ret = 0;
3610 out:
3611 rcu_read_unlock();
3612
3613 return ret;
3614 }
3615
3616 int dmar_iommu_notify_scope_dev(struct dmar_pci_notify_info *info)
3617 {
3618 int ret;
3619 struct dmar_rmrr_unit *rmrru;
3620 struct dmar_atsr_unit *atsru;
3621 struct dmar_satc_unit *satcu;
3622 struct acpi_dmar_atsr *atsr;
3623 struct acpi_dmar_reserved_memory *rmrr;
3624 struct acpi_dmar_satc *satc;
3625
3626 if (!intel_iommu_enabled && system_state >= SYSTEM_RUNNING)
3627 return 0;
3628
3629 list_for_each_entry(rmrru, &dmar_rmrr_units, list) {
3630 rmrr = container_of(rmrru->hdr,
3631 struct acpi_dmar_reserved_memory, header);
3632 if (info->event == BUS_NOTIFY_ADD_DEVICE) {
3633 ret = dmar_insert_dev_scope(info, (void *)(rmrr + 1),
3634 ((void *)rmrr) + rmrr->header.length,
3635 rmrr->segment, rmrru->devices,
3636 rmrru->devices_cnt);
3637 if (ret < 0)
3638 return ret;
3639 } else if (info->event == BUS_NOTIFY_REMOVED_DEVICE) {
3640 dmar_remove_dev_scope(info, rmrr->segment,
3641 rmrru->devices, rmrru->devices_cnt);
3642 }
3643 }
3644
3645 list_for_each_entry(atsru, &dmar_atsr_units, list) {
3646 if (atsru->include_all)
3647 continue;
3648
3649 atsr = container_of(atsru->hdr, struct acpi_dmar_atsr, header);
3650 if (info->event == BUS_NOTIFY_ADD_DEVICE) {
3651 ret = dmar_insert_dev_scope(info, (void *)(atsr + 1),
3652 (void *)atsr + atsr->header.length,
3653 atsr->segment, atsru->devices,
3654 atsru->devices_cnt);
3655 if (ret > 0)
3656 break;
3657 else if (ret < 0)
3658 return ret;
3659 } else if (info->event == BUS_NOTIFY_REMOVED_DEVICE) {
3660 if (dmar_remove_dev_scope(info, atsr->segment,
3661 atsru->devices, atsru->devices_cnt))
3662 break;
3663 }
3664 }
3665 list_for_each_entry(satcu, &dmar_satc_units, list) {
3666 satc = container_of(satcu->hdr, struct acpi_dmar_satc, header);
3667 if (info->event == BUS_NOTIFY_ADD_DEVICE) {
3668 ret = dmar_insert_dev_scope(info, (void *)(satc + 1),
3669 (void *)satc + satc->header.length,
3670 satc->segment, satcu->devices,
3671 satcu->devices_cnt);
3672 if (ret > 0)
3673 break;
3674 else if (ret < 0)
3675 return ret;
3676 } else if (info->event == BUS_NOTIFY_REMOVED_DEVICE) {
3677 if (dmar_remove_dev_scope(info, satc->segment,
3678 satcu->devices, satcu->devices_cnt))
3679 break;
3680 }
3681 }
3682
3683 return 0;
3684 }
3685
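/*
 * Memory hotplug notifier for the identity (si) domain: newly onlined memory
 * is identity-mapped so passthrough devices can reach it, while offlined
 * memory has its mappings torn down and the IOTLBs flushed.
 */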
3686 static int intel_iommu_memory_notifier(struct notifier_block *nb,
3687 unsigned long val, void *v)
3688 {
3689 struct memory_notify *mhp = v;
3690 unsigned long start_vpfn = mm_to_dma_pfn(mhp->start_pfn);
3691 unsigned long last_vpfn = mm_to_dma_pfn(mhp->start_pfn +
3692 mhp->nr_pages - 1);
3693
3694 switch (val) {
3695 case MEM_GOING_ONLINE:
3696 if (iommu_domain_identity_map(si_domain,
3697 start_vpfn, last_vpfn)) {
3698 pr_warn("Failed to build identity map for [%lx-%lx]\n",
3699 start_vpfn, last_vpfn);
3700 return NOTIFY_BAD;
3701 }
3702 break;
3703
3704 case MEM_OFFLINE:
3705 case MEM_CANCEL_ONLINE:
3706 {
3707 struct dmar_drhd_unit *drhd;
3708 struct intel_iommu *iommu;
3709 LIST_HEAD(freelist);
3710
3711 domain_unmap(si_domain, start_vpfn, last_vpfn, &freelist);
3712
3713 rcu_read_lock();
3714 for_each_active_iommu(iommu, drhd)
3715 iommu_flush_iotlb_psi(iommu, si_domain,
3716 start_vpfn, mhp->nr_pages,
3717 list_empty(&freelist), 0);
3718 rcu_read_unlock();
3719 put_pages_list(&freelist);
3720 }
3721 break;
3722 }
3723
3724 return NOTIFY_OK;
3725 }
3726
3727 static struct notifier_block intel_iommu_memory_nb = {
3728 .notifier_call = intel_iommu_memory_notifier,
3729 .priority = 0
3730 };
3731
3732 static void intel_disable_iommus(void)
3733 {
3734 struct intel_iommu *iommu = NULL;
3735 struct dmar_drhd_unit *drhd;
3736
3737 for_each_iommu(iommu, drhd)
3738 iommu_disable_translation(iommu);
3739 }
3740
3741 void intel_iommu_shutdown(void)
3742 {
3743 struct dmar_drhd_unit *drhd;
3744 struct intel_iommu *iommu = NULL;
3745
3746 if (no_iommu || dmar_disabled)
3747 return;
3748
3749 down_write(&dmar_global_lock);
3750
3751 /* Disable PMRs explicitly here. */
3752 for_each_iommu(iommu, drhd)
3753 iommu_disable_protect_mem_regions(iommu);
3754
3755 /* Make sure the IOMMUs are switched off */
3756 intel_disable_iommus();
3757
3758 up_write(&dmar_global_lock);
3759 }
3760
3761 static inline struct intel_iommu *dev_to_intel_iommu(struct device *dev)
3762 {
3763 struct iommu_device *iommu_dev = dev_to_iommu_device(dev);
3764
3765 return container_of(iommu_dev, struct intel_iommu, iommu);
3766 }
3767
3768 static ssize_t version_show(struct device *dev,
3769 struct device_attribute *attr, char *buf)
3770 {
3771 struct intel_iommu *iommu = dev_to_intel_iommu(dev);
3772 u32 ver = readl(iommu->reg + DMAR_VER_REG);
3773 return sprintf(buf, "%d:%d\n",
3774 DMAR_VER_MAJOR(ver), DMAR_VER_MINOR(ver));
3775 }
3776 static DEVICE_ATTR_RO(version);
3777
3778 static ssize_t address_show(struct device *dev,
3779 struct device_attribute *attr, char *buf)
3780 {
3781 struct intel_iommu *iommu = dev_to_intel_iommu(dev);
3782 return sprintf(buf, "%llx\n", iommu->reg_phys);
3783 }
3784 static DEVICE_ATTR_RO(address);
3785
3786 static ssize_t cap_show(struct device *dev,
3787 struct device_attribute *attr, char *buf)
3788 {
3789 struct intel_iommu *iommu = dev_to_intel_iommu(dev);
3790 return sprintf(buf, "%llx\n", iommu->cap);
3791 }
3792 static DEVICE_ATTR_RO(cap);
3793
3794 static ssize_t ecap_show(struct device *dev,
3795 struct device_attribute *attr, char *buf)
3796 {
3797 struct intel_iommu *iommu = dev_to_intel_iommu(dev);
3798 return sprintf(buf, "%llx\n", iommu->ecap);
3799 }
3800 static DEVICE_ATTR_RO(ecap);
3801
3802 static ssize_t domains_supported_show(struct device *dev,
3803 struct device_attribute *attr, char *buf)
3804 {
3805 struct intel_iommu *iommu = dev_to_intel_iommu(dev);
3806 return sprintf(buf, "%ld\n", cap_ndoms(iommu->cap));
3807 }
3808 static DEVICE_ATTR_RO(domains_supported);
3809
3810 static ssize_t domains_used_show(struct device *dev,
3811 struct device_attribute *attr, char *buf)
3812 {
3813 struct intel_iommu *iommu = dev_to_intel_iommu(dev);
3814 return sprintf(buf, "%d\n", bitmap_weight(iommu->domain_ids,
3815 cap_ndoms(iommu->cap)));
3816 }
3817 static DEVICE_ATTR_RO(domains_used);
3818
3819 static struct attribute *intel_iommu_attrs[] = {
3820 &dev_attr_version.attr,
3821 &dev_attr_address.attr,
3822 &dev_attr_cap.attr,
3823 &dev_attr_ecap.attr,
3824 &dev_attr_domains_supported.attr,
3825 &dev_attr_domains_used.attr,
3826 NULL,
3827 };
3828
3829 static struct attribute_group intel_iommu_group = {
3830 .name = "intel-iommu",
3831 .attrs = intel_iommu_attrs,
3832 };
3833
3834 const struct attribute_group *intel_iommu_groups[] = {
3835 &intel_iommu_group,
3836 NULL,
3837 };
3838
3839 static inline bool has_external_pci(void)
3840 {
3841 struct pci_dev *pdev = NULL;
3842
3843 for_each_pci_dev(pdev)
3844 if (pdev->external_facing)
3845 return true;
3846
3847 return false;
3848 }
3849
3850 static int __init platform_optin_force_iommu(void)
3851 {
3852 if (!dmar_platform_optin() || no_platform_optin || !has_external_pci())
3853 return 0;
3854
3855 if (no_iommu || dmar_disabled)
3856 pr_info("Intel-IOMMU force enabled due to platform opt in\n");
3857
3858 /*
3859  * If Intel-IOMMU is disabled by default, we will apply identity
3860  * map for all devices except those marked as being untrusted.
3861  */
3862 if (dmar_disabled)
3863 iommu_set_default_passthrough(false);
3864
3865 dmar_disabled = 0;
3866 no_iommu = 0;
3867
3868 return 1;
3869 }
3870
3871 static int __init probe_acpi_namespace_devices(void)
3872 {
3873 struct dmar_drhd_unit *drhd;
3874
3875 struct intel_iommu *iommu __maybe_unused;
3876 struct device *dev;
3877 int i, ret = 0;
3878
3879 for_each_active_iommu(iommu, drhd) {
3880 for_each_active_dev_scope(drhd->devices,
3881 drhd->devices_cnt, i, dev) {
3882 struct acpi_device_physical_node *pn;
3883 struct iommu_group *group;
3884 struct acpi_device *adev;
3885
3886 if (dev->bus != &acpi_bus_type)
3887 continue;
3888
3889 adev = to_acpi_device(dev);
3890 mutex_lock(&adev->physical_node_lock);
3891 list_for_each_entry(pn,
3892 &adev->physical_node_list, node) {
3893 group = iommu_group_get(pn->dev);
3894 if (group) {
3895 iommu_group_put(group);
3896 continue;
3897 }
3898
3899 pn->dev->bus->iommu_ops = &intel_iommu_ops;
3900 ret = iommu_probe_device(pn->dev);
3901 if (ret)
3902 break;
3903 }
3904 mutex_unlock(&adev->physical_node_lock);
3905
3906 if (ret)
3907 return ret;
3908 }
3909 }
3910
3911 return 0;
3912 }
3913
3914 static __init int tboot_force_iommu(void)
3915 {
3916 if (!tboot_enabled())
3917 return 0;
3918
3919 if (no_iommu || dmar_disabled)
3920 pr_warn("Forcing Intel-IOMMU to enabled\n");
3921
3922 dmar_disabled = 0;
3923 no_iommu = 0;
3924
3925 return 1;
3926 }
3927
3928 int __init intel_iommu_init(void)
3929 {
3930 int ret = -ENODEV;
3931 struct dmar_drhd_unit *drhd;
3932 struct intel_iommu *iommu;
3933
3934 /*
3935  * Intel IOMMU is required for a TXT/tboot launch or platform
3936  * opt in, so enforce that.
3937  */
3938 force_on = (!intel_iommu_tboot_noforce && tboot_force_iommu()) ||
3939 platform_optin_force_iommu();
3940
3941 down_write(&dmar_global_lock);
3942 if (dmar_table_init()) {
3943 if (force_on)
3944 panic("tboot: Failed to initialize DMAR table\n");
3945 goto out_free_dmar;
3946 }
3947
3948 if (dmar_dev_scope_init() < 0) {
3949 if (force_on)
3950 panic("tboot: Failed to initialize DMAR device scope\n");
3951 goto out_free_dmar;
3952 }
3953
3954 up_write(&dmar_global_lock);
3955
3956 /*
3957  * The bus notifier takes the dmar_global_lock, so lockdep will
3958  * complain later when we register it under the lock.
3959  */
3960 dmar_register_bus_notifier();
3961
3962 down_write(&dmar_global_lock);
3963
3964 if (!no_iommu)
3965 intel_iommu_debugfs_init();
3966
3967 if (no_iommu || dmar_disabled) {
3968 /*
3969  * DMA remapping will not be used. If tboot did not force the
3970  * IOMMU on (intel_iommu=tboot_noforce), the protected memory
3971  * regions still have to be disabled so that devices are able
3972  * to DMA. Any translation left enabled by the firmware or a
3973  * previous kernel is also switched off below before bailing
3974  * out of initialization.
3975  */
3976 if (intel_iommu_tboot_noforce) {
3977 for_each_iommu(iommu, drhd)
3978 iommu_disable_protect_mem_regions(iommu);
3979 }
3980
3981 /*
3982  * Make sure the IOMMUs are switched off, even when we
3983  * boot into a kexec kernel and the previous kernel left
3984  * them enabled.
3985  */
3986 intel_disable_iommus();
3987 goto out_free_dmar;
3988 }
3989
3990 if (list_empty(&dmar_rmrr_units))
3991 pr_info("No RMRR found\n");
3992
3993 if (list_empty(&dmar_atsr_units))
3994 pr_info("No ATSR found\n");
3995
3996 if (list_empty(&dmar_satc_units))
3997 pr_info("No SATC found\n");
3998
3999 init_no_remapping_devices();
4000
4001 ret = init_dmars();
4002 if (ret) {
4003 if (force_on)
4004 panic("tboot: Failed to initialize DMARs\n");
4005 pr_err("Initialization failed\n");
4006 goto out_free_dmar;
4007 }
4008 up_write(&dmar_global_lock);
4009
4010 init_iommu_pm_ops();
4011
4012 down_read(&dmar_global_lock);
4013 for_each_active_iommu(iommu, drhd) {
4014 /*
4015  * The flush queue implementation does not perform
4016  * page-selective invalidations that are required for efficient
4017  * TLB flushes in virtual environments. The benefit of batching
4018  * is likely to be much lower than the overhead of synchronizing
4019  * the virtual and physical IOMMU page-tables.
4020  */
4021 if (cap_caching_mode(iommu->cap)) {
4022 pr_info_once("IOMMU batching disallowed due to virtualization\n");
4023 iommu_set_dma_strict();
4024 }
4025 iommu_device_sysfs_add(&iommu->iommu, NULL,
4026 intel_iommu_groups,
4027 "%s", iommu->name);
4028 iommu_device_register(&iommu->iommu, &intel_iommu_ops, NULL);
4029 }
4030 up_read(&dmar_global_lock);
4031
4032 bus_set_iommu(&pci_bus_type, &intel_iommu_ops);
4033 if (si_domain && !hw_pass_through)
4034 register_memory_notifier(&intel_iommu_memory_nb);
4035
4036 down_read(&dmar_global_lock);
4037 if (probe_acpi_namespace_devices())
4038 pr_warn("ACPI name space devices didn't probe correctly\n");
4039
4040 /* Finally, we enable the DMA remapping hardware. */
4041 for_each_iommu(iommu, drhd) {
4042 if (!drhd->ignored && !translation_pre_enabled(iommu))
4043 iommu_enable_translation(iommu);
4044
4045 iommu_disable_protect_mem_regions(iommu);
4046 }
4047 up_read(&dmar_global_lock);
4048
4049 pr_info("Intel(R) Virtualization Technology for Directed I/O\n");
4050
4051 intel_iommu_enabled = 1;
4052
4053 return 0;
4054
4055 out_free_dmar:
4056 intel_iommu_free_dmars();
4057 up_write(&dmar_global_lock);
4058 return ret;
4059 }
4060
4061 static int domain_context_clear_one_cb(struct pci_dev *pdev, u16 alias, void *opaque)
4062 {
4063 struct device_domain_info *info = opaque;
4064
4065 domain_context_clear_one(info, PCI_BUS_NUM(alias), alias & 0xff);
4066 return 0;
4067 }
4068
4069 /*
4070  * NB - intel-iommu lacks any sort of reference counting for the users of
4071  * dependent devices. If multiple endpoints have intersecting dependent
4072  * devices, unbinding the driver from any one of them will possibly leave
4073  * the others unable to operate.
4074  */
4075 static void domain_context_clear(struct device_domain_info *info)
4076 {
4077 if (!info->iommu || !info->dev || !dev_is_pci(info->dev))
4078 return;
4079
4080 pci_for_each_dma_alias(to_pci_dev(info->dev),
4081 &domain_context_clear_one_cb, info);
4082 }
4083
4084 static void dmar_remove_one_dev_info(struct device *dev)
4085 {
4086 struct device_domain_info *info = dev_iommu_priv_get(dev);
4087 struct dmar_domain *domain = info->domain;
4088 struct intel_iommu *iommu = info->iommu;
4089 unsigned long flags;
4090
4091 if (!dev_is_real_dma_subdevice(info->dev)) {
4092 if (dev_is_pci(info->dev) && sm_supported(iommu))
4093 intel_pasid_tear_down_entry(iommu, info->dev,
4094 PASID_RID2PASID, false);
4095
4096 iommu_disable_dev_iotlb(info);
4097 domain_context_clear(info);
4098 intel_pasid_free_table(info->dev);
4099 }
4100
4101 spin_lock_irqsave(&domain->lock, flags);
4102 list_del(&info->link);
4103 spin_unlock_irqrestore(&domain->lock, flags);
4104
4105 domain_detach_iommu(domain, iommu);
4106 info->domain = NULL;
4107 }
4108
4109 static int md_domain_init(struct dmar_domain *domain, int guest_width)
4110 {
4111 int adjust_width;
4112
4113 /* calculate AGAW */
4114 domain->gaw = guest_width;
4115 adjust_width = guestwidth_to_adjustwidth(guest_width);
4116 domain->agaw = width_to_agaw(adjust_width);
4117
4118 domain->iommu_coherency = false;
4119 domain->iommu_superpage = 0;
4120 domain->max_addr = 0;
4121
4122 /* always allocate the top pgd */
4123 domain->pgd = alloc_pgtable_page(domain->nid);
4124 if (!domain->pgd)
4125 return -ENOMEM;
4126 domain_flush_cache(domain, domain->pgd, PAGE_SIZE);
4127 return 0;
4128 }
4129
4130 static struct iommu_domain *intel_iommu_domain_alloc(unsigned type)
4131 {
4132 struct dmar_domain *dmar_domain;
4133 struct iommu_domain *domain;
4134
4135 switch (type) {
4136 case IOMMU_DOMAIN_DMA:
4137 case IOMMU_DOMAIN_DMA_FQ:
4138 case IOMMU_DOMAIN_UNMANAGED:
4139 dmar_domain = alloc_domain(type);
4140 if (!dmar_domain) {
4141 pr_err("Can't allocate dmar_domain\n");
4142 return NULL;
4143 }
4144 if (md_domain_init(dmar_domain, DEFAULT_DOMAIN_ADDRESS_WIDTH)) {
4145 pr_err("Domain initialization failed\n");
4146 domain_exit(dmar_domain);
4147 return NULL;
4148 }
4149
4150 domain = &dmar_domain->domain;
4151 domain->geometry.aperture_start = 0;
4152 domain->geometry.aperture_end =
4153 __DOMAIN_MAX_ADDR(dmar_domain->gaw);
4154 domain->geometry.force_aperture = true;
4155
4156 return domain;
4157 case IOMMU_DOMAIN_IDENTITY:
4158 return &si_domain->domain;
4159 default:
4160 return NULL;
4161 }
4162
4163 return NULL;
4164 }
4165
4166 static void intel_iommu_domain_free(struct iommu_domain *domain)
4167 {
4168 if (domain != &si_domain->domain)
4169 domain_exit(to_dmar_domain(domain));
4170 }
4171
4172 static int prepare_domain_attach_device(struct iommu_domain *domain,
4173 struct device *dev)
4174 {
4175 struct dmar_domain *dmar_domain = to_dmar_domain(domain);
4176 struct intel_iommu *iommu;
4177 int addr_width;
4178
4179 iommu = device_to_iommu(dev, NULL, NULL);
4180 if (!iommu)
4181 return -ENODEV;
4182
4183 if (dmar_domain->force_snooping && !ecap_sc_support(iommu->ecap))
4184 return -EOPNOTSUPP;
4185
4186 /* check if this iommu agaw is sufficient for max mapped address */
4187 addr_width = agaw_to_width(iommu->agaw);
4188 if (addr_width > cap_mgaw(iommu->cap))
4189 addr_width = cap_mgaw(iommu->cap);
4190
4191 if (dmar_domain->max_addr > (1LL << addr_width)) {
4192 dev_err(dev, "%s: iommu width (%d) is not "
4193 "sufficient for the mapped address (%llx)\n",
4194 __func__, addr_width, dmar_domain->max_addr);
4195 return -EFAULT;
4196 }
4197 dmar_domain->gaw = addr_width;
4198
4199 /*
4200  * Knock out extra levels of page tables if necessary
4201  */
4202 while (iommu->agaw < dmar_domain->agaw) {
4203 struct dma_pte *pte;
4204
4205 pte = dmar_domain->pgd;
4206 if (dma_pte_present(pte)) {
4207 dmar_domain->pgd = phys_to_virt(dma_pte_addr(pte));
4208 free_pgtable_page(pte);
4209 }
4210 dmar_domain->agaw--;
4211 }
4212
4213 return 0;
4214 }
4215
4216 static int intel_iommu_attach_device(struct iommu_domain *domain,
4217 struct device *dev)
4218 {
4219 int ret;
4220
4221 if (domain->type == IOMMU_DOMAIN_UNMANAGED &&
4222 device_is_rmrr_locked(dev)) {
4223 dev_warn(dev, "Device is ineligible for IOMMU domain attach due to platform RMRR requirement. Contact your platform vendor.\n");
4224 return -EPERM;
4225 }
4226
4227 /* normally dev is not mapped */
4228 if (unlikely(domain_context_mapped(dev))) {
4229 struct device_domain_info *info = dev_iommu_priv_get(dev);
4230
4231 if (info->domain)
4232 dmar_remove_one_dev_info(dev);
4233 }
4234
4235 ret = prepare_domain_attach_device(domain, dev);
4236 if (ret)
4237 return ret;
4238
4239 return domain_add_dev_info(to_dmar_domain(domain), dev);
4240 }
4241
4242 static void intel_iommu_detach_device(struct iommu_domain *domain,
4243 struct device *dev)
4244 {
4245 dmar_remove_one_dev_info(dev);
4246 }
4247
4248 static int intel_iommu_map(struct iommu_domain *domain,
4249 unsigned long iova, phys_addr_t hpa,
4250 size_t size, int iommu_prot, gfp_t gfp)
4251 {
4252 struct dmar_domain *dmar_domain = to_dmar_domain(domain);
4253 u64 max_addr;
4254 int prot = 0;
4255
4256 if (iommu_prot & IOMMU_READ)
4257 prot |= DMA_PTE_READ;
4258 if (iommu_prot & IOMMU_WRITE)
4259 prot |= DMA_PTE_WRITE;
4260 if (dmar_domain->set_pte_snp)
4261 prot |= DMA_PTE_SNP;
4262
4263 max_addr = iova + size;
4264 if (dmar_domain->max_addr < max_addr) {
4265 u64 end;
4266
4267 /* check if minimum agaw is sufficient for mapped address */
4268 end = __DOMAIN_MAX_ADDR(dmar_domain->gaw) + 1;
4269 if (end < max_addr) {
4270 pr_err("%s: iommu width (%d) is not "
4271 "sufficient for the mapped address (%llx)\n",
4272 __func__, dmar_domain->gaw, max_addr);
4273 return -EFAULT;
4274 }
4275 dmar_domain->max_addr = max_addr;
4276 }
4277
4278 /* Round up size to cover every page touched by [hpa, hpa + size) */
4279 size = aligned_nrpages(hpa, size);
4280 return __domain_mapping(dmar_domain, iova >> VTD_PAGE_SHIFT,
4281 hpa >> VTD_PAGE_SHIFT, size, prot);
4282 }
4283
4284 static int intel_iommu_map_pages(struct iommu_domain *domain,
4285 unsigned long iova, phys_addr_t paddr,
4286 size_t pgsize, size_t pgcount,
4287 int prot, gfp_t gfp, size_t *mapped)
4288 {
4289 unsigned long pgshift = __ffs(pgsize);
4290 size_t size = pgcount << pgshift;
4291 int ret;
4292
4293 if (pgsize != SZ_4K && pgsize != SZ_2M && pgsize != SZ_1G)
4294 return -EINVAL;
4295
4296 if (!IS_ALIGNED(iova | paddr, pgsize))
4297 return -EINVAL;
4298
4299 ret = intel_iommu_map(domain, iova, paddr, size, prot, gfp);
4300 if (!ret && mapped)
4301 *mapped = size;
4302
4303 return ret;
4304 }
4305
4306 static size_t intel_iommu_unmap(struct iommu_domain *domain,
4307 unsigned long iova, size_t size,
4308 struct iommu_iotlb_gather *gather)
4309 {
4310 struct dmar_domain *dmar_domain = to_dmar_domain(domain);
4311 unsigned long start_pfn, last_pfn;
4312 int level = 0;
4313
4314 /* Cope with horrid API which requires us to unmap more than the
4315    size argument if it happens to be a large-page mapping. */
4316 BUG_ON(!pfn_to_dma_pte(dmar_domain, iova >> VTD_PAGE_SHIFT, &level));
4317
4318 if (size < VTD_PAGE_SIZE << level_to_offset_bits(level))
4319 size = VTD_PAGE_SIZE << level_to_offset_bits(level);
4320
4321 start_pfn = iova >> VTD_PAGE_SHIFT;
4322 last_pfn = (iova + size - 1) >> VTD_PAGE_SHIFT;
4323
4324 domain_unmap(dmar_domain, start_pfn, last_pfn, &gather->freelist);
4325
4326 if (dmar_domain->max_addr == iova + size)
4327 dmar_domain->max_addr = iova;
4328
4329 iommu_iotlb_gather_add_page(domain, gather, iova, size);
4330
4331 return size;
4332 }
4333
4334 static size_t intel_iommu_unmap_pages(struct iommu_domain *domain,
4335 unsigned long iova,
4336 size_t pgsize, size_t pgcount,
4337 struct iommu_iotlb_gather *gather)
4338 {
4339 unsigned long pgshift = __ffs(pgsize);
4340 size_t size = pgcount << pgshift;
4341
4342 return intel_iommu_unmap(domain, iova, size, gather);
4343 }
4344
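/*
 * Flush the IOTLB of every IOMMU attached to the domain for the range
 * accumulated in @gather by the preceding unmap calls, then release the
 * page table pages that were queued for freeing.
 */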
4345 static void intel_iommu_tlb_sync(struct iommu_domain *domain,
4346 struct iommu_iotlb_gather *gather)
4347 {
4348 struct dmar_domain *dmar_domain = to_dmar_domain(domain);
4349 unsigned long iova_pfn = IOVA_PFN(gather->start);
4350 size_t size = gather->end - gather->start;
4351 struct iommu_domain_info *info;
4352 unsigned long start_pfn;
4353 unsigned long nrpages;
4354 unsigned long i;
4355
4356 nrpages = aligned_nrpages(gather->start, size);
4357 start_pfn = mm_to_dma_pfn(iova_pfn);
4358
4359 xa_for_each(&dmar_domain->iommu_array, i, info)
4360 iommu_flush_iotlb_psi(info->iommu, dmar_domain,
4361 start_pfn, nrpages,
4362 list_empty(&gather->freelist), 0);
4363
4364 put_pages_list(&gather->freelist);
4365 }
4366
4367 static phys_addr_t intel_iommu_iova_to_phys(struct iommu_domain *domain,
4368 dma_addr_t iova)
4369 {
4370 struct dmar_domain *dmar_domain = to_dmar_domain(domain);
4371 struct dma_pte *pte;
4372 int level = 0;
4373 u64 phys = 0;
4374
4375 pte = pfn_to_dma_pte(dmar_domain, iova >> VTD_PAGE_SHIFT, &level);
4376 if (pte && dma_pte_present(pte))
4377 phys = dma_pte_addr(pte) +
4378 (iova & (BIT_MASK(level_to_offset_bits(level) +
4379 VTD_PAGE_SHIFT) - 1));
4380
4381 return phys;
4382 }
4383
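/*
 * Force-snooping can only be enforced if every IOMMU that has devices in
 * this domain supports snoop control (the ecap SC bit).
 */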
4384 static bool domain_support_force_snooping(struct dmar_domain *domain)
4385 {
4386 struct device_domain_info *info;
4387 bool support = true;
4388
4389 assert_spin_locked(&domain->lock);
4390 list_for_each_entry(info, &domain->devices, link) {
4391 if (!ecap_sc_support(info->iommu->ecap)) {
4392 support = false;
4393 break;
4394 }
4395 }
4396
4397 return support;
4398 }
4399
4400 static void domain_set_force_snooping(struct dmar_domain *domain)
4401 {
4402 struct device_domain_info *info;
4403
4404 assert_spin_locked(&domain->lock);
4405
4406 /* Second-level page tables support per-PTE snoop control; the SNP
4407  * bit is set at map time via set_pte_snp. First-level tables need
4408  * the per-device PASID entries updated instead. */
4409 if (!domain_use_first_level(domain)) {
4410 domain->set_pte_snp = true;
4411 return;
4412 }
4413
4414 list_for_each_entry(info, &domain->devices, link)
4415 intel_pasid_setup_page_snoop_control(info->iommu, info->dev,
4416 PASID_RID2PASID);
4417 }
4418
4419 static bool intel_iommu_enforce_cache_coherency(struct iommu_domain *domain)
4420 {
4421 struct dmar_domain *dmar_domain = to_dmar_domain(domain);
4422 unsigned long flags;
4423
4424 if (dmar_domain->force_snooping)
4425 return true;
4426
4427 spin_lock_irqsave(&dmar_domain->lock, flags);
4428 if (!domain_support_force_snooping(dmar_domain)) {
4429 spin_unlock_irqrestore(&dmar_domain->lock, flags);
4430 return false;
4431 }
4432
4433 domain_set_force_snooping(dmar_domain);
4434 dmar_domain->force_snooping = true;
4435 spin_unlock_irqrestore(&dmar_domain->lock, flags);
4436
4437 return true;
4438 }
4439
4440 static bool intel_iommu_capable(enum iommu_cap cap)
4441 {
4442 if (cap == IOMMU_CAP_CACHE_COHERENCY)
4443 return true;
4444 if (cap == IOMMU_CAP_INTR_REMAP)
4445 return irq_remapping_enabled == 1;
4446 if (cap == IOMMU_CAP_PRE_BOOT_PROTECTION)
4447 return dmar_platform_optin();
4448
4449 return false;
4450 }
4451
4452 static struct iommu_device *intel_iommu_probe_device(struct device *dev)
4453 {
4454 struct pci_dev *pdev = dev_is_pci(dev) ? to_pci_dev(dev) : NULL;
4455 struct device_domain_info *info;
4456 struct intel_iommu *iommu;
4457 u8 bus, devfn;
4458
4459 iommu = device_to_iommu(dev, &bus, &devfn);
4460 if (!iommu)
4461 return ERR_PTR(-ENODEV);
4462
4463 info = kzalloc(sizeof(*info), GFP_KERNEL);
4464 if (!info)
4465 return ERR_PTR(-ENOMEM);
4466
4467 if (dev_is_real_dma_subdevice(dev)) {
4468 info->bus = pdev->bus->number;
4469 info->devfn = pdev->devfn;
4470 info->segment = pci_domain_nr(pdev->bus);
4471 } else {
4472 info->bus = bus;
4473 info->devfn = devfn;
4474 info->segment = iommu->segment;
4475 }
4476
4477 info->dev = dev;
4478 info->iommu = iommu;
4479 if (dev_is_pci(dev)) {
4480 if (ecap_dev_iotlb_support(iommu->ecap) &&
4481 pci_ats_supported(pdev) &&
4482 dmar_ats_supported(pdev, iommu))
4483 info->ats_supported = 1;
4484
4485 if (sm_supported(iommu)) {
4486 if (pasid_supported(iommu)) {
4487 int features = pci_pasid_features(pdev);
4488
4489 if (features >= 0)
4490 info->pasid_supported = features | 1;
4491 }
4492
4493 if (info->ats_supported && ecap_prs(iommu->ecap) &&
4494 pci_pri_supported(pdev))
4495 info->pri_supported = 1;
4496 }
4497 }
4498
4499 dev_iommu_priv_set(dev, info);
4500
4501 return &iommu->iommu;
4502 }
4503
4504 static void intel_iommu_release_device(struct device *dev)
4505 {
4506 struct device_domain_info *info = dev_iommu_priv_get(dev);
4507
4508 dmar_remove_one_dev_info(dev);
4509 dev_iommu_priv_set(dev, NULL);
4510 kfree(info);
4511 set_dma_ops(dev, NULL);
4512 }
4513
4514 static void intel_iommu_probe_finalize(struct device *dev)
4515 {
4516 set_dma_ops(dev, NULL);
4517 iommu_setup_dma_ops(dev, 0, U64_MAX);
4518 }
4519
4520 static void intel_iommu_get_resv_regions(struct device *device,
4521 struct list_head *head)
4522 {
4523 int prot = DMA_PTE_READ | DMA_PTE_WRITE;
4524 struct iommu_resv_region *reg;
4525 struct dmar_rmrr_unit *rmrr;
4526 struct device *i_dev;
4527 int i;
4528
4529 down_read(&dmar_global_lock);
4530 for_each_rmrr_units(rmrr) {
4531 for_each_active_dev_scope(rmrr->devices, rmrr->devices_cnt,
4532 i, i_dev) {
4533 struct iommu_resv_region *resv;
4534 enum iommu_resv_type type;
4535 size_t length;
4536
4537 if (i_dev != device &&
4538 !is_downstream_to_pci_bridge(device, i_dev))
4539 continue;
4540
4541 length = rmrr->end_address - rmrr->base_address + 1;
4542
4543 type = device_rmrr_is_relaxable(device) ?
4544 IOMMU_RESV_DIRECT_RELAXABLE : IOMMU_RESV_DIRECT;
4545
4546 resv = iommu_alloc_resv_region(rmrr->base_address,
4547 length, prot, type);
4548 if (!resv)
4549 break;
4550
4551 list_add_tail(&resv->list, head);
4552 }
4553 }
4554 up_read(&dmar_global_lock);
4555
4556 #ifdef CONFIG_INTEL_IOMMU_FLOPPY_WA
4557 if (dev_is_pci(device)) {
4558 struct pci_dev *pdev = to_pci_dev(device);
4559
4560 if ((pdev->class >> 8) == PCI_CLASS_BRIDGE_ISA) {
4561 reg = iommu_alloc_resv_region(0, 1UL << 24, prot,
4562 IOMMU_RESV_DIRECT_RELAXABLE);
4563 if (reg)
4564 list_add_tail(&reg->list, head);
4565 }
4566 }
4567 #endif
4568
4569 reg = iommu_alloc_resv_region(IOAPIC_RANGE_START,
4570 IOAPIC_RANGE_END - IOAPIC_RANGE_START + 1,
4571 0, IOMMU_RESV_MSI);
4572 if (!reg)
4573 return;
4574 list_add_tail(&reg->list, head);
4575 }
4576
4577 int intel_iommu_enable_pasid(struct intel_iommu *iommu, struct device *dev)
4578 {
4579 struct device_domain_info *info = dev_iommu_priv_get(dev);
4580 struct context_entry *context;
4581 struct dmar_domain *domain;
4582 u64 ctx_lo;
4583 int ret;
4584
4585 domain = info->domain;
4586 if (!domain)
4587 return -EINVAL;
4588
4589 spin_lock(&iommu->lock);
4590 ret = -EINVAL;
4591 if (!info->pasid_supported)
4592 goto out;
4593
4594 context = iommu_context_addr(iommu, info->bus, info->devfn, 0);
4595 if (WARN_ON(!context))
4596 goto out;
4597
4598 ctx_lo = context[0].lo;
4599
4600 if (!(ctx_lo & CONTEXT_PASIDE)) {
4601 ctx_lo |= CONTEXT_PASIDE;
4602 context[0].lo = ctx_lo;
4603 wmb();
4604 iommu->flush.flush_context(iommu,
4605 domain_id_iommu(domain, iommu),
4606 PCI_DEVID(info->bus, info->devfn),
4607 DMA_CCMD_MASK_NOBIT,
4608 DMA_CCMD_DEVICE_INVL);
4609 }
4610
4611 /* Enable PASID support in the device, if it is not already enabled: */
4612 if (!info->pasid_enabled)
4613 iommu_enable_dev_iotlb(info);
4614
4615 ret = 0;
4616
4617 out:
4618 spin_unlock(&iommu->lock);
4619
4620 return ret;
4621 }
4622
4623 static struct iommu_group *intel_iommu_device_group(struct device *dev)
4624 {
4625 if (dev_is_pci(dev))
4626 return pci_device_group(dev);
4627 return generic_device_group(dev);
4628 }
4629
4630 static int intel_iommu_enable_sva(struct device *dev)
4631 {
4632 struct device_domain_info *info = dev_iommu_priv_get(dev);
4633 struct intel_iommu *iommu;
4634 int ret;
4635
4636 if (!info || dmar_disabled)
4637 return -EINVAL;
4638
4639 iommu = info->iommu;
4640 if (!iommu)
4641 return -EINVAL;
4642
4643 if (!(iommu->flags & VTD_FLAG_SVM_CAPABLE))
4644 return -ENODEV;
4645
4646 if (intel_iommu_enable_pasid(iommu, dev))
4647 return -ENODEV;
4648
4649 if (!info->pasid_enabled || !info->pri_enabled || !info->ats_enabled)
4650 return -EINVAL;
4651
4652 ret = iopf_queue_add_device(iommu->iopf_queue, dev);
4653 if (!ret)
4654 ret = iommu_register_device_fault_handler(dev, iommu_queue_iopf, dev);
4655
4656 return ret;
4657 }
4658
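/* Reverse of intel_iommu_enable_sva(): unregister the fault handler and drop the device from the IOPF queue. */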
4659 static int intel_iommu_disable_sva(struct device *dev)
4660 {
4661 struct device_domain_info *info = dev_iommu_priv_get(dev);
4662 struct intel_iommu *iommu = info->iommu;
4663 int ret;
4664
4665 ret = iommu_unregister_device_fault_handler(dev);
4666 if (!ret)
4667 ret = iopf_queue_remove_device(iommu->iopf_queue, dev);
4668
4669 return ret;
4670 }
4671
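/* I/O page faults require Page Request Interface support on the device. */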
4672 static int intel_iommu_enable_iopf(struct device *dev)
4673 {
4674 struct device_domain_info *info = dev_iommu_priv_get(dev);
4675
4676 if (info && info->pri_supported)
4677 return 0;
4678
4679 return -ENODEV;
4680 }
4681
4682 static int
4683 intel_iommu_dev_enable_feat(struct device *dev, enum iommu_dev_features feat)
4684 {
4685 switch (feat) {
4686 case IOMMU_DEV_FEAT_IOPF:
4687 return intel_iommu_enable_iopf(dev);
4688
4689 case IOMMU_DEV_FEAT_SVA:
4690 return intel_iommu_enable_sva(dev);
4691
4692 default:
4693 return -ENODEV;
4694 }
4695 }
4696
4697 static int
4698 intel_iommu_dev_disable_feat(struct device *dev, enum iommu_dev_features feat)
4699 {
4700 switch (feat) {
4701 case IOMMU_DEV_FEAT_IOPF:
4702 return 0;
4703
4704 case IOMMU_DEV_FEAT_SVA:
4705 return intel_iommu_disable_sva(dev);
4706
4707 default:
4708 return -ENODEV;
4709 }
4710 }
4711
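/*
 * Defer attaching a domain when the IOMMU came up with translation already
 * enabled (e.g. tables inherited from a previous kernel) and the device has
 * not been assigned a domain yet.
 */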
4712 static bool intel_iommu_is_attach_deferred(struct device *dev)
4713 {
4714 struct device_domain_info *info = dev_iommu_priv_get(dev);
4715
4716 return translation_pre_enabled(info->iommu) && !info->domain;
4717 }
4718
4719 /*
4720  * Check that the device does not live on an external-facing PCI port that is
4721  * marked untrusted. Such devices should not be able to apply quirks and thus
4722  * not be able to bypass the IOMMU restrictions.
4723  */
4724 static bool risky_device(struct pci_dev *pdev)
4725 {
4726 if (pdev->untrusted) {
4727 pci_info(pdev,
4728 "Skipping IOMMU quirk for dev [%04X:%04X] on untrusted PCI link\n",
4729 pdev->vendor, pdev->device);
4730 pci_info(pdev, "Please check with your BIOS/Platform vendor about this\n");
4731 return true;
4732 }
4733 return false;
4734 }
4735
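/* After a successful map, notify each IOMMU the domain is attached to about the newly mapped PFN range. */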
4736 static void intel_iommu_iotlb_sync_map(struct iommu_domain *domain,
4737 unsigned long iova, size_t size)
4738 {
4739 struct dmar_domain *dmar_domain = to_dmar_domain(domain);
4740 unsigned long pages = aligned_nrpages(iova, size);
4741 unsigned long pfn = iova >> VTD_PAGE_SHIFT;
4742 struct iommu_domain_info *info;
4743 unsigned long i;
4744
4745 xa_for_each(&dmar_domain->iommu_array, i, info)
4746 __mapping_notify_one(info->iommu, dmar_domain, pfn, pages);
4747 }
4748
4749 const struct iommu_ops intel_iommu_ops = {
4750 .capable = intel_iommu_capable,
4751 .domain_alloc = intel_iommu_domain_alloc,
4752 .probe_device = intel_iommu_probe_device,
4753 .probe_finalize = intel_iommu_probe_finalize,
4754 .release_device = intel_iommu_release_device,
4755 .get_resv_regions = intel_iommu_get_resv_regions,
4756 .device_group = intel_iommu_device_group,
4757 .dev_enable_feat = intel_iommu_dev_enable_feat,
4758 .dev_disable_feat = intel_iommu_dev_disable_feat,
4759 .is_attach_deferred = intel_iommu_is_attach_deferred,
4760 .def_domain_type = device_def_domain_type,
4761 .pgsize_bitmap = SZ_4K,
4762 #ifdef CONFIG_INTEL_IOMMU_SVM
4763 .sva_bind = intel_svm_bind,
4764 .sva_unbind = intel_svm_unbind,
4765 .sva_get_pasid = intel_svm_get_pasid,
4766 .page_response = intel_svm_page_response,
4767 #endif
4768 .default_domain_ops = &(const struct iommu_domain_ops) {
4769 .attach_dev = intel_iommu_attach_device,
4770 .detach_dev = intel_iommu_detach_device,
4771 .map_pages = intel_iommu_map_pages,
4772 .unmap_pages = intel_iommu_unmap_pages,
4773 .iotlb_sync_map = intel_iommu_iotlb_sync_map,
4774 .flush_iotlb_all = intel_flush_iotlb_all,
4775 .iotlb_sync = intel_iommu_tlb_sync,
4776 .iova_to_phys = intel_iommu_iova_to_phys,
4777 .free = intel_iommu_domain_free,
4778 .enforce_cache_coherency = intel_iommu_enforce_cache_coherency,
4779 }
4780 };
4781
4782 static void quirk_iommu_igfx(struct pci_dev *dev)
4783 {
4784 if (risky_device(dev))
4785 return;
4786
4787 pci_info(dev, "Disabling IOMMU for graphics on this chipset\n");
4788 dmar_map_gfx = 0;
4789 }
4790
4791 /* Integrated graphics DMAR support on G4x/GM45 chipsets is broken; don't translate gfx. */
4792 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2a40, quirk_iommu_igfx);
4793 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e00, quirk_iommu_igfx);
4794 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e10, quirk_iommu_igfx);
4795 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e20, quirk_iommu_igfx);
4796 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e30, quirk_iommu_igfx);
4797 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e40, quirk_iommu_igfx);
4798 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e90, quirk_iommu_igfx);
4799
4800 /* Broadwell integrated graphics malfunctions with DMAR enabled. */
4801 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x1606, quirk_iommu_igfx);
4802 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x160B, quirk_iommu_igfx);
4803 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x160E, quirk_iommu_igfx);
4804 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x1602, quirk_iommu_igfx);
4805 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x160A, quirk_iommu_igfx);
4806 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x160D, quirk_iommu_igfx);
4807 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x1616, quirk_iommu_igfx);
4808 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x161B, quirk_iommu_igfx);
4809 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x161E, quirk_iommu_igfx);
4810 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x1612, quirk_iommu_igfx);
4811 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x161A, quirk_iommu_igfx);
4812 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x161D, quirk_iommu_igfx);
4813 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x1626, quirk_iommu_igfx);
4814 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x162B, quirk_iommu_igfx);
4815 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x162E, quirk_iommu_igfx);
4816 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x1622, quirk_iommu_igfx);
4817 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x162A, quirk_iommu_igfx);
4818 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x162D, quirk_iommu_igfx);
4819 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x1636, quirk_iommu_igfx);
4820 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x163B, quirk_iommu_igfx);
4821 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x163E, quirk_iommu_igfx);
4822 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x1632, quirk_iommu_igfx);
4823 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x163A, quirk_iommu_igfx);
4824 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x163D, quirk_iommu_igfx);
4825
4826 static void quirk_iommu_rwbf(struct pci_dev *dev)
4827 {
4828 if (risky_device(dev))
4829 return;
4830
4831 /*
4832  * Mobile 4 Series Chipset neglects to set the RWBF capability
4833  * but needs it. The same seems to hold for the desktop versions.
4834  */
4835 pci_info(dev, "Forcing write-buffer flush capability\n");
4836 rwbf_quirk = 1;
4837 }
4838
4839 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2a40, quirk_iommu_rwbf);
4840 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e00, quirk_iommu_rwbf);
4841 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e10, quirk_iommu_rwbf);
4842 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e20, quirk_iommu_rwbf);
4843 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e30, quirk_iommu_rwbf);
4844 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e40, quirk_iommu_rwbf);
4845 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e90, quirk_iommu_rwbf);
4846
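/* GMCH Graphics Control register fields used by the Calpella/Ironlake quirk below. */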
4847 #define GGC 0x52
4848 #define GGC_MEMORY_SIZE_MASK (0xf << 8)
4849 #define GGC_MEMORY_SIZE_NONE (0x0 << 8)
4850 #define GGC_MEMORY_SIZE_1M (0x1 << 8)
4851 #define GGC_MEMORY_SIZE_2M (0x3 << 8)
4852 #define GGC_MEMORY_VT_ENABLED (0x8 << 8)
4853 #define GGC_MEMORY_SIZE_2M_VT (0x9 << 8)
4854 #define GGC_MEMORY_SIZE_3M_VT (0xa << 8)
4855 #define GGC_MEMORY_SIZE_4M_VT (0xb << 8)
4856
4857 static void quirk_calpella_no_shadow_gtt(struct pci_dev *dev)
4858 {
4859 unsigned short ggc;
4860
4861 if (risky_device(dev))
4862 return;
4863
4864 if (pci_read_config_word(dev, GGC, &ggc))
4865 return;
4866
4867 if (!(ggc & GGC_MEMORY_VT_ENABLED)) {
4868 pci_info(dev, "BIOS has allocated no shadow GTT; disabling IOMMU for graphics\n");
4869 dmar_map_gfx = 0;
4870 } else if (dmar_map_gfx) {
4871 /* We have to ensure the graphics device is idle before we flush. */
4872 pci_info(dev, "Disabling batched IOTLB flush on Ironlake\n");
4873 iommu_set_dma_strict();
4874 }
4875 }
4876 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x0040, quirk_calpella_no_shadow_gtt);
4877 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x0044, quirk_calpella_no_shadow_gtt);
4878 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x0062, quirk_calpella_no_shadow_gtt);
4879 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x006a, quirk_calpella_no_shadow_gtt);
4880
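/*
 * Make the driver skip disabling DMA translation for these integrated
 * graphics devices (iommu_skip_te_disable).
 */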
4881 static void quirk_igfx_skip_te_disable(struct pci_dev *dev)
4882 {
4883 unsigned short ver;
4884
4885 if (!IS_GFX_DEVICE(dev))
4886 return;
4887
4888 ver = (dev->device >> 8) & 0xff;
4889 if (ver != 0x45 && ver != 0x46 && ver != 0x4c &&
4890 ver != 0x4e && ver != 0x8a && ver != 0x98 &&
4891 ver != 0x9a && ver != 0xa7)
4892 return;
4893
4894 if (risky_device(dev))
4895 return;
4896
4897 pci_info(dev, "Skip IOMMU disabling for graphics\n");
4898 iommu_skip_te_disable = 1;
4899 }
4900 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, PCI_ANY_ID, quirk_igfx_skip_te_disable);
4901
4902 /*
4903  * On Tylersburg chipsets, some BIOSes have been known to enable the ISOCH
4904  * DMAR unit for the Azalia sound device, but not give it any TLB entries,
4905  * which causes it to deadlock. Check for that. We do this in a function
4906  * called from init_dmar(), instead of in a PCI quirk, because we don't want
4907  * to print the obnoxious "BIOS broken" message if VT-d is actually disabled.
4908  */
4909 static void __init check_tylersburg_isoch(void)
4910 {
4911 struct pci_dev *pdev;
4912 uint32_t vtisochctrl;
4913
4914 /* If there's no Azalia in the system anyway, forget it. */
4915 pdev = pci_get_device(PCI_VENDOR_ID_INTEL, 0x3a3e, NULL);
4916 if (!pdev)
4917 return;
4918
4919 if (risky_device(pdev)) {
4920 pci_dev_put(pdev);
4921 return;
4922 }
4923
4924 pci_dev_put(pdev);
4925
4926 /* System Management Registers. Might be hidden, in which case we can't do
4927  * the sanity check. But that's OK, because the known-broken BIOSes don't
4928  * actually hide it, so far. */
4929 pdev = pci_get_device(PCI_VENDOR_ID_INTEL, 0x342e, NULL);
4930 if (!pdev)
4931 return;
4932
4933 if (risky_device(pdev)) {
4934 pci_dev_put(pdev);
4935 return;
4936 }
4937
4938 if (pci_read_config_dword(pdev, 0x188, &vtisochctrl)) {
4939 pci_dev_put(pdev);
4940 return;
4941 }
4942
4943 pci_dev_put(pdev);
4944
4945 /* If Azalia DMA is routed to the non-isoch DMAR unit, we're fine. */
4946 if (vtisochctrl & 1)
4947 return;
4948
4949 /* Drop all bits other than the number of TLB entries. */
4950 vtisochctrl &= 0x1c;
4951
4952 /* If we have the recommended number of TLB entries (16), we're fine. */
4953 if (vtisochctrl == 0x10)
4954 return;
4955
4956 /* Zero TLB entries means the ISOCH DMAR unit cannot work at all; warn and identity-map Azalia. */
4957 if (!vtisochctrl) {
4958 WARN(1, "Your BIOS is broken; DMA routed to ISOCH DMAR unit but no TLB space.\n"
4959 "BIOS vendor: %s; Ver: %s; Product Version: %s\n",
4960 dmi_get_system_info(DMI_BIOS_VENDOR),
4961 dmi_get_system_info(DMI_BIOS_VERSION),
4962 dmi_get_system_info(DMI_PRODUCT_VERSION));
4963 iommu_identity_mapping |= IDENTMAP_AZALIA;
4964 return;
4965 }
4966
4967 pr_warn("Recommended TLB entries for ISOCH unit is 16; your BIOS set %d\n",
4968 vtisochctrl);
4969 }