0001 // SPDX-License-Identifier: GPL-2.0-only
0002 /*
0003  * Copyright (C) 2007-2010 Advanced Micro Devices, Inc.
0004  * Author: Joerg Roedel <jroedel@suse.de>
0005  *         Leo Duran <leo.duran@amd.com>
0006  */
0007 
0008 #define pr_fmt(fmt)     "AMD-Vi: " fmt
0009 #define dev_fmt(fmt)    pr_fmt(fmt)
0010 
0011 #include <linux/pci.h>
0012 #include <linux/acpi.h>
0013 #include <linux/list.h>
0014 #include <linux/bitmap.h>
0015 #include <linux/slab.h>
0016 #include <linux/syscore_ops.h>
0017 #include <linux/interrupt.h>
0018 #include <linux/msi.h>
0019 #include <linux/irq.h>
0020 #include <linux/amd-iommu.h>
0021 #include <linux/export.h>
0022 #include <linux/kmemleak.h>
0023 #include <linux/cc_platform.h>
0024 #include <linux/iopoll.h>
0025 #include <asm/pci-direct.h>
0026 #include <asm/iommu.h>
0027 #include <asm/apic.h>
0028 #include <asm/gart.h>
0029 #include <asm/x86_init.h>
0030 #include <asm/io_apic.h>
0031 #include <asm/irq_remapping.h>
0032 #include <asm/set_memory.h>
0033 
0034 #include <linux/crash_dump.h>
0035 
0036 #include "amd_iommu.h"
0037 #include "../irq_remapping.h"
0038 
0039 /*
0040  * definitions for the ACPI scanning code
0041  */
0042 #define IVRS_HEADER_LENGTH 48
0043 
0044 #define ACPI_IVHD_TYPE_MAX_SUPPORTED    0x40
0045 #define ACPI_IVMD_TYPE_ALL              0x20
0046 #define ACPI_IVMD_TYPE                  0x21
0047 #define ACPI_IVMD_TYPE_RANGE            0x22
0048 
0049 #define IVHD_DEV_ALL                    0x01
0050 #define IVHD_DEV_SELECT                 0x02
0051 #define IVHD_DEV_SELECT_RANGE_START     0x03
0052 #define IVHD_DEV_RANGE_END              0x04
0053 #define IVHD_DEV_ALIAS                  0x42
0054 #define IVHD_DEV_ALIAS_RANGE            0x43
0055 #define IVHD_DEV_EXT_SELECT             0x46
0056 #define IVHD_DEV_EXT_SELECT_RANGE       0x47
0057 #define IVHD_DEV_SPECIAL        0x48
0058 #define IVHD_DEV_ACPI_HID       0xf0
0059 
0060 #define UID_NOT_PRESENT                 0
0061 #define UID_IS_INTEGER                  1
0062 #define UID_IS_CHARACTER                2
0063 
0064 #define IVHD_SPECIAL_IOAPIC     1
0065 #define IVHD_SPECIAL_HPET       2
0066 
0067 #define IVHD_FLAG_HT_TUN_EN_MASK        0x01
0068 #define IVHD_FLAG_PASSPW_EN_MASK        0x02
0069 #define IVHD_FLAG_RESPASSPW_EN_MASK     0x04
0070 #define IVHD_FLAG_ISOC_EN_MASK          0x08
0071 
0072 #define IVMD_FLAG_EXCL_RANGE            0x08
0073 #define IVMD_FLAG_IW                    0x04
0074 #define IVMD_FLAG_IR                    0x02
0075 #define IVMD_FLAG_UNITY_MAP             0x01
0076 
0077 #define ACPI_DEVFLAG_INITPASS           0x01
0078 #define ACPI_DEVFLAG_EXTINT             0x02
0079 #define ACPI_DEVFLAG_NMI                0x04
0080 #define ACPI_DEVFLAG_SYSMGT1            0x10
0081 #define ACPI_DEVFLAG_SYSMGT2            0x20
0082 #define ACPI_DEVFLAG_LINT0              0x40
0083 #define ACPI_DEVFLAG_LINT1              0x80
0084 #define ACPI_DEVFLAG_ATSDIS             0x10000000
0085 
0086 #define LOOP_TIMEOUT    2000000
0087 
0088 #define IVRS_GET_SBDF_ID(seg, bus, dev, fn) (((seg & 0xffff) << 16) | ((bus & 0xff) << 8) \
0089                          | ((dev & 0x1f) << 3) | (fn & 0x7))
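     /*
      * Layout of the packed SBDF value built above:
      * bits [31:16] PCI segment, [15:8] bus, [7:3] device, [2:0] function.
      */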
0090 
0091 /*
0092  * ACPI table definitions
0093  *
0094  * These data structures are laid over the table to parse the important values
0095  * out of it.
0096  */
0097 
0098 extern const struct iommu_ops amd_iommu_ops;
0099 
0100 /*
0101  * structure describing one IOMMU in the ACPI table. Typically followed by one
0102  * or more ivhd_entry structures.
0103  */
0104 struct ivhd_header {
0105     u8 type;
0106     u8 flags;
0107     u16 length;
0108     u16 devid;
0109     u16 cap_ptr;
0110     u64 mmio_phys;
0111     u16 pci_seg;
0112     u16 info;
0113     u32 efr_attr;
0114 
0115     /* Following only valid on IVHD type 11h and 40h */
0116     u64 efr_reg; /* Exact copy of MMIO_EXT_FEATURES */
0117     u64 efr_reg2;
0118 } __attribute__((packed));
0119 
0120 /*
0121  * A device entry describing which devices a specific IOMMU translates and
0122  * which requestor ids they use.
0123  */
0124 struct ivhd_entry {
0125     u8 type;
0126     u16 devid;
0127     u8 flags;
0128     struct_group(ext_hid,
0129         u32 ext;
0130         u32 hidh;
0131     );
0132     u64 cid;
0133     u8 uidf;
0134     u8 uidl;
0135     u8 uid;
0136 } __attribute__((packed));
0137 
0138 /*
0139  * An AMD IOMMU memory definition structure. It defines things like exclusion
0140  * ranges for devices and regions that should be unity mapped.
0141  */
0142 struct ivmd_header {
0143     u8 type;
0144     u8 flags;
0145     u16 length;
0146     u16 devid;
0147     u16 aux;
0148     u16 pci_seg;
0149     u8  resv[6];
0150     u64 range_start;
0151     u64 range_length;
0152 } __attribute__((packed));
0153 
0154 bool amd_iommu_dump;
0155 bool amd_iommu_irq_remap __read_mostly;
0156 
0157 enum io_pgtable_fmt amd_iommu_pgtable = AMD_IOMMU_V1;
0158 
0159 int amd_iommu_guest_ir = AMD_IOMMU_GUEST_IR_VAPIC;
0160 static int amd_iommu_xt_mode = IRQ_REMAP_XAPIC_MODE;
0161 
0162 static bool amd_iommu_detected;
0163 static bool amd_iommu_disabled __initdata;
0164 static bool amd_iommu_force_enable __initdata;
0165 static int amd_iommu_target_ivhd_type;
0166 
0167 /* Global EFR and EFR2 registers */
0168 u64 amd_iommu_efr;
0169 u64 amd_iommu_efr2;
0170 
0171 /* Is SNP enabled on the system? */
0172 bool amd_iommu_snp_en;
0173 EXPORT_SYMBOL(amd_iommu_snp_en);
0174 
0175 LIST_HEAD(amd_iommu_pci_seg_list);  /* list of all PCI segments */
0176 LIST_HEAD(amd_iommu_list);      /* list of all AMD IOMMUs in the
0177                        system */
0178 
0179 /* Array to assign indices to IOMMUs */
0180 struct amd_iommu *amd_iommus[MAX_IOMMUS];
0181 
0182 /* Number of IOMMUs present in the system */
0183 static int amd_iommus_present;
0184 
0185 /* IOMMUs have a non-present cache? */
0186 bool amd_iommu_np_cache __read_mostly;
0187 bool amd_iommu_iotlb_sup __read_mostly = true;
0188 
0189 u32 amd_iommu_max_pasid __read_mostly = ~0;
0190 
0191 bool amd_iommu_v2_present __read_mostly;
0192 static bool amd_iommu_pc_present __read_mostly;
0193 bool amdr_ivrs_remap_support __read_mostly;
0194 
0195 bool amd_iommu_force_isolation __read_mostly;
0196 
0197 /*
0198  * AMD IOMMU allows up to 2^16 different protection domains. This is a bitmap
0199  * to know which ones are already in use.
0200  */
0201 unsigned long *amd_iommu_pd_alloc_bitmap;
0202 
0203 enum iommu_init_state {
0204     IOMMU_START_STATE,
0205     IOMMU_IVRS_DETECTED,
0206     IOMMU_ACPI_FINISHED,
0207     IOMMU_ENABLED,
0208     IOMMU_PCI_INIT,
0209     IOMMU_INTERRUPTS_EN,
0210     IOMMU_INITIALIZED,
0211     IOMMU_NOT_FOUND,
0212     IOMMU_INIT_ERROR,
0213     IOMMU_CMDLINE_DISABLED,
0214 };
0215 
0216 /* Early ioapic and hpet maps from kernel command line */
0217 #define EARLY_MAP_SIZE      4
0218 static struct devid_map __initdata early_ioapic_map[EARLY_MAP_SIZE];
0219 static struct devid_map __initdata early_hpet_map[EARLY_MAP_SIZE];
0220 static struct acpihid_map_entry __initdata early_acpihid_map[EARLY_MAP_SIZE];
0221 
0222 static int __initdata early_ioapic_map_size;
0223 static int __initdata early_hpet_map_size;
0224 static int __initdata early_acpihid_map_size;
0225 
0226 static bool __initdata cmdline_maps;
0227 
0228 static enum iommu_init_state init_state = IOMMU_START_STATE;
0229 
0230 static int amd_iommu_enable_interrupts(void);
0231 static int __init iommu_go_to_state(enum iommu_init_state state);
0232 static void init_device_table_dma(struct amd_iommu_pci_seg *pci_seg);
0233 
0234 static bool amd_iommu_pre_enabled = true;
0235 
0236 static u32 amd_iommu_ivinfo __initdata;
0237 
0238 bool translation_pre_enabled(struct amd_iommu *iommu)
0239 {
0240     return (iommu->flags & AMD_IOMMU_FLAG_TRANS_PRE_ENABLED);
0241 }
0242 
0243 static void clear_translation_pre_enabled(struct amd_iommu *iommu)
0244 {
0245     iommu->flags &= ~AMD_IOMMU_FLAG_TRANS_PRE_ENABLED;
0246 }
0247 
0248 static void init_translation_status(struct amd_iommu *iommu)
0249 {
0250     u64 ctrl;
0251 
0252     ctrl = readq(iommu->mmio_base + MMIO_CONTROL_OFFSET);
0253     if (ctrl & (1<<CONTROL_IOMMU_EN))
0254         iommu->flags |= AMD_IOMMU_FLAG_TRANS_PRE_ENABLED;
0255 }
0256 
0257 static inline unsigned long tbl_size(int entry_size, int last_bdf)
0258 {
0259     unsigned shift = PAGE_SHIFT +
0260              get_order((last_bdf + 1) * entry_size);
0261 
0262     return 1UL << shift;
0263 }
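     /*
      * tbl_size() rounds (last_bdf + 1) * entry_size up to a power-of-two
      * number of pages (via get_order()) and returns that size in bytes.
      */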
0264 
0265 int amd_iommu_get_num_iommus(void)
0266 {
0267     return amd_iommus_present;
0268 }
0269 
0270 /*
0271  * Iterate through all the IOMMUs to compute the EFR masks common to
0272  * all IOMMUs and warn if an inconsistency is found.
0273  */
0274 static void get_global_efr(void)
0275 {
0276     struct amd_iommu *iommu;
0277 
0278     for_each_iommu(iommu) {
0279         u64 tmp = iommu->features;
0280         u64 tmp2 = iommu->features2;
0281 
0282         if (list_is_first(&iommu->list, &amd_iommu_list)) {
0283             amd_iommu_efr = tmp;
0284             amd_iommu_efr2 = tmp2;
0285             continue;
0286         }
0287 
0288         if (amd_iommu_efr == tmp &&
0289             amd_iommu_efr2 == tmp2)
0290             continue;
0291 
0292         pr_err(FW_BUG
0293                "Found inconsistent EFR/EFR2 %#llx,%#llx (global %#llx,%#llx) on iommu%d (%04x:%02x:%02x.%01x).\n",
0294                tmp, tmp2, amd_iommu_efr, amd_iommu_efr2,
0295                iommu->index, iommu->pci_seg->id,
0296                PCI_BUS_NUM(iommu->devid), PCI_SLOT(iommu->devid),
0297                PCI_FUNC(iommu->devid));
0298 
0299         amd_iommu_efr &= tmp;
0300         amd_iommu_efr2 &= tmp2;
0301     }
0302 
0303     pr_info("Using global IVHD EFR:%#llx, EFR2:%#llx\n", amd_iommu_efr, amd_iommu_efr2);
0304 }
0305 
0306 static bool check_feature_on_all_iommus(u64 mask)
0307 {
0308     return !!(amd_iommu_efr & mask);
0309 }
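     /*
      * amd_iommu_efr holds the AND of the EFR values of all IOMMUs (see
      * get_global_efr() above), so a set bit here means every IOMMU in the
      * system reports the feature.
      */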
0310 
0311 /*
0312  * For IVHD type 0x11/0x40, EFR is also available via IVHD.
0313  * Default to IVHD EFR since it is available sooner
0314  * (i.e. before PCI init).
0315  */
0316 static void __init early_iommu_features_init(struct amd_iommu *iommu,
0317                          struct ivhd_header *h)
0318 {
0319     if (amd_iommu_ivinfo & IOMMU_IVINFO_EFRSUP) {
0320         iommu->features = h->efr_reg;
0321         iommu->features2 = h->efr_reg2;
0322     }
0323     if (amd_iommu_ivinfo & IOMMU_IVINFO_DMA_REMAP)
0324         amdr_ivrs_remap_support = true;
0325 }
0326 
0327 /* Access to l1 and l2 indexed register spaces */
0328 
0329 static u32 iommu_read_l1(struct amd_iommu *iommu, u16 l1, u8 address)
0330 {
0331     u32 val;
0332 
0333     pci_write_config_dword(iommu->dev, 0xf8, (address | l1 << 16));
0334     pci_read_config_dword(iommu->dev, 0xfc, &val);
0335     return val;
0336 }
0337 
0338 static void iommu_write_l1(struct amd_iommu *iommu, u16 l1, u8 address, u32 val)
0339 {
0340     pci_write_config_dword(iommu->dev, 0xf8, (address | l1 << 16 | 1 << 31));
0341     pci_write_config_dword(iommu->dev, 0xfc, val);
0342     pci_write_config_dword(iommu->dev, 0xf8, (address | l1 << 16));
0343 }
0344 
0345 static u32 iommu_read_l2(struct amd_iommu *iommu, u8 address)
0346 {
0347     u32 val;
0348 
0349     pci_write_config_dword(iommu->dev, 0xf0, address);
0350     pci_read_config_dword(iommu->dev, 0xf4, &val);
0351     return val;
0352 }
0353 
0354 static void iommu_write_l2(struct amd_iommu *iommu, u8 address, u32 val)
0355 {
0356     pci_write_config_dword(iommu->dev, 0xf0, (address | 1 << 8));
0357     pci_write_config_dword(iommu->dev, 0xf4, val);
0358 }
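     /*
      * The helpers above use the indirect register access mechanism in PCI
      * config space: an index is written to offset 0xf8 (L1) or 0xf0 (L2)
      * and the data is then read or written through 0xfc or 0xf4. Setting
      * bit 31 (L1) or bit 8 (L2) of the index value makes the following
      * data access a write.
      */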
0359 
0360 /****************************************************************************
0361  *
0362  * AMD IOMMU MMIO register space handling functions
0363  *
0364  * These functions are used to program the IOMMU device registers in
0365  * the MMIO space required by this driver.
0366  *
0367  ****************************************************************************/
0368 
0369 /*
0370  * This function sets the exclusion range in the IOMMU. DMA accesses to the
0371  * exclusion range are passed through untranslated.
0372  */
0373 static void iommu_set_exclusion_range(struct amd_iommu *iommu)
0374 {
0375     u64 start = iommu->exclusion_start & PAGE_MASK;
0376     u64 limit = (start + iommu->exclusion_length - 1) & PAGE_MASK;
0377     u64 entry;
0378 
0379     if (!iommu->exclusion_start)
0380         return;
0381 
0382     entry = start | MMIO_EXCL_ENABLE_MASK;
0383     memcpy_toio(iommu->mmio_base + MMIO_EXCL_BASE_OFFSET,
0384             &entry, sizeof(entry));
0385 
0386     entry = limit;
0387     memcpy_toio(iommu->mmio_base + MMIO_EXCL_LIMIT_OFFSET,
0388             &entry, sizeof(entry));
0389 }
0390 
0391 static void iommu_set_cwwb_range(struct amd_iommu *iommu)
0392 {
0393     u64 start = iommu_virt_to_phys((void *)iommu->cmd_sem);
0394     u64 entry = start & PM_ADDR_MASK;
0395 
0396     if (!check_feature_on_all_iommus(FEATURE_SNP))
0397         return;
0398 
0399     /* Note:
0400      * Re-purpose Exclusion base/limit registers for Completion wait
0401      * write-back base/limit.
0402      */
0403     memcpy_toio(iommu->mmio_base + MMIO_EXCL_BASE_OFFSET,
0404             &entry, sizeof(entry));
0405 
0406     /* Note:
0407      * Default to 4 Kbytes, which can be specified by setting base
0408      * address equal to the limit address.
0409      */
0410     memcpy_toio(iommu->mmio_base + MMIO_EXCL_LIMIT_OFFSET,
0411             &entry, sizeof(entry));
0412 }
0413 
0414 /* Programs the physical address of the device table into the IOMMU hardware */
0415 static void iommu_set_device_table(struct amd_iommu *iommu)
0416 {
0417     u64 entry;
0418     u32 dev_table_size = iommu->pci_seg->dev_table_size;
0419     void *dev_table = (void *)get_dev_table(iommu);
0420 
0421     BUG_ON(iommu->mmio_base == NULL);
0422 
0423     entry = iommu_virt_to_phys(dev_table);
0424     entry |= (dev_table_size >> 12) - 1;
0425     memcpy_toio(iommu->mmio_base + MMIO_DEV_TABLE_OFFSET,
0426             &entry, sizeof(entry));
0427 }
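     /*
      * The value written to MMIO_DEV_TABLE_OFFSET above combines the
      * physical base address of the device table with its size encoded as
      * (number of 4K pages - 1) in the low bits.
      */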
0428 
0429 /* Generic functions to enable/disable certain features of the IOMMU. */
0430 static void iommu_feature_enable(struct amd_iommu *iommu, u8 bit)
0431 {
0432     u64 ctrl;
0433 
0434     ctrl = readq(iommu->mmio_base +  MMIO_CONTROL_OFFSET);
0435     ctrl |= (1ULL << bit);
0436     writeq(ctrl, iommu->mmio_base +  MMIO_CONTROL_OFFSET);
0437 }
0438 
0439 static void iommu_feature_disable(struct amd_iommu *iommu, u8 bit)
0440 {
0441     u64 ctrl;
0442 
0443     ctrl = readq(iommu->mmio_base + MMIO_CONTROL_OFFSET);
0444     ctrl &= ~(1ULL << bit);
0445     writeq(ctrl, iommu->mmio_base + MMIO_CONTROL_OFFSET);
0446 }
0447 
0448 static void iommu_set_inv_tlb_timeout(struct amd_iommu *iommu, int timeout)
0449 {
0450     u64 ctrl;
0451 
0452     ctrl = readq(iommu->mmio_base + MMIO_CONTROL_OFFSET);
0453     ctrl &= ~CTRL_INV_TO_MASK;
0454     ctrl |= (timeout << CONTROL_INV_TIMEOUT) & CTRL_INV_TO_MASK;
0455     writeq(ctrl, iommu->mmio_base + MMIO_CONTROL_OFFSET);
0456 }
0457 
0458 /* Function to enable the hardware */
0459 static void iommu_enable(struct amd_iommu *iommu)
0460 {
0461     iommu_feature_enable(iommu, CONTROL_IOMMU_EN);
0462 }
0463 
0464 static void iommu_disable(struct amd_iommu *iommu)
0465 {
0466     if (!iommu->mmio_base)
0467         return;
0468 
0469     /* Disable command buffer */
0470     iommu_feature_disable(iommu, CONTROL_CMDBUF_EN);
0471 
0472     /* Disable event logging and event interrupts */
0473     iommu_feature_disable(iommu, CONTROL_EVT_INT_EN);
0474     iommu_feature_disable(iommu, CONTROL_EVT_LOG_EN);
0475 
0476     /* Disable IOMMU GA_LOG */
0477     iommu_feature_disable(iommu, CONTROL_GALOG_EN);
0478     iommu_feature_disable(iommu, CONTROL_GAINT_EN);
0479 
0480     /* Disable IOMMU hardware itself */
0481     iommu_feature_disable(iommu, CONTROL_IOMMU_EN);
0482 }
0483 
0484 /*
0485  * mapping and unmapping functions for the IOMMU MMIO space. Each AMD IOMMU in
0486  * the system has one.
0487  */
0488 static u8 __iomem * __init iommu_map_mmio_space(u64 address, u64 end)
0489 {
0490     if (!request_mem_region(address, end, "amd_iommu")) {
0491         pr_err("Can not reserve memory region %llx-%llx for mmio\n",
0492             address, end);
0493         pr_err("This is a BIOS bug. Please contact your hardware vendor\n");
0494         return NULL;
0495     }
0496 
0497     return (u8 __iomem *)ioremap(address, end);
0498 }
0499 
0500 static void __init iommu_unmap_mmio_space(struct amd_iommu *iommu)
0501 {
0502     if (iommu->mmio_base)
0503         iounmap(iommu->mmio_base);
0504     release_mem_region(iommu->mmio_phys, iommu->mmio_phys_end);
0505 }
0506 
0507 static inline u32 get_ivhd_header_size(struct ivhd_header *h)
0508 {
0509     u32 size = 0;
0510 
0511     switch (h->type) {
0512     case 0x10:
0513         size = 24;
0514         break;
0515     case 0x11:
0516     case 0x40:
0517         size = 40;
0518         break;
0519     }
0520     return size;
0521 }
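     /*
      * Legacy type 0x10 IVHD headers are 24 bytes long; type 0x11/0x40
      * headers are 40 bytes because they additionally carry the EFR fields
      * (see struct ivhd_header above). Unknown types yield 0 and are
      * rejected by the callers.
      */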
0522 
0523 /****************************************************************************
0524  *
0525  * The functions below belong to the first pass of AMD IOMMU ACPI table
0526  * parsing. In this pass we try to find out the highest device id this
0527  * code has to handle. Based on this information the size of the shared
0528  * data structures is determined later.
0529  *
0530  ****************************************************************************/
0531 
0532 /*
0533  * This function calculates the length of a given IVHD entry
0534  */
0535 static inline int ivhd_entry_length(u8 *ivhd)
0536 {
0537     u32 type = ((struct ivhd_entry *)ivhd)->type;
0538 
0539     if (type < 0x80) {
0540         return 0x04 << (*ivhd >> 6);
0541     } else if (type == IVHD_DEV_ACPI_HID) {
0542         /* For ACPI_HID, offset 21 is uid len */
0543         return *((u8 *)ivhd + 21) + 22;
0544     }
0545     return 0;
0546 }
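     /*
      * For entry types below 0x80 the length is encoded in the two most
      * significant bits of the type byte, giving 4, 8, 16 or 32 bytes.
      * ACPI_HID entries are 22 bytes plus the variable-length UID stored
      * at offset 21. Unknown types return 0.
      */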
0547 
0548 /*
0549  * After reading the highest device id from the IOMMU PCI capability header
0550  * this function checks whether a higher device id is defined in the ACPI table
0551  */
0552 static int __init find_last_devid_from_ivhd(struct ivhd_header *h)
0553 {
0554     u8 *p = (void *)h, *end = (void *)h;
0555     struct ivhd_entry *dev;
0556     int last_devid = -EINVAL;
0557 
0558     u32 ivhd_size = get_ivhd_header_size(h);
0559 
0560     if (!ivhd_size) {
0561         pr_err("Unsupported IVHD type %#x\n", h->type);
0562         return -EINVAL;
0563     }
0564 
0565     p += ivhd_size;
0566     end += h->length;
0567 
0568     while (p < end) {
0569         dev = (struct ivhd_entry *)p;
0570         switch (dev->type) {
0571         case IVHD_DEV_ALL:
0572             /* Use maximum BDF value for DEV_ALL */
0573             return 0xffff;
0574         case IVHD_DEV_SELECT:
0575         case IVHD_DEV_RANGE_END:
0576         case IVHD_DEV_ALIAS:
0577         case IVHD_DEV_EXT_SELECT:
0578             /* all the above subfield types refer to device ids */
0579             if (dev->devid > last_devid)
0580                 last_devid = dev->devid;
0581             break;
0582         default:
0583             break;
0584         }
0585         p += ivhd_entry_length(p);
0586     }
0587 
0588     WARN_ON(p != end);
0589 
0590     return last_devid;
0591 }
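     /*
      * Note that a DEV_ALL entry short-circuits the scan above with the
      * maximum BDF value (0xffff), so such a segment is sized for the full
      * device id range.
      */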
0592 
0593 static int __init check_ivrs_checksum(struct acpi_table_header *table)
0594 {
0595     int i;
0596     u8 checksum = 0, *p = (u8 *)table;
0597 
0598     for (i = 0; i < table->length; ++i)
0599         checksum += p[i];
0600     if (checksum != 0) {
0601         /* ACPI table corrupt */
0602         pr_err(FW_BUG "IVRS invalid checksum\n");
0603         return -ENODEV;
0604     }
0605 
0606     return 0;
0607 }
0608 
0609 /*
0610  * Iterate over all IVHD entries in the ACPI table and find the highest device
0611  * id which we need to handle. This is the first of three functions which parse
0612  * the ACPI table. So we check the checksum here.
0613  */
0614 static int __init find_last_devid_acpi(struct acpi_table_header *table, u16 pci_seg)
0615 {
0616     u8 *p = (u8 *)table, *end = (u8 *)table;
0617     struct ivhd_header *h;
0618     int last_devid, last_bdf = 0;
0619 
0620     p += IVRS_HEADER_LENGTH;
0621 
0622     end += table->length;
0623     while (p < end) {
0624         h = (struct ivhd_header *)p;
0625         if (h->pci_seg == pci_seg &&
0626             h->type == amd_iommu_target_ivhd_type) {
0627             last_devid = find_last_devid_from_ivhd(h);
0628 
0629             if (last_devid < 0)
0630                 return -EINVAL;
0631             if (last_devid > last_bdf)
0632                 last_bdf = last_devid;
0633         }
0634         p += h->length;
0635     }
0636     WARN_ON(p != end);
0637 
0638     return last_bdf;
0639 }
0640 
0641 /****************************************************************************
0642  *
0643  * The following functions belong to the code path which parses the ACPI table
0644  * the second time. In this ACPI parsing iteration we allocate IOMMU specific
0645  * data structures, initialize the per PCI segment device/alias/rlookup table
0646  * and also basically initialize the hardware.
0647  *
0648  ****************************************************************************/
0649 
0650 /* Allocate per PCI segment device table */
0651 static inline int __init alloc_dev_table(struct amd_iommu_pci_seg *pci_seg)
0652 {
0653     pci_seg->dev_table = (void *)__get_free_pages(GFP_KERNEL | __GFP_ZERO | GFP_DMA32,
0654                               get_order(pci_seg->dev_table_size));
0655     if (!pci_seg->dev_table)
0656         return -ENOMEM;
0657 
0658     return 0;
0659 }
0660 
0661 static inline void free_dev_table(struct amd_iommu_pci_seg *pci_seg)
0662 {
0663     free_pages((unsigned long)pci_seg->dev_table,
0664             get_order(pci_seg->dev_table_size));
0665     pci_seg->dev_table = NULL;
0666 }
0667 
0668 /* Allocate per PCI segment IOMMU rlookup table. */
0669 static inline int __init alloc_rlookup_table(struct amd_iommu_pci_seg *pci_seg)
0670 {
0671     pci_seg->rlookup_table = (void *)__get_free_pages(
0672                         GFP_KERNEL | __GFP_ZERO,
0673                         get_order(pci_seg->rlookup_table_size));
0674     if (pci_seg->rlookup_table == NULL)
0675         return -ENOMEM;
0676 
0677     return 0;
0678 }
0679 
0680 static inline void free_rlookup_table(struct amd_iommu_pci_seg *pci_seg)
0681 {
0682     free_pages((unsigned long)pci_seg->rlookup_table,
0683            get_order(pci_seg->rlookup_table_size));
0684     pci_seg->rlookup_table = NULL;
0685 }
0686 
0687 static inline int __init alloc_irq_lookup_table(struct amd_iommu_pci_seg *pci_seg)
0688 {
0689     pci_seg->irq_lookup_table = (void *)__get_free_pages(
0690                          GFP_KERNEL | __GFP_ZERO,
0691                          get_order(pci_seg->rlookup_table_size));
0692     kmemleak_alloc(pci_seg->irq_lookup_table,
0693                pci_seg->rlookup_table_size, 1, GFP_KERNEL);
0694     if (pci_seg->irq_lookup_table == NULL)
0695         return -ENOMEM;
0696 
0697     return 0;
0698 }
0699 
0700 static inline void free_irq_lookup_table(struct amd_iommu_pci_seg *pci_seg)
0701 {
0702     kmemleak_free(pci_seg->irq_lookup_table);
0703     free_pages((unsigned long)pci_seg->irq_lookup_table,
0704            get_order(pci_seg->rlookup_table_size));
0705     pci_seg->irq_lookup_table = NULL;
0706 }
0707 
0708 static int __init alloc_alias_table(struct amd_iommu_pci_seg *pci_seg)
0709 {
0710     int i;
0711 
0712     pci_seg->alias_table = (void *)__get_free_pages(GFP_KERNEL,
0713                     get_order(pci_seg->alias_table_size));
0714     if (!pci_seg->alias_table)
0715         return -ENOMEM;
0716 
0717     /*
0718      * let each alias entry point to itself
0719      */
0720     for (i = 0; i <= pci_seg->last_bdf; ++i)
0721         pci_seg->alias_table[i] = i;
0722 
0723     return 0;
0724 }
0725 
0726 static void __init free_alias_table(struct amd_iommu_pci_seg *pci_seg)
0727 {
0728     free_pages((unsigned long)pci_seg->alias_table,
0729            get_order(pci_seg->alias_table_size));
0730     pci_seg->alias_table = NULL;
0731 }
0732 
0733 /*
0734  * Allocates the command buffer. This buffer is per AMD IOMMU. We can
0735  * write commands to that buffer later and the IOMMU will execute them
0736  * asynchronously
0737  */
0738 static int __init alloc_command_buffer(struct amd_iommu *iommu)
0739 {
0740     iommu->cmd_buf = (void *)__get_free_pages(GFP_KERNEL | __GFP_ZERO,
0741                           get_order(CMD_BUFFER_SIZE));
0742 
0743     return iommu->cmd_buf ? 0 : -ENOMEM;
0744 }
0745 
0746 /*
0747  * This function restarts event logging in case the IOMMU experienced
0748  * an event log buffer overflow.
0749  */
0750 void amd_iommu_restart_event_logging(struct amd_iommu *iommu)
0751 {
0752     iommu_feature_disable(iommu, CONTROL_EVT_LOG_EN);
0753     iommu_feature_enable(iommu, CONTROL_EVT_LOG_EN);
0754 }
0755 
0756 /*
0757  * This function resets the command buffer if the IOMMU stopped fetching
0758  * commands from it.
0759  */
0760 static void amd_iommu_reset_cmd_buffer(struct amd_iommu *iommu)
0761 {
0762     iommu_feature_disable(iommu, CONTROL_CMDBUF_EN);
0763 
0764     writel(0x00, iommu->mmio_base + MMIO_CMD_HEAD_OFFSET);
0765     writel(0x00, iommu->mmio_base + MMIO_CMD_TAIL_OFFSET);
0766     iommu->cmd_buf_head = 0;
0767     iommu->cmd_buf_tail = 0;
0768 
0769     iommu_feature_enable(iommu, CONTROL_CMDBUF_EN);
0770 }
0771 
0772 /*
0773  * This function writes the command buffer address to the hardware and
0774  * enables it.
0775  */
0776 static void iommu_enable_command_buffer(struct amd_iommu *iommu)
0777 {
0778     u64 entry;
0779 
0780     BUG_ON(iommu->cmd_buf == NULL);
0781 
0782     entry = iommu_virt_to_phys(iommu->cmd_buf);
0783     entry |= MMIO_CMD_SIZE_512;
0784 
0785     memcpy_toio(iommu->mmio_base + MMIO_CMD_BUF_OFFSET,
0786             &entry, sizeof(entry));
0787 
0788     amd_iommu_reset_cmd_buffer(iommu);
0789 }
0790 
0791 /*
0792  * This function disables the command buffer
0793  */
0794 static void iommu_disable_command_buffer(struct amd_iommu *iommu)
0795 {
0796     iommu_feature_disable(iommu, CONTROL_CMDBUF_EN);
0797 }
0798 
0799 static void __init free_command_buffer(struct amd_iommu *iommu)
0800 {
0801     free_pages((unsigned long)iommu->cmd_buf, get_order(CMD_BUFFER_SIZE));
0802 }
0803 
0804 static void *__init iommu_alloc_4k_pages(struct amd_iommu *iommu,
0805                      gfp_t gfp, size_t size)
0806 {
0807     int order = get_order(size);
0808     void *buf = (void *)__get_free_pages(gfp, order);
0809 
0810     if (buf &&
0811         check_feature_on_all_iommus(FEATURE_SNP) &&
0812         set_memory_4k((unsigned long)buf, (1 << order))) {
0813         free_pages((unsigned long)buf, order);
0814         buf = NULL;
0815     }
0816 
0817     return buf;
0818 }
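     /*
      * When the SNP feature is reported by all IOMMUs, the buffer allocated
      * above is forced into 4K page mappings with set_memory_4k(); if that
      * fails, the allocation is released and NULL is returned.
      */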
0819 
0820 /* allocates the memory where the IOMMU will log its events to */
0821 static int __init alloc_event_buffer(struct amd_iommu *iommu)
0822 {
0823     iommu->evt_buf = iommu_alloc_4k_pages(iommu, GFP_KERNEL | __GFP_ZERO,
0824                           EVT_BUFFER_SIZE);
0825 
0826     return iommu->evt_buf ? 0 : -ENOMEM;
0827 }
0828 
0829 static void iommu_enable_event_buffer(struct amd_iommu *iommu)
0830 {
0831     u64 entry;
0832 
0833     BUG_ON(iommu->evt_buf == NULL);
0834 
0835     entry = iommu_virt_to_phys(iommu->evt_buf) | EVT_LEN_MASK;
0836 
0837     memcpy_toio(iommu->mmio_base + MMIO_EVT_BUF_OFFSET,
0838             &entry, sizeof(entry));
0839 
0840     /* set head and tail to zero manually */
0841     writel(0x00, iommu->mmio_base + MMIO_EVT_HEAD_OFFSET);
0842     writel(0x00, iommu->mmio_base + MMIO_EVT_TAIL_OFFSET);
0843 
0844     iommu_feature_enable(iommu, CONTROL_EVT_LOG_EN);
0845 }
0846 
0847 /*
0848  * This function disables the event log buffer
0849  */
0850 static void iommu_disable_event_buffer(struct amd_iommu *iommu)
0851 {
0852     iommu_feature_disable(iommu, CONTROL_EVT_LOG_EN);
0853 }
0854 
0855 static void __init free_event_buffer(struct amd_iommu *iommu)
0856 {
0857     free_pages((unsigned long)iommu->evt_buf, get_order(EVT_BUFFER_SIZE));
0858 }
0859 
0860 /* allocates the memory where the IOMMU will write its PPR log entries to */
0861 static int __init alloc_ppr_log(struct amd_iommu *iommu)
0862 {
0863     iommu->ppr_log = iommu_alloc_4k_pages(iommu, GFP_KERNEL | __GFP_ZERO,
0864                           PPR_LOG_SIZE);
0865 
0866     return iommu->ppr_log ? 0 : -ENOMEM;
0867 }
0868 
0869 static void iommu_enable_ppr_log(struct amd_iommu *iommu)
0870 {
0871     u64 entry;
0872 
0873     if (iommu->ppr_log == NULL)
0874         return;
0875 
0876     entry = iommu_virt_to_phys(iommu->ppr_log) | PPR_LOG_SIZE_512;
0877 
0878     memcpy_toio(iommu->mmio_base + MMIO_PPR_LOG_OFFSET,
0879             &entry, sizeof(entry));
0880 
0881     /* set head and tail to zero manually */
0882     writel(0x00, iommu->mmio_base + MMIO_PPR_HEAD_OFFSET);
0883     writel(0x00, iommu->mmio_base + MMIO_PPR_TAIL_OFFSET);
0884 
0885     iommu_feature_enable(iommu, CONTROL_PPRLOG_EN);
0886     iommu_feature_enable(iommu, CONTROL_PPR_EN);
0887 }
0888 
0889 static void __init free_ppr_log(struct amd_iommu *iommu)
0890 {
0891     free_pages((unsigned long)iommu->ppr_log, get_order(PPR_LOG_SIZE));
0892 }
0893 
0894 static void free_ga_log(struct amd_iommu *iommu)
0895 {
0896 #ifdef CONFIG_IRQ_REMAP
0897     free_pages((unsigned long)iommu->ga_log, get_order(GA_LOG_SIZE));
0898     free_pages((unsigned long)iommu->ga_log_tail, get_order(8));
0899 #endif
0900 }
0901 
0902 #ifdef CONFIG_IRQ_REMAP
0903 static int iommu_ga_log_enable(struct amd_iommu *iommu)
0904 {
0905     u32 status, i;
0906     u64 entry;
0907 
0908     if (!iommu->ga_log)
0909         return -EINVAL;
0910 
0911     entry = iommu_virt_to_phys(iommu->ga_log) | GA_LOG_SIZE_512;
0912     memcpy_toio(iommu->mmio_base + MMIO_GA_LOG_BASE_OFFSET,
0913             &entry, sizeof(entry));
0914     entry = (iommu_virt_to_phys(iommu->ga_log_tail) &
0915          (BIT_ULL(52)-1)) & ~7ULL;
0916     memcpy_toio(iommu->mmio_base + MMIO_GA_LOG_TAIL_OFFSET,
0917             &entry, sizeof(entry));
0918     writel(0x00, iommu->mmio_base + MMIO_GA_HEAD_OFFSET);
0919     writel(0x00, iommu->mmio_base + MMIO_GA_TAIL_OFFSET);
0920 
0921 
0922     iommu_feature_enable(iommu, CONTROL_GAINT_EN);
0923     iommu_feature_enable(iommu, CONTROL_GALOG_EN);
0924 
0925     for (i = 0; i < LOOP_TIMEOUT; ++i) {
0926         status = readl(iommu->mmio_base + MMIO_STATUS_OFFSET);
0927         if (status & (MMIO_STATUS_GALOG_RUN_MASK))
0928             break;
0929         udelay(10);
0930     }
0931 
0932     if (WARN_ON(i >= LOOP_TIMEOUT))
0933         return -EINVAL;
0934 
0935     return 0;
0936 }
0937 
0938 static int iommu_init_ga_log(struct amd_iommu *iommu)
0939 {
0940     if (!AMD_IOMMU_GUEST_IR_VAPIC(amd_iommu_guest_ir))
0941         return 0;
0942 
0943     iommu->ga_log = (u8 *)__get_free_pages(GFP_KERNEL | __GFP_ZERO,
0944                     get_order(GA_LOG_SIZE));
0945     if (!iommu->ga_log)
0946         goto err_out;
0947 
0948     iommu->ga_log_tail = (u8 *)__get_free_pages(GFP_KERNEL | __GFP_ZERO,
0949                     get_order(8));
0950     if (!iommu->ga_log_tail)
0951         goto err_out;
0952 
0953     return 0;
0954 err_out:
0955     free_ga_log(iommu);
0956     return -EINVAL;
0957 }
0958 #endif /* CONFIG_IRQ_REMAP */
0959 
0960 static int __init alloc_cwwb_sem(struct amd_iommu *iommu)
0961 {
0962     iommu->cmd_sem = iommu_alloc_4k_pages(iommu, GFP_KERNEL | __GFP_ZERO, 1);
0963 
0964     return iommu->cmd_sem ? 0 : -ENOMEM;
0965 }
0966 
0967 static void __init free_cwwb_sem(struct amd_iommu *iommu)
0968 {
0969     if (iommu->cmd_sem)
0970         free_page((unsigned long)iommu->cmd_sem);
0971 }
0972 
0973 static void iommu_enable_xt(struct amd_iommu *iommu)
0974 {
0975 #ifdef CONFIG_IRQ_REMAP
0976     /*
0977      * XT mode (32-bit APIC destination ID) requires
0978      * GA mode (128-bit IRTE support) as a prerequisite.
0979      */
0980     if (AMD_IOMMU_GUEST_IR_GA(amd_iommu_guest_ir) &&
0981         amd_iommu_xt_mode == IRQ_REMAP_X2APIC_MODE)
0982         iommu_feature_enable(iommu, CONTROL_XT_EN);
0983 #endif /* CONFIG_IRQ_REMAP */
0984 }
0985 
0986 static void iommu_enable_gt(struct amd_iommu *iommu)
0987 {
0988     if (!iommu_feature(iommu, FEATURE_GT))
0989         return;
0990 
0991     iommu_feature_enable(iommu, CONTROL_GT_EN);
0992 }
0993 
0994 /* sets a specific bit in the device table entry. */
0995 static void __set_dev_entry_bit(struct dev_table_entry *dev_table,
0996                 u16 devid, u8 bit)
0997 {
0998     int i = (bit >> 6) & 0x03;
0999     int _bit = bit & 0x3f;
1000 
1001     dev_table[devid].data[i] |= (1UL << _bit);
1002 }
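     /*
      * A device table entry is an array of 64-bit words (data[]): bits 7:6
      * of 'bit' select the word and bits 5:0 the position within it.
      */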
1003 
1004 static void set_dev_entry_bit(struct amd_iommu *iommu, u16 devid, u8 bit)
1005 {
1006     struct dev_table_entry *dev_table = get_dev_table(iommu);
1007 
1008     return __set_dev_entry_bit(dev_table, devid, bit);
1009 }
1010 
1011 static int __get_dev_entry_bit(struct dev_table_entry *dev_table,
1012                    u16 devid, u8 bit)
1013 {
1014     int i = (bit >> 6) & 0x03;
1015     int _bit = bit & 0x3f;
1016 
1017     return (dev_table[devid].data[i] & (1UL << _bit)) >> _bit;
1018 }
1019 
1020 static int get_dev_entry_bit(struct amd_iommu *iommu, u16 devid, u8 bit)
1021 {
1022     struct dev_table_entry *dev_table = get_dev_table(iommu);
1023 
1024     return __get_dev_entry_bit(dev_table, devid, bit);
1025 }
1026 
1027 static bool __copy_device_table(struct amd_iommu *iommu)
1028 {
1029     u64 int_ctl, int_tab_len, entry = 0;
1030     struct amd_iommu_pci_seg *pci_seg = iommu->pci_seg;
1031     struct dev_table_entry *old_devtb = NULL;
1032     u32 lo, hi, devid, old_devtb_size;
1033     phys_addr_t old_devtb_phys;
1034     u16 dom_id, dte_v, irq_v;
1035     gfp_t gfp_flag;
1036     u64 tmp;
1037 
1038     /* Each IOMMU uses a separate device table with the same size */
1039     lo = readl(iommu->mmio_base + MMIO_DEV_TABLE_OFFSET);
1040     hi = readl(iommu->mmio_base + MMIO_DEV_TABLE_OFFSET + 4);
1041     entry = (((u64) hi) << 32) + lo;
1042 
1043     old_devtb_size = ((entry & ~PAGE_MASK) + 1) << 12;
1044     if (old_devtb_size != pci_seg->dev_table_size) {
1045         pr_err("The device table size of IOMMU:%d is not expected!\n",
1046             iommu->index);
1047         return false;
1048     }
1049 
1050     /*
1051      * When SME is enabled in the first kernel, the entry includes the
1052      * memory encryption mask (sme_me_mask), so we must remove the memory
1053      * encryption mask to obtain the true physical address in the kdump kernel.
1054      */
1055     old_devtb_phys = __sme_clr(entry) & PAGE_MASK;
1056 
1057     if (old_devtb_phys >= 0x100000000ULL) {
1058         pr_err("The address of old device table is above 4G, not trustworthy!\n");
1059         return false;
1060     }
1061     old_devtb = (cc_platform_has(CC_ATTR_HOST_MEM_ENCRYPT) && is_kdump_kernel())
1062             ? (__force void *)ioremap_encrypted(old_devtb_phys,
1063                             pci_seg->dev_table_size)
1064             : memremap(old_devtb_phys, pci_seg->dev_table_size, MEMREMAP_WB);
1065 
1066     if (!old_devtb)
1067         return false;
1068 
1069     gfp_flag = GFP_KERNEL | __GFP_ZERO | GFP_DMA32;
1070     pci_seg->old_dev_tbl_cpy = (void *)__get_free_pages(gfp_flag,
1071                             get_order(pci_seg->dev_table_size));
1072     if (pci_seg->old_dev_tbl_cpy == NULL) {
1073         pr_err("Failed to allocate memory for copying old device table!\n");
1074         memunmap(old_devtb);
1075         return false;
1076     }
1077 
1078     for (devid = 0; devid <= pci_seg->last_bdf; ++devid) {
1079         pci_seg->old_dev_tbl_cpy[devid] = old_devtb[devid];
1080         dom_id = old_devtb[devid].data[1] & DEV_DOMID_MASK;
1081         dte_v = old_devtb[devid].data[0] & DTE_FLAG_V;
1082 
1083         if (dte_v && dom_id) {
1084             pci_seg->old_dev_tbl_cpy[devid].data[0] = old_devtb[devid].data[0];
1085             pci_seg->old_dev_tbl_cpy[devid].data[1] = old_devtb[devid].data[1];
1086             __set_bit(dom_id, amd_iommu_pd_alloc_bitmap);
1087             /* If gcr3 table existed, mask it out */
1088             if (old_devtb[devid].data[0] & DTE_FLAG_GV) {
1089                 tmp = DTE_GCR3_VAL_B(~0ULL) << DTE_GCR3_SHIFT_B;
1090                 tmp |= DTE_GCR3_VAL_C(~0ULL) << DTE_GCR3_SHIFT_C;
1091                 pci_seg->old_dev_tbl_cpy[devid].data[1] &= ~tmp;
1092                 tmp = DTE_GCR3_VAL_A(~0ULL) << DTE_GCR3_SHIFT_A;
1093                 tmp |= DTE_FLAG_GV;
1094                 pci_seg->old_dev_tbl_cpy[devid].data[0] &= ~tmp;
1095             }
1096         }
1097 
1098         irq_v = old_devtb[devid].data[2] & DTE_IRQ_REMAP_ENABLE;
1099         int_ctl = old_devtb[devid].data[2] & DTE_IRQ_REMAP_INTCTL_MASK;
1100         int_tab_len = old_devtb[devid].data[2] & DTE_INTTABLEN_MASK;
1101         if (irq_v && (int_ctl || int_tab_len)) {
1102             if ((int_ctl != DTE_IRQ_REMAP_INTCTL) ||
1103                 (int_tab_len != DTE_INTTABLEN)) {
1104                 pr_err("Wrong old irq remapping flag: %#x\n", devid);
1105                 memunmap(old_devtb);
1106                 return false;
1107             }
1108 
1109             pci_seg->old_dev_tbl_cpy[devid].data[2] = old_devtb[devid].data[2];
1110         }
1111     }
1112     memunmap(old_devtb);
1113 
1114     return true;
1115 }
1116 
1117 static bool copy_device_table(void)
1118 {
1119     struct amd_iommu *iommu;
1120     struct amd_iommu_pci_seg *pci_seg;
1121 
1122     if (!amd_iommu_pre_enabled)
1123         return false;
1124 
1125     pr_warn("Translation is already enabled - trying to copy translation structures\n");
1126 
1127     /*
1128      * All IOMMUs within a PCI segment share a common device table.
1129      * Hence copy the device table only once per PCI segment.
1130      */
1131     for_each_pci_segment(pci_seg) {
1132         for_each_iommu(iommu) {
1133             if (pci_seg->id != iommu->pci_seg->id)
1134                 continue;
1135             if (!__copy_device_table(iommu))
1136                 return false;
1137             break;
1138         }
1139     }
1140 
1141     return true;
1142 }
1143 
1144 void amd_iommu_apply_erratum_63(struct amd_iommu *iommu, u16 devid)
1145 {
1146     int sysmgt;
1147 
1148     sysmgt = get_dev_entry_bit(iommu, devid, DEV_ENTRY_SYSMGT1) |
1149          (get_dev_entry_bit(iommu, devid, DEV_ENTRY_SYSMGT2) << 1);
1150 
1151     if (sysmgt == 0x01)
1152         set_dev_entry_bit(iommu, devid, DEV_ENTRY_IW);
1153 }
1154 
1155 /*
1156  * This function takes the device specific flags read from the ACPI
1157  * table and sets up the device table entry with that information
1158  */
1159 static void __init set_dev_entry_from_acpi(struct amd_iommu *iommu,
1160                        u16 devid, u32 flags, u32 ext_flags)
1161 {
1162     if (flags & ACPI_DEVFLAG_INITPASS)
1163         set_dev_entry_bit(iommu, devid, DEV_ENTRY_INIT_PASS);
1164     if (flags & ACPI_DEVFLAG_EXTINT)
1165         set_dev_entry_bit(iommu, devid, DEV_ENTRY_EINT_PASS);
1166     if (flags & ACPI_DEVFLAG_NMI)
1167         set_dev_entry_bit(iommu, devid, DEV_ENTRY_NMI_PASS);
1168     if (flags & ACPI_DEVFLAG_SYSMGT1)
1169         set_dev_entry_bit(iommu, devid, DEV_ENTRY_SYSMGT1);
1170     if (flags & ACPI_DEVFLAG_SYSMGT2)
1171         set_dev_entry_bit(iommu, devid, DEV_ENTRY_SYSMGT2);
1172     if (flags & ACPI_DEVFLAG_LINT0)
1173         set_dev_entry_bit(iommu, devid, DEV_ENTRY_LINT0_PASS);
1174     if (flags & ACPI_DEVFLAG_LINT1)
1175         set_dev_entry_bit(iommu, devid, DEV_ENTRY_LINT1_PASS);
1176 
1177     amd_iommu_apply_erratum_63(iommu, devid);
1178 
1179     amd_iommu_set_rlookup_table(iommu, devid);
1180 }
1181 
1182 int __init add_special_device(u8 type, u8 id, u32 *devid, bool cmd_line)
1183 {
1184     struct devid_map *entry;
1185     struct list_head *list;
1186 
1187     if (type == IVHD_SPECIAL_IOAPIC)
1188         list = &ioapic_map;
1189     else if (type == IVHD_SPECIAL_HPET)
1190         list = &hpet_map;
1191     else
1192         return -EINVAL;
1193 
1194     list_for_each_entry(entry, list, list) {
1195         if (!(entry->id == id && entry->cmd_line))
1196             continue;
1197 
1198         pr_info("Command-line override present for %s id %d - ignoring\n",
1199             type == IVHD_SPECIAL_IOAPIC ? "IOAPIC" : "HPET", id);
1200 
1201         *devid = entry->devid;
1202 
1203         return 0;
1204     }
1205 
1206     entry = kzalloc(sizeof(*entry), GFP_KERNEL);
1207     if (!entry)
1208         return -ENOMEM;
1209 
1210     entry->id   = id;
1211     entry->devid    = *devid;
1212     entry->cmd_line = cmd_line;
1213 
1214     list_add_tail(&entry->list, list);
1215 
1216     return 0;
1217 }
1218 
1219 static int __init add_acpi_hid_device(u8 *hid, u8 *uid, u32 *devid,
1220                       bool cmd_line)
1221 {
1222     struct acpihid_map_entry *entry;
1223     struct list_head *list = &acpihid_map;
1224 
1225     list_for_each_entry(entry, list, list) {
1226         if (strcmp(entry->hid, hid) ||
1227             (*uid && *entry->uid && strcmp(entry->uid, uid)) ||
1228             !entry->cmd_line)
1229             continue;
1230 
1231         pr_info("Command-line override for hid:%s uid:%s\n",
1232             hid, uid);
1233         *devid = entry->devid;
1234         return 0;
1235     }
1236 
1237     entry = kzalloc(sizeof(*entry), GFP_KERNEL);
1238     if (!entry)
1239         return -ENOMEM;
1240 
1241     memcpy(entry->uid, uid, strlen(uid));
1242     memcpy(entry->hid, hid, strlen(hid));
1243     entry->devid = *devid;
1244     entry->cmd_line = cmd_line;
1245     entry->root_devid = (entry->devid & (~0x7));
1246 
1247     pr_info("%s, add hid:%s, uid:%s, rdevid:%d\n",
1248         entry->cmd_line ? "cmd" : "ivrs",
1249         entry->hid, entry->uid, entry->root_devid);
1250 
1251     list_add_tail(&entry->list, list);
1252     return 0;
1253 }
1254 
1255 static int __init add_early_maps(void)
1256 {
1257     int i, ret;
1258 
1259     for (i = 0; i < early_ioapic_map_size; ++i) {
1260         ret = add_special_device(IVHD_SPECIAL_IOAPIC,
1261                      early_ioapic_map[i].id,
1262                      &early_ioapic_map[i].devid,
1263                      early_ioapic_map[i].cmd_line);
1264         if (ret)
1265             return ret;
1266     }
1267 
1268     for (i = 0; i < early_hpet_map_size; ++i) {
1269         ret = add_special_device(IVHD_SPECIAL_HPET,
1270                      early_hpet_map[i].id,
1271                      &early_hpet_map[i].devid,
1272                      early_hpet_map[i].cmd_line);
1273         if (ret)
1274             return ret;
1275     }
1276 
1277     for (i = 0; i < early_acpihid_map_size; ++i) {
1278         ret = add_acpi_hid_device(early_acpihid_map[i].hid,
1279                       early_acpihid_map[i].uid,
1280                       &early_acpihid_map[i].devid,
1281                       early_acpihid_map[i].cmd_line);
1282         if (ret)
1283             return ret;
1284     }
1285 
1286     return 0;
1287 }
1288 
1289 /*
1290  * Takes a pointer to an AMD IOMMU entry in the ACPI table and
1291  * initializes the hardware and our data structures with it.
1292  */
1293 static int __init init_iommu_from_acpi(struct amd_iommu *iommu,
1294                     struct ivhd_header *h)
1295 {
1296     u8 *p = (u8 *)h;
1297     u8 *end = p, flags = 0;
1298     u16 devid = 0, devid_start = 0, devid_to = 0, seg_id;
1299     u32 dev_i, ext_flags = 0;
1300     bool alias = false;
1301     struct ivhd_entry *e;
1302     struct amd_iommu_pci_seg *pci_seg = iommu->pci_seg;
1303     u32 ivhd_size;
1304     int ret;
1305 
1306 
1307     ret = add_early_maps();
1308     if (ret)
1309         return ret;
1310 
1311     amd_iommu_apply_ivrs_quirks();
1312 
1313     /*
1314      * First save the recommended feature enable bits from ACPI
1315      */
1316     iommu->acpi_flags = h->flags;
1317 
1318     /*
1319      * Done. Now parse the device entries
1320      */
1321     ivhd_size = get_ivhd_header_size(h);
1322     if (!ivhd_size) {
1323         pr_err("Unsupported IVHD type %#x\n", h->type);
1324         return -EINVAL;
1325     }
1326 
1327     p += ivhd_size;
1328 
1329     end += h->length;
1330 
1331 
1332     while (p < end) {
1333         e = (struct ivhd_entry *)p;
1334         seg_id = pci_seg->id;
1335 
1336         switch (e->type) {
1337         case IVHD_DEV_ALL:
1338 
1339             DUMP_printk("  DEV_ALL\t\t\tflags: %02x\n", e->flags);
1340 
1341             for (dev_i = 0; dev_i <= pci_seg->last_bdf; ++dev_i)
1342                 set_dev_entry_from_acpi(iommu, dev_i, e->flags, 0);
1343             break;
1344         case IVHD_DEV_SELECT:
1345 
1346             DUMP_printk("  DEV_SELECT\t\t\t devid: %04x:%02x:%02x.%x "
1347                     "flags: %02x\n",
1348                     seg_id, PCI_BUS_NUM(e->devid),
1349                     PCI_SLOT(e->devid),
1350                     PCI_FUNC(e->devid),
1351                     e->flags);
1352 
1353             devid = e->devid;
1354             set_dev_entry_from_acpi(iommu, devid, e->flags, 0);
1355             break;
1356         case IVHD_DEV_SELECT_RANGE_START:
1357 
1358             DUMP_printk("  DEV_SELECT_RANGE_START\t "
1359                     "devid: %04x:%02x:%02x.%x flags: %02x\n",
1360                     seg_id, PCI_BUS_NUM(e->devid),
1361                     PCI_SLOT(e->devid),
1362                     PCI_FUNC(e->devid),
1363                     e->flags);
1364 
1365             devid_start = e->devid;
1366             flags = e->flags;
1367             ext_flags = 0;
1368             alias = false;
1369             break;
1370         case IVHD_DEV_ALIAS:
1371 
1372             DUMP_printk("  DEV_ALIAS\t\t\t devid: %04x:%02x:%02x.%x "
1373                     "flags: %02x devid_to: %02x:%02x.%x\n",
1374                     seg_id, PCI_BUS_NUM(e->devid),
1375                     PCI_SLOT(e->devid),
1376                     PCI_FUNC(e->devid),
1377                     e->flags,
1378                     PCI_BUS_NUM(e->ext >> 8),
1379                     PCI_SLOT(e->ext >> 8),
1380                     PCI_FUNC(e->ext >> 8));
1381 
1382             devid = e->devid;
1383             devid_to = e->ext >> 8;
1384             set_dev_entry_from_acpi(iommu, devid   , e->flags, 0);
1385             set_dev_entry_from_acpi(iommu, devid_to, e->flags, 0);
1386             pci_seg->alias_table[devid] = devid_to;
1387             break;
1388         case IVHD_DEV_ALIAS_RANGE:
1389 
1390             DUMP_printk("  DEV_ALIAS_RANGE\t\t "
1391                     "devid: %04x:%02x:%02x.%x flags: %02x "
1392                     "devid_to: %04x:%02x:%02x.%x\n",
1393                     seg_id, PCI_BUS_NUM(e->devid),
1394                     PCI_SLOT(e->devid),
1395                     PCI_FUNC(e->devid),
1396                     e->flags,
1397                     seg_id, PCI_BUS_NUM(e->ext >> 8),
1398                     PCI_SLOT(e->ext >> 8),
1399                     PCI_FUNC(e->ext >> 8));
1400 
1401             devid_start = e->devid;
1402             flags = e->flags;
1403             devid_to = e->ext >> 8;
1404             ext_flags = 0;
1405             alias = true;
1406             break;
1407         case IVHD_DEV_EXT_SELECT:
1408 
1409             DUMP_printk("  DEV_EXT_SELECT\t\t devid: %04x:%02x:%02x.%x "
1410                     "flags: %02x ext: %08x\n",
1411                     seg_id, PCI_BUS_NUM(e->devid),
1412                     PCI_SLOT(e->devid),
1413                     PCI_FUNC(e->devid),
1414                     e->flags, e->ext);
1415 
1416             devid = e->devid;
1417             set_dev_entry_from_acpi(iommu, devid, e->flags,
1418                         e->ext);
1419             break;
1420         case IVHD_DEV_EXT_SELECT_RANGE:
1421 
1422             DUMP_printk("  DEV_EXT_SELECT_RANGE\t devid: "
1423                     "%04x:%02x:%02x.%x flags: %02x ext: %08x\n",
1424                     seg_id, PCI_BUS_NUM(e->devid),
1425                     PCI_SLOT(e->devid),
1426                     PCI_FUNC(e->devid),
1427                     e->flags, e->ext);
1428 
1429             devid_start = e->devid;
1430             flags = e->flags;
1431             ext_flags = e->ext;
1432             alias = false;
1433             break;
1434         case IVHD_DEV_RANGE_END:
1435 
1436             DUMP_printk("  DEV_RANGE_END\t\t devid: %04x:%02x:%02x.%x\n",
1437                     seg_id, PCI_BUS_NUM(e->devid),
1438                     PCI_SLOT(e->devid),
1439                     PCI_FUNC(e->devid));
1440 
1441             devid = e->devid;
1442             for (dev_i = devid_start; dev_i <= devid; ++dev_i) {
1443                 if (alias) {
1444                     pci_seg->alias_table[dev_i] = devid_to;
1445                     set_dev_entry_from_acpi(iommu,
1446                         devid_to, flags, ext_flags);
1447                 }
1448                 set_dev_entry_from_acpi(iommu, dev_i,
1449                             flags, ext_flags);
1450             }
1451             break;
1452         case IVHD_DEV_SPECIAL: {
1453             u8 handle, type;
1454             const char *var;
1455             u32 devid;
1456             int ret;
1457 
1458             handle = e->ext & 0xff;
1459             devid = PCI_SEG_DEVID_TO_SBDF(seg_id, (e->ext >> 8));
1460             type   = (e->ext >> 24) & 0xff;
1461 
1462             if (type == IVHD_SPECIAL_IOAPIC)
1463                 var = "IOAPIC";
1464             else if (type == IVHD_SPECIAL_HPET)
1465                 var = "HPET";
1466             else
1467                 var = "UNKNOWN";
1468 
1469             DUMP_printk("  DEV_SPECIAL(%s[%d])\t\tdevid: %04x:%02x:%02x.%x\n",
1470                     var, (int)handle,
1471                     seg_id, PCI_BUS_NUM(devid),
1472                     PCI_SLOT(devid),
1473                     PCI_FUNC(devid));
1474 
1475             ret = add_special_device(type, handle, &devid, false);
1476             if (ret)
1477                 return ret;
1478 
1479             /*
1480              * add_special_device might update the devid in case a
1481              * command-line override is present. So call
1482              * set_dev_entry_from_acpi after add_special_device.
1483              */
1484             set_dev_entry_from_acpi(iommu, devid, e->flags, 0);
1485 
1486             break;
1487         }
1488         case IVHD_DEV_ACPI_HID: {
1489             u32 devid;
1490             u8 hid[ACPIHID_HID_LEN];
1491             u8 uid[ACPIHID_UID_LEN];
1492             int ret;
1493 
1494             if (h->type != 0x40) {
1495                 pr_err(FW_BUG "Invalid IVHD device type %#x\n",
1496                        e->type);
1497                 break;
1498             }
1499 
1500             BUILD_BUG_ON(sizeof(e->ext_hid) != ACPIHID_HID_LEN - 1);
1501             memcpy(hid, &e->ext_hid, ACPIHID_HID_LEN - 1);
1502             hid[ACPIHID_HID_LEN - 1] = '\0';
1503 
1504             if (!(*hid)) {
1505                 pr_err(FW_BUG "Invalid HID.\n");
1506                 break;
1507             }
1508 
1509             uid[0] = '\0';
1510             switch (e->uidf) {
1511             case UID_NOT_PRESENT:
1512 
1513                 if (e->uidl != 0)
1514                     pr_warn(FW_BUG "Invalid UID length.\n");
1515 
1516                 break;
1517             case UID_IS_INTEGER:
1518 
1519                 sprintf(uid, "%d", e->uid);
1520 
1521                 break;
1522             case UID_IS_CHARACTER:
1523 
1524                 memcpy(uid, &e->uid, e->uidl);
1525                 uid[e->uidl] = '\0';
1526 
1527                 break;
1528             default:
1529                 break;
1530             }
1531 
1532             devid = PCI_SEG_DEVID_TO_SBDF(seg_id, e->devid);
1533             DUMP_printk("  DEV_ACPI_HID(%s[%s])\t\tdevid: %04x:%02x:%02x.%x\n",
1534                     hid, uid, seg_id,
1535                     PCI_BUS_NUM(devid),
1536                     PCI_SLOT(devid),
1537                     PCI_FUNC(devid));
1538 
1539             flags = e->flags;
1540 
1541             ret = add_acpi_hid_device(hid, uid, &devid, false);
1542             if (ret)
1543                 return ret;
1544 
1545             /*
1546              * add_acpi_hid_device might update the devid in case a
1547              * command-line override is present. So call
1548              * set_dev_entry_from_acpi after add_acpi_hid_device.
1549              */
1550             set_dev_entry_from_acpi(iommu, devid, e->flags, 0);
1551 
1552             break;
1553         }
1554         default:
1555             break;
1556         }
1557 
1558         p += ivhd_entry_length(p);
1559     }
1560 
1561     return 0;
1562 }
1563 
1564 /* Allocate PCI segment data structure */
1565 static struct amd_iommu_pci_seg *__init alloc_pci_segment(u16 id,
1566                       struct acpi_table_header *ivrs_base)
1567 {
1568     struct amd_iommu_pci_seg *pci_seg;
1569     int last_bdf;
1570 
1571     /*
1572      * First parse ACPI tables to find the largest Bus/Dev/Func we need to
1573      * handle in this PCI segment. Based on this information the shared
1574      * data structures for the PCI segments in the system will be allocated.
1575      */
1576     last_bdf = find_last_devid_acpi(ivrs_base, id);
1577     if (last_bdf < 0)
1578         return NULL;
1579 
1580     pci_seg = kzalloc(sizeof(struct amd_iommu_pci_seg), GFP_KERNEL);
1581     if (pci_seg == NULL)
1582         return NULL;
1583 
1584     pci_seg->last_bdf = last_bdf;
1585     DUMP_printk("PCI segment : 0x%0x, last bdf : 0x%04x\n", id, last_bdf);
1586     pci_seg->dev_table_size     = tbl_size(DEV_TABLE_ENTRY_SIZE, last_bdf);
1587     pci_seg->alias_table_size   = tbl_size(ALIAS_TABLE_ENTRY_SIZE, last_bdf);
1588     pci_seg->rlookup_table_size = tbl_size(RLOOKUP_TABLE_ENTRY_SIZE, last_bdf);
1589 
1590     pci_seg->id = id;
1591     init_llist_head(&pci_seg->dev_data_list);
1592     INIT_LIST_HEAD(&pci_seg->unity_map);
1593     list_add_tail(&pci_seg->list, &amd_iommu_pci_seg_list);
1594 
1595     if (alloc_dev_table(pci_seg))
1596         return NULL;
1597     if (alloc_alias_table(pci_seg))
1598         return NULL;
1599     if (alloc_rlookup_table(pci_seg))
1600         return NULL;
1601 
1602     return pci_seg;
1603 }
1604 
1605 static struct amd_iommu_pci_seg *__init get_pci_segment(u16 id,
1606                     struct acpi_table_header *ivrs_base)
1607 {
1608     struct amd_iommu_pci_seg *pci_seg;
1609 
1610     for_each_pci_segment(pci_seg) {
1611         if (pci_seg->id == id)
1612             return pci_seg;
1613     }
1614 
1615     return alloc_pci_segment(id, ivrs_base);
1616 }
1617 
1618 static void __init free_pci_segments(void)
1619 {
1620     struct amd_iommu_pci_seg *pci_seg, *next;
1621 
1622     for_each_pci_segment_safe(pci_seg, next) {
1623         list_del(&pci_seg->list);
1624         free_irq_lookup_table(pci_seg);
1625         free_rlookup_table(pci_seg);
1626         free_alias_table(pci_seg);
1627         free_dev_table(pci_seg);
1628         kfree(pci_seg);
1629     }
1630 }
1631 
1632 static void __init free_iommu_one(struct amd_iommu *iommu)
1633 {
1634     free_cwwb_sem(iommu);
1635     free_command_buffer(iommu);
1636     free_event_buffer(iommu);
1637     free_ppr_log(iommu);
1638     free_ga_log(iommu);
1639     iommu_unmap_mmio_space(iommu);
1640 }
1641 
1642 static void __init free_iommu_all(void)
1643 {
1644     struct amd_iommu *iommu, *next;
1645 
1646     for_each_iommu_safe(iommu, next) {
1647         list_del(&iommu->list);
1648         free_iommu_one(iommu);
1649         kfree(iommu);
1650     }
1651 }
1652 
1653 /*
1654  * Family15h Model 10h-1fh erratum 746 (IOMMU Logging May Stall Translations)
1655  * Workaround:
1656  *     BIOS should disable L2B miscellaneous clock gating by setting
1657  *     L2_L2B_CK_GATE_CONTROL[CKGateL2BMiscDisable](D0F2xF4_x90[2]) = 1b
1658  */
1659 static void amd_iommu_erratum_746_workaround(struct amd_iommu *iommu)
1660 {
1661     u32 value;
1662 
1663     if ((boot_cpu_data.x86 != 0x15) ||
1664         (boot_cpu_data.x86_model < 0x10) ||
1665         (boot_cpu_data.x86_model > 0x1f))
1666         return;
1667 
1668     pci_write_config_dword(iommu->dev, 0xf0, 0x90);
1669     pci_read_config_dword(iommu->dev, 0xf4, &value);
1670 
1671     if (value & BIT(2))
1672         return;
1673 
1674     /* Select NB indirect register 0x90 and enable writing */
1675     pci_write_config_dword(iommu->dev, 0xf0, 0x90 | (1 << 8));
1676 
1677     pci_write_config_dword(iommu->dev, 0xf4, value | 0x4);
1678     pci_info(iommu->dev, "Applying erratum 746 workaround\n");
1679 
1680     /* Clear the enable writing bit */
1681     pci_write_config_dword(iommu->dev, 0xf0, 0x90);
1682 }
1683 
1684 /*
1685  * Family15h Model 30h-3fh (IOMMU Mishandles ATS Write Permission)
1686  * Workaround:
1687  *     BIOS should enable ATS write permission check by setting
1688  *     L2_DEBUG_3[AtsIgnoreIWDis](D0F2xF4_x47[0]) = 1b
1689  */
1690 static void amd_iommu_ats_write_check_workaround(struct amd_iommu *iommu)
1691 {
1692     u32 value;
1693 
1694     if ((boot_cpu_data.x86 != 0x15) ||
1695         (boot_cpu_data.x86_model < 0x30) ||
1696         (boot_cpu_data.x86_model > 0x3f))
1697         return;
1698 
1699     /* Test L2_DEBUG_3[AtsIgnoreIWDis] == 1 */
1700     value = iommu_read_l2(iommu, 0x47);
1701 
1702     if (value & BIT(0))
1703         return;
1704 
1705     /* Set L2_DEBUG_3[AtsIgnoreIWDis] = 1 */
1706     iommu_write_l2(iommu, 0x47, value | BIT(0));
1707 
1708     pci_info(iommu->dev, "Applying ATS write check workaround\n");
1709 }
1710 
1711 /*
1712  * This function ties the initialization of one IOMMU together and
1713  * also allocates the command buffer and programs the hardware.
1714  * It does NOT enable the IOMMU. That is done afterwards.
1715  */
1716 static int __init init_iommu_one(struct amd_iommu *iommu, struct ivhd_header *h,
1717                  struct acpi_table_header *ivrs_base)
1718 {
1719     struct amd_iommu_pci_seg *pci_seg;
1720 
1721     pci_seg = get_pci_segment(h->pci_seg, ivrs_base);
1722     if (pci_seg == NULL)
1723         return -ENOMEM;
1724     iommu->pci_seg = pci_seg;
1725 
1726     raw_spin_lock_init(&iommu->lock);
1727     iommu->cmd_sem_val = 0;
1728 
1729     /* Add IOMMU to internal data structures */
1730     list_add_tail(&iommu->list, &amd_iommu_list);
1731     iommu->index = amd_iommus_present++;
1732 
1733     if (unlikely(iommu->index >= MAX_IOMMUS)) {
1734         WARN(1, "System has more IOMMUs than supported by this driver\n");
1735         return -ENOSYS;
1736     }
1737 
1738     /* Index is fine - add IOMMU to the array */
1739     amd_iommus[iommu->index] = iommu;
1740 
1741     /*
1742      * Copy data from ACPI table entry to the iommu struct
1743      */
1744     iommu->devid   = h->devid;
1745     iommu->cap_ptr = h->cap_ptr;
1746     iommu->mmio_phys = h->mmio_phys;
1747 
1748     switch (h->type) {
1749     case 0x10:
1750         /* Check if IVHD EFR contains proper max banks/counters */
1751         if ((h->efr_attr != 0) &&
1752             ((h->efr_attr & (0xF << 13)) != 0) &&
1753             ((h->efr_attr & (0x3F << 17)) != 0))
1754             iommu->mmio_phys_end = MMIO_REG_END_OFFSET;
1755         else
1756             iommu->mmio_phys_end = MMIO_CNTR_CONF_OFFSET;
1757 
1758         /*
1759          * Note: GA (128-bit IRTE) mode requires cmpxchg16b support.
1760          * GAM also requires GA mode. Therefore, we need to
1761          * check cmpxchg16b support before enabling it.
1762          */
1763         if (!boot_cpu_has(X86_FEATURE_CX16) ||
1764             ((h->efr_attr & (0x1 << IOMMU_FEAT_GASUP_SHIFT)) == 0))
1765             amd_iommu_guest_ir = AMD_IOMMU_GUEST_IR_LEGACY;
1766         break;
1767     case 0x11:
1768     case 0x40:
1769         if (h->efr_reg & (1 << 9))
1770             iommu->mmio_phys_end = MMIO_REG_END_OFFSET;
1771         else
1772             iommu->mmio_phys_end = MMIO_CNTR_CONF_OFFSET;
1773 
1774         /*
1775          * Note: GA (128-bit IRTE) mode requires cmpxchg16b support.
1776          * XT and GAM also require GA mode. Therefore, we need to
1777          * check cmpxchg16b support before enabling them.
1778          */
1779         if (!boot_cpu_has(X86_FEATURE_CX16) ||
1780             ((h->efr_reg & (0x1 << IOMMU_EFR_GASUP_SHIFT)) == 0)) {
1781             amd_iommu_guest_ir = AMD_IOMMU_GUEST_IR_LEGACY;
1782             break;
1783         }
1784 
1785         if (h->efr_reg & BIT(IOMMU_EFR_XTSUP_SHIFT))
1786             amd_iommu_xt_mode = IRQ_REMAP_X2APIC_MODE;
1787 
1788         early_iommu_features_init(iommu, h);
1789 
1790         break;
1791     default:
1792         return -EINVAL;
1793     }
1794 
1795     iommu->mmio_base = iommu_map_mmio_space(iommu->mmio_phys,
1796                         iommu->mmio_phys_end);
1797     if (!iommu->mmio_base)
1798         return -ENOMEM;
1799 
1800     return init_iommu_from_acpi(iommu, h);
1801 }
1802 
1803 static int __init init_iommu_one_late(struct amd_iommu *iommu)
1804 {
1805     int ret;
1806 
1807     if (alloc_cwwb_sem(iommu))
1808         return -ENOMEM;
1809 
1810     if (alloc_command_buffer(iommu))
1811         return -ENOMEM;
1812 
1813     if (alloc_event_buffer(iommu))
1814         return -ENOMEM;
1815 
1816     iommu->int_enabled = false;
1817 
1818     init_translation_status(iommu);
1819     if (translation_pre_enabled(iommu) && !is_kdump_kernel()) {
1820         iommu_disable(iommu);
1821         clear_translation_pre_enabled(iommu);
1822         pr_warn("Translation was enabled for IOMMU:%d but we are not in kdump mode\n",
1823             iommu->index);
1824     }
1825     if (amd_iommu_pre_enabled)
1826         amd_iommu_pre_enabled = translation_pre_enabled(iommu);
1827 
1828     if (amd_iommu_irq_remap) {
1829         ret = amd_iommu_create_irq_domain(iommu);
1830         if (ret)
1831             return ret;
1832     }
1833 
1834     /*
1835      * Make sure IOMMU is not considered to translate itself. The IVRS
1836      * table tells us so, but this is a lie!
1837      */
1838     iommu->pci_seg->rlookup_table[iommu->devid] = NULL;
1839 
1840     return 0;
1841 }
1842 
1843 /**
1844  * get_highest_supported_ivhd_type - Look up the appropriate IVHD type
1845  * @ivrs: Pointer to the IVRS header
1846  *
1847  * This function searches through all IVHD blocks and returns the highest supported IVHD type.
1848  */
1849 static u8 get_highest_supported_ivhd_type(struct acpi_table_header *ivrs)
1850 {
1851     u8 *base = (u8 *)ivrs;
1852     struct ivhd_header *ivhd = (struct ivhd_header *)
1853                     (base + IVRS_HEADER_LENGTH);
1854     u8 last_type = ivhd->type;
1855     u16 devid = ivhd->devid;
1856 
1857     while (((u8 *)ivhd - base < ivrs->length) &&
1858            (ivhd->type <= ACPI_IVHD_TYPE_MAX_SUPPORTED)) {
1859         u8 *p = (u8 *) ivhd;
1860 
1861         if (ivhd->devid == devid)
1862             last_type = ivhd->type;
1863         ivhd = (struct ivhd_header *)(p + ivhd->length);
1864     }
1865 
1866     return last_type;
1867 }
1868 
1869 /*
1870  * Iterates over all IOMMU entries in the ACPI table, allocates the
1871  * IOMMU structure and initializes it with init_iommu_one()
1872  */
1873 static int __init init_iommu_all(struct acpi_table_header *table)
1874 {
1875     u8 *p = (u8 *)table, *end = (u8 *)table;
1876     struct ivhd_header *h;
1877     struct amd_iommu *iommu;
1878     int ret;
1879 
1880     end += table->length;
1881     p += IVRS_HEADER_LENGTH;
1882 
1883     /* Phase 1: Process all IVHD blocks */
1884     while (p < end) {
1885         h = (struct ivhd_header *)p;
1886         if (*p == amd_iommu_target_ivhd_type) {
1887 
1888             DUMP_printk("device: %04x:%02x:%02x.%01x cap: %04x "
1889                     "flags: %01x info %04x\n",
1890                     h->pci_seg, PCI_BUS_NUM(h->devid),
1891                     PCI_SLOT(h->devid), PCI_FUNC(h->devid),
1892                     h->cap_ptr, h->flags, h->info);
1893             DUMP_printk("       mmio-addr: %016llx\n",
1894                     h->mmio_phys);
1895 
1896             iommu = kzalloc(sizeof(struct amd_iommu), GFP_KERNEL);
1897             if (iommu == NULL)
1898                 return -ENOMEM;
1899 
1900             ret = init_iommu_one(iommu, h, table);
1901             if (ret)
1902                 return ret;
1903         }
1904         p += h->length;
1905 
1906     }
1907     WARN_ON(p != end);
1908 
1909     /* Phase 2 : Early feature support check */
1910     get_global_efr();
1911 
1912     /* Phase 3 : Enabling IOMMU features */
1913     for_each_iommu(iommu) {
1914         ret = init_iommu_one_late(iommu);
1915         if (ret)
1916             return ret;
1917     }
1918 
1919     return 0;
1920 }
1921 
1922 static void init_iommu_perf_ctr(struct amd_iommu *iommu)
1923 {
1924     u64 val;
1925     struct pci_dev *pdev = iommu->dev;
1926 
1927     if (!iommu_feature(iommu, FEATURE_PC))
1928         return;
1929 
1930     amd_iommu_pc_present = true;
1931 
1932     pci_info(pdev, "IOMMU performance counters supported\n");
1933 
1934     val = readl(iommu->mmio_base + MMIO_CNTR_CONF_OFFSET);
1935     iommu->max_banks = (u8) ((val >> 12) & 0x3f);
1936     iommu->max_counters = (u8) ((val >> 7) & 0xf);
1937 
1938     return;
1939 }
1940 
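/*
 * Per-IOMMU sysfs attributes. Together with the "ivhd%d" device name used in
 * iommu_init_pci() below, these typically show up as
 * /sys/class/iommu/ivhd<N>/amd-iommu/cap and .../features.
 */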
1941 static ssize_t amd_iommu_show_cap(struct device *dev,
1942                   struct device_attribute *attr,
1943                   char *buf)
1944 {
1945     struct amd_iommu *iommu = dev_to_amd_iommu(dev);
1946     return sprintf(buf, "%x\n", iommu->cap);
1947 }
1948 static DEVICE_ATTR(cap, S_IRUGO, amd_iommu_show_cap, NULL);
1949 
1950 static ssize_t amd_iommu_show_features(struct device *dev,
1951                        struct device_attribute *attr,
1952                        char *buf)
1953 {
1954     struct amd_iommu *iommu = dev_to_amd_iommu(dev);
1955     return sprintf(buf, "%llx:%llx\n", iommu->features2, iommu->features);
1956 }
1957 static DEVICE_ATTR(features, S_IRUGO, amd_iommu_show_features, NULL);
1958 
1959 static struct attribute *amd_iommu_attrs[] = {
1960     &dev_attr_cap.attr,
1961     &dev_attr_features.attr,
1962     NULL,
1963 };
1964 
1965 static struct attribute_group amd_iommu_group = {
1966     .name = "amd-iommu",
1967     .attrs = amd_iommu_attrs,
1968 };
1969 
1970 static const struct attribute_group *amd_iommu_groups[] = {
1971     &amd_iommu_group,
1972     NULL,
1973 };
1974 
1975 /*
1976  * Note: IVHD types 0x11 and 0x40 also contain an exact copy
1977  * of the IOMMU Extended Feature Register [MMIO Offset 0030h].
1978  * Default to EFR in IVHD since it is available sooner (i.e. before PCI init).
1979  */
1980 static void __init late_iommu_features_init(struct amd_iommu *iommu)
1981 {
1982     u64 features, features2;
1983 
1984     if (!(iommu->cap & (1 << IOMMU_CAP_EFR)))
1985         return;
1986 
1987     /* read extended feature bits */
1988     features = readq(iommu->mmio_base + MMIO_EXT_FEATURES);
1989     features2 = readq(iommu->mmio_base + MMIO_EXT_FEATURES2);
1990 
1991     if (!iommu->features) {
1992         iommu->features = features;
1993         iommu->features2 = features2;
1994         return;
1995     }
1996 
1997     /*
1998      * Sanity check and warn if EFR values from
1999      * IVHD and MMIO conflict.
2000      */
2001     if (features != iommu->features ||
2002         features2 != iommu->features2) {
2003         pr_warn(FW_WARN
2004             "EFR mismatch. Use IVHD EFR (%#llx : %#llx), EFR2 (%#llx : %#llx).\n",
2005             features, iommu->features,
2006             features2, iommu->features2);
2007     }
2008 }
2009 
2010 static int __init iommu_init_pci(struct amd_iommu *iommu)
2011 {
2012     int cap_ptr = iommu->cap_ptr;
2013     int ret;
2014 
2015     iommu->dev = pci_get_domain_bus_and_slot(iommu->pci_seg->id,
2016                          PCI_BUS_NUM(iommu->devid),
2017                          iommu->devid & 0xff);
2018     if (!iommu->dev)
2019         return -ENODEV;
2020 
2021     /* Prevent binding other PCI device drivers to IOMMU devices */
2022     iommu->dev->match_driver = false;
2023 
2024     pci_read_config_dword(iommu->dev, cap_ptr + MMIO_CAP_HDR_OFFSET,
2025                   &iommu->cap);
2026 
2027     if (!(iommu->cap & (1 << IOMMU_CAP_IOTLB)))
2028         amd_iommu_iotlb_sup = false;
2029 
2030     late_iommu_features_init(iommu);
2031 
2032     if (iommu_feature(iommu, FEATURE_GT)) {
2033         int glxval;
2034         u32 max_pasid;
2035         u64 pasmax;
2036 
2037         pasmax = iommu->features & FEATURE_PASID_MASK;
2038         pasmax >>= FEATURE_PASID_SHIFT;
2039         max_pasid  = (1 << (pasmax + 1)) - 1;
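        /*
         * Illustrative example: a PASmax field value of 0x0f advertises
         * 16 PASID bits, so max_pasid = (1 << 16) - 1 = 0xffff here.
         */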
2040 
2041         amd_iommu_max_pasid = min(amd_iommu_max_pasid, max_pasid);
2042 
2043         BUG_ON(amd_iommu_max_pasid & ~PASID_MASK);
2044 
2045         glxval   = iommu->features & FEATURE_GLXVAL_MASK;
2046         glxval >>= FEATURE_GLXVAL_SHIFT;
2047 
2048         if (amd_iommu_max_glx_val == -1)
2049             amd_iommu_max_glx_val = glxval;
2050         else
2051             amd_iommu_max_glx_val = min(amd_iommu_max_glx_val, glxval);
2052     }
2053 
2054     if (iommu_feature(iommu, FEATURE_GT) &&
2055         iommu_feature(iommu, FEATURE_PPR)) {
2056         iommu->is_iommu_v2   = true;
2057         amd_iommu_v2_present = true;
2058     }
2059 
2060     if (iommu_feature(iommu, FEATURE_PPR) && alloc_ppr_log(iommu))
2061         return -ENOMEM;
2062 
2063     if (iommu->cap & (1UL << IOMMU_CAP_NPCACHE)) {
2064         pr_info("Using strict mode due to virtualization\n");
2065         iommu_set_dma_strict();
2066         amd_iommu_np_cache = true;
2067     }
2068 
2069     init_iommu_perf_ctr(iommu);
2070 
2071     if (is_rd890_iommu(iommu->dev)) {
2072         int i, j;
2073 
2074         iommu->root_pdev =
2075             pci_get_domain_bus_and_slot(iommu->pci_seg->id,
2076                             iommu->dev->bus->number,
2077                             PCI_DEVFN(0, 0));
2078 
2079         /*
2080          * Some rd890 systems may not be fully reconfigured by the
2081          * BIOS, so it's necessary for us to store this information so
2082          * it can be reprogrammed on resume
2083          */
2084         pci_read_config_dword(iommu->dev, iommu->cap_ptr + 4,
2085                 &iommu->stored_addr_lo);
2086         pci_read_config_dword(iommu->dev, iommu->cap_ptr + 8,
2087                 &iommu->stored_addr_hi);
2088 
2089         /* Low bit locks writes to configuration space */
2090         iommu->stored_addr_lo &= ~1;
2091 
2092         for (i = 0; i < 6; i++)
2093             for (j = 0; j < 0x12; j++)
2094                 iommu->stored_l1[i][j] = iommu_read_l1(iommu, i, j);
2095 
2096         for (i = 0; i < 0x83; i++)
2097             iommu->stored_l2[i] = iommu_read_l2(iommu, i);
2098     }
2099 
2100     amd_iommu_erratum_746_workaround(iommu);
2101     amd_iommu_ats_write_check_workaround(iommu);
2102 
2103     ret = iommu_device_sysfs_add(&iommu->iommu, &iommu->dev->dev,
2104                    amd_iommu_groups, "ivhd%d", iommu->index);
2105     if (ret)
2106         return ret;
2107 
2108     iommu_device_register(&iommu->iommu, &amd_iommu_ops, NULL);
2109 
2110     return pci_enable_device(iommu->dev);
2111 }
2112 
2113 static void print_iommu_info(void)
2114 {
2115     static const char * const feat_str[] = {
2116         "PreF", "PPR", "X2APIC", "NX", "GT", "[5]",
2117         "IA", "GA", "HE", "PC"
2118     };
2119     struct amd_iommu *iommu;
2120 
2121     for_each_iommu(iommu) {
2122         struct pci_dev *pdev = iommu->dev;
2123         int i;
2124 
2125         pci_info(pdev, "Found IOMMU cap 0x%x\n", iommu->cap_ptr);
2126 
2127         if (iommu->cap & (1 << IOMMU_CAP_EFR)) {
2128             pr_info("Extended features (%#llx, %#llx):", iommu->features, iommu->features2);
2129 
2130             for (i = 0; i < ARRAY_SIZE(feat_str); ++i) {
2131                 if (iommu_feature(iommu, (1ULL << i)))
2132                     pr_cont(" %s", feat_str[i]);
2133             }
2134 
2135             if (iommu->features & FEATURE_GAM_VAPIC)
2136                 pr_cont(" GA_vAPIC");
2137 
2138             if (iommu->features & FEATURE_SNP)
2139                 pr_cont(" SNP");
2140 
2141             pr_cont("\n");
2142         }
2143     }
2144     if (irq_remapping_enabled) {
2145         pr_info("Interrupt remapping enabled\n");
2146         if (amd_iommu_xt_mode == IRQ_REMAP_X2APIC_MODE)
2147             pr_info("X2APIC enabled\n");
2148     }
2149 }
2150 
2151 static int __init amd_iommu_init_pci(void)
2152 {
2153     struct amd_iommu *iommu;
2154     struct amd_iommu_pci_seg *pci_seg;
2155     int ret;
2156 
2157     for_each_iommu(iommu) {
2158         ret = iommu_init_pci(iommu);
2159         if (ret) {
2160             pr_err("IOMMU%d: Failed to initialize IOMMU Hardware (error=%d)!\n",
2161                    iommu->index, ret);
2162             goto out;
2163         }
2164         /* Need to setup range after PCI init */
2165         iommu_set_cwwb_range(iommu);
2166     }
2167 
2168     /*
2169      * Order is important here to make sure any unity map requirements are
2170      * fulfilled. The unity mappings are created and written to the device
2171      * table during the amd_iommu_init_api() call.
2172      *
2173      * After that we call init_device_table_dma() to make sure any
2174      * uninitialized DTE will block DMA, and in the end we flush the caches
2175      * of all IOMMUs to make sure the changes to the device table are
2176      * active.
2177      */
2178     ret = amd_iommu_init_api();
2179     if (ret) {
2180         pr_err("IOMMU: Failed to initialize IOMMU-API interface (error=%d)!\n",
2181                ret);
2182         goto out;
2183     }
2184 
2185     for_each_pci_segment(pci_seg)
2186         init_device_table_dma(pci_seg);
2187 
2188     for_each_iommu(iommu)
2189         iommu_flush_all_caches(iommu);
2190 
2191     print_iommu_info();
2192 
2193 out:
2194     return ret;
2195 }
2196 
2197 /****************************************************************************
2198  *
2199  * The following functions initialize the MSI interrupts for all IOMMUs
2200  * in the system. It's a bit challenging because there could be multiple
2201  * IOMMUs per PCI BDF but we can call pci_enable_msi(x) only once per
2202  * pci_dev.
2203  *
2204  ****************************************************************************/
2205 
2206 static int iommu_setup_msi(struct amd_iommu *iommu)
2207 {
2208     int r;
2209 
2210     r = pci_enable_msi(iommu->dev);
2211     if (r)
2212         return r;
2213 
2214     r = request_threaded_irq(iommu->dev->irq,
2215                  amd_iommu_int_handler,
2216                  amd_iommu_int_thread,
2217                  0, "AMD-Vi",
2218                  iommu);
2219 
2220     if (r) {
2221         pci_disable_msi(iommu->dev);
2222         return r;
2223     }
2224 
2225     return 0;
2226 }
2227 
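/*
 * Layout of the IRQ routing information written to the
 * MMIO_INTCAPXT_{EVT,PPR,GALOG} registers below. The 32-bit destination
 * APIC ID is split across destid_0_23 and destid_24_31; e.g. (illustrative)
 * an APIC ID of 0x12345678 is programmed as destid_0_23 = 0x345678 and
 * destid_24_31 = 0x12 by intcapxt_unmask_irq().
 */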
2228 union intcapxt {
2229     u64 capxt;
2230     struct {
2231         u64 reserved_0      :  2,
2232             dest_mode_logical   :  1,
2233             reserved_1      :  5,
2234             destid_0_23     : 24,
2235             vector          :  8,
2236             reserved_2      : 16,
2237             destid_24_31        :  8;
2238     };
2239 } __attribute__ ((packed));
2240 
2241 
2242 static struct irq_chip intcapxt_controller;
2243 
2244 static int intcapxt_irqdomain_activate(struct irq_domain *domain,
2245                        struct irq_data *irqd, bool reserve)
2246 {
2247     return 0;
2248 }
2249 
2250 static void intcapxt_irqdomain_deactivate(struct irq_domain *domain,
2251                       struct irq_data *irqd)
2252 {
2253 }
2254 
2255 
2256 static int intcapxt_irqdomain_alloc(struct irq_domain *domain, unsigned int virq,
2257                     unsigned int nr_irqs, void *arg)
2258 {
2259     struct irq_alloc_info *info = arg;
2260     int i, ret;
2261 
2262     if (!info || info->type != X86_IRQ_ALLOC_TYPE_AMDVI)
2263         return -EINVAL;
2264 
2265     ret = irq_domain_alloc_irqs_parent(domain, virq, nr_irqs, arg);
2266     if (ret < 0)
2267         return ret;
2268 
2269     for (i = virq; i < virq + nr_irqs; i++) {
2270         struct irq_data *irqd = irq_domain_get_irq_data(domain, i);
2271 
2272         irqd->chip = &intcapxt_controller;
2273         irqd->chip_data = info->data;
2274         __irq_set_handler(i, handle_edge_irq, 0, "edge");
2275     }
2276 
2277     return ret;
2278 }
2279 
2280 static void intcapxt_irqdomain_free(struct irq_domain *domain, unsigned int virq,
2281                     unsigned int nr_irqs)
2282 {
2283     irq_domain_free_irqs_top(domain, virq, nr_irqs);
2284 }
2285 
2286 
2287 static void intcapxt_unmask_irq(struct irq_data *irqd)
2288 {
2289     struct amd_iommu *iommu = irqd->chip_data;
2290     struct irq_cfg *cfg = irqd_cfg(irqd);
2291     union intcapxt xt;
2292 
2293     xt.capxt = 0ULL;
2294     xt.dest_mode_logical = apic->dest_mode_logical;
2295     xt.vector = cfg->vector;
2296     xt.destid_0_23 = cfg->dest_apicid & GENMASK(23, 0);
2297     xt.destid_24_31 = cfg->dest_apicid >> 24;
2298 
2299     /*
2300      * Current IOMMU implementation uses the same IRQ for all
2301      * 3 IOMMU interrupts.
2302      */
2303     writeq(xt.capxt, iommu->mmio_base + MMIO_INTCAPXT_EVT_OFFSET);
2304     writeq(xt.capxt, iommu->mmio_base + MMIO_INTCAPXT_PPR_OFFSET);
2305     writeq(xt.capxt, iommu->mmio_base + MMIO_INTCAPXT_GALOG_OFFSET);
2306 }
2307 
2308 static void intcapxt_mask_irq(struct irq_data *irqd)
2309 {
2310     struct amd_iommu *iommu = irqd->chip_data;
2311 
2312     writeq(0, iommu->mmio_base + MMIO_INTCAPXT_EVT_OFFSET);
2313     writeq(0, iommu->mmio_base + MMIO_INTCAPXT_PPR_OFFSET);
2314     writeq(0, iommu->mmio_base + MMIO_INTCAPXT_GALOG_OFFSET);
2315 }
2316 
2317 
2318 static int intcapxt_set_affinity(struct irq_data *irqd,
2319                  const struct cpumask *mask, bool force)
2320 {
2321     struct irq_data *parent = irqd->parent_data;
2322     int ret;
2323 
2324     ret = parent->chip->irq_set_affinity(parent, mask, force);
2325     if (ret < 0 || ret == IRQ_SET_MASK_OK_DONE)
2326         return ret;
2327     return 0;
2328 }
2329 
2330 static int intcapxt_set_wake(struct irq_data *irqd, unsigned int on)
2331 {
2332     return on ? -EOPNOTSUPP : 0;
2333 }
2334 
2335 static struct irq_chip intcapxt_controller = {
2336     .name           = "IOMMU-MSI",
2337     .irq_unmask     = intcapxt_unmask_irq,
2338     .irq_mask       = intcapxt_mask_irq,
2339     .irq_ack        = irq_chip_ack_parent,
2340     .irq_retrigger      = irq_chip_retrigger_hierarchy,
2341     .irq_set_affinity       = intcapxt_set_affinity,
2342     .irq_set_wake       = intcapxt_set_wake,
2343     .flags          = IRQCHIP_MASK_ON_SUSPEND,
2344 };
2345 
2346 static const struct irq_domain_ops intcapxt_domain_ops = {
2347     .alloc          = intcapxt_irqdomain_alloc,
2348     .free           = intcapxt_irqdomain_free,
2349     .activate       = intcapxt_irqdomain_activate,
2350     .deactivate     = intcapxt_irqdomain_deactivate,
2351 };
2352 
2353 
2354 static struct irq_domain *iommu_irqdomain;
2355 
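/*
 * All IOMMUs share a single "AMD-Vi-MSI" irqdomain, created lazily on first
 * use and parented to the x86 vector domain.
 */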
2356 static struct irq_domain *iommu_get_irqdomain(void)
2357 {
2358     struct fwnode_handle *fn;
2359 
2360     /* No need for locking here (yet) as the init is single-threaded */
2361     if (iommu_irqdomain)
2362         return iommu_irqdomain;
2363 
2364     fn = irq_domain_alloc_named_fwnode("AMD-Vi-MSI");
2365     if (!fn)
2366         return NULL;
2367 
2368     iommu_irqdomain = irq_domain_create_hierarchy(x86_vector_domain, 0, 0,
2369                               fn, &intcapxt_domain_ops,
2370                               NULL);
2371     if (!iommu_irqdomain)
2372         irq_domain_free_fwnode(fn);
2373 
2374     return iommu_irqdomain;
2375 }
2376 
2377 static int iommu_setup_intcapxt(struct amd_iommu *iommu)
2378 {
2379     struct irq_domain *domain;
2380     struct irq_alloc_info info;
2381     int irq, ret;
2382 
2383     domain = iommu_get_irqdomain();
2384     if (!domain)
2385         return -ENXIO;
2386 
2387     init_irq_alloc_info(&info, NULL);
2388     info.type = X86_IRQ_ALLOC_TYPE_AMDVI;
2389     info.data = iommu;
2390 
2391     irq = irq_domain_alloc_irqs(domain, 1, NUMA_NO_NODE, &info);
2392     if (irq < 0) {
2393         irq_domain_remove(domain);
2394         return irq;
2395     }
2396 
2397     ret = request_threaded_irq(irq, amd_iommu_int_handler,
2398                    amd_iommu_int_thread, 0, "AMD-Vi", iommu);
2399     if (ret) {
2400         irq_domain_free_irqs(irq, 1);
2401         irq_domain_remove(domain);
2402         return ret;
2403     }
2404 
2405     return 0;
2406 }
2407 
2408 static int iommu_init_irq(struct amd_iommu *iommu)
2409 {
2410     int ret;
2411 
2412     if (iommu->int_enabled)
2413         goto enable_faults;
2414 
2415     if (amd_iommu_xt_mode == IRQ_REMAP_X2APIC_MODE)
2416         ret = iommu_setup_intcapxt(iommu);
2417     else if (iommu->dev->msi_cap)
2418         ret = iommu_setup_msi(iommu);
2419     else
2420         ret = -ENODEV;
2421 
2422     if (ret)
2423         return ret;
2424 
2425     iommu->int_enabled = true;
2426 enable_faults:
2427 
2428     if (amd_iommu_xt_mode == IRQ_REMAP_X2APIC_MODE)
2429         iommu_feature_enable(iommu, CONTROL_INTCAPXT_EN);
2430 
2431     iommu_feature_enable(iommu, CONTROL_EVT_INT_EN);
2432 
2433     if (iommu->ppr_log != NULL)
2434         iommu_feature_enable(iommu, CONTROL_PPRINT_EN);
2435     return 0;
2436 }
2437 
2438 /****************************************************************************
2439  *
2440  * The next functions belong to the third pass of parsing the ACPI
2441  * table. In this last pass the memory mapping requirements are
2442  * gathered (like exclusion and unity mapping ranges).
2443  *
2444  ****************************************************************************/
2445 
2446 static void __init free_unity_maps(void)
2447 {
2448     struct unity_map_entry *entry, *next;
2449     struct amd_iommu_pci_seg *p, *pci_seg;
2450 
2451     for_each_pci_segment_safe(pci_seg, p) {
2452         list_for_each_entry_safe(entry, next, &pci_seg->unity_map, list) {
2453             list_del(&entry->list);
2454             kfree(entry);
2455         }
2456     }
2457 }
2458 
2459 /* Called for each unity map (IVMD) definition found in the ACPI table */
2460 static int __init init_unity_map_range(struct ivmd_header *m,
2461                        struct acpi_table_header *ivrs_base)
2462 {
2463     struct unity_map_entry *e = NULL;
2464     struct amd_iommu_pci_seg *pci_seg;
2465     char *s;
2466 
2467     pci_seg = get_pci_segment(m->pci_seg, ivrs_base);
2468     if (pci_seg == NULL)
2469         return -ENOMEM;
2470 
2471     e = kzalloc(sizeof(*e), GFP_KERNEL);
2472     if (e == NULL)
2473         return -ENOMEM;
2474 
2475     switch (m->type) {
2476     default:
2477         kfree(e);
2478         return 0;
2479     case ACPI_IVMD_TYPE:
2480         s = "IVMD_TYPE\t\t\t";
2481         e->devid_start = e->devid_end = m->devid;
2482         break;
2483     case ACPI_IVMD_TYPE_ALL:
2484         s = "IVMD_TYPE_ALL\t\t";
2485         e->devid_start = 0;
2486         e->devid_end = pci_seg->last_bdf;
2487         break;
2488     case ACPI_IVMD_TYPE_RANGE:
2489         s = "IVMD_TYPE_RANGE\t\t";
2490         e->devid_start = m->devid;
2491         e->devid_end = m->aux;
2492         break;
2493     }
2494     e->address_start = PAGE_ALIGN(m->range_start);
2495     e->address_end = e->address_start + PAGE_ALIGN(m->range_length);
2496     e->prot = m->flags >> 1;
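    /*
     * After the shift, bit 0 of e->prot is IVMD_FLAG_IR (read allowed) and
     * bit 1 is IVMD_FLAG_IW (write allowed); the IVMD_FLAG_UNITY_MAP bit is
     * dropped. Exclusion ranges are forced to r/w just below.
     */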
2497 
2498     /*
2499      * Treat per-device exclusion ranges as r/w unity-mapped regions
2500      * since some buggy BIOSes may overwrite the exclusion range
2501      * (the exclusion_start and exclusion_length members). This
2502      * happens when multiple exclusion ranges (IVMD entries) are
2503      * defined in the ACPI table.
2504      */
2505     if (m->flags & IVMD_FLAG_EXCL_RANGE)
2506         e->prot = (IVMD_FLAG_IW | IVMD_FLAG_IR) >> 1;
2507 
2508     DUMP_printk("%s devid_start: %04x:%02x:%02x.%x devid_end: "
2509             "%04x:%02x:%02x.%x range_start: %016llx range_end: %016llx"
2510             " flags: %x\n", s, m->pci_seg,
2511             PCI_BUS_NUM(e->devid_start), PCI_SLOT(e->devid_start),
2512             PCI_FUNC(e->devid_start), m->pci_seg,
2513             PCI_BUS_NUM(e->devid_end),
2514             PCI_SLOT(e->devid_end), PCI_FUNC(e->devid_end),
2515             e->address_start, e->address_end, m->flags);
2516 
2517     list_add_tail(&e->list, &pci_seg->unity_map);
2518 
2519     return 0;
2520 }
2521 
2522 /* iterates over all memory definitions we find in the ACPI table */
2523 static int __init init_memory_definitions(struct acpi_table_header *table)
2524 {
2525     u8 *p = (u8 *)table, *end = (u8 *)table;
2526     struct ivmd_header *m;
2527 
2528     end += table->length;
2529     p += IVRS_HEADER_LENGTH;
2530 
2531     while (p < end) {
2532         m = (struct ivmd_header *)p;
2533         if (m->flags & (IVMD_FLAG_UNITY_MAP | IVMD_FLAG_EXCL_RANGE))
2534             init_unity_map_range(m, table);
2535 
2536         p += m->length;
2537     }
2538 
2539     return 0;
2540 }
2541 
2542 /*
2543  * Init the device table to not allow DMA access for devices
2544  */
2545 static void init_device_table_dma(struct amd_iommu_pci_seg *pci_seg)
2546 {
2547     u32 devid;
2548     struct dev_table_entry *dev_table = pci_seg->dev_table;
2549 
2550     if (dev_table == NULL)
2551         return;
2552 
2553     for (devid = 0; devid <= pci_seg->last_bdf; ++devid) {
2554         __set_dev_entry_bit(dev_table, devid, DEV_ENTRY_VALID);
2555         if (!amd_iommu_snp_en)
2556             __set_dev_entry_bit(dev_table, devid, DEV_ENTRY_TRANSLATION);
2557     }
2558 }
2559 
2560 static void __init uninit_device_table_dma(struct amd_iommu_pci_seg *pci_seg)
2561 {
2562     u32 devid;
2563     struct dev_table_entry *dev_table = pci_seg->dev_table;
2564 
2565     if (dev_table == NULL)
2566         return;
2567 
2568     for (devid = 0; devid <= pci_seg->last_bdf; ++devid) {
2569         dev_table[devid].data[0] = 0ULL;
2570         dev_table[devid].data[1] = 0ULL;
2571     }
2572 }
2573 
2574 static void init_device_table(void)
2575 {
2576     struct amd_iommu_pci_seg *pci_seg;
2577     u32 devid;
2578 
2579     if (!amd_iommu_irq_remap)
2580         return;
2581 
2582     for_each_pci_segment(pci_seg) {
2583         for (devid = 0; devid <= pci_seg->last_bdf; ++devid)
2584             __set_dev_entry_bit(pci_seg->dev_table,
2585                         devid, DEV_ENTRY_IRQ_TBL_EN);
2586     }
2587 }
2588 
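/*
 * Mirror the ACPI IVHD flag bits (HT_TUN/PASSPW/RESPASSPW/ISOC) into the
 * corresponding IOMMU control register bits, enable coherent accesses and
 * set the invalidation timeout.
 */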
2589 static void iommu_init_flags(struct amd_iommu *iommu)
2590 {
2591     iommu->acpi_flags & IVHD_FLAG_HT_TUN_EN_MASK ?
2592         iommu_feature_enable(iommu, CONTROL_HT_TUN_EN) :
2593         iommu_feature_disable(iommu, CONTROL_HT_TUN_EN);
2594 
2595     iommu->acpi_flags & IVHD_FLAG_PASSPW_EN_MASK ?
2596         iommu_feature_enable(iommu, CONTROL_PASSPW_EN) :
2597         iommu_feature_disable(iommu, CONTROL_PASSPW_EN);
2598 
2599     iommu->acpi_flags & IVHD_FLAG_RESPASSPW_EN_MASK ?
2600         iommu_feature_enable(iommu, CONTROL_RESPASSPW_EN) :
2601         iommu_feature_disable(iommu, CONTROL_RESPASSPW_EN);
2602 
2603     iommu->acpi_flags & IVHD_FLAG_ISOC_EN_MASK ?
2604         iommu_feature_enable(iommu, CONTROL_ISOC_EN) :
2605         iommu_feature_disable(iommu, CONTROL_ISOC_EN);
2606 
2607     /*
2608      * make IOMMU memory accesses cache coherent
2609      */
2610     iommu_feature_enable(iommu, CONTROL_COHERENT_EN);
2611 
2612     /* Set IOTLB invalidation timeout to 1s */
2613     iommu_set_inv_tlb_timeout(iommu, CTRL_INV_TO_1S);
2614 }
2615 
2616 static void iommu_apply_resume_quirks(struct amd_iommu *iommu)
2617 {
2618     int i, j;
2619     u32 ioc_feature_control;
2620     struct pci_dev *pdev = iommu->root_pdev;
2621 
2622     /* RD890 BIOSes may not have completely reconfigured the iommu */
2623     if (!is_rd890_iommu(iommu->dev) || !pdev)
2624         return;
2625 
2626     /*
2627      * First, we need to ensure that the iommu is enabled. This is
2628      * controlled by a register in the northbridge
2629      */
2630 
2631     /* Select Northbridge indirect register 0x75 and enable writing */
2632     pci_write_config_dword(pdev, 0x60, 0x75 | (1 << 7));
2633     pci_read_config_dword(pdev, 0x64, &ioc_feature_control);
2634 
2635     /* Enable the iommu */
2636     if (!(ioc_feature_control & 0x1))
2637         pci_write_config_dword(pdev, 0x64, ioc_feature_control | 1);
2638 
2639     /* Restore the iommu BAR */
2640     pci_write_config_dword(iommu->dev, iommu->cap_ptr + 4,
2641                    iommu->stored_addr_lo);
2642     pci_write_config_dword(iommu->dev, iommu->cap_ptr + 8,
2643                    iommu->stored_addr_hi);
2644 
2645     /* Restore the l1 indirect regs for each of the 6 l1s */
2646     for (i = 0; i < 6; i++)
2647         for (j = 0; j < 0x12; j++)
2648             iommu_write_l1(iommu, i, j, iommu->stored_l1[i][j]);
2649 
2650     /* Restore the l2 indirect regs */
2651     for (i = 0; i < 0x83; i++)
2652         iommu_write_l2(iommu, i, iommu->stored_l2[i]);
2653 
2654     /* Lock PCI setup registers */
2655     pci_write_config_dword(iommu->dev, iommu->cap_ptr + 4,
2656                    iommu->stored_addr_lo | 1);
2657 }
2658 
2659 static void iommu_enable_ga(struct amd_iommu *iommu)
2660 {
2661 #ifdef CONFIG_IRQ_REMAP
2662     switch (amd_iommu_guest_ir) {
2663     case AMD_IOMMU_GUEST_IR_VAPIC:
2664     case AMD_IOMMU_GUEST_IR_LEGACY_GA:
2665         iommu_feature_enable(iommu, CONTROL_GA_EN);
2666         iommu->irte_ops = &irte_128_ops;
2667         break;
2668     default:
2669         iommu->irte_ops = &irte_32_ops;
2670         break;
2671     }
2672 #endif
2673 }
2674 
2675 static void early_enable_iommu(struct amd_iommu *iommu)
2676 {
2677     iommu_disable(iommu);
2678     iommu_init_flags(iommu);
2679     iommu_set_device_table(iommu);
2680     iommu_enable_command_buffer(iommu);
2681     iommu_enable_event_buffer(iommu);
2682     iommu_set_exclusion_range(iommu);
2683     iommu_enable_ga(iommu);
2684     iommu_enable_xt(iommu);
2685     iommu_enable(iommu);
2686     iommu_flush_all_caches(iommu);
2687 }
2688 
2689 /*
2690  * This function finally enables all IOMMUs found in the system after
2691  * they have been initialized.
2692  *
2693  * Or, if running in a kdump kernel with all IOMMUs pre-enabled, try to copy
2694  * the old content of the device table entries. If that is not the case or the
2695  * copy failed, just continue as a normal kernel would.
2696  */
2697 static void early_enable_iommus(void)
2698 {
2699     struct amd_iommu *iommu;
2700     struct amd_iommu_pci_seg *pci_seg;
2701 
2702     if (!copy_device_table()) {
2703         /*
2704          * If we get here because copying the device table from the old
2705          * kernel failed even though all IOMMUs were enabled, print an error
2706          * message and free the allocated old_dev_tbl_cpy.
2707          */
2708         if (amd_iommu_pre_enabled)
2709             pr_err("Failed to copy DEV table from previous kernel.\n");
2710 
2711         for_each_pci_segment(pci_seg) {
2712             if (pci_seg->old_dev_tbl_cpy != NULL) {
2713                 free_pages((unsigned long)pci_seg->old_dev_tbl_cpy,
2714                         get_order(pci_seg->dev_table_size));
2715                 pci_seg->old_dev_tbl_cpy = NULL;
2716             }
2717         }
2718 
2719         for_each_iommu(iommu) {
2720             clear_translation_pre_enabled(iommu);
2721             early_enable_iommu(iommu);
2722         }
2723     } else {
2724         pr_info("Copied DEV table from previous kernel.\n");
2725 
2726         for_each_pci_segment(pci_seg) {
2727             free_pages((unsigned long)pci_seg->dev_table,
2728                    get_order(pci_seg->dev_table_size));
2729             pci_seg->dev_table = pci_seg->old_dev_tbl_cpy;
2730         }
2731 
2732         for_each_iommu(iommu) {
2733             iommu_disable_command_buffer(iommu);
2734             iommu_disable_event_buffer(iommu);
2735             iommu_enable_command_buffer(iommu);
2736             iommu_enable_event_buffer(iommu);
2737             iommu_enable_ga(iommu);
2738             iommu_enable_xt(iommu);
2739             iommu_set_device_table(iommu);
2740             iommu_flush_all_caches(iommu);
2741         }
2742     }
2743 }
2744 
2745 static void enable_iommus_v2(void)
2746 {
2747     struct amd_iommu *iommu;
2748 
2749     for_each_iommu(iommu) {
2750         iommu_enable_ppr_log(iommu);
2751         iommu_enable_gt(iommu);
2752     }
2753 }
2754 
2755 static void enable_iommus_vapic(void)
2756 {
2757 #ifdef CONFIG_IRQ_REMAP
2758     u32 status, i;
2759     struct amd_iommu *iommu;
2760 
2761     for_each_iommu(iommu) {
2762         /*
2763          * Disable GALog if already running. It could have been enabled
2764          * in the previous boot before kdump.
2765          */
2766         status = readl(iommu->mmio_base + MMIO_STATUS_OFFSET);
2767         if (!(status & MMIO_STATUS_GALOG_RUN_MASK))
2768             continue;
2769 
2770         iommu_feature_disable(iommu, CONTROL_GALOG_EN);
2771         iommu_feature_disable(iommu, CONTROL_GAINT_EN);
2772 
2773         /*
2774          * Need to poll and wait for the GALogRun bit to read zero before
2775          * we can safely set/modify the GA Log registers.
2776          */
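        /*
         * Illustrative upper bound: LOOP_TIMEOUT (2000000) iterations with a
         * 10us delay each means we give up after roughly 20 seconds.
         */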
2777         for (i = 0; i < LOOP_TIMEOUT; ++i) {
2778             status = readl(iommu->mmio_base + MMIO_STATUS_OFFSET);
2779             if (!(status & MMIO_STATUS_GALOG_RUN_MASK))
2780                 break;
2781             udelay(10);
2782         }
2783 
2784         if (WARN_ON(i >= LOOP_TIMEOUT))
2785             return;
2786     }
2787 
2788     if (AMD_IOMMU_GUEST_IR_VAPIC(amd_iommu_guest_ir) &&
2789         !check_feature_on_all_iommus(FEATURE_GAM_VAPIC)) {
2790         amd_iommu_guest_ir = AMD_IOMMU_GUEST_IR_LEGACY_GA;
2791         return;
2792     }
2793 
2794     if (amd_iommu_snp_en &&
2795         !FEATURE_SNPAVICSUP_GAM(amd_iommu_efr2)) {
2796         pr_warn("Force to disable Virtual APIC due to SNP\n");
2797         amd_iommu_guest_ir = AMD_IOMMU_GUEST_IR_LEGACY_GA;
2798         return;
2799     }
2800 
2801     /* Enabling GAM and SNPAVIC support */
2802     for_each_iommu(iommu) {
2803         if (iommu_init_ga_log(iommu) ||
2804             iommu_ga_log_enable(iommu))
2805             return;
2806 
2807         iommu_feature_enable(iommu, CONTROL_GAM_EN);
2808         if (amd_iommu_snp_en)
2809             iommu_feature_enable(iommu, CONTROL_SNPAVIC_EN);
2810     }
2811 
2812     amd_iommu_irq_ops.capability |= (1 << IRQ_POSTING_CAP);
2813     pr_info("Virtual APIC enabled\n");
2814 #endif
2815 }
2816 
2817 static void enable_iommus(void)
2818 {
2819     early_enable_iommus();
2820     enable_iommus_vapic();
2821     enable_iommus_v2();
2822 }
2823 
2824 static void disable_iommus(void)
2825 {
2826     struct amd_iommu *iommu;
2827 
2828     for_each_iommu(iommu)
2829         iommu_disable(iommu);
2830 
2831 #ifdef CONFIG_IRQ_REMAP
2832     if (AMD_IOMMU_GUEST_IR_VAPIC(amd_iommu_guest_ir))
2833         amd_iommu_irq_ops.capability &= ~(1 << IRQ_POSTING_CAP);
2834 #endif
2835 }
2836 
2837 /*
2838  * Suspend/Resume support
2839  * disable suspend until real resume implemented
2840  */
2841 
2842 static void amd_iommu_resume(void)
2843 {
2844     struct amd_iommu *iommu;
2845 
2846     for_each_iommu(iommu)
2847         iommu_apply_resume_quirks(iommu);
2848 
2849     /* re-load the hardware */
2850     enable_iommus();
2851 
2852     amd_iommu_enable_interrupts();
2853 }
2854 
2855 static int amd_iommu_suspend(void)
2856 {
2857     /* disable IOMMUs to go out of the way for BIOS */
2858     disable_iommus();
2859 
2860     return 0;
2861 }
2862 
2863 static struct syscore_ops amd_iommu_syscore_ops = {
2864     .suspend = amd_iommu_suspend,
2865     .resume = amd_iommu_resume,
2866 };
2867 
2868 static void __init free_iommu_resources(void)
2869 {
2870     kmem_cache_destroy(amd_iommu_irq_cache);
2871     amd_iommu_irq_cache = NULL;
2872 
2873     free_iommu_all();
2874     free_pci_segments();
2875 }
2876 
2877 /* SB IOAPIC is always on this device in AMD systems */
2878 #define IOAPIC_SB_DEVID     ((0x00 << 8) | PCI_DEVFN(0x14, 0))
2879 
2880 static bool __init check_ioapic_information(void)
2881 {
2882     const char *fw_bug = FW_BUG;
2883     bool ret, has_sb_ioapic;
2884     int idx;
2885 
2886     has_sb_ioapic = false;
2887     ret           = false;
2888 
2889     /*
2890      * If we have map overrides on the kernel command line the
2891      * messages in this function might not describe firmware bugs
2892      * anymore - so be careful
2893      */
2894     if (cmdline_maps)
2895         fw_bug = "";
2896 
2897     for (idx = 0; idx < nr_ioapics; idx++) {
2898         int devid, id = mpc_ioapic_id(idx);
2899 
2900         devid = get_ioapic_devid(id);
2901         if (devid < 0) {
2902             pr_err("%s: IOAPIC[%d] not in IVRS table\n",
2903                 fw_bug, id);
2904             ret = false;
2905         } else if (devid == IOAPIC_SB_DEVID) {
2906             has_sb_ioapic = true;
2907             ret           = true;
2908         }
2909     }
2910 
2911     if (!has_sb_ioapic) {
2912         /*
2913          * We expect the SB IOAPIC to be listed in the IVRS
2914          * table. The system timer is connected to the SB IOAPIC
2915          * and if we don't have it in the list the system will
2916          * panic at boot time.  This situation usually happens
2917          * when the BIOS is buggy and provides us the wrong
2918          * device id for the IOAPIC in the system.
2919          */
2920         pr_err("%s: No southbridge IOAPIC found\n", fw_bug);
2921     }
2922 
2923     if (!ret)
2924         pr_err("Disabling interrupt remapping\n");
2925 
2926     return ret;
2927 }
2928 
2929 static void __init free_dma_resources(void)
2930 {
2931     free_pages((unsigned long)amd_iommu_pd_alloc_bitmap,
2932            get_order(MAX_DOMAIN_ID/8));
2933     amd_iommu_pd_alloc_bitmap = NULL;
2934 
2935     free_unity_maps();
2936 }
2937 
2938 static void __init ivinfo_init(void *ivrs)
2939 {
2940     amd_iommu_ivinfo = *((u32 *)(ivrs + IOMMU_IVINFO_OFFSET));
2941 }
2942 
2943 /*
2944  * This is the hardware init function for AMD IOMMU in the system.
2945  * This function is called either from amd_iommu_init or from the interrupt
2946  * remapping setup code.
2947  *
2948  * This function basically parses the ACPI table for AMD IOMMU (IVRS)
2949  * four times:
2950  *
2951  *  1 pass) Discover the most comprehensive IVHD type to use.
2952  *
2953  *  2 pass) Find the highest PCI device id the driver has to handle.
2954  *      Based on this information, the size of the data structures
2955  *      that need to be allocated is determined.
2956  *
2957  *  3 pass) Initialize the data structures just allocated with the
2958  *      information in the ACPI table about available AMD IOMMUs
2959  *      in the system. It also maps the PCI devices in the
2960  *      system to specific IOMMUs
2961  *
2962  *  4 pass) After the basic data structures are allocated and
2963  *      initialized we update them with information about memory
2964  *      remapping requirements parsed out of the ACPI table in
2965  *      this last pass.
2966  *
2967  * After everything is set up the IOMMUs are enabled and the necessary
2968  * hotplug and suspend notifiers are registered.
2969  */
2970 static int __init early_amd_iommu_init(void)
2971 {
2972     struct acpi_table_header *ivrs_base;
2973     int remap_cache_sz, ret;
2974     acpi_status status;
2975 
2976     if (!amd_iommu_detected)
2977         return -ENODEV;
2978 
2979     status = acpi_get_table("IVRS", 0, &ivrs_base);
2980     if (status == AE_NOT_FOUND)
2981         return -ENODEV;
2982     else if (ACPI_FAILURE(status)) {
2983         const char *err = acpi_format_exception(status);
2984         pr_err("IVRS table error: %s\n", err);
2985         return -EINVAL;
2986     }
2987 
2988     /*
2989      * Validate checksum here so we don't need to do it when
2990      * we actually parse the table
2991      */
2992     ret = check_ivrs_checksum(ivrs_base);
2993     if (ret)
2994         goto out;
2995 
2996     ivinfo_init(ivrs_base);
2997 
2998     amd_iommu_target_ivhd_type = get_highest_supported_ivhd_type(ivrs_base);
2999     DUMP_printk("Using IVHD type %#x\n", amd_iommu_target_ivhd_type);
3000 
3001     /* Device table - directly used by all IOMMUs */
3002     ret = -ENOMEM;
3003 
3004     amd_iommu_pd_alloc_bitmap = (void *)__get_free_pages(
3005                         GFP_KERNEL | __GFP_ZERO,
3006                         get_order(MAX_DOMAIN_ID/8));
3007     if (amd_iommu_pd_alloc_bitmap == NULL)
3008         goto out;
3009 
3010     /*
3011      * Never allocate domain 0 because it's used as the non-allocated and
3012      * error value placeholder.
3013      */
3014     __set_bit(0, amd_iommu_pd_alloc_bitmap);
3015 
3016     /*
3017      * now the data structures are allocated and basically initialized
3018      * start the real acpi table scan
3019      */
3020     ret = init_iommu_all(ivrs_base);
3021     if (ret)
3022         goto out;
3023 
3024     /* Disable any previously enabled IOMMUs */
3025     if (!is_kdump_kernel() || amd_iommu_disabled)
3026         disable_iommus();
3027 
3028     if (amd_iommu_irq_remap)
3029         amd_iommu_irq_remap = check_ioapic_information();
3030 
3031     if (amd_iommu_irq_remap) {
3032         struct amd_iommu_pci_seg *pci_seg;
3033         /*
3034          * Interrupt remapping enabled, create kmem_cache for the
3035          * remapping tables.
3036          */
3037         ret = -ENOMEM;
3038         if (!AMD_IOMMU_GUEST_IR_GA(amd_iommu_guest_ir))
3039             remap_cache_sz = MAX_IRQS_PER_TABLE * sizeof(u32);
3040         else
3041             remap_cache_sz = MAX_IRQS_PER_TABLE * (sizeof(u64) * 2);
3042         amd_iommu_irq_cache = kmem_cache_create("irq_remap_cache",
3043                             remap_cache_sz,
3044                             DTE_INTTAB_ALIGNMENT,
3045                             0, NULL);
3046         if (!amd_iommu_irq_cache)
3047             goto out;
3048 
3049         for_each_pci_segment(pci_seg) {
3050             if (alloc_irq_lookup_table(pci_seg))
3051                 goto out;
3052         }
3053     }
3054 
3055     ret = init_memory_definitions(ivrs_base);
3056     if (ret)
3057         goto out;
3058 
3059     /* init the device table */
3060     init_device_table();
3061 
3062 out:
3063     /* Don't leak any ACPI memory */
3064     acpi_put_table(ivrs_base);
3065 
3066     return ret;
3067 }
3068 
3069 static int amd_iommu_enable_interrupts(void)
3070 {
3071     struct amd_iommu *iommu;
3072     int ret = 0;
3073 
3074     for_each_iommu(iommu) {
3075         ret = iommu_init_irq(iommu);
3076         if (ret)
3077             goto out;
3078     }
3079 
3080 out:
3081     return ret;
3082 }
3083 
3084 static bool __init detect_ivrs(void)
3085 {
3086     struct acpi_table_header *ivrs_base;
3087     acpi_status status;
3088     int i;
3089 
3090     status = acpi_get_table("IVRS", 0, &ivrs_base);
3091     if (status == AE_NOT_FOUND)
3092         return false;
3093     else if (ACPI_FAILURE(status)) {
3094         const char *err = acpi_format_exception(status);
3095         pr_err("IVRS table error: %s\n", err);
3096         return false;
3097     }
3098 
3099     acpi_put_table(ivrs_base);
3100 
3101     if (amd_iommu_force_enable)
3102         goto out;
3103 
3104     /* Don't use IOMMU if there is Stoney Ridge graphics */
3105     for (i = 0; i < 32; i++) {
3106         u32 pci_id;
3107 
3108         pci_id = read_pci_config(0, i, 0, 0);
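        /* Low 16 bits = vendor ID (0x1002, AMD/ATI); high 16 bits = device ID (0x98e4, Stoney Ridge GPU). */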
3109         if ((pci_id & 0xffff) == 0x1002 && (pci_id >> 16) == 0x98e4) {
3110             pr_info("Disable IOMMU on Stoney Ridge\n");
3111             return false;
3112         }
3113     }
3114 
3115 out:
3116     /* Make sure ACS will be enabled during PCI probe */
3117     pci_request_acs();
3118 
3119     return true;
3120 }
3121 
3122 /****************************************************************************
3123  *
3124  * AMD IOMMU Initialization State Machine
3125  *
3126  ****************************************************************************/
3127 
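/*
 * Overview of the state transitions driven by state_next() (derived from the
 * switch below):
 *
 *   IOMMU_START_STATE -> IOMMU_IVRS_DETECTED -> IOMMU_ACPI_FINISHED ->
 *   IOMMU_ENABLED -> IOMMU_PCI_INIT -> IOMMU_INTERRUPTS_EN -> IOMMU_INITIALIZED
 *
 * IOMMU_NOT_FOUND, IOMMU_CMDLINE_DISABLED and IOMMU_INIT_ERROR are terminal
 * error states; iommu_go_to_state() stops when it reaches one of them.
 */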
3128 static int __init state_next(void)
3129 {
3130     int ret = 0;
3131 
3132     switch (init_state) {
3133     case IOMMU_START_STATE:
3134         if (!detect_ivrs()) {
3135             init_state  = IOMMU_NOT_FOUND;
3136             ret     = -ENODEV;
3137         } else {
3138             init_state  = IOMMU_IVRS_DETECTED;
3139         }
3140         break;
3141     case IOMMU_IVRS_DETECTED:
3142         if (amd_iommu_disabled) {
3143             init_state = IOMMU_CMDLINE_DISABLED;
3144             ret = -EINVAL;
3145         } else {
3146             ret = early_amd_iommu_init();
3147             init_state = ret ? IOMMU_INIT_ERROR : IOMMU_ACPI_FINISHED;
3148         }
3149         break;
3150     case IOMMU_ACPI_FINISHED:
3151         early_enable_iommus();
3152         x86_platform.iommu_shutdown = disable_iommus;
3153         init_state = IOMMU_ENABLED;
3154         break;
3155     case IOMMU_ENABLED:
3156         register_syscore_ops(&amd_iommu_syscore_ops);
3157         ret = amd_iommu_init_pci();
3158         init_state = ret ? IOMMU_INIT_ERROR : IOMMU_PCI_INIT;
3159         enable_iommus_vapic();
3160         enable_iommus_v2();
3161         break;
3162     case IOMMU_PCI_INIT:
3163         ret = amd_iommu_enable_interrupts();
3164         init_state = ret ? IOMMU_INIT_ERROR : IOMMU_INTERRUPTS_EN;
3165         break;
3166     case IOMMU_INTERRUPTS_EN:
3167         init_state = IOMMU_INITIALIZED;
3168         break;
3169     case IOMMU_INITIALIZED:
3170         /* Nothing to do */
3171         break;
3172     case IOMMU_NOT_FOUND:
3173     case IOMMU_INIT_ERROR:
3174     case IOMMU_CMDLINE_DISABLED:
3175         /* Error states => do nothing */
3176         ret = -EINVAL;
3177         break;
3178     default:
3179         /* Unknown state */
3180         BUG();
3181     }
3182 
3183     if (ret) {
3184         free_dma_resources();
3185         if (!irq_remapping_enabled) {
3186             disable_iommus();
3187             free_iommu_resources();
3188         } else {
3189             struct amd_iommu *iommu;
3190             struct amd_iommu_pci_seg *pci_seg;
3191 
3192             for_each_pci_segment(pci_seg)
3193                 uninit_device_table_dma(pci_seg);
3194 
3195             for_each_iommu(iommu)
3196                 iommu_flush_all_caches(iommu);
3197         }
3198     }
3199     return ret;
3200 }
3201 
3202 static int __init iommu_go_to_state(enum iommu_init_state state)
3203 {
3204     int ret = -EINVAL;
3205 
3206     while (init_state != state) {
3207         if (init_state == IOMMU_NOT_FOUND         ||
3208             init_state == IOMMU_INIT_ERROR        ||
3209             init_state == IOMMU_CMDLINE_DISABLED)
3210             break;
3211         ret = state_next();
3212     }
3213 
3214     return ret;
3215 }
3216 
3217 #ifdef CONFIG_IRQ_REMAP
3218 int __init amd_iommu_prepare(void)
3219 {
3220     int ret;
3221 
3222     amd_iommu_irq_remap = true;
3223 
3224     ret = iommu_go_to_state(IOMMU_ACPI_FINISHED);
3225     if (ret) {
3226         amd_iommu_irq_remap = false;
3227         return ret;
3228     }
3229 
3230     return amd_iommu_irq_remap ? 0 : -ENODEV;
3231 }
3232 
3233 int __init amd_iommu_enable(void)
3234 {
3235     int ret;
3236 
3237     ret = iommu_go_to_state(IOMMU_ENABLED);
3238     if (ret)
3239         return ret;
3240 
3241     irq_remapping_enabled = 1;
3242     return amd_iommu_xt_mode;
3243 }
3244 
3245 void amd_iommu_disable(void)
3246 {
3247     amd_iommu_suspend();
3248 }
3249 
3250 int amd_iommu_reenable(int mode)
3251 {
3252     amd_iommu_resume();
3253 
3254     return 0;
3255 }
3256 
3257 int __init amd_iommu_enable_faulting(void)
3258 {
3259     /* We enable MSI later when PCI is initialized */
3260     return 0;
3261 }
3262 #endif
3263 
3264 /*
3265  * This is the core init function for AMD IOMMU hardware in the system.
3266  * This function is called from the generic x86 DMA layer initialization
3267  * code.
3268  */
3269 static int __init amd_iommu_init(void)
3270 {
3271     struct amd_iommu *iommu;
3272     int ret;
3273 
3274     ret = iommu_go_to_state(IOMMU_INITIALIZED);
3275 #ifdef CONFIG_GART_IOMMU
3276     if (ret && list_empty(&amd_iommu_list)) {
3277         /*
3278          * We failed to initialize the AMD IOMMU - try fallback
3279          * to GART if possible.
3280          */
3281         gart_iommu_init();
3282     }
3283 #endif
3284 
3285     for_each_iommu(iommu)
3286         amd_iommu_debugfs_setup(iommu);
3287 
3288     return ret;
3289 }
3290 
3291 static bool amd_iommu_sme_check(void)
3292 {
3293     if (!cc_platform_has(CC_ATTR_HOST_MEM_ENCRYPT) ||
3294         (boot_cpu_data.x86 != 0x17))
3295         return true;
3296 
3297     /* For Fam17h, a specific level of support is required */
3298     if (boot_cpu_data.microcode >= 0x08001205)
3299         return true;
3300 
3301     if ((boot_cpu_data.microcode >= 0x08001126) &&
3302         (boot_cpu_data.microcode <= 0x080011ff))
3303         return true;
3304 
3305     pr_notice("IOMMU not currently supported when SME is active\n");
3306 
3307     return false;
3308 }
3309 
3310 /****************************************************************************
3311  *
3312  * Early detect code. This code runs at IOMMU detection time in the DMA
3313  * layer. It just looks if there is an IVRS ACPI table to detect AMD
3314  * IOMMUs
3315  *
3316  ****************************************************************************/
3317 int __init amd_iommu_detect(void)
3318 {
3319     int ret;
3320 
3321     if (no_iommu || (iommu_detected && !gart_iommu_aperture))
3322         return -ENODEV;
3323 
3324     if (!amd_iommu_sme_check())
3325         return -ENODEV;
3326 
3327     ret = iommu_go_to_state(IOMMU_IVRS_DETECTED);
3328     if (ret)
3329         return ret;
3330 
3331     amd_iommu_detected = true;
3332     iommu_detected = 1;
3333     x86_init.iommu.iommu_init = amd_iommu_init;
3334 
3335     return 1;
3336 }
3337 
3338 /****************************************************************************
3339  *
3340  * Parsing functions for the AMD IOMMU specific kernel command line
3341  * options.
3342  *
3343  ****************************************************************************/
3344 
3345 static int __init parse_amd_iommu_dump(char *str)
3346 {
3347     amd_iommu_dump = true;
3348 
3349     return 1;
3350 }
3351 
3352 static int __init parse_amd_iommu_intr(char *str)
3353 {
3354     for (; *str; ++str) {
3355         if (strncmp(str, "legacy", 6) == 0) {
3356             amd_iommu_guest_ir = AMD_IOMMU_GUEST_IR_LEGACY_GA;
3357             break;
3358         }
3359         if (strncmp(str, "vapic", 5) == 0) {
3360             amd_iommu_guest_ir = AMD_IOMMU_GUEST_IR_VAPIC;
3361             break;
3362         }
3363     }
3364     return 1;
3365 }
3366 
3367 static int __init parse_amd_iommu_options(char *str)
3368 {
3369     for (; *str; ++str) {
3370         if (strncmp(str, "fullflush", 9) == 0) {
3371             pr_warn("amd_iommu=fullflush deprecated; use iommu.strict=1 instead\n");
3372             iommu_set_dma_strict();
3373         }
3374         if (strncmp(str, "force_enable", 12) == 0)
3375             amd_iommu_force_enable = true;
3376         if (strncmp(str, "off", 3) == 0)
3377             amd_iommu_disabled = true;
3378         if (strncmp(str, "force_isolation", 15) == 0)
3379             amd_iommu_force_isolation = true;
3380     }
3381 
3382     return 1;
3383 }
3384 
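/*
 * Illustrative examples of the accepted formats (values are made up):
 *   ivrs_ioapic[32]=00:14.0        - IOAPIC id 32 at PCI 00:14.0, segment 0
 *   ivrs_ioapic[32]=0001:00:14.0   - same, but on PCI segment 0x0001
 */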
3385 static int __init parse_ivrs_ioapic(char *str)
3386 {
3387     u32 seg = 0, bus, dev, fn;
3388     int ret, id, i;
3389     u32 devid;
3390 
3391     ret = sscanf(str, "[%d]=%x:%x.%x", &id, &bus, &dev, &fn);
3392     if (ret != 4) {
3393         ret = sscanf(str, "[%d]=%x:%x:%x.%x", &id, &seg, &bus, &dev, &fn);
3394         if (ret != 5) {
3395             pr_err("Invalid command line: ivrs_ioapic%s\n", str);
3396             return 1;
3397         }
3398     }
3399 
3400     if (early_ioapic_map_size == EARLY_MAP_SIZE) {
3401         pr_err("Early IOAPIC map overflow - ignoring ivrs_ioapic%s\n",
3402             str);
3403         return 1;
3404     }
3405 
3406     devid = IVRS_GET_SBDF_ID(seg, bus, dev, fn);
3407 
3408     cmdline_maps            = true;
3409     i               = early_ioapic_map_size++;
3410     early_ioapic_map[i].id      = id;
3411     early_ioapic_map[i].devid   = devid;
3412     early_ioapic_map[i].cmd_line    = true;
3413 
3414     return 1;
3415 }
3416 
3417 static int __init parse_ivrs_hpet(char *str)
3418 {
3419     u32 seg = 0, bus, dev, fn;
3420     int ret, id, i;
3421     u32 devid;
3422 
3423     ret = sscanf(str, "[%d]=%x:%x.%x", &id, &bus, &dev, &fn);
3424     if (ret != 4) {
3425         ret = sscanf(str, "[%d]=%x:%x:%x.%x", &id, &seg, &bus, &dev, &fn);
3426         if (ret != 5) {
3427             pr_err("Invalid command line: ivrs_hpet%s\n", str);
3428             return 1;
3429         }
3430     }
3431 
3432     if (early_hpet_map_size == EARLY_MAP_SIZE) {
3433         pr_err("Early HPET map overflow - ignoring ivrs_hpet%s\n",
3434             str);
3435         return 1;
3436     }
3437 
3438     devid = IVRS_GET_SBDF_ID(seg, bus, dev, fn);
3439 
3440     cmdline_maps            = true;
3441     i               = early_hpet_map_size++;
3442     early_hpet_map[i].id        = id;
3443     early_hpet_map[i].devid     = devid;
3444     early_hpet_map[i].cmd_line  = true;
3445 
3446     return 1;
3447 }
3448 
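/*
 * Example with hypothetical values: "ivrs_acpihid[00:14.5]=AMD0020:0" maps
 * the ACPI device with HID "AMD0020" and UID "0" to PCI device 00:14.5.
 * The HID and UID are split out of the string after '=' at the first ':'
 * by the strsep() call below.
 */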
3449 static int __init parse_ivrs_acpihid(char *str)
3450 {
3451     u32 seg = 0, bus, dev, fn;
3452     char *hid, *uid, *p;
3453     char acpiid[ACPIHID_UID_LEN + ACPIHID_HID_LEN] = {0};
3454     int ret, i;
3455 
3456     ret = sscanf(str, "[%x:%x.%x]=%s", &bus, &dev, &fn, acpiid);
3457     if (ret != 4) {
3458         ret = sscanf(str, "[%x:%x:%x.%x]=%s", &seg, &bus, &dev, &fn, acpiid);
3459         if (ret != 5) {
3460             pr_err("Invalid command line: ivrs_acpihid(%s)\n", str);
3461             return 1;
3462         }
3463     }
3464 
3465     p = acpiid;
3466     hid = strsep(&p, ":");
3467     uid = p;
3468 
3469     if (!hid || !(*hid) || !uid) {
3470         pr_err("Invalid command line: hid or uid\n");
3471         return 1;
3472     }
3473 
    /* Mirror the overflow check done for the IOAPIC and HPET maps above. */
    if (early_acpihid_map_size == EARLY_MAP_SIZE) {
        pr_err("Early ACPIHID map overflow - ignoring ivrs_acpihid%s\n",
            str);
        return 1;
    }

3474     i = early_acpihid_map_size++;
3475     memcpy(early_acpihid_map[i].hid, hid, strlen(hid));
3476     memcpy(early_acpihid_map[i].uid, uid, strlen(uid));
3477     early_acpihid_map[i].devid = IVRS_GET_SBDF_ID(seg, bus, dev, fn);
3478     early_acpihid_map[i].cmd_line   = true;
3479 
3480     return 1;
3481 }
3482 
3483 __setup("amd_iommu_dump",   parse_amd_iommu_dump);
3484 __setup("amd_iommu=",       parse_amd_iommu_options);
3485 __setup("amd_iommu_intr=",  parse_amd_iommu_intr);
3486 __setup("ivrs_ioapic",      parse_ivrs_ioapic);
3487 __setup("ivrs_hpet",        parse_ivrs_hpet);
3488 __setup("ivrs_acpihid",     parse_ivrs_acpihid);
3489 
3490 bool amd_iommu_v2_supported(void)
3491 {
3492     /*
3493      * Since DTE[Mode]=0 is prohibited on SNP-enabled systems
3494      * (i.e. EFR[SNPSup]=1), the IOMMUv2 page table cannot be used
3495      * without also setting up an IOMMUv1 page table.
3496      */
3497     return amd_iommu_v2_present && !amd_iommu_snp_en;
3498 }
3499 EXPORT_SYMBOL(amd_iommu_v2_supported);
3500 
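/*
 * Return the idx'th IOMMU in the system-wide IOMMU list, or NULL if fewer
 * than idx + 1 IOMMUs are present.  Used by the performance-counter helpers
 * below, which address IOMMUs by index.
 */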
3501 struct amd_iommu *get_amd_iommu(unsigned int idx)
3502 {
3503     unsigned int i = 0;
3504     struct amd_iommu *iommu;
3505 
3506     for_each_iommu(iommu)
3507         if (i++ == idx)
3508             return iommu;
3509     return NULL;
3510 }
3511 
3512 /****************************************************************************
3513  *
3514  * IOMMU EFR Performance Counter support. This code provides access to
3515  * the IOMMU performance counter (PC) functionality.
3516  *
3517  ****************************************************************************/
3518 
3519 u8 amd_iommu_pc_get_max_banks(unsigned int idx)
3520 {
3521     struct amd_iommu *iommu = get_amd_iommu(idx);
3522 
3523     if (iommu)
3524         return iommu->max_banks;
3525 
3526     return 0;
3527 }
3528 EXPORT_SYMBOL(amd_iommu_pc_get_max_banks);
3529 
3530 bool amd_iommu_pc_supported(void)
3531 {
3532     return amd_iommu_pc_present;
3533 }
3534 EXPORT_SYMBOL(amd_iommu_pc_supported);
3535 
3536 u8 amd_iommu_pc_get_max_counters(unsigned int idx)
3537 {
3538     struct amd_iommu *iommu = get_amd_iommu(idx);
3539 
3540     if (iommu)
3541         return iommu->max_counters;
3542 
3543     return 0;
3544 }
3545 EXPORT_SYMBOL(amd_iommu_pc_get_max_counters);
3546 
3547 static int iommu_pc_get_set_reg(struct amd_iommu *iommu, u8 bank, u8 cntr,
3548                 u8 fxn, u64 *value, bool is_write)
3549 {
3550     u32 offset;
3551     u32 max_offset_lim;
3552 
3553     /* Make sure the IOMMU PC resource is available */
3554     if (!amd_iommu_pc_present)
3555         return -ENODEV;
3556 
3557     /* Check for valid iommu and pc register indexing */
3558     if (WARN_ON(!iommu || (fxn > 0x28) || (fxn & 7)))
3559         return -ENODEV;
3560 
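    /*
     * Performance-counter registers live in the 0x4XXXX MMIO region: the
     * bank index is OR'ed with 0x40 and shifted into bits 12+, the counter
     * index selects bits 8-11, and fxn is the 8-byte-aligned register
     * offset within that counter.  For example bank 0, counter 1, fxn 0x08
     * gives offset 0x40108.
     */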
3561     offset = (u32)(((0x40 | bank) << 12) | (cntr << 8) | fxn);
3562 
3563     /* Limit the offset to the hw defined mmio region aperture */
3564     max_offset_lim = (u32)(((0x40 | iommu->max_banks) << 12) |
3565                 (iommu->max_counters << 8) | 0x28);
3566     if ((offset < MMIO_CNTR_REG_OFFSET) ||
3567         (offset > max_offset_lim))
3568         return -EINVAL;
3569 
3570     if (is_write) {
3571         u64 val = *value & GENMASK_ULL(47, 0);
3572 
3573         writel((u32)val, iommu->mmio_base + offset);
3574         writel((val >> 32), iommu->mmio_base + offset + 4);
3575     } else {
3576         *value = readl(iommu->mmio_base + offset + 4);
3577         *value <<= 32;
3578         *value |= readl(iommu->mmio_base + offset);
3579         *value &= GENMASK_ULL(47, 0);
3580     }
3581 
3582     return 0;
3583 }
3584 
3585 int amd_iommu_pc_get_reg(struct amd_iommu *iommu, u8 bank, u8 cntr, u8 fxn, u64 *value)
3586 {
3587     if (!iommu)
3588         return -EINVAL;
3589 
3590     return iommu_pc_get_set_reg(iommu, bank, cntr, fxn, value, false);
3591 }
3592 
3593 int amd_iommu_pc_set_reg(struct amd_iommu *iommu, u8 bank, u8 cntr, u8 fxn, u64 *value)
3594 {
3595     if (!iommu)
3596         return -EINVAL;
3597 
3598     return iommu_pc_get_set_reg(iommu, bank, cntr, fxn, value, true);
3599 }
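
/*
 * Illustrative caller sketch (hypothetical, for documentation only): read
 * the counter register (fxn 0x00) of bank 0, counter 0 on the first IOMMU,
 * assuming at least one IOMMU is present:
 *
 *     u64 val;
 *     struct amd_iommu *iommu = get_amd_iommu(0);
 *
 *     if (iommu && !amd_iommu_pc_get_reg(iommu, 0, 0, 0, &val))
 *         pr_debug("IOMMU PC bank 0, counter 0: 0x%llx\n", val);
 */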
3600 
3601 #ifdef CONFIG_AMD_MEM_ENCRYPT
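/*
 * Enable SNP support in the IOMMU driver.  Returns 0 on success, or -EINVAL
 * when the IOMMU is disabled or in passthrough mode, when initialization has
 * already progressed past the IOMMU_ENABLED state, or when not all IOMMUs
 * report the SNP feature.
 */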
3602 int amd_iommu_snp_enable(void)
3603 {
3604     /*
3605      * SNP support requires that the IOMMU is enabled and not
3606      * configured in passthrough mode.
3607      */
3608     if (no_iommu || iommu_default_passthrough()) {
3609         pr_err("SNP: IOMMU is disabled or configured in passthrough mode, SNP cannot be supported\n");
3610         return -EINVAL;
3611     }
3612 
3613     /*
3614      * Prevent enabling SNP after the IOMMU_ENABLED state, because doing
3615      * so would affect how the IOMMU driver sets up its data structures
3616      * and configures the IOMMU hardware.
3617      */
3618     if (init_state > IOMMU_ENABLED) {
3619         pr_err("SNP: Too late to enable SNP for IOMMU.\n");
3620         return -EINVAL;
3621     }
3622 
3623     amd_iommu_snp_en = check_feature_on_all_iommus(FEATURE_SNP);
3624     if (!amd_iommu_snp_en)
3625         return -EINVAL;
3626 
3627     pr_info("SNP enabled\n");
3628 
3629     /* Enforce IOMMU v1 pagetable when SNP is enabled. */
3630     if (amd_iommu_pgtable != AMD_IOMMU_V1) {
3631         pr_warn("Forcing use of the AMD IOMMU v1 page table due to SNP\n");
3632         amd_iommu_pgtable = AMD_IOMMU_V1;
3633     }
3634 
3635     return 0;
3636 }
3637 #endif