0001 // SPDX-License-Identifier: GPL-2.0
0002 #include <linux/kernel.h>
0003 #include <linux/pgtable.h>
0004 
0005 #include <linux/string.h>
0006 #include <linux/bitops.h>
0007 #include <linux/smp.h>
0008 #include <linux/sched.h>
0009 #include <linux/sched/clock.h>
0010 #include <linux/semaphore.h>
0011 #include <linux/thread_info.h>
0012 #include <linux/init.h>
0013 #include <linux/uaccess.h>
0014 #include <linux/workqueue.h>
0015 #include <linux/delay.h>
0016 #include <linux/cpuhotplug.h>
0017 
0018 #include <asm/cpufeature.h>
0019 #include <asm/msr.h>
0020 #include <asm/bugs.h>
0021 #include <asm/cpu.h>
0022 #include <asm/intel-family.h>
0023 #include <asm/microcode_intel.h>
0024 #include <asm/hwcap2.h>
0025 #include <asm/elf.h>
0026 #include <asm/cpu_device_id.h>
0027 #include <asm/cmdline.h>
0028 #include <asm/traps.h>
0029 #include <asm/resctrl.h>
0030 #include <asm/numa.h>
0031 #include <asm/thermal.h>
0032 
0033 #ifdef CONFIG_X86_64
0034 #include <linux/topology.h>
0035 #endif
0036 
0037 #include "cpu.h"
0038 
0039 #ifdef CONFIG_X86_LOCAL_APIC
0040 #include <asm/mpspec.h>
0041 #include <asm/apic.h>
0042 #endif
0043 
0044 enum split_lock_detect_state {
0045     sld_off = 0,
0046     sld_warn,
0047     sld_fatal,
0048     sld_ratelimit,
0049 };
0050 
0051 /*
0052  * Default to sld_off because most systems do not support split lock detection.
0053  * sld_state_setup() will switch this to sld_warn on systems that support
0054  * split lock/bus lock detect, unless there is a command line override.
0055  */
0056 static enum split_lock_detect_state sld_state __ro_after_init = sld_off;
0057 static u64 msr_test_ctrl_cache __ro_after_init;
0058 
0059 /*
0060  * With a name like MSR_TEST_CTL it should go without saying, but don't touch
0061  * MSR_TEST_CTL unless the CPU is one of the whitelisted models.  Writing it
0062  * on CPUs that do not support SLD can cause fireworks, even when writing '0'.
0063  */
0064 static bool cpu_model_supports_sld __ro_after_init;
0065 
0066 /*
0067  * Processors which have self-snooping capability can handle conflicting
0068  * memory type across CPUs by snooping its own cache. However, there exist
0069  * CPU models in which having conflicting memory types still leads to
0070  * unpredictable behavior, machine check errors, or hangs. Clear this
0071  * feature to prevent its use on machines with known errata.
0072  */
0073 static void check_memory_type_self_snoop_errata(struct cpuinfo_x86 *c)
0074 {
0075     switch (c->x86_model) {
0076     case INTEL_FAM6_CORE_YONAH:
0077     case INTEL_FAM6_CORE2_MEROM:
0078     case INTEL_FAM6_CORE2_MEROM_L:
0079     case INTEL_FAM6_CORE2_PENRYN:
0080     case INTEL_FAM6_CORE2_DUNNINGTON:
0081     case INTEL_FAM6_NEHALEM:
0082     case INTEL_FAM6_NEHALEM_G:
0083     case INTEL_FAM6_NEHALEM_EP:
0084     case INTEL_FAM6_NEHALEM_EX:
0085     case INTEL_FAM6_WESTMERE:
0086     case INTEL_FAM6_WESTMERE_EP:
0087     case INTEL_FAM6_SANDYBRIDGE:
0088         setup_clear_cpu_cap(X86_FEATURE_SELFSNOOP);
0089     }
0090 }
0091 
0092 static bool ring3mwait_disabled __read_mostly;
0093 
0094 static int __init ring3mwait_disable(char *__unused)
0095 {
0096     ring3mwait_disabled = true;
0097     return 1;
0098 }
0099 __setup("ring3mwait=disable", ring3mwait_disable);
0100 
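     /*
      * Ring 3 MONITOR/MWAIT exists only on Xeon Phi (Knights Landing and
      * Knights Mill). When enabled below, the corresponding bit is set in
      * the per-CPU MSR_MISC_FEATURES_ENABLES shadow and user space learns
      * about it via HWCAP2_RING3MWAIT; booting with "ring3mwait=disable"
      * keeps it off.
      */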
0101 static void probe_xeon_phi_r3mwait(struct cpuinfo_x86 *c)
0102 {
0103     /*
0104      * Ring 3 MONITOR/MWAIT feature cannot be detected without
0105      * cpu model and family comparison.
0106      */
0107     if (c->x86 != 6)
0108         return;
0109     switch (c->x86_model) {
0110     case INTEL_FAM6_XEON_PHI_KNL:
0111     case INTEL_FAM6_XEON_PHI_KNM:
0112         break;
0113     default:
0114         return;
0115     }
0116 
0117     if (ring3mwait_disabled)
0118         return;
0119 
0120     set_cpu_cap(c, X86_FEATURE_RING3MWAIT);
0121     this_cpu_or(msr_misc_features_shadow,
0122             1UL << MSR_MISC_FEATURES_ENABLES_RING3MWAIT_BIT);
0123 
0124     if (c == &boot_cpu_data)
0125         ELF_HWCAP2 |= HWCAP2_RING3MWAIT;
0126 }
0127 
0128 /*
0129  * Early microcode releases for the Spectre v2 mitigation were broken.
0130  * Information taken from:
0131  * - https://newsroom.intel.com/wp-content/uploads/sites/11/2018/03/microcode-update-guidance.pdf
0132  * - https://kb.vmware.com/s/article/52345
0133  * - Microcode revisions observed in the wild
0134  * - Release note from 20180108 microcode release
0135  */
0136 struct sku_microcode {
0137     u8 model;
0138     u8 stepping;
0139     u32 microcode;
0140 };
0141 static const struct sku_microcode spectre_bad_microcodes[] = {
0142     { INTEL_FAM6_KABYLAKE,      0x0B,   0x80 },
0143     { INTEL_FAM6_KABYLAKE,      0x0A,   0x80 },
0144     { INTEL_FAM6_KABYLAKE,      0x09,   0x80 },
0145     { INTEL_FAM6_KABYLAKE_L,    0x0A,   0x80 },
0146     { INTEL_FAM6_KABYLAKE_L,    0x09,   0x80 },
0147     { INTEL_FAM6_SKYLAKE_X,     0x03,   0x0100013e },
0148     { INTEL_FAM6_SKYLAKE_X,     0x04,   0x0200003c },
0149     { INTEL_FAM6_BROADWELL,     0x04,   0x28 },
0150     { INTEL_FAM6_BROADWELL_G,   0x01,   0x1b },
0151     { INTEL_FAM6_BROADWELL_D,   0x02,   0x14 },
0152     { INTEL_FAM6_BROADWELL_D,   0x03,   0x07000011 },
0153     { INTEL_FAM6_BROADWELL_X,   0x01,   0x0b000025 },
0154     { INTEL_FAM6_HASWELL_L,     0x01,   0x21 },
0155     { INTEL_FAM6_HASWELL_G,     0x01,   0x18 },
0156     { INTEL_FAM6_HASWELL,       0x03,   0x23 },
0157     { INTEL_FAM6_HASWELL_X,     0x02,   0x3b },
0158     { INTEL_FAM6_HASWELL_X,     0x04,   0x10 },
0159     { INTEL_FAM6_IVYBRIDGE_X,   0x04,   0x42a },
0160     /* Observed in the wild */
0161     { INTEL_FAM6_SANDYBRIDGE_X, 0x06,   0x61b },
0162     { INTEL_FAM6_SANDYBRIDGE_X, 0x07,   0x712 },
0163 };
0164 
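     /*
      * A CPU is treated as affected when its model and stepping match an
      * entry above and the loaded microcode revision is at or below the
      * listed one; newer microcode on the same stepping is assumed to
      * contain the fix.
      */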
0165 static bool bad_spectre_microcode(struct cpuinfo_x86 *c)
0166 {
0167     int i;
0168 
0169     /*
0170      * We know that hypervisors lie to us about the microcode version, so
0171      * we may as well hope that the host is running the correct version.
0172      */
0173     if (cpu_has(c, X86_FEATURE_HYPERVISOR))
0174         return false;
0175 
0176     if (c->x86 != 6)
0177         return false;
0178 
0179     for (i = 0; i < ARRAY_SIZE(spectre_bad_microcodes); i++) {
0180         if (c->x86_model == spectre_bad_microcodes[i].model &&
0181             c->x86_stepping == spectre_bad_microcodes[i].stepping)
0182             return (c->microcode <= spectre_bad_microcodes[i].microcode);
0183     }
0184     return false;
0185 }
0186 
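     /*
      * Collect the microcode signature for this CPU: the CPUID(1)
      * signature, the processor-flags mask derived from the platform ID,
      * and the loaded microcode revision. The (val[1] >> 18) & 7 below
      * extracts bits 52:50 of MSR_IA32_PLATFORM_ID (val[1] is the high
      * half of the MSR), so a platform ID of e.g. 2 yields csig.pf = 1 << 2.
      */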
0187 int intel_cpu_collect_info(struct ucode_cpu_info *uci)
0188 {
0189     unsigned int val[2];
0190     unsigned int family, model;
0191     struct cpu_signature csig = { 0 };
0192     unsigned int eax, ebx, ecx, edx;
0193 
0194     memset(uci, 0, sizeof(*uci));
0195 
0196     eax = 0x00000001;
0197     ecx = 0;
0198     native_cpuid(&eax, &ebx, &ecx, &edx);
0199     csig.sig = eax;
0200 
0201     family = x86_family(eax);
0202     model  = x86_model(eax);
0203 
0204     if (model >= 5 || family > 6) {
0205         /* get processor flags from MSR 0x17 */
0206         native_rdmsr(MSR_IA32_PLATFORM_ID, val[0], val[1]);
0207         csig.pf = 1 << ((val[1] >> 18) & 7);
0208     }
0209 
0210     csig.rev = intel_get_microcode_revision();
0211 
0212     uci->cpu_sig = csig;
0213     uci->valid = 1;
0214 
0215     return 0;
0216 }
0217 EXPORT_SYMBOL_GPL(intel_cpu_collect_info);
0218 
0219 static void early_init_intel(struct cpuinfo_x86 *c)
0220 {
0221     u64 misc_enable;
0222 
0223     /* Unmask CPUID levels if masked: */
0224     if (c->x86 > 6 || (c->x86 == 6 && c->x86_model >= 0xd)) {
0225         if (msr_clear_bit(MSR_IA32_MISC_ENABLE,
0226                   MSR_IA32_MISC_ENABLE_LIMIT_CPUID_BIT) > 0) {
0227             c->cpuid_level = cpuid_eax(0);
0228             get_cpu_cap(c);
0229         }
0230     }
0231 
0232     if ((c->x86 == 0xf && c->x86_model >= 0x03) ||
0233         (c->x86 == 0x6 && c->x86_model >= 0x0e))
0234         set_cpu_cap(c, X86_FEATURE_CONSTANT_TSC);
0235 
0236     if (c->x86 >= 6 && !cpu_has(c, X86_FEATURE_IA64))
0237         c->microcode = intel_get_microcode_revision();
0238 
0239     /* If any of the speculation control features are set, check the blacklist and clear the lot */
0240     if ((cpu_has(c, X86_FEATURE_SPEC_CTRL) ||
0241          cpu_has(c, X86_FEATURE_INTEL_STIBP) ||
0242          cpu_has(c, X86_FEATURE_IBRS) || cpu_has(c, X86_FEATURE_IBPB) ||
0243          cpu_has(c, X86_FEATURE_STIBP)) && bad_spectre_microcode(c)) {
0244         pr_warn("Intel Spectre v2 broken microcode detected; disabling Speculation Control\n");
0245         setup_clear_cpu_cap(X86_FEATURE_IBRS);
0246         setup_clear_cpu_cap(X86_FEATURE_IBPB);
0247         setup_clear_cpu_cap(X86_FEATURE_STIBP);
0248         setup_clear_cpu_cap(X86_FEATURE_SPEC_CTRL);
0249         setup_clear_cpu_cap(X86_FEATURE_MSR_SPEC_CTRL);
0250         setup_clear_cpu_cap(X86_FEATURE_INTEL_STIBP);
0251         setup_clear_cpu_cap(X86_FEATURE_SSBD);
0252         setup_clear_cpu_cap(X86_FEATURE_SPEC_CTRL_SSBD);
0253     }
0254 
0255     /*
0256      * Atom erratum AAE44/AAF40/AAG38/AAH41:
0257      *
0258      * A race condition between speculative fetches and invalidating
0259      * a large page.  This is worked around in microcode, but we
0260      * need the microcode to have already been loaded... so if it is
0261      * not, recommend a BIOS update and disable large pages.
0262      */
0263     if (c->x86 == 6 && c->x86_model == 0x1c && c->x86_stepping <= 2 &&
0264         c->microcode < 0x20e) {
0265         pr_warn("Atom PSE erratum detected, BIOS microcode update recommended\n");
0266         clear_cpu_cap(c, X86_FEATURE_PSE);
0267     }
0268 
0269 #ifdef CONFIG_X86_64
0270     set_cpu_cap(c, X86_FEATURE_SYSENTER32);
0271 #else
0272     /* Netburst reports 64 bytes clflush size, but does IO in 128 bytes */
0273     if (c->x86 == 15 && c->x86_cache_alignment == 64)
0274         c->x86_cache_alignment = 128;
0275 #endif
0276 
0277     /* CPUID workaround for 0F33/0F34 CPU */
0278     if (c->x86 == 0xF && c->x86_model == 0x3
0279         && (c->x86_stepping == 0x3 || c->x86_stepping == 0x4))
0280         c->x86_phys_bits = 36;
0281 
0282     /*
0283      * c->x86_power is 8000_0007 edx. Bit 8 is TSC runs at constant rate
0284      * with P/T states and does not stop in deep C-states.
0285      *
0286      * It is also reliable across cores and sockets. (but not across
0287      * cabinets - we turn it off in that case explicitly.)
0288      */
0289     if (c->x86_power & (1 << 8)) {
0290         set_cpu_cap(c, X86_FEATURE_CONSTANT_TSC);
0291         set_cpu_cap(c, X86_FEATURE_NONSTOP_TSC);
0292     }
0293 
0294     /* Penwell and Cloverview have the TSC which doesn't sleep on S3 */
0295     if (c->x86 == 6) {
0296         switch (c->x86_model) {
0297         case INTEL_FAM6_ATOM_SALTWELL_MID:
0298         case INTEL_FAM6_ATOM_SALTWELL_TABLET:
0299         case INTEL_FAM6_ATOM_SILVERMONT_MID:
0300         case INTEL_FAM6_ATOM_AIRMONT_NP:
0301             set_cpu_cap(c, X86_FEATURE_NONSTOP_TSC_S3);
0302             break;
0303         default:
0304             break;
0305         }
0306     }
0307 
0308     /*
0309      * There is a known erratum on Pentium III and Core Solo
0310      * and Core Duo CPUs.
0311      * " Page with PAT set to WC while associated MTRR is UC
0312      *   may consolidate to UC "
0313      * Because of this erratum, it is better to stick with
0314      * setting WC in MTRR rather than using PAT on these CPUs.
0315      *
0316      * Enable PAT WC only on P4, Core 2 or later CPUs.
0317      */
0318     if (c->x86 == 6 && c->x86_model < 15)
0319         clear_cpu_cap(c, X86_FEATURE_PAT);
0320 
0321     /*
0322      * If fast string is not enabled in IA32_MISC_ENABLE for any reason,
0323      * clear the fast string and enhanced fast string CPU capabilities.
0324      */
0325     if (c->x86 > 6 || (c->x86 == 6 && c->x86_model >= 0xd)) {
0326         rdmsrl(MSR_IA32_MISC_ENABLE, misc_enable);
0327         if (!(misc_enable & MSR_IA32_MISC_ENABLE_FAST_STRING)) {
0328             pr_info("Disabled fast string operations\n");
0329             setup_clear_cpu_cap(X86_FEATURE_REP_GOOD);
0330             setup_clear_cpu_cap(X86_FEATURE_ERMS);
0331         }
0332     }
0333 
0334     /*
0335      * Intel Quark Core DevMan_001.pdf section 6.4.11
0336      * "The operating system also is required to invalidate (i.e., flush)
0337      *  the TLB when any changes are made to any of the page table entries.
0338      *  The operating system must reload CR3 to cause the TLB to be flushed"
0339      *
0340      * As a result, boot_cpu_has(X86_FEATURE_PGE) in arch/x86/include/asm/tlbflush.h
0341      * should be false so that __flush_tlb_all() causes CR3 instead of CR4.PGE
0342      * to be modified.
0343      */
0344     if (c->x86 == 5 && c->x86_model == 9) {
0345         pr_info("Disabling PGE capability bit\n");
0346         setup_clear_cpu_cap(X86_FEATURE_PGE);
0347     }
0348 
0349     if (c->cpuid_level >= 0x00000001) {
0350         u32 eax, ebx, ecx, edx;
0351 
0352         cpuid(0x00000001, &eax, &ebx, &ecx, &edx);
0353         /*
0354          * If HTT (EDX[28]) is set EBX[16:23] contain the number of
0355          * apicids which are reserved per package. Store the resulting
0356          * shift value for the package management code.
0357          */
0358         if (edx & (1U << 28))
0359             c->x86_coreid_bits = get_count_order((ebx >> 16) & 0xff);
0360     }
0361 
0362     check_memory_type_self_snoop_errata(c);
0363 
0364     /*
0365      * Get the number of SMT siblings early from the extended topology
0366      * leaf, if available. Otherwise try the legacy SMT detection.
0367      */
0368     if (detect_extended_topology_early(c) < 0)
0369         detect_ht_early(c);
0370 }
0371 
0372 static void bsp_init_intel(struct cpuinfo_x86 *c)
0373 {
0374     resctrl_cpu_detect(c);
0375 }
0376 
0377 #ifdef CONFIG_X86_32
0378 /*
0379  *  Early probe support logic for ppro memory erratum #50
0380  *
0381  *  This is called before we do cpu ident work
0382  */
0383 
0384 int ppro_with_ram_bug(void)
0385 {
0386     /* Uses data from early_cpu_detect now */
0387     if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL &&
0388         boot_cpu_data.x86 == 6 &&
0389         boot_cpu_data.x86_model == 1 &&
0390         boot_cpu_data.x86_stepping < 8) {
0391         pr_info("Pentium Pro with Errata#50 detected. Taking evasive action.\n");
0392         return 1;
0393     }
0394     return 0;
0395 }
0396 
0397 static void intel_smp_check(struct cpuinfo_x86 *c)
0398 {
0399     /* Is this call from identify_secondary_cpu()? */
0400     if (!c->cpu_index)
0401         return;
0402 
0403     /*
0404      * Mask B, Pentium, but not Pentium MMX
0405      */
0406     if (c->x86 == 5 &&
0407         c->x86_stepping >= 1 && c->x86_stepping <= 4 &&
0408         c->x86_model <= 3) {
0409         /*
0410          * Remember we have B step Pentia with bugs
0411          */
0412         WARN_ONCE(1, "WARNING: SMP operation may be unreliable "
0413                     "with B stepping processors.\n");
0414     }
0415 }
0416 
0417 static int forcepae;
0418 static int __init forcepae_setup(char *__unused)
0419 {
0420     forcepae = 1;
0421     return 1;
0422 }
0423 __setup("forcepae", forcepae_setup);
0424 
0425 static void intel_workarounds(struct cpuinfo_x86 *c)
0426 {
0427 #ifdef CONFIG_X86_F00F_BUG
0428     /*
0429      * All models of Pentium and Pentium with MMX technology CPUs
0430      * have the F0 0F bug, which lets nonprivileged users lock up the
0431      * system. Announce that the fault handler will be checking for it.
0432      * The Quark is also family 5, but does not have the same bug.
0433      */
0434     clear_cpu_bug(c, X86_BUG_F00F);
0435     if (c->x86 == 5 && c->x86_model < 9) {
0436         static int f00f_workaround_enabled;
0437 
0438         set_cpu_bug(c, X86_BUG_F00F);
0439         if (!f00f_workaround_enabled) {
0440             pr_notice("Intel Pentium with F0 0F bug - workaround enabled.\n");
0441             f00f_workaround_enabled = 1;
0442         }
0443     }
0444 #endif
0445 
0446     /*
0447      * SEP CPUID bug: Pentium Pro reports SEP but doesn't have it until
0448      * model 3 mask 3
0449      */
0450     if ((c->x86<<8 | c->x86_model<<4 | c->x86_stepping) < 0x633)
0451         clear_cpu_cap(c, X86_FEATURE_SEP);
0452 
0453     /*
0454      * PAE CPUID issue: many Pentium M report no PAE but may have a
0455      * functionally usable PAE implementation.
0456      * Forcefully enable PAE if kernel parameter "forcepae" is present.
0457      */
0458     if (forcepae) {
0459         pr_warn("PAE forced!\n");
0460         set_cpu_cap(c, X86_FEATURE_PAE);
0461         add_taint(TAINT_CPU_OUT_OF_SPEC, LOCKDEP_NOW_UNRELIABLE);
0462     }
0463 
0464     /*
0465      * P4 Xeon erratum 037 workaround.
0466      * Hardware prefetcher may cause stale data to be loaded into the cache.
0467      */
0468     if ((c->x86 == 15) && (c->x86_model == 1) && (c->x86_stepping == 1)) {
0469         if (msr_set_bit(MSR_IA32_MISC_ENABLE,
0470                 MSR_IA32_MISC_ENABLE_PREFETCH_DISABLE_BIT) > 0) {
0471             pr_info("CPU: C0 stepping P4 Xeon detected.\n");
0472             pr_info("CPU: Disabling hardware prefetching (Erratum 037)\n");
0473         }
0474     }
0475 
0476     /*
0477      * See if we have a good local APIC by checking for buggy Pentia,
0478      * i.e. all B steppings and the C2 stepping of P54C when using their
0479      * integrated APIC (see 11AP erratum in "Pentium Processor
0480      * Specification Update").
0481      */
0482     if (boot_cpu_has(X86_FEATURE_APIC) && (c->x86<<8 | c->x86_model<<4) == 0x520 &&
0483         (c->x86_stepping < 0x6 || c->x86_stepping == 0xb))
0484         set_cpu_bug(c, X86_BUG_11AP);
0485 
0486 
0487 #ifdef CONFIG_X86_INTEL_USERCOPY
0488     /*
0489      * Set up the preferred alignment for movsl bulk memory moves
0490      */
0491     switch (c->x86) {
0492     case 4:     /* 486: untested */
0493         break;
0494     case 5:     /* Old Pentia: untested */
0495         break;
0496     case 6:     /* PII/PIII only like movsl with 8-byte alignment */
0497         movsl_mask.mask = 7;
0498         break;
0499     case 15:    /* P4 is OK down to 8-byte alignment */
0500         movsl_mask.mask = 7;
0501         break;
0502     }
0503 #endif
0504 
0505     intel_smp_check(c);
0506 }
0507 #else
0508 static void intel_workarounds(struct cpuinfo_x86 *c)
0509 {
0510 }
0511 #endif
0512 
0513 static void srat_detect_node(struct cpuinfo_x86 *c)
0514 {
0515 #ifdef CONFIG_NUMA
0516     unsigned node;
0517     int cpu = smp_processor_id();
0518 
0519     /* Don't do the funky fallback heuristics the AMD version employs
0520        for now. */
0521     node = numa_cpu_node(cpu);
0522     if (node == NUMA_NO_NODE || !node_online(node)) {
0523         /* reuse the value from init_cpu_to_node() */
0524         node = cpu_to_node(cpu);
0525     }
0526     numa_set_node(cpu, node);
0527 #endif
0528 }
0529 
0530 #define MSR_IA32_TME_ACTIVATE       0x982
0531 
0532 /* Helpers to access TME_ACTIVATE MSR */
0533 #define TME_ACTIVATE_LOCKED(x)      (x & 0x1)
0534 #define TME_ACTIVATE_ENABLED(x)     (x & 0x2)
0535 
0536 #define TME_ACTIVATE_POLICY(x)      ((x >> 4) & 0xf)    /* Bits 7:4 */
0537 #define TME_ACTIVATE_POLICY_AES_XTS_128 0
0538 
0539 #define TME_ACTIVATE_KEYID_BITS(x)  ((x >> 32) & 0xf)   /* Bits 35:32 */
0540 
0541 #define TME_ACTIVATE_CRYPTO_ALGS(x) ((x >> 48) & 0xffff)    /* Bits 63:48 */
0542 #define TME_ACTIVATE_CRYPTO_AES_XTS_128 1
0543 
0544 /* Values for mktme_status (SW only construct) */
0545 #define MKTME_ENABLED           0
0546 #define MKTME_DISABLED          1
0547 #define MKTME_UNINITIALIZED     2
0548 static int mktme_status = MKTME_UNINITIALIZED;
0549 
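     /*
      * Worked example of the layout above: a TME_ACTIVATE value with bits
      * 0 and 1 set (locked + enabled), policy 0 (AES-XTS-128) and
      * KEYID_BITS = 6 means TME is active, 2^6 - 1 = 63 MKTME KeyIDs are
      * usable, and detect_tme() shrinks x86_phys_bits by 6 because the
      * top physical address bits now carry the KeyID.
      */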
0550 static void detect_tme(struct cpuinfo_x86 *c)
0551 {
0552     u64 tme_activate, tme_policy, tme_crypto_algs;
0553     int keyid_bits = 0, nr_keyids = 0;
0554     static u64 tme_activate_cpu0 = 0;
0555 
0556     rdmsrl(MSR_IA32_TME_ACTIVATE, tme_activate);
0557 
0558     if (mktme_status != MKTME_UNINITIALIZED) {
0559         if (tme_activate != tme_activate_cpu0) {
0560             /* Broken BIOS? */
0561             pr_err_once("x86/tme: configuration is inconsistent between CPUs\n");
0562             pr_err_once("x86/tme: MKTME is not usable\n");
0563             mktme_status = MKTME_DISABLED;
0564 
0565             /* Proceed. We may need to exclude bits from x86_phys_bits. */
0566         }
0567     } else {
0568         tme_activate_cpu0 = tme_activate;
0569     }
0570 
0571     if (!TME_ACTIVATE_LOCKED(tme_activate) || !TME_ACTIVATE_ENABLED(tme_activate)) {
0572         pr_info_once("x86/tme: not enabled by BIOS\n");
0573         mktme_status = MKTME_DISABLED;
0574         return;
0575     }
0576 
0577     if (mktme_status != MKTME_UNINITIALIZED)
0578         goto detect_keyid_bits;
0579 
0580     pr_info("x86/tme: enabled by BIOS\n");
0581 
0582     tme_policy = TME_ACTIVATE_POLICY(tme_activate);
0583     if (tme_policy != TME_ACTIVATE_POLICY_AES_XTS_128)
0584         pr_warn("x86/tme: Unknown policy is active: %#llx\n", tme_policy);
0585 
0586     tme_crypto_algs = TME_ACTIVATE_CRYPTO_ALGS(tme_activate);
0587     if (!(tme_crypto_algs & TME_ACTIVATE_CRYPTO_AES_XTS_128)) {
0588         pr_err("x86/mktme: No known encryption algorithm is supported: %#llx\n",
0589                 tme_crypto_algs);
0590         mktme_status = MKTME_DISABLED;
0591     }
0592 detect_keyid_bits:
0593     keyid_bits = TME_ACTIVATE_KEYID_BITS(tme_activate);
0594     nr_keyids = (1UL << keyid_bits) - 1;
0595     if (nr_keyids) {
0596         pr_info_once("x86/mktme: enabled by BIOS\n");
0597         pr_info_once("x86/mktme: %d KeyIDs available\n", nr_keyids);
0598     } else {
0599         pr_info_once("x86/mktme: disabled by BIOS\n");
0600     }
0601 
0602     if (mktme_status == MKTME_UNINITIALIZED) {
0603         /* MKTME is usable */
0604         mktme_status = MKTME_ENABLED;
0605     }
0606 
0607     /*
0608      * KeyID bits effectively lower the number of physical address
0609      * bits.  Update cpuinfo_x86::x86_phys_bits accordingly.
0610      */
0611     c->x86_phys_bits -= keyid_bits;
0612 }
0613 
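     /*
      * CPUID faulting: if MSR_PLATFORM_INFO advertises support, a task can
      * later opt in (e.g. via the ARCH_SET_CPUID arch_prctl) to have CPUID
      * fault in ring 3. Only the capability bit is recorded here.
      */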
0614 static void init_cpuid_fault(struct cpuinfo_x86 *c)
0615 {
0616     u64 msr;
0617 
0618     if (!rdmsrl_safe(MSR_PLATFORM_INFO, &msr)) {
0619         if (msr & MSR_PLATFORM_INFO_CPUID_FAULT)
0620             set_cpu_cap(c, X86_FEATURE_CPUID_FAULT);
0621     }
0622 }
0623 
0624 static void init_intel_misc_features(struct cpuinfo_x86 *c)
0625 {
0626     u64 msr;
0627 
0628     if (rdmsrl_safe(MSR_MISC_FEATURES_ENABLES, &msr))
0629         return;
0630 
0631     /* Clear all MISC features */
0632     this_cpu_write(msr_misc_features_shadow, 0);
0633 
0634     /* Check features and update capabilities and shadow control bits */
0635     init_cpuid_fault(c);
0636     probe_xeon_phi_r3mwait(c);
0637 
0638     msr = this_cpu_read(msr_misc_features_shadow);
0639     wrmsrl(MSR_MISC_FEATURES_ENABLES, msr);
0640 }
0641 
0642 static void split_lock_init(void);
0643 static void bus_lock_init(void);
0644 
0645 static void init_intel(struct cpuinfo_x86 *c)
0646 {
0647     early_init_intel(c);
0648 
0649     intel_workarounds(c);
0650 
0651     /*
0652      * Detect the extended topology information if available. This
0653      * will reinitialise the initial_apicid which will be used
0654      * in init_intel_cacheinfo()
0655      */
0656     detect_extended_topology(c);
0657 
0658     if (!cpu_has(c, X86_FEATURE_XTOPOLOGY)) {
0659         /*
0660          * let's use the legacy cpuid vector 0x1 and 0x4 for topology
0661          * detection.
0662          */
0663         detect_num_cpu_cores(c);
0664 #ifdef CONFIG_X86_32
0665         detect_ht(c);
0666 #endif
0667     }
0668 
0669     init_intel_cacheinfo(c);
0670 
0671     if (c->cpuid_level > 9) {
0672         unsigned eax = cpuid_eax(10);
0673         /* Check for version and the number of counters */
0674         if ((eax & 0xff) && (((eax>>8) & 0xff) > 1))
0675             set_cpu_cap(c, X86_FEATURE_ARCH_PERFMON);
0676     }
0677 
0678     if (cpu_has(c, X86_FEATURE_XMM2))
0679         set_cpu_cap(c, X86_FEATURE_LFENCE_RDTSC);
0680 
0681     if (boot_cpu_has(X86_FEATURE_DS)) {
0682         unsigned int l1, l2;
0683 
0684         rdmsr(MSR_IA32_MISC_ENABLE, l1, l2);
0685         if (!(l1 & MSR_IA32_MISC_ENABLE_BTS_UNAVAIL))
0686             set_cpu_cap(c, X86_FEATURE_BTS);
0687         if (!(l1 & MSR_IA32_MISC_ENABLE_PEBS_UNAVAIL))
0688             set_cpu_cap(c, X86_FEATURE_PEBS);
0689     }
0690 
0691     if (c->x86 == 6 && boot_cpu_has(X86_FEATURE_CLFLUSH) &&
0692         (c->x86_model == 29 || c->x86_model == 46 || c->x86_model == 47))
0693         set_cpu_bug(c, X86_BUG_CLFLUSH_MONITOR);
0694 
0695     if (c->x86 == 6 && boot_cpu_has(X86_FEATURE_MWAIT) &&
0696         ((c->x86_model == INTEL_FAM6_ATOM_GOLDMONT)))
0697         set_cpu_bug(c, X86_BUG_MONITOR);
0698 
0699 #ifdef CONFIG_X86_64
0700     if (c->x86 == 15)
0701         c->x86_cache_alignment = c->x86_clflush_size * 2;
0702     if (c->x86 == 6)
0703         set_cpu_cap(c, X86_FEATURE_REP_GOOD);
0704 #else
0705     /*
0706      * Names for the Pentium II/Celeron processors
0707      * detectable only by also checking the cache size.
0708      * Dixon is NOT a Celeron.
0709      */
0710     if (c->x86 == 6) {
0711         unsigned int l2 = c->x86_cache_size;
0712         char *p = NULL;
0713 
0714         switch (c->x86_model) {
0715         case 5:
0716             if (l2 == 0)
0717                 p = "Celeron (Covington)";
0718             else if (l2 == 256)
0719                 p = "Mobile Pentium II (Dixon)";
0720             break;
0721 
0722         case 6:
0723             if (l2 == 128)
0724                 p = "Celeron (Mendocino)";
0725             else if (c->x86_stepping == 0 || c->x86_stepping == 5)
0726                 p = "Celeron-A";
0727             break;
0728 
0729         case 8:
0730             if (l2 == 128)
0731                 p = "Celeron (Coppermine)";
0732             break;
0733         }
0734 
0735         if (p)
0736             strcpy(c->x86_model_id, p);
0737     }
0738 
0739     if (c->x86 == 15)
0740         set_cpu_cap(c, X86_FEATURE_P4);
0741     if (c->x86 == 6)
0742         set_cpu_cap(c, X86_FEATURE_P3);
0743 #endif
0744 
0745     /* Work around errata */
0746     srat_detect_node(c);
0747 
0748     init_ia32_feat_ctl(c);
0749 
0750     if (cpu_has(c, X86_FEATURE_TME))
0751         detect_tme(c);
0752 
0753     init_intel_misc_features(c);
0754 
0755     split_lock_init();
0756     bus_lock_init();
0757 
0758     intel_init_thermal(c);
0759 }
0760 
0761 #ifdef CONFIG_X86_32
0762 static unsigned int intel_size_cache(struct cpuinfo_x86 *c, unsigned int size)
0763 {
0764     /*
0765      * Intel PIII Tualatin. This comes in two flavours.
0766      * One has 256kb of cache, the other 512. We have no way
0767      * to determine which, so we use a boottime override
0768      * for the 512kb model, and assume 256 otherwise.
0769      */
0770     if ((c->x86 == 6) && (c->x86_model == 11) && (size == 0))
0771         size = 256;
0772 
0773     /*
0774      * Intel Quark SoC X1000 contains a 4-way set associative
0775      * 16K cache with a 16 byte cache line and 256 lines per tag
0776      */
0777     if ((c->x86 == 5) && (c->x86_model == 9))
0778         size = 16;
0779     return size;
0780 }
0781 #endif
0782 
0783 #define TLB_INST_4K 0x01
0784 #define TLB_INST_4M 0x02
0785 #define TLB_INST_2M_4M  0x03
0786 
0787 #define TLB_INST_ALL    0x05
0788 #define TLB_INST_1G 0x06
0789 
0790 #define TLB_DATA_4K 0x11
0791 #define TLB_DATA_4M 0x12
0792 #define TLB_DATA_2M_4M  0x13
0793 #define TLB_DATA_4K_4M  0x14
0794 
0795 #define TLB_DATA_1G 0x16
0796 
0797 #define TLB_DATA0_4K    0x21
0798 #define TLB_DATA0_4M    0x22
0799 #define TLB_DATA0_2M_4M 0x23
0800 
0801 #define STLB_4K     0x41
0802 #define STLB_4K_2M  0x42
0803 
0804 static const struct _tlb_table intel_tlb_table[] = {
0805     { 0x01, TLB_INST_4K,        32, " TLB_INST 4 KByte pages, 4-way set associative" },
0806     { 0x02, TLB_INST_4M,        2,  " TLB_INST 4 MByte pages, fully associative" },
0807     { 0x03, TLB_DATA_4K,        64, " TLB_DATA 4 KByte pages, 4-way set associative" },
0808     { 0x04, TLB_DATA_4M,        8,  " TLB_DATA 4 MByte pages, 4-way set associative" },
0809     { 0x05, TLB_DATA_4M,        32, " TLB_DATA 4 MByte pages, 4-way set associative" },
0810     { 0x0b, TLB_INST_4M,        4,  " TLB_INST 4 MByte pages, 4-way set associative" },
0811     { 0x4f, TLB_INST_4K,        32, " TLB_INST 4 KByte pages" },
0812     { 0x50, TLB_INST_ALL,       64, " TLB_INST 4 KByte and 2-MByte or 4-MByte pages" },
0813     { 0x51, TLB_INST_ALL,       128,    " TLB_INST 4 KByte and 2-MByte or 4-MByte pages" },
0814     { 0x52, TLB_INST_ALL,       256,    " TLB_INST 4 KByte and 2-MByte or 4-MByte pages" },
0815     { 0x55, TLB_INST_2M_4M,     7,  " TLB_INST 2-MByte or 4-MByte pages, fully associative" },
0816     { 0x56, TLB_DATA0_4M,       16, " TLB_DATA0 4 MByte pages, 4-way set associative" },
0817     { 0x57, TLB_DATA0_4K,       16, " TLB_DATA0 4 KByte pages, 4-way associative" },
0818     { 0x59, TLB_DATA0_4K,       16, " TLB_DATA0 4 KByte pages, fully associative" },
0819     { 0x5a, TLB_DATA0_2M_4M,    32, " TLB_DATA0 2-MByte or 4 MByte pages, 4-way set associative" },
0820     { 0x5b, TLB_DATA_4K_4M,     64, " TLB_DATA 4 KByte and 4 MByte pages" },
0821     { 0x5c, TLB_DATA_4K_4M,     128,    " TLB_DATA 4 KByte and 4 MByte pages" },
0822     { 0x5d, TLB_DATA_4K_4M,     256,    " TLB_DATA 4 KByte and 4 MByte pages" },
0823     { 0x61, TLB_INST_4K,        48, " TLB_INST 4 KByte pages, fully associative" },
0824     { 0x63, TLB_DATA_1G,        4,  " TLB_DATA 1 GByte pages, 4-way set associative" },
0825     { 0x6b, TLB_DATA_4K,        256,    " TLB_DATA 4 KByte pages, 8-way associative" },
0826     { 0x6c, TLB_DATA_2M_4M,     128,    " TLB_DATA 2 MByte or 4 MByte pages, 8-way associative" },
0827     { 0x6d, TLB_DATA_1G,        16, " TLB_DATA 1 GByte pages, fully associative" },
0828     { 0x76, TLB_INST_2M_4M,     8,  " TLB_INST 2-MByte or 4-MByte pages, fully associative" },
0829     { 0xb0, TLB_INST_4K,        128,    " TLB_INST 4 KByte pages, 4-way set associative" },
0830     { 0xb1, TLB_INST_2M_4M,     4,  " TLB_INST 2M pages, 4-way, 8 entries or 4M pages, 4-way entries" },
0831     { 0xb2, TLB_INST_4K,        64, " TLB_INST 4KByte pages, 4-way set associative" },
0832     { 0xb3, TLB_DATA_4K,        128,    " TLB_DATA 4 KByte pages, 4-way set associative" },
0833     { 0xb4, TLB_DATA_4K,        256,    " TLB_DATA 4 KByte pages, 4-way associative" },
0834     { 0xb5, TLB_INST_4K,        64, " TLB_INST 4 KByte pages, 8-way set associative" },
0835     { 0xb6, TLB_INST_4K,        128,    " TLB_INST 4 KByte pages, 8-way set associative" },
0836     { 0xba, TLB_DATA_4K,        64, " TLB_DATA 4 KByte pages, 4-way associative" },
0837     { 0xc0, TLB_DATA_4K_4M,     8,  " TLB_DATA 4 KByte and 4 MByte pages, 4-way associative" },
0838     { 0xc1, STLB_4K_2M,     1024,   " STLB 4 KByte and 2 MByte pages, 8-way associative" },
0839     { 0xc2, TLB_DATA_2M_4M,     16, " TLB_DATA 2 MByte/4MByte pages, 4-way associative" },
0840     { 0xca, STLB_4K,        512,    " STLB 4 KByte pages, 4-way associative" },
0841     { 0x00, 0, 0 }
0842 };
0843 
0844 static void intel_tlb_lookup(const unsigned char desc)
0845 {
0846     unsigned char k;
0847     if (desc == 0)
0848         return;
0849 
0850     /* look up this descriptor in the table */
0851     for (k = 0; intel_tlb_table[k].descriptor != desc &&
0852          intel_tlb_table[k].descriptor != 0; k++)
0853         ;
0854 
0855     if (intel_tlb_table[k].tlb_type == 0)
0856         return;
0857 
0858     switch (intel_tlb_table[k].tlb_type) {
0859     case STLB_4K:
0860         if (tlb_lli_4k[ENTRIES] < intel_tlb_table[k].entries)
0861             tlb_lli_4k[ENTRIES] = intel_tlb_table[k].entries;
0862         if (tlb_lld_4k[ENTRIES] < intel_tlb_table[k].entries)
0863             tlb_lld_4k[ENTRIES] = intel_tlb_table[k].entries;
0864         break;
0865     case STLB_4K_2M:
0866         if (tlb_lli_4k[ENTRIES] < intel_tlb_table[k].entries)
0867             tlb_lli_4k[ENTRIES] = intel_tlb_table[k].entries;
0868         if (tlb_lld_4k[ENTRIES] < intel_tlb_table[k].entries)
0869             tlb_lld_4k[ENTRIES] = intel_tlb_table[k].entries;
0870         if (tlb_lli_2m[ENTRIES] < intel_tlb_table[k].entries)
0871             tlb_lli_2m[ENTRIES] = intel_tlb_table[k].entries;
0872         if (tlb_lld_2m[ENTRIES] < intel_tlb_table[k].entries)
0873             tlb_lld_2m[ENTRIES] = intel_tlb_table[k].entries;
0874         if (tlb_lli_4m[ENTRIES] < intel_tlb_table[k].entries)
0875             tlb_lli_4m[ENTRIES] = intel_tlb_table[k].entries;
0876         if (tlb_lld_4m[ENTRIES] < intel_tlb_table[k].entries)
0877             tlb_lld_4m[ENTRIES] = intel_tlb_table[k].entries;
0878         break;
0879     case TLB_INST_ALL:
0880         if (tlb_lli_4k[ENTRIES] < intel_tlb_table[k].entries)
0881             tlb_lli_4k[ENTRIES] = intel_tlb_table[k].entries;
0882         if (tlb_lli_2m[ENTRIES] < intel_tlb_table[k].entries)
0883             tlb_lli_2m[ENTRIES] = intel_tlb_table[k].entries;
0884         if (tlb_lli_4m[ENTRIES] < intel_tlb_table[k].entries)
0885             tlb_lli_4m[ENTRIES] = intel_tlb_table[k].entries;
0886         break;
0887     case TLB_INST_4K:
0888         if (tlb_lli_4k[ENTRIES] < intel_tlb_table[k].entries)
0889             tlb_lli_4k[ENTRIES] = intel_tlb_table[k].entries;
0890         break;
0891     case TLB_INST_4M:
0892         if (tlb_lli_4m[ENTRIES] < intel_tlb_table[k].entries)
0893             tlb_lli_4m[ENTRIES] = intel_tlb_table[k].entries;
0894         break;
0895     case TLB_INST_2M_4M:
0896         if (tlb_lli_2m[ENTRIES] < intel_tlb_table[k].entries)
0897             tlb_lli_2m[ENTRIES] = intel_tlb_table[k].entries;
0898         if (tlb_lli_4m[ENTRIES] < intel_tlb_table[k].entries)
0899             tlb_lli_4m[ENTRIES] = intel_tlb_table[k].entries;
0900         break;
0901     case TLB_DATA_4K:
0902     case TLB_DATA0_4K:
0903         if (tlb_lld_4k[ENTRIES] < intel_tlb_table[k].entries)
0904             tlb_lld_4k[ENTRIES] = intel_tlb_table[k].entries;
0905         break;
0906     case TLB_DATA_4M:
0907     case TLB_DATA0_4M:
0908         if (tlb_lld_4m[ENTRIES] < intel_tlb_table[k].entries)
0909             tlb_lld_4m[ENTRIES] = intel_tlb_table[k].entries;
0910         break;
0911     case TLB_DATA_2M_4M:
0912     case TLB_DATA0_2M_4M:
0913         if (tlb_lld_2m[ENTRIES] < intel_tlb_table[k].entries)
0914             tlb_lld_2m[ENTRIES] = intel_tlb_table[k].entries;
0915         if (tlb_lld_4m[ENTRIES] < intel_tlb_table[k].entries)
0916             tlb_lld_4m[ENTRIES] = intel_tlb_table[k].entries;
0917         break;
0918     case TLB_DATA_4K_4M:
0919         if (tlb_lld_4k[ENTRIES] < intel_tlb_table[k].entries)
0920             tlb_lld_4k[ENTRIES] = intel_tlb_table[k].entries;
0921         if (tlb_lld_4m[ENTRIES] < intel_tlb_table[k].entries)
0922             tlb_lld_4m[ENTRIES] = intel_tlb_table[k].entries;
0923         break;
0924     case TLB_DATA_1G:
0925         if (tlb_lld_1g[ENTRIES] < intel_tlb_table[k].entries)
0926             tlb_lld_1g[ENTRIES] = intel_tlb_table[k].entries;
0927         break;
0928     }
0929 }
0930 
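     /*
      * CPUID leaf 2 packs TLB/cache descriptors one byte at a time into
      * EAX..EDX. Per the SDM, the low byte of EAX is the number of times
      * the leaf must be queried and a register with bit 31 set holds no
      * valid descriptors; intel_detect_tlb() iterates accordingly and
      * feeds each descriptor byte to intel_tlb_lookup().
      */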
0931 static void intel_detect_tlb(struct cpuinfo_x86 *c)
0932 {
0933     int i, j, n;
0934     unsigned int regs[4];
0935     unsigned char *desc = (unsigned char *)regs;
0936 
0937     if (c->cpuid_level < 2)
0938         return;
0939 
0940     /* Number of times to iterate */
0941     n = cpuid_eax(2) & 0xFF;
0942 
0943     for (i = 0 ; i < n ; i++) {
0944         cpuid(2, &regs[0], &regs[1], &regs[2], &regs[3]);
0945 
0946         /* If bit 31 is set, this is an unknown format */
0947         for (j = 0 ; j < 3 ; j++)
0948             if (regs[j] & (1 << 31))
0949                 regs[j] = 0;
0950 
0951         /* Byte 0 is level count, not a descriptor */
0952         for (j = 1 ; j < 16 ; j++)
0953             intel_tlb_lookup(desc[j]);
0954     }
0955 }
0956 
0957 static const struct cpu_dev intel_cpu_dev = {
0958     .c_vendor   = "Intel",
0959     .c_ident    = { "GenuineIntel" },
0960 #ifdef CONFIG_X86_32
0961     .legacy_models = {
0962         { .family = 4, .model_names =
0963           {
0964               [0] = "486 DX-25/33",
0965               [1] = "486 DX-50",
0966               [2] = "486 SX",
0967               [3] = "486 DX/2",
0968               [4] = "486 SL",
0969               [5] = "486 SX/2",
0970               [7] = "486 DX/2-WB",
0971               [8] = "486 DX/4",
0972               [9] = "486 DX/4-WB"
0973           }
0974         },
0975         { .family = 5, .model_names =
0976           {
0977               [0] = "Pentium 60/66 A-step",
0978               [1] = "Pentium 60/66",
0979               [2] = "Pentium 75 - 200",
0980               [3] = "OverDrive PODP5V83",
0981               [4] = "Pentium MMX",
0982               [7] = "Mobile Pentium 75 - 200",
0983               [8] = "Mobile Pentium MMX",
0984               [9] = "Quark SoC X1000",
0985           }
0986         },
0987         { .family = 6, .model_names =
0988           {
0989               [0] = "Pentium Pro A-step",
0990               [1] = "Pentium Pro",
0991               [3] = "Pentium II (Klamath)",
0992               [4] = "Pentium II (Deschutes)",
0993               [5] = "Pentium II (Deschutes)",
0994               [6] = "Mobile Pentium II",
0995               [7] = "Pentium III (Katmai)",
0996               [8] = "Pentium III (Coppermine)",
0997               [10] = "Pentium III (Cascades)",
0998               [11] = "Pentium III (Tualatin)",
0999           }
1000         },
1001         { .family = 15, .model_names =
1002           {
1003               [0] = "Pentium 4 (Unknown)",
1004               [1] = "Pentium 4 (Willamette)",
1005               [2] = "Pentium 4 (Northwood)",
1006               [4] = "Pentium 4 (Foster)",
1007               [5] = "Pentium 4 (Foster)",
1008           }
1009         },
1010     },
1011     .legacy_cache_size = intel_size_cache,
1012 #endif
1013     .c_detect_tlb   = intel_detect_tlb,
1014     .c_early_init   = early_init_intel,
1015     .c_bsp_init = bsp_init_intel,
1016     .c_init     = init_intel,
1017     .c_x86_vendor   = X86_VENDOR_INTEL,
1018 };
1019 
1020 cpu_dev_register(intel_cpu_dev);
1021 
1022 #undef pr_fmt
1023 #define pr_fmt(fmt) "x86/split lock detection: " fmt
1024 
1025 static const struct {
1026     const char          *option;
1027     enum split_lock_detect_state    state;
1028 } sld_options[] __initconst = {
1029     { "off",    sld_off   },
1030     { "warn",   sld_warn  },
1031     { "fatal",  sld_fatal },
1032     { "ratelimit:", sld_ratelimit },
1033 };
1034 
1035 static struct ratelimit_state bld_ratelimit;
1036 
1037 static DEFINE_SEMAPHORE(buslock_sem);
1038 
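     /*
      * Match one "split_lock_detect=" option. "ratelimit:" takes a numeric
      * suffix: for example, booting with split_lock_detect=ratelimit:10
      * initialises bld_ratelimit to allow at most 10 bus locks per second
      * and selects sld_ratelimit.
      */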
1039 static inline bool match_option(const char *arg, int arglen, const char *opt)
1040 {
1041     int len = strlen(opt), ratelimit;
1042 
1043     if (strncmp(arg, opt, len))
1044         return false;
1045 
1046     /*
1047      * Min ratelimit is 1 bus lock/sec.
1048      * Max ratelimit is 1000 bus locks/sec.
1049      */
1050     if (sscanf(arg, "ratelimit:%d", &ratelimit) == 1 &&
1051         ratelimit > 0 && ratelimit <= 1000) {
1052         ratelimit_state_init(&bld_ratelimit, HZ, ratelimit);
1053         ratelimit_set_flags(&bld_ratelimit, RATELIMIT_MSG_ON_RELEASE);
1054         return true;
1055     }
1056 
1057     return len == arglen;
1058 }
1059 
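     /*
      * Probe whether MSR_TEST_CTRL is writable by setting or clearing the
      * split lock detect bit and reading the value back. The _safe MSR
      * accessors swallow the #GP raised on parts without the MSR instead
      * of crashing the boot.
      */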
1060 static bool split_lock_verify_msr(bool on)
1061 {
1062     u64 ctrl, tmp;
1063 
1064     if (rdmsrl_safe(MSR_TEST_CTRL, &ctrl))
1065         return false;
1066     if (on)
1067         ctrl |= MSR_TEST_CTRL_SPLIT_LOCK_DETECT;
1068     else
1069         ctrl &= ~MSR_TEST_CTRL_SPLIT_LOCK_DETECT;
1070     if (wrmsrl_safe(MSR_TEST_CTRL, ctrl))
1071         return false;
1072     rdmsrl(MSR_TEST_CTRL, tmp);
1073     return ctrl == tmp;
1074 }
1075 
1076 static void __init sld_state_setup(void)
1077 {
1078     enum split_lock_detect_state state = sld_warn;
1079     char arg[20];
1080     int i, ret;
1081 
1082     if (!boot_cpu_has(X86_FEATURE_SPLIT_LOCK_DETECT) &&
1083         !boot_cpu_has(X86_FEATURE_BUS_LOCK_DETECT))
1084         return;
1085 
1086     ret = cmdline_find_option(boot_command_line, "split_lock_detect",
1087                   arg, sizeof(arg));
1088     if (ret >= 0) {
1089         for (i = 0; i < ARRAY_SIZE(sld_options); i++) {
1090             if (match_option(arg, ret, sld_options[i].option)) {
1091                 state = sld_options[i].state;
1092                 break;
1093             }
1094         }
1095     }
1096     sld_state = state;
1097 }
1098 
1099 static void __init __split_lock_setup(void)
1100 {
1101     if (!split_lock_verify_msr(false)) {
1102         pr_info("MSR access failed: Disabled\n");
1103         return;
1104     }
1105 
1106     rdmsrl(MSR_TEST_CTRL, msr_test_ctrl_cache);
1107 
1108     if (!split_lock_verify_msr(true)) {
1109         pr_info("MSR access failed: Disabled\n");
1110         return;
1111     }
1112 
1113     /* Restore the MSR to its cached value. */
1114     wrmsrl(MSR_TEST_CTRL, msr_test_ctrl_cache);
1115 
1116     setup_force_cpu_cap(X86_FEATURE_SPLIT_LOCK_DETECT);
1117 }
1118 
1119 /*
1120  * MSR_TEST_CTRL is per core, but we treat it like a per CPU MSR. Locking
1121  * is not implemented as one thread could undo the setting of the other
1122  * thread immediately after dropping the lock anyway.
1123  */
1124 static void sld_update_msr(bool on)
1125 {
1126     u64 test_ctrl_val = msr_test_ctrl_cache;
1127 
1128     if (on)
1129         test_ctrl_val |= MSR_TEST_CTRL_SPLIT_LOCK_DETECT;
1130 
1131     wrmsrl(MSR_TEST_CTRL, test_ctrl_val);
1132 }
1133 
1134 static void split_lock_init(void)
1135 {
1136     /*
1137      * In ratelimit mode, bus locks are handled by #DB and #AC-based split
1138      * lock detection is left disabled.
1139      */
1140     if (sld_state == sld_ratelimit) {
1141         split_lock_verify_msr(false);
1142         return;
1143     }
1144 
1145     if (cpu_model_supports_sld)
1146         split_lock_verify_msr(sld_state != sld_off);
1147 }
1148 
1149 static void __split_lock_reenable(struct work_struct *work)
1150 {
1151     sld_update_msr(true);
1152     up(&buslock_sem);
1153 }
1154 
1155 /*
1156  * If a CPU goes offline with pending delayed work to re-enable split lock
1157  * detection then the delayed work will be executed on some other CPU. That
1158  * handles releasing the buslock_sem, but because it executes on a
1159  * different CPU it probably won't re-enable split lock detection. This is a
1160  * problem on HT systems since the sibling CPU on the same core may then be
1161  * left running with split lock detection disabled.
1162  *
1163  * Unconditionally re-enable detection here.
1164  */
1165 static int splitlock_cpu_offline(unsigned int cpu)
1166 {
1167     sld_update_msr(true);
1168 
1169     return 0;
1170 }
1171 
1172 static DECLARE_DELAYED_WORK(split_lock_reenable, __split_lock_reenable);
1173 
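     /*
      * Throttling for user-space split locks in warn mode: the offending
      * task sleeps 10ms, takes buslock_sem so that only one core at a time
      * runs with detection disabled, clears #AC on its own core so the
      * faulting instruction can complete, and queues delayed work (two
      * jiffies) that re-enables detection and releases the semaphore.
      */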
1174 static void split_lock_warn(unsigned long ip)
1175 {
1176     int cpu;
1177 
1178     if (!current->reported_split_lock)
1179         pr_warn_ratelimited("#AC: %s/%d took a split_lock trap at address: 0x%lx\n",
1180                     current->comm, current->pid, ip);
1181     current->reported_split_lock = 1;
1182 
1183     /* misery factor #1, sleep 10ms before trying to execute split lock */
1184     if (msleep_interruptible(10) > 0)
1185         return;
1186     /* Misery factor #2, only allow one buslocked disabled core at a time */
1187     if (down_interruptible(&buslock_sem) == -EINTR)
1188         return;
1189     cpu = get_cpu();
1190     schedule_delayed_work_on(cpu, &split_lock_reenable, 2);
1191 
1192     /* Disable split lock detection on this CPU to make progress */
1193     sld_update_msr(false);
1194     put_cpu();
1195 }
1196 
1197 bool handle_guest_split_lock(unsigned long ip)
1198 {
1199     if (sld_state == sld_warn) {
1200         split_lock_warn(ip);
1201         return true;
1202     }
1203 
1204     pr_warn_once("#AC: %s/%d %s split_lock trap at address: 0x%lx\n",
1205              current->comm, current->pid,
1206              sld_state == sld_fatal ? "fatal" : "bogus", ip);
1207 
1208     current->thread.error_code = 0;
1209     current->thread.trap_nr = X86_TRAP_AC;
1210     force_sig_fault(SIGBUS, BUS_ADRALN, NULL);
1211     return false;
1212 }
1213 EXPORT_SYMBOL_GPL(handle_guest_split_lock);
1214 
1215 static void bus_lock_init(void)
1216 {
1217     u64 val;
1218 
1219     if (!boot_cpu_has(X86_FEATURE_BUS_LOCK_DETECT))
1220         return;
1221 
1222     rdmsrl(MSR_IA32_DEBUGCTLMSR, val);
1223 
1224     if ((boot_cpu_has(X86_FEATURE_SPLIT_LOCK_DETECT) &&
1225         (sld_state == sld_warn || sld_state == sld_fatal)) ||
1226         sld_state == sld_off) {
1227         /*
1228          * Warn and fatal are handled by #AC for split lock if #AC for
1229          * split lock is supported.
1230          */
1231         val &= ~DEBUGCTLMSR_BUS_LOCK_DETECT;
1232     } else {
1233         val |= DEBUGCTLMSR_BUS_LOCK_DETECT;
1234     }
1235 
1236     wrmsrl(MSR_IA32_DEBUGCTLMSR, val);
1237 }
1238 
1239 bool handle_user_split_lock(struct pt_regs *regs, long error_code)
1240 {
1241     if ((regs->flags & X86_EFLAGS_AC) || sld_state == sld_fatal)
1242         return false;
1243     split_lock_warn(regs->ip);
1244     return true;
1245 }
1246 
1247 void handle_bus_lock(struct pt_regs *regs)
1248 {
1249     switch (sld_state) {
1250     case sld_off:
1251         break;
1252     case sld_ratelimit:
1253         /* Enforce no more than bld_ratelimit bus locks/sec. */
1254         while (!__ratelimit(&bld_ratelimit))
1255             msleep(20);
1256         /* Warn on the bus lock. */
1257         fallthrough;
1258     case sld_warn:
1259         pr_warn_ratelimited("#DB: %s/%d took a bus_lock trap at address: 0x%lx\n",
1260                     current->comm, current->pid, regs->ip);
1261         break;
1262     case sld_fatal:
1263         force_sig_fault(SIGBUS, BUS_ADRALN, NULL);
1264         break;
1265     }
1266 }
1267 
1268 /*
1269  * Bits in the IA32_CORE_CAPABILITIES are not architectural, so they should
1270  * only be trusted if it is confirmed that a CPU model implements a
1271  * specific feature at a particular bit position.
1272  *
1273  * The possible driver data field values:
1274  *
1275  * - 0: CPU models that are known to have the per-core split-lock detection
1276  *  feature even though they do not enumerate IA32_CORE_CAPABILITIES.
1277  *
1278  * - 1: CPU models which may enumerate IA32_CORE_CAPABILITIES and if so use
1279  *      bit 5 to enumerate the per-core split-lock detection feature.
1280  */
1281 static const struct x86_cpu_id split_lock_cpu_ids[] __initconst = {
1282     X86_MATCH_INTEL_FAM6_MODEL(ICELAKE_X,       0),
1283     X86_MATCH_INTEL_FAM6_MODEL(ICELAKE_L,       0),
1284     X86_MATCH_INTEL_FAM6_MODEL(ICELAKE_D,       0),
1285     X86_MATCH_INTEL_FAM6_MODEL(ATOM_TREMONT,    1),
1286     X86_MATCH_INTEL_FAM6_MODEL(ATOM_TREMONT_D,  1),
1287     X86_MATCH_INTEL_FAM6_MODEL(ATOM_TREMONT_L,  1),
1288     X86_MATCH_INTEL_FAM6_MODEL(TIGERLAKE_L,     1),
1289     X86_MATCH_INTEL_FAM6_MODEL(TIGERLAKE,       1),
1290     X86_MATCH_INTEL_FAM6_MODEL(SAPPHIRERAPIDS_X,    1),
1291     X86_MATCH_INTEL_FAM6_MODEL(ALDERLAKE,       1),
1292     X86_MATCH_INTEL_FAM6_MODEL(ALDERLAKE_L,     1),
1293     X86_MATCH_INTEL_FAM6_MODEL(RAPTORLAKE,      1),
1294     {}
1295 };
1296 
1297 static void __init split_lock_setup(struct cpuinfo_x86 *c)
1298 {
1299     const struct x86_cpu_id *m;
1300     u64 ia32_core_caps;
1301 
1302     if (boot_cpu_has(X86_FEATURE_HYPERVISOR))
1303         return;
1304 
1305     m = x86_match_cpu(split_lock_cpu_ids);
1306     if (!m)
1307         return;
1308 
1309     switch (m->driver_data) {
1310     case 0:
1311         break;
1312     case 1:
1313         if (!cpu_has(c, X86_FEATURE_CORE_CAPABILITIES))
1314             return;
1315         rdmsrl(MSR_IA32_CORE_CAPS, ia32_core_caps);
1316         if (!(ia32_core_caps & MSR_IA32_CORE_CAPS_SPLIT_LOCK_DETECT))
1317             return;
1318         break;
1319     default:
1320         return;
1321     }
1322 
1323     cpu_model_supports_sld = true;
1324     __split_lock_setup();
1325 }
1326 
1327 static void sld_state_show(void)
1328 {
1329     if (!boot_cpu_has(X86_FEATURE_BUS_LOCK_DETECT) &&
1330         !boot_cpu_has(X86_FEATURE_SPLIT_LOCK_DETECT))
1331         return;
1332 
1333     switch (sld_state) {
1334     case sld_off:
1335         pr_info("disabled\n");
1336         break;
1337     case sld_warn:
1338         if (boot_cpu_has(X86_FEATURE_SPLIT_LOCK_DETECT)) {
1339             pr_info("#AC: crashing the kernel on kernel split_locks and warning on user-space split_locks\n");
1340             if (cpuhp_setup_state(CPUHP_AP_ONLINE_DYN,
1341                           "x86/splitlock", NULL, splitlock_cpu_offline) < 0)
1342                 pr_warn("No splitlock CPU offline handler\n");
1343         } else if (boot_cpu_has(X86_FEATURE_BUS_LOCK_DETECT)) {
1344             pr_info("#DB: warning on user-space bus_locks\n");
1345         }
1346         break;
1347     case sld_fatal:
1348         if (boot_cpu_has(X86_FEATURE_SPLIT_LOCK_DETECT)) {
1349             pr_info("#AC: crashing the kernel on kernel split_locks and sending SIGBUS on user-space split_locks\n");
1350         } else if (boot_cpu_has(X86_FEATURE_BUS_LOCK_DETECT)) {
1351             pr_info("#DB: sending SIGBUS on user-space bus_locks%s\n",
1352                 boot_cpu_has(X86_FEATURE_SPLIT_LOCK_DETECT) ?
1353                 " from non-WB" : "");
1354         }
1355         break;
1356     case sld_ratelimit:
1357         if (boot_cpu_has(X86_FEATURE_BUS_LOCK_DETECT))
1358             pr_info("#DB: setting system wide bus lock rate limit to %u/sec\n", bld_ratelimit.burst);
1359         break;
1360     }
1361 }
1362 
1363 void __init sld_setup(struct cpuinfo_x86 *c)
1364 {
1365     split_lock_setup(c);
1366     sld_state_setup();
1367     sld_state_show();
1368 }
1369 
1370 #define X86_HYBRID_CPU_TYPE_ID_SHIFT    24
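     /*
      * CPUID.1AH:EAX[31:24] is the core-type field on hybrid parts; per
      * the SDM, 0x20 identifies an Atom (efficiency) core and 0x40 a
      * Core (performance) core.
      */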
1371 
1372 /**
1373  * get_this_hybrid_cpu_type() - Get the type of this hybrid CPU
1374  *
1375  * Returns the CPU type [31:24] (i.e., Atom or Core) of a CPU in
1376  * a hybrid processor. If the processor is not hybrid, returns 0.
1377  */
1378 u8 get_this_hybrid_cpu_type(void)
1379 {
1380     if (!cpu_feature_enabled(X86_FEATURE_HYBRID_CPU))
1381         return 0;
1382 
1383     return cpuid_eax(0x0000001a) >> X86_HYBRID_CPU_TYPE_ID_SHIFT;
1384 }