Back to home page

OSCL-LXR

 
 

    


0001 // SPDX-License-Identifier: GPL-2.0-only
0002 /*
0003  * HyperV  Detection code.
0004  *
0005  * Copyright (C) 2010, Novell, Inc.
0006  * Author : K. Y. Srinivasan <ksrinivasan@novell.com>
0007  */
0008 
0009 #include <linux/types.h>
0010 #include <linux/time.h>
0011 #include <linux/clocksource.h>
0012 #include <linux/init.h>
0013 #include <linux/export.h>
0014 #include <linux/hardirq.h>
0015 #include <linux/efi.h>
0016 #include <linux/interrupt.h>
0017 #include <linux/irq.h>
0018 #include <linux/kexec.h>
0019 #include <linux/i8253.h>
0020 #include <linux/random.h>
0021 #include <linux/swiotlb.h>
0022 #include <asm/processor.h>
0023 #include <asm/hypervisor.h>
0024 #include <asm/hyperv-tlfs.h>
0025 #include <asm/mshyperv.h>
0026 #include <asm/desc.h>
0027 #include <asm/idtentry.h>
0028 #include <asm/irq_regs.h>
0029 #include <asm/i8259.h>
0030 #include <asm/apic.h>
0031 #include <asm/timer.h>
0032 #include <asm/reboot.h>
0033 #include <asm/nmi.h>
0034 #include <clocksource/hyperv_timer.h>
0035 #include <asm/numa.h>
0036 #include <asm/coco.h>
0037 
0038 /* Is Linux running as the root partition? Set in ms_hyperv_init_platform(). */
0039 bool hv_root_partition;
0040 struct ms_hyperv_info ms_hyperv; /* Hyper-V CPUID data cached by ms_hyperv_init_platform() */
0041 
0042 #if IS_ENABLED(CONFIG_HYPERV)
0043 static void (*vmbus_handler)(void); /* called from sysvec_hyperv_callback(); NULL when unset */
0044 static void (*hv_stimer0_handler)(void); /* called from sysvec_hyperv_stimer0(); NULL when unset */
0045 static void (*hv_kexec_handler)(void); /* called from hv_machine_shutdown() while kexec is in progress */
0046 static void (*hv_crash_handler)(struct pt_regs *regs); /* called from hv_machine_crash_shutdown() */
0047 
0048 DEFINE_IDTENTRY_SYSVEC(sysvec_hyperv_callback)
0049 {
0050     struct pt_regs *old_regs = set_irq_regs(regs);
0051 
0052     inc_irq_stat(irq_hv_callback_count);
0053     if (vmbus_handler)
0054         vmbus_handler();
0055 
0056     if (ms_hyperv.hints & HV_DEPRECATING_AEOI_RECOMMENDED)
0057         ack_APIC_irq();
0058 
0059     set_irq_regs(old_regs);
0060 }
0061 
0062 void hv_setup_vmbus_handler(void (*handler)(void))
0063 {
0064     vmbus_handler = handler;
0065 }
0066 
0067 void hv_remove_vmbus_handler(void)
0068 {
0069     /* We have no way to deallocate the interrupt gate */
0070     vmbus_handler = NULL;
0071 }
0072 
0073 /*
0074  * Routines to do per-architecture handling of stimer0
0075  * interrupts when in Direct Mode
0076  */
0077 DEFINE_IDTENTRY_SYSVEC(sysvec_hyperv_stimer0)
0078 {
0079     struct pt_regs *old_regs = set_irq_regs(regs);
0080 
0081     inc_irq_stat(hyperv_stimer0_count);
0082     if (hv_stimer0_handler)
0083         hv_stimer0_handler();
0084     add_interrupt_randomness(HYPERV_STIMER0_VECTOR);
0085     ack_APIC_irq();
0086 
0087     set_irq_regs(old_regs);
0088 }
0089 
0090 /* For x86/x64, override weak placeholders in hyperv_timer.c */
0091 void hv_setup_stimer0_handler(void (*handler)(void))
0092 {
0093     hv_stimer0_handler = handler;
0094 }
0095 
0096 void hv_remove_stimer0_handler(void)
0097 {
0098     /* We have no way to deallocate the interrupt gate */
0099     hv_stimer0_handler = NULL;
0100 }
0101 
0102 void hv_setup_kexec_handler(void (*handler)(void))
0103 {
0104     hv_kexec_handler = handler;
0105 }
0106 
0107 void hv_remove_kexec_handler(void)
0108 {
0109     hv_kexec_handler = NULL;
0110 }
0111 
0112 void hv_setup_crash_handler(void (*handler)(struct pt_regs *regs))
0113 {
0114     hv_crash_handler = handler;
0115 }
0116 
0117 void hv_remove_crash_handler(void)
0118 {
0119     hv_crash_handler = NULL;
0120 }
0121 
0122 #ifdef CONFIG_KEXEC_CORE
0123 static void hv_machine_shutdown(void)
0124 {
0125     if (kexec_in_progress && hv_kexec_handler)
0126         hv_kexec_handler();
0127 
0128     /*
0129      * Call hv_cpu_die() on all the CPUs, otherwise later the hypervisor
0130      * corrupts the old VP Assist Pages and can crash the kexec kernel.
0131      */
0132     if (kexec_in_progress && hyperv_init_cpuhp > 0)
0133         cpuhp_remove_state(hyperv_init_cpuhp);
0134 
0135     /* The function calls stop_other_cpus(). */
0136     native_machine_shutdown();
0137 
0138     /* Disable the hypercall page when there is only 1 active CPU. */
0139     if (kexec_in_progress)
0140         hyperv_cleanup();
0141 }
0142 
0143 static void hv_machine_crash_shutdown(struct pt_regs *regs)
0144 {
0145     if (hv_crash_handler)
0146         hv_crash_handler(regs);
0147 
0148     /* The function calls crash_smp_send_stop(). */
0149     native_machine_crash_shutdown(regs);
0150 
0151     /* Disable the hypercall page when there is only 1 active CPU. */
0152     hyperv_cleanup();
0153 }
0154 #endif /* CONFIG_KEXEC_CORE */
0155 #endif /* CONFIG_HYPERV */
0156 
0157 static uint32_t  __init ms_hyperv_platform(void)
0158 {
0159     u32 eax;
0160     u32 hyp_signature[3];
0161 
0162     if (!boot_cpu_has(X86_FEATURE_HYPERVISOR))
0163         return 0;
0164 
0165     cpuid(HYPERV_CPUID_VENDOR_AND_MAX_FUNCTIONS,
0166           &eax, &hyp_signature[0], &hyp_signature[1], &hyp_signature[2]);
0167 
0168     if (eax < HYPERV_CPUID_MIN || eax > HYPERV_CPUID_MAX ||
0169         memcmp("Microsoft Hv", hyp_signature, 12))
0170         return 0;
0171 
0172     /* HYPERCALL and VP_INDEX MSRs are mandatory for all features. */
0173     eax = cpuid_eax(HYPERV_CPUID_FEATURES);
0174     if (!(eax & HV_MSR_HYPERCALL_AVAILABLE)) {
0175         pr_warn("x86/hyperv: HYPERCALL MSR not available.\n");
0176         return 0;
0177     }
0178     if (!(eax & HV_MSR_VP_INDEX_AVAILABLE)) {
0179         pr_warn("x86/hyperv: VP_INDEX MSR not available.\n");
0180         return 0;
0181     }
0182 
0183     return HYPERV_CPUID_VENDOR_AND_MAX_FUNCTIONS;
0184 }
0185 
/*
 * NMI reason callback used in place of the default one on EFI-booted
 * guests: always reports 0, i.e. no reason bits set.
 */
static unsigned char hv_get_nmi_reason(void)
{
	const unsigned char no_reason = 0;

	return no_reason;
}
0190 
0191 #ifdef CONFIG_X86_LOCAL_APIC
0192 /*
0193  * Prior to WS2016 Debug-VM sends NMIs to all CPUs which makes
0194  * it difficult to process CHANNELMSG_UNLOAD in case of crash. Handle
0195  * unknown NMI on the first CPU which gets it.
0196  */
0197 static int hv_nmi_unknown(unsigned int val, struct pt_regs *regs)
0198 {
0199     static atomic_t nmi_cpu = ATOMIC_INIT(-1);
0200 
0201     if (!unknown_nmi_panic)
0202         return NMI_DONE;
0203 
0204     if (atomic_cmpxchg(&nmi_cpu, -1, raw_smp_processor_id()) != -1)
0205         return NMI_HANDLED;
0206 
0207     return NMI_DONE;
0208 }
0209 #endif
0210 
0211 static unsigned long hv_get_tsc_khz(void)
0212 {
0213     unsigned long freq;
0214 
0215     rdmsrl(HV_X64_MSR_TSC_FREQUENCY, freq);
0216 
0217     return freq / 1000;
0218 }
0219 
0220 #if defined(CONFIG_SMP) && IS_ENABLED(CONFIG_HYPERV)
0221 static void __init hv_smp_prepare_boot_cpu(void)
0222 {
0223     native_smp_prepare_boot_cpu();
0224 #if defined(CONFIG_X86_64) && defined(CONFIG_PARAVIRT_SPINLOCKS)
0225     hv_init_spinlocks();
0226 #endif
0227 }
0228 
0229 static void __init hv_smp_prepare_cpus(unsigned int max_cpus)
0230 {
0231 #ifdef CONFIG_X86_64
0232     int i;
0233     int ret;
0234 #endif
0235 
0236     native_smp_prepare_cpus(max_cpus);
0237 
0238 #ifdef CONFIG_X86_64
0239     for_each_present_cpu(i) {
0240         if (i == 0)
0241             continue;
0242         ret = hv_call_add_logical_proc(numa_cpu_node(i), i, cpu_physical_id(i));
0243         BUG_ON(ret);
0244     }
0245 
0246     for_each_present_cpu(i) {
0247         if (i == 0)
0248             continue;
0249         ret = hv_call_create_vp(numa_cpu_node(i), hv_current_partition_id, i, i);
0250         BUG_ON(ret);
0251     }
0252 #endif
0253 }
0254 #endif
0255 
0256 static void __init ms_hyperv_init_platform(void)
0257 {
0258     int hv_max_functions_eax;
0259     int hv_host_info_eax;
0260     int hv_host_info_ebx;
0261     int hv_host_info_ecx;
0262     int hv_host_info_edx;
0263 
0264 #ifdef CONFIG_PARAVIRT
0265     pv_info.name = "Hyper-V";
0266 #endif
0267 
0268     /*
0269      * Extract the features and hints
0270      */
0271     ms_hyperv.features = cpuid_eax(HYPERV_CPUID_FEATURES);
0272     ms_hyperv.priv_high = cpuid_ebx(HYPERV_CPUID_FEATURES);
0273     ms_hyperv.misc_features = cpuid_edx(HYPERV_CPUID_FEATURES);
0274     ms_hyperv.hints    = cpuid_eax(HYPERV_CPUID_ENLIGHTMENT_INFO);
0275 
0276     hv_max_functions_eax = cpuid_eax(HYPERV_CPUID_VENDOR_AND_MAX_FUNCTIONS);
0277 
0278     pr_info("Hyper-V: privilege flags low 0x%x, high 0x%x, hints 0x%x, misc 0x%x\n",
0279         ms_hyperv.features, ms_hyperv.priv_high, ms_hyperv.hints,
0280         ms_hyperv.misc_features);
0281 
0282     ms_hyperv.max_vp_index = cpuid_eax(HYPERV_CPUID_IMPLEMENT_LIMITS);
0283     ms_hyperv.max_lp_index = cpuid_ebx(HYPERV_CPUID_IMPLEMENT_LIMITS);
0284 
0285     pr_debug("Hyper-V: max %u virtual processors, %u logical processors\n",
0286          ms_hyperv.max_vp_index, ms_hyperv.max_lp_index);
0287 
0288     /*
0289      * Check CPU management privilege.
0290      *
0291      * To mirror what Windows does we should extract CPU management
0292      * features and use the ReservedIdentityBit to detect if Linux is the
0293      * root partition. But that requires negotiating CPU management
0294      * interface (a process to be finalized).
0295      *
0296      * For now, use the privilege flag as the indicator for running as
0297      * root.
0298      */
0299     if (cpuid_ebx(HYPERV_CPUID_FEATURES) & HV_CPU_MANAGEMENT) {
0300         hv_root_partition = true;
0301         pr_info("Hyper-V: running as root partition\n");
0302     }
0303 
0304     /*
0305      * Extract host information.
0306      */
0307     if (hv_max_functions_eax >= HYPERV_CPUID_VERSION) {
0308         hv_host_info_eax = cpuid_eax(HYPERV_CPUID_VERSION);
0309         hv_host_info_ebx = cpuid_ebx(HYPERV_CPUID_VERSION);
0310         hv_host_info_ecx = cpuid_ecx(HYPERV_CPUID_VERSION);
0311         hv_host_info_edx = cpuid_edx(HYPERV_CPUID_VERSION);
0312 
0313         pr_info("Hyper-V: Host Build %d.%d.%d.%d-%d-%d\n",
0314             hv_host_info_ebx >> 16, hv_host_info_ebx & 0xFFFF,
0315             hv_host_info_eax, hv_host_info_edx & 0xFFFFFF,
0316             hv_host_info_ecx, hv_host_info_edx >> 24);
0317     }
0318 
0319     if (ms_hyperv.features & HV_ACCESS_FREQUENCY_MSRS &&
0320         ms_hyperv.misc_features & HV_FEATURE_FREQUENCY_MSRS_AVAILABLE) {
0321         x86_platform.calibrate_tsc = hv_get_tsc_khz;
0322         x86_platform.calibrate_cpu = hv_get_tsc_khz;
0323     }
0324 
0325     if (ms_hyperv.priv_high & HV_ISOLATION) {
0326         ms_hyperv.isolation_config_a = cpuid_eax(HYPERV_CPUID_ISOLATION_CONFIG);
0327         ms_hyperv.isolation_config_b = cpuid_ebx(HYPERV_CPUID_ISOLATION_CONFIG);
0328         ms_hyperv.shared_gpa_boundary =
0329             BIT_ULL(ms_hyperv.shared_gpa_boundary_bits);
0330 
0331         pr_info("Hyper-V: Isolation Config: Group A 0x%x, Group B 0x%x\n",
0332             ms_hyperv.isolation_config_a, ms_hyperv.isolation_config_b);
0333 
0334         if (hv_get_isolation_type() == HV_ISOLATION_TYPE_SNP) {
0335             static_branch_enable(&isolation_type_snp);
0336 #ifdef CONFIG_SWIOTLB
0337             swiotlb_unencrypted_base = ms_hyperv.shared_gpa_boundary;
0338 #endif
0339         }
0340         /* Isolation VMs are unenlightened SEV-based VMs, thus this check: */
0341         if (IS_ENABLED(CONFIG_AMD_MEM_ENCRYPT)) {
0342             if (hv_get_isolation_type() != HV_ISOLATION_TYPE_NONE)
0343                 cc_set_vendor(CC_VENDOR_HYPERV);
0344         }
0345     }
0346 
0347     if (hv_max_functions_eax >= HYPERV_CPUID_NESTED_FEATURES) {
0348         ms_hyperv.nested_features =
0349             cpuid_eax(HYPERV_CPUID_NESTED_FEATURES);
0350         pr_info("Hyper-V: Nested features: 0x%x\n",
0351             ms_hyperv.nested_features);
0352     }
0353 
0354 #ifdef CONFIG_X86_LOCAL_APIC
0355     if (ms_hyperv.features & HV_ACCESS_FREQUENCY_MSRS &&
0356         ms_hyperv.misc_features & HV_FEATURE_FREQUENCY_MSRS_AVAILABLE) {
0357         /*
0358          * Get the APIC frequency.
0359          */
0360         u64 hv_lapic_frequency;
0361 
0362         rdmsrl(HV_X64_MSR_APIC_FREQUENCY, hv_lapic_frequency);
0363         hv_lapic_frequency = div_u64(hv_lapic_frequency, HZ);
0364         lapic_timer_period = hv_lapic_frequency;
0365         pr_info("Hyper-V: LAPIC Timer Frequency: %#x\n",
0366             lapic_timer_period);
0367     }
0368 
0369     register_nmi_handler(NMI_UNKNOWN, hv_nmi_unknown, NMI_FLAG_FIRST,
0370                  "hv_nmi_unknown");
0371 #endif
0372 
0373 #ifdef CONFIG_X86_IO_APIC
0374     no_timer_check = 1;
0375 #endif
0376 
0377 #if IS_ENABLED(CONFIG_HYPERV) && defined(CONFIG_KEXEC_CORE)
0378     machine_ops.shutdown = hv_machine_shutdown;
0379     machine_ops.crash_shutdown = hv_machine_crash_shutdown;
0380 #endif
0381     if (ms_hyperv.features & HV_ACCESS_TSC_INVARIANT) {
0382         /*
0383          * Writing to synthetic MSR 0x40000118 updates/changes the
0384          * guest visible CPUIDs. Setting bit 0 of this MSR  enables
0385          * guests to report invariant TSC feature through CPUID
0386          * instruction, CPUID 0x800000007/EDX, bit 8. See code in
0387          * early_init_intel() where this bit is examined. The
0388          * setting of this MSR bit should happen before init_intel()
0389          * is called.
0390          */
0391         wrmsrl(HV_X64_MSR_TSC_INVARIANT_CONTROL, 0x1);
0392         setup_force_cpu_cap(X86_FEATURE_TSC_RELIABLE);
0393     }
0394 
0395     /*
0396      * Generation 2 instances don't support reading the NMI status from
0397      * 0x61 port.
0398      */
0399     if (efi_enabled(EFI_BOOT))
0400         x86_platform.get_nmi_reason = hv_get_nmi_reason;
0401 
0402     /*
0403      * Hyper-V VMs have a PIT emulation quirk such that zeroing the
0404      * counter register during PIT shutdown restarts the PIT. So it
0405      * continues to interrupt @18.2 HZ. Setting i8253_clear_counter
0406      * to false tells pit_shutdown() not to zero the counter so that
0407      * the PIT really is shutdown. Generation 2 VMs don't have a PIT,
0408      * and setting this value has no effect.
0409      */
0410     i8253_clear_counter_on_shutdown = false;
0411 
0412 #if IS_ENABLED(CONFIG_HYPERV)
0413     /*
0414      * Setup the hook to get control post apic initialization.
0415      */
0416     x86_platform.apic_post_init = hyperv_init;
0417     hyperv_setup_mmu_ops();
0418     /* Setup the IDT for hypervisor callback */
0419     alloc_intr_gate(HYPERVISOR_CALLBACK_VECTOR, asm_sysvec_hyperv_callback);
0420 
0421     /* Setup the IDT for reenlightenment notifications */
0422     if (ms_hyperv.features & HV_ACCESS_REENLIGHTENMENT) {
0423         alloc_intr_gate(HYPERV_REENLIGHTENMENT_VECTOR,
0424                 asm_sysvec_hyperv_reenlightenment);
0425     }
0426 
0427     /* Setup the IDT for stimer0 */
0428     if (ms_hyperv.misc_features & HV_STIMER_DIRECT_MODE_AVAILABLE) {
0429         alloc_intr_gate(HYPERV_STIMER0_VECTOR,
0430                 asm_sysvec_hyperv_stimer0);
0431     }
0432 
0433 # ifdef CONFIG_SMP
0434     smp_ops.smp_prepare_boot_cpu = hv_smp_prepare_boot_cpu;
0435     if (hv_root_partition)
0436         smp_ops.smp_prepare_cpus = hv_smp_prepare_cpus;
0437 # endif
0438 
0439     /*
0440      * Hyper-V doesn't provide irq remapping for IO-APIC. To enable x2apic,
0441      * set x2apic destination mode to physical mode when x2apic is available
0442      * and Hyper-V IOMMU driver makes sure cpus assigned with IO-APIC irqs
0443      * have 8-bit APIC id.
0444      */
0445 # ifdef CONFIG_X86_X2APIC
0446     if (x2apic_supported())
0447         x2apic_phys = 1;
0448 # endif
0449 
0450     /* Register Hyper-V specific clocksource */
0451     hv_init_clocksource();
0452 #endif
0453     /*
0454      * TSC should be marked as unstable only after Hyper-V
0455      * clocksource has been initialized. This ensures that the
0456      * stability of the sched_clock is not altered.
0457      */
0458     if (!(ms_hyperv.features & HV_ACCESS_TSC_INVARIANT))
0459         mark_tsc_unstable("running on Hyper-V");
0460 
0461     hardlockup_detector_disable();
0462 }
0463 
0464 static bool __init ms_hyperv_x2apic_available(void)
0465 {
0466     return x2apic_supported();
0467 }
0468 
0469 /*
0470  * If ms_hyperv_msi_ext_dest_id() returns true, hyperv_prepare_irq_remapping()
0471  * returns -ENODEV and the Hyper-V IOMMU driver is not used; instead, the
0472  * generic support of the 15-bit APIC ID is used: see __irq_msi_compose_msg().
0473  *
0474  * Note: for a VM on Hyper-V, the I/O-APIC is the only device which
0475  * (logically) generates MSIs directly to the system APIC irq domain.
0476  * There is no HPET, and PCI MSI/MSI-X interrupts are remapped by the
0477  * pci-hyperv host bridge.
0478  */
0479 static bool __init ms_hyperv_msi_ext_dest_id(void)
0480 {
0481     u32 eax;
0482 
0483     eax = cpuid_eax(HYPERV_CPUID_VIRT_STACK_INTERFACE);
0484     if (eax != HYPERV_VS_INTERFACE_EAX_SIGNATURE)
0485         return false;
0486 
0487     eax = cpuid_eax(HYPERV_CPUID_VIRT_STACK_PROPERTIES);
0488     return eax & HYPERV_VS_PROPERTIES_EAX_EXTENDED_IOAPIC_RTE;
0489 }
0490 
0491 const __initconst struct hypervisor_x86 x86_hyper_ms_hyperv = {
0492     .name           = "Microsoft Hyper-V",
0493     .detect         = ms_hyperv_platform,
0494     .type           = X86_HYPER_MS_HYPERV,
0495     .init.x2apic_available  = ms_hyperv_x2apic_available,
0496     .init.msi_ext_dest_id   = ms_hyperv_msi_ext_dest_id,
0497     .init.init_platform = ms_hyperv_init_platform,
0498 };