/*
 * VMware Detection code.
 *
 * Copyright (C) 2008, VMware, Inc.
 * Author : Alok N Kataria <akataria@vmware.com>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
 * NON INFRINGEMENT.  See the GNU General Public License for more
 * details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 */

#include <linux/dmi.h>
#include <linux/init.h>
#include <linux/export.h>
#include <linux/clocksource.h>
#include <linux/cpu.h>
#include <linux/reboot.h>
#include <linux/static_call.h>
#include <asm/div64.h>
#include <asm/x86_init.h>
#include <asm/hypervisor.h>
#include <asm/timer.h>
#include <asm/apic.h>
#include <asm/vmware.h>
#include <asm/svm.h>

#undef pr_fmt
#define pr_fmt(fmt) "vmware: " fmt

#define CPUID_VMWARE_INFO_LEAF               0x40000000
#define CPUID_VMWARE_FEATURES_LEAF           0x40000010
#define CPUID_VMWARE_FEATURES_ECX_VMMCALL    BIT(0)
#define CPUID_VMWARE_FEATURES_ECX_VMCALL     BIT(1)

#define VMWARE_HYPERVISOR_MAGIC 0x564D5868

#define VMWARE_CMD_GETVERSION    10
#define VMWARE_CMD_GETHZ         45
#define VMWARE_CMD_GETVCPU_INFO  68
#define VMWARE_CMD_LEGACY_X2APIC  3
#define VMWARE_CMD_VCPU_RESERVED 31
#define VMWARE_CMD_STEALCLOCK    91

#define STEALCLOCK_NOT_AVAILABLE (-1)
#define STEALCLOCK_DISABLED        0
#define STEALCLOCK_ENABLED         1

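/*
 * Legacy "backdoor" channel: an IN from VMWARE_HYPERVISOR_PORT (defined in
 * asm/vmware.h) with the magic value in EAX and the command number in ECX.
 * The hypervisor intercepts the port access and passes results back in
 * EAX/EBX/ECX/EDX; no real I/O takes place.
 */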
#define VMWARE_PORT(cmd, eax, ebx, ecx, edx)                \
    __asm__("inl (%%dx), %%eax" :                   \
        "=a"(eax), "=c"(ecx), "=d"(edx), "=b"(ebx) :        \
        "a"(VMWARE_HYPERVISOR_MAGIC),               \
        "c"(VMWARE_CMD_##cmd),                  \
        "d"(VMWARE_HYPERVISOR_PORT), "b"(UINT_MAX) :        \
        "memory")

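/*
 * Instruction-based hypercalls: VMCALL on Intel VT-x, VMMCALL on AMD SVM.
 * Which one the hypervisor accepts is advertised in ECX of CPUID leaf
 * 0x40000010 and cached in vmware_hypercall_mode at detection time.
 */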
#define VMWARE_VMCALL(cmd, eax, ebx, ecx, edx)              \
    __asm__("vmcall" :                      \
        "=a"(eax), "=c"(ecx), "=d"(edx), "=b"(ebx) :        \
        "a"(VMWARE_HYPERVISOR_MAGIC),               \
        "c"(VMWARE_CMD_##cmd),                  \
        "d"(0), "b"(UINT_MAX) :                 \
        "memory")

#define VMWARE_VMMCALL(cmd, eax, ebx, ecx, edx)             \
    __asm__("vmmcall" :                     \
        "=a"(eax), "=c"(ecx), "=d"(edx), "=b"(ebx) :        \
        "a"(VMWARE_HYPERVISOR_MAGIC),               \
        "c"(VMWARE_CMD_##cmd),                  \
        "d"(0), "b"(UINT_MAX) :                 \
        "memory")

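/*
 * Dispatch a command through whichever channel was negotiated at detection
 * time; when neither feature bit was set, vmware_hypercall_mode stays 0 and
 * the legacy I/O port is used.
 */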
#define VMWARE_CMD(cmd, eax, ebx, ecx, edx) do {        \
    switch (vmware_hypercall_mode) {            \
    case CPUID_VMWARE_FEATURES_ECX_VMCALL:          \
        VMWARE_VMCALL(cmd, eax, ebx, ecx, edx);     \
        break;                      \
    case CPUID_VMWARE_FEATURES_ECX_VMMCALL:         \
        VMWARE_VMMCALL(cmd, eax, ebx, ecx, edx);    \
        break;                      \
    default:                        \
        VMWARE_PORT(cmd, eax, ebx, ecx, edx);       \
        break;                      \
    }                           \
    } while (0)

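/*
 * Per-vCPU steal-time page shared with the hypervisor, 64 bytes in total:
 * a 64-bit counter of stolen time in virtual-TSC units followed by
 * reserved space.
 */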
struct vmware_steal_time {
    union {
        uint64_t clock; /* stolen time counter in units of vtsc */
        struct {
            /* only for little-endian */
            uint32_t clock_low;
            uint32_t clock_high;
        };
    };
    uint64_t reserved[7];
};

static unsigned long vmware_tsc_khz __ro_after_init;
static u8 vmware_hypercall_mode     __ro_after_init;

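/*
 * GETVERSION handshake: a hypervisor that services the backdoor echoes the
 * magic value back in EBX and returns something other than -1 in EAX.
 */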
static inline int __vmware_platform(void)
{
    uint32_t eax, ebx, ecx, edx;
    VMWARE_CMD(GETVERSION, eax, ebx, ecx, edx);
    return eax != (uint32_t)-1 && ebx == VMWARE_HYPERVISOR_MAGIC;
}

static unsigned long vmware_get_tsc_khz(void)
{
    return vmware_tsc_khz;
}

#ifdef CONFIG_PARAVIRT
static struct cyc2ns_data vmware_cyc2ns __ro_after_init;
static bool vmw_sched_clock __initdata = true;
static DEFINE_PER_CPU_DECRYPTED(struct vmware_steal_time, vmw_steal_time) __aligned(64);
static bool has_steal_clock;
static bool steal_acc __initdata = true; /* steal time accounting */

static __init int setup_vmw_sched_clock(char *s)
{
    vmw_sched_clock = false;
    return 0;
}
early_param("no-vmw-sched-clock", setup_vmw_sched_clock);

static __init int parse_no_stealacc(char *arg)
{
    steal_acc = false;
    return 0;
}
early_param("no-steal-acc", parse_no_stealacc);

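/*
 * Scheduler clock: scale the raw TSC to nanoseconds with the precomputed
 * mult/shift pair, ns = (tsc * cyc2ns_mul) >> cyc2ns_shift, then subtract
 * the boot-time offset so the clock starts near zero.
 */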
static unsigned long long notrace vmware_sched_clock(void)
{
    unsigned long long ns;

    ns = mul_u64_u32_shr(rdtsc(), vmware_cyc2ns.cyc2ns_mul,
                 vmware_cyc2ns.cyc2ns_shift);
    ns -= vmware_cyc2ns.cyc2ns_offset;
    return ns;
}

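/*
 * Precompute the cycles->ns conversion: clocks_calc_mult_shift() derives a
 * mult/shift pair that maps kHz-based TSC counts to nanoseconds (hence the
 * NSEC_PER_MSEC target scale), and the TSC sampled here becomes the offset
 * subtracted by vmware_sched_clock().
 */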
static void __init vmware_cyc2ns_setup(void)
{
    struct cyc2ns_data *d = &vmware_cyc2ns;
    unsigned long long tsc_now = rdtsc();

    clocks_calc_mult_shift(&d->cyc2ns_mul, &d->cyc2ns_shift,
                   vmware_tsc_khz, NSEC_PER_MSEC, 0);
    d->cyc2ns_offset = mul_u64_u32_shr(tsc_now, d->cyc2ns_mul,
                       d->cyc2ns_shift);

    pr_info("using clock offset of %llu ns\n", d->cyc2ns_offset);
}

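/*
 * STEALCLOCK hypercall: the 64-bit argument is split across ESI (high) and
 * EDI (low); the STEALCLOCK_* status comes back in EAX. VMWARE_HYPERCALL
 * (from asm/vmware.h) expands to the vmcall/vmmcall/port variant matching
 * the CPU features set in vmware_set_capabilities().
 */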
static int vmware_cmd_stealclock(uint32_t arg1, uint32_t arg2)
{
    uint32_t result, info;

    asm volatile (VMWARE_HYPERCALL :
        "=a"(result),
        "=c"(info) :
        "a"(VMWARE_HYPERVISOR_MAGIC),
        "b"(0),
        "c"(VMWARE_CMD_STEALCLOCK),
        "d"(0),
        "S"(arg1),
        "D"(arg2) :
        "memory");
    return result;
}

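/*
 * Enabling passes the physical address of this CPU's steal-time page;
 * disabling passes the (0, 1) sentinel instead of an address. Either way
 * the return value reveals whether the hypervisor implements the steal
 * clock at all, which vmware_is_stealclock_available() exploits.
 */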
static bool stealclock_enable(phys_addr_t pa)
{
    return vmware_cmd_stealclock(upper_32_bits(pa),
                     lower_32_bits(pa)) == STEALCLOCK_ENABLED;
}

static int __stealclock_disable(void)
{
    return vmware_cmd_stealclock(0, 1);
}

static void stealclock_disable(void)
{
    __stealclock_disable();
}

static bool vmware_is_stealclock_available(void)
{
    return __stealclock_disable() != STEALCLOCK_NOT_AVAILABLE;
}

/**
 * vmware_steal_clock() - read the per-cpu steal clock
 * @cpu:            the cpu number whose steal clock we want to read
 *
 * The function reads the steal clock if we are on a 64-bit system, otherwise
 * reads it in parts, checking that the high part didn't change in the
 * meantime.
 *
 * Return:
 *      The steal clock reading in ns.
 */
static uint64_t vmware_steal_clock(int cpu)
{
    struct vmware_steal_time *steal = &per_cpu(vmw_steal_time, cpu);
    uint64_t clock;

    if (IS_ENABLED(CONFIG_64BIT))
        clock = READ_ONCE(steal->clock);
    else {
        uint32_t initial_high, low, high;

        do {
            initial_high = READ_ONCE(steal->clock_high);
            /* Do not reorder initial_high and high readings */
            virt_rmb();
            low = READ_ONCE(steal->clock_low);
            /* Keep low reading in between */
            virt_rmb();
            high = READ_ONCE(steal->clock_high);
        } while (initial_high != high);

        clock = ((uint64_t)high << 32) | low;
    }

    return mul_u64_u32_shr(clock, vmware_cyc2ns.cyc2ns_mul,
                 vmware_cyc2ns.cyc2ns_shift);
}

static void vmware_register_steal_time(void)
{
    int cpu = smp_processor_id();
    struct vmware_steal_time *st = &per_cpu(vmw_steal_time, cpu);

    if (!has_steal_clock)
        return;

    if (!stealclock_enable(slow_virt_to_phys(st))) {
        has_steal_clock = false;
        return;
    }

    pr_info("vmware-stealtime: cpu %d, pa %llx\n",
        cpu, (unsigned long long) slow_virt_to_phys(st));
}

static void vmware_disable_steal_time(void)
{
    if (!has_steal_clock)
        return;

    stealclock_disable();
}

static void vmware_guest_cpu_init(void)
{
    if (has_steal_clock)
        vmware_register_steal_time();
}

static void vmware_pv_guest_cpu_reboot(void *unused)
{
    vmware_disable_steal_time();
}

static int vmware_pv_reboot_notify(struct notifier_block *nb,
                unsigned long code, void *unused)
{
    if (code == SYS_RESTART)
        on_each_cpu(vmware_pv_guest_cpu_reboot, NULL, 1);
    return NOTIFY_DONE;
}

static struct notifier_block vmware_pv_reboot_nb = {
    .notifier_call = vmware_pv_reboot_notify,
};

#ifdef CONFIG_SMP
static void __init vmware_smp_prepare_boot_cpu(void)
{
    vmware_guest_cpu_init();
    native_smp_prepare_boot_cpu();
}

static int vmware_cpu_online(unsigned int cpu)
{
    local_irq_disable();
    vmware_guest_cpu_init();
    local_irq_enable();
    return 0;
}

static int vmware_cpu_down_prepare(unsigned int cpu)
{
    local_irq_disable();
    vmware_disable_steal_time();
    local_irq_enable();
    return 0;
}
#endif

static __init int activate_jump_labels(void)
{
    if (has_steal_clock) {
        static_key_slow_inc(&paravirt_steal_enabled);
        if (steal_acc)
            static_key_slow_inc(&paravirt_steal_rq_enabled);
    }

    return 0;
}
arch_initcall(activate_jump_labels);

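/*
 * Wire up the paravirt hooks: a no-op io_delay, the TSC-based sched_clock
 * (unless "no-vmw-sched-clock" was given), and, if the hypervisor supports
 * it, steal-time accounting with per-cpu registration on CPU online and
 * teardown on CPU offline or reboot.
 */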
static void __init vmware_paravirt_ops_setup(void)
{
    pv_info.name = "VMware hypervisor";
    pv_ops.cpu.io_delay = paravirt_nop;

    if (vmware_tsc_khz == 0)
        return;

    vmware_cyc2ns_setup();

    if (vmw_sched_clock)
        paravirt_set_sched_clock(vmware_sched_clock);

    if (vmware_is_stealclock_available()) {
        has_steal_clock = true;
        static_call_update(pv_steal_clock, vmware_steal_clock);

        /* We use reboot notifier only to disable steal clock */
        register_reboot_notifier(&vmware_pv_reboot_nb);

#ifdef CONFIG_SMP
        smp_ops.smp_prepare_boot_cpu =
            vmware_smp_prepare_boot_cpu;
        if (cpuhp_setup_state_nocalls(CPUHP_AP_ONLINE_DYN,
                          "x86/vmware:online",
                          vmware_cpu_online,
                          vmware_cpu_down_prepare) < 0)
            pr_err("vmware_guest: Failed to install cpu hotplug callbacks\n");
#else
        vmware_guest_cpu_init();
#endif
    }
}
#else
#define vmware_paravirt_ops_setup() do {} while (0)
#endif

/*
 * VMware hypervisor takes care of exporting a reliable TSC to the guest.
 * Still, due to timing differences when running on virtual CPUs, the TSC can
 * be marked as unstable in some cases. For example, the TSC sync check at
 * bootup can fail due to a marginal offset between vcpus' TSCs (though the
 * TSCs do not drift from each other).  Also, the ACPI PM timer clocksource
 * is not suitable as a watchdog when running on a hypervisor because the
 * kernel may miss a wrap of the counter if the vcpu is descheduled for a
 * long time. To skip these checks at runtime we set these capability bits,
 * so that the kernel can just trust the hypervisor to provide a reliable
 * virtual TSC that is suitable for timekeeping.
 */
static void __init vmware_set_capabilities(void)
{
    setup_force_cpu_cap(X86_FEATURE_CONSTANT_TSC);
    setup_force_cpu_cap(X86_FEATURE_TSC_RELIABLE);
    if (vmware_tsc_khz)
        setup_force_cpu_cap(X86_FEATURE_TSC_KNOWN_FREQ);
    if (vmware_hypercall_mode == CPUID_VMWARE_FEATURES_ECX_VMCALL)
        setup_force_cpu_cap(X86_FEATURE_VMCALL);
    else if (vmware_hypercall_mode == CPUID_VMWARE_FEATURES_ECX_VMMCALL)
        setup_force_cpu_cap(X86_FEATURE_VMW_VMMCALL);
}

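/*
 * GETHZ returns the TSC frequency in Hz in EBX:EAX (EBX keeps its UINT_MAX
 * input value when the command is unimplemented) and the local APIC bus
 * clock in Hz in ECX. A frequency obtained this way lets us preset
 * loops-per-jiffy and skip both TSC and APIC timer calibration.
 */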
static void __init vmware_platform_setup(void)
{
    uint32_t eax, ebx, ecx, edx;
    uint64_t lpj, tsc_khz;

    VMWARE_CMD(GETHZ, eax, ebx, ecx, edx);

    if (ebx != UINT_MAX) {
        lpj = tsc_khz = eax | (((uint64_t)ebx) << 32);
        do_div(tsc_khz, 1000);
        WARN_ON(tsc_khz >> 32);
        pr_info("TSC freq read from hypervisor : %lu.%03lu MHz\n",
            (unsigned long) tsc_khz / 1000,
            (unsigned long) tsc_khz % 1000);

        if (!preset_lpj) {
            do_div(lpj, HZ);
            preset_lpj = lpj;
        }

        vmware_tsc_khz = tsc_khz;
        x86_platform.calibrate_tsc = vmware_get_tsc_khz;
        x86_platform.calibrate_cpu = vmware_get_tsc_khz;

#ifdef CONFIG_X86_LOCAL_APIC
        /* Skip lapic calibration since we know the bus frequency. */
        lapic_timer_period = ecx / HZ;
        pr_info("Host bus clock speed read from hypervisor : %u Hz\n",
            ecx);
#endif
    } else {
        pr_warn("Failed to get TSC freq from the hypervisor\n");
    }

    vmware_paravirt_ops_setup();

#ifdef CONFIG_X86_IO_APIC
    no_timer_check = 1;
#endif

    vmware_set_capabilities();
}

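/*
 * Read the hypercall capability bits from ECX of the features leaf. The
 * result is compared against each bit individually in VMWARE_CMD(), so the
 * hypervisor is expected to advertise at most one of them.
 */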
static u8 __init vmware_select_hypercall(void)
{
    unsigned int eax, ebx, ecx, edx;

    cpuid(CPUID_VMWARE_FEATURES_LEAF, &eax, &ebx, &ecx, &edx);
    return (ecx & (CPUID_VMWARE_FEATURES_ECX_VMMCALL |
               CPUID_VMWARE_FEATURES_ECX_VMCALL));
}

/*
 * When checking the DMI string information, checking the product serial
 * key alone should be enough, as it always carries a VMware-specific
 * string when running under the VMware hypervisor.
 * If !boot_cpu_has(X86_FEATURE_HYPERVISOR), vmware_hypercall_mode
 * intentionally defaults to 0.
 */
static uint32_t __init vmware_platform(void)
{
    if (boot_cpu_has(X86_FEATURE_HYPERVISOR)) {
        unsigned int eax;
        unsigned int hyper_vendor_id[3];

        cpuid(CPUID_VMWARE_INFO_LEAF, &eax, &hyper_vendor_id[0],
              &hyper_vendor_id[1], &hyper_vendor_id[2]);
        if (!memcmp(hyper_vendor_id, "VMwareVMware", 12)) {
            if (eax >= CPUID_VMWARE_FEATURES_LEAF)
                vmware_hypercall_mode =
                    vmware_select_hypercall();

            pr_info("hypercall mode: 0x%02x\n",
                (unsigned int) vmware_hypercall_mode);

            return CPUID_VMWARE_INFO_LEAF;
        }
    } else if (dmi_available && dmi_name_in_serial("VMware") &&
           __vmware_platform())
        return 1;

    return 0;
}

/* Checks if hypervisor supports x2apic without VT-D interrupt remapping. */
static bool __init vmware_legacy_x2apic_available(void)
{
    uint32_t eax, ebx, ecx, edx;
    VMWARE_CMD(GETVCPU_INFO, eax, ebx, ecx, edx);
    return !(eax & BIT(VMWARE_CMD_VCPU_RESERVED)) &&
        (eax & BIT(VMWARE_CMD_LEGACY_X2APIC));
}

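/*
 * Under SEV-ES the hypervisor cannot read guest registers directly, so
 * hypercall arguments are copied into the shared GHCB before the #VC
 * handler forwards the VMware call, and results are copied back only if
 * the hypervisor marked every expected register as valid.
 */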
#ifdef CONFIG_AMD_MEM_ENCRYPT
static void vmware_sev_es_hcall_prepare(struct ghcb *ghcb,
                    struct pt_regs *regs)
{
    /* Copy VMWARE specific Hypercall parameters to the GHCB */
    ghcb_set_rip(ghcb, regs->ip);
    ghcb_set_rbx(ghcb, regs->bx);
    ghcb_set_rcx(ghcb, regs->cx);
    ghcb_set_rdx(ghcb, regs->dx);
    ghcb_set_rsi(ghcb, regs->si);
    ghcb_set_rdi(ghcb, regs->di);
    ghcb_set_rbp(ghcb, regs->bp);
}

static bool vmware_sev_es_hcall_finish(struct ghcb *ghcb, struct pt_regs *regs)
{
    if (!(ghcb_rbx_is_valid(ghcb) &&
          ghcb_rcx_is_valid(ghcb) &&
          ghcb_rdx_is_valid(ghcb) &&
          ghcb_rsi_is_valid(ghcb) &&
          ghcb_rdi_is_valid(ghcb) &&
          ghcb_rbp_is_valid(ghcb)))
        return false;

    regs->bx = ghcb_get_rbx(ghcb);
    regs->cx = ghcb_get_rcx(ghcb);
    regs->dx = ghcb_get_rdx(ghcb);
    regs->si = ghcb_get_rsi(ghcb);
    regs->di = ghcb_get_rdi(ghcb);
    regs->bp = ghcb_get_rbp(ghcb);

    return true;
}
#endif

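/*
 * Registration with the x86 hypervisor framework: .detect runs during
 * early boot, and the init/runtime callbacks are installed if it reports
 * a match.
 */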
const __initconst struct hypervisor_x86 x86_hyper_vmware = {
    .name               = "VMware",
    .detect             = vmware_platform,
    .type               = X86_HYPER_VMWARE,
    .init.init_platform     = vmware_platform_setup,
    .init.x2apic_available      = vmware_legacy_x2apic_available,
#ifdef CONFIG_AMD_MEM_ENCRYPT
    .runtime.sev_es_hcall_prepare   = vmware_sev_es_hcall_prepare,
    .runtime.sev_es_hcall_finish    = vmware_sev_es_hcall_finish,
#endif
};