#include <linux/dmi.h>
#include <linux/init.h>
#include <linux/export.h>
#include <linux/clocksource.h>
#include <linux/cpu.h>
#include <linux/reboot.h>
#include <linux/static_call.h>
#include <asm/div64.h>
#include <asm/x86_init.h>
#include <asm/hypervisor.h>
#include <asm/timer.h>
#include <asm/apic.h>
#include <asm/vmware.h>
#include <asm/svm.h>

#undef pr_fmt
#define pr_fmt(fmt)	"vmware: " fmt

#define CPUID_VMWARE_INFO_LEAF			0x40000000
#define CPUID_VMWARE_FEATURES_LEAF		0x40000010
#define CPUID_VMWARE_FEATURES_ECX_VMMCALL	BIT(0)
#define CPUID_VMWARE_FEATURES_ECX_VMCALL	BIT(1)

#define VMWARE_HYPERVISOR_MAGIC	0x564D5868

#define VMWARE_CMD_GETVERSION		10
#define VMWARE_CMD_GETHZ		45
#define VMWARE_CMD_GETVCPU_INFO		68
#define VMWARE_CMD_LEGACY_X2APIC	3
#define VMWARE_CMD_VCPU_RESERVED	31
#define VMWARE_CMD_STEALCLOCK		91

#define STEALCLOCK_NOT_AVAILABLE	(-1)
#define STEALCLOCK_DISABLED		0
#define STEALCLOCK_ENABLED		1

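/*
 * The legacy "backdoor" channel: an IN from VMWARE_HYPERVISOR_PORT with
 * the magic value in EAX is intercepted by the hypervisor, which returns
 * its results in the general-purpose registers. The VMCALL/VMMCALL
 * variants below are preferred when CPUID leaf 0x40000010 advertises them.
 */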
#define VMWARE_PORT(cmd, eax, ebx, ecx, edx)				\
	__asm__("inl (%%dx), %%eax" :					\
		"=a"(eax), "=c"(ecx), "=d"(edx), "=b"(ebx) :		\
		"a"(VMWARE_HYPERVISOR_MAGIC),				\
		"c"(VMWARE_CMD_##cmd),					\
		"d"(VMWARE_HYPERVISOR_PORT), "b"(UINT_MAX) :		\
		"memory")

#define VMWARE_VMCALL(cmd, eax, ebx, ecx, edx)				\
	__asm__("vmcall" :						\
		"=a"(eax), "=c"(ecx), "=d"(edx), "=b"(ebx) :		\
		"a"(VMWARE_HYPERVISOR_MAGIC),				\
		"c"(VMWARE_CMD_##cmd),					\
		"d"(0), "b"(UINT_MAX) :					\
		"memory")

#define VMWARE_VMMCALL(cmd, eax, ebx, ecx, edx)				\
	__asm__("vmmcall" :						\
		"=a"(eax), "=c"(ecx), "=d"(edx), "=b"(ebx) :		\
		"a"(VMWARE_HYPERVISOR_MAGIC),				\
		"c"(VMWARE_CMD_##cmd),					\
		"d"(0), "b"(UINT_MAX) :					\
		"memory")

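/*
 * Issue a hypercall via the mechanism detected at boot: VMCALL on Intel,
 * VMMCALL on AMD, or the legacy I/O port when neither is advertised.
 */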
#define VMWARE_CMD(cmd, eax, ebx, ecx, edx) do {		\
	switch (vmware_hypercall_mode) {			\
	case CPUID_VMWARE_FEATURES_ECX_VMCALL:			\
		VMWARE_VMCALL(cmd, eax, ebx, ecx, edx);		\
		break;						\
	case CPUID_VMWARE_FEATURES_ECX_VMMCALL:			\
		VMWARE_VMMCALL(cmd, eax, ebx, ecx, edx);	\
		break;						\
	default:						\
		VMWARE_PORT(cmd, eax, ebx, ecx, edx);		\
		break;						\
	}							\
} while (0)

struct vmware_steal_time {
	union {
		uint64_t clock;
		struct {
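			/* Low/high split assumes little-endian layout. */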
			uint32_t clock_low;
			uint32_t clock_high;
		};
	};
	uint64_t reserved[7];
};

static unsigned long vmware_tsc_khz __ro_after_init;
static u8 vmware_hypercall_mode     __ro_after_init;

static inline int __vmware_platform(void)
{
	uint32_t eax, ebx, ecx, edx;

	VMWARE_CMD(GETVERSION, eax, ebx, ecx, edx);
	return eax != (uint32_t)-1 && ebx == VMWARE_HYPERVISOR_MAGIC;
}

static unsigned long vmware_get_tsc_khz(void)
{
	return vmware_tsc_khz;
}

#ifdef CONFIG_PARAVIRT
static struct cyc2ns_data vmware_cyc2ns __ro_after_init;
static bool vmw_sched_clock __initdata = true;
static DEFINE_PER_CPU_DECRYPTED(struct vmware_steal_time, vmw_steal_time) __aligned(64);
static bool has_steal_clock;
static bool steal_acc __initdata = true;

static __init int setup_vmw_sched_clock(char *s)
{
	vmw_sched_clock = false;
	return 0;
}
early_param("no-vmw-sched-clock", setup_vmw_sched_clock);

static __init int parse_no_stealacc(char *arg)
{
	steal_acc = false;
	return 0;
}
early_param("no-steal-acc", parse_no_stealacc);

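/*
 * Convert raw TSC cycles to nanoseconds and subtract the offset recorded
 * in vmware_cyc2ns_setup(), so the clock reads near zero at setup time.
 */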
static unsigned long long notrace vmware_sched_clock(void)
{
	unsigned long long ns;

	ns = mul_u64_u32_shr(rdtsc(), vmware_cyc2ns.cyc2ns_mul,
			     vmware_cyc2ns.cyc2ns_shift);
	ns -= vmware_cyc2ns.cyc2ns_offset;
	return ns;
}

static void __init vmware_cyc2ns_setup(void)
{
	struct cyc2ns_data *d = &vmware_cyc2ns;
	unsigned long long tsc_now = rdtsc();

	clocks_calc_mult_shift(&d->cyc2ns_mul, &d->cyc2ns_shift,
			       vmware_tsc_khz, NSEC_PER_MSEC, 0);
	d->cyc2ns_offset = mul_u64_u32_shr(tsc_now, d->cyc2ns_mul,
					   d->cyc2ns_shift);

	pr_info("using clock offset of %llu ns\n", d->cyc2ns_offset);
}

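/*
 * Issue the STEALCLOCK hypercall; the two argument words are passed in
 * ESI/EDI and the status is returned in EAX. VMWARE_HYPERCALL comes from
 * asm/vmware.h and expands to the appropriate hypercall instruction.
 */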
static int vmware_cmd_stealclock(uint32_t arg1, uint32_t arg2)
{
	uint32_t result, info;

	asm volatile (VMWARE_HYPERCALL :
		"=a"(result),
		"=c"(info) :
		"a"(VMWARE_HYPERVISOR_MAGIC),
		"b"(0),
		"c"(VMWARE_CMD_STEALCLOCK),
		"d"(0),
		"S"(arg1),
		"D"(arg2) :
		"memory");
	return result;
}

static bool stealclock_enable(phys_addr_t pa)
{
	return vmware_cmd_stealclock(upper_32_bits(pa),
				     lower_32_bits(pa)) == STEALCLOCK_ENABLED;
}

static int __stealclock_disable(void)
{
	return vmware_cmd_stealclock(0, 1);
}

static void stealclock_disable(void)
{
	__stealclock_disable();
}

static bool vmware_is_stealclock_available(void)
{
	return __stealclock_disable() != STEALCLOCK_NOT_AVAILABLE;
}
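/**
 * vmware_steal_clock() - read the per-cpu steal clock
 * @cpu: the cpu number whose steal clock we want to read
 *
 * The function reads the steal clock in one go on 64-bit systems;
 * otherwise it reads it in two halves, retrying until the high part is
 * stable across the read of the low part.
 *
 * Return:
 *	The steal clock reading in ns.
 */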
static uint64_t vmware_steal_clock(int cpu)
{
	struct vmware_steal_time *steal = &per_cpu(vmw_steal_time, cpu);
	uint64_t clock;

	if (IS_ENABLED(CONFIG_64BIT))
		clock = READ_ONCE(steal->clock);
	else {
		uint32_t initial_high, low, high;

		do {
			initial_high = READ_ONCE(steal->clock_high);
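			/* Do not reorder initial_high and high readings */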
			virt_rmb();
			low = READ_ONCE(steal->clock_low);
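			/* Keep the low reading in between the two barriers */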
			virt_rmb();
			high = READ_ONCE(steal->clock_high);
		} while (initial_high != high);

		clock = ((uint64_t)high << 32) | low;
	}

	return mul_u64_u32_shr(clock, vmware_cyc2ns.cyc2ns_mul,
			       vmware_cyc2ns.cyc2ns_shift);
}

static void vmware_register_steal_time(void)
{
	int cpu = smp_processor_id();
	struct vmware_steal_time *st = &per_cpu(vmw_steal_time, cpu);

	if (!has_steal_clock)
		return;

	if (!stealclock_enable(slow_virt_to_phys(st))) {
		has_steal_clock = false;
		return;
	}

	pr_info("vmware-stealtime: cpu %d, pa %llx\n",
		cpu, (unsigned long long) slow_virt_to_phys(st));
}

static void vmware_disable_steal_time(void)
{
	if (!has_steal_clock)
		return;

	stealclock_disable();
}

static void vmware_guest_cpu_init(void)
{
	if (has_steal_clock)
		vmware_register_steal_time();
}

static void vmware_pv_guest_cpu_reboot(void *unused)
{
	vmware_disable_steal_time();
}

static int vmware_pv_reboot_notify(struct notifier_block *nb,
				   unsigned long code, void *unused)
{
	if (code == SYS_RESTART)
		on_each_cpu(vmware_pv_guest_cpu_reboot, NULL, 1);
	return NOTIFY_DONE;
}

static struct notifier_block vmware_pv_reboot_nb = {
	.notifier_call = vmware_pv_reboot_notify,
};

#ifdef CONFIG_SMP
static void __init vmware_smp_prepare_boot_cpu(void)
{
	vmware_guest_cpu_init();
	native_smp_prepare_boot_cpu();
}

static int vmware_cpu_online(unsigned int cpu)
{
	local_irq_disable();
	vmware_guest_cpu_init();
	local_irq_enable();
	return 0;
}

static int vmware_cpu_down_prepare(unsigned int cpu)
{
	local_irq_disable();
	vmware_disable_steal_time();
	local_irq_enable();
	return 0;
}
#endif

static __init int activate_jump_labels(void)
{
	if (has_steal_clock) {
		static_key_slow_inc(&paravirt_steal_enabled);
		if (steal_acc)
			static_key_slow_inc(&paravirt_steal_rq_enabled);
	}

	return 0;
}
arch_initcall(activate_jump_labels);

static void __init vmware_paravirt_ops_setup(void)
{
	pv_info.name = "VMware hypervisor";
	pv_ops.cpu.io_delay = paravirt_nop;

	if (vmware_tsc_khz == 0)
		return;

	vmware_cyc2ns_setup();

	if (vmw_sched_clock)
		paravirt_set_sched_clock(vmware_sched_clock);

	if (vmware_is_stealclock_available()) {
		has_steal_clock = true;
		static_call_update(pv_steal_clock, vmware_steal_clock);
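		/* We use the reboot notifier only to disable the steal clock */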
		register_reboot_notifier(&vmware_pv_reboot_nb);

#ifdef CONFIG_SMP
		smp_ops.smp_prepare_boot_cpu =
			vmware_smp_prepare_boot_cpu;
		if (cpuhp_setup_state_nocalls(CPUHP_AP_ONLINE_DYN,
					      "x86/vmware:online",
					      vmware_cpu_online,
					      vmware_cpu_down_prepare) < 0)
			pr_err("vmware_guest: Failed to install cpu hotplug callbacks\n");
#else
		vmware_guest_cpu_init();
#endif
	}
}
#else
#define vmware_paravirt_ops_setup() do {} while (0)
#endif
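/*
 * The VMware hypervisor exports a reliable TSC to the guest. Still, the
 * TSC can be marked unstable at runtime: the boot-time TSC sync check may
 * fail due to a marginal offset between vcpus' TSCs (even though they do
 * not drift), and the ACPI PM timer is unsuitable as a clocksource
 * watchdog because a descheduled vcpu can miss a counter wrap. Setting
 * these capability bits skips those checks, trusting the hypervisor to
 * provide a TSC suitable for timekeeping.
 */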
static void __init vmware_set_capabilities(void)
{
	setup_force_cpu_cap(X86_FEATURE_CONSTANT_TSC);
	setup_force_cpu_cap(X86_FEATURE_TSC_RELIABLE);
	if (vmware_tsc_khz)
		setup_force_cpu_cap(X86_FEATURE_TSC_KNOWN_FREQ);
	if (vmware_hypercall_mode == CPUID_VMWARE_FEATURES_ECX_VMCALL)
		setup_force_cpu_cap(X86_FEATURE_VMCALL);
	else if (vmware_hypercall_mode == CPUID_VMWARE_FEATURES_ECX_VMMCALL)
		setup_force_cpu_cap(X86_FEATURE_VMW_VMMCALL);
}

static void __init vmware_platform_setup(void)
{
	uint32_t eax, ebx, ecx, edx;
	uint64_t lpj, tsc_khz;

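	/*
	 * GETHZ reports the TSC frequency in Hz in EBX:EAX (EBX == UINT_MAX
	 * on failure) and the bus frequency in Hz in ECX.
	 */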
	VMWARE_CMD(GETHZ, eax, ebx, ecx, edx);

	if (ebx != UINT_MAX) {
		lpj = tsc_khz = eax | (((uint64_t)ebx) << 32);
		do_div(tsc_khz, 1000);
		WARN_ON(tsc_khz >> 32);
		pr_info("TSC freq read from hypervisor : %lu.%03lu MHz\n",
			(unsigned long) tsc_khz / 1000,
			(unsigned long) tsc_khz % 1000);

		if (!preset_lpj) {
			do_div(lpj, HZ);
			preset_lpj = lpj;
		}

		vmware_tsc_khz = tsc_khz;
		x86_platform.calibrate_tsc = vmware_get_tsc_khz;
		x86_platform.calibrate_cpu = vmware_get_tsc_khz;

#ifdef CONFIG_X86_LOCAL_APIC
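		/* Skip lapic calibration since we know the bus frequency. */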
		lapic_timer_period = ecx / HZ;
		pr_info("Host bus clock speed read from hypervisor : %u Hz\n",
			ecx);
#endif
	} else {
		pr_warn("Failed to get TSC freq from the hypervisor\n");
	}

	vmware_paravirt_ops_setup();

#ifdef CONFIG_X86_IO_APIC
	no_timer_check = 1;
#endif

	vmware_set_capabilities();
}

static u8 __init vmware_select_hypercall(void)
{
	uint32_t eax, ebx, ecx, edx;

	cpuid(CPUID_VMWARE_FEATURES_LEAF, &eax, &ebx, &ecx, &edx);
	return (ecx & (CPUID_VMWARE_FEATURES_ECX_VMMCALL |
		       CPUID_VMWARE_FEATURES_ECX_VMCALL));
}
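/*
 * When checking the DMI string information, just checking the product
 * serial key should be enough, as this will always carry a VMware
 * specific string when running under the VMware hypervisor.
 */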
static uint32_t __init vmware_platform(void)
{
	if (boot_cpu_has(X86_FEATURE_HYPERVISOR)) {
		unsigned int eax;
		unsigned int hyper_vendor_id[3];

		cpuid(CPUID_VMWARE_INFO_LEAF, &eax, &hyper_vendor_id[0],
		      &hyper_vendor_id[1], &hyper_vendor_id[2]);
		if (!memcmp(hyper_vendor_id, "VMwareVMware", 12)) {
			if (eax >= CPUID_VMWARE_FEATURES_LEAF)
				vmware_hypercall_mode =
					vmware_select_hypercall();

			pr_info("hypercall mode: 0x%02x\n",
				(unsigned int) vmware_hypercall_mode);

			return CPUID_VMWARE_INFO_LEAF;
		}
	} else if (dmi_available && dmi_name_in_serial("VMware") &&
		   __vmware_platform())
		return 1;

	return 0;
}
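/* Check whether the hypervisor supports x2apic without VT-d interrupt remapping. */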
static bool __init vmware_legacy_x2apic_available(void)
{
	uint32_t eax, ebx, ecx, edx;

	VMWARE_CMD(GETVCPU_INFO, eax, ebx, ecx, edx);
	return !(eax & BIT(VMWARE_CMD_VCPU_RESERVED)) &&
		(eax & BIT(VMWARE_CMD_LEGACY_X2APIC));
}

#ifdef CONFIG_AMD_MEM_ENCRYPT
static void vmware_sev_es_hcall_prepare(struct ghcb *ghcb,
					struct pt_regs *regs)
{
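	/* Copy the VMware-specific hypercall parameters to the GHCB */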
	ghcb_set_rip(ghcb, regs->ip);
	ghcb_set_rbx(ghcb, regs->bx);
	ghcb_set_rcx(ghcb, regs->cx);
	ghcb_set_rdx(ghcb, regs->dx);
	ghcb_set_rsi(ghcb, regs->si);
	ghcb_set_rdi(ghcb, regs->di);
	ghcb_set_rbp(ghcb, regs->bp);
}

static bool vmware_sev_es_hcall_finish(struct ghcb *ghcb, struct pt_regs *regs)
{
	if (!(ghcb_rbx_is_valid(ghcb) &&
	      ghcb_rcx_is_valid(ghcb) &&
	      ghcb_rdx_is_valid(ghcb) &&
	      ghcb_rsi_is_valid(ghcb) &&
	      ghcb_rdi_is_valid(ghcb) &&
	      ghcb_rbp_is_valid(ghcb)))
		return false;

	regs->bx = ghcb_get_rbx(ghcb);
	regs->cx = ghcb_get_rcx(ghcb);
	regs->dx = ghcb_get_rdx(ghcb);
	regs->si = ghcb_get_rsi(ghcb);
	regs->di = ghcb_get_rdi(ghcb);
	regs->bp = ghcb_get_rbp(ghcb);

	return true;
}
#endif

const __initconst struct hypervisor_x86 x86_hyper_vmware = {
	.name				= "VMware",
	.detect				= vmware_platform,
	.type				= X86_HYPER_VMWARE,
	.init.init_platform		= vmware_platform_setup,
	.init.x2apic_available		= vmware_legacy_x2apic_available,
#ifdef CONFIG_AMD_MEM_ENCRYPT
	.runtime.sev_es_hcall_prepare	= vmware_sev_es_hcall_prepare,
	.runtime.sev_es_hcall_finish	= vmware_sev_es_hcall_finish,
#endif
};