// SPDX-License-Identifier: GPL-2.0
/*
 * Core of Xen paravirt_ops implementation.
 *
 * This file contains the xen_paravirt_ops structure itself, and the
 * implementations for:
 * - privileged instructions
 * - interrupt flags
 * - segment operations
 * - booting and setup
 *
 * Jeremy Fitzhardinge <jeremy@xensource.com>, XenSource Inc, 2007
 */
#include <linux/cpu.h>
#include <linux/kernel.h>
#include <linux/init.h>
#include <linux/smp.h>
#include <linux/preempt.h>
#include <linux/hardirq.h>
#include <linux/percpu.h>
#include <linux/delay.h>
#include <linux/start_kernel.h>
#include <linux/sched.h>
#include <linux/kprobes.h>
#include <linux/memblock.h>
#include <linux/export.h>
#include <linux/mm.h>
#include <linux/page-flags.h>
#include <linux/pci.h>
#include <linux/gfp.h>
#include <linux/edd.h>
#include <linux/reboot.h>
#include <linux/virtio_anchor.h>

#include <xen/xen.h>
#include <xen/events.h>
#include <xen/interface/xen.h>
#include <xen/interface/version.h>
#include <xen/interface/physdev.h>
#include <xen/interface/vcpu.h>
#include <xen/interface/memory.h>
#include <xen/interface/nmi.h>
#include <xen/interface/xen-mca.h>
#include <xen/features.h>
#include <xen/page.h>
#include <xen/hvc-console.h>
#include <xen/acpi.h>

#include <asm/paravirt.h>
#include <asm/apic.h>
#include <asm/page.h>
#include <asm/xen/pci.h>
#include <asm/xen/hypercall.h>
#include <asm/xen/hypervisor.h>
#include <asm/xen/cpuid.h>
#include <asm/fixmap.h>
#include <asm/processor.h>
#include <asm/proto.h>
#include <asm/msr-index.h>
#include <asm/traps.h>
#include <asm/setup.h>
#include <asm/desc.h>
#include <asm/pgalloc.h>
#include <asm/tlbflush.h>
#include <asm/reboot.h>
#include <asm/stackprotector.h>
#include <asm/hypervisor.h>
#include <asm/mach_traps.h>
#include <asm/mwait.h>
#include <asm/pci_x86.h>
#include <asm/cpu.h>
#ifdef CONFIG_X86_IOPL_IOPERM
#include <asm/io_bitmap.h>
#endif

#ifdef CONFIG_ACPI
#include <linux/acpi.h>
#include <asm/acpi.h>
#include <acpi/pdc_intel.h>
#include <acpi/processor.h>
#include <xen/interface/platform.h>
#endif

#include "xen-ops.h"
#include "mmu.h"
#include "smp.h"
#include "multicalls.h"
#include "pmu.h"

#include "../kernel/cpu/cpu.h" /* get_cpu_cap() */

void *xen_initial_gdt;

static int xen_cpu_up_prepare_pv(unsigned int cpu);
static int xen_cpu_dead_pv(unsigned int cpu);

struct tls_descs {
	struct desc_struct desc[3];
};

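/*
 * Updating the 3 TLS descriptors in the GDT on every task switch is
 * surprisingly expensive so we avoid updating them if they haven't
 * changed.  Since Xen writes different descriptors than the one
 * passed in, we keep shadow copies to compare against.
 */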
static DEFINE_PER_CPU(struct tls_descs, shadow_tls_desc);

static void __init xen_pv_init_platform(void)
{
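	/* PV guests can't operate virtio devices without restricted memory access (grants). */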
	if (IS_ENABLED(CONFIG_XEN_VIRTIO))
		virtio_set_mem_acc_cb(virtio_require_restricted_mem_acc);

	populate_extra_pte(fix_to_virt(FIX_PARAVIRT_BOOTMAP));

	set_fixmap(FIX_PARAVIRT_BOOTMAP, xen_start_info->shared_info);
	HYPERVISOR_shared_info = (void *)fix_to_virt(FIX_PARAVIRT_BOOTMAP);

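	/* The Xen clock uses per-cpu vcpu_info, so initialize it for the boot CPU. */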
	xen_vcpu_info_reset(0);

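	/* pvclock is in the shared info area. */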
	xen_init_time_ops();
}

static void __init xen_pv_guest_late_init(void)
{
#ifndef CONFIG_SMP
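	/* Set up shared vcpu info for non-SMP configurations. */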
	xen_setup_vcpu_info_placement();
#endif
}

static __read_mostly unsigned int cpuid_leaf5_ecx_val;
static __read_mostly unsigned int cpuid_leaf5_edx_val;

static void xen_cpuid(unsigned int *ax, unsigned int *bx,
		      unsigned int *cx, unsigned int *dx)
{
	unsigned maskebx = ~0;

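	/*
	 * Mask out inconvenient features, to try and disable as many
	 * unsupported kernel subsystems as possible.
	 */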
	switch (*ax) {
	case CPUID_MWAIT_LEAF:
		/* Synthesize the values from the saved MWAIT leaf. */
		*ax = 0;
		*bx = 0;
		*cx = cpuid_leaf5_ecx_val;
		*dx = cpuid_leaf5_edx_val;
		return;

	case 0xb:
		/* Suppress extended topology stuff. */
		maskebx = 0;
		break;
	}

	asm(XEN_EMULATE_PREFIX "cpuid"
		: "=a" (*ax),
		  "=b" (*bx),
		  "=c" (*cx),
		  "=d" (*dx)
		: "0" (*ax), "2" (*cx));

	*bx &= maskebx;
}

static bool __init xen_check_mwait(void)
{
#ifdef CONFIG_ACPI
	struct xen_platform_op op = {
		.cmd = XENPF_set_processor_pminfo,
		.u.set_pminfo.id = -1,
		.u.set_pminfo.type = XEN_PM_PDC,
	};
	uint32_t buf[3];
	unsigned int ax, bx, cx, dx;
	unsigned int mwait_mask;

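	/*
	 * Deciding whether to expose MWAIT (to harvest deeper C-states from
	 * ACPI _CST) only makes sense in the initial domain, which is the
	 * one that negotiates power management with the hypervisor.
	 */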
	if (!xen_initial_domain())
		return false;

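	/*
	 * When running on a hypervisor older than Xen 4.2, do not expose
	 * MWAIT, to avoid the risk of loading the native ACPI PAD driver.
	 */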
	if (!xen_running_on_version_or_later(4, 2))
		return false;

	ax = 1;
	cx = 0;

	native_cpuid(&ax, &bx, &cx, &dx);

	mwait_mask = (1 << (X86_FEATURE_EST % 32)) |
		     (1 << (X86_FEATURE_MWAIT % 32));

	if ((cx & mwait_mask) != mwait_mask)
		return false;

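	/*
	 * We need to emulate the MWAIT_LEAF and for that we need both
	 * ecx and edx. The hypercall provides only partial information.
	 */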
	ax = CPUID_MWAIT_LEAF;
	bx = 0;
	cx = 0;
	dx = 0;

	native_cpuid(&ax, &bx, &cx, &dx);

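	/*
	 * Ask the Hypervisor whether to clear ACPI_PDC_C_C2C3_FFH. If so,
	 * don't expose MWAIT_LEAF and let ACPI pick the IOPORT version of C3.
	 */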
	buf[0] = ACPI_PDC_REVISION_ID;
	buf[1] = 1;
	buf[2] = (ACPI_PDC_C_CAPABILITY_SMP | ACPI_PDC_EST_CAPABILITY_SWSMP);

	set_xen_guest_handle(op.u.set_pminfo.pdc, buf);

	if ((HYPERVISOR_platform_op(&op) == 0) &&
	    (buf[2] & (ACPI_PDC_C_C1_FFH | ACPI_PDC_C_C2C3_FFH))) {
		cpuid_leaf5_ecx_val = cx;
		cpuid_leaf5_edx_val = dx;
	}
	return true;
#else
	return false;
#endif
}

static bool __init xen_check_xsave(void)
{
	unsigned int cx, xsave_mask;

	cx = cpuid_ecx(1);

	xsave_mask = (1 << (X86_FEATURE_XSAVE % 32)) |
		     (1 << (X86_FEATURE_OSXSAVE % 32));

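	/* Xen will set CR4.OSXSAVE if supported and not disabled by force. */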
	return (cx & xsave_mask) == xsave_mask;
}

static void __init xen_init_capabilities(void)
{
	setup_force_cpu_cap(X86_FEATURE_XENPV);
	setup_clear_cpu_cap(X86_FEATURE_DCA);
	setup_clear_cpu_cap(X86_FEATURE_APERFMPERF);
	setup_clear_cpu_cap(X86_FEATURE_MTRR);
	setup_clear_cpu_cap(X86_FEATURE_ACC);
	setup_clear_cpu_cap(X86_FEATURE_X2APIC);
	setup_clear_cpu_cap(X86_FEATURE_SME);

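	/*
	 * Xen PV would need some work to support PCID: CR3 handling as well
	 * as xen_flush_tlb_others() would need updating.
	 */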
	setup_clear_cpu_cap(X86_FEATURE_PCID);

	if (!xen_initial_domain())
		setup_clear_cpu_cap(X86_FEATURE_ACPI);

	if (xen_check_mwait())
		setup_force_cpu_cap(X86_FEATURE_MWAIT);
	else
		setup_clear_cpu_cap(X86_FEATURE_MWAIT);

	if (!xen_check_xsave()) {
		setup_clear_cpu_cap(X86_FEATURE_XSAVE);
		setup_clear_cpu_cap(X86_FEATURE_OSXSAVE);
	}
}

static noinstr void xen_set_debugreg(int reg, unsigned long val)
{
	HYPERVISOR_set_debugreg(reg, val);
}

static noinstr unsigned long xen_get_debugreg(int reg)
{
	return HYPERVISOR_get_debugreg(reg);
}

static void xen_end_context_switch(struct task_struct *next)
{
	xen_mc_flush();
	paravirt_end_context_switch(next);
}

static unsigned long xen_store_tr(void)
{
	return 0;
}

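/*
 * Set the page permissions for a particular virtual address.  If the
 * address is a vmalloc mapping (or other non-linear mapping), then
 * find the linear mapping of the page and also set its protections to
 * match.
 */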
static void set_aliased_prot(void *v, pgprot_t prot)
{
	int level;
	pte_t *ptep;
	pte_t pte;
	unsigned long pfn;
	unsigned char dummy;
	void *va;

	ptep = lookup_address((unsigned long)v, &level);
	BUG_ON(ptep == NULL);

	pfn = pte_pfn(*ptep);
	pte = pfn_pte(pfn, prot);

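	/*
	 * Careful: update_va_mapping() will fail if the virtual address
	 * we're poking isn't populated in the page tables.  We don't
	 * need to worry about the direct map (that's always in the page
	 * tables), but we need to be careful about vmap space.  In
	 * particular, the top level page table can lazily propagate
	 * entries between processes, so if we've switched mms since we
	 * vmapped the target in the first place, we might not have the
	 * top-level page table entry populated.
	 *
	 * We disable preemption because we want the same mm active when
	 * we probe the target and when we issue the hypercall.  We'll
	 * have the same nominal mm, but if we're a kernel thread, lazy
	 * mm dropping could change our pgd.
	 *
	 * Out of an abundance of caution, this uses
	 * copy_from_kernel_nofault() to fault in the target address
	 * just in case there's some obscure case in which the target
	 * address isn't readable.
	 */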
	preempt_disable();

	copy_from_kernel_nofault(&dummy, v, 1);

	if (HYPERVISOR_update_va_mapping((unsigned long)v, pte, 0))
		BUG();

	va = __va(PFN_PHYS(pfn));

	if (va != v && HYPERVISOR_update_va_mapping((unsigned long)va, pte, 0))
		BUG();

	preempt_enable();
}

static void xen_alloc_ldt(struct desc_struct *ldt, unsigned entries)
{
	const unsigned entries_per_page = PAGE_SIZE / LDT_ENTRY_SIZE;
	int i;

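	/*
	 * We need to mark all the aliases of the LDT pages RO.  We
	 * don't need to call vm_flush_aliases(), though, since that's
	 * only responsible for flushing aliases out the TLBs, not the
	 * page tables, and Xen will flush the TLB for us if needed.
	 *
	 * To avoid confusing future readers: none of this is necessary
	 * to load the LDT.  The hypervisor only checks this when the
	 * LDT is faulted in due to subsequent descriptor access.
	 */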
	for (i = 0; i < entries; i += entries_per_page)
		set_aliased_prot(ldt + i, PAGE_KERNEL_RO);
}

static void xen_free_ldt(struct desc_struct *ldt, unsigned entries)
{
	const unsigned entries_per_page = PAGE_SIZE / LDT_ENTRY_SIZE;
	int i;

	for (i = 0; i < entries; i += entries_per_page)
		set_aliased_prot(ldt + i, PAGE_KERNEL);
}

static void xen_set_ldt(const void *addr, unsigned entries)
{
	struct mmuext_op *op;
	struct multicall_space mcs = xen_mc_entry(sizeof(*op));

	trace_xen_cpu_set_ldt(addr, entries);

	op = mcs.args;
	op->cmd = MMUEXT_SET_LDT;
	op->arg1.linear_addr = (unsigned long)addr;
	op->arg2.nr_ents = entries;

	MULTI_mmuext_op(mcs.mc, op, 1, NULL, DOMID_SELF);

	xen_mc_issue(PARAVIRT_LAZY_CPU);
}

static void xen_load_gdt(const struct desc_ptr *dtr)
{
	unsigned long va = dtr->address;
	unsigned int size = dtr->size + 1;
	unsigned long pfn, mfn;
	int level;
	pte_t *ptep;
	void *virt;

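	/* @size should be at most GDT_SIZE, which is smaller than PAGE_SIZE. */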
	BUG_ON(size > PAGE_SIZE);
	BUG_ON(va & ~PAGE_MASK);

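	/*
	 * The GDT is per-cpu and is in the percpu data area.
	 * That can be virtually mapped, so we need to do a
	 * page-walk to get the underlying MFN for the
	 * hypercall.  The page can also be in the kernel's
	 * linear range, so we need to RO that mapping too.
	 */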
	ptep = lookup_address(va, &level);
	BUG_ON(ptep == NULL);

	pfn = pte_pfn(*ptep);
	mfn = pfn_to_mfn(pfn);
	virt = __va(PFN_PHYS(pfn));

	make_lowmem_page_readonly((void *)va);
	make_lowmem_page_readonly(virt);

	if (HYPERVISOR_set_gdt(&mfn, size / sizeof(struct desc_struct)))
		BUG();
}

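/*
 * load_gdt for early boot, when the gdt is only mapped once.
 */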
static void __init xen_load_gdt_boot(const struct desc_ptr *dtr)
{
	unsigned long va = dtr->address;
	unsigned int size = dtr->size + 1;
	unsigned long pfn, mfn;
	pte_t pte;

	BUG_ON(size > PAGE_SIZE);
	BUG_ON(va & ~PAGE_MASK);

	pfn = virt_to_pfn(va);
	mfn = pfn_to_mfn(pfn);

	pte = pfn_pte(pfn, PAGE_KERNEL_RO);

	if (HYPERVISOR_update_va_mapping((unsigned long)va, pte, 0))
		BUG();

	if (HYPERVISOR_set_gdt(&mfn, size / sizeof(struct desc_struct)))
		BUG();
}

static inline bool desc_equal(const struct desc_struct *d1,
			      const struct desc_struct *d2)
{
	return !memcmp(d1, d2, sizeof(*d1));
}

static void load_TLS_descriptor(struct thread_struct *t,
				unsigned int cpu, unsigned int i)
{
	struct desc_struct *shadow = &per_cpu(shadow_tls_desc, cpu).desc[i];
	struct desc_struct *gdt;
	xmaddr_t maddr;
	struct multicall_space mc;

	if (desc_equal(shadow, &t->tls_array[i]))
		return;

	*shadow = t->tls_array[i];

	gdt = get_cpu_gdt_rw(cpu);
	maddr = arbitrary_virt_to_machine(&gdt[GDT_ENTRY_TLS_MIN+i]);
	mc = __xen_mc_entry(0);

	MULTI_update_descriptor(mc.mc, maddr.maddr, t->tls_array[i]);
}

static void xen_load_tls(struct thread_struct *t, unsigned int cpu)
{
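	/*
	 * In lazy mode we need to zero %fs, otherwise we may get an
	 * exception between the new %fs descriptor being loaded and
	 * %fs being effectively cleared at __switch_to().
	 */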
	if (paravirt_get_lazy_mode() == PARAVIRT_LAZY_CPU)
		loadsegment(fs, 0);

	xen_mc_batch();

	load_TLS_descriptor(t, cpu, 0);
	load_TLS_descriptor(t, cpu, 1);
	load_TLS_descriptor(t, cpu, 2);

	xen_mc_issue(PARAVIRT_LAZY_CPU);
}

static void xen_load_gs_index(unsigned int idx)
{
	if (HYPERVISOR_set_segment_base(SEGBASE_GS_USER_SEL, idx))
		BUG();
}

static void xen_write_ldt_entry(struct desc_struct *dt, int entrynum,
				const void *ptr)
{
	xmaddr_t mach_lp = arbitrary_virt_to_machine(&dt[entrynum]);
	u64 entry = *(u64 *)ptr;

	trace_xen_cpu_write_ldt_entry(dt, entrynum, entry);

	preempt_disable();

	xen_mc_flush();
	if (HYPERVISOR_update_descriptor(mach_lp.maddr, entry))
		BUG();

	preempt_enable();
}

void noist_exc_debug(struct pt_regs *regs);

DEFINE_IDTENTRY_RAW(xenpv_exc_nmi)
{
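	/* On Xen PV, NMI doesn't use IST.  The C part is the same as native. */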
	exc_nmi(regs);
}

DEFINE_IDTENTRY_RAW_ERRORCODE(xenpv_exc_double_fault)
{
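	/* On Xen PV, DF doesn't use IST.  The C part is the same as native. */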
	exc_double_fault(regs, error_code);
}

DEFINE_IDTENTRY_RAW(xenpv_exc_debug)
{
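	/*
	 * There's no IST on Xen PV, but we still need to dispatch
	 * to the correct handler.
	 */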
	if (user_mode(regs))
		noist_exc_debug(regs);
	else
		exc_debug(regs);
}

DEFINE_IDTENTRY_RAW(exc_xen_unknown_trap)
{
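	/* This should never happen and there is no way to handle it. */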
	instrumentation_begin();
	pr_err("Unknown trap in Xen PV mode.");
	BUG();
	instrumentation_end();
}

#ifdef CONFIG_X86_MCE
DEFINE_IDTENTRY_RAW(xenpv_exc_machine_check)
{
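	/*
	 * There's no IST on Xen PV, but we still need to dispatch
	 * to the correct handler.
	 */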
	if (user_mode(regs))
		noist_exc_machine_check(regs);
	else
		exc_machine_check(regs);
}
#endif

struct trap_array_entry {
	void (*orig)(void);
	void (*xen)(void);
	bool ist_okay;
};

#define TRAP_ENTRY(func, ist_ok) {			\
	.orig		= asm_##func,			\
	.xen		= xen_asm_##func,		\
	.ist_okay	= ist_ok }

#define TRAP_ENTRY_REDIR(func, ist_ok) {		\
	.orig		= asm_##func,			\
	.xen		= xen_asm_xenpv_##func,		\
	.ist_okay	= ist_ok }

static struct trap_array_entry trap_array[] = {
	TRAP_ENTRY_REDIR(exc_debug,			true  ),
	TRAP_ENTRY_REDIR(exc_double_fault,		true  ),
#ifdef CONFIG_X86_MCE
	TRAP_ENTRY_REDIR(exc_machine_check,		true  ),
#endif
	TRAP_ENTRY_REDIR(exc_nmi,			true  ),
	TRAP_ENTRY(exc_int3,				false ),
	TRAP_ENTRY(exc_overflow,			false ),
#ifdef CONFIG_IA32_EMULATION
	{ entry_INT80_compat, xen_entry_INT80_compat, false },
#endif
	TRAP_ENTRY(exc_page_fault,			false ),
	TRAP_ENTRY(exc_divide_error,			false ),
	TRAP_ENTRY(exc_bounds,				false ),
	TRAP_ENTRY(exc_invalid_op,			false ),
	TRAP_ENTRY(exc_device_not_available,		false ),
	TRAP_ENTRY(exc_coproc_segment_overrun,		false ),
	TRAP_ENTRY(exc_invalid_tss,			false ),
	TRAP_ENTRY(exc_segment_not_present,		false ),
	TRAP_ENTRY(exc_stack_segment,			false ),
	TRAP_ENTRY(exc_general_protection,		false ),
	TRAP_ENTRY(exc_spurious_interrupt_bug,		false ),
	TRAP_ENTRY(exc_coprocessor_error,		false ),
	TRAP_ENTRY(exc_alignment_check,			false ),
	TRAP_ENTRY(exc_simd_coprocessor_error,		false ),
#ifdef CONFIG_X86_KERNEL_IBT
	TRAP_ENTRY(exc_control_protection,		false ),
#endif
};

static bool __ref get_trap_addr(void **addr, unsigned int ist)
{
	unsigned int nr;
	bool ist_okay = false;
	bool found = false;

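	/*
	 * Replace trap handler addresses by Xen specific ones.
	 * Check for known traps using IST and whitelist them.
	 * The debugger ones are the only ones we care about.
	 * Xen will handle faults like double_fault, so we should never see
	 * them.  Warn if there's an unexpected IST-using fault handler.
	 */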
	for (nr = 0; nr < ARRAY_SIZE(trap_array); nr++) {
		struct trap_array_entry *entry = trap_array + nr;

		if (*addr == entry->orig) {
			*addr = entry->xen;
			ist_okay = entry->ist_okay;
			found = true;
			break;
		}
	}

	if (nr == ARRAY_SIZE(trap_array) &&
	    *addr >= (void *)early_idt_handler_array[0] &&
	    *addr < (void *)early_idt_handler_array[NUM_EXCEPTION_VECTORS]) {
		nr = (*addr - (void *)early_idt_handler_array[0]) /
		     EARLY_IDT_HANDLER_SIZE;
		*addr = (void *)xen_early_idt_handler_array[nr];
		found = true;
	}

	if (!found)
		*addr = (void *)xen_asm_exc_xen_unknown_trap;

	if (WARN_ON(found && ist != 0 && !ist_okay))
		return false;

	return true;
}

static int cvt_gate_to_trap(int vector, const gate_desc *val,
			    struct trap_info *info)
{
	unsigned long addr;

	if (val->bits.type != GATE_TRAP && val->bits.type != GATE_INTERRUPT)
		return 0;

	info->vector = vector;

	addr = gate_offset(val);
	if (!get_trap_addr((void **)&addr, val->bits.ist))
		return 0;
	info->address = addr;

	info->cs = gate_segment(val);
	info->flags = val->bits.dpl;
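	/* interrupt gates clear IF */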
	if (val->bits.type == GATE_INTERRUPT)
		info->flags |= 1 << 2;

	return 1;
}

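/* Locations of each CPU's IDT */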
static DEFINE_PER_CPU(struct desc_ptr, idt_desc);

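/*
 * Set an IDT entry.  If the entry is part of the current IDT, then
 * also update Xen.
 */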
static void xen_write_idt_entry(gate_desc *dt, int entrynum, const gate_desc *g)
{
	unsigned long p = (unsigned long)&dt[entrynum];
	unsigned long start, end;

	trace_xen_cpu_write_idt_entry(dt, entrynum, g);

	preempt_disable();

	start = __this_cpu_read(idt_desc.address);
	end = start + __this_cpu_read(idt_desc.size) + 1;

	xen_mc_flush();

	native_write_idt_entry(dt, entrynum, g);

	if (p >= start && (p + 8) <= end) {
		struct trap_info info[2];

		info[1].address = 0;

		if (cvt_gate_to_trap(entrynum, g, &info[0]))
			if (HYPERVISOR_set_trap_table(info))
				BUG();
	}

	preempt_enable();
}

static unsigned xen_convert_trap_info(const struct desc_ptr *desc,
				      struct trap_info *traps, bool full)
{
	unsigned in, out, count;

	count = (desc->size+1) / sizeof(gate_desc);
	BUG_ON(count > 256);

	for (in = out = 0; in < count; in++) {
		gate_desc *entry = (gate_desc *)(desc->address) + in;

		if (cvt_gate_to_trap(in, entry, &traps[out]) || full)
			out++;
	}

	return out;
}

void xen_copy_trap_info(struct trap_info *traps)
{
	const struct desc_ptr *desc = this_cpu_ptr(&idt_desc);

	xen_convert_trap_info(desc, traps, true);
}

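/*
 * Load a new IDT into Xen.  In principle this can be per-CPU, so we
 * hold a spinlock to protect the static traps array (static because
 * that avoids allocation and saves stack space).
 */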
static void xen_load_idt(const struct desc_ptr *desc)
{
	static DEFINE_SPINLOCK(lock);
	static struct trap_info traps[257];
	unsigned out;

	trace_xen_cpu_load_idt(desc);

	spin_lock(&lock);

	memcpy(this_cpu_ptr(&idt_desc), desc, sizeof(idt_desc));

	out = xen_convert_trap_info(desc, traps, false);
	memset(&traps[out], 0, sizeof(traps[0]));

	xen_mc_flush();
	if (HYPERVISOR_set_trap_table(traps))
		BUG();

	spin_unlock(&lock);
}

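/*
 * Write a GDT descriptor entry.  Ignore LDT descriptors, since
 * they're handled differently.
 */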
static void xen_write_gdt_entry(struct desc_struct *dt, int entry,
				const void *desc, int type)
{
	trace_xen_cpu_write_gdt_entry(dt, entry, desc, type);

	preempt_disable();

	switch (type) {
	case DESC_LDT:
	case DESC_TSS:
		/* ignore */
		break;

	default: {
		xmaddr_t maddr = arbitrary_virt_to_machine(&dt[entry]);

		xen_mc_flush();
		if (HYPERVISOR_update_descriptor(maddr.maddr, *(u64 *)desc))
			BUG();
	}

	}

	preempt_enable();
}

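/*
 * Version of write_gdt_entry for use at early boot-time needed to
 * update an entry as simply as possible.
 */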
static void __init xen_write_gdt_entry_boot(struct desc_struct *dt, int entry,
					    const void *desc, int type)
{
	trace_xen_cpu_write_gdt_entry(dt, entry, desc, type);

	switch (type) {
	case DESC_LDT:
	case DESC_TSS:
		/* ignore */
		break;

	default: {
		xmaddr_t maddr = virt_to_machine(&dt[entry]);

		if (HYPERVISOR_update_descriptor(maddr.maddr, *(u64 *)desc))
			dt[entry] = *(struct desc_struct *)desc;
	}

	}
}

static void xen_load_sp0(unsigned long sp0)
{
	struct multicall_space mcs;

	mcs = xen_mc_entry(0);
	MULTI_stack_switch(mcs.mc, __KERNEL_DS, sp0);
	xen_mc_issue(PARAVIRT_LAZY_CPU);
	this_cpu_write(cpu_tss_rw.x86_tss.sp0, sp0);
}

#ifdef CONFIG_X86_IOPL_IOPERM
static void xen_invalidate_io_bitmap(void)
{
	struct physdev_set_iobitmap iobitmap = {
		.bitmap = NULL,
		.nr_ports = 0,
	};

	native_tss_invalidate_io_bitmap();
	HYPERVISOR_physdev_op(PHYSDEVOP_set_iobitmap, &iobitmap);
}

static void xen_update_io_bitmap(void)
{
	struct physdev_set_iobitmap iobitmap;
	struct tss_struct *tss = this_cpu_ptr(&cpu_tss_rw);

	native_tss_update_io_bitmap();

	iobitmap.bitmap = (uint8_t *)(&tss->x86_tss) +
			  tss->x86_tss.io_bitmap_base;
	if (tss->x86_tss.io_bitmap_base == IO_BITMAP_OFFSET_INVALID)
		iobitmap.nr_ports = 0;
	else
		iobitmap.nr_ports = IO_BITMAP_BITS;

	HYPERVISOR_physdev_op(PHYSDEVOP_set_iobitmap, &iobitmap);
}
#endif

static void xen_io_delay(void)
{
}

static DEFINE_PER_CPU(unsigned long, xen_cr0_value);

static unsigned long xen_read_cr0(void)
{
	unsigned long cr0 = this_cpu_read(xen_cr0_value);

	if (unlikely(cr0 == 0)) {
		cr0 = native_read_cr0();
		this_cpu_write(xen_cr0_value, cr0);
	}

	return cr0;
}

static void xen_write_cr0(unsigned long cr0)
{
	struct multicall_space mcs;

	this_cpu_write(xen_cr0_value, cr0);

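	/* Only pay attention to cr0.TS; everything else is ignored. */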
	mcs = xen_mc_entry(0);

	MULTI_fpu_taskswitch(mcs.mc, (cr0 & X86_CR0_TS) != 0);

	xen_mc_issue(PARAVIRT_LAZY_CPU);
}

static void xen_write_cr4(unsigned long cr4)
{
	cr4 &= ~(X86_CR4_PGE | X86_CR4_PSE | X86_CR4_PCE);

	native_write_cr4(cr4);
}

static u64 xen_read_msr_safe(unsigned int msr, int *err)
{
	u64 val;

	if (pmu_msr_read(msr, &val, err))
		return val;

	val = native_read_msr_safe(msr, err);
	switch (msr) {
	case MSR_IA32_APICBASE:
		val &= ~X2APIC_ENABLE;
		break;
	}
	return val;
}

static int xen_write_msr_safe(unsigned int msr, unsigned low, unsigned high)
{
	int ret;
	unsigned int which;
	u64 base;

	ret = 0;

	switch (msr) {
	case MSR_FS_BASE:		which = SEGBASE_FS; goto set;
	case MSR_KERNEL_GS_BASE:	which = SEGBASE_GS_USER; goto set;
	case MSR_GS_BASE:		which = SEGBASE_GS_KERNEL; goto set;

	set:
		base = ((u64)high << 32) | low;
		if (HYPERVISOR_set_segment_base(which, base) != 0)
			ret = -EIO;
		break;

	case MSR_STAR:
	case MSR_CSTAR:
	case MSR_LSTAR:
	case MSR_SYSCALL_MASK:
	case MSR_IA32_SYSENTER_CS:
	case MSR_IA32_SYSENTER_ESP:
	case MSR_IA32_SYSENTER_EIP:
		/* Fast syscall setup is all done in hypercalls, so
		   these are all ignored.  Stub them out here to stop
		   Xen console noise. */
		break;

	default:
		if (!pmu_msr_write(msr, low, high, &ret))
			ret = native_write_msr_safe(msr, low, high);
	}

	return ret;
}

static u64 xen_read_msr(unsigned int msr)
{
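	/*
	 * This will silently swallow a #GP from RDMSR.  It may be worth
	 * changing that.
	 */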
	int err;

	return xen_read_msr_safe(msr, &err);
}

static void xen_write_msr(unsigned int msr, unsigned low, unsigned high)
{
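	/*
	 * This will silently swallow a #GP from WRMSR.  It may be worth
	 * changing that.
	 */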
	xen_write_msr_safe(msr, low, high);
}

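/* This is called once we have the cpu_possible_mask. */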
void __init xen_setup_vcpu_info_placement(void)
{
	int cpu;

	for_each_possible_cpu(cpu) {
		/* Set up direct vCPU id mapping for PV guests. */
		per_cpu(xen_vcpu_id, cpu) = cpu;
		xen_vcpu_setup(cpu);
	}

	pv_ops.irq.save_fl = __PV_IS_CALLEE_SAVE(xen_save_fl_direct);
	pv_ops.irq.irq_disable = __PV_IS_CALLEE_SAVE(xen_irq_disable_direct);
	pv_ops.irq.irq_enable = __PV_IS_CALLEE_SAVE(xen_irq_enable_direct);
	pv_ops.mmu.read_cr2 = __PV_IS_CALLEE_SAVE(xen_read_cr2_direct);
}

static const struct pv_info xen_info __initconst = {
	.extra_user_64bit_cs = FLAT_USER_CS64,
	.name = "Xen",
};

static const typeof(pv_ops) xen_cpu_ops __initconst = {
	.cpu = {
		.cpuid = xen_cpuid,

		.set_debugreg = xen_set_debugreg,
		.get_debugreg = xen_get_debugreg,

		.read_cr0 = xen_read_cr0,
		.write_cr0 = xen_write_cr0,

		.write_cr4 = xen_write_cr4,

		.wbinvd = native_wbinvd,

		.read_msr = xen_read_msr,
		.write_msr = xen_write_msr,

		.read_msr_safe = xen_read_msr_safe,
		.write_msr_safe = xen_write_msr_safe,

		.read_pmc = xen_read_pmc,

		.load_tr_desc = paravirt_nop,
		.set_ldt = xen_set_ldt,
		.load_gdt = xen_load_gdt,
		.load_idt = xen_load_idt,
		.load_tls = xen_load_tls,
		.load_gs_index = xen_load_gs_index,

		.alloc_ldt = xen_alloc_ldt,
		.free_ldt = xen_free_ldt,

		.store_tr = xen_store_tr,

		.write_ldt_entry = xen_write_ldt_entry,
		.write_gdt_entry = xen_write_gdt_entry,
		.write_idt_entry = xen_write_idt_entry,
		.load_sp0 = xen_load_sp0,

#ifdef CONFIG_X86_IOPL_IOPERM
		.invalidate_io_bitmap = xen_invalidate_io_bitmap,
		.update_io_bitmap = xen_update_io_bitmap,
#endif
		.io_delay = xen_io_delay,

		.start_context_switch = paravirt_start_context_switch,
		.end_context_switch = xen_end_context_switch,
	},
};

static void xen_restart(char *msg)
{
	xen_reboot(SHUTDOWN_reboot);
}

static void xen_machine_halt(void)
{
	xen_reboot(SHUTDOWN_poweroff);
}

static void xen_machine_power_off(void)
{
	do_kernel_power_off();
	xen_reboot(SHUTDOWN_poweroff);
}

static void xen_crash_shutdown(struct pt_regs *regs)
{
	xen_reboot(SHUTDOWN_crash);
}

static const struct machine_ops xen_machine_ops __initconst = {
	.restart = xen_restart,
	.halt = xen_machine_halt,
	.power_off = xen_machine_power_off,
	.shutdown = xen_machine_halt,
	.crash_shutdown = xen_crash_shutdown,
	.emergency_restart = xen_emergency_restart,
};

static unsigned char xen_get_nmi_reason(void)
{
	unsigned char reason = 0;

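	/* Construct a value which looks like it came from port 0x61. */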
	if (test_bit(_XEN_NMIREASON_io_error,
		     &HYPERVISOR_shared_info->arch.nmi_reason))
		reason |= NMI_REASON_IOCHK;
	if (test_bit(_XEN_NMIREASON_pci_serr,
		     &HYPERVISOR_shared_info->arch.nmi_reason))
		reason |= NMI_REASON_SERR;

	return reason;
}

static void __init xen_boot_params_init_edd(void)
{
#if IS_ENABLED(CONFIG_EDD)
	struct xen_platform_op op;
	struct edd_info *edd_info;
	u32 *mbr_signature;
	unsigned nr;
	int ret;

	edd_info = boot_params.eddbuf;
	mbr_signature = boot_params.edd_mbr_sig_buffer;

	op.cmd = XENPF_firmware_info;

	op.u.firmware_info.type = XEN_FW_DISK_INFO;
	for (nr = 0; nr < EDDMAXNR; nr++) {
		struct edd_info *info = edd_info + nr;

		op.u.firmware_info.index = nr;
		info->params.length = sizeof(info->params);
		set_xen_guest_handle(op.u.firmware_info.u.disk_info.edd_params,
				     &info->params);
		ret = HYPERVISOR_platform_op(&op);
		if (ret)
			break;

#define C(x) info->x = op.u.firmware_info.u.disk_info.x
		C(device);
		C(version);
		C(interface_support);
		C(legacy_max_cylinder);
		C(legacy_max_head);
		C(legacy_sectors_per_track);
#undef C
	}
	boot_params.eddbuf_entries = nr;

	op.u.firmware_info.type = XEN_FW_DISK_MBR_SIGNATURE;
	for (nr = 0; nr < EDD_MBR_SIG_MAX; nr++) {
		op.u.firmware_info.index = nr;
		ret = HYPERVISOR_platform_op(&op);
		if (ret)
			break;
		mbr_signature[nr] = op.u.firmware_info.u.disk_mbr_signature.mbr_signature;
	}
	boot_params.edd_mbr_sig_buf_entries = nr;
#endif
}

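/*
 * Set up the GDT and segment registers for -fstack-protector.  Until
 * we do this, we have to be careful not to call any stack-protected
 * function, which is most of the kernel.
 */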
static void __init xen_setup_gdt(int cpu)
{
	pv_ops.cpu.write_gdt_entry = xen_write_gdt_entry_boot;
	pv_ops.cpu.load_gdt = xen_load_gdt_boot;

	switch_to_new_gdt(cpu);

	pv_ops.cpu.write_gdt_entry = xen_write_gdt_entry;
	pv_ops.cpu.load_gdt = xen_load_gdt;
}

static void __init xen_dom0_set_legacy_features(void)
{
	x86_platform.legacy.rtc = 1;
}

static void __init xen_domu_set_legacy_features(void)
{
	x86_platform.legacy.rtc = 0;
}

extern void early_xen_iret_patch(void);

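/* First C function to be called on Xen boot */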
asmlinkage __visible void __init xen_start_kernel(struct start_info *si)
{
	struct physdev_set_iopl set_iopl;
	unsigned long initrd_start = 0;
	int rc;

	if (!si)
		return;

	clear_bss();

	xen_start_info = si;

	__text_gen_insn(&early_xen_iret_patch,
			JMP32_INSN_OPCODE, &early_xen_iret_patch, &xen_iret,
			JMP32_INSN_SIZE);

	xen_domain_type = XEN_PV_DOMAIN;
	xen_start_flags = xen_start_info->flags;

	xen_setup_features();

	/* Install Xen paravirt ops */
	pv_info = xen_info;
	pv_ops.cpu = xen_cpu_ops.cpu;
	xen_init_irq_ops();

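	/*
	 * Setup xen_vcpu early because it is needed for
	 * local_irq_disable(), irqs_disabled(), e.g. in printk().
	 *
	 * Don't do the full vcpu_info placement stuff until we have
	 * the cpu_possible_mask and a non-dummy shared_info.
	 */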
	xen_vcpu_info_reset(0);

	x86_platform.get_nmi_reason = xen_get_nmi_reason;

	x86_init.resources.memory_setup = xen_memory_setup;
	x86_init.irqs.intr_mode_select = x86_init_noop;
	x86_init.irqs.intr_mode_init = x86_init_noop;
	x86_init.oem.arch_setup = xen_arch_setup;
	x86_init.oem.banner = xen_banner;
	x86_init.hyper.init_platform = xen_pv_init_platform;
	x86_init.hyper.guest_late_init = xen_pv_guest_late_init;

	/*
	 * Set up some pagetable state before starting to set any ptes.
	 */
	xen_setup_machphys_mapping();
	xen_init_mmu_ops();

	/* Prevent unwanted bits from being set in PTEs. */
	__supported_pte_mask &= ~_PAGE_GLOBAL;
	__default_kernel_pte_mask &= ~_PAGE_GLOBAL;

	/* Get the machine frame (mfn) list. */
	xen_build_dynamic_phys_to_machine();

	/* Work out if we support NX. */
	get_cpu_cap(&boot_cpu_data);
	x86_configure_nx();

	/*
	 * Set up kernel GDT and segment registers, mainly so that
	 * -fstack-protector code can be executed.
	 */
	xen_setup_gdt(0);

	/* Determine virtual and physical address sizes. */
	get_cpu_address_sizes(&boot_cpu_data);

	/* Let's presume PV guests always boot on vCPU with id 0. */
	per_cpu(xen_vcpu_id, 0) = 0;

	idt_setup_early_handler();

	xen_init_capabilities();

#ifdef CONFIG_X86_LOCAL_APIC
	/*
	 * Set up the basic apic ops.
	 */
	xen_init_apic();
#endif

	machine_ops = xen_machine_ops;

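	/*
	 * The only reliable way to retain the initial address of the
	 * percpu gdt_page is to remember it here, so we can go and
	 * mark it RW later, when the initial percpu area is freed.
	 */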
	xen_initial_gdt = &per_cpu(gdt_page, 0);

	xen_smp_init();

#ifdef CONFIG_ACPI_NUMA
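	/*
	 * The pages we get from Xen are not related to machine pages, so
	 * any NUMA information the kernel tries to get from ACPI will
	 * be meaningless.  Prevent it from trying.
	 */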
	disable_srat();
#endif
	WARN_ON(xen_cpuhp_setup(xen_cpu_up_prepare_pv, xen_cpu_dead_pv));

	local_irq_disable();
	early_boot_irqs_disabled = true;

	xen_raw_console_write("mapping kernel into physical memory\n");
	xen_setup_kernel_pagetable((pgd_t *)xen_start_info->pt_base,
				   xen_start_info->nr_pages);
	xen_reserve_special_pages();

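	/*
	 * We used to do this in xen_arch_setup, but that is too late
	 * on AMD, where early_cpu_init (run before ->arch_setup()) calls
	 * early_amd_init which pokes 0xcf8 port.
	 */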
	set_iopl.iopl = 1;
	rc = HYPERVISOR_physdev_op(PHYSDEVOP_set_iopl, &set_iopl);
	if (rc != 0)
		xen_raw_printk("physdev_op failed %d\n", rc);

	/* The initrd location arrives in mod_start, either as a PFN or a virtual address. */
	if (xen_start_info->mod_start) {
		if (xen_start_info->flags & SIF_MOD_START_PFN)
			initrd_start = PFN_PHYS(xen_start_info->mod_start);
		else
			initrd_start = __pa(xen_start_info->mod_start);
	}

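	/* Poke various useful things into boot_params. */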
	boot_params.hdr.type_of_loader = (9 << 4) | 0;
	boot_params.hdr.ramdisk_image = initrd_start;
	boot_params.hdr.ramdisk_size = xen_start_info->mod_len;
	boot_params.hdr.cmd_line_ptr = __pa(xen_start_info->cmd_line);
	boot_params.hdr.hardware_subarch = X86_SUBARCH_XEN;

	if (!xen_initial_domain()) {
		if (pci_xen)
			x86_init.pci.arch_init = pci_xen_init;
		x86_platform.set_legacy_features =
				xen_domu_set_legacy_features;
	} else {
		const struct dom0_vga_console_info *info =
			(void *)((char *)xen_start_info +
				 xen_start_info->console.dom0.info_off);
		struct xen_platform_op op = {
			.cmd = XENPF_firmware_info,
			.interface_version = XENPF_INTERFACE_VERSION,
			.u.firmware_info.type = XEN_FW_KBD_SHIFT_FLAGS,
		};

		x86_platform.set_legacy_features =
				xen_dom0_set_legacy_features;
		xen_init_vga(info, xen_start_info->console.dom0.info_size);
		xen_start_info->console.domU.mfn = 0;
		xen_start_info->console.domU.evtchn = 0;

		if (HYPERVISOR_platform_op(&op) == 0)
			boot_params.kbd_status = op.u.firmware_info.u.kbd_shift_flags;

		/* Make sure ACS will be enabled. */
		pci_request_acs();

		xen_acpi_sleep_register();

		xen_boot_params_init_edd();

#ifdef CONFIG_ACPI
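		/*
		 * Disable selecting "Firmware First mode" for correctable
		 * memory errors, as this is the duty of the hypervisor to
		 * decide.
		 */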
		acpi_disable_cmcff = 1;
#endif
	}

	xen_add_preferred_consoles();

#ifdef CONFIG_PCI
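	/* The PCI BIOS service won't work from a PV guest. */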
	pci_probe &= ~PCI_PROBE_BIOS;
#endif
	xen_raw_console_write("about to get started...\n");

	/* We need this for printk timestamps. */
	xen_setup_runstate_info(0);

	xen_efi_init(&boot_params);

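	/* Start the world. */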
	cr4_init_shadow();
	x86_64_start_reservations((char *)__pa_symbol(&boot_params));
}

static int xen_cpu_up_prepare_pv(unsigned int cpu)
{
	int rc;

	if (per_cpu(xen_vcpu, cpu) == NULL)
		return -ENODEV;

	xen_setup_timer(cpu);

	rc = xen_smp_intr_init(cpu);
	if (rc) {
		WARN(1, "xen_smp_intr_init() for CPU %d failed: %d\n",
		     cpu, rc);
		return rc;
	}

	rc = xen_smp_intr_init_pv(cpu);
	if (rc) {
		WARN(1, "xen_smp_intr_init_pv() for CPU %d failed: %d\n",
		     cpu, rc);
		return rc;
	}

	return 0;
}

static int xen_cpu_dead_pv(unsigned int cpu)
{
	xen_smp_intr_free(cpu);
	xen_smp_intr_free_pv(cpu);

	xen_teardown_timer(cpu);

	return 0;
}

static uint32_t __init xen_platform_pv(void)
{
	if (xen_pv_domain())
		return xen_cpuid_base();

	return 0;
}

const __initconst struct hypervisor_x86 x86_hyper_xen_pv = {
	.name = "Xen PV",
	.detect = xen_platform_pv,
	.type = X86_HYPER_XEN_PV,
	.runtime.pin_vcpu = xen_pin_vcpu,
	.ignore_nopv = true,
};