Back to home page

OSCL-LXR

 
 

    


0001 // SPDX-License-Identifier: GPL-2.0
0002 /*
0003  * Xen SMP support
0004  *
0005  * This file implements the Xen versions of smp_ops.  SMP under Xen is
0006  * very straightforward.  Bringing a CPU up is simply a matter of
0007  * loading its initial context and setting it running.
0008  *
0009  * IPIs are handled through the Xen event mechanism.
0010  *
0011  * Because virtual CPUs can be scheduled onto any real CPU, there's no
0012  * useful topology information for the kernel to make use of.  As a
0013  * result, all CPUs are treated as if they're single-core and
0014  * single-threaded.
0015  */
0016 #include <linux/sched.h>
0017 #include <linux/sched/task_stack.h>
0018 #include <linux/err.h>
0019 #include <linux/slab.h>
0020 #include <linux/smp.h>
0021 #include <linux/irq_work.h>
0022 #include <linux/tick.h>
0023 #include <linux/nmi.h>
0024 #include <linux/cpuhotplug.h>
0025 #include <linux/stackprotector.h>
0026 #include <linux/pgtable.h>
0027 
0028 #include <asm/paravirt.h>
0029 #include <asm/idtentry.h>
0030 #include <asm/desc.h>
0031 #include <asm/cpu.h>
0032 #include <asm/io_apic.h>
0033 
0034 #include <xen/interface/xen.h>
0035 #include <xen/interface/vcpu.h>
0036 #include <xen/interface/xenpmu.h>
0037 
0038 #include <asm/spec-ctrl.h>
0039 #include <asm/xen/interface.h>
0040 #include <asm/xen/hypercall.h>
0041 
0042 #include <xen/xen.h>
0043 #include <xen/page.h>
0044 #include <xen/events.h>
0045 
0046 #include <xen/hvc-console.h>
0047 #include "xen-ops.h"
0048 #include "mmu.h"
0049 #include "smp.h"
0050 #include "pmu.h"
0051 
/* Set of CPUs whose vcpu_guest_context has been loaded into Xen. */
cpumask_var_t xen_cpu_initialized_map;

/* Per-cpu IRQ-work IPI and Xen PMU virq bindings; irq == -1 means unbound. */
static DEFINE_PER_CPU(struct xen_common_irq, xen_irq_work) = { .irq = -1 };
static DEFINE_PER_CPU(struct xen_common_irq, xen_pmu_irq) = { .irq = -1 };

static irqreturn_t xen_irq_work_interrupt(int irq, void *dev_id);
/* Assembly entry point a secondary vCPU starts executing at (xen-head.S). */
void asm_cpu_bringup_and_idle(void);
0059 
/*
 * Per-cpu bring-up path run on a freshly started secondary vCPU.
 * Performs the architecture init normally done by the native trampoline,
 * then marks the CPU online.  The ordering below is significant: the CPU
 * must be fully set up before set_cpu_online()/local_irq_enable().
 */
static void cpu_bringup(void)
{
    int cpu;

    cr4_init();
    cpu_init();
    touch_softlockup_watchdog();

    /* PVH runs in ring 0 and allows us to do native syscalls. Yay! */
    if (!xen_feature(XENFEAT_supervisor_mode_kernel)) {
        xen_enable_sysenter();
        xen_enable_syscall();
    }
    cpu = smp_processor_id();
    smp_store_cpu_info(cpu);
    /* Xen gives no useful topology; treat every vCPU as single-core. */
    cpu_data(cpu).x86_max_cores = 1;
    set_cpu_sibling_map(cpu);

    speculative_store_bypass_ht_init();

    xen_setup_cpu_clockevents();

    notify_cpu_starting(cpu);

    set_cpu_online(cpu, true);

    cpu_set_state_online(cpu);  /* Implies full memory barrier. */

    /* We can take interrupts now: we're officially "up". */
    local_irq_enable();
}
0091 
/*
 * C entry point called from asm_cpu_bringup_and_idle: finish bring-up
 * and enter the idle loop, never returning.
 */
asmlinkage __visible void cpu_bringup_and_idle(void)
{
    cpu_bringup();
    cpu_startup_entry(CPUHP_AP_ONLINE_IDLE);
}
0097 
0098 void xen_smp_intr_free_pv(unsigned int cpu)
0099 {
0100     if (per_cpu(xen_irq_work, cpu).irq >= 0) {
0101         unbind_from_irqhandler(per_cpu(xen_irq_work, cpu).irq, NULL);
0102         per_cpu(xen_irq_work, cpu).irq = -1;
0103         kfree(per_cpu(xen_irq_work, cpu).name);
0104         per_cpu(xen_irq_work, cpu).name = NULL;
0105     }
0106 
0107     if (per_cpu(xen_pmu_irq, cpu).irq >= 0) {
0108         unbind_from_irqhandler(per_cpu(xen_pmu_irq, cpu).irq, NULL);
0109         per_cpu(xen_pmu_irq, cpu).irq = -1;
0110         kfree(per_cpu(xen_pmu_irq, cpu).name);
0111         per_cpu(xen_pmu_irq, cpu).name = NULL;
0112     }
0113 }
0114 
0115 int xen_smp_intr_init_pv(unsigned int cpu)
0116 {
0117     int rc;
0118     char *callfunc_name, *pmu_name;
0119 
0120     callfunc_name = kasprintf(GFP_KERNEL, "irqwork%d", cpu);
0121     rc = bind_ipi_to_irqhandler(XEN_IRQ_WORK_VECTOR,
0122                     cpu,
0123                     xen_irq_work_interrupt,
0124                     IRQF_PERCPU|IRQF_NOBALANCING,
0125                     callfunc_name,
0126                     NULL);
0127     if (rc < 0)
0128         goto fail;
0129     per_cpu(xen_irq_work, cpu).irq = rc;
0130     per_cpu(xen_irq_work, cpu).name = callfunc_name;
0131 
0132     if (is_xen_pmu) {
0133         pmu_name = kasprintf(GFP_KERNEL, "pmu%d", cpu);
0134         rc = bind_virq_to_irqhandler(VIRQ_XENPMU, cpu,
0135                          xen_pmu_irq_handler,
0136                          IRQF_PERCPU|IRQF_NOBALANCING,
0137                          pmu_name, NULL);
0138         if (rc < 0)
0139             goto fail;
0140         per_cpu(xen_pmu_irq, cpu).irq = rc;
0141         per_cpu(xen_pmu_irq, cpu).name = pmu_name;
0142     }
0143 
0144     return 0;
0145 
0146  fail:
0147     xen_smp_intr_free_pv(cpu);
0148     return rc;
0149 }
0150 
/*
 * Replacement for x86_init.mpparse.get_smp_config: instead of parsing MP
 * tables, ask the hypervisor which vCPUs exist (VCPUOP_is_up) and build
 * the possible/present masks from that.  Only the late (!early) pass does
 * any work.
 */
static void __init _get_smp_config(unsigned int early)
{
    int i, rc;
    unsigned int subtract = 0;

    if (early)
        return;

    num_processors = 0;
    disabled_cpus = 0;
    for (i = 0; i < nr_cpu_ids; i++) {
        /* rc >= 0 means the hypervisor knows about vCPU i. */
        rc = HYPERVISOR_vcpu_op(VCPUOP_is_up, i, NULL);
        if (rc >= 0) {
            num_processors++;
            set_cpu_possible(i, true);
        } else {
            set_cpu_possible(i, false);
            set_cpu_present(i, false);
            subtract++;
        }
    }
#ifdef CONFIG_HOTPLUG_CPU
    /* This is akin to using 'nr_cpus' on the Linux command line.
     * Which is OK as when we use 'dom0_max_vcpus=X' we can only
     * have up to X, while nr_cpu_ids is greater than X. This
     * normally is not a problem, except when CPU hotplugging
     * is involved and then there might be more than X CPUs
     * in the guest - which will not work as there is no
     * hypercall to expand the max number of VCPUs an already
     * running guest has. So cap it up to X. */
    if (subtract)
        nr_cpu_ids = nr_cpu_ids - subtract;
#endif

}
0186 
/* smp_ops.smp_prepare_boot_cpu for PV guests: runs on the boot CPU only. */
static void __init xen_pv_smp_prepare_boot_cpu(void)
{
    BUG_ON(smp_processor_id() != 0);
    native_smp_prepare_boot_cpu();

    if (!xen_feature(XENFEAT_writable_page_tables))
        /* We've switched to the "real" per-cpu gdt, so make
         * sure the old memory can be recycled. */
        make_lowmem_page_readwrite(xen_initial_gdt);

    xen_setup_vcpu_info_placement();

    /*
     * The alternative logic (which patches the unlock/lock) runs before
     * the smp bootup code is activated. Hence we need to set this up
     * before the core kernel is being patched. Otherwise we will have
     * only modules patched but not core code.
     */
    xen_init_spinlocks();
}
0207 
0208 static void __init xen_pv_smp_prepare_cpus(unsigned int max_cpus)
0209 {
0210     unsigned cpu;
0211 
0212     if (skip_ioapic_setup) {
0213         char *m = (max_cpus == 0) ?
0214             "The nosmp parameter is incompatible with Xen; " \
0215             "use Xen dom0_max_vcpus=1 parameter" :
0216             "The noapic parameter is incompatible with Xen";
0217 
0218         xen_raw_printk(m);
0219         panic(m);
0220     }
0221     xen_init_lock_cpu(0);
0222 
0223     smp_prepare_cpus_common();
0224 
0225     cpu_data(0).x86_max_cores = 1;
0226 
0227     speculative_store_bypass_ht_init();
0228 
0229     xen_pmu_init(0);
0230 
0231     if (xen_smp_intr_init(0) || xen_smp_intr_init_pv(0))
0232         BUG();
0233 
0234     if (!alloc_cpumask_var(&xen_cpu_initialized_map, GFP_KERNEL))
0235         panic("could not allocate xen_cpu_initialized_map\n");
0236 
0237     cpumask_copy(xen_cpu_initialized_map, cpumask_of(0));
0238 
0239     /* Restrict the possible_map according to max_cpus. */
0240     while ((num_possible_cpus() > 1) && (num_possible_cpus() > max_cpus)) {
0241         for (cpu = nr_cpu_ids - 1; !cpu_possible(cpu); cpu--)
0242             continue;
0243         set_cpu_possible(cpu, false);
0244     }
0245 
0246     for_each_possible_cpu(cpu)
0247         set_cpu_present(cpu, true);
0248 }
0249 
/*
 * Build a vcpu_guest_context for @cpu and hand it to the hypervisor via
 * VCPUOP_initialise, so the vCPU starts in asm_cpu_bringup_and_idle on
 * @idle's stack.  Idempotent: a CPU already in xen_cpu_initialized_map is
 * skipped.  Returns 0 on success or -ENOMEM.
 */
static int
cpu_initialize_context(unsigned int cpu, struct task_struct *idle)
{
    struct vcpu_guest_context *ctxt;
    struct desc_struct *gdt;
    unsigned long gdt_mfn;

    /* used to tell cpu_init() that it can proceed with initialization */
    cpumask_set_cpu(cpu, cpu_callout_mask);
    if (cpumask_test_and_set_cpu(cpu, xen_cpu_initialized_map))
        return 0;

    ctxt = kzalloc(sizeof(*ctxt), GFP_KERNEL);
    if (ctxt == NULL) {
        /* Roll back the marks set above so a retry starts clean. */
        cpumask_clear_cpu(cpu, xen_cpu_initialized_map);
        cpumask_clear_cpu(cpu, cpu_callout_mask);
        return -ENOMEM;
    }

    gdt = get_cpu_gdt_rw(cpu);

    /*
     * Bring up the CPU in cpu_bringup_and_idle() with the stack
     * pointing just below where pt_regs would be if it were a normal
     * kernel entry.
     */
    ctxt->user_regs.eip = (unsigned long)asm_cpu_bringup_and_idle;
    ctxt->flags = VGCF_IN_KERNEL;
    ctxt->user_regs.eflags = 0x1000; /* IOPL_RING1 */
    ctxt->user_regs.ds = __USER_DS;
    ctxt->user_regs.es = __USER_DS;
    ctxt->user_regs.ss = __KERNEL_DS;
    ctxt->user_regs.cs = __KERNEL_CS;
    ctxt->user_regs.esp = (unsigned long)task_pt_regs(idle);

    xen_copy_trap_info(ctxt->trap_ctxt);

    /* The GDT handed to Xen must be page-aligned. */
    BUG_ON((unsigned long)gdt & ~PAGE_MASK);

    /*
     * Xen requires frames used as a GDT to be read-only; mark both the
     * kernel-virtual mapping and the mapping derived from the MFN.
     */
    gdt_mfn = arbitrary_virt_to_mfn(gdt);
    make_lowmem_page_readonly(gdt);
    make_lowmem_page_readonly(mfn_to_virt(gdt_mfn));

    ctxt->gdt_frames[0] = gdt_mfn;
    ctxt->gdt_ents      = GDT_ENTRIES;

    /*
     * Set SS:SP that Xen will use when entering guest kernel mode
     * from guest user mode.  Subsequent calls to load_sp0() can
     * change this value.
     */
    ctxt->kernel_ss = __KERNEL_DS;
    ctxt->kernel_sp = task_top_of_stack(idle);

    /* Per-cpu data is reached through the kernel GS base. */
    ctxt->gs_base_kernel = per_cpu_offset(cpu);
    ctxt->event_callback_eip    =
        (unsigned long)xen_asm_exc_xen_hypervisor_callback;
    ctxt->failsafe_callback_eip =
        (unsigned long)xen_failsafe_callback;
    per_cpu(xen_cr3, cpu) = __pa(swapper_pg_dir);

    /* Start on the kernel's reference page tables. */
    ctxt->ctrlreg[3] = xen_pfn_to_cr3(virt_to_gfn(swapper_pg_dir));
    if (HYPERVISOR_vcpu_op(VCPUOP_initialise, xen_vcpu_nr(cpu), ctxt))
        BUG();

    kfree(ctxt);
    return 0;
}
0318 
/*
 * smp_ops.cpu_up for PV guests: load @cpu's initial context into Xen,
 * kick it with VCPUOP_up, then yield until the new CPU reports online.
 * Returns 0 on success or a negative errno from the preparation steps.
 */
static int xen_pv_cpu_up(unsigned int cpu, struct task_struct *idle)
{
    int rc;

    rc = common_cpu_up(cpu, idle);
    if (rc)
        return rc;

    xen_setup_runstate_info(cpu);

    /*
     * PV VCPUs are always successfully taken down (see 'while' loop
     * in xen_cpu_die()), so -EBUSY is an error.
     */
    rc = cpu_check_up_prepare(cpu);
    if (rc)
        return rc;

    /* make sure interrupts start blocked */
    per_cpu(xen_vcpu, cpu)->evtchn_upcall_mask = 1;

    rc = cpu_initialize_context(cpu, idle);
    if (rc)
        return rc;

    xen_pmu_init(cpu);

    /* Context is loaded; this actually starts the vCPU running. */
    rc = HYPERVISOR_vcpu_op(VCPUOP_up, xen_vcpu_nr(cpu), NULL);
    BUG_ON(rc);

    /* Yield our timeslice to the new CPU until it comes online. */
    while (cpu_report_state(cpu) != CPU_ONLINE)
        HYPERVISOR_sched_op(SCHEDOP_yield, NULL);

    return 0;
}
0354 
0355 #ifdef CONFIG_HOTPLUG_CPU
0356 static int xen_pv_cpu_disable(void)
0357 {
0358     unsigned int cpu = smp_processor_id();
0359     if (cpu == 0)
0360         return -EBUSY;
0361 
0362     cpu_disable_common();
0363 
0364     load_cr3(swapper_pg_dir);
0365     return 0;
0366 }
0367 
/*
 * smp_ops.cpu_die for PV guests, run on a surviving CPU: wait (sleeping
 * 100ms between polls) until the hypervisor reports the vCPU down, then
 * release its per-cpu resources.
 */
static void xen_pv_cpu_die(unsigned int cpu)
{
    while (HYPERVISOR_vcpu_op(VCPUOP_is_up,
                  xen_vcpu_nr(cpu), NULL)) {
        __set_current_state(TASK_UNINTERRUPTIBLE);
        schedule_timeout(HZ/10);
    }

    if (common_cpu_die(cpu) == 0) {
        xen_smp_intr_free(cpu);
        xen_uninit_lock_cpu(cpu);
        xen_teardown_timer(cpu);
        xen_pmu_finish(cpu);
    }
}
0383 
/*
 * smp_ops.play_dead for PV guests, run on the dying CPU itself: ask the
 * hypervisor to take the vCPU down.  If it is later brought back up,
 * execution resumes here, so re-run cpu_bringup() and re-enter idle.
 */
static void xen_pv_play_dead(void) /* used only with HOTPLUG_CPU */
{
    play_dead_common();
    HYPERVISOR_vcpu_op(VCPUOP_down, xen_vcpu_nr(smp_processor_id()), NULL);
    cpu_bringup();
    /*
     * commit 4b0c0f294 (tick: Cleanup NOHZ per cpu data on cpu down)
     * clears certain data that the cpu_idle loop (which called us
     * and that we return from) expects. The only way to get that
     * data back is to call:
     */
    tick_nohz_idle_enter();
    tick_nohz_idle_stop_tick_protected();

    cpuhp_online_idle(CPUHP_AP_ONLINE_IDLE);
}
0400 
0401 #else /* !CONFIG_HOTPLUG_CPU */
/* Without CONFIG_HOTPLUG_CPU, offlining is simply unsupported. */
static int xen_pv_cpu_disable(void)
{
    return -ENOSYS;
}
0406 
/* Unreachable without CONFIG_HOTPLUG_CPU: no CPU can ever be dying. */
static void xen_pv_cpu_die(unsigned int cpu)
{
    BUG();
}
0411 
/* Unreachable without CONFIG_HOTPLUG_CPU: no CPU can ever be dying. */
static void xen_pv_play_dead(void)
{
    BUG();
}
0416 
0417 #endif
/*
 * IPI callback used by xen_pv_stop_other_cpus(): mark this CPU offline
 * and ask the hypervisor to take the vCPU down.  VCPUOP_down must not
 * return; if it does, something went badly wrong.
 */
static void stop_self(void *v)
{
    int cpu = smp_processor_id();

    /* make sure we're not pinning something down */
    load_cr3(swapper_pg_dir);
    /* should set up a minimal gdt */

    set_cpu_online(cpu, false);

    HYPERVISOR_vcpu_op(VCPUOP_down, xen_vcpu_nr(cpu), NULL);
    BUG();
}
0431 
/*
 * smp_ops.stop_other_cpus: make every other CPU run stop_self().
 * @wait: when non-zero, block until the IPI handlers have executed.
 */
static void xen_pv_stop_other_cpus(int wait)
{
    smp_call_function(stop_self, NULL, wait);
}
0436 
0437 static irqreturn_t xen_irq_work_interrupt(int irq, void *dev_id)
0438 {
0439     irq_work_run();
0440     inc_irq_stat(apic_irq_work_irqs);
0441 
0442     return IRQ_HANDLED;
0443 }
0444 
/* PV replacements for the native smp_ops; IPIs go via Xen event channels. */
static const struct smp_ops xen_smp_ops __initconst = {
    .smp_prepare_boot_cpu = xen_pv_smp_prepare_boot_cpu,
    .smp_prepare_cpus = xen_pv_smp_prepare_cpus,
    .smp_cpus_done = xen_smp_cpus_done,

    .cpu_up = xen_pv_cpu_up,
    .cpu_die = xen_pv_cpu_die,
    .cpu_disable = xen_pv_cpu_disable,
    .play_dead = xen_pv_play_dead,

    .stop_other_cpus = xen_pv_stop_other_cpus,
    .smp_send_reschedule = xen_smp_send_reschedule,

    .send_call_func_ipi = xen_smp_send_call_function_ipi,
    .send_call_func_single_ipi = xen_smp_send_call_function_single_ipi,
};
0461 
/*
 * Install the Xen PV smp_ops and replace the MP-table parsing hooks:
 * CPU discovery comes from the hypervisor (_get_smp_config), not BIOS.
 */
void __init xen_smp_init(void)
{
    smp_ops = xen_smp_ops;

    /* Avoid searching for BIOS MP tables */
    x86_init.mpparse.find_smp_config = x86_init_noop;
    x86_init.mpparse.get_smp_config = _get_smp_config;
}