Back to home page

OSCL-LXR

 
 

    


0001 // SPDX-License-Identifier: GPL-2.0-or-later
0002 /*
0003  * SMP support for PowerNV machines.
0004  *
0005  * Copyright 2011 IBM Corp.
0006  */
0007 
0008 #include <linux/kernel.h>
0009 #include <linux/module.h>
0010 #include <linux/sched.h>
0011 #include <linux/sched/hotplug.h>
0012 #include <linux/smp.h>
0013 #include <linux/interrupt.h>
0014 #include <linux/delay.h>
0015 #include <linux/init.h>
0016 #include <linux/spinlock.h>
0017 #include <linux/cpu.h>
0018 
0019 #include <asm/irq.h>
0020 #include <asm/smp.h>
0021 #include <asm/paca.h>
0022 #include <asm/machdep.h>
0023 #include <asm/cputable.h>
0024 #include <asm/firmware.h>
0025 #include <asm/vdso_datapage.h>
0026 #include <asm/cputhreads.h>
0027 #include <asm/xics.h>
0028 #include <asm/xive.h>
0029 #include <asm/opal.h>
0030 #include <asm/runlatch.h>
0031 #include <asm/code-patching.h>
0032 #include <asm/dbell.h>
0033 #include <asm/kvm_ppc.h>
0034 #include <asm/ppc-opcode.h>
0035 #include <asm/cpuidle.h>
0036 #include <asm/kexec.h>
0037 #include <asm/reg.h>
0038 #include <asm/powernv.h>
0039 
0040 #include "powernv.h"
0041 
/*
 * DBG(): debug trace helper for this file.
 *
 * Without DEBUG it expands to an empty statement; with DEBUG it forwards
 * its arguments to udbg_printf().
 */
#ifndef DEBUG
#define DBG(fmt...) do { } while (0)
#else
#include <asm/udbg.h>
#define DBG(fmt...) udbg_printf(fmt)
#endif
0048 
0049 static void pnv_smp_setup_cpu(int cpu)
0050 {
0051     /*
0052      * P9 workaround for CI vector load (see traps.c),
0053      * enable the corresponding HMI interrupt
0054      */
0055     if (pvr_version_is(PVR_POWER9))
0056         mtspr(SPRN_HMEER, mfspr(SPRN_HMEER) | PPC_BIT(17));
0057 
0058     if (xive_enabled())
0059         xive_smp_setup_cpu();
0060     else if (cpu != boot_cpuid)
0061         xics_setup_cpu();
0062 }
0063 
0064 static int pnv_smp_kick_cpu(int nr)
0065 {
0066     unsigned int pcpu;
0067     unsigned long start_here =
0068             __pa(ppc_function_entry(generic_secondary_smp_init));
0069     long rc;
0070     uint8_t status;
0071 
0072     if (nr < 0 || nr >= nr_cpu_ids)
0073         return -EINVAL;
0074 
0075     pcpu = get_hard_smp_processor_id(nr);
0076     /*
0077      * If we already started or OPAL is not supported, we just
0078      * kick the CPU via the PACA
0079      */
0080     if (paca_ptrs[nr]->cpu_start || !firmware_has_feature(FW_FEATURE_OPAL))
0081         goto kick;
0082 
0083     /*
0084      * At this point, the CPU can either be spinning on the way in
0085      * from kexec or be inside OPAL waiting to be started for the
0086      * first time. OPAL v3 allows us to query OPAL to know if it
0087      * has the CPUs, so we do that
0088      */
0089     rc = opal_query_cpu_status(pcpu, &status);
0090     if (rc != OPAL_SUCCESS) {
0091         pr_warn("OPAL Error %ld querying CPU %d state\n", rc, nr);
0092         return -ENODEV;
0093     }
0094 
0095     /*
0096      * Already started, just kick it, probably coming from
0097      * kexec and spinning
0098      */
0099     if (status == OPAL_THREAD_STARTED)
0100         goto kick;
0101 
0102     /*
0103      * Available/inactive, let's kick it
0104      */
0105     if (status == OPAL_THREAD_INACTIVE) {
0106         pr_devel("OPAL: Starting CPU %d (HW 0x%x)...\n", nr, pcpu);
0107         rc = opal_start_cpu(pcpu, start_here);
0108         if (rc != OPAL_SUCCESS) {
0109             pr_warn("OPAL Error %ld starting CPU %d\n", rc, nr);
0110             return -ENODEV;
0111         }
0112     } else {
0113         /*
0114          * An unavailable CPU (or any other unknown status)
0115          * shouldn't be started. It should also
0116          * not be in the possible map but currently it can
0117          * happen
0118          */
0119         pr_devel("OPAL: CPU %d (HW 0x%x) is unavailable"
0120              " (status %d)...\n", nr, pcpu, status);
0121         return -ENODEV;
0122     }
0123 
0124 kick:
0125     return smp_generic_kick_cpu(nr);
0126 }
0127 
0128 #ifdef CONFIG_HOTPLUG_CPU
0129 
0130 static int pnv_smp_cpu_disable(void)
0131 {
0132     int cpu = smp_processor_id();
0133 
0134     /* This is identical to pSeries... might consolidate by
0135      * moving migrate_irqs_away to a ppc_md with default to
0136      * the generic fixup_irqs. --BenH.
0137      */
0138     set_cpu_online(cpu, false);
0139     vdso_data->processorCount--;
0140     if (cpu == boot_cpuid)
0141         boot_cpuid = cpumask_any(cpu_online_mask);
0142     if (xive_enabled())
0143         xive_smp_disable_cpu();
0144     else
0145         xics_migrate_irqs_away();
0146 
0147     cleanup_cpu_mmu_context();
0148 
0149     return 0;
0150 }
0151 
0152 static void pnv_flush_interrupts(void)
0153 {
0154     if (cpu_has_feature(CPU_FTR_ARCH_300)) {
0155         if (xive_enabled())
0156             xive_flush_interrupt();
0157         else
0158             icp_opal_flush_interrupt();
0159     } else {
0160         icp_native_flush_interrupt();
0161     }
0162 }
0163 
0164 static void pnv_cpu_offline_self(void)
0165 {
0166     unsigned long srr1, unexpected_mask, wmask;
0167     unsigned int cpu;
0168     u64 lpcr_val;
0169 
0170     /* Standard hot unplug procedure */
0171 
0172     idle_task_exit();
0173     cpu = smp_processor_id();
0174     DBG("CPU%d offline\n", cpu);
0175     generic_set_cpu_dead(cpu);
0176     smp_wmb();
0177 
0178     wmask = SRR1_WAKEMASK;
0179     if (cpu_has_feature(CPU_FTR_ARCH_207S))
0180         wmask = SRR1_WAKEMASK_P8;
0181 
0182     /*
0183      * This turns the irq soft-disabled state we're called with, into a
0184      * hard-disabled state with pending irq_happened interrupts cleared.
0185      *
0186      * PACA_IRQ_DEC   - Decrementer should be ignored.
0187      * PACA_IRQ_HMI   - Can be ignored, processing is done in real mode.
0188      * PACA_IRQ_DBELL, EE, PMI - Unexpected.
0189      */
0190     hard_irq_disable();
0191     if (generic_check_cpu_restart(cpu))
0192         goto out;
0193 
0194     unexpected_mask = ~(PACA_IRQ_DEC | PACA_IRQ_HMI | PACA_IRQ_HARD_DIS);
0195     if (local_paca->irq_happened & unexpected_mask) {
0196         if (local_paca->irq_happened & PACA_IRQ_EE)
0197             pnv_flush_interrupts();
0198         DBG("CPU%d Unexpected exit while offline irq_happened=%lx!\n",
0199                 cpu, local_paca->irq_happened);
0200     }
0201     local_paca->irq_happened = PACA_IRQ_HARD_DIS;
0202 
0203     /*
0204      * We don't want to take decrementer interrupts while we are
0205      * offline, so clear LPCR:PECE1. We keep PECE2 (and
0206      * LPCR_PECE_HVEE on P9) enabled so as to let IPIs in.
0207      *
0208      * If the CPU gets woken up by a special wakeup, ensure that
0209      * the SLW engine sets LPCR with decrementer bit cleared, else
0210      * the CPU will come back to the kernel due to a spurious
0211      * wakeup.
0212      */
0213     lpcr_val = mfspr(SPRN_LPCR) & ~(u64)LPCR_PECE1;
0214     pnv_program_cpu_hotplug_lpcr(cpu, lpcr_val);
0215 
0216     while (!generic_check_cpu_restart(cpu)) {
0217         /*
0218          * Clear IPI flag, since we don't handle IPIs while
0219          * offline, except for those when changing micro-threading
0220          * mode, which are handled explicitly below, and those
0221          * for coming online, which are handled via
0222          * generic_check_cpu_restart() calls.
0223          */
0224         kvmppc_clear_host_ipi(cpu);
0225 
0226         srr1 = pnv_cpu_offline(cpu);
0227 
0228         WARN_ON_ONCE(!irqs_disabled());
0229         WARN_ON(lazy_irq_pending());
0230 
0231         /*
0232          * If the SRR1 value indicates that we woke up due to
0233          * an external interrupt, then clear the interrupt.
0234          * We clear the interrupt before checking for the
0235          * reason, so as to avoid a race where we wake up for
0236          * some other reason, find nothing and clear the interrupt
0237          * just as some other cpu is sending us an interrupt.
0238          * If we returned from power7_nap as a result of
0239          * having finished executing in a KVM guest, then srr1
0240          * contains 0.
0241          */
0242         if (((srr1 & wmask) == SRR1_WAKEEE) ||
0243             ((srr1 & wmask) == SRR1_WAKEHVI)) {
0244             pnv_flush_interrupts();
0245         } else if ((srr1 & wmask) == SRR1_WAKEHDBELL) {
0246             unsigned long msg = PPC_DBELL_TYPE(PPC_DBELL_SERVER);
0247             asm volatile(PPC_MSGCLR(%0) : : "r" (msg));
0248         } else if ((srr1 & wmask) == SRR1_WAKERESET) {
0249             irq_set_pending_from_srr1(srr1);
0250             /* Does not return */
0251         }
0252 
0253         smp_mb();
0254 
0255         /*
0256          * For kdump kernels, we process the ipi and jump to
0257          * crash_ipi_callback
0258          */
0259         if (kdump_in_progress()) {
0260             /*
0261              * If we got to this point, we've not used
0262              * NMI's, otherwise we would have gone
0263              * via the SRR1_WAKERESET path. We are
0264              * using regular IPI's for waking up offline
0265              * threads.
0266              */
0267             struct pt_regs regs;
0268 
0269             ppc_save_regs(&regs);
0270             crash_ipi_callback(&regs);
0271             /* Does not return */
0272         }
0273 
0274         if (cpu_core_split_required())
0275             continue;
0276 
0277         if (srr1 && !generic_check_cpu_restart(cpu))
0278             DBG("CPU%d Unexpected exit while offline srr1=%lx!\n",
0279                     cpu, srr1);
0280 
0281     }
0282 
0283     /*
0284      * Re-enable decrementer interrupts in LPCR.
0285      *
0286      * Further, we want stop states to be woken up by decrementer
0287      * for non-hotplug cases. So program the LPCR via stop api as
0288      * well.
0289      */
0290     lpcr_val = mfspr(SPRN_LPCR) | (u64)LPCR_PECE1;
0291     pnv_program_cpu_hotplug_lpcr(cpu, lpcr_val);
0292 out:
0293     DBG("CPU%d coming online...\n", cpu);
0294 }
0295 
0296 #endif /* CONFIG_HOTPLUG_CPU */
0297 
0298 static int pnv_cpu_bootable(unsigned int nr)
0299 {
0300     /*
0301      * Starting with POWER8, the subcore logic relies on all threads of a
0302      * core being booted so that they can participate in split mode
0303      * switches. So on those machines we ignore the smt_enabled_at_boot
0304      * setting (smt-enabled on the kernel command line).
0305      */
0306     if (cpu_has_feature(CPU_FTR_ARCH_207S))
0307         return 1;
0308 
0309     return smp_generic_cpu_bootable(nr);
0310 }
0311 
/*
 * .prepare_cpu hook: delegate to XIVE when it is enabled; there is nothing
 * to prepare otherwise, so 0 is returned.
 */
static int pnv_smp_prepare_cpu(int cpu)
{
	return xive_enabled() ? xive_smp_prepare_cpu(cpu) : 0;
}
0318 
/*
 * IPI trigger originally installed by the interrupt controller (xics or
 * xive); saved by pnv_smp_probe() before it overrides smp_ops->cause_ipi.
 */
static void (*ic_cause_ipi)(int cpu);

/*
 * Send an IPI to @cpu: try a core doorbell first and fall back to the
 * interrupt controller's method when that reports it did not send one.
 */
static void pnv_cause_ipi(int cpu)
{
	if (!doorbell_try_core_ipi(cpu))
		ic_cause_ipi(cpu);
}
0329 
0330 static void __init pnv_smp_probe(void)
0331 {
0332     if (xive_enabled())
0333         xive_smp_probe();
0334     else
0335         xics_smp_probe();
0336 
0337     if (cpu_has_feature(CPU_FTR_DBELL)) {
0338         ic_cause_ipi = smp_ops->cause_ipi;
0339         WARN_ON(!ic_cause_ipi);
0340 
0341         if (cpu_has_feature(CPU_FTR_ARCH_300))
0342             smp_ops->cause_ipi = doorbell_global_ipi;
0343         else
0344             smp_ops->cause_ipi = pnv_cause_ipi;
0345     }
0346 }
0347 
0348 noinstr static int pnv_system_reset_exception(struct pt_regs *regs)
0349 {
0350     if (smp_handle_nmi_ipi(regs))
0351         return 1;
0352     return 0;
0353 }
0354 
0355 static int pnv_cause_nmi_ipi(int cpu)
0356 {
0357     int64_t rc;
0358 
0359     if (cpu >= 0) {
0360         int h = get_hard_smp_processor_id(cpu);
0361 
0362         if (opal_check_token(OPAL_QUIESCE))
0363             opal_quiesce(QUIESCE_HOLD, h);
0364 
0365         rc = opal_signal_system_reset(h);
0366 
0367         if (opal_check_token(OPAL_QUIESCE))
0368             opal_quiesce(QUIESCE_RESUME, h);
0369 
0370         if (rc != OPAL_SUCCESS)
0371             return 0;
0372         return 1;
0373 
0374     } else if (cpu == NMI_IPI_ALL_OTHERS) {
0375         bool success = true;
0376         int c;
0377 
0378         if (opal_check_token(OPAL_QUIESCE))
0379             opal_quiesce(QUIESCE_HOLD, -1);
0380 
0381         /*
0382          * We do not use broadcasts (yet), because it's not clear
0383          * exactly what semantics Linux wants or the firmware should
0384          * provide.
0385          */
0386         for_each_online_cpu(c) {
0387             if (c == smp_processor_id())
0388                 continue;
0389 
0390             rc = opal_signal_system_reset(
0391                         get_hard_smp_processor_id(c));
0392             if (rc != OPAL_SUCCESS)
0393                 success = false;
0394         }
0395 
0396         if (opal_check_token(OPAL_QUIESCE))
0397             opal_quiesce(QUIESCE_RESUME, -1);
0398 
0399         if (success)
0400             return 1;
0401 
0402         /*
0403          * Caller will fall back to doorbells, which may pick
0404          * up the remainders.
0405          */
0406     }
0407 
0408     return 0;
0409 }
0410 
0411 static struct smp_ops_t pnv_smp_ops = {
0412     .message_pass   = NULL, /* Use smp_muxed_ipi_message_pass */
0413     .cause_ipi  = NULL, /* Filled at runtime by pnv_smp_probe() */
0414     .cause_nmi_ipi  = NULL,
0415     .probe      = pnv_smp_probe,
0416     .prepare_cpu    = pnv_smp_prepare_cpu,
0417     .kick_cpu   = pnv_smp_kick_cpu,
0418     .setup_cpu  = pnv_smp_setup_cpu,
0419     .cpu_bootable   = pnv_cpu_bootable,
0420 #ifdef CONFIG_HOTPLUG_CPU
0421     .cpu_disable    = pnv_smp_cpu_disable,
0422     .cpu_die    = generic_cpu_die,
0423     .cpu_offline_self = pnv_cpu_offline_self,
0424 #endif /* CONFIG_HOTPLUG_CPU */
0425 };
0426 
0427 /* This is called very early during platform setup_arch */
0428 void __init pnv_smp_init(void)
0429 {
0430     if (opal_check_token(OPAL_SIGNAL_SYSTEM_RESET)) {
0431         ppc_md.system_reset_exception = pnv_system_reset_exception;
0432         pnv_smp_ops.cause_nmi_ipi = pnv_cause_nmi_ipi;
0433     }
0434     smp_ops = &pnv_smp_ops;
0435 
0436 #ifdef CONFIG_HOTPLUG_CPU
0437 #ifdef CONFIG_KEXEC_CORE
0438     crash_wake_offline = 1;
0439 #endif
0440 #endif
0441 }