0001 // SPDX-License-Identifier: GPL-2.0-or-later
0002 /*
0003  * SMP support for ppc.
0004  *
0005  * Written by Cort Dougan (cort@cs.nmt.edu) borrowing a great
0006  * deal of code from the sparc and intel versions.
0007  *
0008  * Copyright (C) 1999 Cort Dougan <cort@cs.nmt.edu>
0009  *
0010  * PowerPC-64 Support added by Dave Engebretsen, Peter Bergner, and
0011  * Mike Corrigan {engebret|bergner|mikec}@us.ibm.com
0012  */
0013 
0014 #undef DEBUG
0015 
0016 #include <linux/kernel.h>
0017 #include <linux/export.h>
0018 #include <linux/sched/mm.h>
0019 #include <linux/sched/task_stack.h>
0020 #include <linux/sched/topology.h>
0021 #include <linux/smp.h>
0022 #include <linux/interrupt.h>
0023 #include <linux/delay.h>
0024 #include <linux/init.h>
0025 #include <linux/spinlock.h>
0026 #include <linux/cache.h>
0027 #include <linux/err.h>
0028 #include <linux/device.h>
0029 #include <linux/cpu.h>
0030 #include <linux/notifier.h>
0031 #include <linux/topology.h>
0032 #include <linux/profile.h>
0033 #include <linux/processor.h>
0034 #include <linux/random.h>
0035 #include <linux/stackprotector.h>
0036 #include <linux/pgtable.h>
0037 #include <linux/clockchips.h>
0038 #include <linux/kexec.h>
0039 
0040 #include <asm/ptrace.h>
0041 #include <linux/atomic.h>
0042 #include <asm/irq.h>
0043 #include <asm/hw_irq.h>
0044 #include <asm/kvm_ppc.h>
0045 #include <asm/dbell.h>
0046 #include <asm/page.h>
0047 #include <asm/smp.h>
0048 #include <asm/time.h>
0049 #include <asm/machdep.h>
0050 #include <asm/cputhreads.h>
0051 #include <asm/cputable.h>
0052 #include <asm/mpic.h>
0053 #include <asm/vdso_datapage.h>
0054 #ifdef CONFIG_PPC64
0055 #include <asm/paca.h>
0056 #endif
0057 #include <asm/vdso.h>
0058 #include <asm/debug.h>
0059 #include <asm/cpu_has_feature.h>
0060 #include <asm/ftrace.h>
0061 #include <asm/kup.h>
0062 #include <asm/fadump.h>
0063 
0064 #ifdef DEBUG
0065 #include <asm/udbg.h>
0066 #define DBG(fmt...) udbg_printf(fmt)
0067 #else
0068 #define DBG(fmt...)
0069 #endif
0070 
0071 #ifdef CONFIG_HOTPLUG_CPU
0072 /* State of each CPU during hotplug phases */
0073 static DEFINE_PER_CPU(int, cpu_state) = { 0 };
0074 #endif
0075 
0076 struct task_struct *secondary_current;
0077 bool has_big_cores;
0078 bool coregroup_enabled;
0079 bool thread_group_shares_l2;
0080 bool thread_group_shares_l3;
0081 
0082 DEFINE_PER_CPU(cpumask_var_t, cpu_sibling_map);
0083 DEFINE_PER_CPU(cpumask_var_t, cpu_smallcore_map);
0084 DEFINE_PER_CPU(cpumask_var_t, cpu_l2_cache_map);
0085 DEFINE_PER_CPU(cpumask_var_t, cpu_core_map);
0086 static DEFINE_PER_CPU(cpumask_var_t, cpu_coregroup_map);
0087 
0088 EXPORT_PER_CPU_SYMBOL(cpu_sibling_map);
0089 EXPORT_PER_CPU_SYMBOL(cpu_l2_cache_map);
0090 EXPORT_PER_CPU_SYMBOL(cpu_core_map);
0091 EXPORT_SYMBOL_GPL(has_big_cores);
0092 
0093 enum {
0094 #ifdef CONFIG_SCHED_SMT
0095     smt_idx,
0096 #endif
0097     cache_idx,
0098     mc_idx,
0099     die_idx,
0100 };
0101 
0102 #define MAX_THREAD_LIST_SIZE    8
0103 #define THREAD_GROUP_SHARE_L1   1
0104 #define THREAD_GROUP_SHARE_L2_L3 2
0105 struct thread_groups {
0106     unsigned int property;
0107     unsigned int nr_groups;
0108     unsigned int threads_per_group;
0109     unsigned int thread_list[MAX_THREAD_LIST_SIZE];
0110 };
0111 
0112 /* Maximum number of properties that groups of threads within a core can share */
0113 #define MAX_THREAD_GROUP_PROPERTIES 2
0114 
0115 struct thread_groups_list {
0116     unsigned int nr_properties;
0117     struct thread_groups property_tgs[MAX_THREAD_GROUP_PROPERTIES];
0118 };
0119 
0120 static struct thread_groups_list tgl[NR_CPUS] __initdata;
0121 /*
0122  * On big-core systems, thread_group_l1_cache_map for each CPU corresponds to
0123  * the set of its siblings that share the L1-cache.
0124  */
0125 DEFINE_PER_CPU(cpumask_var_t, thread_group_l1_cache_map);
0126 
0127 /*
0128  * On some big-core systems, thread_group_l2_cache_map for each CPU
0129  * corresponds to the set of its siblings within the core that share the
0130  * L2-cache.
0131  */
0132 DEFINE_PER_CPU(cpumask_var_t, thread_group_l2_cache_map);
0133 
0134 /*
0135  * On P10, thread_group_l3_cache_map for each CPU is equal to the
0136  * thread_group_l2_cache_map
0137  */
0138 DEFINE_PER_CPU(cpumask_var_t, thread_group_l3_cache_map);
0139 
0140 /* SMP operations for this machine */
0141 struct smp_ops_t *smp_ops;
0142 
0143 /* Can't be static due to PowerMac hackery */
0144 volatile unsigned int cpu_callin_map[NR_CPUS];
0145 
0146 int smt_enabled_at_boot = 1;
0147 
0148 /*
0149  * Returns 1 if the specified cpu should be brought up during boot.
0150  * Used to inhibit booting threads if they've been disabled or
0151  * limited on the command line
0152  */
0153 int smp_generic_cpu_bootable(unsigned int nr)
0154 {
0155     /* Special case - we inhibit secondary thread startup
0156      * during boot if the user requests it.
0157      */
0158     if (system_state < SYSTEM_RUNNING && cpu_has_feature(CPU_FTR_SMT)) {
0159         if (!smt_enabled_at_boot && cpu_thread_in_core(nr) != 0)
0160             return 0;
0161         if (smt_enabled_at_boot
0162             && cpu_thread_in_core(nr) >= smt_enabled_at_boot)
0163             return 0;
0164     }
0165 
0166     return 1;
0167 }
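/*
 * Editor's note, worked example (not part of the original file), assuming
 * smt_enabled_at_boot is set from the usual "smt-enabled=" boot option: on
 * an SMT4 core, "smt-enabled=2" leaves smt_enabled_at_boot == 2, so threads
 * 0 and 1 of each core are bootable here while threads 2 and 3 are not;
 * "smt-enabled=off" gives smt_enabled_at_boot == 0, and only thread 0 of
 * each core is brought up during boot.
 */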
0168 
0169 
0170 #ifdef CONFIG_PPC64
0171 int smp_generic_kick_cpu(int nr)
0172 {
0173     if (nr < 0 || nr >= nr_cpu_ids)
0174         return -EINVAL;
0175 
0176     /*
0177      * The processor is currently spinning, waiting for the
0178      * cpu_start field to become non-zero. After we set cpu_start,
0179      * the processor will continue on to secondary_start.
0180      */
0181     if (!paca_ptrs[nr]->cpu_start) {
0182         paca_ptrs[nr]->cpu_start = 1;
0183         smp_mb();
0184         return 0;
0185     }
0186 
0187 #ifdef CONFIG_HOTPLUG_CPU
0188     /*
0189      * Ok it's not there, so it might be soft-unplugged, let's
0190      * try to bring it back
0191      */
0192     generic_set_cpu_up(nr);
0193     smp_wmb();
0194     smp_send_reschedule(nr);
0195 #endif /* CONFIG_HOTPLUG_CPU */
0196 
0197     return 0;
0198 }
0199 #endif /* CONFIG_PPC64 */
0200 
0201 static irqreturn_t call_function_action(int irq, void *data)
0202 {
0203     generic_smp_call_function_interrupt();
0204     return IRQ_HANDLED;
0205 }
0206 
0207 static irqreturn_t reschedule_action(int irq, void *data)
0208 {
0209     scheduler_ipi();
0210     return IRQ_HANDLED;
0211 }
0212 
0213 #ifdef CONFIG_GENERIC_CLOCKEVENTS_BROADCAST
0214 static irqreturn_t tick_broadcast_ipi_action(int irq, void *data)
0215 {
0216     timer_broadcast_interrupt();
0217     return IRQ_HANDLED;
0218 }
0219 #endif
0220 
0221 #ifdef CONFIG_NMI_IPI
0222 static irqreturn_t nmi_ipi_action(int irq, void *data)
0223 {
0224     smp_handle_nmi_ipi(get_irq_regs());
0225     return IRQ_HANDLED;
0226 }
0227 #endif
0228 
0229 static irq_handler_t smp_ipi_action[] = {
0230     [PPC_MSG_CALL_FUNCTION] =  call_function_action,
0231     [PPC_MSG_RESCHEDULE] = reschedule_action,
0232 #ifdef CONFIG_GENERIC_CLOCKEVENTS_BROADCAST
0233     [PPC_MSG_TICK_BROADCAST] = tick_broadcast_ipi_action,
0234 #endif
0235 #ifdef CONFIG_NMI_IPI
0236     [PPC_MSG_NMI_IPI] = nmi_ipi_action,
0237 #endif
0238 };
0239 
0240 /*
0241  * The NMI IPI is a fallback and not truly non-maskable. It is simpler
0242  * than going through the call function infrastructure, and strongly
0243  * serialized, so it is more appropriate for debugging.
0244  */
0245 const char *smp_ipi_name[] = {
0246     [PPC_MSG_CALL_FUNCTION] =  "ipi call function",
0247     [PPC_MSG_RESCHEDULE] = "ipi reschedule",
0248 #ifdef CONFIG_GENERIC_CLOCKEVENTS_BROADCAST
0249     [PPC_MSG_TICK_BROADCAST] = "ipi tick-broadcast",
0250 #endif
0251 #ifdef CONFIG_NMI_IPI
0252     [PPC_MSG_NMI_IPI] = "nmi ipi",
0253 #endif
0254 };
0255 
0256 /* optional function to request ipi, for controllers with >= 4 ipis */
0257 int smp_request_message_ipi(int virq, int msg)
0258 {
0259     int err;
0260 
0261     if (msg < 0 || msg > PPC_MSG_NMI_IPI)
0262         return -EINVAL;
0263 #ifndef CONFIG_NMI_IPI
0264     if (msg == PPC_MSG_NMI_IPI)
0265         return 1;
0266 #endif
0267 
0268     err = request_irq(virq, smp_ipi_action[msg],
0269               IRQF_PERCPU | IRQF_NO_THREAD | IRQF_NO_SUSPEND,
0270               smp_ipi_name[msg], NULL);
0271     WARN(err < 0, "unable to request_irq %d for %s (rc %d)\n",
0272         virq, smp_ipi_name[msg], err);
0273 
0274     return err;
0275 }
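/*
 * Editor's sketch (not part of the original file): an interrupt-controller
 * driver that has one virq per message could register all of the handlers
 * above with a loop like the one below.  "my_ipi_virqs" is a hypothetical,
 * driver-provided array of virtual irq numbers, one per PPC_MSG_* value.
 */
#if 0	/* illustrative only */
static void __init my_controller_request_ipis(void)
{
	int msg;

	for (msg = 0; msg <= PPC_MSG_NMI_IPI; msg++)
		smp_request_message_ipi(my_ipi_virqs[msg], msg);
}
#endif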
0276 
0277 #ifdef CONFIG_PPC_SMP_MUXED_IPI
0278 struct cpu_messages {
0279     long messages;          /* current messages */
0280 };
0281 static DEFINE_PER_CPU_SHARED_ALIGNED(struct cpu_messages, ipi_message);
0282 
0283 void smp_muxed_ipi_set_message(int cpu, int msg)
0284 {
0285     struct cpu_messages *info = &per_cpu(ipi_message, cpu);
0286     char *message = (char *)&info->messages;
0287 
0288     /*
0289      * Order previous accesses before accesses in the IPI handler.
0290      */
0291     smp_mb();
0292     message[msg] = 1;
0293 }
0294 
0295 void smp_muxed_ipi_message_pass(int cpu, int msg)
0296 {
0297     smp_muxed_ipi_set_message(cpu, msg);
0298 
0299     /*
0300      * cause_ipi functions are required to include a full barrier
0301      * before doing whatever causes the IPI.
0302      */
0303     smp_ops->cause_ipi(cpu);
0304 }
0305 
0306 #ifdef __BIG_ENDIAN__
0307 #define IPI_MESSAGE(A) (1uL << ((BITS_PER_LONG - 8) - 8 * (A)))
0308 #else
0309 #define IPI_MESSAGE(A) (1uL << (8 * (A)))
0310 #endif
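/*
 * Editor's note, worked example (not part of the original file): with
 * BITS_PER_LONG == 64, smp_muxed_ipi_set_message() stores 1 into byte
 * 'msg' of info->messages, i.e. it sets the least-significant bit of that
 * byte.  On little-endian, byte msg occupies bits 8*msg..8*msg+7, so
 * IPI_MESSAGE(0) == 1UL << 0 and IPI_MESSAGE(1) == 1UL << 8.  On
 * big-endian, byte 0 is the most-significant byte (bits 56..63), so
 * IPI_MESSAGE(0) == 1UL << 56 and IPI_MESSAGE(1) == 1UL << 48.  Either
 * way, the bit tested in smp_ipi_demux_relaxed() is exactly the bit set
 * by the byte store.
 */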
0311 
0312 irqreturn_t smp_ipi_demux(void)
0313 {
0314     mb();   /* order any irq clear */
0315 
0316     return smp_ipi_demux_relaxed();
0317 }
0318 
0319 /* sync-free variant. Callers should ensure synchronization */
0320 irqreturn_t smp_ipi_demux_relaxed(void)
0321 {
0322     struct cpu_messages *info;
0323     unsigned long all;
0324 
0325     info = this_cpu_ptr(&ipi_message);
0326     do {
0327         all = xchg(&info->messages, 0);
0328 #if defined(CONFIG_KVM_XICS) && defined(CONFIG_KVM_BOOK3S_HV_POSSIBLE)
0329         /*
0330          * Must check for PPC_MSG_RM_HOST_ACTION messages
0331          * before PPC_MSG_CALL_FUNCTION messages because when
0332          * a VM is destroyed, we call kick_all_cpus_sync()
0333          * to ensure that any pending PPC_MSG_RM_HOST_ACTION
0334          * messages have completed before we free any VCPUs.
0335          */
0336         if (all & IPI_MESSAGE(PPC_MSG_RM_HOST_ACTION))
0337             kvmppc_xics_ipi_action();
0338 #endif
0339         if (all & IPI_MESSAGE(PPC_MSG_CALL_FUNCTION))
0340             generic_smp_call_function_interrupt();
0341         if (all & IPI_MESSAGE(PPC_MSG_RESCHEDULE))
0342             scheduler_ipi();
0343 #ifdef CONFIG_GENERIC_CLOCKEVENTS_BROADCAST
0344         if (all & IPI_MESSAGE(PPC_MSG_TICK_BROADCAST))
0345             timer_broadcast_interrupt();
0346 #endif
0347 #ifdef CONFIG_NMI_IPI
0348         if (all & IPI_MESSAGE(PPC_MSG_NMI_IPI))
0349             nmi_ipi_action(0, NULL);
0350 #endif
0351     } while (info->messages);
0352 
0353     return IRQ_HANDLED;
0354 }
0355 #endif /* CONFIG_PPC_SMP_MUXED_IPI */
0356 
0357 static inline void do_message_pass(int cpu, int msg)
0358 {
0359     if (smp_ops->message_pass)
0360         smp_ops->message_pass(cpu, msg);
0361 #ifdef CONFIG_PPC_SMP_MUXED_IPI
0362     else
0363         smp_muxed_ipi_message_pass(cpu, msg);
0364 #endif
0365 }
0366 
0367 void smp_send_reschedule(int cpu)
0368 {
0369     if (likely(smp_ops))
0370         do_message_pass(cpu, PPC_MSG_RESCHEDULE);
0371 }
0372 EXPORT_SYMBOL_GPL(smp_send_reschedule);
0373 
0374 void arch_send_call_function_single_ipi(int cpu)
0375 {
0376     do_message_pass(cpu, PPC_MSG_CALL_FUNCTION);
0377 }
0378 
0379 void arch_send_call_function_ipi_mask(const struct cpumask *mask)
0380 {
0381     unsigned int cpu;
0382 
0383     for_each_cpu(cpu, mask)
0384         do_message_pass(cpu, PPC_MSG_CALL_FUNCTION);
0385 }
0386 
0387 #ifdef CONFIG_NMI_IPI
0388 
0389 /*
0390  * "NMI IPI" system.
0391  *
0392  * NMI IPIs may not be recoverable, so should not be used as an ongoing part of
0393  * a running system. They can be used for crash, debug, halt/reboot, etc.
0394  *
0395  * The IPI call waits with interrupts disabled until all targets enter the
0396  * NMI handler, then returns. Subsequent IPIs can be issued before targets
0397  * have returned from their handlers, so there is no guarantee about
0398  * concurrency or re-entrancy.
0399  *
0400  * A new NMI can be issued before all targets exit the handler.
0401  *
0402  * The IPI call may time out without all targets entering the NMI handler.
0403  * In that case, there is some logic to recover (and ignore subsequent
0404  * NMI interrupts that may eventually be raised), but the platform interrupt
0405  * handler may not be able to distinguish this from other exception causes,
0406  * which may cause a crash.
0407  */
0408 
0409 static atomic_t __nmi_ipi_lock = ATOMIC_INIT(0);
0410 static struct cpumask nmi_ipi_pending_mask;
0411 static bool nmi_ipi_busy = false;
0412 static void (*nmi_ipi_function)(struct pt_regs *) = NULL;
0413 
0414 noinstr static void nmi_ipi_lock_start(unsigned long *flags)
0415 {
0416     raw_local_irq_save(*flags);
0417     hard_irq_disable();
0418     while (arch_atomic_cmpxchg(&__nmi_ipi_lock, 0, 1) == 1) {
0419         raw_local_irq_restore(*flags);
0420         spin_until_cond(arch_atomic_read(&__nmi_ipi_lock) == 0);
0421         raw_local_irq_save(*flags);
0422         hard_irq_disable();
0423     }
0424 }
0425 
0426 noinstr static void nmi_ipi_lock(void)
0427 {
0428     while (arch_atomic_cmpxchg(&__nmi_ipi_lock, 0, 1) == 1)
0429         spin_until_cond(arch_atomic_read(&__nmi_ipi_lock) == 0);
0430 }
0431 
0432 noinstr static void nmi_ipi_unlock(void)
0433 {
0434     smp_mb();
0435     WARN_ON(arch_atomic_read(&__nmi_ipi_lock) != 1);
0436     arch_atomic_set(&__nmi_ipi_lock, 0);
0437 }
0438 
0439 noinstr static void nmi_ipi_unlock_end(unsigned long *flags)
0440 {
0441     nmi_ipi_unlock();
0442     raw_local_irq_restore(*flags);
0443 }
0444 
0445 /*
0446  * Platform NMI handler calls this to ack
0447  */
0448 noinstr int smp_handle_nmi_ipi(struct pt_regs *regs)
0449 {
0450     void (*fn)(struct pt_regs *) = NULL;
0451     unsigned long flags;
0452     int me = raw_smp_processor_id();
0453     int ret = 0;
0454 
0455     /*
0456      * Unexpected NMIs are possible here because the interrupt may not
0457      * be able to distinguish NMI IPIs from other types of NMIs, or
0458      * because the caller may have timed out.
0459      */
0460     nmi_ipi_lock_start(&flags);
0461     if (cpumask_test_cpu(me, &nmi_ipi_pending_mask)) {
0462         cpumask_clear_cpu(me, &nmi_ipi_pending_mask);
0463         fn = READ_ONCE(nmi_ipi_function);
0464         WARN_ON_ONCE(!fn);
0465         ret = 1;
0466     }
0467     nmi_ipi_unlock_end(&flags);
0468 
0469     if (fn)
0470         fn(regs);
0471 
0472     return ret;
0473 }
0474 
0475 static void do_smp_send_nmi_ipi(int cpu, bool safe)
0476 {
0477     if (!safe && smp_ops->cause_nmi_ipi && smp_ops->cause_nmi_ipi(cpu))
0478         return;
0479 
0480     if (cpu >= 0) {
0481         do_message_pass(cpu, PPC_MSG_NMI_IPI);
0482     } else {
0483         int c;
0484 
0485         for_each_online_cpu(c) {
0486             if (c == raw_smp_processor_id())
0487                 continue;
0488             do_message_pass(c, PPC_MSG_NMI_IPI);
0489         }
0490     }
0491 }
0492 
0493 /*
0494  * - cpu is the target CPU (must not be this CPU), or NMI_IPI_ALL_OTHERS.
0495  * - fn is the target callback function.
0496  * - delay_us > 0 is the delay before giving up waiting for targets to
0497  *   begin executing the handler, == 0 specifies indefinite delay.
0498  */
0499 static int __smp_send_nmi_ipi(int cpu, void (*fn)(struct pt_regs *),
0500                 u64 delay_us, bool safe)
0501 {
0502     unsigned long flags;
0503     int me = raw_smp_processor_id();
0504     int ret = 1;
0505 
0506     BUG_ON(cpu == me);
0507     BUG_ON(cpu < 0 && cpu != NMI_IPI_ALL_OTHERS);
0508 
0509     if (unlikely(!smp_ops))
0510         return 0;
0511 
0512     nmi_ipi_lock_start(&flags);
0513     while (nmi_ipi_busy) {
0514         nmi_ipi_unlock_end(&flags);
0515         spin_until_cond(!nmi_ipi_busy);
0516         nmi_ipi_lock_start(&flags);
0517     }
0518     nmi_ipi_busy = true;
0519     nmi_ipi_function = fn;
0520 
0521     WARN_ON_ONCE(!cpumask_empty(&nmi_ipi_pending_mask));
0522 
0523     if (cpu < 0) {
0524         /* ALL_OTHERS */
0525         cpumask_copy(&nmi_ipi_pending_mask, cpu_online_mask);
0526         cpumask_clear_cpu(me, &nmi_ipi_pending_mask);
0527     } else {
0528         cpumask_set_cpu(cpu, &nmi_ipi_pending_mask);
0529     }
0530 
0531     nmi_ipi_unlock();
0532 
0533     /* Interrupts remain hard disabled */
0534 
0535     do_smp_send_nmi_ipi(cpu, safe);
0536 
0537     nmi_ipi_lock();
0538     /* nmi_ipi_busy is set here, so unlock/lock is okay */
0539     while (!cpumask_empty(&nmi_ipi_pending_mask)) {
0540         nmi_ipi_unlock();
0541         udelay(1);
0542         nmi_ipi_lock();
0543         if (delay_us) {
0544             delay_us--;
0545             if (!delay_us)
0546                 break;
0547         }
0548     }
0549 
0550     if (!cpumask_empty(&nmi_ipi_pending_mask)) {
0551         /* Timeout waiting for CPUs to call smp_handle_nmi_ipi */
0552         ret = 0;
0553         cpumask_clear(&nmi_ipi_pending_mask);
0554     }
0555 
0556     nmi_ipi_function = NULL;
0557     nmi_ipi_busy = false;
0558 
0559     nmi_ipi_unlock_end(&flags);
0560 
0561     return ret;
0562 }
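/*
 * Editor's note (not part of the original file): the wait loop above
 * udelay()s for 1us per iteration and decrements delay_us each time, so
 * delay_us is roughly a timeout in microseconds; delay_us == 0 means wait
 * indefinitely for every target to enter its handler.
 */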
0563 
0564 int smp_send_nmi_ipi(int cpu, void (*fn)(struct pt_regs *), u64 delay_us)
0565 {
0566     return __smp_send_nmi_ipi(cpu, fn, delay_us, false);
0567 }
0568 
0569 int smp_send_safe_nmi_ipi(int cpu, void (*fn)(struct pt_regs *), u64 delay_us)
0570 {
0571     return __smp_send_nmi_ipi(cpu, fn, delay_us, true);
0572 }
0573 #endif /* CONFIG_NMI_IPI */
0574 
0575 #ifdef CONFIG_GENERIC_CLOCKEVENTS_BROADCAST
0576 void tick_broadcast(const struct cpumask *mask)
0577 {
0578     unsigned int cpu;
0579 
0580     for_each_cpu(cpu, mask)
0581         do_message_pass(cpu, PPC_MSG_TICK_BROADCAST);
0582 }
0583 #endif
0584 
0585 #ifdef CONFIG_DEBUGGER
0586 static void debugger_ipi_callback(struct pt_regs *regs)
0587 {
0588     debugger_ipi(regs);
0589 }
0590 
0591 void smp_send_debugger_break(void)
0592 {
0593     smp_send_nmi_ipi(NMI_IPI_ALL_OTHERS, debugger_ipi_callback, 1000000);
0594 }
0595 #endif
0596 
0597 #ifdef CONFIG_KEXEC_CORE
0598 void crash_send_ipi(void (*crash_ipi_callback)(struct pt_regs *))
0599 {
0600     int cpu;
0601 
0602     smp_send_nmi_ipi(NMI_IPI_ALL_OTHERS, crash_ipi_callback, 1000000);
0603     if (kdump_in_progress() && crash_wake_offline) {
0604         for_each_present_cpu(cpu) {
0605             if (cpu_online(cpu))
0606                 continue;
0607             /*
0608              * crash_ipi_callback will wait for
0609              * all cpus, including offline CPUs.
0610              * We don't care about nmi_ipi_function.
0611              * Offline cpus will jump straight into
0612              * crash_ipi_callback, we can skip the
0613              * entire NMI dance and waiting for
0614              * cpus to clear pending mask, etc.
0615              */
0616             do_smp_send_nmi_ipi(cpu, false);
0617         }
0618     }
0619 }
0620 #endif
0621 
0622 void crash_smp_send_stop(void)
0623 {
0624     static bool stopped = false;
0625 
0626     /*
0627      * In case of fadump, register data for all CPUs is captured by f/w
0628      * on ibm,os-term rtas call. Skip IPI callbacks to other CPUs before
0629      * this rtas call to avoid tricky post processing of those CPUs'
0630      * backtraces.
0631      */
0632     if (should_fadump_crash())
0633         return;
0634 
0635     if (stopped)
0636         return;
0637 
0638     stopped = true;
0639 
0640 #ifdef CONFIG_KEXEC_CORE
0641     if (kexec_crash_image) {
0642         crash_kexec_prepare();
0643         return;
0644     }
0645 #endif
0646 
0647     smp_send_stop();
0648 }
0649 
0650 #ifdef CONFIG_NMI_IPI
0651 static void nmi_stop_this_cpu(struct pt_regs *regs)
0652 {
0653     /*
0654      * IRQs are already hard disabled by smp_handle_nmi_ipi().
0655      */
0656     set_cpu_online(smp_processor_id(), false);
0657 
0658     spin_begin();
0659     while (1)
0660         spin_cpu_relax();
0661 }
0662 
0663 void smp_send_stop(void)
0664 {
0665     smp_send_nmi_ipi(NMI_IPI_ALL_OTHERS, nmi_stop_this_cpu, 1000000);
0666 }
0667 
0668 #else /* CONFIG_NMI_IPI */
0669 
0670 static void stop_this_cpu(void *dummy)
0671 {
0672     hard_irq_disable();
0673 
0674     /*
0675      * Offlining CPUs in stop_this_cpu can result in scheduler warnings,
0676      * (see commit de6e5d38417e), but printk_safe_flush_on_panic() wants
0677      * to know other CPUs are offline before it breaks locks to flush
0678      * printk buffers, in case we panic()ed while holding the lock.
0679      */
0680     set_cpu_online(smp_processor_id(), false);
0681 
0682     spin_begin();
0683     while (1)
0684         spin_cpu_relax();
0685 }
0686 
0687 void smp_send_stop(void)
0688 {
0689     static bool stopped = false;
0690 
0691     /*
0692      * Prevent waiting on csd lock from a previous smp_send_stop.
0693      * This is racy, but in general callers try to do the right
0694      * thing and only fire off one smp_send_stop (e.g., see
0695      * kernel/panic.c)
0696      */
0697     if (stopped)
0698         return;
0699 
0700     stopped = true;
0701 
0702     smp_call_function(stop_this_cpu, NULL, 0);
0703 }
0704 #endif /* CONFIG_NMI_IPI */
0705 
0706 static struct task_struct *current_set[NR_CPUS];
0707 
0708 static void smp_store_cpu_info(int id)
0709 {
0710     per_cpu(cpu_pvr, id) = mfspr(SPRN_PVR);
0711 #ifdef CONFIG_PPC_FSL_BOOK3E
0712     per_cpu(next_tlbcam_idx, id)
0713         = (mfspr(SPRN_TLB1CFG) & TLBnCFG_N_ENTRY) - 1;
0714 #endif
0715 }
0716 
0717 /*
0718  * Relationships between CPUs are maintained in a set of per-cpu cpumasks so
0719  * rather than just passing around the cpumask, we pass around a function
0720  * that returns that cpumask for the given CPU.
0721  */
0722 static void set_cpus_related(int i, int j, struct cpumask *(*get_cpumask)(int))
0723 {
0724     cpumask_set_cpu(i, get_cpumask(j));
0725     cpumask_set_cpu(j, get_cpumask(i));
0726 }
0727 
0728 #ifdef CONFIG_HOTPLUG_CPU
0729 static void set_cpus_unrelated(int i, int j,
0730         struct cpumask *(*get_cpumask)(int))
0731 {
0732     cpumask_clear_cpu(i, get_cpumask(j));
0733     cpumask_clear_cpu(j, get_cpumask(i));
0734 }
0735 #endif
0736 
0737 /*
0738  * Extends set_cpus_related. Instead of setting one CPU at a time in
0739  * dstmask, set srcmask in one shot. dstmask should be a superset of srcmask.
0740  */
0741 static void or_cpumasks_related(int i, int j, struct cpumask *(*srcmask)(int),
0742                 struct cpumask *(*dstmask)(int))
0743 {
0744     struct cpumask *mask;
0745     int k;
0746 
0747     mask = srcmask(j);
0748     for_each_cpu(k, srcmask(i))
0749         cpumask_or(dstmask(k), dstmask(k), mask);
0750 
0751     if (i == j)
0752         return;
0753 
0754     mask = srcmask(i);
0755     for_each_cpu(k, srcmask(j))
0756         cpumask_or(dstmask(k), dstmask(k), mask);
0757 }
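/*
 * Editor's note, worked example (not part of the original file): with
 * srcmask == cpu_sibling_mask and dstmask == cpu_core_mask, calling
 * or_cpumasks_related(4, 8, cpu_sibling_mask, cpu_core_mask) first ORs
 * the siblings of CPU 8 into the core mask of every sibling of CPU 4,
 * then ORs the siblings of CPU 4 into the core mask of every sibling of
 * CPU 8, so the core mask of every thread in either core now also covers
 * the threads of the other core.
 */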
0758 
0759 /*
0760  * parse_thread_groups: Parses the "ibm,thread-groups" device tree
0761  *                      property for the CPU device node @dn and stores
0762  *                      the parsed output in the thread_groups_list
0763  *                      structure @tglp.
0764  *
0765  * @dn: The device node of the CPU device.
0766  * @tglp: Pointer to a thread group list structure into which the parsed
0767  *      output of "ibm,thread-groups" is stored.
0768  *
0769  * ibm,thread-groups[0..N-1] array defines which group of threads in
0770  * the CPU-device node can be grouped together based on the property.
0771  *
0772  * This array can represent thread groupings for multiple properties.
0773  *
0774  * ibm,thread-groups[i + 0] tells us the property based on which the
0775  * threads are being grouped together. If this value is 1, it implies
0776  * that the threads in the same group share the L1 and translation cache. If
0777  * the value is 2, it implies that the threads in the same group share
0778  * the same L2 cache.
0779  *
0780  * ibm,thread-groups[i+1] tells us how many such thread groups exist for the
0781  * property ibm,thread-groups[i]
0782  *
0783  * ibm,thread-groups[i+2] tells us the number of threads in each such
0784  * group.
0785  * Suppose k = (ibm,thread-groups[i+1] * ibm,thread-groups[i+2]), then,
0786  *
0787  * ibm,thread-groups[i+3..i+k+2] is the list of threads identified by
0788  * "ibm,ppc-interrupt-server#s" arranged as per their membership in
0789  * the grouping.
0790  *
0791  * Example:
0792  * If "ibm,thread-groups" = [1,2,4,8,10,12,14,9,11,13,15,2,2,4,8,10,12,14,9,11,13,15]
0793  * This can be decomposed into two consecutive arrays:
0794  * a) [1,2,4,8,10,12,14,9,11,13,15]
0795  * b) [2,2,4,8,10,12,14,9,11,13,15]
0796  *
0797  * where in,
0798  *
0799  * a) provides information of Property "1" being shared by "2" groups,
0800  *  each with "4" threads each. The "ibm,ppc-interrupt-server#s" of
0801  *  the first group is {8,10,12,14} and the
0802  *  "ibm,ppc-interrupt-server#s" of the second group is
0803  *  {9,11,13,15}. Property "1" is indicative of the threads in the
0804  *  group sharing L1 cache, translation cache and Instruction Data
0805  *  flow.
0806  *
0807  * b) provides information of Property "2" being shared by "2" groups,
0808  *  each group with "4" threads. The "ibm,ppc-interrupt-server#s" of
0809  *  the first group is {8,10,12,14} and the
0810  *  "ibm,ppc-interrupt-server#s" of the second group is
0811  *  {9,11,13,15}. Property "2" indicates that the threads in each
0812  *  group share the L2-cache.
0813  *
0814  * Returns 0 on success, -EINVAL if the property does not exist,
0815  * -ENODATA if property does not have a value, and -EOVERFLOW if the
0816  * property data isn't large enough.
0817  */
0818 static int parse_thread_groups(struct device_node *dn,
0819                    struct thread_groups_list *tglp)
0820 {
0821     unsigned int property_idx = 0;
0822     u32 *thread_group_array;
0823     size_t total_threads;
0824     int ret = 0, count;
0825     u32 *thread_list;
0826     int i = 0;
0827 
0828     count = of_property_count_u32_elems(dn, "ibm,thread-groups");
0829     thread_group_array = kcalloc(count, sizeof(u32), GFP_KERNEL);
0830     ret = of_property_read_u32_array(dn, "ibm,thread-groups",
0831                      thread_group_array, count);
0832     if (ret)
0833         goto out_free;
0834 
0835     while (i < count && property_idx < MAX_THREAD_GROUP_PROPERTIES) {
0836         int j;
0837         struct thread_groups *tg = &tglp->property_tgs[property_idx++];
0838 
0839         tg->property = thread_group_array[i];
0840         tg->nr_groups = thread_group_array[i + 1];
0841         tg->threads_per_group = thread_group_array[i + 2];
0842         total_threads = tg->nr_groups * tg->threads_per_group;
0843 
0844         thread_list = &thread_group_array[i + 3];
0845 
0846         for (j = 0; j < total_threads; j++)
0847             tg->thread_list[j] = thread_list[j];
0848         i = i + 3 + total_threads;
0849     }
0850 
0851     tglp->nr_properties = property_idx;
0852 
0853 out_free:
0854     kfree(thread_group_array);
0855     return ret;
0856 }
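/*
 * Editor's note, worked example (not part of the original file): for the
 * example "ibm,thread-groups" array documented above,
 * parse_thread_groups() leaves tglp->nr_properties == 2 and fills
 *
 *   property_tgs[0] = { .property = 1, .nr_groups = 2,
 *                       .threads_per_group = 4,
 *                       .thread_list = { 8, 10, 12, 14, 9, 11, 13, 15 } };
 *   property_tgs[1] = { .property = 2, .nr_groups = 2,
 *                       .threads_per_group = 4,
 *                       .thread_list = { 8, 10, 12, 14, 9, 11, 13, 15 } };
 */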
0857 
0858 /*
0859  * get_cpu_thread_group_start : Searches the thread group in tg->thread_list
0860  *                              that @cpu belongs to.
0861  *
0862  * @cpu : The logical CPU whose thread group is being searched.
0863  * @tg : The thread-group structure of the CPU node which @cpu belongs
0864  *       to.
0865  *
0866  * Returns the index to tg->thread_list that points to the start
0867  * of the thread_group that @cpu belongs to.
0868  *
0869  * Returns -1 if cpu doesn't belong to any of the groups pointed to by
0870  * tg->thread_list.
0871  */
0872 static int get_cpu_thread_group_start(int cpu, struct thread_groups *tg)
0873 {
0874     int hw_cpu_id = get_hard_smp_processor_id(cpu);
0875     int i, j;
0876 
0877     for (i = 0; i < tg->nr_groups; i++) {
0878         int group_start = i * tg->threads_per_group;
0879 
0880         for (j = 0; j < tg->threads_per_group; j++) {
0881             int idx = group_start + j;
0882 
0883             if (tg->thread_list[idx] == hw_cpu_id)
0884                 return group_start;
0885         }
0886     }
0887 
0888     return -1;
0889 }
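/*
 * Editor's note, worked example (not part of the original file): for the
 * thread group decoded above (thread_list = { 8, 10, 12, 14, 9, 11, 13, 15 }
 * with two groups of four), a CPU whose hardware id is 11 is found at
 * index 5 of thread_list, i.e. inside the second group, so
 * get_cpu_thread_group_start() returns 4.
 */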
0890 
0891 static struct thread_groups *__init get_thread_groups(int cpu,
0892                               int group_property,
0893                               int *err)
0894 {
0895     struct device_node *dn = of_get_cpu_node(cpu, NULL);
0896     struct thread_groups_list *cpu_tgl = &tgl[cpu];
0897     struct thread_groups *tg = NULL;
0898     int i;
0899     *err = 0;
0900 
0901     if (!dn) {
0902         *err = -ENODATA;
0903         return NULL;
0904     }
0905 
0906     if (!cpu_tgl->nr_properties) {
0907         *err = parse_thread_groups(dn, cpu_tgl);
0908         if (*err)
0909             goto out;
0910     }
0911 
0912     for (i = 0; i < cpu_tgl->nr_properties; i++) {
0913         if (cpu_tgl->property_tgs[i].property == group_property) {
0914             tg = &cpu_tgl->property_tgs[i];
0915             break;
0916         }
0917     }
0918 
0919     if (!tg)
0920         *err = -EINVAL;
0921 out:
0922     of_node_put(dn);
0923     return tg;
0924 }
0925 
0926 static int __init update_mask_from_threadgroup(cpumask_var_t *mask, struct thread_groups *tg,
0927                            int cpu, int cpu_group_start)
0928 {
0929     int first_thread = cpu_first_thread_sibling(cpu);
0930     int i;
0931 
0932     zalloc_cpumask_var_node(mask, GFP_KERNEL, cpu_to_node(cpu));
0933 
0934     for (i = first_thread; i < first_thread + threads_per_core; i++) {
0935         int i_group_start = get_cpu_thread_group_start(i, tg);
0936 
0937         if (unlikely(i_group_start == -1)) {
0938             WARN_ON_ONCE(1);
0939             return -ENODATA;
0940         }
0941 
0942         if (i_group_start == cpu_group_start)
0943             cpumask_set_cpu(i, *mask);
0944     }
0945 
0946     return 0;
0947 }
0948 
0949 static int __init init_thread_group_cache_map(int cpu, int cache_property)
0950 
0951 {
0952     int cpu_group_start = -1, err = 0;
0953     struct thread_groups *tg = NULL;
0954     cpumask_var_t *mask = NULL;
0955 
0956     if (cache_property != THREAD_GROUP_SHARE_L1 &&
0957         cache_property != THREAD_GROUP_SHARE_L2_L3)
0958         return -EINVAL;
0959 
0960     tg = get_thread_groups(cpu, cache_property, &err);
0961 
0962     if (!tg)
0963         return err;
0964 
0965     cpu_group_start = get_cpu_thread_group_start(cpu, tg);
0966 
0967     if (unlikely(cpu_group_start == -1)) {
0968         WARN_ON_ONCE(1);
0969         return -ENODATA;
0970     }
0971 
0972     if (cache_property == THREAD_GROUP_SHARE_L1) {
0973         mask = &per_cpu(thread_group_l1_cache_map, cpu);
0974         update_mask_from_threadgroup(mask, tg, cpu, cpu_group_start);
0975     }
0976     else if (cache_property == THREAD_GROUP_SHARE_L2_L3) {
0977         mask = &per_cpu(thread_group_l2_cache_map, cpu);
0978         update_mask_from_threadgroup(mask, tg, cpu, cpu_group_start);
0979         mask = &per_cpu(thread_group_l3_cache_map, cpu);
0980         update_mask_from_threadgroup(mask, tg, cpu, cpu_group_start);
0981     }
0982 
0983 
0984     return 0;
0985 }
0986 
0987 static bool shared_caches;
0988 
0989 #ifdef CONFIG_SCHED_SMT
0990 /* Scheduler flags for the SMT level; adds asymmetric packing on asymmetric-SMT CPUs */
0991 static int powerpc_smt_flags(void)
0992 {
0993     int flags = SD_SHARE_CPUCAPACITY | SD_SHARE_PKG_RESOURCES;
0994 
0995     if (cpu_has_feature(CPU_FTR_ASYM_SMT)) {
0996         printk_once(KERN_INFO "Enabling Asymmetric SMT scheduling\n");
0997         flags |= SD_ASYM_PACKING;
0998     }
0999     return flags;
1000 }
1001 #endif
1002 
1003 /*
1004  * P9 has a slightly odd architecture where pairs of cores share an L2 cache.
1005  * This topology makes it *much* cheaper to migrate tasks between adjacent cores
1006  * since the migrated task remains cache hot. We want to take advantage of this
1007  * at the scheduler level so an extra topology level is required.
1008  */
1009 static int powerpc_shared_cache_flags(void)
1010 {
1011     return SD_SHARE_PKG_RESOURCES;
1012 }
1013 
1014 /*
1015  * We can't just pass cpu_l2_cache_mask() directly because it
1016  * returns a non-const pointer and the compiler barfs on that.
1017  */
1018 static const struct cpumask *shared_cache_mask(int cpu)
1019 {
1020     return per_cpu(cpu_l2_cache_map, cpu);
1021 }
1022 
1023 #ifdef CONFIG_SCHED_SMT
1024 static const struct cpumask *smallcore_smt_mask(int cpu)
1025 {
1026     return cpu_smallcore_mask(cpu);
1027 }
1028 #endif
1029 
1030 static struct cpumask *cpu_coregroup_mask(int cpu)
1031 {
1032     return per_cpu(cpu_coregroup_map, cpu);
1033 }
1034 
1035 static bool has_coregroup_support(void)
1036 {
1037     return coregroup_enabled;
1038 }
1039 
1040 static const struct cpumask *cpu_mc_mask(int cpu)
1041 {
1042     return cpu_coregroup_mask(cpu);
1043 }
1044 
1045 static struct sched_domain_topology_level powerpc_topology[] = {
1046 #ifdef CONFIG_SCHED_SMT
1047     { cpu_smt_mask, powerpc_smt_flags, SD_INIT_NAME(SMT) },
1048 #endif
1049     { shared_cache_mask, powerpc_shared_cache_flags, SD_INIT_NAME(CACHE) },
1050     { cpu_mc_mask, SD_INIT_NAME(MC) },
1051     { cpu_cpu_mask, SD_INIT_NAME(DIE) },
1052     { NULL, },
1053 };
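/*
 * Editor's note (not part of the original file): the entries above map
 * one-to-one onto the smt_idx/cache_idx/mc_idx/die_idx enum near the top
 * of this file, giving the scheduler the hierarchy SMT -> CACHE -> MC ->
 * DIE.  fixup_topology() below rewrites the masks (small-core SMT mask,
 * MC collapsed onto CACHE when coregroups are unsupported) before
 * set_sched_topology() is called from smp_cpus_done().
 */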
1054 
1055 static int __init init_big_cores(void)
1056 {
1057     int cpu;
1058 
1059     for_each_possible_cpu(cpu) {
1060         int err = init_thread_group_cache_map(cpu, THREAD_GROUP_SHARE_L1);
1061 
1062         if (err)
1063             return err;
1064 
1065         zalloc_cpumask_var_node(&per_cpu(cpu_smallcore_map, cpu),
1066                     GFP_KERNEL,
1067                     cpu_to_node(cpu));
1068     }
1069 
1070     has_big_cores = true;
1071 
1072     for_each_possible_cpu(cpu) {
1073         int err = init_thread_group_cache_map(cpu, THREAD_GROUP_SHARE_L2_L3);
1074 
1075         if (err)
1076             return err;
1077     }
1078 
1079     thread_group_shares_l2 = true;
1080     thread_group_shares_l3 = true;
1081     pr_debug("L2/L3 cache only shared by the threads in the small core\n");
1082 
1083     return 0;
1084 }
1085 
1086 void __init smp_prepare_cpus(unsigned int max_cpus)
1087 {
1088     unsigned int cpu;
1089 
1090     DBG("smp_prepare_cpus\n");
1091 
1092     /*
1093      * setup_cpu may need to be called on the boot cpu. We haven't
1094      * spun any cpus up yet, but let's be paranoid.
1095      */
1096     BUG_ON(boot_cpuid != smp_processor_id());
1097 
1098     /* Fixup boot cpu */
1099     smp_store_cpu_info(boot_cpuid);
1100     cpu_callin_map[boot_cpuid] = 1;
1101 
1102     for_each_possible_cpu(cpu) {
1103         zalloc_cpumask_var_node(&per_cpu(cpu_sibling_map, cpu),
1104                     GFP_KERNEL, cpu_to_node(cpu));
1105         zalloc_cpumask_var_node(&per_cpu(cpu_l2_cache_map, cpu),
1106                     GFP_KERNEL, cpu_to_node(cpu));
1107         zalloc_cpumask_var_node(&per_cpu(cpu_core_map, cpu),
1108                     GFP_KERNEL, cpu_to_node(cpu));
1109         if (has_coregroup_support())
1110             zalloc_cpumask_var_node(&per_cpu(cpu_coregroup_map, cpu),
1111                         GFP_KERNEL, cpu_to_node(cpu));
1112 
1113 #ifdef CONFIG_NUMA
1114         /*
1115          * numa_node_id() works after this.
1116          */
1117         if (cpu_present(cpu)) {
1118             set_cpu_numa_node(cpu, numa_cpu_lookup_table[cpu]);
1119             set_cpu_numa_mem(cpu,
1120                 local_memory_node(numa_cpu_lookup_table[cpu]));
1121         }
1122 #endif
1123     }
1124 
1125     /* Init the cpumasks so the boot CPU is related to itself */
1126     cpumask_set_cpu(boot_cpuid, cpu_sibling_mask(boot_cpuid));
1127     cpumask_set_cpu(boot_cpuid, cpu_l2_cache_mask(boot_cpuid));
1128     cpumask_set_cpu(boot_cpuid, cpu_core_mask(boot_cpuid));
1129 
1130     if (has_coregroup_support())
1131         cpumask_set_cpu(boot_cpuid, cpu_coregroup_mask(boot_cpuid));
1132 
1133     init_big_cores();
1134     if (has_big_cores) {
1135         cpumask_set_cpu(boot_cpuid,
1136                 cpu_smallcore_mask(boot_cpuid));
1137     }
1138 
1139     if (cpu_to_chip_id(boot_cpuid) != -1) {
1140         int idx = DIV_ROUND_UP(num_possible_cpus(), threads_per_core);
1141 
1142         /*
1143          * All threads of a core belong to the same chip, so
1144          * chip_id_lookup_table will have one entry per core.
1145          * Assumption: if boot_cpuid doesn't have a chip-id, then no
1146          * other CPU will have a chip-id either.
1147          */
1148         chip_id_lookup_table = kcalloc(idx, sizeof(int), GFP_KERNEL);
1149         if (chip_id_lookup_table)
1150             memset(chip_id_lookup_table, -1, sizeof(int) * idx);
1151     }
1152 
1153     if (smp_ops && smp_ops->probe)
1154         smp_ops->probe();
1155 }
1156 
1157 void smp_prepare_boot_cpu(void)
1158 {
1159     BUG_ON(smp_processor_id() != boot_cpuid);
1160 #ifdef CONFIG_PPC64
1161     paca_ptrs[boot_cpuid]->__current = current;
1162 #endif
1163     set_numa_node(numa_cpu_lookup_table[boot_cpuid]);
1164     current_set[boot_cpuid] = current;
1165 }
1166 
1167 #ifdef CONFIG_HOTPLUG_CPU
1168 
1169 int generic_cpu_disable(void)
1170 {
1171     unsigned int cpu = smp_processor_id();
1172 
1173     if (cpu == boot_cpuid)
1174         return -EBUSY;
1175 
1176     set_cpu_online(cpu, false);
1177 #ifdef CONFIG_PPC64
1178     vdso_data->processorCount--;
1179 #endif
1180     /* Update affinity of all IRQs previously aimed at this CPU */
1181     irq_migrate_all_off_this_cpu();
1182 
1183     /*
1184      * Depending on the details of the interrupt controller, it's possible
1185      * that one of the interrupts we just migrated away from this CPU is
1186      * actually already pending on this CPU. If we leave it in that state
1187      * the interrupt will never be EOI'ed, and will never fire again. So
1188      * temporarily enable interrupts here, to allow any pending interrupt to
1189      * be received (and EOI'ed), before we take this CPU offline.
1190      */
1191     local_irq_enable();
1192     mdelay(1);
1193     local_irq_disable();
1194 
1195     return 0;
1196 }
1197 
1198 void generic_cpu_die(unsigned int cpu)
1199 {
1200     int i;
1201 
1202     for (i = 0; i < 100; i++) {
1203         smp_rmb();
1204         if (is_cpu_dead(cpu))
1205             return;
1206         msleep(100);
1207     }
1208     printk(KERN_ERR "CPU%d didn't die...\n", cpu);
1209 }
1210 
1211 void generic_set_cpu_dead(unsigned int cpu)
1212 {
1213     per_cpu(cpu_state, cpu) = CPU_DEAD;
1214 }
1215 
1216 /*
1217  * The cpu_state should be set to CPU_UP_PREPARE in kick_cpu(); otherwise it
1218  * stays CPU_DEAD after generic_set_cpu_dead() has run, and the wait loop in
1219  * generic_cpu_die() returns immediately.
1220  */
1221 void generic_set_cpu_up(unsigned int cpu)
1222 {
1223     per_cpu(cpu_state, cpu) = CPU_UP_PREPARE;
1224 }
1225 
1226 int generic_check_cpu_restart(unsigned int cpu)
1227 {
1228     return per_cpu(cpu_state, cpu) == CPU_UP_PREPARE;
1229 }
1230 
1231 int is_cpu_dead(unsigned int cpu)
1232 {
1233     return per_cpu(cpu_state, cpu) == CPU_DEAD;
1234 }
1235 
1236 static bool secondaries_inhibited(void)
1237 {
1238     return kvm_hv_mode_active();
1239 }
1240 
1241 #else /* HOTPLUG_CPU */
1242 
1243 #define secondaries_inhibited()     0
1244 
1245 #endif
1246 
1247 static void cpu_idle_thread_init(unsigned int cpu, struct task_struct *idle)
1248 {
1249 #ifdef CONFIG_PPC64
1250     paca_ptrs[cpu]->__current = idle;
1251     paca_ptrs[cpu]->kstack = (unsigned long)task_stack_page(idle) +
1252                  THREAD_SIZE - STACK_FRAME_OVERHEAD;
1253 #endif
1254     task_thread_info(idle)->cpu = cpu;
1255     secondary_current = current_set[cpu] = idle;
1256 }
1257 
1258 int __cpu_up(unsigned int cpu, struct task_struct *tidle)
1259 {
1260     int rc, c;
1261 
1262     /*
1263      * Don't allow secondary threads to come online if inhibited
1264      */
1265     if (threads_per_core > 1 && secondaries_inhibited() &&
1266         cpu_thread_in_subcore(cpu))
1267         return -EBUSY;
1268 
1269     if (smp_ops == NULL ||
1270         (smp_ops->cpu_bootable && !smp_ops->cpu_bootable(cpu)))
1271         return -EINVAL;
1272 
1273     cpu_idle_thread_init(cpu, tidle);
1274 
1275     /*
1276      * The platform might need to allocate resources prior to bringing
1277      * up the CPU
1278      */
1279     if (smp_ops->prepare_cpu) {
1280         rc = smp_ops->prepare_cpu(cpu);
1281         if (rc)
1282             return rc;
1283     }
1284 
1285     /* Make sure callin-map entry is 0 (can be left over from a previous
1286      * CPU hotplug).
1287      */
1288     cpu_callin_map[cpu] = 0;
1289 
1290     /* The information for processor bringup must
1291      * be written out to main store before we release
1292      * the processor.
1293      */
1294     smp_mb();
1295 
1296     /* wake up cpus */
1297     DBG("smp: kicking cpu %d\n", cpu);
1298     rc = smp_ops->kick_cpu(cpu);
1299     if (rc) {
1300         pr_err("smp: failed starting cpu %d (rc %d)\n", cpu, rc);
1301         return rc;
1302     }
1303 
1304     /*
1305      * wait to see if the cpu made a callin (is actually up).
1306      * use this value that I found through experimentation.
1307      * -- Cort
1308      */
1309     if (system_state < SYSTEM_RUNNING)
1310         for (c = 50000; c && !cpu_callin_map[cpu]; c--)
1311             udelay(100);
1312 #ifdef CONFIG_HOTPLUG_CPU
1313     else
1314         /*
1315          * CPUs can take much longer to come up in the
1316          * hotplug case.  Wait five seconds.
1317          */
1318         for (c = 5000; c && !cpu_callin_map[cpu]; c--)
1319             msleep(1);
1320 #endif
1321 
1322     if (!cpu_callin_map[cpu]) {
1323         printk(KERN_ERR "Processor %u is stuck.\n", cpu);
1324         return -ENOENT;
1325     }
1326 
1327     DBG("Processor %u found.\n", cpu);
1328 
1329     if (smp_ops->give_timebase)
1330         smp_ops->give_timebase();
1331 
1332     /* Wait until cpu puts itself in the online & active maps */
1333     spin_until_cond(cpu_online(cpu));
1334 
1335     return 0;
1336 }
1337 
1338 /* Return the value of the reg property corresponding to the given
1339  * logical cpu.
1340  */
1341 int cpu_to_core_id(int cpu)
1342 {
1343     struct device_node *np;
1344     int id = -1;
1345 
1346     np = of_get_cpu_node(cpu, NULL);
1347     if (!np)
1348         goto out;
1349 
1350     id = of_get_cpu_hwid(np, 0);
1351 out:
1352     of_node_put(np);
1353     return id;
1354 }
1355 EXPORT_SYMBOL_GPL(cpu_to_core_id);
1356 
1357 /* Helper routines for cpu to core mapping */
1358 int cpu_core_index_of_thread(int cpu)
1359 {
1360     return cpu >> threads_shift;
1361 }
1362 EXPORT_SYMBOL_GPL(cpu_core_index_of_thread);
1363 
1364 int cpu_first_thread_of_core(int core)
1365 {
1366     return core << threads_shift;
1367 }
1368 EXPORT_SYMBOL_GPL(cpu_first_thread_of_core);
1369 
1370 /* Must be called when no change can occur to cpu_present_mask,
1371  * i.e. during cpu online or offline.
1372  */
1373 static struct device_node *cpu_to_l2cache(int cpu)
1374 {
1375     struct device_node *np;
1376     struct device_node *cache;
1377 
1378     if (!cpu_present(cpu))
1379         return NULL;
1380 
1381     np = of_get_cpu_node(cpu, NULL);
1382     if (np == NULL)
1383         return NULL;
1384 
1385     cache = of_find_next_cache_node(np);
1386 
1387     of_node_put(np);
1388 
1389     return cache;
1390 }
1391 
1392 static bool update_mask_by_l2(int cpu, cpumask_var_t *mask)
1393 {
1394     struct cpumask *(*submask_fn)(int) = cpu_sibling_mask;
1395     struct device_node *l2_cache, *np;
1396     int i;
1397 
1398     if (has_big_cores)
1399         submask_fn = cpu_smallcore_mask;
1400 
1401     /*
1402      * If the threads in a thread-group share L2 cache, then the
1403      * L2-mask can be obtained from thread_group_l2_cache_map.
1404      */
1405     if (thread_group_shares_l2) {
1406         cpumask_set_cpu(cpu, cpu_l2_cache_mask(cpu));
1407 
1408         for_each_cpu(i, per_cpu(thread_group_l2_cache_map, cpu)) {
1409             if (cpu_online(i))
1410                 set_cpus_related(i, cpu, cpu_l2_cache_mask);
1411         }
1412 
1413         /* Verify that L1-cache siblings are a subset of L2 cache-siblings */
1414         if (!cpumask_equal(submask_fn(cpu), cpu_l2_cache_mask(cpu)) &&
1415             !cpumask_subset(submask_fn(cpu), cpu_l2_cache_mask(cpu))) {
1416             pr_warn_once("CPU %d : Inconsistent L1 and L2 cache siblings\n",
1417                      cpu);
1418         }
1419 
1420         return true;
1421     }
1422 
1423     l2_cache = cpu_to_l2cache(cpu);
1424     if (!l2_cache || !*mask) {
1425         /* Assume only core siblings share cache with this CPU */
1426         for_each_cpu(i, cpu_sibling_mask(cpu))
1427             set_cpus_related(cpu, i, cpu_l2_cache_mask);
1428 
1429         return false;
1430     }
1431 
1432     cpumask_and(*mask, cpu_online_mask, cpu_cpu_mask(cpu));
1433 
1434     /* Update l2-cache mask with all the CPUs that are part of submask */
1435     or_cpumasks_related(cpu, cpu, submask_fn, cpu_l2_cache_mask);
1436 
1437     /* Skip all CPUs already part of current CPU l2-cache mask */
1438     cpumask_andnot(*mask, *mask, cpu_l2_cache_mask(cpu));
1439 
1440     for_each_cpu(i, *mask) {
1441         /*
1442          * When updating the masks the current CPU has not yet been marked
1443          * online, but we still need to update the cache masks.
1444          */
1445         np = cpu_to_l2cache(i);
1446 
1447         /* Skip all CPUs already part of current CPU l2-cache */
1448         if (np == l2_cache) {
1449             or_cpumasks_related(cpu, i, submask_fn, cpu_l2_cache_mask);
1450             cpumask_andnot(*mask, *mask, submask_fn(i));
1451         } else {
1452             cpumask_andnot(*mask, *mask, cpu_l2_cache_mask(i));
1453         }
1454 
1455         of_node_put(np);
1456     }
1457     of_node_put(l2_cache);
1458 
1459     return true;
1460 }
1461 
1462 #ifdef CONFIG_HOTPLUG_CPU
1463 static void remove_cpu_from_masks(int cpu)
1464 {
1465     struct cpumask *(*mask_fn)(int) = cpu_sibling_mask;
1466     int i;
1467 
1468     unmap_cpu_from_node(cpu);
1469 
1470     if (shared_caches)
1471         mask_fn = cpu_l2_cache_mask;
1472 
1473     for_each_cpu(i, mask_fn(cpu)) {
1474         set_cpus_unrelated(cpu, i, cpu_l2_cache_mask);
1475         set_cpus_unrelated(cpu, i, cpu_sibling_mask);
1476         if (has_big_cores)
1477             set_cpus_unrelated(cpu, i, cpu_smallcore_mask);
1478     }
1479 
1480     for_each_cpu(i, cpu_core_mask(cpu))
1481         set_cpus_unrelated(cpu, i, cpu_core_mask);
1482 
1483     if (has_coregroup_support()) {
1484         for_each_cpu(i, cpu_coregroup_mask(cpu))
1485             set_cpus_unrelated(cpu, i, cpu_coregroup_mask);
1486     }
1487 }
1488 #endif
1489 
1490 static inline void add_cpu_to_smallcore_masks(int cpu)
1491 {
1492     int i;
1493 
1494     if (!has_big_cores)
1495         return;
1496 
1497     cpumask_set_cpu(cpu, cpu_smallcore_mask(cpu));
1498 
1499     for_each_cpu(i, per_cpu(thread_group_l1_cache_map, cpu)) {
1500         if (cpu_online(i))
1501             set_cpus_related(i, cpu, cpu_smallcore_mask);
1502     }
1503 }
1504 
1505 static void update_coregroup_mask(int cpu, cpumask_var_t *mask)
1506 {
1507     struct cpumask *(*submask_fn)(int) = cpu_sibling_mask;
1508     int coregroup_id = cpu_to_coregroup_id(cpu);
1509     int i;
1510 
1511     if (shared_caches)
1512         submask_fn = cpu_l2_cache_mask;
1513 
1514     if (!*mask) {
1515         /* Assume only siblings are part of this CPU's coregroup */
1516         for_each_cpu(i, submask_fn(cpu))
1517             set_cpus_related(cpu, i, cpu_coregroup_mask);
1518 
1519         return;
1520     }
1521 
1522     cpumask_and(*mask, cpu_online_mask, cpu_cpu_mask(cpu));
1523 
1524     /* Update coregroup mask with all the CPUs that are part of submask */
1525     or_cpumasks_related(cpu, cpu, submask_fn, cpu_coregroup_mask);
1526 
1527     /* Skip all CPUs already part of coregroup mask */
1528     cpumask_andnot(*mask, *mask, cpu_coregroup_mask(cpu));
1529 
1530     for_each_cpu(i, *mask) {
1531         /* Skip all CPUs not part of this coregroup */
1532         if (coregroup_id == cpu_to_coregroup_id(i)) {
1533             or_cpumasks_related(cpu, i, submask_fn, cpu_coregroup_mask);
1534             cpumask_andnot(*mask, *mask, submask_fn(i));
1535         } else {
1536             cpumask_andnot(*mask, *mask, cpu_coregroup_mask(i));
1537         }
1538     }
1539 }
1540 
1541 static void add_cpu_to_masks(int cpu)
1542 {
1543     struct cpumask *(*submask_fn)(int) = cpu_sibling_mask;
1544     int first_thread = cpu_first_thread_sibling(cpu);
1545     cpumask_var_t mask;
1546     int chip_id = -1;
1547     bool ret;
1548     int i;
1549 
1550     /*
1551      * This CPU will not be in the online mask yet so we need to manually
1552      * add it to its own thread sibling mask.
1553      */
1554     map_cpu_to_node(cpu, cpu_to_node(cpu));
1555     cpumask_set_cpu(cpu, cpu_sibling_mask(cpu));
1556     cpumask_set_cpu(cpu, cpu_core_mask(cpu));
1557 
1558     for (i = first_thread; i < first_thread + threads_per_core; i++)
1559         if (cpu_online(i))
1560             set_cpus_related(i, cpu, cpu_sibling_mask);
1561 
1562     add_cpu_to_smallcore_masks(cpu);
1563 
1564     /* In CPU-hotplug path, hence use GFP_ATOMIC */
1565     ret = alloc_cpumask_var_node(&mask, GFP_ATOMIC, cpu_to_node(cpu));
1566     update_mask_by_l2(cpu, &mask);
1567 
1568     if (has_coregroup_support())
1569         update_coregroup_mask(cpu, &mask);
1570 
1571     if (chip_id_lookup_table && ret)
1572         chip_id = cpu_to_chip_id(cpu);
1573 
1574     if (shared_caches)
1575         submask_fn = cpu_l2_cache_mask;
1576 
1577     /* Update core_mask with all the CPUs that are part of submask */
1578     or_cpumasks_related(cpu, cpu, submask_fn, cpu_core_mask);
1579 
1580     /* Skip all CPUs already part of current CPU core mask */
1581     cpumask_andnot(mask, cpu_online_mask, cpu_core_mask(cpu));
1582 
1583     /* If chip_id is -1, limit the cpu_core_mask to within the DIE */
1584     if (chip_id == -1)
1585         cpumask_and(mask, mask, cpu_cpu_mask(cpu));
1586 
1587     for_each_cpu(i, mask) {
1588         if (chip_id == cpu_to_chip_id(i)) {
1589             or_cpumasks_related(cpu, i, submask_fn, cpu_core_mask);
1590             cpumask_andnot(mask, mask, submask_fn(i));
1591         } else {
1592             cpumask_andnot(mask, mask, cpu_core_mask(i));
1593         }
1594     }
1595 
1596     free_cpumask_var(mask);
1597 }
1598 
1599 /* Activate a secondary processor. */
1600 void start_secondary(void *unused)
1601 {
1602     unsigned int cpu = raw_smp_processor_id();
1603 
1604     /* PPC64 calls setup_kup() in early_setup_secondary() */
1605     if (IS_ENABLED(CONFIG_PPC32))
1606         setup_kup();
1607 
1608     mmgrab(&init_mm);
1609     current->active_mm = &init_mm;
1610 
1611     smp_store_cpu_info(cpu);
1612     set_dec(tb_ticks_per_jiffy);
1613     rcu_cpu_starting(cpu);
1614     cpu_callin_map[cpu] = 1;
1615 
1616     if (smp_ops->setup_cpu)
1617         smp_ops->setup_cpu(cpu);
1618     if (smp_ops->take_timebase)
1619         smp_ops->take_timebase();
1620 
1621     secondary_cpu_time_init();
1622 
1623 #ifdef CONFIG_PPC64
1624     if (system_state == SYSTEM_RUNNING)
1625         vdso_data->processorCount++;
1626 
1627     vdso_getcpu_init();
1628 #endif
1629     set_numa_node(numa_cpu_lookup_table[cpu]);
1630     set_numa_mem(local_memory_node(numa_cpu_lookup_table[cpu]));
1631 
1632     /* Update topology CPU masks */
1633     add_cpu_to_masks(cpu);
1634 
1635     /*
1636      * Check for any shared caches. Note that this must be done on a
1637      * per-core basis because one core in the pair might be disabled.
1638      */
1639     if (!shared_caches) {
1640         struct cpumask *(*sibling_mask)(int) = cpu_sibling_mask;
1641         struct cpumask *mask = cpu_l2_cache_mask(cpu);
1642 
1643         if (has_big_cores)
1644             sibling_mask = cpu_smallcore_mask;
1645 
1646         if (cpumask_weight(mask) > cpumask_weight(sibling_mask(cpu)))
1647             shared_caches = true;
1648     }
1649 
1650     smp_wmb();
1651     notify_cpu_starting(cpu);
1652     set_cpu_online(cpu, true);
1653 
1654     boot_init_stack_canary();
1655 
1656     local_irq_enable();
1657 
1658     /* We can enable ftrace for secondary cpus now */
1659     this_cpu_enable_ftrace();
1660 
1661     cpu_startup_entry(CPUHP_AP_ONLINE_IDLE);
1662 
1663     BUG();
1664 }
1665 
1666 static void __init fixup_topology(void)
1667 {
1668     int i;
1669 
1670 #ifdef CONFIG_SCHED_SMT
1671     if (has_big_cores) {
1672         pr_info("Big cores detected but using small core scheduling\n");
1673         powerpc_topology[smt_idx].mask = smallcore_smt_mask;
1674     }
1675 #endif
1676 
1677     if (!has_coregroup_support())
1678         powerpc_topology[mc_idx].mask = powerpc_topology[cache_idx].mask;
1679 
1680     /*
1681      * Try to consolidate topology levels here instead of
1682      * allowing the scheduler to degenerate them.
1683      * - Don't consolidate if the masks are different.
1684      * - Don't consolidate if sd_flags exist and are different.
1685      */
1686     for (i = 1; i <= die_idx; i++) {
1687         if (powerpc_topology[i].mask != powerpc_topology[i - 1].mask)
1688             continue;
1689 
1690         if (powerpc_topology[i].sd_flags && powerpc_topology[i - 1].sd_flags &&
1691                 powerpc_topology[i].sd_flags != powerpc_topology[i - 1].sd_flags)
1692             continue;
1693 
1694         if (!powerpc_topology[i - 1].sd_flags)
1695             powerpc_topology[i - 1].sd_flags = powerpc_topology[i].sd_flags;
1696 
1697         powerpc_topology[i].mask = powerpc_topology[i + 1].mask;
1698         powerpc_topology[i].sd_flags = powerpc_topology[i + 1].sd_flags;
1699 #ifdef CONFIG_SCHED_DEBUG
1700         powerpc_topology[i].name = powerpc_topology[i + 1].name;
1701 #endif
1702     }
1703 }
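/*
 * Editor's note, worked example (not part of the original file): when
 * coregroups are unsupported, the MC mask was set equal to the CACHE mask
 * just above, so the loop copies the DIE level's mask/flags/name down into
 * the MC slot and then clears the mask of the old DIE slot, which ends the
 * table there; the scheduler effectively sees SMT -> CACHE -> DIE without
 * having to degenerate a duplicate level itself.
 */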
1704 
1705 void __init smp_cpus_done(unsigned int max_cpus)
1706 {
1707     /*
1708      * We are running pinned to the boot CPU, see rest_init().
1709      */
1710     if (smp_ops && smp_ops->setup_cpu)
1711         smp_ops->setup_cpu(boot_cpuid);
1712 
1713     if (smp_ops && smp_ops->bringup_done)
1714         smp_ops->bringup_done();
1715 
1716     dump_numa_cpu_topology();
1717 
1718     fixup_topology();
1719     set_sched_topology(powerpc_topology);
1720 }
1721 
1722 #ifdef CONFIG_HOTPLUG_CPU
1723 int __cpu_disable(void)
1724 {
1725     int cpu = smp_processor_id();
1726     int err;
1727 
1728     if (!smp_ops->cpu_disable)
1729         return -ENOSYS;
1730 
1731     this_cpu_disable_ftrace();
1732 
1733     err = smp_ops->cpu_disable();
1734     if (err)
1735         return err;
1736 
1737     /* Update sibling maps */
1738     remove_cpu_from_masks(cpu);
1739 
1740     return 0;
1741 }
1742 
1743 void __cpu_die(unsigned int cpu)
1744 {
1745     if (smp_ops->cpu_die)
1746         smp_ops->cpu_die(cpu);
1747 }
1748 
1749 void arch_cpu_idle_dead(void)
1750 {
1751     /*
1752      * Disable ftrace on the down path. It will be re-enabled by
1753      * start_secondary() via start_secondary_resume() below.
1754      */
1755     this_cpu_disable_ftrace();
1756 
1757     if (smp_ops->cpu_offline_self)
1758         smp_ops->cpu_offline_self();
1759 
1760     /* If we return, we re-enter start_secondary */
1761     start_secondary_resume();
1762 }
1763 
1764 #endif