0001 // SPDX-License-Identifier: GPL-2.0-only
0002 /*
0003  * Generic helpers for smp ipi calls
0004  *
0005  * (C) Jens Axboe <jens.axboe@oracle.com> 2008
0006  */
0007 
0008 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
0009 
0010 #include <linux/irq_work.h>
0011 #include <linux/rcupdate.h>
0012 #include <linux/rculist.h>
0013 #include <linux/kernel.h>
0014 #include <linux/export.h>
0015 #include <linux/percpu.h>
0016 #include <linux/init.h>
0017 #include <linux/interrupt.h>
0018 #include <linux/gfp.h>
0019 #include <linux/smp.h>
0020 #include <linux/cpu.h>
0021 #include <linux/sched.h>
0022 #include <linux/sched/idle.h>
0023 #include <linux/hypervisor.h>
0024 #include <linux/sched/clock.h>
0025 #include <linux/nmi.h>
0026 #include <linux/sched/debug.h>
0027 #include <linux/jump_label.h>
0028 
0029 #include "smpboot.h"
0030 #include "sched/smp.h"
0031 
0032 #define CSD_TYPE(_csd)  ((_csd)->node.u_flags & CSD_FLAG_TYPE_MASK)
0033 
0034 #ifdef CONFIG_CSD_LOCK_WAIT_DEBUG
0035 union cfd_seq_cnt {
0036     u64     val;
0037     struct {
0038         u64 src:16;
0039         u64 dst:16;
0040 #define CFD_SEQ_NOCPU   0xffff
0041         u64 type:4;
0042 #define CFD_SEQ_QUEUE   0
0043 #define CFD_SEQ_IPI 1
0044 #define CFD_SEQ_NOIPI   2
0045 #define CFD_SEQ_PING    3
0046 #define CFD_SEQ_PINGED  4
0047 #define CFD_SEQ_HANDLE  5
0048 #define CFD_SEQ_DEQUEUE 6
0049 #define CFD_SEQ_IDLE    7
0050 #define CFD_SEQ_GOTIPI  8
0051 #define CFD_SEQ_HDLEND  9
0052         u64 cnt:28;
0053     }       u;
0054 };
0055 
0056 static char *seq_type[] = {
0057     [CFD_SEQ_QUEUE]     = "queue",
0058     [CFD_SEQ_IPI]       = "ipi",
0059     [CFD_SEQ_NOIPI]     = "noipi",
0060     [CFD_SEQ_PING]      = "ping",
0061     [CFD_SEQ_PINGED]    = "pinged",
0062     [CFD_SEQ_HANDLE]    = "handle",
0063     [CFD_SEQ_DEQUEUE]   = "dequeue (src CPU 0 == empty)",
0064     [CFD_SEQ_IDLE]      = "idle",
0065     [CFD_SEQ_GOTIPI]    = "gotipi",
0066     [CFD_SEQ_HDLEND]    = "hdlend (src CPU 0 == early)",
0067 };
0068 
0069 struct cfd_seq_local {
0070     u64 ping;
0071     u64 pinged;
0072     u64 handle;
0073     u64 dequeue;
0074     u64 idle;
0075     u64 gotipi;
0076     u64 hdlend;
0077 };
0078 #endif
0079 
0080 struct cfd_percpu {
0081     call_single_data_t  csd;
0082 #ifdef CONFIG_CSD_LOCK_WAIT_DEBUG
0083     u64 seq_queue;
0084     u64 seq_ipi;
0085     u64 seq_noipi;
0086 #endif
0087 };
0088 
0089 struct call_function_data {
0090     struct cfd_percpu   __percpu *pcpu;
0091     cpumask_var_t       cpumask;
0092     cpumask_var_t       cpumask_ipi;
0093 };
0094 
0095 static DEFINE_PER_CPU_ALIGNED(struct call_function_data, cfd_data);
0096 
0097 static DEFINE_PER_CPU_SHARED_ALIGNED(struct llist_head, call_single_queue);
0098 
0099 static void __flush_smp_call_function_queue(bool warn_cpu_offline);
0100 
0101 int smpcfd_prepare_cpu(unsigned int cpu)
0102 {
0103     struct call_function_data *cfd = &per_cpu(cfd_data, cpu);
0104 
0105     if (!zalloc_cpumask_var_node(&cfd->cpumask, GFP_KERNEL,
0106                      cpu_to_node(cpu)))
0107         return -ENOMEM;
0108     if (!zalloc_cpumask_var_node(&cfd->cpumask_ipi, GFP_KERNEL,
0109                      cpu_to_node(cpu))) {
0110         free_cpumask_var(cfd->cpumask);
0111         return -ENOMEM;
0112     }
0113     cfd->pcpu = alloc_percpu(struct cfd_percpu);
0114     if (!cfd->pcpu) {
0115         free_cpumask_var(cfd->cpumask);
0116         free_cpumask_var(cfd->cpumask_ipi);
0117         return -ENOMEM;
0118     }
0119 
0120     return 0;
0121 }
0122 
0123 int smpcfd_dead_cpu(unsigned int cpu)
0124 {
0125     struct call_function_data *cfd = &per_cpu(cfd_data, cpu);
0126 
0127     free_cpumask_var(cfd->cpumask);
0128     free_cpumask_var(cfd->cpumask_ipi);
0129     free_percpu(cfd->pcpu);
0130     return 0;
0131 }
0132 
0133 int smpcfd_dying_cpu(unsigned int cpu)
0134 {
0135     /*
0136      * The IPIs for the smp-call-function callbacks queued by other
0137      * CPUs might arrive late, either due to hardware latencies or
0138      * because this CPU disabled interrupts (inside stop-machine)
0139      * before the IPIs were sent. So flush out any pending callbacks
0140      * explicitly (without waiting for the IPIs to arrive), to
0141      * ensure that the outgoing CPU doesn't go offline with work
0142      * still pending.
0143      */
0144     __flush_smp_call_function_queue(false);
0145     irq_work_run();
0146     return 0;
0147 }
0148 
0149 void __init call_function_init(void)
0150 {
0151     int i;
0152 
0153     for_each_possible_cpu(i)
0154         init_llist_head(&per_cpu(call_single_queue, i));
0155 
0156     smpcfd_prepare_cpu(smp_processor_id());
0157 }
0158 
0159 #ifdef CONFIG_CSD_LOCK_WAIT_DEBUG
0160 
0161 static DEFINE_STATIC_KEY_FALSE(csdlock_debug_enabled);
0162 static DEFINE_STATIC_KEY_FALSE(csdlock_debug_extended);
0163 
0164 static int __init csdlock_debug(char *str)
0165 {
0166     unsigned int val = 0;
0167 
0168     if (str && !strcmp(str, "ext")) {
0169         val = 1;
0170         static_branch_enable(&csdlock_debug_extended);
0171     } else
0172         get_option(&str, &val);
0173 
0174     if (val)
0175         static_branch_enable(&csdlock_debug_enabled);
0176 
0177     return 1;
0178 }
0179 __setup("csdlock_debug=", csdlock_debug);
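/*
 * Illustrative command-line usage (editor's sketch, not part of the
 * original file), based on the csdlock_debug() parser above:
 *
 *	csdlock_debug=1		enable CSD-lock wait debugging
 *	csdlock_debug=ext	additionally enable the extended sequence
 *				tracking (also enables the basic debugging)
 */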
0180 
0181 static DEFINE_PER_CPU(call_single_data_t *, cur_csd);
0182 static DEFINE_PER_CPU(smp_call_func_t, cur_csd_func);
0183 static DEFINE_PER_CPU(void *, cur_csd_info);
0184 static DEFINE_PER_CPU(struct cfd_seq_local, cfd_seq_local);
0185 
0186 static ulong csd_lock_timeout = 5000;  /* CSD lock timeout in milliseconds. */
0187 module_param(csd_lock_timeout, ulong, 0444);
0188 
0189 static atomic_t csd_bug_count = ATOMIC_INIT(0);
0190 static u64 cfd_seq;
0191 
0192 #define CFD_SEQ(s, d, t, c) \
0193     (union cfd_seq_cnt){ .u.src = s, .u.dst = d, .u.type = t, .u.cnt = c }
0194 
0195 static u64 cfd_seq_inc(unsigned int src, unsigned int dst, unsigned int type)
0196 {
0197     union cfd_seq_cnt new, old;
0198 
0199     new = CFD_SEQ(src, dst, type, 0);
0200 
0201     do {
0202         old.val = READ_ONCE(cfd_seq);
0203         new.u.cnt = old.u.cnt + 1;
0204     } while (cmpxchg(&cfd_seq, old.val, new.val) != old.val);
0205 
0206     return old.val;
0207 }
0208 
0209 #define cfd_seq_store(var, src, dst, type)              \
0210     do {                                \
0211         if (static_branch_unlikely(&csdlock_debug_extended))    \
0212             var = cfd_seq_inc(src, dst, type);      \
0213     } while (0)
0214 
0215 /* Record current CSD work for current CPU, NULL to erase. */
0216 static void __csd_lock_record(struct __call_single_data *csd)
0217 {
0218     if (!csd) {
0219         smp_mb(); /* NULL cur_csd after unlock. */
0220         __this_cpu_write(cur_csd, NULL);
0221         return;
0222     }
0223     __this_cpu_write(cur_csd_func, csd->func);
0224     __this_cpu_write(cur_csd_info, csd->info);
0225     smp_wmb(); /* func and info before csd. */
0226     __this_cpu_write(cur_csd, csd);
0227     smp_mb(); /* Update cur_csd before function call. */
0228           /* Or before unlock, as the case may be. */
0229 }
0230 
0231 static __always_inline void csd_lock_record(struct __call_single_data *csd)
0232 {
0233     if (static_branch_unlikely(&csdlock_debug_enabled))
0234         __csd_lock_record(csd);
0235 }
0236 
0237 static int csd_lock_wait_getcpu(struct __call_single_data *csd)
0238 {
0239     unsigned int csd_type;
0240 
0241     csd_type = CSD_TYPE(csd);
0242     if (csd_type == CSD_TYPE_ASYNC || csd_type == CSD_TYPE_SYNC)
0243         return csd->node.dst; /* Other CSD_TYPE_ values might not have ->dst. */
0244     return -1;
0245 }
0246 
0247 static void cfd_seq_data_add(u64 val, unsigned int src, unsigned int dst,
0248                  unsigned int type, union cfd_seq_cnt *data,
0249                  unsigned int *n_data, unsigned int now)
0250 {
0251     union cfd_seq_cnt new[2];
0252     unsigned int i, j, k;
0253 
0254     new[0].val = val;
0255     new[1] = CFD_SEQ(src, dst, type, new[0].u.cnt + 1);
0256 
0257     for (i = 0; i < 2; i++) {
0258         if (new[i].u.cnt <= now)
0259             new[i].u.cnt |= 0x80000000U;
0260         for (j = 0; j < *n_data; j++) {
0261             if (new[i].u.cnt == data[j].u.cnt) {
0262                 /* Direct read value trumps generated one. */
0263                 if (i == 0)
0264                     data[j].val = new[i].val;
0265                 break;
0266             }
0267             if (new[i].u.cnt < data[j].u.cnt) {
0268                 for (k = *n_data; k > j; k--)
0269                     data[k].val = data[k - 1].val;
0270                 data[j].val = new[i].val;
0271                 (*n_data)++;
0272                 break;
0273             }
0274         }
0275         if (j == *n_data) {
0276             data[j].val = new[i].val;
0277             (*n_data)++;
0278         }
0279     }
0280 }
0281 
0282 static const char *csd_lock_get_type(unsigned int type)
0283 {
0284     return (type >= ARRAY_SIZE(seq_type)) ? "?" : seq_type[type];
0285 }
0286 
0287 static void csd_lock_print_extended(struct __call_single_data *csd, int cpu)
0288 {
0289     struct cfd_seq_local *seq = &per_cpu(cfd_seq_local, cpu);
0290     unsigned int srccpu = csd->node.src;
0291     struct call_function_data *cfd = per_cpu_ptr(&cfd_data, srccpu);
0292     struct cfd_percpu *pcpu = per_cpu_ptr(cfd->pcpu, cpu);
0293     unsigned int now;
0294     union cfd_seq_cnt data[2 * ARRAY_SIZE(seq_type)];
0295     unsigned int n_data = 0, i;
0296 
0297     data[0].val = READ_ONCE(cfd_seq);
0298     now = data[0].u.cnt;
0299 
0300     cfd_seq_data_add(pcpu->seq_queue,           srccpu, cpu,           CFD_SEQ_QUEUE,  data, &n_data, now);
0301     cfd_seq_data_add(pcpu->seq_ipi,             srccpu, cpu,           CFD_SEQ_IPI,    data, &n_data, now);
0302     cfd_seq_data_add(pcpu->seq_noipi,           srccpu, cpu,           CFD_SEQ_NOIPI,  data, &n_data, now);
0303 
0304     cfd_seq_data_add(per_cpu(cfd_seq_local.ping, srccpu),   srccpu, CFD_SEQ_NOCPU, CFD_SEQ_PING,   data, &n_data, now);
0305     cfd_seq_data_add(per_cpu(cfd_seq_local.pinged, srccpu), srccpu, CFD_SEQ_NOCPU, CFD_SEQ_PINGED, data, &n_data, now);
0306 
0307     cfd_seq_data_add(seq->idle,    CFD_SEQ_NOCPU, cpu, CFD_SEQ_IDLE,    data, &n_data, now);
0308     cfd_seq_data_add(seq->gotipi,  CFD_SEQ_NOCPU, cpu, CFD_SEQ_GOTIPI,  data, &n_data, now);
0309     cfd_seq_data_add(seq->handle,  CFD_SEQ_NOCPU, cpu, CFD_SEQ_HANDLE,  data, &n_data, now);
0310     cfd_seq_data_add(seq->dequeue, CFD_SEQ_NOCPU, cpu, CFD_SEQ_DEQUEUE, data, &n_data, now);
0311     cfd_seq_data_add(seq->hdlend,  CFD_SEQ_NOCPU, cpu, CFD_SEQ_HDLEND,  data, &n_data, now);
0312 
0313     for (i = 0; i < n_data; i++) {
0314         pr_alert("\tcsd: cnt(%07x): %04x->%04x %s\n",
0315              data[i].u.cnt & ~0x80000000U, data[i].u.src,
0316              data[i].u.dst, csd_lock_get_type(data[i].u.type));
0317     }
0318     pr_alert("\tcsd: cnt now: %07x\n", now);
0319 }
0320 
0321 /*
0322  * Complain if too much time is spent waiting.  Note that only
0323  * the CSD_TYPE_SYNC/ASYNC types provide the destination CPU,
0324  * so waiting on other types gets much less information.
0325  */
0326 static bool csd_lock_wait_toolong(struct __call_single_data *csd, u64 ts0, u64 *ts1, int *bug_id)
0327 {
0328     int cpu = -1;
0329     int cpux;
0330     bool firsttime;
0331     u64 ts2, ts_delta;
0332     call_single_data_t *cpu_cur_csd;
0333     unsigned int flags = READ_ONCE(csd->node.u_flags);
0334     unsigned long long csd_lock_timeout_ns = csd_lock_timeout * NSEC_PER_MSEC;
0335 
0336     if (!(flags & CSD_FLAG_LOCK)) {
0337         if (!unlikely(*bug_id))
0338             return true;
0339         cpu = csd_lock_wait_getcpu(csd);
0340         pr_alert("csd: CSD lock (#%d) got unstuck on CPU#%02d, CPU#%02d released the lock.\n",
0341              *bug_id, raw_smp_processor_id(), cpu);
0342         return true;
0343     }
0344 
0345     ts2 = sched_clock();
0346     ts_delta = ts2 - *ts1;
0347     if (likely(ts_delta <= csd_lock_timeout_ns || csd_lock_timeout_ns == 0))
0348         return false;
0349 
0350     firsttime = !*bug_id;
0351     if (firsttime)
0352         *bug_id = atomic_inc_return(&csd_bug_count);
0353     cpu = csd_lock_wait_getcpu(csd);
0354     if (WARN_ONCE(cpu < 0 || cpu >= nr_cpu_ids, "%s: cpu = %d\n", __func__, cpu))
0355         cpux = 0;
0356     else
0357         cpux = cpu;
0358     cpu_cur_csd = smp_load_acquire(&per_cpu(cur_csd, cpux)); /* Before func and info. */
0359     pr_alert("csd: %s non-responsive CSD lock (#%d) on CPU#%d, waiting %llu ns for CPU#%02d %pS(%ps).\n",
0360          firsttime ? "Detected" : "Continued", *bug_id, raw_smp_processor_id(), ts2 - ts0,
0361          cpu, csd->func, csd->info);
0362     if (cpu_cur_csd && csd != cpu_cur_csd) {
0363         pr_alert("\tcsd: CSD lock (#%d) handling prior %pS(%ps) request.\n",
0364              *bug_id, READ_ONCE(per_cpu(cur_csd_func, cpux)),
0365              READ_ONCE(per_cpu(cur_csd_info, cpux)));
0366     } else {
0367         pr_alert("\tcsd: CSD lock (#%d) %s.\n",
0368              *bug_id, !cpu_cur_csd ? "unresponsive" : "handling this request");
0369     }
0370     if (cpu >= 0) {
0371         if (static_branch_unlikely(&csdlock_debug_extended))
0372             csd_lock_print_extended(csd, cpu);
0373         if (!trigger_single_cpu_backtrace(cpu))
0374             dump_cpu_task(cpu);
0375         if (!cpu_cur_csd) {
0376             pr_alert("csd: Re-sending CSD lock (#%d) IPI from CPU#%02d to CPU#%02d\n", *bug_id, raw_smp_processor_id(), cpu);
0377             arch_send_call_function_single_ipi(cpu);
0378         }
0379     }
0380     dump_stack();
0381     *ts1 = ts2;
0382 
0383     return false;
0384 }
0385 
0386 /*
0387  * csd_lock/csd_unlock are used to serialize access to per-cpu csd resources.
0388  *
0389  * For non-synchronous ipi calls the csd can still be in use by the
0390  * previous function call. For multi-cpu calls it's even more interesting
0391  * as we'll have to ensure no other cpu is observing our csd.
0392  */
0393 static void __csd_lock_wait(struct __call_single_data *csd)
0394 {
0395     int bug_id = 0;
0396     u64 ts0, ts1;
0397 
0398     ts1 = ts0 = sched_clock();
0399     for (;;) {
0400         if (csd_lock_wait_toolong(csd, ts0, &ts1, &bug_id))
0401             break;
0402         cpu_relax();
0403     }
0404     smp_acquire__after_ctrl_dep();
0405 }
0406 
0407 static __always_inline void csd_lock_wait(struct __call_single_data *csd)
0408 {
0409     if (static_branch_unlikely(&csdlock_debug_enabled)) {
0410         __csd_lock_wait(csd);
0411         return;
0412     }
0413 
0414     smp_cond_load_acquire(&csd->node.u_flags, !(VAL & CSD_FLAG_LOCK));
0415 }
0416 
0417 static void __smp_call_single_queue_debug(int cpu, struct llist_node *node)
0418 {
0419     unsigned int this_cpu = smp_processor_id();
0420     struct cfd_seq_local *seq = this_cpu_ptr(&cfd_seq_local);
0421     struct call_function_data *cfd = this_cpu_ptr(&cfd_data);
0422     struct cfd_percpu *pcpu = per_cpu_ptr(cfd->pcpu, cpu);
0423 
0424     cfd_seq_store(pcpu->seq_queue, this_cpu, cpu, CFD_SEQ_QUEUE);
0425     if (llist_add(node, &per_cpu(call_single_queue, cpu))) {
0426         cfd_seq_store(pcpu->seq_ipi, this_cpu, cpu, CFD_SEQ_IPI);
0427         cfd_seq_store(seq->ping, this_cpu, cpu, CFD_SEQ_PING);
0428         send_call_function_single_ipi(cpu);
0429         cfd_seq_store(seq->pinged, this_cpu, cpu, CFD_SEQ_PINGED);
0430     } else {
0431         cfd_seq_store(pcpu->seq_noipi, this_cpu, cpu, CFD_SEQ_NOIPI);
0432     }
0433 }
0434 #else
0435 #define cfd_seq_store(var, src, dst, type)
0436 
0437 static void csd_lock_record(struct __call_single_data *csd)
0438 {
0439 }
0440 
0441 static __always_inline void csd_lock_wait(struct __call_single_data *csd)
0442 {
0443     smp_cond_load_acquire(&csd->node.u_flags, !(VAL & CSD_FLAG_LOCK));
0444 }
0445 #endif
0446 
0447 static __always_inline void csd_lock(struct __call_single_data *csd)
0448 {
0449     csd_lock_wait(csd);
0450     csd->node.u_flags |= CSD_FLAG_LOCK;
0451 
0452     /*
0453      * prevent CPU from reordering the above assignment
0454      * to ->flags with any subsequent assignments to other
0455      * fields of the specified call_single_data_t structure:
0456      */
0457     smp_wmb();
0458 }
0459 
0460 static __always_inline void csd_unlock(struct __call_single_data *csd)
0461 {
0462     WARN_ON(!(csd->node.u_flags & CSD_FLAG_LOCK));
0463 
0464     /*
0465      * ensure we're all done before releasing data:
0466      */
0467     smp_store_release(&csd->node.u_flags, 0);
0468 }
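/*
 * Editor's note (not part of the original file), summarizing the locking
 * protocol implemented above and relied upon by the queue flush below:
 * a sender takes CSD_FLAG_LOCK with csd_lock(), fills in ->func/->info and
 * queues the csd; the target CPU either runs the function and then
 * csd_unlock()s (synchronous) or csd_unlock()s first and then runs it
 * (asynchronous). A waiting sender returns from csd_lock_wait() once the
 * flag is cleared, the release store in csd_unlock() pairing with the
 * acquire in csd_lock_wait().
 */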
0469 
0470 static DEFINE_PER_CPU_SHARED_ALIGNED(call_single_data_t, csd_data);
0471 
0472 void __smp_call_single_queue(int cpu, struct llist_node *node)
0473 {
0474 #ifdef CONFIG_CSD_LOCK_WAIT_DEBUG
0475     if (static_branch_unlikely(&csdlock_debug_extended)) {
0476         unsigned int type;
0477 
0478         type = CSD_TYPE(container_of(node, call_single_data_t,
0479                          node.llist));
0480         if (type == CSD_TYPE_SYNC || type == CSD_TYPE_ASYNC) {
0481             __smp_call_single_queue_debug(cpu, node);
0482             return;
0483         }
0484     }
0485 #endif
0486 
0487     /*
0488      * The list addition should be visible to the IPI handler, which
0489      * locks the list to pull the entry off it, before the IPI is sent,
0490      * because of the normal cache coherency rules implied by spinlocks.
0491      *
0492      * If IPIs can go out of order to the cache coherency protocol
0493      * in an architecture, sufficient synchronisation should be added
0494      * to arch code to make it appear to obey cache coherency WRT
0495      * locking and barrier primitives. Generic code isn't really
0496      * equipped to do the right thing...
0497      */
0498     if (llist_add(node, &per_cpu(call_single_queue, cpu)))
0499         send_call_function_single_ipi(cpu);
0500 }
0501 
0502 /*
0503  * Insert a previously allocated call_single_data_t element
0504  * for execution on the given CPU. data must already have
0505  * ->func, ->info, and ->flags set.
0506  */
0507 static int generic_exec_single(int cpu, struct __call_single_data *csd)
0508 {
0509     if (cpu == smp_processor_id()) {
0510         smp_call_func_t func = csd->func;
0511         void *info = csd->info;
0512         unsigned long flags;
0513 
0514         /*
0515          * We can unlock early even for the synchronous on-stack case,
0516          * since we're doing this from the same CPU.
0517          */
0518         csd_lock_record(csd);
0519         csd_unlock(csd);
0520         local_irq_save(flags);
0521         func(info);
0522         csd_lock_record(NULL);
0523         local_irq_restore(flags);
0524         return 0;
0525     }
0526 
0527     if ((unsigned)cpu >= nr_cpu_ids || !cpu_online(cpu)) {
0528         csd_unlock(csd);
0529         return -ENXIO;
0530     }
0531 
0532     __smp_call_single_queue(cpu, &csd->node.llist);
0533 
0534     return 0;
0535 }
0536 
0537 /**
0538  * generic_smp_call_function_single_interrupt - Execute SMP IPI callbacks
0539  *
0540  * Invoked by arch to handle an IPI for call function single.
0541  * Must be called with interrupts disabled.
0542  */
0543 void generic_smp_call_function_single_interrupt(void)
0544 {
0545     cfd_seq_store(this_cpu_ptr(&cfd_seq_local)->gotipi, CFD_SEQ_NOCPU,
0546               smp_processor_id(), CFD_SEQ_GOTIPI);
0547     __flush_smp_call_function_queue(true);
0548 }
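/*
 * Illustrative sketch (editor's addition, not part of the original file):
 * architecture code is expected to invoke the generic handler above from
 * its "call function single" IPI vector, with interrupts disabled. The
 * handler name below is hypothetical; real entry points live in the
 * respective arch/<arch>/kernel/smp.c files.
 */
static void example_arch_call_function_single_ipi(void)
{
	/* The arch IPI entry code has already disabled interrupts here. */
	generic_smp_call_function_single_interrupt();
}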
0549 
0550 /**
0551  * __flush_smp_call_function_queue - Flush pending smp-call-function callbacks
0552  *
0553  * @warn_cpu_offline: If set to 'true', warn if callbacks were queued on an
0554  *            offline CPU. Skip this check if set to 'false'.
0555  *
0556  * Flush any pending smp-call-function callbacks queued on this CPU. This is
0557  * invoked by the generic IPI handler, as well as by a CPU about to go offline,
0558  * to ensure that all pending IPI callbacks are run before it goes completely
0559  * offline.
0560  *
0561  * Loop through the call_single_queue and run all the queued callbacks.
0562  * Must be called with interrupts disabled.
0563  */
0564 static void __flush_smp_call_function_queue(bool warn_cpu_offline)
0565 {
0566     call_single_data_t *csd, *csd_next;
0567     struct llist_node *entry, *prev;
0568     struct llist_head *head;
0569     static bool warned;
0570 
0571     lockdep_assert_irqs_disabled();
0572 
0573     head = this_cpu_ptr(&call_single_queue);
0574     cfd_seq_store(this_cpu_ptr(&cfd_seq_local)->handle, CFD_SEQ_NOCPU,
0575               smp_processor_id(), CFD_SEQ_HANDLE);
0576     entry = llist_del_all(head);
0577     cfd_seq_store(this_cpu_ptr(&cfd_seq_local)->dequeue,
0578               /* Special meaning of source cpu: 0 == queue empty */
0579               entry ? CFD_SEQ_NOCPU : 0,
0580               smp_processor_id(), CFD_SEQ_DEQUEUE);
0581     entry = llist_reverse_order(entry);
0582 
0583     /* There shouldn't be any pending callbacks on an offline CPU. */
0584     if (unlikely(warn_cpu_offline && !cpu_online(smp_processor_id()) &&
0585              !warned && entry != NULL)) {
0586         warned = true;
0587         WARN(1, "IPI on offline CPU %d\n", smp_processor_id());
0588 
0589         /*
0590          * We don't have to use the _safe() variant here
0591          * because we are not invoking the IPI handlers yet.
0592          */
0593         llist_for_each_entry(csd, entry, node.llist) {
0594             switch (CSD_TYPE(csd)) {
0595             case CSD_TYPE_ASYNC:
0596             case CSD_TYPE_SYNC:
0597             case CSD_TYPE_IRQ_WORK:
0598                 pr_warn("IPI callback %pS sent to offline CPU\n",
0599                     csd->func);
0600                 break;
0601 
0602             case CSD_TYPE_TTWU:
0603                 pr_warn("IPI task-wakeup sent to offline CPU\n");
0604                 break;
0605 
0606             default:
0607                 pr_warn("IPI callback, unknown type %d, sent to offline CPU\n",
0608                     CSD_TYPE(csd));
0609                 break;
0610             }
0611         }
0612     }
0613 
0614     /*
0615      * First; run all SYNC callbacks, people are waiting for us.
0616      */
0617     prev = NULL;
0618     llist_for_each_entry_safe(csd, csd_next, entry, node.llist) {
0619         /* Do we wait until *after* callback? */
0620         if (CSD_TYPE(csd) == CSD_TYPE_SYNC) {
0621             smp_call_func_t func = csd->func;
0622             void *info = csd->info;
0623 
0624             if (prev) {
0625                 prev->next = &csd_next->node.llist;
0626             } else {
0627                 entry = &csd_next->node.llist;
0628             }
0629 
0630             csd_lock_record(csd);
0631             func(info);
0632             csd_unlock(csd);
0633             csd_lock_record(NULL);
0634         } else {
0635             prev = &csd->node.llist;
0636         }
0637     }
0638 
0639     if (!entry) {
0640         cfd_seq_store(this_cpu_ptr(&cfd_seq_local)->hdlend,
0641                   0, smp_processor_id(),
0642                   CFD_SEQ_HDLEND);
0643         return;
0644     }
0645 
0646     /*
0647      * Second; run all !SYNC callbacks.
0648      */
0649     prev = NULL;
0650     llist_for_each_entry_safe(csd, csd_next, entry, node.llist) {
0651         int type = CSD_TYPE(csd);
0652 
0653         if (type != CSD_TYPE_TTWU) {
0654             if (prev) {
0655                 prev->next = &csd_next->node.llist;
0656             } else {
0657                 entry = &csd_next->node.llist;
0658             }
0659 
0660             if (type == CSD_TYPE_ASYNC) {
0661                 smp_call_func_t func = csd->func;
0662                 void *info = csd->info;
0663 
0664                 csd_lock_record(csd);
0665                 csd_unlock(csd);
0666                 func(info);
0667                 csd_lock_record(NULL);
0668             } else if (type == CSD_TYPE_IRQ_WORK) {
0669                 irq_work_single(csd);
0670             }
0671 
0672         } else {
0673             prev = &csd->node.llist;
0674         }
0675     }
0676 
0677     /*
0678      * Third; only CSD_TYPE_TTWU is left, issue those.
0679      */
0680     if (entry)
0681         sched_ttwu_pending(entry);
0682 
0683     cfd_seq_store(this_cpu_ptr(&cfd_seq_local)->hdlend, CFD_SEQ_NOCPU,
0684               smp_processor_id(), CFD_SEQ_HDLEND);
0685 }
0686 
0687 
0688 /**
0689  * flush_smp_call_function_queue - Flush pending smp-call-function callbacks
0690  *                 from task context (idle, migration thread)
0691  *
0692  * When TIF_POLLING_NRFLAG is supported and a CPU is in idle and has it
0693  * set, then remote CPUs can avoid sending IPIs and wake the idle CPU by
0694  * setting TIF_NEED_RESCHED. The idle task on the woken up CPU has to
0695  * handle queued SMP function calls before scheduling.
0696  *
0697  * The migration thread has to ensure that an eventually pending wakeup has
0698  * been handled before it migrates a task.
0699  */
0700 void flush_smp_call_function_queue(void)
0701 {
0702     unsigned int was_pending;
0703     unsigned long flags;
0704 
0705     if (llist_empty(this_cpu_ptr(&call_single_queue)))
0706         return;
0707 
0708     cfd_seq_store(this_cpu_ptr(&cfd_seq_local)->idle, CFD_SEQ_NOCPU,
0709               smp_processor_id(), CFD_SEQ_IDLE);
0710     local_irq_save(flags);
0711     /* Get the already pending soft interrupts for RT enabled kernels */
0712     was_pending = local_softirq_pending();
0713     __flush_smp_call_function_queue(true);
0714     if (local_softirq_pending())
0715         do_softirq_post_smp_call_flush(was_pending);
0716 
0717     local_irq_restore(flags);
0718 }
0719 
0720 /*
0721  * smp_call_function_single - Run a function on a specific CPU
0722  * @func: The function to run. This must be fast and non-blocking.
0723  * @info: An arbitrary pointer to pass to the function.
0724  * @wait: If true, wait until function has completed on other CPUs.
0725  *
0726  * Returns 0 on success, else a negative status code.
0727  */
0728 int smp_call_function_single(int cpu, smp_call_func_t func, void *info,
0729                  int wait)
0730 {
0731     call_single_data_t *csd;
0732     call_single_data_t csd_stack = {
0733         .node = { .u_flags = CSD_FLAG_LOCK | CSD_TYPE_SYNC, },
0734     };
0735     int this_cpu;
0736     int err;
0737 
0738     /*
0739      * prevent preemption and reschedule on another processor,
0740      * as well as CPU removal
0741      */
0742     this_cpu = get_cpu();
0743 
0744     /*
0745      * Can deadlock when called with interrupts disabled.
0746      * We allow CPUs that are not yet online though, as no one else can
0747      * send smp call function interrupt to this cpu and as such deadlocks
0748      * can't happen.
0749      */
0750     WARN_ON_ONCE(cpu_online(this_cpu) && irqs_disabled()
0751              && !oops_in_progress);
0752 
0753     /*
0754      * When @wait we can deadlock when we interrupt between llist_add() and
0755      * arch_send_call_function_ipi*(); when !@wait we can deadlock due to
0756      * csd_lock(), because the interrupt context uses the same csd
0757      * storage.
0758      */
0759     WARN_ON_ONCE(!in_task());
0760 
0761     csd = &csd_stack;
0762     if (!wait) {
0763         csd = this_cpu_ptr(&csd_data);
0764         csd_lock(csd);
0765     }
0766 
0767     csd->func = func;
0768     csd->info = info;
0769 #ifdef CONFIG_CSD_LOCK_WAIT_DEBUG
0770     csd->node.src = smp_processor_id();
0771     csd->node.dst = cpu;
0772 #endif
0773 
0774     err = generic_exec_single(cpu, csd);
0775 
0776     if (wait)
0777         csd_lock_wait(csd);
0778 
0779     put_cpu();
0780 
0781     return err;
0782 }
0783 EXPORT_SYMBOL(smp_call_function_single);
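/*
 * Illustrative usage sketch (editor's addition, not part of the original
 * file). Only smp_call_function_single() comes from this file; the
 * example_* identifiers and the per-CPU counter are hypothetical.
 */
static DEFINE_PER_CPU(u64, example_counter);

static void example_read_counter(void *info)
{
	/* Runs on the target CPU with interrupts disabled (IPI context if remote). */
	*(u64 *)info = this_cpu_read(example_counter);
}

static u64 example_counter_on(int cpu)
{
	u64 val = 0;

	/* wait=1: only return after example_read_counter() ran on @cpu. */
	if (smp_call_function_single(cpu, example_read_counter, &val, 1))
		return 0;	/* -ENXIO: @cpu is not online. */
	return val;
}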
0784 
0785 /**
0786  * smp_call_function_single_async() - Run an asynchronous function on a
0787  *                   specific CPU.
0788  * @cpu: The CPU to run on.
0789  * @csd: Pre-allocated and setup data structure
0790  *
0791  * Like smp_call_function_single(), but the call is asynchronous and
0792  * can thus be done from contexts with disabled interrupts.
0793  *
0794  * The caller passes its own pre-allocated data structure
0795  * (i.e. embedded in an object) and is responsible for synchronizing it
0796  * such that the IPIs performed on the @csd are strictly serialized.
0797  *
0798  * If the function is called with a csd which has not yet been
0799  * processed by a previous call to smp_call_function_single_async(), the
0800  * function will return immediately with -EBUSY, indicating that the csd
0801  * object is still in use.
0802  *
0803  * NOTE: Be careful, there is unfortunately no current debugging facility to
0804  * validate the correctness of this serialization.
0805  *
0806  * Return: %0 on success or negative errno value on error
0807  */
0808 int smp_call_function_single_async(int cpu, struct __call_single_data *csd)
0809 {
0810     int err = 0;
0811 
0812     preempt_disable();
0813 
0814     if (csd->node.u_flags & CSD_FLAG_LOCK) {
0815         err = -EBUSY;
0816         goto out;
0817     }
0818 
0819     csd->node.u_flags = CSD_FLAG_LOCK;
0820     smp_wmb();
0821 
0822     err = generic_exec_single(cpu, csd);
0823 
0824 out:
0825     preempt_enable();
0826 
0827     return err;
0828 }
0829 EXPORT_SYMBOL_GPL(smp_call_function_single_async);
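/*
 * Illustrative usage sketch (editor's addition, not part of the original
 * file). The csd is embedded in the caller's object, as the comment above
 * suggests; struct example_dev and the example_* functions are
 * hypothetical, and INIT_CSD() is assumed to be available from
 * <linux/smp.h> in this kernel version.
 */
struct example_dev {
	call_single_data_t	csd;
	int			done;
};

static void example_complete(void *info)
{
	struct example_dev *dev = info;

	/* Runs on the remote CPU; the csd was already unlocked (async). */
	WRITE_ONCE(dev->done, 1);
}

static void example_dev_init(struct example_dev *dev)
{
	INIT_CSD(&dev->csd, example_complete, dev);
}

static int example_kick(struct example_dev *dev, int cpu)
{
	/*
	 * Returns -EBUSY if a previous request on this csd has not been
	 * processed yet; the caller is responsible for serialization.
	 */
	return smp_call_function_single_async(cpu, &dev->csd);
}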
0830 
0831 /*
0832  * smp_call_function_any - Run a function on any of the given cpus
0833  * @mask: The mask of cpus it can run on.
0834  * @func: The function to run. This must be fast and non-blocking.
0835  * @info: An arbitrary pointer to pass to the function.
0836  * @wait: If true, wait until function has completed.
0837  *
0838  * Returns 0 on success, else a negative status code (if no cpus were online).
0839  *
0840  * Selection preference:
0841  *  1) current cpu if in @mask
0842  *  2) any cpu of current node if in @mask
0843  *  3) any other online cpu in @mask
0844  */
0845 int smp_call_function_any(const struct cpumask *mask,
0846               smp_call_func_t func, void *info, int wait)
0847 {
0848     unsigned int cpu;
0849     const struct cpumask *nodemask;
0850     int ret;
0851 
0852     /* Try for same CPU (cheapest) */
0853     cpu = get_cpu();
0854     if (cpumask_test_cpu(cpu, mask))
0855         goto call;
0856 
0857     /* Try for same node. */
0858     nodemask = cpumask_of_node(cpu_to_node(cpu));
0859     for (cpu = cpumask_first_and(nodemask, mask); cpu < nr_cpu_ids;
0860          cpu = cpumask_next_and(cpu, nodemask, mask)) {
0861         if (cpu_online(cpu))
0862             goto call;
0863     }
0864 
0865     /* Any online will do: smp_call_function_single handles nr_cpu_ids. */
0866     cpu = cpumask_any_and(mask, cpu_online_mask);
0867 call:
0868     ret = smp_call_function_single(cpu, func, info, wait);
0869     put_cpu();
0870     return ret;
0871 }
0872 EXPORT_SYMBOL_GPL(smp_call_function_any);
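/*
 * Illustrative usage sketch (editor's addition, not part of the original
 * file): run a quick probe on whichever CPU of a caller-supplied mask is
 * cheapest to reach. The example_* names are hypothetical.
 */
static void example_probe(void *info)
{
	*(int *)info = raw_smp_processor_id();
}

static int example_probe_nearby(const struct cpumask *candidates)
{
	int ran_on = -1;

	/* Prefers the current CPU, then its node, then any online CPU. */
	smp_call_function_any(candidates, example_probe, &ran_on, 1);
	return ran_on;
}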
0873 
0874 /*
0875  * Flags to be used as scf_flags argument of smp_call_function_many_cond().
0876  *
0877  * %SCF_WAIT:       Wait until function execution is completed
0878  * %SCF_RUN_LOCAL:  Run also locally if local cpu is set in cpumask
0879  */
0880 #define SCF_WAIT    (1U << 0)
0881 #define SCF_RUN_LOCAL   (1U << 1)
0882 
0883 static void smp_call_function_many_cond(const struct cpumask *mask,
0884                     smp_call_func_t func, void *info,
0885                     unsigned int scf_flags,
0886                     smp_cond_func_t cond_func)
0887 {
0888     int cpu, last_cpu, this_cpu = smp_processor_id();
0889     struct call_function_data *cfd;
0890     bool wait = scf_flags & SCF_WAIT;
0891     bool run_remote = false;
0892     bool run_local = false;
0893     int nr_cpus = 0;
0894 
0895     lockdep_assert_preemption_disabled();
0896 
0897     /*
0898      * Can deadlock when called with interrupts disabled.
0899      * We allow CPUs that are not yet online though, as no one else can
0900      * send smp call function interrupt to this cpu and as such deadlocks
0901      * can't happen.
0902      */
0903     if (cpu_online(this_cpu) && !oops_in_progress &&
0904         !early_boot_irqs_disabled)
0905         lockdep_assert_irqs_enabled();
0906 
0907     /*
0908      * When @wait we can deadlock when we interrupt between llist_add() and
0909      * arch_send_call_function_ipi*(); when !@wait we can deadlock due to
0910      * csd_lock(), because the interrupt context uses the same csd
0911      * storage.
0912      */
0913     WARN_ON_ONCE(!in_task());
0914 
0915     /* Check if we need local execution. */
0916     if ((scf_flags & SCF_RUN_LOCAL) && cpumask_test_cpu(this_cpu, mask))
0917         run_local = true;
0918 
0919     /* Check if we need remote execution, i.e., any CPU excluding this one. */
0920     cpu = cpumask_first_and(mask, cpu_online_mask);
0921     if (cpu == this_cpu)
0922         cpu = cpumask_next_and(cpu, mask, cpu_online_mask);
0923     if (cpu < nr_cpu_ids)
0924         run_remote = true;
0925 
0926     if (run_remote) {
0927         cfd = this_cpu_ptr(&cfd_data);
0928         cpumask_and(cfd->cpumask, mask, cpu_online_mask);
0929         __cpumask_clear_cpu(this_cpu, cfd->cpumask);
0930 
0931         cpumask_clear(cfd->cpumask_ipi);
0932         for_each_cpu(cpu, cfd->cpumask) {
0933             struct cfd_percpu *pcpu = per_cpu_ptr(cfd->pcpu, cpu);
0934             call_single_data_t *csd = &pcpu->csd;
0935 
0936             if (cond_func && !cond_func(cpu, info))
0937                 continue;
0938 
0939             csd_lock(csd);
0940             if (wait)
0941                 csd->node.u_flags |= CSD_TYPE_SYNC;
0942             csd->func = func;
0943             csd->info = info;
0944 #ifdef CONFIG_CSD_LOCK_WAIT_DEBUG
0945             csd->node.src = smp_processor_id();
0946             csd->node.dst = cpu;
0947 #endif
0948             cfd_seq_store(pcpu->seq_queue, this_cpu, cpu, CFD_SEQ_QUEUE);
0949             if (llist_add(&csd->node.llist, &per_cpu(call_single_queue, cpu))) {
0950                 __cpumask_set_cpu(cpu, cfd->cpumask_ipi);
0951                 nr_cpus++;
0952                 last_cpu = cpu;
0953 
0954                 cfd_seq_store(pcpu->seq_ipi, this_cpu, cpu, CFD_SEQ_IPI);
0955             } else {
0956                 cfd_seq_store(pcpu->seq_noipi, this_cpu, cpu, CFD_SEQ_NOIPI);
0957             }
0958         }
0959 
0960         cfd_seq_store(this_cpu_ptr(&cfd_seq_local)->ping, this_cpu, CFD_SEQ_NOCPU, CFD_SEQ_PING);
0961 
0962         /*
0963          * Choose the most efficient way to send an IPI. Note that the
0964          * number of CPUs might be zero due to concurrent changes to the
0965          * provided mask.
0966          */
0967         if (nr_cpus == 1)
0968             send_call_function_single_ipi(last_cpu);
0969         else if (likely(nr_cpus > 1))
0970             arch_send_call_function_ipi_mask(cfd->cpumask_ipi);
0971 
0972         cfd_seq_store(this_cpu_ptr(&cfd_seq_local)->pinged, this_cpu, CFD_SEQ_NOCPU, CFD_SEQ_PINGED);
0973     }
0974 
0975     if (run_local && (!cond_func || cond_func(this_cpu, info))) {
0976         unsigned long flags;
0977 
0978         local_irq_save(flags);
0979         func(info);
0980         local_irq_restore(flags);
0981     }
0982 
0983     if (run_remote && wait) {
0984         for_each_cpu(cpu, cfd->cpumask) {
0985             call_single_data_t *csd;
0986 
0987             csd = &per_cpu_ptr(cfd->pcpu, cpu)->csd;
0988             csd_lock_wait(csd);
0989         }
0990     }
0991 }
0992 
0993 /**
0994  * smp_call_function_many(): Run a function on a set of CPUs.
0995  * @mask: The set of cpus to run on (only runs on online subset).
0996  * @func: The function to run. This must be fast and non-blocking.
0997  * @info: An arbitrary pointer to pass to the function.
0998  * @wait: If true, wait (atomically) until function has completed on
0999  *        the other CPUs. Note that @func is only run on the online CPUs
1000  *        in @mask other than the calling CPU; this helper never runs it
1001  *        locally (see smp_call_function_many_cond() for that option).
1002  *
1003  * If @wait is true, then returns once @func has returned.
1004  *
1005  * You must not call this function with disabled interrupts or from a
1006  * hardware interrupt handler or from a bottom half handler. Preemption
1007  * must be disabled when calling this function.
1008  */
1009 void smp_call_function_many(const struct cpumask *mask,
1010                 smp_call_func_t func, void *info, bool wait)
1011 {
1012     smp_call_function_many_cond(mask, func, info, wait * SCF_WAIT, NULL);
1013 }
1014 EXPORT_SYMBOL(smp_call_function_many);
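/*
 * Illustrative usage sketch (editor's addition, not part of the original
 * file): flush a hypothetical per-CPU cache on a set of CPUs. Note that
 * smp_call_function_many() never runs @func on the calling CPU, so the
 * local flush is done explicitly.
 */
static void example_flush_local(void *info)
{
	/* On remote CPUs this runs from the IPI handler with IRQs disabled. */
}

static void example_flush_mask(const struct cpumask *mask)
{
	preempt_disable();
	smp_call_function_many(mask, example_flush_local, NULL, true);
	if (cpumask_test_cpu(smp_processor_id(), mask)) {
		unsigned long flags;

		local_irq_save(flags);
		example_flush_local(NULL);
		local_irq_restore(flags);
	}
	preempt_enable();
}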
1015 
1016 /**
1017  * smp_call_function(): Run a function on all other CPUs.
1018  * @func: The function to run. This must be fast and non-blocking.
1019  * @info: An arbitrary pointer to pass to the function.
1020  * @wait: If true, wait (atomically) until function has completed
1021  *        on other CPUs.
1022  *
1023  * Returns 0.
1024  *
1025  * If @wait is true, then returns once @func has returned; otherwise
1026  * it returns just before the target cpu calls @func.
1027  *
1028  * You must not call this function with disabled interrupts or from a
1029  * hardware interrupt handler or from a bottom half handler.
1030  */
1031 void smp_call_function(smp_call_func_t func, void *info, int wait)
1032 {
1033     preempt_disable();
1034     smp_call_function_many(cpu_online_mask, func, info, wait);
1035     preempt_enable();
1036 }
1037 EXPORT_SYMBOL(smp_call_function);
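/*
 * Illustrative usage sketch (editor's addition, not part of the original
 * file); kick_all_cpus_sync() below is an in-tree caller of the same API.
 * Here a hypothetical per-CPU lookup cache is invalidated everywhere.
 */
static void example_invalidate(void *info)
{
	/* Per-CPU invalidation; runs in IPI context on the other CPUs. */
}

static void example_invalidate_all(void)
{
	/* wait=1: all other online CPUs have run the callback on return. */
	smp_call_function(example_invalidate, NULL, 1);
	example_invalidate(NULL);	/* The calling CPU is not included. */
}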
1038 
1039 /* Setup configured maximum number of CPUs to activate */
1040 unsigned int setup_max_cpus = NR_CPUS;
1041 EXPORT_SYMBOL(setup_max_cpus);
1042 
1043 
1044 /*
1045  * Setup routine for controlling SMP activation
1046  *
1047  * Command-line option of "nosmp" or "maxcpus=0" will disable SMP
1048  * activation entirely (the MPS table probe still happens, though).
1049  *
1050  * Command-line option of "maxcpus=<NUM>", where <NUM> is an integer
1051  * greater than 0, limits the maximum number of CPUs activated in
1052  * SMP mode to <NUM>.
1053  */
1054 
1055 void __weak arch_disable_smp_support(void) { }
1056 
1057 static int __init nosmp(char *str)
1058 {
1059     setup_max_cpus = 0;
1060     arch_disable_smp_support();
1061 
1062     return 0;
1063 }
1064 
1065 early_param("nosmp", nosmp);
1066 
1067 /* This is a hard limit. */
1068 static int __init nrcpus(char *str)
1069 {
1070     int nr_cpus;
1071 
1072     if (get_option(&str, &nr_cpus) && nr_cpus > 0 && nr_cpus < nr_cpu_ids)
1073         nr_cpu_ids = nr_cpus;
1074 
1075     return 0;
1076 }
1077 
1078 early_param("nr_cpus", nrcpus);
1079 
1080 static int __init maxcpus(char *str)
1081 {
1082     get_option(&str, &setup_max_cpus);
1083     if (setup_max_cpus == 0)
1084         arch_disable_smp_support();
1085 
1086     return 0;
1087 }
1088 
1089 early_param("maxcpus", maxcpus);
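/*
 * Illustrative command-line usage (editor's sketch, not part of the
 * original file), based on the early_param() handlers above:
 *
 *	nosmp		disable SMP bring-up entirely
 *	nr_cpus=4	hard-cap nr_cpu_ids at four possible CPUs
 *	maxcpus=2	bring up at most two CPUs at boot; further CPUs can
 *			typically be onlined later through sysfs
 *	maxcpus=0	behaves like "nosmp"
 */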
1090 
1091 /* Setup number of possible processor ids */
1092 unsigned int nr_cpu_ids __read_mostly = NR_CPUS;
1093 EXPORT_SYMBOL(nr_cpu_ids);
1094 
1095 /* An arch may set nr_cpu_ids earlier if needed, so this would be redundant */
1096 void __init setup_nr_cpu_ids(void)
1097 {
1098     nr_cpu_ids = find_last_bit(cpumask_bits(cpu_possible_mask), NR_CPUS) + 1;
1099 }
1100 
1101 /* Called by boot processor to activate the rest. */
1102 void __init smp_init(void)
1103 {
1104     int num_nodes, num_cpus;
1105 
1106     idle_threads_init();
1107     cpuhp_threads_init();
1108 
1109     pr_info("Bringing up secondary CPUs ...\n");
1110 
1111     bringup_nonboot_cpus(setup_max_cpus);
1112 
1113     num_nodes = num_online_nodes();
1114     num_cpus  = num_online_cpus();
1115     pr_info("Brought up %d node%s, %d CPU%s\n",
1116         num_nodes, (num_nodes > 1 ? "s" : ""),
1117         num_cpus,  (num_cpus  > 1 ? "s" : ""));
1118 
1119     /* Any cleanup work */
1120     smp_cpus_done(setup_max_cpus);
1121 }
1122 
1123 /*
1124  * on_each_cpu_cond_mask(): Call a function on each processor in @mask for which
1125  * the supplied function cond_func returns true, optionally waiting
1126  * for all the required CPUs to finish. This may include the local
1127  * processor.
1128  * @cond_func:  A callback function that is passed a cpu id and
1129  *      the info parameter. The function is called
1130  *      with preemption disabled. The function should
1131  *      return a boolean value indicating whether to IPI
1132  *      the specified CPU.
1133  * @func:   The function to run on all applicable CPUs.
1134  *      This must be fast and non-blocking.
1135  * @info:   An arbitrary pointer to pass to both functions.
1136  * @wait:   If true, wait (atomically) until function has
1137  *      completed on other CPUs.
1138  *
1139  * Preemption is disabled to protect against CPUs going offline but not online.
1140  * CPUs going online during the call will not be seen or sent an IPI.
1141  *
1142  * You must not call this function with disabled interrupts or
1143  * from a hardware interrupt handler or from a bottom half handler.
1144  */
1145 void on_each_cpu_cond_mask(smp_cond_func_t cond_func, smp_call_func_t func,
1146                void *info, bool wait, const struct cpumask *mask)
1147 {
1148     unsigned int scf_flags = SCF_RUN_LOCAL;
1149 
1150     if (wait)
1151         scf_flags |= SCF_WAIT;
1152 
1153     preempt_disable();
1154     smp_call_function_many_cond(mask, func, info, scf_flags, cond_func);
1155     preempt_enable();
1156 }
1157 EXPORT_SYMBOL(on_each_cpu_cond_mask);
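/*
 * Illustrative usage sketch (editor's addition, not part of the original
 * file): only interrupt CPUs whose hypothetical per-CPU queue is
 * non-empty. The condition callback runs on the calling CPU with
 * preemption disabled; only @func runs on the target CPUs.
 */
static DEFINE_PER_CPU(unsigned int, example_queued);

static bool example_has_work(int cpu, void *info)
{
	return per_cpu(example_queued, cpu) != 0;
}

static void example_drain(void *info)
{
	this_cpu_write(example_queued, 0);
}

static void example_drain_all(void)
{
	on_each_cpu_cond_mask(example_has_work, example_drain, NULL, true,
			      cpu_online_mask);
}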
1158 
1159 static void do_nothing(void *unused)
1160 {
1161 }
1162 
1163 /**
1164  * kick_all_cpus_sync - Force all cpus out of idle
1165  *
1166  * Used to synchronize the update of pm_idle function pointer. It's
1167  * called after the pointer is updated and returns after the dummy
1168  * callback function has been executed on all cpus. The execution of
1169  * the function can only happen on the remote cpus after they have
1170  * left the idle function which had been called via pm_idle function
1171  * pointer. So it's guaranteed that nothing uses the previous pointer
1172  * anymore.
1173  */
1174 void kick_all_cpus_sync(void)
1175 {
1176     /* Make sure the change is visible before we kick the cpus */
1177     smp_mb();
1178     smp_call_function(do_nothing, NULL, 1);
1179 }
1180 EXPORT_SYMBOL_GPL(kick_all_cpus_sync);
1181 
1182 /**
1183  * wake_up_all_idle_cpus - break all cpus out of idle
1184  * wake_up_all_idle_cpus() tries to break all CPUs out of the idle state,
1185  * including CPUs that are idle-polling; CPUs that are not idle are left
1186  * alone.
1187  */
1188 void wake_up_all_idle_cpus(void)
1189 {
1190     int cpu;
1191 
1192     for_each_possible_cpu(cpu) {
1193         preempt_disable();
1194         if (cpu != smp_processor_id() && cpu_online(cpu))
1195             wake_up_if_idle(cpu);
1196         preempt_enable();
1197     }
1198 }
1199 EXPORT_SYMBOL_GPL(wake_up_all_idle_cpus);
1200 
1201 /**
1202  * struct smp_call_on_cpu_struct - Call a function on a specific CPU
1203  * @work: &work_struct
1204  * @done: &completion to signal
1205  * @func: function to call
1206  * @data: function's data argument
1207  * @ret: return value from @func
1208  * @cpu: target CPU (%-1 for any CPU)
1209  *
1210  * Used to call a function on a specific cpu and wait for it to return.
1211  * Optionally make sure the call is done on a specified physical cpu via vcpu
1212  * pinning in order to support virtualized environments.
1213  */
1214 struct smp_call_on_cpu_struct {
1215     struct work_struct  work;
1216     struct completion   done;
1217     int         (*func)(void *);
1218     void            *data;
1219     int         ret;
1220     int         cpu;
1221 };
1222 
1223 static void smp_call_on_cpu_callback(struct work_struct *work)
1224 {
1225     struct smp_call_on_cpu_struct *sscs;
1226 
1227     sscs = container_of(work, struct smp_call_on_cpu_struct, work);
1228     if (sscs->cpu >= 0)
1229         hypervisor_pin_vcpu(sscs->cpu);
1230     sscs->ret = sscs->func(sscs->data);
1231     if (sscs->cpu >= 0)
1232         hypervisor_pin_vcpu(-1);
1233 
1234     complete(&sscs->done);
1235 }
1236 
1237 int smp_call_on_cpu(unsigned int cpu, int (*func)(void *), void *par, bool phys)
1238 {
1239     struct smp_call_on_cpu_struct sscs = {
1240         .done = COMPLETION_INITIALIZER_ONSTACK(sscs.done),
1241         .func = func,
1242         .data = par,
1243         .cpu  = phys ? cpu : -1,
1244     };
1245 
1246     INIT_WORK_ONSTACK(&sscs.work, smp_call_on_cpu_callback);
1247 
1248     if (cpu >= nr_cpu_ids || !cpu_online(cpu))
1249         return -ENXIO;
1250 
1251     queue_work_on(cpu, system_wq, &sscs.work);
1252     wait_for_completion(&sscs.done);
1253 
1254     return sscs.ret;
1255 }
1256 EXPORT_SYMBOL_GPL(smp_call_on_cpu);
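/*
 * Illustrative usage sketch (editor's addition, not part of the original
 * file): run a sleepable callback on a specific CPU, e.g. to poke
 * hardware that is only reachable from that CPU. The example_* names are
 * hypothetical.
 */
static int example_read_local_reg(void *arg)
{
	/* Runs from a kworker bound to the chosen CPU; may sleep. */
	*(u32 *)arg = 0;	/* A real caller would read hardware here. */
	return 0;
}

static int example_read_on_cpu(unsigned int cpu, u32 *val)
{
	/* phys=false: no hypervisor vCPU pinning is requested. */
	return smp_call_on_cpu(cpu, example_read_local_reg, val, false);
}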