/*
 * kernel/stop_machine.c
 *
 * Copyright (C) 2008, 2005 IBM Corporation.
 * Copyright (C) 2008, 2005 Rusty Russell rusty@rustcorp.com.au
 * Copyright (C) 2010       SUSE Linux Products GmbH
 * Copyright (C) 2010       Tejun Heo <tj@kernel.org>
 *
 * This file is released under the GPLv2 and any later version.
 */
#include <linux/completion.h>
#include <linux/cpu.h>
#include <linux/init.h>
#include <linux/kthread.h>
#include <linux/export.h>
#include <linux/percpu.h>
#include <linux/sched.h>
#include <linux/stop_machine.h>
#include <linux/interrupt.h>
#include <linux/kallsyms.h>
#include <linux/smpboot.h>
#include <linux/atomic.h>
#include <linux/nmi.h>

/*
 * Structure to determine completion condition and record errors.  May
 * be shared by works on different cpus.
 */
struct cpu_stop_done {
    atomic_t        nr_todo;    /* nr left to execute */
    int         ret;        /* collected return value */
    struct completion   completion; /* fired if nr_todo reaches 0 */
};

/* the actual stopper, one per every possible cpu, enabled on online cpus */
struct cpu_stopper {
    struct task_struct  *thread;

    spinlock_t      lock;
    bool            enabled;    /* is this stopper enabled? */
    struct list_head    works;      /* list of pending works */

    struct cpu_stop_work    stop_work;  /* for stop_cpus */
};

static DEFINE_PER_CPU(struct cpu_stopper, cpu_stopper);
static bool stop_machine_initialized = false;

/* static data for stop_cpus */
static DEFINE_MUTEX(stop_cpus_mutex);
static bool stop_cpus_in_progress;

static void cpu_stop_init_done(struct cpu_stop_done *done, unsigned int nr_todo)
{
    memset(done, 0, sizeof(*done));
    atomic_set(&done->nr_todo, nr_todo);
    init_completion(&done->completion);
}

/* signal completion unless @done is NULL */
static void cpu_stop_signal_done(struct cpu_stop_done *done)
{
    if (atomic_dec_and_test(&done->nr_todo))
        complete(&done->completion);
}

static void __cpu_stop_queue_work(struct cpu_stopper *stopper,
                    struct cpu_stop_work *work)
{
    list_add_tail(&work->list, &stopper->works);
    wake_up_process(stopper->thread);
}

/* queue @work to @stopper.  if offline, @work is completed immediately */
static bool cpu_stop_queue_work(unsigned int cpu, struct cpu_stop_work *work)
{
    struct cpu_stopper *stopper = &per_cpu(cpu_stopper, cpu);
    unsigned long flags;
    bool enabled;

    spin_lock_irqsave(&stopper->lock, flags);
    enabled = stopper->enabled;
    if (enabled)
        __cpu_stop_queue_work(stopper, work);
    else if (work->done)
        cpu_stop_signal_done(work->done);
    spin_unlock_irqrestore(&stopper->lock, flags);

    return enabled;
}

/**
 * stop_one_cpu - stop a cpu
 * @cpu: cpu to stop
 * @fn: function to execute
 * @arg: argument to @fn
 *
 * Execute @fn(@arg) on @cpu.  @fn is run in a process context with
 * the highest priority preempting any task on the cpu and
 * monopolizing it.  This function returns after the execution is
 * complete.
 *
 * This function doesn't guarantee @cpu stays online till @fn
 * completes.  If @cpu goes down in the middle, execution may happen
 * partially or fully on different cpus.  @fn should either be ready
 * for that or the caller should ensure that @cpu stays online until
 * this function completes.
 *
 * CONTEXT:
 * Might sleep.
 *
 * RETURNS:
 * -ENOENT if @fn(@arg) was not executed because @cpu was offline;
 * otherwise, the return value of @fn.
 */
int stop_one_cpu(unsigned int cpu, cpu_stop_fn_t fn, void *arg)
{
    struct cpu_stop_done done;
    struct cpu_stop_work work = { .fn = fn, .arg = arg, .done = &done };

    cpu_stop_init_done(&done, 1);
    if (!cpu_stop_queue_work(cpu, &work))
        return -ENOENT;
    /*
     * In case @cpu == smp_processor_id() we can avoid a sleep+wakeup
     * cycle by doing a preemption:
     */
    cond_resched();
    wait_for_completion(&done.completion);
    return done.ret;
}
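
/*
 * Editor's note: illustrative usage sketch, not part of the original file.
 * It shows a minimal stop_one_cpu() caller; all example_* names are
 * hypothetical.  The callback runs in stopper context on the target CPU,
 * so it must not sleep, and its return value is handed back to the caller.
 */
#if 0	/* usage sketch only, not compiled */
static int example_probe_fn(void *arg)
{
    unsigned int *out = arg;

    *out = smp_processor_id();  /* reported cpu == monopolized target cpu */
    return 0;
}

static int example_probe(unsigned int cpu)
{
    unsigned int seen;
    int ret;

    /* -ENOENT if @cpu was offline, otherwise example_probe_fn()'s rc */
    ret = stop_one_cpu(cpu, example_probe_fn, &seen);
    if (!ret)
        pr_info("callback ran on cpu%u\n", seen);
    return ret;
}
#endif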

/* This controls the threads on each CPU. */
enum multi_stop_state {
    /* Dummy starting state for thread. */
    MULTI_STOP_NONE,
    /* Awaiting everyone to be scheduled. */
    MULTI_STOP_PREPARE,
    /* Disable interrupts. */
    MULTI_STOP_DISABLE_IRQ,
    /* Run the function */
    MULTI_STOP_RUN,
    /* Exit */
    MULTI_STOP_EXIT,
};

struct multi_stop_data {
    cpu_stop_fn_t       fn;
    void            *data;
    /* Like num_online_cpus(), but hotplug cpu uses us, so we need this. */
    unsigned int        num_threads;
    const struct cpumask    *active_cpus;

    enum multi_stop_state   state;
    atomic_t        thread_ack;
};

static void set_state(struct multi_stop_data *msdata,
              enum multi_stop_state newstate)
{
    /* Reset ack counter. */
    atomic_set(&msdata->thread_ack, msdata->num_threads);
    smp_wmb();
    msdata->state = newstate;
}

/* Last one to ack a state moves to the next state. */
static void ack_state(struct multi_stop_data *msdata)
{
    if (atomic_dec_and_test(&msdata->thread_ack))
        set_state(msdata, msdata->state + 1);
}

/* This is the cpu_stop function which stops the CPU. */
static int multi_cpu_stop(void *data)
{
    struct multi_stop_data *msdata = data;
    enum multi_stop_state curstate = MULTI_STOP_NONE;
    int cpu = smp_processor_id(), err = 0;
    unsigned long flags;
    bool is_active;

    /*
     * When called from stop_machine_from_inactive_cpu(), irq might
     * already be disabled.  Save the state and restore it on exit.
     */
    local_save_flags(flags);

    if (!msdata->active_cpus)
        is_active = cpu == cpumask_first(cpu_online_mask);
    else
        is_active = cpumask_test_cpu(cpu, msdata->active_cpus);

    /* Simple state machine */
    do {
        /* Chill out and ensure we re-read multi_stop_state. */
        cpu_relax_yield();
        if (msdata->state != curstate) {
            curstate = msdata->state;
            switch (curstate) {
            case MULTI_STOP_DISABLE_IRQ:
                local_irq_disable();
                hard_irq_disable();
                break;
            case MULTI_STOP_RUN:
                if (is_active)
                    err = msdata->fn(msdata->data);
                break;
            default:
                break;
            }
            ack_state(msdata);
        } else if (curstate > MULTI_STOP_PREPARE) {
            /*
             * At this stage all other CPUs we depend on must spin
             * in the same loop. Any reason for hard-lockup should
             * be detected and reported on their side.
             */
            touch_nmi_watchdog();
        }
    } while (curstate != MULTI_STOP_EXIT);

    local_irq_restore(flags);
    return err;
}
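
/*
 * Editor's note: illustrative timeline, not part of the original file.
 * With num_threads == 3, each stopper thread spins in multi_cpu_stop() and
 * the last thread to ack a state advances it via ack_state()/set_state():
 *
 *   state:   PREPARE    DISABLE_IRQ      RUN            EXIT
 *   cpu0:    ack        irq off, ack     ack            ack, return
 *   cpu1:    ack        irq off, ack     fn(), ack      ack, return  (active)
 *   cpu2:    ack        irq off, ack     ack            ack, return
 *
 * thread_ack is reset to num_threads on every transition, so no CPU can run
 * more than one state ahead of the others, and @fn runs only after every
 * participating CPU has disabled interrupts.
 */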

static int cpu_stop_queue_two_works(int cpu1, struct cpu_stop_work *work1,
                    int cpu2, struct cpu_stop_work *work2)
{
    struct cpu_stopper *stopper1 = per_cpu_ptr(&cpu_stopper, cpu1);
    struct cpu_stopper *stopper2 = per_cpu_ptr(&cpu_stopper, cpu2);
    int err;
retry:
    spin_lock_irq(&stopper1->lock);
    spin_lock_nested(&stopper2->lock, SINGLE_DEPTH_NESTING);

    err = -ENOENT;
    if (!stopper1->enabled || !stopper2->enabled)
        goto unlock;
    /*
     * Ensure that if we race with __stop_cpus() the stoppers won't get
     * queued up in reverse order leading to system deadlock.
     *
     * We can't miss stop_cpus_in_progress if queue_stop_cpus_work() has
     * queued a work on cpu1 but not on cpu2, because we hold both locks.
     *
     * It can be falsely true but it is safe to spin until it is cleared,
     * queue_stop_cpus_work() does everything under preempt_disable().
     */
    err = -EDEADLK;
    if (unlikely(stop_cpus_in_progress))
        goto unlock;

    err = 0;
    __cpu_stop_queue_work(stopper1, work1);
    __cpu_stop_queue_work(stopper2, work2);
unlock:
    spin_unlock(&stopper2->lock);
    spin_unlock_irq(&stopper1->lock);

    if (unlikely(err == -EDEADLK)) {
        while (stop_cpus_in_progress)
            cpu_relax();
        goto retry;
    }
    return err;
}

/**
 * stop_two_cpus - stops two cpus
 * @cpu1: the cpu to stop
 * @cpu2: the other cpu to stop
 * @fn: function to execute
 * @arg: argument to @fn
 *
 * Stops both @cpu1 and @cpu2 and runs @fn on one of them.
 *
 * Returns when both CPUs have completed.
 */
int stop_two_cpus(unsigned int cpu1, unsigned int cpu2, cpu_stop_fn_t fn, void *arg)
{
    struct cpu_stop_done done;
    struct cpu_stop_work work1, work2;
    struct multi_stop_data msdata;

    msdata = (struct multi_stop_data){
        .fn = fn,
        .data = arg,
        .num_threads = 2,
        .active_cpus = cpumask_of(cpu1),
    };

    work1 = work2 = (struct cpu_stop_work){
        .fn = multi_cpu_stop,
        .arg = &msdata,
        .done = &done
    };

    cpu_stop_init_done(&done, 2);
    set_state(&msdata, MULTI_STOP_PREPARE);

    if (cpu1 > cpu2)
        swap(cpu1, cpu2);
    if (cpu_stop_queue_two_works(cpu1, &work1, cpu2, &work2))
        return -ENOENT;

    wait_for_completion(&done.completion);
    return done.ret;
}
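
/*
 * Editor's note: illustrative usage sketch, not part of the original file.
 * stop_two_cpus() is useful when state tied to two CPUs must change
 * atomically with respect to both of them (the scheduler's task-swapping
 * path is one in-tree caller).  The example_* names below are hypothetical.
 */
#if 0	/* usage sketch only, not compiled */
struct example_swap_args {
    unsigned int cpu_a, cpu_b;
};

static int example_do_swap(void *data)
{
    struct example_swap_args *args = data;

    /*
     * Both CPUs are spinning in multi_cpu_stop() with interrupts off at
     * this point, so per-cpu state of either CPU may be touched safely.
     */
    pr_info("swapping state of cpu%u and cpu%u\n", args->cpu_a, args->cpu_b);
    return 0;
}

static int example_swap(unsigned int cpu_a, unsigned int cpu_b)
{
    struct example_swap_args args = { .cpu_a = cpu_a, .cpu_b = cpu_b };

    /* example_do_swap() runs on one of the two stopped CPUs */
    return stop_two_cpus(cpu_a, cpu_b, example_do_swap, &args);
}
#endif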

/**
 * stop_one_cpu_nowait - stop a cpu but don't wait for completion
 * @cpu: cpu to stop
 * @fn: function to execute
 * @arg: argument to @fn
 * @work_buf: pointer to cpu_stop_work structure
 *
 * Similar to stop_one_cpu() but doesn't wait for completion.  The
 * caller is responsible for ensuring @work_buf is currently unused
 * and will remain untouched until stopper starts executing @fn.
 *
 * CONTEXT:
 * Don't care.
 *
 * RETURNS:
 * true if cpu_stop_work was queued successfully and @fn will be called,
 * false otherwise.
 */
bool stop_one_cpu_nowait(unsigned int cpu, cpu_stop_fn_t fn, void *arg,
            struct cpu_stop_work *work_buf)
{
    *work_buf = (struct cpu_stop_work){ .fn = fn, .arg = arg, };
    return cpu_stop_queue_work(cpu, work_buf);
}
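
/*
 * Editor's note: illustrative usage sketch, not part of the original file.
 * Because stop_one_cpu_nowait() does not wait, the caller owns @work_buf
 * until the stopper has started @fn; a static per-cpu buffer is a common
 * way to satisfy that.  All example_* names are hypothetical.
 */
#if 0	/* usage sketch only, not compiled */
static DEFINE_PER_CPU(struct cpu_stop_work, example_work);

static int example_cb(void *arg)
{
    pr_info("running on cpu%d\n", smp_processor_id());
    return 0;   /* return value is discarded; .done is NULL here */
}

static void example_kick(unsigned int cpu)
{
    /* false if @cpu's stopper is not enabled (cpu on its way down) */
    if (!stop_one_cpu_nowait(cpu, example_cb, NULL,
                 &per_cpu(example_work, cpu)))
        pr_info("cpu%u stopper unavailable\n", cpu);
}
#endif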

static bool queue_stop_cpus_work(const struct cpumask *cpumask,
                 cpu_stop_fn_t fn, void *arg,
                 struct cpu_stop_done *done)
{
    struct cpu_stop_work *work;
    unsigned int cpu;
    bool queued = false;

    /*
     * Disable preemption while queueing to avoid getting
     * preempted by a stopper which might wait for other stoppers
     * to enter @fn which can lead to deadlock.
     */
    preempt_disable();
    stop_cpus_in_progress = true;
    for_each_cpu(cpu, cpumask) {
        work = &per_cpu(cpu_stopper.stop_work, cpu);
        work->fn = fn;
        work->arg = arg;
        work->done = done;
        if (cpu_stop_queue_work(cpu, work))
            queued = true;
    }
    stop_cpus_in_progress = false;
    preempt_enable();

    return queued;
}

static int __stop_cpus(const struct cpumask *cpumask,
               cpu_stop_fn_t fn, void *arg)
{
    struct cpu_stop_done done;

    cpu_stop_init_done(&done, cpumask_weight(cpumask));
    if (!queue_stop_cpus_work(cpumask, fn, arg, &done))
        return -ENOENT;
    wait_for_completion(&done.completion);
    return done.ret;
}

/**
 * stop_cpus - stop multiple cpus
 * @cpumask: cpus to stop
 * @fn: function to execute
 * @arg: argument to @fn
 *
 * Execute @fn(@arg) on online cpus in @cpumask.  On each target cpu,
 * @fn is run in a process context with the highest priority
 * preempting any task on the cpu and monopolizing it.  This function
 * returns after all executions are complete.
 *
 * This function doesn't guarantee the cpus in @cpumask stay online
 * till @fn completes.  If some cpus go down in the middle, execution
 * on the cpu may happen partially or fully on different cpus.  @fn
 * should either be ready for that or the caller should ensure that
 * the cpus stay online until this function completes.
 *
 * All stop_cpus() calls are serialized making it safe for @fn to wait
 * for all cpus to start executing it.
 *
 * CONTEXT:
 * Might sleep.
 *
 * RETURNS:
 * -ENOENT if @fn(@arg) was not executed at all because all cpus in
 * @cpumask were offline; otherwise, 0 if all executions of @fn
 * returned 0, any non-zero return value if any returned non-zero.
 */
int stop_cpus(const struct cpumask *cpumask, cpu_stop_fn_t fn, void *arg)
{
    int ret;

    /* static works are used, process one request at a time */
    mutex_lock(&stop_cpus_mutex);
    ret = __stop_cpus(cpumask, fn, arg);
    mutex_unlock(&stop_cpus_mutex);
    return ret;
}
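
/*
 * Editor's note: illustrative usage sketch, not part of the original file.
 * A stop_cpus() caller may rely on every targeted online CPU entering @fn
 * concurrently because all calls are serialized by stop_cpus_mutex.  The
 * example_* names are hypothetical.
 */
#if 0	/* usage sketch only, not compiled */
static int example_quiesce(void *arg)
{
    /*
     * Runs simultaneously on every online cpu in the mask, in stopper
     * context with the highest priority, so it must not sleep.
     */
    return 0;
}

static int example_quiesce_all(void)
{
    /* -ENOENT only if every cpu in the mask was offline */
    return stop_cpus(cpu_online_mask, example_quiesce, NULL);
}
#endif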

/**
 * try_stop_cpus - try to stop multiple cpus
 * @cpumask: cpus to stop
 * @fn: function to execute
 * @arg: argument to @fn
 *
 * Identical to stop_cpus() except that it fails with -EAGAIN if
 * someone else is already using the facility.
 *
 * CONTEXT:
 * Might sleep.
 *
 * RETURNS:
 * -EAGAIN if someone else is already stopping cpus, -ENOENT if
 * @fn(@arg) was not executed at all because all cpus in @cpumask were
 * offline; otherwise, 0 if all executions of @fn returned 0, any
 * non-zero return value if any returned non-zero.
 */
int try_stop_cpus(const struct cpumask *cpumask, cpu_stop_fn_t fn, void *arg)
{
    int ret;

    /* static works are used, process one request at a time */
    if (!mutex_trylock(&stop_cpus_mutex))
        return -EAGAIN;
    ret = __stop_cpus(cpumask, fn, arg);
    mutex_unlock(&stop_cpus_mutex);
    return ret;
}
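
/*
 * Editor's note: illustrative usage sketch, not part of the original file.
 * Same idea as the stop_cpus() sketch above (it reuses the hypothetical
 * example_quiesce()), but opportunistic: a caller that must not block on
 * stop_cpus_mutex backs off on -EAGAIN and retries later.
 */
#if 0	/* usage sketch only, not compiled */
static int example_try_quiesce_all(void)
{
    int ret = try_stop_cpus(cpu_online_mask, example_quiesce, NULL);

    if (ret == -EAGAIN)
        pr_info("another stop_cpus() user is active, retry later\n");
    return ret;
}
#endif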

static int cpu_stop_should_run(unsigned int cpu)
{
    struct cpu_stopper *stopper = &per_cpu(cpu_stopper, cpu);
    unsigned long flags;
    int run;

    spin_lock_irqsave(&stopper->lock, flags);
    run = !list_empty(&stopper->works);
    spin_unlock_irqrestore(&stopper->lock, flags);
    return run;
}

static void cpu_stopper_thread(unsigned int cpu)
{
    struct cpu_stopper *stopper = &per_cpu(cpu_stopper, cpu);
    struct cpu_stop_work *work;

repeat:
    work = NULL;
    spin_lock_irq(&stopper->lock);
    if (!list_empty(&stopper->works)) {
        work = list_first_entry(&stopper->works,
                    struct cpu_stop_work, list);
        list_del_init(&work->list);
    }
    spin_unlock_irq(&stopper->lock);

    if (work) {
        cpu_stop_fn_t fn = work->fn;
        void *arg = work->arg;
        struct cpu_stop_done *done = work->done;
        int ret;

        /* cpu stop callbacks must not sleep, make in_atomic() == T */
        preempt_count_inc();
        ret = fn(arg);
        if (done) {
            if (ret)
                done->ret = ret;
            cpu_stop_signal_done(done);
        }
        preempt_count_dec();
        WARN_ONCE(preempt_count(),
              "cpu_stop: %pf(%p) leaked preempt count\n", fn, arg);
        goto repeat;
    }
}

void stop_machine_park(int cpu)
{
    struct cpu_stopper *stopper = &per_cpu(cpu_stopper, cpu);
    /*
     * Lockless. cpu_stopper_thread() will take stopper->lock and flush
     * the pending works before it parks, until then it is fine to queue
     * the new works.
     */
    stopper->enabled = false;
    kthread_park(stopper->thread);
}

extern void sched_set_stop_task(int cpu, struct task_struct *stop);

static void cpu_stop_create(unsigned int cpu)
{
    sched_set_stop_task(cpu, per_cpu(cpu_stopper.thread, cpu));
}

static void cpu_stop_park(unsigned int cpu)
{
    struct cpu_stopper *stopper = &per_cpu(cpu_stopper, cpu);

    WARN_ON(!list_empty(&stopper->works));
}

void stop_machine_unpark(int cpu)
{
    struct cpu_stopper *stopper = &per_cpu(cpu_stopper, cpu);

    stopper->enabled = true;
    kthread_unpark(stopper->thread);
}

static struct smp_hotplug_thread cpu_stop_threads = {
    .store          = &cpu_stopper.thread,
    .thread_should_run  = cpu_stop_should_run,
    .thread_fn      = cpu_stopper_thread,
    .thread_comm        = "migration/%u",
    .create         = cpu_stop_create,
    .park           = cpu_stop_park,
    .selfparking        = true,
};

static int __init cpu_stop_init(void)
{
    unsigned int cpu;

    for_each_possible_cpu(cpu) {
        struct cpu_stopper *stopper = &per_cpu(cpu_stopper, cpu);

        spin_lock_init(&stopper->lock);
        INIT_LIST_HEAD(&stopper->works);
    }

    BUG_ON(smpboot_register_percpu_thread(&cpu_stop_threads));
    stop_machine_unpark(raw_smp_processor_id());
    stop_machine_initialized = true;
    return 0;
}
early_initcall(cpu_stop_init);

static int __stop_machine(cpu_stop_fn_t fn, void *data, const struct cpumask *cpus)
{
    struct multi_stop_data msdata = {
        .fn = fn,
        .data = data,
        .num_threads = num_online_cpus(),
        .active_cpus = cpus,
    };

    if (!stop_machine_initialized) {
        /*
         * Handle the case where stop_machine() is called
         * early in boot, before it has been initialized.
         */
        unsigned long flags;
        int ret;

        WARN_ON_ONCE(msdata.num_threads != 1);

        local_irq_save(flags);
        hard_irq_disable();
        ret = (*fn)(data);
        local_irq_restore(flags);

        return ret;
    }

    /* Set the initial state and stop all online cpus. */
    set_state(&msdata, MULTI_STOP_PREPARE);
    return stop_cpus(cpu_online_mask, multi_cpu_stop, &msdata);
}

int stop_machine(cpu_stop_fn_t fn, void *data, const struct cpumask *cpus)
{
    int ret;

    /* No CPUs can come up or down during this. */
    get_online_cpus();
    ret = __stop_machine(fn, data, cpus);
    put_online_cpus();
    return ret;
}
EXPORT_SYMBOL_GPL(stop_machine);
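
/*
 * Editor's note: illustrative usage sketch, not part of the original file.
 * A typical stop_machine() caller updates some global state while every
 * other online CPU spins in multi_cpu_stop() with interrupts disabled,
 * comparable to what code-patching style users do.  example_global and the
 * other example_* names are hypothetical.
 */
#if 0	/* usage sketch only, not compiled */
static int example_global;

static int example_patch(void *data)
{
    int *new_value = data;

    /* all other online CPUs are stopped with IRQs off right now */
    example_global = *new_value;
    return 0;
}

static int example_update(int v)
{
    /* may sleep; cpus == NULL means @fn runs on the first online cpu */
    return stop_machine(example_patch, &v, NULL);
}
#endif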

/**
 * stop_machine_from_inactive_cpu - stop_machine() from inactive CPU
 * @fn: the function to run
 * @data: the data ptr for the @fn()
 * @cpus: the cpus to run the @fn() on (NULL = any online cpu)
 *
 * This is identical to stop_machine() but can be called from a CPU which
 * is not active.  The local CPU is in the process of hotplug (so no other
 * CPU hotplug can start) and not marked active and doesn't have enough
 * context to sleep.
 *
 * This function provides stop_machine() functionality for such state by
 * using busy-wait for synchronization and executing @fn directly for local
 * CPU.
 *
 * CONTEXT:
 * Local CPU is inactive.  Temporarily stops all active CPUs.
 *
 * RETURNS:
 * 0 if all executions of @fn returned 0, any non-zero return value if any
 * returned non-zero.
 */
int stop_machine_from_inactive_cpu(cpu_stop_fn_t fn, void *data,
                  const struct cpumask *cpus)
{
    struct multi_stop_data msdata = { .fn = fn, .data = data,
                        .active_cpus = cpus };
    struct cpu_stop_done done;
    int ret;

    /* Local CPU must be inactive and CPU hotplug in progress. */
    BUG_ON(cpu_active(raw_smp_processor_id()));
    msdata.num_threads = num_active_cpus() + 1; /* +1 for local */

    /* No proper task established and can't sleep - busy wait for lock. */
    while (!mutex_trylock(&stop_cpus_mutex))
        cpu_relax();

    /* Schedule work on other CPUs and execute directly for local CPU */
    set_state(&msdata, MULTI_STOP_PREPARE);
    cpu_stop_init_done(&done, num_active_cpus());
    queue_stop_cpus_work(cpu_active_mask, multi_cpu_stop, &msdata,
                 &done);
    ret = multi_cpu_stop(&msdata);

    /* Busy wait for completion. */
    while (!completion_done(&done.completion))
        cpu_relax();

    mutex_unlock(&stop_cpus_mutex);
    return ret ?: done.ret;
}
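
/*
 * Editor's note: illustrative usage sketch, not part of the original file.
 * stop_machine_from_inactive_cpu() is meant for a CPU that is coming up,
 * is not yet marked active, and must rendezvous with all active CPUs (the
 * x86 MTRR code is an in-tree user of this pattern).  The example_* names
 * below are hypothetical.
 */
#if 0	/* usage sketch only, not compiled */
static int example_sync_state(void *unused)
{
    /*
     * Runs on every active cpu and directly on the inactive local cpu,
     * all with interrupts disabled.
     */
    return 0;
}

static void example_cpu_starting(void)
{
    /* must be called from the hotplug path of a not-yet-active cpu */
    stop_machine_from_inactive_cpu(example_sync_state, NULL, NULL);
}
#endif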