0001 /* CPU control.
0002  * (C) 2001, 2002, 2003, 2004 Rusty Russell
0003  *
0004  * This code is licensed under the GPL.
0005  */
0006 #include <linux/proc_fs.h>
0007 #include <linux/smp.h>
0008 #include <linux/init.h>
0009 #include <linux/notifier.h>
0010 #include <linux/sched.h>
0011 #include <linux/unistd.h>
0012 #include <linux/cpu.h>
0013 #include <linux/oom.h>
0014 #include <linux/rcupdate.h>
0015 #include <linux/export.h>
0016 #include <linux/bug.h>
0017 #include <linux/kthread.h>
0018 #include <linux/stop_machine.h>
0019 #include <linux/mutex.h>
0020 #include <linux/gfp.h>
0021 #include <linux/suspend.h>
0022 #include <linux/lockdep.h>
0023 #include <linux/tick.h>
0024 #include <linux/irq.h>
0025 #include <linux/smpboot.h>
0026 #include <linux/relay.h>
0027 #include <linux/slab.h>
0028 
0029 #include <trace/events/power.h>
0030 #define CREATE_TRACE_POINTS
0031 #include <trace/events/cpuhp.h>
0032 
0033 #include "smpboot.h"
0034 
0035 /**
0036  * cpuhp_cpu_state - Per cpu hotplug state storage
0037  * @state:  The current cpu state
0038  * @target: The target state
0039  * @thread: Pointer to the hotplug thread
0040  * @should_run: Thread should execute
0041  * @rollback:   Perform a rollback
0042  * @single: Single callback invocation
0043  * @bringup:    Single callback bringup or teardown selector
0044  * @cb_state:   The state for a single callback (install/uninstall)
0045  * @result: Result of the operation
0046  * @done:   Signal completion to the issuer of the task
0047  */
0048 struct cpuhp_cpu_state {
0049     enum cpuhp_state    state;
0050     enum cpuhp_state    target;
0051 #ifdef CONFIG_SMP
0052     struct task_struct  *thread;
0053     bool            should_run;
0054     bool            rollback;
0055     bool            single;
0056     bool            bringup;
0057     struct hlist_node   *node;
0058     enum cpuhp_state    cb_state;
0059     int         result;
0060     struct completion   done;
0061 #endif
0062 };
0063 
0064 static DEFINE_PER_CPU(struct cpuhp_cpu_state, cpuhp_state);
0065 
0066 /**
0067  * cpuhp_step - Hotplug state machine step
0068  * @name:   Name of the step
0069  * @startup:    Startup function of the step
0070  * @teardown:   Teardown function of the step
0071  * @skip_onerr: Do not invoke the functions on error rollback
0072  *      Will go away once the notifiers are gone
0073  * @cant_stop:  Bringup/teardown can't be stopped at this step
0074  */
0075 struct cpuhp_step {
0076     const char      *name;
0077     union {
0078         int     (*single)(unsigned int cpu);
0079         int     (*multi)(unsigned int cpu,
0080                      struct hlist_node *node);
0081     } startup;
0082     union {
0083         int     (*single)(unsigned int cpu);
0084         int     (*multi)(unsigned int cpu,
0085                      struct hlist_node *node);
0086     } teardown;
0087     struct hlist_head   list;
0088     bool            skip_onerr;
0089     bool            cant_stop;
0090     bool            multi_instance;
0091 };
0092 
0093 static DEFINE_MUTEX(cpuhp_state_mutex);
0094 static struct cpuhp_step cpuhp_bp_states[];
0095 static struct cpuhp_step cpuhp_ap_states[];
0096 
0097 static bool cpuhp_is_ap_state(enum cpuhp_state state)
0098 {
0099     /*
0100      * The extra check for CPUHP_TEARDOWN_CPU is only for documentation
0101      * purposes as that state is handled explicitly in cpu_down.
0102      */
0103     return state > CPUHP_BRINGUP_CPU && state != CPUHP_TEARDOWN_CPU;
0104 }
0105 
0106 static struct cpuhp_step *cpuhp_get_step(enum cpuhp_state state)
0107 {
0108     struct cpuhp_step *sp;
0109 
0110     sp = cpuhp_is_ap_state(state) ? cpuhp_ap_states : cpuhp_bp_states;
0111     return sp + state;
0112 }
0113 
0114 /**
0115  * cpuhp_invoke_callback - Invoke the callbacks for a given state
0116  * @cpu:    The cpu for which the callback should be invoked
0117  * @state:  The state in the state machine
0118  * @bringup:    True if the startup callback should be invoked
0119  *
0120  * Called from cpu hotplug and from the state register machinery.
0121  */
0122 static int cpuhp_invoke_callback(unsigned int cpu, enum cpuhp_state state,
0123                  bool bringup, struct hlist_node *node)
0124 {
0125     struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu);
0126     struct cpuhp_step *step = cpuhp_get_step(state);
0127     int (*cbm)(unsigned int cpu, struct hlist_node *node);
0128     int (*cb)(unsigned int cpu);
0129     int ret, cnt;
0130 
0131     if (!step->multi_instance) {
0132         cb = bringup ? step->startup.single : step->teardown.single;
0133         if (!cb)
0134             return 0;
0135         trace_cpuhp_enter(cpu, st->target, state, cb);
0136         ret = cb(cpu);
0137         trace_cpuhp_exit(cpu, st->state, state, ret);
0138         return ret;
0139     }
0140     cbm = bringup ? step->startup.multi : step->teardown.multi;
0141     if (!cbm)
0142         return 0;
0143 
0144     /* Single invocation for instance add/remove */
0145     if (node) {
0146         trace_cpuhp_multi_enter(cpu, st->target, state, cbm, node);
0147         ret = cbm(cpu, node);
0148         trace_cpuhp_exit(cpu, st->state, state, ret);
0149         return ret;
0150     }
0151 
0152     /* State transition. Invoke on all instances */
0153     cnt = 0;
0154     hlist_for_each(node, &step->list) {
0155         trace_cpuhp_multi_enter(cpu, st->target, state, cbm, node);
0156         ret = cbm(cpu, node);
0157         trace_cpuhp_exit(cpu, st->state, state, ret);
0158         if (ret)
0159             goto err;
0160         cnt++;
0161     }
0162     return 0;
0163 err:
0164     /* Rollback the instances if one failed */
0165     cbm = !bringup ? step->startup.multi : step->teardown.multi;
0166     if (!cbm)
0167         return ret;
0168 
0169     hlist_for_each(node, &step->list) {
0170         if (!cnt--)
0171             break;
0172         cbm(cpu, node);
0173     }
0174     return ret;
0175 }
0176 
0177 #ifdef CONFIG_SMP
0178 /* Serializes the updates to cpu_online_mask, cpu_present_mask */
0179 static DEFINE_MUTEX(cpu_add_remove_lock);
0180 bool cpuhp_tasks_frozen;
0181 EXPORT_SYMBOL_GPL(cpuhp_tasks_frozen);
0182 
0183 /*
0184  * The following two APIs (cpu_maps_update_begin/done) must be used when
0185  * attempting to serialize the updates to cpu_online_mask & cpu_present_mask.
0186  */
0187 void cpu_maps_update_begin(void)
0188 {
0189     mutex_lock(&cpu_add_remove_lock);
0190 }
0191 
0192 void cpu_maps_update_done(void)
0193 {
0194     mutex_unlock(&cpu_add_remove_lock);
0195 }
0196 
0197 /* If set, cpu_up and cpu_down will return -EBUSY and do nothing.
0198  * Should always be manipulated under cpu_add_remove_lock
0199  */
0200 static int cpu_hotplug_disabled;
0201 
0202 #ifdef CONFIG_HOTPLUG_CPU
0203 
0204 static struct {
0205     struct task_struct *active_writer;
0206     /* wait queue to wake up the active_writer */
0207     wait_queue_head_t wq;
0208     /* verifies that no writer will get active while readers are active */
0209     struct mutex lock;
0210     /*
0211      * Also blocks the new readers during
0212      * an ongoing cpu hotplug operation.
0213      */
0214     atomic_t refcount;
0215 
0216 #ifdef CONFIG_DEBUG_LOCK_ALLOC
0217     struct lockdep_map dep_map;
0218 #endif
0219 } cpu_hotplug = {
0220     .active_writer = NULL,
0221     .wq = __WAIT_QUEUE_HEAD_INITIALIZER(cpu_hotplug.wq),
0222     .lock = __MUTEX_INITIALIZER(cpu_hotplug.lock),
0223 #ifdef CONFIG_DEBUG_LOCK_ALLOC
0224     .dep_map = STATIC_LOCKDEP_MAP_INIT("cpu_hotplug.dep_map", &cpu_hotplug.dep_map),
0225 #endif
0226 };
0227 
0228 /* Lockdep annotations for get/put_online_cpus() and cpu_hotplug_begin/end() */
0229 #define cpuhp_lock_acquire_read() lock_map_acquire_read(&cpu_hotplug.dep_map)
0230 #define cpuhp_lock_acquire_tryread() \
0231                   lock_map_acquire_tryread(&cpu_hotplug.dep_map)
0232 #define cpuhp_lock_acquire()      lock_map_acquire(&cpu_hotplug.dep_map)
0233 #define cpuhp_lock_release()      lock_map_release(&cpu_hotplug.dep_map)
0234 
0235 
0236 void get_online_cpus(void)
0237 {
0238     might_sleep();
0239     if (cpu_hotplug.active_writer == current)
0240         return;
0241     cpuhp_lock_acquire_read();
0242     mutex_lock(&cpu_hotplug.lock);
0243     atomic_inc(&cpu_hotplug.refcount);
0244     mutex_unlock(&cpu_hotplug.lock);
0245 }
0246 EXPORT_SYMBOL_GPL(get_online_cpus);
0247 
0248 void put_online_cpus(void)
0249 {
0250     int refcount;
0251 
0252     if (cpu_hotplug.active_writer == current)
0253         return;
0254 
0255     refcount = atomic_dec_return(&cpu_hotplug.refcount);
0256     if (WARN_ON(refcount < 0)) /* try to fix things up */
0257         atomic_inc(&cpu_hotplug.refcount);
0258 
0259     if (refcount <= 0 && waitqueue_active(&cpu_hotplug.wq))
0260         wake_up(&cpu_hotplug.wq);
0261 
0262     cpuhp_lock_release();
0263 
0264 }
0265 EXPORT_SYMBOL_GPL(put_online_cpus);
0266 
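
To make the reader side concrete, here is a hedged sketch (not part of this file) of the usual pattern: take the reference, walk the now-stable online mask, then drop it. The helper name walk_online_cpus() is invented for the example.

/* Illustrative only -- not part of kernel/cpu.c. */
#include <linux/cpu.h>
#include <linux/cpumask.h>
#include <linux/printk.h>

static void walk_online_cpus(void)
{
	unsigned int cpu;

	get_online_cpus();		/* pins cpu_online_mask against hotplug */
	for_each_online_cpu(cpu)
		pr_info("cpu%u is online\n", cpu);
	put_online_cpus();		/* drops the reader reference */
}
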
0267 /*
0268  * This ensures that the hotplug operation can begin only when the
0269  * refcount goes to zero.
0270  *
0271  * Note that during a cpu-hotplug operation, the new readers, if any,
0272  * will be blocked by the cpu_hotplug.lock
0273  *
0274  * Since cpu_hotplug_begin() is always called after invoking
0275  * cpu_maps_update_begin(), we can be sure that only one writer is active.
0276  *
0277  * Note that theoretically, there is a possibility of a livelock:
0278  * - Refcount goes to zero, last reader wakes up the sleeping
0279  *   writer.
0280  * - Last reader unlocks the cpu_hotplug.lock.
0281  * - A new reader arrives at this moment, bumps up the refcount.
0282  * - The writer acquires the cpu_hotplug.lock, finds the refcount
0283  *   non-zero and goes to sleep again.
0284  *
0285  * However, this is very difficult to achieve in practice since
0286  * get_online_cpus() is not an API which is called all that often.
0287  *
0288  */
0289 void cpu_hotplug_begin(void)
0290 {
0291     DEFINE_WAIT(wait);
0292 
0293     cpu_hotplug.active_writer = current;
0294     cpuhp_lock_acquire();
0295 
0296     for (;;) {
0297         mutex_lock(&cpu_hotplug.lock);
0298         prepare_to_wait(&cpu_hotplug.wq, &wait, TASK_UNINTERRUPTIBLE);
0299         if (likely(!atomic_read(&cpu_hotplug.refcount)))
0300                 break;
0301         mutex_unlock(&cpu_hotplug.lock);
0302         schedule();
0303     }
0304     finish_wait(&cpu_hotplug.wq, &wait);
0305 }
0306 
0307 void cpu_hotplug_done(void)
0308 {
0309     cpu_hotplug.active_writer = NULL;
0310     mutex_unlock(&cpu_hotplug.lock);
0311     cpuhp_lock_release();
0312 }
0313 
0314 /*
0315  * Wait for currently running CPU hotplug operations to complete (if any) and
0316  * disable future CPU hotplug (from sysfs). The 'cpu_add_remove_lock' protects
0317  * the 'cpu_hotplug_disabled' flag. The same lock is also acquired by the
0318  * hotplug path before performing hotplug operations. So acquiring that lock
0319  * guarantees mutual exclusion from any currently running hotplug operations.
0320  */
0321 void cpu_hotplug_disable(void)
0322 {
0323     cpu_maps_update_begin();
0324     cpu_hotplug_disabled++;
0325     cpu_maps_update_done();
0326 }
0327 EXPORT_SYMBOL_GPL(cpu_hotplug_disable);
0328 
0329 static void __cpu_hotplug_enable(void)
0330 {
0331     if (WARN_ONCE(!cpu_hotplug_disabled, "Unbalanced cpu hotplug enable\n"))
0332         return;
0333     cpu_hotplug_disabled--;
0334 }
0335 
0336 void cpu_hotplug_enable(void)
0337 {
0338     cpu_maps_update_begin();
0339     __cpu_hotplug_enable();
0340     cpu_maps_update_done();
0341 }
0342 EXPORT_SYMBOL_GPL(cpu_hotplug_enable);
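
As a brief, hedged illustration of how a caller might pair these two exports to keep the CPU topology stable over a longer section; the body in the middle is a placeholder, not code from this file.

/* Illustrative only -- the work in the middle is a placeholder. */
#include <linux/cpu.h>

static void quiesce_hotplug(void)
{
	cpu_hotplug_disable();	/* cpu_up()/cpu_down() now return -EBUSY */
	/* ... work that must not race with CPU hotplug ... */
	cpu_hotplug_enable();	/* must balance the disable above */
}
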
0343 #endif  /* CONFIG_HOTPLUG_CPU */
0344 
0345 /* Notifier wrappers for transitioning to state machine */
0346 
0347 static int bringup_wait_for_ap(unsigned int cpu)
0348 {
0349     struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu);
0350 
0351     wait_for_completion(&st->done);
0352     return st->result;
0353 }
0354 
0355 static int bringup_cpu(unsigned int cpu)
0356 {
0357     struct task_struct *idle = idle_thread_get(cpu);
0358     int ret;
0359 
0360     /*
0361      * Some architectures have to walk the irq descriptors to
0362      * setup the vector space for the cpu which comes online.
0363      * Prevent irq alloc/free across the bringup.
0364      */
0365     irq_lock_sparse();
0366 
0367     /* Arch-specific enabling code. */
0368     ret = __cpu_up(cpu, idle);
0369     irq_unlock_sparse();
0370     if (ret)
0371         return ret;
0372     ret = bringup_wait_for_ap(cpu);
0373     BUG_ON(!cpu_online(cpu));
0374     return ret;
0375 }
0376 
0377 /*
0378  * Hotplug state machine related functions
0379  */
0380 static void undo_cpu_down(unsigned int cpu, struct cpuhp_cpu_state *st)
0381 {
0382     for (st->state++; st->state < st->target; st->state++) {
0383         struct cpuhp_step *step = cpuhp_get_step(st->state);
0384 
0385         if (!step->skip_onerr)
0386             cpuhp_invoke_callback(cpu, st->state, true, NULL);
0387     }
0388 }
0389 
0390 static int cpuhp_down_callbacks(unsigned int cpu, struct cpuhp_cpu_state *st,
0391                 enum cpuhp_state target)
0392 {
0393     enum cpuhp_state prev_state = st->state;
0394     int ret = 0;
0395 
0396     for (; st->state > target; st->state--) {
0397         ret = cpuhp_invoke_callback(cpu, st->state, false, NULL);
0398         if (ret) {
0399             st->target = prev_state;
0400             undo_cpu_down(cpu, st);
0401             break;
0402         }
0403     }
0404     return ret;
0405 }
0406 
0407 static void undo_cpu_up(unsigned int cpu, struct cpuhp_cpu_state *st)
0408 {
0409     for (st->state--; st->state > st->target; st->state--) {
0410         struct cpuhp_step *step = cpuhp_get_step(st->state);
0411 
0412         if (!step->skip_onerr)
0413             cpuhp_invoke_callback(cpu, st->state, false, NULL);
0414     }
0415 }
0416 
0417 static int cpuhp_up_callbacks(unsigned int cpu, struct cpuhp_cpu_state *st,
0418                   enum cpuhp_state target)
0419 {
0420     enum cpuhp_state prev_state = st->state;
0421     int ret = 0;
0422 
0423     while (st->state < target) {
0424         st->state++;
0425         ret = cpuhp_invoke_callback(cpu, st->state, true, NULL);
0426         if (ret) {
0427             st->target = prev_state;
0428             undo_cpu_up(cpu, st);
0429             break;
0430         }
0431     }
0432     return ret;
0433 }
0434 
0435 /*
0436  * The cpu hotplug threads manage the bringup and teardown of the cpus
0437  */
0438 static void cpuhp_create(unsigned int cpu)
0439 {
0440     struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu);
0441 
0442     init_completion(&st->done);
0443 }
0444 
0445 static int cpuhp_should_run(unsigned int cpu)
0446 {
0447     struct cpuhp_cpu_state *st = this_cpu_ptr(&cpuhp_state);
0448 
0449     return st->should_run;
0450 }
0451 
0452 /* Execute the teardown callbacks. Used to be CPU_DOWN_PREPARE */
0453 static int cpuhp_ap_offline(unsigned int cpu, struct cpuhp_cpu_state *st)
0454 {
0455     enum cpuhp_state target = max((int)st->target, CPUHP_TEARDOWN_CPU);
0456 
0457     return cpuhp_down_callbacks(cpu, st, target);
0458 }
0459 
0460 /* Execute the online startup callbacks. Used to be CPU_ONLINE */
0461 static int cpuhp_ap_online(unsigned int cpu, struct cpuhp_cpu_state *st)
0462 {
0463     return cpuhp_up_callbacks(cpu, st, st->target);
0464 }
0465 
0466 /*
0467  * Execute teardown/startup callbacks on the plugged cpu. Also used to invoke
0468  * callbacks when a state gets [un]installed at runtime.
0469  */
0470 static void cpuhp_thread_fun(unsigned int cpu)
0471 {
0472     struct cpuhp_cpu_state *st = this_cpu_ptr(&cpuhp_state);
0473     int ret = 0;
0474 
0475     /*
0476      * Paired with the mb() in cpuhp_kick_ap_work and
0477      * cpuhp_invoke_ap_callback, so the work set is consistently visible.
0478      */
0479     smp_mb();
0480     if (!st->should_run)
0481         return;
0482 
0483     st->should_run = false;
0484 
0485     /* Single callback invocation for [un]install ? */
0486     if (st->single) {
0487         if (st->cb_state < CPUHP_AP_ONLINE) {
0488             local_irq_disable();
0489             ret = cpuhp_invoke_callback(cpu, st->cb_state,
0490                             st->bringup, st->node);
0491             local_irq_enable();
0492         } else {
0493             ret = cpuhp_invoke_callback(cpu, st->cb_state,
0494                             st->bringup, st->node);
0495         }
0496     } else if (st->rollback) {
0497         BUG_ON(st->state < CPUHP_AP_ONLINE_IDLE);
0498 
0499         undo_cpu_down(cpu, st);
0500         st->rollback = false;
0501     } else {
0502         /* Cannot happen .... */
0503         BUG_ON(st->state < CPUHP_AP_ONLINE_IDLE);
0504 
0505         /* Regular hotplug work */
0506         if (st->state < st->target)
0507             ret = cpuhp_ap_online(cpu, st);
0508         else if (st->state > st->target)
0509             ret = cpuhp_ap_offline(cpu, st);
0510     }
0511     st->result = ret;
0512     complete(&st->done);
0513 }
0514 
0515 /* Invoke a single callback on a remote cpu */
0516 static int
0517 cpuhp_invoke_ap_callback(int cpu, enum cpuhp_state state, bool bringup,
0518              struct hlist_node *node)
0519 {
0520     struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu);
0521 
0522     if (!cpu_online(cpu))
0523         return 0;
0524 
0525     /*
0526      * If we are up and running, use the hotplug thread. For early calls
0527      * we invoke the thread function directly.
0528      */
0529     if (!st->thread)
0530         return cpuhp_invoke_callback(cpu, state, bringup, node);
0531 
0532     st->cb_state = state;
0533     st->single = true;
0534     st->bringup = bringup;
0535     st->node = node;
0536 
0537     /*
0538      * Make sure the above stores are visible before should_run becomes
0539      * true. Paired with the mb() above in cpuhp_thread_fun()
0540      */
0541     smp_mb();
0542     st->should_run = true;
0543     wake_up_process(st->thread);
0544     wait_for_completion(&st->done);
0545     return st->result;
0546 }
0547 
0548 /* Regular hotplug invocation of the AP hotplug thread */
0549 static void __cpuhp_kick_ap_work(struct cpuhp_cpu_state *st)
0550 {
0551     st->result = 0;
0552     st->single = false;
0553     /*
0554      * Make sure the above stores are visible before should_run becomes
0555      * true. Paired with the mb() above in cpuhp_thread_fun()
0556      */
0557     smp_mb();
0558     st->should_run = true;
0559     wake_up_process(st->thread);
0560 }
0561 
0562 static int cpuhp_kick_ap_work(unsigned int cpu)
0563 {
0564     struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu);
0565     enum cpuhp_state state = st->state;
0566 
0567     trace_cpuhp_enter(cpu, st->target, state, cpuhp_kick_ap_work);
0568     __cpuhp_kick_ap_work(st);
0569     wait_for_completion(&st->done);
0570     trace_cpuhp_exit(cpu, st->state, state, st->result);
0571     return st->result;
0572 }
0573 
0574 static struct smp_hotplug_thread cpuhp_threads = {
0575     .store          = &cpuhp_state.thread,
0576     .create         = &cpuhp_create,
0577     .thread_should_run  = cpuhp_should_run,
0578     .thread_fn      = cpuhp_thread_fun,
0579     .thread_comm        = "cpuhp/%u",
0580     .selfparking        = true,
0581 };
0582 
0583 void __init cpuhp_threads_init(void)
0584 {
0585     BUG_ON(smpboot_register_percpu_thread(&cpuhp_threads));
0586     kthread_unpark(this_cpu_read(cpuhp_state.thread));
0587 }
0588 
0589 #ifdef CONFIG_HOTPLUG_CPU
0590 /**
0591  * clear_tasks_mm_cpumask - Safely clear tasks' mm_cpumask for a CPU
0592  * @cpu: a CPU id
0593  *
0594  * This function walks all processes, finds a valid mm struct for each one and
0595  * then clears a corresponding bit in mm's cpumask.  While this all sounds
0596  * trivial, there are various non-obvious corner cases, which this function
0597  * tries to solve in a safe manner.
0598  *
0599  * Also note that the function uses a somewhat relaxed locking scheme, so it may
0600  * be called only for an already offlined CPU.
0601  */
0602 void clear_tasks_mm_cpumask(int cpu)
0603 {
0604     struct task_struct *p;
0605 
0606     /*
0607      * This function is called after the cpu is taken down and marked
0608      * offline, so it's not like new tasks will ever get this cpu set in
0609      * their mm mask. -- Peter Zijlstra
0610      * Thus, we may use rcu_read_lock() here, instead of grabbing
0611      * full-fledged tasklist_lock.
0612      */
0613     WARN_ON(cpu_online(cpu));
0614     rcu_read_lock();
0615     for_each_process(p) {
0616         struct task_struct *t;
0617 
0618         /*
0619          * Main thread might exit, but other threads may still have
0620          * a valid mm. Find one.
0621          */
0622         t = find_lock_task_mm(p);
0623         if (!t)
0624             continue;
0625         cpumask_clear_cpu(cpu, mm_cpumask(t->mm));
0626         task_unlock(t);
0627     }
0628     rcu_read_unlock();
0629 }
0630 
0631 static inline void check_for_tasks(int dead_cpu)
0632 {
0633     struct task_struct *g, *p;
0634 
0635     read_lock(&tasklist_lock);
0636     for_each_process_thread(g, p) {
0637         if (!p->on_rq)
0638             continue;
0639         /*
0640          * We do the check with task_rq(p)->lock unlocked.
0641          * Order the reads so that we do not warn about a task
0642          * which was running on this cpu in the past but has
0643          * just been woken up on another cpu.
0644          */
0645         rmb();
0646         if (task_cpu(p) != dead_cpu)
0647             continue;
0648 
0649         pr_warn("Task %s (pid=%d) is on cpu %d (state=%ld, flags=%x)\n",
0650             p->comm, task_pid_nr(p), dead_cpu, p->state, p->flags);
0651     }
0652     read_unlock(&tasklist_lock);
0653 }
0654 
0655 /* Take this CPU down. */
0656 static int take_cpu_down(void *_param)
0657 {
0658     struct cpuhp_cpu_state *st = this_cpu_ptr(&cpuhp_state);
0659     enum cpuhp_state target = max((int)st->target, CPUHP_AP_OFFLINE);
0660     int err, cpu = smp_processor_id();
0661 
0662     /* Ensure this CPU doesn't handle any more interrupts. */
0663     err = __cpu_disable();
0664     if (err < 0)
0665         return err;
0666 
0667     /*
0668      * We get here while we are in CPUHP_TEARDOWN_CPU state and we must not
0669      * do this step again.
0670      */
0671     WARN_ON(st->state != CPUHP_TEARDOWN_CPU);
0672     st->state--;
0673     /* Invoke the former CPU_DYING callbacks */
0674     for (; st->state > target; st->state--)
0675         cpuhp_invoke_callback(cpu, st->state, false, NULL);
0676 
0677     /* Give up timekeeping duties */
0678     tick_handover_do_timer();
0679     /* Park the stopper thread */
0680     stop_machine_park(cpu);
0681     return 0;
0682 }
0683 
0684 static int takedown_cpu(unsigned int cpu)
0685 {
0686     struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu);
0687     int err;
0688 
0689     /* Park the smpboot threads */
0690     kthread_park(per_cpu_ptr(&cpuhp_state, cpu)->thread);
0691     smpboot_park_threads(cpu);
0692 
0693     /*
0694      * Prevent irq alloc/free while the dying cpu reorganizes the
0695      * interrupt affinities.
0696      */
0697     irq_lock_sparse();
0698 
0699     /*
0700      * So now all preempt/rcu users must observe !cpu_active().
0701      */
0702     err = stop_machine(take_cpu_down, NULL, cpumask_of(cpu));
0703     if (err) {
0704         /* CPU refused to die */
0705         irq_unlock_sparse();
0706         /* Unpark the hotplug thread so we can rollback there */
0707         kthread_unpark(per_cpu_ptr(&cpuhp_state, cpu)->thread);
0708         return err;
0709     }
0710     BUG_ON(cpu_online(cpu));
0711 
0712     /*
0713      * The CPUHP_AP_SCHED_MIGRATE_DYING callback will have removed all
0714      * runnable tasks from the cpu, there's only the idle task left now
0715      * that the migration thread is done doing the stop_machine thing.
0716      *
0717      * Wait for the stop thread to go away.
0718      */
0719     wait_for_completion(&st->done);
0720     BUG_ON(st->state != CPUHP_AP_IDLE_DEAD);
0721 
0722     /* Interrupts are moved away from the dying cpu, reenable alloc/free */
0723     irq_unlock_sparse();
0724 
0725     hotplug_cpu__broadcast_tick_pull(cpu);
0726     /* This actually kills the CPU. */
0727     __cpu_die(cpu);
0728 
0729     tick_cleanup_dead_cpu(cpu);
0730     return 0;
0731 }
0732 
0733 static void cpuhp_complete_idle_dead(void *arg)
0734 {
0735     struct cpuhp_cpu_state *st = arg;
0736 
0737     complete(&st->done);
0738 }
0739 
0740 void cpuhp_report_idle_dead(void)
0741 {
0742     struct cpuhp_cpu_state *st = this_cpu_ptr(&cpuhp_state);
0743 
0744     BUG_ON(st->state != CPUHP_AP_OFFLINE);
0745     rcu_report_dead(smp_processor_id());
0746     st->state = CPUHP_AP_IDLE_DEAD;
0747     /*
0748      * We cannot call complete after rcu_report_dead() so we delegate it
0749      * to an online cpu.
0750      */
0751     smp_call_function_single(cpumask_first(cpu_online_mask),
0752                  cpuhp_complete_idle_dead, st, 0);
0753 }
0754 
0755 #else
0756 #define takedown_cpu        NULL
0757 #endif
0758 
0759 #ifdef CONFIG_HOTPLUG_CPU
0760 
0761 /* Requires cpu_add_remove_lock to be held */
0762 static int __ref _cpu_down(unsigned int cpu, int tasks_frozen,
0763                enum cpuhp_state target)
0764 {
0765     struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu);
0766     int prev_state, ret = 0;
0767 
0768     if (num_online_cpus() == 1)
0769         return -EBUSY;
0770 
0771     if (!cpu_present(cpu))
0772         return -EINVAL;
0773 
0774     cpu_hotplug_begin();
0775 
0776     cpuhp_tasks_frozen = tasks_frozen;
0777 
0778     prev_state = st->state;
0779     st->target = target;
0780     /*
0781      * If the current CPU state is in the range of the AP hotplug thread,
0782      * then we need to kick the thread.
0783      */
0784     if (st->state > CPUHP_TEARDOWN_CPU) {
0785         ret = cpuhp_kick_ap_work(cpu);
0786         /*
0787          * The AP side has done the error rollback already. Just
0788          * return the error code..
0789          */
0790         if (ret)
0791             goto out;
0792 
0793         /*
0794          * We might have stopped still in the range of the AP hotplug
0795          * thread. Nothing to do anymore.
0796          */
0797         if (st->state > CPUHP_TEARDOWN_CPU)
0798             goto out;
0799     }
0800     /*
0801      * The AP brought itself down to CPUHP_TEARDOWN_CPU. So we need
0802      * to do the further cleanups.
0803      */
0804     ret = cpuhp_down_callbacks(cpu, st, target);
0805     if (ret && st->state > CPUHP_TEARDOWN_CPU && st->state < prev_state) {
0806         st->target = prev_state;
0807         st->rollback = true;
0808         cpuhp_kick_ap_work(cpu);
0809     }
0810 
0811 out:
0812     cpu_hotplug_done();
0813     return ret;
0814 }
0815 
0816 static int do_cpu_down(unsigned int cpu, enum cpuhp_state target)
0817 {
0818     int err;
0819 
0820     cpu_maps_update_begin();
0821 
0822     if (cpu_hotplug_disabled) {
0823         err = -EBUSY;
0824         goto out;
0825     }
0826 
0827     err = _cpu_down(cpu, 0, target);
0828 
0829 out:
0830     cpu_maps_update_done();
0831     return err;
0832 }
0833 int cpu_down(unsigned int cpu)
0834 {
0835     return do_cpu_down(cpu, CPUHP_OFFLINE);
0836 }
0837 EXPORT_SYMBOL(cpu_down);
0838 #endif /*CONFIG_HOTPLUG_CPU*/
0839 
0840 /**
0841  * notify_cpu_starting(cpu) - Invoke the callbacks on the starting CPU
0842  * @cpu: cpu that just started
0843  *
0844  * It must be called by the arch code on the new cpu, before the new cpu
0845  * enables interrupts and before the "boot" cpu returns from __cpu_up().
0846  */
0847 void notify_cpu_starting(unsigned int cpu)
0848 {
0849     struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu);
0850     enum cpuhp_state target = min((int)st->target, CPUHP_AP_ONLINE);
0851 
0852     rcu_cpu_starting(cpu);  /* Enables RCU usage on this CPU. */
0853     while (st->state < target) {
0854         st->state++;
0855         cpuhp_invoke_callback(cpu, st->state, true, NULL);
0856     }
0857 }
0858 
0859 /*
0860  * Called from the idle task. We need to set active here, so we can kick off
0861  * the stopper thread and unpark the smpboot threads. If the target state is
0862  * beyond CPUHP_AP_ONLINE_IDLE we kick the cpuhp thread and let it bring up the
0863  * cpu further.
0864  */
0865 void cpuhp_online_idle(enum cpuhp_state state)
0866 {
0867     struct cpuhp_cpu_state *st = this_cpu_ptr(&cpuhp_state);
0868     unsigned int cpu = smp_processor_id();
0869 
0870     /* Happens for the boot cpu */
0871     if (state != CPUHP_AP_ONLINE_IDLE)
0872         return;
0873 
0874     st->state = CPUHP_AP_ONLINE_IDLE;
0875 
0876     /* Unpark the stopper thread and the hotplug thread of this cpu */
0877     stop_machine_unpark(cpu);
0878     kthread_unpark(st->thread);
0879 
0880     /* Should we go further up ? */
0881     if (st->target > CPUHP_AP_ONLINE_IDLE)
0882         __cpuhp_kick_ap_work(st);
0883     else
0884         complete(&st->done);
0885 }
0886 
0887 /* Requires cpu_add_remove_lock to be held */
0888 static int _cpu_up(unsigned int cpu, int tasks_frozen, enum cpuhp_state target)
0889 {
0890     struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu);
0891     struct task_struct *idle;
0892     int ret = 0;
0893 
0894     cpu_hotplug_begin();
0895 
0896     if (!cpu_present(cpu)) {
0897         ret = -EINVAL;
0898         goto out;
0899     }
0900 
0901     /*
0902      * The caller of do_cpu_up might have raced with another
0903      * caller. Ignore it for now.
0904      */
0905     if (st->state >= target)
0906         goto out;
0907 
0908     if (st->state == CPUHP_OFFLINE) {
0909         /* Let it fail before we try to bring the cpu up */
0910         idle = idle_thread_get(cpu);
0911         if (IS_ERR(idle)) {
0912             ret = PTR_ERR(idle);
0913             goto out;
0914         }
0915     }
0916 
0917     cpuhp_tasks_frozen = tasks_frozen;
0918 
0919     st->target = target;
0920     /*
0921      * If the current CPU state is in the range of the AP hotplug thread,
0922      * then we need to kick the thread once more.
0923      */
0924     if (st->state > CPUHP_BRINGUP_CPU) {
0925         ret = cpuhp_kick_ap_work(cpu);
0926         /*
0927          * The AP side has done the error rollback already. Just
0928          * return the error code..
0929          */
0930         if (ret)
0931             goto out;
0932     }
0933 
0934     /*
0935      * Try to reach the target state. We max out on the BP at
0936      * CPUHP_BRINGUP_CPU. After that the AP hotplug thread is
0937      * responsible for bringing it up to the target state.
0938      */
0939     target = min((int)target, CPUHP_BRINGUP_CPU);
0940     ret = cpuhp_up_callbacks(cpu, st, target);
0941 out:
0942     cpu_hotplug_done();
0943     return ret;
0944 }
0945 
0946 static int do_cpu_up(unsigned int cpu, enum cpuhp_state target)
0947 {
0948     int err = 0;
0949 
0950     if (!cpu_possible(cpu)) {
0951         pr_err("can't online cpu %d because it is not configured as may-hotadd at boot time\n",
0952                cpu);
0953 #if defined(CONFIG_IA64)
0954         pr_err("please check additional_cpus= boot parameter\n");
0955 #endif
0956         return -EINVAL;
0957     }
0958 
0959     err = try_online_node(cpu_to_node(cpu));
0960     if (err)
0961         return err;
0962 
0963     cpu_maps_update_begin();
0964 
0965     if (cpu_hotplug_disabled) {
0966         err = -EBUSY;
0967         goto out;
0968     }
0969 
0970     err = _cpu_up(cpu, 0, target);
0971 out:
0972     cpu_maps_update_done();
0973     return err;
0974 }
0975 
0976 int cpu_up(unsigned int cpu)
0977 {
0978     return do_cpu_up(cpu, CPUHP_ONLINE);
0979 }
0980 EXPORT_SYMBOL_GPL(cpu_up);
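
For illustration only (this sketch is not part of the file and assumes CONFIG_HOTPLUG_CPU so that cpu_down() is available), a caller could cycle a secondary CPU through the exported entry points like this; the helper name toggle_cpu() is invented.

/* Illustrative only; assumes CONFIG_HOTPLUG_CPU for cpu_down(). */
#include <linux/cpu.h>
#include <linux/printk.h>

static int toggle_cpu(unsigned int cpu)
{
	int ret;

	ret = cpu_down(cpu);	/* walk the state machine down to CPUHP_OFFLINE */
	if (ret) {
		pr_err("cpu%u: offline failed: %d\n", cpu, ret);
		return ret;
	}
	ret = cpu_up(cpu);	/* and back up to CPUHP_ONLINE */
	if (ret)
		pr_err("cpu%u: online failed: %d\n", cpu, ret);
	return ret;
}
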
0981 
0982 #ifdef CONFIG_PM_SLEEP_SMP
0983 static cpumask_var_t frozen_cpus;
0984 
0985 int freeze_secondary_cpus(int primary)
0986 {
0987     int cpu, error = 0;
0988 
0989     cpu_maps_update_begin();
0990     if (!cpu_online(primary))
0991         primary = cpumask_first(cpu_online_mask);
0992     /*
0993      * We take down all of the non-boot CPUs in one shot to avoid races
0994      * with userspace trying to use CPU hotplug at the same time.
0995      */
0996     cpumask_clear(frozen_cpus);
0997 
0998     pr_info("Disabling non-boot CPUs ...\n");
0999     for_each_online_cpu(cpu) {
1000         if (cpu == primary)
1001             continue;
1002         trace_suspend_resume(TPS("CPU_OFF"), cpu, true);
1003         error = _cpu_down(cpu, 1, CPUHP_OFFLINE);
1004         trace_suspend_resume(TPS("CPU_OFF"), cpu, false);
1005         if (!error)
1006             cpumask_set_cpu(cpu, frozen_cpus);
1007         else {
1008             pr_err("Error taking CPU%d down: %d\n", cpu, error);
1009             break;
1010         }
1011     }
1012 
1013     if (!error)
1014         BUG_ON(num_online_cpus() > 1);
1015     else
1016         pr_err("Non-boot CPUs are not disabled\n");
1017 
1018     /*
1019      * Make sure the CPUs won't be enabled by someone else. We need to do
1020      * this even in case of failure as all disable_nonboot_cpus() users are
1021      * supposed to do enable_nonboot_cpus() on the failure path.
1022      */
1023     cpu_hotplug_disabled++;
1024 
1025     cpu_maps_update_done();
1026     return error;
1027 }
1028 
1029 void __weak arch_enable_nonboot_cpus_begin(void)
1030 {
1031 }
1032 
1033 void __weak arch_enable_nonboot_cpus_end(void)
1034 {
1035 }
1036 
1037 void enable_nonboot_cpus(void)
1038 {
1039     int cpu, error;
1040 
1041     /* Allow everyone to use the CPU hotplug again */
1042     cpu_maps_update_begin();
1043     __cpu_hotplug_enable();
1044     if (cpumask_empty(frozen_cpus))
1045         goto out;
1046 
1047     pr_info("Enabling non-boot CPUs ...\n");
1048 
1049     arch_enable_nonboot_cpus_begin();
1050 
1051     for_each_cpu(cpu, frozen_cpus) {
1052         trace_suspend_resume(TPS("CPU_ON"), cpu, true);
1053         error = _cpu_up(cpu, 1, CPUHP_ONLINE);
1054         trace_suspend_resume(TPS("CPU_ON"), cpu, false);
1055         if (!error) {
1056             pr_info("CPU%d is up\n", cpu);
1057             continue;
1058         }
1059         pr_warn("Error taking CPU%d up: %d\n", cpu, error);
1060     }
1061 
1062     arch_enable_nonboot_cpus_end();
1063 
1064     cpumask_clear(frozen_cpus);
1065 out:
1066     cpu_maps_update_done();
1067 }
1068 
1069 static int __init alloc_frozen_cpus(void)
1070 {
1071     if (!alloc_cpumask_var(&frozen_cpus, GFP_KERNEL|__GFP_ZERO))
1072         return -ENOMEM;
1073     return 0;
1074 }
1075 core_initcall(alloc_frozen_cpus);
1076 
1077 /*
1078  * When callbacks for CPU hotplug notifications are being executed, we must
1079  * ensure that the state of the system with respect to the tasks being frozen
1080  * or not, as reported by the notification, remains unchanged *throughout the
1081  * duration* of the execution of the callbacks.
1082  * Hence we need to prevent the freezer from racing with regular CPU hotplug.
1083  *
1084  * This synchronization is implemented by mutually excluding regular CPU
1085  * hotplug and Suspend/Hibernate call paths by hooking onto the Suspend/
1086  * Hibernate notifications.
1087  */
1088 static int
1089 cpu_hotplug_pm_callback(struct notifier_block *nb,
1090             unsigned long action, void *ptr)
1091 {
1092     switch (action) {
1093 
1094     case PM_SUSPEND_PREPARE:
1095     case PM_HIBERNATION_PREPARE:
1096         cpu_hotplug_disable();
1097         break;
1098 
1099     case PM_POST_SUSPEND:
1100     case PM_POST_HIBERNATION:
1101         cpu_hotplug_enable();
1102         break;
1103 
1104     default:
1105         return NOTIFY_DONE;
1106     }
1107 
1108     return NOTIFY_OK;
1109 }
1110 
1111 
1112 static int __init cpu_hotplug_pm_sync_init(void)
1113 {
1114     /*
1115      * cpu_hotplug_pm_callback has higher priority than x86
1116      * bsp_pm_callback which depends on cpu_hotplug_pm_callback
1117      * to disable cpu hotplug and thus avoid hotplug races.
1118      */
1119     pm_notifier(cpu_hotplug_pm_callback, 0);
1120     return 0;
1121 }
1122 core_initcall(cpu_hotplug_pm_sync_init);
1123 
1124 #endif /* CONFIG_PM_SLEEP_SMP */
1125 
1126 #endif /* CONFIG_SMP */
1127 
1128 /* Boot processor state steps */
1129 static struct cpuhp_step cpuhp_bp_states[] = {
1130     [CPUHP_OFFLINE] = {
1131         .name           = "offline",
1132         .startup.single     = NULL,
1133         .teardown.single    = NULL,
1134     },
1135 #ifdef CONFIG_SMP
1136     [CPUHP_CREATE_THREADS] = {
1137         .name           = "threads:prepare",
1138         .startup.single     = smpboot_create_threads,
1139         .teardown.single    = NULL,
1140         .cant_stop      = true,
1141     },
1142     [CPUHP_PERF_PREPARE] = {
1143         .name           = "perf:prepare",
1144         .startup.single     = perf_event_init_cpu,
1145         .teardown.single    = perf_event_exit_cpu,
1146     },
1147     [CPUHP_WORKQUEUE_PREP] = {
1148         .name           = "workqueue:prepare",
1149         .startup.single     = workqueue_prepare_cpu,
1150         .teardown.single    = NULL,
1151     },
1152     [CPUHP_HRTIMERS_PREPARE] = {
1153         .name           = "hrtimers:prepare",
1154         .startup.single     = hrtimers_prepare_cpu,
1155         .teardown.single    = hrtimers_dead_cpu,
1156     },
1157     [CPUHP_SMPCFD_PREPARE] = {
1158         .name           = "smpcfd:prepare",
1159         .startup.single     = smpcfd_prepare_cpu,
1160         .teardown.single    = smpcfd_dead_cpu,
1161     },
1162     [CPUHP_RELAY_PREPARE] = {
1163         .name           = "relay:prepare",
1164         .startup.single     = relay_prepare_cpu,
1165         .teardown.single    = NULL,
1166     },
1167     [CPUHP_SLAB_PREPARE] = {
1168         .name           = "slab:prepare",
1169         .startup.single     = slab_prepare_cpu,
1170         .teardown.single    = slab_dead_cpu,
1171     },
1172     [CPUHP_RCUTREE_PREP] = {
1173         .name           = "RCU/tree:prepare",
1174         .startup.single     = rcutree_prepare_cpu,
1175         .teardown.single    = rcutree_dead_cpu,
1176     },
1177     /*
1178      * On the tear-down path, timers_dead_cpu() must be invoked
1179      * before blk_mq_queue_reinit_notify() from notify_dead(),
1180      * otherwise an RCU stall occurs.
1181      */
1182     [CPUHP_TIMERS_DEAD] = {
1183         .name           = "timers:dead",
1184         .startup.single     = NULL,
1185         .teardown.single    = timers_dead_cpu,
1186     },
1187     /* Kicks the plugged cpu into life */
1188     [CPUHP_BRINGUP_CPU] = {
1189         .name           = "cpu:bringup",
1190         .startup.single     = bringup_cpu,
1191         .teardown.single    = NULL,
1192         .cant_stop      = true,
1193     },
1194     [CPUHP_AP_SMPCFD_DYING] = {
1195         .name           = "smpcfd:dying",
1196         .startup.single     = NULL,
1197         .teardown.single    = smpcfd_dying_cpu,
1198     },
1199     /*
1200      * Handled on the control processor until the plugged processor manages
1201      * this itself.
1202      */
1203     [CPUHP_TEARDOWN_CPU] = {
1204         .name           = "cpu:teardown",
1205         .startup.single     = NULL,
1206         .teardown.single    = takedown_cpu,
1207         .cant_stop      = true,
1208     },
1209 #else
1210     [CPUHP_BRINGUP_CPU] = { },
1211 #endif
1212 };
1213 
1214 /* Application processor state steps */
1215 static struct cpuhp_step cpuhp_ap_states[] = {
1216 #ifdef CONFIG_SMP
1217     /* Final state before CPU kills itself */
1218     [CPUHP_AP_IDLE_DEAD] = {
1219         .name           = "idle:dead",
1220     },
1221     /*
1222      * Last state before CPU enters the idle loop to die. Transient state
1223      * for synchronization.
1224      */
1225     [CPUHP_AP_OFFLINE] = {
1226         .name           = "ap:offline",
1227         .cant_stop      = true,
1228     },
1229     /* First state is scheduler control. Interrupts are disabled */
1230     [CPUHP_AP_SCHED_STARTING] = {
1231         .name           = "sched:starting",
1232         .startup.single     = sched_cpu_starting,
1233         .teardown.single    = sched_cpu_dying,
1234     },
1235     [CPUHP_AP_RCUTREE_DYING] = {
1236         .name           = "RCU/tree:dying",
1237         .startup.single     = NULL,
1238         .teardown.single    = rcutree_dying_cpu,
1239     },
1240     /* Entry state on starting. Interrupts enabled from here on. Transient
1241      * state for synchronization */
1242     [CPUHP_AP_ONLINE] = {
1243         .name           = "ap:online",
1244     },
1245     /* Handle smpboot threads park/unpark */
1246     [CPUHP_AP_SMPBOOT_THREADS] = {
1247         .name           = "smpboot/threads:online",
1248         .startup.single     = smpboot_unpark_threads,
1249         .teardown.single    = NULL,
1250     },
1251     [CPUHP_AP_PERF_ONLINE] = {
1252         .name           = "perf:online",
1253         .startup.single     = perf_event_init_cpu,
1254         .teardown.single    = perf_event_exit_cpu,
1255     },
1256     [CPUHP_AP_WORKQUEUE_ONLINE] = {
1257         .name           = "workqueue:online",
1258         .startup.single     = workqueue_online_cpu,
1259         .teardown.single    = workqueue_offline_cpu,
1260     },
1261     [CPUHP_AP_RCUTREE_ONLINE] = {
1262         .name           = "RCU/tree:online",
1263         .startup.single     = rcutree_online_cpu,
1264         .teardown.single    = rcutree_offline_cpu,
1265     },
1266 #endif
1267     /*
1268      * The dynamically registered state space is here
1269      */
1270 
1271 #ifdef CONFIG_SMP
1272     /* Last state is scheduler control setting the cpu active */
1273     [CPUHP_AP_ACTIVE] = {
1274         .name           = "sched:active",
1275         .startup.single     = sched_cpu_activate,
1276         .teardown.single    = sched_cpu_deactivate,
1277     },
1278 #endif
1279 
1280     /* CPU is fully up and running. */
1281     [CPUHP_ONLINE] = {
1282         .name           = "online",
1283         .startup.single     = NULL,
1284         .teardown.single    = NULL,
1285     },
1286 };
1287 
1288 /* Sanity check for callbacks */
1289 static int cpuhp_cb_check(enum cpuhp_state state)
1290 {
1291     if (state <= CPUHP_OFFLINE || state >= CPUHP_ONLINE)
1292         return -EINVAL;
1293     return 0;
1294 }
1295 
1296 /*
1297  * Returns a free slot in the dynamic state range. The states are
1298  * protected by the cpuhp_state_mutex and an empty slot is identified
1299  * by having no name assigned.
1300  */
1301 static int cpuhp_reserve_state(enum cpuhp_state state)
1302 {
1303     enum cpuhp_state i, end;
1304     struct cpuhp_step *step;
1305 
1306     switch (state) {
1307     case CPUHP_AP_ONLINE_DYN:
1308         step = cpuhp_ap_states + CPUHP_AP_ONLINE_DYN;
1309         end = CPUHP_AP_ONLINE_DYN_END;
1310         break;
1311     case CPUHP_BP_PREPARE_DYN:
1312         step = cpuhp_bp_states + CPUHP_BP_PREPARE_DYN;
1313         end = CPUHP_BP_PREPARE_DYN_END;
1314         break;
1315     default:
1316         return -EINVAL;
1317     }
1318 
1319     for (i = state; i <= end; i++, step++) {
1320         if (!step->name)
1321             return i;
1322     }
1323     WARN(1, "No more dynamic states available for CPU hotplug\n");
1324     return -ENOSPC;
1325 }
1326 
1327 static int cpuhp_store_callbacks(enum cpuhp_state state, const char *name,
1328                  int (*startup)(unsigned int cpu),
1329                  int (*teardown)(unsigned int cpu),
1330                  bool multi_instance)
1331 {
1332     /* (Un)Install the callbacks for further cpu hotplug operations */
1333     struct cpuhp_step *sp;
1334     int ret = 0;
1335 
1336     mutex_lock(&cpuhp_state_mutex);
1337 
1338     if (state == CPUHP_AP_ONLINE_DYN || state == CPUHP_BP_PREPARE_DYN) {
1339         ret = cpuhp_reserve_state(state);
1340         if (ret < 0)
1341             goto out;
1342         state = ret;
1343     }
1344     sp = cpuhp_get_step(state);
1345     if (name && sp->name) {
1346         ret = -EBUSY;
1347         goto out;
1348     }
1349     sp->startup.single = startup;
1350     sp->teardown.single = teardown;
1351     sp->name = name;
1352     sp->multi_instance = multi_instance;
1353     INIT_HLIST_HEAD(&sp->list);
1354 out:
1355     mutex_unlock(&cpuhp_state_mutex);
1356     return ret;
1357 }
1358 
1359 static void *cpuhp_get_teardown_cb(enum cpuhp_state state)
1360 {
1361     return cpuhp_get_step(state)->teardown.single;
1362 }
1363 
1364 /*
1365  * Call the startup/teardown function for a step either on the AP or
1366  * on the current CPU.
1367  */
1368 static int cpuhp_issue_call(int cpu, enum cpuhp_state state, bool bringup,
1369                 struct hlist_node *node)
1370 {
1371     struct cpuhp_step *sp = cpuhp_get_step(state);
1372     int ret;
1373 
1374     if ((bringup && !sp->startup.single) ||
1375         (!bringup && !sp->teardown.single))
1376         return 0;
1377     /*
1378      * The non AP bound callbacks can fail on bringup. On teardown
1379      * e.g. module removal we crash for now.
1380      */
1381 #ifdef CONFIG_SMP
1382     if (cpuhp_is_ap_state(state))
1383         ret = cpuhp_invoke_ap_callback(cpu, state, bringup, node);
1384     else
1385         ret = cpuhp_invoke_callback(cpu, state, bringup, node);
1386 #else
1387     ret = cpuhp_invoke_callback(cpu, state, bringup, node);
1388 #endif
1389     BUG_ON(ret && !bringup);
1390     return ret;
1391 }
1392 
1393 /*
1394  * Called from __cpuhp_setup_state on a recoverable failure.
1395  *
1396  * Note: The teardown callbacks for rollback are not allowed to fail!
1397  */
1398 static void cpuhp_rollback_install(int failedcpu, enum cpuhp_state state,
1399                    struct hlist_node *node)
1400 {
1401     int cpu;
1402 
1403     /* Roll back the already executed steps on the other cpus */
1404     for_each_present_cpu(cpu) {
1405         struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu);
1406         int cpustate = st->state;
1407 
1408         if (cpu >= failedcpu)
1409             break;
1410 
1411         /* Did we invoke the startup call on that cpu ? */
1412         if (cpustate >= state)
1413             cpuhp_issue_call(cpu, state, false, node);
1414     }
1415 }
1416 
1417 int __cpuhp_state_add_instance(enum cpuhp_state state, struct hlist_node *node,
1418                    bool invoke)
1419 {
1420     struct cpuhp_step *sp;
1421     int cpu;
1422     int ret;
1423 
1424     sp = cpuhp_get_step(state);
1425     if (sp->multi_instance == false)
1426         return -EINVAL;
1427 
1428     get_online_cpus();
1429 
1430     if (!invoke || !sp->startup.multi)
1431         goto add_node;
1432 
1433     /*
1434      * Try to call the startup callback for each present cpu
1435      * depending on the hotplug state of the cpu.
1436      */
1437     for_each_present_cpu(cpu) {
1438         struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu);
1439         int cpustate = st->state;
1440 
1441         if (cpustate < state)
1442             continue;
1443 
1444         ret = cpuhp_issue_call(cpu, state, true, node);
1445         if (ret) {
1446             if (sp->teardown.multi)
1447                 cpuhp_rollback_install(cpu, state, node);
1448             goto err;
1449         }
1450     }
1451 add_node:
1452     ret = 0;
1453     mutex_lock(&cpuhp_state_mutex);
1454     hlist_add_head(node, &sp->list);
1455     mutex_unlock(&cpuhp_state_mutex);
1456 
1457 err:
1458     put_online_cpus();
1459     return ret;
1460 }
1461 EXPORT_SYMBOL_GPL(__cpuhp_state_add_instance);
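
A rough, hedged sketch of the multi-instance pattern served by this function: a driver embeds a struct hlist_node in its per-device data, sets up a multi-instance state once, and then adds one instance per device. Everything prefixed my_ below is a made-up name; cpuhp_setup_state_multi() and cpuhp_state_add_instance() are the wrappers from <linux/cpuhotplug.h>.

/* Illustrative only; my_* names are placeholders. */
#include <linux/cpuhotplug.h>
#include <linux/list.h>
#include <linux/printk.h>

struct my_dev {
	struct hlist_node node;		/* handed back to the per-instance callbacks */
	/* ... per-device state ... */
};

static enum cpuhp_state my_multi_state;	/* dynamic state returned at setup time */

static int my_dev_cpu_online(unsigned int cpu, struct hlist_node *node)
{
	struct my_dev *dev = hlist_entry(node, struct my_dev, node);

	pr_info("cpu%u: bringing up instance %p\n", cpu, dev);
	return 0;
}

static int __init my_subsys_init(void)
{
	int ret;

	ret = cpuhp_setup_state_multi(CPUHP_AP_ONLINE_DYN, "my/dev:online",
				      my_dev_cpu_online, NULL);
	if (ret < 0)
		return ret;
	my_multi_state = ret;
	return 0;
}

static int my_register_dev(struct my_dev *dev)
{
	/* Invokes my_dev_cpu_online() on all CPUs already past the state. */
	return cpuhp_state_add_instance(my_multi_state, &dev->node);
}
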
1462 
1463 /**
1464  * __cpuhp_setup_state - Set up the callbacks for a hotplug machine state
1465  * @state:      The state to setup
1466  * @invoke:     If true, the startup function is invoked for cpus where
1467  *          cpu state >= @state
1468  * @startup:        startup callback function
1469  * @teardown:       teardown callback function
1470  * @multi_instance: State is set up for multiple instances which get
1471  *          added afterwards.
1472  *
1473  * Returns:
1474  *   On success:
1475  *      Positive state number if @state is CPUHP_AP_ONLINE_DYN
1476  *      0 for all other states
1477  *   On failure: proper (negative) error code
1478  */
1479 int __cpuhp_setup_state(enum cpuhp_state state,
1480             const char *name, bool invoke,
1481             int (*startup)(unsigned int cpu),
1482             int (*teardown)(unsigned int cpu),
1483             bool multi_instance)
1484 {
1485     int cpu, ret = 0;
1486     bool dynstate;
1487 
1488     if (cpuhp_cb_check(state) || !name)
1489         return -EINVAL;
1490 
1491     get_online_cpus();
1492 
1493     ret = cpuhp_store_callbacks(state, name, startup, teardown,
1494                     multi_instance);
1495 
1496     dynstate = state == CPUHP_AP_ONLINE_DYN;
1497     if (ret > 0 && dynstate) {
1498         state = ret;
1499         ret = 0;
1500     }
1501 
1502     if (ret || !invoke || !startup)
1503         goto out;
1504 
1505     /*
1506      * Try to call the startup callback for each present cpu
1507      * depending on the hotplug state of the cpu.
1508      */
1509     for_each_present_cpu(cpu) {
1510         struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu);
1511         int cpustate = st->state;
1512 
1513         if (cpustate < state)
1514             continue;
1515 
1516         ret = cpuhp_issue_call(cpu, state, true, NULL);
1517         if (ret) {
1518             if (teardown)
1519                 cpuhp_rollback_install(cpu, state, NULL);
1520             cpuhp_store_callbacks(state, NULL, NULL, NULL, false);
1521             goto out;
1522         }
1523     }
1524 out:
1525     put_online_cpus();
1526     /*
1527      * If the requested state is CPUHP_AP_ONLINE_DYN, return the
1528      * dynamically allocated state in case of success.
1529      */
1530     if (!ret && dynstate)
1531         return state;
1532     return ret;
1533 }
1534 EXPORT_SYMBOL(__cpuhp_setup_state);
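
To make the return-value convention documented above concrete, here is a hedged usage sketch built on the cpuhp_setup_state()/cpuhp_remove_state() wrappers from <linux/cpuhotplug.h>; the my_ names and the "my/subsys:online" string are invented for the example.

/* Illustrative only; my_* names are placeholders. */
#include <linux/cpuhotplug.h>
#include <linux/printk.h>

static enum cpuhp_state my_online_state;

static int my_cpu_startup(unsigned int cpu)
{
	pr_info("cpu%u reached the example online state\n", cpu);
	return 0;			/* a non-zero return triggers rollback */
}

static int my_cpu_teardown(unsigned int cpu)
{
	pr_info("cpu%u leaving the example online state\n", cpu);
	return 0;			/* teardown is not allowed to fail */
}

static int __init my_example_init(void)
{
	int ret;

	/* CPUHP_AP_ONLINE_DYN: the dynamically assigned state is returned. */
	ret = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "my/subsys:online",
				my_cpu_startup, my_cpu_teardown);
	if (ret < 0)
		return ret;
	my_online_state = ret;
	return 0;
}

static void __exit my_example_exit(void)
{
	cpuhp_remove_state(my_online_state);
}
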
1535 
1536 int __cpuhp_state_remove_instance(enum cpuhp_state state,
1537                   struct hlist_node *node, bool invoke)
1538 {
1539     struct cpuhp_step *sp = cpuhp_get_step(state);
1540     int cpu;
1541 
1542     BUG_ON(cpuhp_cb_check(state));
1543 
1544     if (!sp->multi_instance)
1545         return -EINVAL;
1546 
1547     get_online_cpus();
1548     if (!invoke || !cpuhp_get_teardown_cb(state))
1549         goto remove;
1550     /*
1551      * Call the teardown callback for each present cpu depending
1552      * on the hotplug state of the cpu. This function is not
1553      * allowed to fail currently!
1554      */
1555     for_each_present_cpu(cpu) {
1556         struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu);
1557         int cpustate = st->state;
1558 
1559         if (cpustate >= state)
1560             cpuhp_issue_call(cpu, state, false, node);
1561     }
1562 
1563 remove:
1564     mutex_lock(&cpuhp_state_mutex);
1565     hlist_del(node);
1566     mutex_unlock(&cpuhp_state_mutex);
1567     put_online_cpus();
1568 
1569     return 0;
1570 }
1571 EXPORT_SYMBOL_GPL(__cpuhp_state_remove_instance);
1572 /**
1573  * __cpuhp_remove_state - Remove the callbacks for a hotplug machine state
1574  * @state:  The state to remove
1575  * @invoke: If true, the teardown function is invoked for cpus where
1576  *      cpu state >= @state
1577  *
1578  * The teardown callback is currently not allowed to fail. Think
1579  * about module removal!
1580  */
1581 void __cpuhp_remove_state(enum cpuhp_state state, bool invoke)
1582 {
1583     struct cpuhp_step *sp = cpuhp_get_step(state);
1584     int cpu;
1585 
1586     BUG_ON(cpuhp_cb_check(state));
1587 
1588     get_online_cpus();
1589 
1590     if (sp->multi_instance) {
1591         WARN(!hlist_empty(&sp->list),
1592              "Error: Removing state %d which has instances left.\n",
1593              state);
1594         goto remove;
1595     }
1596 
1597     if (!invoke || !cpuhp_get_teardown_cb(state))
1598         goto remove;
1599 
1600     /*
1601      * Call the teardown callback for each present cpu depending
1602      * on the hotplug state of the cpu. This function is not
1603      * allowed to fail currently!
1604      */
1605     for_each_present_cpu(cpu) {
1606         struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu);
1607         int cpustate = st->state;
1608 
1609         if (cpustate >= state)
1610             cpuhp_issue_call(cpu, state, false, NULL);
1611     }
1612 remove:
1613     cpuhp_store_callbacks(state, NULL, NULL, NULL, false);
1614     put_online_cpus();
1615 }
1616 EXPORT_SYMBOL(__cpuhp_remove_state);
1617 
1618 #if defined(CONFIG_SYSFS) && defined(CONFIG_HOTPLUG_CPU)
1619 static ssize_t show_cpuhp_state(struct device *dev,
1620                 struct device_attribute *attr, char *buf)
1621 {
1622     struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, dev->id);
1623 
1624     return sprintf(buf, "%d\n", st->state);
1625 }
1626 static DEVICE_ATTR(state, 0444, show_cpuhp_state, NULL);
1627 
1628 static ssize_t write_cpuhp_target(struct device *dev,
1629                   struct device_attribute *attr,
1630                   const char *buf, size_t count)
1631 {
1632     struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, dev->id);
1633     struct cpuhp_step *sp;
1634     int target, ret;
1635 
1636     ret = kstrtoint(buf, 10, &target);
1637     if (ret)
1638         return ret;
1639 
1640 #ifdef CONFIG_CPU_HOTPLUG_STATE_CONTROL
1641     if (target < CPUHP_OFFLINE || target > CPUHP_ONLINE)
1642         return -EINVAL;
1643 #else
1644     if (target != CPUHP_OFFLINE && target != CPUHP_ONLINE)
1645         return -EINVAL;
1646 #endif
1647 
1648     ret = lock_device_hotplug_sysfs();
1649     if (ret)
1650         return ret;
1651 
1652     mutex_lock(&cpuhp_state_mutex);
1653     sp = cpuhp_get_step(target);
1654     ret = !sp->name || sp->cant_stop ? -EINVAL : 0;
1655     mutex_unlock(&cpuhp_state_mutex);
1656     if (ret)
1657         return ret;
1658 
1659     if (st->state < target)
1660         ret = do_cpu_up(dev->id, target);
1661     else
1662         ret = do_cpu_down(dev->id, target);
1663 
1664     unlock_device_hotplug();
1665     return ret ? ret : count;
1666 }
1667 
1668 static ssize_t show_cpuhp_target(struct device *dev,
1669                  struct device_attribute *attr, char *buf)
1670 {
1671     struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, dev->id);
1672 
1673     return sprintf(buf, "%d\n", st->target);
1674 }
1675 static DEVICE_ATTR(target, 0644, show_cpuhp_target, write_cpuhp_target);
1676 
1677 static struct attribute *cpuhp_cpu_attrs[] = {
1678     &dev_attr_state.attr,
1679     &dev_attr_target.attr,
1680     NULL
1681 };
1682 
1683 static struct attribute_group cpuhp_cpu_attr_group = {
1684     .attrs = cpuhp_cpu_attrs,
1685     .name = "hotplug",
1686     NULL
1687 };
1688 
1689 static ssize_t show_cpuhp_states(struct device *dev,
1690                  struct device_attribute *attr, char *buf)
1691 {
1692     ssize_t cur, res = 0;
1693     int i;
1694 
1695     mutex_lock(&cpuhp_state_mutex);
1696     for (i = CPUHP_OFFLINE; i <= CPUHP_ONLINE; i++) {
1697         struct cpuhp_step *sp = cpuhp_get_step(i);
1698 
1699         if (sp->name) {
1700             cur = sprintf(buf, "%3d: %s\n", i, sp->name);
1701             buf += cur;
1702             res += cur;
1703         }
1704     }
1705     mutex_unlock(&cpuhp_state_mutex);
1706     return res;
1707 }
1708 static DEVICE_ATTR(states, 0444, show_cpuhp_states, NULL);
1709 
1710 static struct attribute *cpuhp_cpu_root_attrs[] = {
1711     &dev_attr_states.attr,
1712     NULL
1713 };
1714 
1715 static struct attribute_group cpuhp_cpu_root_attr_group = {
1716     .attrs = cpuhp_cpu_root_attrs,
1717     .name = "hotplug",
1718     NULL
1719 };
1720 
1721 static int __init cpuhp_sysfs_init(void)
1722 {
1723     int cpu, ret;
1724 
1725     ret = sysfs_create_group(&cpu_subsys.dev_root->kobj,
1726                  &cpuhp_cpu_root_attr_group);
1727     if (ret)
1728         return ret;
1729 
1730     for_each_possible_cpu(cpu) {
1731         struct device *dev = get_cpu_device(cpu);
1732 
1733         if (!dev)
1734             continue;
1735         ret = sysfs_create_group(&dev->kobj, &cpuhp_cpu_attr_group);
1736         if (ret)
1737             return ret;
1738     }
1739     return 0;
1740 }
1741 device_initcall(cpuhp_sysfs_init);
1742 #endif
1743 
1744 /*
1745  * cpu_bit_bitmap[] is a special, "compressed" data structure that
0746  * represents all NR_CPUS-bit binary values of the form 1<<nr.
1747  *
1748  * It is used by cpumask_of() to get a constant address to a CPU
1749  * mask value that has a single bit set only.
1750  */
1751 
1752 /* cpu_bit_bitmap[0] is empty - so we can back into it */
1753 #define MASK_DECLARE_1(x)   [x+1][0] = (1UL << (x))
1754 #define MASK_DECLARE_2(x)   MASK_DECLARE_1(x), MASK_DECLARE_1(x+1)
1755 #define MASK_DECLARE_4(x)   MASK_DECLARE_2(x), MASK_DECLARE_2(x+2)
1756 #define MASK_DECLARE_8(x)   MASK_DECLARE_4(x), MASK_DECLARE_4(x+4)
1757 
1758 const unsigned long cpu_bit_bitmap[BITS_PER_LONG+1][BITS_TO_LONGS(NR_CPUS)] = {
1759 
1760     MASK_DECLARE_8(0),  MASK_DECLARE_8(8),
1761     MASK_DECLARE_8(16), MASK_DECLARE_8(24),
1762 #if BITS_PER_LONG > 32
1763     MASK_DECLARE_8(32), MASK_DECLARE_8(40),
1764     MASK_DECLARE_8(48), MASK_DECLARE_8(56),
1765 #endif
1766 };
1767 EXPORT_SYMBOL_GPL(cpu_bit_bitmap);
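
For context, a hedged sketch of how this table is consumed, modeled on get_cpu_mask() in <linux/cpumask.h> (the function name example_cpu_mask() is invented): the chosen row determines which bit of a word is set, and the pointer is then moved back by whole words so that the bit lands in the right word of the returned mask.

/* Sketch modeled on get_cpu_mask() from <linux/cpumask.h>; illustrative only. */
#include <linux/cpumask.h>

static inline const struct cpumask *example_cpu_mask(unsigned int cpu)
{
	/* Row 1 + (cpu % BITS_PER_LONG) has exactly bit (cpu % BITS_PER_LONG)
	 * set in its first word; all of its other words are zero. */
	const unsigned long *p = cpu_bit_bitmap[1 + cpu % BITS_PER_LONG];

	/* Step back by cpu / BITS_PER_LONG words (into the zero words of the
	 * preceding rows, ultimately the empty row 0) so the set bit shows up
	 * in word cpu / BITS_PER_LONG of the mask that is returned. */
	p -= cpu / BITS_PER_LONG;
	return to_cpumask(p);
}
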
1768 
1769 const DECLARE_BITMAP(cpu_all_bits, NR_CPUS) = CPU_BITS_ALL;
1770 EXPORT_SYMBOL(cpu_all_bits);
1771 
1772 #ifdef CONFIG_INIT_ALL_POSSIBLE
1773 struct cpumask __cpu_possible_mask __read_mostly
1774     = {CPU_BITS_ALL};
1775 #else
1776 struct cpumask __cpu_possible_mask __read_mostly;
1777 #endif
1778 EXPORT_SYMBOL(__cpu_possible_mask);
1779 
1780 struct cpumask __cpu_online_mask __read_mostly;
1781 EXPORT_SYMBOL(__cpu_online_mask);
1782 
1783 struct cpumask __cpu_present_mask __read_mostly;
1784 EXPORT_SYMBOL(__cpu_present_mask);
1785 
1786 struct cpumask __cpu_active_mask __read_mostly;
1787 EXPORT_SYMBOL(__cpu_active_mask);
1788 
1789 void init_cpu_present(const struct cpumask *src)
1790 {
1791     cpumask_copy(&__cpu_present_mask, src);
1792 }
1793 
1794 void init_cpu_possible(const struct cpumask *src)
1795 {
1796     cpumask_copy(&__cpu_possible_mask, src);
1797 }
1798 
1799 void init_cpu_online(const struct cpumask *src)
1800 {
1801     cpumask_copy(&__cpu_online_mask, src);
1802 }
1803 
1804 /*
1805  * Activate the first processor.
1806  */
1807 void __init boot_cpu_init(void)
1808 {
1809     int cpu = smp_processor_id();
1810 
1811     /* Mark the boot cpu "present", "online" etc for SMP and UP case */
1812     set_cpu_online(cpu, true);
1813     set_cpu_active(cpu, true);
1814     set_cpu_present(cpu, true);
1815     set_cpu_possible(cpu, true);
1816 }
1817 
1818 /*
1819  * Must be called _AFTER_ setting up the per_cpu areas
1820  */
1821 void __init boot_cpu_state_init(void)
1822 {
1823     per_cpu_ptr(&cpuhp_state, smp_processor_id())->state = CPUHP_ONLINE;
1824 }