0001 // SPDX-License-Identifier: GPL-2.0
0002 /*
0003  * This file contains functions which emulate a local clock-event
0004  * device via a broadcast event source.
0005  *
0006  * Copyright(C) 2005-2006, Thomas Gleixner <tglx@linutronix.de>
0007  * Copyright(C) 2005-2007, Red Hat, Inc., Ingo Molnar
0008  * Copyright(C) 2006-2007, Timesys Corp., Thomas Gleixner
0009  */
0010 #include <linux/cpu.h>
0011 #include <linux/err.h>
0012 #include <linux/hrtimer.h>
0013 #include <linux/interrupt.h>
0014 #include <linux/percpu.h>
0015 #include <linux/profile.h>
0016 #include <linux/sched.h>
0017 #include <linux/smp.h>
0018 #include <linux/module.h>
0019 
0020 #include "tick-internal.h"
0021 
0022 /*
0023  * Broadcast support for broken x86 hardware, where the local apic
0024  * timer stops in C3 state.
0025  */
0026 
0027 static struct tick_device tick_broadcast_device;
0028 static cpumask_var_t tick_broadcast_mask __cpumask_var_read_mostly;
0029 static cpumask_var_t tick_broadcast_on __cpumask_var_read_mostly;
0030 static cpumask_var_t tmpmask __cpumask_var_read_mostly;
0031 static int tick_broadcast_forced;
0032 
0033 static __cacheline_aligned_in_smp DEFINE_RAW_SPINLOCK(tick_broadcast_lock);
0034 
0035 #ifdef CONFIG_TICK_ONESHOT
0036 static DEFINE_PER_CPU(struct clock_event_device *, tick_oneshot_wakeup_device);
0037 
0038 static void tick_broadcast_setup_oneshot(struct clock_event_device *bc);
0039 static void tick_broadcast_clear_oneshot(int cpu);
0040 static void tick_resume_broadcast_oneshot(struct clock_event_device *bc);
0041 # ifdef CONFIG_HOTPLUG_CPU
0042 static void tick_broadcast_oneshot_offline(unsigned int cpu);
0043 # endif
0044 #else
0045 static inline void tick_broadcast_setup_oneshot(struct clock_event_device *bc) { BUG(); }
0046 static inline void tick_broadcast_clear_oneshot(int cpu) { }
0047 static inline void tick_resume_broadcast_oneshot(struct clock_event_device *bc) { }
0048 # ifdef CONFIG_HOTPLUG_CPU
0049 static inline void tick_broadcast_oneshot_offline(unsigned int cpu) { }
0050 # endif
0051 #endif
0052 
0053 /*
0054  * Debugging: see timer_list.c
0055  */
0056 struct tick_device *tick_get_broadcast_device(void)
0057 {
0058     return &tick_broadcast_device;
0059 }
0060 
0061 struct cpumask *tick_get_broadcast_mask(void)
0062 {
0063     return tick_broadcast_mask;
0064 }
0065 
0066 static struct clock_event_device *tick_get_oneshot_wakeup_device(int cpu);
0067 
0068 const struct clock_event_device *tick_get_wakeup_device(int cpu)
0069 {
0070     return tick_get_oneshot_wakeup_device(cpu);
0071 }
0072 
0073 /*
0074  * Start the device in periodic mode
0075  */
0076 static void tick_broadcast_start_periodic(struct clock_event_device *bc)
0077 {
0078     if (bc)
0079         tick_setup_periodic(bc, 1);
0080 }
0081 
0082 /*
0083  * Check whether the device can be utilized as the broadcast device:
0084  */
0085 static bool tick_check_broadcast_device(struct clock_event_device *curdev,
0086                     struct clock_event_device *newdev)
0087 {
0088     if ((newdev->features & CLOCK_EVT_FEAT_DUMMY) ||
0089         (newdev->features & CLOCK_EVT_FEAT_PERCPU) ||
0090         (newdev->features & CLOCK_EVT_FEAT_C3STOP))
0091         return false;
0092 
0093     if (tick_broadcast_device.mode == TICKDEV_MODE_ONESHOT &&
0094         !(newdev->features & CLOCK_EVT_FEAT_ONESHOT))
0095         return false;
0096 
0097     return !curdev || newdev->rating > curdev->rating;
0098 }
0099 
0100 #ifdef CONFIG_TICK_ONESHOT
0101 static struct clock_event_device *tick_get_oneshot_wakeup_device(int cpu)
0102 {
0103     return per_cpu(tick_oneshot_wakeup_device, cpu);
0104 }
0105 
0106 static void tick_oneshot_wakeup_handler(struct clock_event_device *wd)
0107 {
0108     /*
0109      * If we woke up early and the tick was reprogrammed in the
0110      * meantime then this may be spurious but harmless.
0111      */
0112     tick_receive_broadcast();
0113 }
0114 
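     /*
      * Check whether @newdev qualifies as the per-cpu oneshot wakeup device
      * for @cpu and install it if it is better than the current one.
      */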
0115 static bool tick_set_oneshot_wakeup_device(struct clock_event_device *newdev,
0116                        int cpu)
0117 {
0118     struct clock_event_device *curdev = tick_get_oneshot_wakeup_device(cpu);
0119 
0120     if (!newdev)
0121         goto set_device;
0122 
0123     if ((newdev->features & CLOCK_EVT_FEAT_DUMMY) ||
0124         (newdev->features & CLOCK_EVT_FEAT_C3STOP))
0125          return false;
0126 
0127     if (!(newdev->features & CLOCK_EVT_FEAT_PERCPU) ||
0128         !(newdev->features & CLOCK_EVT_FEAT_ONESHOT))
0129         return false;
0130 
0131     if (!cpumask_equal(newdev->cpumask, cpumask_of(cpu)))
0132         return false;
0133 
0134     if (curdev && newdev->rating <= curdev->rating)
0135         return false;
0136 
0137     if (!try_module_get(newdev->owner))
0138         return false;
0139 
0140     newdev->event_handler = tick_oneshot_wakeup_handler;
0141 set_device:
0142     clockevents_exchange_device(curdev, newdev);
0143     per_cpu(tick_oneshot_wakeup_device, cpu) = newdev;
0144     return true;
0145 }
0146 #else
0147 static struct clock_event_device *tick_get_oneshot_wakeup_device(int cpu)
0148 {
0149     return NULL;
0150 }
0151 
0152 static bool tick_set_oneshot_wakeup_device(struct clock_event_device *newdev,
0153                        int cpu)
0154 {
0155     return false;
0156 }
0157 #endif
0158 
0159 /*
0160  * Conditionally install/replace broadcast device
0161  */
0162 void tick_install_broadcast_device(struct clock_event_device *dev, int cpu)
0163 {
0164     struct clock_event_device *cur = tick_broadcast_device.evtdev;
0165 
0166     if (tick_set_oneshot_wakeup_device(dev, cpu))
0167         return;
0168 
0169     if (!tick_check_broadcast_device(cur, dev))
0170         return;
0171 
0172     if (!try_module_get(dev->owner))
0173         return;
0174 
0175     clockevents_exchange_device(cur, dev);
0176     if (cur)
0177         cur->event_handler = clockevents_handle_noop;
0178     tick_broadcast_device.evtdev = dev;
0179     if (!cpumask_empty(tick_broadcast_mask))
0180         tick_broadcast_start_periodic(dev);
0181 
0182     if (!(dev->features & CLOCK_EVT_FEAT_ONESHOT))
0183         return;
0184 
0185     /*
0186      * If the system already runs in oneshot mode, switch the newly
0187      * registered broadcast device to oneshot mode explicitly.
0188      */
0189     if (tick_broadcast_oneshot_active()) {
0190         tick_broadcast_switch_to_oneshot();
0191         return;
0192     }
0193 
0194     /*
0195      * Inform all cpus about this. We might be in a situation
0196      * where we did not switch to oneshot mode because the per cpu
0197      * devices are affected by CLOCK_EVT_FEAT_C3STOP and the lack
0198      * of a oneshot capable broadcast device. Without that
0199      * notification the system stays stuck in periodic mode
0200      * forever.
0201      */
0202     tick_clock_notify();
0203 }
0204 
0205 /*
0206  * Check whether the device is the broadcast device
0207  */
0208 int tick_is_broadcast_device(struct clock_event_device *dev)
0209 {
0210     return (dev && tick_broadcast_device.evtdev == dev);
0211 }
0212 
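     /*
      * Update the frequency of the broadcast device. Returns -ENODEV when
      * @dev is not the current broadcast device.
      */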
0213 int tick_broadcast_update_freq(struct clock_event_device *dev, u32 freq)
0214 {
0215     int ret = -ENODEV;
0216 
0217     if (tick_is_broadcast_device(dev)) {
0218         raw_spin_lock(&tick_broadcast_lock);
0219         ret = __clockevents_update_freq(dev, freq);
0220         raw_spin_unlock(&tick_broadcast_lock);
0221     }
0222     return ret;
0223 }
0224 
0225 
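     /*
      * Fallback broadcast function for devices which depend on broadcast
      * while no real broadcast mechanism is available.
      */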
0226 static void err_broadcast(const struct cpumask *mask)
0227 {
0228     pr_crit_once("Failed to broadcast timer tick. Some CPUs may be unresponsive.\n");
0229 }
0230 
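     /*
      * Ensure that the device has a broadcast function, falling back to
      * err_broadcast() if none is available.
      */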
0231 static void tick_device_setup_broadcast_func(struct clock_event_device *dev)
0232 {
0233     if (!dev->broadcast)
0234         dev->broadcast = tick_broadcast;
0235     if (!dev->broadcast) {
0236         pr_warn_once("%s depends on broadcast, but no broadcast function available\n",
0237                  dev->name);
0238         dev->broadcast = err_broadcast;
0239     }
0240 }
0241 
0242 /*
0243  * Check whether the device is dysfunctional and a placeholder which
0244  * needs to be handled by the broadcast device.
0245  */
0246 int tick_device_uses_broadcast(struct clock_event_device *dev, int cpu)
0247 {
0248     struct clock_event_device *bc = tick_broadcast_device.evtdev;
0249     unsigned long flags;
0250     int ret = 0;
0251 
0252     raw_spin_lock_irqsave(&tick_broadcast_lock, flags);
0253 
0254     /*
0255      * Devices might be registered with both periodic and oneshot
0256      * mode disabled. This signals that the device needs to be
0257      * operated by the broadcast device and is a placeholder for
0258      * the cpu local device.
0259      */
0260     if (!tick_device_is_functional(dev)) {
0261         dev->event_handler = tick_handle_periodic;
0262         tick_device_setup_broadcast_func(dev);
0263         cpumask_set_cpu(cpu, tick_broadcast_mask);
0264         if (tick_broadcast_device.mode == TICKDEV_MODE_PERIODIC)
0265             tick_broadcast_start_periodic(bc);
0266         else
0267             tick_broadcast_setup_oneshot(bc);
0268         ret = 1;
0269     } else {
0270         /*
0271          * Clear the broadcast bit for this cpu if the
0272          * device is not power state affected.
0273          */
0274         if (!(dev->features & CLOCK_EVT_FEAT_C3STOP))
0275             cpumask_clear_cpu(cpu, tick_broadcast_mask);
0276         else
0277             tick_device_setup_broadcast_func(dev);
0278 
0279         /*
0280          * Clear the broadcast bit if the CPU is not in
0281          * periodic broadcast on state.
0282          */
0283         if (!cpumask_test_cpu(cpu, tick_broadcast_on))
0284             cpumask_clear_cpu(cpu, tick_broadcast_mask);
0285 
0286         switch (tick_broadcast_device.mode) {
0287         case TICKDEV_MODE_ONESHOT:
0288             /*
0289              * If the system is in oneshot mode we can
0290              * unconditionally clear the oneshot mask bit,
0291              * because the CPU is running and therefore
0292              * not in an idle state which causes the power
0293              * state affected device to stop. Let the
0294              * caller initialize the device.
0295              */
0296             tick_broadcast_clear_oneshot(cpu);
0297             ret = 0;
0298             break;
0299 
0300         case TICKDEV_MODE_PERIODIC:
0301             /*
0302              * If the system is in periodic mode, check
0303              * whether the broadcast device can be
0304              * switched off now.
0305              */
0306             if (cpumask_empty(tick_broadcast_mask) && bc)
0307                 clockevents_shutdown(bc);
0308             /*
0309              * If we kept the cpu in the broadcast mask,
0310              * tell the caller to leave the per cpu device
0311              * in shutdown state. The periodic interrupt
0312              * is delivered by the broadcast device, if
0313              * the broadcast device exists and is not
0314              * hrtimer based.
0315              */
0316             if (bc && !(bc->features & CLOCK_EVT_FEAT_HRTIMER))
0317                 ret = cpumask_test_cpu(cpu, tick_broadcast_mask);
0318             break;
0319         default:
0320             break;
0321         }
0322     }
0323     raw_spin_unlock_irqrestore(&tick_broadcast_lock, flags);
0324     return ret;
0325 }
0326 
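     /*
      * Called from the broadcast IPI / wakeup device path: invoke the event
      * handler of the cpu local tick device.
      */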
0327 int tick_receive_broadcast(void)
0328 {
0329     struct tick_device *td = this_cpu_ptr(&tick_cpu_device);
0330     struct clock_event_device *evt = td->evtdev;
0331 
0332     if (!evt)
0333         return -ENODEV;
0334 
0335     if (!evt->event_handler)
0336         return -EINVAL;
0337 
0338     evt->event_handler(evt);
0339     return 0;
0340 }
0341 
0342 /*
0343  * Broadcast the event to the cpus which are set in the mask (mangled).
0344  */
0345 static bool tick_do_broadcast(struct cpumask *mask)
0346 {
0347     int cpu = smp_processor_id();
0348     struct tick_device *td;
0349     bool local = false;
0350 
0351     /*
0352      * Check whether the current cpu is in the mask
0353      */
0354     if (cpumask_test_cpu(cpu, mask)) {
0355         struct clock_event_device *bc = tick_broadcast_device.evtdev;
0356 
0357         cpumask_clear_cpu(cpu, mask);
0358         /*
0359          * We only run the local handler, if the broadcast
0360          * device is not hrtimer based. Otherwise we run into
0361          * a hrtimer recursion.
0362          *
0363          * local timer_interrupt()
0364          *   local_handler()
0365          *     expire_hrtimers()
0366          *       bc_handler()
0367          *         local_handler()
0368          *       expire_hrtimers()
0369          */
0370         local = !(bc->features & CLOCK_EVT_FEAT_HRTIMER);
0371     }
0372 
0373     if (!cpumask_empty(mask)) {
0374         /*
0375          * It might be necessary to actually check whether the devices
0376          * have different broadcast functions. For now, just use the
0377          * broadcast function of the first device. This works as long
0378          * as we have this misfeature only on x86 (lapic).
0379          */
0380         td = &per_cpu(tick_cpu_device, cpumask_first(mask));
0381         td->evtdev->broadcast(mask);
0382     }
0383     return local;
0384 }
0385 
0386 /*
0387  * Periodic broadcast:
0388  * - invoke the broadcast handlers
0389  */
0390 static bool tick_do_periodic_broadcast(void)
0391 {
0392     cpumask_and(tmpmask, cpu_online_mask, tick_broadcast_mask);
0393     return tick_do_broadcast(tmpmask);
0394 }
0395 
0396 /*
0397  * Event handler for periodic broadcast ticks
0398  */
0399 static void tick_handle_periodic_broadcast(struct clock_event_device *dev)
0400 {
0401     struct tick_device *td = this_cpu_ptr(&tick_cpu_device);
0402     bool bc_local;
0403 
0404     raw_spin_lock(&tick_broadcast_lock);
0405 
0406     /* Handle spurious interrupts gracefully */
0407     if (clockevent_state_shutdown(tick_broadcast_device.evtdev)) {
0408         raw_spin_unlock(&tick_broadcast_lock);
0409         return;
0410     }
0411 
0412     bc_local = tick_do_periodic_broadcast();
0413 
0414     if (clockevent_state_oneshot(dev)) {
0415         ktime_t next = ktime_add_ns(dev->next_event, TICK_NSEC);
0416 
0417         clockevents_program_event(dev, next, true);
0418     }
0419     raw_spin_unlock(&tick_broadcast_lock);
0420 
0421     /*
0422      * We run the handler of the local cpu after dropping
0423      * tick_broadcast_lock because the handler might deadlock when
0424      * trying to switch to oneshot mode.
0425      */
0426     if (bc_local)
0427         td->evtdev->event_handler(td->evtdev);
0428 }
0429 
0430 /**
0431  * tick_broadcast_control - Enable/disable or force broadcast mode
0432  * @mode:   The selected broadcast mode
0433  *
0434  * Called when the system enters a state where affected tick devices
0435  * might stop. Note: TICK_BROADCAST_FORCE cannot be undone.
0436  */
0437 void tick_broadcast_control(enum tick_broadcast_mode mode)
0438 {
0439     struct clock_event_device *bc, *dev;
0440     struct tick_device *td;
0441     int cpu, bc_stopped;
0442     unsigned long flags;
0443 
0444     /* Protects also the local clockevent device. */
0445     raw_spin_lock_irqsave(&tick_broadcast_lock, flags);
0446     td = this_cpu_ptr(&tick_cpu_device);
0447     dev = td->evtdev;
0448 
0449     /*
0450      * Is the device not affected by the powerstate?
0451      */
0452     if (!dev || !(dev->features & CLOCK_EVT_FEAT_C3STOP))
0453         goto out;
0454 
0455     if (!tick_device_is_functional(dev))
0456         goto out;
0457 
0458     cpu = smp_processor_id();
0459     bc = tick_broadcast_device.evtdev;
0460     bc_stopped = cpumask_empty(tick_broadcast_mask);
0461 
0462     switch (mode) {
0463     case TICK_BROADCAST_FORCE:
0464         tick_broadcast_forced = 1;
0465         fallthrough;
0466     case TICK_BROADCAST_ON:
0467         cpumask_set_cpu(cpu, tick_broadcast_on);
0468         if (!cpumask_test_and_set_cpu(cpu, tick_broadcast_mask)) {
0469             /*
0470              * Only shutdown the cpu local device, if:
0471              *
0472              * - the broadcast device exists
0473              * - the broadcast device is not a hrtimer based one
0474              * - the broadcast device is in periodic mode to
0475              *   avoid a hiccup during switch to oneshot mode
0476              */
0477             if (bc && !(bc->features & CLOCK_EVT_FEAT_HRTIMER) &&
0478                 tick_broadcast_device.mode == TICKDEV_MODE_PERIODIC)
0479                 clockevents_shutdown(dev);
0480         }
0481         break;
0482 
0483     case TICK_BROADCAST_OFF:
0484         if (tick_broadcast_forced)
0485             break;
0486         cpumask_clear_cpu(cpu, tick_broadcast_on);
0487         if (cpumask_test_and_clear_cpu(cpu, tick_broadcast_mask)) {
0488             if (tick_broadcast_device.mode ==
0489                 TICKDEV_MODE_PERIODIC)
0490                 tick_setup_periodic(dev, 0);
0491         }
0492         break;
0493     }
0494 
0495     if (bc) {
0496         if (cpumask_empty(tick_broadcast_mask)) {
0497             if (!bc_stopped)
0498                 clockevents_shutdown(bc);
0499         } else if (bc_stopped) {
0500             if (tick_broadcast_device.mode == TICKDEV_MODE_PERIODIC)
0501                 tick_broadcast_start_periodic(bc);
0502             else
0503                 tick_broadcast_setup_oneshot(bc);
0504         }
0505     }
0506 out:
0507     raw_spin_unlock_irqrestore(&tick_broadcast_lock, flags);
0508 }
0509 EXPORT_SYMBOL_GPL(tick_broadcast_control);
0510 
0511 /*
0512  * Set the periodic handler depending on broadcast on/off
0513  */
0514 void tick_set_periodic_handler(struct clock_event_device *dev, int broadcast)
0515 {
0516     if (!broadcast)
0517         dev->event_handler = tick_handle_periodic;
0518     else
0519         dev->event_handler = tick_handle_periodic_broadcast;
0520 }
0521 
0522 #ifdef CONFIG_HOTPLUG_CPU
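     /*
      * Shut down the broadcast device in periodic mode once no CPU is left
      * in the broadcast mask.
      */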
0523 static void tick_shutdown_broadcast(void)
0524 {
0525     struct clock_event_device *bc = tick_broadcast_device.evtdev;
0526 
0527     if (tick_broadcast_device.mode == TICKDEV_MODE_PERIODIC) {
0528         if (bc && cpumask_empty(tick_broadcast_mask))
0529             clockevents_shutdown(bc);
0530     }
0531 }
0532 
0533 /*
0534  * Remove a CPU from broadcasting
0535  */
0536 void tick_broadcast_offline(unsigned int cpu)
0537 {
0538     raw_spin_lock(&tick_broadcast_lock);
0539     cpumask_clear_cpu(cpu, tick_broadcast_mask);
0540     cpumask_clear_cpu(cpu, tick_broadcast_on);
0541     tick_broadcast_oneshot_offline(cpu);
0542     tick_shutdown_broadcast();
0543     raw_spin_unlock(&tick_broadcast_lock);
0544 }
0545 
0546 #endif
0547 
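     /*
      * Shut down the broadcast device on suspend.
      */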
0548 void tick_suspend_broadcast(void)
0549 {
0550     struct clock_event_device *bc;
0551     unsigned long flags;
0552 
0553     raw_spin_lock_irqsave(&tick_broadcast_lock, flags);
0554 
0555     bc = tick_broadcast_device.evtdev;
0556     if (bc)
0557         clockevents_shutdown(bc);
0558 
0559     raw_spin_unlock_irqrestore(&tick_broadcast_lock, flags);
0560 }
0561 
0562 /*
0563  * This is called from tick_resume_local() on a resuming CPU. That's
0564  * called from the core resume function, tick_unfreeze() and the magic XEN
0565  * resume hackery.
0566  *
0567  * In none of these cases can the broadcast device mode change, and the
0568  * bit of the resuming CPU in the broadcast mask is safe as well.
0569  */
0570 bool tick_resume_check_broadcast(void)
0571 {
0572     if (tick_broadcast_device.mode == TICKDEV_MODE_ONESHOT)
0573         return false;
0574     else
0575         return cpumask_test_cpu(smp_processor_id(), tick_broadcast_mask);
0576 }
0577 
0578 void tick_resume_broadcast(void)
0579 {
0580     struct clock_event_device *bc;
0581     unsigned long flags;
0582 
0583     raw_spin_lock_irqsave(&tick_broadcast_lock, flags);
0584 
0585     bc = tick_broadcast_device.evtdev;
0586 
0587     if (bc) {
0588         clockevents_tick_resume(bc);
0589 
0590         switch (tick_broadcast_device.mode) {
0591         case TICKDEV_MODE_PERIODIC:
0592             if (!cpumask_empty(tick_broadcast_mask))
0593                 tick_broadcast_start_periodic(bc);
0594             break;
0595         case TICKDEV_MODE_ONESHOT:
0596             if (!cpumask_empty(tick_broadcast_mask))
0597                 tick_resume_broadcast_oneshot(bc);
0598             break;
0599         }
0600     }
0601     raw_spin_unlock_irqrestore(&tick_broadcast_lock, flags);
0602 }
0603 
0604 #ifdef CONFIG_TICK_ONESHOT
0605 
0606 static cpumask_var_t tick_broadcast_oneshot_mask __cpumask_var_read_mostly;
0607 static cpumask_var_t tick_broadcast_pending_mask __cpumask_var_read_mostly;
0608 static cpumask_var_t tick_broadcast_force_mask __cpumask_var_read_mostly;
0609 
0610 /*
0611  * Exposed for debugging: see timer_list.c
0612  */
0613 struct cpumask *tick_get_broadcast_oneshot_mask(void)
0614 {
0615     return tick_broadcast_oneshot_mask;
0616 }
0617 
0618 /*
0619  * Called before going idle with interrupts disabled. Checks whether a
0620  * broadcast event from the other core is about to happen. We detected
0621  * that in tick_broadcast_oneshot_control(). The callsite can use this
0622  * to avoid a deep idle transition as we are about to get the
0623  * broadcast IPI right away.
0624  */
0625 int tick_check_broadcast_expired(void)
0626 {
0627     return cpumask_test_cpu(smp_processor_id(), tick_broadcast_force_mask);
0628 }
0629 
0630 /*
0631  * Set broadcast interrupt affinity
0632  */
0633 static void tick_broadcast_set_affinity(struct clock_event_device *bc,
0634                     const struct cpumask *cpumask)
0635 {
0636     if (!(bc->features & CLOCK_EVT_FEAT_DYNIRQ))
0637         return;
0638 
0639     if (cpumask_equal(bc->cpumask, cpumask))
0640         return;
0641 
0642     bc->cpumask = cpumask;
0643     irq_set_affinity(bc->irq, bc->cpumask);
0644 }
0645 
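     /*
      * Program the broadcast device for @expires and, if the device supports
      * dynamic irq affinity, steer its interrupt to @cpu.
      */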
0646 static void tick_broadcast_set_event(struct clock_event_device *bc, int cpu,
0647                      ktime_t expires)
0648 {
0649     if (!clockevent_state_oneshot(bc))
0650         clockevents_switch_state(bc, CLOCK_EVT_STATE_ONESHOT);
0651 
0652     clockevents_program_event(bc, expires, 1);
0653     tick_broadcast_set_affinity(bc, cpumask_of(cpu));
0654 }
0655 
0656 static void tick_resume_broadcast_oneshot(struct clock_event_device *bc)
0657 {
0658     clockevents_switch_state(bc, CLOCK_EVT_STATE_ONESHOT);
0659 }
0660 
0661 /*
0662  * Called from irq_enter() when idle was interrupted to reenable the
0663  * per cpu device.
0664  */
0665 void tick_check_oneshot_broadcast_this_cpu(void)
0666 {
0667     if (cpumask_test_cpu(smp_processor_id(), tick_broadcast_oneshot_mask)) {
0668         struct tick_device *td = this_cpu_ptr(&tick_cpu_device);
0669 
0670         /*
0671          * We might be in the middle of switching over from
0672          * periodic to oneshot. If the CPU has not yet
0673          * switched over, leave the device alone.
0674          */
0675         if (td->mode == TICKDEV_MODE_ONESHOT) {
0676             clockevents_switch_state(td->evtdev,
0677                           CLOCK_EVT_STATE_ONESHOT);
0678         }
0679     }
0680 }
0681 
0682 /*
0683  * Handle oneshot mode broadcasting
0684  */
0685 static void tick_handle_oneshot_broadcast(struct clock_event_device *dev)
0686 {
0687     struct tick_device *td;
0688     ktime_t now, next_event;
0689     int cpu, next_cpu = 0;
0690     bool bc_local;
0691 
0692     raw_spin_lock(&tick_broadcast_lock);
0693     dev->next_event = KTIME_MAX;
0694     next_event = KTIME_MAX;
0695     cpumask_clear(tmpmask);
0696     now = ktime_get();
0697     /* Find all expired events */
0698     for_each_cpu(cpu, tick_broadcast_oneshot_mask) {
0699         /*
0700          * Required for !SMP because for_each_cpu() unconditionally
0701          * reports CPU0 as set on UP kernels.
0702          */
0703         if (!IS_ENABLED(CONFIG_SMP) &&
0704             cpumask_empty(tick_broadcast_oneshot_mask))
0705             break;
0706 
0707         td = &per_cpu(tick_cpu_device, cpu);
0708         if (td->evtdev->next_event <= now) {
0709             cpumask_set_cpu(cpu, tmpmask);
0710             /*
0711              * Mark the remote cpu in the pending mask, so
0712              * it can avoid reprogramming the cpu local
0713              * timer in tick_broadcast_oneshot_control().
0714              */
0715             cpumask_set_cpu(cpu, tick_broadcast_pending_mask);
0716         } else if (td->evtdev->next_event < next_event) {
0717             next_event = td->evtdev->next_event;
0718             next_cpu = cpu;
0719         }
0720     }
0721 
0722     /*
0723      * Remove the current cpu from the pending mask. The event is
0724      * delivered immediately in tick_do_broadcast()!
0725      */
0726     cpumask_clear_cpu(smp_processor_id(), tick_broadcast_pending_mask);
0727 
0728     /* Take care of enforced broadcast requests */
0729     cpumask_or(tmpmask, tmpmask, tick_broadcast_force_mask);
0730     cpumask_clear(tick_broadcast_force_mask);
0731 
0732     /*
0733      * Sanity check. Catch the case where we try to broadcast to
0734      * offline cpus.
0735      */
0736     if (WARN_ON_ONCE(!cpumask_subset(tmpmask, cpu_online_mask)))
0737         cpumask_and(tmpmask, tmpmask, cpu_online_mask);
0738 
0739     /*
0740      * Wakeup the cpus which have an expired event.
0741      */
0742     bc_local = tick_do_broadcast(tmpmask);
0743 
0744     /*
0745      * Two reasons to reprogram:
0746      *
0747      * - The global event did not expire any CPU local
0748      * events. This happens in dyntick mode, as the maximum PIT
0749      * delta is quite small.
0750      *
0751      * - There are pending events on sleeping CPUs which were not
0752      * in the event mask
0753      */
0754     if (next_event != KTIME_MAX)
0755         tick_broadcast_set_event(dev, next_cpu, next_event);
0756 
0757     raw_spin_unlock(&tick_broadcast_lock);
0758 
0759     if (bc_local) {
0760         td = this_cpu_ptr(&tick_cpu_device);
0761         td->evtdev->event_handler(td->evtdev);
0762     }
0763 }
0764 
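     /*
      * For hrtimer based broadcast: check whether the armed broadcast timer
      * is bound to @cpu. Such a CPU must not go deep idle, so -EBUSY is
      * returned in that case.
      */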
0765 static int broadcast_needs_cpu(struct clock_event_device *bc, int cpu)
0766 {
0767     if (!(bc->features & CLOCK_EVT_FEAT_HRTIMER))
0768         return 0;
0769     if (bc->next_event == KTIME_MAX)
0770         return 0;
0771     return bc->bound_on == cpu ? -EBUSY : 0;
0772 }
0773 
0774 static void broadcast_shutdown_local(struct clock_event_device *bc,
0775                      struct clock_event_device *dev)
0776 {
0777     /*
0778      * For hrtimer based broadcasting we cannot shutdown the cpu
0779      * local device if our own event is the first one to expire or
0780      * if we own the broadcast timer.
0781      */
0782     if (bc->features & CLOCK_EVT_FEAT_HRTIMER) {
0783         if (broadcast_needs_cpu(bc, smp_processor_id()))
0784             return;
0785         if (dev->next_event < bc->next_event)
0786             return;
0787     }
0788     clockevents_switch_state(dev, CLOCK_EVT_STATE_SHUTDOWN);
0789 }
0790 
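     /*
      * Core of the oneshot broadcast idle enter/exit handling. Returns
      * -EBUSY if the CPU must not go into deep idle, 0 otherwise.
      */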
0791 static int ___tick_broadcast_oneshot_control(enum tick_broadcast_state state,
0792                          struct tick_device *td,
0793                          int cpu)
0794 {
0795     struct clock_event_device *bc, *dev = td->evtdev;
0796     int ret = 0;
0797     ktime_t now;
0798 
0799     raw_spin_lock(&tick_broadcast_lock);
0800     bc = tick_broadcast_device.evtdev;
0801 
0802     if (state == TICK_BROADCAST_ENTER) {
0803         /*
0804          * If the current CPU owns the hrtimer broadcast
0805          * mechanism, it cannot go deep idle and we do not add
0806          * the CPU to the broadcast mask. We don't have to go
0807          * through the EXIT path as the local timer is not
0808          * shutdown.
0809          */
0810         ret = broadcast_needs_cpu(bc, cpu);
0811         if (ret)
0812             goto out;
0813 
0814         /*
0815          * If the broadcast device is in periodic mode, we
0816          * return.
0817          */
0818         if (tick_broadcast_device.mode == TICKDEV_MODE_PERIODIC) {
0819             /* If it is a hrtimer based broadcast, return busy */
0820             if (bc->features & CLOCK_EVT_FEAT_HRTIMER)
0821                 ret = -EBUSY;
0822             goto out;
0823         }
0824 
0825         if (!cpumask_test_and_set_cpu(cpu, tick_broadcast_oneshot_mask)) {
0826             WARN_ON_ONCE(cpumask_test_cpu(cpu, tick_broadcast_pending_mask));
0827 
0828             /* Conditionally shut down the local timer. */
0829             broadcast_shutdown_local(bc, dev);
0830 
0831             /*
0832              * We only reprogram the broadcast timer if we
0833              * did not mark ourselves in the force mask and
0834              * if the cpu local event is earlier than the
0835              * broadcast event. If the current CPU is in
0836              * the force mask, then we are going to be
0837              * woken by the IPI right away; we return
0838              * busy, so the CPU does not try to go deep
0839              * idle.
0840              */
0841             if (cpumask_test_cpu(cpu, tick_broadcast_force_mask)) {
0842                 ret = -EBUSY;
0843             } else if (dev->next_event < bc->next_event) {
0844                 tick_broadcast_set_event(bc, cpu, dev->next_event);
0845                 /*
0846                  * In case of hrtimer broadcasts the
0847                  * programming might have moved the
0848                  * timer to this cpu. If yes, remove
0849                  * us from the broadcast mask and
0850                  * return busy.
0851                  */
0852                 ret = broadcast_needs_cpu(bc, cpu);
0853                 if (ret) {
0854                     cpumask_clear_cpu(cpu,
0855                         tick_broadcast_oneshot_mask);
0856                 }
0857             }
0858         }
0859     } else {
0860         if (cpumask_test_and_clear_cpu(cpu, tick_broadcast_oneshot_mask)) {
0861             clockevents_switch_state(dev, CLOCK_EVT_STATE_ONESHOT);
0862             /*
0863              * The cpu which was handling the broadcast
0864              * timer marked this cpu in the broadcast
0865              * pending mask and fired the broadcast
0866              * IPI. So we are going to handle the expired
0867              * event anyway via the broadcast IPI
0868              * handler. No need to reprogram the timer
0869              * with an already expired event.
0870              */
0871             if (cpumask_test_and_clear_cpu(cpu,
0872                        tick_broadcast_pending_mask))
0873                 goto out;
0874 
0875             /*
0876              * Bail out if there is no next event.
0877              */
0878             if (dev->next_event == KTIME_MAX)
0879                 goto out;
0880             /*
0881              * If the pending bit is not set, then we are
0882              * either the CPU handling the broadcast
0883              * interrupt or we got woken by something else.
0884              *
0885              * We are no longer in the broadcast mask, so
0886              * if the cpu local expiry time is already
0887              * reached, we would reprogram the cpu local
0888              * timer with an already expired event.
0889              *
0890              * This can lead to a ping-pong when we return
0891              * to idle and therefore rearm the broadcast
0892              * timer before the cpu local timer was able
0893              * to fire. This happens because the forced
0894              * reprogramming makes sure that the event
0895              * will happen in the future and depending on
0896              * the min_delta setting this might be far
0897              * enough out that the ping-pong starts.
0898              *
0899              * If the cpu local next_event has expired
0900              * then we know that the broadcast timer
0901              * next_event has expired as well and
0902              * broadcast is about to be handled. So we
0903              * avoid reprogramming and enforce that the
0904              * broadcast handler, which did not run yet,
0905              * will invoke the cpu local handler.
0906              *
0907              * We cannot call the handler directly from
0908              * here, because we might be in a NOHZ phase
0909              * and we did not go through the irq_enter()
0910              * nohz fixups.
0911              */
0912             now = ktime_get();
0913             if (dev->next_event <= now) {
0914                 cpumask_set_cpu(cpu, tick_broadcast_force_mask);
0915                 goto out;
0916             }
0917             /*
0918              * We got woken by something else. Reprogram
0919              * the cpu local timer device.
0920              */
0921             tick_program_event(dev->next_event, 1);
0922         }
0923     }
0924 out:
0925     raw_spin_unlock(&tick_broadcast_lock);
0926     return ret;
0927 }
0928 
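     /*
      * Handle idle enter/exit with the per-cpu oneshot wakeup device
      * instead of the broadcast device, if one is available.
      */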
0929 static int tick_oneshot_wakeup_control(enum tick_broadcast_state state,
0930                        struct tick_device *td,
0931                        int cpu)
0932 {
0933     struct clock_event_device *dev, *wd;
0934 
0935     dev = td->evtdev;
0936     if (td->mode != TICKDEV_MODE_ONESHOT)
0937         return -EINVAL;
0938 
0939     wd = tick_get_oneshot_wakeup_device(cpu);
0940     if (!wd)
0941         return -ENODEV;
0942 
0943     switch (state) {
0944     case TICK_BROADCAST_ENTER:
0945         clockevents_switch_state(dev, CLOCK_EVT_STATE_ONESHOT_STOPPED);
0946         clockevents_switch_state(wd, CLOCK_EVT_STATE_ONESHOT);
0947         clockevents_program_event(wd, dev->next_event, 1);
0948         break;
0949     case TICK_BROADCAST_EXIT:
0950         /* We may have transitioned to oneshot mode while idle */
0951         if (clockevent_get_state(wd) != CLOCK_EVT_STATE_ONESHOT)
0952             return -ENODEV;
0953     }
0954 
0955     return 0;
0956 }
0957 
0958 int __tick_broadcast_oneshot_control(enum tick_broadcast_state state)
0959 {
0960     struct tick_device *td = this_cpu_ptr(&tick_cpu_device);
0961     int cpu = smp_processor_id();
0962 
0963     if (!tick_oneshot_wakeup_control(state, td, cpu))
0964         return 0;
0965 
0966     if (tick_broadcast_device.evtdev)
0967         return ___tick_broadcast_oneshot_control(state, td, cpu);
0968 
0969     /*
0970      * If there is no broadcast or wakeup device, tell the caller not
0971      * to go into deep idle.
0972      */
0973     return -EBUSY;
0974 }
0975 
0976 /*
0977  * Reset the oneshot broadcast state for a cpu
0978  *
0979  * Called with tick_broadcast_lock held
0980  */
0981 static void tick_broadcast_clear_oneshot(int cpu)
0982 {
0983     cpumask_clear_cpu(cpu, tick_broadcast_oneshot_mask);
0984     cpumask_clear_cpu(cpu, tick_broadcast_pending_mask);
0985 }
0986 
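     /*
      * Set the next_event of all cpu local tick devices in @mask to @expires.
      */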
0987 static void tick_broadcast_init_next_event(struct cpumask *mask,
0988                        ktime_t expires)
0989 {
0990     struct tick_device *td;
0991     int cpu;
0992 
0993     for_each_cpu(cpu, mask) {
0994         td = &per_cpu(tick_cpu_device, cpu);
0995         if (td->evtdev)
0996             td->evtdev->next_event = expires;
0997     }
0998 }
0999 
1000 static inline ktime_t tick_get_next_period(void)
1001 {
1002     ktime_t next;
1003 
1004     /*
1005      * Protect against concurrent updates (store/load tearing on
1006      * 32bit). It does not matter if the time is already in the
1007      * past. The broadcast device which is about to be programmed will
1008      * fire in any case.
1009      */
1010     raw_spin_lock(&jiffies_lock);
1011     next = tick_next_period;
1012     raw_spin_unlock(&jiffies_lock);
1013     return next;
1014 }
1015 
1016 /**
1017  * tick_broadcast_setup_oneshot - setup the broadcast device
1018  */
1019 static void tick_broadcast_setup_oneshot(struct clock_event_device *bc)
1020 {
1021     int cpu = smp_processor_id();
1022 
1023     if (!bc)
1024         return;
1025 
1026     /* Set it up only once! */
1027     if (bc->event_handler != tick_handle_oneshot_broadcast) {
1028         int was_periodic = clockevent_state_periodic(bc);
1029 
1030         bc->event_handler = tick_handle_oneshot_broadcast;
1031 
1032         /*
1033          * We must be careful here. There might be other CPUs
1034          * waiting for periodic broadcast. We need to set the
1035          * oneshot_mask bits for those and program the
1036          * broadcast device to fire.
1037          */
1038         cpumask_copy(tmpmask, tick_broadcast_mask);
1039         cpumask_clear_cpu(cpu, tmpmask);
1040         cpumask_or(tick_broadcast_oneshot_mask,
1041                tick_broadcast_oneshot_mask, tmpmask);
1042 
1043         if (was_periodic && !cpumask_empty(tmpmask)) {
1044             ktime_t nextevt = tick_get_next_period();
1045 
1046             clockevents_switch_state(bc, CLOCK_EVT_STATE_ONESHOT);
1047             tick_broadcast_init_next_event(tmpmask, nextevt);
1048             tick_broadcast_set_event(bc, cpu, nextevt);
1049         } else
1050             bc->next_event = KTIME_MAX;
1051     } else {
1052         /*
1053          * The first cpu which switches to oneshot mode sets
1054          * the bit for all other cpus which are in the general
1055          * (periodic) broadcast mask. So the bit is set and
1056          * would prevent the first broadcast enter after this
1057          * from programming the bc device.
1058          */
1059         tick_broadcast_clear_oneshot(cpu);
1060     }
1061 }
1062 
1063 /*
1064  * Select oneshot operating mode for the broadcast device
1065  */
1066 void tick_broadcast_switch_to_oneshot(void)
1067 {
1068     struct clock_event_device *bc;
1069     unsigned long flags;
1070 
1071     raw_spin_lock_irqsave(&tick_broadcast_lock, flags);
1072 
1073     tick_broadcast_device.mode = TICKDEV_MODE_ONESHOT;
1074     bc = tick_broadcast_device.evtdev;
1075     if (bc)
1076         tick_broadcast_setup_oneshot(bc);
1077 
1078     raw_spin_unlock_irqrestore(&tick_broadcast_lock, flags);
1079 }
1080 
1081 #ifdef CONFIG_HOTPLUG_CPU
1082 void hotplug_cpu__broadcast_tick_pull(int deadcpu)
1083 {
1084     struct clock_event_device *bc;
1085     unsigned long flags;
1086 
1087     raw_spin_lock_irqsave(&tick_broadcast_lock, flags);
1088     bc = tick_broadcast_device.evtdev;
1089 
1090     if (bc && broadcast_needs_cpu(bc, deadcpu)) {
1091         /* This moves the broadcast assignment to this CPU: */
1092         clockevents_program_event(bc, bc->next_event, 1);
1093     }
1094     raw_spin_unlock_irqrestore(&tick_broadcast_lock, flags);
1095 }
1096 
1097 /*
1098  * Remove a dying CPU from broadcasting
1099  */
1100 static void tick_broadcast_oneshot_offline(unsigned int cpu)
1101 {
1102     if (tick_get_oneshot_wakeup_device(cpu))
1103         tick_set_oneshot_wakeup_device(NULL, cpu);
1104 
1105     /*
1106      * Clear the broadcast masks for the dead cpu, but do not stop
1107      * the broadcast device!
1108      */
1109     cpumask_clear_cpu(cpu, tick_broadcast_oneshot_mask);
1110     cpumask_clear_cpu(cpu, tick_broadcast_pending_mask);
1111     cpumask_clear_cpu(cpu, tick_broadcast_force_mask);
1112 }
1113 #endif
1114 
1115 /*
1116  * Check whether the broadcast device is in oneshot mode
1117  */
1118 int tick_broadcast_oneshot_active(void)
1119 {
1120     return tick_broadcast_device.mode == TICKDEV_MODE_ONESHOT;
1121 }
1122 
1123 /*
1124  * Check whether the broadcast device supports oneshot.
1125  */
1126 bool tick_broadcast_oneshot_available(void)
1127 {
1128     struct clock_event_device *bc = tick_broadcast_device.evtdev;
1129 
1130     return bc ? bc->features & CLOCK_EVT_FEAT_ONESHOT : false;
1131 }
1132 
1133 #else
1134 int __tick_broadcast_oneshot_control(enum tick_broadcast_state state)
1135 {
1136     struct clock_event_device *bc = tick_broadcast_device.evtdev;
1137 
1138     if (!bc || (bc->features & CLOCK_EVT_FEAT_HRTIMER))
1139         return -EBUSY;
1140 
1141     return 0;
1142 }
1143 #endif
1144 
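     /*
      * Allocate the cpumasks required by the broadcast machinery early in
      * the boot process.
      */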
1145 void __init tick_broadcast_init(void)
1146 {
1147     zalloc_cpumask_var(&tick_broadcast_mask, GFP_NOWAIT);
1148     zalloc_cpumask_var(&tick_broadcast_on, GFP_NOWAIT);
1149     zalloc_cpumask_var(&tmpmask, GFP_NOWAIT);
1150 #ifdef CONFIG_TICK_ONESHOT
1151     zalloc_cpumask_var(&tick_broadcast_oneshot_mask, GFP_NOWAIT);
1152     zalloc_cpumask_var(&tick_broadcast_pending_mask, GFP_NOWAIT);
1153     zalloc_cpumask_var(&tick_broadcast_force_mask, GFP_NOWAIT);
1154 #endif
1155 }