0001 // SPDX-License-Identifier: GPL-2.0+
0002 /*
0003  * This file contains the functions which manage clocksource drivers.
0004  *
0005  * Copyright (C) 2004, 2005 IBM, John Stultz (johnstul@us.ibm.com)
0006  */
0007 
0008 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
0009 
0010 #include <linux/device.h>
0011 #include <linux/clocksource.h>
0012 #include <linux/init.h>
0013 #include <linux/module.h>
0014 #include <linux/sched.h> /* for spin_unlock_irq() using preempt_count() m68k */
0015 #include <linux/tick.h>
0016 #include <linux/kthread.h>
0017 #include <linux/prandom.h>
0018 #include <linux/cpu.h>
0019 
0020 #include "tick-internal.h"
0021 #include "timekeeping_internal.h"
0022 
0023 /**
0024  * clocks_calc_mult_shift - calculate mult/shift factors for scaled math of clocks
0025  * @mult:   pointer to mult variable
0026  * @shift:  pointer to shift variable
0027  * @from:   frequency to convert from
0028  * @to:     frequency to convert to
0029  * @maxsec: guaranteed runtime conversion range in seconds
0030  *
0031  * The function evaluates the shift/mult pair for the scaled math
0032  * operations of clocksources and clockevents.
0033  *
0034  * @to and @from are frequency values in HZ. For clock sources @to is
0035  * NSEC_PER_SEC == 1GHz and @from is the counter frequency. For clock
0036  * events @to is the counter frequency and @from is NSEC_PER_SEC.
0037  *
0038  * The @maxsec conversion range argument controls the time frame in
0039  * seconds which must be covered by the runtime conversion with the
0040  * calculated mult and shift factors. This guarantees that no 64bit
0041  * overflow happens when the input value of the conversion is
0042  * multiplied with the calculated mult factor. Larger ranges may
0043  * reduce the conversion accuracy by choosing smaller mult and shift
0044  * factors.
0045  */
0046 void
0047 clocks_calc_mult_shift(u32 *mult, u32 *shift, u32 from, u32 to, u32 maxsec)
0048 {
0049     u64 tmp;
0050     u32 sft, sftacc = 32;
0051 
0052     /*
0053      * Calculate the shift factor which is limiting the conversion
0054      * range:
0055      */
0056     tmp = ((u64)maxsec * from) >> 32;
0057     while (tmp) {
0058         tmp >>= 1;
0059         sftacc--;
0060     }
0061 
0062     /*
0063      * Find the conversion shift/mult pair which has the best
0064      * accuracy and fits the maxsec conversion range:
0065      */
0066     for (sft = 32; sft > 0; sft--) {
0067         tmp = (u64) to << sft;
0068         tmp += from / 2;
0069         do_div(tmp, from);
0070         if ((tmp >> sftacc) == 0)
0071             break;
0072     }
0073     *mult = tmp;
0074     *shift = sft;
0075 }
0076 EXPORT_SYMBOL_GPL(clocks_calc_mult_shift);
0077 
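To get a concrete feel for the math above, the sketch below re-implements the same mult/shift search in plain userspace C and converts one second's worth of cycles back to nanoseconds. The 19.2 MHz counter frequency and the 600-second range are made-up illustration values (600 s matches the cap applied later in __clocksource_update_freq_scale()), not anything mandated by this file.

/* Userspace re-implementation of the mult/shift math, for illustration only. */
#include <stdint.h>
#include <stdio.h>

static void calc_mult_shift(uint32_t *mult, uint32_t *shift,
                            uint32_t from, uint32_t to, uint32_t maxsec)
{
	uint64_t tmp;
	uint32_t sft, sftacc = 32;

	/* Limit the accumulator so maxsec seconds of input cannot overflow 64 bits. */
	tmp = ((uint64_t)maxsec * from) >> 32;
	while (tmp) {
		tmp >>= 1;
		sftacc--;
	}

	/* Largest shift (best precision) whose mult still fits the range. */
	for (sft = 32; sft > 0; sft--) {
		tmp = ((uint64_t)to << sft) + from / 2;
		tmp /= from;
		if ((tmp >> sftacc) == 0)
			break;
	}
	*mult = (uint32_t)tmp;
	*shift = sft;
}

int main(void)
{
	uint32_t mult, shift;
	uint32_t freq = 19200000;	/* hypothetical 19.2 MHz counter */

	calc_mult_shift(&mult, &shift, freq, 1000000000u, 600);
	/* One second of cycles converts back to roughly 10^9 ns (here 999999999). */
	printf("mult=%u shift=%u, 1s of cycles -> %llu ns\n", mult, shift,
	       (unsigned long long)(((uint64_t)freq * mult) >> shift));
	return 0;
}
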
0078 /*[Clocksource internal variables]---------
0079  * curr_clocksource:
0080  *  currently selected clocksource.
0081  * suspend_clocksource:
0082  *  used to calculate the suspend time.
0083  * clocksource_list:
0084  *  linked list with the registered clocksources
0085  * clocksource_mutex:
0086  *  protects manipulations to curr_clocksource and the clocksource_list
0087  * override_name:
0088  *  Name of the user-specified clocksource.
0089  */
0090 static struct clocksource *curr_clocksource;
0091 static struct clocksource *suspend_clocksource;
0092 static LIST_HEAD(clocksource_list);
0093 static DEFINE_MUTEX(clocksource_mutex);
0094 static char override_name[CS_NAME_LEN];
0095 static int finished_booting;
0096 static u64 suspend_start;
0097 
0098 /*
0099  * Threshold: 0.0312s, when doubled: 0.0625s.
0100  * Also a default for cs->uncertainty_margin when registering clocks.
0101  */
0102 #define WATCHDOG_THRESHOLD (NSEC_PER_SEC >> 5)
0103 
0104 /*
0105  * Maximum permissible delay between two readouts of the watchdog
0106  * clocksource surrounding a read of the clocksource being validated.
0107  * This delay could be due to SMIs, NMIs, or to VCPU preemptions.  Used as
0108  * a lower bound for cs->uncertainty_margin values when registering clocks.
0109  */
0110 #ifdef CONFIG_CLOCKSOURCE_WATCHDOG_MAX_SKEW_US
0111 #define MAX_SKEW_USEC   CONFIG_CLOCKSOURCE_WATCHDOG_MAX_SKEW_US
0112 #else
0113 #define MAX_SKEW_USEC   100
0114 #endif
0115 
0116 #define WATCHDOG_MAX_SKEW (MAX_SKEW_USEC * NSEC_PER_USEC)
0117 
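Plugging in the defaults, the two limits above work out to about 31.25 ms and 100 us respectively. The snippet below is just a userspace sanity check of that arithmetic, with NSEC_PER_SEC and NSEC_PER_USEC spelled out since it runs outside the kernel.

#include <stdio.h>

int main(void)
{
	const long long NSEC_PER_SEC = 1000000000LL;
	const long long NSEC_PER_USEC = 1000LL;

	/* NSEC_PER_SEC >> 5 = 31,250,000 ns, i.e. the "0.0312s" above. */
	printf("WATCHDOG_THRESHOLD = %lld ns\n", NSEC_PER_SEC >> 5);
	/* Default 100 us read-back budget. */
	printf("WATCHDOG_MAX_SKEW  = %lld ns\n", 100 * NSEC_PER_USEC);
	return 0;
}
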
0118 #ifdef CONFIG_CLOCKSOURCE_WATCHDOG
0119 static void clocksource_watchdog_work(struct work_struct *work);
0120 static void clocksource_select(void);
0121 
0122 static LIST_HEAD(watchdog_list);
0123 static struct clocksource *watchdog;
0124 static struct timer_list watchdog_timer;
0125 static DECLARE_WORK(watchdog_work, clocksource_watchdog_work);
0126 static DEFINE_SPINLOCK(watchdog_lock);
0127 static int watchdog_running;
0128 static atomic_t watchdog_reset_pending;
0129 
0130 static inline void clocksource_watchdog_lock(unsigned long *flags)
0131 {
0132     spin_lock_irqsave(&watchdog_lock, *flags);
0133 }
0134 
0135 static inline void clocksource_watchdog_unlock(unsigned long *flags)
0136 {
0137     spin_unlock_irqrestore(&watchdog_lock, *flags);
0138 }
0139 
0140 static int clocksource_watchdog_kthread(void *data);
0141 static void __clocksource_change_rating(struct clocksource *cs, int rating);
0142 
0143 /*
0144  * Interval: 0.5sec.
0145  */
0146 #define WATCHDOG_INTERVAL (HZ >> 1)
0147 
0148 static void clocksource_watchdog_work(struct work_struct *work)
0149 {
0150     /*
0151      * We cannot directly run clocksource_watchdog_kthread() here, because
0152      * clocksource_select() calls timekeeping_notify() which uses
0153      * stop_machine(). One cannot use stop_machine() from a workqueue() due
0154      * lock inversions wrt CPU hotplug.
0155      * to lock inversions wrt CPU hotplug.
0156      * Also, we only ever run this work once or twice during the lifetime
0157      * of the kernel, so there is no point in creating a more permanent
0158      * kthread for this.
0159      *
0160      * If kthread_run() fails, the next watchdog scan over the
0161      * watchdog_list will find the unstable clock again.
0162      */
0163     kthread_run(clocksource_watchdog_kthread, NULL, "kwatchdog");
0164 }
0165 
0166 static void __clocksource_unstable(struct clocksource *cs)
0167 {
0168     cs->flags &= ~(CLOCK_SOURCE_VALID_FOR_HRES | CLOCK_SOURCE_WATCHDOG);
0169     cs->flags |= CLOCK_SOURCE_UNSTABLE;
0170 
0171     /*
0172      * If the clocksource is registered clocksource_watchdog_kthread() will
0173      * re-rate and re-select.
0174      */
0175     if (list_empty(&cs->list)) {
0176         cs->rating = 0;
0177         return;
0178     }
0179 
0180     if (cs->mark_unstable)
0181         cs->mark_unstable(cs);
0182 
0183     /* kick clocksource_watchdog_kthread() */
0184     if (finished_booting)
0185         schedule_work(&watchdog_work);
0186 }
0187 
0188 /**
0189  * clocksource_mark_unstable - mark clocksource unstable via watchdog
0190  * @cs:     clocksource to be marked unstable
0191  *
0192  * This function is called by the x86 TSC code to mark clocksources as unstable;
0193  * it defers demotion and re-selection to a kthread.
0194  */
0195 void clocksource_mark_unstable(struct clocksource *cs)
0196 {
0197     unsigned long flags;
0198 
0199     spin_lock_irqsave(&watchdog_lock, flags);
0200     if (!(cs->flags & CLOCK_SOURCE_UNSTABLE)) {
0201         if (!list_empty(&cs->list) && list_empty(&cs->wd_list))
0202             list_add(&cs->wd_list, &watchdog_list);
0203         __clocksource_unstable(cs);
0204     }
0205     spin_unlock_irqrestore(&watchdog_lock, flags);
0206 }
0207 
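As the kerneldoc above notes, this entry point is meant for code outside this file (the x86 TSC code being the in-tree user). Below is a minimal sketch of how a hypothetical driver might use it after detecting that its hardware counter has glitched; my_cs, my_counter_glitched() and my_check_counter() are invented names, not part of this API.

#include <linux/clocksource.h>

static struct clocksource my_cs;		/* assumed registered elsewhere */

static bool my_counter_glitched(void)		/* placeholder for a real HW check */
{
	return false;
}

static void my_check_counter(void)
{
	if (my_counter_glitched())
		clocksource_mark_unstable(&my_cs);
}
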
0208 ulong max_cswd_read_retries = 2;
0209 module_param(max_cswd_read_retries, ulong, 0644);
0210 EXPORT_SYMBOL_GPL(max_cswd_read_retries);
0211 static int verify_n_cpus = 8;
0212 module_param(verify_n_cpus, int, 0644);
0213 
0214 enum wd_read_status {
0215     WD_READ_SUCCESS,
0216     WD_READ_UNSTABLE,
0217     WD_READ_SKIP
0218 };
0219 
0220 static enum wd_read_status cs_watchdog_read(struct clocksource *cs, u64 *csnow, u64 *wdnow)
0221 {
0222     unsigned int nretries;
0223     u64 wd_end, wd_end2, wd_delta;
0224     int64_t wd_delay, wd_seq_delay;
0225 
0226     for (nretries = 0; nretries <= max_cswd_read_retries; nretries++) {
0227         local_irq_disable();
0228         *wdnow = watchdog->read(watchdog);
0229         *csnow = cs->read(cs);
0230         wd_end = watchdog->read(watchdog);
0231         wd_end2 = watchdog->read(watchdog);
0232         local_irq_enable();
0233 
0234         wd_delta = clocksource_delta(wd_end, *wdnow, watchdog->mask);
0235         wd_delay = clocksource_cyc2ns(wd_delta, watchdog->mult,
0236                           watchdog->shift);
0237         if (wd_delay <= WATCHDOG_MAX_SKEW) {
0238             if (nretries > 1 || nretries >= max_cswd_read_retries) {
0239                 pr_warn("timekeeping watchdog on CPU%d: %s retried %d times before success\n",
0240                     smp_processor_id(), watchdog->name, nretries);
0241             }
0242             return WD_READ_SUCCESS;
0243         }
0244 
0245         /*
0246          * Now compute the delay between consecutive watchdog reads to see
0247          * if there is too much external interference causing a significant
0248          * delay in reading both the clocksource and the watchdog.
0249          *
0250          * If consecutive WD read-back delay > WATCHDOG_MAX_SKEW/2,
0251          * report system busy, reinit the watchdog and skip the current
0252          * watchdog test.
0253          */
0254         wd_delta = clocksource_delta(wd_end2, wd_end, watchdog->mask);
0255         wd_seq_delay = clocksource_cyc2ns(wd_delta, watchdog->mult, watchdog->shift);
0256         if (wd_seq_delay > WATCHDOG_MAX_SKEW/2)
0257             goto skip_test;
0258     }
0259 
0260     pr_warn("timekeeping watchdog on CPU%d: %s read-back delay of %lldns, attempt %d, marking unstable\n",
0261         smp_processor_id(), watchdog->name, wd_delay, nretries);
0262     return WD_READ_UNSTABLE;
0263 
0264 skip_test:
0265     pr_info("timekeeping watchdog on CPU%d: %s wd-wd read-back delay of %lldns\n",
0266         smp_processor_id(), watchdog->name, wd_seq_delay);
0267     pr_info("wd-%s-wd read-back delay of %lldns, clock-skew test skipped!\n",
0268         cs->name, wd_delay);
0269     return WD_READ_SKIP;
0270 }
0271 
0272 static u64 csnow_mid;
0273 static cpumask_t cpus_ahead;
0274 static cpumask_t cpus_behind;
0275 static cpumask_t cpus_chosen;
0276 
0277 static void clocksource_verify_choose_cpus(void)
0278 {
0279     int cpu, i, n = verify_n_cpus;
0280 
0281     if (n < 0) {
0282         /* Check all of the CPUs. */
0283         cpumask_copy(&cpus_chosen, cpu_online_mask);
0284         cpumask_clear_cpu(smp_processor_id(), &cpus_chosen);
0285         return;
0286     }
0287 
0288     /* If no checking desired, or no other CPU to check, leave. */
0289     cpumask_clear(&cpus_chosen);
0290     if (n == 0 || num_online_cpus() <= 1)
0291         return;
0292 
0293     /* Make sure to select at least one CPU other than the current CPU. */
0294     cpu = cpumask_first(cpu_online_mask);
0295     if (cpu == smp_processor_id())
0296         cpu = cpumask_next(cpu, cpu_online_mask);
0297     if (WARN_ON_ONCE(cpu >= nr_cpu_ids))
0298         return;
0299     cpumask_set_cpu(cpu, &cpus_chosen);
0300 
0301     /* Force a sane value for the boot parameter. */
0302     if (n > nr_cpu_ids)
0303         n = nr_cpu_ids;
0304 
0305     /*
0306      * Randomly select the specified number of CPUs.  If the same
0307      * CPU is selected multiple times, that CPU is checked only once,
0308      * and no replacement CPU is selected.  This gracefully handles
0309      * situations where verify_n_cpus is greater than the number of
0310      * CPUs that are currently online.
0311      */
0312     for (i = 1; i < n; i++) {
0313         cpu = prandom_u32() % nr_cpu_ids;
0314         cpu = cpumask_next(cpu - 1, cpu_online_mask);
0315         if (cpu >= nr_cpu_ids)
0316             cpu = cpumask_first(cpu_online_mask);
0317         if (!WARN_ON_ONCE(cpu >= nr_cpu_ids))
0318             cpumask_set_cpu(cpu, &cpus_chosen);
0319     }
0320 
0321     /* Don't verify ourselves. */
0322     cpumask_clear_cpu(smp_processor_id(), &cpus_chosen);
0323 }
0324 
0325 static void clocksource_verify_one_cpu(void *csin)
0326 {
0327     struct clocksource *cs = (struct clocksource *)csin;
0328 
0329     csnow_mid = cs->read(cs);
0330 }
0331 
0332 void clocksource_verify_percpu(struct clocksource *cs)
0333 {
0334     int64_t cs_nsec, cs_nsec_max = 0, cs_nsec_min = LLONG_MAX;
0335     u64 csnow_begin, csnow_end;
0336     int cpu, testcpu;
0337     s64 delta;
0338 
0339     if (verify_n_cpus == 0)
0340         return;
0341     cpumask_clear(&cpus_ahead);
0342     cpumask_clear(&cpus_behind);
0343     cpus_read_lock();
0344     preempt_disable();
0345     clocksource_verify_choose_cpus();
0346     if (cpumask_empty(&cpus_chosen)) {
0347         preempt_enable();
0348         cpus_read_unlock();
0349         pr_warn("Not enough CPUs to check clocksource '%s'.\n", cs->name);
0350         return;
0351     }
0352     testcpu = smp_processor_id();
0353     pr_warn("Checking clocksource %s synchronization from CPU %d to CPUs %*pbl.\n", cs->name, testcpu, cpumask_pr_args(&cpus_chosen));
0354     for_each_cpu(cpu, &cpus_chosen) {
0355         if (cpu == testcpu)
0356             continue;
0357         csnow_begin = cs->read(cs);
0358         smp_call_function_single(cpu, clocksource_verify_one_cpu, cs, 1);
0359         csnow_end = cs->read(cs);
0360         delta = (s64)((csnow_mid - csnow_begin) & cs->mask);
0361         if (delta < 0)
0362             cpumask_set_cpu(cpu, &cpus_behind);
0363         delta = (csnow_end - csnow_mid) & cs->mask;
0364         if (delta < 0)
0365             cpumask_set_cpu(cpu, &cpus_ahead);
0366         delta = clocksource_delta(csnow_end, csnow_begin, cs->mask);
0367         cs_nsec = clocksource_cyc2ns(delta, cs->mult, cs->shift);
0368         if (cs_nsec > cs_nsec_max)
0369             cs_nsec_max = cs_nsec;
0370         if (cs_nsec < cs_nsec_min)
0371             cs_nsec_min = cs_nsec;
0372     }
0373     preempt_enable();
0374     cpus_read_unlock();
0375     if (!cpumask_empty(&cpus_ahead))
0376         pr_warn("        CPUs %*pbl ahead of CPU %d for clocksource %s.\n",
0377             cpumask_pr_args(&cpus_ahead), testcpu, cs->name);
0378     if (!cpumask_empty(&cpus_behind))
0379         pr_warn("        CPUs %*pbl behind CPU %d for clocksource %s.\n",
0380             cpumask_pr_args(&cpus_behind), testcpu, cs->name);
0381     if (!cpumask_empty(&cpus_ahead) || !cpumask_empty(&cpus_behind))
0382         pr_warn("        CPU %d check durations %lldns - %lldns for clocksource %s.\n",
0383             testcpu, cs_nsec_min, cs_nsec_max, cs->name);
0384 }
0385 EXPORT_SYMBOL_GPL(clocksource_verify_percpu);
0386 
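The per-CPU checks above lean on the same masked, two's-complement subtraction idea as clocksource_delta(): as long as the real interval fits within the counter mask, a wrap between two reads still yields the correct positive cycle count. A small userspace sketch with a hypothetical 32-bit counter:

#include <stdint.h>
#include <stdio.h>

/* Same shape as clocksource_delta(): subtract, then mask to the counter width. */
static uint64_t masked_delta(uint64_t now, uint64_t last, uint64_t mask)
{
	return (now - last) & mask;
}

int main(void)
{
	const uint64_t mask = 0xffffffffULL;	/* hypothetical 32-bit counter */
	uint64_t last = 0xfffffff0ULL;		/* read just before the wrap */
	uint64_t now  = 0x00000010ULL;		/* read just after the wrap */

	/* Prints 32: the wrap is invisible to the masked subtraction. */
	printf("elapsed cycles = %llu\n",
	       (unsigned long long)masked_delta(now, last, mask));
	return 0;
}
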
0387 static void clocksource_watchdog(struct timer_list *unused)
0388 {
0389     u64 csnow, wdnow, cslast, wdlast, delta;
0390     int next_cpu, reset_pending;
0391     int64_t wd_nsec, cs_nsec;
0392     struct clocksource *cs;
0393     enum wd_read_status read_ret;
0394     u32 md;
0395 
0396     spin_lock(&watchdog_lock);
0397     if (!watchdog_running)
0398         goto out;
0399 
0400     reset_pending = atomic_read(&watchdog_reset_pending);
0401 
0402     list_for_each_entry(cs, &watchdog_list, wd_list) {
0403 
0404         /* Clocksource already marked unstable? */
0405         if (cs->flags & CLOCK_SOURCE_UNSTABLE) {
0406             if (finished_booting)
0407                 schedule_work(&watchdog_work);
0408             continue;
0409         }
0410 
0411         read_ret = cs_watchdog_read(cs, &csnow, &wdnow);
0412 
0413         if (read_ret != WD_READ_SUCCESS) {
0414             if (read_ret == WD_READ_UNSTABLE)
0415                 /* Clock readout unreliable, so give it up. */
0416                 __clocksource_unstable(cs);
0417             continue;
0418         }
0419 
0420         /* Clocksource initialized ? */
0421         if (!(cs->flags & CLOCK_SOURCE_WATCHDOG) ||
0422             atomic_read(&watchdog_reset_pending)) {
0423             cs->flags |= CLOCK_SOURCE_WATCHDOG;
0424             cs->wd_last = wdnow;
0425             cs->cs_last = csnow;
0426             continue;
0427         }
0428 
0429         delta = clocksource_delta(wdnow, cs->wd_last, watchdog->mask);
0430         wd_nsec = clocksource_cyc2ns(delta, watchdog->mult,
0431                          watchdog->shift);
0432 
0433         delta = clocksource_delta(csnow, cs->cs_last, cs->mask);
0434         cs_nsec = clocksource_cyc2ns(delta, cs->mult, cs->shift);
0435         wdlast = cs->wd_last; /* save these in case we print them */
0436         cslast = cs->cs_last;
0437         cs->cs_last = csnow;
0438         cs->wd_last = wdnow;
0439 
0440         if (atomic_read(&watchdog_reset_pending))
0441             continue;
0442 
0443         /* Check the deviation from the watchdog clocksource. */
0444         md = cs->uncertainty_margin + watchdog->uncertainty_margin;
0445         if (abs(cs_nsec - wd_nsec) > md) {
0446             pr_warn("timekeeping watchdog on CPU%d: Marking clocksource '%s' as unstable because the skew is too large:\n",
0447                 smp_processor_id(), cs->name);
0448             pr_warn("                      '%s' wd_nsec: %lld wd_now: %llx wd_last: %llx mask: %llx\n",
0449                 watchdog->name, wd_nsec, wdnow, wdlast, watchdog->mask);
0450             pr_warn("                      '%s' cs_nsec: %lld cs_now: %llx cs_last: %llx mask: %llx\n",
0451                 cs->name, cs_nsec, csnow, cslast, cs->mask);
0452             if (curr_clocksource == cs)
0453                 pr_warn("                      '%s' is current clocksource.\n", cs->name);
0454             else if (curr_clocksource)
0455                 pr_warn("                      '%s' (not '%s') is current clocksource.\n", curr_clocksource->name, cs->name);
0456             else
0457                 pr_warn("                      No current clocksource.\n");
0458             __clocksource_unstable(cs);
0459             continue;
0460         }
0461 
0462         if (cs == curr_clocksource && cs->tick_stable)
0463             cs->tick_stable(cs);
0464 
0465         if (!(cs->flags & CLOCK_SOURCE_VALID_FOR_HRES) &&
0466             (cs->flags & CLOCK_SOURCE_IS_CONTINUOUS) &&
0467             (watchdog->flags & CLOCK_SOURCE_IS_CONTINUOUS)) {
0468             /* Mark it valid for high-res. */
0469             cs->flags |= CLOCK_SOURCE_VALID_FOR_HRES;
0470 
0471             /*
0472              * clocksource_done_booting() will sort it if
0473              * finished_booting is not set yet.
0474              */
0475             if (!finished_booting)
0476                 continue;
0477 
0478             /*
0479              * If this is not the current clocksource let
0480              * the watchdog thread reselect it. Due to the
0481              * change to high res this clocksource might
0482              * be preferred now. If it is the current
0483              * clocksource let the tick code know about
0484              * that change.
0485              */
0486             if (cs != curr_clocksource) {
0487                 cs->flags |= CLOCK_SOURCE_RESELECT;
0488                 schedule_work(&watchdog_work);
0489             } else {
0490                 tick_clock_notify();
0491             }
0492         }
0493     }
0494 
0495     /*
0496      * We only clear watchdog_reset_pending when we have done a
0497      * full cycle through all clocksources.
0498      */
0499     if (reset_pending)
0500         atomic_dec(&watchdog_reset_pending);
0501 
0502     /*
0503      * Cycle through CPUs to check if the CPUs stay synchronized
0504      * to each other.
0505      */
0506     next_cpu = cpumask_next(raw_smp_processor_id(), cpu_online_mask);
0507     if (next_cpu >= nr_cpu_ids)
0508         next_cpu = cpumask_first(cpu_online_mask);
0509 
0510     /*
0511      * Arm the timer if not already pending: this could race with a concurrent
0512      * clocksource_stop_watchdog()/clocksource_start_watchdog() pair.
0513      */
0514     if (!timer_pending(&watchdog_timer)) {
0515         watchdog_timer.expires += WATCHDOG_INTERVAL;
0516         add_timer_on(&watchdog_timer, next_cpu);
0517     }
0518 out:
0519     spin_unlock(&watchdog_lock);
0520 }
0521 
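The heart of the loop above is the comparison of the two interval measurements against the combined uncertainty margins. With both clocks sitting at the 2 * WATCHDOG_MAX_SKEW floor of 200 us each, anything beyond roughly 400 us of disagreement over one 0.5 s watchdog period marks the clocksource unstable. A toy userspace illustration with invented numbers:

#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

int main(void)
{
	/* Hypothetical nanosecond intervals measured over one watchdog period. */
	int64_t wd_nsec = 500000000;	/* watchdog saw 500.000 ms     */
	int64_t cs_nsec = 500450000;	/* clocksource saw 500.450 ms  */
	uint32_t md = 200000 + 200000;	/* both margins at their floor */

	if (llabs(cs_nsec - wd_nsec) > md)
		printf("skew %lld ns > margin %u ns: would be marked unstable\n",
		       (long long)(cs_nsec - wd_nsec), md);
	return 0;
}
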
0522 static inline void clocksource_start_watchdog(void)
0523 {
0524     if (watchdog_running || !watchdog || list_empty(&watchdog_list))
0525         return;
0526     timer_setup(&watchdog_timer, clocksource_watchdog, 0);
0527     watchdog_timer.expires = jiffies + WATCHDOG_INTERVAL;
0528     add_timer_on(&watchdog_timer, cpumask_first(cpu_online_mask));
0529     watchdog_running = 1;
0530 }
0531 
0532 static inline void clocksource_stop_watchdog(void)
0533 {
0534     if (!watchdog_running || (watchdog && !list_empty(&watchdog_list)))
0535         return;
0536     del_timer(&watchdog_timer);
0537     watchdog_running = 0;
0538 }
0539 
0540 static inline void clocksource_reset_watchdog(void)
0541 {
0542     struct clocksource *cs;
0543 
0544     list_for_each_entry(cs, &watchdog_list, wd_list)
0545         cs->flags &= ~CLOCK_SOURCE_WATCHDOG;
0546 }
0547 
0548 static void clocksource_resume_watchdog(void)
0549 {
0550     atomic_inc(&watchdog_reset_pending);
0551 }
0552 
0553 static void clocksource_enqueue_watchdog(struct clocksource *cs)
0554 {
0555     INIT_LIST_HEAD(&cs->wd_list);
0556 
0557     if (cs->flags & CLOCK_SOURCE_MUST_VERIFY) {
0558         /* cs is a clocksource to be watched. */
0559         list_add(&cs->wd_list, &watchdog_list);
0560         cs->flags &= ~CLOCK_SOURCE_WATCHDOG;
0561     } else {
0562         /* cs is a watchdog. */
0563         if (cs->flags & CLOCK_SOURCE_IS_CONTINUOUS)
0564             cs->flags |= CLOCK_SOURCE_VALID_FOR_HRES;
0565     }
0566 }
0567 
0568 static void clocksource_select_watchdog(bool fallback)
0569 {
0570     struct clocksource *cs, *old_wd;
0571     unsigned long flags;
0572 
0573     spin_lock_irqsave(&watchdog_lock, flags);
0574     /* save current watchdog */
0575     old_wd = watchdog;
0576     if (fallback)
0577         watchdog = NULL;
0578 
0579     list_for_each_entry(cs, &clocksource_list, list) {
0580         /* cs is a clocksource to be watched. */
0581         if (cs->flags & CLOCK_SOURCE_MUST_VERIFY)
0582             continue;
0583 
0584         /* Skip current if we were requested for a fallback. */
0585         if (fallback && cs == old_wd)
0586             continue;
0587 
0588         /* Pick the best watchdog. */
0589         if (!watchdog || cs->rating > watchdog->rating)
0590             watchdog = cs;
0591     }
0592     /* If we failed to find a fallback restore the old one. */
0593     if (!watchdog)
0594         watchdog = old_wd;
0595 
0596     /* If we changed the watchdog we need to reset cycles. */
0597     if (watchdog != old_wd)
0598         clocksource_reset_watchdog();
0599 
0600     /* Check if the watchdog timer needs to be started. */
0601     clocksource_start_watchdog();
0602     spin_unlock_irqrestore(&watchdog_lock, flags);
0603 }
0604 
0605 static void clocksource_dequeue_watchdog(struct clocksource *cs)
0606 {
0607     if (cs != watchdog) {
0608         if (cs->flags & CLOCK_SOURCE_MUST_VERIFY) {
0609             /* cs is a watched clocksource. */
0610             list_del_init(&cs->wd_list);
0611             /* Check if the watchdog timer needs to be stopped. */
0612             clocksource_stop_watchdog();
0613         }
0614     }
0615 }
0616 
0617 static int __clocksource_watchdog_kthread(void)
0618 {
0619     struct clocksource *cs, *tmp;
0620     unsigned long flags;
0621     int select = 0;
0622 
0623     /* Do any required per-CPU skew verification. */
0624     if (curr_clocksource &&
0625         curr_clocksource->flags & CLOCK_SOURCE_UNSTABLE &&
0626         curr_clocksource->flags & CLOCK_SOURCE_VERIFY_PERCPU)
0627         clocksource_verify_percpu(curr_clocksource);
0628 
0629     spin_lock_irqsave(&watchdog_lock, flags);
0630     list_for_each_entry_safe(cs, tmp, &watchdog_list, wd_list) {
0631         if (cs->flags & CLOCK_SOURCE_UNSTABLE) {
0632             list_del_init(&cs->wd_list);
0633             __clocksource_change_rating(cs, 0);
0634             select = 1;
0635         }
0636         if (cs->flags & CLOCK_SOURCE_RESELECT) {
0637             cs->flags &= ~CLOCK_SOURCE_RESELECT;
0638             select = 1;
0639         }
0640     }
0641     /* Check if the watchdog timer needs to be stopped. */
0642     clocksource_stop_watchdog();
0643     spin_unlock_irqrestore(&watchdog_lock, flags);
0644 
0645     return select;
0646 }
0647 
0648 static int clocksource_watchdog_kthread(void *data)
0649 {
0650     mutex_lock(&clocksource_mutex);
0651     if (__clocksource_watchdog_kthread())
0652         clocksource_select();
0653     mutex_unlock(&clocksource_mutex);
0654     return 0;
0655 }
0656 
0657 static bool clocksource_is_watchdog(struct clocksource *cs)
0658 {
0659     return cs == watchdog;
0660 }
0661 
0662 #else /* CONFIG_CLOCKSOURCE_WATCHDOG */
0663 
0664 static void clocksource_enqueue_watchdog(struct clocksource *cs)
0665 {
0666     if (cs->flags & CLOCK_SOURCE_IS_CONTINUOUS)
0667         cs->flags |= CLOCK_SOURCE_VALID_FOR_HRES;
0668 }
0669 
0670 static void clocksource_select_watchdog(bool fallback) { }
0671 static inline void clocksource_dequeue_watchdog(struct clocksource *cs) { }
0672 static inline void clocksource_resume_watchdog(void) { }
0673 static inline int __clocksource_watchdog_kthread(void) { return 0; }
0674 static bool clocksource_is_watchdog(struct clocksource *cs) { return false; }
0675 void clocksource_mark_unstable(struct clocksource *cs) { }
0676 
0677 static inline void clocksource_watchdog_lock(unsigned long *flags) { }
0678 static inline void clocksource_watchdog_unlock(unsigned long *flags) { }
0679 
0680 #endif /* CONFIG_CLOCKSOURCE_WATCHDOG */
0681 
0682 static bool clocksource_is_suspend(struct clocksource *cs)
0683 {
0684     return cs == suspend_clocksource;
0685 }
0686 
0687 static void __clocksource_suspend_select(struct clocksource *cs)
0688 {
0689     /*
0690      * Skip the clocksource which will be stopped in suspend state.
0691      */
0692     if (!(cs->flags & CLOCK_SOURCE_SUSPEND_NONSTOP))
0693         return;
0694 
0695     /*
0696      * The nonstop clocksource can be selected as the suspend clocksource to
0697      * calculate the suspend time, so it should not supply suspend/resume
0698      * interfaces to suspend the nonstop clocksource when system suspends.
0699      */
0700     if (cs->suspend || cs->resume) {
0701         pr_warn("Nonstop clocksource %s should not supply suspend/resume interfaces\n",
0702             cs->name);
0703     }
0704 
0705     /* Pick the best rating. */
0706     if (!suspend_clocksource || cs->rating > suspend_clocksource->rating)
0707         suspend_clocksource = cs;
0708 }
0709 
0710 /**
0711  * clocksource_suspend_select - Select the best clocksource for suspend timing
0712  * @fallback:   whether to select a fallback clocksource
0713  */
0714 static void clocksource_suspend_select(bool fallback)
0715 {
0716     struct clocksource *cs, *old_suspend;
0717 
0718     old_suspend = suspend_clocksource;
0719     if (fallback)
0720         suspend_clocksource = NULL;
0721 
0722     list_for_each_entry(cs, &clocksource_list, list) {
0723         /* Skip current if we were requested for a fallback. */
0724         if (fallback && cs == old_suspend)
0725             continue;
0726 
0727         __clocksource_suspend_select(cs);
0728     }
0729 }
0730 
0731 /**
0732  * clocksource_start_suspend_timing - Start measuring the suspend timing
0733  * @cs:         current clocksource from timekeeping
0734  * @start_cycles:   current cycles from timekeeping
0735  *
0736  * This function saves the start cycle value of the suspend timer so that the
0737  * suspend time can be calculated when the system resumes.
0738  *
0739  * This function is called late in the suspend process from timekeeping_suspend(),
0740  * which means processes are frozen, and non-boot CPUs and interrupts are
0741  * disabled. It is therefore possible to start the suspend timer without taking the
0742  * clocksource mutex.
0743  */
0744 void clocksource_start_suspend_timing(struct clocksource *cs, u64 start_cycles)
0745 {
0746     if (!suspend_clocksource)
0747         return;
0748 
0749     /*
0750      * If the current clocksource is the suspend timer, use the
0751      * tkr_mono.cycle_last value as suspend_start to avoid a duplicate
0752      * read of the suspend timer.
0753      */
0754     if (clocksource_is_suspend(cs)) {
0755         suspend_start = start_cycles;
0756         return;
0757     }
0758 
0759     if (suspend_clocksource->enable &&
0760         suspend_clocksource->enable(suspend_clocksource)) {
0761         pr_warn_once("Failed to enable the non-suspend-able clocksource.\n");
0762         return;
0763     }
0764 
0765     suspend_start = suspend_clocksource->read(suspend_clocksource);
0766 }
0767 
0768 /**
0769  * clocksource_stop_suspend_timing - Stop measuring the suspend timing
0770  * @cs:     current clocksource from timekeeping
0771  * @cycle_now:  current cycles from timekeeping
0772  *
0773  * This function calculates the suspend time from the suspend timer.
0774  *
0775  * Returns nanoseconds since suspend started, 0 if no usable suspend clocksource.
0776  *
0777  * This function is called early in the resume process from timekeeping_resume(),
0778  * which means there is only one CPU, no processes are running, and interrupts
0779  * are disabled. It is therefore possible to stop the suspend timer without
0780  * taking the clocksource mutex.
0781  */
0782 u64 clocksource_stop_suspend_timing(struct clocksource *cs, u64 cycle_now)
0783 {
0784     u64 now, delta, nsec = 0;
0785 
0786     if (!suspend_clocksource)
0787         return 0;
0788 
0789     /*
0790      * If the current clocksource is the suspend timer, use the
0791      * tkr_mono.cycle_last value from timekeeping as the current cycle to
0792      * avoid a duplicate read of the suspend timer.
0793      */
0794     if (clocksource_is_suspend(cs))
0795         now = cycle_now;
0796     else
0797         now = suspend_clocksource->read(suspend_clocksource);
0798 
0799     if (now > suspend_start) {
0800         delta = clocksource_delta(now, suspend_start,
0801                       suspend_clocksource->mask);
0802         nsec = mul_u64_u32_shr(delta, suspend_clocksource->mult,
0803                        suspend_clocksource->shift);
0804     }
0805 
0806     /*
0807      * Disable the suspend timer to save power if current clocksource is
0808      * not the suspend timer.
0809      */
0810     if (!clocksource_is_suspend(cs) && suspend_clocksource->disable)
0811         suspend_clocksource->disable(suspend_clocksource);
0812 
0813     return nsec;
0814 }
0815 
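Concretely, the resume-time calculation above is "masked delta, then scale by mult and shift". The userspace sketch below walks through it with invented numbers (a 56-bit counter at 19.2 MHz using the mult/shift pair from the earlier example), substituting a plain 64-bit multiply for mul_u64_u32_shr() since the values are small enough not to overflow:

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	/* Hypothetical 19.2 MHz suspend counter; mult/shift as computed earlier. */
	const uint64_t mask  = (1ULL << 56) - 1;
	const uint32_t mult  = 873813333;
	const uint32_t shift = 24;

	uint64_t suspend_start = 123456789;			/* cycles at suspend */
	uint64_t now = suspend_start + 90ULL * 19200000;	/* 90 seconds later  */

	uint64_t delta = (now - suspend_start) & mask;
	uint64_t nsec  = (delta * mult) >> shift;	/* stands in for mul_u64_u32_shr() */

	printf("slept for %llu ns (~%llu s)\n",
	       (unsigned long long)nsec,
	       (unsigned long long)(nsec / 1000000000ULL));
	return 0;
}
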
0816 /**
0817  * clocksource_suspend - suspend the clocksource(s)
0818  */
0819 void clocksource_suspend(void)
0820 {
0821     struct clocksource *cs;
0822 
0823     list_for_each_entry_reverse(cs, &clocksource_list, list)
0824         if (cs->suspend)
0825             cs->suspend(cs);
0826 }
0827 
0828 /**
0829  * clocksource_resume - resume the clocksource(s)
0830  */
0831 void clocksource_resume(void)
0832 {
0833     struct clocksource *cs;
0834 
0835     list_for_each_entry(cs, &clocksource_list, list)
0836         if (cs->resume)
0837             cs->resume(cs);
0838 
0839     clocksource_resume_watchdog();
0840 }
0841 
0842 /**
0843  * clocksource_touch_watchdog - Update watchdog
0844  *
0845  * Update the watchdog after exception contexts such as kgdb so as not
0846  * to incorrectly trip the watchdog. This might fail when the kernel
0847  * was stopped in code which holds watchdog_lock.
0848  */
0849 void clocksource_touch_watchdog(void)
0850 {
0851     clocksource_resume_watchdog();
0852 }
0853 
0854 /**
0855  * clocksource_max_adjustment - Returns max adjustment amount
0856  * @cs:         Pointer to clocksource
0857  *
0858  */
0859 static u32 clocksource_max_adjustment(struct clocksource *cs)
0860 {
0861     u64 ret;
0862     /*
0863      * We won't try to correct for more than 11% adjustments (110,000 ppm).
0864      */
0865     ret = (u64)cs->mult * 11;
0866     do_div(ret, 100);
0867     return (u32)ret;
0868 }
0869 
0870 /**
0871  * clocks_calc_max_nsecs - Returns maximum nanoseconds that can be converted
0872  * @mult:   cycle to nanosecond multiplier
0873  * @shift:  cycle to nanosecond divisor (power of two)
0874  * @maxadj: maximum adjustment value to mult (~11%)
0875  * @mask:   bitmask for two's complement subtraction of non 64 bit counters
0876  * @max_cyc:    maximum cycle value before potential overflow (does not include
0877  *      any safety margin)
0878  *
0879  * NOTE: This function includes a safety margin of 50%, in other words, we
0880  * return half the number of nanoseconds the hardware counter can technically
0881  * cover. This is done so that we can potentially detect problems caused by
0882  * delayed timers or bad hardware, which might result in time intervals that
0883  * are larger than what the math used can handle without overflows.
0884  */
0885 u64 clocks_calc_max_nsecs(u32 mult, u32 shift, u32 maxadj, u64 mask, u64 *max_cyc)
0886 {
0887     u64 max_nsecs, max_cycles;
0888 
0889     /*
0890      * Calculate the maximum number of cycles that we can pass to the
0891      * cyc2ns() function without overflowing a 64-bit result.
0892      */
0893     max_cycles = ULLONG_MAX;
0894     do_div(max_cycles, mult+maxadj);
0895 
0896     /*
0897      * The actual maximum number of cycles we can defer the clocksource is
0898      * determined by the minimum of max_cycles and mask.
0899      * Note: Here we subtract the maxadj to make sure we don't sleep for
0900      * too long if there's a large negative adjustment.
0901      */
0902     max_cycles = min(max_cycles, mask);
0903     max_nsecs = clocksource_cyc2ns(max_cycles, mult - maxadj, shift);
0904 
0905     /* return the max_cycles value as well if requested */
0906     if (max_cyc)
0907         *max_cyc = max_cycles;
0908 
0909     /* Return 50% of the actual maximum, so we can detect bad values */
0910     max_nsecs >>= 1;
0911 
0912     return max_nsecs;
0913 }
0914 
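Continuing the hypothetical 19.2 MHz example, the userspace sketch below mirrors the calculation above: maxadj is roughly 11% of mult, max_cycles is bounded by both the 64-bit overflow limit and the counter mask, and the final figure is halved as the 50% safety margin.

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	/* Hypothetical 56-bit counter with the mult/shift pair from earlier. */
	const uint64_t mask   = (1ULL << 56) - 1;
	const uint32_t mult   = 873813333;
	const uint32_t shift  = 24;
	const uint32_t maxadj = (uint32_t)((uint64_t)mult * 11 / 100);

	uint64_t max_cycles = UINT64_MAX / (mult + maxadj);
	if (max_cycles > mask)
		max_cycles = mask;

	uint64_t max_nsecs = (max_cycles * (mult - maxadj)) >> shift;
	max_nsecs >>= 1;				/* 50% safety margin */

	printf("maxadj=%u max_cycles=%llu max_nsecs=%llu\n",
	       maxadj, (unsigned long long)max_cycles,
	       (unsigned long long)max_nsecs);
	return 0;
}
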
0915 /**
0916  * clocksource_update_max_deferment - Updates the clocksource max_idle_ns & max_cycles
0917  * @cs:         Pointer to clocksource to be updated
0918  *
0919  */
0920 static inline void clocksource_update_max_deferment(struct clocksource *cs)
0921 {
0922     cs->max_idle_ns = clocks_calc_max_nsecs(cs->mult, cs->shift,
0923                         cs->maxadj, cs->mask,
0924                         &cs->max_cycles);
0925 }
0926 
0927 static struct clocksource *clocksource_find_best(bool oneshot, bool skipcur)
0928 {
0929     struct clocksource *cs;
0930 
0931     if (!finished_booting || list_empty(&clocksource_list))
0932         return NULL;
0933 
0934     /*
0935      * We pick the clocksource with the highest rating. If oneshot
0936      * mode is active, we pick the highres valid clocksource with
0937      * the best rating.
0938      */
0939     list_for_each_entry(cs, &clocksource_list, list) {
0940         if (skipcur && cs == curr_clocksource)
0941             continue;
0942         if (oneshot && !(cs->flags & CLOCK_SOURCE_VALID_FOR_HRES))
0943             continue;
0944         return cs;
0945     }
0946     return NULL;
0947 }
0948 
0949 static void __clocksource_select(bool skipcur)
0950 {
0951     bool oneshot = tick_oneshot_mode_active();
0952     struct clocksource *best, *cs;
0953 
0954     /* Find the best suitable clocksource */
0955     best = clocksource_find_best(oneshot, skipcur);
0956     if (!best)
0957         return;
0958 
0959     if (!strlen(override_name))
0960         goto found;
0961 
0962     /* Check for the override clocksource. */
0963     list_for_each_entry(cs, &clocksource_list, list) {
0964         if (skipcur && cs == curr_clocksource)
0965             continue;
0966         if (strcmp(cs->name, override_name) != 0)
0967             continue;
0968         /*
0969          * Check to make sure we don't switch to a non-highres
0970          * capable clocksource if the tick code is in oneshot
0971          * mode (highres or nohz)
0972          */
0973         if (!(cs->flags & CLOCK_SOURCE_VALID_FOR_HRES) && oneshot) {
0974             /* Override clocksource cannot be used. */
0975             if (cs->flags & CLOCK_SOURCE_UNSTABLE) {
0976                 pr_warn("Override clocksource %s is unstable and not HRT compatible - cannot switch while in HRT/NOHZ mode\n",
0977                     cs->name);
0978                 override_name[0] = 0;
0979             } else {
0980                 /*
0981                  * The override cannot be currently verified.
0982                  * Deferring to let the watchdog check.
0983                  */
0984                 pr_info("Override clocksource %s is not currently HRT compatible - deferring\n",
0985                     cs->name);
0986             }
0987         } else
0988             /* Override clocksource can be used. */
0989             best = cs;
0990         break;
0991     }
0992 
0993 found:
0994     if (curr_clocksource != best && !timekeeping_notify(best)) {
0995         pr_info("Switched to clocksource %s\n", best->name);
0996         curr_clocksource = best;
0997     }
0998 }
0999 
1000 /**
1001  * clocksource_select - Select the best clocksource available
1002  *
1003  * Private function. Must hold clocksource_mutex when called.
1004  *
1005  * Select the clocksource with the best rating, or the clocksource,
1006  * which is selected by userspace override.
1007  */
1008 static void clocksource_select(void)
1009 {
1010     __clocksource_select(false);
1011 }
1012 
1013 static void clocksource_select_fallback(void)
1014 {
1015     __clocksource_select(true);
1016 }
1017 
1018 /*
1019  * clocksource_done_booting - Called near the end of core bootup
1020  *
1021  * Hack to avoid lots of clocksource churn at boot time.
1022  * We use fs_initcall because we want this to start before
1023  * device_initcall but after subsys_initcall.
1024  */
1025 static int __init clocksource_done_booting(void)
1026 {
1027     mutex_lock(&clocksource_mutex);
1028     curr_clocksource = clocksource_default_clock();
1029     finished_booting = 1;
1030     /*
1031      * Run the watchdog first to eliminate unstable clock sources
1032      */
1033     __clocksource_watchdog_kthread();
1034     clocksource_select();
1035     mutex_unlock(&clocksource_mutex);
1036     return 0;
1037 }
1038 fs_initcall(clocksource_done_booting);
1039 
1040 /*
1041  * Enqueue the clocksource sorted by rating
1042  */
1043 static void clocksource_enqueue(struct clocksource *cs)
1044 {
1045     struct list_head *entry = &clocksource_list;
1046     struct clocksource *tmp;
1047 
1048     list_for_each_entry(tmp, &clocksource_list, list) {
1049         /* Keep track of the place where to insert the new entry */
1050         if (tmp->rating < cs->rating)
1051             break;
1052         entry = &tmp->list;
1053     }
1054     list_add(&cs->list, entry);
1055 }
1056 
1057 /**
1058  * __clocksource_update_freq_scale - Used to update a clocksource with a new freq
1059  * @cs:     clocksource to be registered
1060  * @scale:  Scale factor multiplied against freq to get clocksource hz
1061  * @freq:   clocksource frequency (cycles per second) divided by scale
1062  *
1063  * This should only be called from the clocksource->enable() method.
1064  *
1065  * This *SHOULD NOT* be called directly! Please use the
1066  * __clocksource_update_freq_hz() or __clocksource_update_freq_khz() helper
1067  * functions.
1068  */
1069 void __clocksource_update_freq_scale(struct clocksource *cs, u32 scale, u32 freq)
1070 {
1071     u64 sec;
1072 
1073     /*
1074      * Default clocksources are *special* and self-define their mult/shift.
1075      * But, you're not special, so you should specify a freq value.
1076      */
1077     if (freq) {
1078         /*
1079          * Calc the maximum number of seconds which we can run before
1080          * wrapping around. For clocksources which have a mask > 32-bit
1081          * we need to limit the max sleep time to have a good
1082          * conversion precision. 10 minutes is still a reasonable
1083          * amount. That results in a shift value of 24 for a
1084          * clocksource with mask >= 40-bit and f >= 4GHz. That maps to
1085          * ~ 0.06ppm granularity for NTP.
1086          */
1087         sec = cs->mask;
1088         do_div(sec, freq);
1089         do_div(sec, scale);
1090         if (!sec)
1091             sec = 1;
1092         else if (sec > 600 && cs->mask > UINT_MAX)
1093             sec = 600;
1094 
1095         clocks_calc_mult_shift(&cs->mult, &cs->shift, freq,
1096                        NSEC_PER_SEC / scale, sec * scale);
1097     }
1098 
1099     /*
1100      * If the uncertainty margin is not specified, calculate it.
1101      * If both scale and freq are non-zero, calculate the clock
1102      * period, but bound below at 2*WATCHDOG_MAX_SKEW.  However,
1103      * if either of scale or freq is zero, be very conservative and
1104      * take the tens-of-milliseconds WATCHDOG_THRESHOLD value for the
1105      * uncertainty margin.  Allow stupidly small uncertainty margins
1106      * to be specified by the caller for testing purposes, but warn
1107      * to discourage production use of this capability.
1108      */
1109     if (scale && freq && !cs->uncertainty_margin) {
1110         cs->uncertainty_margin = NSEC_PER_SEC / (scale * freq);
1111         if (cs->uncertainty_margin < 2 * WATCHDOG_MAX_SKEW)
1112             cs->uncertainty_margin = 2 * WATCHDOG_MAX_SKEW;
1113     } else if (!cs->uncertainty_margin) {
1114         cs->uncertainty_margin = WATCHDOG_THRESHOLD;
1115     }
1116     WARN_ON_ONCE(cs->uncertainty_margin < 2 * WATCHDOG_MAX_SKEW);
1117 
1118     /*
1119      * Ensure clocksources that have large 'mult' values don't overflow
1120      * when adjusted.
1121      */
1122     cs->maxadj = clocksource_max_adjustment(cs);
1123     while (freq && ((cs->mult + cs->maxadj < cs->mult)
1124         || (cs->mult - cs->maxadj > cs->mult))) {
1125         cs->mult >>= 1;
1126         cs->shift--;
1127         cs->maxadj = clocksource_max_adjustment(cs);
1128     }
1129 
1130     /*
1131      * Only warn for *special* clocksources that self-define
1132      * their mult/shift values and don't specify a freq.
1133      */
1134     WARN_ONCE(cs->mult + cs->maxadj < cs->mult,
1135         "timekeeping: Clocksource %s might overflow on 11%% adjustment\n",
1136         cs->name);
1137 
1138     clocksource_update_max_deferment(cs);
1139 
1140     pr_info("%s: mask: 0x%llx max_cycles: 0x%llx, max_idle_ns: %lld ns\n",
1141         cs->name, cs->mask, cs->max_cycles, cs->max_idle_ns);
1142 }
1143 EXPORT_SYMBOL_GPL(__clocksource_update_freq_scale);
1144 
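For the same hypothetical 19.2 MHz clock registered with scale 1, the period-based margin computed above would only be about 52 ns, so it is the 2 * WATCHDOG_MAX_SKEW floor (200 us with the default skew limit) that actually ends up in cs->uncertainty_margin. A short userspace check of that arithmetic:

#include <stdio.h>

int main(void)
{
	const unsigned int scale = 1, freq = 19200000;		/* hypothetical clock */
	const unsigned int floor_ns = 2 * 100 * 1000;		/* 2 * WATCHDOG_MAX_SKEW */
	unsigned int margin = 1000000000u / (scale * freq);	/* ~52 ns period */

	if (margin < floor_ns)
		margin = floor_ns;
	printf("uncertainty_margin = %u ns\n", margin);		/* prints 200000 */
	return 0;
}
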
1145 /**
1146  * __clocksource_register_scale - Used to install new clocksources
1147  * @cs:     clocksource to be registered
1148  * @scale:  Scale factor multiplied against freq to get clocksource hz
1149  * @freq:   clocksource frequency (cycles per second) divided by scale
1150  *
1151  * Returns -EBUSY if registration fails, zero otherwise.
1152  *
1153  * This *SHOULD NOT* be called directly! Please use the
1154  * clocksource_register_hz() or clocksource_register_khz() helper functions.
1155  */
1156 int __clocksource_register_scale(struct clocksource *cs, u32 scale, u32 freq)
1157 {
1158     unsigned long flags;
1159 
1160     clocksource_arch_init(cs);
1161 
1162     if (WARN_ON_ONCE((unsigned int)cs->id >= CSID_MAX))
1163         cs->id = CSID_GENERIC;
1164     if (cs->vdso_clock_mode < 0 ||
1165         cs->vdso_clock_mode >= VDSO_CLOCKMODE_MAX) {
1166         pr_warn("clocksource %s registered with invalid VDSO mode %d. Disabling VDSO support.\n",
1167             cs->name, cs->vdso_clock_mode);
1168         cs->vdso_clock_mode = VDSO_CLOCKMODE_NONE;
1169     }
1170 
1171     /* Initialize mult/shift and max_idle_ns */
1172     __clocksource_update_freq_scale(cs, scale, freq);
1173 
1174     /* Add clocksource to the clocksource list */
1175     mutex_lock(&clocksource_mutex);
1176 
1177     clocksource_watchdog_lock(&flags);
1178     clocksource_enqueue(cs);
1179     clocksource_enqueue_watchdog(cs);
1180     clocksource_watchdog_unlock(&flags);
1181 
1182     clocksource_select();
1183     clocksource_select_watchdog(false);
1184     __clocksource_suspend_select(cs);
1185     mutex_unlock(&clocksource_mutex);
1186     return 0;
1187 }
1188 EXPORT_SYMBOL_GPL(__clocksource_register_scale);
1189 
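Drivers normally reach the function above through the clocksource_register_hz()/clocksource_register_khz() wrappers. The fragment below sketches what a hypothetical driver for a free-running 32-bit memory-mapped counter might register; the name, rating, 19.2 MHz rate, my_counter_base and my_counter_read() are all invented for illustration.

#include <linux/clocksource.h>
#include <linux/init.h>
#include <linux/io.h>

static void __iomem *my_counter_base;	/* assumed ioremap()ed elsewhere */

static u64 my_counter_read(struct clocksource *cs)
{
	return readl(my_counter_base);	/* free-running 32-bit up-counter */
}

static struct clocksource my_clocksource = {
	.name	= "my-counter",
	.rating	= 300,
	.read	= my_counter_read,
	.mask	= CLOCKSOURCE_MASK(32),
	.flags	= CLOCK_SOURCE_IS_CONTINUOUS,
};

static int __init my_clocksource_init(void)
{
	/* The core derives mult/shift, uncertainty_margin and max_idle_ns. */
	return clocksource_register_hz(&my_clocksource, 19200000);
}
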
1190 static void __clocksource_change_rating(struct clocksource *cs, int rating)
1191 {
1192     list_del(&cs->list);
1193     cs->rating = rating;
1194     clocksource_enqueue(cs);
1195 }
1196 
1197 /**
1198  * clocksource_change_rating - Change the rating of a registered clocksource
1199  * @cs:     clocksource to be changed
1200  * @rating: new rating
1201  */
1202 void clocksource_change_rating(struct clocksource *cs, int rating)
1203 {
1204     unsigned long flags;
1205 
1206     mutex_lock(&clocksource_mutex);
1207     clocksource_watchdog_lock(&flags);
1208     __clocksource_change_rating(cs, rating);
1209     clocksource_watchdog_unlock(&flags);
1210 
1211     clocksource_select();
1212     clocksource_select_watchdog(false);
1213     clocksource_suspend_select(false);
1214     mutex_unlock(&clocksource_mutex);
1215 }
1216 EXPORT_SYMBOL(clocksource_change_rating);
1217 
1218 /*
1219  * Unbind clocksource @cs. Called with clocksource_mutex held
1220  */
1221 static int clocksource_unbind(struct clocksource *cs)
1222 {
1223     unsigned long flags;
1224 
1225     if (clocksource_is_watchdog(cs)) {
1226         /* Select and try to install a replacement watchdog. */
1227         clocksource_select_watchdog(true);
1228         if (clocksource_is_watchdog(cs))
1229             return -EBUSY;
1230     }
1231 
1232     if (cs == curr_clocksource) {
1233         /* Select and try to install a replacement clock source */
1234         clocksource_select_fallback();
1235         if (curr_clocksource == cs)
1236             return -EBUSY;
1237     }
1238 
1239     if (clocksource_is_suspend(cs)) {
1240         /*
1241          * Select and try to install a replacement suspend clocksource.
1242          * If no replacement suspend clocksource, we will just let the
1243          * clocksource go and have no suspend clocksource.
1244          */
1245         clocksource_suspend_select(true);
1246     }
1247 
1248     clocksource_watchdog_lock(&flags);
1249     clocksource_dequeue_watchdog(cs);
1250     list_del_init(&cs->list);
1251     clocksource_watchdog_unlock(&flags);
1252 
1253     return 0;
1254 }
1255 
1256 /**
1257  * clocksource_unregister - remove a registered clocksource
1258  * @cs: clocksource to be unregistered
1259  */
1260 int clocksource_unregister(struct clocksource *cs)
1261 {
1262     int ret = 0;
1263 
1264     mutex_lock(&clocksource_mutex);
1265     if (!list_empty(&cs->list))
1266         ret = clocksource_unbind(cs);
1267     mutex_unlock(&clocksource_mutex);
1268     return ret;
1269 }
1270 EXPORT_SYMBOL(clocksource_unregister);
1271 
1272 #ifdef CONFIG_SYSFS
1273 /**
1274  * current_clocksource_show - sysfs interface for current clocksource
1275  * @dev:    unused
1276  * @attr:   unused
1277  * @buf:    char buffer to be filled with clocksource list
1278  *
1279  * Provides sysfs interface for listing current clocksource.
1280  */
1281 static ssize_t current_clocksource_show(struct device *dev,
1282                     struct device_attribute *attr,
1283                     char *buf)
1284 {
1285     ssize_t count = 0;
1286 
1287     mutex_lock(&clocksource_mutex);
1288     count = snprintf(buf, PAGE_SIZE, "%s\n", curr_clocksource->name);
1289     mutex_unlock(&clocksource_mutex);
1290 
1291     return count;
1292 }
1293 
1294 ssize_t sysfs_get_uname(const char *buf, char *dst, size_t cnt)
1295 {
1296     size_t ret = cnt;
1297 
1298     /* strings from sysfs write are not 0 terminated! */
1299     if (!cnt || cnt >= CS_NAME_LEN)
1300         return -EINVAL;
1301 
1302     /* strip off the trailing \n: */
1303     if (buf[cnt-1] == '\n')
1304         cnt--;
1305     if (cnt > 0)
1306         memcpy(dst, buf, cnt);
1307     dst[cnt] = 0;
1308     return ret;
1309 }
1310 
1311 /**
1312  * current_clocksource_store - interface for manually overriding clocksource
1313  * @dev:    unused
1314  * @attr:   unused
1315  * @buf:    name of override clocksource
1316  * @count:  length of buffer
1317  *
1318  * Takes input from sysfs interface for manually overriding the default
1319  * clocksource selection.
1320  */
1321 static ssize_t current_clocksource_store(struct device *dev,
1322                      struct device_attribute *attr,
1323                      const char *buf, size_t count)
1324 {
1325     ssize_t ret;
1326 
1327     mutex_lock(&clocksource_mutex);
1328 
1329     ret = sysfs_get_uname(buf, override_name, count);
1330     if (ret >= 0)
1331         clocksource_select();
1332 
1333     mutex_unlock(&clocksource_mutex);
1334 
1335     return ret;
1336 }
1337 static DEVICE_ATTR_RW(current_clocksource);
1338 
1339 /**
1340  * unbind_clocksource_store - interface for manually unbinding clocksource
1341  * @dev:    unused
1342  * @attr:   unused
1343  * @buf:    unused
1344  * @count:  length of buffer
1345  *
1346  * Takes input from sysfs interface for manually unbinding a clocksource.
1347  */
1348 static ssize_t unbind_clocksource_store(struct device *dev,
1349                     struct device_attribute *attr,
1350                     const char *buf, size_t count)
1351 {
1352     struct clocksource *cs;
1353     char name[CS_NAME_LEN];
1354     ssize_t ret;
1355 
1356     ret = sysfs_get_uname(buf, name, count);
1357     if (ret < 0)
1358         return ret;
1359 
1360     ret = -ENODEV;
1361     mutex_lock(&clocksource_mutex);
1362     list_for_each_entry(cs, &clocksource_list, list) {
1363         if (strcmp(cs->name, name))
1364             continue;
1365         ret = clocksource_unbind(cs);
1366         break;
1367     }
1368     mutex_unlock(&clocksource_mutex);
1369 
1370     return ret ? ret : count;
1371 }
1372 static DEVICE_ATTR_WO(unbind_clocksource);
1373 
1374 /**
1375  * available_clocksource_show - sysfs interface for listing clocksource
1376  * @dev:    unused
1377  * @attr:   unused
1378  * @buf:    char buffer to be filled with clocksource list
1379  *
1380  * Provides sysfs interface for listing registered clocksources
1381  */
1382 static ssize_t available_clocksource_show(struct device *dev,
1383                       struct device_attribute *attr,
1384                       char *buf)
1385 {
1386     struct clocksource *src;
1387     ssize_t count = 0;
1388 
1389     mutex_lock(&clocksource_mutex);
1390     list_for_each_entry(src, &clocksource_list, list) {
1391         /*
1392          * Don't show non-HRES clocksource if the tick code is
1393          * in one shot mode (highres=on or nohz=on)
1394          */
1395         if (!tick_oneshot_mode_active() ||
1396             (src->flags & CLOCK_SOURCE_VALID_FOR_HRES))
1397             count += snprintf(buf + count,
1398                   max((ssize_t)PAGE_SIZE - count, (ssize_t)0),
1399                   "%s ", src->name);
1400     }
1401     mutex_unlock(&clocksource_mutex);
1402 
1403     count += snprintf(buf + count,
1404               max((ssize_t)PAGE_SIZE - count, (ssize_t)0), "\n");
1405 
1406     return count;
1407 }
1408 static DEVICE_ATTR_RO(available_clocksource);
1409 
1410 static struct attribute *clocksource_attrs[] = {
1411     &dev_attr_current_clocksource.attr,
1412     &dev_attr_unbind_clocksource.attr,
1413     &dev_attr_available_clocksource.attr,
1414     NULL
1415 };
1416 ATTRIBUTE_GROUPS(clocksource);
1417 
1418 static struct bus_type clocksource_subsys = {
1419     .name = "clocksource",
1420     .dev_name = "clocksource",
1421 };
1422 
1423 static struct device device_clocksource = {
1424     .id = 0,
1425     .bus    = &clocksource_subsys,
1426     .groups = clocksource_groups,
1427 };
1428 
1429 static int __init init_clocksource_sysfs(void)
1430 {
1431     int error = subsys_system_register(&clocksource_subsys, NULL);
1432 
1433     if (!error)
1434         error = device_register(&device_clocksource);
1435 
1436     return error;
1437 }
1438 
1439 device_initcall(init_clocksource_sysfs);
1440 #endif /* CONFIG_SYSFS */
1441 
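The attributes above show up under /sys/devices/system/clocksource/clocksource0/. A small userspace program can list the registered clocksources and, given a name on the command line (for example "hpet", if it appears in the available list), request an override just like writing current_clocksource from a shell; root privileges are assumed for the write.

#include <stdio.h>

#define CS_DIR "/sys/devices/system/clocksource/clocksource0/"

int main(int argc, char **argv)
{
	char buf[256];
	FILE *f = fopen(CS_DIR "available_clocksource", "r");

	if (f) {
		if (fgets(buf, sizeof(buf), f))
			printf("available: %s", buf);
		fclose(f);
	}

	if (argc > 1) {				/* request an override */
		f = fopen(CS_DIR "current_clocksource", "w");
		if (f) {
			fprintf(f, "%s\n", argv[1]);
			fclose(f);
		}
	}
	return 0;
}
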
1442 /**
1443  * boot_override_clocksource - boot clock override
1444  * @str:    override name
1445  *
1446  * Takes a clocksource= boot argument and uses it
1447  * as the clocksource override name.
1448  */
1449 static int __init boot_override_clocksource(char* str)
1450 {
1451     mutex_lock(&clocksource_mutex);
1452     if (str)
1453         strlcpy(override_name, str, sizeof(override_name));
1454     mutex_unlock(&clocksource_mutex);
1455     return 1;
1456 }
1457 
1458 __setup("clocksource=", boot_override_clocksource);
1459 
1460 /**
1461  * boot_override_clock - Compatibility layer for deprecated boot option
1462  * @str:    override name
1463  *
1464  * DEPRECATED! Takes a clock= boot argument and uses it
1465  * as the clocksource override name.
1466  */
1467 static int __init boot_override_clock(char* str)
1468 {
1469     if (!strcmp(str, "pmtmr")) {
1470         pr_warn("clock=pmtmr is deprecated - use clocksource=acpi_pm\n");
1471         return boot_override_clocksource("acpi_pm");
1472     }
1473     pr_warn("clock= boot option is deprecated - use clocksource=xyz\n");
1474     return boot_override_clocksource(str);
1475 }
1476 
1477 __setup("clock=", boot_override_clock);