#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/kernel.h>
#include <linux/sched.h>
#include <linux/sched/clock.h>
#include <linux/init.h>
#include <linux/export.h>
#include <linux/timer.h>
#include <linux/acpi_pmtmr.h>
#include <linux/cpufreq.h>
#include <linux/delay.h>
#include <linux/clocksource.h>
#include <linux/percpu.h>
#include <linux/timex.h>
#include <linux/static_key.h>
#include <linux/static_call.h>

#include <asm/hpet.h>
#include <asm/timer.h>
#include <asm/vgtod.h>
#include <asm/time.h>
#include <asm/delay.h>
#include <asm/hypervisor.h>
#include <asm/nmi.h>
#include <asm/x86_init.h>
#include <asm/geode.h>
#include <asm/apic.h>
#include <asm/intel-family.h>
#include <asm/i8259.h>
#include <asm/uv/uv.h>

unsigned int __read_mostly cpu_khz;
EXPORT_SYMBOL(cpu_khz);

unsigned int __read_mostly tsc_khz;
EXPORT_SYMBOL(tsc_khz);

#define KHZ	1000
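
/*
 * The TSC can become unstable, e.g. due to cpufreq frequency scaling on
 * CPUs without a constant-rate TSC, or due to TSCs that are not
 * synchronized across CPUs; tsc_unstable flags that condition.
 */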
static int __read_mostly tsc_unstable;
static unsigned int __initdata tsc_early_khz;

static DEFINE_STATIC_KEY_FALSE(__use_tsc);

int tsc_clocksource_reliable;

static u32 art_to_tsc_numerator;
static u32 art_to_tsc_denominator;
static u64 art_to_tsc_offset;
struct clocksource *art_related_clocksource;

struct cyc2ns {
	struct cyc2ns_data data[2];
	seqcount_latch_t seq;
};

static DEFINE_PER_CPU_ALIGNED(struct cyc2ns, cyc2ns);

static int __init tsc_early_khz_setup(char *buf)
{
	return kstrtouint(buf, 0, &tsc_early_khz);
}
early_param("tsc_early_khz", tsc_early_khz_setup);

/*
 * Read the current cyc2ns conversion data via the per-CPU seqcount
 * latch: sample the sequence count before and after copying the data
 * and retry until both samples match, i.e. no concurrent update
 * happened. Preemption is disabled across the read so the data stays
 * consistently per-CPU.
 */
__always_inline void cyc2ns_read_begin(struct cyc2ns_data *data)
{
	int seq, idx;

	preempt_disable_notrace();

	do {
		seq = this_cpu_read(cyc2ns.seq.seqcount.sequence);
		idx = seq & 1;

		data->cyc2ns_offset = this_cpu_read(cyc2ns.data[idx].cyc2ns_offset);
		data->cyc2ns_mul    = this_cpu_read(cyc2ns.data[idx].cyc2ns_mul);
		data->cyc2ns_shift  = this_cpu_read(cyc2ns.data[idx].cyc2ns_shift);

	} while (unlikely(seq != this_cpu_read(cyc2ns.seq.seqcount.sequence)));
}

__always_inline void cyc2ns_read_end(void)
{
	preempt_enable_notrace();
}
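
/*
 * Accelerator for sched_clock(): convert TSC cycles (64 bits) to
 * nanoseconds (64 bits). The basic equation is
 *
 *	ns = cycles / (freq / ns_per_sec)
 *	   = cycles * (ns_per_sec / freq)
 *	   = cycles * (10^6 / cpu_khz)
 *
 * which is implemented with fixed-point math as
 *
 *	ns = cycles * cyc2ns_mul >> cyc2ns_shift
 *
 * plus a per-CPU offset that keeps sched_clock() continuous when the
 * scale factors change. The mul/shift pair is computed in
 * __set_cyc2ns_scale() via clocks_calc_mult_shift().
 */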
static __always_inline unsigned long long cycles_2_ns(unsigned long long cyc)
{
	struct cyc2ns_data data;
	unsigned long long ns;

	cyc2ns_read_begin(&data);

	ns = data.cyc2ns_offset;
	ns += mul_u64_u32_shr(cyc, data.cyc2ns_mul, data.cyc2ns_shift);

	cyc2ns_read_end();

	return ns;
}

static void __set_cyc2ns_scale(unsigned long khz, int cpu, unsigned long long tsc_now)
{
	unsigned long long ns_now;
	struct cyc2ns_data data;
	struct cyc2ns *c2n;

	ns_now = cycles_2_ns(tsc_now);

	/*
	 * Compute a new multiplier/shift pair and make sure the time
	 * function stays continuous; the offset below compensates for
	 * the new scale factors at the current TSC value.
	 */
	clocks_calc_mult_shift(&data.cyc2ns_mul, &data.cyc2ns_shift, khz,
			       NSEC_PER_MSEC, 0);

	/*
	 * The cyc2ns data is exported to user space via the perf mmap
	 * page (see arch_perf_update_userpage()), where the shift is not
	 * expected to exceed 31. Clamp the shift and halve the multiplier
	 * to preserve the same ratio.
	 */
	if (data.cyc2ns_shift == 32) {
		data.cyc2ns_shift = 31;
		data.cyc2ns_mul >>= 1;
	}

	data.cyc2ns_offset = ns_now -
		mul_u64_u32_shr(tsc_now, data.cyc2ns_mul, data.cyc2ns_shift);

	c2n = per_cpu_ptr(&cyc2ns, cpu);

	raw_write_seqcount_latch(&c2n->seq);
	c2n->data[0] = data;
	raw_write_seqcount_latch(&c2n->seq);
	c2n->data[1] = data;
}

static void set_cyc2ns_scale(unsigned long khz, int cpu, unsigned long long tsc_now)
{
	unsigned long flags;

	local_irq_save(flags);
	sched_clock_idle_sleep_event();

	if (khz)
		__set_cyc2ns_scale(khz, cpu, tsc_now);

	sched_clock_idle_wakeup_event();
	local_irq_restore(flags);
}

/*
 * Initialize cyc2ns for the boot CPU.
 */
static void __init cyc2ns_init_boot_cpu(void)
{
	struct cyc2ns *c2n = this_cpu_ptr(&cyc2ns);

	seqcount_latch_init(&c2n->seq);
	__set_cyc2ns_scale(tsc_khz, smp_processor_id(), rdtsc());
}

/*
 * Secondary CPUs do not run through tsc_init(), so seed their cyc2ns
 * state with a copy of the boot CPU's scale factors, assuming all CPUs
 * run at the same frequency.
 */
static void __init cyc2ns_init_secondary_cpus(void)
{
	unsigned int cpu, this_cpu = smp_processor_id();
	struct cyc2ns *c2n = this_cpu_ptr(&cyc2ns);
	struct cyc2ns_data *data = c2n->data;

	for_each_possible_cpu(cpu) {
		if (cpu != this_cpu) {
			/* Initialize the remote CPU's latch before copying */
			c2n = per_cpu_ptr(&cyc2ns, cpu);
			seqcount_latch_init(&c2n->seq);
			c2n->data[0] = data[0];
			c2n->data[1] = data[1];
		}
	}
}
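
/*
 * Scheduler clock - returns current time in nanoseconds.
 */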
u64 native_sched_clock(void)
{
	if (static_branch_likely(&__use_tsc)) {
		u64 tsc_now = rdtsc();

		/* return the value in ns */
		return cycles_2_ns(tsc_now);
	}

	/*
	 * Fall back to jiffies if there's no TSC available. Note that
	 * the TSC path above is still used when the TSC is merely marked
	 * unstable: unlike time of day, the scheduler clock tolerates
	 * small errors and it is important that it stays fast.
	 */
	return (jiffies_64 - INITIAL_JIFFIES) * (1000000000 / HZ);
}

/*
 * Generate a sched_clock() value from an already-read TSC value.
 */
u64 native_sched_clock_from_tsc(u64 tsc)
{
	return cycles_2_ns(tsc);
}

/*
 * A real function is needed for sched_clock() to override the weak
 * default version.
 */
#ifdef CONFIG_PARAVIRT
unsigned long long sched_clock(void)
{
	return paravirt_sched_clock();
}

bool using_native_sched_clock(void)
{
	return static_call_query(pv_sched_clock) == native_sched_clock;
}
#else
unsigned long long
sched_clock(void) __attribute__((alias("native_sched_clock")));

bool using_native_sched_clock(void) { return true; }
#endif

int check_tsc_unstable(void)
{
	return tsc_unstable;
}
EXPORT_SYMBOL_GPL(check_tsc_unstable);

#ifdef CONFIG_X86_TSC
int __init notsc_setup(char *str)
{
	mark_tsc_unstable("boot parameter notsc");
	return 1;
}
#else
/*
 * Without CONFIG_X86_TSC, disable the TSC outright by clearing the
 * CPU feature flag; takes effect in cpu/common.c.
 */
int __init notsc_setup(char *str)
{
	setup_clear_cpu_cap(X86_FEATURE_TSC);
	return 1;
}
#endif

__setup("notsc", notsc_setup);

static int no_sched_irq_time;
static int no_tsc_watchdog;

static int __init tsc_setup(char *str)
{
	if (!strcmp(str, "reliable"))
		tsc_clocksource_reliable = 1;
	if (!strncmp(str, "noirqtime", 9))
		no_sched_irq_time = 1;
	if (!strcmp(str, "unstable"))
		mark_tsc_unstable("boot parameter");
	if (!strcmp(str, "nowatchdog"))
		no_tsc_watchdog = 1;
	return 1;
}

__setup("tsc=", tsc_setup);

#define MAX_RETRIES		5
#define TSC_DEFAULT_THRESHOLD	0x20000
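
/*
 * Read the TSC and a reference counter (HPET or ACPI PM timer), taking
 * care of SMI disturbance: retry until the two TSC reads bracketing the
 * reference read are close enough together.
 */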
static u64 tsc_read_refs(u64 *p, int hpet)
{
	u64 t1, t2;
	u64 thresh = tsc_khz ? tsc_khz >> 5 : TSC_DEFAULT_THRESHOLD;
	int i;

	for (i = 0; i < MAX_RETRIES; i++) {
		t1 = get_cycles();
		if (hpet)
			*p = hpet_readl(HPET_COUNTER) & 0xFFFFFFFF;
		else
			*p = acpi_pm_read_early();
		t2 = get_cycles();
		if ((t2 - t1) < thresh)
			return t2;
	}
	return ULLONG_MAX;
}

/*
 * Calculate the TSC frequency from the HPET reference.
 */
static unsigned long calc_hpet_ref(u64 deltatsc, u64 hpet1, u64 hpet2)
{
	u64 tmp;

	if (hpet2 < hpet1)
		hpet2 += 0x100000000ULL;
	hpet2 -= hpet1;
	tmp = ((u64)hpet2 * hpet_readl(HPET_PERIOD));
	do_div(tmp, 1000000);
	deltatsc = div64_u64(deltatsc, tmp);

	return (unsigned long) deltatsc;
}

/*
 * Calculate the TSC frequency from the ACPI PM timer reference.
 */
static unsigned long calc_pmtimer_ref(u64 deltatsc, u64 pm1, u64 pm2)
{
	u64 tmp;

	if (!pm1 && !pm2)
		return ULONG_MAX;

	if (pm2 < pm1)
		pm2 += (u64)ACPI_PM_OVRRUN;
	pm2 -= pm1;
	tmp = pm2 * 1000000000LL;
	do_div(tmp, PMTMR_TICKS_PER_SEC);
	do_div(deltatsc, tmp);

	return (unsigned long) deltatsc;
}

#define CAL_MS		10
#define CAL_LATCH	(PIT_TICK_RATE / (1000 / CAL_MS))
#define CAL_PIT_LOOPS	1000

#define CAL2_MS		50
#define CAL2_LATCH	(PIT_TICK_RATE / (1000 / CAL2_MS))
#define CAL2_PIT_LOOPS	5000
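
/*
 * Try to calibrate the TSC against the Programmable Interval Timer and
 * return the TSC frequency in kHz. Returns ULONG_MAX on failure.
 */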
static unsigned long pit_calibrate_tsc(u32 latch, unsigned long ms, int loopmin)
{
	u64 tsc, t1, t2, delta;
	unsigned long tscmin, tscmax;
	int pitcnt;

	if (!has_legacy_pic()) {
		/*
		 * There is no PIT to calibrate against: wait the same
		 * 50 ms the PIT loop below would have taken, relying on
		 * a semi-usable early udelay().
		 */
		udelay(10 * USEC_PER_MSEC);
		udelay(10 * USEC_PER_MSEC);
		udelay(10 * USEC_PER_MSEC);
		udelay(10 * USEC_PER_MSEC);
		udelay(10 * USEC_PER_MSEC);
		return ULONG_MAX;
	}

	/* Set the Gate high, disable speaker */
	outb((inb(0x61) & ~0x02) | 0x01, 0x61);

	/*
	 * Setup CTC channel 2 for mode 0 (interrupt on terminal count),
	 * binary count. Set the latch register to the requested value
	 * (LSB then MSB).
	 */
	outb(0xb0, 0x43);
	outb(latch & 0xff, 0x42);
	outb(latch >> 8, 0x42);

	tsc = t1 = t2 = get_cycles();

	pitcnt = 0;
	tscmax = 0;
	tscmin = ULONG_MAX;
	while ((inb(0x61) & 0x20) == 0) {
		t2 = get_cycles();
		delta = t2 - tsc;
		tsc = t2;
		if ((unsigned long) delta < tscmin)
			tscmin = (unsigned int) delta;
		if ((unsigned long) delta > tscmax)
			tscmax = (unsigned int) delta;
		pitcnt++;
	}

	/*
	 * Sanity checks:
	 *
	 * If we were not able to read the PIT more than loopmin
	 * times, then we have been hit by a massive SMI.
	 *
	 * If the maximum is 10 times larger than the minimum,
	 * then we got hit by an SMI as well.
	 */
	if (pitcnt < loopmin || tscmax > 10 * tscmin)
		return ULONG_MAX;

	/* Calculate the PIT value */
	delta = t2 - t1;
	do_div(delta, ms);
	return delta;
}
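
/*
 * This reads the current MSB of the PIT counter, and checks if we are
 * running on sufficiently fast and accurate hardware.
 *
 * Our expectations are:
 *  - the PIT is running at roughly 1.19 MHz;
 *  - each IO takes roughly a microsecond on real hardware, with generous
 *    allowance for being up to an order of magnitude faster or slower;
 *  - with 256 PIT ticks per MSB value there is ample time to observe the
 *    same MSB dozens of times, so anything over 50 loops is accepted;
 *  - if the PIT appears stuck we return early and let the next
 *    pit_expect_msb() call flag the failure.
 *
 * These expectations mean the transition from one MSB value to the next
 * is observed with high accuracy, so the TSC values taken at the
 * transitions yield a good estimate of the TSC frequency.
 */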
static inline int pit_verify_msb(unsigned char val)
{
	/* Ignore LSB */
	inb(0x42);
	return inb(0x42) == val;
}

static inline int pit_expect_msb(unsigned char val, u64 *tscp, unsigned long *deltap)
{
	int count;
	u64 tsc = 0, prev_tsc = 0;

	for (count = 0; count < 50000; count++) {
		if (!pit_verify_msb(val))
			break;
		prev_tsc = tsc;
		tsc = get_cycles();
	}
	*deltap = get_cycles() - prev_tsc;
	*tscp = tsc;

	/*
	 * We require _some_ success, but the quality control
	 * will be based on the error terms on the TSC values.
	 */
	return count > 5;
}
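
/*
 * How many MSB values do we want to see? We aim for a maximum error
 * rate of 500 ppm (in practice the real error is much smaller), but
 * refuse to spend more than 50 ms on it.
 */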
#define MAX_QUICK_PIT_MS 50
#define MAX_QUICK_PIT_ITERATIONS (MAX_QUICK_PIT_MS * PIT_TICK_RATE / 1000 / 256)

static unsigned long quick_pit_calibrate(void)
{
	int i;
	u64 tsc, delta;
	unsigned long d1, d2;

	if (!has_legacy_pic())
		return 0;

	/* Set the Gate high, disable speaker */
	outb((inb(0x61) & ~0x02) | 0x01, 0x61);

	/*
	 * Counter 2, mode 0 (one-shot), binary count.
	 *
	 * NOTE! Mode 2 decrements by two (and then the output is flipped
	 * each time), giving the same final output frequency as a
	 * decrement-by-one, so mode 0 is much better when looking at the
	 * individual counts.
	 */
	outb(0xb0, 0x43);

	/* Start at 0xffff */
	outb(0xff, 0x42);
	outb(0xff, 0x42);

	/*
	 * The PIT starts counting at the next edge, so we need to delay
	 * for a microsecond. The easiest way to do that is to just read
	 * back the 16-bit counter once from the PIT.
	 */
	pit_verify_msb(0);

	if (pit_expect_msb(0xff, &tsc, &d1)) {
		for (i = 1; i <= MAX_QUICK_PIT_ITERATIONS; i++) {
			if (!pit_expect_msb(0xff-i, &delta, &d2))
				break;

			delta -= tsc;

			/*
			 * Extrapolate the error and fail fast if the
			 * error will never be below 500 ppm.
			 */
			if (i == 1 &&
			    d1 + d2 >= (delta * MAX_QUICK_PIT_ITERATIONS) >> 11)
				return 0;

			/*
			 * Iterate until the error is less than 500 ppm.
			 */
			if (d1 + d2 >= delta >> 11)
				continue;

			/*
			 * Check the PIT one more time to verify that
			 * all TSC reads were stable wrt the PIT.
			 *
			 * This also guarantees serialization of the
			 * last cycle read ('d2') in pit_expect_msb.
			 */
			if (!pit_verify_msb(0xfe - i))
				break;
			goto success;
		}
	}
	pr_info("Fast TSC calibration failed\n");
	return 0;

success:
	/*
	 * Ok, if we get here, then we've seen the MSB of the PIT
	 * decrement 'i' times, and the error has shrunk to less than
	 * 500 ppm, so the TSC reads are reliable (within that error).
	 *
	 * kHz = ticks / time-in-seconds / 1000;
	 * kHz = (t2 - t1) / (I * 256 / PIT_TICK_RATE) / 1000
	 * kHz = ((t2 - t1) * PIT_TICK_RATE) / (I * 256 * 1000)
	 */
	delta *= PIT_TICK_RATE;
	do_div(delta, i*256*1000);
	pr_info("Fast TSC calibration using PIT\n");
	return delta;
}
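
/**
 * native_calibrate_tsc - determine the TSC frequency via CPUID
 *
 * Returns the TSC frequency in kHz, or 0 if it cannot be determined
 * from CPUID leaf 0x15.
 */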
unsigned long native_calibrate_tsc(void)
{
	unsigned int eax_denominator, ebx_numerator, ecx_hz, edx;
	unsigned int crystal_khz;

	if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL)
		return 0;

	if (boot_cpu_data.cpuid_level < 0x15)
		return 0;

	eax_denominator = ebx_numerator = ecx_hz = edx = 0;

	/* CPUID 15H TSC/Crystal ratio, plus optionally Crystal Hz */
	cpuid(0x15, &eax_denominator, &ebx_numerator, &ecx_hz, &edx);

	if (ebx_numerator == 0 || eax_denominator == 0)
		return 0;

	crystal_khz = ecx_hz / 1000;

	/*
	 * Denverton SoCs don't report the crystal clock, and also don't
	 * support CPUID.0x16 for the calculation below, so hardcode the
	 * 25MHz crystal clock.
	 */
	if (crystal_khz == 0 &&
	    boot_cpu_data.x86_model == INTEL_FAM6_ATOM_GOLDMONT_D)
		crystal_khz = 25000;

	/*
	 * A TSC frequency reported directly by CPUID is a hardware
	 * reported frequency and is the most accurate one available, so
	 * it is considered a known frequency.
	 */
	if (crystal_khz != 0)
		setup_force_cpu_cap(X86_FEATURE_TSC_KNOWN_FREQ);

	/*
	 * Some Intel SoCs like Skylake and Kabylake don't report the
	 * crystal clock, but it can be calculated accurately from the
	 * crystal ratio and the CPU base frequency (CPUID leaf 0x16).
	 */
	if (crystal_khz == 0 && boot_cpu_data.cpuid_level >= 0x16) {
		unsigned int eax_base_mhz, ebx, ecx, edx;

		cpuid(0x16, &eax_base_mhz, &ebx, &ecx, &edx);
		crystal_khz = eax_base_mhz * 1000 *
			eax_denominator / ebx_numerator;
	}

	if (crystal_khz == 0)
		return 0;

	/*
	 * For Atom SoCs the TSC is the only reliable clocksource.
	 * Mark the TSC reliable so that no watchdog runs on it.
	 */
	if (boot_cpu_data.x86_model == INTEL_FAM6_ATOM_GOLDMONT)
		setup_force_cpu_cap(X86_FEATURE_TSC_RELIABLE);

#ifdef CONFIG_X86_LOCAL_APIC
	/*
	 * The local APIC appears to be fed by the core crystal clock,
	 * so set the global lapic_timer_period here to avoid having to
	 * calibrate the APIC timer later.
	 */
	lapic_timer_period = crystal_khz * 1000 / HZ;
#endif

	return crystal_khz * ebx_numerator / eax_denominator;
}

static unsigned long cpu_khz_from_cpuid(void)
{
	unsigned int eax_base_mhz, ebx_max_mhz, ecx_bus_mhz, edx;

	if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL)
		return 0;

	if (boot_cpu_data.cpuid_level < 0x16)
		return 0;

	eax_base_mhz = ebx_max_mhz = ecx_bus_mhz = edx = 0;

	cpuid(0x16, &eax_base_mhz, &ebx_max_mhz, &ecx_bus_mhz, &edx);

	return eax_base_mhz * 1000;
}
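
/*
 * Calibrate the CPU/TSC using the PIT, HPET and ACPI PM timer. These
 * reference methods only become available later in boot, after ACPI
 * has been initialized.
 */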
static unsigned long pit_hpet_ptimer_calibrate_cpu(void)
{
	u64 tsc1, tsc2, delta, ref1, ref2;
	unsigned long tsc_pit_min = ULONG_MAX, tsc_ref_min = ULONG_MAX;
	unsigned long flags, latch, ms;
	int hpet = is_hpet_enabled(), i, loopmin;
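
	/*
	 * Run 5 calibration loops to get the lowest frequency value
	 * (the best estimate). Two different calibration modes are
	 * used:
	 *
	 * 1) PIT loop. Set PIT channel 2 to oneshot mode with a 50ms
	 * timeout, read the time right after starting the timer and
	 * wait until the PIT count down reaches zero. In each wait
	 * loop iteration read the TSC and track the min and max delta
	 * to the previous read. The delta is mostly defined by the IO
	 * time of the PIT access, so a disturbance between two reads
	 * is detectable: if the maximum delta is significantly larger
	 * than the minimum, the result is discarded and retried.
	 *
	 * 2) Reference counter. If available, the HPET or the ACPI PM
	 * timer is used as a reference to check the sanity of that
	 * value. Separate TSC readouts bracket the reference read so
	 * disturbed values can be discarded here as well. This is done
	 * around the PIT calibration delay loop since we have to wait
	 * for a certain amount of time anyway.
	 */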

	/* Preset PIT loop values */
	latch = CAL_LATCH;
	ms = CAL_MS;
	loopmin = CAL_PIT_LOOPS;

	for (i = 0; i < 3; i++) {
		unsigned long tsc_pit_khz;

		/*
		 * Read the start value and the reference count of
		 * hpet/pmtimer when available. Then do the PIT
		 * calibration, which will take at least 50ms, and
		 * read the end value.
		 */
		local_irq_save(flags);
		tsc1 = tsc_read_refs(&ref1, hpet);
		tsc_pit_khz = pit_calibrate_tsc(latch, ms, loopmin);
		tsc2 = tsc_read_refs(&ref2, hpet);
		local_irq_restore(flags);

		/* Pick the lowest PIT TSC calibration so far */
		tsc_pit_min = min(tsc_pit_min, tsc_pit_khz);

		/* hpet or pmtimer available ? */
		if (ref1 == ref2)
			continue;

		/* Check, whether the sampling was disturbed */
		if (tsc1 == ULLONG_MAX || tsc2 == ULLONG_MAX)
			continue;

		tsc2 = (tsc2 - tsc1) * 1000000LL;
		if (hpet)
			tsc2 = calc_hpet_ref(tsc2, ref1, ref2);
		else
			tsc2 = calc_pmtimer_ref(tsc2, ref1, ref2);

		tsc_ref_min = min(tsc_ref_min, (unsigned long) tsc2);

		/* Check the reference deviation */
		delta = ((u64) tsc_pit_min) * 100;
		do_div(delta, tsc_ref_min);

		/*
		 * If both calibration results are inside a 10% window
		 * then we can be sure that the calibration succeeded.
		 * Break out of the loop right away and use the
		 * reference value, as it is more precise.
		 */
		if (delta >= 90 && delta <= 110) {
			pr_info("PIT calibration matches %s. %d loops\n",
				hpet ? "HPET" : "PMTIMER", i + 1);
			return tsc_ref_min;
		}

		/*
		 * Check whether the PIT failed more than once. This
		 * happens in virtualized environments. Give the virtual
		 * PC a slightly longer timeframe for the HPET/PMTIMER
		 * to make the result more precise.
		 */
		if (i == 1 && tsc_pit_min == ULONG_MAX) {
			latch = CAL2_LATCH;
			ms = CAL2_MS;
			loopmin = CAL2_PIT_LOOPS;
		}
	}

	/*
	 * Now check the results.
	 */
	if (tsc_pit_min == ULONG_MAX) {
		/* PIT gave no useful value */
		pr_warn("Unable to calibrate against PIT\n");

		/* We don't have an alternative source, disable TSC */
		if (!hpet && !ref1 && !ref2) {
			pr_notice("No reference (HPET/PMTIMER) available\n");
			return 0;
		}

		/* The alternative source failed as well, disable TSC */
		if (tsc_ref_min == ULONG_MAX) {
			pr_warn("HPET/PMTIMER calibration failed\n");
			return 0;
		}

		/* Use the alternative source */
		pr_info("using %s reference calibration\n",
			hpet ? "HPET" : "PMTIMER");

		return tsc_ref_min;
	}

	/* We don't have an alternative source, use the PIT calibration value */
	if (!hpet && !ref1 && !ref2) {
		pr_info("Using PIT calibration value\n");
		return tsc_pit_min;
	}

	/* The alternative source failed, use the PIT calibration value */
	if (tsc_ref_min == ULONG_MAX) {
		pr_warn("HPET/PMTIMER calibration failed. Using PIT calibration.\n");
		return tsc_pit_min;
	}

	/*
	 * The calibration values differ too much. In doubt, use the PIT
	 * value as there are known PMTIMERs running at double speed.
	 * At least let the user know:
	 */
	pr_warn("PIT calibration deviates from %s: %lu %lu\n",
		hpet ? "HPET" : "PMTIMER", tsc_pit_min, tsc_ref_min);
	pr_info("Using PIT calibration value\n");
	return tsc_pit_min;
}
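
/**
 * native_calibrate_cpu_early - cheap CPU frequency calibration that is
 * usable very early in boot: CPUID, MSR, or the quick PIT method.
 */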
unsigned long native_calibrate_cpu_early(void)
{
	unsigned long flags, fast_calibrate = cpu_khz_from_cpuid();

	if (!fast_calibrate)
		fast_calibrate = cpu_khz_from_msr();
	if (!fast_calibrate) {
		local_irq_save(flags);
		fast_calibrate = quick_pit_calibrate();
		local_irq_restore(flags);
	}
	return fast_calibrate;
}
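
/**
 * native_calibrate_cpu - calibrate the CPU, falling back to the slower
 * PIT/HPET/PM-timer method when the early path fails.
 */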
static unsigned long native_calibrate_cpu(void)
{
	unsigned long tsc_freq = native_calibrate_cpu_early();

	if (!tsc_freq)
		tsc_freq = pit_hpet_ptimer_calibrate_cpu();

	return tsc_freq;
}

void recalibrate_cpu_khz(void)
{
#ifndef CONFIG_SMP
	unsigned long cpu_khz_old = cpu_khz;

	if (!boot_cpu_has(X86_FEATURE_TSC))
		return;

	cpu_khz = x86_platform.calibrate_cpu();
	tsc_khz = x86_platform.calibrate_tsc();
	if (tsc_khz == 0)
		tsc_khz = cpu_khz;
	else if (abs(cpu_khz - tsc_khz) * 10 > tsc_khz)
		cpu_khz = tsc_khz;
	cpu_data(0).loops_per_jiffy = cpufreq_scale(cpu_data(0).loops_per_jiffy,
						    cpu_khz_old, cpu_khz);
#endif
}
EXPORT_SYMBOL(recalibrate_cpu_khz);

static unsigned long long cyc2ns_suspend;

void tsc_save_sched_clock_state(void)
{
	if (!sched_clock_stable())
		return;

	cyc2ns_suspend = sched_clock();
}
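
/*
 * Even on processors with invariant TSC, the TSC gets reset in some of
 * the ACPI system sleep states. And in some systems the BIOS seems to
 * reinit the TSC to an arbitrary value (still sync'd across CPUs)
 * during resume from such sleep states. Restore the per-CPU cyc2ns
 * offsets so that sched_clock() continues from the value saved at
 * suspend rather than jumping across the reset.
 */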
void tsc_restore_sched_clock_state(void)
{
	unsigned long long offset;
	unsigned long flags;
	int cpu;

	if (!sched_clock_stable())
		return;

	local_irq_save(flags);

	/*
	 * We're coming out of suspend, there's no concurrency yet; don't
	 * bother being nice about the RCU stuff, just write to both
	 * data fields.
	 */
	this_cpu_write(cyc2ns.data[0].cyc2ns_offset, 0);
	this_cpu_write(cyc2ns.data[1].cyc2ns_offset, 0);

	offset = cyc2ns_suspend - sched_clock();

	for_each_possible_cpu(cpu) {
		per_cpu(cyc2ns.data[0].cyc2ns_offset, cpu) = offset;
		per_cpu(cyc2ns.data[1].cyc2ns_offset, cpu) = offset;
	}

	local_irq_restore(flags);
}

#ifdef CONFIG_CPU_FREQ
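/*
 * Frequency scaling support. Adjust the TSC based sched_clock when the
 * CPU frequency changes.
 *
 * NOTE: On SMP systems the TSCs of the CPUs can get out of sync if
 * their frequencies change independently, so a cpufreq transition on an
 * SMP system simply marks the TSC unstable.
 */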
static unsigned int ref_freq;
static unsigned long loops_per_jiffy_ref;
static unsigned long tsc_khz_ref;

static int time_cpufreq_notifier(struct notifier_block *nb, unsigned long val,
				 void *data)
{
	struct cpufreq_freqs *freq = data;

	if (num_online_cpus() > 1) {
		mark_tsc_unstable("cpufreq changes on SMP");
		return 0;
	}

	if (!ref_freq) {
		ref_freq = freq->old;
		loops_per_jiffy_ref = boot_cpu_data.loops_per_jiffy;
		tsc_khz_ref = tsc_khz;
	}

	if ((val == CPUFREQ_PRECHANGE && freq->old < freq->new) ||
	    (val == CPUFREQ_POSTCHANGE && freq->old > freq->new)) {
		boot_cpu_data.loops_per_jiffy =
			cpufreq_scale(loops_per_jiffy_ref, ref_freq, freq->new);

		tsc_khz = cpufreq_scale(tsc_khz_ref, ref_freq, freq->new);
		if (!(freq->flags & CPUFREQ_CONST_LOOPS))
			mark_tsc_unstable("cpufreq changes");

		set_cyc2ns_scale(tsc_khz, freq->policy->cpu, rdtsc());
	}

	return 0;
}

static struct notifier_block time_cpufreq_notifier_block = {
	.notifier_call = time_cpufreq_notifier
};

static int __init cpufreq_register_tsc_scaling(void)
{
	if (!boot_cpu_has(X86_FEATURE_TSC))
		return 0;
	if (boot_cpu_has(X86_FEATURE_CONSTANT_TSC))
		return 0;
	cpufreq_register_notifier(&time_cpufreq_notifier_block,
				  CPUFREQ_TRANSITION_NOTIFIER);
	return 0;
}

core_initcall(cpufreq_register_tsc_scaling);

#endif /* CONFIG_CPU_FREQ */

#define ART_CPUID_LEAF		(0x15)
#define ART_MIN_DENOMINATOR	(1)
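
/*
 * If ART (Always Running Timer) is present, detect the numerator and
 * denominator needed to convert ART values to TSC values.
 */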
static void __init detect_art(void)
{
	unsigned int unused[2];

	if (boot_cpu_data.cpuid_level < ART_CPUID_LEAF)
		return;

	/*
	 * Don't enable ART in a VM; non-stop TSC and TSC_ADJUST are
	 * required, and the TSC must not get reset asynchronously.
	 */
	if (boot_cpu_has(X86_FEATURE_HYPERVISOR) ||
	    !boot_cpu_has(X86_FEATURE_NONSTOP_TSC) ||
	    !boot_cpu_has(X86_FEATURE_TSC_ADJUST) ||
	    tsc_async_resets)
		return;

	cpuid(ART_CPUID_LEAF, &art_to_tsc_denominator,
	      &art_to_tsc_numerator, unused, unused+1);

	if (art_to_tsc_denominator < ART_MIN_DENOMINATOR)
		return;

	rdmsrl(MSR_IA32_TSC_ADJUST, art_to_tsc_offset);

	/* Make this sticky over multiple CPU init calls */
	setup_force_cpu_cap(X86_FEATURE_ART);
}

/* clocksource code */

static void tsc_resume(struct clocksource *cs)
{
	tsc_verify_tsc_adjust(true);
}
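
/*
 * No TSC-vs-cycle_last sanity check is needed here: the core
 * timekeeping code checks read_tsc() - cycle_last for being negative,
 * which works because CLOCKSOURCE_MASK(64) does not mask out any bit.
 */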
static u64 read_tsc(struct clocksource *cs)
{
	return (u64)rdtsc_ordered();
}

static void tsc_cs_mark_unstable(struct clocksource *cs)
{
	if (tsc_unstable)
		return;

	tsc_unstable = 1;
	if (using_native_sched_clock())
		clear_sched_clock_stable();
	disable_sched_clock_irqtime();
	pr_info("Marking TSC unstable due to clocksource watchdog\n");
}

static void tsc_cs_tick_stable(struct clocksource *cs)
{
	if (tsc_unstable)
		return;

	if (using_native_sched_clock())
		sched_clock_tick_stable();
}

static int tsc_cs_enable(struct clocksource *cs)
{
	vclocks_set_used(VDSO_CLOCKMODE_TSC);
	return 0;
}
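
/*
 * .mask MUST be CLOCKSOURCE_MASK(64); see the comment above read_tsc().
 */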
static struct clocksource clocksource_tsc_early = {
	.name			= "tsc-early",
	.rating			= 299,
	.uncertainty_margin	= 32 * NSEC_PER_MSEC,
	.read			= read_tsc,
	.mask			= CLOCKSOURCE_MASK(64),
	.flags			= CLOCK_SOURCE_IS_CONTINUOUS |
				  CLOCK_SOURCE_MUST_VERIFY,
	.vdso_clock_mode	= VDSO_CLOCKMODE_TSC,
	.enable			= tsc_cs_enable,
	.resume			= tsc_resume,
	.mark_unstable		= tsc_cs_mark_unstable,
	.tick_stable		= tsc_cs_tick_stable,
	.list			= LIST_HEAD_INIT(clocksource_tsc_early.list),
};
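
/*
 * Must mark VALID_FOR_HRES early such that when tsc_early is
 * unregistered, this clocksource can immediately take over.
 */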
static struct clocksource clocksource_tsc = {
	.name			= "tsc",
	.rating			= 300,
	.read			= read_tsc,
	.mask			= CLOCKSOURCE_MASK(64),
	.flags			= CLOCK_SOURCE_IS_CONTINUOUS |
				  CLOCK_SOURCE_VALID_FOR_HRES |
				  CLOCK_SOURCE_MUST_VERIFY |
				  CLOCK_SOURCE_VERIFY_PERCPU,
	.vdso_clock_mode	= VDSO_CLOCKMODE_TSC,
	.enable			= tsc_cs_enable,
	.resume			= tsc_resume,
	.mark_unstable		= tsc_cs_mark_unstable,
	.tick_stable		= tsc_cs_tick_stable,
	.list			= LIST_HEAD_INIT(clocksource_tsc.list),
};

void mark_tsc_unstable(char *reason)
{
	if (tsc_unstable)
		return;

	tsc_unstable = 1;
	if (using_native_sched_clock())
		clear_sched_clock_stable();
	disable_sched_clock_irqtime();
	pr_info("Marking TSC unstable due to %s\n", reason);

	clocksource_mark_unstable(&clocksource_tsc_early);
	clocksource_mark_unstable(&clocksource_tsc);
}
EXPORT_SYMBOL_GPL(mark_tsc_unstable);

static void __init tsc_disable_clocksource_watchdog(void)
{
	clocksource_tsc_early.flags &= ~CLOCK_SOURCE_MUST_VERIFY;
	clocksource_tsc.flags &= ~CLOCK_SOURCE_MUST_VERIFY;
}

static void __init check_system_tsc_reliable(void)
{
#if defined(CONFIG_MGEODEGX1) || defined(CONFIG_MGEODE_LX) || defined(CONFIG_X86_GENERIC)
	if (is_geode_lx()) {
		/* RTSC counts during suspend */
#define RTSC_SUSP 0x100
		unsigned long res_low, res_high;

		rdmsr_safe(MSR_GEODE_BUSCONT_CONF0, &res_low, &res_high);
		/* Geode_LX - the OLPC CPU has a very reliable TSC */
		if (res_low & RTSC_SUSP)
			tsc_clocksource_reliable = 1;
	}
#endif
	if (boot_cpu_has(X86_FEATURE_TSC_RELIABLE))
		tsc_clocksource_reliable = 1;

	/*
	 * Disable the clocksource watchdog when the system has:
	 *  1. a TSC running at constant frequency,
	 *  2. a TSC which does not stop in C-states,
	 *  3. the TSC_ADJUST register, which allows detecting even
	 *     minimal modifications, and
	 *  4. no more than two NUMA nodes (a proxy for sockets).
	 */
	if (boot_cpu_has(X86_FEATURE_CONSTANT_TSC) &&
	    boot_cpu_has(X86_FEATURE_NONSTOP_TSC) &&
	    boot_cpu_has(X86_FEATURE_TSC_ADJUST) &&
	    nr_online_nodes <= 2)
		tsc_disable_clocksource_watchdog();
}
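
/*
 * Make an educated guess whether the TSC is trustworthy and
 * synchronized over all CPUs.
 */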
int unsynchronized_tsc(void)
{
	if (!boot_cpu_has(X86_FEATURE_TSC) || tsc_unstable)
		return 1;

#ifdef CONFIG_SMP
	if (apic_is_clustered_box())
		return 1;
#endif

	if (boot_cpu_has(X86_FEATURE_CONSTANT_TSC))
		return 0;

	if (tsc_clocksource_reliable)
		return 0;

	/*
	 * Intel systems are normally all synchronized. Exceptions are
	 * handled in the checks above.
	 */
	if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL) {
		/* Assume multi socket systems are not synchronized: */
		if (num_possible_cpus() > 1)
			return 1;
	}

	return 0;
}
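
/*
 * Convert an ART value to TSC cycles using the numerator/denominator
 * and offset discovered in detect_art().
 */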
struct system_counterval_t convert_art_to_tsc(u64 art)
{
	u64 tmp, res, rem;

	rem = do_div(art, art_to_tsc_denominator);

	res = art * art_to_tsc_numerator;
	tmp = rem * art_to_tsc_numerator;

	do_div(tmp, art_to_tsc_denominator);
	res += tmp + art_to_tsc_offset;

	return (struct system_counterval_t) {.cs = art_related_clocksource,
					     .cycles = res};
}
EXPORT_SYMBOL(convert_art_to_tsc);
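
/**
 * convert_art_ns_to_tsc() - Convert ART in nanoseconds to TSC.
 * @art_ns:	ART (Always Running Timer) value in units of nanoseconds
 *
 * PTM requires all timestamps to be in units of nanoseconds. When user
 * software requests a cross-timestamp, this function converts the
 * system timestamp to TSC.
 *
 * This is only meaningful when the TSC frequency is a known, CPUID
 * derived frequency (X86_FEATURE_TSC_KNOWN_FREQ), so that tsc_khz is
 * accurate enough for the scaling below.
 */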
struct system_counterval_t convert_art_ns_to_tsc(u64 art_ns)
{
	u64 tmp, res, rem;

	rem = do_div(art_ns, USEC_PER_SEC);

	res = art_ns * tsc_khz;
	tmp = rem * tsc_khz;

	do_div(tmp, USEC_PER_SEC);
	res += tmp;

	return (struct system_counterval_t) { .cs = art_related_clocksource,
					      .cycles = res};
}
EXPORT_SYMBOL(convert_art_ns_to_tsc);

static void tsc_refine_calibration_work(struct work_struct *work);
static DECLARE_DELAYED_WORK(tsc_irqwork, tsc_refine_calibration_work);
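
/**
 * tsc_refine_calibration_work - further refine the TSC frequency
 * @work: workqueue task pointer
 *
 * This function uses delayed work over a period of one second to
 * further refine the TSC frequency. Being timer based rather than loop
 * based, it does not block the boot process while the longer
 * calibration is done.
 *
 * If there are any calibration anomalies (too many SMIs, etc), or the
 * refined calibration is off by more than 1% of the early calibration,
 * the refined value is thrown out and the early calibration is kept.
 */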
static void tsc_refine_calibration_work(struct work_struct *work)
{
	static u64 tsc_start = ULLONG_MAX, ref_start;
	static int hpet;
	u64 tsc_stop, ref_stop, delta;
	unsigned long freq;
	int cpu;

	/* Don't bother refining the TSC on unstable systems */
	if (tsc_unstable)
		goto unreg;

	/*
	 * Since the work is started early in boot, we may be
	 * delayed the first time we expire. So set the workqueue
	 * again once we know timers are working.
	 */
	if (tsc_start == ULLONG_MAX) {
restart:
		/*
		 * Only set hpet once, to avoid mixing hardware
		 * if the hpet becomes enabled later.
		 */
		hpet = is_hpet_enabled();
		tsc_start = tsc_read_refs(&ref_start, hpet);
		schedule_delayed_work(&tsc_irqwork, HZ);
		return;
	}

	tsc_stop = tsc_read_refs(&ref_stop, hpet);

	/* hpet or pmtimer available ? */
	if (ref_start == ref_stop)
		goto out;

	/* Check, whether the sampling was disturbed */
	if (tsc_stop == ULLONG_MAX)
		goto restart;

	delta = tsc_stop - tsc_start;
	delta *= 1000000LL;
	if (hpet)
		freq = calc_hpet_ref(delta, ref_start, ref_stop);
	else
		freq = calc_pmtimer_ref(delta, ref_start, ref_stop);

	/* Make sure we're within 1% */
	if (abs(tsc_khz - freq) > tsc_khz/100)
		goto out;

	tsc_khz = freq;
	pr_info("Refined TSC clocksource calibration: %lu.%03lu MHz\n",
		(unsigned long)tsc_khz / 1000,
		(unsigned long)tsc_khz % 1000);

	/* Inform the TSC deadline clockevent devices about the recalibration */
	lapic_update_tsc_freq();

	/* Update the sched_clock() rate to match the clocksource one */
	for_each_possible_cpu(cpu)
		set_cyc2ns_scale(tsc_khz, cpu, tsc_stop);

out:
	if (tsc_unstable)
		goto unreg;

	if (boot_cpu_has(X86_FEATURE_ART))
		art_related_clocksource = &clocksource_tsc;
	clocksource_register_khz(&clocksource_tsc, tsc_khz);
unreg:
	clocksource_unregister(&clocksource_tsc_early);
}

static int __init init_tsc_clocksource(void)
{
	if (!boot_cpu_has(X86_FEATURE_TSC) || !tsc_khz)
		return 0;

	if (tsc_unstable)
		goto unreg;

	if (boot_cpu_has(X86_FEATURE_NONSTOP_TSC_S3))
		clocksource_tsc.flags |= CLOCK_SOURCE_SUSPEND_NONSTOP;

	/*
	 * When the TSC frequency is known (retrieved via MSR or CPUID),
	 * skip the refined calibration and directly register the TSC as
	 * a clocksource.
	 */
	if (boot_cpu_has(X86_FEATURE_TSC_KNOWN_FREQ)) {
		if (boot_cpu_has(X86_FEATURE_ART))
			art_related_clocksource = &clocksource_tsc;
		clocksource_register_khz(&clocksource_tsc, tsc_khz);
unreg:
		clocksource_unregister(&clocksource_tsc_early);
		return 0;
	}

	schedule_delayed_work(&tsc_irqwork, 0);
	return 0;
}
/*
 * We use device_initcall here to ensure this runs after the hpet is
 * fully initialized, which may occur at fs_initcall time.
 */
device_initcall(init_tsc_clocksource);

static bool __init determine_cpu_tsc_frequencies(bool early)
{
	/* Make sure that cpu and tsc are not already calibrated */
	WARN_ON(cpu_khz || tsc_khz);

	if (early) {
		cpu_khz = x86_platform.calibrate_cpu();
		if (tsc_early_khz)
			tsc_khz = tsc_early_khz;
		else
			tsc_khz = x86_platform.calibrate_tsc();
	} else {
		/* We should not be here with non-native cpu calibration */
		WARN_ON(x86_platform.calibrate_cpu != native_calibrate_cpu);
		cpu_khz = pit_hpet_ptimer_calibrate_cpu();
	}

	/*
	 * Trust a non-zero tsc_khz as authoritative, and use it to
	 * sanity check cpu_khz, which will be off if the system timer
	 * is off and cpu_khz was calibrated against it.
	 */
	if (tsc_khz == 0)
		tsc_khz = cpu_khz;
	else if (abs(cpu_khz - tsc_khz) * 10 > tsc_khz)
		cpu_khz = tsc_khz;

	if (tsc_khz == 0)
		return false;

	pr_info("Detected %lu.%03lu MHz processor\n",
		(unsigned long)cpu_khz / KHZ,
		(unsigned long)cpu_khz % KHZ);

	if (cpu_khz != tsc_khz) {
		pr_info("Detected %lu.%03lu MHz TSC\n",
			(unsigned long)tsc_khz / KHZ,
			(unsigned long)tsc_khz % KHZ);
	}
	return true;
}

static unsigned long __init get_loops_per_jiffy(void)
{
	u64 lpj = (u64)tsc_khz * KHZ;

	do_div(lpj, HZ);
	return lpj;
}

static void __init tsc_enable_sched_clock(void)
{
	loops_per_jiffy = get_loops_per_jiffy();
	use_tsc_delay();

	/* Sanitize TSC ADJUST before cyc2ns gets initialized */
	tsc_store_and_check_tsc_adjust(true);
	cyc2ns_init_boot_cpu();
	static_branch_enable(&__use_tsc);
}

void __init tsc_early_init(void)
{
	if (!boot_cpu_has(X86_FEATURE_TSC))
		return;
	/* Don't change UV TSC multi-chassis synchronization */
	if (is_early_uv_system())
		return;
	if (!determine_cpu_tsc_frequencies(true))
		return;
	tsc_enable_sched_clock();
}

void __init tsc_init(void)
{
	/*
	 * native_calibrate_cpu_early can only calibrate using methods
	 * that are available early in boot. Switch to the full
	 * calibration routine now that more methods are available.
	 */
	if (x86_platform.calibrate_cpu == native_calibrate_cpu_early)
		x86_platform.calibrate_cpu = native_calibrate_cpu;

	if (!boot_cpu_has(X86_FEATURE_TSC)) {
		setup_clear_cpu_cap(X86_FEATURE_TSC_DEADLINE_TIMER);
		return;
	}

	if (!tsc_khz) {
		/* We failed to determine the frequencies earlier, try again */
		if (!determine_cpu_tsc_frequencies(false)) {
			mark_tsc_unstable("could not calculate TSC khz");
			setup_clear_cpu_cap(X86_FEATURE_TSC_DEADLINE_TIMER);
			return;
		}
		tsc_enable_sched_clock();
	}

	cyc2ns_init_secondary_cpus();

	if (!no_sched_irq_time)
		enable_sched_clock_irqtime();

	lpj_fine = get_loops_per_jiffy();

	check_system_tsc_reliable();

	if (unsynchronized_tsc()) {
		mark_tsc_unstable("TSCs unsynchronized");
		return;
	}

	if (tsc_clocksource_reliable || no_tsc_watchdog)
		tsc_disable_clocksource_watchdog();

	clocksource_register_khz(&clocksource_tsc_early, tsc_khz);
	detect_art();
}

#ifdef CONFIG_SMP
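/*
 * If we have a constant TSC and are using the TSC for the delay loop,
 * we can skip clock calibration if another CPU in the same socket has
 * already been calibrated. This assumes that CONSTANT_TSC applies to
 * all CPUs in the socket - this should be a safe assumption.
 */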
unsigned long calibrate_delay_is_known(void)
{
	int sibling, cpu = smp_processor_id();
	int constant_tsc = cpu_has(&cpu_data(cpu), X86_FEATURE_CONSTANT_TSC);
	const struct cpumask *mask = topology_core_cpumask(cpu);

	if (!constant_tsc || !mask)
		return 0;

	sibling = cpumask_any_but(mask, cpu);
	if (sibling < nr_cpu_ids)
		return cpu_data(sibling).loops_per_jiffy;
	return 0;
}
#endif