// SPDX-License-Identifier: GPL-2.0
/*
 *  Precise Delay Loops for i386
 *
 *  Copyright (C) 1993 Linus Torvalds
 *  Copyright (C) 1997 Martin Mares <mj@atrey.karlin.mff.cuni.cz>
 *  Copyright (C) 2008 Jiri Hladky <hladky _dot_ jiri _at_ gmail _dot_ com>
 *
 *  The __delay function must _NOT_ be inlined as its execution time
 *  depends wildly on alignment on many x86 processors. The additional
 *  jump magic is needed to get the timing stable on all the CPUs
 *  we have to worry about.
 */

#include <linux/export.h>
#include <linux/sched.h>
#include <linux/timex.h>
#include <linux/preempt.h>
#include <linux/delay.h>

#include <asm/processor.h>
#include <asm/delay.h>
#include <asm/timer.h>
#include <asm/mwait.h>

#ifdef CONFIG_SMP
# include <asm/smp.h>
#endif

static void delay_loop(u64 __loops);

/*
 * Calibration and selection of the delay mechanism happens only once
 * during boot.
 */
static void (*delay_fn)(u64) __ro_after_init = delay_loop;
static void (*delay_halt_fn)(u64 start, u64 cycles) __ro_after_init;

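/*
 * How the pointers above get filled in: delay_fn defaults to the software
 * delay_loop() and is repointed during boot by use_tsc_delay(),
 * use_tpause_delay() or use_mwaitx_delay() below, depending on what the
 * CPU supports; delay_halt_fn selects the vendor-specific wait primitive
 * that delay_halt() invokes.
 */
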
/* simple loop based delay: */
static void delay_loop(u64 __loops)
{
    unsigned long loops = (unsigned long)__loops;

    asm volatile(
        "   test %0,%0  \n"     /* skip the loop entirely for a zero count */
        "   jz 3f       \n"
        "   jmp 1f      \n"

        ".align 16      \n"     /* land the loop head on a fresh 16-byte */
        "1: jmp 2f      \n"     /* boundary: the "jump magic" noted above */

        ".align 16      \n"
        "2: dec %0      \n"     /* the timed countdown loop */
        "   jnz 2b      \n"
        "3: dec %0      \n"

        : "+a" (loops)
        :
    );
}
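
/*
 * What one 'loop' means here is whatever calibration decided: if boot-time
 * calibration finds, say, loops_per_jiffy = 4,000,000 with HZ = 1000, the
 * CPU runs about 4 * 10^9 of these dec/jnz iterations per second, i.e.
 * roughly 0.25 ns per iteration, and __const_udelay() at the bottom of this
 * file converts microseconds into that loop count.
 */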

/* TSC based delay: */
static void delay_tsc(u64 cycles)
{
    u64 bclock, now;
    int cpu;

    preempt_disable();
    cpu = smp_processor_id();
    bclock = rdtsc_ordered();
    for (;;) {
        now = rdtsc_ordered();
        if ((now - bclock) >= cycles)
            break;

        /* Allow RT tasks to run */
        preempt_enable();
        rep_nop();
        preempt_disable();

        /*
         * We may have been moved to another CPU, and since TSCs are
         * per-CPU we need to account for that. The delay must
         * guarantee that we wait at least the requested amount of
         * time. Being moved to another CPU may make the wait longer,
         * but we only have to make sure we waited long enough.
         * Adjust the remaining cycle count and restart the
         * measurement on this CPU.
         */
        if (unlikely(cpu != smp_processor_id())) {
            cycles -= (now - bclock);
            cpu = smp_processor_id();
            bclock = rdtsc_ordered();
        }
    }
    preempt_enable();
}
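
/*
 * Note on delay_tsc() above: rdtsc_ordered() puts a barrier in front of
 * RDTSC so the timestamp cannot be taken speculatively early, which keeps
 * the measured interval honest. When this path is selected, loops_per_jiffy
 * is set up in TSC cycles by the TSC init code, so the value __delay() hands
 * in really is a cycle count.
 */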

/*
 * On Intel the TPAUSE instruction waits until any of:
 * 1) the TSC counter exceeds the value provided in EDX:EAX
 * 2) global timeout in IA32_UMWAIT_CONTROL is exceeded
 * 3) an external interrupt occurs
 */
static void delay_halt_tpause(u64 start, u64 cycles)
{
    u64 until = start + cycles;
    u32 eax, edx;

    eax = lower_32_bits(until);
    edx = upper_32_bits(until);

    /*
     * Hard code the deeper (C0.2) sleep state because exit latency is
     * small compared to the "microseconds" that usleep() will delay.
     */
    __tpause(TPAUSE_C02_STATE, edx, eax);
}
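
/*
 * Worked example (numbers are illustrative): a 10 us delay on a CPU with a
 * 2 GHz TSC is 20,000 cycles, so the call above asks TPAUSE to wait until
 * TSC >= start + 20000. TPAUSE may return early, e.g. on an interrupt or
 * when the IA32_UMWAIT_CONTROL limit kicks in, which is why delay_halt()
 * below re-checks the elapsed time and calls in again.
 */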

/*
 * On some AMD platforms, MWAITX has a configurable 32-bit timer that
 * counts at TSC frequency. The input value is the number of TSC cycles
 * to wait. MWAITX will also exit when the timer expires.
 */
static void delay_halt_mwaitx(u64 unused, u64 cycles)
{
    u64 delay;

    delay = min_t(u64, MWAITX_MAX_WAIT_CYCLES, cycles);
    /*
     * Use cpu_tss_rw as a cacheline-aligned, seldom accessed per-cpu
     * variable as the monitor target.
     */
    __monitorx(raw_cpu_ptr(&cpu_tss_rw), 0, 0);

    /*
     * AMD, like Intel, supports the EAX hint; EAX=0xf means do not
     * enter any deep C-state. We use it here in delay() to minimize
     * wakeup latency.
     */
    __mwaitx(MWAITX_DISABLE_CSTATES, delay, MWAITX_ECX_TIMER_ENABLE);
}
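
/*
 * A single MWAITX wait is capped at MWAITX_MAX_WAIT_CYCLES by the min_t()
 * above; for longer requests delay_halt() simply calls back in with the
 * remaining cycle count. A store to the monitored cpu_tss_rw cacheline can
 * also end the wait early, which the caller likewise absorbs by re-reading
 * the TSC.
 */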

/*
 * Call a vendor-specific function to delay for a given amount of time. Because
 * these functions may return earlier than requested, check the actual elapsed
 * time and call again until done.
 */
static void delay_halt(u64 __cycles)
{
    u64 start, end, cycles = __cycles;

    /*
     * A timer value of 0 causes MWAITX to wait indefinitely, unless there
     * is a store on the memory monitored by MONITORX.
     */
    if (!cycles)
        return;

    start = rdtsc_ordered();

    for (;;) {
        delay_halt_fn(start, cycles);
        end = rdtsc_ordered();

        if (cycles <= end - start)
            break;

        cycles -= end - start;
        start = end;
    }
}

void __init use_tsc_delay(void)
{
    if (delay_fn == delay_loop)
        delay_fn = delay_tsc;
}

void __init use_tpause_delay(void)
{
    delay_halt_fn = delay_halt_tpause;
    delay_fn = delay_halt;
}

void use_mwaitx_delay(void)
{
    delay_halt_fn = delay_halt_mwaitx;
    delay_fn = delay_halt;
}

int read_current_timer(unsigned long *timer_val)
{
    if (delay_fn == delay_tsc) {
        *timer_val = rdtsc();
        return 0;
    }
    return -1;
}
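
/*
 * read_current_timer() is used by the generic calibration code
 * (calibrate_delay_direct()): returning the raw TSC lets boot-time
 * calibration derive loops_per_jiffy without timing the software loop,
 * while -1 falls back to the traditional loop-counting calibration.
 */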

void __delay(unsigned long loops)
{
    delay_fn(loops);
}
EXPORT_SYMBOL(__delay);

noinline void __const_udelay(unsigned long xloops)
{
    unsigned long lpj = this_cpu_read(cpu_info.loops_per_jiffy) ? : loops_per_jiffy;
    int d0;

    xloops *= 4;
    asm("mull %%edx"
        :"=d" (xloops), "=&a" (d0)
        :"1" (xloops), "0" (lpj * (HZ / 4)));

    __delay(++xloops);
}
EXPORT_SYMBOL(__const_udelay);
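
/*
 * How the fixed-point math in __const_udelay() works out: __udelay() and
 * __ndelay() below pass in xloops ~= delay * 2^32 / 10^6 (resp. / 10^9), so
 * the high 32 bits of the 32x32->64 MULL, returned in EDX, are
 *
 *     (delay * 2^32 / 10^6) * lpj * HZ / 2^32  ==  delay * lpj * HZ / 10^6
 *
 * i.e. the number of __delay() units the requested time corresponds to.
 * The xloops *= 4 paired with HZ / 4 keeps the 32-bit multiplicand
 * lpj * (HZ / 4) from overflowing for large loops_per_jiffy values, and the
 * final ++xloops rounds up so we never delay too little. The scale factors
 * used below are 0x10c7 == 4295 == ceil(2^32 / 10^6) and
 * 0x5 == ceil(2^32 / 10^9).
 */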

void __udelay(unsigned long usecs)
{
    __const_udelay(usecs * 0x000010c7); /* 2**32 / 1000000 (rounded up) */
}
EXPORT_SYMBOL(__udelay);

void __ndelay(unsigned long nsecs)
{
    __const_udelay(nsecs * 0x00005); /* 2**32 / 1000000000 (rounded up) */
}
EXPORT_SYMBOL(__ndelay);
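
/*
 * Usage sketch (not part of this file): callers normally go through the
 * udelay()/ndelay() wrappers in <linux/delay.h>, which end up in the
 * exported helpers above, e.g. in a driver:
 *
 *     writeb(val, reg);
 *     udelay(10);         // busy-wait ~10us for the device to settle
 *     status = readb(reg);
 *
 * where reg and val are illustrative only.
 */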