/*
 * Context tracking: Probe on high level context boundaries such as kernel
 * and userspace. This includes syscall and exception entry/exit.
 *
 * This is used by RCU to remove its dependency on the timer tick while a CPU
 * runs in userspace.
 *
 *  Started by Frederic Weisbecker:
 *
 * Copyright (C) 2012 Red Hat, Inc., Frederic Weisbecker <fweisbec@redhat.com>
 *
 * Many thanks to Gilad Ben-Yossef, Paul McKenney, Ingo Molnar, Andrew Morton,
 * Steven Rostedt, Peter Zijlstra for suggestions and improvements.
 *
 */

#include <linux/context_tracking.h>
#include <linux/rcupdate.h>
#include <linux/sched.h>
#include <linux/hardirq.h>
#include <linux/export.h>
#include <linux/kprobes.h>

#define CREATE_TRACE_POINTS
#include <trace/events/context_tracking.h>

DEFINE_STATIC_KEY_FALSE(context_tracking_enabled);
EXPORT_SYMBOL_GPL(context_tracking_enabled);

DEFINE_PER_CPU(struct context_tracking, context_tracking);
EXPORT_SYMBOL_GPL(context_tracking);
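
/*
 * Usage sketch of the static key above, assuming the inline wrappers in
 * <linux/context_tracking.h> of this kernel generation; roughly:
 *
 *	static inline void user_enter(void)
 *	{
 *		if (context_tracking_is_enabled())
 *			context_tracking_enter(CONTEXT_USER);
 *	}
 *
 * When no CPU has context tracking active, the static branch stays off and
 * the wrapper compiles down to (almost) nothing.
 */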

/*
 * Per-CPU recursion guard: returns true if this is the outermost caller on
 * this CPU; on re-entry (e.g. via a tracepoint or an exception taken in the
 * middle of a state update) it undoes the increment and returns false.
 */
static bool context_tracking_recursion_enter(void)
{
	int recursion;

	recursion = __this_cpu_inc_return(context_tracking.recursion);
	if (recursion == 1)
		return true;

	WARN_ONCE((recursion < 1), "Invalid context tracking recursion value %d\n", recursion);
	__this_cpu_dec(context_tracking.recursion);

	return false;
}

static void context_tracking_recursion_exit(void)
{
	__this_cpu_dec(context_tracking.recursion);
}
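
/*
 * Pairing sketch: each state update below brackets its work with this
 * guard, so a recursive entry simply bails out:
 *
 *	if (!context_tracking_recursion_enter())
 *		return;
 *	...update context_tracking.state...
 *	context_tracking_recursion_exit();
 */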

/**
 * context_tracking_enter - Inform the context tracking that the CPU is going
 *                          to enter user or guest space mode.
 *
 * This function must be called right before we switch from the kernel
 * to user or guest space, when it's guaranteed the remaining kernel
 * instructions to execute won't use any RCU read side critical section
 * because this function sets RCU in an extended quiescent state.
 */
void __context_tracking_enter(enum ctx_state state)
{
	/* Kernel threads aren't supposed to go to userspace */
	WARN_ON_ONCE(!current->mm);

	if (!context_tracking_recursion_enter())
		return;

	if (__this_cpu_read(context_tracking.state) != state) {
		if (__this_cpu_read(context_tracking.active)) {
			/*
			 * At this stage, only low level arch entry code remains and
			 * then we'll run in userspace. We can assume there won't be
			 * any RCU read-side critical section until the next call to
			 * user_exit() or rcu_irq_enter(). Let's remove RCU's dependency
			 * on the tick.
			 */
			if (state == CONTEXT_USER) {
				trace_user_enter(0);
				vtime_user_enter(current);
			}
			rcu_user_enter();
		}
		/*
		 * Even if context tracking is disabled on this CPU, because it's
		 * outside the full dynticks mask for example, we still have to keep
		 * track of the context transitions and states to prevent
		 * inconsistency on those of other CPUs.
		 * If a task triggers an exception in userspace, then sleeps in the
		 * exception handler and migrates to another CPU, that new CPU must
		 * know which context the exception will return to by the time we
		 * call exception_exit(). This information can only be provided by
		 * the previous CPU when it called exception_enter().
		 * On the other hand, we can spare the calls to vtime and RCU when
		 * context_tracking.active is false, because we know that CPU is
		 * not tickless.
		 */
		__this_cpu_write(context_tracking.state, state);
	}
	context_tracking_recursion_exit();
}
NOKPROBE_SYMBOL(__context_tracking_enter);
EXPORT_SYMBOL_GPL(__context_tracking_enter);
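
/*
 * Usage sketch, with a hypothetical arch helper name for illustration:
 * the arch's return-to-user path calls user_enter() as its last high level
 * step, which boils down to __context_tracking_enter(CONTEXT_USER) when
 * tracking is enabled:
 *
 *	static void prepare_exit_to_usermode(struct pt_regs *regs)
 *	{
 *		// handle pending work: signals, need_resched, etc.
 *		user_enter();	// kernel -> user transition for RCU/vtime
 *	}
 */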

void context_tracking_enter(enum ctx_state state)
{
	unsigned long flags;

	/*
	 * Some contexts may involve an exception occurring in an irq,
	 * leading to this nesting:
	 * rcu_irq_enter() rcu_user_exit() rcu_user_exit() rcu_irq_exit()
	 * This would mess up the dyntick_nesting count though. And rcu_irq_*()
	 * helpers are enough to protect RCU uses inside the exception. So
	 * just return immediately if we detect we are in an IRQ.
	 */
	if (in_interrupt())
		return;

	local_irq_save(flags);
	__context_tracking_enter(state);
	local_irq_restore(flags);
}
NOKPROBE_SYMBOL(context_tracking_enter);
EXPORT_SYMBOL_GPL(context_tracking_enter);

/*
 * Thin wrapper so that callers which cannot use the static-key inline
 * user_enter() (e.g. arch entry assembly) have a real symbol to call.
 */
void context_tracking_user_enter(void)
{
	user_enter();
}
NOKPROBE_SYMBOL(context_tracking_user_enter);

/**
 * context_tracking_exit - Inform the context tracking that the CPU is
 *                         exiting user or guest mode and entering the kernel.
 *
 * This function must be called after we entered the kernel from user or
 * guest space, before any use of an RCU read side critical section. This
 * potentially includes any high level kernel code such as syscalls,
 * exceptions, signal handling, etc.
 *
 * This call supports re-entrancy. This way it can be called from any exception
 * handler without needing to know if we came from userspace or not.
 */
void __context_tracking_exit(enum ctx_state state)
{
	if (!context_tracking_recursion_enter())
		return;

	if (__this_cpu_read(context_tracking.state) == state) {
		if (__this_cpu_read(context_tracking.active)) {
			/*
			 * We are going to run code that may use RCU. Inform
			 * RCU core about that (i.e. we may need the tick again).
			 */
			rcu_user_exit();
			if (state == CONTEXT_USER) {
				vtime_user_exit(current);
				trace_user_exit(0);
			}
		}
		__this_cpu_write(context_tracking.state, CONTEXT_KERNEL);
	}
	context_tracking_recursion_exit();
}
NOKPROBE_SYMBOL(__context_tracking_exit);
EXPORT_SYMBOL_GPL(__context_tracking_exit);
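
/*
 * Usage sketch: exception handlers that can fire in either context use the
 * exception_enter()/exception_exit() pair from <linux/context_tracking.h>,
 * which exits the tracked state and restores it on return, roughly:
 *
 *	enum ctx_state prev_state;
 *
 *	prev_state = exception_enter();	// exit tracked state + save it
 *	// ... handle the fault; the task may sleep and migrate ...
 *	exception_exit(prev_state);	// re-enter the previous context
 */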

void context_tracking_exit(enum ctx_state state)
{
	unsigned long flags;

	/* See the nesting comment in context_tracking_enter() */
	if (in_interrupt())
		return;

	local_irq_save(flags);
	__context_tracking_exit(state);
	local_irq_restore(flags);
}
NOKPROBE_SYMBOL(context_tracking_exit);
EXPORT_SYMBOL_GPL(context_tracking_exit);

/*
 * Thin wrapper so that callers which cannot use the static-key inline
 * user_exit() (e.g. arch entry assembly) have a real symbol to call.
 */
void context_tracking_user_exit(void)
{
	user_exit();
}
NOKPROBE_SYMBOL(context_tracking_user_exit);

void __init context_tracking_cpu_set(int cpu)
{
	static __initdata bool initialized = false;

	if (!per_cpu(context_tracking.active, cpu)) {
		per_cpu(context_tracking.active, cpu) = true;
		static_branch_inc(&context_tracking_enabled);
	}

	if (initialized)
		return;

	/*
	 * Set TIF_NOHZ to init/0 and let it propagate to all tasks through
	 * fork. This assumes that init is the only task at this early boot
	 * stage.
	 */
	set_tsk_thread_flag(&init_task, TIF_NOHZ);
	WARN_ON_ONCE(!tasklist_empty());

	initialized = true;
}
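
/*
 * Enrollment sketch: on a nohz_full setup the tick code is expected to call
 * this for each full-dynticks CPU at boot, roughly (see tick_nohz_init()):
 *
 *	for_each_cpu(cpu, tick_nohz_full_mask)
 *		context_tracking_cpu_set(cpu);
 */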

#ifdef CONFIG_CONTEXT_TRACKING_FORCE
void __init context_tracking_init(void)
{
	int cpu;

	for_each_possible_cpu(cpu)
		context_tracking_cpu_set(cpu);
}
#endif