Back to home page

OSCL-LXR

 
 

    


0001 // SPDX-License-Identifier: GPL-2.0-only
0002 /*
0003  * Context tracking: Probe on high level context boundaries such as kernel,
0004  * userspace, guest or idle.
0005  *
0006  * This is used by RCU to remove its dependency on the timer tick while a CPU
0007  * runs in idle, userspace or guest mode.
0008  *
0009  * User/guest tracking started by Frederic Weisbecker:
0010  *
0011  * Copyright (C) 2012 Red Hat, Inc., Frederic Weisbecker
0012  *
0013  * Many thanks to Gilad Ben-Yossef, Paul McKenney, Ingo Molnar, Andrew Morton,
0014  * Steven Rostedt, Peter Zijlstra for suggestions and improvements.
0015  *
0016  * RCU extended quiescent state bits imported from kernel/rcu/tree.c
0017  * where the relevant authorship may be found.
0018  */
0019 
0020 #include <linux/context_tracking.h>
0021 #include <linux/rcupdate.h>
0022 #include <linux/sched.h>
0023 #include <linux/hardirq.h>
0024 #include <linux/export.h>
0025 #include <linux/kprobes.h>
0026 #include <trace/events/rcu.h>
0027 
0028 
0029 DEFINE_PER_CPU(struct context_tracking, context_tracking) = {
0030 #ifdef CONFIG_CONTEXT_TRACKING_IDLE
0031     .dynticks_nesting = 1,
0032     .dynticks_nmi_nesting = DYNTICK_IRQ_NONIDLE,
0033 #endif
0034     .state = ATOMIC_INIT(RCU_DYNTICKS_IDX),
0035 };
0036 EXPORT_SYMBOL_GPL(context_tracking);
0037 
0038 #ifdef CONFIG_CONTEXT_TRACKING_IDLE
0039 #define TPS(x)  tracepoint_string(x)
0040 
0041 /* Record the current task on dyntick-idle entry. */
0042 static __always_inline void rcu_dynticks_task_enter(void)
0043 {
0044 #if defined(CONFIG_TASKS_RCU) && defined(CONFIG_NO_HZ_FULL)
0045     WRITE_ONCE(current->rcu_tasks_idle_cpu, smp_processor_id());
0046 #endif /* #if defined(CONFIG_TASKS_RCU) && defined(CONFIG_NO_HZ_FULL) */
0047 }
0048 
0049 /* Record no current task on dyntick-idle exit. */
0050 static __always_inline void rcu_dynticks_task_exit(void)
0051 {
0052 #if defined(CONFIG_TASKS_RCU) && defined(CONFIG_NO_HZ_FULL)
0053     WRITE_ONCE(current->rcu_tasks_idle_cpu, -1);
0054 #endif /* #if defined(CONFIG_TASKS_RCU) && defined(CONFIG_NO_HZ_FULL) */
0055 }
0056 
0057 /* Turn on heavyweight RCU tasks trace readers on idle/user entry. */
0058 static __always_inline void rcu_dynticks_task_trace_enter(void)
0059 {
0060 #ifdef CONFIG_TASKS_TRACE_RCU
0061     if (IS_ENABLED(CONFIG_TASKS_TRACE_RCU_READ_MB))
0062         current->trc_reader_special.b.need_mb = true;
0063 #endif /* #ifdef CONFIG_TASKS_TRACE_RCU */
0064 }
0065 
0066 /* Turn off heavyweight RCU tasks trace readers on idle/user exit. */
0067 static __always_inline void rcu_dynticks_task_trace_exit(void)
0068 {
0069 #ifdef CONFIG_TASKS_TRACE_RCU
0070     if (IS_ENABLED(CONFIG_TASKS_TRACE_RCU_READ_MB))
0071         current->trc_reader_special.b.need_mb = false;
0072 #endif /* #ifdef CONFIG_TASKS_TRACE_RCU */
0073 }
0074 
0075 /*
0076  * Record entry into an extended quiescent state.  This is only to be
0077  * called when not already in an extended quiescent state, that is,
0078  * RCU is watching prior to the call to this function and is no longer
0079  * watching upon return.
0080  */
0081 static noinstr void ct_kernel_exit_state(int offset)
0082 {
0083     int seq;
0084 
0085     /*
0086      * CPUs seeing atomic_add_return() must see prior RCU read-side
0087      * critical sections, and we also must force ordering with the
0088      * next idle sojourn.
0089      */
0090     rcu_dynticks_task_trace_enter();  // Before ->dynticks update!
0091     seq = ct_state_inc(offset);
0092     // RCU is no longer watching.  Better be in extended quiescent state!
0093     WARN_ON_ONCE(IS_ENABLED(CONFIG_RCU_EQS_DEBUG) && (seq & RCU_DYNTICKS_IDX));
0094 }
0095 
0096 /*
0097  * Record exit from an extended quiescent state.  This is only to be
0098  * called from an extended quiescent state, that is, RCU is not watching
0099  * prior to the call to this function and is watching upon return.
0100  */
0101 static noinstr void ct_kernel_enter_state(int offset)
0102 {
0103     int seq;
0104 
0105     /*
0106      * CPUs seeing atomic_add_return() must see prior idle sojourns,
0107      * and we also must force ordering with the next RCU read-side
0108      * critical section.
0109      */
0110     seq = ct_state_inc(offset);
0111     // RCU is now watching.  Better not be in an extended quiescent state!
0112     rcu_dynticks_task_trace_exit();  // After ->dynticks update!
0113     WARN_ON_ONCE(IS_ENABLED(CONFIG_RCU_EQS_DEBUG) && !(seq & RCU_DYNTICKS_IDX));
0114 }
0115 
0116 /*
0117  * Enter an RCU extended quiescent state, which can be either the
0118  * idle loop or adaptive-tickless usermode execution.
0119  *
0120  * We crowbar the ->dynticks_nmi_nesting field to zero to allow for
0121  * the possibility of usermode upcalls having messed up our count
0122  * of interrupt nesting level during the prior busy period.
0123  */
0124 static void noinstr ct_kernel_exit(bool user, int offset)
0125 {
0126     struct context_tracking *ct = this_cpu_ptr(&context_tracking);
0127 
0128     WARN_ON_ONCE(ct_dynticks_nmi_nesting() != DYNTICK_IRQ_NONIDLE);
0129     WRITE_ONCE(ct->dynticks_nmi_nesting, 0);
0130     WARN_ON_ONCE(IS_ENABLED(CONFIG_RCU_EQS_DEBUG) &&
0131              ct_dynticks_nesting() == 0);
0132     if (ct_dynticks_nesting() != 1) {
0133         // RCU will still be watching, so just do accounting and leave.
0134         ct->dynticks_nesting--;
0135         return;
0136     }
0137 
0138     instrumentation_begin();
0139     lockdep_assert_irqs_disabled();
0140     trace_rcu_dyntick(TPS("Start"), ct_dynticks_nesting(), 0, ct_dynticks());
0141     WARN_ON_ONCE(IS_ENABLED(CONFIG_RCU_EQS_DEBUG) && !user && !is_idle_task(current));
0142     rcu_preempt_deferred_qs(current);
0143 
0144     // instrumentation for the noinstr ct_kernel_exit_state()
0145     instrument_atomic_write(&ct->state, sizeof(ct->state));
0146 
0147     instrumentation_end();
0148     WRITE_ONCE(ct->dynticks_nesting, 0); /* Avoid irq-access tearing. */
0149     // RCU is watching here ...
0150     ct_kernel_exit_state(offset);
0151     // ... but is no longer watching here.
0152     rcu_dynticks_task_enter();
0153 }
0154 
0155 /*
0156  * Exit an RCU extended quiescent state, which can be either the
0157  * idle loop or adaptive-tickless usermode execution.
0158  *
0159  * We crowbar the ->dynticks_nmi_nesting field to DYNTICK_IRQ_NONIDLE to
0160  * allow for the possibility of usermode upcalls messing up our count of
0161  * interrupt nesting level during the busy period that is just now starting.
0162  */
0163 static void noinstr ct_kernel_enter(bool user, int offset)
0164 {
0165     struct context_tracking *ct = this_cpu_ptr(&context_tracking);
0166     long oldval;
0167 
0168     WARN_ON_ONCE(IS_ENABLED(CONFIG_RCU_EQS_DEBUG) && !raw_irqs_disabled());
0169     oldval = ct_dynticks_nesting();
0170     WARN_ON_ONCE(IS_ENABLED(CONFIG_RCU_EQS_DEBUG) && oldval < 0);
0171     if (oldval) {
0172         // RCU was already watching, so just do accounting and leave.
0173         ct->dynticks_nesting++;
0174         return;
0175     }
0176     rcu_dynticks_task_exit();
0177     // RCU is not watching here ...
0178     ct_kernel_enter_state(offset);
0179     // ... but is watching here.
0180     instrumentation_begin();
0181 
0182     // instrumentation for the noinstr ct_kernel_enter_state()
0183     instrument_atomic_write(&ct->state, sizeof(ct->state));
0184 
0185     trace_rcu_dyntick(TPS("End"), ct_dynticks_nesting(), 1, ct_dynticks());
0186     WARN_ON_ONCE(IS_ENABLED(CONFIG_RCU_EQS_DEBUG) && !user && !is_idle_task(current));
0187     WRITE_ONCE(ct->dynticks_nesting, 1);
0188     WARN_ON_ONCE(ct_dynticks_nmi_nesting());
0189     WRITE_ONCE(ct->dynticks_nmi_nesting, DYNTICK_IRQ_NONIDLE);
0190     instrumentation_end();
0191 }
0192 
0193 /**
0194  * ct_nmi_exit - inform RCU of exit from NMI context
0195  *
0196  * If we are returning from the outermost NMI handler that interrupted an
0197  * RCU-idle period, update ct->state and ct->dynticks_nmi_nesting
0198  * to let the RCU grace-period handling know that the CPU is back to
0199  * being RCU-idle.
0200  *
0201  * If you add or remove a call to ct_nmi_exit(), be sure to test
0202  * with CONFIG_RCU_EQS_DEBUG=y.
0203  */
0204 void noinstr ct_nmi_exit(void)
0205 {
0206     struct context_tracking *ct = this_cpu_ptr(&context_tracking);
0207 
0208     instrumentation_begin();
0209     /*
0210      * Check for ->dynticks_nmi_nesting underflow and bad ->dynticks.
0211      * (We are exiting an NMI handler, so RCU better be paying attention
0212      * to us!)
0213      */
0214     WARN_ON_ONCE(ct_dynticks_nmi_nesting() <= 0);
0215     WARN_ON_ONCE(rcu_dynticks_curr_cpu_in_eqs());
0216 
0217     /*
0218      * If the nesting level is not 1, the CPU wasn't RCU-idle, so
0219      * leave it in non-RCU-idle state.
0220      */
0221     if (ct_dynticks_nmi_nesting() != 1) {
0222         trace_rcu_dyntick(TPS("--="), ct_dynticks_nmi_nesting(), ct_dynticks_nmi_nesting() - 2,
0223                   ct_dynticks());
0224         WRITE_ONCE(ct->dynticks_nmi_nesting, /* No store tearing. */
0225                ct_dynticks_nmi_nesting() - 2);
0226         instrumentation_end();
0227         return;
0228     }
0229 
0230     /* This NMI interrupted an RCU-idle CPU, restore RCU-idleness. */
0231     trace_rcu_dyntick(TPS("Startirq"), ct_dynticks_nmi_nesting(), 0, ct_dynticks());
0232     WRITE_ONCE(ct->dynticks_nmi_nesting, 0); /* Avoid store tearing. */
0233 
0234     // instrumentation for the noinstr ct_kernel_exit_state()
0235     instrument_atomic_write(&ct->state, sizeof(ct->state));
0236     instrumentation_end();
0237 
0238     // RCU is watching here ...
0239     ct_kernel_exit_state(RCU_DYNTICKS_IDX);
0240     // ... but is no longer watching here.
0241 
0242     if (!in_nmi())
0243         rcu_dynticks_task_enter();
0244 }
0245 
0246 /**
0247  * ct_nmi_enter - inform RCU of entry to NMI context
0248  *
0249  * If the CPU was idle from RCU's viewpoint, update ct->state and
0250  * ct->dynticks_nmi_nesting to let the RCU grace-period handling know
0251  * that the CPU is active.  This implementation permits nested NMIs, as
0252  * long as the nesting level does not overflow an int.  (You will probably
0253  * run out of stack space first.)
0254  *
0255  * If you add or remove a call to ct_nmi_enter(), be sure to test
0256  * with CONFIG_RCU_EQS_DEBUG=y.
0257  */
0258 void noinstr ct_nmi_enter(void)
0259 {
0260     long incby = 2;
0261     struct context_tracking *ct = this_cpu_ptr(&context_tracking);
0262 
0263     /* Complain about underflow. */
0264     WARN_ON_ONCE(ct_dynticks_nmi_nesting() < 0);
0265 
0266     /*
0267      * If idle from RCU viewpoint, atomically increment ->dynticks
0268      * to mark non-idle and increment ->dynticks_nmi_nesting by one.
0269      * Otherwise, increment ->dynticks_nmi_nesting by two.  This means
0270      * if ->dynticks_nmi_nesting is equal to one, we are guaranteed
0271      * to be in the outermost NMI handler that interrupted an RCU-idle
0272      * period (observation due to Andy Lutomirski).
0273      */
0274     if (rcu_dynticks_curr_cpu_in_eqs()) {
0275 
0276         if (!in_nmi())
0277             rcu_dynticks_task_exit();
0278 
0279         // RCU is not watching here ...
0280         ct_kernel_enter_state(RCU_DYNTICKS_IDX);
0281         // ... but is watching here.
0282 
0283         instrumentation_begin();
0284         // instrumentation for the noinstr rcu_dynticks_curr_cpu_in_eqs()
0285         instrument_atomic_read(&ct->state, sizeof(ct->state));
0286         // instrumentation for the noinstr ct_kernel_enter_state()
0287         instrument_atomic_write(&ct->state, sizeof(ct->state));
0288 
0289         incby = 1;
0290     } else if (!in_nmi()) {
0291         instrumentation_begin();
0292         rcu_irq_enter_check_tick();
0293     } else  {
0294         instrumentation_begin();
0295     }
0296 
0297     trace_rcu_dyntick(incby == 1 ? TPS("Endirq") : TPS("++="),
0298               ct_dynticks_nmi_nesting(),
0299               ct_dynticks_nmi_nesting() + incby, ct_dynticks());
0300     instrumentation_end();
0301     WRITE_ONCE(ct->dynticks_nmi_nesting, /* Prevent store tearing. */
0302            ct_dynticks_nmi_nesting() + incby);
0303     barrier();
0304 }
0305 
0306 /**
0307  * ct_idle_enter - inform RCU that current CPU is entering idle
0308  *
0309  * Enter idle mode, in other words, -leave- the mode in which RCU
0310  * read-side critical sections can occur.  (Though RCU read-side
0311  * critical sections can occur in irq handlers in idle, a possibility
0312  * handled by irq_enter() and irq_exit().)
0313  *
0314  * If you add or remove a call to ct_idle_enter(), be sure to test with
0315  * CONFIG_RCU_EQS_DEBUG=y.
0316  */
0317 void noinstr ct_idle_enter(void)
0318 {
0319     WARN_ON_ONCE(IS_ENABLED(CONFIG_RCU_EQS_DEBUG) && !raw_irqs_disabled());
0320     ct_kernel_exit(false, RCU_DYNTICKS_IDX + CONTEXT_IDLE);
0321 }
0322 EXPORT_SYMBOL_GPL(ct_idle_enter);
0323 
0324 /**
0325  * ct_idle_exit - inform RCU that current CPU is leaving idle
0326  *
0327  * Exit idle mode, in other words, -enter- the mode in which RCU
0328  * read-side critical sections can occur.
0329  *
0330  * If you add or remove a call to ct_idle_exit(), be sure to test with
0331  * CONFIG_RCU_EQS_DEBUG=y.
0332  */
0333 void noinstr ct_idle_exit(void)
0334 {
0335     unsigned long flags;
0336 
0337     raw_local_irq_save(flags);
0338     ct_kernel_enter(false, RCU_DYNTICKS_IDX - CONTEXT_IDLE);
0339     raw_local_irq_restore(flags);
0340 }
0341 EXPORT_SYMBOL_GPL(ct_idle_exit);
0342 
0343 /**
0344  * ct_irq_enter - inform RCU that current CPU is entering irq away from idle
0345  *
0346  * Enter an interrupt handler, which might possibly result in exiting
0347  * idle mode, in other words, entering the mode in which read-side critical
0348  * sections can occur.  The caller must have disabled interrupts.
0349  *
0350  * Note that the Linux kernel is fully capable of entering an interrupt
0351  * handler that it never exits, for example when doing upcalls to user mode!
0352  * This code assumes that the idle loop never does upcalls to user mode.
0353  * If your architecture's idle loop does do upcalls to user mode (or does
0354  * anything else that results in unbalanced calls to the irq_enter() and
0355  * irq_exit() functions), RCU will give you what you deserve, good and hard.
0356  * But very infrequently and irreproducibly.
0357  *
0358  * Use things like work queues to work around this limitation.
0359  *
0360  * You have been warned.
0361  *
0362  * If you add or remove a call to ct_irq_enter(), be sure to test with
0363  * CONFIG_RCU_EQS_DEBUG=y.
0364  */
0365 noinstr void ct_irq_enter(void)
0366 {
0367     lockdep_assert_irqs_disabled();
0368     ct_nmi_enter();
0369 }
0370 
0371 /**
0372  * ct_irq_exit - inform RCU that current CPU is exiting irq towards idle
0373  *
0374  * Exit from an interrupt handler, which might possibly result in entering
0375  * idle mode, in other words, leaving the mode in which read-side critical
0376  * sections can occur.  The caller must have disabled interrupts.
0377  *
0378  * This code assumes that the idle loop never does anything that might
0379  * result in unbalanced calls to irq_enter() and irq_exit().  If your
0380  * architecture's idle loop violates this assumption, RCU will give you what
0381  * you deserve, good and hard.  But very infrequently and irreproducibly.
0382  *
0383  * Use things like work queues to work around this limitation.
0384  *
0385  * You have been warned.
0386  *
0387  * If you add or remove a call to ct_irq_exit(), be sure to test with
0388  * CONFIG_RCU_EQS_DEBUG=y.
0389  */
0390 noinstr void ct_irq_exit(void)
0391 {
0392     lockdep_assert_irqs_disabled();
0393     ct_nmi_exit();
0394 }
0395 
0396 /*
0397  * Wrapper for ct_irq_enter() where interrupts are enabled.
0398  *
0399  * If you add or remove a call to ct_irq_enter_irqson(), be sure to test
0400  * with CONFIG_RCU_EQS_DEBUG=y.
0401  */
0402 void ct_irq_enter_irqson(void)
0403 {
0404     unsigned long flags;
0405 
0406     local_irq_save(flags);
0407     ct_irq_enter();
0408     local_irq_restore(flags);
0409 }
0410 
0411 /*
0412  * Wrapper for ct_irq_exit() where interrupts are enabled.
0413  *
0414  * If you add or remove a call to ct_irq_exit_irqson(), be sure to test
0415  * with CONFIG_RCU_EQS_DEBUG=y.
0416  */
0417 void ct_irq_exit_irqson(void)
0418 {
0419     unsigned long flags;
0420 
0421     local_irq_save(flags);
0422     ct_irq_exit();
0423     local_irq_restore(flags);
0424 }
0425 #else
0426 static __always_inline void ct_kernel_exit(bool user, int offset) { }
0427 static __always_inline void ct_kernel_enter(bool user, int offset) { }
0428 #endif /* #ifdef CONFIG_CONTEXT_TRACKING_IDLE */
0429 
0430 #ifdef CONFIG_CONTEXT_TRACKING_USER
0431 
0432 #define CREATE_TRACE_POINTS
0433 #include <trace/events/context_tracking.h>
0434 
0435 DEFINE_STATIC_KEY_FALSE(context_tracking_key);
0436 EXPORT_SYMBOL_GPL(context_tracking_key);
0437 
0438 static noinstr bool context_tracking_recursion_enter(void)
0439 {
0440     int recursion;
0441 
0442     recursion = __this_cpu_inc_return(context_tracking.recursion);
0443     if (recursion == 1)
0444         return true;
0445 
0446     WARN_ONCE((recursion < 1), "Invalid context tracking recursion value %d\n", recursion);
0447     __this_cpu_dec(context_tracking.recursion);
0448 
0449     return false;
0450 }
0451 
0452 static __always_inline void context_tracking_recursion_exit(void)
0453 {
0454     __this_cpu_dec(context_tracking.recursion);
0455 }
0456 
0457 /**
0458  * __ct_user_enter - Inform the context tracking that the CPU is going
0459  *           to enter user or guest space mode.
0460  *
0461  * This function must be called right before we switch from the kernel
0462  * to user or guest space, when it's guaranteed the remaining kernel
0463  * instructions to execute won't use any RCU read side critical section
0464  * because this function sets RCU in extended quiescent state.
0465  */
0466 void noinstr __ct_user_enter(enum ctx_state state)
0467 {
0468     struct context_tracking *ct = this_cpu_ptr(&context_tracking);
0469     lockdep_assert_irqs_disabled();
0470 
0471     /* Kernel threads aren't supposed to go to userspace */
0472     WARN_ON_ONCE(!current->mm);
0473 
0474     if (!context_tracking_recursion_enter())
0475         return;
0476 
0477     if (__ct_state() != state) {
0478         if (ct->active) {
0479             /*
0480              * At this stage, only low level arch entry code remains and
0481              * then we'll run in userspace. We can assume there won't be
0482              * any RCU read-side critical section until the next call to
0483              * user_exit() or ct_irq_enter(). Let's remove RCU's dependency
0484              * on the tick.
0485              */
0486             if (state == CONTEXT_USER) {
0487                 instrumentation_begin();
0488                 trace_user_enter(0);
0489                 vtime_user_enter(current);
0490                 instrumentation_end();
0491             }
0492             /*
0493              * Other than generic entry implementation, we may be past the last
0494              * rescheduling opportunity in the entry code. Trigger a self IPI
0495              * that will fire and reschedule once we resume in user/guest mode.
0496              */
0497             rcu_irq_work_resched();
0498 
0499             /*
0500              * Enter RCU idle mode right before resuming userspace.  No use of RCU
0501              * is permitted between this call and rcu_eqs_exit(). This way the
0502              * CPU doesn't need to maintain the tick for RCU maintenance purposes
0503              * when the CPU runs in userspace.
0504              */
0505             ct_kernel_exit(true, RCU_DYNTICKS_IDX + state);
0506 
0507             /*
0508              * Special case if we only track user <-> kernel transitions for tickless
0509              * cputime accounting but we don't support RCU extended quiescent state.
0510              * In this we case we don't care about any concurrency/ordering.
0511              */
0512             if (!IS_ENABLED(CONFIG_CONTEXT_TRACKING_IDLE))
0513                 atomic_set(&ct->state, state);
0514         } else {
0515             /*
0516              * Even if context tracking is disabled on this CPU, because it's outside
0517              * the full dynticks mask for example, we still have to keep track of the
0518              * context transitions and states to prevent inconsistency on those of
0519              * other CPUs.
0520              * If a task triggers an exception in userspace, sleep on the exception
0521              * handler and then migrate to another CPU, that new CPU must know where
0522              * the exception returns by the time we call exception_exit().
0523              * This information can only be provided by the previous CPU when it called
0524              * exception_enter().
0525              * OTOH we can spare the calls to vtime and RCU when context_tracking.active
0526              * is false because we know that CPU is not tickless.
0527              */
0528             if (!IS_ENABLED(CONFIG_CONTEXT_TRACKING_IDLE)) {
0529                 /* Tracking for vtime only, no concurrent RCU EQS accounting */
0530                 atomic_set(&ct->state, state);
0531             } else {
0532                 /*
0533                  * Tracking for vtime and RCU EQS. Make sure we don't race
0534                  * with NMIs. OTOH we don't care about ordering here since
0535                  * RCU only requires RCU_DYNTICKS_IDX increments to be fully
0536                  * ordered.
0537                  */
0538                 atomic_add(state, &ct->state);
0539             }
0540         }
0541     }
0542     context_tracking_recursion_exit();
0543 }
0544 EXPORT_SYMBOL_GPL(__ct_user_enter);
0545 
0546 /*
0547  * OBSOLETE:
0548  * This function should be noinstr but the below local_irq_restore() is
0549  * unsafe because it involves illegal RCU uses through tracing and lockdep.
0550  * This is unlikely to be fixed as this function is obsolete. The preferred
0551  * way is to call __context_tracking_enter() through user_enter_irqoff()
0552  * or context_tracking_guest_enter(). It should be the arch entry code
0553  * responsibility to call into context tracking with IRQs disabled.
0554  */
0555 void ct_user_enter(enum ctx_state state)
0556 {
0557     unsigned long flags;
0558 
0559     /*
0560      * Some contexts may involve an exception occuring in an irq,
0561      * leading to that nesting:
0562      * ct_irq_enter() rcu_eqs_exit(true) rcu_eqs_enter(true) ct_irq_exit()
0563      * This would mess up the dyntick_nesting count though. And rcu_irq_*()
0564      * helpers are enough to protect RCU uses inside the exception. So
0565      * just return immediately if we detect we are in an IRQ.
0566      */
0567     if (in_interrupt())
0568         return;
0569 
0570     local_irq_save(flags);
0571     __ct_user_enter(state);
0572     local_irq_restore(flags);
0573 }
0574 NOKPROBE_SYMBOL(ct_user_enter);
0575 EXPORT_SYMBOL_GPL(ct_user_enter);
0576 
0577 /**
0578  * user_enter_callable() - Unfortunate ASM callable version of user_enter() for
0579  *             archs that didn't manage to check the context tracking
0580  *             static key from low level code.
0581  *
0582  * This OBSOLETE function should be noinstr but it unsafely calls
0583  * local_irq_restore(), involving illegal RCU uses through tracing and lockdep.
0584  * This is unlikely to be fixed as this function is obsolete. The preferred
0585  * way is to call user_enter_irqoff(). It should be the arch entry code
0586  * responsibility to call into context tracking with IRQs disabled.
0587  */
0588 void user_enter_callable(void)
0589 {
0590     user_enter();
0591 }
0592 NOKPROBE_SYMBOL(user_enter_callable);
0593 
0594 /**
0595  * __ct_user_exit - Inform the context tracking that the CPU is
0596  *          exiting user or guest mode and entering the kernel.
0597  *
0598  * This function must be called after we entered the kernel from user or
0599  * guest space before any use of RCU read side critical section. This
0600  * potentially include any high level kernel code like syscalls, exceptions,
0601  * signal handling, etc...
0602  *
0603  * This call supports re-entrancy. This way it can be called from any exception
0604  * handler without needing to know if we came from userspace or not.
0605  */
0606 void noinstr __ct_user_exit(enum ctx_state state)
0607 {
0608     struct context_tracking *ct = this_cpu_ptr(&context_tracking);
0609 
0610     if (!context_tracking_recursion_enter())
0611         return;
0612 
0613     if (__ct_state() == state) {
0614         if (ct->active) {
0615             /*
0616              * Exit RCU idle mode while entering the kernel because it can
0617              * run a RCU read side critical section anytime.
0618              */
0619             ct_kernel_enter(true, RCU_DYNTICKS_IDX - state);
0620             if (state == CONTEXT_USER) {
0621                 instrumentation_begin();
0622                 vtime_user_exit(current);
0623                 trace_user_exit(0);
0624                 instrumentation_end();
0625             }
0626 
0627             /*
0628              * Special case if we only track user <-> kernel transitions for tickless
0629              * cputime accounting but we don't support RCU extended quiescent state.
0630              * In this we case we don't care about any concurrency/ordering.
0631              */
0632             if (!IS_ENABLED(CONFIG_CONTEXT_TRACKING_IDLE))
0633                 atomic_set(&ct->state, CONTEXT_KERNEL);
0634 
0635         } else {
0636             if (!IS_ENABLED(CONFIG_CONTEXT_TRACKING_IDLE)) {
0637                 /* Tracking for vtime only, no concurrent RCU EQS accounting */
0638                 atomic_set(&ct->state, CONTEXT_KERNEL);
0639             } else {
0640                 /*
0641                  * Tracking for vtime and RCU EQS. Make sure we don't race
0642                  * with NMIs. OTOH we don't care about ordering here since
0643                  * RCU only requires RCU_DYNTICKS_IDX increments to be fully
0644                  * ordered.
0645                  */
0646                 atomic_sub(state, &ct->state);
0647             }
0648         }
0649     }
0650     context_tracking_recursion_exit();
0651 }
0652 EXPORT_SYMBOL_GPL(__ct_user_exit);
0653 
0654 /*
0655  * OBSOLETE:
0656  * This function should be noinstr but the below local_irq_save() is
0657  * unsafe because it involves illegal RCU uses through tracing and lockdep.
0658  * This is unlikely to be fixed as this function is obsolete. The preferred
0659  * way is to call __context_tracking_exit() through user_exit_irqoff()
0660  * or context_tracking_guest_exit(). It should be the arch entry code
0661  * responsibility to call into context tracking with IRQs disabled.
0662  */
0663 void ct_user_exit(enum ctx_state state)
0664 {
0665     unsigned long flags;
0666 
0667     if (in_interrupt())
0668         return;
0669 
0670     local_irq_save(flags);
0671     __ct_user_exit(state);
0672     local_irq_restore(flags);
0673 }
0674 NOKPROBE_SYMBOL(ct_user_exit);
0675 EXPORT_SYMBOL_GPL(ct_user_exit);
0676 
0677 /**
0678  * user_exit_callable() - Unfortunate ASM callable version of user_exit() for
0679  *            archs that didn't manage to check the context tracking
0680  *            static key from low level code.
0681  *
0682  * This OBSOLETE function should be noinstr but it unsafely calls local_irq_save(),
0683  * involving illegal RCU uses through tracing and lockdep. This is unlikely
0684  * to be fixed as this function is obsolete. The preferred way is to call
0685  * user_exit_irqoff(). It should be the arch entry code responsibility to
0686  * call into context tracking with IRQs disabled.
0687  */
0688 void user_exit_callable(void)
0689 {
0690     user_exit();
0691 }
0692 NOKPROBE_SYMBOL(user_exit_callable);
0693 
0694 void __init ct_cpu_track_user(int cpu)
0695 {
0696     static __initdata bool initialized = false;
0697 
0698     if (!per_cpu(context_tracking.active, cpu)) {
0699         per_cpu(context_tracking.active, cpu) = true;
0700         static_branch_inc(&context_tracking_key);
0701     }
0702 
0703     if (initialized)
0704         return;
0705 
0706 #ifdef CONFIG_HAVE_TIF_NOHZ
0707     /*
0708      * Set TIF_NOHZ to init/0 and let it propagate to all tasks through fork
0709      * This assumes that init is the only task at this early boot stage.
0710      */
0711     set_tsk_thread_flag(&init_task, TIF_NOHZ);
0712 #endif
0713     WARN_ON_ONCE(!tasklist_empty());
0714 
0715     initialized = true;
0716 }
0717 
0718 #ifdef CONFIG_CONTEXT_TRACKING_USER_FORCE
0719 void __init context_tracking_init(void)
0720 {
0721     int cpu;
0722 
0723     for_each_possible_cpu(cpu)
0724         ct_cpu_track_user(cpu);
0725 }
0726 #endif
0727 
0728 #endif /* #ifdef CONFIG_CONTEXT_TRACKING_USER */