// SPDX-License-Identifier: GPL-2.0-only
/*
 * Context tracking: Probe on high level context boundaries such as kernel,
 * userspace, guest or idle.
 *
 * This is used by RCU to remove its dependency on the timer tick while a CPU
 * runs in idle, userspace or guest mode.
 *
 * User/guest tracking started by Frederic Weisbecker:
 *
 * Copyright (C) 2012 Red Hat, Inc., Frederic Weisbecker
 *
 * Many thanks to Gilad Ben-Yossef, Paul McKenney, Ingo Molnar, Andrew Morton,
 * Steven Rostedt, Peter Zijlstra for suggestions and improvements.
 *
 * RCU extended quiescent state bits imported from kernel/rcu/tree.c
 * where the relevant authorship may be found.
 */

#include <linux/context_tracking.h>
#include <linux/rcupdate.h>
#include <linux/sched.h>
#include <linux/hardirq.h>
#include <linux/export.h>
#include <linux/kprobes.h>
#include <trace/events/rcu.h>


DEFINE_PER_CPU(struct context_tracking, context_tracking) = {
#ifdef CONFIG_CONTEXT_TRACKING_IDLE
	.dynticks_nesting = 1,
	.dynticks_nmi_nesting = DYNTICK_IRQ_NONIDLE,
#endif
	.state = ATOMIC_INIT(RCU_DYNTICKS_IDX),
};
EXPORT_SYMBOL_GPL(context_tracking);

#ifdef CONFIG_CONTEXT_TRACKING_IDLE
#define TPS(x)	tracepoint_string(x)

/* Record the current task on dyntick-idle entry. */
static __always_inline void rcu_dynticks_task_enter(void)
{
#if defined(CONFIG_TASKS_RCU) && defined(CONFIG_NO_HZ_FULL)
	WRITE_ONCE(current->rcu_tasks_idle_cpu, smp_processor_id());
#endif /* #if defined(CONFIG_TASKS_RCU) && defined(CONFIG_NO_HZ_FULL) */
}

/* Record no current task on dyntick-idle exit. */
static __always_inline void rcu_dynticks_task_exit(void)
{
#if defined(CONFIG_TASKS_RCU) && defined(CONFIG_NO_HZ_FULL)
	WRITE_ONCE(current->rcu_tasks_idle_cpu, -1);
#endif /* #if defined(CONFIG_TASKS_RCU) && defined(CONFIG_NO_HZ_FULL) */
}

/* Turn on heavyweight RCU tasks trace readers on idle/user entry. */
static __always_inline void rcu_dynticks_task_trace_enter(void)
{
#ifdef CONFIG_TASKS_TRACE_RCU
	if (IS_ENABLED(CONFIG_TASKS_TRACE_RCU_READ_MB))
		current->trc_reader_special.b.need_mb = true;
#endif /* #ifdef CONFIG_TASKS_TRACE_RCU */
}

/* Turn off heavyweight RCU tasks trace readers on idle/user exit. */
static __always_inline void rcu_dynticks_task_trace_exit(void)
{
#ifdef CONFIG_TASKS_TRACE_RCU
	if (IS_ENABLED(CONFIG_TASKS_TRACE_RCU_READ_MB))
		current->trc_reader_special.b.need_mb = false;
#endif /* #ifdef CONFIG_TASKS_TRACE_RCU */
}

/*
 * Record entry into an extended quiescent state. This is only to be
 * called when not already in an extended quiescent state, that is,
 * RCU is watching prior to the call to this function and is no longer
 * watching upon return.
 */
static noinstr void ct_kernel_exit_state(int offset)
{
	int seq;

	/*
	 * CPUs seeing atomic_add_return() must see prior RCU read-side
	 * critical sections, and we also must force ordering with the
	 * next idle sojourn.
	 */
	rcu_dynticks_task_trace_enter();  // Before ->dynticks update!
	seq = ct_state_inc(offset);
	// RCU is no longer watching. Better be in extended quiescent state!
	WARN_ON_ONCE(IS_ENABLED(CONFIG_RCU_EQS_DEBUG) && (seq & RCU_DYNTICKS_IDX));
}

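/*
 * A note on the ->state encoding used above and below, as a reading aid:
 * the low-order bits hold the CONTEXT_* value while RCU_DYNTICKS_IDX and
 * the bits above it form the dynticks counter. The RCU_DYNTICKS_IDX bit
 * is set exactly when RCU is watching this CPU, which is what the
 * CONFIG_RCU_EQS_DEBUG parity checks in ct_kernel_exit_state() and
 * ct_kernel_enter_state() verify. As a minimal sketch, an
 * "is RCU watching here?" test boils down to:
 *
 *	bool watching = ct_dynticks() & RCU_DYNTICKS_IDX;
 *
 * rcu_dynticks_curr_cpu_in_eqs(), used by the NMI paths below, performs
 * the inverse check (true when RCU is *not* watching).
 */
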
/*
 * Record exit from an extended quiescent state. This is only to be
 * called from an extended quiescent state, that is, RCU is not watching
 * prior to the call to this function and is watching upon return.
 */
static noinstr void ct_kernel_enter_state(int offset)
{
	int seq;

	/*
	 * CPUs seeing atomic_add_return() must see prior idle sojourns,
	 * and we also must force ordering with the next RCU read-side
	 * critical section.
	 */
	seq = ct_state_inc(offset);
	// RCU is now watching. Better not be in an extended quiescent state!
	rcu_dynticks_task_trace_exit();  // After ->dynticks update!
	WARN_ON_ONCE(IS_ENABLED(CONFIG_RCU_EQS_DEBUG) && !(seq & RCU_DYNTICKS_IDX));
}

/*
 * Enter an RCU extended quiescent state, which can be either the
 * idle loop or adaptive-tickless usermode execution.
 *
 * We crowbar the ->dynticks_nmi_nesting field to zero to allow for
 * the possibility of usermode upcalls having messed up our count
 * of interrupt nesting level during the prior busy period.
 */
static void noinstr ct_kernel_exit(bool user, int offset)
{
	struct context_tracking *ct = this_cpu_ptr(&context_tracking);

	WARN_ON_ONCE(ct_dynticks_nmi_nesting() != DYNTICK_IRQ_NONIDLE);
	WRITE_ONCE(ct->dynticks_nmi_nesting, 0);
	WARN_ON_ONCE(IS_ENABLED(CONFIG_RCU_EQS_DEBUG) &&
		     ct_dynticks_nesting() == 0);
	if (ct_dynticks_nesting() != 1) {
		// RCU will still be watching, so just do accounting and leave.
		ct->dynticks_nesting--;
		return;
	}

	instrumentation_begin();
	lockdep_assert_irqs_disabled();
	trace_rcu_dyntick(TPS("Start"), ct_dynticks_nesting(), 0, ct_dynticks());
	WARN_ON_ONCE(IS_ENABLED(CONFIG_RCU_EQS_DEBUG) && !user && !is_idle_task(current));
	rcu_preempt_deferred_qs(current);

	// instrumentation for the noinstr ct_kernel_exit_state()
	instrument_atomic_write(&ct->state, sizeof(ct->state));

	instrumentation_end();
	WRITE_ONCE(ct->dynticks_nesting, 0); /* Avoid irq-access tearing. */
	// RCU is watching here ...
	ct_kernel_exit_state(offset);
	// ... but is no longer watching here.
	rcu_dynticks_task_enter();
}

/*
 * Exit an RCU extended quiescent state, which can be either the
 * idle loop or adaptive-tickless usermode execution.
 *
 * We crowbar the ->dynticks_nmi_nesting field to DYNTICK_IRQ_NONIDLE to
 * allow for the possibility of usermode upcalls messing up our count of
 * interrupt nesting level during the busy period that is just now starting.
 */
static void noinstr ct_kernel_enter(bool user, int offset)
{
	struct context_tracking *ct = this_cpu_ptr(&context_tracking);
	long oldval;

	WARN_ON_ONCE(IS_ENABLED(CONFIG_RCU_EQS_DEBUG) && !raw_irqs_disabled());
	oldval = ct_dynticks_nesting();
	WARN_ON_ONCE(IS_ENABLED(CONFIG_RCU_EQS_DEBUG) && oldval < 0);
	if (oldval) {
		// RCU was already watching, so just do accounting and leave.
		ct->dynticks_nesting++;
		return;
	}
	rcu_dynticks_task_exit();
	// RCU is not watching here ...
	ct_kernel_enter_state(offset);
	// ... but is watching here.
	instrumentation_begin();

	// instrumentation for the noinstr ct_kernel_enter_state()
	instrument_atomic_write(&ct->state, sizeof(ct->state));

	trace_rcu_dyntick(TPS("End"), ct_dynticks_nesting(), 1, ct_dynticks());
	WARN_ON_ONCE(IS_ENABLED(CONFIG_RCU_EQS_DEBUG) && !user && !is_idle_task(current));
	WRITE_ONCE(ct->dynticks_nesting, 1);
	WARN_ON_ONCE(ct_dynticks_nmi_nesting());
	WRITE_ONCE(ct->dynticks_nmi_nesting, DYNTICK_IRQ_NONIDLE);
	instrumentation_end();
}

/**
 * ct_nmi_exit - inform RCU of exit from NMI context
 *
 * If we are returning from the outermost NMI handler that interrupted an
 * RCU-idle period, update ct->state and ct->dynticks_nmi_nesting
 * to let the RCU grace-period handling know that the CPU is back to
 * being RCU-idle.
 *
 * If you add or remove a call to ct_nmi_exit(), be sure to test
 * with CONFIG_RCU_EQS_DEBUG=y.
 */
void noinstr ct_nmi_exit(void)
{
	struct context_tracking *ct = this_cpu_ptr(&context_tracking);

	instrumentation_begin();
	/*
	 * Check for ->dynticks_nmi_nesting underflow and bad ->dynticks.
	 * (We are exiting an NMI handler, so RCU better be paying attention
	 * to us!)
	 */
	WARN_ON_ONCE(ct_dynticks_nmi_nesting() <= 0);
	WARN_ON_ONCE(rcu_dynticks_curr_cpu_in_eqs());

	/*
	 * If the nesting level is not 1, the CPU wasn't RCU-idle, so
	 * leave it in non-RCU-idle state.
	 */
	if (ct_dynticks_nmi_nesting() != 1) {
		trace_rcu_dyntick(TPS("--="), ct_dynticks_nmi_nesting(), ct_dynticks_nmi_nesting() - 2,
				  ct_dynticks());
		WRITE_ONCE(ct->dynticks_nmi_nesting, /* No store tearing. */
			   ct_dynticks_nmi_nesting() - 2);
		instrumentation_end();
		return;
	}

	/* This NMI interrupted an RCU-idle CPU, restore RCU-idleness. */
	trace_rcu_dyntick(TPS("Startirq"), ct_dynticks_nmi_nesting(), 0, ct_dynticks());
	WRITE_ONCE(ct->dynticks_nmi_nesting, 0); /* Avoid store tearing. */

	// instrumentation for the noinstr ct_kernel_exit_state()
	instrument_atomic_write(&ct->state, sizeof(ct->state));
	instrumentation_end();

	// RCU is watching here ...
	ct_kernel_exit_state(RCU_DYNTICKS_IDX);
	// ... but is no longer watching here.

	if (!in_nmi())
		rcu_dynticks_task_enter();
}

/**
 * ct_nmi_enter - inform RCU of entry to NMI context
 *
 * If the CPU was idle from RCU's viewpoint, update ct->state and
 * ct->dynticks_nmi_nesting to let the RCU grace-period handling know
 * that the CPU is active. This implementation permits nested NMIs, as
 * long as the nesting level does not overflow an int. (You will probably
 * run out of stack space first.)
 *
 * If you add or remove a call to ct_nmi_enter(), be sure to test
 * with CONFIG_RCU_EQS_DEBUG=y.
 */
void noinstr ct_nmi_enter(void)
{
	long incby = 2;
	struct context_tracking *ct = this_cpu_ptr(&context_tracking);

	/* Complain about underflow. */
	WARN_ON_ONCE(ct_dynticks_nmi_nesting() < 0);

	/*
	 * If idle from RCU viewpoint, atomically increment ->dynticks
	 * to mark non-idle and increment ->dynticks_nmi_nesting by one.
	 * Otherwise, increment ->dynticks_nmi_nesting by two. This means
	 * if ->dynticks_nmi_nesting is equal to one, we are guaranteed
	 * to be in the outermost NMI handler that interrupted an RCU-idle
	 * period (observation due to Andy Lutomirski).
	 */
	if (rcu_dynticks_curr_cpu_in_eqs()) {

		if (!in_nmi())
			rcu_dynticks_task_exit();

		// RCU is not watching here ...
		ct_kernel_enter_state(RCU_DYNTICKS_IDX);
		// ... but is watching here.

		instrumentation_begin();
		// instrumentation for the noinstr rcu_dynticks_curr_cpu_in_eqs()
		instrument_atomic_read(&ct->state, sizeof(ct->state));
		// instrumentation for the noinstr ct_kernel_enter_state()
		instrument_atomic_write(&ct->state, sizeof(ct->state));

		incby = 1;
	} else if (!in_nmi()) {
		instrumentation_begin();
		rcu_irq_enter_check_tick();
	} else {
		instrumentation_begin();
	}

	trace_rcu_dyntick(incby == 1 ? TPS("Endirq") : TPS("++="),
			  ct_dynticks_nmi_nesting(),
			  ct_dynticks_nmi_nesting() + incby, ct_dynticks());
	instrumentation_end();
	WRITE_ONCE(ct->dynticks_nmi_nesting, /* Prevent store tearing. */
		   ct_dynticks_nmi_nesting() + incby);
	barrier();
}

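/*
 * A minimal sketch of the intended calling pattern for the two functions
 * above, assuming low-level arch NMI entry code; the function name below
 * is illustrative and not taken from any particular architecture:
 */
static inline void example_arch_nmi_entry(void)
{
	ct_nmi_enter();		/* CPU is no longer RCU-idle */
	/* ... run the actual NMI handler body here ... */
	ct_nmi_exit();		/* possibly restore RCU-idleness */
}
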
/**
 * ct_idle_enter - inform RCU that current CPU is entering idle
 *
 * Enter idle mode, in other words, -leave- the mode in which RCU
 * read-side critical sections can occur. (Though RCU read-side
 * critical sections can occur in irq handlers in idle, a possibility
 * handled by irq_enter() and irq_exit().)
 *
 * If you add or remove a call to ct_idle_enter(), be sure to test with
 * CONFIG_RCU_EQS_DEBUG=y.
 */
void noinstr ct_idle_enter(void)
{
	WARN_ON_ONCE(IS_ENABLED(CONFIG_RCU_EQS_DEBUG) && !raw_irqs_disabled());
	ct_kernel_exit(false, RCU_DYNTICKS_IDX + CONTEXT_IDLE);
}
EXPORT_SYMBOL_GPL(ct_idle_enter);

/**
 * ct_idle_exit - inform RCU that current CPU is leaving idle
 *
 * Exit idle mode, in other words, -enter- the mode in which RCU
 * read-side critical sections can occur.
 *
 * If you add or remove a call to ct_idle_exit(), be sure to test with
 * CONFIG_RCU_EQS_DEBUG=y.
 */
void noinstr ct_idle_exit(void)
{
	unsigned long flags;

	raw_local_irq_save(flags);
	ct_kernel_enter(false, RCU_DYNTICKS_IDX - CONTEXT_IDLE);
	raw_local_irq_restore(flags);
}
EXPORT_SYMBOL_GPL(ct_idle_exit);

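/*
 * A minimal usage sketch for the two functions above, assuming an
 * architecture idle loop that enters its low-power state with IRQs
 * disabled; the function name and the wfi/hlt placeholder are
 * illustrative only:
 */
static inline void example_idle_loop_iteration(void)
{
	raw_local_irq_disable();
	ct_idle_enter();	/* RCU stops watching this CPU */
	/* ... arch-specific low-power wait (wfi/hlt), woken by an IRQ ... */
	ct_idle_exit();		/* RCU is watching this CPU again */
	raw_local_irq_enable();
}
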
/**
 * ct_irq_enter - inform RCU that current CPU is entering irq away from idle
 *
 * Enter an interrupt handler, which might possibly result in exiting
 * idle mode, in other words, entering the mode in which read-side critical
 * sections can occur. The caller must have disabled interrupts.
 *
 * Note that the Linux kernel is fully capable of entering an interrupt
 * handler that it never exits, for example when doing upcalls to user mode!
 * This code assumes that the idle loop never does upcalls to user mode.
 * If your architecture's idle loop does do upcalls to user mode (or does
 * anything else that results in unbalanced calls to the irq_enter() and
 * irq_exit() functions), RCU will give you what you deserve, good and hard.
 * But very infrequently and irreproducibly.
 *
 * Use things like work queues to work around this limitation.
 *
 * You have been warned.
 *
 * If you add or remove a call to ct_irq_enter(), be sure to test with
 * CONFIG_RCU_EQS_DEBUG=y.
 */
noinstr void ct_irq_enter(void)
{
	lockdep_assert_irqs_disabled();
	ct_nmi_enter();
}

/**
 * ct_irq_exit - inform RCU that current CPU is exiting irq towards idle
 *
 * Exit from an interrupt handler, which might possibly result in entering
 * idle mode, in other words, leaving the mode in which read-side critical
 * sections can occur. The caller must have disabled interrupts.
 *
 * This code assumes that the idle loop never does anything that might
 * result in unbalanced calls to irq_enter() and irq_exit(). If your
 * architecture's idle loop violates this assumption, RCU will give you what
 * you deserve, good and hard. But very infrequently and irreproducibly.
 *
 * Use things like work queues to work around this limitation.
 *
 * You have been warned.
 *
 * If you add or remove a call to ct_irq_exit(), be sure to test with
 * CONFIG_RCU_EQS_DEBUG=y.
 */
noinstr void ct_irq_exit(void)
{
	lockdep_assert_irqs_disabled();
	ct_nmi_exit();
}

/*
 * Wrapper for ct_irq_enter() where interrupts are enabled.
 *
 * If you add or remove a call to ct_irq_enter_irqson(), be sure to test
 * with CONFIG_RCU_EQS_DEBUG=y.
 */
void ct_irq_enter_irqson(void)
{
	unsigned long flags;

	local_irq_save(flags);
	ct_irq_enter();
	local_irq_restore(flags);
}

/*
 * Wrapper for ct_irq_exit() where interrupts are enabled.
 *
 * If you add or remove a call to ct_irq_exit_irqson(), be sure to test
 * with CONFIG_RCU_EQS_DEBUG=y.
 */
void ct_irq_exit_irqson(void)
{
	unsigned long flags;

	local_irq_save(flags);
	ct_irq_exit();
	local_irq_restore(flags);
}
#else
static __always_inline void ct_kernel_exit(bool user, int offset) { }
static __always_inline void ct_kernel_enter(bool user, int offset) { }
#endif /* #ifdef CONFIG_CONTEXT_TRACKING_IDLE */

#ifdef CONFIG_CONTEXT_TRACKING_USER

#define CREATE_TRACE_POINTS
#include <trace/events/context_tracking.h>

DEFINE_STATIC_KEY_FALSE(context_tracking_key);
EXPORT_SYMBOL_GPL(context_tracking_key);

static noinstr bool context_tracking_recursion_enter(void)
{
	int recursion;

	recursion = __this_cpu_inc_return(context_tracking.recursion);
	if (recursion == 1)
		return true;

	WARN_ONCE((recursion < 1), "Invalid context tracking recursion value %d\n", recursion);
	__this_cpu_dec(context_tracking.recursion);

	return false;
}

static __always_inline void context_tracking_recursion_exit(void)
{
	__this_cpu_dec(context_tracking.recursion);
}

/**
 * __ct_user_enter - Inform the context tracking that the CPU is going
 *		     to enter user or guest space mode.
 *
 * This function must be called right before we switch from the kernel
 * to user or guest space, when it's guaranteed the remaining kernel
 * instructions to execute won't use any RCU read side critical section
 * because this function sets RCU in extended quiescent state.
 */
void noinstr __ct_user_enter(enum ctx_state state)
{
	struct context_tracking *ct = this_cpu_ptr(&context_tracking);
	lockdep_assert_irqs_disabled();

	/* Kernel threads aren't supposed to go to userspace */
	WARN_ON_ONCE(!current->mm);

	if (!context_tracking_recursion_enter())
		return;

	if (__ct_state() != state) {
		if (ct->active) {
			/*
			 * At this stage, only low level arch entry code remains and
			 * then we'll run in userspace. We can assume there won't be
			 * any RCU read-side critical section until the next call to
			 * user_exit() or ct_irq_enter(). Let's remove RCU's dependency
			 * on the tick.
			 */
			if (state == CONTEXT_USER) {
				instrumentation_begin();
				trace_user_enter(0);
				vtime_user_enter(current);
				instrumentation_end();
			}
			/*
			 * Other than generic entry implementation, we may be past the last
			 * rescheduling opportunity in the entry code. Trigger a self IPI
			 * that will fire and reschedule once we resume in user/guest mode.
			 */
			rcu_irq_work_resched();

			/*
			 * Enter RCU idle mode right before resuming userspace. No use of RCU
			 * is permitted between this call and rcu_eqs_exit(). This way the
			 * CPU doesn't need to maintain the tick for RCU maintenance purposes
			 * when the CPU runs in userspace.
			 */
			ct_kernel_exit(true, RCU_DYNTICKS_IDX + state);

			/*
			 * Special case if we only track user <-> kernel transitions for tickless
			 * cputime accounting but we don't support RCU extended quiescent state.
			 * In this case we don't care about any concurrency/ordering.
			 */
			if (!IS_ENABLED(CONFIG_CONTEXT_TRACKING_IDLE))
				atomic_set(&ct->state, state);
		} else {
			/*
			 * Even if context tracking is disabled on this CPU, because it's outside
			 * the full dynticks mask for example, we still have to keep track of the
			 * context transitions and states to prevent inconsistency on those of
			 * other CPUs.
			 * If a task triggers an exception in userspace, sleeps in the exception
			 * handler and then migrates to another CPU, that new CPU must know where
			 * the exception returns by the time we call exception_exit().
			 * This information can only be provided by the previous CPU when it called
			 * exception_enter().
			 * OTOH we can spare the calls to vtime and RCU when context_tracking.active
			 * is false because we know that CPU is not tickless.
			 */
			if (!IS_ENABLED(CONFIG_CONTEXT_TRACKING_IDLE)) {
				/* Tracking for vtime only, no concurrent RCU EQS accounting */
				atomic_set(&ct->state, state);
			} else {
				/*
				 * Tracking for vtime and RCU EQS. Make sure we don't race
				 * with NMIs. OTOH we don't care about ordering here since
				 * RCU only requires RCU_DYNTICKS_IDX increments to be fully
				 * ordered.
				 */
				atomic_add(state, &ct->state);
			}
		}
	}
	context_tracking_recursion_exit();
}
EXPORT_SYMBOL_GPL(__ct_user_enter);

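/*
 * A minimal sketch of the intended call site for __ct_user_enter(),
 * assuming generic-entry-style arch code about to return to userspace
 * with IRQs disabled. user_enter_irqoff() is the wrapper from
 * <linux/context_tracking.h> that checks the static key before calling
 * __ct_user_enter(CONTEXT_USER); the function name below is illustrative:
 */
static __always_inline void example_exit_to_user_mode(void)
{
	lockdep_assert_irqs_disabled();
	/* Last kernel work: no RCU read-side sections beyond this point. */
	user_enter_irqoff();
}
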
/*
 * OBSOLETE:
 * This function should be noinstr but the below local_irq_restore() is
 * unsafe because it involves illegal RCU uses through tracing and lockdep.
 * This is unlikely to be fixed as this function is obsolete. The preferred
 * way is to call __ct_user_enter() through user_enter_irqoff()
 * or context_tracking_guest_enter(). It should be the arch entry code
 * responsibility to call into context tracking with IRQs disabled.
 */
void ct_user_enter(enum ctx_state state)
{
	unsigned long flags;

	/*
	 * Some contexts may involve an exception occurring in an irq,
	 * leading to that nesting:
	 * ct_irq_enter() rcu_eqs_exit(true) rcu_eqs_enter(true) ct_irq_exit()
	 * This would mess up the dyntick_nesting count though. And rcu_irq_*()
	 * helpers are enough to protect RCU uses inside the exception. So
	 * just return immediately if we detect we are in an IRQ.
	 */
	if (in_interrupt())
		return;

	local_irq_save(flags);
	__ct_user_enter(state);
	local_irq_restore(flags);
}
NOKPROBE_SYMBOL(ct_user_enter);
EXPORT_SYMBOL_GPL(ct_user_enter);

/**
 * user_enter_callable() - Unfortunate ASM callable version of user_enter() for
 *			   archs that didn't manage to check the context tracking
 *			   static key from low level code.
 *
 * This OBSOLETE function should be noinstr but it unsafely calls
 * local_irq_restore(), involving illegal RCU uses through tracing and lockdep.
 * This is unlikely to be fixed as this function is obsolete. The preferred
 * way is to call user_enter_irqoff(). It should be the arch entry code
 * responsibility to call into context tracking with IRQs disabled.
 */
void user_enter_callable(void)
{
	user_enter();
}
NOKPROBE_SYMBOL(user_enter_callable);

/**
 * __ct_user_exit - Inform the context tracking that the CPU is
 *		    exiting user or guest mode and entering the kernel.
 *
 * This function must be called after we entered the kernel from user or
 * guest space before any use of RCU read side critical section. This
 * potentially includes any high level kernel code like syscalls, exceptions,
 * signal handling, etc...
 *
 * This call supports re-entrancy. This way it can be called from any exception
 * handler without needing to know if we came from userspace or not.
 */
void noinstr __ct_user_exit(enum ctx_state state)
{
	struct context_tracking *ct = this_cpu_ptr(&context_tracking);

	if (!context_tracking_recursion_enter())
		return;

	if (__ct_state() == state) {
		if (ct->active) {
			/*
			 * Exit RCU idle mode while entering the kernel because it can
			 * run a RCU read side critical section anytime.
			 */
			ct_kernel_enter(true, RCU_DYNTICKS_IDX - state);
			if (state == CONTEXT_USER) {
				instrumentation_begin();
				vtime_user_exit(current);
				trace_user_exit(0);
				instrumentation_end();
			}

			/*
			 * Special case if we only track user <-> kernel transitions for tickless
			 * cputime accounting but we don't support RCU extended quiescent state.
			 * In this case we don't care about any concurrency/ordering.
			 */
			if (!IS_ENABLED(CONFIG_CONTEXT_TRACKING_IDLE))
				atomic_set(&ct->state, CONTEXT_KERNEL);

		} else {
			if (!IS_ENABLED(CONFIG_CONTEXT_TRACKING_IDLE)) {
				/* Tracking for vtime only, no concurrent RCU EQS accounting */
				atomic_set(&ct->state, CONTEXT_KERNEL);
			} else {
				/*
				 * Tracking for vtime and RCU EQS. Make sure we don't race
				 * with NMIs. OTOH we don't care about ordering here since
				 * RCU only requires RCU_DYNTICKS_IDX increments to be fully
				 * ordered.
				 */
				atomic_sub(state, &ct->state);
			}
		}
	}
	context_tracking_recursion_exit();
}
EXPORT_SYMBOL_GPL(__ct_user_exit);

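/*
 * The mirror-image sketch for kernel entry, again assuming arch entry code
 * running with IRQs disabled before any RCU read-side critical section.
 * user_exit_irqoff() wraps __ct_user_exit(CONTEXT_USER) behind the same
 * static key; the function name below is illustrative:
 */
static __always_inline void example_enter_from_user_mode(void)
{
	lockdep_assert_irqs_disabled();
	user_exit_irqoff();
	/* RCU is watching again: normal kernel code may run from here on. */
}
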
/*
 * OBSOLETE:
 * This function should be noinstr but the below local_irq_save() is
 * unsafe because it involves illegal RCU uses through tracing and lockdep.
 * This is unlikely to be fixed as this function is obsolete. The preferred
 * way is to call __ct_user_exit() through user_exit_irqoff()
 * or context_tracking_guest_exit(). It should be the arch entry code
 * responsibility to call into context tracking with IRQs disabled.
 */
void ct_user_exit(enum ctx_state state)
{
	unsigned long flags;

	if (in_interrupt())
		return;

	local_irq_save(flags);
	__ct_user_exit(state);
	local_irq_restore(flags);
}
NOKPROBE_SYMBOL(ct_user_exit);
EXPORT_SYMBOL_GPL(ct_user_exit);

/**
 * user_exit_callable() - Unfortunate ASM callable version of user_exit() for
 *			  archs that didn't manage to check the context tracking
 *			  static key from low level code.
 *
 * This OBSOLETE function should be noinstr but it unsafely calls local_irq_save(),
 * involving illegal RCU uses through tracing and lockdep. This is unlikely
 * to be fixed as this function is obsolete. The preferred way is to call
 * user_exit_irqoff(). It should be the arch entry code responsibility to
 * call into context tracking with IRQs disabled.
 */
void user_exit_callable(void)
{
	user_exit();
}
NOKPROBE_SYMBOL(user_exit_callable);

void __init ct_cpu_track_user(int cpu)
{
	static __initdata bool initialized = false;

	if (!per_cpu(context_tracking.active, cpu)) {
		per_cpu(context_tracking.active, cpu) = true;
		static_branch_inc(&context_tracking_key);
	}

	if (initialized)
		return;

#ifdef CONFIG_HAVE_TIF_NOHZ
	/*
	 * Set TIF_NOHZ to init/0 and let it propagate to all tasks through fork
	 * This assumes that init is the only task at this early boot stage.
	 */
	set_tsk_thread_flag(&init_task, TIF_NOHZ);
#endif
	WARN_ON_ONCE(!tasklist_empty());

	initialized = true;
}

#ifdef CONFIG_CONTEXT_TRACKING_USER_FORCE
void __init context_tracking_init(void)
{
	int cpu;

	for_each_possible_cpu(cpu)
		ct_cpu_track_user(cpu);
}
#endif

#endif /* #ifdef CONFIG_CONTEXT_TRACKING_USER */