/*
 * Preemptible-RCU and priority-boosting support for the tree RCU
 * implementation.  The CONFIG_PREEMPT_RCU half of this file provides
 * the preemptible flavor; the #else half provides the non-preemptible
 * counterparts.
 */

#include "../locking/rtmutex_common.h"
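
/*
 * Is the ->cblist of the specified rcu_data structure offloaded to a
 * rcuo kthread?
 */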
static bool rcu_rdp_is_offloaded(struct rcu_data *rdp)
{
	/*
	 * The lockdep check below spells out the conditions under which
	 * the offloaded state may safely be read:  holding the barrier
	 * mutex, holding the CPU-hotplug lock, holding the nocb lock,
	 * running on the rdp's CPU with preemption disabled, or running
	 * as one of the rdp's nocb kthreads.
	 */
	RCU_LOCKDEP_WARN(
		!(lockdep_is_held(&rcu_state.barrier_mutex) ||
		  (IS_ENABLED(CONFIG_HOTPLUG_CPU) && lockdep_is_cpus_held()) ||
		  rcu_lockdep_is_held_nocb(rdp) ||
		  (rdp == this_cpu_ptr(&rcu_data) &&
		   !(IS_ENABLED(CONFIG_PREEMPT_COUNT) && preemptible())) ||
		  rcu_current_is_nocb_kthread(rdp)),
		"Unsafe read of RCU_NOCB offloaded state"
	);

	return rcu_segcblist_is_offloaded(&rdp->cblist);
}
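
/*
 * Check the RCU kernel configuration parameters and print informative
 * messages about anything out of the ordinary.
 */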
static void __init rcu_bootup_announce_oddness(void)
{
	if (IS_ENABLED(CONFIG_RCU_TRACE))
		pr_info("\tRCU event tracing is enabled.\n");
	if ((IS_ENABLED(CONFIG_64BIT) && RCU_FANOUT != 64) ||
	    (!IS_ENABLED(CONFIG_64BIT) && RCU_FANOUT != 32))
		pr_info("\tCONFIG_RCU_FANOUT set to non-default value of %d.\n",
			RCU_FANOUT);
	if (rcu_fanout_exact)
		pr_info("\tHierarchical RCU autobalancing is disabled.\n");
	if (IS_ENABLED(CONFIG_PROVE_RCU))
		pr_info("\tRCU lockdep checking is enabled.\n");
	if (IS_ENABLED(CONFIG_RCU_STRICT_GRACE_PERIOD))
		pr_info("\tRCU strict (and thus non-scalable) grace periods are enabled.\n");
	if (RCU_NUM_LVLS >= 4)
		pr_info("\tFour(or more)-level hierarchy is enabled.\n");
	if (RCU_FANOUT_LEAF != 16)
		pr_info("\tBuild-time adjustment of leaf fanout to %d.\n",
			RCU_FANOUT_LEAF);
	if (rcu_fanout_leaf != RCU_FANOUT_LEAF)
		pr_info("\tBoot-time adjustment of leaf fanout to %d.\n",
			rcu_fanout_leaf);
	if (nr_cpu_ids != NR_CPUS)
		pr_info("\tRCU restricting CPUs from NR_CPUS=%d to nr_cpu_ids=%u.\n", NR_CPUS, nr_cpu_ids);
#ifdef CONFIG_RCU_BOOST
	pr_info("\tRCU priority boosting: priority %d delay %d ms.\n",
		kthread_prio, CONFIG_RCU_BOOST_DELAY);
#endif
	if (blimit != DEFAULT_RCU_BLIMIT)
		pr_info("\tBoot-time adjustment of callback invocation limit to %ld.\n", blimit);
	if (qhimark != DEFAULT_RCU_QHIMARK)
		pr_info("\tBoot-time adjustment of callback high-water mark to %ld.\n", qhimark);
	if (qlowmark != DEFAULT_RCU_QLOMARK)
		pr_info("\tBoot-time adjustment of callback low-water mark to %ld.\n", qlowmark);
	if (qovld != DEFAULT_RCU_QOVLD)
		pr_info("\tBoot-time adjustment of callback overload level to %ld.\n", qovld);
	if (jiffies_till_first_fqs != ULONG_MAX)
		pr_info("\tBoot-time adjustment of first FQS scan delay to %ld jiffies.\n", jiffies_till_first_fqs);
	if (jiffies_till_next_fqs != ULONG_MAX)
		pr_info("\tBoot-time adjustment of subsequent FQS scan delay to %ld jiffies.\n", jiffies_till_next_fqs);
	if (jiffies_till_sched_qs != ULONG_MAX)
		pr_info("\tBoot-time adjustment of scheduler-enlistment delay to %ld jiffies.\n", jiffies_till_sched_qs);
	if (rcu_kick_kthreads)
		pr_info("\tKick kthreads if too-long grace period.\n");
	if (IS_ENABLED(CONFIG_DEBUG_OBJECTS_RCU_HEAD))
		pr_info("\tRCU callback double-/use-after-free debug is enabled.\n");
	if (gp_preinit_delay)
		pr_info("\tRCU debug GP pre-init slowdown %d jiffies.\n", gp_preinit_delay);
	if (gp_init_delay)
		pr_info("\tRCU debug GP init slowdown %d jiffies.\n", gp_init_delay);
	if (gp_cleanup_delay)
		pr_info("\tRCU debug GP cleanup slowdown %d jiffies.\n", gp_cleanup_delay);
	if (!use_softirq)
		pr_info("\tRCU_SOFTIRQ processing moved to rcuc kthreads.\n");
	if (IS_ENABLED(CONFIG_RCU_EQS_DEBUG))
		pr_info("\tRCU debug extended QS entry/exit.\n");
	rcupdate_announce_bootup_oddness();
}

#ifdef CONFIG_PREEMPT_RCU

static void rcu_report_exp_rnp(struct rcu_node *rnp, bool wake);
static void rcu_read_unlock_special(struct task_struct *t);
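
/*
 * Tell them what RCU they are running.
 */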
static void __init rcu_bootup_announce(void)
{
	pr_info("Preemptible hierarchical RCU implementation.\n");
	rcu_bootup_announce_oddness();
}

#define RCU_GP_TASKS	0x8
#define RCU_EXP_TASKS	0x4
#define RCU_GP_BLKD	0x2
#define RCU_EXP_BLKD	0x1
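
/*
 * Queue the just-preempted task on its leaf rcu_node structure's
 * ->blkd_tasks list.  The position is chosen to preserve the invariant
 * that every task blocking the current normal grace period is at or
 * after the entry referenced by ->gp_tasks, and that every task
 * blocking the current expedited grace period is at or after
 * ->exp_tasks.  Invoked with the rcu_node's ->lock held, which is
 * released before return; interrupts remain disabled throughout.
 */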
static void rcu_preempt_ctxt_queue(struct rcu_node *rnp, struct rcu_data *rdp)
	__releases(rnp->lock) /* But leaves interrupts disabled. */
{
	int blkd_state = (rnp->gp_tasks ? RCU_GP_TASKS : 0) +
			 (rnp->exp_tasks ? RCU_EXP_TASKS : 0) +
			 (rnp->qsmask & rdp->grpmask ? RCU_GP_BLKD : 0) +
			 (rnp->expmask & rdp->grpmask ? RCU_EXP_BLKD : 0);
	struct task_struct *t = current;

	raw_lockdep_assert_held_rcu_node(rnp);
	WARN_ON_ONCE(rdp->mynode != rnp);
	WARN_ON_ONCE(!rcu_is_leaf_node(rnp));
	/* RCU better not be waiting on newly onlined CPUs! */
	WARN_ON_ONCE(rnp->qsmaskinitnext & ~rnp->qsmaskinit & rnp->qsmask &
		     rdp->grpmask);

	/*
	 * Decide where to queue the newly blocked task based on which
	 * grace periods it blocks and which already have blockers queued.
	 */
	switch (blkd_state) {
	case 0:
	case RCU_EXP_TASKS:
	case RCU_EXP_TASKS + RCU_GP_BLKD:
	case RCU_GP_TASKS:
	case RCU_GP_TASKS + RCU_EXP_TASKS:

		/*
		 * This task is not blocking a grace period that already
		 * has queued blockers, so queue it at the head of the
		 * list.
		 */
		list_add(&t->rcu_node_entry, &rnp->blkd_tasks);
		break;

	case RCU_EXP_BLKD:
	case RCU_GP_BLKD:
	case RCU_GP_BLKD + RCU_EXP_BLKD:
	case RCU_GP_TASKS + RCU_EXP_BLKD:
	case RCU_GP_TASKS + RCU_GP_BLKD + RCU_EXP_BLKD:
	case RCU_GP_TASKS + RCU_EXP_TASKS + RCU_GP_BLKD + RCU_EXP_BLKD:

		/*
		 * This task either is the first blocker of a grace
		 * period that previously had none, or it blocks grace
		 * periods that already have blockers queued ahead of
		 * it.  Queue it at the tail of the list.
		 */
		list_add_tail(&t->rcu_node_entry, &rnp->blkd_tasks);
		break;

	case RCU_EXP_TASKS + RCU_EXP_BLKD:
	case RCU_EXP_TASKS + RCU_GP_BLKD + RCU_EXP_BLKD:
	case RCU_GP_TASKS + RCU_EXP_TASKS + RCU_EXP_BLKD:

		/*
		 * This task blocks the expedited grace period, which
		 * already has queued blockers.  Queue it just after the
		 * first expedited blocker so that it stays at or after
		 * ->exp_tasks.
		 */
		list_add(&t->rcu_node_entry, rnp->exp_tasks);
		break;

	case RCU_GP_TASKS + RCU_GP_BLKD:
	case RCU_GP_TASKS + RCU_EXP_TASKS + RCU_GP_BLKD:

		/*
		 * This task blocks only the normal grace period, which
		 * already has queued blockers.  Queue it just after the
		 * first of them.
		 */
		list_add(&t->rcu_node_entry, rnp->gp_tasks);
		break;

	default:

		/* All other bit patterns are impossible. */
		WARN_ON_ONCE(1);
		break;
	}

	/*
	 * We have now queued the task.  If it was the first one to
	 * block either grace period, update the ->gp_tasks and/or
	 * ->exp_tasks pointers, respectively, to reference the newly
	 * queued task.
	 */
	if (!rnp->gp_tasks && (blkd_state & RCU_GP_BLKD)) {
		WRITE_ONCE(rnp->gp_tasks, &t->rcu_node_entry);
		WARN_ON_ONCE(rnp->completedqs == rnp->gp_seq);
	}
	if (!rnp->exp_tasks && (blkd_state & RCU_EXP_BLKD))
		WRITE_ONCE(rnp->exp_tasks, &t->rcu_node_entry);
	WARN_ON_ONCE(!(blkd_state & RCU_GP_BLKD) !=
		     !(rnp->qsmask & rdp->grpmask));
	WARN_ON_ONCE(!(blkd_state & RCU_EXP_BLKD) !=
		     !(rnp->expmask & rdp->grpmask));
	raw_spin_unlock_rcu_node(rnp); /* Interrupts remain disabled. */

	/*
	 * Report the quiescent state for the expedited GP.  This
	 * expedited GP should not be able to end until we report, so
	 * there should be no need to check for a subsequent expedited
	 * GP.  (Though we are still in a quiescent state in any case.)
	 */
	if (blkd_state & RCU_EXP_BLKD && rdp->cpu_no_qs.b.exp)
		rcu_report_exp_rdp(rdp);
	else
		WARN_ON_ONCE(rdp->cpu_no_qs.b.exp);
}
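
/*
 * Record a preemptible-RCU quiescent state for the current CPU.  Note
 * that this does not necessarily mean that the task currently running
 * on the CPU is in a quiescent state:  Instead, it means that the
 * current grace period need not wait on any RCU read-side critical
 * section that starts later on this CPU.
 */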
static void rcu_qs(void)
{
	RCU_LOCKDEP_WARN(preemptible(), "rcu_qs() invoked with preemption enabled!!!\n");
	if (__this_cpu_read(rcu_data.cpu_no_qs.b.norm)) {
		trace_rcu_grace_period(TPS("rcu_preempt"),
				       __this_cpu_read(rcu_data.gp_seq),
				       TPS("cpuqs"));
		__this_cpu_write(rcu_data.cpu_no_qs.b.norm, false);
		barrier(); /* Coordinate with rcu_flavor_sched_clock_irq(). */
		WRITE_ONCE(current->rcu_read_unlock_special.b.need_qs, false);
	}
}
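
/*
 * We have entered the scheduler, and the current task might soon be
 * context-switched away from.  If this task is in an RCU read-side
 * critical section, we will no longer be able to rely on the CPU to
 * record that fact, so we enqueue the task on the blkd_tasks list of
 * the appropriate leaf rcu_node structure.  Either way, note a
 * quiescent state for this CPU.
 */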
void rcu_note_context_switch(bool preempt)
{
	struct task_struct *t = current;
	struct rcu_data *rdp = this_cpu_ptr(&rcu_data);
	struct rcu_node *rnp;

	trace_rcu_utilization(TPS("Start context switch"));
	lockdep_assert_irqs_disabled();
	WARN_ONCE(!preempt && rcu_preempt_depth() > 0, "Voluntary context switch within RCU read-side critical section!");
	if (rcu_preempt_depth() > 0 &&
	    !t->rcu_read_unlock_special.b.blocked) {

		/* Possibly blocking in an RCU read-side critical section. */
		rnp = rdp->mynode;
		raw_spin_lock_rcu_node(rnp);
		t->rcu_read_unlock_special.b.blocked = true;
		t->rcu_blocked_node = rnp;

		/*
		 * Verify the CPU's sanity, trace the preemption, and
		 * then queue the task as required based on the states
		 * of any ongoing normal and expedited grace periods.
		 */
		WARN_ON_ONCE(!rcu_rdp_cpu_online(rdp));
		WARN_ON_ONCE(!list_empty(&t->rcu_node_entry));
		trace_rcu_preempt_task(rcu_state.name,
				       t->pid,
				       (rnp->qsmask & rdp->grpmask)
				       ? rnp->gp_seq
				       : rcu_seq_snap(&rnp->gp_seq));
		rcu_preempt_ctxt_queue(rnp, rdp);
	} else {
		rcu_preempt_deferred_qs(t);
	}

	/*
	 * Either we were not in an RCU read-side critical section to
	 * begin with, or we have now recorded that critical section
	 * globally.  Either way, we can now note a quiescent state
	 * for this CPU.
	 */
	rcu_qs();
	if (rdp->cpu_no_qs.b.exp)
		rcu_report_exp_rdp(rdp);
	rcu_tasks_qs(current, preempt);
	trace_rcu_utilization(TPS("End context switch"));
}
EXPORT_SYMBOL_GPL(rcu_note_context_switch);

/*
 * Check for preempted RCU readers blocking the current grace period
 * for the specified rcu_node structure.  If the caller needs a
 * reliable answer, it must hold the rcu_node's ->lock.
 */
static int rcu_preempt_blocked_readers_cgp(struct rcu_node *rnp)
{
	return READ_ONCE(rnp->gp_tasks) != NULL;
}

/* Limit value for ->rcu_read_lock_nesting. */
#define RCU_NEST_PMAX (INT_MAX / 2)

static void rcu_preempt_read_enter(void)
{
	WRITE_ONCE(current->rcu_read_lock_nesting, READ_ONCE(current->rcu_read_lock_nesting) + 1);
}

static int rcu_preempt_read_exit(void)
{
	int ret = READ_ONCE(current->rcu_read_lock_nesting) - 1;

	WRITE_ONCE(current->rcu_read_lock_nesting, ret);
	return ret;
}

static void rcu_preempt_depth_set(int val)
{
	WRITE_ONCE(current->rcu_read_lock_nesting, val);
}
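
/*
 * Preemptible-RCU implementation for rcu_read_lock().  Just increment
 * ->rcu_read_lock_nesting; shared state will be updated if we block.
 */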
void __rcu_read_lock(void)
{
	rcu_preempt_read_enter();
	if (IS_ENABLED(CONFIG_PROVE_LOCKING))
		WARN_ON_ONCE(rcu_preempt_depth() > RCU_NEST_PMAX);
	if (IS_ENABLED(CONFIG_RCU_STRICT_GRACE_PERIOD) && rcu_state.gp_kthread)
		WRITE_ONCE(current->rcu_read_unlock_special.b.need_qs, true);
	barrier();
}
EXPORT_SYMBOL_GPL(__rcu_read_lock);
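
/*
 * Preemptible-RCU implementation for rcu_read_unlock().  Decrement
 * ->rcu_read_lock_nesting.  If the result is zero (outermost
 * rcu_read_unlock()) and ->rcu_read_unlock_special is non-zero, then
 * invoke rcu_read_unlock_special() to clean up after a context switch
 * in an RCU read-side critical section.
 */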
void __rcu_read_unlock(void)
{
	struct task_struct *t = current;

	barrier();
	if (rcu_preempt_read_exit() == 0) {
		barrier();
		if (unlikely(READ_ONCE(t->rcu_read_unlock_special.s)))
			rcu_read_unlock_special(t);
	}
	if (IS_ENABLED(CONFIG_PROVE_LOCKING)) {
		int rrln = rcu_preempt_depth();

		WARN_ON_ONCE(rrln < 0 || rrln > RCU_NEST_PMAX);
	}
}
EXPORT_SYMBOL_GPL(__rcu_read_unlock);

/*
 * Advance a ->blkd_tasks-list pointer to the next entry, instead
 * returning NULL if this is the end of the list.
 */
static struct list_head *rcu_next_node_entry(struct task_struct *t,
					     struct rcu_node *rnp)
{
	struct list_head *np;

	np = t->rcu_node_entry.next;
	if (np == &rnp->blkd_tasks)
		np = NULL;
	return np;
}

/*
 * Return true if the specified rcu_node structure has tasks that were
 * preempted within an RCU read-side critical section.
 */
static bool rcu_preempt_has_tasks(struct rcu_node *rnp)
{
	return !list_empty(&rnp->blkd_tasks);
}
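
/*
 * Report deferred quiescent states and, if the task was preempted
 * within an RCU read-side critical section, remove it from its leaf
 * rcu_node structure's ->blkd_tasks list.  Restores the interrupt
 * state from the supplied flags before returning.
 */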
static notrace void
rcu_preempt_deferred_qs_irqrestore(struct task_struct *t, unsigned long flags)
{
	bool empty_exp;
	bool empty_norm;
	bool empty_exp_now;
	struct list_head *np;
	bool drop_boost_mutex = false;
	struct rcu_data *rdp;
	struct rcu_node *rnp;
	union rcu_special special;

	/*
	 * Check for deferred quiescent states.  If none are pending,
	 * simply restore interrupts and return.  Interrupts are
	 * disabled, so ->rcu_read_unlock_special cannot change here.
	 */
	special = t->rcu_read_unlock_special;
	rdp = this_cpu_ptr(&rcu_data);
	if (!special.s && !rdp->cpu_no_qs.b.exp) {
		local_irq_restore(flags);
		return;
	}
	t->rcu_read_unlock_special.s = 0;
	if (special.b.need_qs) {
		if (IS_ENABLED(CONFIG_RCU_STRICT_GRACE_PERIOD)) {
			rdp->cpu_no_qs.b.norm = false;
			rcu_report_qs_rdp(rdp);
			udelay(rcu_unlock_delay);
		} else {
			rcu_qs();
		}
	}

	/*
	 * Respond to a request by an expedited grace period for a
	 * quiescent state from this CPU.  Requests pertaining to the
	 * task itself are handled when it is removed from the
	 * blocked-tasks list below.
	 */
	if (rdp->cpu_no_qs.b.exp)
		rcu_report_exp_rdp(rdp);

	/* Clean up if blocked during RCU read-side critical section. */
	if (special.b.blocked) {

		/*
		 * Remove this task from the list it blocked on.  The
		 * task stays queued on the rcu_node it first blocked on,
		 * so there is no need to loop; retain a WARN_ON_ONCE()
		 * out of sheer paranoia.
		 */
		rnp = t->rcu_blocked_node;
		raw_spin_lock_rcu_node(rnp);
		WARN_ON_ONCE(rnp != t->rcu_blocked_node);
		WARN_ON_ONCE(!rcu_is_leaf_node(rnp));
		empty_norm = !rcu_preempt_blocked_readers_cgp(rnp);
		WARN_ON_ONCE(rnp->completedqs == rnp->gp_seq &&
			     (!empty_norm || rnp->qsmask));
		empty_exp = sync_rcu_exp_done(rnp);
		smp_mb(); /* Ensure exp fastpath sees end of RCU read-side critical section. */
		np = rcu_next_node_entry(t, rnp);
		list_del_init(&t->rcu_node_entry);
		t->rcu_blocked_node = NULL;
		trace_rcu_unlock_preempted_task(TPS("rcu_preempt"),
						rnp->gp_seq, t->pid);
		if (&t->rcu_node_entry == rnp->gp_tasks)
			WRITE_ONCE(rnp->gp_tasks, np);
		if (&t->rcu_node_entry == rnp->exp_tasks)
			WRITE_ONCE(rnp->exp_tasks, np);
		if (IS_ENABLED(CONFIG_RCU_BOOST)) {
			/* Snapshot ->boost_mtx ownership with rcu_node lock held. */
			drop_boost_mutex = rt_mutex_owner(&rnp->boost_mtx.rtmutex) == t;
			if (&t->rcu_node_entry == rnp->boost_tasks)
				WRITE_ONCE(rnp->boost_tasks, np);
		}

		/*
		 * If this was the last task blocking the current normal
		 * grace period on this rcu_node, report the newly
		 * arrived quiescent state up the tree.  Note that
		 * rcu_report_unblock_qs_rnp() releases rnp->lock and
		 * restores irqs.
		 */
		empty_exp_now = sync_rcu_exp_done(rnp);
		if (!empty_norm && !rcu_preempt_blocked_readers_cgp(rnp)) {
			trace_rcu_quiescent_state_report(TPS("preempt_rcu"),
							 rnp->gp_seq,
							 0, rnp->qsmask,
							 rnp->level,
							 rnp->grplo,
							 rnp->grphi,
							 !!rnp->gp_tasks);
			rcu_report_unblock_qs_rnp(rnp, flags);
		} else {
			raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
		}

		/*
		 * If this was the last task on the expedited lists,
		 * then we need to report up the rcu_node hierarchy.
		 */
		if (!empty_exp && empty_exp_now)
			rcu_report_exp_rnp(rnp, true);

		/* Unboost if we were boosted. */
		if (IS_ENABLED(CONFIG_RCU_BOOST) && drop_boost_mutex)
			rt_mutex_futex_unlock(&rnp->boost_mtx.rtmutex);
	} else {
		local_irq_restore(flags);
	}
}
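
/*
 * Is a deferred quiescent state pending, and are we also not in an
 * RCU read-side critical section?  It is the caller's responsibility
 * to ensure it is otherwise safe to report any deferred quiescent
 * states.
 */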
static notrace bool rcu_preempt_need_deferred_qs(struct task_struct *t)
{
	return (__this_cpu_read(rcu_data.cpu_no_qs.b.exp) ||
		READ_ONCE(t->rcu_read_unlock_special.s)) &&
	       rcu_preempt_depth() == 0;
}

/*
 * Report a deferred quiescent state if needed and safe to do so.
 * As with rcu_preempt_need_deferred_qs(), "safe" involves only
 * not being within an RCU read-side critical section; the caller
 * must evaluate safety in terms of interrupt, softirq, and
 * preemption disabling.
 */
notrace void rcu_preempt_deferred_qs(struct task_struct *t)
{
	unsigned long flags;

	if (!rcu_preempt_need_deferred_qs(t))
		return;
	local_irq_save(flags);
	rcu_preempt_deferred_qs_irqrestore(t, flags);
}

/*
 * Minimal irq_work handler:  clear the pending flag and give the
 * scheduler a chance to re-evaluate.
 */
static void rcu_preempt_deferred_qs_handler(struct irq_work *iwp)
{
	struct rcu_data *rdp;

	rdp = container_of(iwp, struct rcu_data, defer_qs_iw);
	rdp->defer_qs_iw_pending = false;
}
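
/*
 * Handle special cases during rcu_read_unlock(), such as needing to
 * notify RCU core processing or the task having blocked during the
 * RCU read-side critical section.
 */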
static void rcu_read_unlock_special(struct task_struct *t)
{
	unsigned long flags;
	bool irqs_were_disabled;
	bool preempt_bh_were_disabled =
			!!(preempt_count() & (PREEMPT_MASK | SOFTIRQ_MASK));

	/* NMI handlers cannot block and cannot safely manipulate state. */
	if (in_nmi())
		return;

	local_irq_save(flags);
	irqs_were_disabled = irqs_disabled_flags(flags);
	if (preempt_bh_were_disabled || irqs_were_disabled) {
		bool expboost;
		struct rcu_data *rdp = this_cpu_ptr(&rcu_data);
		struct rcu_node *rnp = rdp->mynode;

		/*
		 * expboost is true if an expedited grace period has
		 * blocked readers queued on this task's rcu_node, if
		 * this CPU still owes an expedited quiescent state, if
		 * strict grace periods are enabled, or if priority
		 * deboosting might be required.
		 */
		expboost = (t->rcu_blocked_node && READ_ONCE(t->rcu_blocked_node->exp_tasks)) ||
			   (rdp->grpmask & READ_ONCE(rnp->expmask)) ||
			   IS_ENABLED(CONFIG_RCU_STRICT_GRACE_PERIOD) ||
			   (IS_ENABLED(CONFIG_RCU_BOOST) && irqs_were_disabled &&
			    t->rcu_blocked_node);

		if (use_softirq && (in_hardirq() || (expboost && !irqs_were_disabled))) {
			/*
			 * Safe to raise RCU_SOFTIRQ here:  we are either
			 * in a hard interrupt, or interrupts were enabled
			 * on entry and expediting/deboosting is needed.
			 */
			raise_softirq_irqoff(RCU_SOFTIRQ);
		} else {
			/*
			 * Otherwise defer the quiescent state:  ask the
			 * scheduler to reschedule, which leads to the
			 * deferred quiescent state being reported at the
			 * next safe point.
			 */
			set_tsk_need_resched(current);
			set_preempt_need_resched();
			if (IS_ENABLED(CONFIG_IRQ_WORK) && irqs_were_disabled &&
			    expboost && !rdp->defer_qs_iw_pending && cpu_online(rdp->cpu)) {
				/*
				 * Queue irq_work on this CPU so that the
				 * scheduler re-evaluates soon and the
				 * deferred quiescent state gets reported.
				 */
				if (IS_ENABLED(CONFIG_RCU_STRICT_GRACE_PERIOD) &&
				    IS_ENABLED(CONFIG_PREEMPT_RT))
					rdp->defer_qs_iw = IRQ_WORK_INIT_HARD(
								rcu_preempt_deferred_qs_handler);
				else
					init_irq_work(&rdp->defer_qs_iw,
						      rcu_preempt_deferred_qs_handler);
				rdp->defer_qs_iw_pending = true;
				irq_work_queue_on(&rdp->defer_qs_iw, rdp->cpu);
			}
		}
		local_irq_restore(flags);
		return;
	}
	rcu_preempt_deferred_qs_irqrestore(t, flags);
}
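
/*
 * Check that the list of blocked tasks for the newly completed grace
 * period is in fact empty.  It is a serious bug to complete a grace
 * period that still has RCU readers blocked!  This function must be
 * invoked -before- updating this rnp's ->gp_seq.
 *
 * Also, if there are blocked tasks on the list, they automatically
 * block the newly created grace period, so set up ->gp_tasks
 * accordingly.
 */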
static void rcu_preempt_check_blocked_tasks(struct rcu_node *rnp)
{
	struct task_struct *t;

	RCU_LOCKDEP_WARN(preemptible(), "rcu_preempt_check_blocked_tasks() invoked with preemption enabled!!!\n");
	raw_lockdep_assert_held_rcu_node(rnp);
	if (WARN_ON_ONCE(rcu_preempt_blocked_readers_cgp(rnp)))
		dump_blkd_tasks(rnp, 10);
	if (rcu_preempt_has_tasks(rnp) &&
	    (rnp->qsmaskinit || rnp->wait_blkd_tasks)) {
		WRITE_ONCE(rnp->gp_tasks, rnp->blkd_tasks.next);
		t = container_of(rnp->gp_tasks, struct task_struct,
				 rcu_node_entry);
		trace_rcu_unlock_preempted_task(TPS("rcu_preempt-GPS"),
						rnp->gp_seq, t->pid);
	}
	WARN_ON_ONCE(rnp->qsmask);
}
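
/*
 * Check for a quiescent state from the current CPU, including voluntary
 * context switches for Tasks RCU.  Invoked from the scheduler-clock
 * interrupt with interrupts disabled.
 */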
static void rcu_flavor_sched_clock_irq(int user)
{
	struct task_struct *t = current;

	lockdep_assert_irqs_disabled();
	if (user || rcu_is_cpu_rrupt_from_idle()) {
		rcu_note_voluntary_context_switch(current);
	}
	if (rcu_preempt_depth() > 0 ||
	    (preempt_count() & (PREEMPT_MASK | SOFTIRQ_MASK))) {
		/* No QS, force context switch if deferred. */
		if (rcu_preempt_need_deferred_qs(t)) {
			set_tsk_need_resched(t);
			set_preempt_need_resched();
		}
	} else if (rcu_preempt_need_deferred_qs(t)) {
		rcu_preempt_deferred_qs(t); /* Report deferred QS. */
		return;
	} else if (!WARN_ON_ONCE(rcu_preempt_depth())) {
		rcu_qs(); /* Report immediate QS. */
		return;
	}

	/* If GP is oldish, ask for help from rcu_read_unlock_special(). */
	if (rcu_preempt_depth() > 0 &&
	    __this_cpu_read(rcu_data.core_needs_qs) &&
	    __this_cpu_read(rcu_data.cpu_no_qs.b.norm) &&
	    !t->rcu_read_unlock_special.b.need_qs &&
	    time_after(jiffies, rcu_state.gp_start + HZ))
		t->rcu_read_unlock_special.b.need_qs = true;
}
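
/*
 * If a task exits while within an RCU read-side critical section, or
 * while queued on an rcu_node ->blkd_tasks list, simulate the
 * outermost rcu_read_unlock() so that the exiting task cannot stall
 * grace periods forever.
 */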
void exit_rcu(void)
{
	struct task_struct *t = current;

	if (unlikely(!list_empty(&current->rcu_node_entry))) {
		rcu_preempt_depth_set(1);
		barrier();
		WRITE_ONCE(t->rcu_read_unlock_special.b.blocked, true);
	} else if (unlikely(rcu_preempt_depth())) {
		rcu_preempt_depth_set(1);
	} else {
		return;
	}
	__rcu_read_unlock();
	rcu_preempt_deferred_qs(current);
}
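
/*
 * Dump detailed information for all tasks blocking the current RCU
 * grace period on the specified rcu_node structure, limited to ncheck
 * ->blkd_tasks entries, along with the quiescent-state status of this
 * rcu_node structure and its ancestors.
 */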
static void
dump_blkd_tasks(struct rcu_node *rnp, int ncheck)
{
	int cpu;
	int i;
	struct list_head *lhp;
	struct rcu_data *rdp;
	struct rcu_node *rnp1;

	raw_lockdep_assert_held_rcu_node(rnp);
	pr_info("%s: grp: %d-%d level: %d ->gp_seq %ld ->completedqs %ld\n",
		__func__, rnp->grplo, rnp->grphi, rnp->level,
		(long)READ_ONCE(rnp->gp_seq), (long)rnp->completedqs);
	for (rnp1 = rnp; rnp1; rnp1 = rnp1->parent)
		pr_info("%s: %d:%d ->qsmask %#lx ->qsmaskinit %#lx ->qsmaskinitnext %#lx\n",
			__func__, rnp1->grplo, rnp1->grphi, rnp1->qsmask, rnp1->qsmaskinit, rnp1->qsmaskinitnext);
	pr_info("%s: ->gp_tasks %p ->boost_tasks %p ->exp_tasks %p\n",
		__func__, READ_ONCE(rnp->gp_tasks), data_race(rnp->boost_tasks),
		READ_ONCE(rnp->exp_tasks));
	pr_info("%s: ->blkd_tasks", __func__);
	i = 0;
	list_for_each(lhp, &rnp->blkd_tasks) {
		pr_cont(" %p", lhp);
		if (++i >= ncheck)
			break;
	}
	pr_cont("\n");
	for (cpu = rnp->grplo; cpu <= rnp->grphi; cpu++) {
		rdp = per_cpu_ptr(&rcu_data, cpu);
		pr_info("\t%d: %c online: %ld(%d) offline: %ld(%d)\n",
			cpu, ".o"[rcu_rdp_cpu_online(rdp)],
			(long)rdp->rcu_onl_gp_seq, rdp->rcu_onl_gp_flags,
			(long)rdp->rcu_ofl_gp_seq, rdp->rcu_ofl_gp_flags);
	}
}

#else /* #ifdef CONFIG_PREEMPT_RCU */
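
/*
 * If strict grace periods are enabled, and if the calling context
 * permits it (interrupts and preemption enabled, grace-period kthread
 * started), report a quiescent state for this CPU immediately.
 */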
void rcu_read_unlock_strict(void)
{
	struct rcu_data *rdp;

	if (irqs_disabled() || preempt_count() || !rcu_state.gp_kthread)
		return;
	rdp = this_cpu_ptr(&rcu_data);
	rcu_report_qs_rdp(rdp);
	udelay(rcu_unlock_delay);
}
EXPORT_SYMBOL_GPL(rcu_read_unlock_strict);

/*
 * Tell them what RCU they are running.
 */
static void __init rcu_bootup_announce(void)
{
	pr_info("Hierarchical RCU implementation.\n");
	rcu_bootup_announce_oddness();
}

/*
 * Note a quiescent state for PREEMPTION=n.  Because we do not need to
 * know about blocked readers, simply record the CPU's quiescent state
 * and report any pending expedited quiescent state as well.
 */
static void rcu_qs(void)
{
	RCU_LOCKDEP_WARN(preemptible(), "rcu_qs() invoked with preemption enabled!!!");
	if (!__this_cpu_read(rcu_data.cpu_no_qs.s))
		return;
	trace_rcu_grace_period(TPS("rcu_sched"),
			       __this_cpu_read(rcu_data.gp_seq), TPS("cpuqs"));
	__this_cpu_write(rcu_data.cpu_no_qs.b.norm, false);
	if (__this_cpu_read(rcu_data.cpu_no_qs.b.exp))
		rcu_report_exp_rdp(this_cpu_ptr(&rcu_data));
}
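
/*
 * Register an urgently needed quiescent state.  If there is an
 * emergency, invoke rcu_momentary_dyntick_idle() to do a heavy-weight
 * dyntick-idle quiescent state visible to other CPUs, which will in
 * some cases serve for expedited as well as normal grace periods.
 * Either way, register a lightweight quiescent state.
 */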
void rcu_all_qs(void)
{
	unsigned long flags;

	if (!raw_cpu_read(rcu_data.rcu_urgent_qs))
		return;
	preempt_disable();
	/* Load rcu_urgent_qs before other flags. */
	if (!smp_load_acquire(this_cpu_ptr(&rcu_data.rcu_urgent_qs))) {
		preempt_enable();
		return;
	}
	this_cpu_write(rcu_data.rcu_urgent_qs, false);
	if (unlikely(raw_cpu_read(rcu_data.rcu_need_heavy_qs))) {
		local_irq_save(flags);
		rcu_momentary_dyntick_idle();
		local_irq_restore(flags);
	}
	rcu_qs();
	preempt_enable();
}
EXPORT_SYMBOL_GPL(rcu_all_qs);

/*
 * Note a PREEMPTION=n context switch, which counts as a quiescent
 * state for this CPU.
 */
void rcu_note_context_switch(bool preempt)
{
	trace_rcu_utilization(TPS("Start context switch"));
	rcu_qs();
	/* Load rcu_urgent_qs before other flags. */
	if (!smp_load_acquire(this_cpu_ptr(&rcu_data.rcu_urgent_qs)))
		goto out;
	this_cpu_write(rcu_data.rcu_urgent_qs, false);
	if (unlikely(raw_cpu_read(rcu_data.rcu_need_heavy_qs)))
		rcu_momentary_dyntick_idle();
out:
	rcu_tasks_qs(current, preempt);
	trace_rcu_utilization(TPS("End context switch"));
}
EXPORT_SYMBOL_GPL(rcu_note_context_switch);

/*
 * Because preemptible RCU does not exist, there are never any preempted
 * RCU readers blocking the current grace period.
 */
static int rcu_preempt_blocked_readers_cgp(struct rcu_node *rnp)
{
	return 0;
}

/*
 * Because there is no preemptible RCU, there can be no readers blocked.
 */
static bool rcu_preempt_has_tasks(struct rcu_node *rnp)
{
	return false;
}

/*
 * Because there is no preemptible RCU, there can be no deferred
 * quiescent states.
 */
static notrace bool rcu_preempt_need_deferred_qs(struct task_struct *t)
{
	return false;
}

/*
 * Report any deferred expedited quiescent state on the current CPU's
 * behalf.
 */
notrace void rcu_preempt_deferred_qs(struct task_struct *t)
{
	struct rcu_data *rdp = this_cpu_ptr(&rcu_data);

	if (rdp->cpu_no_qs.b.exp)
		rcu_report_exp_rdp(rdp);
}

/*
 * Because there is no preemptible RCU, there can be no readers blocked.
 * The ->qsmask field should be zero at grace-period end; warn otherwise.
 */
static void rcu_preempt_check_blocked_tasks(struct rcu_node *rnp)
{
	WARN_ON_ONCE(rnp->qsmask);
}

/*
 * Check to see if this CPU is in a non-context-switch quiescent state,
 * namely user mode or the idle loop.
 */
static void rcu_flavor_sched_clock_irq(int user)
{
	if (user || rcu_is_cpu_rrupt_from_idle()) {

		/*
		 * Get here if this CPU took its interrupt from user
		 * mode or from the idle loop, and if this is not a
		 * nested interrupt.  In this case, the CPU is in a
		 * quiescent state, so note it.
		 *
		 * No memory barrier is required here because rcu_qs()
		 * references only CPU-local variables that other CPUs
		 * neither access nor modify, at least not while the
		 * corresponding CPU is online.
		 */
		rcu_qs();
	}
}

/*
 * Because preemptible RCU does not exist, tasks cannot possibly exit
 * while in preemptible-RCU read-side critical sections.
 */
void exit_rcu(void)
{
}

/*
 * Dump the guaranteed-empty blocked-tasks state.  Trust but verify.
 */
static void
dump_blkd_tasks(struct rcu_node *rnp, int ncheck)
{
	WARN_ON_ONCE(!list_empty(&rnp->blkd_tasks));
}

#endif /* #else #ifdef CONFIG_PREEMPT_RCU */

/*
 * If boosting, set rcuc kthreads to realtime priority.
 */
static void rcu_cpu_kthread_setup(unsigned int cpu)
{
	struct rcu_data *rdp = per_cpu_ptr(&rcu_data, cpu);
#ifdef CONFIG_RCU_BOOST
	struct sched_param sp;

	sp.sched_priority = kthread_prio;
	sched_setscheduler_nocheck(current, SCHED_FIFO, &sp);
#endif /* #ifdef CONFIG_RCU_BOOST */

	WRITE_ONCE(rdp->rcuc_activity, jiffies);
}

static bool rcu_is_callbacks_nocb_kthread(struct rcu_data *rdp)
{
#ifdef CONFIG_RCU_NOCB_CPU
	return rdp->nocb_cb_kthread == current;
#else
	return false;
#endif
}

/*
 * Is the current CPU running the RCU-callbacks kthread?
 * Caller must have preemption disabled.
 */
static bool rcu_is_callbacks_kthread(struct rcu_data *rdp)
{
	return rdp->rcu_cpu_kthread_task == current ||
	       rcu_is_callbacks_nocb_kthread(rdp);
}

#ifdef CONFIG_RCU_BOOST
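
/*
 * Carry out RCU priority boosting on the task indicated by ->exp_tasks
 * or ->boost_tasks of the specified rcu_node structure, preferring an
 * expedited blocker.  Boosting is done by having the blocked task
 * acquire an rt_mutex that has been proxy-locked on its behalf.
 * Returns nonzero if there are potentially more tasks needing boosting.
 */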
static int rcu_boost(struct rcu_node *rnp)
{
	unsigned long flags;
	struct task_struct *t;
	struct list_head *tb;

	if (READ_ONCE(rnp->exp_tasks) == NULL &&
	    READ_ONCE(rnp->boost_tasks) == NULL)
		return 0;  /* Nothing left to boost. */

	raw_spin_lock_irqsave_rcu_node(rnp, flags);

	/*
	 * Recheck under the lock: all tasks in need of boosting
	 * might exit their RCU read-side critical sections on their own.
	 */
	if (rnp->exp_tasks == NULL && rnp->boost_tasks == NULL) {
		raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
		return 0;
	}

	/*
	 * Preferentially boost tasks blocking expedited grace periods;
	 * otherwise, boost from the ->boost_tasks list of normal-GP
	 * blockers.
	 */
	if (rnp->exp_tasks != NULL)
		tb = rnp->exp_tasks;
	else
		tb = rnp->boost_tasks;

	/*
	 * We boost task t by manufacturing an rt_mutex that appears to
	 * be held by task t and then having this kthread acquire it.
	 * Priority inheritance then boosts task t for as long as it
	 * holds the proxy-locked mutex.  When task t leaves its
	 * outermost RCU read-side critical section, it releases the
	 * mutex (see rcu_preempt_deferred_qs_irqrestore()), which lets
	 * the rt_mutex_lock() below return and deboosting take place.
	 */
	t = container_of(tb, struct task_struct, rcu_node_entry);
	rt_mutex_init_proxy_locked(&rnp->boost_mtx.rtmutex, t);
	raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
	/* Lock only for side effect: boosts task t's priority. */
	rt_mutex_lock(&rnp->boost_mtx);
	rt_mutex_unlock(&rnp->boost_mtx);  /* Then keep lockdep happy. */
	rnp->n_boosts++;

	return READ_ONCE(rnp->exp_tasks) != NULL ||
	       READ_ONCE(rnp->boost_tasks) != NULL;
}

/*
 * Priority-boost kthread, one per leaf rcu_node structure.
 */
static int rcu_boost_kthread(void *arg)
{
	struct rcu_node *rnp = (struct rcu_node *)arg;
	int spincnt = 0;
	int more2boost;

	trace_rcu_utilization(TPS("Start boost kthread@init"));
	for (;;) {
		WRITE_ONCE(rnp->boost_kthread_status, RCU_KTHREAD_WAITING);
		trace_rcu_utilization(TPS("End boost kthread@rcu_wait"));
		rcu_wait(READ_ONCE(rnp->boost_tasks) ||
			 READ_ONCE(rnp->exp_tasks));
		trace_rcu_utilization(TPS("Start boost kthread@rcu_wait"));
		WRITE_ONCE(rnp->boost_kthread_status, RCU_KTHREAD_RUNNING);
		more2boost = rcu_boost(rnp);
		if (more2boost)
			spincnt++;
		else
			spincnt = 0;
		if (spincnt > 10) {
			WRITE_ONCE(rnp->boost_kthread_status, RCU_KTHREAD_YIELDING);
			trace_rcu_utilization(TPS("End boost kthread@rcu_yield"));
			schedule_timeout_idle(2);
			trace_rcu_utilization(TPS("Start boost kthread@rcu_yield"));
			spincnt = 0;
		}
	}
	/* NOTREACHED */
	trace_rcu_utilization(TPS("End boost kthread@notreached"));
	return 0;
}
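
/*
 * Check to see if it is time to start boosting RCU readers blocking the
 * current grace period, and, if so, tell the per-rcu_node kthread to
 * start boosting them.  If there is an expedited grace period with
 * blocked readers on this rcu_node, it is always time to boost.
 *
 * The caller must hold rnp->lock, which this function releases.
 */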
static void rcu_initiate_boost(struct rcu_node *rnp, unsigned long flags)
	__releases(rnp->lock)
{
	raw_lockdep_assert_held_rcu_node(rnp);
	if (!rnp->boost_kthread_task ||
	    (!rcu_preempt_blocked_readers_cgp(rnp) && !rnp->exp_tasks)) {
		raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
		return;
	}
	if (rnp->exp_tasks != NULL ||
	    (rnp->gp_tasks != NULL &&
	     rnp->boost_tasks == NULL &&
	     rnp->qsmask == 0 &&
	     (!time_after(rnp->boost_time, jiffies) || rcu_state.cbovld ||
	      IS_ENABLED(CONFIG_RCU_STRICT_GRACE_PERIOD)))) {
		if (rnp->exp_tasks == NULL)
			WRITE_ONCE(rnp->boost_tasks, rnp->gp_tasks);
		raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
		rcu_wake_cond(rnp->boost_kthread_task,
			      READ_ONCE(rnp->boost_kthread_status));
	} else {
		raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
	}
}

#define RCU_BOOST_DELAY_JIFFIES DIV_ROUND_UP(CONFIG_RCU_BOOST_DELAY * HZ, 1000)

/*
 * Do priority-boost accounting for the start of a new grace period.
 */
static void rcu_preempt_boost_start_gp(struct rcu_node *rnp)
{
	rnp->boost_time = jiffies + RCU_BOOST_DELAY_JIFFIES;
}

/*
 * Create an RCU-boost kthread for the specified node if one does not
 * already exist.
 */
static void rcu_spawn_one_boost_kthread(struct rcu_node *rnp)
{
	unsigned long flags;
	int rnp_index = rnp - rcu_get_root();
	struct sched_param sp;
	struct task_struct *t;

	mutex_lock(&rnp->boost_kthread_mutex);
	if (rnp->boost_kthread_task || !rcu_scheduler_fully_active)
		goto out;

	t = kthread_create(rcu_boost_kthread, (void *)rnp,
			   "rcub/%d", rnp_index);
	if (WARN_ON_ONCE(IS_ERR(t)))
		goto out;

	raw_spin_lock_irqsave_rcu_node(rnp, flags);
	rnp->boost_kthread_task = t;
	raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
	sp.sched_priority = kthread_prio;
	sched_setscheduler_nocheck(t, SCHED_FIFO, &sp);
	wake_up_process(t);

out:
	mutex_unlock(&rnp->boost_kthread_mutex);
}
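
/*
 * Set the per-rcu_node kthread's affinity to cover all CPUs that are
 * served by the rcu_node in question.  The CPU-hotplug lock is still
 * held, so the value of rnp->qsmaskinit will be stable.
 *
 * We don't include outgoingcpu in the affinity set; use -1 if there is
 * no outgoing CPU.  If no eligible CPUs remain, the kthread is bound
 * to the housekeeping CPUs.
 */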
static void rcu_boost_kthread_setaffinity(struct rcu_node *rnp, int outgoingcpu)
{
	struct task_struct *t = rnp->boost_kthread_task;
	unsigned long mask = rcu_rnp_online_cpus(rnp);
	cpumask_var_t cm;
	int cpu;

	if (!t)
		return;
	if (!zalloc_cpumask_var(&cm, GFP_KERNEL))
		return;
	mutex_lock(&rnp->boost_kthread_mutex);
	for_each_leaf_node_possible_cpu(rnp, cpu)
		if ((mask & leaf_node_cpu_bit(rnp, cpu)) &&
		    cpu != outgoingcpu)
			cpumask_set_cpu(cpu, cm);
	cpumask_and(cm, cm, housekeeping_cpumask(HK_TYPE_RCU));
	if (cpumask_empty(cm))
		cpumask_copy(cm, housekeeping_cpumask(HK_TYPE_RCU));
	set_cpus_allowed_ptr(t, cm);
	mutex_unlock(&rnp->boost_kthread_mutex);
	free_cpumask_var(cm);
}

#else /* #ifdef CONFIG_RCU_BOOST */

static void rcu_initiate_boost(struct rcu_node *rnp, unsigned long flags)
	__releases(rnp->lock)
{
	raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
}

static void rcu_preempt_boost_start_gp(struct rcu_node *rnp)
{
}

static void rcu_spawn_one_boost_kthread(struct rcu_node *rnp)
{
}

static void rcu_boost_kthread_setaffinity(struct rcu_node *rnp, int outgoingcpu)
{
}

#endif /* #else #ifdef CONFIG_RCU_BOOST */
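
/*
 * Is this CPU a NO_HZ_FULL CPU that should ignore RCU so that the
 * grace-period kthread will do force_quiescent_state() processing?
 * The idea is to avoid waking up RCU core processing on such a CPU
 * unless the grace period has extended for too long.
 */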
static bool rcu_nohz_full_cpu(void)
{
#ifdef CONFIG_NO_HZ_FULL
	if (tick_nohz_full_cpu(smp_processor_id()) &&
	    (!rcu_gp_in_progress() ||
	     time_before(jiffies, READ_ONCE(rcu_state.gp_start) + HZ)))
		return true;
#endif /* #ifdef CONFIG_NO_HZ_FULL */
	return false;
}

/*
 * Bind the RCU grace-period kthreads to the housekeeping CPUs.
 */
static void rcu_bind_gp_kthread(void)
{
	if (!tick_nohz_full_enabled())
		return;
	housekeeping_affine(current, HK_TYPE_RCU);
}