0001 // SPDX-License-Identifier: GPL-2.0-only
0002 /*
0003  * RT-Mutexes: simple blocking mutual exclusion locks with PI support
0004  *
0005  * started by Ingo Molnar and Thomas Gleixner.
0006  *
0007  *  Copyright (C) 2004-2006 Red Hat, Inc., Ingo Molnar <mingo@redhat.com>
0008  *  Copyright (C) 2005-2006 Timesys Corp., Thomas Gleixner <tglx@timesys.com>
0009  *  Copyright (C) 2005 Kihon Technologies Inc., Steven Rostedt
0010  *  Copyright (C) 2006 Esben Nielsen
0011  * Adaptive Spinlocks:
0012  *  Copyright (C) 2008 Novell, Inc., Gregory Haskins, Sven Dietrich,
0013  *                   and Peter Morreale,
0014  * Adaptive Spinlocks simplification:
0015  *  Copyright (C) 2008 Red Hat, Inc., Steven Rostedt <srostedt@redhat.com>
0016  *
0017  *  See Documentation/locking/rt-mutex-design.rst for details.
0018  */
0019 #include <linux/sched.h>
0020 #include <linux/sched/debug.h>
0021 #include <linux/sched/deadline.h>
0022 #include <linux/sched/signal.h>
0023 #include <linux/sched/rt.h>
0024 #include <linux/sched/wake_q.h>
0025 #include <linux/ww_mutex.h>
0026 
0027 #include <trace/events/lock.h>
0028 
0029 #include "rtmutex_common.h"
0030 
0031 #ifndef WW_RT
0032 # define build_ww_mutex()   (false)
0033 # define ww_container_of(rtm)   NULL
0034 
0035 static inline int __ww_mutex_add_waiter(struct rt_mutex_waiter *waiter,
0036                     struct rt_mutex *lock,
0037                     struct ww_acquire_ctx *ww_ctx)
0038 {
0039     return 0;
0040 }
0041 
0042 static inline void __ww_mutex_check_waiters(struct rt_mutex *lock,
0043                         struct ww_acquire_ctx *ww_ctx)
0044 {
0045 }
0046 
0047 static inline void ww_mutex_lock_acquired(struct ww_mutex *lock,
0048                       struct ww_acquire_ctx *ww_ctx)
0049 {
0050 }
0051 
0052 static inline int __ww_mutex_check_kill(struct rt_mutex *lock,
0053                     struct rt_mutex_waiter *waiter,
0054                     struct ww_acquire_ctx *ww_ctx)
0055 {
0056     return 0;
0057 }
0058 
0059 #else
0060 # define build_ww_mutex()   (true)
0061 # define ww_container_of(rtm)   container_of(rtm, struct ww_mutex, base)
0062 # include "ww_mutex.h"
0063 #endif
0064 
0065 /*
0066  * lock->owner state tracking:
0067  *
0068  * lock->owner holds the task_struct pointer of the owner. Bit 0
0069  * is used to keep track of the "lock has waiters" state.
0070  *
0071  * owner    bit0
0072  * NULL     0   lock is free (fast acquire possible)
0073  * NULL     1   lock is free and has waiters and the top waiter
0074  *              is going to take the lock*
0075  * taskpointer  0   lock is held (fast release possible)
0076  * taskpointer  1   lock is held and has waiters**
0077  *
0078  * The fast atomic compare exchange based acquire and release is only
0079  * possible when bit 0 of lock->owner is 0.
0080  *
0081  * (*) It can also be a transitional state when grabbing the lock
0082  * while ->wait_lock is held. To prevent any fast path cmpxchg of the lock,
0083  * we need to set bit0 before looking at the lock, and the owner may be
0084  * NULL during this small window, hence this can be a transitional state.
0085  *
0086  * (**) There is a small time when bit 0 is set but there are no
0087  * waiters. This can happen when grabbing the lock in the slow path.
0088  * To prevent a cmpxchg of the owner releasing the lock, we need to
0089  * set this bit before looking at the lock.
0090  */
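/*
 * For illustration, decoding the packed owner word looks roughly like this
 * (a sketch; rt_mutex_owner() in rtmutex_common.h is what the code below
 * actually uses):
 *
 *	unsigned long val = (unsigned long)READ_ONCE(lock->owner);
 *	bool has_waiters = val & RT_MUTEX_HAS_WAITERS;
 *	struct task_struct *owner = (struct task_struct *)
 *					(val & ~RT_MUTEX_HAS_WAITERS);
 */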
0091 
0092 static __always_inline void
0093 rt_mutex_set_owner(struct rt_mutex_base *lock, struct task_struct *owner)
0094 {
0095     unsigned long val = (unsigned long)owner;
0096 
0097     if (rt_mutex_has_waiters(lock))
0098         val |= RT_MUTEX_HAS_WAITERS;
0099 
0100     WRITE_ONCE(lock->owner, (struct task_struct *)val);
0101 }
0102 
0103 static __always_inline void clear_rt_mutex_waiters(struct rt_mutex_base *lock)
0104 {
0105     lock->owner = (struct task_struct *)
0106             ((unsigned long)lock->owner & ~RT_MUTEX_HAS_WAITERS);
0107 }
0108 
0109 static __always_inline void fixup_rt_mutex_waiters(struct rt_mutex_base *lock)
0110 {
0111     unsigned long owner, *p = (unsigned long *) &lock->owner;
0112 
0113     if (rt_mutex_has_waiters(lock))
0114         return;
0115 
0116     /*
0117      * The rbtree has no waiters enqueued, now make sure that the
0118      * lock->owner still has the waiters bit set, otherwise the
0119      * following can happen:
0120      *
0121      * CPU 0    CPU 1       CPU2
0122      * l->owner=T1
0123      *      rt_mutex_lock(l)
0124      *      lock(l->lock)
0125      *      l->owner = T1 | HAS_WAITERS;
0126      *      enqueue(T2)
0127      *      boost()
0128      *        unlock(l->lock)
0129      *      block()
0130      *
0131      *              rt_mutex_lock(l)
0132      *              lock(l->lock)
0133      *              l->owner = T1 | HAS_WAITERS;
0134      *              enqueue(T3)
0135      *              boost()
0136      *                unlock(l->lock)
0137      *              block()
0138      *      signal(->T2)    signal(->T3)
0139      *      lock(l->lock)
0140      *      dequeue(T2)
0141      *      deboost()
0142      *        unlock(l->lock)
0143      *              lock(l->lock)
0144      *              dequeue(T3)
0145      *               ==> wait list is empty
0146      *              deboost()
0147      *               unlock(l->lock)
0148      *      lock(l->lock)
0149      *      fixup_rt_mutex_waiters()
0150      *        if (wait_list_empty(l)) {
0151      *          owner = l->owner & ~HAS_WAITERS;
0152      *          l->owner = owner
0153      *            ==> l->owner = T1
0154      *        }
0155      *              lock(l->lock)
0156      * rt_mutex_unlock(l)       fixup_rt_mutex_waiters()
0157      *                if (wait_list_empty(l)) {
0158      *                  owner = l->owner & ~HAS_WAITERS;
0159      * cmpxchg(l->owner, T1, NULL)
0160      *  ===> Success (l->owner = NULL)
0161      *
0162      *                  l->owner = owner
0163      *                    ==> l->owner = T1
0164      *                }
0165      *
0166      * With the check for the waiter bit in place T3 on CPU2 will not
0167      * overwrite l->owner. All tasks fiddling with the waiters bit are
0168      * serialized by l->lock, so nothing else can modify the waiters
0169      * bit. If the bit is set then nothing can change l->owner either
0170      * so the simple RMW is safe. The cmpxchg() will simply fail if it
0171      * happens in the middle of the RMW because the waiters bit is
0172      * still set.
0173      */
0174     owner = READ_ONCE(*p);
0175     if (owner & RT_MUTEX_HAS_WAITERS)
0176         WRITE_ONCE(*p, owner & ~RT_MUTEX_HAS_WAITERS);
0177 }
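/*
 * Callers pair this with try_to_take_rt_mutex(), which sets
 * RT_MUTEX_HAS_WAITERS unconditionally; e.g. (as in __rt_mutex_slowtrylock()
 * further down):
 *
 *	ret = try_to_take_rt_mutex(lock, current, NULL);
 *	fixup_rt_mutex_waiters(lock);
 */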
0178 
0179 /*
0180  * We can speed up the acquire/release if there's no debugging state to be
0181  * set up.
0182  */
0183 #ifndef CONFIG_DEBUG_RT_MUTEXES
0184 static __always_inline bool rt_mutex_cmpxchg_acquire(struct rt_mutex_base *lock,
0185                              struct task_struct *old,
0186                              struct task_struct *new)
0187 {
0188     return try_cmpxchg_acquire(&lock->owner, &old, new);
0189 }
0190 
0191 static __always_inline bool rt_mutex_cmpxchg_release(struct rt_mutex_base *lock,
0192                              struct task_struct *old,
0193                              struct task_struct *new)
0194 {
0195     return try_cmpxchg_release(&lock->owner, &old, new);
0196 }
0197 
0198 /*
0199  * Callers must hold the ->wait_lock -- which is the whole purpose as we force
0200  * all future threads that attempt to [Rmw] the lock to the slowpath. As such
0201  * relaxed semantics suffice.
0202  */
0203 static __always_inline void mark_rt_mutex_waiters(struct rt_mutex_base *lock)
0204 {
0205     unsigned long owner, *p = (unsigned long *) &lock->owner;
0206 
0207     do {
0208         owner = *p;
0209     } while (cmpxchg_relaxed(p, owner,
0210                  owner | RT_MUTEX_HAS_WAITERS) != owner);
0211 }
0212 
0213 /*
0214  * Safe fastpath aware unlock:
0215  * 1) Clear the waiters bit
0216  * 2) Drop lock->wait_lock
0217  * 3) Try to unlock the lock with cmpxchg
0218  */
0219 static __always_inline bool unlock_rt_mutex_safe(struct rt_mutex_base *lock,
0220                          unsigned long flags)
0221     __releases(lock->wait_lock)
0222 {
0223     struct task_struct *owner = rt_mutex_owner(lock);
0224 
0225     clear_rt_mutex_waiters(lock);
0226     raw_spin_unlock_irqrestore(&lock->wait_lock, flags);
0227     /*
0228      * If a new waiter comes in between the unlock and the cmpxchg
0229      * we have two situations:
0230      *
0231      * unlock(wait_lock);
0232      *                  lock(wait_lock);
0233      * cmpxchg(p, owner, 0) == owner
0234      *                  mark_rt_mutex_waiters(lock);
0235      *                  acquire(lock);
0236      * or:
0237      *
0238      * unlock(wait_lock);
0239      *                  lock(wait_lock);
0240      *                  mark_rt_mutex_waiters(lock);
0241      *
0242      * cmpxchg(p, owner, 0) != owner
0243      *                  enqueue_waiter();
0244      *                  unlock(wait_lock);
0245      * lock(wait_lock);
0246      * wake waiter();
0247      * unlock(wait_lock);
0248      *                  lock(wait_lock);
0249      *                  acquire(lock);
0250      */
0251     return rt_mutex_cmpxchg_release(lock, owner, NULL);
0252 }
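/*
 * The caller retries when the cmpxchg fails, e.g. (as in
 * rt_mutex_slowunlock() below):
 *
 *	while (!rt_mutex_has_waiters(lock)) {
 *		if (unlock_rt_mutex_safe(lock, flags) == true)
 *			return;
 *		raw_spin_lock_irqsave(&lock->wait_lock, flags);
 *	}
 */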
0253 
0254 #else
0255 static __always_inline bool rt_mutex_cmpxchg_acquire(struct rt_mutex_base *lock,
0256                              struct task_struct *old,
0257                              struct task_struct *new)
0258 {
0259     return false;
0260 
0261 }
0262 
0263 static __always_inline bool rt_mutex_cmpxchg_release(struct rt_mutex_base *lock,
0264                              struct task_struct *old,
0265                              struct task_struct *new)
0266 {
0267     return false;
0268 }
0269 
0270 static __always_inline void mark_rt_mutex_waiters(struct rt_mutex_base *lock)
0271 {
0272     lock->owner = (struct task_struct *)
0273             ((unsigned long)lock->owner | RT_MUTEX_HAS_WAITERS);
0274 }
0275 
0276 /*
0277  * Simple slow path only version: lock->owner is protected by lock->wait_lock.
0278  */
0279 static __always_inline bool unlock_rt_mutex_safe(struct rt_mutex_base *lock,
0280                          unsigned long flags)
0281     __releases(lock->wait_lock)
0282 {
0283     lock->owner = NULL;
0284     raw_spin_unlock_irqrestore(&lock->wait_lock, flags);
0285     return true;
0286 }
0287 #endif
0288 
0289 static __always_inline int __waiter_prio(struct task_struct *task)
0290 {
0291     int prio = task->prio;
0292 
0293     if (!rt_prio(prio))
0294         return DEFAULT_PRIO;
0295 
0296     return prio;
0297 }
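/*
 * Note that nice levels are deliberately ignored: every !rt_prio() task is
 * mapped to DEFAULT_PRIO, so SCHED_OTHER waiters compare equal regardless of
 * their nice value. For illustration:
 *
 *	task->prio ==  50 (SCHED_FIFO)   -> waiter->prio =  50
 *	task->prio == 101 (nice -19)     -> waiter->prio = 120
 *	task->prio == 139 (nice  19)     -> waiter->prio = 120
 */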
0298 
0299 static __always_inline void
0300 waiter_update_prio(struct rt_mutex_waiter *waiter, struct task_struct *task)
0301 {
0302     waiter->prio = __waiter_prio(task);
0303     waiter->deadline = task->dl.deadline;
0304 }
0305 
0306 /*
0307  * Only use with rt_mutex_waiter_{less,equal}()
0308  */
0309 #define task_to_waiter(p)   \
0310     &(struct rt_mutex_waiter){ .prio = __waiter_prio(p), .deadline = (p)->dl.deadline }
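/*
 * Example usage (both appear further down in this file):
 *
 *	if (rt_mutex_waiter_equal(waiter, task_to_waiter(task)))
 *		...
 *	if (!rt_mutex_steal(task_to_waiter(task), rt_mutex_top_waiter(lock)))
 *		...
 */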
0311 
0312 static __always_inline int rt_mutex_waiter_less(struct rt_mutex_waiter *left,
0313                         struct rt_mutex_waiter *right)
0314 {
0315     if (left->prio < right->prio)
0316         return 1;
0317 
0318     /*
0319      * If both waiters have dl_prio(), we check the deadlines of the
0320      * associated tasks.
0321      * If left waiter has a dl_prio(), and we didn't return 1 above,
0322      * then right waiter has a dl_prio() too.
0323      */
0324     if (dl_prio(left->prio))
0325         return dl_time_before(left->deadline, right->deadline);
0326 
0327     return 0;
0328 }
0329 
0330 static __always_inline int rt_mutex_waiter_equal(struct rt_mutex_waiter *left,
0331                          struct rt_mutex_waiter *right)
0332 {
0333     if (left->prio != right->prio)
0334         return 0;
0335 
0336     /*
0337      * If both waiters have dl_prio(), we check the deadlines of the
0338      * associated tasks.
0339      * If left waiter has a dl_prio(), and we didn't return 0 above,
0340      * then right waiter has a dl_prio() too.
0341      */
0342     if (dl_prio(left->prio))
0343         return left->deadline == right->deadline;
0344 
0345     return 1;
0346 }
0347 
0348 static inline bool rt_mutex_steal(struct rt_mutex_waiter *waiter,
0349                   struct rt_mutex_waiter *top_waiter)
0350 {
0351     if (rt_mutex_waiter_less(waiter, top_waiter))
0352         return true;
0353 
0354 #ifdef RT_MUTEX_BUILD_SPINLOCKS
0355     /*
0356      * Note that RT tasks are excluded from same priority (lateral)
0357      * steals to prevent the introduction of an unbounded latency.
0358      */
0359     if (rt_prio(waiter->prio) || dl_prio(waiter->prio))
0360         return false;
0361 
0362     return rt_mutex_waiter_equal(waiter, top_waiter);
0363 #else
0364     return false;
0365 #endif
0366 }
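/*
 * For illustration, on an rtlock build (RT_MUTEX_BUILD_SPINLOCKS):
 *
 *	waiter->prio	top_waiter->prio	steal?
 *	 50 (FIFO)	 60 (FIFO)		yes (strictly higher prio)
 *	 60 (FIFO)	 60 (FIFO)		no  (no lateral RT steal)
 *	120 (OTHER)	120 (OTHER)		yes (lateral steal allowed)
 *
 * On the mutex/rwsem builds only the first case succeeds.
 */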
0367 
0368 #define __node_2_waiter(node) \
0369     rb_entry((node), struct rt_mutex_waiter, tree_entry)
0370 
0371 static __always_inline bool __waiter_less(struct rb_node *a, const struct rb_node *b)
0372 {
0373     struct rt_mutex_waiter *aw = __node_2_waiter(a);
0374     struct rt_mutex_waiter *bw = __node_2_waiter(b);
0375 
0376     if (rt_mutex_waiter_less(aw, bw))
0377         return 1;
0378 
0379     if (!build_ww_mutex())
0380         return 0;
0381 
0382     if (rt_mutex_waiter_less(bw, aw))
0383         return 0;
0384 
0385     /* NOTE: relies on waiter->ww_ctx being set before insertion */
0386     if (aw->ww_ctx) {
0387         if (!bw->ww_ctx)
0388             return 1;
0389 
0390         return (signed long)(aw->ww_ctx->stamp -
0391                      bw->ww_ctx->stamp) < 0;
0392     }
0393 
0394     return 0;
0395 }
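/*
 * For ww_mutex builds the tail of __waiter_less() adds a tie-break among
 * equal-priority waiters: a waiter with a ww_ctx sorts before one without,
 * and between two contexts the older stamp wins, e.g.
 *
 *	aw->ww_ctx->stamp == 41, bw->ww_ctx->stamp == 42  ->  aw sorts first
 */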
0396 
0397 static __always_inline void
0398 rt_mutex_enqueue(struct rt_mutex_base *lock, struct rt_mutex_waiter *waiter)
0399 {
0400     rb_add_cached(&waiter->tree_entry, &lock->waiters, __waiter_less);
0401 }
0402 
0403 static __always_inline void
0404 rt_mutex_dequeue(struct rt_mutex_base *lock, struct rt_mutex_waiter *waiter)
0405 {
0406     if (RB_EMPTY_NODE(&waiter->tree_entry))
0407         return;
0408 
0409     rb_erase_cached(&waiter->tree_entry, &lock->waiters);
0410     RB_CLEAR_NODE(&waiter->tree_entry);
0411 }
0412 
0413 #define __node_2_pi_waiter(node) \
0414     rb_entry((node), struct rt_mutex_waiter, pi_tree_entry)
0415 
0416 static __always_inline bool
0417 __pi_waiter_less(struct rb_node *a, const struct rb_node *b)
0418 {
0419     return rt_mutex_waiter_less(__node_2_pi_waiter(a), __node_2_pi_waiter(b));
0420 }
0421 
0422 static __always_inline void
0423 rt_mutex_enqueue_pi(struct task_struct *task, struct rt_mutex_waiter *waiter)
0424 {
0425     rb_add_cached(&waiter->pi_tree_entry, &task->pi_waiters, __pi_waiter_less);
0426 }
0427 
0428 static __always_inline void
0429 rt_mutex_dequeue_pi(struct task_struct *task, struct rt_mutex_waiter *waiter)
0430 {
0431     if (RB_EMPTY_NODE(&waiter->pi_tree_entry))
0432         return;
0433 
0434     rb_erase_cached(&waiter->pi_tree_entry, &task->pi_waiters);
0435     RB_CLEAR_NODE(&waiter->pi_tree_entry);
0436 }
0437 
0438 static __always_inline void rt_mutex_adjust_prio(struct task_struct *p)
0439 {
0440     struct task_struct *pi_task = NULL;
0441 
0442     lockdep_assert_held(&p->pi_lock);
0443 
0444     if (task_has_pi_waiters(p))
0445         pi_task = task_top_pi_waiter(p)->task;
0446 
0447     rt_mutex_setprio(p, pi_task);
0448 }
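/*
 * For illustration: if p is SCHED_OTHER (prio 120) and its top pi waiter is
 * a SCHED_FIFO task at prio 50, rt_mutex_setprio() boosts p to 50; once p
 * has no pi waiters left, pi_task is NULL and p drops back to its normal
 * priority.
 */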
0449 
0450 /* RT mutex specific wake_q wrappers */
0451 static __always_inline void rt_mutex_wake_q_add_task(struct rt_wake_q_head *wqh,
0452                              struct task_struct *task,
0453                              unsigned int wake_state)
0454 {
0455     if (IS_ENABLED(CONFIG_PREEMPT_RT) && wake_state == TASK_RTLOCK_WAIT) {
0456         if (IS_ENABLED(CONFIG_PROVE_LOCKING))
0457             WARN_ON_ONCE(wqh->rtlock_task);
0458         get_task_struct(task);
0459         wqh->rtlock_task = task;
0460     } else {
0461         wake_q_add(&wqh->head, task);
0462     }
0463 }
0464 
0465 static __always_inline void rt_mutex_wake_q_add(struct rt_wake_q_head *wqh,
0466                         struct rt_mutex_waiter *w)
0467 {
0468     rt_mutex_wake_q_add_task(wqh, w->task, w->wake_state);
0469 }
0470 
0471 static __always_inline void rt_mutex_wake_up_q(struct rt_wake_q_head *wqh)
0472 {
0473     if (IS_ENABLED(CONFIG_PREEMPT_RT) && wqh->rtlock_task) {
0474         wake_up_state(wqh->rtlock_task, TASK_RTLOCK_WAIT);
0475         put_task_struct(wqh->rtlock_task);
0476         wqh->rtlock_task = NULL;
0477     }
0478 
0479     if (!wake_q_empty(&wqh->head))
0480         wake_up_q(&wqh->head);
0481 
0482     /* Pairs with preempt_disable() in mark_wakeup_next_waiter() */
0483     preempt_enable();
0484 }
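/*
 * Summary of the two wake channels handled above:
 *
 *	wake_state		queued via		woken by
 *	TASK_RTLOCK_WAIT	wqh->rtlock_task	wake_up_state()
 *	anything else		wqh->head		wake_up_q()
 *
 * The rtlock_task slot only exists on PREEMPT_RT and holds at most one task
 * per wakeup batch.
 */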
0485 
0486 /*
0487  * Deadlock detection is conditional:
0488  *
0489  * If CONFIG_DEBUG_RT_MUTEXES=n, deadlock detection is only conducted
0490  * if the detect argument is RT_MUTEX_FULL_CHAINWALK.
0491  *
0492  * If CONFIG_DEBUG_RT_MUTEXES=y, deadlock detection is always
0493  * conducted independent of the detect argument.
0494  *
0495  * If the waiter argument is NULL this indicates the deboost path and
0496  * deadlock detection is disabled independent of the detect argument
0497  * and the config settings.
0498  */
0499 static __always_inline bool
0500 rt_mutex_cond_detect_deadlock(struct rt_mutex_waiter *waiter,
0501                   enum rtmutex_chainwalk chwalk)
0502 {
0503     if (IS_ENABLED(CONFIG_DEBUG_RT_MUTEXES))
0504         return waiter != NULL;
0505     return chwalk == RT_MUTEX_FULL_CHAINWALK;
0506 }
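/*
 * Resulting behaviour, for illustration:
 *
 *	CONFIG_DEBUG_RT_MUTEXES	waiter		chwalk		detect?
 *	n			any		MIN_CHAINWALK	no
 *	n			any		FULL_CHAINWALK	yes
 *	y			!= NULL		any		yes
 *	y			NULL (deboost)	any		no
 */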
0507 
0508 static __always_inline struct rt_mutex_base *task_blocked_on_lock(struct task_struct *p)
0509 {
0510     return p->pi_blocked_on ? p->pi_blocked_on->lock : NULL;
0511 }
0512 
0513 /*
0514  * Adjust the priority chain. Also used for deadlock detection.
0515  * Decreases task's usage by one - may thus free the task.
0516  *
0517  * @task:   the task owning the mutex (owner) for which a chain walk is
0518  *      probably needed
0519  * @chwalk: do we have to carry out deadlock detection?
0520  * @orig_lock:  the mutex (can be NULL if we are walking the chain to recheck
0521  *      things for a task that has just got its priority adjusted, and
0522  *      is waiting on a mutex)
0523  * @next_lock:  the mutex on which the owner of @orig_lock was blocked before
0524  *      we dropped its pi_lock. Is never dereferenced, only used for
0525  *      comparison to detect lock chain changes.
0526  * @orig_waiter: rt_mutex_waiter struct for the task that has just donated
0527  *      its priority to the mutex owner (can be NULL in the case
0528  *      depicted above or if the top waiter has gone away and we are
0529  *      actually deboosting the owner)
0530  * @top_task:   the current top waiter
0531  *
0532  * Returns 0 or -EDEADLK.
0533  *
0534  * Chain walk basics and protection scope
0535  *
0536  * [R] refcount on task
0537  * [P] task->pi_lock held
0538  * [L] rtmutex->wait_lock held
0539  *
0540  * Step Description             Protected by
0541  *  function arguments:
0542  *  @task                   [R]
0543  *  @orig_lock if != NULL           @top_task is blocked on it
0544  *  @next_lock              Unprotected. Cannot be
0545  *                      dereferenced. Only used for
0546  *                      comparison.
0547  *  @orig_waiter if != NULL         @top_task is blocked on it
0548  *  @top_task               current, or in case of proxy
0549  *                      locking protected by calling
0550  *                      code
0551  *  again:
0552  *    loop_sanity_check();
0553  *  retry:
0554  * [1]    lock(task->pi_lock);          [R] acquire [P]
0555  * [2]    waiter = task->pi_blocked_on;     [P]
0556  * [3]    check_exit_conditions_1();        [P]
0557  * [4]    lock = waiter->lock;          [P]
0558  * [5]    if (!try_lock(lock->wait_lock)) { [P] try to acquire [L]
0559  *      unlock(task->pi_lock);      release [P]
0560  *      goto retry;
0561  *    }
0562  * [6]    check_exit_conditions_2();        [P] + [L]
0563  * [7]    requeue_lock_waiter(lock, waiter);    [P] + [L]
0564  * [8]    unlock(task->pi_lock);        release [P]
0565  *    put_task_struct(task);        release [R]
0566  * [9]    check_exit_conditions_3();        [L]
0567  * [10]   task = owner(lock);           [L]
0568  *    get_task_struct(task);        [L] acquire [R]
0569  *    lock(task->pi_lock);          [L] acquire [P]
0570  * [11]   requeue_pi_waiter(tsk, waiters(lock));[P] + [L]
0571  * [12]   check_exit_conditions_4();        [P] + [L]
0572  * [13]   unlock(task->pi_lock);        release [P]
0573  *    unlock(lock->wait_lock);      release [L]
0574  *    goto again;
0575  */
0576 static int __sched rt_mutex_adjust_prio_chain(struct task_struct *task,
0577                           enum rtmutex_chainwalk chwalk,
0578                           struct rt_mutex_base *orig_lock,
0579                           struct rt_mutex_base *next_lock,
0580                           struct rt_mutex_waiter *orig_waiter,
0581                           struct task_struct *top_task)
0582 {
0583     struct rt_mutex_waiter *waiter, *top_waiter = orig_waiter;
0584     struct rt_mutex_waiter *prerequeue_top_waiter;
0585     int ret = 0, depth = 0;
0586     struct rt_mutex_base *lock;
0587     bool detect_deadlock;
0588     bool requeue = true;
0589 
0590     detect_deadlock = rt_mutex_cond_detect_deadlock(orig_waiter, chwalk);
0591 
0592     /*
0593      * The (de)boosting is a step by step approach with a lot of
0594      * pitfalls. We want this to be preemptible and we want to hold a
0595      * maximum of two locks per step. So we have to check
0596      * carefully whether things change under us.
0597      */
0598  again:
0599     /*
0600      * We limit the lock chain length for each invocation.
0601      */
0602     if (++depth > max_lock_depth) {
0603         static int prev_max;
0604 
0605         /*
0606          * Print this only once. If the admin changes the limit,
0607          * print a new message when reaching the limit again.
0608          */
0609         if (prev_max != max_lock_depth) {
0610             prev_max = max_lock_depth;
0611             printk(KERN_WARNING "Maximum lock depth %d reached "
0612                    "task: %s (%d)\n", max_lock_depth,
0613                    top_task->comm, task_pid_nr(top_task));
0614         }
0615         put_task_struct(task);
0616 
0617         return -EDEADLK;
0618     }
0619 
0620     /*
0621      * We are fully preemptible here and only hold the refcount on
0622      * @task. So everything can have changed under us since the
0623      * caller or our own code below (goto retry/again) dropped all
0624      * locks.
0625      */
0626  retry:
0627     /*
0628      * [1] Task cannot go away as we did a get_task() before !
0629      */
0630     raw_spin_lock_irq(&task->pi_lock);
0631 
0632     /*
0633      * [2] Get the waiter on which @task is blocked on.
0634      */
0635     waiter = task->pi_blocked_on;
0636 
0637     /*
0638      * [3] check_exit_conditions_1() protected by task->pi_lock.
0639      */
0640 
0641     /*
0642      * Check whether the end of the boosting chain has been
0643      * reached or the state of the chain has changed while we
0644      * dropped the locks.
0645      */
0646     if (!waiter)
0647         goto out_unlock_pi;
0648 
0649     /*
0650      * Check the orig_waiter state. After we dropped the locks,
0651      * the previous owner of the lock might have released the lock.
0652      */
0653     if (orig_waiter && !rt_mutex_owner(orig_lock))
0654         goto out_unlock_pi;
0655 
0656     /*
0657      * We dropped all locks after taking a refcount on @task, so
0658      * the task might have moved on in the lock chain or even left
0659      * the chain completely and blocks now on an unrelated lock or
0660      * on @orig_lock.
0661      *
0662      * We stored the lock on which @task was blocked in @next_lock,
0663      * so we can detect the chain change.
0664      */
0665     if (next_lock != waiter->lock)
0666         goto out_unlock_pi;
0667 
0668     /*
0669      * There could be 'spurious' loops in the lock graph due to ww_mutex,
0670      * consider:
0671      *
0672      *   P1: A, ww_A, ww_B
0673      *   P2: ww_B, ww_A
0674      *   P3: A
0675      *
0676      * P3 should not return -EDEADLK because it gets trapped in the cycle
0677      * created by P1 and P2 (which will resolve -- and runs into
0678      * max_lock_depth above). Therefore disable detect_deadlock such that
0679      * the below termination condition can trigger once all relevant tasks
0680      * are boosted.
0681      *
0682      * Even when we start with ww_mutex we can disable deadlock detection,
0683      * since we would suppress a ww_mutex induced deadlock at [6] anyway.
0684      * Suppressing it here however is not sufficient since we might still
0685      * hit [6] due to adjustment driven iteration.
0686      *
0687      * NOTE: if someone were to create a deadlock between 2 ww_classes we'd
0688      * utterly fail to report it; lockdep should.
0689      */
0690     if (IS_ENABLED(CONFIG_PREEMPT_RT) && waiter->ww_ctx && detect_deadlock)
0691         detect_deadlock = false;
0692 
0693     /*
0694      * Drop out, when the task has no waiters. Note,
0695      * top_waiter can be NULL, when we are in the deboosting
0696      * mode!
0697      */
0698     if (top_waiter) {
0699         if (!task_has_pi_waiters(task))
0700             goto out_unlock_pi;
0701         /*
0702          * If deadlock detection is off, we stop here if we
0703          * are not the top pi waiter of the task. If deadlock
0704          * detection is enabled we continue, but stop the
0705          * requeueing in the chain walk.
0706          */
0707         if (top_waiter != task_top_pi_waiter(task)) {
0708             if (!detect_deadlock)
0709                 goto out_unlock_pi;
0710             else
0711                 requeue = false;
0712         }
0713     }
0714 
0715     /*
0716      * If the waiter priority is the same as the task priority
0717      * then there is no further priority adjustment necessary.  If
0718      * deadlock detection is off, we stop the chain walk. If it's
0719      * enabled we continue, but stop the requeueing in the chain
0720      * walk.
0721      */
0722     if (rt_mutex_waiter_equal(waiter, task_to_waiter(task))) {
0723         if (!detect_deadlock)
0724             goto out_unlock_pi;
0725         else
0726             requeue = false;
0727     }
0728 
0729     /*
0730      * [4] Get the next lock
0731      */
0732     lock = waiter->lock;
0733     /*
0734      * [5] We need to trylock here as we are holding task->pi_lock,
0735      * which is the reverse lock order versus the other rtmutex
0736      * operations.
0737      */
0738     if (!raw_spin_trylock(&lock->wait_lock)) {
0739         raw_spin_unlock_irq(&task->pi_lock);
0740         cpu_relax();
0741         goto retry;
0742     }
0743 
0744     /*
0745      * [6] check_exit_conditions_2() protected by task->pi_lock and
0746      * lock->wait_lock.
0747      *
0748      * Deadlock detection. If the lock is the same as the original
0749      * lock which caused us to walk the lock chain or if the
0750      * current lock is owned by the task which initiated the chain
0751      * walk, we detected a deadlock.
0752      */
0753     if (lock == orig_lock || rt_mutex_owner(lock) == top_task) {
0754         ret = -EDEADLK;
0755 
0756         /*
0757          * When the deadlock is due to ww_mutex; also see above. Don't
0758          * report the deadlock and instead let the ww_mutex wound/die
0759          * logic pick which of the contending threads gets -EDEADLK.
0760          *
0761          * NOTE: assumes the cycle only contains a single ww_class; any
0762          * other configuration and we fail to report; also, see
0763          * lockdep.
0764          */
0765         if (IS_ENABLED(CONFIG_PREEMPT_RT) && orig_waiter && orig_waiter->ww_ctx)
0766             ret = 0;
0767 
0768         raw_spin_unlock(&lock->wait_lock);
0769         goto out_unlock_pi;
0770     }
0771 
0772     /*
0773      * If we just follow the lock chain for deadlock detection, no
0774      * need to do all the requeue operations. To avoid a truckload
0775      * of conditionals around the various places below, just do the
0776      * minimum chain walk checks.
0777      */
0778     if (!requeue) {
0779         /*
0780          * No requeue [7] here. Just release @task [8].
0781          */
0782         raw_spin_unlock(&task->pi_lock);
0783         put_task_struct(task);
0784 
0785         /*
0786          * [9] check_exit_conditions_3 protected by lock->wait_lock.
0787          * If there is no owner of the lock, end of chain.
0788          */
0789         if (!rt_mutex_owner(lock)) {
0790             raw_spin_unlock_irq(&lock->wait_lock);
0791             return 0;
0792         }
0793 
0794         /* [10] Grab the next task, i.e. owner of @lock */
0795         task = get_task_struct(rt_mutex_owner(lock));
0796         raw_spin_lock(&task->pi_lock);
0797 
0798         /*
0799          * No requeue [11] here. We just do deadlock detection.
0800          *
0801          * [12] Store whether owner is blocked
0802          * itself. Decision is made after dropping the locks
0803          */
0804         next_lock = task_blocked_on_lock(task);
0805         /*
0806          * Get the top waiter for the next iteration
0807          */
0808         top_waiter = rt_mutex_top_waiter(lock);
0809 
0810         /* [13] Drop locks */
0811         raw_spin_unlock(&task->pi_lock);
0812         raw_spin_unlock_irq(&lock->wait_lock);
0813 
0814         /* If owner is not blocked, end of chain. */
0815         if (!next_lock)
0816             goto out_put_task;
0817         goto again;
0818     }
0819 
0820     /*
0821      * Store the current top waiter before doing the requeue
0822      * operation on @lock. We need it for the boost/deboost
0823      * decision below.
0824      */
0825     prerequeue_top_waiter = rt_mutex_top_waiter(lock);
0826 
0827     /* [7] Requeue the waiter in the lock waiter tree. */
0828     rt_mutex_dequeue(lock, waiter);
0829 
0830     /*
0831      * Update the waiter prio fields now that we're dequeued.
0832      *
0833      * These values can have changed through either:
0834      *
0835      *   sys_sched_set_scheduler() / sys_sched_setattr()
0836      *
0837      * or
0838      *
0839      *   DL CBS enforcement advancing the effective deadline.
0840      *
0841      * Even though pi_waiters also uses these fields, and that tree is only
0842      * updated in [11], we can do this here, since we hold [L], which
0843      * serializes all pi_waiters access and rb_erase() does not care about
0844      * the values of the node being removed.
0845      */
0846     waiter_update_prio(waiter, task);
0847 
0848     rt_mutex_enqueue(lock, waiter);
0849 
0850     /* [8] Release the task */
0851     raw_spin_unlock(&task->pi_lock);
0852     put_task_struct(task);
0853 
0854     /*
0855      * [9] check_exit_conditions_3 protected by lock->wait_lock.
0856      *
0857      * We must abort the chain walk if there is no lock owner even
0858      * in the dead lock detection case, as we have nothing to
0859      * follow here. This is the end of the chain we are walking.
0860      */
0861     if (!rt_mutex_owner(lock)) {
0862         /*
0863          * If the requeue [7] above changed the top waiter,
0864          * then we need to wake the new top waiter up to try
0865          * to get the lock.
0866          */
0867         if (prerequeue_top_waiter != rt_mutex_top_waiter(lock))
0868             wake_up_state(waiter->task, waiter->wake_state);
0869         raw_spin_unlock_irq(&lock->wait_lock);
0870         return 0;
0871     }
0872 
0873     /* [10] Grab the next task, i.e. the owner of @lock */
0874     task = get_task_struct(rt_mutex_owner(lock));
0875     raw_spin_lock(&task->pi_lock);
0876 
0877     /* [11] requeue the pi waiters if necessary */
0878     if (waiter == rt_mutex_top_waiter(lock)) {
0879         /*
0880          * The waiter became the new top (highest priority)
0881          * waiter on the lock. Replace the previous top waiter
0882          * in the owner task's pi_waiters tree with this waiter
0883          * and adjust the priority of the owner.
0884          */
0885         rt_mutex_dequeue_pi(task, prerequeue_top_waiter);
0886         rt_mutex_enqueue_pi(task, waiter);
0887         rt_mutex_adjust_prio(task);
0888 
0889     } else if (prerequeue_top_waiter == waiter) {
0890         /*
0891          * The waiter was the top waiter on the lock, but is
0892          * no longer the top priority waiter. Replace waiter in
0893          * the owner task's pi_waiters tree with the new top
0894          * (highest priority) waiter and adjust the priority
0895          * of the owner.
0896          * The new top waiter is stored in @waiter so that
0897          * @waiter == @top_waiter evaluates to true below and
0898          * we continue to deboost the rest of the chain.
0899          */
0900         rt_mutex_dequeue_pi(task, waiter);
0901         waiter = rt_mutex_top_waiter(lock);
0902         rt_mutex_enqueue_pi(task, waiter);
0903         rt_mutex_adjust_prio(task);
0904     } else {
0905         /*
0906          * Nothing changed. No need to do any priority
0907          * adjustment.
0908          */
0909     }
0910 
0911     /*
0912      * [12] check_exit_conditions_4() protected by task->pi_lock
0913      * and lock->wait_lock. The actual decisions are made after we
0914      * dropped the locks.
0915      *
0916      * Check whether the task which owns the current lock is pi
0917      * blocked itself. If yes we store a pointer to the lock for
0918      * the lock chain change detection above. After we dropped
0919      * task->pi_lock next_lock cannot be dereferenced anymore.
0920      */
0921     next_lock = task_blocked_on_lock(task);
0922     /*
0923      * Store the top waiter of @lock for the end of chain walk
0924      * decision below.
0925      */
0926     top_waiter = rt_mutex_top_waiter(lock);
0927 
0928     /* [13] Drop the locks */
0929     raw_spin_unlock(&task->pi_lock);
0930     raw_spin_unlock_irq(&lock->wait_lock);
0931 
0932     /*
0933      * Make the actual exit decisions [12], based on the stored
0934      * values.
0935      *
0936      * We reached the end of the lock chain. Stop right here. No
0937      * point to go back just to figure that out.
0938      */
0939     if (!next_lock)
0940         goto out_put_task;
0941 
0942     /*
0943      * If the current waiter is not the top waiter on the lock,
0944      * then we can stop the chain walk here if we are not in full
0945      * deadlock detection mode.
0946      */
0947     if (!detect_deadlock && waiter != top_waiter)
0948         goto out_put_task;
0949 
0950     goto again;
0951 
0952  out_unlock_pi:
0953     raw_spin_unlock_irq(&task->pi_lock);
0954  out_put_task:
0955     put_task_struct(task);
0956 
0957     return ret;
0958 }
0959 
0960 /*
0961  * Try to take an rt-mutex
0962  *
0963  * Must be called with lock->wait_lock held and interrupts disabled
0964  *
0965  * @lock:   The lock to be acquired.
0966  * @task:   The task which wants to acquire the lock
0967  * @waiter: The waiter that is queued to the lock's wait tree if the
0968  *      callsite called task_blocks_on_rt_mutex(), otherwise NULL
0969  */
0970 static int __sched
0971 try_to_take_rt_mutex(struct rt_mutex_base *lock, struct task_struct *task,
0972              struct rt_mutex_waiter *waiter)
0973 {
0974     lockdep_assert_held(&lock->wait_lock);
0975 
0976     /*
0977      * Before testing whether we can acquire @lock, we set the
0978      * RT_MUTEX_HAS_WAITERS bit in @lock->owner. This forces all
0979      * other tasks which try to modify @lock into the slow path
0980      * and they serialize on @lock->wait_lock.
0981      *
0982      * The RT_MUTEX_HAS_WAITERS bit can have a transitional state
0983      * as explained at the top of this file if and only if:
0984      *
0985      * - There is a lock owner. The caller must fixup the
0986      *   transient state if it does a trylock or leaves the lock
0987      *   function due to a signal or timeout.
0988      *
0989      * - @task acquires the lock and there are no other
0990      *   waiters. This is undone in rt_mutex_set_owner(@task) at
0991      *   the end of this function.
0992      */
0993     mark_rt_mutex_waiters(lock);
0994 
0995     /*
0996      * If @lock has an owner, give up.
0997      */
0998     if (rt_mutex_owner(lock))
0999         return 0;
1000 
1001     /*
1002      * If @waiter != NULL, @task has already enqueued the waiter
1003      * into @lock waiter tree. If @waiter == NULL then this is a
1004      * trylock attempt.
1005      */
1006     if (waiter) {
1007         struct rt_mutex_waiter *top_waiter = rt_mutex_top_waiter(lock);
1008 
1009         /*
1010          * If waiter is the highest priority waiter of @lock,
1011          * or allowed to steal it, take it over.
1012          */
1013         if (waiter == top_waiter || rt_mutex_steal(waiter, top_waiter)) {
1014             /*
1015              * We can acquire the lock. Remove the waiter from the
1016              * lock waiters tree.
1017              */
1018             rt_mutex_dequeue(lock, waiter);
1019         } else {
1020             return 0;
1021         }
1022     } else {
1023         /*
1024          * If the lock has waiters already we check whether @task is
1025          * eligible to take over the lock.
1026          *
1027          * If there are no other waiters, @task can acquire
1028          * the lock.  @task->pi_blocked_on is NULL, so it does
1029          * not need to be dequeued.
1030          */
1031         if (rt_mutex_has_waiters(lock)) {
1032             /* Check whether the trylock can steal it. */
1033             if (!rt_mutex_steal(task_to_waiter(task),
1034                         rt_mutex_top_waiter(lock)))
1035                 return 0;
1036 
1037             /*
1038              * The current top waiter stays enqueued. We
1039              * don't have to change anything in the lock
1040              * waiters order.
1041              */
1042         } else {
1043             /*
1044              * No waiters. Take the lock without the
1045              * pi_lock dance. @task->pi_blocked_on is NULL
1046              * and we have no waiters to enqueue in @task
1047              * pi waiters tree.
1048              */
1049             goto takeit;
1050         }
1051     }
1052 
1053     /*
1054      * Clear @task->pi_blocked_on. Requires protection by
1055      * @task->pi_lock. Redundant operation for the @waiter == NULL
1056      * case, but conditionals are more expensive than a redundant
1057      * store.
1058      */
1059     raw_spin_lock(&task->pi_lock);
1060     task->pi_blocked_on = NULL;
1061     /*
1062      * Finish the lock acquisition. @task is the new owner. If
1063      * other waiters exist we have to insert the highest priority
1064      * waiter into @task->pi_waiters tree.
1065      */
1066     if (rt_mutex_has_waiters(lock))
1067         rt_mutex_enqueue_pi(task, rt_mutex_top_waiter(lock));
1068     raw_spin_unlock(&task->pi_lock);
1069 
1070 takeit:
1071     /*
1072      * This either preserves the RT_MUTEX_HAS_WAITERS bit if there
1073      * are still waiters or clears it.
1074      */
1075     rt_mutex_set_owner(lock, task);
1076 
1077     return 1;
1078 }
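/*
 * Both call modes appear below: the trylock paths pass waiter == NULL, while
 * the wait loops pass the already enqueued waiter, e.g.
 *
 *	ret = try_to_take_rt_mutex(lock, current, NULL);	trylock
 *
 *	if (try_to_take_rt_mutex(lock, current, waiter))	wait loop
 *		break;
 */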
1079 
1080 /*
1081  * Task blocks on lock.
1082  *
1083  * Prepare waiter and propagate pi chain
1084  *
1085  * This must be called with lock->wait_lock held and interrupts disabled
1086  */
1087 static int __sched task_blocks_on_rt_mutex(struct rt_mutex_base *lock,
1088                        struct rt_mutex_waiter *waiter,
1089                        struct task_struct *task,
1090                        struct ww_acquire_ctx *ww_ctx,
1091                        enum rtmutex_chainwalk chwalk)
1092 {
1093     struct task_struct *owner = rt_mutex_owner(lock);
1094     struct rt_mutex_waiter *top_waiter = waiter;
1095     struct rt_mutex_base *next_lock;
1096     int chain_walk = 0, res;
1097 
1098     lockdep_assert_held(&lock->wait_lock);
1099 
1100     /*
1101      * Early deadlock detection. We really don't want the task to
1102      * enqueue on itself just to untangle the mess later. It's not
1103      * only an optimization. We drop the locks, so another waiter
1104      * can come in before the chain walk detects the deadlock. That
1105      * other waiter would then detect the deadlock and return -EDEADLOCK,
1106      * which is wrong, as it is not actually in a deadlock
1107      * situation.
1108      *
1109      * Except for ww_mutex, in that case the chain walk must already deal
1110      * with spurious cycles, see the comments at [3] and [6].
1111      */
1112     if (owner == task && !(build_ww_mutex() && ww_ctx))
1113         return -EDEADLK;
1114 
1115     raw_spin_lock(&task->pi_lock);
1116     waiter->task = task;
1117     waiter->lock = lock;
1118     waiter_update_prio(waiter, task);
1119 
1120     /* Get the top priority waiter on the lock */
1121     if (rt_mutex_has_waiters(lock))
1122         top_waiter = rt_mutex_top_waiter(lock);
1123     rt_mutex_enqueue(lock, waiter);
1124 
1125     task->pi_blocked_on = waiter;
1126 
1127     raw_spin_unlock(&task->pi_lock);
1128 
1129     if (build_ww_mutex() && ww_ctx) {
1130         struct rt_mutex *rtm;
1131 
1132         /* Check whether the waiter should back out immediately */
1133         rtm = container_of(lock, struct rt_mutex, rtmutex);
1134         res = __ww_mutex_add_waiter(waiter, rtm, ww_ctx);
1135         if (res) {
1136             raw_spin_lock(&task->pi_lock);
1137             rt_mutex_dequeue(lock, waiter);
1138             task->pi_blocked_on = NULL;
1139             raw_spin_unlock(&task->pi_lock);
1140             return res;
1141         }
1142     }
1143 
1144     if (!owner)
1145         return 0;
1146 
1147     raw_spin_lock(&owner->pi_lock);
1148     if (waiter == rt_mutex_top_waiter(lock)) {
1149         rt_mutex_dequeue_pi(owner, top_waiter);
1150         rt_mutex_enqueue_pi(owner, waiter);
1151 
1152         rt_mutex_adjust_prio(owner);
1153         if (owner->pi_blocked_on)
1154             chain_walk = 1;
1155     } else if (rt_mutex_cond_detect_deadlock(waiter, chwalk)) {
1156         chain_walk = 1;
1157     }
1158 
1159     /* Store the lock on which owner is blocked or NULL */
1160     next_lock = task_blocked_on_lock(owner);
1161 
1162     raw_spin_unlock(&owner->pi_lock);
1163     /*
1164      * Even if full deadlock detection is on, if the owner is not
1165      * blocked itself, we can avoid finding this out in the chain
1166      * walk.
1167      */
1168     if (!chain_walk || !next_lock)
1169         return 0;
1170 
1171     /*
1172      * The owner can't disappear while holding a lock,
1173      * so the owner struct is protected by wait_lock.
1174      * Gets dropped in rt_mutex_adjust_prio_chain()!
1175      */
1176     get_task_struct(owner);
1177 
1178     raw_spin_unlock_irq(&lock->wait_lock);
1179 
1180     res = rt_mutex_adjust_prio_chain(owner, chwalk, lock,
1181                      next_lock, waiter, task);
1182 
1183     raw_spin_lock_irq(&lock->wait_lock);
1184 
1185     return res;
1186 }
1187 
1188 /*
1189  * Remove the top waiter from the current tasks pi waiter tree and
1190  * queue it up.
1191  *
1192  * Called with lock->wait_lock held and interrupts disabled.
1193  */
1194 static void __sched mark_wakeup_next_waiter(struct rt_wake_q_head *wqh,
1195                         struct rt_mutex_base *lock)
1196 {
1197     struct rt_mutex_waiter *waiter;
1198 
1199     raw_spin_lock(&current->pi_lock);
1200 
1201     waiter = rt_mutex_top_waiter(lock);
1202 
1203     /*
1204      * Remove it from current->pi_waiters and deboost.
1205      *
1206      * We must in fact deboost here in order to ensure we call
1207      * rt_mutex_setprio() to update p->pi_top_task before the
1208      * task unblocks.
1209      */
1210     rt_mutex_dequeue_pi(current, waiter);
1211     rt_mutex_adjust_prio(current);
1212 
1213     /*
1214      * As we are waking up the top waiter, and the waiter stays
1215      * queued on the lock until it gets the lock, this lock
1216      * obviously has waiters. Just set the bit here and this has
1217      * the added benefit of forcing all new tasks into the
1218      * slow path making sure no task of lower priority than
1219      * the top waiter can steal this lock.
1220      */
1221     lock->owner = (void *) RT_MUTEX_HAS_WAITERS;
1222 
1223     /*
1224      * We deboosted before waking the top waiter task such that we don't
1225      * run two tasks with the 'same' priority (and ensure the
1226      * p->pi_top_task pointer points to a blocked task). This however can
1227      * lead to priority inversion if we would get preempted after the
1228      * deboost but before waking our donor task, hence the preempt_disable()
1229      * before unlock.
1230      *
1231      * Pairs with preempt_enable() in rt_mutex_wake_up_q();
1232      */
1233     preempt_disable();
1234     rt_mutex_wake_q_add(wqh, waiter);
1235     raw_spin_unlock(&current->pi_lock);
1236 }
1237 
1238 static int __sched __rt_mutex_slowtrylock(struct rt_mutex_base *lock)
1239 {
1240     int ret = try_to_take_rt_mutex(lock, current, NULL);
1241 
1242     /*
1243      * try_to_take_rt_mutex() sets the lock waiters bit
1244      * unconditionally. Clean this up.
1245      */
1246     fixup_rt_mutex_waiters(lock);
1247 
1248     return ret;
1249 }
1250 
1251 /*
1252  * Slow path try-lock function:
1253  */
1254 static int __sched rt_mutex_slowtrylock(struct rt_mutex_base *lock)
1255 {
1256     unsigned long flags;
1257     int ret;
1258 
1259     /*
1260      * If the lock already has an owner we fail to get the lock.
1261      * This can be done without taking the @lock->wait_lock as
1262      * it is only being read, and this is a trylock anyway.
1263      */
1264     if (rt_mutex_owner(lock))
1265         return 0;
1266 
1267     /*
1268      * The mutex has currently no owner. Lock the wait lock and try to
1269      * acquire the lock. We use irqsave here to support early boot calls.
1270      */
1271     raw_spin_lock_irqsave(&lock->wait_lock, flags);
1272 
1273     ret = __rt_mutex_slowtrylock(lock);
1274 
1275     raw_spin_unlock_irqrestore(&lock->wait_lock, flags);
1276 
1277     return ret;
1278 }
1279 
1280 static __always_inline int __rt_mutex_trylock(struct rt_mutex_base *lock)
1281 {
1282     if (likely(rt_mutex_cmpxchg_acquire(lock, NULL, current)))
1283         return 1;
1284 
1285     return rt_mutex_slowtrylock(lock);
1286 }
1287 
1288 /*
1289  * Slow path to release a rt-mutex.
1290  */
1291 static void __sched rt_mutex_slowunlock(struct rt_mutex_base *lock)
1292 {
1293     DEFINE_RT_WAKE_Q(wqh);
1294     unsigned long flags;
1295 
1296     /* irqsave required to support early boot calls */
1297     raw_spin_lock_irqsave(&lock->wait_lock, flags);
1298 
1299     debug_rt_mutex_unlock(lock);
1300 
1301     /*
1302      * We must be careful here if the fast path is enabled. If we
1303      * have no waiters queued we cannot set owner to NULL here
1304      * because of:
1305      *
1306      * foo->lock->owner = NULL;
1307      *          rtmutex_lock(foo->lock);   <- fast path
1308      *          free = atomic_dec_and_test(foo->refcnt);
1309      *          rtmutex_unlock(foo->lock); <- fast path
1310      *          if (free)
1311      *              kfree(foo);
1312      * raw_spin_unlock(foo->lock->wait_lock);
1313      *
1314      * So for the fastpath enabled kernel:
1315      *
1316      * Nothing can set the waiters bit as long as we hold
1317      * lock->wait_lock. So we do the following sequence:
1318      *
1319      *  owner = rt_mutex_owner(lock);
1320      *  clear_rt_mutex_waiters(lock);
1321      *  raw_spin_unlock(&lock->wait_lock);
1322      *  if (cmpxchg(&lock->owner, owner, 0) == owner)
1323      *      return;
1324      *  goto retry;
1325      *
1326      * The fastpath disabled variant is simple as all access to
1327      * lock->owner is serialized by lock->wait_lock:
1328      *
1329      *  lock->owner = NULL;
1330      *  raw_spin_unlock(&lock->wait_lock);
1331      */
1332     while (!rt_mutex_has_waiters(lock)) {
1333         /* Drops lock->wait_lock ! */
1334         if (unlock_rt_mutex_safe(lock, flags) == true)
1335             return;
1336         /* Relock the rtmutex and try again */
1337         raw_spin_lock_irqsave(&lock->wait_lock, flags);
1338     }
1339 
1340     /*
1341      * The wakeup next waiter path does not suffer from the above
1342      * race. See the comments there.
1343      *
1344      * Queue the next waiter for wakeup once we release the wait_lock.
1345      */
1346     mark_wakeup_next_waiter(&wqh, lock);
1347     raw_spin_unlock_irqrestore(&lock->wait_lock, flags);
1348 
1349     rt_mutex_wake_up_q(&wqh);
1350 }
1351 
1352 static __always_inline void __rt_mutex_unlock(struct rt_mutex_base *lock)
1353 {
1354     if (likely(rt_mutex_cmpxchg_release(lock, current, NULL)))
1355         return;
1356 
1357     rt_mutex_slowunlock(lock);
1358 }
1359 
1360 #ifdef CONFIG_SMP
1361 static bool rtmutex_spin_on_owner(struct rt_mutex_base *lock,
1362                   struct rt_mutex_waiter *waiter,
1363                   struct task_struct *owner)
1364 {
1365     bool res = true;
1366 
1367     rcu_read_lock();
1368     for (;;) {
1369         /* If owner changed, trylock again. */
1370         if (owner != rt_mutex_owner(lock))
1371             break;
1372         /*
1373          * Ensure that @owner is dereferenced after checking that
1374          * the lock owner still matches @owner. If that fails,
1375          * @owner might point to freed memory. If it still matches,
1376          * the rcu_read_lock() ensures the memory stays valid.
1377          */
1378         barrier();
1379         /*
1380          * Stop spinning when:
1381          *  - the lock owner has been scheduled out
1382          *  - current is no longer the top waiter
1383          *  - current is requested to reschedule (redundant
1384          *    for CONFIG_PREEMPT_RCU=y)
1385          *  - the VCPU on which owner runs is preempted
1386          */
1387         if (!owner_on_cpu(owner) || need_resched() ||
1388             !rt_mutex_waiter_is_top_waiter(lock, waiter)) {
1389             res = false;
1390             break;
1391         }
1392         cpu_relax();
1393     }
1394     rcu_read_unlock();
1395     return res;
1396 }
1397 #else
1398 static bool rtmutex_spin_on_owner(struct rt_mutex_base *lock,
1399                   struct rt_mutex_waiter *waiter,
1400                   struct task_struct *owner)
1401 {
1402     return false;
1403 }
1404 #endif
1405 
1406 #ifdef RT_MUTEX_BUILD_MUTEX
1407 /*
1408  * Functions required for:
1409  *  - rtmutex, futex on all kernels
1410  *  - mutex and rwsem substitutions on RT kernels
1411  */
1412 
1413 /*
1414  * Remove a waiter from a lock and give up
1415  *
1416  * Must be called with lock->wait_lock held and interrupts disabled. The caller
1417  * must have just failed try_to_take_rt_mutex().
1418  */
1419 static void __sched remove_waiter(struct rt_mutex_base *lock,
1420                   struct rt_mutex_waiter *waiter)
1421 {
1422     bool is_top_waiter = (waiter == rt_mutex_top_waiter(lock));
1423     struct task_struct *owner = rt_mutex_owner(lock);
1424     struct rt_mutex_base *next_lock;
1425 
1426     lockdep_assert_held(&lock->wait_lock);
1427 
1428     raw_spin_lock(&current->pi_lock);
1429     rt_mutex_dequeue(lock, waiter);
1430     current->pi_blocked_on = NULL;
1431     raw_spin_unlock(&current->pi_lock);
1432 
1433     /*
1434      * Only update priority if the waiter was the highest priority
1435      * waiter of the lock and there is an owner to update.
1436      */
1437     if (!owner || !is_top_waiter)
1438         return;
1439 
1440     raw_spin_lock(&owner->pi_lock);
1441 
1442     rt_mutex_dequeue_pi(owner, waiter);
1443 
1444     if (rt_mutex_has_waiters(lock))
1445         rt_mutex_enqueue_pi(owner, rt_mutex_top_waiter(lock));
1446 
1447     rt_mutex_adjust_prio(owner);
1448 
1449     /* Store the lock on which owner is blocked or NULL */
1450     next_lock = task_blocked_on_lock(owner);
1451 
1452     raw_spin_unlock(&owner->pi_lock);
1453 
1454     /*
1455      * Don't walk the chain, if the owner task is not blocked
1456      * itself.
1457      */
1458     if (!next_lock)
1459         return;
1460 
1461     /* gets dropped in rt_mutex_adjust_prio_chain()! */
1462     get_task_struct(owner);
1463 
1464     raw_spin_unlock_irq(&lock->wait_lock);
1465 
1466     rt_mutex_adjust_prio_chain(owner, RT_MUTEX_MIN_CHAINWALK, lock,
1467                    next_lock, NULL, current);
1468 
1469     raw_spin_lock_irq(&lock->wait_lock);
1470 }
1471 
1472 /**
1473  * rt_mutex_slowlock_block() - Perform the wait-wake-try-to-take loop
1474  * @lock:        the rt_mutex to take
1475  * @ww_ctx:      WW mutex context pointer
1476  * @state:       the state the task should block in (TASK_INTERRUPTIBLE
1477  *           or TASK_UNINTERRUPTIBLE)
1478  * @timeout:         the pre-initialized and started timer, or NULL for none
1479  * @waiter:      the pre-initialized rt_mutex_waiter
1480  *
1481  * Must be called with lock->wait_lock held and interrupts disabled
1482  */
1483 static int __sched rt_mutex_slowlock_block(struct rt_mutex_base *lock,
1484                        struct ww_acquire_ctx *ww_ctx,
1485                        unsigned int state,
1486                        struct hrtimer_sleeper *timeout,
1487                        struct rt_mutex_waiter *waiter)
1488 {
1489     struct rt_mutex *rtm = container_of(lock, struct rt_mutex, rtmutex);
1490     struct task_struct *owner;
1491     int ret = 0;
1492 
1493     for (;;) {
1494         /* Try to acquire the lock: */
1495         if (try_to_take_rt_mutex(lock, current, waiter))
1496             break;
1497 
1498         if (timeout && !timeout->task) {
1499             ret = -ETIMEDOUT;
1500             break;
1501         }
1502         if (signal_pending_state(state, current)) {
1503             ret = -EINTR;
1504             break;
1505         }
1506 
1507         if (build_ww_mutex() && ww_ctx) {
1508             ret = __ww_mutex_check_kill(rtm, waiter, ww_ctx);
1509             if (ret)
1510                 break;
1511         }
1512 
1513         if (waiter == rt_mutex_top_waiter(lock))
1514             owner = rt_mutex_owner(lock);
1515         else
1516             owner = NULL;
1517         raw_spin_unlock_irq(&lock->wait_lock);
1518 
1519         if (!owner || !rtmutex_spin_on_owner(lock, waiter, owner))
1520             schedule();
1521 
1522         raw_spin_lock_irq(&lock->wait_lock);
1523         set_current_state(state);
1524     }
1525 
1526     __set_current_state(TASK_RUNNING);
1527     return ret;
1528 }
1529 
1530 static void __sched rt_mutex_handle_deadlock(int res, int detect_deadlock,
1531                          struct rt_mutex_waiter *w)
1532 {
1533     /*
1534      * If the result is not -EDEADLOCK or the caller requested
1535      * deadlock detection, nothing to do here.
1536      */
1537     if (res != -EDEADLOCK || detect_deadlock)
1538         return;
1539 
1540     if (build_ww_mutex() && w->ww_ctx)
1541         return;
1542 
1543     /*
1544      * Yell loudly and stop the task right here.
1545      */
1546     WARN(1, "rtmutex deadlock detected\n");
1547     while (1) {
1548         set_current_state(TASK_INTERRUPTIBLE);
1549         schedule();
1550     }
1551 }
1552 
1553 /**
1554  * __rt_mutex_slowlock - Locking slowpath invoked with lock::wait_lock held
1555  * @lock:   The rtmutex to block on
1556  * @ww_ctx: WW mutex context pointer
1557  * @state:  The task state for sleeping
1558  * @chwalk: Indicator whether full or partial chainwalk is requested
1559  * @waiter: Initialized waiter for blocking
1560  */
1561 static int __sched __rt_mutex_slowlock(struct rt_mutex_base *lock,
1562                        struct ww_acquire_ctx *ww_ctx,
1563                        unsigned int state,
1564                        enum rtmutex_chainwalk chwalk,
1565                        struct rt_mutex_waiter *waiter)
1566 {
1567     struct rt_mutex *rtm = container_of(lock, struct rt_mutex, rtmutex);
1568     struct ww_mutex *ww = ww_container_of(rtm);
1569     int ret;
1570 
1571     lockdep_assert_held(&lock->wait_lock);
1572 
1573     /* Try to acquire the lock again: */
1574     if (try_to_take_rt_mutex(lock, current, NULL)) {
1575         if (build_ww_mutex() && ww_ctx) {
1576             __ww_mutex_check_waiters(rtm, ww_ctx);
1577             ww_mutex_lock_acquired(ww, ww_ctx);
1578         }
1579         return 0;
1580     }
1581 
1582     set_current_state(state);
1583 
1584     trace_contention_begin(lock, LCB_F_RT);
1585 
1586     ret = task_blocks_on_rt_mutex(lock, waiter, current, ww_ctx, chwalk);
1587     if (likely(!ret))
1588         ret = rt_mutex_slowlock_block(lock, ww_ctx, state, NULL, waiter);
1589 
1590     if (likely(!ret)) {
1591         /* acquired the lock */
1592         if (build_ww_mutex() && ww_ctx) {
1593             if (!ww_ctx->is_wait_die)
1594                 __ww_mutex_check_waiters(rtm, ww_ctx);
1595             ww_mutex_lock_acquired(ww, ww_ctx);
1596         }
1597     } else {
1598         __set_current_state(TASK_RUNNING);
1599         remove_waiter(lock, waiter);
1600         rt_mutex_handle_deadlock(ret, chwalk, waiter);
1601     }
1602 
1603     /*
1604      * try_to_take_rt_mutex() sets the waiter bit
1605      * unconditionally. We might have to fix that up.
1606      */
1607     fixup_rt_mutex_waiters(lock);
1608 
1609     trace_contention_end(lock, ret);
1610 
1611     return ret;
1612 }
1613 
1614 static inline int __rt_mutex_slowlock_locked(struct rt_mutex_base *lock,
1615                          struct ww_acquire_ctx *ww_ctx,
1616                          unsigned int state)
1617 {
1618     struct rt_mutex_waiter waiter;
1619     int ret;
1620 
1621     rt_mutex_init_waiter(&waiter);
1622     waiter.ww_ctx = ww_ctx;
1623 
1624     ret = __rt_mutex_slowlock(lock, ww_ctx, state, RT_MUTEX_MIN_CHAINWALK,
1625                   &waiter);
1626 
1627     debug_rt_mutex_free_waiter(&waiter);
1628     return ret;
1629 }
1630 
1631 /*
1632  * rt_mutex_slowlock - Locking slowpath invoked when fast path fails
1633  * @lock:   The rtmutex to block on
1634  * @ww_ctx: WW mutex context pointer
1635  * @state:  The task state for sleeping
1636  */
1637 static int __sched rt_mutex_slowlock(struct rt_mutex_base *lock,
1638                      struct ww_acquire_ctx *ww_ctx,
1639                      unsigned int state)
1640 {
1641     unsigned long flags;
1642     int ret;
1643 
1644     /*
1645      * Technically we could use raw_spin_[un]lock_irq() here, but this can
1646      * be called in early boot if the cmpxchg() fast path is disabled
1647      * (debug, no architecture support). In this case we will acquire the
1648      * rtmutex with lock->wait_lock held. But we cannot unconditionally
1649      * enable interrupts in that early boot case. So we need to use the
1650      * irqsave/restore variants.
1651      */
1652     raw_spin_lock_irqsave(&lock->wait_lock, flags);
1653     ret = __rt_mutex_slowlock_locked(lock, ww_ctx, state);
1654     raw_spin_unlock_irqrestore(&lock->wait_lock, flags);
1655 
1656     return ret;
1657 }
1658 
1659 static __always_inline int __rt_mutex_lock(struct rt_mutex_base *lock,
1660                        unsigned int state)
1661 {
1662     if (likely(rt_mutex_cmpxchg_acquire(lock, NULL, current)))
1663         return 0;
1664 
1665     return rt_mutex_slowlock(lock, NULL, state);
1666 }
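/*
 * The public lock functions are thin wrappers around this, roughly (a
 * sketch; the real wrappers with their lockdep annotations live in
 * rtmutex_api.c, which includes this file):
 *
 *	void rt_mutex_lock(struct rt_mutex *lock)
 *	{
 *		...
 *		__rt_mutex_lock(&lock->rtmutex, TASK_UNINTERRUPTIBLE);
 *	}
 */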
1667 #endif /* RT_MUTEX_BUILD_MUTEX */
1668 
1669 #ifdef RT_MUTEX_BUILD_SPINLOCKS
1670 /*
1671  * Functions required for spin/rw_lock substitution on RT kernels
1672  */
1673 
1674 /**
1675  * rtlock_slowlock_locked - Slow path lock acquisition for RT locks
1676  * @lock:   The underlying RT mutex
1677  */
1678 static void __sched rtlock_slowlock_locked(struct rt_mutex_base *lock)
1679 {
1680     struct rt_mutex_waiter waiter;
1681     struct task_struct *owner;
1682 
1683     lockdep_assert_held(&lock->wait_lock);
1684 
1685     if (try_to_take_rt_mutex(lock, current, NULL))
1686         return;
1687 
1688     rt_mutex_init_rtlock_waiter(&waiter);
1689 
1690     /* Save current state and set state to TASK_RTLOCK_WAIT */
1691     current_save_and_set_rtlock_wait_state();
1692 
1693     trace_contention_begin(lock, LCB_F_RT);
1694 
1695     task_blocks_on_rt_mutex(lock, &waiter, current, NULL, RT_MUTEX_MIN_CHAINWALK);
1696 
1697     for (;;) {
1698         /* Try to acquire the lock again */
1699         if (try_to_take_rt_mutex(lock, current, &waiter))
1700             break;
1701 
1702         if (&waiter == rt_mutex_top_waiter(lock))
1703             owner = rt_mutex_owner(lock);
1704         else
1705             owner = NULL;
1706         raw_spin_unlock_irq(&lock->wait_lock);
1707 
1708         if (!owner || !rtmutex_spin_on_owner(lock, &waiter, owner))
1709             schedule_rtlock();
1710 
1711         raw_spin_lock_irq(&lock->wait_lock);
1712         set_current_state(TASK_RTLOCK_WAIT);
1713     }
1714 
1715     /* Restore the task state */
1716     current_restore_rtlock_saved_state();
1717 
1718     /*
1719      * try_to_take_rt_mutex() sets the waiter bit unconditionally.
1720      * We might have to fix that up:
1721      */
1722     fixup_rt_mutex_waiters(lock);
1723     debug_rt_mutex_free_waiter(&waiter);
1724 
1725     trace_contention_end(lock, 0);
1726 }
1727 
1728 static __always_inline void __sched rtlock_slowlock(struct rt_mutex_base *lock)
1729 {
1730     unsigned long flags;
1731 
1732     raw_spin_lock_irqsave(&lock->wait_lock, flags);
1733     rtlock_slowlock_locked(lock);
1734     raw_spin_unlock_irqrestore(&lock->wait_lock, flags);
1735 }
1736 
1737 #endif /* RT_MUTEX_BUILD_SPINLOCKS */