// SPDX-License-Identifier: GPL-2.0-only
#include <linux/atomic.h>
#include <linux/percpu.h>
#include <linux/wait.h>
#include <linux/lockdep.h>
#include <linux/percpu-rwsem.h>
#include <linux/rcupdate.h>
#include <linux/sched.h>
#include <linux/sched/task.h>
#include <linux/sched/debug.h>
#include <linux/errno.h>
#include <trace/events/lock.h>

int __percpu_init_rwsem(struct percpu_rw_semaphore *sem,
            const char *name, struct lock_class_key *key)
{
    sem->read_count = alloc_percpu(int);
    if (unlikely(!sem->read_count))
        return -ENOMEM;

    rcu_sync_init(&sem->rss);
    rcuwait_init(&sem->writer);
    init_waitqueue_head(&sem->waiters);
    atomic_set(&sem->block, 0);
#ifdef CONFIG_DEBUG_LOCK_ALLOC
    debug_check_no_locks_freed((void *)sem, sizeof(*sem));
    lockdep_init_map(&sem->dep_map, name, key, 0);
#endif
    return 0;
}
EXPORT_SYMBOL_GPL(__percpu_init_rwsem);
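
/*
 * Illustrative usage sketch (not part of this file): callers normally go
 * through the wrappers in <linux/percpu-rwsem.h>; the semaphore names
 * below are made up for the example.
 *
 *    DEFINE_STATIC_PERCPU_RWSEM(my_static_sem);    // file-scope definition
 *
 *    // or, for a dynamically initialized semaphore:
 *    struct percpu_rw_semaphore my_sem;
 *
 *    if (percpu_init_rwsem(&my_sem))    // wraps __percpu_init_rwsem()
 *        return -ENOMEM;
 *    ...
 *    percpu_free_rwsem(&my_sem);
 */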

void percpu_free_rwsem(struct percpu_rw_semaphore *sem)
{
    /*
     * XXX: temporary kludge. The error path in alloc_super()
     * assumes that percpu_free_rwsem() is safe after kzalloc().
     */
    if (!sem->read_count)
        return;

    rcu_sync_dtor(&sem->rss);
    free_percpu(sem->read_count);
    sem->read_count = NULL; /* catch use after free bugs */
}
EXPORT_SYMBOL_GPL(percpu_free_rwsem);
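
/*
 * Illustrative sketch (not part of this file): because the read_count
 * check above bails out when the per-CPU counter was never allocated,
 * freeing a merely zero-initialized semaphore is safe, which is what the
 * alloc_super() error path relies on. Names below are made up.
 *
 *    struct my_obj *obj = kzalloc(sizeof(*obj), GFP_KERNEL);  // sem all zeroes
 *
 *    if (!obj)
 *        return -ENOMEM;
 *    if (earlier_setup_failed)          // hypothetical error before init
 *        goto fail;
 *    if (percpu_init_rwsem(&obj->sem))
 *        goto fail;
 *    ...
 * fail:
 *    percpu_free_rwsem(&obj->sem);      // safe: sem.read_count is still NULL
 *    kfree(obj);
 */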

static bool __percpu_down_read_trylock(struct percpu_rw_semaphore *sem)
{
    this_cpu_inc(*sem->read_count);

    /*
     * Due to having preemption disabled the decrement happens on
     * the same CPU as the increment, avoiding the
     * increment-on-one-CPU-and-decrement-on-another problem.
     *
     * If the reader misses the writer's assignment of sem->block, then the
     * writer is guaranteed to see the reader's increment.
     *
     * Conversely, any readers that increment their sem->read_count after
     * the writer looks are guaranteed to see the sem->block value, which
     * in turn means that they are guaranteed to immediately decrement
     * their sem->read_count, so that it doesn't matter that the writer
     * missed them.
     */

    smp_mb(); /* A matches D */

    /*
     * If !sem->block the critical section starts here, matched by the
     * release in percpu_up_write().
     */
    if (likely(!atomic_read_acquire(&sem->block)))
        return true;

    this_cpu_dec(*sem->read_count);

    /* Prod writer to re-evaluate readers_active_check() */
    rcuwait_wake_up(&sem->writer);

    return false;
}
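
/*
 * Illustrative ordering sketch (not part of this file), restating the
 * comment above as the two racing sequences. "A" is the smp_mb() in this
 * function; "D" is the full barrier implied by the atomic_xchg() in
 * __percpu_down_write_trylock():
 *
 *    reader                              writer
 *    ------                              ------
 *    this_cpu_inc(read_count)            atomic_xchg(&block, 1)       // D
 *    smp_mb()                     // A
 *    atomic_read_acquire(&block)         per_cpu_sum(read_count)
 *
 * With A and D in place, at least one side must observe the other's
 * store: either the reader sees block != 0 and backs its increment out,
 * or the writer sees a non-zero sum and waits for the reader to finish.
 */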

static inline bool __percpu_down_write_trylock(struct percpu_rw_semaphore *sem)
{
    /*
     * The plain read avoids the exclusive cacheline access of the xchg()
     * when the lock is visibly taken; only attempt the xchg() when it
     * looks free. A successful xchg() also provides the full barrier "D"
     * relied upon by __percpu_down_read_trylock().
     */
    if (atomic_read(&sem->block))
        return false;

    return atomic_xchg(&sem->block, 1) == 0;
}

static bool __percpu_rwsem_trylock(struct percpu_rw_semaphore *sem, bool reader)
{
    if (reader) {
        bool ret;

        preempt_disable();
        ret = __percpu_down_read_trylock(sem);
        preempt_enable();

        return ret;
    }
    return __percpu_down_write_trylock(sem);
}

/*
 * The return value of wait_queue_entry::func means:
 *
 *  <0 - error, wakeup is terminated and the error is returned
 *   0 - no wakeup, a next waiter is tried
 *  >0 - woken, if EXCLUSIVE, counted towards @nr_exclusive.
 *
 * We use EXCLUSIVE for both readers and writers to preserve FIFO order,
 * and play games with the return value to allow waking multiple readers.
 *
 * Specifically, we wake readers until we've woken a single writer, or until a
 * trylock fails.
 */
static int percpu_rwsem_wake_function(struct wait_queue_entry *wq_entry,
                      unsigned int mode, int wake_flags,
                      void *key)
{
    bool reader = wq_entry->flags & WQ_FLAG_CUSTOM;
    struct percpu_rw_semaphore *sem = key;
    struct task_struct *p;

    /* concurrent against percpu_down_write(), can get stolen */
    if (!__percpu_rwsem_trylock(sem, reader))
        return 1;

    p = get_task_struct(wq_entry->private);
    list_del_init(&wq_entry->entry);
    smp_store_release(&wq_entry->private, NULL);

    wake_up_process(p);
    put_task_struct(p);

    return !reader; /* wake (readers until) 1 writer */
}
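
/*
 * Illustrative sketch (not part of this file) of the wake policy above,
 * given a FIFO wait list built up as R1, R2, W1, R3 and a writer
 * releasing the lock via percpu_up_write():
 *
 *    __wake_up(..., nr_exclusive = 1, ...)
 *      -> R1: trylock(read) ok, woken, returns 0  -> keep scanning
 *      -> R2: trylock(read) ok, woken, returns 0  -> keep scanning
 *      -> W1: trylock(write) ok, woken, returns 1 -> nr_exclusive hit, stop
 *    R3 stays queued behind the new writer, preserving FIFO order.
 *
 * Returning 1 after a failed trylock (lock stolen concurrently) likewise
 * stops the scan, leaving everyone queued for the next release.
 */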

static void percpu_rwsem_wait(struct percpu_rw_semaphore *sem, bool reader)
{
    DEFINE_WAIT_FUNC(wq_entry, percpu_rwsem_wake_function);
    bool wait;

    spin_lock_irq(&sem->waiters.lock);
    /*
     * Serialize against the wakeup in percpu_up_write(), if we fail
     * the trylock, the wakeup must see us on the list.
     */
    wait = !__percpu_rwsem_trylock(sem, reader);
    if (wait) {
        wq_entry.flags |= WQ_FLAG_EXCLUSIVE | reader * WQ_FLAG_CUSTOM;
        __add_wait_queue_entry_tail(&sem->waiters, &wq_entry);
    }
    spin_unlock_irq(&sem->waiters.lock);

    while (wait) {
        set_current_state(TASK_UNINTERRUPTIBLE);
        if (!smp_load_acquire(&wq_entry.private))
            break;
        schedule();
    }
    __set_current_state(TASK_RUNNING);
}
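
/*
 * Illustrative sketch (not part of this file) of the hand-off protocol
 * between this function and percpu_rwsem_wake_function():
 *
 *    waiter                                 waker
 *    ------                                 -----
 *    wq_entry.private = current             // set by DEFINE_WAIT_FUNC()
 *    add to sem->waiters
 *    ...                                    trylock on waiter's behalf
 *    ...                                    smp_store_release(&private, NULL)
 *    smp_load_acquire(&private) == NULL     wake_up_process()
 *      -> lock already held for us, return
 *
 * The acquire/release pair on wq_entry.private guarantees that once the
 * waiter observes NULL it also observes the lock state established by the
 * waker's trylock, so no further locking work is needed here.
 */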

bool __sched __percpu_down_read(struct percpu_rw_semaphore *sem, bool try)
{
    if (__percpu_down_read_trylock(sem))
        return true;

    if (try)
        return false;

    trace_contention_begin(sem, LCB_F_PERCPU | LCB_F_READ);
    preempt_enable();
    percpu_rwsem_wait(sem, /* .reader = */ true);
    preempt_disable();
    trace_contention_end(sem, 0);

    return true;
}
EXPORT_SYMBOL_GPL(__percpu_down_read);
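
/*
 * For context: this is the reader slow path. A simplified sketch of the
 * inline fast path in <linux/percpu-rwsem.h> that calls it (details vary
 * between kernel versions):
 *
 *    static inline void percpu_down_read(struct percpu_rw_semaphore *sem)
 *    {
 *        preempt_disable();
 *        if (likely(rcu_sync_is_idle(&sem->rss)))
 *            this_cpu_inc(*sem->read_count);   // no writer anywhere
 *        else
 *            __percpu_down_read(sem, false);   // this file's slow path
 *        preempt_enable();
 *    }
 *
 * rcu_sync_is_idle() only returns false between a writer's
 * rcu_sync_enter() and the grace period following rcu_sync_exit(), so an
 * uncontended read lock is just a per-CPU increment.
 */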

#define per_cpu_sum(var)                                        \
({                                                              \
    typeof(var) __sum = 0;                                      \
    int cpu;                                                    \
    compiletime_assert_atomic_type(__sum);                      \
    for_each_possible_cpu(cpu)                                  \
        __sum += per_cpu(var, cpu);                             \
    __sum;                                                      \
})

/*
 * Return true if the modular sum of the sem->read_count per-CPU variable is
 * zero.  If this sum is zero, then it is stable due to the fact that if any
 * newly arriving readers increment a given counter, they will immediately
 * decrement that same counter.
 *
 * Assumes sem->block is set.
 */
static bool readers_active_check(struct percpu_rw_semaphore *sem)
{
    if (per_cpu_sum(*sem->read_count) != 0)
        return false;

    /*
     * If we observed the decrement, ensure we see the entire critical
     * section.
     */

    smp_mb(); /* C matches B */

    return true;
}
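
/*
 * Illustrative sketch (not part of this file) of why only the sum
 * matters: a reader may migrate between taking and releasing the lock,
 * so it can increment on one CPU and decrement on another, e.g.:
 *
 *    CPU0 read_count:  +1    (reader A locked on CPU0)
 *    CPU1 read_count:  -1    (reader A unlocked on CPU1)
 *    CPU2 read_count:   0
 *                     ----
 *    per_cpu_sum    =    0   -> no active readers, the writer may proceed
 *
 * Individual counters can therefore be negative or positive; "modular"
 * refers to the sum being taken in ordinary wrapping integer arithmetic.
 */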

void __sched percpu_down_write(struct percpu_rw_semaphore *sem)
{
    might_sleep();
    rwsem_acquire(&sem->dep_map, 0, 0, _RET_IP_);
    trace_contention_begin(sem, LCB_F_PERCPU | LCB_F_WRITE);

    /* Notify readers to take the slow path. */
    rcu_sync_enter(&sem->rss);

    /*
     * Try to set sem->block; this provides writer-writer exclusion.
     * Having sem->block set makes new readers block.
     */
    if (!__percpu_down_write_trylock(sem))
        percpu_rwsem_wait(sem, /* .reader = */ false);

    /* smp_mb() implied by __percpu_down_write_trylock() on success -- D matches A */

    /*
     * If they don't see our store of sem->block, then we are guaranteed to
     * see their sem->read_count increment, and therefore will wait for
     * them.
     */

    /* Wait for all active readers to complete. */
    rcuwait_wait_event(&sem->writer, readers_active_check(sem), TASK_UNINTERRUPTIBLE);
    trace_contention_end(sem, 0);
}
EXPORT_SYMBOL_GPL(percpu_down_write);
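
/*
 * Illustrative usage sketch (not part of this file): the writer side is a
 * plain lock/unlock pair around the update; names below are made up.
 *
 *    percpu_down_write(&my_sem);
 *    update_shared_state();        // all readers are excluded here
 *    percpu_up_write(&my_sem);
 *
 * Note the cost asymmetry this file is built around: percpu_down_write()
 * may wait for an RCU grace period via rcu_sync_enter() and then for every
 * in-flight reader, so writes are expected to be rare relative to reads.
 */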

void percpu_up_write(struct percpu_rw_semaphore *sem)
{
    rwsem_release(&sem->dep_map, _RET_IP_);

    /*
     * Signal the writer is done, no fast path yet.
     *
     * One reason that we cannot just immediately flip to readers_fast is
     * that new readers might fail to see the results of this writer's
     * critical section.
     *
     * Therefore we force it through the slow path which guarantees an
     * acquire and thereby guarantees the critical section's consistency.
     */
    atomic_set_release(&sem->block, 0);

    /*
     * Prod any pending reader/writer to make progress.
     */
    __wake_up(&sem->waiters, TASK_NORMAL, 1, sem);

    /*
     * Once this completes (at least one RCU-sched grace period hence) the
     * reader fast path will be available again. Safe to use outside the
     * exclusive write lock because it's counting.
     */
    rcu_sync_exit(&sem->rss);
}
EXPORT_SYMBOL_GPL(percpu_up_write);
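
/*
 * Illustrative usage sketch (not part of this file): the matching reader
 * side uses the inline wrappers from <linux/percpu-rwsem.h>; names below
 * are made up. While a write lock is (or was recently) held, these fall
 * through to the slow paths in this file; otherwise they are a per-CPU
 * increment and decrement.
 *
 *    percpu_down_read(&my_sem);
 *    v = read_shared_state();      // runs concurrently with other readers
 *    percpu_up_read(&my_sem);
 */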