// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright (C) 2008 Oracle.  All rights reserved.
 */

#include <linux/sched.h>
#include <linux/pagemap.h>
#include <linux/spinlock.h>
#include <linux/page-flags.h>
#include <asm/bug.h>
#include "misc.h"
#include "ctree.h"
#include "extent_io.h"
#include "locking.h"

/*
 * Lockdep class keys for extent_buffer->lock's in this root.  For a given
 * eb, the lockdep key is determined by the btrfs_root it belongs to and
 * the level the eb occupies in the tree.
 *
 * Different roots are used for different purposes and may nest inside each
 * other, so they require separate keysets.  As lockdep keys should be
 * static, assign keysets according to the purpose of the root as indicated
 * by btrfs_root->root_key.objectid.  This ensures that all special purpose
 * roots have separate keysets.
 *
 * Lock nesting across peer nodes is always done with the immediate parent
 * node locked, thus preventing deadlock.  As lockdep doesn't know this, use
 * a subclass to avoid triggering lockdep warnings in such cases.
 *
 * The key is set by the readpage_end_io_hook after the buffer has passed
 * csum validation but before the pages are unlocked.  It is also set by
 * btrfs_init_new_buffer on freshly allocated blocks.
 *
 * We also add a check to make sure the highest level of the tree is the
 * same as our lockdep setup here.  If BTRFS_MAX_LEVEL changes, this code
 * needs updating as well.
 */
#ifdef CONFIG_DEBUG_LOCK_ALLOC
#if BTRFS_MAX_LEVEL != 8
#error
#endif

#define DEFINE_LEVEL(stem, level)                   \
    .names[level] = "btrfs-" stem "-0" #level,

#define DEFINE_NAME(stem)                       \
    DEFINE_LEVEL(stem, 0)                       \
    DEFINE_LEVEL(stem, 1)                       \
    DEFINE_LEVEL(stem, 2)                       \
    DEFINE_LEVEL(stem, 3)                       \
    DEFINE_LEVEL(stem, 4)                       \
    DEFINE_LEVEL(stem, 5)                       \
    DEFINE_LEVEL(stem, 6)                       \
    DEFINE_LEVEL(stem, 7)

static struct btrfs_lockdep_keyset {
    u64         id;     /* root objectid */
    /* Longest entry: btrfs-free-space-00 */
    char            names[BTRFS_MAX_LEVEL][20];
    struct lock_class_key   keys[BTRFS_MAX_LEVEL];
} btrfs_lockdep_keysets[] = {
    { .id = BTRFS_ROOT_TREE_OBJECTID,   DEFINE_NAME("root") },
    { .id = BTRFS_EXTENT_TREE_OBJECTID, DEFINE_NAME("extent")   },
    { .id = BTRFS_CHUNK_TREE_OBJECTID,  DEFINE_NAME("chunk")    },
    { .id = BTRFS_DEV_TREE_OBJECTID,    DEFINE_NAME("dev")  },
    { .id = BTRFS_CSUM_TREE_OBJECTID,   DEFINE_NAME("csum") },
    { .id = BTRFS_QUOTA_TREE_OBJECTID,  DEFINE_NAME("quota")    },
    { .id = BTRFS_TREE_LOG_OBJECTID,    DEFINE_NAME("log")  },
    { .id = BTRFS_TREE_RELOC_OBJECTID,  DEFINE_NAME("treloc")   },
    { .id = BTRFS_DATA_RELOC_TREE_OBJECTID, DEFINE_NAME("dreloc")   },
    { .id = BTRFS_UUID_TREE_OBJECTID,   DEFINE_NAME("uuid") },
    { .id = BTRFS_FREE_SPACE_TREE_OBJECTID, DEFINE_NAME("free-space") },
    { .id = 0,              DEFINE_NAME("tree") },
};

#undef DEFINE_LEVEL
#undef DEFINE_NAME
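
/*
 * Illustration (editor's sketch, not part of the original file): for a given
 * stem, DEFINE_NAME() above expands to eight designated initializers, one per
 * tree level.  DEFINE_NAME("root") becomes:
 *
 *   .names[0] = "btrfs-root-00",
 *   .names[1] = "btrfs-root-01",
 *   ...
 *   .names[7] = "btrfs-root-07",
 *
 * which is how each keyset gets a distinct, human-readable lockdep name for
 * every level.
 */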

void btrfs_set_buffer_lockdep_class(u64 objectid, struct extent_buffer *eb, int level)
{
    struct btrfs_lockdep_keyset *ks;

    BUG_ON(level >= ARRAY_SIZE(ks->keys));

    /* Find the matching keyset, id 0 is the default entry */
    for (ks = btrfs_lockdep_keysets; ks->id; ks++)
        if (ks->id == objectid)
            break;

    lockdep_set_class_and_name(&eb->lock, &ks->keys[level], ks->names[level]);
}

void btrfs_maybe_reset_lockdep_class(struct btrfs_root *root, struct extent_buffer *eb)
{
    if (test_bit(BTRFS_ROOT_RESET_LOCKDEP_CLASS, &root->state))
        btrfs_set_buffer_lockdep_class(root->root_key.objectid,
                           eb, btrfs_header_level(eb));
}

#endif

/*
 * Extent buffer locking
 * =====================
 *
 * We use a rw_semaphore for tree locking, and the semantics are exactly the
 * same:
 *
 * - reader/writer exclusion
 * - writer/writer exclusion
 * - reader/reader sharing
 * - try-lock semantics for readers and writers
 *
 * The rwsem implementation does opportunistic spinning, which reduces the
 * number of times the locking task needs to sleep.
 */

/*
 * __btrfs_tree_read_lock - lock extent buffer for read
 * @eb:     the eb to be locked
 * @nest:   the nesting level to be used for lockdep
 *
 * This takes the read lock on the extent buffer, using the specified nesting
 * level for lockdep purposes.
 */
void __btrfs_tree_read_lock(struct extent_buffer *eb, enum btrfs_lock_nesting nest)
{
    u64 start_ns = 0;

    if (trace_btrfs_tree_read_lock_enabled())
        start_ns = ktime_get_ns();

    down_read_nested(&eb->lock, nest);
    trace_btrfs_tree_read_lock(eb, start_ns);
}

void btrfs_tree_read_lock(struct extent_buffer *eb)
{
    __btrfs_tree_read_lock(eb, BTRFS_NESTING_NORMAL);
}

/*
 * Try-lock for read.
 *
 * Return 1 if the lock has been taken, 0 otherwise.
 */
int btrfs_try_tree_read_lock(struct extent_buffer *eb)
{
    if (down_read_trylock(&eb->lock)) {
        trace_btrfs_try_tree_read_lock(eb);
        return 1;
    }
    return 0;
}

/*
 * Try-lock for write.
 *
 * Return 1 if the lock has been taken, 0 otherwise.
 */
int btrfs_try_tree_write_lock(struct extent_buffer *eb)
{
    if (down_write_trylock(&eb->lock)) {
        eb->lock_owner = current->pid;
        trace_btrfs_try_tree_write_lock(eb);
        return 1;
    }
    return 0;
}

/*
 * Release read lock.
 */
void btrfs_tree_read_unlock(struct extent_buffer *eb)
{
    trace_btrfs_tree_read_unlock(eb);
    up_read(&eb->lock);
}

/*
 * __btrfs_tree_lock - lock eb for write
 * @eb:     the eb to lock
 * @nest:   the nesting to use for the lock
 *
 * Returns with the eb->lock write locked.
 */
void __btrfs_tree_lock(struct extent_buffer *eb, enum btrfs_lock_nesting nest)
    __acquires(&eb->lock)
{
    u64 start_ns = 0;

    if (trace_btrfs_tree_lock_enabled())
        start_ns = ktime_get_ns();

    down_write_nested(&eb->lock, nest);
    eb->lock_owner = current->pid;
    trace_btrfs_tree_lock(eb, start_ns);
}

void btrfs_tree_lock(struct extent_buffer *eb)
{
    __btrfs_tree_lock(eb, BTRFS_NESTING_NORMAL);
}

/*
 * Release the write lock.
 */
void btrfs_tree_unlock(struct extent_buffer *eb)
{
    trace_btrfs_tree_unlock(eb);
    eb->lock_owner = 0;
    up_write(&eb->lock);
}
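
/*
 * Illustration (editor's sketch, not part of the original file): the basic
 * write-side pattern built on the helpers above.  The function and the
 * "modify" step are hypothetical; real callers such as the tree search and
 * balancing code follow the same lock/modify/unlock shape, relying on the
 * rwsem for reader/writer and writer/writer exclusion.
 */
static inline void example_modify_node(struct extent_buffer *eb)
{
    btrfs_tree_lock(eb);        /* exclusive access to the node */
    /* ... modify the extent buffer contents here ... */
    btrfs_tree_unlock(eb);      /* drop exclusive access */
}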

/*
 * This releases any locks held in the path starting at level and going all the
 * way up to the root.
 *
 * btrfs_search_slot will keep the lock held on higher nodes in a few corner
 * cases, such as COW of the block at slot zero in the node.  This ignores
 * those rules, and it should only be called when there are no more updates to
 * be done higher up in the tree.
 */
void btrfs_unlock_up_safe(struct btrfs_path *path, int level)
{
    int i;

    if (path->keep_locks)
        return;

    for (i = level; i < BTRFS_MAX_LEVEL; i++) {
        if (!path->nodes[i])
            continue;
        if (!path->locks[i])
            continue;
        btrfs_tree_unlock_rw(path->nodes[i], path->locks[i]);
        path->locks[i] = 0;
    }
}

/*
 * Loop around taking references on and locking the root node of the tree until
 * we end up with a lock on the root node.
 *
 * Return: root extent buffer with write lock held
 */
struct extent_buffer *btrfs_lock_root_node(struct btrfs_root *root)
{
    struct extent_buffer *eb;

    while (1) {
        eb = btrfs_root_node(root);

        btrfs_maybe_reset_lockdep_class(root, eb);
        btrfs_tree_lock(eb);
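        /*
         * The root node may have been swapped (e.g. COWed) between
         * btrfs_root_node() taking the reference and the lock being
         * acquired above; if so, drop the stale buffer and retry.
         */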
        if (eb == root->node)
            break;
        btrfs_tree_unlock(eb);
        free_extent_buffer(eb);
    }
    return eb;
}

/*
 * Loop around taking references on and locking the root node of the tree until
 * we end up with a lock on the root node.
 *
 * Return: root extent buffer with read lock held
 */
struct extent_buffer *btrfs_read_lock_root_node(struct btrfs_root *root)
{
    struct extent_buffer *eb;

    while (1) {
        eb = btrfs_root_node(root);

        btrfs_maybe_reset_lockdep_class(root, eb);
        btrfs_tree_read_lock(eb);
        if (eb == root->node)
            break;
        btrfs_tree_read_unlock(eb);
        free_extent_buffer(eb);
    }
    return eb;
}

/*
 * DREW locks
 * ==========
 *
 * DREW stands for double-reader-writer-exclusion lock.  It's used in
 * situations where you want to provide A-B exclusion but not AA or BB.
 *
 * The current implementation gives more priority to readers.  If a reader and
 * a writer both race to acquire their respective sides of the lock, the writer
 * yields its lock as soon as it detects a concurrent reader.  Additionally, if
 * there are pending readers, no new writers are allowed to come in and acquire
 * the lock.
 */

int btrfs_drew_lock_init(struct btrfs_drew_lock *lock)
{
    int ret;

    ret = percpu_counter_init(&lock->writers, 0, GFP_KERNEL);
    if (ret)
        return ret;

    atomic_set(&lock->readers, 0);
    init_waitqueue_head(&lock->pending_readers);
    init_waitqueue_head(&lock->pending_writers);

    return 0;
}

void btrfs_drew_lock_destroy(struct btrfs_drew_lock *lock)
{
    percpu_counter_destroy(&lock->writers);
}

/* Return true if acquisition is successful, false otherwise */
bool btrfs_drew_try_write_lock(struct btrfs_drew_lock *lock)
{
    if (atomic_read(&lock->readers))
        return false;

    percpu_counter_inc(&lock->writers);

    /* Ensure writers count is updated before we check for pending readers */
    smp_mb();
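    /*
     * The barrier above pairs with the smp_mb__after_atomic() in
     * btrfs_drew_read_lock(): either this writer observes the reader's
     * increment of lock->readers and backs off below, or the reader
     * observes the writers counter and waits for it to drop to zero.
     */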
    if (atomic_read(&lock->readers)) {
        btrfs_drew_write_unlock(lock);
        return false;
    }

    return true;
}

void btrfs_drew_write_lock(struct btrfs_drew_lock *lock)
{
    while (true) {
        if (btrfs_drew_try_write_lock(lock))
            return;
        wait_event(lock->pending_writers, !atomic_read(&lock->readers));
    }
}

void btrfs_drew_write_unlock(struct btrfs_drew_lock *lock)
{
    percpu_counter_dec(&lock->writers);
    cond_wake_up(&lock->pending_readers);
}

void btrfs_drew_read_lock(struct btrfs_drew_lock *lock)
{
    atomic_inc(&lock->readers);

    /*
     * Ensure the pending reader count is perceived BEFORE this reader
     * goes to sleep in case of active writers.  This guarantees new writers
     * won't be allowed and that the current reader will be woken up when
     * the last active writer finishes its job.
     */
    smp_mb__after_atomic();

    wait_event(lock->pending_readers,
           percpu_counter_sum(&lock->writers) == 0);
}

void btrfs_drew_read_unlock(struct btrfs_drew_lock *lock)
{
    /*
     * atomic_dec_and_test implies a full barrier, so woken up writers
     * are guaranteed to see the decrement
     */
    if (atomic_dec_and_test(&lock->readers))
        wake_up(&lock->pending_writers);
}
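
/*
 * Illustration (editor's sketch, not part of the original file): how the two
 * sides of a DREW lock are typically used.  Both helpers below are
 * hypothetical; "A" operations exclude "B" operations and vice versa, while
 * multiple A's (or multiple B's) may run concurrently.
 */
static inline void example_do_a(struct btrfs_drew_lock *lock)
{
    /* The "A" (writer) side: backs off while any "B" side holders exist. */
    btrfs_drew_write_lock(lock);
    /* ... perform the A-side work ... */
    btrfs_drew_write_unlock(lock);
}

static inline void example_do_b(struct btrfs_drew_lock *lock)
{
    /* The "B" (reader) side: waits until all "A" side holders are done. */
    btrfs_drew_read_lock(lock);
    /* ... perform the B-side work ... */
    btrfs_drew_read_unlock(lock);
}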