0001 // SPDX-License-Identifier: GPL-2.0
0002 /*
0003  * Copyright (C) 2009 Oracle.  All rights reserved.
0004  */
0005 
0006 #include <linux/sched.h>
0007 #include <linux/slab.h>
0008 #include <linux/sort.h>
0009 #include "ctree.h"
0010 #include "delayed-ref.h"
0011 #include "transaction.h"
0012 #include "qgroup.h"
0013 #include "space-info.h"
0014 #include "tree-mod-log.h"
0015 
0016 struct kmem_cache *btrfs_delayed_ref_head_cachep;
0017 struct kmem_cache *btrfs_delayed_tree_ref_cachep;
0018 struct kmem_cache *btrfs_delayed_data_ref_cachep;
0019 struct kmem_cache *btrfs_delayed_extent_op_cachep;
0020 /*
0021  * delayed back reference update tracking.  For subvolume trees
0022  * we queue up extent allocations and backref maintenance for
0023  * delayed processing.   This avoids deep call chains where we
0024  * add extents in the middle of btrfs_search_slot, and it allows
0025  * us to buffer up frequently modified backrefs in an rb tree instead
0026  * of hammering updates on the extent allocation tree.
0027  */
0028 
0029 bool btrfs_check_space_for_delayed_refs(struct btrfs_fs_info *fs_info)
0030 {
0031     struct btrfs_block_rsv *delayed_refs_rsv = &fs_info->delayed_refs_rsv;
0032     struct btrfs_block_rsv *global_rsv = &fs_info->global_block_rsv;
0033     bool ret = false;
0034     u64 reserved;
0035 
0036     spin_lock(&global_rsv->lock);
0037     reserved = global_rsv->reserved;
0038     spin_unlock(&global_rsv->lock);
0039 
0040     /*
0041      * Since the global reserve is just kind of magic we don't really want
0042      * to rely on it to save our bacon, so if our size is more than the
0043      * delayed_refs_rsv and the global rsv then it's time to think about
0044      * bailing.
0045      */
0046     spin_lock(&delayed_refs_rsv->lock);
0047     reserved += delayed_refs_rsv->reserved;
0048     if (delayed_refs_rsv->size >= reserved)
0049         ret = true;
0050     spin_unlock(&delayed_refs_rsv->lock);
0051     return ret;
0052 }
0053 
0054 int btrfs_should_throttle_delayed_refs(struct btrfs_trans_handle *trans)
0055 {
0056     u64 num_entries =
0057         atomic_read(&trans->transaction->delayed_refs.num_entries);
0058     u64 avg_runtime;
0059     u64 val;
0060 
0061     smp_mb();
0062     avg_runtime = trans->fs_info->avg_delayed_ref_runtime;
0063     val = num_entries * avg_runtime;
0064     if (val >= NSEC_PER_SEC)
0065         return 1;
0066     if (val >= NSEC_PER_SEC / 2)
0067         return 2;
0068 
0069     return btrfs_check_space_for_delayed_refs(trans->fs_info);
0070 }
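/*
 * A minimal sketch of how a transaction holder might react to the throttle
 * hint above (hypothetical caller; real callers differ in detail).  A
 * non-zero return means the queued refs are estimated to need roughly half a
 * second or more of processing time, or that the delayed refs rsv looks
 * short on space:
 *
 *	if (btrfs_should_throttle_delayed_refs(trans))
 *		btrfs_run_delayed_refs(trans, trans->delayed_ref_updates);
 */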
0071 
0072 /**
0073  * btrfs_delayed_refs_rsv_release - release a ref head's reservation
0074  *
0075  * @fs_info:  the filesystem
0076  * @nr:       number of items to drop
0077  *
0078  * This drops the delayed ref head's count from the delayed refs rsv and frees
0079  * any excess reservation we had.
0080  */
0081 void btrfs_delayed_refs_rsv_release(struct btrfs_fs_info *fs_info, int nr)
0082 {
0083     struct btrfs_block_rsv *block_rsv = &fs_info->delayed_refs_rsv;
0084     u64 num_bytes = btrfs_calc_insert_metadata_size(fs_info, nr);
0085     u64 released = 0;
0086 
0087     /*
0088      * We have to check the mount option here because we could be enabling
0089      * the free space tree for the first time and don't have the compat_ro
0090      * option set yet.
0091      *
0092      * We need extra reservations if we have the free space tree because
0093      * we'll have to modify that tree as well.
0094      */
0095     if (btrfs_test_opt(fs_info, FREE_SPACE_TREE))
0096         num_bytes *= 2;
0097 
0098     released = btrfs_block_rsv_release(fs_info, block_rsv, num_bytes, NULL);
0099     if (released)
0100         trace_btrfs_space_reservation(fs_info, "delayed_refs_rsv",
0101                           0, released, 0);
0102 }
0103 
0104 /*
0105  * btrfs_update_delayed_refs_rsv - adjust the size of the delayed refs rsv
0106  * @trans - the trans that may have generated delayed refs
0107  *
0108  * This is to be called anytime we may have adjusted trans->delayed_ref_updates;
0109  * it will calculate the additional size and add it to the delayed_refs_rsv.
0110  */
0111 void btrfs_update_delayed_refs_rsv(struct btrfs_trans_handle *trans)
0112 {
0113     struct btrfs_fs_info *fs_info = trans->fs_info;
0114     struct btrfs_block_rsv *delayed_rsv = &fs_info->delayed_refs_rsv;
0115     u64 num_bytes;
0116 
0117     if (!trans->delayed_ref_updates)
0118         return;
0119 
0120     num_bytes = btrfs_calc_insert_metadata_size(fs_info,
0121                             trans->delayed_ref_updates);
0122     /*
0123      * We have to check the mount option here because we could be enabling
0124      * the free space tree for the first time and don't have the compat_ro
0125      * option set yet.
0126      *
0127      * We need extra reservations if we have the free space tree because
0128      * we'll have to modify that tree as well.
0129      */
0130     if (btrfs_test_opt(fs_info, FREE_SPACE_TREE))
0131         num_bytes *= 2;
0132 
0133     spin_lock(&delayed_rsv->lock);
0134     delayed_rsv->size += num_bytes;
0135     delayed_rsv->full = false;
0136     spin_unlock(&delayed_rsv->lock);
0137     trans->delayed_ref_updates = 0;
0138 }
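/*
 * The rsv bookkeeping here is driven by trans->delayed_ref_updates.  Pieced
 * together from this file, the usual flow is roughly:
 *
 *	trans->delayed_ref_updates++;                   (add_delayed_ref_head)
 *	btrfs_update_delayed_refs_rsv(trans);           (grow delayed_refs_rsv->size)
 *	...
 *	btrfs_delayed_refs_rsv_refill(fs_info, flush);  (back the size with real space)
 *	...
 *	btrfs_delayed_refs_rsv_release(fs_info, nr);    (as ref heads are processed)
 */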
0139 
0140 /**
0141  * btrfs_migrate_to_delayed_refs_rsv - transfer bytes to our delayed refs rsv
0142  *
0143  * @fs_info:   the filesystem
0144  * @src:       source block rsv to transfer from
0145  * @num_bytes: number of bytes to transfer
0146  *
0147  * This transfers up to the num_bytes amount from the src rsv to the
0148  * delayed_refs_rsv.  Any extra bytes are returned to the space info.
0149  */
0150 void btrfs_migrate_to_delayed_refs_rsv(struct btrfs_fs_info *fs_info,
0151                        struct btrfs_block_rsv *src,
0152                        u64 num_bytes)
0153 {
0154     struct btrfs_block_rsv *delayed_refs_rsv = &fs_info->delayed_refs_rsv;
0155     u64 to_free = 0;
0156 
0157     spin_lock(&src->lock);
0158     src->reserved -= num_bytes;
0159     src->size -= num_bytes;
0160     spin_unlock(&src->lock);
0161 
0162     spin_lock(&delayed_refs_rsv->lock);
0163     if (delayed_refs_rsv->size > delayed_refs_rsv->reserved) {
0164         u64 delta = delayed_refs_rsv->size -
0165             delayed_refs_rsv->reserved;
0166         if (num_bytes > delta) {
0167             to_free = num_bytes - delta;
0168             num_bytes = delta;
0169         }
0170     } else {
0171         to_free = num_bytes;
0172         num_bytes = 0;
0173     }
0174 
0175     if (num_bytes)
0176         delayed_refs_rsv->reserved += num_bytes;
0177     if (delayed_refs_rsv->reserved >= delayed_refs_rsv->size)
0178         delayed_refs_rsv->full = true;
0179     spin_unlock(&delayed_refs_rsv->lock);
0180 
0181     if (num_bytes)
0182         trace_btrfs_space_reservation(fs_info, "delayed_refs_rsv",
0183                           0, num_bytes, 1);
0184     if (to_free)
0185         btrfs_space_info_free_bytes_may_use(fs_info,
0186                 delayed_refs_rsv->space_info, to_free);
0187 }
0188 
0189 /**
0190  * btrfs_delayed_refs_rsv_refill - refill based on our delayed refs usage
0191  *
0192  * @fs_info: the filesystem
0193  * @flush:   control how we can flush for this reservation.
0194  *
0195  * This will refill the delayed block_rsv with up to one item's worth of space and
0196  * will return -ENOSPC if we can't make the reservation.
0197  */
0198 int btrfs_delayed_refs_rsv_refill(struct btrfs_fs_info *fs_info,
0199                   enum btrfs_reserve_flush_enum flush)
0200 {
0201     struct btrfs_block_rsv *block_rsv = &fs_info->delayed_refs_rsv;
0202     u64 limit = btrfs_calc_insert_metadata_size(fs_info, 1);
0203     u64 num_bytes = 0;
0204     int ret = -ENOSPC;
0205 
0206     spin_lock(&block_rsv->lock);
0207     if (block_rsv->reserved < block_rsv->size) {
0208         num_bytes = block_rsv->size - block_rsv->reserved;
0209         num_bytes = min(num_bytes, limit);
0210     }
0211     spin_unlock(&block_rsv->lock);
0212 
0213     if (!num_bytes)
0214         return 0;
0215 
0216     ret = btrfs_reserve_metadata_bytes(fs_info, block_rsv, num_bytes, flush);
0217     if (ret)
0218         return ret;
0219     btrfs_block_rsv_add_bytes(block_rsv, num_bytes, 0);
0220     trace_btrfs_space_reservation(fs_info, "delayed_refs_rsv",
0221                       0, num_bytes, 1);
0222     return 0;
0223 }
0224 
0225 /*
0226  * compare two delayed tree backrefs with same bytenr and type
0227  */
0228 static int comp_tree_refs(struct btrfs_delayed_tree_ref *ref1,
0229               struct btrfs_delayed_tree_ref *ref2)
0230 {
0231     if (ref1->node.type == BTRFS_TREE_BLOCK_REF_KEY) {
0232         if (ref1->root < ref2->root)
0233             return -1;
0234         if (ref1->root > ref2->root)
0235             return 1;
0236     } else {
0237         if (ref1->parent < ref2->parent)
0238             return -1;
0239         if (ref1->parent > ref2->parent)
0240             return 1;
0241     }
0242     return 0;
0243 }
0244 
0245 /*
0246  * compare two delayed data backrefs with same bytenr and type
0247  */
0248 static int comp_data_refs(struct btrfs_delayed_data_ref *ref1,
0249               struct btrfs_delayed_data_ref *ref2)
0250 {
0251     if (ref1->node.type == BTRFS_EXTENT_DATA_REF_KEY) {
0252         if (ref1->root < ref2->root)
0253             return -1;
0254         if (ref1->root > ref2->root)
0255             return 1;
0256         if (ref1->objectid < ref2->objectid)
0257             return -1;
0258         if (ref1->objectid > ref2->objectid)
0259             return 1;
0260         if (ref1->offset < ref2->offset)
0261             return -1;
0262         if (ref1->offset > ref2->offset)
0263             return 1;
0264     } else {
0265         if (ref1->parent < ref2->parent)
0266             return -1;
0267         if (ref1->parent > ref2->parent)
0268             return 1;
0269     }
0270     return 0;
0271 }
0272 
0273 static int comp_refs(struct btrfs_delayed_ref_node *ref1,
0274              struct btrfs_delayed_ref_node *ref2,
0275              bool check_seq)
0276 {
0277     int ret = 0;
0278 
0279     if (ref1->type < ref2->type)
0280         return -1;
0281     if (ref1->type > ref2->type)
0282         return 1;
0283     if (ref1->type == BTRFS_TREE_BLOCK_REF_KEY ||
0284         ref1->type == BTRFS_SHARED_BLOCK_REF_KEY)
0285         ret = comp_tree_refs(btrfs_delayed_node_to_tree_ref(ref1),
0286                      btrfs_delayed_node_to_tree_ref(ref2));
0287     else
0288         ret = comp_data_refs(btrfs_delayed_node_to_data_ref(ref1),
0289                      btrfs_delayed_node_to_data_ref(ref2));
0290     if (ret)
0291         return ret;
0292     if (check_seq) {
0293         if (ref1->seq < ref2->seq)
0294             return -1;
0295         if (ref1->seq > ref2->seq)
0296             return 1;
0297     }
0298     return 0;
0299 }
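/*
 * Taken together, the comparators above define the sort order of refs inside
 * a head's ref_tree: first by ref type, then by the per-type fields (root for
 * keyed refs, parent for shared refs, plus objectid/offset for keyed data
 * refs), and finally by seq when check_seq is set.  Two refs that compare
 * equal with check_seq == false are candidates for merging.
 */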
0300 
0301 /* insert a new head ref into the head ref rbtree */
0302 static struct btrfs_delayed_ref_head *htree_insert(struct rb_root_cached *root,
0303                            struct rb_node *node)
0304 {
0305     struct rb_node **p = &root->rb_root.rb_node;
0306     struct rb_node *parent_node = NULL;
0307     struct btrfs_delayed_ref_head *entry;
0308     struct btrfs_delayed_ref_head *ins;
0309     u64 bytenr;
0310     bool leftmost = true;
0311 
0312     ins = rb_entry(node, struct btrfs_delayed_ref_head, href_node);
0313     bytenr = ins->bytenr;
0314     while (*p) {
0315         parent_node = *p;
0316         entry = rb_entry(parent_node, struct btrfs_delayed_ref_head,
0317                  href_node);
0318 
0319         if (bytenr < entry->bytenr) {
0320             p = &(*p)->rb_left;
0321         } else if (bytenr > entry->bytenr) {
0322             p = &(*p)->rb_right;
0323             leftmost = false;
0324         } else {
0325             return entry;
0326         }
0327     }
0328 
0329     rb_link_node(node, parent_node, p);
0330     rb_insert_color_cached(node, root, leftmost);
0331     return NULL;
0332 }
0333 
0334 static struct btrfs_delayed_ref_node* tree_insert(struct rb_root_cached *root,
0335         struct btrfs_delayed_ref_node *ins)
0336 {
0337     struct rb_node **p = &root->rb_root.rb_node;
0338     struct rb_node *node = &ins->ref_node;
0339     struct rb_node *parent_node = NULL;
0340     struct btrfs_delayed_ref_node *entry;
0341     bool leftmost = true;
0342 
0343     while (*p) {
0344         int comp;
0345 
0346         parent_node = *p;
0347         entry = rb_entry(parent_node, struct btrfs_delayed_ref_node,
0348                  ref_node);
0349         comp = comp_refs(ins, entry, true);
0350         if (comp < 0) {
0351             p = &(*p)->rb_left;
0352         } else if (comp > 0) {
0353             p = &(*p)->rb_right;
0354             leftmost = false;
0355         } else {
0356             return entry;
0357         }
0358     }
0359 
0360     rb_link_node(node, parent_node, p);
0361     rb_insert_color_cached(node, root, leftmost);
0362     return NULL;
0363 }
0364 
0365 static struct btrfs_delayed_ref_head *find_first_ref_head(
0366         struct btrfs_delayed_ref_root *dr)
0367 {
0368     struct rb_node *n;
0369     struct btrfs_delayed_ref_head *entry;
0370 
0371     n = rb_first_cached(&dr->href_root);
0372     if (!n)
0373         return NULL;
0374 
0375     entry = rb_entry(n, struct btrfs_delayed_ref_head, href_node);
0376 
0377     return entry;
0378 }
0379 
0380 /*
0381  * Find a head entry based on bytenr. This returns the delayed ref head if it
0382  * was able to find one, or NULL if nothing was in that spot.  If return_bigger
0383  * is given, the next bigger entry is returned if no exact match is found.
0384  */
0385 static struct btrfs_delayed_ref_head *find_ref_head(
0386         struct btrfs_delayed_ref_root *dr, u64 bytenr,
0387         bool return_bigger)
0388 {
0389     struct rb_root *root = &dr->href_root.rb_root;
0390     struct rb_node *n;
0391     struct btrfs_delayed_ref_head *entry;
0392 
0393     n = root->rb_node;
0394     entry = NULL;
0395     while (n) {
0396         entry = rb_entry(n, struct btrfs_delayed_ref_head, href_node);
0397 
0398         if (bytenr < entry->bytenr)
0399             n = n->rb_left;
0400         else if (bytenr > entry->bytenr)
0401             n = n->rb_right;
0402         else
0403             return entry;
0404     }
0405     if (entry && return_bigger) {
0406         if (bytenr > entry->bytenr) {
0407             n = rb_next(&entry->href_node);
0408             if (!n)
0409                 return NULL;
0410             entry = rb_entry(n, struct btrfs_delayed_ref_head,
0411                      href_node);
0412         }
0413         return entry;
0414     }
0415     return NULL;
0416 }
0417 
0418 int btrfs_delayed_ref_lock(struct btrfs_delayed_ref_root *delayed_refs,
0419                struct btrfs_delayed_ref_head *head)
0420 {
0421     lockdep_assert_held(&delayed_refs->lock);
0422     if (mutex_trylock(&head->mutex))
0423         return 0;
0424 
0425     refcount_inc(&head->refs);
0426     spin_unlock(&delayed_refs->lock);
0427 
0428     mutex_lock(&head->mutex);
0429     spin_lock(&delayed_refs->lock);
0430     if (RB_EMPTY_NODE(&head->href_node)) {
0431         mutex_unlock(&head->mutex);
0432         btrfs_put_delayed_ref_head(head);
0433         return -EAGAIN;
0434     }
0435     btrfs_put_delayed_ref_head(head);
0436     return 0;
0437 }
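/*
 * Note that btrfs_delayed_ref_lock() returns with delayed_refs->lock held in
 * both cases.  -EAGAIN means the head was processed and erased from the
 * rbtree while we slept on its mutex (hence the RB_EMPTY_NODE() check above),
 * so the caller must look the head up again instead of reusing this one.
 */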
0438 
0439 static inline void drop_delayed_ref(struct btrfs_trans_handle *trans,
0440                     struct btrfs_delayed_ref_root *delayed_refs,
0441                     struct btrfs_delayed_ref_head *head,
0442                     struct btrfs_delayed_ref_node *ref)
0443 {
0444     lockdep_assert_held(&head->lock);
0445     rb_erase_cached(&ref->ref_node, &head->ref_tree);
0446     RB_CLEAR_NODE(&ref->ref_node);
0447     if (!list_empty(&ref->add_list))
0448         list_del(&ref->add_list);
0449     ref->in_tree = 0;
0450     btrfs_put_delayed_ref(ref);
0451     atomic_dec(&delayed_refs->num_entries);
0452 }
0453 
0454 static bool merge_ref(struct btrfs_trans_handle *trans,
0455               struct btrfs_delayed_ref_root *delayed_refs,
0456               struct btrfs_delayed_ref_head *head,
0457               struct btrfs_delayed_ref_node *ref,
0458               u64 seq)
0459 {
0460     struct btrfs_delayed_ref_node *next;
0461     struct rb_node *node = rb_next(&ref->ref_node);
0462     bool done = false;
0463 
0464     while (!done && node) {
0465         int mod;
0466 
0467         next = rb_entry(node, struct btrfs_delayed_ref_node, ref_node);
0468         node = rb_next(node);
0469         if (seq && next->seq >= seq)
0470             break;
0471         if (comp_refs(ref, next, false))
0472             break;
0473 
0474         if (ref->action == next->action) {
0475             mod = next->ref_mod;
0476         } else {
0477             if (ref->ref_mod < next->ref_mod) {
0478                 swap(ref, next);
0479                 done = true;
0480             }
0481             mod = -next->ref_mod;
0482         }
0483 
0484         drop_delayed_ref(trans, delayed_refs, head, next);
0485         ref->ref_mod += mod;
0486         if (ref->ref_mod == 0) {
0487             drop_delayed_ref(trans, delayed_refs, head, ref);
0488             done = true;
0489         } else {
0490             /*
0491              * Can't have multiples of the same ref on a tree block.
0492              */
0493             WARN_ON(ref->type == BTRFS_TREE_BLOCK_REF_KEY ||
0494                 ref->type == BTRFS_SHARED_BLOCK_REF_KEY);
0495         }
0496     }
0497 
0498     return done;
0499 }
0500 
0501 void btrfs_merge_delayed_refs(struct btrfs_trans_handle *trans,
0502                   struct btrfs_delayed_ref_root *delayed_refs,
0503                   struct btrfs_delayed_ref_head *head)
0504 {
0505     struct btrfs_fs_info *fs_info = trans->fs_info;
0506     struct btrfs_delayed_ref_node *ref;
0507     struct rb_node *node;
0508     u64 seq = 0;
0509 
0510     lockdep_assert_held(&head->lock);
0511 
0512     if (RB_EMPTY_ROOT(&head->ref_tree.rb_root))
0513         return;
0514 
0515     /* We don't have too many refs to merge for data. */
0516     if (head->is_data)
0517         return;
0518 
0519     seq = btrfs_tree_mod_log_lowest_seq(fs_info);
0520 again:
0521     for (node = rb_first_cached(&head->ref_tree); node;
0522          node = rb_next(node)) {
0523         ref = rb_entry(node, struct btrfs_delayed_ref_node, ref_node);
0524         if (seq && ref->seq >= seq)
0525             continue;
0526         if (merge_ref(trans, delayed_refs, head, ref, seq))
0527             goto again;
0528     }
0529 }
0530 
0531 int btrfs_check_delayed_seq(struct btrfs_fs_info *fs_info, u64 seq)
0532 {
0533     int ret = 0;
0534     u64 min_seq = btrfs_tree_mod_log_lowest_seq(fs_info);
0535 
0536     if (min_seq != 0 && seq >= min_seq) {
0537         btrfs_debug(fs_info,
0538                 "holding back delayed_ref %llu, lowest is %llu",
0539                 seq, min_seq);
0540         ret = 1;
0541     }
0542 
0543     return ret;
0544 }
0545 
0546 struct btrfs_delayed_ref_head *btrfs_select_ref_head(
0547         struct btrfs_delayed_ref_root *delayed_refs)
0548 {
0549     struct btrfs_delayed_ref_head *head;
0550 
0551 again:
0552     head = find_ref_head(delayed_refs, delayed_refs->run_delayed_start,
0553                  true);
0554     if (!head && delayed_refs->run_delayed_start != 0) {
0555         delayed_refs->run_delayed_start = 0;
0556         head = find_first_ref_head(delayed_refs);
0557     }
0558     if (!head)
0559         return NULL;
0560 
0561     while (head->processing) {
0562         struct rb_node *node;
0563 
0564         node = rb_next(&head->href_node);
0565         if (!node) {
0566             if (delayed_refs->run_delayed_start == 0)
0567                 return NULL;
0568             delayed_refs->run_delayed_start = 0;
0569             goto again;
0570         }
0571         head = rb_entry(node, struct btrfs_delayed_ref_head,
0572                 href_node);
0573     }
0574 
0575     head->processing = 1;
0576     WARN_ON(delayed_refs->num_heads_ready == 0);
0577     delayed_refs->num_heads_ready--;
0578     delayed_refs->run_delayed_start = head->bytenr +
0579         head->num_bytes;
0580     return head;
0581 }
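/*
 * A minimal sketch of the consumer loop built on btrfs_select_ref_head(),
 * assembled from the helpers in this file (hypothetical caller; the real
 * run-delayed-refs loop differs in detail):
 *
 *	spin_lock(&delayed_refs->lock);
 *	head = btrfs_select_ref_head(delayed_refs);
 *	if (!head) {
 *		spin_unlock(&delayed_refs->lock);
 *		return 0;
 *	}
 *	ret = btrfs_delayed_ref_lock(delayed_refs, head);
 *	spin_unlock(&delayed_refs->lock);
 *	if (ret == -EAGAIN)
 *		goto again;
 *	... run the head's refs, then btrfs_delete_ref_head() with both
 *	    delayed_refs->lock and head->lock held ...
 */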
0582 
0583 void btrfs_delete_ref_head(struct btrfs_delayed_ref_root *delayed_refs,
0584                struct btrfs_delayed_ref_head *head)
0585 {
0586     lockdep_assert_held(&delayed_refs->lock);
0587     lockdep_assert_held(&head->lock);
0588 
0589     rb_erase_cached(&head->href_node, &delayed_refs->href_root);
0590     RB_CLEAR_NODE(&head->href_node);
0591     atomic_dec(&delayed_refs->num_entries);
0592     delayed_refs->num_heads--;
0593     if (head->processing == 0)
0594         delayed_refs->num_heads_ready--;
0595 }
0596 
0597 /*
0598  * Helper to insert the ref_node into the head's ref tree, or merge it with an existing ref.
0599  *
0600  * Return 0 for insert.
0601  * Return >0 for merge.
0602  */
0603 static int insert_delayed_ref(struct btrfs_trans_handle *trans,
0604                   struct btrfs_delayed_ref_root *root,
0605                   struct btrfs_delayed_ref_head *href,
0606                   struct btrfs_delayed_ref_node *ref)
0607 {
0608     struct btrfs_delayed_ref_node *exist;
0609     int mod;
0610     int ret = 0;
0611 
0612     spin_lock(&href->lock);
0613     exist = tree_insert(&href->ref_tree, ref);
0614     if (!exist)
0615         goto inserted;
0616 
0617     /* Now we are sure we can merge */
0618     ret = 1;
0619     if (exist->action == ref->action) {
0620         mod = ref->ref_mod;
0621     } else {
0622         /* Need to change action */
0623         if (exist->ref_mod < ref->ref_mod) {
0624             exist->action = ref->action;
0625             mod = -exist->ref_mod;
0626             exist->ref_mod = ref->ref_mod;
0627             if (ref->action == BTRFS_ADD_DELAYED_REF)
0628                 list_add_tail(&exist->add_list,
0629                           &href->ref_add_list);
0630             else if (ref->action == BTRFS_DROP_DELAYED_REF) {
0631                 ASSERT(!list_empty(&exist->add_list));
0632                 list_del(&exist->add_list);
0633             } else {
0634                 ASSERT(0);
0635             }
0636         } else
0637             mod = -ref->ref_mod;
0638     }
0639     exist->ref_mod += mod;
0640 
0641     /* remove existing tail if its ref_mod is zero */
0642     if (exist->ref_mod == 0)
0643         drop_delayed_ref(trans, root, href, exist);
0644     spin_unlock(&href->lock);
0645     return ret;
0646 inserted:
0647     if (ref->action == BTRFS_ADD_DELAYED_REF)
0648         list_add_tail(&ref->add_list, &href->ref_add_list);
0649     atomic_inc(&root->num_entries);
0650     spin_unlock(&href->lock);
0651     return ret;
0652 }
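/*
 * The merge rules above boil down to: refs with the same action have their
 * ref_mods added together; refs with opposite actions keep the action of the
 * node with the larger ref_mod and end up with the difference of the two; a
 * resulting ref_mod of zero means the operations cancel out and the node is
 * dropped.
 */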
0653 
0654 /*
0655  * helper function to update the accounting in the head ref
0656  * existing and update must have the same bytenr
0657  */
0658 static noinline void update_existing_head_ref(struct btrfs_trans_handle *trans,
0659              struct btrfs_delayed_ref_head *existing,
0660              struct btrfs_delayed_ref_head *update)
0661 {
0662     struct btrfs_delayed_ref_root *delayed_refs =
0663         &trans->transaction->delayed_refs;
0664     struct btrfs_fs_info *fs_info = trans->fs_info;
0665     int old_ref_mod;
0666 
0667     BUG_ON(existing->is_data != update->is_data);
0668 
0669     spin_lock(&existing->lock);
0670     if (update->must_insert_reserved) {
0671         /* if the extent was freed and then
0672          * reallocated before the delayed ref
0673          * entries were processed, we can end up
0674          * with an existing head ref without
0675          * the must_insert_reserved flag set.
0676          * Set it again here
0677          */
0678         existing->must_insert_reserved = update->must_insert_reserved;
0679 
0680         /*
0681          * update the num_bytes so we make sure the accounting
0682          * is done correctly
0683          */
0684         existing->num_bytes = update->num_bytes;
0685 
0686     }
0687 
0688     if (update->extent_op) {
0689         if (!existing->extent_op) {
0690             existing->extent_op = update->extent_op;
0691         } else {
0692             if (update->extent_op->update_key) {
0693                 memcpy(&existing->extent_op->key,
0694                        &update->extent_op->key,
0695                        sizeof(update->extent_op->key));
0696                 existing->extent_op->update_key = true;
0697             }
0698             if (update->extent_op->update_flags) {
0699                 existing->extent_op->flags_to_set |=
0700                     update->extent_op->flags_to_set;
0701                 existing->extent_op->update_flags = true;
0702             }
0703             btrfs_free_delayed_extent_op(update->extent_op);
0704         }
0705     }
0706     /*
0707      * Update the reference mod on the head to reflect this new operation.
0708      * We only need the lock for this case because we could be processing it
0709      * currently; for refs we just added we know we're a-ok.
0710      */
0711     old_ref_mod = existing->total_ref_mod;
0712     existing->ref_mod += update->ref_mod;
0713     existing->total_ref_mod += update->ref_mod;
0714 
0715     /*
0716      * If we are going from a positive ref mod to a negative or vice
0717      * versa we need to make sure to adjust pending_csums accordingly.
0718      */
0719     if (existing->is_data) {
0720         u64 csum_leaves =
0721             btrfs_csum_bytes_to_leaves(fs_info,
0722                            existing->num_bytes);
0723 
0724         if (existing->total_ref_mod >= 0 && old_ref_mod < 0) {
0725             delayed_refs->pending_csums -= existing->num_bytes;
0726             btrfs_delayed_refs_rsv_release(fs_info, csum_leaves);
0727         }
0728         if (existing->total_ref_mod < 0 && old_ref_mod >= 0) {
0729             delayed_refs->pending_csums += existing->num_bytes;
0730             trans->delayed_ref_updates += csum_leaves;
0731         }
0732     }
0733 
0734     spin_unlock(&existing->lock);
0735 }
0736 
0737 static void init_delayed_ref_head(struct btrfs_delayed_ref_head *head_ref,
0738                   struct btrfs_qgroup_extent_record *qrecord,
0739                   u64 bytenr, u64 num_bytes, u64 ref_root,
0740                   u64 reserved, int action, bool is_data,
0741                   bool is_system)
0742 {
0743     int count_mod = 1;
0744     int must_insert_reserved = 0;
0745 
0746     /* If reserved is provided, it must be a data extent. */
0747     BUG_ON(!is_data && reserved);
0748 
0749     /*
0750      * The head node stores the sum of all the mods, so dropping a ref
0751      * should drop the sum in the head node by one.
0752      */
0753     if (action == BTRFS_UPDATE_DELAYED_HEAD)
0754         count_mod = 0;
0755     else if (action == BTRFS_DROP_DELAYED_REF)
0756         count_mod = -1;
0757 
0758     /*
0759      * BTRFS_ADD_DELAYED_EXTENT means that we need to update the reserved
0760      * accounting when the extent is finally added, or if a later
0761      * modification deletes the delayed ref without ever inserting the
0762      * extent into the extent allocation tree.  ref->must_insert_reserved
0763      * is the flag used to record that accounting mods are required.
0764      *
0765      * Once we record must_insert_reserved, switch the action to
0766      * BTRFS_ADD_DELAYED_REF because other special casing is not required.
0767      */
0768     if (action == BTRFS_ADD_DELAYED_EXTENT)
0769         must_insert_reserved = 1;
0770     else
0771         must_insert_reserved = 0;
0772 
0773     refcount_set(&head_ref->refs, 1);
0774     head_ref->bytenr = bytenr;
0775     head_ref->num_bytes = num_bytes;
0776     head_ref->ref_mod = count_mod;
0777     head_ref->must_insert_reserved = must_insert_reserved;
0778     head_ref->is_data = is_data;
0779     head_ref->is_system = is_system;
0780     head_ref->ref_tree = RB_ROOT_CACHED;
0781     INIT_LIST_HEAD(&head_ref->ref_add_list);
0782     RB_CLEAR_NODE(&head_ref->href_node);
0783     head_ref->processing = 0;
0784     head_ref->total_ref_mod = count_mod;
0785     spin_lock_init(&head_ref->lock);
0786     mutex_init(&head_ref->mutex);
0787 
0788     if (qrecord) {
0789         if (ref_root && reserved) {
0790             qrecord->data_rsv = reserved;
0791             qrecord->data_rsv_refroot = ref_root;
0792         }
0793         qrecord->bytenr = bytenr;
0794         qrecord->num_bytes = num_bytes;
0795         qrecord->old_roots = NULL;
0796     }
0797 }
0798 
0799 /*
0800  * helper function to actually insert a head node into the rbtree.
0801  * this does all the dirty work in terms of maintaining the correct
0802  * overall modification count.
0803  */
0804 static noinline struct btrfs_delayed_ref_head *
0805 add_delayed_ref_head(struct btrfs_trans_handle *trans,
0806              struct btrfs_delayed_ref_head *head_ref,
0807              struct btrfs_qgroup_extent_record *qrecord,
0808              int action, int *qrecord_inserted_ret)
0809 {
0810     struct btrfs_delayed_ref_head *existing;
0811     struct btrfs_delayed_ref_root *delayed_refs;
0812     int qrecord_inserted = 0;
0813 
0814     delayed_refs = &trans->transaction->delayed_refs;
0815 
0816     /* Record qgroup extent info if provided */
0817     if (qrecord) {
0818         if (btrfs_qgroup_trace_extent_nolock(trans->fs_info,
0819                     delayed_refs, qrecord))
0820             kfree(qrecord);
0821         else
0822             qrecord_inserted = 1;
0823     }
0824 
0825     trace_add_delayed_ref_head(trans->fs_info, head_ref, action);
0826 
0827     existing = htree_insert(&delayed_refs->href_root,
0828                 &head_ref->href_node);
0829     if (existing) {
0830         update_existing_head_ref(trans, existing, head_ref);
0831         /*
0832          * we've updated the existing ref, free the newly
0833          * allocated ref
0834          */
0835         kmem_cache_free(btrfs_delayed_ref_head_cachep, head_ref);
0836         head_ref = existing;
0837     } else {
0838         if (head_ref->is_data && head_ref->ref_mod < 0) {
0839             delayed_refs->pending_csums += head_ref->num_bytes;
0840             trans->delayed_ref_updates +=
0841                 btrfs_csum_bytes_to_leaves(trans->fs_info,
0842                                head_ref->num_bytes);
0843         }
0844         delayed_refs->num_heads++;
0845         delayed_refs->num_heads_ready++;
0846         atomic_inc(&delayed_refs->num_entries);
0847         trans->delayed_ref_updates++;
0848     }
0849     if (qrecord_inserted_ret)
0850         *qrecord_inserted_ret = qrecord_inserted;
0851 
0852     return head_ref;
0853 }
0854 
0855 /*
0856  * init_delayed_ref_common - Initialize the structure which represents a
0857  *               modification to an extent.
0858  *
0859  * @fs_info:    The fs_info structure of the mounted filesystem.
0860  *
0861  * @ref:    The structure which is going to be initialized.
0862  *
0863  * @bytenr: The logical address of the extent for which a modification is
0864  *      going to be recorded.
0865  *
0866  * @num_bytes:  Size of the extent whose modification is being recorded.
0867  *
0868  * @ref_root:   The id of the root where this modification has originated, this
0869  *      can be either one of the well-known metadata trees or the
0870  *      subvolume id which references this extent.
0871  *
0872  * @action: Can be one of BTRFS_ADD_DELAYED_REF/BTRFS_DROP_DELAYED_REF or
0873  *      BTRFS_ADD_DELAYED_EXTENT
0874  *
0875  * @ref_type:   Holds the type of the extent which is being recorded, can be
0876  *      one of BTRFS_SHARED_BLOCK_REF_KEY/BTRFS_TREE_BLOCK_REF_KEY
0877  *      when recording a metadata extent or BTRFS_SHARED_DATA_REF_KEY/
0878  *      BTRFS_EXTENT_DATA_REF_KEY when recording a data extent
0879  */
0880 static void init_delayed_ref_common(struct btrfs_fs_info *fs_info,
0881                     struct btrfs_delayed_ref_node *ref,
0882                     u64 bytenr, u64 num_bytes, u64 ref_root,
0883                     int action, u8 ref_type)
0884 {
0885     u64 seq = 0;
0886 
0887     if (action == BTRFS_ADD_DELAYED_EXTENT)
0888         action = BTRFS_ADD_DELAYED_REF;
0889 
0890     if (is_fstree(ref_root))
0891         seq = atomic64_read(&fs_info->tree_mod_seq);
0892 
0893     refcount_set(&ref->refs, 1);
0894     ref->bytenr = bytenr;
0895     ref->num_bytes = num_bytes;
0896     ref->ref_mod = 1;
0897     ref->action = action;
0898     ref->is_head = 0;
0899     ref->in_tree = 1;
0900     ref->seq = seq;
0901     ref->type = ref_type;
0902     RB_CLEAR_NODE(&ref->ref_node);
0903     INIT_LIST_HEAD(&ref->add_list);
0904 }
0905 
0906 /*
0907  * add a delayed tree ref.  This does all of the accounting required
0908  * to make sure the delayed ref is eventually processed before this
0909  * transaction commits.
0910  */
0911 int btrfs_add_delayed_tree_ref(struct btrfs_trans_handle *trans,
0912                    struct btrfs_ref *generic_ref,
0913                    struct btrfs_delayed_extent_op *extent_op)
0914 {
0915     struct btrfs_fs_info *fs_info = trans->fs_info;
0916     struct btrfs_delayed_tree_ref *ref;
0917     struct btrfs_delayed_ref_head *head_ref;
0918     struct btrfs_delayed_ref_root *delayed_refs;
0919     struct btrfs_qgroup_extent_record *record = NULL;
0920     int qrecord_inserted;
0921     bool is_system;
0922     int action = generic_ref->action;
0923     int level = generic_ref->tree_ref.level;
0924     int ret;
0925     u64 bytenr = generic_ref->bytenr;
0926     u64 num_bytes = generic_ref->len;
0927     u64 parent = generic_ref->parent;
0928     u8 ref_type;
0929 
0930     is_system = (generic_ref->tree_ref.owning_root == BTRFS_CHUNK_TREE_OBJECTID);
0931 
0932     ASSERT(generic_ref->type == BTRFS_REF_METADATA && generic_ref->action);
0933     ref = kmem_cache_alloc(btrfs_delayed_tree_ref_cachep, GFP_NOFS);
0934     if (!ref)
0935         return -ENOMEM;
0936 
0937     head_ref = kmem_cache_alloc(btrfs_delayed_ref_head_cachep, GFP_NOFS);
0938     if (!head_ref) {
0939         kmem_cache_free(btrfs_delayed_tree_ref_cachep, ref);
0940         return -ENOMEM;
0941     }
0942 
0943     if (test_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags) &&
0944         !generic_ref->skip_qgroup) {
0945         record = kzalloc(sizeof(*record), GFP_NOFS);
0946         if (!record) {
0947             kmem_cache_free(btrfs_delayed_tree_ref_cachep, ref);
0948             kmem_cache_free(btrfs_delayed_ref_head_cachep, head_ref);
0949             return -ENOMEM;
0950         }
0951     }
0952 
0953     if (parent)
0954         ref_type = BTRFS_SHARED_BLOCK_REF_KEY;
0955     else
0956         ref_type = BTRFS_TREE_BLOCK_REF_KEY;
0957 
0958     init_delayed_ref_common(fs_info, &ref->node, bytenr, num_bytes,
0959                 generic_ref->tree_ref.owning_root, action,
0960                 ref_type);
0961     ref->root = generic_ref->tree_ref.owning_root;
0962     ref->parent = parent;
0963     ref->level = level;
0964 
0965     init_delayed_ref_head(head_ref, record, bytenr, num_bytes,
0966                   generic_ref->tree_ref.owning_root, 0, action,
0967                   false, is_system);
0968     head_ref->extent_op = extent_op;
0969 
0970     delayed_refs = &trans->transaction->delayed_refs;
0971     spin_lock(&delayed_refs->lock);
0972 
0973     /*
0974      * insert both the head node and the new ref without dropping
0975      * the spin lock
0976      */
0977     head_ref = add_delayed_ref_head(trans, head_ref, record,
0978                     action, &qrecord_inserted);
0979 
0980     ret = insert_delayed_ref(trans, delayed_refs, head_ref, &ref->node);
0981     spin_unlock(&delayed_refs->lock);
0982 
0983     /*
0984      * Need to update the delayed_refs_rsv with any changes we may have
0985      * made.
0986      */
0987     btrfs_update_delayed_refs_rsv(trans);
0988 
0989     trace_add_delayed_tree_ref(fs_info, &ref->node, ref,
0990                    action == BTRFS_ADD_DELAYED_EXTENT ?
0991                    BTRFS_ADD_DELAYED_REF : action);
0992     if (ret > 0)
0993         kmem_cache_free(btrfs_delayed_tree_ref_cachep, ref);
0994 
0995     if (qrecord_inserted)
0996         btrfs_qgroup_trace_extent_post(trans, record);
0997 
0998     return 0;
0999 }
1000 
1001 /*
1002  * add a delayed data ref. it's similar to btrfs_add_delayed_tree_ref.
1003  */
1004 int btrfs_add_delayed_data_ref(struct btrfs_trans_handle *trans,
1005                    struct btrfs_ref *generic_ref,
1006                    u64 reserved)
1007 {
1008     struct btrfs_fs_info *fs_info = trans->fs_info;
1009     struct btrfs_delayed_data_ref *ref;
1010     struct btrfs_delayed_ref_head *head_ref;
1011     struct btrfs_delayed_ref_root *delayed_refs;
1012     struct btrfs_qgroup_extent_record *record = NULL;
1013     int qrecord_inserted;
1014     int action = generic_ref->action;
1015     int ret;
1016     u64 bytenr = generic_ref->bytenr;
1017     u64 num_bytes = generic_ref->len;
1018     u64 parent = generic_ref->parent;
1019     u64 ref_root = generic_ref->data_ref.owning_root;
1020     u64 owner = generic_ref->data_ref.ino;
1021     u64 offset = generic_ref->data_ref.offset;
1022     u8 ref_type;
1023 
1024     ASSERT(generic_ref->type == BTRFS_REF_DATA && action);
1025     ref = kmem_cache_alloc(btrfs_delayed_data_ref_cachep, GFP_NOFS);
1026     if (!ref)
1027         return -ENOMEM;
1028 
1029     if (parent)
1030         ref_type = BTRFS_SHARED_DATA_REF_KEY;
1031     else
1032         ref_type = BTRFS_EXTENT_DATA_REF_KEY;
1033     init_delayed_ref_common(fs_info, &ref->node, bytenr, num_bytes,
1034                 ref_root, action, ref_type);
1035     ref->root = ref_root;
1036     ref->parent = parent;
1037     ref->objectid = owner;
1038     ref->offset = offset;
1039 
1040 
1041     head_ref = kmem_cache_alloc(btrfs_delayed_ref_head_cachep, GFP_NOFS);
1042     if (!head_ref) {
1043         kmem_cache_free(btrfs_delayed_data_ref_cachep, ref);
1044         return -ENOMEM;
1045     }
1046 
1047     if (test_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags) &&
1048         !generic_ref->skip_qgroup) {
1049         record = kzalloc(sizeof(*record), GFP_NOFS);
1050         if (!record) {
1051             kmem_cache_free(btrfs_delayed_data_ref_cachep, ref);
1052             kmem_cache_free(btrfs_delayed_ref_head_cachep,
1053                     head_ref);
1054             return -ENOMEM;
1055         }
1056     }
1057 
1058     init_delayed_ref_head(head_ref, record, bytenr, num_bytes, ref_root,
1059                   reserved, action, true, false);
1060     head_ref->extent_op = NULL;
1061 
1062     delayed_refs = &trans->transaction->delayed_refs;
1063     spin_lock(&delayed_refs->lock);
1064 
1065     /*
1066      * insert both the head node and the new ref without dropping
1067      * the spin lock
1068      */
1069     head_ref = add_delayed_ref_head(trans, head_ref, record,
1070                     action, &qrecord_inserted);
1071 
1072     ret = insert_delayed_ref(trans, delayed_refs, head_ref, &ref->node);
1073     spin_unlock(&delayed_refs->lock);
1074 
1075     /*
1076      * Need to update the delayed_refs_rsv with any changes we may have
1077      * made.
1078      */
1079     btrfs_update_delayed_refs_rsv(trans);
1080 
1081     trace_add_delayed_data_ref(trans->fs_info, &ref->node, ref,
1082                    action == BTRFS_ADD_DELAYED_EXTENT ?
1083                    BTRFS_ADD_DELAYED_REF : action);
1084     if (ret > 0)
1085         kmem_cache_free(btrfs_delayed_data_ref_cachep, ref);
1086 
1087 
1088     if (qrecord_inserted)
1089         return btrfs_qgroup_trace_extent_post(trans, record);
1090     return 0;
1091 }
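/*
 * A minimal sketch of adding a reference to a data extent (hypothetical
 * caller and variable names; real callers normally build the btrfs_ref with
 * the init helpers in delayed-ref.h instead of open coding the fields):
 *
 *	struct btrfs_ref ref = { 0 };
 *
 *	ref.type = BTRFS_REF_DATA;
 *	ref.action = BTRFS_ADD_DELAYED_REF;
 *	ref.bytenr = extent_bytenr;
 *	ref.len = extent_num_bytes;
 *	ref.parent = 0;                         (keyed backref, not shared)
 *	ref.data_ref.owning_root = root_objectid;
 *	ref.data_ref.ino = inode_objectid;
 *	ref.data_ref.offset = file_offset;
 *
 *	ret = btrfs_add_delayed_data_ref(trans, &ref, 0);
 *
 * The last argument (reserved) is only non-zero for a freshly allocated data
 * extent whose qgroup data reservation needs to be carried into the qgroup
 * record, as init_delayed_ref_head() above shows.
 */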
1092 
1093 int btrfs_add_delayed_extent_op(struct btrfs_trans_handle *trans,
1094                 u64 bytenr, u64 num_bytes,
1095                 struct btrfs_delayed_extent_op *extent_op)
1096 {
1097     struct btrfs_delayed_ref_head *head_ref;
1098     struct btrfs_delayed_ref_root *delayed_refs;
1099 
1100     head_ref = kmem_cache_alloc(btrfs_delayed_ref_head_cachep, GFP_NOFS);
1101     if (!head_ref)
1102         return -ENOMEM;
1103 
1104     init_delayed_ref_head(head_ref, NULL, bytenr, num_bytes, 0, 0,
1105                   BTRFS_UPDATE_DELAYED_HEAD, false, false);
1106     head_ref->extent_op = extent_op;
1107 
1108     delayed_refs = &trans->transaction->delayed_refs;
1109     spin_lock(&delayed_refs->lock);
1110 
1111     add_delayed_ref_head(trans, head_ref, NULL, BTRFS_UPDATE_DELAYED_HEAD,
1112                  NULL);
1113 
1114     spin_unlock(&delayed_refs->lock);
1115 
1116     /*
1117      * Need to update the delayed_refs_rsv with any changes we may have
1118      * made.
1119      */
1120     btrfs_update_delayed_refs_rsv(trans);
1121     return 0;
1122 }
1123 
1124 /*
1125  * This does a simple search for the head node for a given extent.  Returns the
1126  * head node if found, or NULL if not.
1127  */
1128 struct btrfs_delayed_ref_head *
1129 btrfs_find_delayed_ref_head(struct btrfs_delayed_ref_root *delayed_refs, u64 bytenr)
1130 {
1131     lockdep_assert_held(&delayed_refs->lock);
1132 
1133     return find_ref_head(delayed_refs, bytenr, false);
1134 }
1135 
1136 void __cold btrfs_delayed_ref_exit(void)
1137 {
1138     kmem_cache_destroy(btrfs_delayed_ref_head_cachep);
1139     kmem_cache_destroy(btrfs_delayed_tree_ref_cachep);
1140     kmem_cache_destroy(btrfs_delayed_data_ref_cachep);
1141     kmem_cache_destroy(btrfs_delayed_extent_op_cachep);
1142 }
1143 
1144 int __init btrfs_delayed_ref_init(void)
1145 {
1146     btrfs_delayed_ref_head_cachep = kmem_cache_create(
1147                 "btrfs_delayed_ref_head",
1148                 sizeof(struct btrfs_delayed_ref_head), 0,
1149                 SLAB_MEM_SPREAD, NULL);
1150     if (!btrfs_delayed_ref_head_cachep)
1151         goto fail;
1152 
1153     btrfs_delayed_tree_ref_cachep = kmem_cache_create(
1154                 "btrfs_delayed_tree_ref",
1155                 sizeof(struct btrfs_delayed_tree_ref), 0,
1156                 SLAB_MEM_SPREAD, NULL);
1157     if (!btrfs_delayed_tree_ref_cachep)
1158         goto fail;
1159 
1160     btrfs_delayed_data_ref_cachep = kmem_cache_create(
1161                 "btrfs_delayed_data_ref",
1162                 sizeof(struct btrfs_delayed_data_ref), 0,
1163                 SLAB_MEM_SPREAD, NULL);
1164     if (!btrfs_delayed_data_ref_cachep)
1165         goto fail;
1166 
1167     btrfs_delayed_extent_op_cachep = kmem_cache_create(
1168                 "btrfs_delayed_extent_op",
1169                 sizeof(struct btrfs_delayed_extent_op), 0,
1170                 SLAB_MEM_SPREAD, NULL);
1171     if (!btrfs_delayed_extent_op_cachep)
1172         goto fail;
1173 
1174     return 0;
1175 fail:
1176     btrfs_delayed_ref_exit();
1177     return -ENOMEM;
1178 }