0001 // SPDX-License-Identifier: GPL-2.0
0002 /*
0003  * Copyright (C) 2011 Fujitsu.  All rights reserved.
0004  * Written by Miao Xie <miaox@cn.fujitsu.com>
0005  */
0006 
0007 #include <linux/slab.h>
0008 #include <linux/iversion.h>
0009 #include "misc.h"
0010 #include "delayed-inode.h"
0011 #include "disk-io.h"
0012 #include "transaction.h"
0013 #include "ctree.h"
0014 #include "qgroup.h"
0015 #include "locking.h"
0016 #include "inode-item.h"
0017 
0018 #define BTRFS_DELAYED_WRITEBACK     512
0019 #define BTRFS_DELAYED_BACKGROUND    128
0020 #define BTRFS_DELAYED_BATCH     16
0021 
0022 static struct kmem_cache *delayed_node_cache;
0023 
0024 int __init btrfs_delayed_inode_init(void)
0025 {
0026     delayed_node_cache = kmem_cache_create("btrfs_delayed_node",
0027                     sizeof(struct btrfs_delayed_node),
0028                     0,
0029                     SLAB_MEM_SPREAD,
0030                     NULL);
0031     if (!delayed_node_cache)
0032         return -ENOMEM;
0033     return 0;
0034 }
0035 
0036 void __cold btrfs_delayed_inode_exit(void)
0037 {
0038     kmem_cache_destroy(delayed_node_cache);
0039 }
0040 
0041 static inline void btrfs_init_delayed_node(
0042                 struct btrfs_delayed_node *delayed_node,
0043                 struct btrfs_root *root, u64 inode_id)
0044 {
0045     delayed_node->root = root;
0046     delayed_node->inode_id = inode_id;
0047     refcount_set(&delayed_node->refs, 0);
0048     delayed_node->ins_root = RB_ROOT_CACHED;
0049     delayed_node->del_root = RB_ROOT_CACHED;
0050     mutex_init(&delayed_node->mutex);
0051     INIT_LIST_HEAD(&delayed_node->n_list);
0052     INIT_LIST_HEAD(&delayed_node->p_list);
0053 }
0054 
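/*
 * Look up the delayed node for an inode: first try the pointer cached in the
 * btrfs_inode, then fall back to the root's radix tree. A reference is taken
 * on the node that is returned; NULL is returned if the inode has no delayed
 * node or the node is currently being torn down.
 */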
0055 static struct btrfs_delayed_node *btrfs_get_delayed_node(
0056         struct btrfs_inode *btrfs_inode)
0057 {
0058     struct btrfs_root *root = btrfs_inode->root;
0059     u64 ino = btrfs_ino(btrfs_inode);
0060     struct btrfs_delayed_node *node;
0061 
0062     node = READ_ONCE(btrfs_inode->delayed_node);
0063     if (node) {
0064         refcount_inc(&node->refs);
0065         return node;
0066     }
0067 
0068     spin_lock(&root->inode_lock);
0069     node = radix_tree_lookup(&root->delayed_nodes_tree, ino);
0070 
0071     if (node) {
0072         if (btrfs_inode->delayed_node) {
0073             refcount_inc(&node->refs);  /* can be accessed */
0074             BUG_ON(btrfs_inode->delayed_node != node);
0075             spin_unlock(&root->inode_lock);
0076             return node;
0077         }
0078 
0079         /*
0080          * It's possible that we're racing into the middle of removing
0081          * this node from the radix tree.  In this case, the refcount
0082          * was zero and it should never go back to one.  Just return
0083          * NULL like it was never in the radix at all; our release
0084          * function is in the process of removing it.
0085          *
0086          * Some implementations of refcount_inc refuse to bump the
0087          * refcount once it has hit zero.  If we don't do this dance
0088          * here, refcount_inc() may decide to just WARN_ONCE() instead
0089          * of actually bumping the refcount.
0090          *
0091          * If this node is properly in the radix, we want to bump the
0092          * refcount twice, once for the inode and once for this get
0093          * operation.
0094          */
0095         if (refcount_inc_not_zero(&node->refs)) {
0096             refcount_inc(&node->refs);
0097             btrfs_inode->delayed_node = node;
0098         } else {
0099             node = NULL;
0100         }
0101 
0102         spin_unlock(&root->inode_lock);
0103         return node;
0104     }
0105     spin_unlock(&root->inode_lock);
0106 
0107     return NULL;
0108 }
0109 
0110 /* Will return either the node or PTR_ERR(-ENOMEM) */
0111 static struct btrfs_delayed_node *btrfs_get_or_create_delayed_node(
0112         struct btrfs_inode *btrfs_inode)
0113 {
0114     struct btrfs_delayed_node *node;
0115     struct btrfs_root *root = btrfs_inode->root;
0116     u64 ino = btrfs_ino(btrfs_inode);
0117     int ret;
0118 
0119 again:
0120     node = btrfs_get_delayed_node(btrfs_inode);
0121     if (node)
0122         return node;
0123 
0124     node = kmem_cache_zalloc(delayed_node_cache, GFP_NOFS);
0125     if (!node)
0126         return ERR_PTR(-ENOMEM);
0127     btrfs_init_delayed_node(node, root, ino);
0128 
0129     /* cached in the btrfs inode and can be accessed */
0130     refcount_set(&node->refs, 2);
0131 
0132     ret = radix_tree_preload(GFP_NOFS);
0133     if (ret) {
0134         kmem_cache_free(delayed_node_cache, node);
0135         return ERR_PTR(ret);
0136     }
0137 
0138     spin_lock(&root->inode_lock);
0139     ret = radix_tree_insert(&root->delayed_nodes_tree, ino, node);
0140     if (ret == -EEXIST) {
0141         spin_unlock(&root->inode_lock);
0142         kmem_cache_free(delayed_node_cache, node);
0143         radix_tree_preload_end();
0144         goto again;
0145     }
0146     btrfs_inode->delayed_node = node;
0147     spin_unlock(&root->inode_lock);
0148     radix_tree_preload_end();
0149 
0150     return node;
0151 }
0152 
0153 /*
0154  * Call it when holding delayed_node->mutex
0155  *
0156  * If mod = 1, add this node into the prepared list.
0157  */
0158 static void btrfs_queue_delayed_node(struct btrfs_delayed_root *root,
0159                      struct btrfs_delayed_node *node,
0160                      int mod)
0161 {
0162     spin_lock(&root->lock);
0163     if (test_bit(BTRFS_DELAYED_NODE_IN_LIST, &node->flags)) {
0164         if (!list_empty(&node->p_list))
0165             list_move_tail(&node->p_list, &root->prepare_list);
0166         else if (mod)
0167             list_add_tail(&node->p_list, &root->prepare_list);
0168     } else {
0169         list_add_tail(&node->n_list, &root->node_list);
0170         list_add_tail(&node->p_list, &root->prepare_list);
0171         refcount_inc(&node->refs);  /* inserted into list */
0172         root->nodes++;
0173         set_bit(BTRFS_DELAYED_NODE_IN_LIST, &node->flags);
0174     }
0175     spin_unlock(&root->lock);
0176 }
0177 
0178 /* Call it when holding delayed_node->mutex */
0179 static void btrfs_dequeue_delayed_node(struct btrfs_delayed_root *root,
0180                        struct btrfs_delayed_node *node)
0181 {
0182     spin_lock(&root->lock);
0183     if (test_bit(BTRFS_DELAYED_NODE_IN_LIST, &node->flags)) {
0184         root->nodes--;
0185         refcount_dec(&node->refs);  /* not in the list */
0186         list_del_init(&node->n_list);
0187         if (!list_empty(&node->p_list))
0188             list_del_init(&node->p_list);
0189         clear_bit(BTRFS_DELAYED_NODE_IN_LIST, &node->flags);
0190     }
0191     spin_unlock(&root->lock);
0192 }
0193 
0194 static struct btrfs_delayed_node *btrfs_first_delayed_node(
0195             struct btrfs_delayed_root *delayed_root)
0196 {
0197     struct list_head *p;
0198     struct btrfs_delayed_node *node = NULL;
0199 
0200     spin_lock(&delayed_root->lock);
0201     if (list_empty(&delayed_root->node_list))
0202         goto out;
0203 
0204     p = delayed_root->node_list.next;
0205     node = list_entry(p, struct btrfs_delayed_node, n_list);
0206     refcount_inc(&node->refs);
0207 out:
0208     spin_unlock(&delayed_root->lock);
0209 
0210     return node;
0211 }
0212 
0213 static struct btrfs_delayed_node *btrfs_next_delayed_node(
0214                         struct btrfs_delayed_node *node)
0215 {
0216     struct btrfs_delayed_root *delayed_root;
0217     struct list_head *p;
0218     struct btrfs_delayed_node *next = NULL;
0219 
0220     delayed_root = node->root->fs_info->delayed_root;
0221     spin_lock(&delayed_root->lock);
0222     if (!test_bit(BTRFS_DELAYED_NODE_IN_LIST, &node->flags)) {
0223         /* not in the list */
0224         if (list_empty(&delayed_root->node_list))
0225             goto out;
0226         p = delayed_root->node_list.next;
0227     } else if (list_is_last(&node->n_list, &delayed_root->node_list))
0228         goto out;
0229     else
0230         p = node->n_list.next;
0231 
0232     next = list_entry(p, struct btrfs_delayed_node, n_list);
0233     refcount_inc(&next->refs);
0234 out:
0235     spin_unlock(&delayed_root->lock);
0236 
0237     return next;
0238 }
0239 
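/*
 * Drop a reference on a delayed node. While holding the node's mutex, keep it
 * queued on the delayed root's lists if it still has pending items, otherwise
 * dequeue it. When the last reference is dropped the node is removed from the
 * root's radix tree and freed.
 */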
0240 static void __btrfs_release_delayed_node(
0241                 struct btrfs_delayed_node *delayed_node,
0242                 int mod)
0243 {
0244     struct btrfs_delayed_root *delayed_root;
0245 
0246     if (!delayed_node)
0247         return;
0248 
0249     delayed_root = delayed_node->root->fs_info->delayed_root;
0250 
0251     mutex_lock(&delayed_node->mutex);
0252     if (delayed_node->count)
0253         btrfs_queue_delayed_node(delayed_root, delayed_node, mod);
0254     else
0255         btrfs_dequeue_delayed_node(delayed_root, delayed_node);
0256     mutex_unlock(&delayed_node->mutex);
0257 
0258     if (refcount_dec_and_test(&delayed_node->refs)) {
0259         struct btrfs_root *root = delayed_node->root;
0260 
0261         spin_lock(&root->inode_lock);
0262         /*
0263          * Once our refcount goes to zero, nobody is allowed to bump it
0264          * back up.  We can delete it now.
0265          */
0266         ASSERT(refcount_read(&delayed_node->refs) == 0);
0267         radix_tree_delete(&root->delayed_nodes_tree,
0268                   delayed_node->inode_id);
0269         spin_unlock(&root->inode_lock);
0270         kmem_cache_free(delayed_node_cache, delayed_node);
0271     }
0272 }
0273 
0274 static inline void btrfs_release_delayed_node(struct btrfs_delayed_node *node)
0275 {
0276     __btrfs_release_delayed_node(node, 0);
0277 }
0278 
0279 static struct btrfs_delayed_node *btrfs_first_prepared_delayed_node(
0280                     struct btrfs_delayed_root *delayed_root)
0281 {
0282     struct list_head *p;
0283     struct btrfs_delayed_node *node = NULL;
0284 
0285     spin_lock(&delayed_root->lock);
0286     if (list_empty(&delayed_root->prepare_list))
0287         goto out;
0288 
0289     p = delayed_root->prepare_list.next;
0290     list_del_init(p);
0291     node = list_entry(p, struct btrfs_delayed_node, p_list);
0292     refcount_inc(&node->refs);
0293 out:
0294     spin_unlock(&delayed_root->lock);
0295 
0296     return node;
0297 }
0298 
0299 static inline void btrfs_release_prepared_delayed_node(
0300                     struct btrfs_delayed_node *node)
0301 {
0302     __btrfs_release_delayed_node(node, 1);
0303 }
0304 
0305 static struct btrfs_delayed_item *btrfs_alloc_delayed_item(u32 data_len)
0306 {
0307     struct btrfs_delayed_item *item;
0308     item = kmalloc(sizeof(*item) + data_len, GFP_NOFS);
0309     if (item) {
0310         item->data_len = data_len;
0311         item->ins_or_del = 0;
0312         item->bytes_reserved = 0;
0313         item->delayed_node = NULL;
0314         refcount_set(&item->refs, 1);
0315     }
0316     return item;
0317 }
0318 
0319 /*
0320  * __btrfs_lookup_delayed_item - look up the delayed item by key
0321  * @delayed_node: pointer to the delayed node
0322  * @key:      the key to look up
0323  * @prev:     used to store the prev item if the right item isn't found
0324  * @next:     used to store the next item if the right item isn't found
0325  *
0326  * Note: if we don't find the right item, we will return the prev item and
0327  * the next item.
0328  */
0329 static struct btrfs_delayed_item *__btrfs_lookup_delayed_item(
0330                 struct rb_root *root,
0331                 struct btrfs_key *key,
0332                 struct btrfs_delayed_item **prev,
0333                 struct btrfs_delayed_item **next)
0334 {
0335     struct rb_node *node, *prev_node = NULL;
0336     struct btrfs_delayed_item *delayed_item = NULL;
0337     int ret = 0;
0338 
0339     node = root->rb_node;
0340 
0341     while (node) {
0342         delayed_item = rb_entry(node, struct btrfs_delayed_item,
0343                     rb_node);
0344         prev_node = node;
0345         ret = btrfs_comp_cpu_keys(&delayed_item->key, key);
0346         if (ret < 0)
0347             node = node->rb_right;
0348         else if (ret > 0)
0349             node = node->rb_left;
0350         else
0351             return delayed_item;
0352     }
0353 
0354     if (prev) {
0355         if (!prev_node)
0356             *prev = NULL;
0357         else if (ret < 0)
0358             *prev = delayed_item;
0359         else if ((node = rb_prev(prev_node)) != NULL) {
0360             *prev = rb_entry(node, struct btrfs_delayed_item,
0361                      rb_node);
0362         } else
0363             *prev = NULL;
0364     }
0365 
0366     if (next) {
0367         if (!prev_node)
0368             *next = NULL;
0369         else if (ret > 0)
0370             *next = delayed_item;
0371         else if ((node = rb_next(prev_node)) != NULL) {
0372             *next = rb_entry(node, struct btrfs_delayed_item,
0373                      rb_node);
0374         } else
0375             *next = NULL;
0376     }
0377     return NULL;
0378 }
0379 
0380 static struct btrfs_delayed_item *__btrfs_lookup_delayed_insertion_item(
0381                     struct btrfs_delayed_node *delayed_node,
0382                     struct btrfs_key *key)
0383 {
0384     return __btrfs_lookup_delayed_item(&delayed_node->ins_root.rb_root, key,
0385                        NULL, NULL);
0386 }
0387 
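/*
 * Link a delayed item into the insertion or deletion rbtree of its delayed
 * node, keyed by the item's key. Returns -EEXIST if an item with the same key
 * is already present in that tree. On success the node's item count and the
 * global delayed item counter are increased, and index_cnt is advanced for
 * insertion items.
 */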
0388 static int __btrfs_add_delayed_item(struct btrfs_delayed_node *delayed_node,
0389                     struct btrfs_delayed_item *ins)
0390 {
0391     struct rb_node **p, *node;
0392     struct rb_node *parent_node = NULL;
0393     struct rb_root_cached *root;
0394     struct btrfs_delayed_item *item;
0395     int cmp;
0396     bool leftmost = true;
0397 
0398     if (ins->ins_or_del == BTRFS_DELAYED_INSERTION_ITEM)
0399         root = &delayed_node->ins_root;
0400     else if (ins->ins_or_del == BTRFS_DELAYED_DELETION_ITEM)
0401         root = &delayed_node->del_root;
0402     else
0403         BUG();
0404     p = &root->rb_root.rb_node;
0405     node = &ins->rb_node;
0406 
0407     while (*p) {
0408         parent_node = *p;
0409         item = rb_entry(parent_node, struct btrfs_delayed_item,
0410                  rb_node);
0411 
0412         cmp = btrfs_comp_cpu_keys(&item->key, &ins->key);
0413         if (cmp < 0) {
0414             p = &(*p)->rb_right;
0415             leftmost = false;
0416         } else if (cmp > 0) {
0417             p = &(*p)->rb_left;
0418         } else {
0419             return -EEXIST;
0420         }
0421     }
0422 
0423     rb_link_node(node, parent_node, p);
0424     rb_insert_color_cached(node, root, leftmost);
0425     ins->delayed_node = delayed_node;
0426 
0427     /* Delayed items are always for dir index items. */
0428     ASSERT(ins->key.type == BTRFS_DIR_INDEX_KEY);
0429 
0430     if (ins->ins_or_del == BTRFS_DELAYED_INSERTION_ITEM &&
0431         ins->key.offset >= delayed_node->index_cnt)
0432         delayed_node->index_cnt = ins->key.offset + 1;
0433 
0434     delayed_node->count++;
0435     atomic_inc(&delayed_node->root->fs_info->delayed_root->items);
0436     return 0;
0437 }
0438 
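/*
 * Account the completion of one delayed item: bump the sequence counter,
 * decrement the global pending item count and wake up any waiter in
 * btrfs_balance_delayed_items() when the count drops below the background
 * threshold or a batch boundary is reached.
 */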
0439 static void finish_one_item(struct btrfs_delayed_root *delayed_root)
0440 {
0441     int seq = atomic_inc_return(&delayed_root->items_seq);
0442 
0443     /* atomic_dec_return implies a barrier */
0444     if ((atomic_dec_return(&delayed_root->items) <
0445         BTRFS_DELAYED_BACKGROUND || seq % BTRFS_DELAYED_BATCH == 0))
0446         cond_wake_up_nomb(&delayed_root->wait);
0447 }
0448 
0449 static void __btrfs_remove_delayed_item(struct btrfs_delayed_item *delayed_item)
0450 {
0451     struct rb_root_cached *root;
0452     struct btrfs_delayed_root *delayed_root;
0453 
0454     /* Not associated with any delayed_node */
0455     if (!delayed_item->delayed_node)
0456         return;
0457     delayed_root = delayed_item->delayed_node->root->fs_info->delayed_root;
0458 
0459     BUG_ON(!delayed_root);
0460     BUG_ON(delayed_item->ins_or_del != BTRFS_DELAYED_DELETION_ITEM &&
0461            delayed_item->ins_or_del != BTRFS_DELAYED_INSERTION_ITEM);
0462 
0463     if (delayed_item->ins_or_del == BTRFS_DELAYED_INSERTION_ITEM)
0464         root = &delayed_item->delayed_node->ins_root;
0465     else
0466         root = &delayed_item->delayed_node->del_root;
0467 
0468     rb_erase_cached(&delayed_item->rb_node, root);
0469     delayed_item->delayed_node->count--;
0470 
0471     finish_one_item(delayed_root);
0472 }
0473 
0474 static void btrfs_release_delayed_item(struct btrfs_delayed_item *item)
0475 {
0476     if (item) {
0477         __btrfs_remove_delayed_item(item);
0478         if (refcount_dec_and_test(&item->refs))
0479             kfree(item);
0480     }
0481 }
0482 
0483 static struct btrfs_delayed_item *__btrfs_first_delayed_insertion_item(
0484                     struct btrfs_delayed_node *delayed_node)
0485 {
0486     struct rb_node *p;
0487     struct btrfs_delayed_item *item = NULL;
0488 
0489     p = rb_first_cached(&delayed_node->ins_root);
0490     if (p)
0491         item = rb_entry(p, struct btrfs_delayed_item, rb_node);
0492 
0493     return item;
0494 }
0495 
0496 static struct btrfs_delayed_item *__btrfs_first_delayed_deletion_item(
0497                     struct btrfs_delayed_node *delayed_node)
0498 {
0499     struct rb_node *p;
0500     struct btrfs_delayed_item *item = NULL;
0501 
0502     p = rb_first_cached(&delayed_node->del_root);
0503     if (p)
0504         item = rb_entry(p, struct btrfs_delayed_item, rb_node);
0505 
0506     return item;
0507 }
0508 
0509 static struct btrfs_delayed_item *__btrfs_next_delayed_item(
0510                         struct btrfs_delayed_item *item)
0511 {
0512     struct rb_node *p;
0513     struct btrfs_delayed_item *next = NULL;
0514 
0515     p = rb_next(&item->rb_node);
0516     if (p)
0517         next = rb_entry(p, struct btrfs_delayed_item, rb_node);
0518 
0519     return next;
0520 }
0521 
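/*
 * Reserve metadata space for a single delayed item by migrating one insert
 * unit from the transaction's block reserve into the delayed block reserve.
 * Nothing is done for joined transactions (no bytes_reserved). Only deletion
 * items record the reservation on the item itself; insertion items are
 * accounted per leaf by the delayed node instead.
 */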
0522 static int btrfs_delayed_item_reserve_metadata(struct btrfs_trans_handle *trans,
0523                            struct btrfs_root *root,
0524                            struct btrfs_delayed_item *item)
0525 {
0526     struct btrfs_block_rsv *src_rsv;
0527     struct btrfs_block_rsv *dst_rsv;
0528     struct btrfs_fs_info *fs_info = root->fs_info;
0529     u64 num_bytes;
0530     int ret;
0531 
0532     if (!trans->bytes_reserved)
0533         return 0;
0534 
0535     src_rsv = trans->block_rsv;
0536     dst_rsv = &fs_info->delayed_block_rsv;
0537 
0538     num_bytes = btrfs_calc_insert_metadata_size(fs_info, 1);
0539 
0540     /*
0541      * Here we migrate space rsv from transaction rsv, since we have already
0542      * reserved space when starting a transaction.  So no need to reserve
0543      * qgroup space here.
0544      */
0545     ret = btrfs_block_rsv_migrate(src_rsv, dst_rsv, num_bytes, true);
0546     if (!ret) {
0547         trace_btrfs_space_reservation(fs_info, "delayed_item",
0548                           item->key.objectid,
0549                           num_bytes, 1);
0550         /*
0551          * For insertions we track reserved metadata space by accounting
0552          * for the number of leaves that will be used, based on the delayed
0553          * node's curr_index_batch_size and index_item_leaves fields.
0554          */
0555         if (item->ins_or_del == BTRFS_DELAYED_DELETION_ITEM)
0556             item->bytes_reserved = num_bytes;
0557     }
0558 
0559     return ret;
0560 }
0561 
0562 static void btrfs_delayed_item_release_metadata(struct btrfs_root *root,
0563                         struct btrfs_delayed_item *item)
0564 {
0565     struct btrfs_block_rsv *rsv;
0566     struct btrfs_fs_info *fs_info = root->fs_info;
0567 
0568     if (!item->bytes_reserved)
0569         return;
0570 
0571     rsv = &fs_info->delayed_block_rsv;
0572     /*
0573      * Check btrfs_delayed_item_reserve_metadata() to see why we don't need
0574      * to release/reserve qgroup space.
0575      */
0576     trace_btrfs_space_reservation(fs_info, "delayed_item",
0577                       item->key.objectid, item->bytes_reserved,
0578                       0);
0579     btrfs_block_rsv_release(fs_info, rsv, item->bytes_reserved, NULL);
0580 }
0581 
0582 static void btrfs_delayed_item_release_leaves(struct btrfs_delayed_node *node,
0583                           unsigned int num_leaves)
0584 {
0585     struct btrfs_fs_info *fs_info = node->root->fs_info;
0586     const u64 bytes = btrfs_calc_insert_metadata_size(fs_info, num_leaves);
0587 
0588     /* There are no space reservations during log replay, bail out. */
0589     if (test_bit(BTRFS_FS_LOG_RECOVERING, &fs_info->flags))
0590         return;
0591 
0592     trace_btrfs_space_reservation(fs_info, "delayed_item", node->inode_id,
0593                       bytes, 0);
0594     btrfs_block_rsv_release(fs_info, &fs_info->delayed_block_rsv, bytes, NULL);
0595 }
0596 
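/*
 * Reserve metadata space for updating the delayed inode item. When the
 * transaction has nothing to migrate from (e.g. it was joined and is not
 * using the delalloc reserve), reserve fresh qgroup and block reserve space
 * with NO_FLUSH; otherwise migrate the space from the transaction's block
 * reserve into the delayed block reserve.
 */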
0597 static int btrfs_delayed_inode_reserve_metadata(
0598                     struct btrfs_trans_handle *trans,
0599                     struct btrfs_root *root,
0600                     struct btrfs_delayed_node *node)
0601 {
0602     struct btrfs_fs_info *fs_info = root->fs_info;
0603     struct btrfs_block_rsv *src_rsv;
0604     struct btrfs_block_rsv *dst_rsv;
0605     u64 num_bytes;
0606     int ret;
0607 
0608     src_rsv = trans->block_rsv;
0609     dst_rsv = &fs_info->delayed_block_rsv;
0610 
0611     num_bytes = btrfs_calc_metadata_size(fs_info, 1);
0612 
0613     /*
0614      * btrfs_dirty_inode will update the inode under btrfs_join_transaction
0615      * which doesn't reserve space for speed.  This is a problem since we
0616      * still need to reserve space for this update, so try to reserve the
0617      * space.
0618      *
0619      * Now if src_rsv == delalloc_block_rsv we'll let it just steal since
0620      * we always reserve enough to update the inode item.
0621      */
0622     if (!src_rsv || (!trans->bytes_reserved &&
0623              src_rsv->type != BTRFS_BLOCK_RSV_DELALLOC)) {
0624         ret = btrfs_qgroup_reserve_meta(root, num_bytes,
0625                       BTRFS_QGROUP_RSV_META_PREALLOC, true);
0626         if (ret < 0)
0627             return ret;
0628         ret = btrfs_block_rsv_add(fs_info, dst_rsv, num_bytes,
0629                       BTRFS_RESERVE_NO_FLUSH);
0630         /* NO_FLUSH could only fail with -ENOSPC */
0631         ASSERT(ret == 0 || ret == -ENOSPC);
0632         if (ret)
0633             btrfs_qgroup_free_meta_prealloc(root, num_bytes);
0634     } else {
0635         ret = btrfs_block_rsv_migrate(src_rsv, dst_rsv, num_bytes, true);
0636     }
0637 
0638     if (!ret) {
0639         trace_btrfs_space_reservation(fs_info, "delayed_inode",
0640                           node->inode_id, num_bytes, 1);
0641         node->bytes_reserved = num_bytes;
0642     }
0643 
0644     return ret;
0645 }
0646 
0647 static void btrfs_delayed_inode_release_metadata(struct btrfs_fs_info *fs_info,
0648                         struct btrfs_delayed_node *node,
0649                         bool qgroup_free)
0650 {
0651     struct btrfs_block_rsv *rsv;
0652 
0653     if (!node->bytes_reserved)
0654         return;
0655 
0656     rsv = &fs_info->delayed_block_rsv;
0657     trace_btrfs_space_reservation(fs_info, "delayed_inode",
0658                       node->inode_id, node->bytes_reserved, 0);
0659     btrfs_block_rsv_release(fs_info, rsv, node->bytes_reserved, NULL);
0660     if (qgroup_free)
0661         btrfs_qgroup_free_meta_prealloc(node->root,
0662                 node->bytes_reserved);
0663     else
0664         btrfs_qgroup_convert_reserved_meta(node->root,
0665                 node->bytes_reserved);
0666     node->bytes_reserved = 0;
0667 }
0668 
0669 /*
0670  * Insert a single delayed item or a batch of delayed items, as many as possible
0671  * that fit in a leaf. The delayed items (dir index keys) are sorted by their key
0672  * in the rbtree, and if there's a gap between two consecutive dir index items,
0673  * then it means at some point we had delayed dir indexes to add but they got
0674  * removed (by btrfs_delete_delayed_dir_index()) before we attempted to flush them
0675  * into the subvolume tree. Dir index keys also have their offsets coming from a
0676  * monotonically increasing counter, so we can't get new keys with an offset that
0677  * fits within a gap between delayed dir index items.
0678  */
0679 static int btrfs_insert_delayed_item(struct btrfs_trans_handle *trans,
0680                      struct btrfs_root *root,
0681                      struct btrfs_path *path,
0682                      struct btrfs_delayed_item *first_item)
0683 {
0684     struct btrfs_fs_info *fs_info = root->fs_info;
0685     struct btrfs_delayed_node *node = first_item->delayed_node;
0686     LIST_HEAD(item_list);
0687     struct btrfs_delayed_item *curr;
0688     struct btrfs_delayed_item *next;
0689     const int max_size = BTRFS_LEAF_DATA_SIZE(fs_info);
0690     struct btrfs_item_batch batch;
0691     int total_size;
0692     char *ins_data = NULL;
0693     int ret;
0694     bool continuous_keys_only = false;
0695 
0696     lockdep_assert_held(&node->mutex);
0697 
0698     /*
0699      * During normal operation the delayed index offset is continuously
0700      * increasing, so we can batch insert all items as there will not be any
0701      * overlapping keys in the tree.
0702      *
0703      * The exception to this is log replay, where we may have interleaved
0704      * offsets in the tree, so our batch needs to be continuous keys only in
0705      * order to ensure we do not end up with out of order items in our leaf.
0706      */
0707     if (test_bit(BTRFS_FS_LOG_RECOVERING, &fs_info->flags))
0708         continuous_keys_only = true;
0709 
0710     /*
0711      * For delayed items to insert, we track reserved metadata bytes based
0712      * on the number of leaves that we will use.
0713      * See btrfs_insert_delayed_dir_index() and
0714      * btrfs_delayed_item_reserve_metadata().
0715      */
0716     ASSERT(first_item->bytes_reserved == 0);
0717 
0718     list_add_tail(&first_item->tree_list, &item_list);
0719     batch.total_data_size = first_item->data_len;
0720     batch.nr = 1;
0721     total_size = first_item->data_len + sizeof(struct btrfs_item);
0722     curr = first_item;
0723 
0724     while (true) {
0725         int next_size;
0726 
0727         next = __btrfs_next_delayed_item(curr);
0728         if (!next)
0729             break;
0730 
0731         /*
0732          * We cannot allow gaps in the key space if we're doing log
0733          * replay.
0734          */
0735         if (continuous_keys_only &&
0736             (next->key.offset != curr->key.offset + 1))
0737             break;
0738 
0739         ASSERT(next->bytes_reserved == 0);
0740 
0741         next_size = next->data_len + sizeof(struct btrfs_item);
0742         if (total_size + next_size > max_size)
0743             break;
0744 
0745         list_add_tail(&next->tree_list, &item_list);
0746         batch.nr++;
0747         total_size += next_size;
0748         batch.total_data_size += next->data_len;
0749         curr = next;
0750     }
0751 
0752     if (batch.nr == 1) {
0753         batch.keys = &first_item->key;
0754         batch.data_sizes = &first_item->data_len;
0755     } else {
0756         struct btrfs_key *ins_keys;
0757         u32 *ins_sizes;
0758         int i = 0;
0759 
0760         ins_data = kmalloc(batch.nr * sizeof(u32) +
0761                    batch.nr * sizeof(struct btrfs_key), GFP_NOFS);
0762         if (!ins_data) {
0763             ret = -ENOMEM;
0764             goto out;
0765         }
0766         ins_sizes = (u32 *)ins_data;
0767         ins_keys = (struct btrfs_key *)(ins_data + batch.nr * sizeof(u32));
0768         batch.keys = ins_keys;
0769         batch.data_sizes = ins_sizes;
0770         list_for_each_entry(curr, &item_list, tree_list) {
0771             ins_keys[i] = curr->key;
0772             ins_sizes[i] = curr->data_len;
0773             i++;
0774         }
0775     }
0776 
0777     ret = btrfs_insert_empty_items(trans, root, path, &batch);
0778     if (ret)
0779         goto out;
0780 
0781     list_for_each_entry(curr, &item_list, tree_list) {
0782         char *data_ptr;
0783 
0784         data_ptr = btrfs_item_ptr(path->nodes[0], path->slots[0], char);
0785         write_extent_buffer(path->nodes[0], &curr->data,
0786                     (unsigned long)data_ptr, curr->data_len);
0787         path->slots[0]++;
0788     }
0789 
0790     /*
0791      * Now release our path before releasing the delayed items and their
0792      * metadata reservations, so that we don't block other tasks for more
0793      * time than needed.
0794      */
0795     btrfs_release_path(path);
0796 
0797     ASSERT(node->index_item_leaves > 0);
0798 
0799     /*
0800      * For normal operations we will batch an entire leaf's worth of delayed
0801      * items, so if there are more items to process we can decrement
0802      * index_item_leaves by 1 as we inserted 1 leaf's worth of items.
0803      *
0804      * However for log replay we may not have inserted an entire leaf's
0805      * worth of items, or we may not have had continuous items, so decrementing
0806      * here would mess up the index_item_leaves accounting.  For this case
0807      * only clean up the accounting when there are no items left.
0808      */
0809     if (next && !continuous_keys_only) {
0810         /*
0811          * We inserted one batch of items into a leaf and there are more
0812          * items to flush in a future batch, so release one unit of
0813          * metadata space from the delayed block reserve, corresponding to
0814          * the leaf we just flushed.
0815          */
0816         btrfs_delayed_item_release_leaves(node, 1);
0817         node->index_item_leaves--;
0818     } else if (!next) {
0819         /*
0820          * There are no more items to insert. We can have a number of
0821          * reserved leaves > 1 here - this happens when many dir index
0822          * items are added and then removed before they are flushed (file
0823          * names with a very short life, never span a transaction). So
0824          * release all remaining leaves.
0825          */
0826         btrfs_delayed_item_release_leaves(node, node->index_item_leaves);
0827         node->index_item_leaves = 0;
0828     }
0829 
0830     list_for_each_entry_safe(curr, next, &item_list, tree_list) {
0831         list_del(&curr->tree_list);
0832         btrfs_release_delayed_item(curr);
0833     }
0834 out:
0835     kfree(ins_data);
0836     return ret;
0837 }
0838 
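/*
 * Flush all pending insertion items of a delayed node into the subvolume
 * tree, one leaf-sized batch at a time. The node's mutex is dropped between
 * batches so that other tasks can keep adding delayed items to the node.
 */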
0839 static int btrfs_insert_delayed_items(struct btrfs_trans_handle *trans,
0840                       struct btrfs_path *path,
0841                       struct btrfs_root *root,
0842                       struct btrfs_delayed_node *node)
0843 {
0844     int ret = 0;
0845 
0846     while (ret == 0) {
0847         struct btrfs_delayed_item *curr;
0848 
0849         mutex_lock(&node->mutex);
0850         curr = __btrfs_first_delayed_insertion_item(node);
0851         if (!curr) {
0852             mutex_unlock(&node->mutex);
0853             break;
0854         }
0855         ret = btrfs_insert_delayed_item(trans, root, path, curr);
0856         mutex_unlock(&node->mutex);
0857     }
0858 
0859     return ret;
0860 }
0861 
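/*
 * Delete from the current leaf the item that @item points to, plus any
 * following delayed deletion items whose keys match the consecutive slots of
 * the same leaf, then release the metadata space that was reserved for them.
 */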
0862 static int btrfs_batch_delete_items(struct btrfs_trans_handle *trans,
0863                     struct btrfs_root *root,
0864                     struct btrfs_path *path,
0865                     struct btrfs_delayed_item *item)
0866 {
0867     struct btrfs_fs_info *fs_info = root->fs_info;
0868     struct btrfs_delayed_item *curr, *next;
0869     struct extent_buffer *leaf = path->nodes[0];
0870     LIST_HEAD(batch_list);
0871     int nitems, slot, last_slot;
0872     int ret;
0873     u64 total_reserved_size = item->bytes_reserved;
0874 
0875     ASSERT(leaf != NULL);
0876 
0877     slot = path->slots[0];
0878     last_slot = btrfs_header_nritems(leaf) - 1;
0879     /*
0880      * Our caller always gives us a path pointing to an existing item, so
0881      * this can not happen.
0882      */
0883     ASSERT(slot <= last_slot);
0884     if (WARN_ON(slot > last_slot))
0885         return -ENOENT;
0886 
0887     nitems = 1;
0888     curr = item;
0889     list_add_tail(&curr->tree_list, &batch_list);
0890 
0891     /*
0892      * Keep checking if the next delayed item matches the next item in the
0893      * leaf - if so, we can add it to the batch of items to delete from the
0894      * leaf.
0895      */
0896     while (slot < last_slot) {
0897         struct btrfs_key key;
0898 
0899         next = __btrfs_next_delayed_item(curr);
0900         if (!next)
0901             break;
0902 
0903         slot++;
0904         btrfs_item_key_to_cpu(leaf, &key, slot);
0905         if (btrfs_comp_cpu_keys(&next->key, &key) != 0)
0906             break;
0907         nitems++;
0908         curr = next;
0909         list_add_tail(&curr->tree_list, &batch_list);
0910         total_reserved_size += curr->bytes_reserved;
0911     }
0912 
0913     ret = btrfs_del_items(trans, root, path, path->slots[0], nitems);
0914     if (ret)
0915         return ret;
0916 
0917     /* In case of BTRFS_FS_LOG_RECOVERING items won't have reserved space */
0918     if (total_reserved_size > 0) {
0919         /*
0920          * Check btrfs_delayed_item_reserve_metadata() to see why we
0921          * don't need to release/reserve qgroup space.
0922          */
0923         trace_btrfs_space_reservation(fs_info, "delayed_item",
0924                           item->key.objectid, total_reserved_size,
0925                           0);
0926         btrfs_block_rsv_release(fs_info, &fs_info->delayed_block_rsv,
0927                     total_reserved_size, NULL);
0928     }
0929 
0930     list_for_each_entry_safe(curr, next, &batch_list, tree_list) {
0931         list_del(&curr->tree_list);
0932         btrfs_release_delayed_item(curr);
0933     }
0934 
0935     return 0;
0936 }
0937 
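/*
 * Process the deletion items of a delayed node, one leaf batch at a time. For
 * each batch we search for the key in the subvolume tree; if it is not found
 * the item was already deleted in a previous run and is simply released. The
 * node's mutex is dropped on every iteration to avoid blocking other tasks
 * for too long.
 */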
0938 static int btrfs_delete_delayed_items(struct btrfs_trans_handle *trans,
0939                       struct btrfs_path *path,
0940                       struct btrfs_root *root,
0941                       struct btrfs_delayed_node *node)
0942 {
0943     int ret = 0;
0944 
0945     while (ret == 0) {
0946         struct btrfs_delayed_item *item;
0947 
0948         mutex_lock(&node->mutex);
0949         item = __btrfs_first_delayed_deletion_item(node);
0950         if (!item) {
0951             mutex_unlock(&node->mutex);
0952             break;
0953         }
0954 
0955         ret = btrfs_search_slot(trans, root, &item->key, path, -1, 1);
0956         if (ret > 0) {
0957             /*
0958              * There's no matching item in the leaf. This means we
0959              * have already deleted this item in a past run of the
0960              * delayed items. We ignore errors when running delayed
0961              * items from an async context, through a work queue job
0962              * running btrfs_async_run_delayed_root(), and don't
0963              * release delayed items that failed to complete. This
0964              * is because we will retry later, and at transaction
0965              * commit time we always run delayed items and will
0966              * then deal with errors if they fail to run again.
0967              *
0968              * So just release delayed items for which we can't find
0969              * an item in the tree, and move to the next item.
0970              */
0971             btrfs_release_path(path);
0972             btrfs_release_delayed_item(item);
0973             ret = 0;
0974         } else if (ret == 0) {
0975             ret = btrfs_batch_delete_items(trans, root, path, item);
0976             btrfs_release_path(path);
0977         }
0978 
0979         /*
0980          * We unlock and relock on each iteration, this is to prevent
0981          * blocking other tasks for too long while we are being run from
0982          * the async context (work queue job). Those tasks are typically
0983          * running system calls like creat/mkdir/rename/unlink/etc which
0984          * need to add delayed items to this delayed node.
0985          */
0986         mutex_unlock(&node->mutex);
0987     }
0988 
0989     return ret;
0990 }
0991 
0992 static void btrfs_release_delayed_inode(struct btrfs_delayed_node *delayed_node)
0993 {
0994     struct btrfs_delayed_root *delayed_root;
0995 
0996     if (delayed_node &&
0997         test_bit(BTRFS_DELAYED_NODE_INODE_DIRTY, &delayed_node->flags)) {
0998         BUG_ON(!delayed_node->root);
0999         clear_bit(BTRFS_DELAYED_NODE_INODE_DIRTY, &delayed_node->flags);
1000         delayed_node->count--;
1001 
1002         delayed_root = delayed_node->root->fs_info->delayed_root;
1003         finish_one_item(delayed_root);
1004     }
1005 }
1006 
1007 static void btrfs_release_delayed_iref(struct btrfs_delayed_node *delayed_node)
1008 {
1009 
1010     if (test_and_clear_bit(BTRFS_DELAYED_NODE_DEL_IREF, &delayed_node->flags)) {
1011         struct btrfs_delayed_root *delayed_root;
1012 
1013         ASSERT(delayed_node->root);
1014         delayed_node->count--;
1015 
1016         delayed_root = delayed_node->root->fs_info->delayed_root;
1017         finish_one_item(delayed_root);
1018     }
1019 }
1020 
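/*
 * Copy the delayed inode item into its leaf in the subvolume tree. If the
 * BTRFS_DELAYED_NODE_DEL_IREF flag is set, also delete the inode's single
 * INODE_REF/INODE_EXTREF item. The reserved metadata is released, and the
 * transaction is aborted on any error other than -ENOENT.
 */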
1021 static int __btrfs_update_delayed_inode(struct btrfs_trans_handle *trans,
1022                     struct btrfs_root *root,
1023                     struct btrfs_path *path,
1024                     struct btrfs_delayed_node *node)
1025 {
1026     struct btrfs_fs_info *fs_info = root->fs_info;
1027     struct btrfs_key key;
1028     struct btrfs_inode_item *inode_item;
1029     struct extent_buffer *leaf;
1030     int mod;
1031     int ret;
1032 
1033     key.objectid = node->inode_id;
1034     key.type = BTRFS_INODE_ITEM_KEY;
1035     key.offset = 0;
1036 
1037     if (test_bit(BTRFS_DELAYED_NODE_DEL_IREF, &node->flags))
1038         mod = -1;
1039     else
1040         mod = 1;
1041 
1042     ret = btrfs_lookup_inode(trans, root, path, &key, mod);
1043     if (ret > 0)
1044         ret = -ENOENT;
1045     if (ret < 0)
1046         goto out;
1047 
1048     leaf = path->nodes[0];
1049     inode_item = btrfs_item_ptr(leaf, path->slots[0],
1050                     struct btrfs_inode_item);
1051     write_extent_buffer(leaf, &node->inode_item, (unsigned long)inode_item,
1052                 sizeof(struct btrfs_inode_item));
1053     btrfs_mark_buffer_dirty(leaf);
1054 
1055     if (!test_bit(BTRFS_DELAYED_NODE_DEL_IREF, &node->flags))
1056         goto out;
1057 
1058     path->slots[0]++;
1059     if (path->slots[0] >= btrfs_header_nritems(leaf))
1060         goto search;
1061 again:
1062     btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
1063     if (key.objectid != node->inode_id)
1064         goto out;
1065 
1066     if (key.type != BTRFS_INODE_REF_KEY &&
1067         key.type != BTRFS_INODE_EXTREF_KEY)
1068         goto out;
1069 
1070     /*
1071      * Delayed iref deletion is for an inode that has only one link,
1072      * so there is only one iref. The case where several irefs are
1073      * stored in the same item doesn't exist.
1074      */
1075     btrfs_del_item(trans, root, path);
1076 out:
1077     btrfs_release_delayed_iref(node);
1078     btrfs_release_path(path);
1079 err_out:
1080     btrfs_delayed_inode_release_metadata(fs_info, node, (ret < 0));
1081     btrfs_release_delayed_inode(node);
1082 
1083     /*
1084      * If we fail to update the delayed inode we need to abort the
1085      * transaction, because we could leave the inode with the improper
1086      * counts behind.
1087      */
1088     if (ret && ret != -ENOENT)
1089         btrfs_abort_transaction(trans, ret);
1090 
1091     return ret;
1092 
1093 search:
1094     btrfs_release_path(path);
1095 
1096     key.type = BTRFS_INODE_EXTREF_KEY;
1097     key.offset = -1;
1098 
1099     ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
1100     if (ret < 0)
1101         goto err_out;
1102     ASSERT(ret);
1103 
1104     ret = 0;
1105     leaf = path->nodes[0];
1106     path->slots[0]--;
1107     goto again;
1108 }
1109 
1110 static inline int btrfs_update_delayed_inode(struct btrfs_trans_handle *trans,
1111                          struct btrfs_root *root,
1112                          struct btrfs_path *path,
1113                          struct btrfs_delayed_node *node)
1114 {
1115     int ret;
1116 
1117     mutex_lock(&node->mutex);
1118     if (!test_bit(BTRFS_DELAYED_NODE_INODE_DIRTY, &node->flags)) {
1119         mutex_unlock(&node->mutex);
1120         return 0;
1121     }
1122 
1123     ret = __btrfs_update_delayed_inode(trans, root, path, node);
1124     mutex_unlock(&node->mutex);
1125     return ret;
1126 }
1127 
1128 static inline int
1129 __btrfs_commit_inode_delayed_items(struct btrfs_trans_handle *trans,
1130                    struct btrfs_path *path,
1131                    struct btrfs_delayed_node *node)
1132 {
1133     int ret;
1134 
1135     ret = btrfs_insert_delayed_items(trans, path, node->root, node);
1136     if (ret)
1137         return ret;
1138 
1139     ret = btrfs_delete_delayed_items(trans, path, node->root, node);
1140     if (ret)
1141         return ret;
1142 
1143     ret = btrfs_update_delayed_inode(trans, node->root, path, node);
1144     return ret;
1145 }
1146 
1147 /*
1148  * Called when committing the transaction.
1149  * Returns 0 on success.
1150  * Returns < 0 on error and returns with an aborted transaction with any
1151  * outstanding delayed items cleaned up.
1152  */
1153 static int __btrfs_run_delayed_items(struct btrfs_trans_handle *trans, int nr)
1154 {
1155     struct btrfs_fs_info *fs_info = trans->fs_info;
1156     struct btrfs_delayed_root *delayed_root;
1157     struct btrfs_delayed_node *curr_node, *prev_node;
1158     struct btrfs_path *path;
1159     struct btrfs_block_rsv *block_rsv;
1160     int ret = 0;
1161     bool count = (nr > 0);
1162 
1163     if (TRANS_ABORTED(trans))
1164         return -EIO;
1165 
1166     path = btrfs_alloc_path();
1167     if (!path)
1168         return -ENOMEM;
1169 
1170     block_rsv = trans->block_rsv;
1171     trans->block_rsv = &fs_info->delayed_block_rsv;
1172 
1173     delayed_root = fs_info->delayed_root;
1174 
1175     curr_node = btrfs_first_delayed_node(delayed_root);
1176     while (curr_node && (!count || nr--)) {
1177         ret = __btrfs_commit_inode_delayed_items(trans, path,
1178                              curr_node);
1179         if (ret) {
1180             btrfs_release_delayed_node(curr_node);
1181             curr_node = NULL;
1182             btrfs_abort_transaction(trans, ret);
1183             break;
1184         }
1185 
1186         prev_node = curr_node;
1187         curr_node = btrfs_next_delayed_node(curr_node);
1188         btrfs_release_delayed_node(prev_node);
1189     }
1190 
1191     if (curr_node)
1192         btrfs_release_delayed_node(curr_node);
1193     btrfs_free_path(path);
1194     trans->block_rsv = block_rsv;
1195 
1196     return ret;
1197 }
1198 
1199 int btrfs_run_delayed_items(struct btrfs_trans_handle *trans)
1200 {
1201     return __btrfs_run_delayed_items(trans, -1);
1202 }
1203 
1204 int btrfs_run_delayed_items_nr(struct btrfs_trans_handle *trans, int nr)
1205 {
1206     return __btrfs_run_delayed_items(trans, nr);
1207 }
1208 
1209 int btrfs_commit_inode_delayed_items(struct btrfs_trans_handle *trans,
1210                      struct btrfs_inode *inode)
1211 {
1212     struct btrfs_delayed_node *delayed_node = btrfs_get_delayed_node(inode);
1213     struct btrfs_path *path;
1214     struct btrfs_block_rsv *block_rsv;
1215     int ret;
1216 
1217     if (!delayed_node)
1218         return 0;
1219 
1220     mutex_lock(&delayed_node->mutex);
1221     if (!delayed_node->count) {
1222         mutex_unlock(&delayed_node->mutex);
1223         btrfs_release_delayed_node(delayed_node);
1224         return 0;
1225     }
1226     mutex_unlock(&delayed_node->mutex);
1227 
1228     path = btrfs_alloc_path();
1229     if (!path) {
1230         btrfs_release_delayed_node(delayed_node);
1231         return -ENOMEM;
1232     }
1233 
1234     block_rsv = trans->block_rsv;
1235     trans->block_rsv = &delayed_node->root->fs_info->delayed_block_rsv;
1236 
1237     ret = __btrfs_commit_inode_delayed_items(trans, path, delayed_node);
1238 
1239     btrfs_release_delayed_node(delayed_node);
1240     btrfs_free_path(path);
1241     trans->block_rsv = block_rsv;
1242 
1243     return ret;
1244 }
1245 
1246 int btrfs_commit_inode_delayed_inode(struct btrfs_inode *inode)
1247 {
1248     struct btrfs_fs_info *fs_info = inode->root->fs_info;
1249     struct btrfs_trans_handle *trans;
1250     struct btrfs_delayed_node *delayed_node = btrfs_get_delayed_node(inode);
1251     struct btrfs_path *path;
1252     struct btrfs_block_rsv *block_rsv;
1253     int ret;
1254 
1255     if (!delayed_node)
1256         return 0;
1257 
1258     mutex_lock(&delayed_node->mutex);
1259     if (!test_bit(BTRFS_DELAYED_NODE_INODE_DIRTY, &delayed_node->flags)) {
1260         mutex_unlock(&delayed_node->mutex);
1261         btrfs_release_delayed_node(delayed_node);
1262         return 0;
1263     }
1264     mutex_unlock(&delayed_node->mutex);
1265 
1266     trans = btrfs_join_transaction(delayed_node->root);
1267     if (IS_ERR(trans)) {
1268         ret = PTR_ERR(trans);
1269         goto out;
1270     }
1271 
1272     path = btrfs_alloc_path();
1273     if (!path) {
1274         ret = -ENOMEM;
1275         goto trans_out;
1276     }
1277 
1278     block_rsv = trans->block_rsv;
1279     trans->block_rsv = &fs_info->delayed_block_rsv;
1280 
1281     mutex_lock(&delayed_node->mutex);
1282     if (test_bit(BTRFS_DELAYED_NODE_INODE_DIRTY, &delayed_node->flags))
1283         ret = __btrfs_update_delayed_inode(trans, delayed_node->root,
1284                            path, delayed_node);
1285     else
1286         ret = 0;
1287     mutex_unlock(&delayed_node->mutex);
1288 
1289     btrfs_free_path(path);
1290     trans->block_rsv = block_rsv;
1291 trans_out:
1292     btrfs_end_transaction(trans);
1293     btrfs_btree_balance_dirty(fs_info);
1294 out:
1295     btrfs_release_delayed_node(delayed_node);
1296 
1297     return ret;
1298 }
1299 
1300 void btrfs_remove_delayed_node(struct btrfs_inode *inode)
1301 {
1302     struct btrfs_delayed_node *delayed_node;
1303 
1304     delayed_node = READ_ONCE(inode->delayed_node);
1305     if (!delayed_node)
1306         return;
1307 
1308     inode->delayed_node = NULL;
1309     btrfs_release_delayed_node(delayed_node);
1310 }
1311 
1312 struct btrfs_async_delayed_work {
1313     struct btrfs_delayed_root *delayed_root;
1314     int nr;
1315     struct btrfs_work work;
1316 };
1317 
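/*
 * Work queue job that flushes prepared delayed nodes in the background. It
 * commits the delayed items of one prepared node at a time, stopping once the
 * global item count drops below half of BTRFS_DELAYED_BACKGROUND, or after
 * processing the requested number of nodes (up to BTRFS_DELAYED_WRITEBACK
 * when no limit was given).
 */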
1318 static void btrfs_async_run_delayed_root(struct btrfs_work *work)
1319 {
1320     struct btrfs_async_delayed_work *async_work;
1321     struct btrfs_delayed_root *delayed_root;
1322     struct btrfs_trans_handle *trans;
1323     struct btrfs_path *path;
1324     struct btrfs_delayed_node *delayed_node = NULL;
1325     struct btrfs_root *root;
1326     struct btrfs_block_rsv *block_rsv;
1327     int total_done = 0;
1328 
1329     async_work = container_of(work, struct btrfs_async_delayed_work, work);
1330     delayed_root = async_work->delayed_root;
1331 
1332     path = btrfs_alloc_path();
1333     if (!path)
1334         goto out;
1335 
1336     do {
1337         if (atomic_read(&delayed_root->items) <
1338             BTRFS_DELAYED_BACKGROUND / 2)
1339             break;
1340 
1341         delayed_node = btrfs_first_prepared_delayed_node(delayed_root);
1342         if (!delayed_node)
1343             break;
1344 
1345         root = delayed_node->root;
1346 
1347         trans = btrfs_join_transaction(root);
1348         if (IS_ERR(trans)) {
1349             btrfs_release_path(path);
1350             btrfs_release_prepared_delayed_node(delayed_node);
1351             total_done++;
1352             continue;
1353         }
1354 
1355         block_rsv = trans->block_rsv;
1356         trans->block_rsv = &root->fs_info->delayed_block_rsv;
1357 
1358         __btrfs_commit_inode_delayed_items(trans, path, delayed_node);
1359 
1360         trans->block_rsv = block_rsv;
1361         btrfs_end_transaction(trans);
1362         btrfs_btree_balance_dirty_nodelay(root->fs_info);
1363 
1364         btrfs_release_path(path);
1365         btrfs_release_prepared_delayed_node(delayed_node);
1366         total_done++;
1367 
1368     } while ((async_work->nr == 0 && total_done < BTRFS_DELAYED_WRITEBACK)
1369          || total_done < async_work->nr);
1370 
1371     btrfs_free_path(path);
1372 out:
1373     wake_up(&delayed_root->wait);
1374     kfree(async_work);
1375 }
1376 
1377 
1378 static int btrfs_wq_run_delayed_node(struct btrfs_delayed_root *delayed_root,
1379                      struct btrfs_fs_info *fs_info, int nr)
1380 {
1381     struct btrfs_async_delayed_work *async_work;
1382 
1383     async_work = kmalloc(sizeof(*async_work), GFP_NOFS);
1384     if (!async_work)
1385         return -ENOMEM;
1386 
1387     async_work->delayed_root = delayed_root;
1388     btrfs_init_work(&async_work->work, btrfs_async_run_delayed_root, NULL,
1389             NULL);
1390     async_work->nr = nr;
1391 
1392     btrfs_queue_work(fs_info->delayed_workers, &async_work->work);
1393     return 0;
1394 }
1395 
1396 void btrfs_assert_delayed_root_empty(struct btrfs_fs_info *fs_info)
1397 {
1398     WARN_ON(btrfs_first_delayed_node(fs_info->delayed_root));
1399 }
1400 
1401 static int could_end_wait(struct btrfs_delayed_root *delayed_root, int seq)
1402 {
1403     int val = atomic_read(&delayed_root->items_seq);
1404 
1405     if (val < seq || val >= seq + BTRFS_DELAYED_BATCH)
1406         return 1;
1407 
1408     if (atomic_read(&delayed_root->items) < BTRFS_DELAYED_BACKGROUND)
1409         return 1;
1410 
1411     return 0;
1412 }
1413 
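/*
 * Throttle producers of delayed items. Do nothing while the pending item
 * count is below BTRFS_DELAYED_BACKGROUND or the worker queue is congested.
 * At or above BTRFS_DELAYED_WRITEBACK, queue an async flush and wait until
 * could_end_wait() says the backlog has shrunk; otherwise just kick an async
 * flush of BTRFS_DELAYED_BATCH nodes without waiting.
 */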
1414 void btrfs_balance_delayed_items(struct btrfs_fs_info *fs_info)
1415 {
1416     struct btrfs_delayed_root *delayed_root = fs_info->delayed_root;
1417 
1418     if ((atomic_read(&delayed_root->items) < BTRFS_DELAYED_BACKGROUND) ||
1419         btrfs_workqueue_normal_congested(fs_info->delayed_workers))
1420         return;
1421 
1422     if (atomic_read(&delayed_root->items) >= BTRFS_DELAYED_WRITEBACK) {
1423         int seq;
1424         int ret;
1425 
1426         seq = atomic_read(&delayed_root->items_seq);
1427 
1428         ret = btrfs_wq_run_delayed_node(delayed_root, fs_info, 0);
1429         if (ret)
1430             return;
1431 
1432         wait_event_interruptible(delayed_root->wait,
1433                      could_end_wait(delayed_root, seq));
1434         return;
1435     }
1436 
1437     btrfs_wq_run_delayed_node(delayed_root, fs_info, BTRFS_DELAYED_BATCH);
1438 }
1439 
1440 /* Will return 0 or -ENOMEM */
1441 int btrfs_insert_delayed_dir_index(struct btrfs_trans_handle *trans,
1442                    const char *name, int name_len,
1443                    struct btrfs_inode *dir,
1444                    struct btrfs_disk_key *disk_key, u8 type,
1445                    u64 index)
1446 {
1447     struct btrfs_fs_info *fs_info = trans->fs_info;
1448     const unsigned int leaf_data_size = BTRFS_LEAF_DATA_SIZE(fs_info);
1449     struct btrfs_delayed_node *delayed_node;
1450     struct btrfs_delayed_item *delayed_item;
1451     struct btrfs_dir_item *dir_item;
1452     bool reserve_leaf_space;
1453     u32 data_len;
1454     int ret;
1455 
1456     delayed_node = btrfs_get_or_create_delayed_node(dir);
1457     if (IS_ERR(delayed_node))
1458         return PTR_ERR(delayed_node);
1459 
1460     delayed_item = btrfs_alloc_delayed_item(sizeof(*dir_item) + name_len);
1461     if (!delayed_item) {
1462         ret = -ENOMEM;
1463         goto release_node;
1464     }
1465 
1466     delayed_item->key.objectid = btrfs_ino(dir);
1467     delayed_item->key.type = BTRFS_DIR_INDEX_KEY;
1468     delayed_item->key.offset = index;
1469     delayed_item->ins_or_del = BTRFS_DELAYED_INSERTION_ITEM;
1470 
1471     dir_item = (struct btrfs_dir_item *)delayed_item->data;
1472     dir_item->location = *disk_key;
1473     btrfs_set_stack_dir_transid(dir_item, trans->transid);
1474     btrfs_set_stack_dir_data_len(dir_item, 0);
1475     btrfs_set_stack_dir_name_len(dir_item, name_len);
1476     btrfs_set_stack_dir_type(dir_item, type);
1477     memcpy((char *)(dir_item + 1), name, name_len);
1478 
1479     data_len = delayed_item->data_len + sizeof(struct btrfs_item);
1480 
1481     mutex_lock(&delayed_node->mutex);
1482 
1483     if (delayed_node->index_item_leaves == 0 ||
1484         delayed_node->curr_index_batch_size + data_len > leaf_data_size) {
1485         delayed_node->curr_index_batch_size = data_len;
1486         reserve_leaf_space = true;
1487     } else {
1488         delayed_node->curr_index_batch_size += data_len;
1489         reserve_leaf_space = false;
1490     }
1491 
1492     if (reserve_leaf_space) {
1493         ret = btrfs_delayed_item_reserve_metadata(trans, dir->root,
1494                               delayed_item);
1495         /*
1496          * Space was reserved for a dir index item insertion when we
1497          * started the transaction, so getting a failure here should be
1498          * impossible.
1499          */
1500         if (WARN_ON(ret)) {
1501             mutex_unlock(&delayed_node->mutex);
1502             btrfs_release_delayed_item(delayed_item);
1503             goto release_node;
1504         }
1505 
1506         delayed_node->index_item_leaves++;
1507     } else if (!test_bit(BTRFS_FS_LOG_RECOVERING, &fs_info->flags)) {
1508         const u64 bytes = btrfs_calc_insert_metadata_size(fs_info, 1);
1509 
1510         /*
1511          * Adding the new dir index item does not require touching another
1512          * leaf, so we can release 1 unit of metadata that was previously
1513          * reserved when starting the transaction. This applies only to
1514          * the case where we had a transaction start and excludes the
1515          * transaction join case (when replaying log trees).
1516          */
1517         trace_btrfs_space_reservation(fs_info, "transaction",
1518                           trans->transid, bytes, 0);
1519         btrfs_block_rsv_release(fs_info, trans->block_rsv, bytes, NULL);
1520         ASSERT(trans->bytes_reserved >= bytes);
1521         trans->bytes_reserved -= bytes;
1522     }
1523 
1524     ret = __btrfs_add_delayed_item(delayed_node, delayed_item);
1525     if (unlikely(ret)) {
1526         btrfs_err(trans->fs_info,
1527               "err add delayed dir index item(name: %.*s) into the insertion tree of the delayed node(root id: %llu, inode id: %llu, errno: %d)",
1528               name_len, name, delayed_node->root->root_key.objectid,
1529               delayed_node->inode_id, ret);
1530         BUG();
1531     }
1532     mutex_unlock(&delayed_node->mutex);
1533 
1534 release_node:
1535     btrfs_release_delayed_node(delayed_node);
1536     return ret;
1537 }
1538 
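/*
 * If the dir index being deleted still exists only as a delayed insertion
 * item, drop that item directly instead of queueing a deletion item: adjust
 * the current leaf batch accounting and release the reserved leaves once no
 * insertion items remain. Returns 0 if the item was found and dropped, 1
 * otherwise.
 */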
1539 static int btrfs_delete_delayed_insertion_item(struct btrfs_fs_info *fs_info,
1540                            struct btrfs_delayed_node *node,
1541                            struct btrfs_key *key)
1542 {
1543     struct btrfs_delayed_item *item;
1544 
1545     mutex_lock(&node->mutex);
1546     item = __btrfs_lookup_delayed_insertion_item(node, key);
1547     if (!item) {
1548         mutex_unlock(&node->mutex);
1549         return 1;
1550     }
1551 
1552     /*
1553      * For delayed items to insert, we track reserved metadata bytes based
1554      * on the number of leaves that we will use.
1555      * See btrfs_insert_delayed_dir_index() and
1556      * btrfs_delayed_item_reserve_metadata().
1557      */
1558     ASSERT(item->bytes_reserved == 0);
1559     ASSERT(node->index_item_leaves > 0);
1560 
1561     /*
1562      * If there's only one leaf reserved, we can decrement this item from the
1563      * current batch, otherwise we can not because we don't know which leaf
1564      * it belongs to. With the current limit on delayed items, we rarely
1565      * accumulate enough dir index items to fill more than one leaf (even
1566      * when using a leaf size of 4K).
1567      */
1568     if (node->index_item_leaves == 1) {
1569         const u32 data_len = item->data_len + sizeof(struct btrfs_item);
1570 
1571         ASSERT(node->curr_index_batch_size >= data_len);
1572         node->curr_index_batch_size -= data_len;
1573     }
1574 
1575     btrfs_release_delayed_item(item);
1576 
1577     /* If we now have no more dir index items, we can release all leaves. */
1578     if (RB_EMPTY_ROOT(&node->ins_root.rb_root)) {
1579         btrfs_delayed_item_release_leaves(node, node->index_item_leaves);
1580         node->index_item_leaves = 0;
1581     }
1582 
1583     mutex_unlock(&node->mutex);
1584     return 0;
1585 }
1586 
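The accounting in btrfs_delete_delayed_insertion_item() above, together with its counterpart in btrfs_insert_delayed_dir_index(), tracks how many leaves' worth of metadata are reserved for pending dir index insertions. The following is a small self-contained user-space model of that bookkeeping, under assumed leaf and item-header sizes; it is a sketch, not the kernel implementation: a new leaf is reserved whenever the current batch would overflow, a cancelled item only shrinks the batch while exactly one leaf is reserved, and all leaves are released once no insertion items remain.

/* Illustrative user-space model of the delayed dir-index batch accounting. */
#include <stdio.h>

#define MODEL_LEAF_DATA_SIZE  16128u  /* assumed usable leaf data size      */
#define MODEL_ITEM_OVERHEAD      25u  /* assumed sizeof(struct btrfs_item)  */

struct model_node {
    unsigned int index_item_leaves;      /* leaves reserved for insertions  */
    unsigned int curr_index_batch_size;  /* bytes queued in current leaf    */
    unsigned int nr_items;               /* queued insertion items          */
};

/* Queue one dir index item carrying @data_len payload bytes. */
static void model_add_index_item(struct model_node *node, unsigned int data_len)
{
    unsigned int total = data_len + MODEL_ITEM_OVERHEAD;

    if (node->index_item_leaves == 0 ||
        node->curr_index_batch_size + total > MODEL_LEAF_DATA_SIZE) {
        /* Would overflow the current leaf: start a new batch and account
         * one more leaf's worth of reserved metadata. */
        node->index_item_leaves++;
        node->curr_index_batch_size = total;
    } else {
        node->curr_index_batch_size += total;
    }
    node->nr_items++;
}

/* Cancel one queued item, as btrfs_delete_delayed_insertion_item() does. */
static void model_del_index_item(struct model_node *node, unsigned int data_len)
{
    unsigned int total = data_len + MODEL_ITEM_OVERHEAD;

    if (node->index_item_leaves == 1)
        node->curr_index_batch_size -= total;
    node->nr_items--;
    if (node->nr_items == 0)
        node->index_item_leaves = 0;  /* release all reserved leaves */
}

int main(void)
{
    struct model_node node = { 0 };

    for (int i = 0; i < 3; i++)
        model_add_index_item(&node, 60);
    model_del_index_item(&node, 60);
    printf("leaves=%u batch=%u items=%u\n", node.index_item_leaves,
           node.curr_index_batch_size, node.nr_items);
    return 0;
}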
1587 int btrfs_delete_delayed_dir_index(struct btrfs_trans_handle *trans,
1588                    struct btrfs_inode *dir, u64 index)
1589 {
1590     struct btrfs_delayed_node *node;
1591     struct btrfs_delayed_item *item;
1592     struct btrfs_key item_key;
1593     int ret;
1594 
1595     node = btrfs_get_or_create_delayed_node(dir);
1596     if (IS_ERR(node))
1597         return PTR_ERR(node);
1598 
1599     item_key.objectid = btrfs_ino(dir);
1600     item_key.type = BTRFS_DIR_INDEX_KEY;
1601     item_key.offset = index;
1602 
1603     ret = btrfs_delete_delayed_insertion_item(trans->fs_info, node,
1604                           &item_key);
1605     if (!ret)
1606         goto end;
1607 
1608     item = btrfs_alloc_delayed_item(0);
1609     if (!item) {
1610         ret = -ENOMEM;
1611         goto end;
1612     }
1613 
1614     item->key = item_key;
1615     item->ins_or_del = BTRFS_DELAYED_DELETION_ITEM;
1616 
1617     ret = btrfs_delayed_item_reserve_metadata(trans, dir->root, item);
1618     /*
1619      * We reserved enough space when we started a new transaction, so a
1620      * metadata reservation failure here should be impossible.
1621      */
1622     if (ret < 0) {
1623         btrfs_err(trans->fs_info,
1624 "metadata reservation failed for delayed dir item deltiona, should have been reserved");
1625         btrfs_release_delayed_item(item);
1626         goto end;
1627     }
1628 
1629     mutex_lock(&node->mutex);
1630     ret = __btrfs_add_delayed_item(node, item);
1631     if (unlikely(ret)) {
1632         btrfs_err(trans->fs_info,
1633               "error adding delayed dir index item (index: %llu) to the deletion tree of the delayed node (root id: %llu, inode id: %llu, errno: %d)",
1634               index, node->root->root_key.objectid,
1635               node->inode_id, ret);
1636         btrfs_delayed_item_release_metadata(dir->root, item);
1637         btrfs_release_delayed_item(item);
1638     }
1639     mutex_unlock(&node->mutex);
1640 end:
1641     btrfs_release_delayed_node(node);
1642     return ret;
1643 }
1644 
1645 int btrfs_inode_delayed_dir_index_count(struct btrfs_inode *inode)
1646 {
1647     struct btrfs_delayed_node *delayed_node = btrfs_get_delayed_node(inode);
1648 
1649     if (!delayed_node)
1650         return -ENOENT;
1651 
1652     /*
1653      * Since we hold the i_mutex of this directory, no new directory index
1654      * can be added to the delayed node and index_cnt cannot be updated
1655      * right now, so we needn't lock the delayed node.
1656      */
1657     if (!delayed_node->index_cnt) {
1658         btrfs_release_delayed_node(delayed_node);
1659         return -EINVAL;
1660     }
1661 
1662     inode->index_cnt = delayed_node->index_cnt;
1663     btrfs_release_delayed_node(delayed_node);
1664     return 0;
1665 }
1666 
1667 bool btrfs_readdir_get_delayed_items(struct inode *inode,
1668                      struct list_head *ins_list,
1669                      struct list_head *del_list)
1670 {
1671     struct btrfs_delayed_node *delayed_node;
1672     struct btrfs_delayed_item *item;
1673 
1674     delayed_node = btrfs_get_delayed_node(BTRFS_I(inode));
1675     if (!delayed_node)
1676         return false;
1677 
1678     /*
1679      * We can only do one readdir with delayed items at a time because of
1680      * item->readdir_list.
1681      */
1682     btrfs_inode_unlock(inode, BTRFS_ILOCK_SHARED);
1683     btrfs_inode_lock(inode, 0);
1684 
1685     mutex_lock(&delayed_node->mutex);
1686     item = __btrfs_first_delayed_insertion_item(delayed_node);
1687     while (item) {
1688         refcount_inc(&item->refs);
1689         list_add_tail(&item->readdir_list, ins_list);
1690         item = __btrfs_next_delayed_item(item);
1691     }
1692 
1693     item = __btrfs_first_delayed_deletion_item(delayed_node);
1694     while (item) {
1695         refcount_inc(&item->refs);
1696         list_add_tail(&item->readdir_list, del_list);
1697         item = __btrfs_next_delayed_item(item);
1698     }
1699     mutex_unlock(&delayed_node->mutex);
1700     /*
1701      * This delayed node is still cached in the btrfs inode, so refs
1702      * must be > 1 now, and we needn't check whether it is about to be
1703      * freed.
1704      *
1705      * Besides that, this function is only used for readdir, and no
1706      * delayed items are inserted or deleted during that period, so we
1707      * needn't requeue or dequeue this delayed node either.
1708      */
1709     refcount_dec(&delayed_node->refs);
1710 
1711     return true;
1712 }
1713 
1714 void btrfs_readdir_put_delayed_items(struct inode *inode,
1715                      struct list_head *ins_list,
1716                      struct list_head *del_list)
1717 {
1718     struct btrfs_delayed_item *curr, *next;
1719 
1720     list_for_each_entry_safe(curr, next, ins_list, readdir_list) {
1721         list_del(&curr->readdir_list);
1722         if (refcount_dec_and_test(&curr->refs))
1723             kfree(curr);
1724     }
1725 
1726     list_for_each_entry_safe(curr, next, del_list, readdir_list) {
1727         list_del(&curr->readdir_list);
1728         if (refcount_dec_and_test(&curr->refs))
1729             kfree(curr);
1730     }
1731 
1732     /*
1733      * The VFS is going to do up_read(), so we need to downgrade back to a
1734      * read lock.
1735      */
1736     downgrade_write(&inode->i_rwsem);
1737 }
1738 
1739 int btrfs_should_delete_dir_index(struct list_head *del_list,
1740                   u64 index)
1741 {
1742     struct btrfs_delayed_item *curr;
1743     int ret = 0;
1744 
1745     list_for_each_entry(curr, del_list, readdir_list) {
1746         if (curr->key.offset > index)
1747             break;
1748         if (curr->key.offset == index) {
1749             ret = 1;
1750             break;
1751         }
1752     }
1753     return ret;
1754 }
1755 
1756 /*
1757  * btrfs_readdir_delayed_dir_index - read dir info stored in the delayed tree
1758  * Emit the directory entries queued for insertion on @ins_list via @ctx.
1759  */
1760 int btrfs_readdir_delayed_dir_index(struct dir_context *ctx,
1761                     struct list_head *ins_list)
1762 {
1763     struct btrfs_dir_item *di;
1764     struct btrfs_delayed_item *curr, *next;
1765     struct btrfs_key location;
1766     char *name;
1767     int name_len;
1768     int over = 0;
1769     unsigned char d_type;
1770 
1771     if (list_empty(ins_list))
1772         return 0;
1773 
1774     /*
1775      * The data of a delayed item is never changed once queued, so we
1776      * needn't lock it. And since we hold the i_mutex of the directory,
1777      * nobody can delete any directory index right now.
1778      */
1779     list_for_each_entry_safe(curr, next, ins_list, readdir_list) {
1780         list_del(&curr->readdir_list);
1781 
1782         if (curr->key.offset < ctx->pos) {
1783             if (refcount_dec_and_test(&curr->refs))
1784                 kfree(curr);
1785             continue;
1786         }
1787 
1788         ctx->pos = curr->key.offset;
1789 
1790         di = (struct btrfs_dir_item *)curr->data;
1791         name = (char *)(di + 1);
1792         name_len = btrfs_stack_dir_name_len(di);
1793 
1794         d_type = fs_ftype_to_dtype(di->type);
1795         btrfs_disk_key_to_cpu(&location, &di->location);
1796 
1797         over = !dir_emit(ctx, name, name_len,
1798                    location.objectid, d_type);
1799 
1800         if (refcount_dec_and_test(&curr->refs))
1801             kfree(curr);
1802 
1803         if (over)
1804             return 1;
1805         ctx->pos++;
1806     }
1807     return 0;
1808 }
1809 
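The three helpers above cooperate with the on-disk readdir walk: btrfs_readdir_get_delayed_items() snapshots the pending insertions and deletions, btrfs_should_delete_dir_index() filters out on-disk entries already queued for deletion, and btrfs_readdir_delayed_dir_index() emits the entries that exist only as pending insertions (the caller, btrfs_real_readdir(), roughly does get, walk and filter, emit delayed, then put). The sketch below is a self-contained user-space model of that merge by dir index; the struct and array names are invented for illustration.

/*
 * Illustrative user-space model: merge on-disk dir entries with the delayed
 * insertion/deletion lists, keyed by dir index (not kernel code).
 */
#include <stdbool.h>
#include <stddef.h>
#include <stdio.h>

struct model_entry {
    unsigned long long index;
    const char *name;
};

/* Would this on-disk entry be removed by a queued deletion item? */
static bool model_should_delete(const unsigned long long *del, size_t n,
                                unsigned long long index)
{
    for (size_t i = 0; i < n; i++) {
        if (del[i] > index)
            break;              /* deletion list is sorted by index */
        if (del[i] == index)
            return true;
    }
    return false;
}

int main(void)
{
    /* On-disk entries and delayed lists, all sorted by dir index. */
    const struct model_entry on_disk[] = { {2, "a"}, {3, "b"}, {5, "c"} };
    const struct model_entry ins_list[] = { {7, "new"} };
    const unsigned long long del_list[] = { 3 };

    /* Emit surviving on-disk entries first... */
    for (size_t i = 0; i < sizeof(on_disk) / sizeof(on_disk[0]); i++) {
        if (model_should_delete(del_list,
                                sizeof(del_list) / sizeof(del_list[0]),
                                on_disk[i].index))
            continue;
        printf("%llu %s\n", on_disk[i].index, on_disk[i].name);
    }
    /*
     * ...then the entries that exist only as pending insertions.  New dir
     * indexes are allocated in increasing order, so they sort last.
     */
    for (size_t i = 0; i < sizeof(ins_list) / sizeof(ins_list[0]); i++)
        printf("%llu %s\n", ins_list[i].index, ins_list[i].name);
    return 0;
}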
1810 static void fill_stack_inode_item(struct btrfs_trans_handle *trans,
1811                   struct btrfs_inode_item *inode_item,
1812                   struct inode *inode)
1813 {
1814     u64 flags;
1815 
1816     btrfs_set_stack_inode_uid(inode_item, i_uid_read(inode));
1817     btrfs_set_stack_inode_gid(inode_item, i_gid_read(inode));
1818     btrfs_set_stack_inode_size(inode_item, BTRFS_I(inode)->disk_i_size);
1819     btrfs_set_stack_inode_mode(inode_item, inode->i_mode);
1820     btrfs_set_stack_inode_nlink(inode_item, inode->i_nlink);
1821     btrfs_set_stack_inode_nbytes(inode_item, inode_get_bytes(inode));
1822     btrfs_set_stack_inode_generation(inode_item,
1823                      BTRFS_I(inode)->generation);
1824     btrfs_set_stack_inode_sequence(inode_item,
1825                        inode_peek_iversion(inode));
1826     btrfs_set_stack_inode_transid(inode_item, trans->transid);
1827     btrfs_set_stack_inode_rdev(inode_item, inode->i_rdev);
1828     flags = btrfs_inode_combine_flags(BTRFS_I(inode)->flags,
1829                       BTRFS_I(inode)->ro_flags);
1830     btrfs_set_stack_inode_flags(inode_item, flags);
1831     btrfs_set_stack_inode_block_group(inode_item, 0);
1832 
1833     btrfs_set_stack_timespec_sec(&inode_item->atime,
1834                      inode->i_atime.tv_sec);
1835     btrfs_set_stack_timespec_nsec(&inode_item->atime,
1836                       inode->i_atime.tv_nsec);
1837 
1838     btrfs_set_stack_timespec_sec(&inode_item->mtime,
1839                      inode->i_mtime.tv_sec);
1840     btrfs_set_stack_timespec_nsec(&inode_item->mtime,
1841                       inode->i_mtime.tv_nsec);
1842 
1843     btrfs_set_stack_timespec_sec(&inode_item->ctime,
1844                      inode->i_ctime.tv_sec);
1845     btrfs_set_stack_timespec_nsec(&inode_item->ctime,
1846                       inode->i_ctime.tv_nsec);
1847 
1848     btrfs_set_stack_timespec_sec(&inode_item->otime,
1849                      BTRFS_I(inode)->i_otime.tv_sec);
1850     btrfs_set_stack_timespec_nsec(&inode_item->otime,
1851                      BTRFS_I(inode)->i_otime.tv_nsec);
1852 }
1853 
1854 int btrfs_fill_inode(struct inode *inode, u32 *rdev)
1855 {
1856     struct btrfs_fs_info *fs_info = BTRFS_I(inode)->root->fs_info;
1857     struct btrfs_delayed_node *delayed_node;
1858     struct btrfs_inode_item *inode_item;
1859 
1860     delayed_node = btrfs_get_delayed_node(BTRFS_I(inode));
1861     if (!delayed_node)
1862         return -ENOENT;
1863 
1864     mutex_lock(&delayed_node->mutex);
1865     if (!test_bit(BTRFS_DELAYED_NODE_INODE_DIRTY, &delayed_node->flags)) {
1866         mutex_unlock(&delayed_node->mutex);
1867         btrfs_release_delayed_node(delayed_node);
1868         return -ENOENT;
1869     }
1870 
1871     inode_item = &delayed_node->inode_item;
1872 
1873     i_uid_write(inode, btrfs_stack_inode_uid(inode_item));
1874     i_gid_write(inode, btrfs_stack_inode_gid(inode_item));
1875     btrfs_i_size_write(BTRFS_I(inode), btrfs_stack_inode_size(inode_item));
1876     btrfs_inode_set_file_extent_range(BTRFS_I(inode), 0,
1877             round_up(i_size_read(inode), fs_info->sectorsize));
1878     inode->i_mode = btrfs_stack_inode_mode(inode_item);
1879     set_nlink(inode, btrfs_stack_inode_nlink(inode_item));
1880     inode_set_bytes(inode, btrfs_stack_inode_nbytes(inode_item));
1881     BTRFS_I(inode)->generation = btrfs_stack_inode_generation(inode_item);
1882     BTRFS_I(inode)->last_trans = btrfs_stack_inode_transid(inode_item);
1883 
1884     inode_set_iversion_queried(inode,
1885                    btrfs_stack_inode_sequence(inode_item));
1886     inode->i_rdev = 0;
1887     *rdev = btrfs_stack_inode_rdev(inode_item);
1888     btrfs_inode_split_flags(btrfs_stack_inode_flags(inode_item),
1889                 &BTRFS_I(inode)->flags, &BTRFS_I(inode)->ro_flags);
1890 
1891     inode->i_atime.tv_sec = btrfs_stack_timespec_sec(&inode_item->atime);
1892     inode->i_atime.tv_nsec = btrfs_stack_timespec_nsec(&inode_item->atime);
1893 
1894     inode->i_mtime.tv_sec = btrfs_stack_timespec_sec(&inode_item->mtime);
1895     inode->i_mtime.tv_nsec = btrfs_stack_timespec_nsec(&inode_item->mtime);
1896 
1897     inode->i_ctime.tv_sec = btrfs_stack_timespec_sec(&inode_item->ctime);
1898     inode->i_ctime.tv_nsec = btrfs_stack_timespec_nsec(&inode_item->ctime);
1899 
1900     BTRFS_I(inode)->i_otime.tv_sec =
1901         btrfs_stack_timespec_sec(&inode_item->otime);
1902     BTRFS_I(inode)->i_otime.tv_nsec =
1903         btrfs_stack_timespec_nsec(&inode_item->otime);
1904 
1905     inode->i_generation = BTRFS_I(inode)->generation;
1906     BTRFS_I(inode)->index_cnt = (u64)-1;
1907 
1908     mutex_unlock(&delayed_node->mutex);
1909     btrfs_release_delayed_node(delayed_node);
1910     return 0;
1911 }
1912 
1913 int btrfs_delayed_update_inode(struct btrfs_trans_handle *trans,
1914                    struct btrfs_root *root,
1915                    struct btrfs_inode *inode)
1916 {
1917     struct btrfs_delayed_node *delayed_node;
1918     int ret = 0;
1919 
1920     delayed_node = btrfs_get_or_create_delayed_node(inode);
1921     if (IS_ERR(delayed_node))
1922         return PTR_ERR(delayed_node);
1923 
1924     mutex_lock(&delayed_node->mutex);
1925     if (test_bit(BTRFS_DELAYED_NODE_INODE_DIRTY, &delayed_node->flags)) {
1926         fill_stack_inode_item(trans, &delayed_node->inode_item,
1927                       &inode->vfs_inode);
1928         goto release_node;
1929     }
1930 
1931     ret = btrfs_delayed_inode_reserve_metadata(trans, root, delayed_node);
1932     if (ret)
1933         goto release_node;
1934 
1935     fill_stack_inode_item(trans, &delayed_node->inode_item, &inode->vfs_inode);
1936     set_bit(BTRFS_DELAYED_NODE_INODE_DIRTY, &delayed_node->flags);
1937     delayed_node->count++;
1938     atomic_inc(&root->fs_info->delayed_root->items);
1939 release_node:
1940     mutex_unlock(&delayed_node->mutex);
1941     btrfs_release_delayed_node(delayed_node);
1942     return ret;
1943 }
1944 
1945 int btrfs_delayed_delete_inode_ref(struct btrfs_inode *inode)
1946 {
1947     struct btrfs_fs_info *fs_info = inode->root->fs_info;
1948     struct btrfs_delayed_node *delayed_node;
1949 
1950     /*
1951      * We don't do delayed inode updates during log recovery because it
1952      * leads to ENOSPC problems.  This means we also can't do delayed
1953      * inode refs.
1954      */
1955     if (test_bit(BTRFS_FS_LOG_RECOVERING, &fs_info->flags))
1956         return -EAGAIN;
1957 
1958     delayed_node = btrfs_get_or_create_delayed_node(inode);
1959     if (IS_ERR(delayed_node))
1960         return PTR_ERR(delayed_node);
1961 
1962     /*
1963      * We don't reserve space for inode ref deletion because:
1964      * - We ONLY do async inode ref deletion for an inode that has only
1965      *   one link (i_nlink == 1), which means there is only one inode ref.
1966      *   In most cases the inode ref and the inode item are in the same
1967      *   leaf, and we will deal with them at the same time.  Since we are
1968      *   sure we will reserve the space for the inode item, it is
1969      *   unnecessary to reserve space for the inode ref deletion.
1970      * - If the inode ref and the inode item are not in the same leaf,
1971      *   we also needn't worry about ENOSPC problems, because we reserve
1972      *   much more space for the inode update than it needs.
1973      * - At worst, we can steal some space from the global reservation.
1974      *   That is very rare.
1975      */
1976     mutex_lock(&delayed_node->mutex);
1977     if (test_bit(BTRFS_DELAYED_NODE_DEL_IREF, &delayed_node->flags))
1978         goto release_node;
1979 
1980     set_bit(BTRFS_DELAYED_NODE_DEL_IREF, &delayed_node->flags);
1981     delayed_node->count++;
1982     atomic_inc(&fs_info->delayed_root->items);
1983 release_node:
1984     mutex_unlock(&delayed_node->mutex);
1985     btrfs_release_delayed_node(delayed_node);
1986     return 0;
1987 }
1988 
1989 static void __btrfs_kill_delayed_node(struct btrfs_delayed_node *delayed_node)
1990 {
1991     struct btrfs_root *root = delayed_node->root;
1992     struct btrfs_fs_info *fs_info = root->fs_info;
1993     struct btrfs_delayed_item *curr_item, *prev_item;
1994 
1995     mutex_lock(&delayed_node->mutex);
1996     curr_item = __btrfs_first_delayed_insertion_item(delayed_node);
1997     while (curr_item) {
1998         prev_item = curr_item;
1999         curr_item = __btrfs_next_delayed_item(prev_item);
2000         btrfs_release_delayed_item(prev_item);
2001     }
2002 
2003     if (delayed_node->index_item_leaves > 0) {
2004         btrfs_delayed_item_release_leaves(delayed_node,
2005                       delayed_node->index_item_leaves);
2006         delayed_node->index_item_leaves = 0;
2007     }
2008 
2009     curr_item = __btrfs_first_delayed_deletion_item(delayed_node);
2010     while (curr_item) {
2011         btrfs_delayed_item_release_metadata(root, curr_item);
2012         prev_item = curr_item;
2013         curr_item = __btrfs_next_delayed_item(prev_item);
2014         btrfs_release_delayed_item(prev_item);
2015     }
2016 
2017     btrfs_release_delayed_iref(delayed_node);
2018 
2019     if (test_bit(BTRFS_DELAYED_NODE_INODE_DIRTY, &delayed_node->flags)) {
2020         btrfs_delayed_inode_release_metadata(fs_info, delayed_node, false);
2021         btrfs_release_delayed_inode(delayed_node);
2022     }
2023     mutex_unlock(&delayed_node->mutex);
2024 }
2025 
2026 void btrfs_kill_delayed_inode_items(struct btrfs_inode *inode)
2027 {
2028     struct btrfs_delayed_node *delayed_node;
2029 
2030     delayed_node = btrfs_get_delayed_node(inode);
2031     if (!delayed_node)
2032         return;
2033 
2034     __btrfs_kill_delayed_node(delayed_node);
2035     btrfs_release_delayed_node(delayed_node);
2036 }
2037 
2038 void btrfs_kill_all_delayed_nodes(struct btrfs_root *root)
2039 {
2040     u64 inode_id = 0;
2041     struct btrfs_delayed_node *delayed_nodes[8];
2042     int i, n;
2043 
2044     while (1) {
2045         spin_lock(&root->inode_lock);
2046         n = radix_tree_gang_lookup(&root->delayed_nodes_tree,
2047                        (void **)delayed_nodes, inode_id,
2048                        ARRAY_SIZE(delayed_nodes));
2049         if (!n) {
2050             spin_unlock(&root->inode_lock);
2051             break;
2052         }
2053 
2054         inode_id = delayed_nodes[n - 1]->inode_id + 1;
2055         for (i = 0; i < n; i++) {
2056             /*
2057              * Don't increase refs in case the node is dead and
2058              * about to be removed from the tree in the loop below
2059              */
2060             if (!refcount_inc_not_zero(&delayed_nodes[i]->refs))
2061                 delayed_nodes[i] = NULL;
2062         }
2063         spin_unlock(&root->inode_lock);
2064 
2065         for (i = 0; i < n; i++) {
2066             if (!delayed_nodes[i])
2067                 continue;
2068             __btrfs_kill_delayed_node(delayed_nodes[i]);
2069             btrfs_release_delayed_node(delayed_nodes[i]);
2070         }
2071     }
2072 }
2073 
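The gang-lookup loop above only takes a reference with refcount_inc_not_zero(), so a node whose refcount has already dropped to zero (its release is in flight and it is about to leave the radix tree) is skipped rather than resurrected. Below is a minimal user-space model of that guard using C11 atomics; the names are illustrative and this is not the kernel's refcount API.

/*
 * Illustrative user-space model of the "take a reference only if the node is
 * still alive" guard used by the gang-lookup loop above (not kernel code).
 */
#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

struct model_node {
    atomic_uint refs;
    unsigned long long inode_id;
};

/* Bump the refcount unless it has already dropped to zero. */
static bool model_ref_inc_not_zero(struct model_node *node)
{
    unsigned int old = atomic_load(&node->refs);

    while (old != 0) {
        if (atomic_compare_exchange_weak(&node->refs, &old, old + 1))
            return true;   /* node was alive, reference taken */
    }
    return false;          /* node is dead, caller must skip it */
}

int main(void)
{
    struct model_node alive = { .refs = 1, .inode_id = 257 };
    struct model_node dying = { .refs = 0, .inode_id = 258 }; /* release in flight */
    struct model_node *batch[] = { &alive, &dying };

    for (int i = 0; i < 2; i++) {
        if (!model_ref_inc_not_zero(batch[i])) {
            printf("skipping dead node %llu\n", batch[i]->inode_id);
            continue;
        }
        printf("killing node %llu\n", batch[i]->inode_id);
        /* ...kill and release of the delayed node would go here... */
    }
    return 0;
}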
2074 void btrfs_destroy_delayed_inodes(struct btrfs_fs_info *fs_info)
2075 {
2076     struct btrfs_delayed_node *curr_node, *prev_node;
2077 
2078     curr_node = btrfs_first_delayed_node(fs_info->delayed_root);
2079     while (curr_node) {
2080         __btrfs_kill_delayed_node(curr_node);
2081 
2082         prev_node = curr_node;
2083         curr_node = btrfs_next_delayed_node(curr_node);
2084         btrfs_release_delayed_node(prev_node);
2085     }
2086 }
2087