Back to home page

OSCL-LXR

 
 

    


0001 // SPDX-License-Identifier: GPL-2.0
0002 
0003 #include "misc.h"
0004 #include "ctree.h"
0005 #include "block-rsv.h"
0006 #include "space-info.h"
0007 #include "transaction.h"
0008 #include "block-group.h"
0009 #include "disk-io.h"
0010 
0011 /*
0012  * HOW DO BLOCK RESERVES WORK
0013  *
0014  *   Think of block_rsv's as buckets for logically grouped metadata
0015  *   reservations.  Each block_rsv has a ->size and a ->reserved.  ->size is
0016  *   how large we want our block rsv to be, ->reserved is how much space is
0017  *   currently reserved for this block reserve.
0018  *
0019  *   ->failfast exists for the truncate case, and is described below.
0020  *
0021  * NORMAL OPERATION
0022  *
0023  *   -> Reserve
0024  *     Entrance: btrfs_block_rsv_add, btrfs_block_rsv_refill
0025  *
0026  *     We call into btrfs_reserve_metadata_bytes() with our bytes, which is
0027  *     accounted for in space_info->bytes_may_use, and then add the bytes to
0028  *     ->reserved, and ->size in the case of btrfs_block_rsv_add.
0029  *
0030  *     ->size is an over-estimation of how much we may use for a particular
0031  *     operation.
0032  *
0033  *   -> Use
0034  *     Entrance: btrfs_use_block_rsv
0035  *
0036  *     When we do a btrfs_alloc_tree_block() we call into btrfs_use_block_rsv()
0037  *     to determine the appropriate block_rsv to use, and then verify that
0038  *     ->reserved has enough space for our tree block allocation.  Once
0039  *     successful we subtract fs_info->nodesize from ->reserved.
0040  *
0041  *   -> Finish
0042  *     Entrance: btrfs_block_rsv_release
0043  *
0044  *     We are finished with our operation, subtract our individual reservation
0045  *     from ->size, and then subtract ->size from ->reserved and free up the
0046  *     excess if there is any.
0047  *
0048  *     There is some logic here to refill the delayed refs rsv or the global rsv
0049  *     as needed, otherwise the excess is subtracted from
0050  *     space_info->bytes_may_use.
0051  *
0052  * TYPES OF BLOCK RESERVES
0053  *
0054  * BLOCK_RSV_TRANS, BLOCK_RSV_DELOPS, BLOCK_RSV_CHUNK
0055  *   These behave normally, as described above, just within the confines of the
0056  *   lifetime of their particular operation (transaction for the whole trans
0057  *   handle lifetime, for example).
0058  *
0059  * BLOCK_RSV_GLOBAL
0060  *   It is impossible to properly account for all the space that may be required
0061  *   to make our extent tree updates.  This block reserve acts as an overflow
0062  *   buffer in case our delayed refs reserve does not reserve enough space to
0063  *   update the extent tree.
0064  *
0065  *   We can steal from this in some cases as well, notably on evict() or
0066  *   truncate() in order to help users recover from ENOSPC conditions.
0067  *
0068  * BLOCK_RSV_DELALLOC
0069  *   The individual item sizes are determined by the per-inode size
0070  *   calculations, which are described with the delalloc code.  This is pretty
0071  *   straightforward, it's just the calculation of ->size encodes a lot of
0072  *   different items, and thus it gets used when updating inodes, inserting file
0073  *   extents, and inserting checksums.
0074  *
0075  * BLOCK_RSV_DELREFS
0076  *   We keep a running tally of how many delayed refs we have on the system.
0077  *   We assume each one of these delayed refs is going to use a full
0078  *   reservation.  We use the transaction items and pre-reserve space for every
0079  *   operation, and use this reservation to refill any gap between ->size and
0080  *   ->reserved that may exist.
0081  *
0082  *   From there it's straightforward, removing a delayed ref means we remove its
0083  *   count from ->size and free up reservations as necessary.  Since this is
0084  *   the most dynamic block reserve in the system, we will try to refill this
0085  *   block reserve first with any excess returned by any other block reserve.
0086  *
0087  * BLOCK_RSV_EMPTY
0088  *   This is the fallback block reserve to make us try to reserve space if we
0089  *   don't have a specific bucket for this allocation.  It is mostly used for
0090  *   updating the device tree and such, since that is a separate pool we're
0091  *   content to just reserve space from the space_info on demand.
0092  *
0093  * BLOCK_RSV_TEMP
0094  *   This is used by things like truncate and iput.  We will temporarily
0095  *   allocate a block reserve, set it to some size, and then truncate bytes
0096  *   until we have no space left.  With ->failfast set we'll simply return
0097  *   ENOSPC from btrfs_use_block_rsv() to signal that we need to unwind and try
0098  *   to make a new reservation.  This is because these operations are
0099  *   unbounded, so we want to do as much work as we can, and then back off and
0100  *   re-reserve.
0101  */
0102 
0103 static u64 block_rsv_release_bytes(struct btrfs_fs_info *fs_info,
0104                     struct btrfs_block_rsv *block_rsv,
0105                     struct btrfs_block_rsv *dest, u64 num_bytes,
0106                     u64 *qgroup_to_release_ret)
0107 {
0108     struct btrfs_space_info *space_info = block_rsv->space_info;
0109     u64 qgroup_to_release = 0;
0110     u64 ret;
0111 
0112     spin_lock(&block_rsv->lock);
0113     if (num_bytes == (u64)-1) {
0114         num_bytes = block_rsv->size;
0115         qgroup_to_release = block_rsv->qgroup_rsv_size;
0116     }
0117     block_rsv->size -= num_bytes;
0118     if (block_rsv->reserved >= block_rsv->size) {
0119         num_bytes = block_rsv->reserved - block_rsv->size;
0120         block_rsv->reserved = block_rsv->size;
0121         block_rsv->full = true;
0122     } else {
0123         num_bytes = 0;
0124     }
0125     if (block_rsv->qgroup_rsv_reserved >= block_rsv->qgroup_rsv_size) {
0126         qgroup_to_release = block_rsv->qgroup_rsv_reserved -
0127                     block_rsv->qgroup_rsv_size;
0128         block_rsv->qgroup_rsv_reserved = block_rsv->qgroup_rsv_size;
0129     } else {
0130         qgroup_to_release = 0;
0131     }
0132     spin_unlock(&block_rsv->lock);
0133 
0134     ret = num_bytes;
0135     if (num_bytes > 0) {
0136         if (dest) {
0137             spin_lock(&dest->lock);
0138             if (!dest->full) {
0139                 u64 bytes_to_add;
0140 
0141                 bytes_to_add = dest->size - dest->reserved;
0142                 bytes_to_add = min(num_bytes, bytes_to_add);
0143                 dest->reserved += bytes_to_add;
0144                 if (dest->reserved >= dest->size)
0145                     dest->full = true;
0146                 num_bytes -= bytes_to_add;
0147             }
0148             spin_unlock(&dest->lock);
0149         }
0150         if (num_bytes)
0151             btrfs_space_info_free_bytes_may_use(fs_info,
0152                                 space_info,
0153                                 num_bytes);
0154     }
0155     if (qgroup_to_release_ret)
0156         *qgroup_to_release_ret = qgroup_to_release;
0157     return ret;
0158 }
0159 
0160 int btrfs_block_rsv_migrate(struct btrfs_block_rsv *src,
0161                 struct btrfs_block_rsv *dst, u64 num_bytes,
0162                 bool update_size)
0163 {
0164     int ret;
0165 
0166     ret = btrfs_block_rsv_use_bytes(src, num_bytes);
0167     if (ret)
0168         return ret;
0169 
0170     btrfs_block_rsv_add_bytes(dst, num_bytes, update_size);
0171     return 0;
0172 }
0173 
0174 void btrfs_init_block_rsv(struct btrfs_block_rsv *rsv, enum btrfs_rsv_type type)
0175 {
0176     memset(rsv, 0, sizeof(*rsv));
0177     spin_lock_init(&rsv->lock);
0178     rsv->type = type;
0179 }
0180 
0181 void btrfs_init_metadata_block_rsv(struct btrfs_fs_info *fs_info,
0182                    struct btrfs_block_rsv *rsv,
0183                    enum btrfs_rsv_type type)
0184 {
0185     btrfs_init_block_rsv(rsv, type);
0186     rsv->space_info = btrfs_find_space_info(fs_info,
0187                         BTRFS_BLOCK_GROUP_METADATA);
0188 }
0189 
0190 struct btrfs_block_rsv *btrfs_alloc_block_rsv(struct btrfs_fs_info *fs_info,
0191                           enum btrfs_rsv_type type)
0192 {
0193     struct btrfs_block_rsv *block_rsv;
0194 
0195     block_rsv = kmalloc(sizeof(*block_rsv), GFP_NOFS);
0196     if (!block_rsv)
0197         return NULL;
0198 
0199     btrfs_init_metadata_block_rsv(fs_info, block_rsv, type);
0200     return block_rsv;
0201 }
0202 
0203 void btrfs_free_block_rsv(struct btrfs_fs_info *fs_info,
0204               struct btrfs_block_rsv *rsv)
0205 {
0206     if (!rsv)
0207         return;
0208     btrfs_block_rsv_release(fs_info, rsv, (u64)-1, NULL);
0209     kfree(rsv);
0210 }
0211 
0212 int btrfs_block_rsv_add(struct btrfs_fs_info *fs_info,
0213             struct btrfs_block_rsv *block_rsv, u64 num_bytes,
0214             enum btrfs_reserve_flush_enum flush)
0215 {
0216     int ret;
0217 
0218     if (num_bytes == 0)
0219         return 0;
0220 
0221     ret = btrfs_reserve_metadata_bytes(fs_info, block_rsv, num_bytes, flush);
0222     if (!ret)
0223         btrfs_block_rsv_add_bytes(block_rsv, num_bytes, true);
0224 
0225     return ret;
0226 }
0227 
0228 int btrfs_block_rsv_check(struct btrfs_block_rsv *block_rsv, int min_factor)
0229 {
0230     u64 num_bytes = 0;
0231     int ret = -ENOSPC;
0232 
0233     if (!block_rsv)
0234         return 0;
0235 
0236     spin_lock(&block_rsv->lock);
0237     num_bytes = div_factor(block_rsv->size, min_factor);
0238     if (block_rsv->reserved >= num_bytes)
0239         ret = 0;
0240     spin_unlock(&block_rsv->lock);
0241 
0242     return ret;
0243 }
0244 
0245 int btrfs_block_rsv_refill(struct btrfs_fs_info *fs_info,
0246                struct btrfs_block_rsv *block_rsv, u64 min_reserved,
0247                enum btrfs_reserve_flush_enum flush)
0248 {
0249     u64 num_bytes = 0;
0250     int ret = -ENOSPC;
0251 
0252     if (!block_rsv)
0253         return 0;
0254 
0255     spin_lock(&block_rsv->lock);
0256     num_bytes = min_reserved;
0257     if (block_rsv->reserved >= num_bytes)
0258         ret = 0;
0259     else
0260         num_bytes -= block_rsv->reserved;
0261     spin_unlock(&block_rsv->lock);
0262 
0263     if (!ret)
0264         return 0;
0265 
0266     ret = btrfs_reserve_metadata_bytes(fs_info, block_rsv, num_bytes, flush);
0267     if (!ret) {
0268         btrfs_block_rsv_add_bytes(block_rsv, num_bytes, false);
0269         return 0;
0270     }
0271 
0272     return ret;
0273 }
0274 
0275 u64 btrfs_block_rsv_release(struct btrfs_fs_info *fs_info,
0276                 struct btrfs_block_rsv *block_rsv, u64 num_bytes,
0277                 u64 *qgroup_to_release)
0278 {
0279     struct btrfs_block_rsv *global_rsv = &fs_info->global_block_rsv;
0280     struct btrfs_block_rsv *delayed_rsv = &fs_info->delayed_refs_rsv;
0281     struct btrfs_block_rsv *target = NULL;
0282 
0283     /*
0284      * If we are the delayed_rsv then push to the global rsv, otherwise dump
0285      * into the delayed rsv if it is not full.
0286      */
0287     if (block_rsv == delayed_rsv)
0288         target = global_rsv;
0289     else if (block_rsv != global_rsv && !delayed_rsv->full)
0290         target = delayed_rsv;
0291 
0292     if (target && block_rsv->space_info != target->space_info)
0293         target = NULL;
0294 
0295     return block_rsv_release_bytes(fs_info, block_rsv, target, num_bytes,
0296                        qgroup_to_release);
0297 }
0298 
0299 int btrfs_block_rsv_use_bytes(struct btrfs_block_rsv *block_rsv, u64 num_bytes)
0300 {
0301     int ret = -ENOSPC;
0302 
0303     spin_lock(&block_rsv->lock);
0304     if (block_rsv->reserved >= num_bytes) {
0305         block_rsv->reserved -= num_bytes;
0306         if (block_rsv->reserved < block_rsv->size)
0307             block_rsv->full = false;
0308         ret = 0;
0309     }
0310     spin_unlock(&block_rsv->lock);
0311     return ret;
0312 }
0313 
0314 void btrfs_block_rsv_add_bytes(struct btrfs_block_rsv *block_rsv,
0315                    u64 num_bytes, bool update_size)
0316 {
0317     spin_lock(&block_rsv->lock);
0318     block_rsv->reserved += num_bytes;
0319     if (update_size)
0320         block_rsv->size += num_bytes;
0321     else if (block_rsv->reserved >= block_rsv->size)
0322         block_rsv->full = true;
0323     spin_unlock(&block_rsv->lock);
0324 }
0325 
0326 int btrfs_cond_migrate_bytes(struct btrfs_fs_info *fs_info,
0327                  struct btrfs_block_rsv *dest, u64 num_bytes,
0328                  int min_factor)
0329 {
0330     struct btrfs_block_rsv *global_rsv = &fs_info->global_block_rsv;
0331     u64 min_bytes;
0332 
0333     if (global_rsv->space_info != dest->space_info)
0334         return -ENOSPC;
0335 
0336     spin_lock(&global_rsv->lock);
0337     min_bytes = div_factor(global_rsv->size, min_factor);
0338     if (global_rsv->reserved < min_bytes + num_bytes) {
0339         spin_unlock(&global_rsv->lock);
0340         return -ENOSPC;
0341     }
0342     global_rsv->reserved -= num_bytes;
0343     if (global_rsv->reserved < global_rsv->size)
0344         global_rsv->full = false;
0345     spin_unlock(&global_rsv->lock);
0346 
0347     btrfs_block_rsv_add_bytes(dest, num_bytes, true);
0348     return 0;
0349 }
0350 
0351 void btrfs_update_global_block_rsv(struct btrfs_fs_info *fs_info)
0352 {
0353     struct btrfs_block_rsv *block_rsv = &fs_info->global_block_rsv;
0354     struct btrfs_space_info *sinfo = block_rsv->space_info;
0355     struct btrfs_root *root, *tmp;
0356     u64 num_bytes = btrfs_root_used(&fs_info->tree_root->root_item);
0357     unsigned int min_items = 1;
0358 
0359     /*
0360      * The global block rsv is based on the size of the extent tree, the
0361      * checksum tree and the root tree.  If the fs is empty we want to set
0362      * it to a minimal amount for safety.
0363      *
0364      * We also are going to need to modify the minimum of the tree root and
0365      * any global roots we could touch.
0366      */
0367     read_lock(&fs_info->global_root_lock);
0368     rbtree_postorder_for_each_entry_safe(root, tmp, &fs_info->global_root_tree,
0369                          rb_node) {
0370         if (root->root_key.objectid == BTRFS_EXTENT_TREE_OBJECTID ||
0371             root->root_key.objectid == BTRFS_CSUM_TREE_OBJECTID ||
0372             root->root_key.objectid == BTRFS_FREE_SPACE_TREE_OBJECTID) {
0373             num_bytes += btrfs_root_used(&root->root_item);
0374             min_items++;
0375         }
0376     }
0377     read_unlock(&fs_info->global_root_lock);
0378 
0379     /*
0380      * But we also want to reserve enough space so we can do the fallback
0381      * global reserve for an unlink, which is an additional 5 items (see the
0382      * comment in __unlink_start_trans for what we're modifying.)
0383      *
0384      * But we also need space for the delayed ref updates from the unlink,
0385      * so its 10, 5 for the actual operation, and 5 for the delayed ref
0386      * updates.
0387      */
0388     min_items += 10;
0389 
0390     num_bytes = max_t(u64, num_bytes,
0391               btrfs_calc_insert_metadata_size(fs_info, min_items));
0392 
0393     spin_lock(&sinfo->lock);
0394     spin_lock(&block_rsv->lock);
0395 
0396     block_rsv->size = min_t(u64, num_bytes, SZ_512M);
0397 
0398     if (block_rsv->reserved < block_rsv->size) {
0399         num_bytes = block_rsv->size - block_rsv->reserved;
0400         btrfs_space_info_update_bytes_may_use(fs_info, sinfo,
0401                               num_bytes);
0402         block_rsv->reserved = block_rsv->size;
0403     } else if (block_rsv->reserved > block_rsv->size) {
0404         num_bytes = block_rsv->reserved - block_rsv->size;
0405         btrfs_space_info_update_bytes_may_use(fs_info, sinfo,
0406                               -num_bytes);
0407         block_rsv->reserved = block_rsv->size;
0408         btrfs_try_granting_tickets(fs_info, sinfo);
0409     }
0410 
0411     block_rsv->full = (block_rsv->reserved == block_rsv->size);
0412 
0413     if (block_rsv->size >= sinfo->total_bytes)
0414         sinfo->force_alloc = CHUNK_ALLOC_FORCE;
0415     spin_unlock(&block_rsv->lock);
0416     spin_unlock(&sinfo->lock);
0417 }
0418 
0419 void btrfs_init_root_block_rsv(struct btrfs_root *root)
0420 {
0421     struct btrfs_fs_info *fs_info = root->fs_info;
0422 
0423     switch (root->root_key.objectid) {
0424     case BTRFS_CSUM_TREE_OBJECTID:
0425     case BTRFS_EXTENT_TREE_OBJECTID:
0426     case BTRFS_FREE_SPACE_TREE_OBJECTID:
0427         root->block_rsv = &fs_info->delayed_refs_rsv;
0428         break;
0429     case BTRFS_ROOT_TREE_OBJECTID:
0430     case BTRFS_DEV_TREE_OBJECTID:
0431     case BTRFS_QUOTA_TREE_OBJECTID:
0432         root->block_rsv = &fs_info->global_block_rsv;
0433         break;
0434     case BTRFS_CHUNK_TREE_OBJECTID:
0435         root->block_rsv = &fs_info->chunk_block_rsv;
0436         break;
0437     default:
0438         root->block_rsv = NULL;
0439         break;
0440     }
0441 }
0442 
0443 void btrfs_init_global_block_rsv(struct btrfs_fs_info *fs_info)
0444 {
0445     struct btrfs_space_info *space_info;
0446 
0447     space_info = btrfs_find_space_info(fs_info, BTRFS_BLOCK_GROUP_SYSTEM);
0448     fs_info->chunk_block_rsv.space_info = space_info;
0449 
0450     space_info = btrfs_find_space_info(fs_info, BTRFS_BLOCK_GROUP_METADATA);
0451     fs_info->global_block_rsv.space_info = space_info;
0452     fs_info->trans_block_rsv.space_info = space_info;
0453     fs_info->empty_block_rsv.space_info = space_info;
0454     fs_info->delayed_block_rsv.space_info = space_info;
0455     fs_info->delayed_refs_rsv.space_info = space_info;
0456 
0457     btrfs_update_global_block_rsv(fs_info);
0458 }
0459 
0460 void btrfs_release_global_block_rsv(struct btrfs_fs_info *fs_info)
0461 {
0462     btrfs_block_rsv_release(fs_info, &fs_info->global_block_rsv, (u64)-1,
0463                 NULL);
0464     WARN_ON(fs_info->trans_block_rsv.size > 0);
0465     WARN_ON(fs_info->trans_block_rsv.reserved > 0);
0466     WARN_ON(fs_info->chunk_block_rsv.size > 0);
0467     WARN_ON(fs_info->chunk_block_rsv.reserved > 0);
0468     WARN_ON(fs_info->delayed_block_rsv.size > 0);
0469     WARN_ON(fs_info->delayed_block_rsv.reserved > 0);
0470     WARN_ON(fs_info->delayed_refs_rsv.reserved > 0);
0471     WARN_ON(fs_info->delayed_refs_rsv.size > 0);
0472 }
0473 
0474 static struct btrfs_block_rsv *get_block_rsv(
0475                     const struct btrfs_trans_handle *trans,
0476                     const struct btrfs_root *root)
0477 {
0478     struct btrfs_fs_info *fs_info = root->fs_info;
0479     struct btrfs_block_rsv *block_rsv = NULL;
0480 
0481     if (test_bit(BTRFS_ROOT_SHAREABLE, &root->state) ||
0482         (root == fs_info->uuid_root) ||
0483         (trans->adding_csums &&
0484          root->root_key.objectid == BTRFS_CSUM_TREE_OBJECTID))
0485         block_rsv = trans->block_rsv;
0486 
0487     if (!block_rsv)
0488         block_rsv = root->block_rsv;
0489 
0490     if (!block_rsv)
0491         block_rsv = &fs_info->empty_block_rsv;
0492 
0493     return block_rsv;
0494 }
0495 
0496 struct btrfs_block_rsv *btrfs_use_block_rsv(struct btrfs_trans_handle *trans,
0497                         struct btrfs_root *root,
0498                         u32 blocksize)
0499 {
0500     struct btrfs_fs_info *fs_info = root->fs_info;
0501     struct btrfs_block_rsv *block_rsv;
0502     struct btrfs_block_rsv *global_rsv = &fs_info->global_block_rsv;
0503     int ret;
0504     bool global_updated = false;
0505 
0506     block_rsv = get_block_rsv(trans, root);
0507 
0508     if (unlikely(block_rsv->size == 0))
0509         goto try_reserve;
0510 again:
0511     ret = btrfs_block_rsv_use_bytes(block_rsv, blocksize);
0512     if (!ret)
0513         return block_rsv;
0514 
0515     if (block_rsv->failfast)
0516         return ERR_PTR(ret);
0517 
0518     if (block_rsv->type == BTRFS_BLOCK_RSV_GLOBAL && !global_updated) {
0519         global_updated = true;
0520         btrfs_update_global_block_rsv(fs_info);
0521         goto again;
0522     }
0523 
0524     /*
0525      * The global reserve still exists to save us from ourselves, so don't
0526      * warn_on if we are short on our delayed refs reserve.
0527      */
0528     if (block_rsv->type != BTRFS_BLOCK_RSV_DELREFS &&
0529         btrfs_test_opt(fs_info, ENOSPC_DEBUG)) {
0530         static DEFINE_RATELIMIT_STATE(_rs,
0531                 DEFAULT_RATELIMIT_INTERVAL * 10,
0532                 /*DEFAULT_RATELIMIT_BURST*/ 1);
0533         if (__ratelimit(&_rs))
0534             WARN(1, KERN_DEBUG
0535                 "BTRFS: block rsv %d returned %d\n",
0536                 block_rsv->type, ret);
0537     }
0538 try_reserve:
0539     ret = btrfs_reserve_metadata_bytes(fs_info, block_rsv, blocksize,
0540                        BTRFS_RESERVE_NO_FLUSH);
0541     if (!ret)
0542         return block_rsv;
0543     /*
0544      * If we couldn't reserve metadata bytes try and use some from
0545      * the global reserve if its space type is the same as the global
0546      * reservation.
0547      */
0548     if (block_rsv->type != BTRFS_BLOCK_RSV_GLOBAL &&
0549         block_rsv->space_info == global_rsv->space_info) {
0550         ret = btrfs_block_rsv_use_bytes(global_rsv, blocksize);
0551         if (!ret)
0552             return global_rsv;
0553     }
0554     return ERR_PTR(ret);
0555 }