Back to home page

OSCL-LXR

 
 

    


0001 // SPDX-License-Identifier: GPL-2.0
0002 /*
0003  * Copyright (c) 2014 Red Hat, Inc.
0004  * All Rights Reserved.
0005  */
0006 #include "xfs.h"
0007 #include "xfs_fs.h"
0008 #include "xfs_shared.h"
0009 #include "xfs_format.h"
0010 #include "xfs_log_format.h"
0011 #include "xfs_trans_resv.h"
0012 #include "xfs_mount.h"
0013 #include "xfs_trans.h"
0014 #include "xfs_alloc.h"
0015 #include "xfs_btree.h"
0016 #include "xfs_btree_staging.h"
0017 #include "xfs_rmap.h"
0018 #include "xfs_rmap_btree.h"
0019 #include "xfs_trace.h"
0020 #include "xfs_error.h"
0021 #include "xfs_extent_busy.h"
0022 #include "xfs_ag.h"
0023 #include "xfs_ag_resv.h"
0024 
0025 static struct kmem_cache    *xfs_rmapbt_cur_cache;
0026 
0027 /*
0028  * Reverse map btree.
0029  *
0030  * This is a per-ag tree used to track the owner(s) of a given extent. With
0031  * reflink it is possible for there to be multiple owners, which is a departure
0032  * from classic XFS. Owner records for data extents are inserted when the
0033  * extent is mapped and removed when an extent is unmapped.  Owner records for
0034  * all other block types (i.e. metadata) are inserted when an extent is
0035  * allocated and removed when an extent is freed. There can only be one owner
0036  * of a metadata extent, usually an inode or some other metadata structure like
0037  * an AG btree.
0038  *
0039  * The rmap btree is part of the free space management, so blocks for the tree
0040  * are sourced from the agfl. Hence we need transaction reservation support for
0041  * this tree so that the freelist is always large enough. This also impacts on
0042  * the minimum space we need to leave free in the AG.
0043  *
0044  * The tree is ordered by [ag block, owner, offset]. This is a large key size,
0045  * but it is the only way to enforce unique keys when a block can be owned by
0046  * multiple files at any offset. There's no need to order/search by extent
0047  * size for online updating/management of the tree. It is intended that most
0048  * reverse lookups will be to find the owner(s) of a particular block, or to
0049  * try to recover tree and file data from corrupt primary metadata.
0050  */
0051 
0052 static struct xfs_btree_cur *
0053 xfs_rmapbt_dup_cursor(
0054     struct xfs_btree_cur    *cur)
0055 {
0056     return xfs_rmapbt_init_cursor(cur->bc_mp, cur->bc_tp,
0057                 cur->bc_ag.agbp, cur->bc_ag.pag);
0058 }
0059 
/*
 * Record a new root block for this btree in the AGF and adjust the tree
 * height by @inc (+1 when the tree grows a level, -1 when it shrinks).
 * The cached perag level is kept in sync with the on-disk value, and the
 * AGF changes are logged in the cursor's transaction.
 */
STATIC void
xfs_rmapbt_set_root(
    struct xfs_btree_cur        *cur,
    const union xfs_btree_ptr   *ptr,
    int             inc)
{
    struct xfs_buf      *agbp = cur->bc_ag.agbp;
    struct xfs_agf      *agf = agbp->b_addr;
    int         btnum = cur->bc_btnum;

    /* A zero short-form pointer would leave the tree with no root. */
    ASSERT(ptr->s != 0);

    agf->agf_roots[btnum] = ptr->s;
    be32_add_cpu(&agf->agf_levels[btnum], inc);
    cur->bc_ag.pag->pagf_levels[btnum] += inc;

    xfs_alloc_log_agf(cur->bc_tp, agbp, XFS_AGF_ROOTS | XFS_AGF_LEVELS);
}
0078 
/*
 * Allocate a new rmapbt block from the AG free list.
 *
 * On success *stat is 1 and @new carries the short-form pointer to the
 * new block; if the freelist has nothing to give, *stat is 0 and no
 * error is returned.  @start is unused: rmapbt blocks always come from
 * the AGFL.  The AGF's rmap block counter is bumped and logged, and the
 * per-AG rmapbt reservation is informed of the allocation.
 */
STATIC int
xfs_rmapbt_alloc_block(
    struct xfs_btree_cur        *cur,
    const union xfs_btree_ptr   *start,
    union xfs_btree_ptr     *new,
    int             *stat)
{
    struct xfs_buf      *agbp = cur->bc_ag.agbp;
    struct xfs_agf      *agf = agbp->b_addr;
    struct xfs_perag    *pag = cur->bc_ag.pag;
    int         error;
    xfs_agblock_t       bno;

    /* Allocate the new block from the freelist. If we can't, give up.  */
    error = xfs_alloc_get_freelist(pag, cur->bc_tp, cur->bc_ag.agbp,
                       &bno, 1);
    if (error)
        return error;

    trace_xfs_rmapbt_alloc_block(cur->bc_mp, pag->pag_agno, bno, 1);
    if (bno == NULLAGBLOCK) {
        /* Freelist empty; report "no block" rather than an error. */
        *stat = 0;
        return 0;
    }

    xfs_extent_busy_reuse(cur->bc_mp, pag, bno, 1, false);

    new->s = cpu_to_be32(bno);
    be32_add_cpu(&agf->agf_rmap_blocks, 1);
    xfs_alloc_log_agf(cur->bc_tp, agbp, XFS_AGF_RMAP_BLOCKS);

    xfs_ag_resv_rmapbt_alloc(cur->bc_mp, pag->pag_agno);

    *stat = 1;
    return 0;
}
0115 
/*
 * Return an rmapbt block to the AG free list.
 *
 * The AGF's rmap block count is decremented and logged, the block is put
 * back on the AGFL, and it is marked busy (skipping discard) so it cannot
 * be handed out again before the freeing transaction commits.  The per-AG
 * rmapbt reservation is updated for the freed block.
 */
STATIC int
xfs_rmapbt_free_block(
    struct xfs_btree_cur    *cur,
    struct xfs_buf      *bp)
{
    struct xfs_buf      *agbp = cur->bc_ag.agbp;
    struct xfs_agf      *agf = agbp->b_addr;
    struct xfs_perag    *pag = cur->bc_ag.pag;
    xfs_agblock_t       bno;
    int         error;

    /* Translate the buffer's disk address into an AG block number. */
    bno = xfs_daddr_to_agbno(cur->bc_mp, xfs_buf_daddr(bp));
    trace_xfs_rmapbt_free_block(cur->bc_mp, pag->pag_agno,
            bno, 1);
    be32_add_cpu(&agf->agf_rmap_blocks, -1);
    xfs_alloc_log_agf(cur->bc_tp, agbp, XFS_AGF_RMAP_BLOCKS);
    error = xfs_alloc_put_freelist(pag, cur->bc_tp, agbp, NULL, bno, 1);
    if (error)
        return error;

    /* Keep the block off-limits until this transaction commits. */
    xfs_extent_busy_insert(cur->bc_tp, pag, bno, 1,
                  XFS_EXTENT_BUSY_SKIP_DISCARD);

    xfs_ag_resv_free_extent(pag, XFS_AG_RESV_RMAPBT, NULL, 1);
    return 0;
}
0142 
0143 STATIC int
0144 xfs_rmapbt_get_minrecs(
0145     struct xfs_btree_cur    *cur,
0146     int         level)
0147 {
0148     return cur->bc_mp->m_rmap_mnr[level != 0];
0149 }
0150 
0151 STATIC int
0152 xfs_rmapbt_get_maxrecs(
0153     struct xfs_btree_cur    *cur,
0154     int         level)
0155 {
0156     return cur->bc_mp->m_rmap_mxr[level != 0];
0157 }
0158 
0159 STATIC void
0160 xfs_rmapbt_init_key_from_rec(
0161     union xfs_btree_key     *key,
0162     const union xfs_btree_rec   *rec)
0163 {
0164     key->rmap.rm_startblock = rec->rmap.rm_startblock;
0165     key->rmap.rm_owner = rec->rmap.rm_owner;
0166     key->rmap.rm_offset = rec->rmap.rm_offset;
0167 }
0168 
/*
 * The high key for a reverse mapping record can be computed by shifting
 * the startblock and offset to the highest value that would still map
 * to that record.  In practice this means that we add blockcount-1 to
 * the startblock for all records, and if the record is for a data/attr
 * fork mapping, we add blockcount-1 to the offset too.
 */
STATIC void
xfs_rmapbt_init_high_key_from_rec(
    union xfs_btree_key     *key,
    const union xfs_btree_rec   *rec)
{
    uint64_t            off;
    int             adj;

    /* Distance from the first to the last block covered by the record. */
    adj = be32_to_cpu(rec->rmap.rm_blockcount) - 1;

    key->rmap.rm_startblock = rec->rmap.rm_startblock;
    be32_add_cpu(&key->rmap.rm_startblock, adj);
    key->rmap.rm_owner = rec->rmap.rm_owner;
    key->rmap.rm_offset = rec->rmap.rm_offset;
    /* Non-inode owners and bmbt blocks keep the offset unchanged. */
    if (XFS_RMAP_NON_INODE_OWNER(be64_to_cpu(rec->rmap.rm_owner)) ||
        XFS_RMAP_IS_BMBT_BLOCK(be64_to_cpu(rec->rmap.rm_offset)))
        return;
    /* Bump only the offset bits; preserve the flag bits above the mask. */
    off = be64_to_cpu(key->rmap.rm_offset);
    off = (XFS_RMAP_OFF(off) + adj) | (off & ~XFS_RMAP_OFF_MASK);
    key->rmap.rm_offset = cpu_to_be64(off);
}
0197 
0198 STATIC void
0199 xfs_rmapbt_init_rec_from_cur(
0200     struct xfs_btree_cur    *cur,
0201     union xfs_btree_rec *rec)
0202 {
0203     rec->rmap.rm_startblock = cpu_to_be32(cur->bc_rec.r.rm_startblock);
0204     rec->rmap.rm_blockcount = cpu_to_be32(cur->bc_rec.r.rm_blockcount);
0205     rec->rmap.rm_owner = cpu_to_be64(cur->bc_rec.r.rm_owner);
0206     rec->rmap.rm_offset = cpu_to_be64(
0207             xfs_rmap_irec_offset_pack(&cur->bc_rec.r));
0208 }
0209 
/* Point @ptr at the rmapbt root block recorded in the on-disk AGF. */
STATIC void
xfs_rmapbt_init_ptr_from_cur(
    struct xfs_btree_cur    *cur,
    union xfs_btree_ptr *ptr)
{
    struct xfs_agf      *agf = cur->bc_ag.agbp->b_addr;

    /* The cursor's perag must match the AG that owns this AGF buffer. */
    ASSERT(cur->bc_ag.pag->pag_agno == be32_to_cpu(agf->agf_seqno));

    ptr->s = agf->agf_roots[cur->bc_btnum];
}
0221 
0222 STATIC int64_t
0223 xfs_rmapbt_key_diff(
0224     struct xfs_btree_cur        *cur,
0225     const union xfs_btree_key   *key)
0226 {
0227     struct xfs_rmap_irec        *rec = &cur->bc_rec.r;
0228     const struct xfs_rmap_key   *kp = &key->rmap;
0229     __u64               x, y;
0230     int64_t             d;
0231 
0232     d = (int64_t)be32_to_cpu(kp->rm_startblock) - rec->rm_startblock;
0233     if (d)
0234         return d;
0235 
0236     x = be64_to_cpu(kp->rm_owner);
0237     y = rec->rm_owner;
0238     if (x > y)
0239         return 1;
0240     else if (y > x)
0241         return -1;
0242 
0243     x = XFS_RMAP_OFF(be64_to_cpu(kp->rm_offset));
0244     y = rec->rm_offset;
0245     if (x > y)
0246         return 1;
0247     else if (y > x)
0248         return -1;
0249     return 0;
0250 }
0251 
0252 STATIC int64_t
0253 xfs_rmapbt_diff_two_keys(
0254     struct xfs_btree_cur        *cur,
0255     const union xfs_btree_key   *k1,
0256     const union xfs_btree_key   *k2)
0257 {
0258     const struct xfs_rmap_key   *kp1 = &k1->rmap;
0259     const struct xfs_rmap_key   *kp2 = &k2->rmap;
0260     int64_t             d;
0261     __u64               x, y;
0262 
0263     d = (int64_t)be32_to_cpu(kp1->rm_startblock) -
0264                be32_to_cpu(kp2->rm_startblock);
0265     if (d)
0266         return d;
0267 
0268     x = be64_to_cpu(kp1->rm_owner);
0269     y = be64_to_cpu(kp2->rm_owner);
0270     if (x > y)
0271         return 1;
0272     else if (y > x)
0273         return -1;
0274 
0275     x = XFS_RMAP_OFF(be64_to_cpu(kp1->rm_offset));
0276     y = XFS_RMAP_OFF(be64_to_cpu(kp2->rm_offset));
0277     if (x > y)
0278         return 1;
0279     else if (y > x)
0280         return -1;
0281     return 0;
0282 }
0283 
/*
 * Structure verifier for rmapbt blocks: checks the magic number, the v5
 * short-form block header, and that the block's level lies within the
 * bounds we can establish.  Returns the failing address, or NULL if the
 * block passes.
 */
static xfs_failaddr_t
xfs_rmapbt_verify(
    struct xfs_buf      *bp)
{
    struct xfs_mount    *mp = bp->b_mount;
    struct xfs_btree_block  *block = XFS_BUF_TO_BLOCK(bp);
    struct xfs_perag    *pag = bp->b_pag;
    xfs_failaddr_t      fa;
    unsigned int        level;

    /*
     * magic number and level verification
     *
     * During growfs operations, we can't verify the exact level or owner as
     * the perag is not fully initialised and hence not attached to the
     * buffer.  In this case, check against the maximum tree depth.
     *
     * Similarly, during log recovery we will have a perag structure
     * attached, but the agf information will not yet have been initialised
     * from the on disk AGF. Again, we can only check against maximum limits
     * in this case.
     */
    if (!xfs_verify_magic(bp, block->bb_magic))
        return __this_address;

    /* rmapbt blocks only exist on filesystems with the rmapbt feature. */
    if (!xfs_has_rmapbt(mp))
        return __this_address;
    fa = xfs_btree_sblock_v5hdr_verify(bp);
    if (fa)
        return fa;

    level = be16_to_cpu(block->bb_level);
    if (pag && pag->pagf_init) {
        /* Perag is initialised: check against this AG's actual height. */
        if (level >= pag->pagf_levels[XFS_BTNUM_RMAPi])
            return __this_address;
    } else if (level >= mp->m_rmap_maxlevels)
        return __this_address;

    /* Generic sblock checks, bounded by this level's max record count. */
    return xfs_btree_sblock_verify(bp, mp->m_rmap_mxr[level != 0]);
}
0324 
0325 static void
0326 xfs_rmapbt_read_verify(
0327     struct xfs_buf  *bp)
0328 {
0329     xfs_failaddr_t  fa;
0330 
0331     if (!xfs_btree_sblock_verify_crc(bp))
0332         xfs_verifier_error(bp, -EFSBADCRC, __this_address);
0333     else {
0334         fa = xfs_rmapbt_verify(bp);
0335         if (fa)
0336             xfs_verifier_error(bp, -EFSCORRUPTED, fa);
0337     }
0338 
0339     if (bp->b_error)
0340         trace_xfs_btree_corrupt(bp, _RET_IP_);
0341 }
0342 
0343 static void
0344 xfs_rmapbt_write_verify(
0345     struct xfs_buf  *bp)
0346 {
0347     xfs_failaddr_t  fa;
0348 
0349     fa = xfs_rmapbt_verify(bp);
0350     if (fa) {
0351         trace_xfs_btree_corrupt(bp, _RET_IP_);
0352         xfs_verifier_error(bp, -EFSCORRUPTED, fa);
0353         return;
0354     }
0355     xfs_btree_sblock_calc_crc(bp);
0356 
0357 }
0358 
/*
 * Buffer ops for rmapbt blocks.  Only the CRC-era magic is listed; the
 * non-CRC slot is zero because the rmapbt exists only on v5 filesystems.
 */
const struct xfs_buf_ops xfs_rmapbt_buf_ops = {
    .name           = "xfs_rmapbt",
    .magic          = { 0, cpu_to_be32(XFS_RMAP_CRC_MAGIC) },
    .verify_read        = xfs_rmapbt_read_verify,
    .verify_write       = xfs_rmapbt_write_verify,
    .verify_struct      = xfs_rmapbt_verify,
};
0366 
0367 STATIC int
0368 xfs_rmapbt_keys_inorder(
0369     struct xfs_btree_cur        *cur,
0370     const union xfs_btree_key   *k1,
0371     const union xfs_btree_key   *k2)
0372 {
0373     uint32_t        x;
0374     uint32_t        y;
0375     uint64_t        a;
0376     uint64_t        b;
0377 
0378     x = be32_to_cpu(k1->rmap.rm_startblock);
0379     y = be32_to_cpu(k2->rmap.rm_startblock);
0380     if (x < y)
0381         return 1;
0382     else if (x > y)
0383         return 0;
0384     a = be64_to_cpu(k1->rmap.rm_owner);
0385     b = be64_to_cpu(k2->rmap.rm_owner);
0386     if (a < b)
0387         return 1;
0388     else if (a > b)
0389         return 0;
0390     a = XFS_RMAP_OFF(be64_to_cpu(k1->rmap.rm_offset));
0391     b = XFS_RMAP_OFF(be64_to_cpu(k2->rmap.rm_offset));
0392     if (a <= b)
0393         return 1;
0394     return 0;
0395 }
0396 
0397 STATIC int
0398 xfs_rmapbt_recs_inorder(
0399     struct xfs_btree_cur        *cur,
0400     const union xfs_btree_rec   *r1,
0401     const union xfs_btree_rec   *r2)
0402 {
0403     uint32_t        x;
0404     uint32_t        y;
0405     uint64_t        a;
0406     uint64_t        b;
0407 
0408     x = be32_to_cpu(r1->rmap.rm_startblock);
0409     y = be32_to_cpu(r2->rmap.rm_startblock);
0410     if (x < y)
0411         return 1;
0412     else if (x > y)
0413         return 0;
0414     a = be64_to_cpu(r1->rmap.rm_owner);
0415     b = be64_to_cpu(r2->rmap.rm_owner);
0416     if (a < b)
0417         return 1;
0418     else if (a > b)
0419         return 0;
0420     a = XFS_RMAP_OFF(be64_to_cpu(r1->rmap.rm_offset));
0421     b = XFS_RMAP_OFF(be64_to_cpu(r2->rmap.rm_offset));
0422     if (a <= b)
0423         return 1;
0424     return 0;
0425 }
0426 
/* Operations vector for the reverse mapping btree. */
static const struct xfs_btree_ops xfs_rmapbt_ops = {
    .rec_len        = sizeof(struct xfs_rmap_rec),
    /* Overlapping btree: each pointer carries both a low and a high key. */
    .key_len        = 2 * sizeof(struct xfs_rmap_key),

    .dup_cursor     = xfs_rmapbt_dup_cursor,
    .set_root       = xfs_rmapbt_set_root,
    .alloc_block        = xfs_rmapbt_alloc_block,
    .free_block     = xfs_rmapbt_free_block,
    .get_minrecs        = xfs_rmapbt_get_minrecs,
    .get_maxrecs        = xfs_rmapbt_get_maxrecs,
    .init_key_from_rec  = xfs_rmapbt_init_key_from_rec,
    .init_high_key_from_rec = xfs_rmapbt_init_high_key_from_rec,
    .init_rec_from_cur  = xfs_rmapbt_init_rec_from_cur,
    .init_ptr_from_cur  = xfs_rmapbt_init_ptr_from_cur,
    .key_diff       = xfs_rmapbt_key_diff,
    .buf_ops        = &xfs_rmapbt_buf_ops,
    .diff_two_keys      = xfs_rmapbt_diff_two_keys,
    .keys_inorder       = xfs_rmapbt_keys_inorder,
    .recs_inorder       = xfs_rmapbt_recs_inorder,
};
0447 
/*
 * Set up the parts of an rmapbt cursor that are common to the regular
 * and staging variants: allocate the cursor, wire up flags, stats and
 * ops, and take a perag reference that lives as long as the cursor.
 */
static struct xfs_btree_cur *
xfs_rmapbt_init_common(
    struct xfs_mount    *mp,
    struct xfs_trans    *tp,
    struct xfs_perag    *pag)
{
    struct xfs_btree_cur    *cur;

    /* Overlapping btree; 2 keys per pointer. */
    cur = xfs_btree_alloc_cursor(mp, tp, XFS_BTNUM_RMAP,
            mp->m_rmap_maxlevels, xfs_rmapbt_cur_cache);
    cur->bc_flags = XFS_BTREE_CRC_BLOCKS | XFS_BTREE_OVERLAPPING;
    cur->bc_statoff = XFS_STATS_CALC_INDEX(xs_rmap_2);
    cur->bc_ops = &xfs_rmapbt_ops;

    /* take a reference for the cursor */
    atomic_inc(&pag->pag_ref);
    cur->bc_ag.pag = pag;

    return cur;
}
0469 
0470 /* Create a new reverse mapping btree cursor. */
0471 struct xfs_btree_cur *
0472 xfs_rmapbt_init_cursor(
0473     struct xfs_mount    *mp,
0474     struct xfs_trans    *tp,
0475     struct xfs_buf      *agbp,
0476     struct xfs_perag    *pag)
0477 {
0478     struct xfs_agf      *agf = agbp->b_addr;
0479     struct xfs_btree_cur    *cur;
0480 
0481     cur = xfs_rmapbt_init_common(mp, tp, pag);
0482     cur->bc_nlevels = be32_to_cpu(agf->agf_levels[XFS_BTNUM_RMAP]);
0483     cur->bc_ag.agbp = agbp;
0484     return cur;
0485 }
0486 
0487 /* Create a new reverse mapping btree cursor with a fake root for staging. */
0488 struct xfs_btree_cur *
0489 xfs_rmapbt_stage_cursor(
0490     struct xfs_mount    *mp,
0491     struct xbtree_afakeroot *afake,
0492     struct xfs_perag    *pag)
0493 {
0494     struct xfs_btree_cur    *cur;
0495 
0496     cur = xfs_rmapbt_init_common(mp, NULL, pag);
0497     xfs_btree_stage_afakeroot(cur, afake);
0498     return cur;
0499 }
0500 
/*
 * Install a new reverse mapping btree root.  Caller is responsible for
 * invalidating and freeing the old btree blocks.
 *
 * The staged root, height and block count are copied from the fake root
 * into the AGF and logged, then the cursor is switched over to the real
 * tree via xfs_btree_commit_afakeroot().
 */
void
xfs_rmapbt_commit_staged_btree(
    struct xfs_btree_cur    *cur,
    struct xfs_trans    *tp,
    struct xfs_buf      *agbp)
{
    struct xfs_agf      *agf = agbp->b_addr;
    struct xbtree_afakeroot *afake = cur->bc_ag.afake;

    /* Only staging cursors may be committed this way. */
    ASSERT(cur->bc_flags & XFS_BTREE_STAGING);

    agf->agf_roots[cur->bc_btnum] = cpu_to_be32(afake->af_root);
    agf->agf_levels[cur->bc_btnum] = cpu_to_be32(afake->af_levels);
    agf->agf_rmap_blocks = cpu_to_be32(afake->af_blocks);
    xfs_alloc_log_agf(tp, agbp, XFS_AGF_ROOTS | XFS_AGF_LEVELS |
                    XFS_AGF_RMAP_BLOCKS);
    xfs_btree_commit_afakeroot(cur, tp, agbp, &xfs_rmapbt_ops);
}
0523 
0524 /* Calculate number of records in a reverse mapping btree block. */
0525 static inline unsigned int
0526 xfs_rmapbt_block_maxrecs(
0527     unsigned int        blocklen,
0528     bool            leaf)
0529 {
0530     if (leaf)
0531         return blocklen / sizeof(struct xfs_rmap_rec);
0532     return blocklen /
0533         (2 * sizeof(struct xfs_rmap_key) + sizeof(xfs_rmap_ptr_t));
0534 }
0535 
0536 /*
0537  * Calculate number of records in an rmap btree block.
0538  */
0539 int
0540 xfs_rmapbt_maxrecs(
0541     int         blocklen,
0542     int         leaf)
0543 {
0544     blocklen -= XFS_RMAP_BLOCK_LEN;
0545     return xfs_rmapbt_block_maxrecs(blocklen, leaf);
0546 }
0547 
/* Compute the max possible height for reverse mapping btrees. */
unsigned int
xfs_rmapbt_maxlevels_ondisk(void)
{
    unsigned int        minrecs[2];
    unsigned int        blocklen;

    /* Worst case: the smallest supported CRC block size. */
    blocklen = XFS_MIN_CRC_BLOCKSIZE - XFS_BTREE_SBLOCK_CRC_LEN;

    /* Minimum occupancy is half of the maximum per-block record count. */
    minrecs[0] = xfs_rmapbt_block_maxrecs(blocklen, true) / 2;
    minrecs[1] = xfs_rmapbt_block_maxrecs(blocklen, false) / 2;

    /*
     * Compute the asymptotic maxlevels for an rmapbt on any reflink fs.
     *
     * On a reflink filesystem, each AG block can have up to 2^32 (per the
     * refcount record format) owners, which means that theoretically we
     * could face up to 2^64 rmap records.  However, we're likely to run
     * out of blocks in the AG long before that happens, which means that
     * we must compute the max height based on what the btree will look
     * like if it consumes almost all the blocks in the AG due to maximal
     * sharing factor.
     */
    return xfs_btree_space_to_height(minrecs, XFS_MAX_CRC_AG_BLOCKS);
}
0573 
/* Compute the maximum height of an rmap btree. */
void
xfs_rmapbt_compute_maxlevels(
    struct xfs_mount        *mp)
{
    /* No rmapbt feature means no tree at all. */
    if (!xfs_has_rmapbt(mp)) {
        mp->m_rmap_maxlevels = 0;
        return;
    }

    if (xfs_has_reflink(mp)) {
        /*
         * Compute the asymptotic maxlevels for an rmap btree on a
         * filesystem that supports reflink.
         *
         * On a reflink filesystem, each AG block can have up to 2^32
         * (per the refcount record format) owners, which means that
         * theoretically we could face up to 2^64 rmap records.
         * However, we're likely to run out of blocks in the AG long
         * before that happens, which means that we must compute the
         * max height based on what the btree will look like if it
         * consumes almost all the blocks in the AG due to maximal
         * sharing factor.
         */
        mp->m_rmap_maxlevels = xfs_btree_space_to_height(mp->m_rmap_mnr,
                mp->m_sb.sb_agblocks);
    } else {
        /*
         * If there's no block sharing, compute the maximum rmapbt
         * height assuming one rmap record per AG block.
         */
        mp->m_rmap_maxlevels = xfs_btree_compute_maxlevels(
                mp->m_rmap_mnr, mp->m_sb.sb_agblocks);
    }
    /* Must fit within the worst-case height the cursor cache allows. */
    ASSERT(mp->m_rmap_maxlevels <= xfs_rmapbt_maxlevels_ondisk());
}
0610 
/* Calculate the rmap btree size for some records. */
xfs_extlen_t
xfs_rmapbt_calc_size(
    struct xfs_mount    *mp,
    unsigned long long  len)
{
    return xfs_btree_calc_size(mp->m_rmap_mnr, len);
}
0619 
/*
 * Calculate the maximum rmap btree size.
 */
xfs_extlen_t
xfs_rmapbt_max_size(
    struct xfs_mount    *mp,
    xfs_agblock_t       agblocks)
{
    /* Bail out if we're uninitialized, which can happen in mkfs. */
    if (mp->m_rmap_mxr[0] == 0)
        return 0;

    /* Worst case: one record per AG block. */
    return xfs_rmapbt_calc_size(mp, agblocks);
}
0634 
/*
 * Figure out how many blocks to reserve and how many are used by this btree.
 *
 * @ask and @used are incremented, not assigned, so callers can accumulate
 * reservation totals across multiple btrees.  Returns 0 or a negative
 * errno from reading the AGF.
 */
int
xfs_rmapbt_calc_reserves(
    struct xfs_mount    *mp,
    struct xfs_trans    *tp,
    struct xfs_perag    *pag,
    xfs_extlen_t        *ask,
    xfs_extlen_t        *used)
{
    struct xfs_buf      *agbp;
    struct xfs_agf      *agf;
    xfs_agblock_t       agblocks;
    xfs_extlen_t        tree_len;
    int         error;

    /* Nothing to reserve without the rmapbt feature. */
    if (!xfs_has_rmapbt(mp))
        return 0;

    error = xfs_alloc_read_agf(pag, tp, 0, &agbp);
    if (error)
        return error;

    /* Snapshot the AG size and current tree size, then drop the buffer. */
    agf = agbp->b_addr;
    agblocks = be32_to_cpu(agf->agf_length);
    tree_len = be32_to_cpu(agf->agf_rmap_blocks);
    xfs_trans_brelse(tp, agbp);

    /*
     * The log is permanently allocated, so the space it occupies will
     * never be available for the kinds of things that would require btree
     * expansion.  We therefore can pretend the space isn't there.
     */
    if (xfs_ag_contains_log(mp, pag->pag_agno))
        agblocks -= mp->m_sb.sb_logblocks;

    /* Reserve 1% of the AG or enough for 1 block per record. */
    *ask += max(agblocks / 100, xfs_rmapbt_max_size(mp, agblocks));
    *used += tree_len;

    return error;
}
0678 
0679 int __init
0680 xfs_rmapbt_init_cur_cache(void)
0681 {
0682     xfs_rmapbt_cur_cache = kmem_cache_create("xfs_rmapbt_cur",
0683             xfs_btree_cur_sizeof(xfs_rmapbt_maxlevels_ondisk()),
0684             0, 0, NULL);
0685 
0686     if (!xfs_rmapbt_cur_cache)
0687         return -ENOMEM;
0688     return 0;
0689 }
0690 
/* Tear down the rmapbt cursor cache. */
void
xfs_rmapbt_destroy_cur_cache(void)
{
    kmem_cache_destroy(xfs_rmapbt_cur_cache);
    /* Clear the pointer so any stale use trips on NULL, not freed memory. */
    xfs_rmapbt_cur_cache = NULL;
}