Back to home page

OSCL-LXR

 
 

    


0001 // SPDX-License-Identifier: GPL-2.0+
0002 /*
0003  * Copyright (C) 2017 Oracle.  All Rights Reserved.
0004  * Author: Darrick J. Wong <darrick.wong@oracle.com>
0005  */
0006 #include "xfs.h"
0007 #include "xfs_fs.h"
0008 #include "xfs_shared.h"
0009 #include "xfs_format.h"
0010 #include "xfs_trans_resv.h"
0011 #include "xfs_mount.h"
0012 #include "xfs_inode.h"
0013 #include "xfs_btree.h"
0014 #include "scrub/scrub.h"
0015 #include "scrub/common.h"
0016 #include "scrub/btree.h"
0017 #include "scrub/trace.h"
0018 
0019 /* btree scrubbing */
0020 
0021 /*
0022  * Check for btree operation errors.  See the section about handling
0023  * operational errors in common.c.
0024  */
0025 static bool
0026 __xchk_btree_process_error(
0027     struct xfs_scrub    *sc,
0028     struct xfs_btree_cur    *cur,
0029     int         level,
0030     int         *error,
0031     __u32           errflag,
0032     void            *ret_ip)
0033 {
0034     if (*error == 0)
0035         return true;
0036 
0037     switch (*error) {
0038     case -EDEADLOCK:
0039         /* Used to restart an op with deadlock avoidance. */
0040         trace_xchk_deadlock_retry(sc->ip, sc->sm, *error);
0041         break;
0042     case -EFSBADCRC:
0043     case -EFSCORRUPTED:
0044         /* Note the badness but don't abort. */
0045         sc->sm->sm_flags |= errflag;
0046         *error = 0;
0047         fallthrough;
0048     default:
0049         if (cur->bc_flags & XFS_BTREE_ROOT_IN_INODE)
0050             trace_xchk_ifork_btree_op_error(sc, cur, level,
0051                     *error, ret_ip);
0052         else
0053             trace_xchk_btree_op_error(sc, cur, level,
0054                     *error, ret_ip);
0055         break;
0056     }
0057     return false;
0058 }
0059 
0060 bool
0061 xchk_btree_process_error(
0062     struct xfs_scrub    *sc,
0063     struct xfs_btree_cur    *cur,
0064     int         level,
0065     int         *error)
0066 {
0067     return __xchk_btree_process_error(sc, cur, level, error,
0068             XFS_SCRUB_OFLAG_CORRUPT, __return_address);
0069 }
0070 
0071 bool
0072 xchk_btree_xref_process_error(
0073     struct xfs_scrub    *sc,
0074     struct xfs_btree_cur    *cur,
0075     int         level,
0076     int         *error)
0077 {
0078     return __xchk_btree_process_error(sc, cur, level, error,
0079             XFS_SCRUB_OFLAG_XFAIL, __return_address);
0080 }
0081 
0082 /* Record btree block corruption. */
0083 static void
0084 __xchk_btree_set_corrupt(
0085     struct xfs_scrub    *sc,
0086     struct xfs_btree_cur    *cur,
0087     int         level,
0088     __u32           errflag,
0089     void            *ret_ip)
0090 {
0091     sc->sm->sm_flags |= errflag;
0092 
0093     if (cur->bc_flags & XFS_BTREE_ROOT_IN_INODE)
0094         trace_xchk_ifork_btree_error(sc, cur, level,
0095                 ret_ip);
0096     else
0097         trace_xchk_btree_error(sc, cur, level,
0098                 ret_ip);
0099 }
0100 
0101 void
0102 xchk_btree_set_corrupt(
0103     struct xfs_scrub    *sc,
0104     struct xfs_btree_cur    *cur,
0105     int         level)
0106 {
0107     __xchk_btree_set_corrupt(sc, cur, level, XFS_SCRUB_OFLAG_CORRUPT,
0108             __return_address);
0109 }
0110 
0111 void
0112 xchk_btree_xref_set_corrupt(
0113     struct xfs_scrub    *sc,
0114     struct xfs_btree_cur    *cur,
0115     int         level)
0116 {
0117     __xchk_btree_set_corrupt(sc, cur, level, XFS_SCRUB_OFLAG_XCORRUPT,
0118             __return_address);
0119 }
0120 
0121 /*
0122  * Make sure this record is in order and doesn't stray outside of the parent
0123  * keys.
0124  */
0125 STATIC void
0126 xchk_btree_rec(
0127     struct xchk_btree   *bs)
0128 {
0129     struct xfs_btree_cur    *cur = bs->cur;
0130     union xfs_btree_rec *rec;
0131     union xfs_btree_key key;
0132     union xfs_btree_key hkey;
0133     union xfs_btree_key *keyp;
0134     struct xfs_btree_block  *block;
0135     struct xfs_btree_block  *keyblock;
0136     struct xfs_buf      *bp;
0137 
0138     block = xfs_btree_get_block(cur, 0, &bp);
0139     rec = xfs_btree_rec_addr(cur, cur->bc_levels[0].ptr, block);
0140 
0141     trace_xchk_btree_rec(bs->sc, cur, 0);
0142 
0143     /* If this isn't the first record, are they in order? */
0144     if (cur->bc_levels[0].ptr > 1 &&
0145         !cur->bc_ops->recs_inorder(cur, &bs->lastrec, rec))
0146         xchk_btree_set_corrupt(bs->sc, cur, 0);
0147     memcpy(&bs->lastrec, rec, cur->bc_ops->rec_len);
0148 
0149     if (cur->bc_nlevels == 1)
0150         return;
0151 
0152     /* Is this at least as large as the parent low key? */
0153     cur->bc_ops->init_key_from_rec(&key, rec);
0154     keyblock = xfs_btree_get_block(cur, 1, &bp);
0155     keyp = xfs_btree_key_addr(cur, cur->bc_levels[1].ptr, keyblock);
0156     if (cur->bc_ops->diff_two_keys(cur, &key, keyp) < 0)
0157         xchk_btree_set_corrupt(bs->sc, cur, 1);
0158 
0159     if (!(cur->bc_flags & XFS_BTREE_OVERLAPPING))
0160         return;
0161 
0162     /* Is this no larger than the parent high key? */
0163     cur->bc_ops->init_high_key_from_rec(&hkey, rec);
0164     keyp = xfs_btree_high_key_addr(cur, cur->bc_levels[1].ptr, keyblock);
0165     if (cur->bc_ops->diff_two_keys(cur, keyp, &hkey) < 0)
0166         xchk_btree_set_corrupt(bs->sc, cur, 1);
0167 }
0168 
0169 /*
0170  * Make sure this key is in order and doesn't stray outside of the parent
0171  * keys.
0172  */
0173 STATIC void
0174 xchk_btree_key(
0175     struct xchk_btree   *bs,
0176     int         level)
0177 {
0178     struct xfs_btree_cur    *cur = bs->cur;
0179     union xfs_btree_key *key;
0180     union xfs_btree_key *keyp;
0181     struct xfs_btree_block  *block;
0182     struct xfs_btree_block  *keyblock;
0183     struct xfs_buf      *bp;
0184 
0185     block = xfs_btree_get_block(cur, level, &bp);
0186     key = xfs_btree_key_addr(cur, cur->bc_levels[level].ptr, block);
0187 
0188     trace_xchk_btree_key(bs->sc, cur, level);
0189 
0190     /* If this isn't the first key, are they in order? */
0191     if (cur->bc_levels[level].ptr > 1 &&
0192         !cur->bc_ops->keys_inorder(cur, &bs->lastkey[level - 1], key))
0193         xchk_btree_set_corrupt(bs->sc, cur, level);
0194     memcpy(&bs->lastkey[level - 1], key, cur->bc_ops->key_len);
0195 
0196     if (level + 1 >= cur->bc_nlevels)
0197         return;
0198 
0199     /* Is this at least as large as the parent low key? */
0200     keyblock = xfs_btree_get_block(cur, level + 1, &bp);
0201     keyp = xfs_btree_key_addr(cur, cur->bc_levels[level + 1].ptr, keyblock);
0202     if (cur->bc_ops->diff_two_keys(cur, key, keyp) < 0)
0203         xchk_btree_set_corrupt(bs->sc, cur, level);
0204 
0205     if (!(cur->bc_flags & XFS_BTREE_OVERLAPPING))
0206         return;
0207 
0208     /* Is this no larger than the parent high key? */
0209     key = xfs_btree_high_key_addr(cur, cur->bc_levels[level].ptr, block);
0210     keyp = xfs_btree_high_key_addr(cur, cur->bc_levels[level + 1].ptr,
0211             keyblock);
0212     if (cur->bc_ops->diff_two_keys(cur, keyp, key) < 0)
0213         xchk_btree_set_corrupt(bs->sc, cur, level);
0214 }
0215 
0216 /*
0217  * Check a btree pointer.  Returns true if it's ok to use this pointer.
0218  * Callers do not need to set the corrupt flag.
0219  */
0220 static bool
0221 xchk_btree_ptr_ok(
0222     struct xchk_btree   *bs,
0223     int         level,
0224     union xfs_btree_ptr *ptr)
0225 {
0226     bool            res;
0227 
0228     /* A btree rooted in an inode has no block pointer to the root. */
0229     if ((bs->cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) &&
0230         level == bs->cur->bc_nlevels)
0231         return true;
0232 
0233     /* Otherwise, check the pointers. */
0234     if (bs->cur->bc_flags & XFS_BTREE_LONG_PTRS)
0235         res = xfs_btree_check_lptr(bs->cur, be64_to_cpu(ptr->l), level);
0236     else
0237         res = xfs_btree_check_sptr(bs->cur, be32_to_cpu(ptr->s), level);
0238     if (!res)
0239         xchk_btree_set_corrupt(bs->sc, bs->cur, level);
0240 
0241     return res;
0242 }
0243 
0244 /* Check that a btree block's sibling matches what we expect it. */
0245 STATIC int
0246 xchk_btree_block_check_sibling(
0247     struct xchk_btree   *bs,
0248     int         level,
0249     int         direction,
0250     union xfs_btree_ptr *sibling)
0251 {
0252     struct xfs_btree_cur    *cur = bs->cur;
0253     struct xfs_btree_block  *pblock;
0254     struct xfs_buf      *pbp;
0255     struct xfs_btree_cur    *ncur = NULL;
0256     union xfs_btree_ptr *pp;
0257     int         success;
0258     int         error;
0259 
0260     error = xfs_btree_dup_cursor(cur, &ncur);
0261     if (!xchk_btree_process_error(bs->sc, cur, level + 1, &error) ||
0262         !ncur)
0263         return error;
0264 
0265     /*
0266      * If the pointer is null, we shouldn't be able to move the upper
0267      * level pointer anywhere.
0268      */
0269     if (xfs_btree_ptr_is_null(cur, sibling)) {
0270         if (direction > 0)
0271             error = xfs_btree_increment(ncur, level + 1, &success);
0272         else
0273             error = xfs_btree_decrement(ncur, level + 1, &success);
0274         if (error == 0 && success)
0275             xchk_btree_set_corrupt(bs->sc, cur, level);
0276         error = 0;
0277         goto out;
0278     }
0279 
0280     /* Increment upper level pointer. */
0281     if (direction > 0)
0282         error = xfs_btree_increment(ncur, level + 1, &success);
0283     else
0284         error = xfs_btree_decrement(ncur, level + 1, &success);
0285     if (!xchk_btree_process_error(bs->sc, cur, level + 1, &error))
0286         goto out;
0287     if (!success) {
0288         xchk_btree_set_corrupt(bs->sc, cur, level + 1);
0289         goto out;
0290     }
0291 
0292     /* Compare upper level pointer to sibling pointer. */
0293     pblock = xfs_btree_get_block(ncur, level + 1, &pbp);
0294     pp = xfs_btree_ptr_addr(ncur, ncur->bc_levels[level + 1].ptr, pblock);
0295     if (!xchk_btree_ptr_ok(bs, level + 1, pp))
0296         goto out;
0297     if (pbp)
0298         xchk_buffer_recheck(bs->sc, pbp);
0299 
0300     if (xfs_btree_diff_two_ptrs(cur, pp, sibling))
0301         xchk_btree_set_corrupt(bs->sc, cur, level);
0302 out:
0303     xfs_btree_del_cursor(ncur, XFS_BTREE_ERROR);
0304     return error;
0305 }
0306 
0307 /* Check the siblings of a btree block. */
0308 STATIC int
0309 xchk_btree_block_check_siblings(
0310     struct xchk_btree   *bs,
0311     struct xfs_btree_block  *block)
0312 {
0313     struct xfs_btree_cur    *cur = bs->cur;
0314     union xfs_btree_ptr leftsib;
0315     union xfs_btree_ptr rightsib;
0316     int         level;
0317     int         error = 0;
0318 
0319     xfs_btree_get_sibling(cur, block, &leftsib, XFS_BB_LEFTSIB);
0320     xfs_btree_get_sibling(cur, block, &rightsib, XFS_BB_RIGHTSIB);
0321     level = xfs_btree_get_level(block);
0322 
0323     /* Root block should never have siblings. */
0324     if (level == cur->bc_nlevels - 1) {
0325         if (!xfs_btree_ptr_is_null(cur, &leftsib) ||
0326             !xfs_btree_ptr_is_null(cur, &rightsib))
0327             xchk_btree_set_corrupt(bs->sc, cur, level);
0328         goto out;
0329     }
0330 
0331     /*
0332      * Does the left & right sibling pointers match the adjacent
0333      * parent level pointers?
0334      * (These function absorbs error codes for us.)
0335      */
0336     error = xchk_btree_block_check_sibling(bs, level, -1, &leftsib);
0337     if (error)
0338         return error;
0339     error = xchk_btree_block_check_sibling(bs, level, 1, &rightsib);
0340     if (error)
0341         return error;
0342 out:
0343     return error;
0344 }
0345 
0346 struct check_owner {
0347     struct list_head    list;
0348     xfs_daddr_t     daddr;
0349     int         level;
0350 };
0351 
0352 /*
0353  * Make sure this btree block isn't in the free list and that there's
0354  * an rmap record for it.
0355  */
0356 STATIC int
0357 xchk_btree_check_block_owner(
0358     struct xchk_btree   *bs,
0359     int         level,
0360     xfs_daddr_t     daddr)
0361 {
0362     xfs_agnumber_t      agno;
0363     xfs_agblock_t       agbno;
0364     xfs_btnum_t     btnum;
0365     bool            init_sa;
0366     int         error = 0;
0367 
0368     if (!bs->cur)
0369         return 0;
0370 
0371     btnum = bs->cur->bc_btnum;
0372     agno = xfs_daddr_to_agno(bs->cur->bc_mp, daddr);
0373     agbno = xfs_daddr_to_agbno(bs->cur->bc_mp, daddr);
0374 
0375     init_sa = bs->cur->bc_flags & XFS_BTREE_LONG_PTRS;
0376     if (init_sa) {
0377         error = xchk_ag_init_existing(bs->sc, agno, &bs->sc->sa);
0378         if (!xchk_btree_xref_process_error(bs->sc, bs->cur,
0379                 level, &error))
0380             goto out_free;
0381     }
0382 
0383     xchk_xref_is_used_space(bs->sc, agbno, 1);
0384     /*
0385      * The bnobt scrubber aliases bs->cur to bs->sc->sa.bno_cur, so we
0386      * have to nullify it (to shut down further block owner checks) if
0387      * self-xref encounters problems.
0388      */
0389     if (!bs->sc->sa.bno_cur && btnum == XFS_BTNUM_BNO)
0390         bs->cur = NULL;
0391 
0392     xchk_xref_is_owned_by(bs->sc, agbno, 1, bs->oinfo);
0393     if (!bs->sc->sa.rmap_cur && btnum == XFS_BTNUM_RMAP)
0394         bs->cur = NULL;
0395 
0396 out_free:
0397     if (init_sa)
0398         xchk_ag_free(bs->sc, &bs->sc->sa);
0399 
0400     return error;
0401 }
0402 
0403 /* Check the owner of a btree block. */
0404 STATIC int
0405 xchk_btree_check_owner(
0406     struct xchk_btree   *bs,
0407     int         level,
0408     struct xfs_buf      *bp)
0409 {
0410     struct xfs_btree_cur    *cur = bs->cur;
0411     struct check_owner  *co;
0412 
0413     /*
0414      * In theory, xfs_btree_get_block should only give us a null buffer
0415      * pointer for the root of a root-in-inode btree type, but we need
0416      * to check defensively here in case the cursor state is also screwed
0417      * up.
0418      */
0419     if (bp == NULL) {
0420         if (!(cur->bc_flags & XFS_BTREE_ROOT_IN_INODE))
0421             xchk_btree_set_corrupt(bs->sc, bs->cur, level);
0422         return 0;
0423     }
0424 
0425     /*
0426      * We want to cross-reference each btree block with the bnobt
0427      * and the rmapbt.  We cannot cross-reference the bnobt or
0428      * rmapbt while scanning the bnobt or rmapbt, respectively,
0429      * because we cannot alter the cursor and we'd prefer not to
0430      * duplicate cursors.  Therefore, save the buffer daddr for
0431      * later scanning.
0432      */
0433     if (cur->bc_btnum == XFS_BTNUM_BNO || cur->bc_btnum == XFS_BTNUM_RMAP) {
0434         co = kmem_alloc(sizeof(struct check_owner),
0435                 KM_MAYFAIL);
0436         if (!co)
0437             return -ENOMEM;
0438         co->level = level;
0439         co->daddr = xfs_buf_daddr(bp);
0440         list_add_tail(&co->list, &bs->to_check);
0441         return 0;
0442     }
0443 
0444     return xchk_btree_check_block_owner(bs, level, xfs_buf_daddr(bp));
0445 }
0446 
0447 /* Decide if we want to check minrecs of a btree block in the inode root. */
0448 static inline bool
0449 xchk_btree_check_iroot_minrecs(
0450     struct xchk_btree   *bs)
0451 {
0452     /*
0453      * xfs_bmap_add_attrfork_btree had an implementation bug wherein it
0454      * would miscalculate the space required for the data fork bmbt root
0455      * when adding an attr fork, and promote the iroot contents to an
0456      * external block unnecessarily.  This went unnoticed for many years
0457      * until scrub found filesystems in this state.  Inode rooted btrees are
0458      * not supposed to have immediate child blocks that are small enough
0459      * that the contents could fit in the inode root, but we can't fail
0460      * existing filesystems, so instead we disable the check for data fork
0461      * bmap btrees when there's an attr fork.
0462      */
0463     if (bs->cur->bc_btnum == XFS_BTNUM_BMAP &&
0464         bs->cur->bc_ino.whichfork == XFS_DATA_FORK &&
0465         xfs_inode_has_attr_fork(bs->sc->ip))
0466         return false;
0467 
0468     return true;
0469 }
0470 
0471 /*
0472  * Check that this btree block has at least minrecs records or is one of the
0473  * special blocks that don't require that.
0474  */
0475 STATIC void
0476 xchk_btree_check_minrecs(
0477     struct xchk_btree   *bs,
0478     int         level,
0479     struct xfs_btree_block  *block)
0480 {
0481     struct xfs_btree_cur    *cur = bs->cur;
0482     unsigned int        root_level = cur->bc_nlevels - 1;
0483     unsigned int        numrecs = be16_to_cpu(block->bb_numrecs);
0484 
0485     /* More records than minrecs means the block is ok. */
0486     if (numrecs >= cur->bc_ops->get_minrecs(cur, level))
0487         return;
0488 
0489     /*
0490      * For btrees rooted in the inode, it's possible that the root block
0491      * contents spilled into a regular ondisk block because there wasn't
0492      * enough space in the inode root.  The number of records in that
0493      * child block might be less than the standard minrecs, but that's ok
0494      * provided that there's only one direct child of the root.
0495      */
0496     if ((cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) &&
0497         level == cur->bc_nlevels - 2) {
0498         struct xfs_btree_block  *root_block;
0499         struct xfs_buf      *root_bp;
0500         int         root_maxrecs;
0501 
0502         root_block = xfs_btree_get_block(cur, root_level, &root_bp);
0503         root_maxrecs = cur->bc_ops->get_dmaxrecs(cur, root_level);
0504         if (xchk_btree_check_iroot_minrecs(bs) &&
0505             (be16_to_cpu(root_block->bb_numrecs) != 1 ||
0506              numrecs <= root_maxrecs))
0507             xchk_btree_set_corrupt(bs->sc, cur, level);
0508         return;
0509     }
0510 
0511     /*
0512      * Otherwise, only the root level is allowed to have fewer than minrecs
0513      * records or keyptrs.
0514      */
0515     if (level < root_level)
0516         xchk_btree_set_corrupt(bs->sc, cur, level);
0517 }
0518 
0519 /*
0520  * Grab and scrub a btree block given a btree pointer.  Returns block
0521  * and buffer pointers (if applicable) if they're ok to use.
0522  */
0523 STATIC int
0524 xchk_btree_get_block(
0525     struct xchk_btree   *bs,
0526     int         level,
0527     union xfs_btree_ptr *pp,
0528     struct xfs_btree_block  **pblock,
0529     struct xfs_buf      **pbp)
0530 {
0531     xfs_failaddr_t      failed_at;
0532     int         error;
0533 
0534     *pblock = NULL;
0535     *pbp = NULL;
0536 
0537     error = xfs_btree_lookup_get_block(bs->cur, level, pp, pblock);
0538     if (!xchk_btree_process_error(bs->sc, bs->cur, level, &error) ||
0539         !*pblock)
0540         return error;
0541 
0542     xfs_btree_get_block(bs->cur, level, pbp);
0543     if (bs->cur->bc_flags & XFS_BTREE_LONG_PTRS)
0544         failed_at = __xfs_btree_check_lblock(bs->cur, *pblock,
0545                 level, *pbp);
0546     else
0547         failed_at = __xfs_btree_check_sblock(bs->cur, *pblock,
0548                  level, *pbp);
0549     if (failed_at) {
0550         xchk_btree_set_corrupt(bs->sc, bs->cur, level);
0551         return 0;
0552     }
0553     if (*pbp)
0554         xchk_buffer_recheck(bs->sc, *pbp);
0555 
0556     xchk_btree_check_minrecs(bs, level, *pblock);
0557 
0558     /*
0559      * Check the block's owner; this function absorbs error codes
0560      * for us.
0561      */
0562     error = xchk_btree_check_owner(bs, level, *pbp);
0563     if (error)
0564         return error;
0565 
0566     /*
0567      * Check the block's siblings; this function absorbs error codes
0568      * for us.
0569      */
0570     return xchk_btree_block_check_siblings(bs, *pblock);
0571 }
0572 
0573 /*
0574  * Check that the low and high keys of this block match the keys stored
0575  * in the parent block.
0576  */
0577 STATIC void
0578 xchk_btree_block_keys(
0579     struct xchk_btree   *bs,
0580     int         level,
0581     struct xfs_btree_block  *block)
0582 {
0583     union xfs_btree_key block_keys;
0584     struct xfs_btree_cur    *cur = bs->cur;
0585     union xfs_btree_key *high_bk;
0586     union xfs_btree_key *parent_keys;
0587     union xfs_btree_key *high_pk;
0588     struct xfs_btree_block  *parent_block;
0589     struct xfs_buf      *bp;
0590 
0591     if (level >= cur->bc_nlevels - 1)
0592         return;
0593 
0594     /* Calculate the keys for this block. */
0595     xfs_btree_get_keys(cur, block, &block_keys);
0596 
0597     /* Obtain the parent's copy of the keys for this block. */
0598     parent_block = xfs_btree_get_block(cur, level + 1, &bp);
0599     parent_keys = xfs_btree_key_addr(cur, cur->bc_levels[level + 1].ptr,
0600             parent_block);
0601 
0602     if (cur->bc_ops->diff_two_keys(cur, &block_keys, parent_keys) != 0)
0603         xchk_btree_set_corrupt(bs->sc, cur, 1);
0604 
0605     if (!(cur->bc_flags & XFS_BTREE_OVERLAPPING))
0606         return;
0607 
0608     /* Get high keys */
0609     high_bk = xfs_btree_high_key_from_key(cur, &block_keys);
0610     high_pk = xfs_btree_high_key_addr(cur, cur->bc_levels[level + 1].ptr,
0611             parent_block);
0612 
0613     if (cur->bc_ops->diff_two_keys(cur, high_bk, high_pk) != 0)
0614         xchk_btree_set_corrupt(bs->sc, cur, 1);
0615 }
0616 
0617 /*
0618  * Visit all nodes and leaves of a btree.  Check that all pointers and
0619  * records are in order, that the keys reflect the records, and use a callback
0620  * so that the caller can verify individual records.
0621  */
0622 int
0623 xchk_btree(
0624     struct xfs_scrub        *sc,
0625     struct xfs_btree_cur        *cur,
0626     xchk_btree_rec_fn       scrub_fn,
0627     const struct xfs_owner_info *oinfo,
0628     void                *private)
0629 {
0630     union xfs_btree_ptr     ptr;
0631     struct xchk_btree       *bs;
0632     union xfs_btree_ptr     *pp;
0633     union xfs_btree_rec     *recp;
0634     struct xfs_btree_block      *block;
0635     struct xfs_buf          *bp;
0636     struct check_owner      *co;
0637     struct check_owner      *n;
0638     size_t              cur_sz;
0639     int             level;
0640     int             error = 0;
0641 
0642     /*
0643      * Allocate the btree scrub context from the heap, because this
0644      * structure can get rather large.  Don't let a caller feed us a
0645      * totally absurd size.
0646      */
0647     cur_sz = xchk_btree_sizeof(cur->bc_nlevels);
0648     if (cur_sz > PAGE_SIZE) {
0649         xchk_btree_set_corrupt(sc, cur, 0);
0650         return 0;
0651     }
0652     bs = kmem_zalloc(cur_sz, KM_NOFS | KM_MAYFAIL);
0653     if (!bs)
0654         return -ENOMEM;
0655     bs->cur = cur;
0656     bs->scrub_rec = scrub_fn;
0657     bs->oinfo = oinfo;
0658     bs->private = private;
0659     bs->sc = sc;
0660 
0661     /* Initialize scrub state */
0662     INIT_LIST_HEAD(&bs->to_check);
0663 
0664     /*
0665      * Load the root of the btree.  The helper function absorbs
0666      * error codes for us.
0667      */
0668     level = cur->bc_nlevels - 1;
0669     cur->bc_ops->init_ptr_from_cur(cur, &ptr);
0670     if (!xchk_btree_ptr_ok(bs, cur->bc_nlevels, &ptr))
0671         goto out;
0672     error = xchk_btree_get_block(bs, level, &ptr, &block, &bp);
0673     if (error || !block)
0674         goto out;
0675 
0676     cur->bc_levels[level].ptr = 1;
0677 
0678     while (level < cur->bc_nlevels) {
0679         block = xfs_btree_get_block(cur, level, &bp);
0680 
0681         if (level == 0) {
0682             /* End of leaf, pop back towards the root. */
0683             if (cur->bc_levels[level].ptr >
0684                 be16_to_cpu(block->bb_numrecs)) {
0685                 xchk_btree_block_keys(bs, level, block);
0686                 if (level < cur->bc_nlevels - 1)
0687                     cur->bc_levels[level + 1].ptr++;
0688                 level++;
0689                 continue;
0690             }
0691 
0692             /* Records in order for scrub? */
0693             xchk_btree_rec(bs);
0694 
0695             /* Call out to the record checker. */
0696             recp = xfs_btree_rec_addr(cur, cur->bc_levels[0].ptr,
0697                     block);
0698             error = bs->scrub_rec(bs, recp);
0699             if (error)
0700                 break;
0701             if (xchk_should_terminate(sc, &error) ||
0702                 (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT))
0703                 break;
0704 
0705             cur->bc_levels[level].ptr++;
0706             continue;
0707         }
0708 
0709         /* End of node, pop back towards the root. */
0710         if (cur->bc_levels[level].ptr >
0711                     be16_to_cpu(block->bb_numrecs)) {
0712             xchk_btree_block_keys(bs, level, block);
0713             if (level < cur->bc_nlevels - 1)
0714                 cur->bc_levels[level + 1].ptr++;
0715             level++;
0716             continue;
0717         }
0718 
0719         /* Keys in order for scrub? */
0720         xchk_btree_key(bs, level);
0721 
0722         /* Drill another level deeper. */
0723         pp = xfs_btree_ptr_addr(cur, cur->bc_levels[level].ptr, block);
0724         if (!xchk_btree_ptr_ok(bs, level, pp)) {
0725             cur->bc_levels[level].ptr++;
0726             continue;
0727         }
0728         level--;
0729         error = xchk_btree_get_block(bs, level, pp, &block, &bp);
0730         if (error || !block)
0731             goto out;
0732 
0733         cur->bc_levels[level].ptr = 1;
0734     }
0735 
0736 out:
0737     /* Process deferred owner checks on btree blocks. */
0738     list_for_each_entry_safe(co, n, &bs->to_check, list) {
0739         if (!error && bs->cur)
0740             error = xchk_btree_check_block_owner(bs, co->level,
0741                     co->daddr);
0742         list_del(&co->list);
0743         kmem_free(co);
0744     }
0745     kmem_free(bs);
0746 
0747     return error;
0748 }