Back to home page

OSCL-LXR

 
 

    


0001 // SPDX-License-Identifier: GPL-2.0+
0002 /*
0003  * Copyright (C) 2019 Oracle.  All Rights Reserved.
0004  * Author: Darrick J. Wong <darrick.wong@oracle.com>
0005  */
0006 #include "xfs.h"
0007 #include "xfs_fs.h"
0008 #include "xfs_shared.h"
0009 #include "xfs_format.h"
0010 #include "xfs_btree.h"
0011 #include "xfs_trans_resv.h"
0012 #include "xfs_mount.h"
0013 #include "xfs_ag.h"
0014 #include "xfs_health.h"
0015 #include "scrub/scrub.h"
0016 #include "scrub/health.h"
0017 
0018 /*
0019  * Scrub and In-Core Filesystem Health Assessments
0020  * ===============================================
0021  *
0022  * Online scrub and repair have the time and the ability to perform stronger
0023  * checks than we can do from the metadata verifiers, because they can
0024  * cross-reference records between data structures.  Therefore, scrub is in a
0025  * good position to update the online filesystem health assessments to reflect
0026  * the good/bad state of the data structure.
0027  *
0028  * We therefore extend scrub in the following ways to achieve this:
0029  *
0030  * 1. Create a "sick_mask" field in the scrub context.  When we're setting up a
0031  * scrub call, set this to the default XFS_SICK_* flag(s) for the selected
0032  * scrub type (call it A).  Scrub and repair functions can override the default
0033  * sick_mask value if they choose.
0034  *
0035  * 2. If the scrubber returns a runtime error code, we exit making no changes
0036  * to the incore sick state.
0037  *
0038  * 3. If the scrubber finds that A is clean, use sick_mask to clear the incore
0039  * sick flags before exiting.
0040  *
0041  * 4. If the scrubber finds that A is corrupt, use sick_mask to set the incore
0042  * sick flags.  If the user didn't want to repair then we exit, leaving the
0043  * metadata structure unfixed and the sick flag set.
0044  *
0045  * 5. Now we know that A is corrupt and the user wants to repair, so run the
0046  * repairer.  If the repairer returns an error code, we exit with that error
0047  * code, having made no further changes to the incore sick state.
0048  *
0049  * 6. If repair rebuilds A correctly and the subsequent re-scrub of A is clean,
0050  * use sick_mask to clear the incore sick flags.  This should have the effect
0051  * that A is no longer marked sick.
0052  *
0053  * 7. If repair rebuilds A incorrectly, the re-scrub will find it corrupt and
0054  * use sick_mask to set the incore sick flags.  This should have no externally
0055  * visible effect since we already set them in step (4).
0056  *
0057  * There are some complications to this story, however.  For certain types of
0058  * complementary metadata indices (e.g. inobt/finobt), it is easier to rebuild
0059  * both structures at the same time.  The following principles apply to this
0060  * type of repair strategy:
0061  *
0062  * 8. Any repair function that rebuilds multiple structures should update
0063  * sick_mask to reflect whatever other structures are rebuilt, and
0064  * verify that all the rebuilt structures can pass a scrub check.  The outcomes
0065  * of 5-7 still apply, but with a sick_mask that covers everything being
0066  * rebuilt.
0067  */
0068 
0069 /* Map our scrub type to a sick mask and a set of health update functions. */
0070 
/*
 * Health domain that a scrub type reports into.  Numbering deliberately
 * starts at 1: slots of type_to_health_flag[] that have no explicit
 * initializer are zero-filled, and zero must not match any real group.
 */
enum xchk_health_group {
	XHG_FS	= 1,	/* updated through xfs_fs_mark_{sick,healthy}() */
	XHG_RT	= 2,	/* updated through xfs_rt_mark_{sick,healthy}() */
	XHG_AG	= 3,	/* updated through xfs_ag_mark_{sick,healthy}() */
	XHG_INO	= 4,	/* updated through xfs_inode_mark_{sick,healthy}() */
};
0077 
0078 struct xchk_health_map {
0079     enum xchk_health_group  group;
0080     unsigned int        sick_mask;
0081 };
0082 
0083 static const struct xchk_health_map type_to_health_flag[XFS_SCRUB_TYPE_NR] = {
0084     [XFS_SCRUB_TYPE_SB]     = { XHG_AG,  XFS_SICK_AG_SB },
0085     [XFS_SCRUB_TYPE_AGF]        = { XHG_AG,  XFS_SICK_AG_AGF },
0086     [XFS_SCRUB_TYPE_AGFL]       = { XHG_AG,  XFS_SICK_AG_AGFL },
0087     [XFS_SCRUB_TYPE_AGI]        = { XHG_AG,  XFS_SICK_AG_AGI },
0088     [XFS_SCRUB_TYPE_BNOBT]      = { XHG_AG,  XFS_SICK_AG_BNOBT },
0089     [XFS_SCRUB_TYPE_CNTBT]      = { XHG_AG,  XFS_SICK_AG_CNTBT },
0090     [XFS_SCRUB_TYPE_INOBT]      = { XHG_AG,  XFS_SICK_AG_INOBT },
0091     [XFS_SCRUB_TYPE_FINOBT]     = { XHG_AG,  XFS_SICK_AG_FINOBT },
0092     [XFS_SCRUB_TYPE_RMAPBT]     = { XHG_AG,  XFS_SICK_AG_RMAPBT },
0093     [XFS_SCRUB_TYPE_REFCNTBT]   = { XHG_AG,  XFS_SICK_AG_REFCNTBT },
0094     [XFS_SCRUB_TYPE_INODE]      = { XHG_INO, XFS_SICK_INO_CORE },
0095     [XFS_SCRUB_TYPE_BMBTD]      = { XHG_INO, XFS_SICK_INO_BMBTD },
0096     [XFS_SCRUB_TYPE_BMBTA]      = { XHG_INO, XFS_SICK_INO_BMBTA },
0097     [XFS_SCRUB_TYPE_BMBTC]      = { XHG_INO, XFS_SICK_INO_BMBTC },
0098     [XFS_SCRUB_TYPE_DIR]        = { XHG_INO, XFS_SICK_INO_DIR },
0099     [XFS_SCRUB_TYPE_XATTR]      = { XHG_INO, XFS_SICK_INO_XATTR },
0100     [XFS_SCRUB_TYPE_SYMLINK]    = { XHG_INO, XFS_SICK_INO_SYMLINK },
0101     [XFS_SCRUB_TYPE_PARENT]     = { XHG_INO, XFS_SICK_INO_PARENT },
0102     [XFS_SCRUB_TYPE_RTBITMAP]   = { XHG_RT,  XFS_SICK_RT_BITMAP },
0103     [XFS_SCRUB_TYPE_RTSUM]      = { XHG_RT,  XFS_SICK_RT_SUMMARY },
0104     [XFS_SCRUB_TYPE_UQUOTA]     = { XHG_FS,  XFS_SICK_FS_UQUOTA },
0105     [XFS_SCRUB_TYPE_GQUOTA]     = { XHG_FS,  XFS_SICK_FS_GQUOTA },
0106     [XFS_SCRUB_TYPE_PQUOTA]     = { XHG_FS,  XFS_SICK_FS_PQUOTA },
0107     [XFS_SCRUB_TYPE_FSCOUNTERS] = { XHG_FS,  XFS_SICK_FS_COUNTERS },
0108 };
0109 
0110 /* Return the health status mask for this scrub type. */
0111 unsigned int
0112 xchk_health_mask_for_scrub_type(
0113     __u32           scrub_type)
0114 {
0115     return type_to_health_flag[scrub_type].sick_mask;
0116 }
0117 
0118 /*
0119  * Update filesystem health assessments based on what we found and did.
0120  *
0121  * If the scrubber finds errors, we mark sick whatever's mentioned in
0122  * sick_mask, no matter whether this is a first scan or an
0123  * evaluation of repair effectiveness.
0124  *
0125  * Otherwise, no direct corruption was found, so mark whatever's in
0126  * sick_mask as healthy.
0127  */
0128 void
0129 xchk_update_health(
0130     struct xfs_scrub    *sc)
0131 {
0132     struct xfs_perag    *pag;
0133     bool            bad;
0134 
0135     if (!sc->sick_mask)
0136         return;
0137 
0138     bad = (sc->sm->sm_flags & (XFS_SCRUB_OFLAG_CORRUPT |
0139                    XFS_SCRUB_OFLAG_XCORRUPT));
0140     switch (type_to_health_flag[sc->sm->sm_type].group) {
0141     case XHG_AG:
0142         pag = xfs_perag_get(sc->mp, sc->sm->sm_agno);
0143         if (bad)
0144             xfs_ag_mark_sick(pag, sc->sick_mask);
0145         else
0146             xfs_ag_mark_healthy(pag, sc->sick_mask);
0147         xfs_perag_put(pag);
0148         break;
0149     case XHG_INO:
0150         if (!sc->ip)
0151             return;
0152         if (bad)
0153             xfs_inode_mark_sick(sc->ip, sc->sick_mask);
0154         else
0155             xfs_inode_mark_healthy(sc->ip, sc->sick_mask);
0156         break;
0157     case XHG_FS:
0158         if (bad)
0159             xfs_fs_mark_sick(sc->mp, sc->sick_mask);
0160         else
0161             xfs_fs_mark_healthy(sc->mp, sc->sick_mask);
0162         break;
0163     case XHG_RT:
0164         if (bad)
0165             xfs_rt_mark_sick(sc->mp, sc->sick_mask);
0166         else
0167             xfs_rt_mark_healthy(sc->mp, sc->sick_mask);
0168         break;
0169     default:
0170         ASSERT(0);
0171         break;
0172     }
0173 }
0174 
0175 /* Is the given per-AG btree healthy enough for scanning? */
0176 bool
0177 xchk_ag_btree_healthy_enough(
0178     struct xfs_scrub    *sc,
0179     struct xfs_perag    *pag,
0180     xfs_btnum_t     btnum)
0181 {
0182     unsigned int        mask = 0;
0183 
0184     /*
0185      * We always want the cursor if it's the same type as whatever we're
0186      * scrubbing, even if we already know the structure is corrupt.
0187      *
0188      * Otherwise, we're only interested in the btree for cross-referencing.
0189      * If we know the btree is bad then don't bother, just set XFAIL.
0190      */
0191     switch (btnum) {
0192     case XFS_BTNUM_BNO:
0193         if (sc->sm->sm_type == XFS_SCRUB_TYPE_BNOBT)
0194             return true;
0195         mask = XFS_SICK_AG_BNOBT;
0196         break;
0197     case XFS_BTNUM_CNT:
0198         if (sc->sm->sm_type == XFS_SCRUB_TYPE_CNTBT)
0199             return true;
0200         mask = XFS_SICK_AG_CNTBT;
0201         break;
0202     case XFS_BTNUM_INO:
0203         if (sc->sm->sm_type == XFS_SCRUB_TYPE_INOBT)
0204             return true;
0205         mask = XFS_SICK_AG_INOBT;
0206         break;
0207     case XFS_BTNUM_FINO:
0208         if (sc->sm->sm_type == XFS_SCRUB_TYPE_FINOBT)
0209             return true;
0210         mask = XFS_SICK_AG_FINOBT;
0211         break;
0212     case XFS_BTNUM_RMAP:
0213         if (sc->sm->sm_type == XFS_SCRUB_TYPE_RMAPBT)
0214             return true;
0215         mask = XFS_SICK_AG_RMAPBT;
0216         break;
0217     case XFS_BTNUM_REFC:
0218         if (sc->sm->sm_type == XFS_SCRUB_TYPE_REFCNTBT)
0219             return true;
0220         mask = XFS_SICK_AG_REFCNTBT;
0221         break;
0222     default:
0223         ASSERT(0);
0224         return true;
0225     }
0226 
0227     if (xfs_ag_has_sickness(pag, mask)) {
0228         sc->sm->sm_flags |= XFS_SCRUB_OFLAG_XFAIL;
0229         return false;
0230     }
0231 
0232     return true;
0233 }