Back to home page

OSCL-LXR

 
 

    


0001 // SPDX-License-Identifier: GPL-2.0
0002 /*
0003  * Copyright (C) Qu Wenruo 2017.  All rights reserved.
0004  */
0005 
0006 /*
0007  * The module is used to catch unexpected/corrupted tree block data.
0008  * Such behavior can be caused either by a fuzzed image or bugs.
0009  *
0010  * The objective is to do leaf/node validation checks when tree block is read
0011  * from disk, and check *every* possible member, so other code won't
0012  * need to checking them again.
0013  *
0014  * Due to the potential and unwanted damage, every checker needs to be
0015  * carefully reviewed otherwise so it does not prevent mount of valid images.
0016  */
0017 
0018 #include <linux/types.h>
0019 #include <linux/stddef.h>
0020 #include <linux/error-injection.h>
0021 #include "ctree.h"
0022 #include "tree-checker.h"
0023 #include "disk-io.h"
0024 #include "compression.h"
0025 #include "volumes.h"
0026 #include "misc.h"
0027 #include "btrfs_inode.h"
0028 
0029 /*
0030  * Error message should follow the following format:
0031  * corrupt <type>: <identifier>, <reason>[, <bad_value>]
0032  *
0033  * @type:   leaf or node
0034  * @identifier: the necessary info to locate the leaf/node.
0035  *      It's recommended to decode key.objecitd/offset if it's
0036  *      meaningful.
0037  * @reason: describe the error
0038  * @bad_value:  optional, it's recommended to output bad value and its
0039  *      expected value (range).
0040  *
0041  * Since comma is used to separate the components, only space is allowed
0042  * inside each component.
0043  */
0044 
0045 /*
0046  * Append generic "corrupt leaf/node root=%llu block=%llu slot=%d: " to @fmt.
0047  * Allows callers to customize the output.
0048  */
0049 __printf(3, 4)
0050 __cold
0051 static void generic_err(const struct extent_buffer *eb, int slot,
0052             const char *fmt, ...)
0053 {
0054     const struct btrfs_fs_info *fs_info = eb->fs_info;
0055     struct va_format vaf;
0056     va_list args;
0057 
0058     va_start(args, fmt);
0059 
0060     vaf.fmt = fmt;
0061     vaf.va = &args;
0062 
0063     btrfs_crit(fs_info,
0064         "corrupt %s: root=%llu block=%llu slot=%d, %pV",
0065         btrfs_header_level(eb) == 0 ? "leaf" : "node",
0066         btrfs_header_owner(eb), btrfs_header_bytenr(eb), slot, &vaf);
0067     va_end(args);
0068 }
0069 
0070 /*
0071  * Customized reporter for extent data item, since its key objectid and
0072  * offset has its own meaning.
0073  */
0074 __printf(3, 4)
0075 __cold
0076 static void file_extent_err(const struct extent_buffer *eb, int slot,
0077                 const char *fmt, ...)
0078 {
0079     const struct btrfs_fs_info *fs_info = eb->fs_info;
0080     struct btrfs_key key;
0081     struct va_format vaf;
0082     va_list args;
0083 
0084     btrfs_item_key_to_cpu(eb, &key, slot);
0085     va_start(args, fmt);
0086 
0087     vaf.fmt = fmt;
0088     vaf.va = &args;
0089 
0090     btrfs_crit(fs_info,
0091     "corrupt %s: root=%llu block=%llu slot=%d ino=%llu file_offset=%llu, %pV",
0092         btrfs_header_level(eb) == 0 ? "leaf" : "node",
0093         btrfs_header_owner(eb), btrfs_header_bytenr(eb), slot,
0094         key.objectid, key.offset, &vaf);
0095     va_end(args);
0096 }
0097 
0098 /*
0099  * Return 0 if the btrfs_file_extent_##name is aligned to @alignment
0100  * Else return 1
0101  */
0102 #define CHECK_FE_ALIGNED(leaf, slot, fi, name, alignment)             \
0103 ({                                        \
0104     if (unlikely(!IS_ALIGNED(btrfs_file_extent_##name((leaf), (fi)),      \
0105                  (alignment))))                   \
0106         file_extent_err((leaf), (slot),                   \
0107     "invalid %s for file extent, have %llu, should be aligned to %u",     \
0108             (#name), btrfs_file_extent_##name((leaf), (fi)),      \
0109             (alignment));                         \
0110     (!IS_ALIGNED(btrfs_file_extent_##name((leaf), (fi)), (alignment)));   \
0111 })
0112 
0113 static u64 file_extent_end(struct extent_buffer *leaf,
0114                struct btrfs_key *key,
0115                struct btrfs_file_extent_item *extent)
0116 {
0117     u64 end;
0118     u64 len;
0119 
0120     if (btrfs_file_extent_type(leaf, extent) == BTRFS_FILE_EXTENT_INLINE) {
0121         len = btrfs_file_extent_ram_bytes(leaf, extent);
0122         end = ALIGN(key->offset + len, leaf->fs_info->sectorsize);
0123     } else {
0124         len = btrfs_file_extent_num_bytes(leaf, extent);
0125         end = key->offset + len;
0126     }
0127     return end;
0128 }
0129 
0130 /*
0131  * Customized report for dir_item, the only new important information is
0132  * key->objectid, which represents inode number
0133  */
0134 __printf(3, 4)
0135 __cold
0136 static void dir_item_err(const struct extent_buffer *eb, int slot,
0137              const char *fmt, ...)
0138 {
0139     const struct btrfs_fs_info *fs_info = eb->fs_info;
0140     struct btrfs_key key;
0141     struct va_format vaf;
0142     va_list args;
0143 
0144     btrfs_item_key_to_cpu(eb, &key, slot);
0145     va_start(args, fmt);
0146 
0147     vaf.fmt = fmt;
0148     vaf.va = &args;
0149 
0150     btrfs_crit(fs_info,
0151         "corrupt %s: root=%llu block=%llu slot=%d ino=%llu, %pV",
0152         btrfs_header_level(eb) == 0 ? "leaf" : "node",
0153         btrfs_header_owner(eb), btrfs_header_bytenr(eb), slot,
0154         key.objectid, &vaf);
0155     va_end(args);
0156 }
0157 
0158 /*
0159  * This functions checks prev_key->objectid, to ensure current key and prev_key
0160  * share the same objectid as inode number.
0161  *
0162  * This is to detect missing INODE_ITEM in subvolume trees.
0163  *
0164  * Return true if everything is OK or we don't need to check.
0165  * Return false if anything is wrong.
0166  */
0167 static bool check_prev_ino(struct extent_buffer *leaf,
0168                struct btrfs_key *key, int slot,
0169                struct btrfs_key *prev_key)
0170 {
0171     /* No prev key, skip check */
0172     if (slot == 0)
0173         return true;
0174 
0175     /* Only these key->types needs to be checked */
0176     ASSERT(key->type == BTRFS_XATTR_ITEM_KEY ||
0177            key->type == BTRFS_INODE_REF_KEY ||
0178            key->type == BTRFS_DIR_INDEX_KEY ||
0179            key->type == BTRFS_DIR_ITEM_KEY ||
0180            key->type == BTRFS_EXTENT_DATA_KEY);
0181 
0182     /*
0183      * Only subvolume trees along with their reloc trees need this check.
0184      * Things like log tree doesn't follow this ino requirement.
0185      */
0186     if (!is_fstree(btrfs_header_owner(leaf)))
0187         return true;
0188 
0189     if (key->objectid == prev_key->objectid)
0190         return true;
0191 
0192     /* Error found */
0193     dir_item_err(leaf, slot,
0194         "invalid previous key objectid, have %llu expect %llu",
0195         prev_key->objectid, key->objectid);
0196     return false;
0197 }
0198 static int check_extent_data_item(struct extent_buffer *leaf,
0199                   struct btrfs_key *key, int slot,
0200                   struct btrfs_key *prev_key)
0201 {
0202     struct btrfs_fs_info *fs_info = leaf->fs_info;
0203     struct btrfs_file_extent_item *fi;
0204     u32 sectorsize = fs_info->sectorsize;
0205     u32 item_size = btrfs_item_size(leaf, slot);
0206     u64 extent_end;
0207 
0208     if (unlikely(!IS_ALIGNED(key->offset, sectorsize))) {
0209         file_extent_err(leaf, slot,
0210 "unaligned file_offset for file extent, have %llu should be aligned to %u",
0211             key->offset, sectorsize);
0212         return -EUCLEAN;
0213     }
0214 
0215     /*
0216      * Previous key must have the same key->objectid (ino).
0217      * It can be XATTR_ITEM, INODE_ITEM or just another EXTENT_DATA.
0218      * But if objectids mismatch, it means we have a missing
0219      * INODE_ITEM.
0220      */
0221     if (unlikely(!check_prev_ino(leaf, key, slot, prev_key)))
0222         return -EUCLEAN;
0223 
0224     fi = btrfs_item_ptr(leaf, slot, struct btrfs_file_extent_item);
0225 
0226     /*
0227      * Make sure the item contains at least inline header, so the file
0228      * extent type is not some garbage.
0229      */
0230     if (unlikely(item_size < BTRFS_FILE_EXTENT_INLINE_DATA_START)) {
0231         file_extent_err(leaf, slot,
0232                 "invalid item size, have %u expect [%zu, %u)",
0233                 item_size, BTRFS_FILE_EXTENT_INLINE_DATA_START,
0234                 SZ_4K);
0235         return -EUCLEAN;
0236     }
0237     if (unlikely(btrfs_file_extent_type(leaf, fi) >=
0238              BTRFS_NR_FILE_EXTENT_TYPES)) {
0239         file_extent_err(leaf, slot,
0240         "invalid type for file extent, have %u expect range [0, %u]",
0241             btrfs_file_extent_type(leaf, fi),
0242             BTRFS_NR_FILE_EXTENT_TYPES - 1);
0243         return -EUCLEAN;
0244     }
0245 
0246     /*
0247      * Support for new compression/encryption must introduce incompat flag,
0248      * and must be caught in open_ctree().
0249      */
0250     if (unlikely(btrfs_file_extent_compression(leaf, fi) >=
0251              BTRFS_NR_COMPRESS_TYPES)) {
0252         file_extent_err(leaf, slot,
0253     "invalid compression for file extent, have %u expect range [0, %u]",
0254             btrfs_file_extent_compression(leaf, fi),
0255             BTRFS_NR_COMPRESS_TYPES - 1);
0256         return -EUCLEAN;
0257     }
0258     if (unlikely(btrfs_file_extent_encryption(leaf, fi))) {
0259         file_extent_err(leaf, slot,
0260             "invalid encryption for file extent, have %u expect 0",
0261             btrfs_file_extent_encryption(leaf, fi));
0262         return -EUCLEAN;
0263     }
0264     if (btrfs_file_extent_type(leaf, fi) == BTRFS_FILE_EXTENT_INLINE) {
0265         /* Inline extent must have 0 as key offset */
0266         if (unlikely(key->offset)) {
0267             file_extent_err(leaf, slot,
0268         "invalid file_offset for inline file extent, have %llu expect 0",
0269                 key->offset);
0270             return -EUCLEAN;
0271         }
0272 
0273         /* Compressed inline extent has no on-disk size, skip it */
0274         if (btrfs_file_extent_compression(leaf, fi) !=
0275             BTRFS_COMPRESS_NONE)
0276             return 0;
0277 
0278         /* Uncompressed inline extent size must match item size */
0279         if (unlikely(item_size != BTRFS_FILE_EXTENT_INLINE_DATA_START +
0280                       btrfs_file_extent_ram_bytes(leaf, fi))) {
0281             file_extent_err(leaf, slot,
0282     "invalid ram_bytes for uncompressed inline extent, have %u expect %llu",
0283                 item_size, BTRFS_FILE_EXTENT_INLINE_DATA_START +
0284                 btrfs_file_extent_ram_bytes(leaf, fi));
0285             return -EUCLEAN;
0286         }
0287         return 0;
0288     }
0289 
0290     /* Regular or preallocated extent has fixed item size */
0291     if (unlikely(item_size != sizeof(*fi))) {
0292         file_extent_err(leaf, slot,
0293     "invalid item size for reg/prealloc file extent, have %u expect %zu",
0294             item_size, sizeof(*fi));
0295         return -EUCLEAN;
0296     }
0297     if (unlikely(CHECK_FE_ALIGNED(leaf, slot, fi, ram_bytes, sectorsize) ||
0298              CHECK_FE_ALIGNED(leaf, slot, fi, disk_bytenr, sectorsize) ||
0299              CHECK_FE_ALIGNED(leaf, slot, fi, disk_num_bytes, sectorsize) ||
0300              CHECK_FE_ALIGNED(leaf, slot, fi, offset, sectorsize) ||
0301              CHECK_FE_ALIGNED(leaf, slot, fi, num_bytes, sectorsize)))
0302         return -EUCLEAN;
0303 
0304     /* Catch extent end overflow */
0305     if (unlikely(check_add_overflow(btrfs_file_extent_num_bytes(leaf, fi),
0306                     key->offset, &extent_end))) {
0307         file_extent_err(leaf, slot,
0308     "extent end overflow, have file offset %llu extent num bytes %llu",
0309                 key->offset,
0310                 btrfs_file_extent_num_bytes(leaf, fi));
0311         return -EUCLEAN;
0312     }
0313 
0314     /*
0315      * Check that no two consecutive file extent items, in the same leaf,
0316      * present ranges that overlap each other.
0317      */
0318     if (slot > 0 &&
0319         prev_key->objectid == key->objectid &&
0320         prev_key->type == BTRFS_EXTENT_DATA_KEY) {
0321         struct btrfs_file_extent_item *prev_fi;
0322         u64 prev_end;
0323 
0324         prev_fi = btrfs_item_ptr(leaf, slot - 1,
0325                      struct btrfs_file_extent_item);
0326         prev_end = file_extent_end(leaf, prev_key, prev_fi);
0327         if (unlikely(prev_end > key->offset)) {
0328             file_extent_err(leaf, slot - 1,
0329 "file extent end range (%llu) goes beyond start offset (%llu) of the next file extent",
0330                     prev_end, key->offset);
0331             return -EUCLEAN;
0332         }
0333     }
0334 
0335     return 0;
0336 }
0337 
0338 static int check_csum_item(struct extent_buffer *leaf, struct btrfs_key *key,
0339                int slot, struct btrfs_key *prev_key)
0340 {
0341     struct btrfs_fs_info *fs_info = leaf->fs_info;
0342     u32 sectorsize = fs_info->sectorsize;
0343     const u32 csumsize = fs_info->csum_size;
0344 
0345     if (unlikely(key->objectid != BTRFS_EXTENT_CSUM_OBJECTID)) {
0346         generic_err(leaf, slot,
0347         "invalid key objectid for csum item, have %llu expect %llu",
0348             key->objectid, BTRFS_EXTENT_CSUM_OBJECTID);
0349         return -EUCLEAN;
0350     }
0351     if (unlikely(!IS_ALIGNED(key->offset, sectorsize))) {
0352         generic_err(leaf, slot,
0353     "unaligned key offset for csum item, have %llu should be aligned to %u",
0354             key->offset, sectorsize);
0355         return -EUCLEAN;
0356     }
0357     if (unlikely(!IS_ALIGNED(btrfs_item_size(leaf, slot), csumsize))) {
0358         generic_err(leaf, slot,
0359     "unaligned item size for csum item, have %u should be aligned to %u",
0360             btrfs_item_size(leaf, slot), csumsize);
0361         return -EUCLEAN;
0362     }
0363     if (slot > 0 && prev_key->type == BTRFS_EXTENT_CSUM_KEY) {
0364         u64 prev_csum_end;
0365         u32 prev_item_size;
0366 
0367         prev_item_size = btrfs_item_size(leaf, slot - 1);
0368         prev_csum_end = (prev_item_size / csumsize) * sectorsize;
0369         prev_csum_end += prev_key->offset;
0370         if (unlikely(prev_csum_end > key->offset)) {
0371             generic_err(leaf, slot - 1,
0372 "csum end range (%llu) goes beyond the start range (%llu) of the next csum item",
0373                     prev_csum_end, key->offset);
0374             return -EUCLEAN;
0375         }
0376     }
0377     return 0;
0378 }
0379 
0380 /* Inode item error output has the same format as dir_item_err() */
0381 #define inode_item_err(eb, slot, fmt, ...)          \
0382     dir_item_err(eb, slot, fmt, __VA_ARGS__)
0383 
0384 static int check_inode_key(struct extent_buffer *leaf, struct btrfs_key *key,
0385                int slot)
0386 {
0387     struct btrfs_key item_key;
0388     bool is_inode_item;
0389 
0390     btrfs_item_key_to_cpu(leaf, &item_key, slot);
0391     is_inode_item = (item_key.type == BTRFS_INODE_ITEM_KEY);
0392 
0393     /* For XATTR_ITEM, location key should be all 0 */
0394     if (item_key.type == BTRFS_XATTR_ITEM_KEY) {
0395         if (unlikely(key->objectid != 0 || key->type != 0 ||
0396                  key->offset != 0))
0397             return -EUCLEAN;
0398         return 0;
0399     }
0400 
0401     if (unlikely((key->objectid < BTRFS_FIRST_FREE_OBJECTID ||
0402               key->objectid > BTRFS_LAST_FREE_OBJECTID) &&
0403              key->objectid != BTRFS_ROOT_TREE_DIR_OBJECTID &&
0404              key->objectid != BTRFS_FREE_INO_OBJECTID)) {
0405         if (is_inode_item) {
0406             generic_err(leaf, slot,
0407     "invalid key objectid: has %llu expect %llu or [%llu, %llu] or %llu",
0408                 key->objectid, BTRFS_ROOT_TREE_DIR_OBJECTID,
0409                 BTRFS_FIRST_FREE_OBJECTID,
0410                 BTRFS_LAST_FREE_OBJECTID,
0411                 BTRFS_FREE_INO_OBJECTID);
0412         } else {
0413             dir_item_err(leaf, slot,
0414 "invalid location key objectid: has %llu expect %llu or [%llu, %llu] or %llu",
0415                 key->objectid, BTRFS_ROOT_TREE_DIR_OBJECTID,
0416                 BTRFS_FIRST_FREE_OBJECTID,
0417                 BTRFS_LAST_FREE_OBJECTID,
0418                 BTRFS_FREE_INO_OBJECTID);
0419         }
0420         return -EUCLEAN;
0421     }
0422     if (unlikely(key->offset != 0)) {
0423         if (is_inode_item)
0424             inode_item_err(leaf, slot,
0425                        "invalid key offset: has %llu expect 0",
0426                        key->offset);
0427         else
0428             dir_item_err(leaf, slot,
0429                 "invalid location key offset:has %llu expect 0",
0430                 key->offset);
0431         return -EUCLEAN;
0432     }
0433     return 0;
0434 }
0435 
0436 static int check_root_key(struct extent_buffer *leaf, struct btrfs_key *key,
0437               int slot)
0438 {
0439     struct btrfs_key item_key;
0440     bool is_root_item;
0441 
0442     btrfs_item_key_to_cpu(leaf, &item_key, slot);
0443     is_root_item = (item_key.type == BTRFS_ROOT_ITEM_KEY);
0444 
0445     /* No such tree id */
0446     if (unlikely(key->objectid == 0)) {
0447         if (is_root_item)
0448             generic_err(leaf, slot, "invalid root id 0");
0449         else
0450             dir_item_err(leaf, slot,
0451                      "invalid location key root id 0");
0452         return -EUCLEAN;
0453     }
0454 
0455     /* DIR_ITEM/INDEX/INODE_REF is not allowed to point to non-fs trees */
0456     if (unlikely(!is_fstree(key->objectid) && !is_root_item)) {
0457         dir_item_err(leaf, slot,
0458         "invalid location key objectid, have %llu expect [%llu, %llu]",
0459                 key->objectid, BTRFS_FIRST_FREE_OBJECTID,
0460                 BTRFS_LAST_FREE_OBJECTID);
0461         return -EUCLEAN;
0462     }
0463 
0464     /*
0465      * ROOT_ITEM with non-zero offset means this is a snapshot, created at
0466      * @offset transid.
0467      * Furthermore, for location key in DIR_ITEM, its offset is always -1.
0468      *
0469      * So here we only check offset for reloc tree whose key->offset must
0470      * be a valid tree.
0471      */
0472     if (unlikely(key->objectid == BTRFS_TREE_RELOC_OBJECTID &&
0473              key->offset == 0)) {
0474         generic_err(leaf, slot, "invalid root id 0 for reloc tree");
0475         return -EUCLEAN;
0476     }
0477     return 0;
0478 }
0479 
0480 static int check_dir_item(struct extent_buffer *leaf,
0481               struct btrfs_key *key, struct btrfs_key *prev_key,
0482               int slot)
0483 {
0484     struct btrfs_fs_info *fs_info = leaf->fs_info;
0485     struct btrfs_dir_item *di;
0486     u32 item_size = btrfs_item_size(leaf, slot);
0487     u32 cur = 0;
0488 
0489     if (unlikely(!check_prev_ino(leaf, key, slot, prev_key)))
0490         return -EUCLEAN;
0491 
0492     di = btrfs_item_ptr(leaf, slot, struct btrfs_dir_item);
0493     while (cur < item_size) {
0494         struct btrfs_key location_key;
0495         u32 name_len;
0496         u32 data_len;
0497         u32 max_name_len;
0498         u32 total_size;
0499         u32 name_hash;
0500         u8 dir_type;
0501         int ret;
0502 
0503         /* header itself should not cross item boundary */
0504         if (unlikely(cur + sizeof(*di) > item_size)) {
0505             dir_item_err(leaf, slot,
0506         "dir item header crosses item boundary, have %zu boundary %u",
0507                 cur + sizeof(*di), item_size);
0508             return -EUCLEAN;
0509         }
0510 
0511         /* Location key check */
0512         btrfs_dir_item_key_to_cpu(leaf, di, &location_key);
0513         if (location_key.type == BTRFS_ROOT_ITEM_KEY) {
0514             ret = check_root_key(leaf, &location_key, slot);
0515             if (unlikely(ret < 0))
0516                 return ret;
0517         } else if (location_key.type == BTRFS_INODE_ITEM_KEY ||
0518                location_key.type == 0) {
0519             ret = check_inode_key(leaf, &location_key, slot);
0520             if (unlikely(ret < 0))
0521                 return ret;
0522         } else {
0523             dir_item_err(leaf, slot,
0524             "invalid location key type, have %u, expect %u or %u",
0525                      location_key.type, BTRFS_ROOT_ITEM_KEY,
0526                      BTRFS_INODE_ITEM_KEY);
0527             return -EUCLEAN;
0528         }
0529 
0530         /* dir type check */
0531         dir_type = btrfs_dir_type(leaf, di);
0532         if (unlikely(dir_type >= BTRFS_FT_MAX)) {
0533             dir_item_err(leaf, slot,
0534             "invalid dir item type, have %u expect [0, %u)",
0535                 dir_type, BTRFS_FT_MAX);
0536             return -EUCLEAN;
0537         }
0538 
0539         if (unlikely(key->type == BTRFS_XATTR_ITEM_KEY &&
0540                  dir_type != BTRFS_FT_XATTR)) {
0541             dir_item_err(leaf, slot,
0542         "invalid dir item type for XATTR key, have %u expect %u",
0543                 dir_type, BTRFS_FT_XATTR);
0544             return -EUCLEAN;
0545         }
0546         if (unlikely(dir_type == BTRFS_FT_XATTR &&
0547                  key->type != BTRFS_XATTR_ITEM_KEY)) {
0548             dir_item_err(leaf, slot,
0549             "xattr dir type found for non-XATTR key");
0550             return -EUCLEAN;
0551         }
0552         if (dir_type == BTRFS_FT_XATTR)
0553             max_name_len = XATTR_NAME_MAX;
0554         else
0555             max_name_len = BTRFS_NAME_LEN;
0556 
0557         /* Name/data length check */
0558         name_len = btrfs_dir_name_len(leaf, di);
0559         data_len = btrfs_dir_data_len(leaf, di);
0560         if (unlikely(name_len > max_name_len)) {
0561             dir_item_err(leaf, slot,
0562             "dir item name len too long, have %u max %u",
0563                 name_len, max_name_len);
0564             return -EUCLEAN;
0565         }
0566         if (unlikely(name_len + data_len > BTRFS_MAX_XATTR_SIZE(fs_info))) {
0567             dir_item_err(leaf, slot,
0568             "dir item name and data len too long, have %u max %u",
0569                 name_len + data_len,
0570                 BTRFS_MAX_XATTR_SIZE(fs_info));
0571             return -EUCLEAN;
0572         }
0573 
0574         if (unlikely(data_len && dir_type != BTRFS_FT_XATTR)) {
0575             dir_item_err(leaf, slot,
0576             "dir item with invalid data len, have %u expect 0",
0577                 data_len);
0578             return -EUCLEAN;
0579         }
0580 
0581         total_size = sizeof(*di) + name_len + data_len;
0582 
0583         /* header and name/data should not cross item boundary */
0584         if (unlikely(cur + total_size > item_size)) {
0585             dir_item_err(leaf, slot,
0586         "dir item data crosses item boundary, have %u boundary %u",
0587                 cur + total_size, item_size);
0588             return -EUCLEAN;
0589         }
0590 
0591         /*
0592          * Special check for XATTR/DIR_ITEM, as key->offset is name
0593          * hash, should match its name
0594          */
0595         if (key->type == BTRFS_DIR_ITEM_KEY ||
0596             key->type == BTRFS_XATTR_ITEM_KEY) {
0597             char namebuf[max(BTRFS_NAME_LEN, XATTR_NAME_MAX)];
0598 
0599             read_extent_buffer(leaf, namebuf,
0600                     (unsigned long)(di + 1), name_len);
0601             name_hash = btrfs_name_hash(namebuf, name_len);
0602             if (unlikely(key->offset != name_hash)) {
0603                 dir_item_err(leaf, slot,
0604         "name hash mismatch with key, have 0x%016x expect 0x%016llx",
0605                     name_hash, key->offset);
0606                 return -EUCLEAN;
0607             }
0608         }
0609         cur += total_size;
0610         di = (struct btrfs_dir_item *)((void *)di + total_size);
0611     }
0612     return 0;
0613 }
0614 
0615 __printf(3, 4)
0616 __cold
0617 static void block_group_err(const struct extent_buffer *eb, int slot,
0618                 const char *fmt, ...)
0619 {
0620     const struct btrfs_fs_info *fs_info = eb->fs_info;
0621     struct btrfs_key key;
0622     struct va_format vaf;
0623     va_list args;
0624 
0625     btrfs_item_key_to_cpu(eb, &key, slot);
0626     va_start(args, fmt);
0627 
0628     vaf.fmt = fmt;
0629     vaf.va = &args;
0630 
0631     btrfs_crit(fs_info,
0632     "corrupt %s: root=%llu block=%llu slot=%d bg_start=%llu bg_len=%llu, %pV",
0633         btrfs_header_level(eb) == 0 ? "leaf" : "node",
0634         btrfs_header_owner(eb), btrfs_header_bytenr(eb), slot,
0635         key.objectid, key.offset, &vaf);
0636     va_end(args);
0637 }
0638 
0639 static int check_block_group_item(struct extent_buffer *leaf,
0640                   struct btrfs_key *key, int slot)
0641 {
0642     struct btrfs_fs_info *fs_info = leaf->fs_info;
0643     struct btrfs_block_group_item bgi;
0644     u32 item_size = btrfs_item_size(leaf, slot);
0645     u64 chunk_objectid;
0646     u64 flags;
0647     u64 type;
0648 
0649     /*
0650      * Here we don't really care about alignment since extent allocator can
0651      * handle it.  We care more about the size.
0652      */
0653     if (unlikely(key->offset == 0)) {
0654         block_group_err(leaf, slot,
0655                 "invalid block group size 0");
0656         return -EUCLEAN;
0657     }
0658 
0659     if (unlikely(item_size != sizeof(bgi))) {
0660         block_group_err(leaf, slot,
0661             "invalid item size, have %u expect %zu",
0662                 item_size, sizeof(bgi));
0663         return -EUCLEAN;
0664     }
0665 
0666     read_extent_buffer(leaf, &bgi, btrfs_item_ptr_offset(leaf, slot),
0667                sizeof(bgi));
0668     chunk_objectid = btrfs_stack_block_group_chunk_objectid(&bgi);
0669     if (btrfs_fs_incompat(fs_info, EXTENT_TREE_V2)) {
0670         /*
0671          * We don't init the nr_global_roots until we load the global
0672          * roots, so this could be 0 at mount time.  If it's 0 we'll
0673          * just assume we're fine, and later we'll check against our
0674          * actual value.
0675          */
0676         if (unlikely(fs_info->nr_global_roots &&
0677                  chunk_objectid >= fs_info->nr_global_roots)) {
0678             block_group_err(leaf, slot,
0679     "invalid block group global root id, have %llu, needs to be <= %llu",
0680                     chunk_objectid,
0681                     fs_info->nr_global_roots);
0682             return -EUCLEAN;
0683         }
0684     } else if (unlikely(chunk_objectid != BTRFS_FIRST_CHUNK_TREE_OBJECTID)) {
0685         block_group_err(leaf, slot,
0686         "invalid block group chunk objectid, have %llu expect %llu",
0687                 btrfs_stack_block_group_chunk_objectid(&bgi),
0688                 BTRFS_FIRST_CHUNK_TREE_OBJECTID);
0689         return -EUCLEAN;
0690     }
0691 
0692     if (unlikely(btrfs_stack_block_group_used(&bgi) > key->offset)) {
0693         block_group_err(leaf, slot,
0694             "invalid block group used, have %llu expect [0, %llu)",
0695                 btrfs_stack_block_group_used(&bgi), key->offset);
0696         return -EUCLEAN;
0697     }
0698 
0699     flags = btrfs_stack_block_group_flags(&bgi);
0700     if (unlikely(hweight64(flags & BTRFS_BLOCK_GROUP_PROFILE_MASK) > 1)) {
0701         block_group_err(leaf, slot,
0702 "invalid profile flags, have 0x%llx (%lu bits set) expect no more than 1 bit set",
0703             flags & BTRFS_BLOCK_GROUP_PROFILE_MASK,
0704             hweight64(flags & BTRFS_BLOCK_GROUP_PROFILE_MASK));
0705         return -EUCLEAN;
0706     }
0707 
0708     type = flags & BTRFS_BLOCK_GROUP_TYPE_MASK;
0709     if (unlikely(type != BTRFS_BLOCK_GROUP_DATA &&
0710              type != BTRFS_BLOCK_GROUP_METADATA &&
0711              type != BTRFS_BLOCK_GROUP_SYSTEM &&
0712              type != (BTRFS_BLOCK_GROUP_METADATA |
0713                   BTRFS_BLOCK_GROUP_DATA))) {
0714         block_group_err(leaf, slot,
0715 "invalid type, have 0x%llx (%lu bits set) expect either 0x%llx, 0x%llx, 0x%llx or 0x%llx",
0716             type, hweight64(type),
0717             BTRFS_BLOCK_GROUP_DATA, BTRFS_BLOCK_GROUP_METADATA,
0718             BTRFS_BLOCK_GROUP_SYSTEM,
0719             BTRFS_BLOCK_GROUP_METADATA | BTRFS_BLOCK_GROUP_DATA);
0720         return -EUCLEAN;
0721     }
0722     return 0;
0723 }
0724 
0725 __printf(4, 5)
0726 __cold
0727 static void chunk_err(const struct extent_buffer *leaf,
0728               const struct btrfs_chunk *chunk, u64 logical,
0729               const char *fmt, ...)
0730 {
0731     const struct btrfs_fs_info *fs_info = leaf->fs_info;
0732     bool is_sb;
0733     struct va_format vaf;
0734     va_list args;
0735     int i;
0736     int slot = -1;
0737 
0738     /* Only superblock eb is able to have such small offset */
0739     is_sb = (leaf->start == BTRFS_SUPER_INFO_OFFSET);
0740 
0741     if (!is_sb) {
0742         /*
0743          * Get the slot number by iterating through all slots, this
0744          * would provide better readability.
0745          */
0746         for (i = 0; i < btrfs_header_nritems(leaf); i++) {
0747             if (btrfs_item_ptr_offset(leaf, i) ==
0748                     (unsigned long)chunk) {
0749                 slot = i;
0750                 break;
0751             }
0752         }
0753     }
0754     va_start(args, fmt);
0755     vaf.fmt = fmt;
0756     vaf.va = &args;
0757 
0758     if (is_sb)
0759         btrfs_crit(fs_info,
0760         "corrupt superblock syschunk array: chunk_start=%llu, %pV",
0761                logical, &vaf);
0762     else
0763         btrfs_crit(fs_info,
0764     "corrupt leaf: root=%llu block=%llu slot=%d chunk_start=%llu, %pV",
0765                BTRFS_CHUNK_TREE_OBJECTID, leaf->start, slot,
0766                logical, &vaf);
0767     va_end(args);
0768 }
0769 
0770 /*
0771  * The common chunk check which could also work on super block sys chunk array.
0772  *
0773  * Return -EUCLEAN if anything is corrupted.
0774  * Return 0 if everything is OK.
0775  */
0776 int btrfs_check_chunk_valid(struct extent_buffer *leaf,
0777                 struct btrfs_chunk *chunk, u64 logical)
0778 {
0779     struct btrfs_fs_info *fs_info = leaf->fs_info;
0780     u64 length;
0781     u64 chunk_end;
0782     u64 stripe_len;
0783     u16 num_stripes;
0784     u16 sub_stripes;
0785     u64 type;
0786     u64 features;
0787     bool mixed = false;
0788     int raid_index;
0789     int nparity;
0790     int ncopies;
0791 
0792     length = btrfs_chunk_length(leaf, chunk);
0793     stripe_len = btrfs_chunk_stripe_len(leaf, chunk);
0794     num_stripes = btrfs_chunk_num_stripes(leaf, chunk);
0795     sub_stripes = btrfs_chunk_sub_stripes(leaf, chunk);
0796     type = btrfs_chunk_type(leaf, chunk);
0797     raid_index = btrfs_bg_flags_to_raid_index(type);
0798     ncopies = btrfs_raid_array[raid_index].ncopies;
0799     nparity = btrfs_raid_array[raid_index].nparity;
0800 
0801     if (unlikely(!num_stripes)) {
0802         chunk_err(leaf, chunk, logical,
0803               "invalid chunk num_stripes, have %u", num_stripes);
0804         return -EUCLEAN;
0805     }
0806     if (unlikely(num_stripes < ncopies)) {
0807         chunk_err(leaf, chunk, logical,
0808               "invalid chunk num_stripes < ncopies, have %u < %d",
0809               num_stripes, ncopies);
0810         return -EUCLEAN;
0811     }
0812     if (unlikely(nparity && num_stripes == nparity)) {
0813         chunk_err(leaf, chunk, logical,
0814               "invalid chunk num_stripes == nparity, have %u == %d",
0815               num_stripes, nparity);
0816         return -EUCLEAN;
0817     }
0818     if (unlikely(!IS_ALIGNED(logical, fs_info->sectorsize))) {
0819         chunk_err(leaf, chunk, logical,
0820         "invalid chunk logical, have %llu should aligned to %u",
0821               logical, fs_info->sectorsize);
0822         return -EUCLEAN;
0823     }
0824     if (unlikely(btrfs_chunk_sector_size(leaf, chunk) != fs_info->sectorsize)) {
0825         chunk_err(leaf, chunk, logical,
0826               "invalid chunk sectorsize, have %u expect %u",
0827               btrfs_chunk_sector_size(leaf, chunk),
0828               fs_info->sectorsize);
0829         return -EUCLEAN;
0830     }
0831     if (unlikely(!length || !IS_ALIGNED(length, fs_info->sectorsize))) {
0832         chunk_err(leaf, chunk, logical,
0833               "invalid chunk length, have %llu", length);
0834         return -EUCLEAN;
0835     }
0836     if (unlikely(check_add_overflow(logical, length, &chunk_end))) {
0837         chunk_err(leaf, chunk, logical,
0838 "invalid chunk logical start and length, have logical start %llu length %llu",
0839               logical, length);
0840         return -EUCLEAN;
0841     }
0842     if (unlikely(!is_power_of_2(stripe_len) || stripe_len != BTRFS_STRIPE_LEN)) {
0843         chunk_err(leaf, chunk, logical,
0844               "invalid chunk stripe length: %llu",
0845               stripe_len);
0846         return -EUCLEAN;
0847     }
0848     if (unlikely(type & ~(BTRFS_BLOCK_GROUP_TYPE_MASK |
0849                   BTRFS_BLOCK_GROUP_PROFILE_MASK))) {
0850         chunk_err(leaf, chunk, logical,
0851               "unrecognized chunk type: 0x%llx",
0852               ~(BTRFS_BLOCK_GROUP_TYPE_MASK |
0853                 BTRFS_BLOCK_GROUP_PROFILE_MASK) &
0854               btrfs_chunk_type(leaf, chunk));
0855         return -EUCLEAN;
0856     }
0857 
0858     if (unlikely(!has_single_bit_set(type & BTRFS_BLOCK_GROUP_PROFILE_MASK) &&
0859              (type & BTRFS_BLOCK_GROUP_PROFILE_MASK) != 0)) {
0860         chunk_err(leaf, chunk, logical,
0861         "invalid chunk profile flag: 0x%llx, expect 0 or 1 bit set",
0862               type & BTRFS_BLOCK_GROUP_PROFILE_MASK);
0863         return -EUCLEAN;
0864     }
0865     if (unlikely((type & BTRFS_BLOCK_GROUP_TYPE_MASK) == 0)) {
0866         chunk_err(leaf, chunk, logical,
0867     "missing chunk type flag, have 0x%llx one bit must be set in 0x%llx",
0868               type, BTRFS_BLOCK_GROUP_TYPE_MASK);
0869         return -EUCLEAN;
0870     }
0871 
0872     if (unlikely((type & BTRFS_BLOCK_GROUP_SYSTEM) &&
0873              (type & (BTRFS_BLOCK_GROUP_METADATA |
0874                   BTRFS_BLOCK_GROUP_DATA)))) {
0875         chunk_err(leaf, chunk, logical,
0876               "system chunk with data or metadata type: 0x%llx",
0877               type);
0878         return -EUCLEAN;
0879     }
0880 
0881     features = btrfs_super_incompat_flags(fs_info->super_copy);
0882     if (features & BTRFS_FEATURE_INCOMPAT_MIXED_GROUPS)
0883         mixed = true;
0884 
0885     if (!mixed) {
0886         if (unlikely((type & BTRFS_BLOCK_GROUP_METADATA) &&
0887                  (type & BTRFS_BLOCK_GROUP_DATA))) {
0888             chunk_err(leaf, chunk, logical,
0889             "mixed chunk type in non-mixed mode: 0x%llx", type);
0890             return -EUCLEAN;
0891         }
0892     }
0893 
0894     if (unlikely((type & BTRFS_BLOCK_GROUP_RAID10 &&
0895               sub_stripes != btrfs_raid_array[BTRFS_RAID_RAID10].sub_stripes) ||
0896              (type & BTRFS_BLOCK_GROUP_RAID1 &&
0897               num_stripes != btrfs_raid_array[BTRFS_RAID_RAID1].devs_min) ||
0898              (type & BTRFS_BLOCK_GROUP_RAID1C3 &&
0899               num_stripes != btrfs_raid_array[BTRFS_RAID_RAID1C3].devs_min) ||
0900              (type & BTRFS_BLOCK_GROUP_RAID1C4 &&
0901               num_stripes != btrfs_raid_array[BTRFS_RAID_RAID1C4].devs_min) ||
0902              (type & BTRFS_BLOCK_GROUP_RAID5 &&
0903               num_stripes < btrfs_raid_array[BTRFS_RAID_RAID5].devs_min) ||
0904              (type & BTRFS_BLOCK_GROUP_RAID6 &&
0905               num_stripes < btrfs_raid_array[BTRFS_RAID_RAID6].devs_min) ||
0906              (type & BTRFS_BLOCK_GROUP_DUP &&
0907               num_stripes != btrfs_raid_array[BTRFS_RAID_DUP].dev_stripes) ||
0908              ((type & BTRFS_BLOCK_GROUP_PROFILE_MASK) == 0 &&
0909               num_stripes != btrfs_raid_array[BTRFS_RAID_SINGLE].dev_stripes))) {
0910         chunk_err(leaf, chunk, logical,
0911             "invalid num_stripes:sub_stripes %u:%u for profile %llu",
0912             num_stripes, sub_stripes,
0913             type & BTRFS_BLOCK_GROUP_PROFILE_MASK);
0914         return -EUCLEAN;
0915     }
0916 
0917     return 0;
0918 }
0919 
0920 /*
0921  * Enhanced version of chunk item checker.
0922  *
0923  * The common btrfs_check_chunk_valid() doesn't check item size since it needs
0924  * to work on super block sys_chunk_array which doesn't have full item ptr.
0925  */
0926 static int check_leaf_chunk_item(struct extent_buffer *leaf,
0927                  struct btrfs_chunk *chunk,
0928                  struct btrfs_key *key, int slot)
0929 {
0930     int num_stripes;
0931 
0932     if (unlikely(btrfs_item_size(leaf, slot) < sizeof(struct btrfs_chunk))) {
0933         chunk_err(leaf, chunk, key->offset,
0934             "invalid chunk item size: have %u expect [%zu, %u)",
0935             btrfs_item_size(leaf, slot),
0936             sizeof(struct btrfs_chunk),
0937             BTRFS_LEAF_DATA_SIZE(leaf->fs_info));
0938         return -EUCLEAN;
0939     }
0940 
0941     num_stripes = btrfs_chunk_num_stripes(leaf, chunk);
0942     /* Let btrfs_check_chunk_valid() handle this error type */
0943     if (num_stripes == 0)
0944         goto out;
0945 
0946     if (unlikely(btrfs_chunk_item_size(num_stripes) !=
0947              btrfs_item_size(leaf, slot))) {
0948         chunk_err(leaf, chunk, key->offset,
0949             "invalid chunk item size: have %u expect %lu",
0950             btrfs_item_size(leaf, slot),
0951             btrfs_chunk_item_size(num_stripes));
0952         return -EUCLEAN;
0953     }
0954 out:
0955     return btrfs_check_chunk_valid(leaf, chunk, key->offset);
0956 }
0957 
0958 __printf(3, 4)
0959 __cold
0960 static void dev_item_err(const struct extent_buffer *eb, int slot,
0961              const char *fmt, ...)
0962 {
0963     struct btrfs_key key;
0964     struct va_format vaf;
0965     va_list args;
0966 
0967     btrfs_item_key_to_cpu(eb, &key, slot);
0968     va_start(args, fmt);
0969 
0970     vaf.fmt = fmt;
0971     vaf.va = &args;
0972 
0973     btrfs_crit(eb->fs_info,
0974     "corrupt %s: root=%llu block=%llu slot=%d devid=%llu %pV",
0975         btrfs_header_level(eb) == 0 ? "leaf" : "node",
0976         btrfs_header_owner(eb), btrfs_header_bytenr(eb), slot,
0977         key.objectid, &vaf);
0978     va_end(args);
0979 }
0980 
0981 static int check_dev_item(struct extent_buffer *leaf,
0982               struct btrfs_key *key, int slot)
0983 {
0984     struct btrfs_dev_item *ditem;
0985     const u32 item_size = btrfs_item_size(leaf, slot);
0986 
0987     if (unlikely(key->objectid != BTRFS_DEV_ITEMS_OBJECTID)) {
0988         dev_item_err(leaf, slot,
0989                  "invalid objectid: has=%llu expect=%llu",
0990                  key->objectid, BTRFS_DEV_ITEMS_OBJECTID);
0991         return -EUCLEAN;
0992     }
0993 
0994     if (unlikely(item_size != sizeof(*ditem))) {
0995         dev_item_err(leaf, slot, "invalid item size: has %u expect %zu",
0996                  item_size, sizeof(*ditem));
0997         return -EUCLEAN;
0998     }
0999 
1000     ditem = btrfs_item_ptr(leaf, slot, struct btrfs_dev_item);
1001     if (unlikely(btrfs_device_id(leaf, ditem) != key->offset)) {
1002         dev_item_err(leaf, slot,
1003                  "devid mismatch: key has=%llu item has=%llu",
1004                  key->offset, btrfs_device_id(leaf, ditem));
1005         return -EUCLEAN;
1006     }
1007 
1008     /*
1009      * For device total_bytes, we don't have reliable way to check it, as
1010      * it can be 0 for device removal. Device size check can only be done
1011      * by dev extents check.
1012      */
1013     if (unlikely(btrfs_device_bytes_used(leaf, ditem) >
1014              btrfs_device_total_bytes(leaf, ditem))) {
1015         dev_item_err(leaf, slot,
1016                  "invalid bytes used: have %llu expect [0, %llu]",
1017                  btrfs_device_bytes_used(leaf, ditem),
1018                  btrfs_device_total_bytes(leaf, ditem));
1019         return -EUCLEAN;
1020     }
1021     /*
1022      * Remaining members like io_align/type/gen/dev_group aren't really
1023      * utilized.  Skip them to make later usage of them easier.
1024      */
1025     return 0;
1026 }
1027 
1028 static int check_inode_item(struct extent_buffer *leaf,
1029                 struct btrfs_key *key, int slot)
1030 {
1031     struct btrfs_fs_info *fs_info = leaf->fs_info;
1032     struct btrfs_inode_item *iitem;
1033     u64 super_gen = btrfs_super_generation(fs_info->super_copy);
1034     u32 valid_mask = (S_IFMT | S_ISUID | S_ISGID | S_ISVTX | 0777);
1035     const u32 item_size = btrfs_item_size(leaf, slot);
1036     u32 mode;
1037     int ret;
1038     u32 flags;
1039     u32 ro_flags;
1040 
1041     ret = check_inode_key(leaf, key, slot);
1042     if (unlikely(ret < 0))
1043         return ret;
1044 
1045     if (unlikely(item_size != sizeof(*iitem))) {
1046         generic_err(leaf, slot, "invalid item size: has %u expect %zu",
1047                 item_size, sizeof(*iitem));
1048         return -EUCLEAN;
1049     }
1050 
1051     iitem = btrfs_item_ptr(leaf, slot, struct btrfs_inode_item);
1052 
1053     /* Here we use super block generation + 1 to handle log tree */
1054     if (unlikely(btrfs_inode_generation(leaf, iitem) > super_gen + 1)) {
1055         inode_item_err(leaf, slot,
1056             "invalid inode generation: has %llu expect (0, %llu]",
1057                    btrfs_inode_generation(leaf, iitem),
1058                    super_gen + 1);
1059         return -EUCLEAN;
1060     }
1061     /* Note for ROOT_TREE_DIR_ITEM, mkfs could set its transid 0 */
1062     if (unlikely(btrfs_inode_transid(leaf, iitem) > super_gen + 1)) {
1063         inode_item_err(leaf, slot,
1064             "invalid inode transid: has %llu expect [0, %llu]",
1065                    btrfs_inode_transid(leaf, iitem), super_gen + 1);
1066         return -EUCLEAN;
1067     }
1068 
1069     /*
1070      * For size and nbytes it's better not to be too strict, as for dir
1071      * item its size/nbytes can easily get wrong, but doesn't affect
1072      * anything in the fs. So here we skip the check.
1073      */
1074     mode = btrfs_inode_mode(leaf, iitem);
1075     if (unlikely(mode & ~valid_mask)) {
1076         inode_item_err(leaf, slot,
1077                    "unknown mode bit detected: 0x%x",
1078                    mode & ~valid_mask);
1079         return -EUCLEAN;
1080     }
1081 
1082     /*
1083      * S_IFMT is not bit mapped so we can't completely rely on
1084      * is_power_of_2/has_single_bit_set, but it can save us from checking
1085      * FIFO/CHR/DIR/REG.  Only needs to check BLK, LNK and SOCKS
1086      */
1087     if (!has_single_bit_set(mode & S_IFMT)) {
1088         if (unlikely(!S_ISLNK(mode) && !S_ISBLK(mode) && !S_ISSOCK(mode))) {
1089             inode_item_err(leaf, slot,
1090             "invalid mode: has 0%o expect valid S_IF* bit(s)",
1091                        mode & S_IFMT);
1092             return -EUCLEAN;
1093         }
1094     }
1095     if (unlikely(S_ISDIR(mode) && btrfs_inode_nlink(leaf, iitem) > 1)) {
1096         inode_item_err(leaf, slot,
1097                "invalid nlink: has %u expect no more than 1 for dir",
1098             btrfs_inode_nlink(leaf, iitem));
1099         return -EUCLEAN;
1100     }
1101     btrfs_inode_split_flags(btrfs_inode_flags(leaf, iitem), &flags, &ro_flags);
1102     if (unlikely(flags & ~BTRFS_INODE_FLAG_MASK)) {
1103         inode_item_err(leaf, slot,
1104                    "unknown incompat flags detected: 0x%x", flags);
1105         return -EUCLEAN;
1106     }
1107     if (unlikely(!sb_rdonly(fs_info->sb) &&
1108              (ro_flags & ~BTRFS_INODE_RO_FLAG_MASK))) {
1109         inode_item_err(leaf, slot,
1110             "unknown ro-compat flags detected on writeable mount: 0x%x",
1111             ro_flags);
1112         return -EUCLEAN;
1113     }
1114     return 0;
1115 }
1116 
1117 static int check_root_item(struct extent_buffer *leaf, struct btrfs_key *key,
1118                int slot)
1119 {
1120     struct btrfs_fs_info *fs_info = leaf->fs_info;
1121     struct btrfs_root_item ri = { 0 };
1122     const u64 valid_root_flags = BTRFS_ROOT_SUBVOL_RDONLY |
1123                      BTRFS_ROOT_SUBVOL_DEAD;
1124     int ret;
1125 
1126     ret = check_root_key(leaf, key, slot);
1127     if (unlikely(ret < 0))
1128         return ret;
1129 
1130     if (unlikely(btrfs_item_size(leaf, slot) != sizeof(ri) &&
1131              btrfs_item_size(leaf, slot) !=
1132              btrfs_legacy_root_item_size())) {
1133         generic_err(leaf, slot,
1134                 "invalid root item size, have %u expect %zu or %u",
1135                 btrfs_item_size(leaf, slot), sizeof(ri),
1136                 btrfs_legacy_root_item_size());
1137         return -EUCLEAN;
1138     }
1139 
1140     /*
1141      * For legacy root item, the members starting at generation_v2 will be
1142      * all filled with 0.
1143      * And since we allow geneartion_v2 as 0, it will still pass the check.
1144      */
1145     read_extent_buffer(leaf, &ri, btrfs_item_ptr_offset(leaf, slot),
1146                btrfs_item_size(leaf, slot));
1147 
1148     /* Generation related */
1149     if (unlikely(btrfs_root_generation(&ri) >
1150              btrfs_super_generation(fs_info->super_copy) + 1)) {
1151         generic_err(leaf, slot,
1152             "invalid root generation, have %llu expect (0, %llu]",
1153                 btrfs_root_generation(&ri),
1154                 btrfs_super_generation(fs_info->super_copy) + 1);
1155         return -EUCLEAN;
1156     }
1157     if (unlikely(btrfs_root_generation_v2(&ri) >
1158              btrfs_super_generation(fs_info->super_copy) + 1)) {
1159         generic_err(leaf, slot,
1160         "invalid root v2 generation, have %llu expect (0, %llu]",
1161                 btrfs_root_generation_v2(&ri),
1162                 btrfs_super_generation(fs_info->super_copy) + 1);
1163         return -EUCLEAN;
1164     }
1165     if (unlikely(btrfs_root_last_snapshot(&ri) >
1166              btrfs_super_generation(fs_info->super_copy) + 1)) {
1167         generic_err(leaf, slot,
1168         "invalid root last_snapshot, have %llu expect (0, %llu]",
1169                 btrfs_root_last_snapshot(&ri),
1170                 btrfs_super_generation(fs_info->super_copy) + 1);
1171         return -EUCLEAN;
1172     }
1173 
1174     /* Alignment and level check */
1175     if (unlikely(!IS_ALIGNED(btrfs_root_bytenr(&ri), fs_info->sectorsize))) {
1176         generic_err(leaf, slot,
1177         "invalid root bytenr, have %llu expect to be aligned to %u",
1178                 btrfs_root_bytenr(&ri), fs_info->sectorsize);
1179         return -EUCLEAN;
1180     }
1181     if (unlikely(btrfs_root_level(&ri) >= BTRFS_MAX_LEVEL)) {
1182         generic_err(leaf, slot,
1183                 "invalid root level, have %u expect [0, %u]",
1184                 btrfs_root_level(&ri), BTRFS_MAX_LEVEL - 1);
1185         return -EUCLEAN;
1186     }
1187     if (unlikely(btrfs_root_drop_level(&ri) >= BTRFS_MAX_LEVEL)) {
1188         generic_err(leaf, slot,
1189                 "invalid root level, have %u expect [0, %u]",
1190                 btrfs_root_drop_level(&ri), BTRFS_MAX_LEVEL - 1);
1191         return -EUCLEAN;
1192     }
1193 
1194     /* Flags check */
1195     if (unlikely(btrfs_root_flags(&ri) & ~valid_root_flags)) {
1196         generic_err(leaf, slot,
1197                 "invalid root flags, have 0x%llx expect mask 0x%llx",
1198                 btrfs_root_flags(&ri), valid_root_flags);
1199         return -EUCLEAN;
1200     }
1201     return 0;
1202 }
1203 
1204 __printf(3,4)
1205 __cold
1206 static void extent_err(const struct extent_buffer *eb, int slot,
1207                const char *fmt, ...)
1208 {
1209     struct btrfs_key key;
1210     struct va_format vaf;
1211     va_list args;
1212     u64 bytenr;
1213     u64 len;
1214 
1215     btrfs_item_key_to_cpu(eb, &key, slot);
1216     bytenr = key.objectid;
1217     if (key.type == BTRFS_METADATA_ITEM_KEY ||
1218         key.type == BTRFS_TREE_BLOCK_REF_KEY ||
1219         key.type == BTRFS_SHARED_BLOCK_REF_KEY)
1220         len = eb->fs_info->nodesize;
1221     else
1222         len = key.offset;
1223     va_start(args, fmt);
1224 
1225     vaf.fmt = fmt;
1226     vaf.va = &args;
1227 
1228     btrfs_crit(eb->fs_info,
1229     "corrupt %s: block=%llu slot=%d extent bytenr=%llu len=%llu %pV",
1230         btrfs_header_level(eb) == 0 ? "leaf" : "node",
1231         eb->start, slot, bytenr, len, &vaf);
1232     va_end(args);
1233 }
1234 
1235 static int check_extent_item(struct extent_buffer *leaf,
1236                  struct btrfs_key *key, int slot,
1237                  struct btrfs_key *prev_key)
1238 {
1239     struct btrfs_fs_info *fs_info = leaf->fs_info;
1240     struct btrfs_extent_item *ei;
1241     bool is_tree_block = false;
1242     unsigned long ptr;  /* Current pointer inside inline refs */
1243     unsigned long end;  /* Extent item end */
1244     const u32 item_size = btrfs_item_size(leaf, slot);
1245     u64 flags;
1246     u64 generation;
1247     u64 total_refs;     /* Total refs in btrfs_extent_item */
1248     u64 inline_refs = 0;    /* found total inline refs */
1249 
1250     if (unlikely(key->type == BTRFS_METADATA_ITEM_KEY &&
1251              !btrfs_fs_incompat(fs_info, SKINNY_METADATA))) {
1252         generic_err(leaf, slot,
1253 "invalid key type, METADATA_ITEM type invalid when SKINNY_METADATA feature disabled");
1254         return -EUCLEAN;
1255     }
1256     /* key->objectid is the bytenr for both key types */
1257     if (unlikely(!IS_ALIGNED(key->objectid, fs_info->sectorsize))) {
1258         generic_err(leaf, slot,
1259         "invalid key objectid, have %llu expect to be aligned to %u",
1260                key->objectid, fs_info->sectorsize);
1261         return -EUCLEAN;
1262     }
1263 
1264     /* key->offset is tree level for METADATA_ITEM_KEY */
1265     if (unlikely(key->type == BTRFS_METADATA_ITEM_KEY &&
1266              key->offset >= BTRFS_MAX_LEVEL)) {
1267         extent_err(leaf, slot,
1268                "invalid tree level, have %llu expect [0, %u]",
1269                key->offset, BTRFS_MAX_LEVEL - 1);
1270         return -EUCLEAN;
1271     }
1272 
1273     /*
1274      * EXTENT/METADATA_ITEM consists of:
1275      * 1) One btrfs_extent_item
1276      *    Records the total refs, type and generation of the extent.
1277      *
1278      * 2) One btrfs_tree_block_info (for EXTENT_ITEM and tree backref only)
1279      *    Records the first key and level of the tree block.
1280      *
1281      * 2) Zero or more btrfs_extent_inline_ref(s)
1282      *    Each inline ref has one btrfs_extent_inline_ref shows:
1283      *    2.1) The ref type, one of the 4
1284      *         TREE_BLOCK_REF   Tree block only
1285      *         SHARED_BLOCK_REF Tree block only
1286      *         EXTENT_DATA_REF  Data only
1287      *         SHARED_DATA_REF  Data only
1288      *    2.2) Ref type specific data
1289      *         Either using btrfs_extent_inline_ref::offset, or specific
1290      *         data structure.
1291      */
1292     if (unlikely(item_size < sizeof(*ei))) {
1293         extent_err(leaf, slot,
1294                "invalid item size, have %u expect [%zu, %u)",
1295                item_size, sizeof(*ei),
1296                BTRFS_LEAF_DATA_SIZE(fs_info));
1297         return -EUCLEAN;
1298     }
1299     end = item_size + btrfs_item_ptr_offset(leaf, slot);
1300 
1301     /* Checks against extent_item */
1302     ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
1303     flags = btrfs_extent_flags(leaf, ei);
1304     total_refs = btrfs_extent_refs(leaf, ei);
1305     generation = btrfs_extent_generation(leaf, ei);
1306     if (unlikely(generation >
1307              btrfs_super_generation(fs_info->super_copy) + 1)) {
1308         extent_err(leaf, slot,
1309                "invalid generation, have %llu expect (0, %llu]",
1310                generation,
1311                btrfs_super_generation(fs_info->super_copy) + 1);
1312         return -EUCLEAN;
1313     }
1314     if (unlikely(!has_single_bit_set(flags & (BTRFS_EXTENT_FLAG_DATA |
1315                           BTRFS_EXTENT_FLAG_TREE_BLOCK)))) {
1316         extent_err(leaf, slot,
1317         "invalid extent flag, have 0x%llx expect 1 bit set in 0x%llx",
1318             flags, BTRFS_EXTENT_FLAG_DATA |
1319             BTRFS_EXTENT_FLAG_TREE_BLOCK);
1320         return -EUCLEAN;
1321     }
1322     is_tree_block = !!(flags & BTRFS_EXTENT_FLAG_TREE_BLOCK);
1323     if (is_tree_block) {
1324         if (unlikely(key->type == BTRFS_EXTENT_ITEM_KEY &&
1325                  key->offset != fs_info->nodesize)) {
1326             extent_err(leaf, slot,
1327                    "invalid extent length, have %llu expect %u",
1328                    key->offset, fs_info->nodesize);
1329             return -EUCLEAN;
1330         }
1331     } else {
1332         if (unlikely(key->type != BTRFS_EXTENT_ITEM_KEY)) {
1333             extent_err(leaf, slot,
1334             "invalid key type, have %u expect %u for data backref",
1335                    key->type, BTRFS_EXTENT_ITEM_KEY);
1336             return -EUCLEAN;
1337         }
1338         if (unlikely(!IS_ALIGNED(key->offset, fs_info->sectorsize))) {
1339             extent_err(leaf, slot,
1340             "invalid extent length, have %llu expect aligned to %u",
1341                    key->offset, fs_info->sectorsize);
1342             return -EUCLEAN;
1343         }
1344         if (unlikely(flags & BTRFS_BLOCK_FLAG_FULL_BACKREF)) {
1345             extent_err(leaf, slot,
1346             "invalid extent flag, data has full backref set");
1347             return -EUCLEAN;
1348         }
1349     }
1350     ptr = (unsigned long)(struct btrfs_extent_item *)(ei + 1);
1351 
1352     /* Check the special case of btrfs_tree_block_info */
1353     if (is_tree_block && key->type != BTRFS_METADATA_ITEM_KEY) {
1354         struct btrfs_tree_block_info *info;
1355 
1356         info = (struct btrfs_tree_block_info *)ptr;
1357         if (unlikely(btrfs_tree_block_level(leaf, info) >= BTRFS_MAX_LEVEL)) {
1358             extent_err(leaf, slot,
1359             "invalid tree block info level, have %u expect [0, %u]",
1360                    btrfs_tree_block_level(leaf, info),
1361                    BTRFS_MAX_LEVEL - 1);
1362             return -EUCLEAN;
1363         }
1364         ptr = (unsigned long)(struct btrfs_tree_block_info *)(info + 1);
1365     }
1366 
1367     /* Check inline refs */
1368     while (ptr < end) {
1369         struct btrfs_extent_inline_ref *iref;
1370         struct btrfs_extent_data_ref *dref;
1371         struct btrfs_shared_data_ref *sref;
1372         u64 dref_offset;
1373         u64 inline_offset;
1374         u8 inline_type;
1375 
1376         if (unlikely(ptr + sizeof(*iref) > end)) {
1377             extent_err(leaf, slot,
1378 "inline ref item overflows extent item, ptr %lu iref size %zu end %lu",
1379                    ptr, sizeof(*iref), end);
1380             return -EUCLEAN;
1381         }
1382         iref = (struct btrfs_extent_inline_ref *)ptr;
1383         inline_type = btrfs_extent_inline_ref_type(leaf, iref);
1384         inline_offset = btrfs_extent_inline_ref_offset(leaf, iref);
1385         if (unlikely(ptr + btrfs_extent_inline_ref_size(inline_type) > end)) {
1386             extent_err(leaf, slot,
1387 "inline ref item overflows extent item, ptr %lu iref size %u end %lu",
1388                    ptr, inline_type, end);
1389             return -EUCLEAN;
1390         }
1391 
1392         switch (inline_type) {
1393         /* inline_offset is subvolid of the owner, no need to check */
1394         case BTRFS_TREE_BLOCK_REF_KEY:
1395             inline_refs++;
1396             break;
1397         /* Contains parent bytenr */
1398         case BTRFS_SHARED_BLOCK_REF_KEY:
1399             if (unlikely(!IS_ALIGNED(inline_offset,
1400                          fs_info->sectorsize))) {
1401                 extent_err(leaf, slot,
1402         "invalid tree parent bytenr, have %llu expect aligned to %u",
1403                        inline_offset, fs_info->sectorsize);
1404                 return -EUCLEAN;
1405             }
1406             inline_refs++;
1407             break;
1408         /*
1409          * Contains owner subvolid, owner key objectid, adjusted offset.
1410          * The only obvious corruption can happen in that offset.
1411          */
1412         case BTRFS_EXTENT_DATA_REF_KEY:
1413             dref = (struct btrfs_extent_data_ref *)(&iref->offset);
1414             dref_offset = btrfs_extent_data_ref_offset(leaf, dref);
1415             if (unlikely(!IS_ALIGNED(dref_offset,
1416                          fs_info->sectorsize))) {
1417                 extent_err(leaf, slot,
1418         "invalid data ref offset, have %llu expect aligned to %u",
1419                        dref_offset, fs_info->sectorsize);
1420                 return -EUCLEAN;
1421             }
1422             inline_refs += btrfs_extent_data_ref_count(leaf, dref);
1423             break;
1424         /* Contains parent bytenr and ref count */
1425         case BTRFS_SHARED_DATA_REF_KEY:
1426             sref = (struct btrfs_shared_data_ref *)(iref + 1);
1427             if (unlikely(!IS_ALIGNED(inline_offset,
1428                          fs_info->sectorsize))) {
1429                 extent_err(leaf, slot,
1430         "invalid data parent bytenr, have %llu expect aligned to %u",
1431                        inline_offset, fs_info->sectorsize);
1432                 return -EUCLEAN;
1433             }
1434             inline_refs += btrfs_shared_data_ref_count(leaf, sref);
1435             break;
1436         default:
1437             extent_err(leaf, slot, "unknown inline ref type: %u",
1438                    inline_type);
1439             return -EUCLEAN;
1440         }
1441         ptr += btrfs_extent_inline_ref_size(inline_type);
1442     }
1443     /* No padding is allowed */
1444     if (unlikely(ptr != end)) {
1445         extent_err(leaf, slot,
1446                "invalid extent item size, padding bytes found");
1447         return -EUCLEAN;
1448     }
1449 
1450     /* Finally, check the inline refs against total refs */
1451     if (unlikely(inline_refs > total_refs)) {
1452         extent_err(leaf, slot,
1453             "invalid extent refs, have %llu expect >= inline %llu",
1454                total_refs, inline_refs);
1455         return -EUCLEAN;
1456     }
1457 
1458     if ((prev_key->type == BTRFS_EXTENT_ITEM_KEY) ||
1459         (prev_key->type == BTRFS_METADATA_ITEM_KEY)) {
1460         u64 prev_end = prev_key->objectid;
1461 
1462         if (prev_key->type == BTRFS_METADATA_ITEM_KEY)
1463             prev_end += fs_info->nodesize;
1464         else
1465             prev_end += prev_key->offset;
1466 
1467         if (unlikely(prev_end > key->objectid)) {
1468             extent_err(leaf, slot,
1469     "previous extent [%llu %u %llu] overlaps current extent [%llu %u %llu]",
1470                    prev_key->objectid, prev_key->type,
1471                    prev_key->offset, key->objectid, key->type,
1472                    key->offset);
1473             return -EUCLEAN;
1474         }
1475     }
1476 
1477     return 0;
1478 }
1479 
1480 static int check_simple_keyed_refs(struct extent_buffer *leaf,
1481                    struct btrfs_key *key, int slot)
1482 {
1483     u32 expect_item_size = 0;
1484 
1485     if (key->type == BTRFS_SHARED_DATA_REF_KEY)
1486         expect_item_size = sizeof(struct btrfs_shared_data_ref);
1487 
1488     if (unlikely(btrfs_item_size(leaf, slot) != expect_item_size)) {
1489         generic_err(leaf, slot,
1490         "invalid item size, have %u expect %u for key type %u",
1491                 btrfs_item_size(leaf, slot),
1492                 expect_item_size, key->type);
1493         return -EUCLEAN;
1494     }
1495     if (unlikely(!IS_ALIGNED(key->objectid, leaf->fs_info->sectorsize))) {
1496         generic_err(leaf, slot,
1497 "invalid key objectid for shared block ref, have %llu expect aligned to %u",
1498                 key->objectid, leaf->fs_info->sectorsize);
1499         return -EUCLEAN;
1500     }
1501     if (unlikely(key->type != BTRFS_TREE_BLOCK_REF_KEY &&
1502              !IS_ALIGNED(key->offset, leaf->fs_info->sectorsize))) {
1503         extent_err(leaf, slot,
1504         "invalid tree parent bytenr, have %llu expect aligned to %u",
1505                key->offset, leaf->fs_info->sectorsize);
1506         return -EUCLEAN;
1507     }
1508     return 0;
1509 }
1510 
1511 static int check_extent_data_ref(struct extent_buffer *leaf,
1512                  struct btrfs_key *key, int slot)
1513 {
1514     struct btrfs_extent_data_ref *dref;
1515     unsigned long ptr = btrfs_item_ptr_offset(leaf, slot);
1516     const unsigned long end = ptr + btrfs_item_size(leaf, slot);
1517 
1518     if (unlikely(btrfs_item_size(leaf, slot) % sizeof(*dref) != 0)) {
1519         generic_err(leaf, slot,
1520     "invalid item size, have %u expect aligned to %zu for key type %u",
1521                 btrfs_item_size(leaf, slot),
1522                 sizeof(*dref), key->type);
1523         return -EUCLEAN;
1524     }
1525     if (unlikely(!IS_ALIGNED(key->objectid, leaf->fs_info->sectorsize))) {
1526         generic_err(leaf, slot,
1527 "invalid key objectid for shared block ref, have %llu expect aligned to %u",
1528                 key->objectid, leaf->fs_info->sectorsize);
1529         return -EUCLEAN;
1530     }
1531     for (; ptr < end; ptr += sizeof(*dref)) {
1532         u64 offset;
1533 
1534         /*
1535          * We cannot check the extent_data_ref hash due to possible
1536          * overflow from the leaf due to hash collisions.
1537          */
1538         dref = (struct btrfs_extent_data_ref *)ptr;
1539         offset = btrfs_extent_data_ref_offset(leaf, dref);
1540         if (unlikely(!IS_ALIGNED(offset, leaf->fs_info->sectorsize))) {
1541             extent_err(leaf, slot,
1542     "invalid extent data backref offset, have %llu expect aligned to %u",
1543                    offset, leaf->fs_info->sectorsize);
1544             return -EUCLEAN;
1545         }
1546     }
1547     return 0;
1548 }
1549 
1550 #define inode_ref_err(eb, slot, fmt, args...)           \
1551     inode_item_err(eb, slot, fmt, ##args)
1552 static int check_inode_ref(struct extent_buffer *leaf,
1553                struct btrfs_key *key, struct btrfs_key *prev_key,
1554                int slot)
1555 {
1556     struct btrfs_inode_ref *iref;
1557     unsigned long ptr;
1558     unsigned long end;
1559 
1560     if (unlikely(!check_prev_ino(leaf, key, slot, prev_key)))
1561         return -EUCLEAN;
1562     /* namelen can't be 0, so item_size == sizeof() is also invalid */
1563     if (unlikely(btrfs_item_size(leaf, slot) <= sizeof(*iref))) {
1564         inode_ref_err(leaf, slot,
1565             "invalid item size, have %u expect (%zu, %u)",
1566             btrfs_item_size(leaf, slot),
1567             sizeof(*iref), BTRFS_LEAF_DATA_SIZE(leaf->fs_info));
1568         return -EUCLEAN;
1569     }
1570 
1571     ptr = btrfs_item_ptr_offset(leaf, slot);
1572     end = ptr + btrfs_item_size(leaf, slot);
1573     while (ptr < end) {
1574         u16 namelen;
1575 
1576         if (unlikely(ptr + sizeof(iref) > end)) {
1577             inode_ref_err(leaf, slot,
1578             "inode ref overflow, ptr %lu end %lu inode_ref_size %zu",
1579                 ptr, end, sizeof(iref));
1580             return -EUCLEAN;
1581         }
1582 
1583         iref = (struct btrfs_inode_ref *)ptr;
1584         namelen = btrfs_inode_ref_name_len(leaf, iref);
1585         if (unlikely(ptr + sizeof(*iref) + namelen > end)) {
1586             inode_ref_err(leaf, slot,
1587                 "inode ref overflow, ptr %lu end %lu namelen %u",
1588                 ptr, end, namelen);
1589             return -EUCLEAN;
1590         }
1591 
1592         /*
1593          * NOTE: In theory we should record all found index numbers
1594          * to find any duplicated indexes, but that will be too time
1595          * consuming for inodes with too many hard links.
1596          */
1597         ptr += sizeof(*iref) + namelen;
1598     }
1599     return 0;
1600 }
1601 
1602 /*
1603  * Common point to switch the item-specific validation.
1604  */
1605 static int check_leaf_item(struct extent_buffer *leaf,
1606                struct btrfs_key *key, int slot,
1607                struct btrfs_key *prev_key)
1608 {
1609     int ret = 0;
1610     struct btrfs_chunk *chunk;
1611 
1612     switch (key->type) {
1613     case BTRFS_EXTENT_DATA_KEY:
1614         ret = check_extent_data_item(leaf, key, slot, prev_key);
1615         break;
1616     case BTRFS_EXTENT_CSUM_KEY:
1617         ret = check_csum_item(leaf, key, slot, prev_key);
1618         break;
1619     case BTRFS_DIR_ITEM_KEY:
1620     case BTRFS_DIR_INDEX_KEY:
1621     case BTRFS_XATTR_ITEM_KEY:
1622         ret = check_dir_item(leaf, key, prev_key, slot);
1623         break;
1624     case BTRFS_INODE_REF_KEY:
1625         ret = check_inode_ref(leaf, key, prev_key, slot);
1626         break;
1627     case BTRFS_BLOCK_GROUP_ITEM_KEY:
1628         ret = check_block_group_item(leaf, key, slot);
1629         break;
1630     case BTRFS_CHUNK_ITEM_KEY:
1631         chunk = btrfs_item_ptr(leaf, slot, struct btrfs_chunk);
1632         ret = check_leaf_chunk_item(leaf, chunk, key, slot);
1633         break;
1634     case BTRFS_DEV_ITEM_KEY:
1635         ret = check_dev_item(leaf, key, slot);
1636         break;
1637     case BTRFS_INODE_ITEM_KEY:
1638         ret = check_inode_item(leaf, key, slot);
1639         break;
1640     case BTRFS_ROOT_ITEM_KEY:
1641         ret = check_root_item(leaf, key, slot);
1642         break;
1643     case BTRFS_EXTENT_ITEM_KEY:
1644     case BTRFS_METADATA_ITEM_KEY:
1645         ret = check_extent_item(leaf, key, slot, prev_key);
1646         break;
1647     case BTRFS_TREE_BLOCK_REF_KEY:
1648     case BTRFS_SHARED_DATA_REF_KEY:
1649     case BTRFS_SHARED_BLOCK_REF_KEY:
1650         ret = check_simple_keyed_refs(leaf, key, slot);
1651         break;
1652     case BTRFS_EXTENT_DATA_REF_KEY:
1653         ret = check_extent_data_ref(leaf, key, slot);
1654         break;
1655     }
1656     return ret;
1657 }
1658 
1659 static int check_leaf(struct extent_buffer *leaf, bool check_item_data)
1660 {
1661     struct btrfs_fs_info *fs_info = leaf->fs_info;
1662     /* No valid key type is 0, so all key should be larger than this key */
1663     struct btrfs_key prev_key = {0, 0, 0};
1664     struct btrfs_key key;
1665     u32 nritems = btrfs_header_nritems(leaf);
1666     int slot;
1667 
1668     if (unlikely(btrfs_header_level(leaf) != 0)) {
1669         generic_err(leaf, 0,
1670             "invalid level for leaf, have %d expect 0",
1671             btrfs_header_level(leaf));
1672         return -EUCLEAN;
1673     }
1674 
1675     /*
1676      * Extent buffers from a relocation tree have a owner field that
1677      * corresponds to the subvolume tree they are based on. So just from an
1678      * extent buffer alone we can not find out what is the id of the
1679      * corresponding subvolume tree, so we can not figure out if the extent
1680      * buffer corresponds to the root of the relocation tree or not. So
1681      * skip this check for relocation trees.
1682      */
1683     if (nritems == 0 && !btrfs_header_flag(leaf, BTRFS_HEADER_FLAG_RELOC)) {
1684         u64 owner = btrfs_header_owner(leaf);
1685 
1686         /* These trees must never be empty */
1687         if (unlikely(owner == BTRFS_ROOT_TREE_OBJECTID ||
1688                  owner == BTRFS_CHUNK_TREE_OBJECTID ||
1689                  owner == BTRFS_DEV_TREE_OBJECTID ||
1690                  owner == BTRFS_FS_TREE_OBJECTID ||
1691                  owner == BTRFS_DATA_RELOC_TREE_OBJECTID)) {
1692             generic_err(leaf, 0,
1693             "invalid root, root %llu must never be empty",
1694                     owner);
1695             return -EUCLEAN;
1696         }
1697 
1698         /* Unknown tree */
1699         if (unlikely(owner == 0)) {
1700             generic_err(leaf, 0,
1701                 "invalid owner, root 0 is not defined");
1702             return -EUCLEAN;
1703         }
1704 
1705         /* EXTENT_TREE_V2 can have empty extent trees. */
1706         if (btrfs_fs_incompat(fs_info, EXTENT_TREE_V2))
1707             return 0;
1708 
1709         if (unlikely(owner == BTRFS_EXTENT_TREE_OBJECTID)) {
1710             generic_err(leaf, 0,
1711             "invalid root, root %llu must never be empty",
1712                     owner);
1713             return -EUCLEAN;
1714         }
1715 
1716         return 0;
1717     }
1718 
1719     if (unlikely(nritems == 0))
1720         return 0;
1721 
1722     /*
1723      * Check the following things to make sure this is a good leaf, and
1724      * leaf users won't need to bother with similar sanity checks:
1725      *
1726      * 1) key ordering
1727      * 2) item offset and size
1728      *    No overlap, no hole, all inside the leaf.
1729      * 3) item content
1730      *    If possible, do comprehensive sanity check.
1731      *    NOTE: All checks must only rely on the item data itself.
1732      */
1733     for (slot = 0; slot < nritems; slot++) {
1734         u32 item_end_expected;
1735         u64 item_data_end;
1736         int ret;
1737 
1738         btrfs_item_key_to_cpu(leaf, &key, slot);
1739 
1740         /* Make sure the keys are in the right order */
1741         if (unlikely(btrfs_comp_cpu_keys(&prev_key, &key) >= 0)) {
1742             generic_err(leaf, slot,
1743     "bad key order, prev (%llu %u %llu) current (%llu %u %llu)",
1744                 prev_key.objectid, prev_key.type,
1745                 prev_key.offset, key.objectid, key.type,
1746                 key.offset);
1747             return -EUCLEAN;
1748         }
1749 
1750         item_data_end = (u64)btrfs_item_offset(leaf, slot) +
1751                 btrfs_item_size(leaf, slot);
1752         /*
1753          * Make sure the offset and ends are right, remember that the
1754          * item data starts at the end of the leaf and grows towards the
1755          * front.
1756          */
1757         if (slot == 0)
1758             item_end_expected = BTRFS_LEAF_DATA_SIZE(fs_info);
1759         else
1760             item_end_expected = btrfs_item_offset(leaf,
1761                                  slot - 1);
1762         if (unlikely(item_data_end != item_end_expected)) {
1763             generic_err(leaf, slot,
1764                 "unexpected item end, have %llu expect %u",
1765                 item_data_end, item_end_expected);
1766             return -EUCLEAN;
1767         }
1768 
1769         /*
1770          * Check to make sure that we don't point outside of the leaf,
1771          * just in case all the items are consistent to each other, but
1772          * all point outside of the leaf.
1773          */
1774         if (unlikely(item_data_end > BTRFS_LEAF_DATA_SIZE(fs_info))) {
1775             generic_err(leaf, slot,
1776             "slot end outside of leaf, have %llu expect range [0, %u]",
1777                 item_data_end, BTRFS_LEAF_DATA_SIZE(fs_info));
1778             return -EUCLEAN;
1779         }
1780 
1781         /* Also check if the item pointer overlaps with btrfs item. */
1782         if (unlikely(btrfs_item_ptr_offset(leaf, slot) <
1783                  btrfs_item_nr_offset(slot) + sizeof(struct btrfs_item))) {
1784             generic_err(leaf, slot,
1785         "slot overlaps with its data, item end %lu data start %lu",
1786                 btrfs_item_nr_offset(slot) +
1787                 sizeof(struct btrfs_item),
1788                 btrfs_item_ptr_offset(leaf, slot));
1789             return -EUCLEAN;
1790         }
1791 
1792         if (check_item_data) {
1793             /*
1794              * Check if the item size and content meet other
1795              * criteria
1796              */
1797             ret = check_leaf_item(leaf, &key, slot, &prev_key);
1798             if (unlikely(ret < 0))
1799                 return ret;
1800         }
1801 
1802         prev_key.objectid = key.objectid;
1803         prev_key.type = key.type;
1804         prev_key.offset = key.offset;
1805     }
1806 
1807     return 0;
1808 }
1809 
1810 int btrfs_check_leaf_full(struct extent_buffer *leaf)
1811 {
1812     return check_leaf(leaf, true);
1813 }
1814 ALLOW_ERROR_INJECTION(btrfs_check_leaf_full, ERRNO);
1815 
1816 int btrfs_check_leaf_relaxed(struct extent_buffer *leaf)
1817 {
1818     return check_leaf(leaf, false);
1819 }
1820 
1821 int btrfs_check_node(struct extent_buffer *node)
1822 {
1823     struct btrfs_fs_info *fs_info = node->fs_info;
1824     unsigned long nr = btrfs_header_nritems(node);
1825     struct btrfs_key key, next_key;
1826     int slot;
1827     int level = btrfs_header_level(node);
1828     u64 bytenr;
1829     int ret = 0;
1830 
1831     if (unlikely(level <= 0 || level >= BTRFS_MAX_LEVEL)) {
1832         generic_err(node, 0,
1833             "invalid level for node, have %d expect [1, %d]",
1834             level, BTRFS_MAX_LEVEL - 1);
1835         return -EUCLEAN;
1836     }
1837     if (unlikely(nr == 0 || nr > BTRFS_NODEPTRS_PER_BLOCK(fs_info))) {
1838         btrfs_crit(fs_info,
1839 "corrupt node: root=%llu block=%llu, nritems too %s, have %lu expect range [1,%u]",
1840                btrfs_header_owner(node), node->start,
1841                nr == 0 ? "small" : "large", nr,
1842                BTRFS_NODEPTRS_PER_BLOCK(fs_info));
1843         return -EUCLEAN;
1844     }
1845 
1846     for (slot = 0; slot < nr - 1; slot++) {
1847         bytenr = btrfs_node_blockptr(node, slot);
1848         btrfs_node_key_to_cpu(node, &key, slot);
1849         btrfs_node_key_to_cpu(node, &next_key, slot + 1);
1850 
1851         if (unlikely(!bytenr)) {
1852             generic_err(node, slot,
1853                 "invalid NULL node pointer");
1854             ret = -EUCLEAN;
1855             goto out;
1856         }
1857         if (unlikely(!IS_ALIGNED(bytenr, fs_info->sectorsize))) {
1858             generic_err(node, slot,
1859             "unaligned pointer, have %llu should be aligned to %u",
1860                 bytenr, fs_info->sectorsize);
1861             ret = -EUCLEAN;
1862             goto out;
1863         }
1864 
1865         if (unlikely(btrfs_comp_cpu_keys(&key, &next_key) >= 0)) {
1866             generic_err(node, slot,
1867     "bad key order, current (%llu %u %llu) next (%llu %u %llu)",
1868                 key.objectid, key.type, key.offset,
1869                 next_key.objectid, next_key.type,
1870                 next_key.offset);
1871             ret = -EUCLEAN;
1872             goto out;
1873         }
1874     }
1875 out:
1876     return ret;
1877 }
1878 ALLOW_ERROR_INJECTION(btrfs_check_node, ERRNO);
1879 
1880 int btrfs_check_eb_owner(const struct extent_buffer *eb, u64 root_owner)
1881 {
1882     const bool is_subvol = is_fstree(root_owner);
1883     const u64 eb_owner = btrfs_header_owner(eb);
1884 
1885     /*
1886      * Skip dummy fs, as selftests don't create unique ebs for each dummy
1887      * root.
1888      */
1889     if (test_bit(BTRFS_FS_STATE_DUMMY_FS_INFO, &eb->fs_info->fs_state))
1890         return 0;
1891     /*
1892      * There are several call sites (backref walking, qgroup, and data
1893      * reloc) passing 0 as @root_owner, as they are not holding the
1894      * tree root.  In that case, we can not do a reliable ownership check,
1895      * so just exit.
1896      */
1897     if (root_owner == 0)
1898         return 0;
1899     /*
1900      * These trees use key.offset as their owner, our callers don't have
1901      * the extra capacity to pass key.offset here.  So we just skip them.
1902      */
1903     if (root_owner == BTRFS_TREE_LOG_OBJECTID ||
1904         root_owner == BTRFS_TREE_RELOC_OBJECTID)
1905         return 0;
1906 
1907     if (!is_subvol) {
1908         /* For non-subvolume trees, the eb owner should match root owner */
1909         if (unlikely(root_owner != eb_owner)) {
1910             btrfs_crit(eb->fs_info,
1911 "corrupted %s, root=%llu block=%llu owner mismatch, have %llu expect %llu",
1912                 btrfs_header_level(eb) == 0 ? "leaf" : "node",
1913                 root_owner, btrfs_header_bytenr(eb), eb_owner,
1914                 root_owner);
1915             return -EUCLEAN;
1916         }
1917         return 0;
1918     }
1919 
1920     /*
1921      * For subvolume trees, owners can mismatch, but they should all belong
1922      * to subvolume trees.
1923      */
1924     if (unlikely(is_subvol != is_fstree(eb_owner))) {
1925         btrfs_crit(eb->fs_info,
1926 "corrupted %s, root=%llu block=%llu owner mismatch, have %llu expect [%llu, %llu]",
1927             btrfs_header_level(eb) == 0 ? "leaf" : "node",
1928             root_owner, btrfs_header_bytenr(eb), eb_owner,
1929             BTRFS_FIRST_FREE_OBJECTID, BTRFS_LAST_FREE_OBJECTID);
1930         return -EUCLEAN;
1931     }
1932     return 0;
1933 }