0001 // SPDX-License-Identifier: GPL-2.0
0002 /*
0003  * Copyright (C) STRATO AG 2011.  All rights reserved.
0004  */
0005 
0006 /*
0007  * This module can be used to catch cases when the btrfs kernel
0008  * code executes write requests to the disk that bring the file
0009  * system into an inconsistent state. In such a state, a power-loss
0010  * or kernel panic event would cause the data on disk to be
0011  * lost or at least damaged.
0012  *
0013  * Code is added that examines all block write requests during
0014  * runtime (including writes of the super block). Three rules
0015  * are verified and an error is printed on violation of the
0016  * rules:
0017  * 1. It is not allowed to write a disk block which is
0018  *    currently referenced by the super block (either directly
0019  *    or indirectly).
0020  * 2. When a super block is written, it is verified that all
0021  *    referenced (directly or indirectly) blocks fulfill the
0022  *    following requirements:
0023  *    2a. All referenced blocks have either been present when
0024  *        the file system was mounted (i.e., they have been
0025  *        referenced by the super block), or they have been
0026  *        written since then and the write completion callback
0027  *        was called and no write error was indicated and a
0028  *        FLUSH request to the device where these blocks are
0029  *        located was received and completed.
0030  *    2b. All referenced blocks need to have a generation
0031  *        number which is equal to the parent's number.
0032  *
0033  * One issue that was found using this module was that the log
0034  * tree on disk became temporarily corrupted because disk blocks
0035  * that had been in use for the log tree had been freed and
0036  * reused too early, while being referenced by the written super
0037  * block.
0038  *
0039  * The search term that can be used to filter the kernel log
0040  * for detected integrity issues is
0041  * "btrfs: attempt".
0042  *
0043  * The integrity check is enabled via mount options. These
0044  * mount options are only supported if the integrity check
0045  * tool is compiled by defining BTRFS_FS_CHECK_INTEGRITY.
0046  *
0047  * Example #1, apply integrity checks to all metadata:
0048  * mount /dev/sdb1 /mnt -o check_int
0049  *
0050  * Example #2, apply integrity checks to all metadata and
0051  * to data extents:
0052  * mount /dev/sdb1 /mnt -o check_int_data
0053  *
0054  * Example #3, apply integrity checks to all metadata and dump
0055  * the tree that the super block references to kernel messages
0056  * each time after a super block was written:
0057  * mount /dev/sdb1 /mnt -o check_int,check_int_print_mask=263
0058  *
0059  * If the integrity check tool is included and activated via
0060  * the mount options, it consumes a significant amount of kernel
0061  * memory and many additional CPU cycles. Enabling this
0062  * functionality is not intended for normal use: unless you
0063  * are a btrfs developer who needs to verify the integrity of
0064  * (super)-block write requests, do not enable the config
0065  * option BTRFS_FS_CHECK_INTEGRITY to include and compile the
0066  * integrity check tool.
0067  *
0068  * Expect millions of lines of information in the kernel log with an
0069  * enabled check_int_print_mask. Therefore set LOG_BUF_SHIFT in the
0070  * kernel config to at least 26 (which is 64MB). Usually the value is
0071  * limited to 21 (which is 2MB) in init/Kconfig. The file needs to be
0072  * changed like this before LOG_BUF_SHIFT can be set to a high value:
0073  * config LOG_BUF_SHIFT
0074  *       int "Kernel log buffer size (16 => 64KB, 17 => 128KB)"
0075  *       range 12 30
0076  */
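
/*
 * A minimal example of the workflow described above, assuming the
 * kernel was built with CONFIG_BTRFS_FS_CHECK_INTEGRITY=y (the config
 * symbol name is inferred from the Kconfig option mentioned above):
 *
 *   mount /dev/sdb1 /mnt -o check_int_data
 *   ... run the workload whose writes should be verified ...
 *   dmesg | grep "btrfs: attempt"
 *
 * Any matching line reports a detected violation of the rules listed
 * in the comment above.
 */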
0077 
0078 #include <linux/sched.h>
0079 #include <linux/slab.h>
0080 #include <linux/mutex.h>
0081 #include <linux/blkdev.h>
0082 #include <linux/mm.h>
0083 #include <linux/string.h>
0084 #include <crypto/hash.h>
0085 #include "ctree.h"
0086 #include "disk-io.h"
0087 #include "transaction.h"
0088 #include "extent_io.h"
0089 #include "volumes.h"
0090 #include "print-tree.h"
0091 #include "locking.h"
0092 #include "check-integrity.h"
0093 #include "rcu-string.h"
0094 #include "compression.h"
0095 
0096 #define BTRFSIC_BLOCK_HASHTABLE_SIZE 0x10000
0097 #define BTRFSIC_BLOCK_LINK_HASHTABLE_SIZE 0x10000
0098 #define BTRFSIC_DEV2STATE_HASHTABLE_SIZE 0x100
0099 #define BTRFSIC_BLOCK_MAGIC_NUMBER 0x14491051
0100 #define BTRFSIC_BLOCK_LINK_MAGIC_NUMBER 0x11070807
0101 #define BTRFSIC_DEV2STATE_MAGIC_NUMBER 0x20111530
0102 #define BTRFSIC_BLOCK_STACK_FRAME_MAGIC_NUMBER 20111300
0103 #define BTRFSIC_TREE_DUMP_MAX_INDENT_LEVEL (200 - 6)    /* in characters,
0104                              * excluding " [...]" */
0105 #define BTRFSIC_GENERATION_UNKNOWN ((u64)-1)
0106 
0107 /*
0108  * The definition of the bitmask fields for the print_mask.
0109  * They are specified with the mount option check_int_print_mask.
0110  */
0111 #define BTRFSIC_PRINT_MASK_SUPERBLOCK_WRITE         0x00000001
0112 #define BTRFSIC_PRINT_MASK_ROOT_CHUNK_LOG_TREE_LOCATION     0x00000002
0113 #define BTRFSIC_PRINT_MASK_TREE_AFTER_SB_WRITE          0x00000004
0114 #define BTRFSIC_PRINT_MASK_TREE_BEFORE_SB_WRITE         0x00000008
0115 #define BTRFSIC_PRINT_MASK_SUBMIT_BIO_BH            0x00000010
0116 #define BTRFSIC_PRINT_MASK_END_IO_BIO_BH            0x00000020
0117 #define BTRFSIC_PRINT_MASK_VERBOSE              0x00000040
0118 #define BTRFSIC_PRINT_MASK_VERY_VERBOSE             0x00000080
0119 #define BTRFSIC_PRINT_MASK_INITIAL_TREE             0x00000100
0120 #define BTRFSIC_PRINT_MASK_INITIAL_ALL_TREES            0x00000200
0121 #define BTRFSIC_PRINT_MASK_INITIAL_DATABASE         0x00000400
0122 #define BTRFSIC_PRINT_MASK_NUM_COPIES               0x00000800
0123 #define BTRFSIC_PRINT_MASK_TREE_WITH_ALL_MIRRORS        0x00001000
0124 #define BTRFSIC_PRINT_MASK_SUBMIT_BIO_BH_VERBOSE        0x00002000
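
/*
 * For reference, the print mask value 263 used in example #3 of the
 * header comment is 0x107, i.e. the bitwise OR of
 * BTRFSIC_PRINT_MASK_SUPERBLOCK_WRITE (0x001),
 * BTRFSIC_PRINT_MASK_ROOT_CHUNK_LOG_TREE_LOCATION (0x002),
 * BTRFSIC_PRINT_MASK_TREE_AFTER_SB_WRITE (0x004) and
 * BTRFSIC_PRINT_MASK_INITIAL_TREE (0x100), which matches the behavior
 * described there (dump the superblock-referenced tree after each
 * super block write).
 */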
0125 
0126 struct btrfsic_dev_state;
0127 struct btrfsic_state;
0128 
0129 struct btrfsic_block {
0130     u32 magic_num;      /* only used for debug purposes */
0131     unsigned int is_metadata:1; /* if it is meta-data, not plain data */
0132     unsigned int is_superblock:1;   /* if it is one of the superblocks */
0133     unsigned int is_iodone:1;   /* if is done by lower subsystem */
0134     unsigned int iodone_w_error:1;  /* error was indicated to endio */
0135     unsigned int never_written:1;   /* block was added because it was
0136                      * referenced, not because it was
0137                      * written */
0138     unsigned int mirror_num;    /* large enough to hold
0139                      * BTRFS_SUPER_MIRROR_MAX */
0140     struct btrfsic_dev_state *dev_state;
0141     u64 dev_bytenr;     /* key, physical byte num on disk */
0142     u64 logical_bytenr; /* logical byte num on disk */
0143     u64 generation;
0144     struct btrfs_disk_key disk_key; /* extra info to print in case of
0145                      * issues, will not always be correct */
0146     struct list_head collision_resolving_node;  /* list node */
0147     struct list_head all_blocks_node;   /* list node */
0148 
0149     /* the following two lists contain block_link items */
0150     struct list_head ref_to_list;   /* list */
0151     struct list_head ref_from_list; /* list */
0152     struct btrfsic_block *next_in_same_bio;
0153     void *orig_bio_private;
0154     bio_end_io_t *orig_bio_end_io;
0155     blk_opf_t submit_bio_bh_rw;
0156     u64 flush_gen; /* only valid if !never_written */
0157 };
0158 
0159 /*
0160  * Elements of this type are allocated dynamically and are required
0161  * because each block object can refer to, and can be referred to by,
0162  * multiple other blocks. The key used to look them up in the hashtable
0163  * is the dev_bytenr of the referred-to block combined with the
0164  * dev_bytenr of the referring block. The fact that they are searchable
0165  * via a hashtable and that a ref_cnt is maintained is not required for
0166  * the btrfs integrity check algorithm itself; it is only used to make
0167  * the output nicer in case an error is detected (an error is defined
0168  * as a write operation to a block while that block is still referenced).
0169  */
0170 struct btrfsic_block_link {
0171     u32 magic_num;      /* only used for debug purposes */
0172     u32 ref_cnt;
0173     struct list_head node_ref_to;   /* list node */
0174     struct list_head node_ref_from; /* list node */
0175     struct list_head collision_resolving_node;  /* list node */
0176     struct btrfsic_block *block_ref_to;
0177     struct btrfsic_block *block_ref_from;
0178     u64 parent_generation;
0179 };
0180 
0181 struct btrfsic_dev_state {
0182     u32 magic_num;      /* only used for debug purposes */
0183     struct block_device *bdev;
0184     struct btrfsic_state *state;
0185     struct list_head collision_resolving_node;  /* list node */
0186     struct btrfsic_block dummy_block_for_bio_bh_flush;
0187     u64 last_flush_gen;
0188 };
0189 
0190 struct btrfsic_block_hashtable {
0191     struct list_head table[BTRFSIC_BLOCK_HASHTABLE_SIZE];
0192 };
0193 
0194 struct btrfsic_block_link_hashtable {
0195     struct list_head table[BTRFSIC_BLOCK_LINK_HASHTABLE_SIZE];
0196 };
0197 
0198 struct btrfsic_dev_state_hashtable {
0199     struct list_head table[BTRFSIC_DEV2STATE_HASHTABLE_SIZE];
0200 };
0201 
0202 struct btrfsic_block_data_ctx {
0203     u64 start;      /* virtual bytenr */
0204     u64 dev_bytenr;     /* physical bytenr on device */
0205     u32 len;
0206     struct btrfsic_dev_state *dev;
0207     char **datav;
0208     struct page **pagev;
0209     void *mem_to_free;
0210 };
0211 
0212 /* This structure is used to implement recursion without consuming
0213  * kernel stack space; see btrfsic_process_metablock() */
0214 struct btrfsic_stack_frame {
0215     u32 magic;
0216     u32 nr;
0217     int error;
0218     int i;
0219     int limit_nesting;
0220     int num_copies;
0221     int mirror_num;
0222     struct btrfsic_block *block;
0223     struct btrfsic_block_data_ctx *block_ctx;
0224     struct btrfsic_block *next_block;
0225     struct btrfsic_block_data_ctx next_block_ctx;
0226     struct btrfs_header *hdr;
0227     struct btrfsic_stack_frame *prev;
0228 };
0229 
0230 /* Some state per mounted filesystem */
0231 struct btrfsic_state {
0232     u32 print_mask;
0233     int include_extent_data;
0234     struct list_head all_blocks_list;
0235     struct btrfsic_block_hashtable block_hashtable;
0236     struct btrfsic_block_link_hashtable block_link_hashtable;
0237     struct btrfs_fs_info *fs_info;
0238     u64 max_superblock_generation;
0239     struct btrfsic_block *latest_superblock;
0240     u32 metablock_size;
0241     u32 datablock_size;
0242 };
0243 
0244 static int btrfsic_process_metablock(struct btrfsic_state *state,
0245                      struct btrfsic_block *block,
0246                      struct btrfsic_block_data_ctx *block_ctx,
0247                      int limit_nesting, int force_iodone_flag);
0248 static void btrfsic_read_from_block_data(
0249     struct btrfsic_block_data_ctx *block_ctx,
0250     void *dst, u32 offset, size_t len);
0251 static int btrfsic_create_link_to_next_block(
0252         struct btrfsic_state *state,
0253         struct btrfsic_block *block,
0254         struct btrfsic_block_data_ctx
0255         *block_ctx, u64 next_bytenr,
0256         int limit_nesting,
0257         struct btrfsic_block_data_ctx *next_block_ctx,
0258         struct btrfsic_block **next_blockp,
0259         int force_iodone_flag,
0260         int *num_copiesp, int *mirror_nump,
0261         struct btrfs_disk_key *disk_key,
0262         u64 parent_generation);
0263 static int btrfsic_handle_extent_data(struct btrfsic_state *state,
0264                       struct btrfsic_block *block,
0265                       struct btrfsic_block_data_ctx *block_ctx,
0266                       u32 item_offset, int force_iodone_flag);
0267 static int btrfsic_map_block(struct btrfsic_state *state, u64 bytenr, u32 len,
0268                  struct btrfsic_block_data_ctx *block_ctx_out,
0269                  int mirror_num);
0270 static void btrfsic_release_block_ctx(struct btrfsic_block_data_ctx *block_ctx);
0271 static int btrfsic_read_block(struct btrfsic_state *state,
0272                   struct btrfsic_block_data_ctx *block_ctx);
0273 static int btrfsic_process_written_superblock(
0274         struct btrfsic_state *state,
0275         struct btrfsic_block *const block,
0276         struct btrfs_super_block *const super_hdr);
0277 static void btrfsic_bio_end_io(struct bio *bp);
0278 static int btrfsic_is_block_ref_by_superblock(const struct btrfsic_state *state,
0279                           const struct btrfsic_block *block,
0280                           int recursion_level);
0281 static int btrfsic_check_all_ref_blocks(struct btrfsic_state *state,
0282                     struct btrfsic_block *const block,
0283                     int recursion_level);
0284 static void btrfsic_print_add_link(const struct btrfsic_state *state,
0285                    const struct btrfsic_block_link *l);
0286 static void btrfsic_print_rem_link(const struct btrfsic_state *state,
0287                    const struct btrfsic_block_link *l);
0288 static char btrfsic_get_block_type(const struct btrfsic_state *state,
0289                    const struct btrfsic_block *block);
0290 static void btrfsic_dump_tree(const struct btrfsic_state *state);
0291 static void btrfsic_dump_tree_sub(const struct btrfsic_state *state,
0292                   const struct btrfsic_block *block,
0293                   int indent_level);
0294 static struct btrfsic_block_link *btrfsic_block_link_lookup_or_add(
0295         struct btrfsic_state *state,
0296         struct btrfsic_block_data_ctx *next_block_ctx,
0297         struct btrfsic_block *next_block,
0298         struct btrfsic_block *from_block,
0299         u64 parent_generation);
0300 static struct btrfsic_block *btrfsic_block_lookup_or_add(
0301         struct btrfsic_state *state,
0302         struct btrfsic_block_data_ctx *block_ctx,
0303         const char *additional_string,
0304         int is_metadata,
0305         int is_iodone,
0306         int never_written,
0307         int mirror_num,
0308         int *was_created);
0309 static int btrfsic_process_superblock_dev_mirror(
0310         struct btrfsic_state *state,
0311         struct btrfsic_dev_state *dev_state,
0312         struct btrfs_device *device,
0313         int superblock_mirror_num,
0314         struct btrfsic_dev_state **selected_dev_state,
0315         struct btrfs_super_block *selected_super);
0316 static struct btrfsic_dev_state *btrfsic_dev_state_lookup(dev_t dev);
0317 static void btrfsic_cmp_log_and_dev_bytenr(struct btrfsic_state *state,
0318                        u64 bytenr,
0319                        struct btrfsic_dev_state *dev_state,
0320                        u64 dev_bytenr);
0321 
0322 static struct mutex btrfsic_mutex;
0323 static int btrfsic_is_initialized;
0324 static struct btrfsic_dev_state_hashtable btrfsic_dev_state_hashtable;
0325 
0326 
0327 static void btrfsic_block_init(struct btrfsic_block *b)
0328 {
0329     b->magic_num = BTRFSIC_BLOCK_MAGIC_NUMBER;
0330     b->dev_state = NULL;
0331     b->dev_bytenr = 0;
0332     b->logical_bytenr = 0;
0333     b->generation = BTRFSIC_GENERATION_UNKNOWN;
0334     b->disk_key.objectid = 0;
0335     b->disk_key.type = 0;
0336     b->disk_key.offset = 0;
0337     b->is_metadata = 0;
0338     b->is_superblock = 0;
0339     b->is_iodone = 0;
0340     b->iodone_w_error = 0;
0341     b->never_written = 0;
0342     b->mirror_num = 0;
0343     b->next_in_same_bio = NULL;
0344     b->orig_bio_private = NULL;
0345     b->orig_bio_end_io = NULL;
0346     INIT_LIST_HEAD(&b->collision_resolving_node);
0347     INIT_LIST_HEAD(&b->all_blocks_node);
0348     INIT_LIST_HEAD(&b->ref_to_list);
0349     INIT_LIST_HEAD(&b->ref_from_list);
0350     b->submit_bio_bh_rw = 0;
0351     b->flush_gen = 0;
0352 }
0353 
0354 static struct btrfsic_block *btrfsic_block_alloc(void)
0355 {
0356     struct btrfsic_block *b;
0357 
0358     b = kzalloc(sizeof(*b), GFP_NOFS);
0359     if (NULL != b)
0360         btrfsic_block_init(b);
0361 
0362     return b;
0363 }
0364 
0365 static void btrfsic_block_free(struct btrfsic_block *b)
0366 {
0367     BUG_ON(!(NULL == b || BTRFSIC_BLOCK_MAGIC_NUMBER == b->magic_num));
0368     kfree(b);
0369 }
0370 
0371 static void btrfsic_block_link_init(struct btrfsic_block_link *l)
0372 {
0373     l->magic_num = BTRFSIC_BLOCK_LINK_MAGIC_NUMBER;
0374     l->ref_cnt = 1;
0375     INIT_LIST_HEAD(&l->node_ref_to);
0376     INIT_LIST_HEAD(&l->node_ref_from);
0377     INIT_LIST_HEAD(&l->collision_resolving_node);
0378     l->block_ref_to = NULL;
0379     l->block_ref_from = NULL;
0380 }
0381 
0382 static struct btrfsic_block_link *btrfsic_block_link_alloc(void)
0383 {
0384     struct btrfsic_block_link *l;
0385 
0386     l = kzalloc(sizeof(*l), GFP_NOFS);
0387     if (NULL != l)
0388         btrfsic_block_link_init(l);
0389 
0390     return l;
0391 }
0392 
0393 static void btrfsic_block_link_free(struct btrfsic_block_link *l)
0394 {
0395     BUG_ON(!(NULL == l || BTRFSIC_BLOCK_LINK_MAGIC_NUMBER == l->magic_num));
0396     kfree(l);
0397 }
0398 
0399 static void btrfsic_dev_state_init(struct btrfsic_dev_state *ds)
0400 {
0401     ds->magic_num = BTRFSIC_DEV2STATE_MAGIC_NUMBER;
0402     ds->bdev = NULL;
0403     ds->state = NULL;
0404     INIT_LIST_HEAD(&ds->collision_resolving_node);
0405     ds->last_flush_gen = 0;
0406     btrfsic_block_init(&ds->dummy_block_for_bio_bh_flush);
0407     ds->dummy_block_for_bio_bh_flush.is_iodone = 1;
0408     ds->dummy_block_for_bio_bh_flush.dev_state = ds;
0409 }
0410 
0411 static struct btrfsic_dev_state *btrfsic_dev_state_alloc(void)
0412 {
0413     struct btrfsic_dev_state *ds;
0414 
0415     ds = kzalloc(sizeof(*ds), GFP_NOFS);
0416     if (NULL != ds)
0417         btrfsic_dev_state_init(ds);
0418 
0419     return ds;
0420 }
0421 
0422 static void btrfsic_dev_state_free(struct btrfsic_dev_state *ds)
0423 {
0424     BUG_ON(!(NULL == ds ||
0425          BTRFSIC_DEV2STATE_MAGIC_NUMBER == ds->magic_num));
0426     kfree(ds);
0427 }
0428 
0429 static void btrfsic_block_hashtable_init(struct btrfsic_block_hashtable *h)
0430 {
0431     int i;
0432 
0433     for (i = 0; i < BTRFSIC_BLOCK_HASHTABLE_SIZE; i++)
0434         INIT_LIST_HEAD(h->table + i);
0435 }
0436 
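/*
 * The block hash value combines the physical byte number (shifted
 * right by 16, so the low 16 bits are ignored) with the block_device
 * pointer, masked to the table size.  Collisions are resolved by
 * chaining via collision_resolving_node; the lookup helper below
 * recomputes the same value from a raw bdev/dev_bytenr pair.
 */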
0437 static void btrfsic_block_hashtable_add(struct btrfsic_block *b,
0438                     struct btrfsic_block_hashtable *h)
0439 {
0440     const unsigned int hashval =
0441         (((unsigned int)(b->dev_bytenr >> 16)) ^
0442          ((unsigned int)((uintptr_t)b->dev_state->bdev))) &
0443          (BTRFSIC_BLOCK_HASHTABLE_SIZE - 1);
0444 
0445     list_add(&b->collision_resolving_node, h->table + hashval);
0446 }
0447 
0448 static void btrfsic_block_hashtable_remove(struct btrfsic_block *b)
0449 {
0450     list_del(&b->collision_resolving_node);
0451 }
0452 
0453 static struct btrfsic_block *btrfsic_block_hashtable_lookup(
0454         struct block_device *bdev,
0455         u64 dev_bytenr,
0456         struct btrfsic_block_hashtable *h)
0457 {
0458     const unsigned int hashval =
0459         (((unsigned int)(dev_bytenr >> 16)) ^
0460          ((unsigned int)((uintptr_t)bdev))) &
0461          (BTRFSIC_BLOCK_HASHTABLE_SIZE - 1);
0462     struct btrfsic_block *b;
0463 
0464     list_for_each_entry(b, h->table + hashval, collision_resolving_node) {
0465         if (b->dev_state->bdev == bdev && b->dev_bytenr == dev_bytenr)
0466             return b;
0467     }
0468 
0469     return NULL;
0470 }
0471 
0472 static void btrfsic_block_link_hashtable_init(
0473         struct btrfsic_block_link_hashtable *h)
0474 {
0475     int i;
0476 
0477     for (i = 0; i < BTRFSIC_BLOCK_LINK_HASHTABLE_SIZE; i++)
0478         INIT_LIST_HEAD(h->table + i);
0479 }
0480 
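/*
 * A block link is hashed over both of its endpoints: the dev_bytenr
 * (shifted right by 16) and block_device pointer of the referred-to
 * block and of the referring block are XORed together and masked to
 * the table size, matching the computation in the lookup helper below.
 */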
0481 static void btrfsic_block_link_hashtable_add(
0482         struct btrfsic_block_link *l,
0483         struct btrfsic_block_link_hashtable *h)
0484 {
0485     const unsigned int hashval =
0486         (((unsigned int)(l->block_ref_to->dev_bytenr >> 16)) ^
0487          ((unsigned int)(l->block_ref_from->dev_bytenr >> 16)) ^
0488          ((unsigned int)((uintptr_t)l->block_ref_to->dev_state->bdev)) ^
0489          ((unsigned int)((uintptr_t)l->block_ref_from->dev_state->bdev)))
0490          & (BTRFSIC_BLOCK_LINK_HASHTABLE_SIZE - 1);
0491 
0492     BUG_ON(NULL == l->block_ref_to);
0493     BUG_ON(NULL == l->block_ref_from);
0494     list_add(&l->collision_resolving_node, h->table + hashval);
0495 }
0496 
0497 static void btrfsic_block_link_hashtable_remove(struct btrfsic_block_link *l)
0498 {
0499     list_del(&l->collision_resolving_node);
0500 }
0501 
0502 static struct btrfsic_block_link *btrfsic_block_link_hashtable_lookup(
0503         struct block_device *bdev_ref_to,
0504         u64 dev_bytenr_ref_to,
0505         struct block_device *bdev_ref_from,
0506         u64 dev_bytenr_ref_from,
0507         struct btrfsic_block_link_hashtable *h)
0508 {
0509     const unsigned int hashval =
0510         (((unsigned int)(dev_bytenr_ref_to >> 16)) ^
0511          ((unsigned int)(dev_bytenr_ref_from >> 16)) ^
0512          ((unsigned int)((uintptr_t)bdev_ref_to)) ^
0513          ((unsigned int)((uintptr_t)bdev_ref_from))) &
0514          (BTRFSIC_BLOCK_LINK_HASHTABLE_SIZE - 1);
0515     struct btrfsic_block_link *l;
0516 
0517     list_for_each_entry(l, h->table + hashval, collision_resolving_node) {
0518         BUG_ON(NULL == l->block_ref_to);
0519         BUG_ON(NULL == l->block_ref_from);
0520         if (l->block_ref_to->dev_state->bdev == bdev_ref_to &&
0521             l->block_ref_to->dev_bytenr == dev_bytenr_ref_to &&
0522             l->block_ref_from->dev_state->bdev == bdev_ref_from &&
0523             l->block_ref_from->dev_bytenr == dev_bytenr_ref_from)
0524             return l;
0525     }
0526 
0527     return NULL;
0528 }
0529 
0530 static void btrfsic_dev_state_hashtable_init(
0531         struct btrfsic_dev_state_hashtable *h)
0532 {
0533     int i;
0534 
0535     for (i = 0; i < BTRFSIC_DEV2STATE_HASHTABLE_SIZE; i++)
0536         INIT_LIST_HEAD(h->table + i);
0537 }
0538 
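/* The device state table is indexed simply by the low bits of bd_dev. */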
0539 static void btrfsic_dev_state_hashtable_add(
0540         struct btrfsic_dev_state *ds,
0541         struct btrfsic_dev_state_hashtable *h)
0542 {
0543     const unsigned int hashval =
0544         (((unsigned int)((uintptr_t)ds->bdev->bd_dev)) &
0545          (BTRFSIC_DEV2STATE_HASHTABLE_SIZE - 1));
0546 
0547     list_add(&ds->collision_resolving_node, h->table + hashval);
0548 }
0549 
0550 static void btrfsic_dev_state_hashtable_remove(struct btrfsic_dev_state *ds)
0551 {
0552     list_del(&ds->collision_resolving_node);
0553 }
0554 
0555 static struct btrfsic_dev_state *btrfsic_dev_state_hashtable_lookup(dev_t dev,
0556         struct btrfsic_dev_state_hashtable *h)
0557 {
0558     const unsigned int hashval =
0559         dev & (BTRFSIC_DEV2STATE_HASHTABLE_SIZE - 1);
0560     struct btrfsic_dev_state *ds;
0561 
0562     list_for_each_entry(ds, h->table + hashval, collision_resolving_node) {
0563         if (ds->bdev->bd_dev == dev)
0564             return ds;
0565     }
0566 
0567     return NULL;
0568 }
0569 
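/*
 * Build the initial in-memory picture of the filesystem: read every
 * super block mirror of every device (via
 * btrfsic_process_superblock_dev_mirror()), remember the copy with
 * the highest generation as state->latest_superblock, and then walk
 * the root, chunk and log trees referenced by that copy, once per
 * mirror, with btrfsic_process_metablock().
 */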
0570 static int btrfsic_process_superblock(struct btrfsic_state *state,
0571                       struct btrfs_fs_devices *fs_devices)
0572 {
0573     struct btrfs_super_block *selected_super;
0574     struct list_head *dev_head = &fs_devices->devices;
0575     struct btrfs_device *device;
0576     struct btrfsic_dev_state *selected_dev_state = NULL;
0577     int ret = 0;
0578     int pass;
0579 
0580     selected_super = kzalloc(sizeof(*selected_super), GFP_NOFS);
0581     if (!selected_super)
0582         return -ENOMEM;
0583 
0584     list_for_each_entry(device, dev_head, dev_list) {
0585         int i;
0586         struct btrfsic_dev_state *dev_state;
0587 
0588         if (!device->bdev || !device->name)
0589             continue;
0590 
0591         dev_state = btrfsic_dev_state_lookup(device->bdev->bd_dev);
0592         BUG_ON(NULL == dev_state);
0593         for (i = 0; i < BTRFS_SUPER_MIRROR_MAX; i++) {
0594             ret = btrfsic_process_superblock_dev_mirror(
0595                     state, dev_state, device, i,
0596                     &selected_dev_state, selected_super);
0597             if (0 != ret && 0 == i) {
0598                 kfree(selected_super);
0599                 return ret;
0600             }
0601         }
0602     }
0603 
0604     if (NULL == state->latest_superblock) {
0605         pr_info("btrfsic: no superblock found!\n");
0606         kfree(selected_super);
0607         return -1;
0608     }
0609 
0610     for (pass = 0; pass < 3; pass++) {
0611         int num_copies;
0612         int mirror_num;
0613         u64 next_bytenr;
0614 
0615         switch (pass) {
0616         case 0:
0617             next_bytenr = btrfs_super_root(selected_super);
0618             if (state->print_mask &
0619                 BTRFSIC_PRINT_MASK_ROOT_CHUNK_LOG_TREE_LOCATION)
0620                 pr_info("root@%llu\n", next_bytenr);
0621             break;
0622         case 1:
0623             next_bytenr = btrfs_super_chunk_root(selected_super);
0624             if (state->print_mask &
0625                 BTRFSIC_PRINT_MASK_ROOT_CHUNK_LOG_TREE_LOCATION)
0626                 pr_info("chunk@%llu\n", next_bytenr);
0627             break;
0628         case 2:
0629             next_bytenr = btrfs_super_log_root(selected_super);
0630             if (0 == next_bytenr)
0631                 continue;
0632             if (state->print_mask &
0633                 BTRFSIC_PRINT_MASK_ROOT_CHUNK_LOG_TREE_LOCATION)
0634                 pr_info("log@%llu\n", next_bytenr);
0635             break;
0636         }
0637 
0638         num_copies = btrfs_num_copies(state->fs_info, next_bytenr,
0639                           state->metablock_size);
0640         if (state->print_mask & BTRFSIC_PRINT_MASK_NUM_COPIES)
0641             pr_info("num_copies(log_bytenr=%llu) = %d\n",
0642                    next_bytenr, num_copies);
0643 
0644         for (mirror_num = 1; mirror_num <= num_copies; mirror_num++) {
0645             struct btrfsic_block *next_block;
0646             struct btrfsic_block_data_ctx tmp_next_block_ctx;
0647             struct btrfsic_block_link *l;
0648 
0649             ret = btrfsic_map_block(state, next_bytenr,
0650                         state->metablock_size,
0651                         &tmp_next_block_ctx,
0652                         mirror_num);
0653             if (ret) {
0654                 pr_info("btrfsic: btrfsic_map_block(root @%llu, mirror %d) failed!\n",
0655                        next_bytenr, mirror_num);
0656                 kfree(selected_super);
0657                 return -1;
0658             }
0659 
0660             next_block = btrfsic_block_hashtable_lookup(
0661                     tmp_next_block_ctx.dev->bdev,
0662                     tmp_next_block_ctx.dev_bytenr,
0663                     &state->block_hashtable);
0664             BUG_ON(NULL == next_block);
0665 
0666             l = btrfsic_block_link_hashtable_lookup(
0667                     tmp_next_block_ctx.dev->bdev,
0668                     tmp_next_block_ctx.dev_bytenr,
0669                     state->latest_superblock->dev_state->
0670                     bdev,
0671                     state->latest_superblock->dev_bytenr,
0672                     &state->block_link_hashtable);
0673             BUG_ON(NULL == l);
0674 
0675             ret = btrfsic_read_block(state, &tmp_next_block_ctx);
0676             if (ret < (int)PAGE_SIZE) {
0677                 pr_info("btrfsic: read @logical %llu failed!\n",
0678                        tmp_next_block_ctx.start);
0679                 btrfsic_release_block_ctx(&tmp_next_block_ctx);
0680                 kfree(selected_super);
0681                 return -1;
0682             }
0683 
0684             ret = btrfsic_process_metablock(state,
0685                             next_block,
0686                             &tmp_next_block_ctx,
0687                             BTRFS_MAX_LEVEL + 3, 1);
0688             btrfsic_release_block_ctx(&tmp_next_block_ctx);
0689         }
0690     }
0691 
0692     kfree(selected_super);
0693     return ret;
0694 }
0695 
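/*
 * Examine one super block copy of one device: read it through the
 * block device's page cache, skip it if the magic, UUID, nodesize or
 * sectorsize do not match, otherwise register it as a block object,
 * track the copy with the highest generation via *selected_dev_state
 * and selected_super, and record block links from this super block to
 * the root, chunk and log tree roots it references.
 */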
0696 static int btrfsic_process_superblock_dev_mirror(
0697         struct btrfsic_state *state,
0698         struct btrfsic_dev_state *dev_state,
0699         struct btrfs_device *device,
0700         int superblock_mirror_num,
0701         struct btrfsic_dev_state **selected_dev_state,
0702         struct btrfs_super_block *selected_super)
0703 {
0704     struct btrfs_fs_info *fs_info = state->fs_info;
0705     struct btrfs_super_block *super_tmp;
0706     u64 dev_bytenr;
0707     struct btrfsic_block *superblock_tmp;
0708     int pass;
0709     struct block_device *const superblock_bdev = device->bdev;
0710     struct page *page;
0711     struct address_space *mapping = superblock_bdev->bd_inode->i_mapping;
0712     int ret = 0;
0713 
0714     /* super block bytenr is always the unmapped device bytenr */
0715     dev_bytenr = btrfs_sb_offset(superblock_mirror_num);
0716     if (dev_bytenr + BTRFS_SUPER_INFO_SIZE > device->commit_total_bytes)
0717         return -1;
0718 
0719     page = read_cache_page_gfp(mapping, dev_bytenr >> PAGE_SHIFT, GFP_NOFS);
0720     if (IS_ERR(page))
0721         return -1;
0722 
0723     super_tmp = page_address(page);
0724 
0725     if (btrfs_super_bytenr(super_tmp) != dev_bytenr ||
0726         btrfs_super_magic(super_tmp) != BTRFS_MAGIC ||
0727         memcmp(device->uuid, super_tmp->dev_item.uuid, BTRFS_UUID_SIZE) ||
0728         btrfs_super_nodesize(super_tmp) != state->metablock_size ||
0729         btrfs_super_sectorsize(super_tmp) != state->datablock_size) {
0730         ret = 0;
0731         goto out;
0732     }
0733 
0734     superblock_tmp =
0735         btrfsic_block_hashtable_lookup(superblock_bdev,
0736                        dev_bytenr,
0737                        &state->block_hashtable);
0738     if (NULL == superblock_tmp) {
0739         superblock_tmp = btrfsic_block_alloc();
0740         if (NULL == superblock_tmp) {
0741             ret = -1;
0742             goto out;
0743         }
0744         /* for superblock, only the dev_bytenr makes sense */
0745         superblock_tmp->dev_bytenr = dev_bytenr;
0746         superblock_tmp->dev_state = dev_state;
0747         superblock_tmp->logical_bytenr = dev_bytenr;
0748         superblock_tmp->generation = btrfs_super_generation(super_tmp);
0749         superblock_tmp->is_metadata = 1;
0750         superblock_tmp->is_superblock = 1;
0751         superblock_tmp->is_iodone = 1;
0752         superblock_tmp->never_written = 0;
0753         superblock_tmp->mirror_num = 1 + superblock_mirror_num;
0754         if (state->print_mask & BTRFSIC_PRINT_MASK_SUPERBLOCK_WRITE)
0755             btrfs_info_in_rcu(fs_info,
0756             "new initial S-block (bdev %p, %s) @%llu (%pg/%llu/%d)",
0757                      superblock_bdev,
0758                      rcu_str_deref(device->name), dev_bytenr,
0759                      dev_state->bdev, dev_bytenr,
0760                      superblock_mirror_num);
0761         list_add(&superblock_tmp->all_blocks_node,
0762              &state->all_blocks_list);
0763         btrfsic_block_hashtable_add(superblock_tmp,
0764                         &state->block_hashtable);
0765     }
0766 
0767     /* select the one with the highest generation field */
0768     if (btrfs_super_generation(super_tmp) >
0769         state->max_superblock_generation ||
0770         0 == state->max_superblock_generation) {
0771         memcpy(selected_super, super_tmp, sizeof(*selected_super));
0772         *selected_dev_state = dev_state;
0773         state->max_superblock_generation =
0774             btrfs_super_generation(super_tmp);
0775         state->latest_superblock = superblock_tmp;
0776     }
0777 
0778     for (pass = 0; pass < 3; pass++) {
0779         u64 next_bytenr;
0780         int num_copies;
0781         int mirror_num;
0782         const char *additional_string = NULL;
0783         struct btrfs_disk_key tmp_disk_key;
0784 
0785         tmp_disk_key.type = BTRFS_ROOT_ITEM_KEY;
0786         tmp_disk_key.offset = 0;
0787         switch (pass) {
0788         case 0:
0789             btrfs_set_disk_key_objectid(&tmp_disk_key,
0790                             BTRFS_ROOT_TREE_OBJECTID);
0791             additional_string = "initial root ";
0792             next_bytenr = btrfs_super_root(super_tmp);
0793             break;
0794         case 1:
0795             btrfs_set_disk_key_objectid(&tmp_disk_key,
0796                             BTRFS_CHUNK_TREE_OBJECTID);
0797             additional_string = "initial chunk ";
0798             next_bytenr = btrfs_super_chunk_root(super_tmp);
0799             break;
0800         case 2:
0801             btrfs_set_disk_key_objectid(&tmp_disk_key,
0802                             BTRFS_TREE_LOG_OBJECTID);
0803             additional_string = "initial log ";
0804             next_bytenr = btrfs_super_log_root(super_tmp);
0805             if (0 == next_bytenr)
0806                 continue;
0807             break;
0808         }
0809 
0810         num_copies = btrfs_num_copies(fs_info, next_bytenr,
0811                           state->metablock_size);
0812         if (state->print_mask & BTRFSIC_PRINT_MASK_NUM_COPIES)
0813             pr_info("num_copies(log_bytenr=%llu) = %d\n",
0814                    next_bytenr, num_copies);
0815         for (mirror_num = 1; mirror_num <= num_copies; mirror_num++) {
0816             struct btrfsic_block *next_block;
0817             struct btrfsic_block_data_ctx tmp_next_block_ctx;
0818             struct btrfsic_block_link *l;
0819 
0820             if (btrfsic_map_block(state, next_bytenr,
0821                           state->metablock_size,
0822                           &tmp_next_block_ctx,
0823                           mirror_num)) {
0824                 pr_info("btrfsic: btrfsic_map_block(bytenr @%llu, mirror %d) failed!\n",
0825                        next_bytenr, mirror_num);
0826                 ret = -1;
0827                 goto out;
0828             }
0829 
0830             next_block = btrfsic_block_lookup_or_add(
0831                     state, &tmp_next_block_ctx,
0832                     additional_string, 1, 1, 0,
0833                     mirror_num, NULL);
0834             if (NULL == next_block) {
0835                 btrfsic_release_block_ctx(&tmp_next_block_ctx);
0836                 ret = -1;
0837                 goto out;
0838             }
0839 
0840             next_block->disk_key = tmp_disk_key;
0841             next_block->generation = BTRFSIC_GENERATION_UNKNOWN;
0842             l = btrfsic_block_link_lookup_or_add(
0843                     state, &tmp_next_block_ctx,
0844                     next_block, superblock_tmp,
0845                     BTRFSIC_GENERATION_UNKNOWN);
0846             btrfsic_release_block_ctx(&tmp_next_block_ctx);
0847             if (NULL == l) {
0848                 ret = -1;
0849                 goto out;
0850             }
0851         }
0852     }
0853     if (state->print_mask & BTRFSIC_PRINT_MASK_INITIAL_ALL_TREES)
0854         btrfsic_dump_tree_sub(state, superblock_tmp, 0);
0855 
0856 out:
0857     put_page(page);
0858     return ret;
0859 }
0860 
0861 static struct btrfsic_stack_frame *btrfsic_stack_frame_alloc(void)
0862 {
0863     struct btrfsic_stack_frame *sf;
0864 
0865     sf = kzalloc(sizeof(*sf), GFP_NOFS);
0866     if (sf)
0867         sf->magic = BTRFSIC_BLOCK_STACK_FRAME_MAGIC_NUMBER;
0868     return sf;
0869 }
0870 
0871 static void btrfsic_stack_frame_free(struct btrfsic_stack_frame *sf)
0872 {
0873     BUG_ON(!(NULL == sf ||
0874          BTRFSIC_BLOCK_STACK_FRAME_MAGIC_NUMBER == sf->magic));
0875     kfree(sf);
0876 }
0877 
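/*
 * Walk one metadata tree without recursion: descending into a child
 * block allocates a new btrfsic_stack_frame (see the structure
 * definition above) instead of using the kernel stack.  In leaves,
 * ROOT_ITEM keys are followed into the referenced tree roots and,
 * when extent data checking is enabled (check_int_data), EXTENT_DATA
 * items are handed to btrfsic_handle_extent_data(); in nodes, every
 * key pointer is followed.
 */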
0878 static noinline_for_stack int btrfsic_process_metablock(
0879         struct btrfsic_state *state,
0880         struct btrfsic_block *const first_block,
0881         struct btrfsic_block_data_ctx *const first_block_ctx,
0882         int first_limit_nesting, int force_iodone_flag)
0883 {
0884     struct btrfsic_stack_frame initial_stack_frame = { 0 };
0885     struct btrfsic_stack_frame *sf;
0886     struct btrfsic_stack_frame *next_stack;
0887     struct btrfs_header *const first_hdr =
0888         (struct btrfs_header *)first_block_ctx->datav[0];
0889 
0890     BUG_ON(!first_hdr);
0891     sf = &initial_stack_frame;
0892     sf->error = 0;
0893     sf->i = -1;
0894     sf->limit_nesting = first_limit_nesting;
0895     sf->block = first_block;
0896     sf->block_ctx = first_block_ctx;
0897     sf->next_block = NULL;
0898     sf->hdr = first_hdr;
0899     sf->prev = NULL;
0900 
0901 continue_with_new_stack_frame:
0902     sf->block->generation = btrfs_stack_header_generation(sf->hdr);
0903     if (0 == sf->hdr->level) {
0904         struct btrfs_leaf *const leafhdr =
0905             (struct btrfs_leaf *)sf->hdr;
0906 
0907         if (-1 == sf->i) {
0908             sf->nr = btrfs_stack_header_nritems(&leafhdr->header);
0909 
0910             if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE)
0911                 pr_info("leaf %llu items %d generation %llu owner %llu\n",
0912                        sf->block_ctx->start, sf->nr,
0913                        btrfs_stack_header_generation(
0914                            &leafhdr->header),
0915                        btrfs_stack_header_owner(
0916                            &leafhdr->header));
0917         }
0918 
0919 continue_with_current_leaf_stack_frame:
0920         if (0 == sf->num_copies || sf->mirror_num > sf->num_copies) {
0921             sf->i++;
0922             sf->num_copies = 0;
0923         }
0924 
0925         if (sf->i < sf->nr) {
0926             struct btrfs_item disk_item;
0927             u32 disk_item_offset =
0928                 (uintptr_t)(leafhdr->items + sf->i) -
0929                 (uintptr_t)leafhdr;
0930             struct btrfs_disk_key *disk_key;
0931             u8 type;
0932             u32 item_offset;
0933             u32 item_size;
0934 
0935             if (disk_item_offset + sizeof(struct btrfs_item) >
0936                 sf->block_ctx->len) {
0937 leaf_item_out_of_bounds_error:
0938                 pr_info(
0939         "btrfsic: leaf item out of bounds at logical %llu, dev %pg\n",
0940                        sf->block_ctx->start,
0941                        sf->block_ctx->dev->bdev);
0942                 goto one_stack_frame_backwards;
0943             }
0944             btrfsic_read_from_block_data(sf->block_ctx,
0945                              &disk_item,
0946                              disk_item_offset,
0947                              sizeof(struct btrfs_item));
0948             item_offset = btrfs_stack_item_offset(&disk_item);
0949             item_size = btrfs_stack_item_size(&disk_item);
0950             disk_key = &disk_item.key;
0951             type = btrfs_disk_key_type(disk_key);
0952 
0953             if (BTRFS_ROOT_ITEM_KEY == type) {
0954                 struct btrfs_root_item root_item;
0955                 u32 root_item_offset;
0956                 u64 next_bytenr;
0957 
0958                 root_item_offset = item_offset +
0959                     offsetof(struct btrfs_leaf, items);
0960                 if (root_item_offset + item_size >
0961                     sf->block_ctx->len)
0962                     goto leaf_item_out_of_bounds_error;
0963                 btrfsic_read_from_block_data(
0964                     sf->block_ctx, &root_item,
0965                     root_item_offset,
0966                     item_size);
0967                 next_bytenr = btrfs_root_bytenr(&root_item);
0968 
0969                 sf->error =
0970                     btrfsic_create_link_to_next_block(
0971                         state,
0972                         sf->block,
0973                         sf->block_ctx,
0974                         next_bytenr,
0975                         sf->limit_nesting,
0976                         &sf->next_block_ctx,
0977                         &sf->next_block,
0978                         force_iodone_flag,
0979                         &sf->num_copies,
0980                         &sf->mirror_num,
0981                         disk_key,
0982                         btrfs_root_generation(
0983                         &root_item));
0984                 if (sf->error)
0985                     goto one_stack_frame_backwards;
0986 
0987                 if (NULL != sf->next_block) {
0988                     struct btrfs_header *const next_hdr =
0989                         (struct btrfs_header *)
0990                         sf->next_block_ctx.datav[0];
0991 
0992                     next_stack =
0993                         btrfsic_stack_frame_alloc();
0994                     if (NULL == next_stack) {
0995                         sf->error = -1;
0996                         btrfsic_release_block_ctx(
0997                                 &sf->
0998                                 next_block_ctx);
0999                         goto one_stack_frame_backwards;
1000                     }
1001 
1002                     next_stack->i = -1;
1003                     next_stack->block = sf->next_block;
1004                     next_stack->block_ctx =
1005                         &sf->next_block_ctx;
1006                     next_stack->next_block = NULL;
1007                     next_stack->hdr = next_hdr;
1008                     next_stack->limit_nesting =
1009                         sf->limit_nesting - 1;
1010                     next_stack->prev = sf;
1011                     sf = next_stack;
1012                     goto continue_with_new_stack_frame;
1013                 }
1014             } else if (BTRFS_EXTENT_DATA_KEY == type &&
1015                    state->include_extent_data) {
1016                 sf->error = btrfsic_handle_extent_data(
1017                         state,
1018                         sf->block,
1019                         sf->block_ctx,
1020                         item_offset,
1021                         force_iodone_flag);
1022                 if (sf->error)
1023                     goto one_stack_frame_backwards;
1024             }
1025 
1026             goto continue_with_current_leaf_stack_frame;
1027         }
1028     } else {
1029         struct btrfs_node *const nodehdr = (struct btrfs_node *)sf->hdr;
1030 
1031         if (-1 == sf->i) {
1032             sf->nr = btrfs_stack_header_nritems(&nodehdr->header);
1033 
1034             if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE)
1035                 pr_info("node %llu level %d items %d generation %llu owner %llu\n",
1036                        sf->block_ctx->start,
1037                        nodehdr->header.level, sf->nr,
1038                        btrfs_stack_header_generation(
1039                        &nodehdr->header),
1040                        btrfs_stack_header_owner(
1041                        &nodehdr->header));
1042         }
1043 
1044 continue_with_current_node_stack_frame:
1045         if (0 == sf->num_copies || sf->mirror_num > sf->num_copies) {
1046             sf->i++;
1047             sf->num_copies = 0;
1048         }
1049 
1050         if (sf->i < sf->nr) {
1051             struct btrfs_key_ptr key_ptr;
1052             u32 key_ptr_offset;
1053             u64 next_bytenr;
1054 
1055             key_ptr_offset = (uintptr_t)(nodehdr->ptrs + sf->i) -
1056                       (uintptr_t)nodehdr;
1057             if (key_ptr_offset + sizeof(struct btrfs_key_ptr) >
1058                 sf->block_ctx->len) {
1059                 pr_info(
1060         "btrfsic: node item out of bounce at logical %llu, dev %pg\n",
1061                        sf->block_ctx->start,
1062                        sf->block_ctx->dev->bdev);
1063                 goto one_stack_frame_backwards;
1064             }
1065             btrfsic_read_from_block_data(
1066                 sf->block_ctx, &key_ptr, key_ptr_offset,
1067                 sizeof(struct btrfs_key_ptr));
1068             next_bytenr = btrfs_stack_key_blockptr(&key_ptr);
1069 
1070             sf->error = btrfsic_create_link_to_next_block(
1071                     state,
1072                     sf->block,
1073                     sf->block_ctx,
1074                     next_bytenr,
1075                     sf->limit_nesting,
1076                     &sf->next_block_ctx,
1077                     &sf->next_block,
1078                     force_iodone_flag,
1079                     &sf->num_copies,
1080                     &sf->mirror_num,
1081                     &key_ptr.key,
1082                     btrfs_stack_key_generation(&key_ptr));
1083             if (sf->error)
1084                 goto one_stack_frame_backwards;
1085 
1086             if (NULL != sf->next_block) {
1087                 struct btrfs_header *const next_hdr =
1088                     (struct btrfs_header *)
1089                     sf->next_block_ctx.datav[0];
1090 
1091                 next_stack = btrfsic_stack_frame_alloc();
1092                 if (NULL == next_stack) {
1093                     sf->error = -1;
1094                     goto one_stack_frame_backwards;
1095                 }
1096 
1097                 next_stack->i = -1;
1098                 next_stack->block = sf->next_block;
1099                 next_stack->block_ctx = &sf->next_block_ctx;
1100                 next_stack->next_block = NULL;
1101                 next_stack->hdr = next_hdr;
1102                 next_stack->limit_nesting =
1103                     sf->limit_nesting - 1;
1104                 next_stack->prev = sf;
1105                 sf = next_stack;
1106                 goto continue_with_new_stack_frame;
1107             }
1108 
1109             goto continue_with_current_node_stack_frame;
1110         }
1111     }
1112 
1113 one_stack_frame_backwards:
1114     if (NULL != sf->prev) {
1115         struct btrfsic_stack_frame *const prev = sf->prev;
1116 
1117         /* the one for the initial block is freed in the caller */
1118         btrfsic_release_block_ctx(sf->block_ctx);
1119 
1120         if (sf->error) {
1121             prev->error = sf->error;
1122             btrfsic_stack_frame_free(sf);
1123             sf = prev;
1124             goto one_stack_frame_backwards;
1125         }
1126 
1127         btrfsic_stack_frame_free(sf);
1128         sf = prev;
1129         goto continue_with_new_stack_frame;
1130     } else {
1131         BUG_ON(&initial_stack_frame != sf);
1132     }
1133 
1134     return sf->error;
1135 }
1136 
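/*
 * Copy 'len' bytes starting at 'offset' within the block described by
 * block_ctx into 'dstv'.  The block contents are held in a vector of
 * per-page buffers (block_ctx->datav), so the copy proceeds page by
 * page and accounts for the offset of the block inside its first page.
 */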
1137 static void btrfsic_read_from_block_data(
1138     struct btrfsic_block_data_ctx *block_ctx,
1139     void *dstv, u32 offset, size_t len)
1140 {
1141     size_t cur;
1142     size_t pgoff;
1143     char *kaddr;
1144     char *dst = (char *)dstv;
1145     size_t start_offset = offset_in_page(block_ctx->start);
1146     unsigned long i = (start_offset + offset) >> PAGE_SHIFT;
1147 
1148     WARN_ON(offset + len > block_ctx->len);
1149     pgoff = offset_in_page(start_offset + offset);
1150 
1151     while (len > 0) {
1152         cur = min(len, ((size_t)PAGE_SIZE - pgoff));
1153         BUG_ON(i >= DIV_ROUND_UP(block_ctx->len, PAGE_SIZE));
1154         kaddr = block_ctx->datav[i];
1155         memcpy(dst, kaddr + pgoff, cur);
1156 
1157         dst += cur;
1158         len -= cur;
1159         pgoff = 0;
1160         i++;
1161     }
1162 }
1163 
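/*
 * Follow one reference from 'block' to the metadata block at
 * 'next_bytenr' for the mirror selected by *mirror_nump: map the
 * mirror, look up or create the btrfsic_block for it, look up or
 * create the block link between the two blocks, and, if the link is
 * new and the nesting limit has not been reached, read the child
 * block and return it in *next_blockp so the caller can descend into
 * it.  *mirror_nump is incremented so that repeated calls iterate
 * over all mirrors.
 */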
1164 static int btrfsic_create_link_to_next_block(
1165         struct btrfsic_state *state,
1166         struct btrfsic_block *block,
1167         struct btrfsic_block_data_ctx *block_ctx,
1168         u64 next_bytenr,
1169         int limit_nesting,
1170         struct btrfsic_block_data_ctx *next_block_ctx,
1171         struct btrfsic_block **next_blockp,
1172         int force_iodone_flag,
1173         int *num_copiesp, int *mirror_nump,
1174         struct btrfs_disk_key *disk_key,
1175         u64 parent_generation)
1176 {
1177     struct btrfs_fs_info *fs_info = state->fs_info;
1178     struct btrfsic_block *next_block = NULL;
1179     int ret;
1180     struct btrfsic_block_link *l;
1181     int did_alloc_block_link;
1182     int block_was_created;
1183 
1184     *next_blockp = NULL;
1185     if (0 == *num_copiesp) {
1186         *num_copiesp = btrfs_num_copies(fs_info, next_bytenr,
1187                         state->metablock_size);
1188         if (state->print_mask & BTRFSIC_PRINT_MASK_NUM_COPIES)
1189             pr_info("num_copies(log_bytenr=%llu) = %d\n",
1190                    next_bytenr, *num_copiesp);
1191         *mirror_nump = 1;
1192     }
1193 
1194     if (*mirror_nump > *num_copiesp)
1195         return 0;
1196 
1197     if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE)
1198         pr_info("btrfsic_create_link_to_next_block(mirror_num=%d)\n",
1199                *mirror_nump);
1200     ret = btrfsic_map_block(state, next_bytenr,
1201                 state->metablock_size,
1202                 next_block_ctx, *mirror_nump);
1203     if (ret) {
1204         pr_info("btrfsic: btrfsic_map_block(@%llu, mirror=%d) failed!\n",
1205                next_bytenr, *mirror_nump);
1206         btrfsic_release_block_ctx(next_block_ctx);
1207         *next_blockp = NULL;
1208         return -1;
1209     }
1210 
1211     next_block = btrfsic_block_lookup_or_add(state,
1212                          next_block_ctx, "referenced ",
1213                          1, force_iodone_flag,
1214                          !force_iodone_flag,
1215                          *mirror_nump,
1216                          &block_was_created);
1217     if (NULL == next_block) {
1218         btrfsic_release_block_ctx(next_block_ctx);
1219         *next_blockp = NULL;
1220         return -1;
1221     }
1222     if (block_was_created) {
1223         l = NULL;
1224         next_block->generation = BTRFSIC_GENERATION_UNKNOWN;
1225     } else {
1226         if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE) {
1227             if (next_block->logical_bytenr != next_bytenr &&
1228                 !(!next_block->is_metadata &&
1229                   0 == next_block->logical_bytenr))
1230                 pr_info(
1231 "referenced block @%llu (%pg/%llu/%d) found in hash table, %c, bytenr mismatch (!= stored %llu)\n",
1232                        next_bytenr, next_block_ctx->dev->bdev,
1233                        next_block_ctx->dev_bytenr, *mirror_nump,
1234                        btrfsic_get_block_type(state,
1235                                   next_block),
1236                        next_block->logical_bytenr);
1237             else
1238                 pr_info(
1239         "referenced block @%llu (%pg/%llu/%d) found in hash table, %c\n",
1240                        next_bytenr, next_block_ctx->dev->bdev,
1241                        next_block_ctx->dev_bytenr, *mirror_nump,
1242                        btrfsic_get_block_type(state,
1243                                   next_block));
1244         }
1245         next_block->logical_bytenr = next_bytenr;
1246 
1247         next_block->mirror_num = *mirror_nump;
1248         l = btrfsic_block_link_hashtable_lookup(
1249                 next_block_ctx->dev->bdev,
1250                 next_block_ctx->dev_bytenr,
1251                 block_ctx->dev->bdev,
1252                 block_ctx->dev_bytenr,
1253                 &state->block_link_hashtable);
1254     }
1255 
1256     next_block->disk_key = *disk_key;
1257     if (NULL == l) {
1258         l = btrfsic_block_link_alloc();
1259         if (NULL == l) {
1260             btrfsic_release_block_ctx(next_block_ctx);
1261             *next_blockp = NULL;
1262             return -1;
1263         }
1264 
1265         did_alloc_block_link = 1;
1266         l->block_ref_to = next_block;
1267         l->block_ref_from = block;
1268         l->ref_cnt = 1;
1269         l->parent_generation = parent_generation;
1270 
1271         if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE)
1272             btrfsic_print_add_link(state, l);
1273 
1274         list_add(&l->node_ref_to, &block->ref_to_list);
1275         list_add(&l->node_ref_from, &next_block->ref_from_list);
1276 
1277         btrfsic_block_link_hashtable_add(l,
1278                          &state->block_link_hashtable);
1279     } else {
1280         did_alloc_block_link = 0;
1281         if (0 == limit_nesting) {
1282             l->ref_cnt++;
1283             l->parent_generation = parent_generation;
1284             if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE)
1285                 btrfsic_print_add_link(state, l);
1286         }
1287     }
1288 
1289     if (limit_nesting > 0 && did_alloc_block_link) {
1290         ret = btrfsic_read_block(state, next_block_ctx);
1291         if (ret < (int)next_block_ctx->len) {
1292             pr_info("btrfsic: read block @logical %llu failed!\n",
1293                    next_bytenr);
1294             btrfsic_release_block_ctx(next_block_ctx);
1295             *next_blockp = NULL;
1296             return -1;
1297         }
1298 
1299         *next_blockp = next_block;
1300     } else {
1301         *next_blockp = NULL;
1302     }
1303     (*mirror_nump)++;
1304 
1305     return 0;
1306 }
1307 
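/*
 * Handle one EXTENT_DATA item of a leaf when data checking is
 * enabled: for regular (BTRFS_FILE_EXTENT_REG), non-hole extents the
 * referenced data area is split into chunks of at most
 * datablock_size bytes, and for every chunk and every mirror a
 * btrfsic_block and a block link from the leaf to that data block are
 * looked up or created.
 */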
1308 static int btrfsic_handle_extent_data(
1309         struct btrfsic_state *state,
1310         struct btrfsic_block *block,
1311         struct btrfsic_block_data_ctx *block_ctx,
1312         u32 item_offset, int force_iodone_flag)
1313 {
1314     struct btrfs_fs_info *fs_info = state->fs_info;
1315     struct btrfs_file_extent_item file_extent_item;
1316     u64 file_extent_item_offset;
1317     u64 next_bytenr;
1318     u64 num_bytes;
1319     u64 generation;
1320     struct btrfsic_block_link *l;
1321     int ret;
1322 
1323     file_extent_item_offset = offsetof(struct btrfs_leaf, items) +
1324                   item_offset;
1325     if (file_extent_item_offset +
1326         offsetof(struct btrfs_file_extent_item, disk_num_bytes) >
1327         block_ctx->len) {
1328         pr_info("btrfsic: file item out of bounce at logical %llu, dev %pg\n",
1329                block_ctx->start, block_ctx->dev->bdev);
1330         return -1;
1331     }
1332 
1333     btrfsic_read_from_block_data(block_ctx, &file_extent_item,
1334         file_extent_item_offset,
1335         offsetof(struct btrfs_file_extent_item, disk_num_bytes));
1336     if (BTRFS_FILE_EXTENT_REG != file_extent_item.type ||
1337         btrfs_stack_file_extent_disk_bytenr(&file_extent_item) == 0) {
1338         if (state->print_mask & BTRFSIC_PRINT_MASK_VERY_VERBOSE)
1339             pr_info("extent_data: type %u, disk_bytenr = %llu\n",
1340                    file_extent_item.type,
1341                    btrfs_stack_file_extent_disk_bytenr(
1342                    &file_extent_item));
1343         return 0;
1344     }
1345 
1346     if (file_extent_item_offset + sizeof(struct btrfs_file_extent_item) >
1347         block_ctx->len) {
1348         pr_info("btrfsic: file item out of bounce at logical %llu, dev %pg\n",
1349                block_ctx->start, block_ctx->dev->bdev);
1350         return -1;
1351     }
1352     btrfsic_read_from_block_data(block_ctx, &file_extent_item,
1353                      file_extent_item_offset,
1354                      sizeof(struct btrfs_file_extent_item));
1355     next_bytenr = btrfs_stack_file_extent_disk_bytenr(&file_extent_item);
1356     if (btrfs_stack_file_extent_compression(&file_extent_item) ==
1357         BTRFS_COMPRESS_NONE) {
1358         next_bytenr += btrfs_stack_file_extent_offset(&file_extent_item);
1359         num_bytes = btrfs_stack_file_extent_num_bytes(&file_extent_item);
1360     } else {
1361         num_bytes = btrfs_stack_file_extent_disk_num_bytes(&file_extent_item);
1362     }
1363     generation = btrfs_stack_file_extent_generation(&file_extent_item);
1364 
1365     if (state->print_mask & BTRFSIC_PRINT_MASK_VERY_VERBOSE)
1366         pr_info("extent_data: type %u, disk_bytenr = %llu, offset = %llu, num_bytes = %llu\n",
1367                file_extent_item.type,
1368                btrfs_stack_file_extent_disk_bytenr(&file_extent_item),
1369                btrfs_stack_file_extent_offset(&file_extent_item),
1370                num_bytes);
1371     while (num_bytes > 0) {
1372         u32 chunk_len;
1373         int num_copies;
1374         int mirror_num;
1375 
1376         if (num_bytes > state->datablock_size)
1377             chunk_len = state->datablock_size;
1378         else
1379             chunk_len = num_bytes;
1380 
1381         num_copies = btrfs_num_copies(fs_info, next_bytenr,
1382                           state->datablock_size);
1383         if (state->print_mask & BTRFSIC_PRINT_MASK_NUM_COPIES)
1384             pr_info("num_copies(log_bytenr=%llu) = %d\n",
1385                    next_bytenr, num_copies);
1386         for (mirror_num = 1; mirror_num <= num_copies; mirror_num++) {
1387             struct btrfsic_block_data_ctx next_block_ctx;
1388             struct btrfsic_block *next_block;
1389             int block_was_created;
1390 
1391             if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE)
1392                 pr_info("btrfsic_handle_extent_data(mirror_num=%d)\n",
1393                     mirror_num);
1394             if (state->print_mask & BTRFSIC_PRINT_MASK_VERY_VERBOSE)
1395                 pr_info("\tdisk_bytenr = %llu, num_bytes %u\n",
1396                        next_bytenr, chunk_len);
1397             ret = btrfsic_map_block(state, next_bytenr,
1398                         chunk_len, &next_block_ctx,
1399                         mirror_num);
1400             if (ret) {
1401                 pr_info("btrfsic: btrfsic_map_block(@%llu, mirror=%d) failed!\n",
1402                        next_bytenr, mirror_num);
1403                 return -1;
1404             }
1405 
1406             next_block = btrfsic_block_lookup_or_add(
1407                     state,
1408                     &next_block_ctx,
1409                     "referenced ",
1410                     0,
1411                     force_iodone_flag,
1412                     !force_iodone_flag,
1413                     mirror_num,
1414                     &block_was_created);
1415             if (NULL == next_block) {
1416                 btrfsic_release_block_ctx(&next_block_ctx);
1417                 return -1;
1418             }
1419             if (!block_was_created) {
1420                 if ((state->print_mask &
1421                      BTRFSIC_PRINT_MASK_VERBOSE) &&
1422                     next_block->logical_bytenr != next_bytenr &&
1423                     !(!next_block->is_metadata &&
1424                       0 == next_block->logical_bytenr)) {
1425                     pr_info(
1426 "referenced block @%llu (%pg/%llu/%d) found in hash table, D, bytenr mismatch (!= stored %llu)\n",
1427                            next_bytenr,
1428                            next_block_ctx.dev->bdev,
1429                            next_block_ctx.dev_bytenr,
1430                            mirror_num,
1431                            next_block->logical_bytenr);
1432                 }
1433                 next_block->logical_bytenr = next_bytenr;
1434                 next_block->mirror_num = mirror_num;
1435             }
1436 
1437             l = btrfsic_block_link_lookup_or_add(state,
1438                                  &next_block_ctx,
1439                                  next_block, block,
1440                                  generation);
1441             btrfsic_release_block_ctx(&next_block_ctx);
1442             if (NULL == l)
1443                 return -1;
1444         }
1445 
1446         next_bytenr += chunk_len;
1447         num_bytes -= chunk_len;
1448     }
1449 
1450     return 0;
1451 }
1452 
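     /*
      * Map a logical byte range to its physical location on the given
      * mirror. On success the block data context describes the device
      * state and the physical byte offset; the data and page arrays
      * stay unset until btrfsic_read_block() is called.
      */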
1453 static int btrfsic_map_block(struct btrfsic_state *state, u64 bytenr, u32 len,
1454                  struct btrfsic_block_data_ctx *block_ctx_out,
1455                  int mirror_num)
1456 {
1457     struct btrfs_fs_info *fs_info = state->fs_info;
1458     int ret;
1459     u64 length;
1460     struct btrfs_io_context *multi = NULL;
1461     struct btrfs_device *device;
1462 
1463     length = len;
1464     ret = btrfs_map_block(fs_info, BTRFS_MAP_READ,
1465                   bytenr, &length, &multi, mirror_num);
1466 
1467     if (ret) {
1468         block_ctx_out->start = 0;
1469         block_ctx_out->dev_bytenr = 0;
1470         block_ctx_out->len = 0;
1471         block_ctx_out->dev = NULL;
1472         block_ctx_out->datav = NULL;
1473         block_ctx_out->pagev = NULL;
1474         block_ctx_out->mem_to_free = NULL;
1475 
1476         return ret;
1477     }
1478 
1479     device = multi->stripes[0].dev;
1480     if (test_bit(BTRFS_DEV_STATE_MISSING, &device->dev_state) ||
1481         !device->bdev || !device->name)
1482         block_ctx_out->dev = NULL;
1483     else
1484         block_ctx_out->dev = btrfsic_dev_state_lookup(
1485                             device->bdev->bd_dev);
1486     block_ctx_out->dev_bytenr = multi->stripes[0].physical;
1487     block_ctx_out->start = bytenr;
1488     block_ctx_out->len = len;
1489     block_ctx_out->datav = NULL;
1490     block_ctx_out->pagev = NULL;
1491     block_ctx_out->mem_to_free = NULL;
1492 
1493     kfree(multi);
1494     if (NULL == block_ctx_out->dev) {
1495         ret = -ENXIO;
1496         pr_info("btrfsic: error, cannot lookup dev (#1)!\n");
1497     }
1498 
1499     return ret;
1500 }
1501 
1502 static void btrfsic_release_block_ctx(struct btrfsic_block_data_ctx *block_ctx)
1503 {
1504     if (block_ctx->mem_to_free) {
1505         unsigned int num_pages;
1506 
1507         BUG_ON(!block_ctx->datav);
1508         BUG_ON(!block_ctx->pagev);
1509         num_pages = (block_ctx->len + (u64)PAGE_SIZE - 1) >>
1510                 PAGE_SHIFT;
1511         /* Pages must be unmapped in reverse order */
1512         while (num_pages > 0) {
1513             num_pages--;
1514             if (block_ctx->datav[num_pages])
1515                 block_ctx->datav[num_pages] = NULL;
1516             if (block_ctx->pagev[num_pages]) {
1517                 __free_page(block_ctx->pagev[num_pages]);
1518                 block_ctx->pagev[num_pages] = NULL;
1519             }
1520         }
1521 
1522         kfree(block_ctx->mem_to_free);
1523         block_ctx->mem_to_free = NULL;
1524         block_ctx->pagev = NULL;
1525         block_ctx->datav = NULL;
1526     }
1527 }
1528 
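     /*
      * Read the block described by the context from disk: allocate one
      * page per PAGE_SIZE of the block, submit read bios until all pages
      * are filled and expose the page contents through block_ctx->datav.
      * Returns the number of bytes read on success.
      */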
1529 static int btrfsic_read_block(struct btrfsic_state *state,
1530                   struct btrfsic_block_data_ctx *block_ctx)
1531 {
1532     unsigned int num_pages;
1533     unsigned int i;
1534     size_t size;
1535     u64 dev_bytenr;
1536     int ret;
1537 
1538     BUG_ON(block_ctx->datav);
1539     BUG_ON(block_ctx->pagev);
1540     BUG_ON(block_ctx->mem_to_free);
1541     if (!PAGE_ALIGNED(block_ctx->dev_bytenr)) {
1542         pr_info("btrfsic: read_block() with unaligned bytenr %llu\n",
1543                block_ctx->dev_bytenr);
1544         return -1;
1545     }
1546 
1547     num_pages = (block_ctx->len + (u64)PAGE_SIZE - 1) >>
1548             PAGE_SHIFT;
1549     size = sizeof(*block_ctx->datav) + sizeof(*block_ctx->pagev);
1550     block_ctx->mem_to_free = kcalloc(num_pages, size, GFP_NOFS);
1551     if (!block_ctx->mem_to_free)
1552         return -ENOMEM;
1553     block_ctx->datav = block_ctx->mem_to_free;
1554     block_ctx->pagev = (struct page **)(block_ctx->datav + num_pages);
1555     ret = btrfs_alloc_page_array(num_pages, block_ctx->pagev);
1556     if (ret)
1557         return ret;
1558 
1559     dev_bytenr = block_ctx->dev_bytenr;
1560     for (i = 0; i < num_pages;) {
1561         struct bio *bio;
1562         unsigned int j;
1563 
1564         bio = bio_alloc(block_ctx->dev->bdev, num_pages - i,
1565                 REQ_OP_READ, GFP_NOFS);
1566         bio->bi_iter.bi_sector = dev_bytenr >> 9;
1567 
1568         for (j = i; j < num_pages; j++) {
1569             ret = bio_add_page(bio, block_ctx->pagev[j],
1570                        PAGE_SIZE, 0);
1571             if (PAGE_SIZE != ret)
1572                 break;
1573         }
1574         if (j == i) {
1575             pr_info("btrfsic: error, failed to add a single page!\n");
1576             return -1;
1577         }
1578         if (submit_bio_wait(bio)) {
1579             pr_info("btrfsic: read error at logical %llu dev %pg!\n",
1580                    block_ctx->start, block_ctx->dev->bdev);
1581             bio_put(bio);
1582             return -1;
1583         }
1584         bio_put(bio);
1585         dev_bytenr += (j - i) * PAGE_SIZE;
1586         i = j;
1587     }
1588     for (i = 0; i < num_pages; i++)
1589         block_ctx->datav[i] = page_address(block_ctx->pagev[i]);
1590 
1591     return block_ctx->len;
1592 }
1593 
1594 static void btrfsic_dump_database(struct btrfsic_state *state)
1595 {
1596     const struct btrfsic_block *b_all;
1597 
1598     BUG_ON(NULL == state);
1599 
1600     pr_info("all_blocks_list:\n");
1601     list_for_each_entry(b_all, &state->all_blocks_list, all_blocks_node) {
1602         const struct btrfsic_block_link *l;
1603 
1604         pr_info("%c-block @%llu (%pg/%llu/%d)\n",
1605                btrfsic_get_block_type(state, b_all),
1606                b_all->logical_bytenr, b_all->dev_state->bdev,
1607                b_all->dev_bytenr, b_all->mirror_num);
1608 
1609         list_for_each_entry(l, &b_all->ref_to_list, node_ref_to) {
1610             pr_info(
1611         " %c @%llu (%pg/%llu/%d) refers %u* to %c @%llu (%pg/%llu/%d)\n",
1612                    btrfsic_get_block_type(state, b_all),
1613                    b_all->logical_bytenr, b_all->dev_state->bdev,
1614                    b_all->dev_bytenr, b_all->mirror_num,
1615                    l->ref_cnt,
1616                    btrfsic_get_block_type(state, l->block_ref_to),
1617                    l->block_ref_to->logical_bytenr,
1618                    l->block_ref_to->dev_state->bdev,
1619                    l->block_ref_to->dev_bytenr,
1620                    l->block_ref_to->mirror_num);
1621         }
1622 
1623         list_for_each_entry(l, &b_all->ref_from_list, node_ref_from) {
1624             pr_info(
1625         " %c @%llu (%pg/%llu/%d) is ref %u* from %c @%llu (%pg/%llu/%d)\n",
1626                    btrfsic_get_block_type(state, b_all),
1627                    b_all->logical_bytenr, b_all->dev_state->bdev,
1628                    b_all->dev_bytenr, b_all->mirror_num,
1629                    l->ref_cnt,
1630                    btrfsic_get_block_type(state, l->block_ref_from),
1631                    l->block_ref_from->logical_bytenr,
1632                    l->block_ref_from->dev_state->bdev,
1633                    l->block_ref_from->dev_bytenr,
1634                    l->block_ref_from->mirror_num);
1635         }
1636 
1637         pr_info("\n");
1638     }
1639 }
1640 
1641 /*
1642  * Test whether the disk block contains a tree block (leaf or node)
1643  * (note that this test fails for the super block)
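      * The test checks that the buffer is large enough to hold a metadata
      * block, that the header carries this filesystem's fsid and that the
      * checksum stored in the header matches a checksum computed over the
      * block (excluding the checksum area itself).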
1644  */
1645 static noinline_for_stack int btrfsic_test_for_metadata(
1646         struct btrfsic_state *state,
1647         char **datav, unsigned int num_pages)
1648 {
1649     struct btrfs_fs_info *fs_info = state->fs_info;
1650     SHASH_DESC_ON_STACK(shash, fs_info->csum_shash);
1651     struct btrfs_header *h;
1652     u8 csum[BTRFS_CSUM_SIZE];
1653     unsigned int i;
1654 
1655     if (num_pages * PAGE_SIZE < state->metablock_size)
1656         return 1; /* not metadata */
1657     num_pages = state->metablock_size >> PAGE_SHIFT;
1658     h = (struct btrfs_header *)datav[0];
1659 
1660     if (memcmp(h->fsid, fs_info->fs_devices->fsid, BTRFS_FSID_SIZE))
1661         return 1;
1662 
1663     shash->tfm = fs_info->csum_shash;
1664     crypto_shash_init(shash);
1665 
1666     for (i = 0; i < num_pages; i++) {
1667         u8 *data = i ? datav[i] : (datav[i] + BTRFS_CSUM_SIZE);
1668         size_t sublen = i ? PAGE_SIZE :
1669                     (PAGE_SIZE - BTRFS_CSUM_SIZE);
1670 
1671         crypto_shash_update(shash, data, sublen);
1672     }
1673     crypto_shash_final(shash, csum);
1674     if (memcmp(csum, h->csum, fs_info->csum_size))
1675         return 1;
1676 
1677     return 0; /* is metadata */
1678 }
1679 
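     /*
      * Core write-path check: classify the written range as superblock,
      * other metadata or data, complain if it overwrites a block that is
      * still referenced by the most recent superblock or whose previous
      * write has not completed, rebuild the reference links for metadata,
      * and patch the bio's end_io callback so completion can be tracked.
      */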
1680 static void btrfsic_process_written_block(struct btrfsic_dev_state *dev_state,
1681                       u64 dev_bytenr, char **mapped_datav,
1682                       unsigned int num_pages,
1683                       struct bio *bio, int *bio_is_patched,
1684                       blk_opf_t submit_bio_bh_rw)
1685 {
1686     int is_metadata;
1687     struct btrfsic_block *block;
1688     struct btrfsic_block_data_ctx block_ctx;
1689     int ret;
1690     struct btrfsic_state *state = dev_state->state;
1691     struct block_device *bdev = dev_state->bdev;
1692     unsigned int processed_len;
1693 
1694     if (NULL != bio_is_patched)
1695         *bio_is_patched = 0;
1696 
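         /*
          * A single bio can span several blocks. Each iteration consumes
          * one superblock-, metablock- or datablock-sized chunk from the
          * mapped pages and jumps back here until all pages are processed.
          */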
1697 again:
1698     if (num_pages == 0)
1699         return;
1700 
1701     processed_len = 0;
1702     is_metadata = (0 == btrfsic_test_for_metadata(state, mapped_datav,
1703                               num_pages));
1704 
1705     block = btrfsic_block_hashtable_lookup(bdev, dev_bytenr,
1706                            &state->block_hashtable);
1707     if (NULL != block) {
1708         u64 bytenr = 0;
1709         struct btrfsic_block_link *l, *tmp;
1710 
1711         if (block->is_superblock) {
1712             bytenr = btrfs_super_bytenr((struct btrfs_super_block *)
1713                             mapped_datav[0]);
1714             if (num_pages * PAGE_SIZE <
1715                 BTRFS_SUPER_INFO_SIZE) {
1716                 pr_info("btrfsic: cannot work with too short bios!\n");
1717                 return;
1718             }
1719             is_metadata = 1;
1720             BUG_ON(!PAGE_ALIGNED(BTRFS_SUPER_INFO_SIZE));
1721             processed_len = BTRFS_SUPER_INFO_SIZE;
1722             if (state->print_mask &
1723                 BTRFSIC_PRINT_MASK_TREE_BEFORE_SB_WRITE) {
1724                 pr_info("[before new superblock is written]:\n");
1725                 btrfsic_dump_tree_sub(state, block, 0);
1726             }
1727         }
1728         if (is_metadata) {
1729             if (!block->is_superblock) {
1730                 if (num_pages * PAGE_SIZE <
1731                     state->metablock_size) {
1732                     pr_info("btrfsic: cannot work with too short bios!\n");
1733                     return;
1734                 }
1735                 processed_len = state->metablock_size;
1736                 bytenr = btrfs_stack_header_bytenr(
1737                         (struct btrfs_header *)
1738                         mapped_datav[0]);
1739                 btrfsic_cmp_log_and_dev_bytenr(state, bytenr,
1740                                    dev_state,
1741                                    dev_bytenr);
1742             }
1743             if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE) {
1744                 if (block->logical_bytenr != bytenr &&
1745                     !(!block->is_metadata &&
1746                       block->logical_bytenr == 0))
1747                     pr_info(
1748 "written block @%llu (%pg/%llu/%d) found in hash table, %c, bytenr mismatch (!= stored %llu)\n",
1749                            bytenr, dev_state->bdev,
1750                            dev_bytenr,
1751                            block->mirror_num,
1752                            btrfsic_get_block_type(state,
1753                                       block),
1754                            block->logical_bytenr);
1755                 else
1756                     pr_info(
1757         "written block @%llu (%pg/%llu/%d) found in hash table, %c\n",
1758                            bytenr, dev_state->bdev,
1759                            dev_bytenr, block->mirror_num,
1760                            btrfsic_get_block_type(state,
1761                                       block));
1762             }
1763             block->logical_bytenr = bytenr;
1764         } else {
1765             if (num_pages * PAGE_SIZE <
1766                 state->datablock_size) {
1767                 pr_info("btrfsic: cannot work with too short bios!\n");
1768                 return;
1769             }
1770             processed_len = state->datablock_size;
1771             bytenr = block->logical_bytenr;
1772             if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE)
1773                 pr_info(
1774         "written block @%llu (%pg/%llu/%d) found in hash table, %c\n",
1775                        bytenr, dev_state->bdev, dev_bytenr,
1776                        block->mirror_num,
1777                        btrfsic_get_block_type(state, block));
1778         }
1779 
1780         if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE)
1781             pr_info("ref_to_list: %cE, ref_from_list: %cE\n",
1782                    list_empty(&block->ref_to_list) ? ' ' : '!',
1783                    list_empty(&block->ref_from_list) ? ' ' : '!');
1784         if (btrfsic_is_block_ref_by_superblock(state, block, 0)) {
1785             pr_info(
1786 "btrfs: attempt to overwrite %c-block @%llu (%pg/%llu/%d), old(gen=%llu, objectid=%llu, type=%d, offset=%llu), new(gen=%llu), which is referenced by most recent superblock (superblockgen=%llu)!\n",
1787                    btrfsic_get_block_type(state, block), bytenr,
1788                    dev_state->bdev, dev_bytenr, block->mirror_num,
1789                    block->generation,
1790                    btrfs_disk_key_objectid(&block->disk_key),
1791                    block->disk_key.type,
1792                    btrfs_disk_key_offset(&block->disk_key),
1793                    btrfs_stack_header_generation(
1794                        (struct btrfs_header *) mapped_datav[0]),
1795                    state->max_superblock_generation);
1796             btrfsic_dump_tree(state);
1797         }
1798 
1799         if (!block->is_iodone && !block->never_written) {
1800             pr_info(
1801 "btrfs: attempt to overwrite %c-block @%llu (%pg/%llu/%d), oldgen=%llu, newgen=%llu, which is not yet iodone!\n",
1802                    btrfsic_get_block_type(state, block), bytenr,
1803                    dev_state->bdev, dev_bytenr, block->mirror_num,
1804                    block->generation,
1805                    btrfs_stack_header_generation(
1806                        (struct btrfs_header *)
1807                        mapped_datav[0]));
1808             /* it would not be safe to go on */
1809             btrfsic_dump_tree(state);
1810             goto continue_loop;
1811         }
1812 
1813         /*
1814          * Clear all references of this block. Do not free
1815          * the block itself even if it is not referenced anymore
1816          * because it still carries valuable information
1817          * like whether it was ever written and IO completed.
1818          */
1819         list_for_each_entry_safe(l, tmp, &block->ref_to_list,
1820                      node_ref_to) {
1821             if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE)
1822                 btrfsic_print_rem_link(state, l);
1823             l->ref_cnt--;
1824             if (0 == l->ref_cnt) {
1825                 list_del(&l->node_ref_to);
1826                 list_del(&l->node_ref_from);
1827                 btrfsic_block_link_hashtable_remove(l);
1828                 btrfsic_block_link_free(l);
1829             }
1830         }
1831 
1832         block_ctx.dev = dev_state;
1833         block_ctx.dev_bytenr = dev_bytenr;
1834         block_ctx.start = bytenr;
1835         block_ctx.len = processed_len;
1836         block_ctx.pagev = NULL;
1837         block_ctx.mem_to_free = NULL;
1838         block_ctx.datav = mapped_datav;
1839 
1840         if (is_metadata || state->include_extent_data) {
1841             block->never_written = 0;
1842             block->iodone_w_error = 0;
1843             if (NULL != bio) {
1844                 block->is_iodone = 0;
1845                 BUG_ON(NULL == bio_is_patched);
1846                 if (!*bio_is_patched) {
1847                     block->orig_bio_private =
1848                         bio->bi_private;
1849                     block->orig_bio_end_io =
1850                         bio->bi_end_io;
1851                     block->next_in_same_bio = NULL;
1852                     bio->bi_private = block;
1853                     bio->bi_end_io = btrfsic_bio_end_io;
1854                     *bio_is_patched = 1;
1855                 } else {
1856                     struct btrfsic_block *chained_block =
1857                         (struct btrfsic_block *)
1858                         bio->bi_private;
1859 
1860                     BUG_ON(NULL == chained_block);
1861                     block->orig_bio_private =
1862                         chained_block->orig_bio_private;
1863                     block->orig_bio_end_io =
1864                         chained_block->orig_bio_end_io;
1865                     block->next_in_same_bio = chained_block;
1866                     bio->bi_private = block;
1867                 }
1868             } else {
1869                 block->is_iodone = 1;
1870                 block->orig_bio_private = NULL;
1871                 block->orig_bio_end_io = NULL;
1872                 block->next_in_same_bio = NULL;
1873             }
1874         }
1875 
1876         block->flush_gen = dev_state->last_flush_gen + 1;
1877         block->submit_bio_bh_rw = submit_bio_bh_rw;
1878         if (is_metadata) {
1879             block->logical_bytenr = bytenr;
1880             block->is_metadata = 1;
1881             if (block->is_superblock) {
1882                 BUG_ON(PAGE_SIZE !=
1883                        BTRFS_SUPER_INFO_SIZE);
1884                 ret = btrfsic_process_written_superblock(
1885                         state,
1886                         block,
1887                         (struct btrfs_super_block *)
1888                         mapped_datav[0]);
1889                 if (state->print_mask &
1890                     BTRFSIC_PRINT_MASK_TREE_AFTER_SB_WRITE) {
1891                     pr_info("[after new superblock is written]:\n");
1892                     btrfsic_dump_tree_sub(state, block, 0);
1893                 }
1894             } else {
1895                 block->mirror_num = 0;  /* unknown */
1896                 ret = btrfsic_process_metablock(
1897                         state,
1898                         block,
1899                         &block_ctx,
1900                         0, 0);
1901             }
1902             if (ret)
1903                 pr_info("btrfsic: btrfsic_process_metablock(root @%llu) failed!\n",
1904                        dev_bytenr);
1905         } else {
1906             block->is_metadata = 0;
1907             block->mirror_num = 0;  /* unknown */
1908             block->generation = BTRFSIC_GENERATION_UNKNOWN;
1909             if (!state->include_extent_data
1910                 && list_empty(&block->ref_from_list)) {
1911                 /*
1912                  * disk block is overwritten with extent
1913                  * data (not meta data) and we are configured
1914                  * to not include extent data: take the
1915                  * chance and free the block's memory
1916                  */
1917                 btrfsic_block_hashtable_remove(block);
1918                 list_del(&block->all_blocks_node);
1919                 btrfsic_block_free(block);
1920             }
1921         }
1922         btrfsic_release_block_ctx(&block_ctx);
1923     } else {
1924         /* block has not been found in hash table */
1925         u64 bytenr;
1926 
1927         if (!is_metadata) {
1928             processed_len = state->datablock_size;
1929             if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE)
1930                 pr_info(
1931             "written block (%pg/%llu/?) !found in hash table, D\n",
1932                        dev_state->bdev, dev_bytenr);
1933             if (!state->include_extent_data) {
1934                 /* ignore that written D block */
1935                 goto continue_loop;
1936             }
1937 
1938             /* this is getting ugly for the
1939              * include_extent_data case... */
1940             bytenr = 0; /* unknown */
1941         } else {
1942             processed_len = state->metablock_size;
1943             bytenr = btrfs_stack_header_bytenr(
1944                     (struct btrfs_header *)
1945                     mapped_datav[0]);
1946             btrfsic_cmp_log_and_dev_bytenr(state, bytenr, dev_state,
1947                                dev_bytenr);
1948             if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE)
1949                 pr_info(
1950             "written block @%llu (%pg/%llu/?) !found in hash table, M\n",
1951                        bytenr, dev_state->bdev, dev_bytenr);
1952         }
1953 
1954         block_ctx.dev = dev_state;
1955         block_ctx.dev_bytenr = dev_bytenr;
1956         block_ctx.start = bytenr;
1957         block_ctx.len = processed_len;
1958         block_ctx.pagev = NULL;
1959         block_ctx.mem_to_free = NULL;
1960         block_ctx.datav = mapped_datav;
1961 
1962         block = btrfsic_block_alloc();
1963         if (NULL == block) {
1964             btrfsic_release_block_ctx(&block_ctx);
1965             goto continue_loop;
1966         }
1967         block->dev_state = dev_state;
1968         block->dev_bytenr = dev_bytenr;
1969         block->logical_bytenr = bytenr;
1970         block->is_metadata = is_metadata;
1971         block->never_written = 0;
1972         block->iodone_w_error = 0;
1973         block->mirror_num = 0;  /* unknown */
1974         block->flush_gen = dev_state->last_flush_gen + 1;
1975         block->submit_bio_bh_rw = submit_bio_bh_rw;
1976         if (NULL != bio) {
1977             block->is_iodone = 0;
1978             BUG_ON(NULL == bio_is_patched);
1979             if (!*bio_is_patched) {
1980                 block->orig_bio_private = bio->bi_private;
1981                 block->orig_bio_end_io = bio->bi_end_io;
1982                 block->next_in_same_bio = NULL;
1983                 bio->bi_private = block;
1984                 bio->bi_end_io = btrfsic_bio_end_io;
1985                 *bio_is_patched = 1;
1986             } else {
1987                 struct btrfsic_block *chained_block =
1988                     (struct btrfsic_block *)
1989                     bio->bi_private;
1990 
1991                 BUG_ON(NULL == chained_block);
1992                 block->orig_bio_private =
1993                     chained_block->orig_bio_private;
1994                 block->orig_bio_end_io =
1995                     chained_block->orig_bio_end_io;
1996                 block->next_in_same_bio = chained_block;
1997                 bio->bi_private = block;
1998             }
1999         } else {
2000             block->is_iodone = 1;
2001             block->orig_bio_private = NULL;
2002             block->orig_bio_end_io = NULL;
2003             block->next_in_same_bio = NULL;
2004         }
2005         if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE)
2006             pr_info("new written %c-block @%llu (%pg/%llu/%d)\n",
2007                    is_metadata ? 'M' : 'D',
2008                    block->logical_bytenr, block->dev_state->bdev,
2009                    block->dev_bytenr, block->mirror_num);
2010         list_add(&block->all_blocks_node, &state->all_blocks_list);
2011         btrfsic_block_hashtable_add(block, &state->block_hashtable);
2012 
2013         if (is_metadata) {
2014             ret = btrfsic_process_metablock(state, block,
2015                             &block_ctx, 0, 0);
2016             if (ret)
2017                 pr_info("btrfsic: process_metablock(root @%llu) failed!\n",
2018                        dev_bytenr);
2019         }
2020         btrfsic_release_block_ctx(&block_ctx);
2021     }
2022 
2023 continue_loop:
2024     BUG_ON(!processed_len);
2025     dev_bytenr += processed_len;
2026     mapped_datav += processed_len >> PAGE_SHIFT;
2027     num_pages -= processed_len >> PAGE_SHIFT;
2028     goto again;
2029 }
2030 
2031 static void btrfsic_bio_end_io(struct bio *bp)
2032 {
2033     struct btrfsic_block *block = bp->bi_private;
2034     int iodone_w_error;
2035 
2036     /* mutex is not held! This is not safe if IO is not yet completed
2037      * on umount */
2038     iodone_w_error = 0;
2039     if (bp->bi_status)
2040         iodone_w_error = 1;
2041 
2042     BUG_ON(NULL == block);
2043     bp->bi_private = block->orig_bio_private;
2044     bp->bi_end_io = block->orig_bio_end_io;
2045 
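         /*
          * A bio may carry several tracked blocks chained via
          * next_in_same_bio; mark each of them as completed. A completed
          * flush additionally advances the device's flush generation.
          */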
2046     do {
2047         struct btrfsic_block *next_block;
2048         struct btrfsic_dev_state *const dev_state = block->dev_state;
2049 
2050         if ((dev_state->state->print_mask &
2051              BTRFSIC_PRINT_MASK_END_IO_BIO_BH))
2052             pr_info("bio_end_io(err=%d) for %c @%llu (%pg/%llu/%d)\n",
2053                    bp->bi_status,
2054                    btrfsic_get_block_type(dev_state->state, block),
2055                    block->logical_bytenr, dev_state->bdev,
2056                    block->dev_bytenr, block->mirror_num);
2057         next_block = block->next_in_same_bio;
2058         block->iodone_w_error = iodone_w_error;
2059         if (block->submit_bio_bh_rw & REQ_PREFLUSH) {
2060             dev_state->last_flush_gen++;
2061             if ((dev_state->state->print_mask &
2062                  BTRFSIC_PRINT_MASK_END_IO_BIO_BH))
2063                 pr_info("bio_end_io() new %pg flush_gen=%llu\n",
2064                        dev_state->bdev,
2065                        dev_state->last_flush_gen);
2066         }
2067         if (block->submit_bio_bh_rw & REQ_FUA)
2068             block->flush_gen = 0; /* FUA completed means block is
2069                            * on disk */
2070         block->is_iodone = 1; /* for FLUSH, this releases the block */
2071         block = next_block;
2072     } while (NULL != block);
2073 
2074     bp->bi_end_io(bp);
2075 }
2076 
2077 static int btrfsic_process_written_superblock(
2078         struct btrfsic_state *state,
2079         struct btrfsic_block *const superblock,
2080         struct btrfs_super_block *const super_hdr)
2081 {
2082     struct btrfs_fs_info *fs_info = state->fs_info;
2083     int pass;
2084 
2085     superblock->generation = btrfs_super_generation(super_hdr);
2086     if (!(superblock->generation > state->max_superblock_generation ||
2087           0 == state->max_superblock_generation)) {
2088         if (state->print_mask & BTRFSIC_PRINT_MASK_SUPERBLOCK_WRITE)
2089             pr_info(
2090     "btrfsic: superblock @%llu (%pg/%llu/%d) with old gen %llu <= %llu\n",
2091                    superblock->logical_bytenr,
2092                    superblock->dev_state->bdev,
2093                    superblock->dev_bytenr, superblock->mirror_num,
2094                    btrfs_super_generation(super_hdr),
2095                    state->max_superblock_generation);
2096     } else {
2097         if (state->print_mask & BTRFSIC_PRINT_MASK_SUPERBLOCK_WRITE)
2098             pr_info(
2099     "btrfsic: got new superblock @%llu (%pg/%llu/%d) with new gen %llu > %llu\n",
2100                    superblock->logical_bytenr,
2101                    superblock->dev_state->bdev,
2102                    superblock->dev_bytenr, superblock->mirror_num,
2103                    btrfs_super_generation(super_hdr),
2104                    state->max_superblock_generation);
2105 
2106         state->max_superblock_generation =
2107             btrfs_super_generation(super_hdr);
2108         state->latest_superblock = superblock;
2109     }
2110 
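         /*
          * Walk the three tree roots referenced by the just-written
          * superblock (root, chunk and log tree) on every mirror and
          * record the corresponding links from the superblock block.
          */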
2111     for (pass = 0; pass < 3; pass++) {
2112         int ret;
2113         u64 next_bytenr;
2114         struct btrfsic_block *next_block;
2115         struct btrfsic_block_data_ctx tmp_next_block_ctx;
2116         struct btrfsic_block_link *l;
2117         int num_copies;
2118         int mirror_num;
2119         const char *additional_string = NULL;
2120         struct btrfs_disk_key tmp_disk_key = {0};
2121 
2122         btrfs_set_disk_key_type(&tmp_disk_key,
2123                         BTRFS_ROOT_ITEM_KEY);
2124         btrfs_set_disk_key_objectid(&tmp_disk_key, 0);
2125 
2126         switch (pass) {
2127         case 0:
2128             btrfs_set_disk_key_objectid(&tmp_disk_key,
2129                             BTRFS_ROOT_TREE_OBJECTID);
2130             additional_string = "root ";
2131             next_bytenr = btrfs_super_root(super_hdr);
2132             if (state->print_mask &
2133                 BTRFSIC_PRINT_MASK_ROOT_CHUNK_LOG_TREE_LOCATION)
2134                 pr_info("root@%llu\n", next_bytenr);
2135             break;
2136         case 1:
2137             btrfs_set_disk_key_objectid(&tmp_disk_key,
2138                             BTRFS_CHUNK_TREE_OBJECTID);
2139             additional_string = "chunk ";
2140             next_bytenr = btrfs_super_chunk_root(super_hdr);
2141             if (state->print_mask &
2142                 BTRFSIC_PRINT_MASK_ROOT_CHUNK_LOG_TREE_LOCATION)
2143                 pr_info("chunk@%llu\n", next_bytenr);
2144             break;
2145         case 2:
2146             btrfs_set_disk_key_objectid(&tmp_disk_key,
2147                             BTRFS_TREE_LOG_OBJECTID);
2148             additional_string = "log ";
2149             next_bytenr = btrfs_super_log_root(super_hdr);
2150             if (0 == next_bytenr)
2151                 continue;
2152             if (state->print_mask &
2153                 BTRFSIC_PRINT_MASK_ROOT_CHUNK_LOG_TREE_LOCATION)
2154                 pr_info("log@%llu\n", next_bytenr);
2155             break;
2156         }
2157 
2158         num_copies = btrfs_num_copies(fs_info, next_bytenr,
2159                           BTRFS_SUPER_INFO_SIZE);
2160         if (state->print_mask & BTRFSIC_PRINT_MASK_NUM_COPIES)
2161             pr_info("num_copies(log_bytenr=%llu) = %d\n",
2162                    next_bytenr, num_copies);
2163         for (mirror_num = 1; mirror_num <= num_copies; mirror_num++) {
2164             int was_created;
2165 
2166             if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE)
2167                 pr_info("btrfsic_process_written_superblock(mirror_num=%d)\n", mirror_num);
2168             ret = btrfsic_map_block(state, next_bytenr,
2169                         BTRFS_SUPER_INFO_SIZE,
2170                         &tmp_next_block_ctx,
2171                         mirror_num);
2172             if (ret) {
2173                 pr_info("btrfsic: btrfsic_map_block(@%llu, mirror=%d) failed!\n",
2174                        next_bytenr, mirror_num);
2175                 return -1;
2176             }
2177 
2178             next_block = btrfsic_block_lookup_or_add(
2179                     state,
2180                     &tmp_next_block_ctx,
2181                     additional_string,
2182                     1, 0, 1,
2183                     mirror_num,
2184                     &was_created);
2185             if (NULL == next_block) {
2186                 btrfsic_release_block_ctx(&tmp_next_block_ctx);
2187                 return -1;
2188             }
2189 
2190             next_block->disk_key = tmp_disk_key;
2191             if (was_created)
2192                 next_block->generation =
2193                     BTRFSIC_GENERATION_UNKNOWN;
2194             l = btrfsic_block_link_lookup_or_add(
2195                     state,
2196                     &tmp_next_block_ctx,
2197                     next_block,
2198                     superblock,
2199                     BTRFSIC_GENERATION_UNKNOWN);
2200             btrfsic_release_block_ctx(&tmp_next_block_ctx);
2201             if (NULL == l)
2202                 return -1;
2203         }
2204     }
2205 
2206     if (WARN_ON(-1 == btrfsic_check_all_ref_blocks(state, superblock, 0)))
2207         btrfsic_dump_tree(state);
2208 
2209     return 0;
2210 }
2211 
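     /*
      * Recursively verify that every block referenced (directly or
      * indirectly) by the given block has been written, completed its I/O
      * without error, has been flushed out of the device's write cache
      * and carries a generation matching the parent's link.
      */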
2212 static int btrfsic_check_all_ref_blocks(struct btrfsic_state *state,
2213                     struct btrfsic_block *const block,
2214                     int recursion_level)
2215 {
2216     const struct btrfsic_block_link *l;
2217     int ret = 0;
2218 
2219     if (recursion_level >= 3 + BTRFS_MAX_LEVEL) {
2220         /*
2221          * Note that this situation can happen and does not
2222          * indicate an error in regular cases. It happens
2223          * when disk blocks are freed and later reused.
2224          * The check-integrity module is not aware of any
2225          * block free operations, it just recognizes block
2226          * write operations. Therefore it keeps the linkage
2227          * information for a block until a block is
2228          * rewritten. This can temporarily cause incorrect
2229          * and even circular linkage information. This
2230          * causes no harm unless such blocks are referenced
2231          * by the most recent super block.
2232          */
2233         if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE)
2234             pr_info("btrfsic: abort cyclic linkage (case 1).\n");
2235 
2236         return ret;
2237     }
2238 
2239     /*
2240      * This algorithm is recursive because the amount of used stack
2241      * space is very small and the max recursion depth is limited.
2242      */
2243     list_for_each_entry(l, &block->ref_to_list, node_ref_to) {
2244         if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE)
2245             pr_info(
2246         "rl=%d, %c @%llu (%pg/%llu/%d) %u* refers to %c @%llu (%pg/%llu/%d)\n",
2247                    recursion_level,
2248                    btrfsic_get_block_type(state, block),
2249                    block->logical_bytenr, block->dev_state->bdev,
2250                    block->dev_bytenr, block->mirror_num,
2251                    l->ref_cnt,
2252                    btrfsic_get_block_type(state, l->block_ref_to),
2253                    l->block_ref_to->logical_bytenr,
2254                    l->block_ref_to->dev_state->bdev,
2255                    l->block_ref_to->dev_bytenr,
2256                    l->block_ref_to->mirror_num);
2257         if (l->block_ref_to->never_written) {
2258             pr_info(
2259 "btrfs: attempt to write superblock which references block %c @%llu (%pg/%llu/%d) which is never written!\n",
2260                    btrfsic_get_block_type(state, l->block_ref_to),
2261                    l->block_ref_to->logical_bytenr,
2262                    l->block_ref_to->dev_state->bdev,
2263                    l->block_ref_to->dev_bytenr,
2264                    l->block_ref_to->mirror_num);
2265             ret = -1;
2266         } else if (!l->block_ref_to->is_iodone) {
2267             pr_info(
2268 "btrfs: attempt to write superblock which references block %c @%llu (%pg/%llu/%d) which is not yet iodone!\n",
2269                    btrfsic_get_block_type(state, l->block_ref_to),
2270                    l->block_ref_to->logical_bytenr,
2271                    l->block_ref_to->dev_state->bdev,
2272                    l->block_ref_to->dev_bytenr,
2273                    l->block_ref_to->mirror_num);
2274             ret = -1;
2275         } else if (l->block_ref_to->iodone_w_error) {
2276             pr_info(
2277 "btrfs: attempt to write superblock which references block %c @%llu (%pg/%llu/%d) which has write error!\n",
2278                    btrfsic_get_block_type(state, l->block_ref_to),
2279                    l->block_ref_to->logical_bytenr,
2280                    l->block_ref_to->dev_state->bdev,
2281                    l->block_ref_to->dev_bytenr,
2282                    l->block_ref_to->mirror_num);
2283             ret = -1;
2284         } else if (l->parent_generation !=
2285                l->block_ref_to->generation &&
2286                BTRFSIC_GENERATION_UNKNOWN !=
2287                l->parent_generation &&
2288                BTRFSIC_GENERATION_UNKNOWN !=
2289                l->block_ref_to->generation) {
2290             pr_info(
2291 "btrfs: attempt to write superblock which references block %c @%llu (%pg/%llu/%d) with generation %llu != parent generation %llu!\n",
2292                    btrfsic_get_block_type(state, l->block_ref_to),
2293                    l->block_ref_to->logical_bytenr,
2294                    l->block_ref_to->dev_state->bdev,
2295                    l->block_ref_to->dev_bytenr,
2296                    l->block_ref_to->mirror_num,
2297                    l->block_ref_to->generation,
2298                    l->parent_generation);
2299             ret = -1;
2300         } else if (l->block_ref_to->flush_gen >
2301                l->block_ref_to->dev_state->last_flush_gen) {
2302             pr_info(
2303 "btrfs: attempt to write superblock which references block %c @%llu (%pg/%llu/%d) which is not flushed out of disk's write cache (block flush_gen=%llu, dev->flush_gen=%llu)!\n",
2304                    btrfsic_get_block_type(state, l->block_ref_to),
2305                    l->block_ref_to->logical_bytenr,
2306                    l->block_ref_to->dev_state->bdev,
2307                    l->block_ref_to->dev_bytenr,
2308                    l->block_ref_to->mirror_num, l->block_ref_to->flush_gen,
2309                    l->block_ref_to->dev_state->last_flush_gen);
2310             ret = -1;
2311         } else if (-1 == btrfsic_check_all_ref_blocks(state,
2312                                   l->block_ref_to,
2313                                   recursion_level +
2314                                   1)) {
2315             ret = -1;
2316         }
2317     }
2318 
2319     return ret;
2320 }
2321 
2322 static int btrfsic_is_block_ref_by_superblock(
2323         const struct btrfsic_state *state,
2324         const struct btrfsic_block *block,
2325         int recursion_level)
2326 {
2327     const struct btrfsic_block_link *l;
2328 
2329     if (recursion_level >= 3 + BTRFS_MAX_LEVEL) {
2330         /* refer to comment at "abort cyclic linkage (case 1)" */
2331         if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE)
2332             pr_info("btrfsic: abort cyclic linkage (case 2).\n");
2333 
2334         return 0;
2335     }
2336 
2337     /*
2338      * This algorithm is recursive because the amount of used stack space
2339      * is very small and the max recursion depth is limited.
2340      */
2341     list_for_each_entry(l, &block->ref_from_list, node_ref_from) {
2342         if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE)
2343             pr_info(
2344     "rl=%d, %c @%llu (%pg/%llu/%d) is ref %u* from %c @%llu (%pg/%llu/%d)\n",
2345                    recursion_level,
2346                    btrfsic_get_block_type(state, block),
2347                    block->logical_bytenr, block->dev_state->bdev,
2348                    block->dev_bytenr, block->mirror_num,
2349                    l->ref_cnt,
2350                    btrfsic_get_block_type(state, l->block_ref_from),
2351                    l->block_ref_from->logical_bytenr,
2352                    l->block_ref_from->dev_state->bdev,
2353                    l->block_ref_from->dev_bytenr,
2354                    l->block_ref_from->mirror_num);
2355         if (l->block_ref_from->is_superblock &&
2356             state->latest_superblock->dev_bytenr ==
2357             l->block_ref_from->dev_bytenr &&
2358             state->latest_superblock->dev_state->bdev ==
2359             l->block_ref_from->dev_state->bdev)
2360             return 1;
2361         else if (btrfsic_is_block_ref_by_superblock(state,
2362                                 l->block_ref_from,
2363                                 recursion_level +
2364                                 1))
2365             return 1;
2366     }
2367 
2368     return 0;
2369 }
2370 
2371 static void btrfsic_print_add_link(const struct btrfsic_state *state,
2372                    const struct btrfsic_block_link *l)
2373 {
2374     pr_info("add %u* link from %c @%llu (%pg/%llu/%d) to %c @%llu (%pg/%llu/%d)\n",
2375            l->ref_cnt,
2376            btrfsic_get_block_type(state, l->block_ref_from),
2377            l->block_ref_from->logical_bytenr,
2378            l->block_ref_from->dev_state->bdev,
2379            l->block_ref_from->dev_bytenr, l->block_ref_from->mirror_num,
2380            btrfsic_get_block_type(state, l->block_ref_to),
2381            l->block_ref_to->logical_bytenr,
2382            l->block_ref_to->dev_state->bdev, l->block_ref_to->dev_bytenr,
2383            l->block_ref_to->mirror_num);
2384 }
2385 
2386 static void btrfsic_print_rem_link(const struct btrfsic_state *state,
2387                    const struct btrfsic_block_link *l)
2388 {
2389     pr_info("rem %u* link from %c @%llu (%pg/%llu/%d) to %c @%llu (%pg/%llu/%d)\n",
2390            l->ref_cnt,
2391            btrfsic_get_block_type(state, l->block_ref_from),
2392            l->block_ref_from->logical_bytenr,
2393            l->block_ref_from->dev_state->bdev,
2394            l->block_ref_from->dev_bytenr, l->block_ref_from->mirror_num,
2395            btrfsic_get_block_type(state, l->block_ref_to),
2396            l->block_ref_to->logical_bytenr,
2397            l->block_ref_to->dev_state->bdev, l->block_ref_to->dev_bytenr,
2398            l->block_ref_to->mirror_num);
2399 }
2400 
2401 static char btrfsic_get_block_type(const struct btrfsic_state *state,
2402                    const struct btrfsic_block *block)
2403 {
2404     if (block->is_superblock &&
2405         state->latest_superblock->dev_bytenr == block->dev_bytenr &&
2406         state->latest_superblock->dev_state->bdev == block->dev_state->bdev)
2407         return 'S';
2408     else if (block->is_superblock)
2409         return 's';
2410     else if (block->is_metadata)
2411         return 'M';
2412     else
2413         return 'D';
2414 }
2415 
2416 static void btrfsic_dump_tree(const struct btrfsic_state *state)
2417 {
2418     btrfsic_dump_tree_sub(state, state->latest_superblock, 0);
2419 }
2420 
2421 static void btrfsic_dump_tree_sub(const struct btrfsic_state *state,
2422                   const struct btrfsic_block *block,
2423                   int indent_level)
2424 {
2425     const struct btrfsic_block_link *l;
2426     int indent_add;
2427     static char buf[80];
2428     int cursor_position;
2429 
2430     /*
2431      * It would be better to fill an on-stack buffer with a complete line
2432      * and dump it at once when it is time to print a newline character.
2433      */
2434 
2435     /*
2436      * This algorithm is recursive because the amount of used stack space
2437      * is very small and the max recursion depth is limited.
2438      */
2439     indent_add = sprintf(buf, "%c-%llu(%pg/%llu/%u)",
2440                  btrfsic_get_block_type(state, block),
2441                  block->logical_bytenr, block->dev_state->bdev,
2442                  block->dev_bytenr, block->mirror_num);
2443     if (indent_level + indent_add > BTRFSIC_TREE_DUMP_MAX_INDENT_LEVEL) {
2444         printk("[...]\n");
2445         return;
2446     }
2447     printk(buf);
2448     indent_level += indent_add;
2449     if (list_empty(&block->ref_to_list)) {
2450         printk("\n");
2451         return;
2452     }
2453     if (block->mirror_num > 1 &&
2454         !(state->print_mask & BTRFSIC_PRINT_MASK_TREE_WITH_ALL_MIRRORS)) {
2455         printk(" [...]\n");
2456         return;
2457     }
2458 
2459     cursor_position = indent_level;
2460     list_for_each_entry(l, &block->ref_to_list, node_ref_to) {
2461         while (cursor_position < indent_level) {
2462             printk(" ");
2463             cursor_position++;
2464         }
2465         if (l->ref_cnt > 1)
2466             indent_add = sprintf(buf, " %d*--> ", l->ref_cnt);
2467         else
2468             indent_add = sprintf(buf, " --> ");
2469         if (indent_level + indent_add >
2470             BTRFSIC_TREE_DUMP_MAX_INDENT_LEVEL) {
2471             printk("[...]\n");
2472             cursor_position = 0;
2473             continue;
2474         }
2475 
2476         printk(buf);
2477 
2478         btrfsic_dump_tree_sub(state, l->block_ref_to,
2479                       indent_level + indent_add);
2480         cursor_position = 0;
2481     }
2482 }
2483 
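     /*
      * Look up the link from from_block to next_block in the link hash
      * table. If it does not exist yet, allocate it and add it to both
      * blocks' reference lists; otherwise just bump its reference count
      * and remember the latest parent generation.
      */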
2484 static struct btrfsic_block_link *btrfsic_block_link_lookup_or_add(
2485         struct btrfsic_state *state,
2486         struct btrfsic_block_data_ctx *next_block_ctx,
2487         struct btrfsic_block *next_block,
2488         struct btrfsic_block *from_block,
2489         u64 parent_generation)
2490 {
2491     struct btrfsic_block_link *l;
2492 
2493     l = btrfsic_block_link_hashtable_lookup(next_block_ctx->dev->bdev,
2494                         next_block_ctx->dev_bytenr,
2495                         from_block->dev_state->bdev,
2496                         from_block->dev_bytenr,
2497                         &state->block_link_hashtable);
2498     if (NULL == l) {
2499         l = btrfsic_block_link_alloc();
2500         if (!l)
2501             return NULL;
2502 
2503         l->block_ref_to = next_block;
2504         l->block_ref_from = from_block;
2505         l->ref_cnt = 1;
2506         l->parent_generation = parent_generation;
2507 
2508         if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE)
2509             btrfsic_print_add_link(state, l);
2510 
2511         list_add(&l->node_ref_to, &from_block->ref_to_list);
2512         list_add(&l->node_ref_from, &next_block->ref_from_list);
2513 
2514         btrfsic_block_link_hashtable_add(l,
2515                          &state->block_link_hashtable);
2516     } else {
2517         l->ref_cnt++;
2518         l->parent_generation = parent_generation;
2519         if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE)
2520             btrfsic_print_add_link(state, l);
2521     }
2522 
2523     return l;
2524 }
2525 
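     /*
      * Look up a block by device and physical byte offset. If it is not
      * tracked yet, allocate a new block element, initialize it from the
      * block data context and the given flags, and add it to the global
      * block list and the block hash table. *was_created reports which
      * case was taken.
      */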
2526 static struct btrfsic_block *btrfsic_block_lookup_or_add(
2527         struct btrfsic_state *state,
2528         struct btrfsic_block_data_ctx *block_ctx,
2529         const char *additional_string,
2530         int is_metadata,
2531         int is_iodone,
2532         int never_written,
2533         int mirror_num,
2534         int *was_created)
2535 {
2536     struct btrfsic_block *block;
2537 
2538     block = btrfsic_block_hashtable_lookup(block_ctx->dev->bdev,
2539                            block_ctx->dev_bytenr,
2540                            &state->block_hashtable);
2541     if (NULL == block) {
2542         struct btrfsic_dev_state *dev_state;
2543 
2544         block = btrfsic_block_alloc();
2545         if (!block)
2546             return NULL;
2547 
2548         dev_state = btrfsic_dev_state_lookup(block_ctx->dev->bdev->bd_dev);
2549         if (NULL == dev_state) {
2550             pr_info("btrfsic: error, lookup dev_state failed!\n");
2551             btrfsic_block_free(block);
2552             return NULL;
2553         }
2554         block->dev_state = dev_state;
2555         block->dev_bytenr = block_ctx->dev_bytenr;
2556         block->logical_bytenr = block_ctx->start;
2557         block->is_metadata = is_metadata;
2558         block->is_iodone = is_iodone;
2559         block->never_written = never_written;
2560         block->mirror_num = mirror_num;
2561         if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE)
2562             pr_info("New %s%c-block @%llu (%pg/%llu/%d)\n",
2563                    additional_string,
2564                    btrfsic_get_block_type(state, block),
2565                    block->logical_bytenr, dev_state->bdev,
2566                    block->dev_bytenr, mirror_num);
2567         list_add(&block->all_blocks_node, &state->all_blocks_list);
2568         btrfsic_block_hashtable_add(block, &state->block_hashtable);
2569         if (NULL != was_created)
2570             *was_created = 1;
2571     } else {
2572         if (NULL != was_created)
2573             *was_created = 0;
2574     }
2575 
2576     return block;
2577 }
2578 
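     /*
      * Sanity check for written metadata: the logical bytenr stored in
      * the block header must map, on at least one mirror, to exactly the
      * device and physical offset that the bio was submitted to.
      */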
2579 static void btrfsic_cmp_log_and_dev_bytenr(struct btrfsic_state *state,
2580                        u64 bytenr,
2581                        struct btrfsic_dev_state *dev_state,
2582                        u64 dev_bytenr)
2583 {
2584     struct btrfs_fs_info *fs_info = state->fs_info;
2585     struct btrfsic_block_data_ctx block_ctx;
2586     int num_copies;
2587     int mirror_num;
2588     int match = 0;
2589     int ret;
2590 
2591     num_copies = btrfs_num_copies(fs_info, bytenr, state->metablock_size);
2592 
2593     for (mirror_num = 1; mirror_num <= num_copies; mirror_num++) {
2594         ret = btrfsic_map_block(state, bytenr, state->metablock_size,
2595                     &block_ctx, mirror_num);
2596         if (ret) {
2597             pr_info("btrfsic: btrfsic_map_block(logical @%llu, mirror %d) failed!\n",
2598                    bytenr, mirror_num);
2599             continue;
2600         }
2601 
2602         if (dev_state->bdev == block_ctx.dev->bdev &&
2603             dev_bytenr == block_ctx.dev_bytenr) {
2604             match++;
2605             btrfsic_release_block_ctx(&block_ctx);
2606             break;
2607         }
2608         btrfsic_release_block_ctx(&block_ctx);
2609     }
2610 
2611     if (WARN_ON(!match)) {
2612         pr_info(
2613 "btrfs: attempt to write M-block which contains logical bytenr that doesn't map to dev+physical bytenr of submit_bio, buffer->log_bytenr=%llu, submit_bio(bdev=%pg, phys_bytenr=%llu)!\n",
2614                bytenr, dev_state->bdev, dev_bytenr);
2615         for (mirror_num = 1; mirror_num <= num_copies; mirror_num++) {
2616             ret = btrfsic_map_block(state, bytenr,
2617                         state->metablock_size,
2618                         &block_ctx, mirror_num);
2619             if (ret)
2620                 continue;
2621 
2622             pr_info("read logical bytenr @%llu maps to (%pg/%llu/%d)\n",
2623                    bytenr, block_ctx.dev->bdev,
2624                    block_ctx.dev_bytenr, mirror_num);
2625         }
2626     }
2627 }
2628 
2629 static struct btrfsic_dev_state *btrfsic_dev_state_lookup(dev_t dev)
2630 {
2631     return btrfsic_dev_state_hashtable_lookup(dev,
2632                           &btrfsic_dev_state_hashtable);
2633 }
2634 
2635 static void btrfsic_check_write_bio(struct bio *bio, struct btrfsic_dev_state *dev_state)
2636 {
2637     unsigned int segs = bio_segments(bio);
2638     u64 dev_bytenr = 512 * bio->bi_iter.bi_sector;
2639     u64 cur_bytenr = dev_bytenr;
2640     struct bvec_iter iter;
2641     struct bio_vec bvec;
2642     char **mapped_datav;
2643     int bio_is_patched = 0;
2644     int i = 0;
2645 
2646     if (dev_state->state->print_mask & BTRFSIC_PRINT_MASK_SUBMIT_BIO_BH)
2647         pr_info(
2648 "submit_bio(rw=%d,0x%x, bi_vcnt=%u, bi_sector=%llu (bytenr %llu), bi_bdev=%p)\n",
2649                bio_op(bio), bio->bi_opf, segs,
2650                bio->bi_iter.bi_sector, dev_bytenr, bio->bi_bdev);
2651 
2652     mapped_datav = kmalloc_array(segs, sizeof(*mapped_datav), GFP_NOFS);
2653     if (!mapped_datav)
2654         return;
2655 
2656     bio_for_each_segment(bvec, bio, iter) {
2657         BUG_ON(bvec.bv_len != PAGE_SIZE);
2658         mapped_datav[i] = page_address(bvec.bv_page);
2659         i++;
2660 
2661         if (dev_state->state->print_mask &
2662             BTRFSIC_PRINT_MASK_SUBMIT_BIO_BH_VERBOSE)
2663             pr_info("#%u: bytenr=%llu, len=%u, offset=%u\n",
2664                    i, cur_bytenr, bvec.bv_len, bvec.bv_offset);
2665         cur_bytenr += bvec.bv_len;
2666     }
2667 
2668     btrfsic_process_written_block(dev_state, dev_bytenr, mapped_datav, segs,
2669                       bio, &bio_is_patched, bio->bi_opf);
2670     kfree(mapped_datav);
2671 }
2672 
2673 static void btrfsic_check_flush_bio(struct bio *bio, struct btrfsic_dev_state *dev_state)
2674 {
2675     if (dev_state->state->print_mask & BTRFSIC_PRINT_MASK_SUBMIT_BIO_BH)
2676         pr_info("submit_bio(rw=%d,0x%x FLUSH, bdev=%p)\n",
2677                bio_op(bio), bio->bi_opf, bio->bi_bdev);
2678 
2679     if (dev_state->dummy_block_for_bio_bh_flush.is_iodone) {
2680         struct btrfsic_block *const block =
2681             &dev_state->dummy_block_for_bio_bh_flush;
2682 
2683         block->is_iodone = 0;
2684         block->never_written = 0;
2685         block->iodone_w_error = 0;
2686         block->flush_gen = dev_state->last_flush_gen + 1;
2687         block->submit_bio_bh_rw = bio->bi_opf;
2688         block->orig_bio_private = bio->bi_private;
2689         block->orig_bio_end_io = bio->bi_end_io;
2690         block->next_in_same_bio = NULL;
2691         bio->bi_private = block;
2692         bio->bi_end_io = btrfsic_bio_end_io;
2693     } else if ((dev_state->state->print_mask &
2694            (BTRFSIC_PRINT_MASK_SUBMIT_BIO_BH |
2695             BTRFSIC_PRINT_MASK_VERBOSE))) {
2696         pr_info(
2697 "btrfsic_submit_bio(%pg) with FLUSH but dummy block already in use (ignored)!\n",
2698                dev_state->bdev);
2699     }
2700 }
2701 
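     /*
      * Hook called at bio submission time: data-carrying write bios are
      * inspected block by block, and pure flush bios are patched so that
      * their completion advances the device's flush generation.
      */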
2702 void btrfsic_check_bio(struct bio *bio)
2703 {
2704     struct btrfsic_dev_state *dev_state;
2705 
2706     if (!btrfsic_is_initialized)
2707         return;
2708 
2709     /*
2710      * We can be called before btrfsic_mount, so there might not be a
2711      * dev_state.
2712      */
2713     dev_state = btrfsic_dev_state_lookup(bio->bi_bdev->bd_dev);
2714     mutex_lock(&btrfsic_mutex);
2715     if (dev_state) {
2716         if (bio_op(bio) == REQ_OP_WRITE && bio_has_data(bio))
2717             btrfsic_check_write_bio(bio, dev_state);
2718         else if (bio->bi_opf & REQ_PREFLUSH)
2719             btrfsic_check_flush_bio(bio, dev_state);
2720     }
2721     mutex_unlock(&btrfsic_mutex);
2722 }
2723 
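     /*
      * Initialize the checker for a filesystem at mount time: allocate the
      * global btrfsic_state, register a dev_state for every device with an
      * open bdev and build the initial block database from the superblock.
      * Returns 0 on success and a negative value on failure.
      */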
2724 int btrfsic_mount(struct btrfs_fs_info *fs_info,
2725           struct btrfs_fs_devices *fs_devices,
2726           int including_extent_data, u32 print_mask)
2727 {
2728     int ret;
2729     struct btrfsic_state *state;
2730     struct list_head *dev_head = &fs_devices->devices;
2731     struct btrfs_device *device;
2732 
2733     if (!PAGE_ALIGNED(fs_info->nodesize)) {
2734         pr_info("btrfsic: cannot handle nodesize %d not being a multiple of PAGE_SIZE %ld!\n",
2735                fs_info->nodesize, PAGE_SIZE);
2736         return -EINVAL;
2737     }
2738     if (!PAGE_ALIGNED(fs_info->sectorsize)) {
2739         pr_info("btrfsic: cannot handle sectorsize %d not being a multiple of PAGE_SIZE %ld!\n",
2740                fs_info->sectorsize, PAGE_SIZE);
2741         return -EINVAL;
2742     }
2743     state = kvzalloc(sizeof(*state), GFP_KERNEL);
2744     if (!state)
2745         return -ENOMEM;
2746 
2747     if (!btrfsic_is_initialized) {
2748         mutex_init(&btrfsic_mutex);
2749         btrfsic_dev_state_hashtable_init(&btrfsic_dev_state_hashtable);
2750         btrfsic_is_initialized = 1;
2751     }
2752     mutex_lock(&btrfsic_mutex);
2753     state->fs_info = fs_info;
2754     state->print_mask = print_mask;
2755     state->include_extent_data = including_extent_data;
2756     state->metablock_size = fs_info->nodesize;
2757     state->datablock_size = fs_info->sectorsize;
2758     INIT_LIST_HEAD(&state->all_blocks_list);
2759     btrfsic_block_hashtable_init(&state->block_hashtable);
2760     btrfsic_block_link_hashtable_init(&state->block_link_hashtable);
2761     state->max_superblock_generation = 0;
2762     state->latest_superblock = NULL;
2763 
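         /*
          * Register one dev_state per device so that later bios can be
          * matched back to this filesystem's checker state via
          * btrfsic_dev_state_lookup().
          */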
2764     list_for_each_entry(device, dev_head, dev_list) {
2765         struct btrfsic_dev_state *ds;
2766 
2767         if (!device->bdev || !device->name)
2768             continue;
2769 
2770         ds = btrfsic_dev_state_alloc();
2771         if (!ds) {
2772             mutex_unlock(&btrfsic_mutex);
2773             return -ENOMEM;
2774         }
2775         ds->bdev = device->bdev;
2776         ds->state = state;
2777         btrfsic_dev_state_hashtable_add(ds,
2778                         &btrfsic_dev_state_hashtable);
2779     }
2780 
2781     ret = btrfsic_process_superblock(state, fs_devices);
2782     if (ret) {
2783         mutex_unlock(&btrfsic_mutex);
2784         btrfsic_unmount(fs_devices);
2785         return ret;
2786     }
2787 
2788     if (state->print_mask & BTRFSIC_PRINT_MASK_INITIAL_DATABASE)
2789         btrfsic_dump_database(state);
2790     if (state->print_mask & BTRFSIC_PRINT_MASK_INITIAL_TREE)
2791         btrfsic_dump_tree(state);
2792 
2793     mutex_unlock(&btrfsic_mutex);
2794     return 0;
2795 }
2796 
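     /*
      * Tear down the checker at unmount: unregister all dev_states, free
      * every tracked block and block link, and warn about blocks whose
      * writes never completed.
      */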
2797 void btrfsic_unmount(struct btrfs_fs_devices *fs_devices)
2798 {
2799     struct btrfsic_block *b_all, *tmp_all;
2800     struct btrfsic_state *state;
2801     struct list_head *dev_head = &fs_devices->devices;
2802     struct btrfs_device *device;
2803 
2804     if (!btrfsic_is_initialized)
2805         return;
2806 
2807     mutex_lock(&btrfsic_mutex);
2808 
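         /*
          * Remove each device from the global hashtable and remember the
          * shared state pointer so it can be freed once the block lists
          * have been cleaned up.
          */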
2809     state = NULL;
2810     list_for_each_entry(device, dev_head, dev_list) {
2811         struct btrfsic_dev_state *ds;
2812 
2813         if (!device->bdev || !device->name)
2814             continue;
2815 
2816         ds = btrfsic_dev_state_hashtable_lookup(
2817                 device->bdev->bd_dev,
2818                 &btrfsic_dev_state_hashtable);
2819         if (ds) {
2820             state = ds->state;
2821             btrfsic_dev_state_hashtable_remove(ds);
2822             btrfsic_dev_state_free(ds);
2823         }
2824     }
2825 
2826     if (!state) {
2827         pr_info("btrfsic: error, cannot find state information on umount!\n");
2828         mutex_unlock(&btrfsic_mutex);
2829         return;
2830     }
2831 
2832     /*
2833      * Don't care about keeping the lists' state up to date,
2834      * just free all memory that was allocated dynamically.
2835      * Free the blocks and the block_links.
2836      */
2837     list_for_each_entry_safe(b_all, tmp_all, &state->all_blocks_list,
2838                  all_blocks_node) {
2839         struct btrfsic_block_link *l, *tmp;
2840 
2841         list_for_each_entry_safe(l, tmp, &b_all->ref_to_list,
2842                      node_ref_to) {
2843             if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE)
2844                 btrfsic_print_rem_link(state, l);
2845 
2846             l->ref_cnt--;
2847             if (l->ref_cnt == 0)
2848                 btrfsic_block_link_free(l);
2849         }
2850 
2851         if (b_all->is_iodone || b_all->never_written)
2852             btrfsic_block_free(b_all);
2853         else
2854             pr_info(
2855 "btrfs: attempt to free %c-block @%llu (%pg/%llu/%d) on umount which is not yet iodone!\n",
2856                    btrfsic_get_block_type(state, b_all),
2857                    b_all->logical_bytenr, b_all->dev_state->bdev,
2858                    b_all->dev_bytenr, b_all->mirror_num);
2859     }
2860 
2861     mutex_unlock(&btrfsic_mutex);
2862 
2863     kvfree(state);
2864 }