Back to home page

OSCL-LXR

 
 

    


0001 // SPDX-License-Identifier: GPL-2.0
0002 /*
0003  * Copyright (C) 2014 Facebook.  All rights reserved.
0004  */
0005 
0006 #include <linux/sched.h>
0007 #include <linux/stacktrace.h>
0008 #include "ctree.h"
0009 #include "disk-io.h"
0010 #include "locking.h"
0011 #include "delayed-ref.h"
0012 #include "ref-verify.h"
0013 
/*
 * Used to keep track of the roots and the number of refs each root has for a
 * given bytenr.  This just tracks the number of direct references, no shared
 * references.
 */
0019 struct root_entry {
0020     u64 root_objectid;
0021     u64 num_refs;
0022     struct rb_node node;
0023 };
0024 
0025 /*
0026  * These are meant to represent what should exist in the extent tree, these can
0027  * be used to verify the extent tree is consistent as these should all match
0028  * what the extent tree says.
0029  */
0030 struct ref_entry {
0031     u64 root_objectid;
0032     u64 parent;
0033     u64 owner;
0034     u64 offset;
0035     u64 num_refs;
0036     struct rb_node node;
0037 };
0038 
0039 #define MAX_TRACE   16
0040 
0041 /*
0042  * Whenever we add/remove a reference we record the action.  The action maps
0043  * back to the delayed ref action.  We hold the ref we are changing in the
0044  * action so we can account for the history properly, and we record the root we
0045  * were called with since it could be different from ref_root.  We also store
0046  * stack traces because that's how I roll.
0047  */
0048 struct ref_action {
0049     int action;
0050     u64 root;
0051     struct ref_entry ref;
0052     struct list_head list;
0053     unsigned long trace[MAX_TRACE];
0054     unsigned int trace_len;
0055 };
0056 
0057 /*
0058  * One of these for every block we reference, it holds the roots and references
0059  * to it as well as all of the ref actions that have occurred to it.  We never
0060  * free it until we unmount the file system in order to make sure re-allocations
0061  * are happening properly.
0062  */
0063 struct block_entry {
0064     u64 bytenr;
0065     u64 len;
0066     u64 num_refs;
0067     int metadata;
0068     int from_disk;
0069     struct rb_root roots;
0070     struct rb_root refs;
0071     struct rb_node node;
0072     struct list_head actions;
0073 };
0074 
0075 static struct block_entry *insert_block_entry(struct rb_root *root,
0076                           struct block_entry *be)
0077 {
0078     struct rb_node **p = &root->rb_node;
0079     struct rb_node *parent_node = NULL;
0080     struct block_entry *entry;
0081 
0082     while (*p) {
0083         parent_node = *p;
0084         entry = rb_entry(parent_node, struct block_entry, node);
0085         if (entry->bytenr > be->bytenr)
0086             p = &(*p)->rb_left;
0087         else if (entry->bytenr < be->bytenr)
0088             p = &(*p)->rb_right;
0089         else
0090             return entry;
0091     }
0092 
0093     rb_link_node(&be->node, parent_node, p);
0094     rb_insert_color(&be->node, root);
0095     return NULL;
0096 }
0097 
0098 static struct block_entry *lookup_block_entry(struct rb_root *root, u64 bytenr)
0099 {
0100     struct rb_node *n;
0101     struct block_entry *entry = NULL;
0102 
0103     n = root->rb_node;
0104     while (n) {
0105         entry = rb_entry(n, struct block_entry, node);
0106         if (entry->bytenr < bytenr)
0107             n = n->rb_right;
0108         else if (entry->bytenr > bytenr)
0109             n = n->rb_left;
0110         else
0111             return entry;
0112     }
0113     return NULL;
0114 }
0115 
0116 static struct root_entry *insert_root_entry(struct rb_root *root,
0117                         struct root_entry *re)
0118 {
0119     struct rb_node **p = &root->rb_node;
0120     struct rb_node *parent_node = NULL;
0121     struct root_entry *entry;
0122 
0123     while (*p) {
0124         parent_node = *p;
0125         entry = rb_entry(parent_node, struct root_entry, node);
0126         if (entry->root_objectid > re->root_objectid)
0127             p = &(*p)->rb_left;
0128         else if (entry->root_objectid < re->root_objectid)
0129             p = &(*p)->rb_right;
0130         else
0131             return entry;
0132     }
0133 
0134     rb_link_node(&re->node, parent_node, p);
0135     rb_insert_color(&re->node, root);
0136     return NULL;
0137 
0138 }
0139 
0140 static int comp_refs(struct ref_entry *ref1, struct ref_entry *ref2)
0141 {
0142     if (ref1->root_objectid < ref2->root_objectid)
0143         return -1;
0144     if (ref1->root_objectid > ref2->root_objectid)
0145         return 1;
0146     if (ref1->parent < ref2->parent)
0147         return -1;
0148     if (ref1->parent > ref2->parent)
0149         return 1;
0150     if (ref1->owner < ref2->owner)
0151         return -1;
0152     if (ref1->owner > ref2->owner)
0153         return 1;
0154     if (ref1->offset < ref2->offset)
0155         return -1;
0156     if (ref1->offset > ref2->offset)
0157         return 1;
0158     return 0;
0159 }
0160 
0161 static struct ref_entry *insert_ref_entry(struct rb_root *root,
0162                       struct ref_entry *ref)
0163 {
0164     struct rb_node **p = &root->rb_node;
0165     struct rb_node *parent_node = NULL;
0166     struct ref_entry *entry;
0167     int cmp;
0168 
0169     while (*p) {
0170         parent_node = *p;
0171         entry = rb_entry(parent_node, struct ref_entry, node);
0172         cmp = comp_refs(entry, ref);
0173         if (cmp > 0)
0174             p = &(*p)->rb_left;
0175         else if (cmp < 0)
0176             p = &(*p)->rb_right;
0177         else
0178             return entry;
0179     }
0180 
0181     rb_link_node(&ref->node, parent_node, p);
0182     rb_insert_color(&ref->node, root);
0183     return NULL;
0184 
0185 }
0186 
0187 static struct root_entry *lookup_root_entry(struct rb_root *root, u64 objectid)
0188 {
0189     struct rb_node *n;
0190     struct root_entry *entry = NULL;
0191 
0192     n = root->rb_node;
0193     while (n) {
0194         entry = rb_entry(n, struct root_entry, node);
0195         if (entry->root_objectid < objectid)
0196             n = n->rb_right;
0197         else if (entry->root_objectid > objectid)
0198             n = n->rb_left;
0199         else
0200             return entry;
0201     }
0202     return NULL;
0203 }
0204 
0205 #ifdef CONFIG_STACKTRACE
0206 static void __save_stack_trace(struct ref_action *ra)
0207 {
0208     ra->trace_len = stack_trace_save(ra->trace, MAX_TRACE, 2);
0209 }
0210 
0211 static void __print_stack_trace(struct btrfs_fs_info *fs_info,
0212                 struct ref_action *ra)
0213 {
0214     if (ra->trace_len == 0) {
0215         btrfs_err(fs_info, "  ref-verify: no stacktrace");
0216         return;
0217     }
0218     stack_trace_print(ra->trace, ra->trace_len, 2);
0219 }
0220 #else
0221 static inline void __save_stack_trace(struct ref_action *ra)
0222 {
0223 }
0224 
0225 static inline void __print_stack_trace(struct btrfs_fs_info *fs_info,
0226                        struct ref_action *ra)
0227 {
0228     btrfs_err(fs_info, "  ref-verify: no stacktrace support");
0229 }
0230 #endif
0231 
0232 static void free_block_entry(struct block_entry *be)
0233 {
0234     struct root_entry *re;
0235     struct ref_entry *ref;
0236     struct ref_action *ra;
0237     struct rb_node *n;
0238 
0239     while ((n = rb_first(&be->roots))) {
0240         re = rb_entry(n, struct root_entry, node);
0241         rb_erase(&re->node, &be->roots);
0242         kfree(re);
0243     }
0244 
0245     while((n = rb_first(&be->refs))) {
0246         ref = rb_entry(n, struct ref_entry, node);
0247         rb_erase(&ref->node, &be->refs);
0248         kfree(ref);
0249     }
0250 
0251     while (!list_empty(&be->actions)) {
0252         ra = list_first_entry(&be->actions, struct ref_action,
0253                       list);
0254         list_del(&ra->list);
0255         kfree(ra);
0256     }
0257     kfree(be);
0258 }
0259 
0260 static struct block_entry *add_block_entry(struct btrfs_fs_info *fs_info,
0261                        u64 bytenr, u64 len,
0262                        u64 root_objectid)
0263 {
0264     struct block_entry *be = NULL, *exist;
0265     struct root_entry *re = NULL;
0266 
0267     re = kzalloc(sizeof(struct root_entry), GFP_NOFS);
0268     be = kzalloc(sizeof(struct block_entry), GFP_NOFS);
0269     if (!be || !re) {
0270         kfree(re);
0271         kfree(be);
0272         return ERR_PTR(-ENOMEM);
0273     }
0274     be->bytenr = bytenr;
0275     be->len = len;
0276 
0277     re->root_objectid = root_objectid;
0278     re->num_refs = 0;
0279 
0280     spin_lock(&fs_info->ref_verify_lock);
0281     exist = insert_block_entry(&fs_info->block_tree, be);
0282     if (exist) {
0283         if (root_objectid) {
0284             struct root_entry *exist_re;
0285 
0286             exist_re = insert_root_entry(&exist->roots, re);
0287             if (exist_re)
0288                 kfree(re);
0289         } else {
0290             kfree(re);
0291         }
0292         kfree(be);
0293         return exist;
0294     }
0295 
0296     be->num_refs = 0;
0297     be->metadata = 0;
0298     be->from_disk = 0;
0299     be->roots = RB_ROOT;
0300     be->refs = RB_ROOT;
0301     INIT_LIST_HEAD(&be->actions);
0302     if (root_objectid)
0303         insert_root_entry(&be->roots, re);
0304     else
0305         kfree(re);
0306     return be;
0307 }
0308 
0309 static int add_tree_block(struct btrfs_fs_info *fs_info, u64 ref_root,
0310               u64 parent, u64 bytenr, int level)
0311 {
0312     struct block_entry *be;
0313     struct root_entry *re;
0314     struct ref_entry *ref = NULL, *exist;
0315 
0316     ref = kmalloc(sizeof(struct ref_entry), GFP_NOFS);
0317     if (!ref)
0318         return -ENOMEM;
0319 
0320     if (parent)
0321         ref->root_objectid = 0;
0322     else
0323         ref->root_objectid = ref_root;
0324     ref->parent = parent;
0325     ref->owner = level;
0326     ref->offset = 0;
0327     ref->num_refs = 1;
0328 
0329     be = add_block_entry(fs_info, bytenr, fs_info->nodesize, ref_root);
0330     if (IS_ERR(be)) {
0331         kfree(ref);
0332         return PTR_ERR(be);
0333     }
0334     be->num_refs++;
0335     be->from_disk = 1;
0336     be->metadata = 1;
0337 
0338     if (!parent) {
0339         ASSERT(ref_root);
0340         re = lookup_root_entry(&be->roots, ref_root);
0341         ASSERT(re);
0342         re->num_refs++;
0343     }
0344     exist = insert_ref_entry(&be->refs, ref);
0345     if (exist) {
0346         exist->num_refs++;
0347         kfree(ref);
0348     }
0349     spin_unlock(&fs_info->ref_verify_lock);
0350 
0351     return 0;
0352 }
0353 
0354 static int add_shared_data_ref(struct btrfs_fs_info *fs_info,
0355                    u64 parent, u32 num_refs, u64 bytenr,
0356                    u64 num_bytes)
0357 {
0358     struct block_entry *be;
0359     struct ref_entry *ref;
0360 
0361     ref = kzalloc(sizeof(struct ref_entry), GFP_NOFS);
0362     if (!ref)
0363         return -ENOMEM;
0364     be = add_block_entry(fs_info, bytenr, num_bytes, 0);
0365     if (IS_ERR(be)) {
0366         kfree(ref);
0367         return PTR_ERR(be);
0368     }
0369     be->num_refs += num_refs;
0370 
0371     ref->parent = parent;
0372     ref->num_refs = num_refs;
0373     if (insert_ref_entry(&be->refs, ref)) {
0374         spin_unlock(&fs_info->ref_verify_lock);
0375         btrfs_err(fs_info, "existing shared ref when reading from disk?");
0376         kfree(ref);
0377         return -EINVAL;
0378     }
0379     spin_unlock(&fs_info->ref_verify_lock);
0380     return 0;
0381 }
0382 
0383 static int add_extent_data_ref(struct btrfs_fs_info *fs_info,
0384                    struct extent_buffer *leaf,
0385                    struct btrfs_extent_data_ref *dref,
0386                    u64 bytenr, u64 num_bytes)
0387 {
0388     struct block_entry *be;
0389     struct ref_entry *ref;
0390     struct root_entry *re;
0391     u64 ref_root = btrfs_extent_data_ref_root(leaf, dref);
0392     u64 owner = btrfs_extent_data_ref_objectid(leaf, dref);
0393     u64 offset = btrfs_extent_data_ref_offset(leaf, dref);
0394     u32 num_refs = btrfs_extent_data_ref_count(leaf, dref);
0395 
0396     ref = kzalloc(sizeof(struct ref_entry), GFP_NOFS);
0397     if (!ref)
0398         return -ENOMEM;
0399     be = add_block_entry(fs_info, bytenr, num_bytes, ref_root);
0400     if (IS_ERR(be)) {
0401         kfree(ref);
0402         return PTR_ERR(be);
0403     }
0404     be->num_refs += num_refs;
0405 
0406     ref->parent = 0;
0407     ref->owner = owner;
0408     ref->root_objectid = ref_root;
0409     ref->offset = offset;
0410     ref->num_refs = num_refs;
0411     if (insert_ref_entry(&be->refs, ref)) {
0412         spin_unlock(&fs_info->ref_verify_lock);
0413         btrfs_err(fs_info, "existing ref when reading from disk?");
0414         kfree(ref);
0415         return -EINVAL;
0416     }
0417 
0418     re = lookup_root_entry(&be->roots, ref_root);
0419     if (!re) {
0420         spin_unlock(&fs_info->ref_verify_lock);
0421         btrfs_err(fs_info, "missing root in new block entry?");
0422         return -EINVAL;
0423     }
0424     re->num_refs += num_refs;
0425     spin_unlock(&fs_info->ref_verify_lock);
0426     return 0;
0427 }
0428 
0429 static int process_extent_item(struct btrfs_fs_info *fs_info,
0430                    struct btrfs_path *path, struct btrfs_key *key,
0431                    int slot, int *tree_block_level)
0432 {
0433     struct btrfs_extent_item *ei;
0434     struct btrfs_extent_inline_ref *iref;
0435     struct btrfs_extent_data_ref *dref;
0436     struct btrfs_shared_data_ref *sref;
0437     struct extent_buffer *leaf = path->nodes[0];
0438     u32 item_size = btrfs_item_size(leaf, slot);
0439     unsigned long end, ptr;
0440     u64 offset, flags, count;
0441     int type, ret;
0442 
0443     ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
0444     flags = btrfs_extent_flags(leaf, ei);
0445 
0446     if ((key->type == BTRFS_EXTENT_ITEM_KEY) &&
0447         flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) {
0448         struct btrfs_tree_block_info *info;
0449 
0450         info = (struct btrfs_tree_block_info *)(ei + 1);
0451         *tree_block_level = btrfs_tree_block_level(leaf, info);
0452         iref = (struct btrfs_extent_inline_ref *)(info + 1);
0453     } else {
0454         if (key->type == BTRFS_METADATA_ITEM_KEY)
0455             *tree_block_level = key->offset;
0456         iref = (struct btrfs_extent_inline_ref *)(ei + 1);
0457     }
0458 
0459     ptr = (unsigned long)iref;
0460     end = (unsigned long)ei + item_size;
0461     while (ptr < end) {
0462         iref = (struct btrfs_extent_inline_ref *)ptr;
0463         type = btrfs_extent_inline_ref_type(leaf, iref);
0464         offset = btrfs_extent_inline_ref_offset(leaf, iref);
0465         switch (type) {
0466         case BTRFS_TREE_BLOCK_REF_KEY:
0467             ret = add_tree_block(fs_info, offset, 0, key->objectid,
0468                          *tree_block_level);
0469             break;
0470         case BTRFS_SHARED_BLOCK_REF_KEY:
0471             ret = add_tree_block(fs_info, 0, offset, key->objectid,
0472                          *tree_block_level);
0473             break;
0474         case BTRFS_EXTENT_DATA_REF_KEY:
0475             dref = (struct btrfs_extent_data_ref *)(&iref->offset);
0476             ret = add_extent_data_ref(fs_info, leaf, dref,
0477                           key->objectid, key->offset);
0478             break;
0479         case BTRFS_SHARED_DATA_REF_KEY:
0480             sref = (struct btrfs_shared_data_ref *)(iref + 1);
0481             count = btrfs_shared_data_ref_count(leaf, sref);
0482             ret = add_shared_data_ref(fs_info, offset, count,
0483                           key->objectid, key->offset);
0484             break;
0485         default:
0486             btrfs_err(fs_info, "invalid key type in iref");
0487             ret = -EINVAL;
0488             break;
0489         }
0490         if (ret)
0491             break;
0492         ptr += btrfs_extent_inline_ref_size(type);
0493     }
0494     return ret;
0495 }
0496 
0497 static int process_leaf(struct btrfs_root *root,
0498             struct btrfs_path *path, u64 *bytenr, u64 *num_bytes,
0499             int *tree_block_level)
0500 {
0501     struct btrfs_fs_info *fs_info = root->fs_info;
0502     struct extent_buffer *leaf = path->nodes[0];
0503     struct btrfs_extent_data_ref *dref;
0504     struct btrfs_shared_data_ref *sref;
0505     u32 count;
0506     int i = 0, ret = 0;
0507     struct btrfs_key key;
0508     int nritems = btrfs_header_nritems(leaf);
0509 
0510     for (i = 0; i < nritems; i++) {
0511         btrfs_item_key_to_cpu(leaf, &key, i);
0512         switch (key.type) {
0513         case BTRFS_EXTENT_ITEM_KEY:
0514             *num_bytes = key.offset;
0515             fallthrough;
0516         case BTRFS_METADATA_ITEM_KEY:
0517             *bytenr = key.objectid;
0518             ret = process_extent_item(fs_info, path, &key, i,
0519                           tree_block_level);
0520             break;
0521         case BTRFS_TREE_BLOCK_REF_KEY:
0522             ret = add_tree_block(fs_info, key.offset, 0,
0523                          key.objectid, *tree_block_level);
0524             break;
0525         case BTRFS_SHARED_BLOCK_REF_KEY:
0526             ret = add_tree_block(fs_info, 0, key.offset,
0527                          key.objectid, *tree_block_level);
0528             break;
0529         case BTRFS_EXTENT_DATA_REF_KEY:
0530             dref = btrfs_item_ptr(leaf, i,
0531                           struct btrfs_extent_data_ref);
0532             ret = add_extent_data_ref(fs_info, leaf, dref, *bytenr,
0533                           *num_bytes);
0534             break;
0535         case BTRFS_SHARED_DATA_REF_KEY:
0536             sref = btrfs_item_ptr(leaf, i,
0537                           struct btrfs_shared_data_ref);
0538             count = btrfs_shared_data_ref_count(leaf, sref);
0539             ret = add_shared_data_ref(fs_info, key.offset, count,
0540                           *bytenr, *num_bytes);
0541             break;
0542         default:
0543             break;
0544         }
0545         if (ret)
0546             break;
0547     }
0548     return ret;
0549 }
0550 
0551 /* Walk down to the leaf from the given level */
0552 static int walk_down_tree(struct btrfs_root *root, struct btrfs_path *path,
0553               int level, u64 *bytenr, u64 *num_bytes,
0554               int *tree_block_level)
0555 {
0556     struct extent_buffer *eb;
0557     int ret = 0;
0558 
0559     while (level >= 0) {
0560         if (level) {
0561             eb = btrfs_read_node_slot(path->nodes[level],
0562                           path->slots[level]);
0563             if (IS_ERR(eb))
0564                 return PTR_ERR(eb);
0565             btrfs_tree_read_lock(eb);
0566             path->nodes[level-1] = eb;
0567             path->slots[level-1] = 0;
0568             path->locks[level-1] = BTRFS_READ_LOCK;
0569         } else {
0570             ret = process_leaf(root, path, bytenr, num_bytes,
0571                        tree_block_level);
0572             if (ret)
0573                 break;
0574         }
0575         level--;
0576     }
0577     return ret;
0578 }
0579 
0580 /* Walk up to the next node that needs to be processed */
0581 static int walk_up_tree(struct btrfs_path *path, int *level)
0582 {
0583     int l;
0584 
0585     for (l = 0; l < BTRFS_MAX_LEVEL; l++) {
0586         if (!path->nodes[l])
0587             continue;
0588         if (l) {
0589             path->slots[l]++;
0590             if (path->slots[l] <
0591                 btrfs_header_nritems(path->nodes[l])) {
0592                 *level = l;
0593                 return 0;
0594             }
0595         }
0596         btrfs_tree_unlock_rw(path->nodes[l], path->locks[l]);
0597         free_extent_buffer(path->nodes[l]);
0598         path->nodes[l] = NULL;
0599         path->slots[l] = 0;
0600         path->locks[l] = 0;
0601     }
0602 
0603     return 1;
0604 }
0605 
0606 static void dump_ref_action(struct btrfs_fs_info *fs_info,
0607                 struct ref_action *ra)
0608 {
0609     btrfs_err(fs_info,
0610 "  Ref action %d, root %llu, ref_root %llu, parent %llu, owner %llu, offset %llu, num_refs %llu",
0611           ra->action, ra->root, ra->ref.root_objectid, ra->ref.parent,
0612           ra->ref.owner, ra->ref.offset, ra->ref.num_refs);
0613     __print_stack_trace(fs_info, ra);
0614 }
0615 
0616 /*
0617  * Dumps all the information from the block entry to printk, it's going to be
0618  * awesome.
0619  */
0620 static void dump_block_entry(struct btrfs_fs_info *fs_info,
0621                  struct block_entry *be)
0622 {
0623     struct ref_entry *ref;
0624     struct root_entry *re;
0625     struct ref_action *ra;
0626     struct rb_node *n;
0627 
0628     btrfs_err(fs_info,
0629 "dumping block entry [%llu %llu], num_refs %llu, metadata %d, from disk %d",
0630           be->bytenr, be->len, be->num_refs, be->metadata,
0631           be->from_disk);
0632 
0633     for (n = rb_first(&be->refs); n; n = rb_next(n)) {
0634         ref = rb_entry(n, struct ref_entry, node);
0635         btrfs_err(fs_info,
0636 "  ref root %llu, parent %llu, owner %llu, offset %llu, num_refs %llu",
0637               ref->root_objectid, ref->parent, ref->owner,
0638               ref->offset, ref->num_refs);
0639     }
0640 
0641     for (n = rb_first(&be->roots); n; n = rb_next(n)) {
0642         re = rb_entry(n, struct root_entry, node);
0643         btrfs_err(fs_info, "  root entry %llu, num_refs %llu",
0644               re->root_objectid, re->num_refs);
0645     }
0646 
0647     list_for_each_entry(ra, &be->actions, list)
0648         dump_ref_action(fs_info, ra);
0649 }
0650 
0651 /*
0652  * btrfs_ref_tree_mod: called when we modify a ref for a bytenr
0653  *
0654  * This will add an action item to the given bytenr and do sanity checks to make
0655  * sure we haven't messed something up.  If we are making a new allocation and
0656  * this block entry has history we will delete all previous actions as long as
0657  * our sanity checks pass as they are no longer needed.
0658  */
0659 int btrfs_ref_tree_mod(struct btrfs_fs_info *fs_info,
0660                struct btrfs_ref *generic_ref)
0661 {
0662     struct ref_entry *ref = NULL, *exist;
0663     struct ref_action *ra = NULL;
0664     struct block_entry *be = NULL;
0665     struct root_entry *re = NULL;
0666     int action = generic_ref->action;
0667     int ret = 0;
0668     bool metadata;
0669     u64 bytenr = generic_ref->bytenr;
0670     u64 num_bytes = generic_ref->len;
0671     u64 parent = generic_ref->parent;
0672     u64 ref_root = 0;
0673     u64 owner = 0;
0674     u64 offset = 0;
0675 
0676     if (!btrfs_test_opt(fs_info, REF_VERIFY))
0677         return 0;
0678 
0679     if (generic_ref->type == BTRFS_REF_METADATA) {
0680         if (!parent)
0681             ref_root = generic_ref->tree_ref.owning_root;
0682         owner = generic_ref->tree_ref.level;
0683     } else if (!parent) {
0684         ref_root = generic_ref->data_ref.owning_root;
0685         owner = generic_ref->data_ref.ino;
0686         offset = generic_ref->data_ref.offset;
0687     }
0688     metadata = owner < BTRFS_FIRST_FREE_OBJECTID;
0689 
0690     ref = kzalloc(sizeof(struct ref_entry), GFP_NOFS);
0691     ra = kmalloc(sizeof(struct ref_action), GFP_NOFS);
0692     if (!ra || !ref) {
0693         kfree(ref);
0694         kfree(ra);
0695         ret = -ENOMEM;
0696         goto out;
0697     }
0698 
0699     ref->parent = parent;
0700     ref->owner = owner;
0701     ref->root_objectid = ref_root;
0702     ref->offset = offset;
0703     ref->num_refs = (action == BTRFS_DROP_DELAYED_REF) ? -1 : 1;
0704 
0705     memcpy(&ra->ref, ref, sizeof(struct ref_entry));
0706     /*
0707      * Save the extra info from the delayed ref in the ref action to make it
0708      * easier to figure out what is happening.  The real ref's we add to the
0709      * ref tree need to reflect what we save on disk so it matches any
0710      * on-disk refs we pre-loaded.
0711      */
0712     ra->ref.owner = owner;
0713     ra->ref.offset = offset;
0714     ra->ref.root_objectid = ref_root;
0715     __save_stack_trace(ra);
0716 
0717     INIT_LIST_HEAD(&ra->list);
0718     ra->action = action;
0719     ra->root = generic_ref->real_root;
0720 
0721     /*
0722      * This is an allocation, preallocate the block_entry in case we haven't
0723      * used it before.
0724      */
0725     ret = -EINVAL;
0726     if (action == BTRFS_ADD_DELAYED_EXTENT) {
0727         /*
0728          * For subvol_create we'll just pass in whatever the parent root
0729          * is and the new root objectid, so let's not treat the passed
0730          * in root as if it really has a ref for this bytenr.
0731          */
0732         be = add_block_entry(fs_info, bytenr, num_bytes, ref_root);
0733         if (IS_ERR(be)) {
0734             kfree(ref);
0735             kfree(ra);
0736             ret = PTR_ERR(be);
0737             goto out;
0738         }
0739         be->num_refs++;
0740         if (metadata)
0741             be->metadata = 1;
0742 
0743         if (be->num_refs != 1) {
0744             btrfs_err(fs_info,
0745             "re-allocated a block that still has references to it!");
0746             dump_block_entry(fs_info, be);
0747             dump_ref_action(fs_info, ra);
0748             kfree(ref);
0749             kfree(ra);
0750             goto out_unlock;
0751         }
0752 
0753         while (!list_empty(&be->actions)) {
0754             struct ref_action *tmp;
0755 
0756             tmp = list_first_entry(&be->actions, struct ref_action,
0757                            list);
0758             list_del(&tmp->list);
0759             kfree(tmp);
0760         }
0761     } else {
0762         struct root_entry *tmp;
0763 
0764         if (!parent) {
0765             re = kmalloc(sizeof(struct root_entry), GFP_NOFS);
0766             if (!re) {
0767                 kfree(ref);
0768                 kfree(ra);
0769                 ret = -ENOMEM;
0770                 goto out;
0771             }
0772             /*
0773              * This is the root that is modifying us, so it's the
0774              * one we want to lookup below when we modify the
0775              * re->num_refs.
0776              */
0777             ref_root = generic_ref->real_root;
0778             re->root_objectid = generic_ref->real_root;
0779             re->num_refs = 0;
0780         }
0781 
0782         spin_lock(&fs_info->ref_verify_lock);
0783         be = lookup_block_entry(&fs_info->block_tree, bytenr);
0784         if (!be) {
0785             btrfs_err(fs_info,
0786 "trying to do action %d to bytenr %llu num_bytes %llu but there is no existing entry!",
0787                   action, bytenr, num_bytes);
0788             dump_ref_action(fs_info, ra);
0789             kfree(ref);
0790             kfree(ra);
0791             goto out_unlock;
0792         } else if (be->num_refs == 0) {
0793             btrfs_err(fs_info,
0794         "trying to do action %d for a bytenr that has 0 total references",
0795                 action);
0796             dump_block_entry(fs_info, be);
0797             dump_ref_action(fs_info, ra);
0798             kfree(ref);
0799             kfree(ra);
0800             goto out_unlock;
0801         }
0802 
0803         if (!parent) {
0804             tmp = insert_root_entry(&be->roots, re);
0805             if (tmp) {
0806                 kfree(re);
0807                 re = tmp;
0808             }
0809         }
0810     }
0811 
0812     exist = insert_ref_entry(&be->refs, ref);
0813     if (exist) {
0814         if (action == BTRFS_DROP_DELAYED_REF) {
0815             if (exist->num_refs == 0) {
0816                 btrfs_err(fs_info,
0817 "dropping a ref for a existing root that doesn't have a ref on the block");
0818                 dump_block_entry(fs_info, be);
0819                 dump_ref_action(fs_info, ra);
0820                 kfree(ref);
0821                 kfree(ra);
0822                 goto out_unlock;
0823             }
0824             exist->num_refs--;
0825             if (exist->num_refs == 0) {
0826                 rb_erase(&exist->node, &be->refs);
0827                 kfree(exist);
0828             }
0829         } else if (!be->metadata) {
0830             exist->num_refs++;
0831         } else {
0832             btrfs_err(fs_info,
0833 "attempting to add another ref for an existing ref on a tree block");
0834             dump_block_entry(fs_info, be);
0835             dump_ref_action(fs_info, ra);
0836             kfree(ref);
0837             kfree(ra);
0838             goto out_unlock;
0839         }
0840         kfree(ref);
0841     } else {
0842         if (action == BTRFS_DROP_DELAYED_REF) {
0843             btrfs_err(fs_info,
0844 "dropping a ref for a root that doesn't have a ref on the block");
0845             dump_block_entry(fs_info, be);
0846             dump_ref_action(fs_info, ra);
0847             kfree(ref);
0848             kfree(ra);
0849             goto out_unlock;
0850         }
0851     }
0852 
0853     if (!parent && !re) {
0854         re = lookup_root_entry(&be->roots, ref_root);
0855         if (!re) {
0856             /*
0857              * This shouldn't happen because we will add our re
0858              * above when we lookup the be with !parent, but just in
0859              * case catch this case so we don't panic because I
0860              * didn't think of some other corner case.
0861              */
0862             btrfs_err(fs_info, "failed to find root %llu for %llu",
0863                   generic_ref->real_root, be->bytenr);
0864             dump_block_entry(fs_info, be);
0865             dump_ref_action(fs_info, ra);
0866             kfree(ra);
0867             goto out_unlock;
0868         }
0869     }
0870     if (action == BTRFS_DROP_DELAYED_REF) {
0871         if (re)
0872             re->num_refs--;
0873         be->num_refs--;
0874     } else if (action == BTRFS_ADD_DELAYED_REF) {
0875         be->num_refs++;
0876         if (re)
0877             re->num_refs++;
0878     }
0879     list_add_tail(&ra->list, &be->actions);
0880     ret = 0;
0881 out_unlock:
0882     spin_unlock(&fs_info->ref_verify_lock);
0883 out:
0884     if (ret)
0885         btrfs_clear_opt(fs_info->mount_opt, REF_VERIFY);
0886     return ret;
0887 }
0888 
0889 /* Free up the ref cache */
0890 void btrfs_free_ref_cache(struct btrfs_fs_info *fs_info)
0891 {
0892     struct block_entry *be;
0893     struct rb_node *n;
0894 
0895     if (!btrfs_test_opt(fs_info, REF_VERIFY))
0896         return;
0897 
0898     spin_lock(&fs_info->ref_verify_lock);
0899     while ((n = rb_first(&fs_info->block_tree))) {
0900         be = rb_entry(n, struct block_entry, node);
0901         rb_erase(&be->node, &fs_info->block_tree);
0902         free_block_entry(be);
0903         cond_resched_lock(&fs_info->ref_verify_lock);
0904     }
0905     spin_unlock(&fs_info->ref_verify_lock);
0906 }
0907 
0908 void btrfs_free_ref_tree_range(struct btrfs_fs_info *fs_info, u64 start,
0909                    u64 len)
0910 {
0911     struct block_entry *be = NULL, *entry;
0912     struct rb_node *n;
0913 
0914     if (!btrfs_test_opt(fs_info, REF_VERIFY))
0915         return;
0916 
0917     spin_lock(&fs_info->ref_verify_lock);
0918     n = fs_info->block_tree.rb_node;
0919     while (n) {
0920         entry = rb_entry(n, struct block_entry, node);
0921         if (entry->bytenr < start) {
0922             n = n->rb_right;
0923         } else if (entry->bytenr > start) {
0924             n = n->rb_left;
0925         } else {
0926             be = entry;
0927             break;
0928         }
0929         /* We want to get as close to start as possible */
0930         if (be == NULL ||
0931             (entry->bytenr < start && be->bytenr > start) ||
0932             (entry->bytenr < start && entry->bytenr > be->bytenr))
0933             be = entry;
0934     }
0935 
0936     /*
0937      * Could have an empty block group, maybe have something to check for
0938      * this case to verify we were actually empty?
0939      */
0940     if (!be) {
0941         spin_unlock(&fs_info->ref_verify_lock);
0942         return;
0943     }
0944 
0945     n = &be->node;
0946     while (n) {
0947         be = rb_entry(n, struct block_entry, node);
0948         n = rb_next(n);
0949         if (be->bytenr < start && be->bytenr + be->len > start) {
0950             btrfs_err(fs_info,
0951                 "block entry overlaps a block group [%llu,%llu]!",
0952                 start, len);
0953             dump_block_entry(fs_info, be);
0954             continue;
0955         }
0956         if (be->bytenr < start)
0957             continue;
0958         if (be->bytenr >= start + len)
0959             break;
0960         if (be->bytenr + be->len > start + len) {
0961             btrfs_err(fs_info,
0962                 "block entry overlaps a block group [%llu,%llu]!",
0963                 start, len);
0964             dump_block_entry(fs_info, be);
0965         }
0966         rb_erase(&be->node, &fs_info->block_tree);
0967         free_block_entry(be);
0968     }
0969     spin_unlock(&fs_info->ref_verify_lock);
0970 }
0971 
0972 /* Walk down all roots and build the ref tree, meant to be called at mount */
0973 int btrfs_build_ref_tree(struct btrfs_fs_info *fs_info)
0974 {
0975     struct btrfs_root *extent_root;
0976     struct btrfs_path *path;
0977     struct extent_buffer *eb;
0978     int tree_block_level = 0;
0979     u64 bytenr = 0, num_bytes = 0;
0980     int ret, level;
0981 
0982     if (!btrfs_test_opt(fs_info, REF_VERIFY))
0983         return 0;
0984 
0985     path = btrfs_alloc_path();
0986     if (!path)
0987         return -ENOMEM;
0988 
0989     extent_root = btrfs_extent_root(fs_info, 0);
0990     eb = btrfs_read_lock_root_node(extent_root);
0991     level = btrfs_header_level(eb);
0992     path->nodes[level] = eb;
0993     path->slots[level] = 0;
0994     path->locks[level] = BTRFS_READ_LOCK;
0995 
0996     while (1) {
0997         /*
0998          * We have to keep track of the bytenr/num_bytes we last hit
0999          * because we could have run out of space for an inline ref, and
1000          * would have had to added a ref key item which may appear on a
1001          * different leaf from the original extent item.
1002          */
1003         ret = walk_down_tree(extent_root, path, level,
1004                      &bytenr, &num_bytes, &tree_block_level);
1005         if (ret)
1006             break;
1007         ret = walk_up_tree(path, &level);
1008         if (ret < 0)
1009             break;
1010         if (ret > 0) {
1011             ret = 0;
1012             break;
1013         }
1014     }
1015     if (ret) {
1016         btrfs_clear_opt(fs_info->mount_opt, REF_VERIFY);
1017         btrfs_free_ref_cache(fs_info);
1018     }
1019     btrfs_free_path(path);
1020     return ret;
1021 }