// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright (C) 2009 Oracle.  All rights reserved.
 */

0006 #include <linux/sched.h>
0007 #include <linux/pagemap.h>
0008 #include <linux/writeback.h>
0009 #include <linux/blkdev.h>
0010 #include <linux/rbtree.h>
0011 #include <linux/slab.h>
0012 #include <linux/error-injection.h>
0013 #include "ctree.h"
0014 #include "disk-io.h"
0015 #include "transaction.h"
0016 #include "volumes.h"
0017 #include "locking.h"
0018 #include "btrfs_inode.h"
0019 #include "async-thread.h"
0020 #include "free-space-cache.h"
0021 #include "qgroup.h"
0022 #include "print-tree.h"
0023 #include "delalloc-space.h"
0024 #include "block-group.h"
0025 #include "backref.h"
0026 #include "misc.h"
0027 #include "subpage.h"
0028 #include "zoned.h"
0029 #include "inode-item.h"

/*
 * Relocation overview
 *
 * [What does relocation do]
 *
 * The objective of relocation is to relocate all extents of the target block
 * group to other block groups.
 * This is utilized by resize (shrink only), profile conversion and balance.
 *
 * 		Before		|		After
 * ------------------------------------------------------------------
 *  BG A: 10 data extents	| BG A: deleted
 *  BG B:  2 data extents	| BG B: 10 data extents (2 old + 8 relocated)
 *  BG C:  1 extents		| BG C:  3 data extents (1 old + 2 relocated)
 *
 * [How does relocation work]
 *
 * 1.   Mark the target block group read-only
 *      New extents won't be allocated from the target block group.
 *
 * 2.1  Record each extent in the target block group
 *      To build a proper map of extents to be relocated.
 *
 * 2.2  Build data reloc tree and reloc trees
 *      Data reloc tree will contain an inode, recording all newly relocated
 *      data extents.
 *      There will be only one data reloc tree for one data block group.
 *
 *      Reloc tree will be a special snapshot of its source tree, containing
 *      relocated tree blocks.
 *      Each tree referring to a tree block in the target block group will get
 *      its reloc tree built.
 *
 * 2.3  Swap source tree with its corresponding reloc tree
 *      Each involved tree only refers to new extents after the swap.
 *
 * 3.   Cleanup reloc trees and data reloc tree
 *      As old extents in the target block group are still referenced by reloc
 *      trees, we need to clean them up before really freeing the target block
 *      group.
 *
 * The main complexity is in steps 2.2 and 2.3.
 *
 * The entry point of relocation is the relocate_block_group() function.
 */

#define RELOCATION_RESERVED_NODES	256

/*
 * Map address of a tree root block to the corresponding tree.
 */
0082 struct mapping_node {
0083 struct {
0084 struct rb_node rb_node;
0085 u64 bytenr;
0086 };
0087 void *data;
0088 };
0089
0090 struct mapping_tree {
0091 struct rb_root rb_root;
0092 spinlock_t lock;
0093 };
0094

/*
 * Present a tree block to process.
 */
0098 struct tree_block {
0099 struct {
0100 struct rb_node rb_node;
0101 u64 bytenr;
0102 };
0103 u64 owner;
0104 struct btrfs_key key;
0105 unsigned int level:8;
0106 unsigned int key_ready:1;
0107 };
0108
0109 #define MAX_EXTENTS 128
0110
0111 struct file_extent_cluster {
0112 u64 start;
0113 u64 end;
0114 u64 boundary[MAX_EXTENTS];
0115 unsigned int nr;
0116 };
0117
struct reloc_control {
	/* block group to relocate */
	struct btrfs_block_group *block_group;
	/* extent tree */
	struct btrfs_root *extent_root;
	/* inode for moving data */
	struct inode *data_inode;

	struct btrfs_block_rsv *block_rsv;

	struct btrfs_backref_cache backref_cache;

	struct file_extent_cluster cluster;
	/* tree blocks have been processed */
	struct extent_io_tree processed_blocks;
	/* map start of tree root to corresponding reloc tree */
	struct mapping_tree reloc_root_tree;
	/* list of reloc trees */
	struct list_head reloc_roots;
	/* list of subvolume trees that get relocated */
	struct list_head dirty_subvol_roots;
	/* size of metadata reservation for merging reloc trees */
	u64 merging_rsv_size;
	/* size of relocated tree nodes */
	u64 nodes_relocated;
	/* reserved size for block group relocation */
	u64 reserved_bytes;

	u64 search_start;
	u64 extents_found;

	unsigned int stage:8;
	unsigned int create_reloc_tree:1;
	unsigned int merge_reloc_tree:1;
	unsigned int found_file_extent:1;
};

/* stages of data relocation */
#define MOVE_DATA_EXTENTS	0
#define UPDATE_DATA_PTRS	1
0158
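/*
 * Mark a backref node as processed.  If the node is a leaf, or its block
 * lies inside the block group being relocated, also record the block range
 * as EXTENT_DIRTY in rc->processed_blocks.
 */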
0159 static void mark_block_processed(struct reloc_control *rc,
0160 struct btrfs_backref_node *node)
0161 {
0162 u32 blocksize;
0163
0164 if (node->level == 0 ||
0165 in_range(node->bytenr, rc->block_group->start,
0166 rc->block_group->length)) {
0167 blocksize = rc->extent_root->fs_info->nodesize;
0168 set_extent_bits(&rc->processed_blocks, node->bytenr,
0169 node->bytenr + blocksize - 1, EXTENT_DIRTY);
0170 }
0171 node->processed = 1;
0172 }
0173
0174
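/*
 * Initialize an empty 'tree root bytenr -> reloc tree' mapping tree.
 */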
0175 static void mapping_tree_init(struct mapping_tree *tree)
0176 {
0177 tree->rb_root = RB_ROOT;
0178 spin_lock_init(&tree->lock);
0179 }

/*
 * Walk up backref nodes until we reach the node in the next level.
 */
0184 static struct btrfs_backref_node *walk_up_backref(
0185 struct btrfs_backref_node *node,
0186 struct btrfs_backref_edge *edges[], int *index)
0187 {
0188 struct btrfs_backref_edge *edge;
0189 int idx = *index;
0190
0191 while (!list_empty(&node->upper)) {
0192 edge = list_entry(node->upper.next,
0193 struct btrfs_backref_edge, list[LOWER]);
0194 edges[idx++] = edge;
0195 node = edge->node[UPPER];
0196 }
0197 BUG_ON(node->detached);
0198 *index = idx;
0199 return node;
0200 }

/*
 * Walk down backref nodes to find the start of the next reference path.
 */
0205 static struct btrfs_backref_node *walk_down_backref(
0206 struct btrfs_backref_edge *edges[], int *index)
0207 {
0208 struct btrfs_backref_edge *edge;
0209 struct btrfs_backref_node *lower;
0210 int idx = *index;
0211
0212 while (idx > 0) {
0213 edge = edges[idx - 1];
0214 lower = edge->node[LOWER];
0215 if (list_is_last(&edge->list[LOWER], &lower->upper)) {
0216 idx--;
0217 continue;
0218 }
0219 edge = list_entry(edge->list[LOWER].next,
0220 struct btrfs_backref_edge, list[LOWER]);
0221 edges[idx - 1] = edge;
0222 *index = idx;
0223 return edge->node[UPPER];
0224 }
0225 *index = 0;
0226 return NULL;
0227 }
0228
0229 static void update_backref_node(struct btrfs_backref_cache *cache,
0230 struct btrfs_backref_node *node, u64 bytenr)
0231 {
0232 struct rb_node *rb_node;
0233 rb_erase(&node->rb_node, &cache->rb_root);
0234 node->bytenr = bytenr;
0235 rb_node = rb_simple_insert(&cache->rb_root, node->bytenr, &node->rb_node);
0236 if (rb_node)
0237 btrfs_backref_panic(cache->fs_info, bytenr, -EEXIST);
0238 }

/*
 * Update the backref cache after a transaction commit: drop detached nodes
 * and move changed/pending nodes to their new bytenr.
 */
0243 static int update_backref_cache(struct btrfs_trans_handle *trans,
0244 struct btrfs_backref_cache *cache)
0245 {
0246 struct btrfs_backref_node *node;
0247 int level = 0;
0248
0249 if (cache->last_trans == 0) {
0250 cache->last_trans = trans->transid;
0251 return 0;
0252 }
0253
0254 if (cache->last_trans == trans->transid)
0255 return 0;

	/*
	 * Detached nodes are used to avoid unnecessary backref lookup.
	 * A transaction commit changes the extent tree, so the detached
	 * nodes are no longer useful.
	 */
0262 while (!list_empty(&cache->detached)) {
0263 node = list_entry(cache->detached.next,
0264 struct btrfs_backref_node, list);
0265 btrfs_backref_cleanup_node(cache, node);
0266 }
0267
0268 while (!list_empty(&cache->changed)) {
0269 node = list_entry(cache->changed.next,
0270 struct btrfs_backref_node, list);
0271 list_del_init(&node->list);
0272 BUG_ON(node->pending);
0273 update_backref_node(cache, node, node->new_bytenr);
0274 }
0275
0276
0277
0278
0279
0280 for (level = 0; level < BTRFS_MAX_LEVEL; level++) {
0281 list_for_each_entry(node, &cache->pending[level], list) {
0282 BUG_ON(!node->pending);
0283 if (node->bytenr == node->new_bytenr)
0284 continue;
0285 update_backref_node(cache, node, node->new_bytenr);
0286 }
0287 }
0288
0289 cache->last_trans = 0;
0290 return 1;
0291 }
0292
0293 static bool reloc_root_is_dead(struct btrfs_root *root)
0294 {
	/*
	 * Pairs with the smp_wmb() in btrfs_update_reloc_root() and
	 * clean_dirty_subvols(): make sure we see the latest reloc_root and
	 * root state.
	 */
0300 smp_rmb();
0301 if (test_bit(BTRFS_ROOT_DEAD_RELOC_TREE, &root->state))
0302 return true;
0303 return false;
0304 }

/*
 * Check if this subvolume tree has a valid reloc tree.
 *
 * A reloc tree after the swap is considered dead, thus not valid.  This is
 * enough for most callers, as they don't distinguish a dead reloc root from
 * having no reloc root.  But btrfs_should_ignore_reloc_root() below is a
 * special case.
 */
0314 static bool have_reloc_root(struct btrfs_root *root)
0315 {
0316 if (reloc_root_is_dead(root))
0317 return false;
0318 if (!root->reloc_root)
0319 return false;
0320 return true;
0321 }
0322
0323 int btrfs_should_ignore_reloc_root(struct btrfs_root *root)
0324 {
0325 struct btrfs_root *reloc_root;
0326
0327 if (!test_bit(BTRFS_ROOT_SHAREABLE, &root->state))
0328 return 0;
0329
0330
0331 if (reloc_root_is_dead(root))
0332 return 1;
0333
0334 reloc_root = root->reloc_root;
0335 if (!reloc_root)
0336 return 0;
0337
0338 if (btrfs_header_generation(reloc_root->commit_root) ==
0339 root->fs_info->running_transaction->transid)
0340 return 0;

	/*
	 * If there is a reloc tree and it was created in a previous
	 * transaction, backref lookup can find the reloc tree, so the
	 * backref node for the fs tree root is useless for relocation.
	 */
0347 return 1;
0348 }

/*
 * Find the reloc tree by the address of its tree root.
 */
0353 struct btrfs_root *find_reloc_root(struct btrfs_fs_info *fs_info, u64 bytenr)
0354 {
0355 struct reloc_control *rc = fs_info->reloc_ctl;
0356 struct rb_node *rb_node;
0357 struct mapping_node *node;
0358 struct btrfs_root *root = NULL;
0359
0360 ASSERT(rc);
0361 spin_lock(&rc->reloc_root_tree.lock);
0362 rb_node = rb_simple_search(&rc->reloc_root_tree.rb_root, bytenr);
0363 if (rb_node) {
0364 node = rb_entry(rb_node, struct mapping_node, rb_node);
0365 root = node->data;
0366 }
0367 spin_unlock(&rc->reloc_root_tree.lock);
0368 return btrfs_grab_root(root);
0369 }

/*
 * Handle the useless nodes collected while building the backref tree.
 *
 * Two major clean ups are done for each useless node:
 *
 * - Cleanup the children edges and nodes
 *   If a child node turns out to be orphan (no parent) during cleanup, it is
 *   queued for cleanup as well.
 *
 * - Free leaves (level 0), keep higher level nodes detached
 *   Leaf backref nodes are removed from the cache, while nodes for upper
 *   level tree blocks stay cached as "detached" to avoid unnecessary backref
 *   lookups later.
 *
 * Return true if @node itself was on the useless list, false otherwise.
 */
0384 static bool handle_useless_nodes(struct reloc_control *rc,
0385 struct btrfs_backref_node *node)
0386 {
0387 struct btrfs_backref_cache *cache = &rc->backref_cache;
0388 struct list_head *useless_node = &cache->useless_node;
0389 bool ret = false;
0390
0391 while (!list_empty(useless_node)) {
0392 struct btrfs_backref_node *cur;
0393
0394 cur = list_first_entry(useless_node, struct btrfs_backref_node,
0395 list);
0396 list_del_init(&cur->list);
0397
0398
0399 ASSERT(list_empty(&cur->upper));
0400
0401 if (cur == node)
0402 ret = true;
0403
0404
0405 if (cur->lowest) {
0406 list_del_init(&cur->lower);
0407 cur->lowest = 0;
0408 }
0409
0410
0411 while (!list_empty(&cur->lower)) {
0412 struct btrfs_backref_edge *edge;
0413 struct btrfs_backref_node *lower;
0414
0415 edge = list_entry(cur->lower.next,
0416 struct btrfs_backref_edge, list[UPPER]);
0417 list_del(&edge->list[UPPER]);
0418 list_del(&edge->list[LOWER]);
0419 lower = edge->node[LOWER];
0420 btrfs_backref_free_edge(cache, edge);
0421
0422
0423 if (list_empty(&lower->upper))
0424 list_add(&lower->list, useless_node);
0425 }
0426
0427 mark_block_processed(rc, cur);

		/*
		 * Backref nodes for tree leaves are deleted from the cache.
		 * Backref nodes for upper level tree blocks are left in the
		 * cache to avoid unnecessary backref lookup.
		 */
0434 if (cur->level > 0) {
0435 list_add(&cur->list, &cache->detached);
0436 cur->detached = 1;
0437 } else {
0438 rb_erase(&cur->rb_node, &cache->rb_root);
0439 btrfs_backref_free_node(cache, cur);
0440 }
0441 }
0442 return ret;
0443 }

/*
 * Build the backref tree for a given tree block.  The root of the backref
 * tree corresponds to the tree block, and the leaves of the backref tree
 * correspond to the roots of the trees that reference the tree block.
 *
 * The basic idea of this function is to check backrefs of a given block to
 * find upper level blocks that reference the block, and then check backrefs
 * of these upper level blocks recursively.  The recursion stops when a tree
 * root is reached or the backrefs for the block are already cached.
 *
 * NOTE: if we find that backrefs for a block are cached, we know backrefs
 * for all upper level blocks that directly/indirectly reference the block
 * are also cached.
 */
0459 static noinline_for_stack struct btrfs_backref_node *build_backref_tree(
0460 struct reloc_control *rc, struct btrfs_key *node_key,
0461 int level, u64 bytenr)
0462 {
0463 struct btrfs_backref_iter *iter;
0464 struct btrfs_backref_cache *cache = &rc->backref_cache;
0465
0466 struct btrfs_path *path;
0467 struct btrfs_backref_node *cur;
0468 struct btrfs_backref_node *node = NULL;
0469 struct btrfs_backref_edge *edge;
0470 int ret;
0471 int err = 0;
0472
0473 iter = btrfs_backref_iter_alloc(rc->extent_root->fs_info, GFP_NOFS);
0474 if (!iter)
0475 return ERR_PTR(-ENOMEM);
0476 path = btrfs_alloc_path();
0477 if (!path) {
0478 err = -ENOMEM;
0479 goto out;
0480 }
0481
0482 node = btrfs_backref_alloc_node(cache, bytenr, level);
0483 if (!node) {
0484 err = -ENOMEM;
0485 goto out;
0486 }
0487
0488 node->lowest = 1;
0489 cur = node;
0490
0491
0492 do {
0493 ret = btrfs_backref_add_tree_node(cache, path, iter, node_key,
0494 cur);
0495 if (ret < 0) {
0496 err = ret;
0497 goto out;
0498 }
0499 edge = list_first_entry_or_null(&cache->pending_edge,
0500 struct btrfs_backref_edge, list[UPPER]);
0501
0502
0503
0504
0505 if (edge) {
0506 list_del_init(&edge->list[UPPER]);
0507 cur = edge->node[UPPER];
0508 }
0509 } while (edge);
0510
0511
0512 ret = btrfs_backref_finish_upper_links(cache, node);
0513 if (ret < 0) {
0514 err = ret;
0515 goto out;
0516 }
0517
0518 if (handle_useless_nodes(rc, node))
0519 node = NULL;
0520 out:
0521 btrfs_backref_iter_free(iter);
0522 btrfs_free_path(path);
0523 if (err) {
0524 btrfs_backref_error_cleanup(cache, node);
0525 return ERR_PTR(err);
0526 }
0527 ASSERT(!node || !node->detached);
0528 ASSERT(list_empty(&cache->useless_node) &&
0529 list_empty(&cache->pending_edge));
0530 return node;
0531 }

/*
 * Helper to add a backref node for the newly created snapshot.  The backref
 * node is created by cloning the backref node that corresponds to the root
 * of the source tree.
 */
0538 static int clone_backref_node(struct btrfs_trans_handle *trans,
0539 struct reloc_control *rc,
0540 struct btrfs_root *src,
0541 struct btrfs_root *dest)
0542 {
0543 struct btrfs_root *reloc_root = src->reloc_root;
0544 struct btrfs_backref_cache *cache = &rc->backref_cache;
0545 struct btrfs_backref_node *node = NULL;
0546 struct btrfs_backref_node *new_node;
0547 struct btrfs_backref_edge *edge;
0548 struct btrfs_backref_edge *new_edge;
0549 struct rb_node *rb_node;
0550
0551 if (cache->last_trans > 0)
0552 update_backref_cache(trans, cache);
0553
0554 rb_node = rb_simple_search(&cache->rb_root, src->commit_root->start);
0555 if (rb_node) {
0556 node = rb_entry(rb_node, struct btrfs_backref_node, rb_node);
0557 if (node->detached)
0558 node = NULL;
0559 else
0560 BUG_ON(node->new_bytenr != reloc_root->node->start);
0561 }
0562
0563 if (!node) {
0564 rb_node = rb_simple_search(&cache->rb_root,
0565 reloc_root->commit_root->start);
0566 if (rb_node) {
0567 node = rb_entry(rb_node, struct btrfs_backref_node,
0568 rb_node);
0569 BUG_ON(node->detached);
0570 }
0571 }
0572
0573 if (!node)
0574 return 0;
0575
0576 new_node = btrfs_backref_alloc_node(cache, dest->node->start,
0577 node->level);
0578 if (!new_node)
0579 return -ENOMEM;
0580
0581 new_node->lowest = node->lowest;
0582 new_node->checked = 1;
0583 new_node->root = btrfs_grab_root(dest);
0584 ASSERT(new_node->root);
0585
0586 if (!node->lowest) {
0587 list_for_each_entry(edge, &node->lower, list[UPPER]) {
0588 new_edge = btrfs_backref_alloc_edge(cache);
0589 if (!new_edge)
0590 goto fail;
0591
0592 btrfs_backref_link_edge(new_edge, edge->node[LOWER],
0593 new_node, LINK_UPPER);
0594 }
0595 } else {
0596 list_add_tail(&new_node->lower, &cache->leaves);
0597 }
0598
0599 rb_node = rb_simple_insert(&cache->rb_root, new_node->bytenr,
0600 &new_node->rb_node);
0601 if (rb_node)
0602 btrfs_backref_panic(trans->fs_info, new_node->bytenr, -EEXIST);
0603
0604 if (!new_node->lowest) {
0605 list_for_each_entry(new_edge, &new_node->lower, list[UPPER]) {
0606 list_add_tail(&new_edge->list[LOWER],
0607 &new_edge->node[LOWER]->upper);
0608 }
0609 }
0610 return 0;
0611 fail:
0612 while (!list_empty(&new_node->lower)) {
0613 new_edge = list_entry(new_node->lower.next,
0614 struct btrfs_backref_edge, list[UPPER]);
0615 list_del(&new_edge->list[UPPER]);
0616 btrfs_backref_free_edge(cache, new_edge);
0617 }
0618 btrfs_backref_free_node(cache, new_node);
0619 return -ENOMEM;
0620 }
0621

/*
 * Helper to add the 'address of tree root -> reloc tree' mapping.
 */
0625 static int __must_check __add_reloc_root(struct btrfs_root *root)
0626 {
0627 struct btrfs_fs_info *fs_info = root->fs_info;
0628 struct rb_node *rb_node;
0629 struct mapping_node *node;
0630 struct reloc_control *rc = fs_info->reloc_ctl;
0631
0632 node = kmalloc(sizeof(*node), GFP_NOFS);
0633 if (!node)
0634 return -ENOMEM;
0635
0636 node->bytenr = root->commit_root->start;
0637 node->data = root;
0638
0639 spin_lock(&rc->reloc_root_tree.lock);
0640 rb_node = rb_simple_insert(&rc->reloc_root_tree.rb_root,
0641 node->bytenr, &node->rb_node);
0642 spin_unlock(&rc->reloc_root_tree.lock);
0643 if (rb_node) {
0644 btrfs_err(fs_info,
0645 "Duplicate root found for start=%llu while inserting into relocation tree",
0646 node->bytenr);
0647 return -EEXIST;
0648 }
0649
0650 list_add_tail(&root->root_list, &rc->reloc_roots);
0651 return 0;
0652 }

/*
 * Helper to delete the 'address of tree root -> reloc tree' mapping.
 */
0658 static void __del_reloc_root(struct btrfs_root *root)
0659 {
0660 struct btrfs_fs_info *fs_info = root->fs_info;
0661 struct rb_node *rb_node;
0662 struct mapping_node *node = NULL;
0663 struct reloc_control *rc = fs_info->reloc_ctl;
0664 bool put_ref = false;
0665
0666 if (rc && root->node) {
0667 spin_lock(&rc->reloc_root_tree.lock);
0668 rb_node = rb_simple_search(&rc->reloc_root_tree.rb_root,
0669 root->commit_root->start);
0670 if (rb_node) {
0671 node = rb_entry(rb_node, struct mapping_node, rb_node);
0672 rb_erase(&node->rb_node, &rc->reloc_root_tree.rb_root);
0673 RB_CLEAR_NODE(&node->rb_node);
0674 }
0675 spin_unlock(&rc->reloc_root_tree.lock);
0676 ASSERT(!node || (struct btrfs_root *)node->data == root);
0677 }
0678
0679
0680
0681
0682
0683
0684
0685
0686
0687 spin_lock(&fs_info->trans_lock);
0688 if (!list_empty(&root->root_list)) {
0689 put_ref = true;
0690 list_del_init(&root->root_list);
0691 }
0692 spin_unlock(&fs_info->trans_lock);
0693 if (put_ref)
0694 btrfs_put_root(root);
0695 kfree(node);
0696 }

/*
 * Helper to update the 'address of tree root -> reloc tree' mapping.
 */
0702 static int __update_reloc_root(struct btrfs_root *root)
0703 {
0704 struct btrfs_fs_info *fs_info = root->fs_info;
0705 struct rb_node *rb_node;
0706 struct mapping_node *node = NULL;
0707 struct reloc_control *rc = fs_info->reloc_ctl;
0708
0709 spin_lock(&rc->reloc_root_tree.lock);
0710 rb_node = rb_simple_search(&rc->reloc_root_tree.rb_root,
0711 root->commit_root->start);
0712 if (rb_node) {
0713 node = rb_entry(rb_node, struct mapping_node, rb_node);
0714 rb_erase(&node->rb_node, &rc->reloc_root_tree.rb_root);
0715 }
0716 spin_unlock(&rc->reloc_root_tree.lock);
0717
0718 if (!node)
0719 return 0;
0720 BUG_ON((struct btrfs_root *)node->data != root);
0721
0722 spin_lock(&rc->reloc_root_tree.lock);
0723 node->bytenr = root->node->start;
0724 rb_node = rb_simple_insert(&rc->reloc_root_tree.rb_root,
0725 node->bytenr, &node->rb_node);
0726 spin_unlock(&rc->reloc_root_tree.lock);
0727 if (rb_node)
0728 btrfs_backref_panic(fs_info, node->bytenr, -EEXIST);
0729 return 0;
0730 }
0731
0732 static struct btrfs_root *create_reloc_root(struct btrfs_trans_handle *trans,
0733 struct btrfs_root *root, u64 objectid)
0734 {
0735 struct btrfs_fs_info *fs_info = root->fs_info;
0736 struct btrfs_root *reloc_root;
0737 struct extent_buffer *eb;
0738 struct btrfs_root_item *root_item;
0739 struct btrfs_key root_key;
0740 int ret = 0;
0741 bool must_abort = false;
0742
0743 root_item = kmalloc(sizeof(*root_item), GFP_NOFS);
0744 if (!root_item)
0745 return ERR_PTR(-ENOMEM);
0746
0747 root_key.objectid = BTRFS_TREE_RELOC_OBJECTID;
0748 root_key.type = BTRFS_ROOT_ITEM_KEY;
0749 root_key.offset = objectid;
0750
0751 if (root->root_key.objectid == objectid) {
0752 u64 commit_root_gen;
0753
0754
0755 ret = btrfs_copy_root(trans, root, root->commit_root, &eb,
0756 BTRFS_TREE_RELOC_OBJECTID);
0757 if (ret)
0758 goto fail;
0759
0760
0761
0762
0763
0764
0765
0766
0767
0768 commit_root_gen = btrfs_header_generation(root->commit_root);
0769 btrfs_set_root_last_snapshot(&root->root_item, commit_root_gen);
0770 } else {
0771
0772
0773
0774
0775
0776
0777
0778 ret = btrfs_copy_root(trans, root, root->node, &eb,
0779 BTRFS_TREE_RELOC_OBJECTID);
0780 if (ret)
0781 goto fail;
0782 }

	/*
	 * The new root tree block has been created and references taken, so
	 * from here on any failure must abort the transaction.
	 */
0788 must_abort = true;
0789
0790 memcpy(root_item, &root->root_item, sizeof(*root_item));
0791 btrfs_set_root_bytenr(root_item, eb->start);
0792 btrfs_set_root_level(root_item, btrfs_header_level(eb));
0793 btrfs_set_root_generation(root_item, trans->transid);
0794
0795 if (root->root_key.objectid == objectid) {
0796 btrfs_set_root_refs(root_item, 0);
0797 memset(&root_item->drop_progress, 0,
0798 sizeof(struct btrfs_disk_key));
0799 btrfs_set_root_drop_level(root_item, 0);
0800 }
0801
0802 btrfs_tree_unlock(eb);
0803 free_extent_buffer(eb);
0804
0805 ret = btrfs_insert_root(trans, fs_info->tree_root,
0806 &root_key, root_item);
0807 if (ret)
0808 goto fail;
0809
0810 kfree(root_item);
0811
0812 reloc_root = btrfs_read_tree_root(fs_info->tree_root, &root_key);
0813 if (IS_ERR(reloc_root)) {
0814 ret = PTR_ERR(reloc_root);
0815 goto abort;
0816 }
0817 set_bit(BTRFS_ROOT_SHAREABLE, &reloc_root->state);
0818 reloc_root->last_trans = trans->transid;
0819 return reloc_root;
0820 fail:
0821 kfree(root_item);
0822 abort:
0823 if (must_abort)
0824 btrfs_abort_transaction(trans, ret);
0825 return ERR_PTR(ret);
0826 }

/*
 * Create a reloc tree for a given fs tree.  The reloc tree is just a
 * snapshot of the fs tree with a special root objectid.
 *
 * The reloc_root comes out of here with two references, one for
 * root->reloc_root, and another for being on rc->reloc_roots.
 */
0835 int btrfs_init_reloc_root(struct btrfs_trans_handle *trans,
0836 struct btrfs_root *root)
0837 {
0838 struct btrfs_fs_info *fs_info = root->fs_info;
0839 struct btrfs_root *reloc_root;
0840 struct reloc_control *rc = fs_info->reloc_ctl;
0841 struct btrfs_block_rsv *rsv;
0842 int clear_rsv = 0;
0843 int ret;
0844
0845 if (!rc)
0846 return 0;

	/*
	 * The subvolume has a reloc tree but the swap is finished, no need
	 * to create/update the dead reloc tree.
	 */
0852 if (reloc_root_is_dead(root))
0853 return 0;

	/*
	 * This is subtle but important.  We do not do
	 * record_root_in_transaction for reloc roots, instead we record
	 * their corresponding fs root, and then here we update the last
	 * trans for the reloc root.  This means that we have to do this for
	 * the entire life of the reloc root, regardless of which stage of
	 * the relocation we are in.
	 */
0863 if (root->reloc_root) {
0864 reloc_root = root->reloc_root;
0865 reloc_root->last_trans = trans->transid;
0866 return 0;
0867 }

	/*
	 * We are merging reloc roots, we do not need new reloc trees.  Also
	 * reloc trees never need their own reloc tree.
	 */
0873 if (!rc->create_reloc_tree ||
0874 root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID)
0875 return 0;
0876
0877 if (!trans->reloc_reserved) {
0878 rsv = trans->block_rsv;
0879 trans->block_rsv = rc->block_rsv;
0880 clear_rsv = 1;
0881 }
0882 reloc_root = create_reloc_root(trans, root, root->root_key.objectid);
0883 if (clear_rsv)
0884 trans->block_rsv = rsv;
0885 if (IS_ERR(reloc_root))
0886 return PTR_ERR(reloc_root);
0887
0888 ret = __add_reloc_root(reloc_root);
0889 ASSERT(ret != -EEXIST);
0890 if (ret) {
0891
0892 btrfs_put_root(reloc_root);
0893 return ret;
0894 }
0895 root->reloc_root = btrfs_grab_root(reloc_root);
0896 return 0;
0897 }

/*
 * Update the root item of the reloc tree.
 */
0902 int btrfs_update_reloc_root(struct btrfs_trans_handle *trans,
0903 struct btrfs_root *root)
0904 {
0905 struct btrfs_fs_info *fs_info = root->fs_info;
0906 struct btrfs_root *reloc_root;
0907 struct btrfs_root_item *root_item;
0908 int ret;
0909
0910 if (!have_reloc_root(root))
0911 return 0;
0912
0913 reloc_root = root->reloc_root;
0914 root_item = &reloc_root->root_item;
0915
0916
0917
0918
0919
0920
0921 btrfs_grab_root(reloc_root);
0922
0923
0924 if (fs_info->reloc_ctl->merge_reloc_tree &&
0925 btrfs_root_refs(root_item) == 0) {
0926 set_bit(BTRFS_ROOT_DEAD_RELOC_TREE, &root->state);
0927
0928
0929
0930
0931 smp_wmb();
0932 __del_reloc_root(reloc_root);
0933 }
0934
0935 if (reloc_root->commit_root != reloc_root->node) {
0936 __update_reloc_root(reloc_root);
0937 btrfs_set_root_node(root_item, reloc_root->node);
0938 free_extent_buffer(reloc_root->commit_root);
0939 reloc_root->commit_root = btrfs_root_node(reloc_root);
0940 }
0941
0942 ret = btrfs_update_root(trans, fs_info->tree_root,
0943 &reloc_root->root_key, root_item);
0944 btrfs_put_root(reloc_root);
0945 return ret;
0946 }

/*
 * Helper to find the first cached inode with inode number >= objectid in a
 * subvolume.
 */
0952 static struct inode *find_next_inode(struct btrfs_root *root, u64 objectid)
0953 {
0954 struct rb_node *node;
0955 struct rb_node *prev;
0956 struct btrfs_inode *entry;
0957 struct inode *inode;
0958
0959 spin_lock(&root->inode_lock);
0960 again:
0961 node = root->inode_tree.rb_node;
0962 prev = NULL;
0963 while (node) {
0964 prev = node;
0965 entry = rb_entry(node, struct btrfs_inode, rb_node);
0966
0967 if (objectid < btrfs_ino(entry))
0968 node = node->rb_left;
0969 else if (objectid > btrfs_ino(entry))
0970 node = node->rb_right;
0971 else
0972 break;
0973 }
0974 if (!node) {
0975 while (prev) {
0976 entry = rb_entry(prev, struct btrfs_inode, rb_node);
0977 if (objectid <= btrfs_ino(entry)) {
0978 node = prev;
0979 break;
0980 }
0981 prev = rb_next(prev);
0982 }
0983 }
0984 while (node) {
0985 entry = rb_entry(node, struct btrfs_inode, rb_node);
0986 inode = igrab(&entry->vfs_inode);
0987 if (inode) {
0988 spin_unlock(&root->inode_lock);
0989 return inode;
0990 }
0991
0992 objectid = btrfs_ino(entry) + 1;
0993 if (cond_resched_lock(&root->inode_lock))
0994 goto again;
0995
0996 node = rb_next(node);
0997 }
0998 spin_unlock(&root->inode_lock);
0999 return NULL;
1000 }

/*
 * Get the new location of the given data extent from the data relocation
 * inode.
 */
1005 static int get_new_location(struct inode *reloc_inode, u64 *new_bytenr,
1006 u64 bytenr, u64 num_bytes)
1007 {
1008 struct btrfs_root *root = BTRFS_I(reloc_inode)->root;
1009 struct btrfs_path *path;
1010 struct btrfs_file_extent_item *fi;
1011 struct extent_buffer *leaf;
1012 int ret;
1013
1014 path = btrfs_alloc_path();
1015 if (!path)
1016 return -ENOMEM;
1017
1018 bytenr -= BTRFS_I(reloc_inode)->index_cnt;
1019 ret = btrfs_lookup_file_extent(NULL, root, path,
1020 btrfs_ino(BTRFS_I(reloc_inode)), bytenr, 0);
1021 if (ret < 0)
1022 goto out;
1023 if (ret > 0) {
1024 ret = -ENOENT;
1025 goto out;
1026 }
1027
1028 leaf = path->nodes[0];
1029 fi = btrfs_item_ptr(leaf, path->slots[0],
1030 struct btrfs_file_extent_item);
1031
1032 BUG_ON(btrfs_file_extent_offset(leaf, fi) ||
1033 btrfs_file_extent_compression(leaf, fi) ||
1034 btrfs_file_extent_encryption(leaf, fi) ||
1035 btrfs_file_extent_other_encoding(leaf, fi));
1036
1037 if (num_bytes != btrfs_file_extent_disk_num_bytes(leaf, fi)) {
1038 ret = -EINVAL;
1039 goto out;
1040 }
1041
1042 *new_bytenr = btrfs_file_extent_disk_bytenr(leaf, fi);
1043 ret = 0;
1044 out:
1045 btrfs_free_path(path);
1046 return ret;
1047 }
1048

/*
 * Update file extent items in the tree leaf to point to the new locations.
 */
1053 static noinline_for_stack
1054 int replace_file_extents(struct btrfs_trans_handle *trans,
1055 struct reloc_control *rc,
1056 struct btrfs_root *root,
1057 struct extent_buffer *leaf)
1058 {
1059 struct btrfs_fs_info *fs_info = root->fs_info;
1060 struct btrfs_key key;
1061 struct btrfs_file_extent_item *fi;
1062 struct inode *inode = NULL;
1063 u64 parent;
1064 u64 bytenr;
1065 u64 new_bytenr = 0;
1066 u64 num_bytes;
1067 u64 end;
1068 u32 nritems;
1069 u32 i;
1070 int ret = 0;
1071 int first = 1;
1072 int dirty = 0;
1073
1074 if (rc->stage != UPDATE_DATA_PTRS)
1075 return 0;
1076
1077
1078 if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID)
1079 parent = leaf->start;
1080 else
1081 parent = 0;
1082
1083 nritems = btrfs_header_nritems(leaf);
1084 for (i = 0; i < nritems; i++) {
1085 struct btrfs_ref ref = { 0 };
1086
1087 cond_resched();
1088 btrfs_item_key_to_cpu(leaf, &key, i);
1089 if (key.type != BTRFS_EXTENT_DATA_KEY)
1090 continue;
1091 fi = btrfs_item_ptr(leaf, i, struct btrfs_file_extent_item);
1092 if (btrfs_file_extent_type(leaf, fi) ==
1093 BTRFS_FILE_EXTENT_INLINE)
1094 continue;
1095 bytenr = btrfs_file_extent_disk_bytenr(leaf, fi);
1096 num_bytes = btrfs_file_extent_disk_num_bytes(leaf, fi);
1097 if (bytenr == 0)
1098 continue;
1099 if (!in_range(bytenr, rc->block_group->start,
1100 rc->block_group->length))
1101 continue;

		/*
		 * If we are modifying a block in an fs tree, wait for any
		 * in-flight read on the range to complete and drop the
		 * extent cache.
		 */
1107 if (root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
1108 if (first) {
1109 inode = find_next_inode(root, key.objectid);
1110 first = 0;
1111 } else if (inode && btrfs_ino(BTRFS_I(inode)) < key.objectid) {
1112 btrfs_add_delayed_iput(inode);
1113 inode = find_next_inode(root, key.objectid);
1114 }
1115 if (inode && btrfs_ino(BTRFS_I(inode)) == key.objectid) {
1116 end = key.offset +
1117 btrfs_file_extent_num_bytes(leaf, fi);
1118 WARN_ON(!IS_ALIGNED(key.offset,
1119 fs_info->sectorsize));
1120 WARN_ON(!IS_ALIGNED(end, fs_info->sectorsize));
1121 end--;
1122 ret = try_lock_extent(&BTRFS_I(inode)->io_tree,
1123 key.offset, end);
1124 if (!ret)
1125 continue;
1126
1127 btrfs_drop_extent_cache(BTRFS_I(inode),
1128 key.offset, end, 1);
1129 unlock_extent(&BTRFS_I(inode)->io_tree,
1130 key.offset, end);
1131 }
1132 }
1133
1134 ret = get_new_location(rc->data_inode, &new_bytenr,
1135 bytenr, num_bytes);
1136 if (ret) {
1137
1138
1139
1140
1141 break;
1142 }
1143
1144 btrfs_set_file_extent_disk_bytenr(leaf, fi, new_bytenr);
1145 dirty = 1;
1146
1147 key.offset -= btrfs_file_extent_offset(leaf, fi);
1148 btrfs_init_generic_ref(&ref, BTRFS_ADD_DELAYED_REF, new_bytenr,
1149 num_bytes, parent);
1150 btrfs_init_data_ref(&ref, btrfs_header_owner(leaf),
1151 key.objectid, key.offset,
1152 root->root_key.objectid, false);
1153 ret = btrfs_inc_extent_ref(trans, &ref);
1154 if (ret) {
1155 btrfs_abort_transaction(trans, ret);
1156 break;
1157 }
1158
1159 btrfs_init_generic_ref(&ref, BTRFS_DROP_DELAYED_REF, bytenr,
1160 num_bytes, parent);
1161 btrfs_init_data_ref(&ref, btrfs_header_owner(leaf),
1162 key.objectid, key.offset,
1163 root->root_key.objectid, false);
1164 ret = btrfs_free_extent(trans, &ref);
1165 if (ret) {
1166 btrfs_abort_transaction(trans, ret);
1167 break;
1168 }
1169 }
1170 if (dirty)
1171 btrfs_mark_buffer_dirty(leaf);
1172 if (inode)
1173 btrfs_add_delayed_iput(inode);
1174 return ret;
1175 }
1176
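/*
 * Compare the key of the node pointer at @slot in @eb with the key at the
 * current slot of @path at @level.
 */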
1177 static noinline_for_stack
1178 int memcmp_node_keys(struct extent_buffer *eb, int slot,
1179 struct btrfs_path *path, int level)
1180 {
1181 struct btrfs_disk_key key1;
1182 struct btrfs_disk_key key2;
1183 btrfs_node_key(eb, &key1, slot);
1184 btrfs_node_key(path->nodes[level], &key2, path->slots[level]);
1185 return memcmp(&key1, &key2, sizeof(key1));
1186 }

/*
 * Replace a path in the subvolume tree of a snapshot with the corresponding
 * path in the reloc tree of the source tree (both trees are snapshots of the
 * same tree).  Tree block pointers newer than the last snapshot are swapped
 * between the two trees so the subvolume tree ends up referring to the
 * relocated blocks.
 */
1197 static noinline_for_stack
1198 int replace_path(struct btrfs_trans_handle *trans, struct reloc_control *rc,
1199 struct btrfs_root *dest, struct btrfs_root *src,
1200 struct btrfs_path *path, struct btrfs_key *next_key,
1201 int lowest_level, int max_level)
1202 {
1203 struct btrfs_fs_info *fs_info = dest->fs_info;
1204 struct extent_buffer *eb;
1205 struct extent_buffer *parent;
1206 struct btrfs_ref ref = { 0 };
1207 struct btrfs_key key;
1208 u64 old_bytenr;
1209 u64 new_bytenr;
1210 u64 old_ptr_gen;
1211 u64 new_ptr_gen;
1212 u64 last_snapshot;
1213 u32 blocksize;
1214 int cow = 0;
1215 int level;
1216 int ret;
1217 int slot;
1218
1219 ASSERT(src->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID);
1220 ASSERT(dest->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID);
1221
1222 last_snapshot = btrfs_root_last_snapshot(&src->root_item);
1223 again:
1224 slot = path->slots[lowest_level];
1225 btrfs_node_key_to_cpu(path->nodes[lowest_level], &key, slot);
1226
1227 eb = btrfs_lock_root_node(dest);
1228 level = btrfs_header_level(eb);
1229
1230 if (level < lowest_level) {
1231 btrfs_tree_unlock(eb);
1232 free_extent_buffer(eb);
1233 return 0;
1234 }
1235
1236 if (cow) {
1237 ret = btrfs_cow_block(trans, dest, eb, NULL, 0, &eb,
1238 BTRFS_NESTING_COW);
1239 if (ret) {
1240 btrfs_tree_unlock(eb);
1241 free_extent_buffer(eb);
1242 return ret;
1243 }
1244 }
1245
1246 if (next_key) {
1247 next_key->objectid = (u64)-1;
1248 next_key->type = (u8)-1;
1249 next_key->offset = (u64)-1;
1250 }
1251
1252 parent = eb;
1253 while (1) {
1254 level = btrfs_header_level(parent);
1255 ASSERT(level >= lowest_level);
1256
1257 ret = btrfs_bin_search(parent, &key, &slot);
1258 if (ret < 0)
1259 break;
1260 if (ret && slot > 0)
1261 slot--;
1262
1263 if (next_key && slot + 1 < btrfs_header_nritems(parent))
1264 btrfs_node_key_to_cpu(parent, next_key, slot + 1);
1265
1266 old_bytenr = btrfs_node_blockptr(parent, slot);
1267 blocksize = fs_info->nodesize;
1268 old_ptr_gen = btrfs_node_ptr_generation(parent, slot);
1269
1270 if (level <= max_level) {
1271 eb = path->nodes[level];
1272 new_bytenr = btrfs_node_blockptr(eb,
1273 path->slots[level]);
1274 new_ptr_gen = btrfs_node_ptr_generation(eb,
1275 path->slots[level]);
1276 } else {
1277 new_bytenr = 0;
1278 new_ptr_gen = 0;
1279 }
1280
1281 if (WARN_ON(new_bytenr > 0 && new_bytenr == old_bytenr)) {
1282 ret = level;
1283 break;
1284 }
1285
1286 if (new_bytenr == 0 || old_ptr_gen > last_snapshot ||
1287 memcmp_node_keys(parent, slot, path, level)) {
1288 if (level <= lowest_level) {
1289 ret = 0;
1290 break;
1291 }
1292
1293 eb = btrfs_read_node_slot(parent, slot);
1294 if (IS_ERR(eb)) {
1295 ret = PTR_ERR(eb);
1296 break;
1297 }
1298 btrfs_tree_lock(eb);
1299 if (cow) {
1300 ret = btrfs_cow_block(trans, dest, eb, parent,
1301 slot, &eb,
1302 BTRFS_NESTING_COW);
1303 if (ret) {
1304 btrfs_tree_unlock(eb);
1305 free_extent_buffer(eb);
1306 break;
1307 }
1308 }
1309
1310 btrfs_tree_unlock(parent);
1311 free_extent_buffer(parent);
1312
1313 parent = eb;
1314 continue;
1315 }
1316
1317 if (!cow) {
1318 btrfs_tree_unlock(parent);
1319 free_extent_buffer(parent);
1320 cow = 1;
1321 goto again;
1322 }
1323
1324 btrfs_node_key_to_cpu(path->nodes[level], &key,
1325 path->slots[level]);
1326 btrfs_release_path(path);
1327
1328 path->lowest_level = level;
1329 set_bit(BTRFS_ROOT_RESET_LOCKDEP_CLASS, &src->state);
1330 ret = btrfs_search_slot(trans, src, &key, path, 0, 1);
1331 clear_bit(BTRFS_ROOT_RESET_LOCKDEP_CLASS, &src->state);
1332 path->lowest_level = 0;
1333 if (ret) {
1334 if (ret > 0)
1335 ret = -ENOENT;
1336 break;
1337 }

		/*
		 * Inform qgroup to trace both subtrees.
		 *
		 * We must trace both trees:
		 * 1) Tree reloc subtree
		 *    If not traced, we will leak data numbers
		 * 2) Fs subtree
		 *    If not traced, we will double count old data
		 *
		 * We don't scan the subtree right now, but only record the
		 * swapped tree blocks.  The real subtree rescan is delayed
		 * until we have new CoW on the subtree root node before
		 * transaction commit.
		 */
1353 ret = btrfs_qgroup_add_swapped_blocks(trans, dest,
1354 rc->block_group, parent, slot,
1355 path->nodes[level], path->slots[level],
1356 last_snapshot);
1357 if (ret < 0)
1358 break;
1359
1360
1361
1362 btrfs_set_node_blockptr(parent, slot, new_bytenr);
1363 btrfs_set_node_ptr_generation(parent, slot, new_ptr_gen);
1364 btrfs_mark_buffer_dirty(parent);
1365
1366 btrfs_set_node_blockptr(path->nodes[level],
1367 path->slots[level], old_bytenr);
1368 btrfs_set_node_ptr_generation(path->nodes[level],
1369 path->slots[level], old_ptr_gen);
1370 btrfs_mark_buffer_dirty(path->nodes[level]);
1371
1372 btrfs_init_generic_ref(&ref, BTRFS_ADD_DELAYED_REF, old_bytenr,
1373 blocksize, path->nodes[level]->start);
1374 btrfs_init_tree_ref(&ref, level - 1, src->root_key.objectid,
1375 0, true);
1376 ret = btrfs_inc_extent_ref(trans, &ref);
1377 if (ret) {
1378 btrfs_abort_transaction(trans, ret);
1379 break;
1380 }
1381 btrfs_init_generic_ref(&ref, BTRFS_ADD_DELAYED_REF, new_bytenr,
1382 blocksize, 0);
1383 btrfs_init_tree_ref(&ref, level - 1, dest->root_key.objectid, 0,
1384 true);
1385 ret = btrfs_inc_extent_ref(trans, &ref);
1386 if (ret) {
1387 btrfs_abort_transaction(trans, ret);
1388 break;
1389 }
1390
1391 btrfs_init_generic_ref(&ref, BTRFS_DROP_DELAYED_REF, new_bytenr,
1392 blocksize, path->nodes[level]->start);
1393 btrfs_init_tree_ref(&ref, level - 1, src->root_key.objectid,
1394 0, true);
1395 ret = btrfs_free_extent(trans, &ref);
1396 if (ret) {
1397 btrfs_abort_transaction(trans, ret);
1398 break;
1399 }
1400
1401 btrfs_init_generic_ref(&ref, BTRFS_DROP_DELAYED_REF, old_bytenr,
1402 blocksize, 0);
1403 btrfs_init_tree_ref(&ref, level - 1, dest->root_key.objectid,
1404 0, true);
1405 ret = btrfs_free_extent(trans, &ref);
1406 if (ret) {
1407 btrfs_abort_transaction(trans, ret);
1408 break;
1409 }
1410
1411 btrfs_unlock_up_safe(path, 0);
1412
1413 ret = level;
1414 break;
1415 }
1416 btrfs_tree_unlock(parent);
1417 free_extent_buffer(parent);
1418 return ret;
1419 }

/*
 * Helper to find the next relocated block in the reloc tree.
 */
1424 static noinline_for_stack
1425 int walk_up_reloc_tree(struct btrfs_root *root, struct btrfs_path *path,
1426 int *level)
1427 {
1428 struct extent_buffer *eb;
1429 int i;
1430 u64 last_snapshot;
1431 u32 nritems;
1432
1433 last_snapshot = btrfs_root_last_snapshot(&root->root_item);
1434
1435 for (i = 0; i < *level; i++) {
1436 free_extent_buffer(path->nodes[i]);
1437 path->nodes[i] = NULL;
1438 }
1439
1440 for (i = *level; i < BTRFS_MAX_LEVEL && path->nodes[i]; i++) {
1441 eb = path->nodes[i];
1442 nritems = btrfs_header_nritems(eb);
1443 while (path->slots[i] + 1 < nritems) {
1444 path->slots[i]++;
1445 if (btrfs_node_ptr_generation(eb, path->slots[i]) <=
1446 last_snapshot)
1447 continue;
1448
1449 *level = i;
1450 return 0;
1451 }
1452 free_extent_buffer(path->nodes[i]);
1453 path->nodes[i] = NULL;
1454 }
1455 return 1;
1456 }

/*
 * Walk down the reloc tree to find the relocated block at the lowest level.
 */
1461 static noinline_for_stack
1462 int walk_down_reloc_tree(struct btrfs_root *root, struct btrfs_path *path,
1463 int *level)
1464 {
1465 struct extent_buffer *eb = NULL;
1466 int i;
1467 u64 ptr_gen = 0;
1468 u64 last_snapshot;
1469 u32 nritems;
1470
1471 last_snapshot = btrfs_root_last_snapshot(&root->root_item);
1472
1473 for (i = *level; i > 0; i--) {
1474 eb = path->nodes[i];
1475 nritems = btrfs_header_nritems(eb);
1476 while (path->slots[i] < nritems) {
1477 ptr_gen = btrfs_node_ptr_generation(eb, path->slots[i]);
1478 if (ptr_gen > last_snapshot)
1479 break;
1480 path->slots[i]++;
1481 }
1482 if (path->slots[i] >= nritems) {
1483 if (i == *level)
1484 break;
1485 *level = i + 1;
1486 return 0;
1487 }
1488 if (i == 1) {
1489 *level = i;
1490 return 0;
1491 }
1492
1493 eb = btrfs_read_node_slot(eb, path->slots[i]);
1494 if (IS_ERR(eb))
1495 return PTR_ERR(eb);
1496 BUG_ON(btrfs_header_level(eb) != i - 1);
1497 path->nodes[i - 1] = eb;
1498 path->slots[i - 1] = 0;
1499 }
1500 return 1;
1501 }

/*
 * Invalidate the extent cache for file extents whose key is in the range
 * [min_key, max_key).
 */
1507 static int invalidate_extent_cache(struct btrfs_root *root,
1508 struct btrfs_key *min_key,
1509 struct btrfs_key *max_key)
1510 {
1511 struct btrfs_fs_info *fs_info = root->fs_info;
1512 struct inode *inode = NULL;
1513 u64 objectid;
1514 u64 start, end;
1515 u64 ino;
1516
1517 objectid = min_key->objectid;
1518 while (1) {
1519 cond_resched();
1520 iput(inode);
1521
1522 if (objectid > max_key->objectid)
1523 break;
1524
1525 inode = find_next_inode(root, objectid);
1526 if (!inode)
1527 break;
1528 ino = btrfs_ino(BTRFS_I(inode));
1529
1530 if (ino > max_key->objectid) {
1531 iput(inode);
1532 break;
1533 }
1534
1535 objectid = ino + 1;
1536 if (!S_ISREG(inode->i_mode))
1537 continue;
1538
1539 if (unlikely(min_key->objectid == ino)) {
1540 if (min_key->type > BTRFS_EXTENT_DATA_KEY)
1541 continue;
1542 if (min_key->type < BTRFS_EXTENT_DATA_KEY)
1543 start = 0;
1544 else {
1545 start = min_key->offset;
1546 WARN_ON(!IS_ALIGNED(start, fs_info->sectorsize));
1547 }
1548 } else {
1549 start = 0;
1550 }
1551
1552 if (unlikely(max_key->objectid == ino)) {
1553 if (max_key->type < BTRFS_EXTENT_DATA_KEY)
1554 continue;
1555 if (max_key->type > BTRFS_EXTENT_DATA_KEY) {
1556 end = (u64)-1;
1557 } else {
1558 if (max_key->offset == 0)
1559 continue;
1560 end = max_key->offset;
1561 WARN_ON(!IS_ALIGNED(end, fs_info->sectorsize));
1562 end--;
1563 }
1564 } else {
1565 end = (u64)-1;
1566 }
1567
1568
1569 lock_extent(&BTRFS_I(inode)->io_tree, start, end);
1570 btrfs_drop_extent_cache(BTRFS_I(inode), start, end, 1);
1571 unlock_extent(&BTRFS_I(inode)->io_tree, start, end);
1572 }
1573 return 0;
1574 }
1575
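/*
 * Find the next node key above @level in @path and store it in @key.
 * Return 0 on success, 1 if there is no next key.
 */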
1576 static int find_next_key(struct btrfs_path *path, int level,
1577 struct btrfs_key *key)
1578
1579 {
1580 while (level < BTRFS_MAX_LEVEL) {
1581 if (!path->nodes[level])
1582 break;
1583 if (path->slots[level] + 1 <
1584 btrfs_header_nritems(path->nodes[level])) {
1585 btrfs_node_key_to_cpu(path->nodes[level], key,
1586 path->slots[level] + 1);
1587 return 0;
1588 }
1589 level++;
1590 }
1591 return 1;
1592 }

/*
 * Insert the current subvolume into reloc_control::dirty_subvol_roots.
 */
1597 static int insert_dirty_subvol(struct btrfs_trans_handle *trans,
1598 struct reloc_control *rc,
1599 struct btrfs_root *root)
1600 {
1601 struct btrfs_root *reloc_root = root->reloc_root;
1602 struct btrfs_root_item *reloc_root_item;
1603 int ret;
1604
1605
1606 ASSERT(root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID);
1607 ASSERT(reloc_root);
1608
1609 reloc_root_item = &reloc_root->root_item;
1610 memset(&reloc_root_item->drop_progress, 0,
1611 sizeof(reloc_root_item->drop_progress));
1612 btrfs_set_root_drop_level(reloc_root_item, 0);
1613 btrfs_set_root_refs(reloc_root_item, 0);
1614 ret = btrfs_update_reloc_root(trans, root);
1615 if (ret)
1616 return ret;
1617
1618 if (list_empty(&root->reloc_dirty_list)) {
1619 btrfs_grab_root(root);
1620 list_add_tail(&root->reloc_dirty_list, &rc->dirty_subvol_roots);
1621 }
1622
1623 return 0;
1624 }
1625
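/*
 * Clean up the roots queued on rc->dirty_subvol_roots: detach and drop the
 * reloc tree of each dirty subvolume, and drop orphaned reloc roots whose
 * refs went to zero.
 */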
1626 static int clean_dirty_subvols(struct reloc_control *rc)
1627 {
1628 struct btrfs_root *root;
1629 struct btrfs_root *next;
1630 int ret = 0;
1631 int ret2;
1632
1633 list_for_each_entry_safe(root, next, &rc->dirty_subvol_roots,
1634 reloc_dirty_list) {
1635 if (root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
1636
1637 struct btrfs_root *reloc_root = root->reloc_root;
1638
1639 list_del_init(&root->reloc_dirty_list);
1640 root->reloc_root = NULL;
1641
1642
1643
1644
1645 smp_wmb();
1646 clear_bit(BTRFS_ROOT_DEAD_RELOC_TREE, &root->state);
1647 if (reloc_root) {
1648
1649
1650
1651
1652
1653 ret2 = btrfs_drop_snapshot(reloc_root, 0, 1);
1654 if (ret2 < 0) {
1655 btrfs_put_root(reloc_root);
1656 if (!ret)
1657 ret = ret2;
1658 }
1659 }
1660 btrfs_put_root(root);
1661 } else {
1662
1663 ret2 = btrfs_drop_snapshot(root, 0, 1);
1664 if (ret2 < 0) {
1665 btrfs_put_root(root);
1666 if (!ret)
1667 ret = ret2;
1668 }
1669 }
1670 }
1671 return ret;
1672 }

/*
 * Merge the relocated tree blocks in the reloc tree with the corresponding
 * fs tree.
 */
1678 static noinline_for_stack int merge_reloc_root(struct reloc_control *rc,
1679 struct btrfs_root *root)
1680 {
1681 struct btrfs_fs_info *fs_info = rc->extent_root->fs_info;
1682 struct btrfs_key key;
1683 struct btrfs_key next_key;
1684 struct btrfs_trans_handle *trans = NULL;
1685 struct btrfs_root *reloc_root;
1686 struct btrfs_root_item *root_item;
1687 struct btrfs_path *path;
1688 struct extent_buffer *leaf;
1689 int reserve_level;
1690 int level;
1691 int max_level;
1692 int replaced = 0;
1693 int ret = 0;
1694 u32 min_reserved;
1695
1696 path = btrfs_alloc_path();
1697 if (!path)
1698 return -ENOMEM;
1699 path->reada = READA_FORWARD;
1700
1701 reloc_root = root->reloc_root;
1702 root_item = &reloc_root->root_item;
1703
1704 if (btrfs_disk_key_objectid(&root_item->drop_progress) == 0) {
1705 level = btrfs_root_level(root_item);
1706 atomic_inc(&reloc_root->node->refs);
1707 path->nodes[level] = reloc_root->node;
1708 path->slots[level] = 0;
1709 } else {
1710 btrfs_disk_key_to_cpu(&key, &root_item->drop_progress);
1711
1712 level = btrfs_root_drop_level(root_item);
1713 BUG_ON(level == 0);
1714 path->lowest_level = level;
1715 ret = btrfs_search_slot(NULL, reloc_root, &key, path, 0, 0);
1716 path->lowest_level = 0;
1717 if (ret < 0) {
1718 btrfs_free_path(path);
1719 return ret;
1720 }
1721
1722 btrfs_node_key_to_cpu(path->nodes[level], &next_key,
1723 path->slots[level]);
1724 WARN_ON(memcmp(&key, &next_key, sizeof(key)));
1725
1726 btrfs_unlock_up_safe(path, 0);
1727 }

	/*
	 * In merge_reloc_root(), we modify the upper level pointer to swap
	 * the tree blocks between the reloc tree and the subvolume tree.
	 * Thus for tree block COW, we COW at most from level 1 to root level
	 * for each tree.
	 *
	 * Thus the needed metadata size is at most root_level * nodesize,
	 * and * 2 since we have two trees to COW.
	 */
1737 reserve_level = max_t(int, 1, btrfs_root_level(root_item));
1738 min_reserved = fs_info->nodesize * reserve_level * 2;
1739 memset(&next_key, 0, sizeof(next_key));
1740
1741 while (1) {
1742 ret = btrfs_block_rsv_refill(fs_info, rc->block_rsv,
1743 min_reserved,
1744 BTRFS_RESERVE_FLUSH_LIMIT);
1745 if (ret)
1746 goto out;
1747 trans = btrfs_start_transaction(root, 0);
1748 if (IS_ERR(trans)) {
1749 ret = PTR_ERR(trans);
1750 trans = NULL;
1751 goto out;
1752 }

		/*
		 * The reloc root is marked dead at this stage, so
		 * btrfs_init_reloc_root() will not bump its last_trans for
		 * us; set it by hand before modifying the reloc tree in this
		 * transaction.
		 */
1764 reloc_root->last_trans = trans->transid;
1765 trans->block_rsv = rc->block_rsv;
1766
1767 replaced = 0;
1768 max_level = level;
1769
1770 ret = walk_down_reloc_tree(reloc_root, path, &level);
1771 if (ret < 0)
1772 goto out;
1773 if (ret > 0)
1774 break;
1775
1776 if (!find_next_key(path, level, &key) &&
1777 btrfs_comp_cpu_keys(&next_key, &key) >= 0) {
1778 ret = 0;
1779 } else {
1780 ret = replace_path(trans, rc, root, reloc_root, path,
1781 &next_key, level, max_level);
1782 }
1783 if (ret < 0)
1784 goto out;
1785 if (ret > 0) {
1786 level = ret;
1787 btrfs_node_key_to_cpu(path->nodes[level], &key,
1788 path->slots[level]);
1789 replaced = 1;
1790 }
1791
1792 ret = walk_up_reloc_tree(reloc_root, path, &level);
1793 if (ret > 0)
1794 break;
1795
1796 BUG_ON(level == 0);
1797
1798
1799
1800
1801 btrfs_node_key(path->nodes[level], &root_item->drop_progress,
1802 path->slots[level]);
1803 btrfs_set_root_drop_level(root_item, level);
1804
1805 btrfs_end_transaction_throttle(trans);
1806 trans = NULL;
1807
1808 btrfs_btree_balance_dirty(fs_info);
1809
1810 if (replaced && rc->stage == UPDATE_DATA_PTRS)
1811 invalidate_extent_cache(root, &key, &next_key);
1812 }

	/*
	 * Handle the case where only one block in the fs tree needs to be
	 * relocated and that block is the tree root.
	 */
1818 leaf = btrfs_lock_root_node(root);
1819 ret = btrfs_cow_block(trans, root, leaf, NULL, 0, &leaf,
1820 BTRFS_NESTING_COW);
1821 btrfs_tree_unlock(leaf);
1822 free_extent_buffer(leaf);
1823 out:
1824 btrfs_free_path(path);
1825
1826 if (ret == 0) {
1827 ret = insert_dirty_subvol(trans, rc, root);
1828 if (ret)
1829 btrfs_abort_transaction(trans, ret);
1830 }
1831
1832 if (trans)
1833 btrfs_end_transaction_throttle(trans);
1834
1835 btrfs_btree_balance_dirty(fs_info);
1836
1837 if (replaced && rc->stage == UPDATE_DATA_PTRS)
1838 invalidate_extent_cache(root, &key, &next_key);
1839
1840 return ret;
1841 }
1842
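/*
 * Prepare for merging reloc trees: reserve metadata space for the merge and
 * set the reloc root refs to 1 so btrfs_recover_relocation() knows merging
 * should be resumed after a crash.
 */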
1843 static noinline_for_stack
1844 int prepare_to_merge(struct reloc_control *rc, int err)
1845 {
1846 struct btrfs_root *root = rc->extent_root;
1847 struct btrfs_fs_info *fs_info = root->fs_info;
1848 struct btrfs_root *reloc_root;
1849 struct btrfs_trans_handle *trans;
1850 LIST_HEAD(reloc_roots);
1851 u64 num_bytes = 0;
1852 int ret;
1853
1854 mutex_lock(&fs_info->reloc_mutex);
1855 rc->merging_rsv_size += fs_info->nodesize * (BTRFS_MAX_LEVEL - 1) * 2;
1856 rc->merging_rsv_size += rc->nodes_relocated * 2;
1857 mutex_unlock(&fs_info->reloc_mutex);
1858
1859 again:
1860 if (!err) {
1861 num_bytes = rc->merging_rsv_size;
1862 ret = btrfs_block_rsv_add(fs_info, rc->block_rsv, num_bytes,
1863 BTRFS_RESERVE_FLUSH_ALL);
1864 if (ret)
1865 err = ret;
1866 }
1867
1868 trans = btrfs_join_transaction(rc->extent_root);
1869 if (IS_ERR(trans)) {
1870 if (!err)
1871 btrfs_block_rsv_release(fs_info, rc->block_rsv,
1872 num_bytes, NULL);
1873 return PTR_ERR(trans);
1874 }
1875
1876 if (!err) {
1877 if (num_bytes != rc->merging_rsv_size) {
1878 btrfs_end_transaction(trans);
1879 btrfs_block_rsv_release(fs_info, rc->block_rsv,
1880 num_bytes, NULL);
1881 goto again;
1882 }
1883 }
1884
1885 rc->merge_reloc_tree = 1;
1886
1887 while (!list_empty(&rc->reloc_roots)) {
1888 reloc_root = list_entry(rc->reloc_roots.next,
1889 struct btrfs_root, root_list);
1890 list_del_init(&reloc_root->root_list);
1891
1892 root = btrfs_get_fs_root(fs_info, reloc_root->root_key.offset,
1893 false);
1894 if (IS_ERR(root)) {
			/*
			 * Even if we have an error we need this reloc root
			 * back on our list so we can clean up properly.
			 */
1899 list_add(&reloc_root->root_list, &reloc_roots);
1900 btrfs_abort_transaction(trans, (int)PTR_ERR(root));
1901 if (!err)
1902 err = PTR_ERR(root);
1903 break;
1904 }
1905 ASSERT(root->reloc_root == reloc_root);

		/*
		 * Set the reference count to 1, so btrfs_recover_relocation()
		 * knows it should resume merging.
		 */
1911 if (!err)
1912 btrfs_set_root_refs(&reloc_root->root_item, 1);
1913 ret = btrfs_update_reloc_root(trans, root);
1914
1915
1916
1917
1918
1919 list_add(&reloc_root->root_list, &reloc_roots);
1920 btrfs_put_root(root);
1921
1922 if (ret) {
1923 btrfs_abort_transaction(trans, ret);
1924 if (!err)
1925 err = ret;
1926 break;
1927 }
1928 }
1929
1930 list_splice(&reloc_roots, &rc->reloc_roots);
1931
1932 if (!err)
1933 err = btrfs_commit_transaction(trans);
1934 else
1935 btrfs_end_transaction(trans);
1936 return err;
1937 }
1938
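/*
 * Remove every reloc root on @list from the reloc root mapping tree and drop
 * the list reference.
 */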
1939 static noinline_for_stack
1940 void free_reloc_roots(struct list_head *list)
1941 {
1942 struct btrfs_root *reloc_root, *tmp;
1943
1944 list_for_each_entry_safe(reloc_root, tmp, list, root_list)
1945 __del_reloc_root(reloc_root);
1946 }
1947
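/*
 * Merge all queued reloc trees into their corresponding fs trees, or queue
 * them for cleanup if their root refs dropped to zero.
 */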
1948 static noinline_for_stack
1949 void merge_reloc_roots(struct reloc_control *rc)
1950 {
1951 struct btrfs_fs_info *fs_info = rc->extent_root->fs_info;
1952 struct btrfs_root *root;
1953 struct btrfs_root *reloc_root;
1954 LIST_HEAD(reloc_roots);
1955 int found = 0;
1956 int ret = 0;
1957 again:
1958 root = rc->extent_root;

	/*
	 * This serializes us with btrfs_record_root_in_trans(); we have to
	 * make sure nobody is in the middle of adding their roots to the
	 * list while we are doing this splice.
	 */
1966 mutex_lock(&fs_info->reloc_mutex);
1967 list_splice_init(&rc->reloc_roots, &reloc_roots);
1968 mutex_unlock(&fs_info->reloc_mutex);
1969
1970 while (!list_empty(&reloc_roots)) {
1971 found = 1;
1972 reloc_root = list_entry(reloc_roots.next,
1973 struct btrfs_root, root_list);
1974
1975 root = btrfs_get_fs_root(fs_info, reloc_root->root_key.offset,
1976 false);
1977 if (btrfs_root_refs(&reloc_root->root_item) > 0) {
1978 if (IS_ERR(root)) {
				/*
				 * For recovery we read the fs roots on mount,
				 * and if we didn't find the root then we
				 * marked the reloc root as a garbage root.
				 * For normal relocation the root should exist
				 * in memory.  However there's no reason we
				 * can't handle the error properly here just
				 * in case.
				 */
1987 ASSERT(0);
1988 ret = PTR_ERR(root);
1989 goto out;
1990 }
1991 if (root->reloc_root != reloc_root) {
				/*
				 * This is actually impossible without
				 * something going really wrong (like a weird
				 * race condition or cosmic rays).
				 */
1997 ASSERT(0);
1998 ret = -EINVAL;
1999 goto out;
2000 }
2001 ret = merge_reloc_root(rc, root);
2002 btrfs_put_root(root);
2003 if (ret) {
2004 if (list_empty(&reloc_root->root_list))
2005 list_add_tail(&reloc_root->root_list,
2006 &reloc_roots);
2007 goto out;
2008 }
2009 } else {
2010 if (!IS_ERR(root)) {
2011 if (root->reloc_root == reloc_root) {
2012 root->reloc_root = NULL;
2013 btrfs_put_root(reloc_root);
2014 }
2015 clear_bit(BTRFS_ROOT_DEAD_RELOC_TREE,
2016 &root->state);
2017 btrfs_put_root(root);
2018 }
2019
2020 list_del_init(&reloc_root->root_list);
2021
2022 list_add_tail(&reloc_root->reloc_dirty_list,
2023 &rc->dirty_subvol_roots);
2024 }
2025 }
2026
2027 if (found) {
2028 found = 0;
2029 goto again;
2030 }
2031 out:
2032 if (ret) {
2033 btrfs_handle_fs_error(fs_info, ret, NULL);
2034 free_reloc_roots(&reloc_roots);
2035
2036
2037 mutex_lock(&fs_info->reloc_mutex);
2038 list_splice_init(&rc->reloc_roots, &reloc_roots);
2039 mutex_unlock(&fs_info->reloc_mutex);
2040 free_reloc_roots(&reloc_roots);
2041 }

	/*
	 * We used to assert that rc->reloc_root_tree.rb_root was empty here,
	 * but that is wrong: if starting a transaction fails in
	 * prepare_to_merge() we can be left with reloc roots that were never
	 * merged, and they are cleaned up later during error handling.
	 */
2058 }
2059
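/*
 * Free all tree_block entries queued in the @blocks rb tree.
 */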
2060 static void free_block_list(struct rb_root *blocks)
2061 {
2062 struct tree_block *block;
2063 struct rb_node *rb_node;
2064 while ((rb_node = rb_first(blocks))) {
2065 block = rb_entry(rb_node, struct tree_block, rb_node);
2066 rb_erase(rb_node, blocks);
2067 kfree(block);
2068 }
2069 }
2070
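/*
 * Record the fs root that owns @reloc_root in the current transaction (which
 * in turn updates the reloc root's last_trans) if not already recorded.
 */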
2071 static int record_reloc_root_in_trans(struct btrfs_trans_handle *trans,
2072 struct btrfs_root *reloc_root)
2073 {
2074 struct btrfs_fs_info *fs_info = reloc_root->fs_info;
2075 struct btrfs_root *root;
2076 int ret;
2077
2078 if (reloc_root->last_trans == trans->transid)
2079 return 0;
2080
2081 root = btrfs_get_fs_root(fs_info, reloc_root->root_key.offset, false);

	/*
	 * This should succeed, since we can't have a reloc root without
	 * having already looked up the actual root and created the reloc
	 * root for it.
	 *
	 * However if there's some sort of corruption where we have a ref to
	 * a reloc root without a corresponding root this could return
	 * -ENOENT.
	 */
2091 if (IS_ERR(root)) {
2092 ASSERT(0);
2093 return PTR_ERR(root);
2094 }
2095 if (root->reloc_root != reloc_root) {
2096 ASSERT(0);
2097 btrfs_err(fs_info,
2098 "root %llu has two reloc roots associated with it",
2099 reloc_root->root_key.offset);
2100 btrfs_put_root(root);
2101 return -EUCLEAN;
2102 }
2103 ret = btrfs_record_root_in_trans(trans, root);
2104 btrfs_put_root(root);
2105
2106 return ret;
2107 }
2108
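/*
 * Walk up the backref chain from @node, record the roots encountered in the
 * transaction and return the reloc root that the relocated block should be
 * linked into.  Also fills rc->backref_cache.path[] for the levels walked.
 */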
2109 static noinline_for_stack
2110 struct btrfs_root *select_reloc_root(struct btrfs_trans_handle *trans,
2111 struct reloc_control *rc,
2112 struct btrfs_backref_node *node,
2113 struct btrfs_backref_edge *edges[])
2114 {
2115 struct btrfs_backref_node *next;
2116 struct btrfs_root *root;
2117 int index = 0;
2118 int ret;
2119
2120 next = node;
2121 while (1) {
2122 cond_resched();
2123 next = walk_up_backref(next, edges, &index);
2124 root = next->root;

		/*
		 * If there is no root, then our references for this block
		 * are incomplete, as we should be able to walk all the way
		 * up to a block that is owned by a root.
		 *
		 * This path is only for SHAREABLE roots, so if we come upon
		 * a non-SHAREABLE root then we have backrefs that resolve
		 * improperly.
		 *
		 * Both of these cases indicate file system corruption, or a
		 * bug in the backref walking code.
		 */
2138 if (!root) {
2139 ASSERT(0);
2140 btrfs_err(trans->fs_info,
2141 "bytenr %llu doesn't have a backref path ending in a root",
2142 node->bytenr);
2143 return ERR_PTR(-EUCLEAN);
2144 }
2145 if (!test_bit(BTRFS_ROOT_SHAREABLE, &root->state)) {
2146 ASSERT(0);
2147 btrfs_err(trans->fs_info,
2148 "bytenr %llu has multiple refs with one ending in a non-shareable root",
2149 node->bytenr);
2150 return ERR_PTR(-EUCLEAN);
2151 }
2152
2153 if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
2154 ret = record_reloc_root_in_trans(trans, root);
2155 if (ret)
2156 return ERR_PTR(ret);
2157 break;
2158 }
2159
2160 ret = btrfs_record_root_in_trans(trans, root);
2161 if (ret)
2162 return ERR_PTR(ret);
2163 root = root->reloc_root;
2164
2165
2166
2167
2168
2169 if (!root)
2170 return ERR_PTR(-ENOENT);
2171
2172 if (next->new_bytenr != root->node->start) {
			/*
			 * We just created the reloc root, so we shouldn't
			 * have ->new_bytenr set yet and this node shouldn't
			 * be on the changed list.  If it is then we have
			 * multiple roots pointing at the same bytenr, which
			 * indicates corruption or a mistake in the backref
			 * walking code.
			 */
2180 ASSERT(next->new_bytenr == 0);
2181 ASSERT(list_empty(&next->list));
2182 if (next->new_bytenr || !list_empty(&next->list)) {
2183 btrfs_err(trans->fs_info,
2184 "bytenr %llu possibly has multiple roots pointing at the same bytenr %llu",
2185 node->bytenr, next->bytenr);
2186 return ERR_PTR(-EUCLEAN);
2187 }
2188
2189 next->new_bytenr = root->node->start;
2190 btrfs_put_root(next->root);
2191 next->root = btrfs_grab_root(root);
2192 ASSERT(next->root);
2193 list_add_tail(&next->list,
2194 &rc->backref_cache.changed);
2195 mark_block_processed(rc, next);
2196 break;
2197 }
2198
2199 WARN_ON(1);
2200 root = NULL;
2201 next = walk_down_backref(edges, &index);
2202 if (!next || next->level <= node->level)
2203 break;
2204 }
2205 if (!root) {
		/*
		 * This can happen if there's fs corruption or if there's a
		 * bug in the backref lookup code.
		 */
2210 ASSERT(0);
2211 return ERR_PTR(-ENOENT);
2212 }
2213
2214 next = node;
2215
2216 while (1) {
2217 rc->backref_cache.path[next->level] = next;
2218 if (--index < 0)
2219 break;
2220 next = edges[index]->node[UPPER];
2221 }
2222 return root;
2223 }

/*
 * Select a tree root for relocation.
 *
 * Return NULL if the block is not shareable; we should use do_relocation()
 * in this case.
 *
 * Return a tree root pointer if the block is shareable.
 * Return -ENOENT if the block is the root of a reloc tree.
 */
2234 static noinline_for_stack
2235 struct btrfs_root *select_one_root(struct btrfs_backref_node *node)
2236 {
2237 struct btrfs_backref_node *next;
2238 struct btrfs_root *root;
2239 struct btrfs_root *fs_root = NULL;
2240 struct btrfs_backref_edge *edges[BTRFS_MAX_LEVEL - 1];
2241 int index = 0;
2242
2243 next = node;
2244 while (1) {
2245 cond_resched();
2246 next = walk_up_backref(next, edges, &index);
2247 root = next->root;
2248
2249
2250
2251
2252
2253 if (!root)
2254 return ERR_PTR(-EUCLEAN);
2255
2256
2257 if (!test_bit(BTRFS_ROOT_SHAREABLE, &root->state))
2258 return root;
2259
2260 if (root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID)
2261 fs_root = root;
2262
2263 if (next != node)
2264 return NULL;
2265
2266 next = walk_down_backref(edges, &index);
2267 if (!next || next->level <= node->level)
2268 break;
2269 }
2270
2271 if (!fs_root)
2272 return ERR_PTR(-ENOENT);
2273 return fs_root;
2274 }
2275
2276 static noinline_for_stack
2277 u64 calcu_metadata_size(struct reloc_control *rc,
2278 struct btrfs_backref_node *node, int reserve)
2279 {
2280 struct btrfs_fs_info *fs_info = rc->extent_root->fs_info;
2281 struct btrfs_backref_node *next = node;
2282 struct btrfs_backref_edge *edge;
2283 struct btrfs_backref_edge *edges[BTRFS_MAX_LEVEL - 1];
2284 u64 num_bytes = 0;
2285 int index = 0;
2286
2287 BUG_ON(reserve && node->processed);
2288
2289 while (next) {
2290 cond_resched();
2291 while (1) {
2292 if (next->processed && (reserve || next != node))
2293 break;
2294
2295 num_bytes += fs_info->nodesize;
2296
2297 if (list_empty(&next->upper))
2298 break;
2299
2300 edge = list_entry(next->upper.next,
2301 struct btrfs_backref_edge, list[LOWER]);
2302 edges[index++] = edge;
2303 next = edge->node[UPPER];
2304 }
2305 next = walk_down_backref(edges, &index);
2306 }
2307 return num_bytes;
2308 }
2309
2310 static int reserve_metadata_space(struct btrfs_trans_handle *trans,
2311 struct reloc_control *rc,
2312 struct btrfs_backref_node *node)
2313 {
2314 struct btrfs_root *root = rc->extent_root;
2315 struct btrfs_fs_info *fs_info = root->fs_info;
2316 u64 num_bytes;
2317 int ret;
2318 u64 tmp;
2319
2320 num_bytes = calcu_metadata_size(rc, node, 1) * 2;
2321
2322 trans->block_rsv = rc->block_rsv;
2323 rc->reserved_bytes += num_bytes;

	/*
	 * We are under a transaction here so we can only do limited
	 * flushing.  If we get an enospc just kick back -EAGAIN so we know
	 * to drop the transaction and try to refill when we can flush all
	 * the things.
	 */
2330 ret = btrfs_block_rsv_refill(fs_info, rc->block_rsv, num_bytes,
2331 BTRFS_RESERVE_FLUSH_LIMIT);
2332 if (ret) {
2333 tmp = fs_info->nodesize * RELOCATION_RESERVED_NODES;
2334 while (tmp <= rc->reserved_bytes)
2335 tmp <<= 1;
		/*
		 * Only one thread can access block_rsv at this point, so we
		 * don't need a lock to protect it.  Expand the reservation
		 * here to allow enough space for relocation; we will return
		 * early in the enospc case.
		 */
2343 rc->block_rsv->size = tmp + fs_info->nodesize *
2344 RELOCATION_RESERVED_NODES;
2345 return -EAGAIN;
2346 }
2347
2348 return 0;
2349 }

/*
 * Relocate a tree block, and then update pointers in upper level blocks that
 * reference the block to point to the new location.
 *
 * If called by link_to_upper(), the block has already been relocated and, in
 * that case, this function just updates the pointers.
 */
2358 static int do_relocation(struct btrfs_trans_handle *trans,
2359 struct reloc_control *rc,
2360 struct btrfs_backref_node *node,
2361 struct btrfs_key *key,
2362 struct btrfs_path *path, int lowest)
2363 {
2364 struct btrfs_backref_node *upper;
2365 struct btrfs_backref_edge *edge;
2366 struct btrfs_backref_edge *edges[BTRFS_MAX_LEVEL - 1];
2367 struct btrfs_root *root;
2368 struct extent_buffer *eb;
2369 u32 blocksize;
2370 u64 bytenr;
2371 int slot;
2372 int ret = 0;

	/*
	 * If we are lowest then this is the first time we're processing this
	 * block, and thus shouldn't have an eb associated with it yet.
	 */
2378 ASSERT(!lowest || !node->eb);
2379
2380 path->lowest_level = node->level + 1;
2381 rc->backref_cache.path[node->level] = node;
2382 list_for_each_entry(edge, &node->upper, list[LOWER]) {
2383 struct btrfs_ref ref = { 0 };
2384
2385 cond_resched();
2386
2387 upper = edge->node[UPPER];
2388 root = select_reloc_root(trans, rc, upper, edges);
2389 if (IS_ERR(root)) {
2390 ret = PTR_ERR(root);
2391 goto next;
2392 }
2393
2394 if (upper->eb && !upper->locked) {
2395 if (!lowest) {
2396 ret = btrfs_bin_search(upper->eb, key, &slot);
2397 if (ret < 0)
2398 goto next;
2399 BUG_ON(ret);
2400 bytenr = btrfs_node_blockptr(upper->eb, slot);
2401 if (node->eb->start == bytenr)
2402 goto next;
2403 }
2404 btrfs_backref_drop_node_buffer(upper);
2405 }
2406
2407 if (!upper->eb) {
2408 ret = btrfs_search_slot(trans, root, key, path, 0, 1);
2409 if (ret) {
2410 if (ret > 0)
2411 ret = -ENOENT;
2412
2413 btrfs_release_path(path);
2414 break;
2415 }
2416
2417 if (!upper->eb) {
2418 upper->eb = path->nodes[upper->level];
2419 path->nodes[upper->level] = NULL;
2420 } else {
2421 BUG_ON(upper->eb != path->nodes[upper->level]);
2422 }
2423
2424 upper->locked = 1;
2425 path->locks[upper->level] = 0;
2426
2427 slot = path->slots[upper->level];
2428 btrfs_release_path(path);
2429 } else {
2430 ret = btrfs_bin_search(upper->eb, key, &slot);
2431 if (ret < 0)
2432 goto next;
2433 BUG_ON(ret);
2434 }
2435
2436 bytenr = btrfs_node_blockptr(upper->eb, slot);
2437 if (lowest) {
2438 if (bytenr != node->bytenr) {
2439 btrfs_err(root->fs_info,
2440 "lowest leaf/node mismatch: bytenr %llu node->bytenr %llu slot %d upper %llu",
2441 bytenr, node->bytenr, slot,
2442 upper->eb->start);
2443 ret = -EIO;
2444 goto next;
2445 }
2446 } else {
2447 if (node->eb->start == bytenr)
2448 goto next;
2449 }
2450
2451 blocksize = root->fs_info->nodesize;
2452 eb = btrfs_read_node_slot(upper->eb, slot);
2453 if (IS_ERR(eb)) {
2454 ret = PTR_ERR(eb);
2455 goto next;
2456 }
2457 btrfs_tree_lock(eb);
2458
2459 if (!node->eb) {
2460 ret = btrfs_cow_block(trans, root, eb, upper->eb,
2461 slot, &eb, BTRFS_NESTING_COW);
2462 btrfs_tree_unlock(eb);
2463 free_extent_buffer(eb);
2464 if (ret < 0)
2465 goto next;
2466
2467
2468
2469
2470 ASSERT(node->eb == eb);
2471 } else {
2472 btrfs_set_node_blockptr(upper->eb, slot,
2473 node->eb->start);
2474 btrfs_set_node_ptr_generation(upper->eb, slot,
2475 trans->transid);
2476 btrfs_mark_buffer_dirty(upper->eb);
2477
2478 btrfs_init_generic_ref(&ref, BTRFS_ADD_DELAYED_REF,
2479 node->eb->start, blocksize,
2480 upper->eb->start);
2481 btrfs_init_tree_ref(&ref, node->level,
2482 btrfs_header_owner(upper->eb),
2483 root->root_key.objectid, false);
2484 ret = btrfs_inc_extent_ref(trans, &ref);
2485 if (!ret)
2486 ret = btrfs_drop_subtree(trans, root, eb,
2487 upper->eb);
2488 if (ret)
2489 btrfs_abort_transaction(trans, ret);
2490 }
2491 next:
2492 if (!upper->pending)
2493 btrfs_backref_drop_node_buffer(upper);
2494 else
2495 btrfs_backref_unlock_node_buffer(upper);
2496 if (ret)
2497 break;
2498 }
2499
2500 if (!ret && node->pending) {
2501 btrfs_backref_drop_node_buffer(node);
2502 list_move_tail(&node->list, &rc->backref_cache.changed);
2503 node->pending = 0;
2504 }
2505
2506 path->lowest_level = 0;
2507
2508
2509
2510
2511
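/*
 * All the needed metadata space was reserved up front in the block rsv,
 * so hitting ENOSPC here would indicate a reservation bug.
 */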
2512 ASSERT(ret != -ENOSPC);
2513 return ret;
2514 }
2515
2516 static int link_to_upper(struct btrfs_trans_handle *trans,
2517 struct reloc_control *rc,
2518 struct btrfs_backref_node *node,
2519 struct btrfs_path *path)
2520 {
2521 struct btrfs_key key;
2522
2523 btrfs_node_key_to_cpu(node->eb, &key, 0);
2524 return do_relocation(trans, rc, node, &key, path, 0);
2525 }
2526
2527 static int finish_pending_nodes(struct btrfs_trans_handle *trans,
2528 struct reloc_control *rc,
2529 struct btrfs_path *path, int err)
2530 {
2531 LIST_HEAD(list);
2532 struct btrfs_backref_cache *cache = &rc->backref_cache;
2533 struct btrfs_backref_node *node;
2534 int level;
2535 int ret;
2536
2537 for (level = 0; level < BTRFS_MAX_LEVEL; level++) {
2538 while (!list_empty(&cache->pending[level])) {
2539 node = list_entry(cache->pending[level].next,
2540 struct btrfs_backref_node, list);
2541 list_move_tail(&node->list, &list);
2542 BUG_ON(!node->pending);
2543
2544 if (!err) {
2545 ret = link_to_upper(trans, rc, node, path);
2546 if (ret < 0)
2547 err = ret;
2548 }
2549 }
2550 list_splice_init(&list, &cache->pending[level]);
2551 }
2552 return err;
2553 }
2554
2555
2556
2557
2558
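/*
 * Mark a block and all of the blocks that directly or indirectly
 * reference it (walking up the backref tree) as processed.
 */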
2559 static void update_processed_blocks(struct reloc_control *rc,
2560 struct btrfs_backref_node *node)
2561 {
2562 struct btrfs_backref_node *next = node;
2563 struct btrfs_backref_edge *edge;
2564 struct btrfs_backref_edge *edges[BTRFS_MAX_LEVEL - 1];
2565 int index = 0;
2566
2567 while (next) {
2568 cond_resched();
2569 while (1) {
2570 if (next->processed)
2571 break;
2572
2573 mark_block_processed(rc, next);
2574
2575 if (list_empty(&next->upper))
2576 break;
2577
2578 edge = list_entry(next->upper.next,
2579 struct btrfs_backref_edge, list[LOWER]);
2580 edges[index++] = edge;
2581 next = edge->node[UPPER];
2582 }
2583 next = walk_down_backref(edges, &index);
2584 }
2585 }
2586
2587 static int tree_block_processed(u64 bytenr, struct reloc_control *rc)
2588 {
2589 u32 blocksize = rc->extent_root->fs_info->nodesize;
2590
2591 if (test_range_bit(&rc->processed_blocks, bytenr,
2592 bytenr + blocksize - 1, EXTENT_DIRTY, 1, NULL))
2593 return 1;
2594 return 0;
2595 }
2596
2597 static int get_tree_block_key(struct btrfs_fs_info *fs_info,
2598 struct tree_block *block)
2599 {
2600 struct extent_buffer *eb;
2601
2602 eb = read_tree_block(fs_info, block->bytenr, block->owner,
2603 block->key.offset, block->level, NULL);
2604 if (IS_ERR(eb))
2605 return PTR_ERR(eb);
2606 if (!extent_buffer_uptodate(eb)) {
2607 free_extent_buffer(eb);
2608 return -EIO;
2609 }
2610 if (block->level == 0)
2611 btrfs_item_key_to_cpu(eb, &block->key, 0);
2612 else
2613 btrfs_node_key_to_cpu(eb, &block->key, 0);
2614 free_extent_buffer(eb);
2615 block->key_ready = 1;
2616 return 0;
2617 }
2618
2619
2620
2621
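/*
 * Relocate a single tree block: reserve metadata space, select the root
 * to work against, and either switch to the reloc root (shareable roots),
 * COW the path in place, or fall back to do_relocation().
 */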
2622 static int relocate_tree_block(struct btrfs_trans_handle *trans,
2623 struct reloc_control *rc,
2624 struct btrfs_backref_node *node,
2625 struct btrfs_key *key,
2626 struct btrfs_path *path)
2627 {
2628 struct btrfs_root *root;
2629 int ret = 0;
2630
2631 if (!node)
2632 return 0;
2633
2634
2635
2636
2637
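/*
 * If the metadata reservation fails, the backref node is dropped on the
 * way out so the backref tree for this block is rebuilt on the next try.
 */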
2638 ret = reserve_metadata_space(trans, rc, node);
2639 if (ret)
2640 goto out;
2641
2642 BUG_ON(node->processed);
2643 root = select_one_root(node);
2644 if (IS_ERR(root)) {
2645 ret = PTR_ERR(root);
2646
2647
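/* See the explanation in select_one_root() for the -ENOENT case. */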
2648 ASSERT(ret == -ENOENT);
2649 if (ret == -ENOENT) {
2650 ret = 0;
2651 update_processed_blocks(rc, node);
2652 }
2653 goto out;
2654 }
2655
2656 if (root) {
2657 if (test_bit(BTRFS_ROOT_SHAREABLE, &root->state)) {
2658
2659
2660
2661
2662
2663
2664
2665
2666
2667
2668
2669
2670
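/*
 * This block is the root of a shareable root and is being processed for
 * the first time, so new_bytenr must still be zero and the node must not
 * already be on the changed list.  Multiple improper references to the
 * same block would trip these checks: ASSERT() for the developer case,
 * and return -EUCLEAN for a corrupted filesystem.
 */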
2671 ASSERT(node->new_bytenr == 0);
2672 ASSERT(list_empty(&node->list));
2673 if (node->new_bytenr || !list_empty(&node->list)) {
2674 btrfs_err(root->fs_info,
2675 "bytenr %llu has improper references to it",
2676 node->bytenr);
2677 ret = -EUCLEAN;
2678 goto out;
2679 }
2680 ret = btrfs_record_root_in_trans(trans, root);
2681 if (ret)
2682 goto out;
2683
2684
2685
2686
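/*
 * btrfs_record_root_in_trans() normally creates the reloc root, but
 * another task may have failed meanwhile, so verify it is really set.
 */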
2687 if (!root->reloc_root) {
2688 ret = -ENOENT;
2689 goto out;
2690 }
2691 root = root->reloc_root;
2692 node->new_bytenr = root->node->start;
2693 btrfs_put_root(node->root);
2694 node->root = btrfs_grab_root(root);
2695 ASSERT(node->root);
2696 list_add_tail(&node->list, &rc->backref_cache.changed);
2697 } else {
2698 path->lowest_level = node->level;
2699 if (root == root->fs_info->chunk_root)
2700 btrfs_reserve_chunk_metadata(trans, false);
2701 ret = btrfs_search_slot(trans, root, key, path, 0, 1);
2702 btrfs_release_path(path);
2703 if (root == root->fs_info->chunk_root)
2704 btrfs_trans_release_chunk_metadata(trans);
2705 if (ret > 0)
2706 ret = 0;
2707 }
2708 if (!ret)
2709 update_processed_blocks(rc, node);
2710 } else {
2711 ret = do_relocation(trans, rc, node, key, path, 1);
2712 }
2713 out:
2714 if (ret || node->level == 0 || node->cowonly)
2715 btrfs_backref_cleanup_node(&rc->backref_cache, node);
2716 return ret;
2717 }
2718
2719
2720
2721
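/* Relocate the list of tree blocks collected in the "blocks" rb tree. */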
2722 static noinline_for_stack
2723 int relocate_tree_blocks(struct btrfs_trans_handle *trans,
2724 struct reloc_control *rc, struct rb_root *blocks)
2725 {
2726 struct btrfs_fs_info *fs_info = rc->extent_root->fs_info;
2727 struct btrfs_backref_node *node;
2728 struct btrfs_path *path;
2729 struct tree_block *block;
2730 struct tree_block *next;
2731 int ret;
2732 int err = 0;
2733
2734 path = btrfs_alloc_path();
2735 if (!path) {
2736 err = -ENOMEM;
2737 goto out_free_blocks;
2738 }
2739
2740
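/* Kick in readahead for tree blocks with missing keys. */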
2741 rbtree_postorder_for_each_entry_safe(block, next, blocks, rb_node) {
2742 if (!block->key_ready)
2743 btrfs_readahead_tree_block(fs_info, block->bytenr,
2744 block->owner, 0,
2745 block->level);
2746 }
2747
2748
2749 rbtree_postorder_for_each_entry_safe(block, next, blocks, rb_node) {
2750 if (!block->key_ready) {
2751 err = get_tree_block_key(fs_info, block);
2752 if (err)
2753 goto out_free_path;
2754 }
2755 }
2756
2757
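/* Build the backref tree for each block and relocate it. */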
2758 rbtree_postorder_for_each_entry_safe(block, next, blocks, rb_node) {
2759 node = build_backref_tree(rc, &block->key,
2760 block->level, block->bytenr);
2761 if (IS_ERR(node)) {
2762 err = PTR_ERR(node);
2763 goto out;
2764 }
2765
2766 ret = relocate_tree_block(trans, rc, node, &block->key,
2767 path);
2768 if (ret < 0) {
2769 err = ret;
2770 break;
2771 }
2772 }
2773 out:
2774 err = finish_pending_nodes(trans, rc, path, err);
2775
2776 out_free_path:
2777 btrfs_free_path(path);
2778 out_free_blocks:
2779 free_block_list(blocks);
2780 return err;
2781 }
2782
2783 static noinline_for_stack int prealloc_file_extent_cluster(
2784 struct btrfs_inode *inode,
2785 struct file_extent_cluster *cluster)
2786 {
2787 u64 alloc_hint = 0;
2788 u64 start;
2789 u64 end;
2790 u64 offset = inode->index_cnt;
2791 u64 num_bytes;
2792 int nr;
2793 int ret = 0;
2794 u64 i_size = i_size_read(&inode->vfs_inode);
2795 u64 prealloc_start = cluster->start - offset;
2796 u64 prealloc_end = cluster->end - offset;
2797 u64 cur_offset = prealloc_start;
2798
2799
2800
2801
2802
2803
2804
2805
2806
2807
2808
2809
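/*
 * On subpage filesystems (sectorsize < PAGE_SIZE) the previous i_size may
 * not be page aligned.  The tail of that page beyond i_size was padded
 * with zeros by a previous read, and if it stays uptodate those zeros
 * could later be written back as data of the new cluster.  Clear the
 * uptodate bits past i_size so the range gets re-read instead.
 */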
2810 if (!IS_ALIGNED(i_size, PAGE_SIZE)) {
2811 struct address_space *mapping = inode->vfs_inode.i_mapping;
2812 struct btrfs_fs_info *fs_info = inode->root->fs_info;
2813 const u32 sectorsize = fs_info->sectorsize;
2814 struct page *page;
2815
2816 ASSERT(sectorsize < PAGE_SIZE);
2817 ASSERT(IS_ALIGNED(i_size, sectorsize));
2818
2819
2820
2821
2822
2823
2824
2825
2826
2827
2828
2829
2830
2831
2832
2833
2834
2835
2836
2837
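/*
 * Write back the whole data reloc inode first: subpage cannot cope with
 * a page that is dirty but not uptodate, so make sure no dirty range is
 * left in the page before clearing its uptodate bits below.
 */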
2838 ret = filemap_write_and_wait(mapping);
2839 if (ret < 0)
2840 return ret;
2841
2842 clear_extent_bits(&inode->io_tree, i_size,
2843 round_up(i_size, PAGE_SIZE) - 1,
2844 EXTENT_UPTODATE);
2845 page = find_lock_page(mapping, i_size >> PAGE_SHIFT);
2846
2847
2848
2849
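/*
 * If the page has already been released there is nothing to clear; the
 * whole page will simply be re-read later.
 */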
2850 if (page) {
2851 btrfs_subpage_clear_uptodate(fs_info, page, i_size,
2852 round_up(i_size, PAGE_SIZE) - i_size);
2853 unlock_page(page);
2854 put_page(page);
2855 }
2856 }
2857
2858 BUG_ON(cluster->start != cluster->boundary[0]);
2859 ret = btrfs_alloc_data_chunk_ondemand(inode,
2860 prealloc_end + 1 - prealloc_start);
2861 if (ret)
2862 return ret;
2863
2864 btrfs_inode_lock(&inode->vfs_inode, 0);
2865 for (nr = 0; nr < cluster->nr; nr++) {
2866 start = cluster->boundary[nr] - offset;
2867 if (nr + 1 < cluster->nr)
2868 end = cluster->boundary[nr + 1] - 1 - offset;
2869 else
2870 end = cluster->end - offset;
2871
2872 lock_extent(&inode->io_tree, start, end);
2873 num_bytes = end + 1 - start;
2874 ret = btrfs_prealloc_file_range(&inode->vfs_inode, 0, start,
2875 num_bytes, num_bytes,
2876 end + 1, &alloc_hint);
2877 cur_offset = end + 1;
2878 unlock_extent(&inode->io_tree, start, end);
2879 if (ret)
2880 break;
2881 }
2882 btrfs_inode_unlock(&inode->vfs_inode, 0);
2883
2884 if (cur_offset < prealloc_end)
2885 btrfs_free_reserved_data_space_noquota(inode->root->fs_info,
2886 prealloc_end + 1 - cur_offset);
2887 return ret;
2888 }
2889
2890 static noinline_for_stack int setup_relocation_extent_mapping(struct inode *inode,
2891 u64 start, u64 end, u64 block_start)
2892 {
2893 struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree;
2894 struct extent_map *em;
2895 int ret = 0;
2896
2897 em = alloc_extent_map();
2898 if (!em)
2899 return -ENOMEM;
2900
2901 em->start = start;
2902 em->len = end + 1 - start;
2903 em->block_len = em->len;
2904 em->block_start = block_start;
2905 set_bit(EXTENT_FLAG_PINNED, &em->flags);
2906
2907 lock_extent(&BTRFS_I(inode)->io_tree, start, end);
2908 while (1) {
2909 write_lock(&em_tree->lock);
2910 ret = add_extent_mapping(em_tree, em, 0);
2911 write_unlock(&em_tree->lock);
2912 if (ret != -EEXIST) {
2913 free_extent_map(em);
2914 break;
2915 }
2916 btrfs_drop_extent_cache(BTRFS_I(inode), start, end, 0);
2917 }
2918 unlock_extent(&BTRFS_I(inode)->io_tree, start, end);
2919 return ret;
2920 }
2921
2922
2923
2924
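/*
 * Check for a pending balance/relocation cancellation request or a fatal
 * signal.  Error injection is allowed on this helper so the cancellation
 * paths can be exercised in tests.
 */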
2925 noinline int btrfs_should_cancel_balance(struct btrfs_fs_info *fs_info)
2926 {
2927 return atomic_read(&fs_info->balance_cancel_req) ||
2928 atomic_read(&fs_info->reloc_cancel_req) ||
2929 fatal_signal_pending(current);
2930 }
2931 ALLOW_ERROR_INJECTION(btrfs_should_cancel_balance, TRUE);
2932
2933 static u64 get_cluster_boundary_end(struct file_extent_cluster *cluster,
2934 int cluster_nr)
2935 {
2936
2937 if (cluster_nr >= cluster->nr - 1)
2938 return cluster->end;
2939
2940
2941 return cluster->boundary[cluster_nr + 1] - 1;
2942 }
2943
2944 static int relocate_one_page(struct inode *inode, struct file_ra_state *ra,
2945 struct file_extent_cluster *cluster,
2946 int *cluster_nr, unsigned long page_index)
2947 {
2948 struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
2949 u64 offset = BTRFS_I(inode)->index_cnt;
2950 const unsigned long last_index = (cluster->end - offset) >> PAGE_SHIFT;
2951 gfp_t mask = btrfs_alloc_write_mask(inode->i_mapping);
2952 struct page *page;
2953 u64 page_start;
2954 u64 page_end;
2955 u64 cur;
2956 int ret;
2957
2958 ASSERT(page_index <= last_index);
2959 page = find_lock_page(inode->i_mapping, page_index);
2960 if (!page) {
2961 page_cache_sync_readahead(inode->i_mapping, ra, NULL,
2962 page_index, last_index + 1 - page_index);
2963 page = find_or_create_page(inode->i_mapping, page_index, mask);
2964 if (!page)
2965 return -ENOMEM;
2966 }
2967 ret = set_page_extent_mapped(page);
2968 if (ret < 0)
2969 goto release_page;
2970
2971 if (PageReadahead(page))
2972 page_cache_async_readahead(inode->i_mapping, ra, NULL,
2973 page_folio(page), page_index,
2974 last_index + 1 - page_index);
2975
2976 if (!PageUptodate(page)) {
2977 btrfs_read_folio(NULL, page_folio(page));
2978 lock_page(page);
2979 if (!PageUptodate(page)) {
2980 ret = -EIO;
2981 goto release_page;
2982 }
2983 }
2984
2985 page_start = page_offset(page);
2986 page_end = page_start + PAGE_SIZE - 1;
2987
2988
2989
2990
2991
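/*
 * Start from the cluster boundary rather than the page start; on subpage
 * filesystems the cluster can begin in the middle of the page.
 */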
2992 cur = max(page_start, cluster->boundary[*cluster_nr] - offset);
2993 while (cur <= page_end) {
2994 u64 extent_start = cluster->boundary[*cluster_nr] - offset;
2995 u64 extent_end = get_cluster_boundary_end(cluster,
2996 *cluster_nr) - offset;
2997 u64 clamped_start = max(page_start, extent_start);
2998 u64 clamped_end = min(page_end, extent_end);
2999 u32 clamped_len = clamped_end + 1 - clamped_start;
3000
3001
3002 ret = btrfs_delalloc_reserve_metadata(BTRFS_I(inode),
3003 clamped_len, clamped_len,
3004 false);
3005 if (ret)
3006 goto release_page;
3007
3008
3009 lock_extent(&BTRFS_I(inode)->io_tree, clamped_start, clamped_end);
3010 ret = btrfs_set_extent_delalloc(BTRFS_I(inode), clamped_start,
3011 clamped_end, 0, NULL);
3012 if (ret) {
3013 clear_extent_bits(&BTRFS_I(inode)->io_tree,
3014 clamped_start, clamped_end,
3015 EXTENT_LOCKED | EXTENT_BOUNDARY);
3016 btrfs_delalloc_release_metadata(BTRFS_I(inode),
3017 clamped_len, true);
3018 btrfs_delalloc_release_extents(BTRFS_I(inode),
3019 clamped_len);
3020 goto release_page;
3021 }
3022 btrfs_page_set_dirty(fs_info, page, clamped_start, clamped_len);
3023
3024
3025
3026
3027
3028
3029
3030
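/*
 * If the cluster boundary falls inside this page, mark one sector at the
 * boundary with EXTENT_BOUNDARY.  That keeps the new extent from being
 * merged with the previous one, so data relocation preserves the
 * original extent sizes.
 */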
3031 if (in_range(cluster->boundary[*cluster_nr] - offset,
3032 page_start, PAGE_SIZE)) {
3033 u64 boundary_start = cluster->boundary[*cluster_nr] -
3034 offset;
3035 u64 boundary_end = boundary_start +
3036 fs_info->sectorsize - 1;
3037
3038 set_extent_bits(&BTRFS_I(inode)->io_tree,
3039 boundary_start, boundary_end,
3040 EXTENT_BOUNDARY);
3041 }
3042 unlock_extent(&BTRFS_I(inode)->io_tree, clamped_start, clamped_end);
3043 btrfs_delalloc_release_extents(BTRFS_I(inode), clamped_len);
3044 cur += clamped_len;
3045
3046
3047 if (cur >= extent_end) {
3048 (*cluster_nr)++;
3049
3050 if (*cluster_nr >= cluster->nr)
3051 break;
3052 }
3053 }
3054 unlock_page(page);
3055 put_page(page);
3056
3057 balance_dirty_pages_ratelimited(inode->i_mapping);
3058 btrfs_throttle(fs_info);
3059 if (btrfs_should_cancel_balance(fs_info))
3060 ret = -ECANCELED;
3061 return ret;
3062
3063 release_page:
3064 unlock_page(page);
3065 put_page(page);
3066 return ret;
3067 }
3068
3069 static int relocate_file_extent_cluster(struct inode *inode,
3070 struct file_extent_cluster *cluster)
3071 {
3072 u64 offset = BTRFS_I(inode)->index_cnt;
3073 unsigned long index;
3074 unsigned long last_index;
3075 struct file_ra_state *ra;
3076 int cluster_nr = 0;
3077 int ret = 0;
3078
3079 if (!cluster->nr)
3080 return 0;
3081
3082 ra = kzalloc(sizeof(*ra), GFP_NOFS);
3083 if (!ra)
3084 return -ENOMEM;
3085
3086 ret = prealloc_file_extent_cluster(BTRFS_I(inode), cluster);
3087 if (ret)
3088 goto out;
3089
3090 file_ra_state_init(ra, inode->i_mapping);
3091
3092 ret = setup_relocation_extent_mapping(inode, cluster->start - offset,
3093 cluster->end - offset, cluster->start);
3094 if (ret)
3095 goto out;
3096
3097 last_index = (cluster->end - offset) >> PAGE_SHIFT;
3098 for (index = (cluster->start - offset) >> PAGE_SHIFT;
3099 index <= last_index && !ret; index++)
3100 ret = relocate_one_page(inode, ra, cluster, &cluster_nr, index);
3101 if (ret == 0)
3102 WARN_ON(cluster_nr != cluster->nr);
3103 out:
3104 kfree(ra);
3105 return ret;
3106 }
3107
3108 static noinline_for_stack
3109 int relocate_data_extent(struct inode *inode, struct btrfs_key *extent_key,
3110 struct file_extent_cluster *cluster)
3111 {
3112 int ret;
3113
3114 if (cluster->nr > 0 && extent_key->objectid != cluster->end + 1) {
3115 ret = relocate_file_extent_cluster(inode, cluster);
3116 if (ret)
3117 return ret;
3118 cluster->nr = 0;
3119 }
3120
3121 if (!cluster->nr)
3122 cluster->start = extent_key->objectid;
3123 else
3124 BUG_ON(cluster->nr >= MAX_EXTENTS);
3125 cluster->end = extent_key->objectid + extent_key->offset - 1;
3126 cluster->boundary[cluster->nr] = extent_key->objectid;
3127 cluster->nr++;
3128
3129 if (cluster->nr >= MAX_EXTENTS) {
3130 ret = relocate_file_extent_cluster(inode, cluster);
3131 if (ret)
3132 return ret;
3133 cluster->nr = 0;
3134 }
3135 return 0;
3136 }
3137
3138
3139
3140
3141
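/*
 * Add a tree block found in the extent tree to the "blocks" rb tree.
 * The main work is extracting the block's level, generation and owner
 * from the extent item.
 */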
3142 static int add_tree_block(struct reloc_control *rc,
3143 struct btrfs_key *extent_key,
3144 struct btrfs_path *path,
3145 struct rb_root *blocks)
3146 {
3147 struct extent_buffer *eb;
3148 struct btrfs_extent_item *ei;
3149 struct btrfs_tree_block_info *bi;
3150 struct tree_block *block;
3151 struct rb_node *rb_node;
3152 u32 item_size;
3153 int level = -1;
3154 u64 generation;
3155 u64 owner = 0;
3156
3157 eb = path->nodes[0];
3158 item_size = btrfs_item_size(eb, path->slots[0]);
3159
3160 if (extent_key->type == BTRFS_METADATA_ITEM_KEY ||
3161 item_size >= sizeof(*ei) + sizeof(*bi)) {
3162 unsigned long ptr = 0, end;
3163
3164 ei = btrfs_item_ptr(eb, path->slots[0],
3165 struct btrfs_extent_item);
3166 end = (unsigned long)ei + item_size;
3167 if (extent_key->type == BTRFS_EXTENT_ITEM_KEY) {
3168 bi = (struct btrfs_tree_block_info *)(ei + 1);
3169 level = btrfs_tree_block_level(eb, bi);
3170 ptr = (unsigned long)(bi + 1);
3171 } else {
3172 level = (int)extent_key->offset;
3173 ptr = (unsigned long)(ei + 1);
3174 }
3175 generation = btrfs_extent_generation(eb, ei);
3176
3177
3178
3179
3180
3181
3182
3183
3184
3185
3186
3187
3188
3189
3190
3191
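/*
 * The block is read later without knowing its owner up front.  When there
 * is exactly one reference, FULL_BACKREF is not set and the first inline
 * ref is a TREE_BLOCK_REF, the inline ref offset is the owning root id
 * and can be handed to read_tree_block().
 */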
3192 if (btrfs_extent_refs(eb, ei) == 1 &&
3193 !(btrfs_extent_flags(eb, ei) &
3194 BTRFS_BLOCK_FLAG_FULL_BACKREF) &&
3195 ptr < end) {
3196 struct btrfs_extent_inline_ref *iref;
3197 int type;
3198
3199 iref = (struct btrfs_extent_inline_ref *)ptr;
3200 type = btrfs_get_extent_inline_ref_type(eb, iref,
3201 BTRFS_REF_TYPE_BLOCK);
3202 if (type == BTRFS_REF_TYPE_INVALID)
3203 return -EINVAL;
3204 if (type == BTRFS_TREE_BLOCK_REF_KEY)
3205 owner = btrfs_extent_inline_ref_offset(eb, iref);
3206 }
3207 } else if (unlikely(item_size == sizeof(struct btrfs_extent_item_v0))) {
3208 btrfs_print_v0_err(eb->fs_info);
3209 btrfs_handle_fs_error(eb->fs_info, -EINVAL, NULL);
3210 return -EINVAL;
3211 } else {
3212 BUG();
3213 }
3214
3215 btrfs_release_path(path);
3216
3217 BUG_ON(level == -1);
3218
3219 block = kmalloc(sizeof(*block), GFP_NOFS);
3220 if (!block)
3221 return -ENOMEM;
3222
3223 block->bytenr = extent_key->objectid;
3224 block->key.objectid = rc->extent_root->fs_info->nodesize;
3225 block->key.offset = generation;
3226 block->level = level;
3227 block->key_ready = 0;
3228 block->owner = owner;
3229
3230 rb_node = rb_simple_insert(blocks, block->bytenr, &block->rb_node);
3231 if (rb_node)
3232 btrfs_backref_panic(rc->extent_root->fs_info, block->bytenr,
3233 -EEXIST);
3234
3235 return 0;
3236 }
3237
3238
3239
3240
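/*
 * Look up the extent item for a tree block by bytenr (handling both
 * skinny metadata and regular extent items) and add the block to the rb
 * tree of blocks to relocate, unless it was already processed.
 */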
3241 static int __add_tree_block(struct reloc_control *rc,
3242 u64 bytenr, u32 blocksize,
3243 struct rb_root *blocks)
3244 {
3245 struct btrfs_fs_info *fs_info = rc->extent_root->fs_info;
3246 struct btrfs_path *path;
3247 struct btrfs_key key;
3248 int ret;
3249 bool skinny = btrfs_fs_incompat(fs_info, SKINNY_METADATA);
3250
3251 if (tree_block_processed(bytenr, rc))
3252 return 0;
3253
3254 if (rb_simple_search(blocks, bytenr))
3255 return 0;
3256
3257 path = btrfs_alloc_path();
3258 if (!path)
3259 return -ENOMEM;
3260 again:
3261 key.objectid = bytenr;
3262 if (skinny) {
3263 key.type = BTRFS_METADATA_ITEM_KEY;
3264 key.offset = (u64)-1;
3265 } else {
3266 key.type = BTRFS_EXTENT_ITEM_KEY;
3267 key.offset = blocksize;
3268 }
3269
3270 path->search_commit_root = 1;
3271 path->skip_locking = 1;
3272 ret = btrfs_search_slot(NULL, rc->extent_root, &key, path, 0, 0);
3273 if (ret < 0)
3274 goto out;
3275
3276 if (ret > 0 && skinny) {
3277 if (path->slots[0]) {
3278 path->slots[0]--;
3279 btrfs_item_key_to_cpu(path->nodes[0], &key,
3280 path->slots[0]);
3281 if (key.objectid == bytenr &&
3282 (key.type == BTRFS_METADATA_ITEM_KEY ||
3283 (key.type == BTRFS_EXTENT_ITEM_KEY &&
3284 key.offset == blocksize)))
3285 ret = 0;
3286 }
3287
3288 if (ret) {
3289 skinny = false;
3290 btrfs_release_path(path);
3291 goto again;
3292 }
3293 }
3294 if (ret) {
3295 ASSERT(ret == 1);
3296 btrfs_print_leaf(path->nodes[0]);
3297 btrfs_err(fs_info,
3298 "tree block extent item (%llu) is not found in extent tree",
3299 bytenr);
3300 WARN_ON(1);
3301 ret = -EINVAL;
3302 goto out;
3303 }
3304
3305 ret = add_tree_block(rc, &key, path, blocks);
3306 out:
3307 btrfs_free_path(path);
3308 return ret;
3309 }
3310
3311 static int delete_block_group_cache(struct btrfs_fs_info *fs_info,
3312 struct btrfs_block_group *block_group,
3313 struct inode *inode,
3314 u64 ino)
3315 {
3316 struct btrfs_root *root = fs_info->tree_root;
3317 struct btrfs_trans_handle *trans;
3318 int ret = 0;
3319
3320 if (inode)
3321 goto truncate;
3322
3323 inode = btrfs_iget(fs_info->sb, ino, root);
3324 if (IS_ERR(inode))
3325 return -ENOENT;
3326
3327 truncate:
3328 ret = btrfs_check_trunc_cache_free_space(fs_info,
3329 &fs_info->global_block_rsv);
3330 if (ret)
3331 goto out;
3332
3333 trans = btrfs_join_transaction(root);
3334 if (IS_ERR(trans)) {
3335 ret = PTR_ERR(trans);
3336 goto out;
3337 }
3338
3339 ret = btrfs_truncate_free_space_cache(trans, block_group, inode);
3340
3341 btrfs_end_transaction(trans);
3342 btrfs_btree_balance_dirty(fs_info);
3343 out:
3344 iput(inode);
3345 return ret;
3346 }
3347
3348
3349
3350
3351
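/*
 * If the given root tree leaf contains a v1 free space cache extent
 * pointing at data_bytenr, delete that cache inode so the cache data
 * extent does not block data relocation.
 */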
3352 static int delete_v1_space_cache(struct extent_buffer *leaf,
3353 struct btrfs_block_group *block_group,
3354 u64 data_bytenr)
3355 {
3356 u64 space_cache_ino;
3357 struct btrfs_file_extent_item *ei;
3358 struct btrfs_key key;
3359 bool found = false;
3360 int i;
3361 int ret;
3362
3363 if (btrfs_header_owner(leaf) != BTRFS_ROOT_TREE_OBJECTID)
3364 return 0;
3365
3366 for (i = 0; i < btrfs_header_nritems(leaf); i++) {
3367 u8 type;
3368
3369 btrfs_item_key_to_cpu(leaf, &key, i);
3370 if (key.type != BTRFS_EXTENT_DATA_KEY)
3371 continue;
3372 ei = btrfs_item_ptr(leaf, i, struct btrfs_file_extent_item);
3373 type = btrfs_file_extent_type(leaf, ei);
3374
3375 if ((type == BTRFS_FILE_EXTENT_REG ||
3376 type == BTRFS_FILE_EXTENT_PREALLOC) &&
3377 btrfs_file_extent_disk_bytenr(leaf, ei) == data_bytenr) {
3378 found = true;
3379 space_cache_ino = key.objectid;
3380 break;
3381 }
3382 }
3383 if (!found)
3384 return -ENOENT;
3385 ret = delete_block_group_cache(leaf->fs_info, block_group, NULL,
3386 space_cache_ino);
3387 return ret;
3388 }
3389
3390
3391
3392
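/*
 * Find all the leaves that reference the given data extent and add the
 * corresponding tree blocks to the rb tree for relocation.
 */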
3393 static noinline_for_stack
3394 int add_data_references(struct reloc_control *rc,
3395 struct btrfs_key *extent_key,
3396 struct btrfs_path *path,
3397 struct rb_root *blocks)
3398 {
3399 struct btrfs_fs_info *fs_info = rc->extent_root->fs_info;
3400 struct ulist *leaves = NULL;
3401 struct ulist_iterator leaf_uiter;
3402 struct ulist_node *ref_node = NULL;
3403 const u32 blocksize = fs_info->nodesize;
3404 int ret = 0;
3405
3406 btrfs_release_path(path);
3407 ret = btrfs_find_all_leafs(NULL, fs_info, extent_key->objectid,
3408 0, &leaves, NULL, true);
3409 if (ret < 0)
3410 return ret;
3411
3412 ULIST_ITER_INIT(&leaf_uiter);
3413 while ((ref_node = ulist_next(leaves, &leaf_uiter))) {
3414 struct extent_buffer *eb;
3415
3416 eb = read_tree_block(fs_info, ref_node->val, 0, 0, 0, NULL);
3417 if (IS_ERR(eb)) {
3418 ret = PTR_ERR(eb);
3419 break;
3420 }
3421 ret = delete_v1_space_cache(eb, rc->block_group,
3422 extent_key->objectid);
3423 free_extent_buffer(eb);
3424 if (ret < 0)
3425 break;
3426 ret = __add_tree_block(rc, ref_node->val, blocksize, blocks);
3427 if (ret < 0)
3428 break;
3429 }
3430 if (ret < 0)
3431 free_block_list(blocks);
3432 ulist_free(leaves);
3433 return ret;
3434 }
3435
3436
3437
3438
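/* Find the next extent in the block group that has not been processed yet. */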
3439 static noinline_for_stack
3440 int find_next_extent(struct reloc_control *rc, struct btrfs_path *path,
3441 struct btrfs_key *extent_key)
3442 {
3443 struct btrfs_fs_info *fs_info = rc->extent_root->fs_info;
3444 struct btrfs_key key;
3445 struct extent_buffer *leaf;
3446 u64 start, end, last;
3447 int ret;
3448
3449 last = rc->block_group->start + rc->block_group->length;
3450 while (1) {
3451 cond_resched();
3452 if (rc->search_start >= last) {
3453 ret = 1;
3454 break;
3455 }
3456
3457 key.objectid = rc->search_start;
3458 key.type = BTRFS_EXTENT_ITEM_KEY;
3459 key.offset = 0;
3460
3461 path->search_commit_root = 1;
3462 path->skip_locking = 1;
3463 ret = btrfs_search_slot(NULL, rc->extent_root, &key, path,
3464 0, 0);
3465 if (ret < 0)
3466 break;
3467 next:
3468 leaf = path->nodes[0];
3469 if (path->slots[0] >= btrfs_header_nritems(leaf)) {
3470 ret = btrfs_next_leaf(rc->extent_root, path);
3471 if (ret != 0)
3472 break;
3473 leaf = path->nodes[0];
3474 }
3475
3476 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
3477 if (key.objectid >= last) {
3478 ret = 1;
3479 break;
3480 }
3481
3482 if (key.type != BTRFS_EXTENT_ITEM_KEY &&
3483 key.type != BTRFS_METADATA_ITEM_KEY) {
3484 path->slots[0]++;
3485 goto next;
3486 }
3487
3488 if (key.type == BTRFS_EXTENT_ITEM_KEY &&
3489 key.objectid + key.offset <= rc->search_start) {
3490 path->slots[0]++;
3491 goto next;
3492 }
3493
3494 if (key.type == BTRFS_METADATA_ITEM_KEY &&
3495 key.objectid + fs_info->nodesize <=
3496 rc->search_start) {
3497 path->slots[0]++;
3498 goto next;
3499 }
3500
3501 ret = find_first_extent_bit(&rc->processed_blocks,
3502 key.objectid, &start, &end,
3503 EXTENT_DIRTY, NULL);
3504
3505 if (ret == 0 && start <= key.objectid) {
3506 btrfs_release_path(path);
3507 rc->search_start = end + 1;
3508 } else {
3509 if (key.type == BTRFS_EXTENT_ITEM_KEY)
3510 rc->search_start = key.objectid + key.offset;
3511 else
3512 rc->search_start = key.objectid +
3513 fs_info->nodesize;
3514 memcpy(extent_key, &key, sizeof(key));
3515 return 0;
3516 }
3517 }
3518 btrfs_release_path(path);
3519 return ret;
3520 }
3521
3522 static void set_reloc_control(struct reloc_control *rc)
3523 {
3524 struct btrfs_fs_info *fs_info = rc->extent_root->fs_info;
3525
3526 mutex_lock(&fs_info->reloc_mutex);
3527 fs_info->reloc_ctl = rc;
3528 mutex_unlock(&fs_info->reloc_mutex);
3529 }
3530
3531 static void unset_reloc_control(struct reloc_control *rc)
3532 {
3533 struct btrfs_fs_info *fs_info = rc->extent_root->fs_info;
3534
3535 mutex_lock(&fs_info->reloc_mutex);
3536 fs_info->reloc_ctl = NULL;
3537 mutex_unlock(&fs_info->reloc_mutex);
3538 }
3539
3540 static noinline_for_stack
3541 int prepare_to_relocate(struct reloc_control *rc)
3542 {
3543 struct btrfs_trans_handle *trans;
3544 int ret;
3545
3546 rc->block_rsv = btrfs_alloc_block_rsv(rc->extent_root->fs_info,
3547 BTRFS_BLOCK_RSV_TEMP);
3548 if (!rc->block_rsv)
3549 return -ENOMEM;
3550
3551 memset(&rc->cluster, 0, sizeof(rc->cluster));
3552 rc->search_start = rc->block_group->start;
3553 rc->extents_found = 0;
3554 rc->nodes_relocated = 0;
3555 rc->merging_rsv_size = 0;
3556 rc->reserved_bytes = 0;
3557 rc->block_rsv->size = rc->extent_root->fs_info->nodesize *
3558 RELOCATION_RESERVED_NODES;
3559 ret = btrfs_block_rsv_refill(rc->extent_root->fs_info,
3560 rc->block_rsv, rc->block_rsv->size,
3561 BTRFS_RESERVE_FLUSH_ALL);
3562 if (ret)
3563 return ret;
3564
3565 rc->create_reloc_tree = 1;
3566 set_reloc_control(rc);
3567
3568 trans = btrfs_join_transaction(rc->extent_root);
3569 if (IS_ERR(trans)) {
3570 unset_reloc_control(rc);
3571
3572
3573
3574
3575
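/*
 * The extent root is not a shareable tree, so there is no reloc root to
 * clean up here; the caller is responsible for freeing the block rsv
 * allocated above.
 */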
3576 return PTR_ERR(trans);
3577 }
3578
3579 ret = btrfs_commit_transaction(trans);
3580 if (ret)
3581 unset_reloc_control(rc);
3582
3583 return ret;
3584 }
3585
3586 static noinline_for_stack int relocate_block_group(struct reloc_control *rc)
3587 {
3588 struct btrfs_fs_info *fs_info = rc->extent_root->fs_info;
3589 struct rb_root blocks = RB_ROOT;
3590 struct btrfs_key key;
3591 struct btrfs_trans_handle *trans = NULL;
3592 struct btrfs_path *path;
3593 struct btrfs_extent_item *ei;
3594 u64 flags;
3595 int ret;
3596 int err = 0;
3597 int progress = 0;
3598
3599 path = btrfs_alloc_path();
3600 if (!path)
3601 return -ENOMEM;
3602 path->reada = READA_FORWARD;
3603
3604 ret = prepare_to_relocate(rc);
3605 if (ret) {
3606 err = ret;
3607 goto out_free;
3608 }
3609
3610 while (1) {
3611 rc->reserved_bytes = 0;
3612 ret = btrfs_block_rsv_refill(fs_info, rc->block_rsv,
3613 rc->block_rsv->size,
3614 BTRFS_RESERVE_FLUSH_ALL);
3615 if (ret) {
3616 err = ret;
3617 break;
3618 }
3619 progress++;
3620 trans = btrfs_start_transaction(rc->extent_root, 0);
3621 if (IS_ERR(trans)) {
3622 err = PTR_ERR(trans);
3623 trans = NULL;
3624 break;
3625 }
3626 restart:
3627 if (update_backref_cache(trans, &rc->backref_cache)) {
3628 btrfs_end_transaction(trans);
3629 trans = NULL;
3630 continue;
3631 }
3632
3633 ret = find_next_extent(rc, path, &key);
3634 if (ret < 0)
3635 err = ret;
3636 if (ret != 0)
3637 break;
3638
3639 rc->extents_found++;
3640
3641 ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
3642 struct btrfs_extent_item);
3643 flags = btrfs_extent_flags(path->nodes[0], ei);
3644
3645 if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) {
3646 ret = add_tree_block(rc, &key, path, &blocks);
3647 } else if (rc->stage == UPDATE_DATA_PTRS &&
3648 (flags & BTRFS_EXTENT_FLAG_DATA)) {
3649 ret = add_data_references(rc, &key, path, &blocks);
3650 } else {
3651 btrfs_release_path(path);
3652 ret = 0;
3653 }
3654 if (ret < 0) {
3655 err = ret;
3656 break;
3657 }
3658
3659 if (!RB_EMPTY_ROOT(&blocks)) {
3660 ret = relocate_tree_blocks(trans, rc, &blocks);
3661 if (ret < 0) {
3662 if (ret != -EAGAIN) {
3663 err = ret;
3664 break;
3665 }
3666 rc->extents_found--;
3667 rc->search_start = key.objectid;
3668 }
3669 }
3670
3671 btrfs_end_transaction_throttle(trans);
3672 btrfs_btree_balance_dirty(fs_info);
3673 trans = NULL;
3674
3675 if (rc->stage == MOVE_DATA_EXTENTS &&
3676 (flags & BTRFS_EXTENT_FLAG_DATA)) {
3677 rc->found_file_extent = 1;
3678 ret = relocate_data_extent(rc->data_inode,
3679 &key, &rc->cluster);
3680 if (ret < 0) {
3681 err = ret;
3682 break;
3683 }
3684 }
3685 if (btrfs_should_cancel_balance(fs_info)) {
3686 err = -ECANCELED;
3687 break;
3688 }
3689 }
3690 if (trans && progress && err == -ENOSPC) {
3691 ret = btrfs_force_chunk_alloc(trans, rc->block_group->flags);
3692 if (ret == 1) {
3693 err = 0;
3694 progress = 0;
3695 goto restart;
3696 }
3697 }
3698
3699 btrfs_release_path(path);
3700 clear_extent_bits(&rc->processed_blocks, 0, (u64)-1, EXTENT_DIRTY);
3701
3702 if (trans) {
3703 btrfs_end_transaction_throttle(trans);
3704 btrfs_btree_balance_dirty(fs_info);
3705 }
3706
3707 if (!err) {
3708 ret = relocate_file_extent_cluster(rc->data_inode,
3709 &rc->cluster);
3710 if (ret < 0)
3711 err = ret;
3712 }
3713
3714 rc->create_reloc_tree = 0;
3715 set_reloc_control(rc);
3716
3717 btrfs_backref_release_cache(&rc->backref_cache);
3718 btrfs_block_rsv_release(fs_info, rc->block_rsv, (u64)-1, NULL);
3719
3720
3721
3722
3723
3724
3725
3726
3727
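/*
 * Even when relocation was cancelled or hit an error, still go through
 * prepare_to_merge() and merge_reloc_roots(): on error they mark all
 * reloc trees orphan and queue them for cleanup.
 */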
3728 err = prepare_to_merge(rc, err);
3729
3730 merge_reloc_roots(rc);
3731
3732 rc->merge_reloc_tree = 0;
3733 unset_reloc_control(rc);
3734 btrfs_block_rsv_release(fs_info, rc->block_rsv, (u64)-1, NULL);
3735
3736
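/* Commit once more to get rid of the pinned extents. */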
3737 trans = btrfs_join_transaction(rc->extent_root);
3738 if (IS_ERR(trans)) {
3739 err = PTR_ERR(trans);
3740 goto out_free;
3741 }
3742 ret = btrfs_commit_transaction(trans);
3743 if (ret && !err)
3744 err = ret;
3745 out_free:
3746 ret = clean_dirty_subvols(rc);
3747 if (ret < 0 && !err)
3748 err = ret;
3749 btrfs_free_block_rsv(fs_info, rc->block_rsv);
3750 btrfs_free_path(path);
3751 return err;
3752 }
3753
3754 static int __insert_orphan_inode(struct btrfs_trans_handle *trans,
3755 struct btrfs_root *root, u64 objectid)
3756 {
3757 struct btrfs_path *path;
3758 struct btrfs_inode_item *item;
3759 struct extent_buffer *leaf;
3760 int ret;
3761
3762 path = btrfs_alloc_path();
3763 if (!path)
3764 return -ENOMEM;
3765
3766 ret = btrfs_insert_empty_inode(trans, root, path, objectid);
3767 if (ret)
3768 goto out;
3769
3770 leaf = path->nodes[0];
3771 item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_inode_item);
3772 memzero_extent_buffer(leaf, (unsigned long)item, sizeof(*item));
3773 btrfs_set_inode_generation(leaf, item, 1);
3774 btrfs_set_inode_size(leaf, item, 0);
3775 btrfs_set_inode_mode(leaf, item, S_IFREG | 0600);
3776 btrfs_set_inode_flags(leaf, item, BTRFS_INODE_NOCOMPRESS |
3777 BTRFS_INODE_PREALLOC);
3778 btrfs_mark_buffer_dirty(leaf);
3779 out:
3780 btrfs_free_path(path);
3781 return ret;
3782 }
3783
3784 static void delete_orphan_inode(struct btrfs_trans_handle *trans,
3785 struct btrfs_root *root, u64 objectid)
3786 {
3787 struct btrfs_path *path;
3788 struct btrfs_key key;
3789 int ret = 0;
3790
3791 path = btrfs_alloc_path();
3792 if (!path) {
3793 ret = -ENOMEM;
3794 goto out;
3795 }
3796
3797 key.objectid = objectid;
3798 key.type = BTRFS_INODE_ITEM_KEY;
3799 key.offset = 0;
3800 ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
3801 if (ret) {
3802 if (ret > 0)
3803 ret = -ENOENT;
3804 goto out;
3805 }
3806 ret = btrfs_del_item(trans, root, path);
3807 out:
3808 if (ret)
3809 btrfs_abort_transaction(trans, ret);
3810 btrfs_free_path(path);
3811 }
3812
3813
3814
3815
3816
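/*
 * Create an orphan inode in the data relocation tree.  Data from the
 * block group being relocated is staged through this inode; its
 * index_cnt records the start of the block group.
 */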
3817 static noinline_for_stack
3818 struct inode *create_reloc_inode(struct btrfs_fs_info *fs_info,
3819 struct btrfs_block_group *group)
3820 {
3821 struct inode *inode = NULL;
3822 struct btrfs_trans_handle *trans;
3823 struct btrfs_root *root;
3824 u64 objectid;
3825 int err = 0;
3826
3827 root = btrfs_grab_root(fs_info->data_reloc_root);
3828 trans = btrfs_start_transaction(root, 6);
3829 if (IS_ERR(trans)) {
3830 btrfs_put_root(root);
3831 return ERR_CAST(trans);
3832 }
3833
3834 err = btrfs_get_free_objectid(root, &objectid);
3835 if (err)
3836 goto out;
3837
3838 err = __insert_orphan_inode(trans, root, objectid);
3839 if (err)
3840 goto out;
3841
3842 inode = btrfs_iget(fs_info->sb, objectid, root);
3843 if (IS_ERR(inode)) {
3844 delete_orphan_inode(trans, root, objectid);
3845 err = PTR_ERR(inode);
3846 inode = NULL;
3847 goto out;
3848 }
3849 BTRFS_I(inode)->index_cnt = group->start;
3850
3851 err = btrfs_orphan_add(trans, BTRFS_I(inode));
3852 out:
3853 btrfs_put_root(root);
3854 btrfs_end_transaction(trans);
3855 btrfs_btree_balance_dirty(fs_info);
3856 if (err) {
3857 iput(inode);
3858 inode = ERR_PTR(err);
3859 }
3860 return inode;
3861 }
3862
3863
3864
3865
3866
3867
3868
3869
3870
3871
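/*
 * Mark the start of a cancellable chunk relocation.
 *
 * Returns 0 on success, -EINPROGRESS if a relocation is already running,
 * or -ECANCELED if cancellation was requested before the operation
 * started.
 */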
3872 static int reloc_chunk_start(struct btrfs_fs_info *fs_info)
3873 {
3874 if (test_and_set_bit(BTRFS_FS_RELOC_RUNNING, &fs_info->flags)) {
3875
3876 btrfs_err(fs_info, "reloc already running, cannot start");
3877 return -EINPROGRESS;
3878 }
3879
3880 if (atomic_read(&fs_info->reloc_cancel_req) > 0) {
3881 btrfs_info(fs_info, "chunk relocation canceled on start");
3882
3883
3884
3885
3886 atomic_set(&fs_info->reloc_cancel_req, 0);
3887 return -ECANCELED;
3888 }
3889 return 0;
3890 }
3891
3892
3893
3894
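/* Mark the end of a chunk relocation and wake up any waiters. */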
3895 static void reloc_chunk_end(struct btrfs_fs_info *fs_info)
3896 {
3897
3898 if (atomic_read(&fs_info->reloc_cancel_req) > 0)
3899 btrfs_info(fs_info, "chunk relocation canceled during operation");
3900 clear_and_wake_up_bit(BTRFS_FS_RELOC_RUNNING, &fs_info->flags);
3901 atomic_set(&fs_info->reloc_cancel_req, 0);
3902 }
3903
3904 static struct reloc_control *alloc_reloc_control(struct btrfs_fs_info *fs_info)
3905 {
3906 struct reloc_control *rc;
3907
3908 rc = kzalloc(sizeof(*rc), GFP_NOFS);
3909 if (!rc)
3910 return NULL;
3911
3912 INIT_LIST_HEAD(&rc->reloc_roots);
3913 INIT_LIST_HEAD(&rc->dirty_subvol_roots);
3914 btrfs_backref_init_cache(fs_info, &rc->backref_cache, 1);
3915 mapping_tree_init(&rc->reloc_root_tree);
3916 extent_io_tree_init(fs_info, &rc->processed_blocks,
3917 IO_TREE_RELOC_BLOCKS, NULL);
3918 return rc;
3919 }
3920
3921 static void free_reloc_control(struct reloc_control *rc)
3922 {
3923 struct mapping_node *node, *tmp;
3924
3925 free_reloc_roots(&rc->reloc_roots);
3926 rbtree_postorder_for_each_entry_safe(node, tmp,
3927 &rc->reloc_root_tree.rb_root, rb_node)
3928 kfree(node);
3929
3930 kfree(rc);
3931 }
3932
3933
3934
3935
3936 static void describe_relocation(struct btrfs_fs_info *fs_info,
3937 struct btrfs_block_group *block_group)
3938 {
3939 char buf[128] = {'\0'};
3940
3941 btrfs_describe_block_groups(block_group->flags, buf, sizeof(buf));
3942
3943 btrfs_info(fs_info,
3944 "relocating block group %llu flags %s",
3945 block_group->start, buf);
3946 }
3947
3948 static const char *stage_to_string(int stage)
3949 {
3950 if (stage == MOVE_DATA_EXTENTS)
3951 return "move data extents";
3952 if (stage == UPDATE_DATA_PTRS)
3953 return "update data pointers";
3954 return "unknown";
3955 }
3956
3957
3958
3959
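/* Relocate all extents in the block group starting at @group_start. */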
3960 int btrfs_relocate_block_group(struct btrfs_fs_info *fs_info, u64 group_start)
3961 {
3962 struct btrfs_block_group *bg;
3963 struct btrfs_root *extent_root = btrfs_extent_root(fs_info, group_start);
3964 struct reloc_control *rc;
3965 struct inode *inode;
3966 struct btrfs_path *path;
3967 int ret;
3968 int rw = 0;
3969 int err = 0;
3970
3971
3972
3973
3974
3975
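/*
 * Half-dropped snapshots left over from a previous mount must be fully
 * dropped before relocation starts, so wait for the cleaner to finish
 * any unfinished drops.
 */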
3976 ret = wait_on_bit(&fs_info->flags, BTRFS_FS_UNFINISHED_DROPS, TASK_INTERRUPTIBLE);
3977 if (ret)
3978 return ret;
3979
3980
3981 if (btrfs_fs_closing(fs_info))
3982 return -EINTR;
3983
3984 bg = btrfs_lookup_block_group(fs_info, group_start);
3985 if (!bg)
3986 return -ENOENT;
3987
3988
3989
3990
3991
3992
3993
3994
3995
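/*
 * Relocating a data block group creates ordered extents, so the caller
 * must hold a superblock write reference; otherwise the filesystem could
 * be frozen while unfinished ordered extents are still around.
 */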
3996 if (bg->flags & BTRFS_BLOCK_GROUP_DATA)
3997 ASSERT(sb_write_started(fs_info->sb));
3998
3999 if (btrfs_pinned_by_swapfile(fs_info, bg)) {
4000 btrfs_put_block_group(bg);
4001 return -ETXTBSY;
4002 }
4003
4004 rc = alloc_reloc_control(fs_info);
4005 if (!rc) {
4006 btrfs_put_block_group(bg);
4007 return -ENOMEM;
4008 }
4009
4010 ret = reloc_chunk_start(fs_info);
4011 if (ret < 0) {
4012 err = ret;
4013 goto out_put_bg;
4014 }
4015
4016 rc->extent_root = extent_root;
4017 rc->block_group = bg;
4018
4019 ret = btrfs_inc_block_group_ro(rc->block_group, true);
4020 if (ret) {
4021 err = ret;
4022 goto out;
4023 }
4024 rw = 1;
4025
4026 path = btrfs_alloc_path();
4027 if (!path) {
4028 err = -ENOMEM;
4029 goto out;
4030 }
4031
4032 inode = lookup_free_space_inode(rc->block_group, path);
4033 btrfs_free_path(path);
4034
4035 if (!IS_ERR(inode))
4036 ret = delete_block_group_cache(fs_info, rc->block_group, inode, 0);
4037 else
4038 ret = PTR_ERR(inode);
4039
4040 if (ret && ret != -ENOENT) {
4041 err = ret;
4042 goto out;
4043 }
4044
4045 rc->data_inode = create_reloc_inode(fs_info, rc->block_group);
4046 if (IS_ERR(rc->data_inode)) {
4047 err = PTR_ERR(rc->data_inode);
4048 rc->data_inode = NULL;
4049 goto out;
4050 }
4051
4052 describe_relocation(fs_info, rc->block_group);
4053
4054 btrfs_wait_block_group_reservations(rc->block_group);
4055 btrfs_wait_nocow_writers(rc->block_group);
4056 btrfs_wait_ordered_roots(fs_info, U64_MAX,
4057 rc->block_group->start,
4058 rc->block_group->length);
4059
4060 ret = btrfs_zone_finish(rc->block_group);
4061 WARN_ON(ret && ret != -EAGAIN);
4062
4063 while (1) {
4064 int finishes_stage;
4065
4066 mutex_lock(&fs_info->cleaner_mutex);
4067 ret = relocate_block_group(rc);
4068 mutex_unlock(&fs_info->cleaner_mutex);
4069 if (ret < 0)
4070 err = ret;
4071
4072 finishes_stage = rc->stage;
4073
4074
4075
4076
4077
4078
4079
4080
4081
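/*
 * Once the MOVE_DATA_EXTENTS pass found file extents, flush all ordered
 * extents of the reloc data inode and drop its page cache before moving
 * on to the UPDATE_DATA_PTRS stage.
 */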
4082 if (rc->stage == MOVE_DATA_EXTENTS && rc->found_file_extent) {
4083 ret = btrfs_wait_ordered_range(rc->data_inode, 0,
4084 (u64)-1);
4085 if (ret)
4086 err = ret;
4087 invalidate_mapping_pages(rc->data_inode->i_mapping,
4088 0, -1);
4089 rc->stage = UPDATE_DATA_PTRS;
4090 }
4091
4092 if (err < 0)
4093 goto out;
4094
4095 if (rc->extents_found == 0)
4096 break;
4097
4098 btrfs_info(fs_info, "found %llu extents, stage: %s",
4099 rc->extents_found, stage_to_string(finishes_stage));
4100 }
4101
4102 WARN_ON(rc->block_group->pinned > 0);
4103 WARN_ON(rc->block_group->reserved > 0);
4104 WARN_ON(rc->block_group->used > 0);
4105 out:
4106 if (err && rw)
4107 btrfs_dec_block_group_ro(rc->block_group);
4108 iput(rc->data_inode);
4109 out_put_bg:
4110 btrfs_put_block_group(bg);
4111 reloc_chunk_end(fs_info);
4112 free_reloc_control(rc);
4113 return err;
4114 }
4115
4116 static noinline_for_stack int mark_garbage_root(struct btrfs_root *root)
4117 {
4118 struct btrfs_fs_info *fs_info = root->fs_info;
4119 struct btrfs_trans_handle *trans;
4120 int ret, err;
4121
4122 trans = btrfs_start_transaction(fs_info->tree_root, 0);
4123 if (IS_ERR(trans))
4124 return PTR_ERR(trans);
4125
4126 memset(&root->root_item.drop_progress, 0,
4127 sizeof(root->root_item.drop_progress));
4128 btrfs_set_root_drop_level(&root->root_item, 0);
4129 btrfs_set_root_refs(&root->root_item, 0);
4130 ret = btrfs_update_root(trans, fs_info->tree_root,
4131 &root->root_key, &root->root_item);
4132
4133 err = btrfs_end_transaction(trans);
4134 if (err)
4135 return err;
4136 return ret;
4137 }
4138
4139
4140
4141
4142
4143
4144
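/*
 * Recover a relocation interrupted by a crash: read the reloc roots left
 * on disk and resume merging them with the corresponding fs trees, which
 * is important to keep tree blocks shared correctly.
 */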
4145 int btrfs_recover_relocation(struct btrfs_fs_info *fs_info)
4146 {
4147 LIST_HEAD(reloc_roots);
4148 struct btrfs_key key;
4149 struct btrfs_root *fs_root;
4150 struct btrfs_root *reloc_root;
4151 struct btrfs_path *path;
4152 struct extent_buffer *leaf;
4153 struct reloc_control *rc = NULL;
4154 struct btrfs_trans_handle *trans;
4155 int ret;
4156 int err = 0;
4157
4158 path = btrfs_alloc_path();
4159 if (!path)
4160 return -ENOMEM;
4161 path->reada = READA_BACK;
4162
4163 key.objectid = BTRFS_TREE_RELOC_OBJECTID;
4164 key.type = BTRFS_ROOT_ITEM_KEY;
4165 key.offset = (u64)-1;
4166
4167 while (1) {
4168 ret = btrfs_search_slot(NULL, fs_info->tree_root, &key,
4169 path, 0, 0);
4170 if (ret < 0) {
4171 err = ret;
4172 goto out;
4173 }
4174 if (ret > 0) {
4175 if (path->slots[0] == 0)
4176 break;
4177 path->slots[0]--;
4178 }
4179 leaf = path->nodes[0];
4180 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
4181 btrfs_release_path(path);
4182
4183 if (key.objectid != BTRFS_TREE_RELOC_OBJECTID ||
4184 key.type != BTRFS_ROOT_ITEM_KEY)
4185 break;
4186
4187 reloc_root = btrfs_read_tree_root(fs_info->tree_root, &key);
4188 if (IS_ERR(reloc_root)) {
4189 err = PTR_ERR(reloc_root);
4190 goto out;
4191 }
4192
4193 set_bit(BTRFS_ROOT_SHAREABLE, &reloc_root->state);
4194 list_add(&reloc_root->root_list, &reloc_roots);
4195
4196 if (btrfs_root_refs(&reloc_root->root_item) > 0) {
4197 fs_root = btrfs_get_fs_root(fs_info,
4198 reloc_root->root_key.offset, false);
4199 if (IS_ERR(fs_root)) {
4200 ret = PTR_ERR(fs_root);
4201 if (ret != -ENOENT) {
4202 err = ret;
4203 goto out;
4204 }
4205 ret = mark_garbage_root(reloc_root);
4206 if (ret < 0) {
4207 err = ret;
4208 goto out;
4209 }
4210 } else {
4211 btrfs_put_root(fs_root);
4212 }
4213 }
4214
4215 if (key.offset == 0)
4216 break;
4217
4218 key.offset--;
4219 }
4220 btrfs_release_path(path);
4221
4222 if (list_empty(&reloc_roots))
4223 goto out;
4224
4225 rc = alloc_reloc_control(fs_info);
4226 if (!rc) {
4227 err = -ENOMEM;
4228 goto out;
4229 }
4230
4231 ret = reloc_chunk_start(fs_info);
4232 if (ret < 0) {
4233 err = ret;
4234 goto out_end;
4235 }
4236
4237 rc->extent_root = btrfs_extent_root(fs_info, 0);
4238
4239 set_reloc_control(rc);
4240
4241 trans = btrfs_join_transaction(rc->extent_root);
4242 if (IS_ERR(trans)) {
4243 err = PTR_ERR(trans);
4244 goto out_unset;
4245 }
4246
4247 rc->merge_reloc_tree = 1;
4248
4249 while (!list_empty(&reloc_roots)) {
4250 reloc_root = list_entry(reloc_roots.next,
4251 struct btrfs_root, root_list);
4252 list_del(&reloc_root->root_list);
4253
4254 if (btrfs_root_refs(&reloc_root->root_item) == 0) {
4255 list_add_tail(&reloc_root->root_list,
4256 &rc->reloc_roots);
4257 continue;
4258 }
4259
4260 fs_root = btrfs_get_fs_root(fs_info, reloc_root->root_key.offset,
4261 false);
4262 if (IS_ERR(fs_root)) {
4263 err = PTR_ERR(fs_root);
4264 list_add_tail(&reloc_root->root_list, &reloc_roots);
4265 btrfs_end_transaction(trans);
4266 goto out_unset;
4267 }
4268
4269 err = __add_reloc_root(reloc_root);
4270 ASSERT(err != -EEXIST);
4271 if (err) {
4272 list_add_tail(&reloc_root->root_list, &reloc_roots);
4273 btrfs_put_root(fs_root);
4274 btrfs_end_transaction(trans);
4275 goto out_unset;
4276 }
4277 fs_root->reloc_root = btrfs_grab_root(reloc_root);
4278 btrfs_put_root(fs_root);
4279 }
4280
4281 err = btrfs_commit_transaction(trans);
4282 if (err)
4283 goto out_unset;
4284
4285 merge_reloc_roots(rc);
4286
4287 unset_reloc_control(rc);
4288
4289 trans = btrfs_join_transaction(rc->extent_root);
4290 if (IS_ERR(trans)) {
4291 err = PTR_ERR(trans);
4292 goto out_clean;
4293 }
4294 err = btrfs_commit_transaction(trans);
4295 out_clean:
4296 ret = clean_dirty_subvols(rc);
4297 if (ret < 0 && !err)
4298 err = ret;
4299 out_unset:
4300 unset_reloc_control(rc);
4301 out_end:
4302 reloc_chunk_end(fs_info);
4303 free_reloc_control(rc);
4304 out:
4305 free_reloc_roots(&reloc_roots);
4306
4307 btrfs_free_path(path);
4308
4309 if (err == 0) {
4310
4311 fs_root = btrfs_grab_root(fs_info->data_reloc_root);
4312 ASSERT(fs_root);
4313 err = btrfs_orphan_cleanup(fs_root);
4314 btrfs_put_root(fs_root);
4315 }
4316 return err;
4317 }
4318
4319
4320
4321
4322
4323
4324
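/*
 * Clone the existing checksums of a data extent into the ordered extent
 * created for relocation instead of recomputing them; this also handles
 * nodatasum extents correctly.
 */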
4325 int btrfs_reloc_clone_csums(struct btrfs_inode *inode, u64 file_pos, u64 len)
4326 {
4327 struct btrfs_fs_info *fs_info = inode->root->fs_info;
4328 struct btrfs_root *csum_root;
4329 struct btrfs_ordered_sum *sums;
4330 struct btrfs_ordered_extent *ordered;
4331 int ret;
4332 u64 disk_bytenr;
4333 u64 new_bytenr;
4334 LIST_HEAD(list);
4335
4336 ordered = btrfs_lookup_ordered_extent(inode, file_pos);
4337 BUG_ON(ordered->file_offset != file_pos || ordered->num_bytes != len);
4338
4339 disk_bytenr = file_pos + inode->index_cnt;
4340 csum_root = btrfs_csum_root(fs_info, disk_bytenr);
4341 ret = btrfs_lookup_csums_range(csum_root, disk_bytenr,
4342 disk_bytenr + len - 1, &list, 0);
4343 if (ret)
4344 goto out;
4345
4346 while (!list_empty(&list)) {
4347 sums = list_entry(list.next, struct btrfs_ordered_sum, list);
4348 list_del_init(&sums->list);
4349
4350
4351
4352
4353
4354
4355
4356
4357
4358
4359
4360
4361
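/*
 * Offset new_bytenr by where this csum range sits inside the looked-up
 * range: whole preallocated extents are read in, but only part of them
 * may have been written, so each csum has to keep its offset relative to
 * the start of the ordered extent.
 */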
4362 new_bytenr = ordered->disk_bytenr + sums->bytenr - disk_bytenr;
4363 sums->bytenr = new_bytenr;
4364
4365 btrfs_add_ordered_sum(ordered, sums);
4366 }
4367 out:
4368 btrfs_put_ordered_extent(ordered);
4369 return ret;
4370 }
4371
4372 int btrfs_reloc_cow_block(struct btrfs_trans_handle *trans,
4373 struct btrfs_root *root, struct extent_buffer *buf,
4374 struct extent_buffer *cow)
4375 {
4376 struct btrfs_fs_info *fs_info = root->fs_info;
4377 struct reloc_control *rc;
4378 struct btrfs_backref_node *node;
4379 int first_cow = 0;
4380 int level;
4381 int ret = 0;
4382
4383 rc = fs_info->reloc_ctl;
4384 if (!rc)
4385 return 0;
4386
4387 BUG_ON(rc->stage == UPDATE_DATA_PTRS && btrfs_is_data_reloc_root(root));
4388
4389 level = btrfs_header_level(buf);
4390 if (btrfs_header_generation(buf) <=
4391 btrfs_root_last_snapshot(&root->root_item))
4392 first_cow = 1;
4393
4394 if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID &&
4395 rc->create_reloc_tree) {
4396 WARN_ON(!first_cow && level == 0);
4397
4398 node = rc->backref_cache.path[level];
4399 BUG_ON(node->bytenr != buf->start &&
4400 node->new_bytenr != buf->start);
4401
4402 btrfs_backref_drop_node_buffer(node);
4403 atomic_inc(&cow->refs);
4404 node->eb = cow;
4405 node->new_bytenr = cow->start;
4406
4407 if (!node->pending) {
4408 list_move_tail(&node->list,
4409 &rc->backref_cache.pending[level]);
4410 node->pending = 1;
4411 }
4412
4413 if (first_cow)
4414 mark_block_processed(rc, node);
4415
4416 if (first_cow && level > 0)
4417 rc->nodes_relocated += buf->len;
4418 }
4419
4420 if (level == 0 && first_cow && rc->stage == UPDATE_DATA_PTRS)
4421 ret = replace_file_extents(trans, rc, root, cow);
4422 return ret;
4423 }
4424
4425
4426
4427
4428
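/*
 * Called before a snapshot is created; adds the metadata reservation
 * needed for relocating the tree blocks shared with the snapshot.
 */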
4429 void btrfs_reloc_pre_snapshot(struct btrfs_pending_snapshot *pending,
4430 u64 *bytes_to_reserve)
4431 {
4432 struct btrfs_root *root = pending->root;
4433 struct reloc_control *rc = root->fs_info->reloc_ctl;
4434
4435 if (!rc || !have_reloc_root(root))
4436 return;
4437
4438 if (!rc->merge_reloc_tree)
4439 return;
4440
4441 root = root->reloc_root;
4442 BUG_ON(btrfs_root_refs(&root->root_item) == 0);
4443
4444
4445
4446
4447
4448
4449
4450
4451
4452
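/*
 * Relocation is merging reloc trees.  In the worst case merging a reloc
 * tree uses twice the size of the relocated tree nodes: half for COWing
 * the reloc tree and half for COWing the relocated tree nodes.
 */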
4453 *bytes_to_reserve += rc->nodes_relocated;
4454 }
4455
4456
4457
4458
4459
4460
4461
4462
4463
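/*
 * Called after a snapshot is created: migrate the block reservation made
 * in btrfs_reloc_pre_snapshot() and create a reloc root for the new
 * snapshot, leaving references on the reloc root held by both
 * new_root->reloc_root and rc->reloc_roots.
 */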
4464 int btrfs_reloc_post_snapshot(struct btrfs_trans_handle *trans,
4465 struct btrfs_pending_snapshot *pending)
4466 {
4467 struct btrfs_root *root = pending->root;
4468 struct btrfs_root *reloc_root;
4469 struct btrfs_root *new_root;
4470 struct reloc_control *rc = root->fs_info->reloc_ctl;
4471 int ret;
4472
4473 if (!rc || !have_reloc_root(root))
4474 return 0;
4475
4476 rc = root->fs_info->reloc_ctl;
4477 rc->merging_rsv_size += rc->nodes_relocated;
4478
4479 if (rc->merge_reloc_tree) {
4480 ret = btrfs_block_rsv_migrate(&pending->block_rsv,
4481 rc->block_rsv,
4482 rc->nodes_relocated, true);
4483 if (ret)
4484 return ret;
4485 }
4486
4487 new_root = pending->snap;
4488 reloc_root = create_reloc_root(trans, root->reloc_root,
4489 new_root->root_key.objectid);
4490 if (IS_ERR(reloc_root))
4491 return PTR_ERR(reloc_root);
4492
4493 ret = __add_reloc_root(reloc_root);
4494 ASSERT(ret != -EEXIST);
4495 if (ret) {
4496
4497 btrfs_put_root(reloc_root);
4498 return ret;
4499 }
4500 new_root->reloc_root = btrfs_grab_root(reloc_root);
4501
4502 if (rc->create_reloc_tree)
4503 ret = clone_backref_node(trans, rc, root, reloc_root);
4504 return ret;
4505 }