// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright (C) 2007 Oracle.  All rights reserved.
 */

#include <linux/sched.h>
#include <linux/sched/signal.h>
#include <linux/pagemap.h>
#include <linux/writeback.h>
#include <linux/blkdev.h>
#include <linux/sort.h>
#include <linux/rcupdate.h>
#include <linux/kthread.h>
#include <linux/slab.h>
#include <linux/ratelimit.h>
#include <linux/percpu_counter.h>
#include <linux/lockdep.h>
#include <linux/crc32c.h>
#include "misc.h"
#include "tree-log.h"
#include "disk-io.h"
#include "print-tree.h"
#include "volumes.h"
#include "raid56.h"
#include "locking.h"
#include "free-space-cache.h"
#include "free-space-tree.h"
#include "sysfs.h"
#include "qgroup.h"
#include "ref-verify.h"
#include "space-info.h"
#include "block-rsv.h"
#include "delalloc-space.h"
#include "block-group.h"
#include "discard.h"
#include "rcu-string.h"
#include "zoned.h"
#include "dev-replace.h"

#undef SCRAMBLE_DELAYED_REFS

static int __btrfs_free_extent(struct btrfs_trans_handle *trans,
			       struct btrfs_delayed_ref_node *node, u64 parent,
			       u64 root_objectid, u64 owner_objectid,
			       u64 owner_offset, int refs_to_drop,
			       struct btrfs_delayed_extent_op *extra_op);
static void __run_delayed_extent_op(struct btrfs_delayed_extent_op *extent_op,
				    struct extent_buffer *leaf,
				    struct btrfs_extent_item *ei);
static int alloc_reserved_file_extent(struct btrfs_trans_handle *trans,
				      u64 parent, u64 root_objectid,
				      u64 flags, u64 owner, u64 offset,
				      struct btrfs_key *ins, int ref_mod);
static int alloc_reserved_tree_block(struct btrfs_trans_handle *trans,
				     struct btrfs_delayed_ref_node *node,
				     struct btrfs_delayed_extent_op *extent_op);
static int find_next_key(struct btrfs_path *path, int level,
			 struct btrfs_key *key);

static int block_group_bits(struct btrfs_block_group *cache, u64 bits)
{
	return (cache->flags & bits) == bits;
}

int btrfs_add_excluded_extent(struct btrfs_fs_info *fs_info,
			      u64 start, u64 num_bytes)
{
	u64 end = start + num_bytes - 1;
	set_extent_bits(&fs_info->excluded_extents, start, end,
			EXTENT_UPTODATE);
	return 0;
}

void btrfs_free_excluded_extents(struct btrfs_block_group *cache)
{
	struct btrfs_fs_info *fs_info = cache->fs_info;
	u64 start, end;

	start = cache->start;
	end = start + cache->length - 1;

	clear_extent_bits(&fs_info->excluded_extents, start, end,
			  EXTENT_UPTODATE);
}
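
/* simple helper to search for an existing data extent at a given offset */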
int btrfs_lookup_data_extent(struct btrfs_fs_info *fs_info, u64 start, u64 len)
{
	struct btrfs_root *root = btrfs_extent_root(fs_info, start);
	int ret;
	struct btrfs_key key;
	struct btrfs_path *path;

	path = btrfs_alloc_path();
	if (!path)
		return -ENOMEM;

	key.objectid = start;
	key.offset = len;
	key.type = BTRFS_EXTENT_ITEM_KEY;
	ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
	btrfs_free_path(path);
	return ret;
}
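
/*
 * helper function to lookup reference count and flags of a tree block.
 *
 * The head node for a delayed ref is used to store the sum of all the
 * reference count modifications queued up in the rbtree.  The head node
 * may also store the extent flags to set.  This way you can check to see
 * what the reference count and extent flags would be if all of the
 * delayed refs are not processed.
 */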
int btrfs_lookup_extent_info(struct btrfs_trans_handle *trans,
			     struct btrfs_fs_info *fs_info, u64 bytenr,
			     u64 offset, int metadata, u64 *refs, u64 *flags)
{
	struct btrfs_root *extent_root;
	struct btrfs_delayed_ref_head *head;
	struct btrfs_delayed_ref_root *delayed_refs;
	struct btrfs_path *path;
	struct btrfs_extent_item *ei;
	struct extent_buffer *leaf;
	struct btrfs_key key;
	u32 item_size;
	u64 num_refs;
	u64 extent_flags;
	int ret;
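
	/*
	 * If we don't have skinny metadata, don't bother doing anything
	 * different
	 */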
	if (metadata && !btrfs_fs_incompat(fs_info, SKINNY_METADATA)) {
		offset = fs_info->nodesize;
		metadata = 0;
	}

	path = btrfs_alloc_path();
	if (!path)
		return -ENOMEM;

	if (!trans) {
		path->skip_locking = 1;
		path->search_commit_root = 1;
	}

search_again:
	key.objectid = bytenr;
	key.offset = offset;
	if (metadata)
		key.type = BTRFS_METADATA_ITEM_KEY;
	else
		key.type = BTRFS_EXTENT_ITEM_KEY;

	extent_root = btrfs_extent_root(fs_info, bytenr);
	ret = btrfs_search_slot(NULL, extent_root, &key, path, 0, 0);
	if (ret < 0)
		goto out_free;

	if (ret > 0 && metadata && key.type == BTRFS_METADATA_ITEM_KEY) {
		if (path->slots[0]) {
			path->slots[0]--;
			btrfs_item_key_to_cpu(path->nodes[0], &key,
					      path->slots[0]);
			if (key.objectid == bytenr &&
			    key.type == BTRFS_EXTENT_ITEM_KEY &&
			    key.offset == fs_info->nodesize)
				ret = 0;
		}
	}

	if (ret == 0) {
		leaf = path->nodes[0];
		item_size = btrfs_item_size(leaf, path->slots[0]);
		if (item_size >= sizeof(*ei)) {
			ei = btrfs_item_ptr(leaf, path->slots[0],
					    struct btrfs_extent_item);
			num_refs = btrfs_extent_refs(leaf, ei);
			extent_flags = btrfs_extent_flags(leaf, ei);
		} else {
			ret = -EINVAL;
			btrfs_print_v0_err(fs_info);
			if (trans)
				btrfs_abort_transaction(trans, ret);
			else
				btrfs_handle_fs_error(fs_info, ret, NULL);

			goto out_free;
		}

		BUG_ON(num_refs == 0);
	} else {
		num_refs = 0;
		extent_flags = 0;
		ret = 0;
	}

	if (!trans)
		goto out;

	delayed_refs = &trans->transaction->delayed_refs;
	spin_lock(&delayed_refs->lock);
	head = btrfs_find_delayed_ref_head(delayed_refs, bytenr);
	if (head) {
		if (!mutex_trylock(&head->mutex)) {
			refcount_inc(&head->refs);
			spin_unlock(&delayed_refs->lock);

			btrfs_release_path(path);
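
			/*
			 * Mutex was contended, block until it's released and
			 * try again
			 */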
			mutex_lock(&head->mutex);
			mutex_unlock(&head->mutex);
			btrfs_put_delayed_ref_head(head);
			goto search_again;
		}
		spin_lock(&head->lock);
		if (head->extent_op && head->extent_op->update_flags)
			extent_flags |= head->extent_op->flags_to_set;
		else
			BUG_ON(num_refs == 0);

		num_refs += head->ref_mod;
		spin_unlock(&head->lock);
		mutex_unlock(&head->mutex);
	}
	spin_unlock(&delayed_refs->lock);
out:
	WARN_ON(num_refs == 0);
	if (refs)
		*refs = num_refs;
	if (flags)
		*flags = extent_flags;
out_free:
	btrfs_free_path(path);
	return ret;
}
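
/*
 * Back reference rules.  Back refs have three main goals:
 *
 * 1) differentiate between all holders of references to an extent so that
 *    when a reference is dropped we can make sure it was a valid reference
 *    before freeing the extent.
 *
 * 2) Provide enough information to quickly find the holders of an extent
 *    if we notice a given block is corrupted or bad.
 *
 * 3) Make it easy to migrate blocks for FS shrinking or storage pool
 *    maintenance.  This is actually the same as #2, but with a slightly
 *    different use case.
 *
 * There are two kinds of back refs.  The implicit back refs is optimized
 * for pointers in non-shared tree blocks.  For a given pointer in a block,
 * back refs of this kind provide information about the block's owner tree
 * and the pointer's key.  These information allow us to find the block by
 * searching the tree.  The full back refs is for pointers in tree blocks
 * not referenced by their owner trees.  The location of the tree block is
 * recorded in the back refs.  Actually the full back refs is just one
 * pointer information: the tree block it references and its level.
 *
 * The extent ref structure for the implicit back refs has fields for:
 *
 * - Objectid of the subvolume root
 * - objectid of the file holding the reference
 * - original offset in the file
 * - how many bookend extents
 *
 * The key offset for the implicit back refs is hash of the first
 * three fields.
 *
 * The extent ref structure for the full back refs has field for:
 *
 * - number of pointers in the tree leaf
 *
 * The key offset for the full back refs is the first byte of
 * the tree leaf.
 *
 * When a file extent is allocated, the implicit back refs is used.
 * The fields are filled in:
 *
 *     (root_key.objectid, inode objectid, offset in file, 1)
 *
 * When a file extent is removed by file truncation, we find the
 * corresponding implicit back refs and check the following fields:
 *
 *     (btrfs_header_owner(leaf), inode objectid, offset in file)
 *
 * Btree extents can be referenced by different subvolumes.  Both the
 * implicit back refs and the full back refs for tree blocks only consist
 * of a key.  The key offset for the implicit back refs is the objectid of
 * the block's owner tree.  The key offset for the full back refs is the
 * first byte of the parent block.
 *
 * When implicit back refs is used, information about the lowest key and
 * level of the tree block are required.  These information are stored in
 * the tree block info structure.
 */

/*
 * is_data == BTRFS_REF_TYPE_BLOCK, tree block type is required,
 * is_data == BTRFS_REF_TYPE_DATA, data type is required,
 * is_data == BTRFS_REF_TYPE_ANY, either type is OK.
 */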
int btrfs_get_extent_inline_ref_type(const struct extent_buffer *eb,
				     struct btrfs_extent_inline_ref *iref,
				     enum btrfs_inline_ref_type is_data)
{
	int type = btrfs_extent_inline_ref_type(eb, iref);
	u64 offset = btrfs_extent_inline_ref_offset(eb, iref);

	if (type == BTRFS_TREE_BLOCK_REF_KEY ||
	    type == BTRFS_SHARED_BLOCK_REF_KEY ||
	    type == BTRFS_SHARED_DATA_REF_KEY ||
	    type == BTRFS_EXTENT_DATA_REF_KEY) {
		if (is_data == BTRFS_REF_TYPE_BLOCK) {
			if (type == BTRFS_TREE_BLOCK_REF_KEY)
				return type;
			if (type == BTRFS_SHARED_BLOCK_REF_KEY) {
				ASSERT(eb->fs_info);
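				/*
				 * Every shared one has parent tree block,
				 * which must be aligned to sector size.
				 */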
				if (offset &&
				    IS_ALIGNED(offset, eb->fs_info->sectorsize))
					return type;
			}
		} else if (is_data == BTRFS_REF_TYPE_DATA) {
			if (type == BTRFS_EXTENT_DATA_REF_KEY)
				return type;
			if (type == BTRFS_SHARED_DATA_REF_KEY) {
				ASSERT(eb->fs_info);
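				/*
				 * Every shared one has parent tree block,
				 * which must be aligned to sector size.
				 */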
				if (offset &&
				    IS_ALIGNED(offset, eb->fs_info->sectorsize))
					return type;
			}
		} else {
			ASSERT(is_data == BTRFS_REF_TYPE_ANY);
			return type;
		}
	}

	btrfs_print_leaf((struct extent_buffer *)eb);
	btrfs_err(eb->fs_info,
		  "eb %llu iref 0x%lx invalid extent inline ref type %d",
		  eb->start, (unsigned long)iref, type);
	WARN_ON(1);

	return BTRFS_REF_TYPE_INVALID;
}

u64 hash_extent_data_ref(u64 root_objectid, u64 owner, u64 offset)
{
	u32 high_crc = ~(u32)0;
	u32 low_crc = ~(u32)0;
	__le64 lenum;

	lenum = cpu_to_le64(root_objectid);
	high_crc = btrfs_crc32c(high_crc, &lenum, sizeof(lenum));
	lenum = cpu_to_le64(owner);
	low_crc = btrfs_crc32c(low_crc, &lenum, sizeof(lenum));
	lenum = cpu_to_le64(offset);
	low_crc = btrfs_crc32c(low_crc, &lenum, sizeof(lenum));

	return ((u64)high_crc << 31) ^ (u64)low_crc;
}

static u64 hash_extent_data_ref_item(struct extent_buffer *leaf,
				     struct btrfs_extent_data_ref *ref)
{
	return hash_extent_data_ref(btrfs_extent_data_ref_root(leaf, ref),
				    btrfs_extent_data_ref_objectid(leaf, ref),
				    btrfs_extent_data_ref_offset(leaf, ref));
}

static int match_extent_data_ref(struct extent_buffer *leaf,
				 struct btrfs_extent_data_ref *ref,
				 u64 root_objectid, u64 owner, u64 offset)
{
	if (btrfs_extent_data_ref_root(leaf, ref) != root_objectid ||
	    btrfs_extent_data_ref_objectid(leaf, ref) != owner ||
	    btrfs_extent_data_ref_offset(leaf, ref) != offset)
		return 0;
	return 1;
}

static noinline int lookup_extent_data_ref(struct btrfs_trans_handle *trans,
					   struct btrfs_path *path,
					   u64 bytenr, u64 parent,
					   u64 root_objectid,
					   u64 owner, u64 offset)
{
	struct btrfs_root *root = btrfs_extent_root(trans->fs_info, bytenr);
	struct btrfs_key key;
	struct btrfs_extent_data_ref *ref;
	struct extent_buffer *leaf;
	u32 nritems;
	int ret;
	int recow;
	int err = -ENOENT;

	key.objectid = bytenr;
	if (parent) {
		key.type = BTRFS_SHARED_DATA_REF_KEY;
		key.offset = parent;
	} else {
		key.type = BTRFS_EXTENT_DATA_REF_KEY;
		key.offset = hash_extent_data_ref(root_objectid,
						  owner, offset);
	}
again:
	recow = 0;
	ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
	if (ret < 0) {
		err = ret;
		goto fail;
	}

	if (parent) {
		if (!ret)
			return 0;
		goto fail;
	}

	leaf = path->nodes[0];
	nritems = btrfs_header_nritems(leaf);
	while (1) {
		if (path->slots[0] >= nritems) {
			ret = btrfs_next_leaf(root, path);
			if (ret < 0)
				err = ret;
			if (ret)
				goto fail;

			leaf = path->nodes[0];
			nritems = btrfs_header_nritems(leaf);
			recow = 1;
		}

		btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
		if (key.objectid != bytenr ||
		    key.type != BTRFS_EXTENT_DATA_REF_KEY)
			goto fail;

		ref = btrfs_item_ptr(leaf, path->slots[0],
				     struct btrfs_extent_data_ref);

		if (match_extent_data_ref(leaf, ref, root_objectid,
					  owner, offset)) {
			if (recow) {
				btrfs_release_path(path);
				goto again;
			}
			err = 0;
			break;
		}
		path->slots[0]++;
	}
fail:
	return err;
}

static noinline int insert_extent_data_ref(struct btrfs_trans_handle *trans,
					   struct btrfs_path *path,
					   u64 bytenr, u64 parent,
					   u64 root_objectid, u64 owner,
					   u64 offset, int refs_to_add)
{
	struct btrfs_root *root = btrfs_extent_root(trans->fs_info, bytenr);
	struct btrfs_key key;
	struct extent_buffer *leaf;
	u32 size;
	u32 num_refs;
	int ret;

	key.objectid = bytenr;
	if (parent) {
		key.type = BTRFS_SHARED_DATA_REF_KEY;
		key.offset = parent;
		size = sizeof(struct btrfs_shared_data_ref);
	} else {
		key.type = BTRFS_EXTENT_DATA_REF_KEY;
		key.offset = hash_extent_data_ref(root_objectid,
						  owner, offset);
		size = sizeof(struct btrfs_extent_data_ref);
	}

	ret = btrfs_insert_empty_item(trans, root, path, &key, size);
	if (ret && ret != -EEXIST)
		goto fail;

	leaf = path->nodes[0];
	if (parent) {
		struct btrfs_shared_data_ref *ref;
		ref = btrfs_item_ptr(leaf, path->slots[0],
				     struct btrfs_shared_data_ref);
		if (ret == 0) {
			btrfs_set_shared_data_ref_count(leaf, ref, refs_to_add);
		} else {
			num_refs = btrfs_shared_data_ref_count(leaf, ref);
			num_refs += refs_to_add;
			btrfs_set_shared_data_ref_count(leaf, ref, num_refs);
		}
	} else {
		struct btrfs_extent_data_ref *ref;
		while (ret == -EEXIST) {
			ref = btrfs_item_ptr(leaf, path->slots[0],
					     struct btrfs_extent_data_ref);
			if (match_extent_data_ref(leaf, ref, root_objectid,
						  owner, offset))
				break;
			btrfs_release_path(path);
			key.offset++;
			ret = btrfs_insert_empty_item(trans, root, path, &key,
						      size);
			if (ret && ret != -EEXIST)
				goto fail;

			leaf = path->nodes[0];
		}
		ref = btrfs_item_ptr(leaf, path->slots[0],
				     struct btrfs_extent_data_ref);
		if (ret == 0) {
			btrfs_set_extent_data_ref_root(leaf, ref,
						       root_objectid);
			btrfs_set_extent_data_ref_objectid(leaf, ref, owner);
			btrfs_set_extent_data_ref_offset(leaf, ref, offset);
			btrfs_set_extent_data_ref_count(leaf, ref, refs_to_add);
		} else {
			num_refs = btrfs_extent_data_ref_count(leaf, ref);
			num_refs += refs_to_add;
			btrfs_set_extent_data_ref_count(leaf, ref, num_refs);
		}
	}
	btrfs_mark_buffer_dirty(leaf);
	ret = 0;
fail:
	btrfs_release_path(path);
	return ret;
}

static noinline int remove_extent_data_ref(struct btrfs_trans_handle *trans,
					   struct btrfs_root *root,
					   struct btrfs_path *path,
					   int refs_to_drop)
{
	struct btrfs_key key;
	struct btrfs_extent_data_ref *ref1 = NULL;
	struct btrfs_shared_data_ref *ref2 = NULL;
	struct extent_buffer *leaf;
	u32 num_refs = 0;
	int ret = 0;

	leaf = path->nodes[0];
	btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);

	if (key.type == BTRFS_EXTENT_DATA_REF_KEY) {
		ref1 = btrfs_item_ptr(leaf, path->slots[0],
				      struct btrfs_extent_data_ref);
		num_refs = btrfs_extent_data_ref_count(leaf, ref1);
	} else if (key.type == BTRFS_SHARED_DATA_REF_KEY) {
		ref2 = btrfs_item_ptr(leaf, path->slots[0],
				      struct btrfs_shared_data_ref);
		num_refs = btrfs_shared_data_ref_count(leaf, ref2);
	} else if (unlikely(key.type == BTRFS_EXTENT_REF_V0_KEY)) {
		btrfs_print_v0_err(trans->fs_info);
		btrfs_abort_transaction(trans, -EINVAL);
		return -EINVAL;
	} else {
		BUG();
	}

	BUG_ON(num_refs < refs_to_drop);
	num_refs -= refs_to_drop;

	if (num_refs == 0) {
		ret = btrfs_del_item(trans, root, path);
	} else {
		if (key.type == BTRFS_EXTENT_DATA_REF_KEY)
			btrfs_set_extent_data_ref_count(leaf, ref1, num_refs);
		else if (key.type == BTRFS_SHARED_DATA_REF_KEY)
			btrfs_set_shared_data_ref_count(leaf, ref2, num_refs);
		btrfs_mark_buffer_dirty(leaf);
	}
	return ret;
}

static noinline u32 extent_data_ref_count(struct btrfs_path *path,
					  struct btrfs_extent_inline_ref *iref)
{
	struct btrfs_key key;
	struct extent_buffer *leaf;
	struct btrfs_extent_data_ref *ref1;
	struct btrfs_shared_data_ref *ref2;
	u32 num_refs = 0;
	int type;

	leaf = path->nodes[0];
	btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);

	BUG_ON(key.type == BTRFS_EXTENT_REF_V0_KEY);
	if (iref) {
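		/*
		 * If type is invalid, we should have bailed out earlier than
		 * this call.
		 */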
		type = btrfs_get_extent_inline_ref_type(leaf, iref, BTRFS_REF_TYPE_DATA);
		ASSERT(type != BTRFS_REF_TYPE_INVALID);
		if (type == BTRFS_EXTENT_DATA_REF_KEY) {
			ref1 = (struct btrfs_extent_data_ref *)(&iref->offset);
			num_refs = btrfs_extent_data_ref_count(leaf, ref1);
		} else {
			ref2 = (struct btrfs_shared_data_ref *)(iref + 1);
			num_refs = btrfs_shared_data_ref_count(leaf, ref2);
		}
	} else if (key.type == BTRFS_EXTENT_DATA_REF_KEY) {
		ref1 = btrfs_item_ptr(leaf, path->slots[0],
				      struct btrfs_extent_data_ref);
		num_refs = btrfs_extent_data_ref_count(leaf, ref1);
	} else if (key.type == BTRFS_SHARED_DATA_REF_KEY) {
		ref2 = btrfs_item_ptr(leaf, path->slots[0],
				      struct btrfs_shared_data_ref);
		num_refs = btrfs_shared_data_ref_count(leaf, ref2);
	} else {
		WARN_ON(1);
	}
	return num_refs;
}

static noinline int lookup_tree_block_ref(struct btrfs_trans_handle *trans,
					  struct btrfs_path *path,
					  u64 bytenr, u64 parent,
					  u64 root_objectid)
{
	struct btrfs_root *root = btrfs_extent_root(trans->fs_info, bytenr);
	struct btrfs_key key;
	int ret;

	key.objectid = bytenr;
	if (parent) {
		key.type = BTRFS_SHARED_BLOCK_REF_KEY;
		key.offset = parent;
	} else {
		key.type = BTRFS_TREE_BLOCK_REF_KEY;
		key.offset = root_objectid;
	}

	ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
	if (ret > 0)
		ret = -ENOENT;
	return ret;
}

static noinline int insert_tree_block_ref(struct btrfs_trans_handle *trans,
					  struct btrfs_path *path,
					  u64 bytenr, u64 parent,
					  u64 root_objectid)
{
	struct btrfs_root *root = btrfs_extent_root(trans->fs_info, bytenr);
	struct btrfs_key key;
	int ret;

	key.objectid = bytenr;
	if (parent) {
		key.type = BTRFS_SHARED_BLOCK_REF_KEY;
		key.offset = parent;
	} else {
		key.type = BTRFS_TREE_BLOCK_REF_KEY;
		key.offset = root_objectid;
	}

	ret = btrfs_insert_empty_item(trans, root, path, &key, 0);
	btrfs_release_path(path);
	return ret;
}

static inline int extent_ref_type(u64 parent, u64 owner)
{
	int type;
	if (owner < BTRFS_FIRST_FREE_OBJECTID) {
		if (parent > 0)
			type = BTRFS_SHARED_BLOCK_REF_KEY;
		else
			type = BTRFS_TREE_BLOCK_REF_KEY;
	} else {
		if (parent > 0)
			type = BTRFS_SHARED_DATA_REF_KEY;
		else
			type = BTRFS_EXTENT_DATA_REF_KEY;
	}
	return type;
}

static int find_next_key(struct btrfs_path *path, int level,
			 struct btrfs_key *key)

{
	for (; level < BTRFS_MAX_LEVEL; level++) {
		if (!path->nodes[level])
			break;
		if (path->slots[level] + 1 >=
		    btrfs_header_nritems(path->nodes[level]))
			continue;
		if (level == 0)
			btrfs_item_key_to_cpu(path->nodes[level], key,
					      path->slots[level] + 1);
		else
			btrfs_node_key_to_cpu(path->nodes[level], key,
					      path->slots[level] + 1);
		return 0;
	}
	return 1;
}
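
/*
 * look for inline back ref. if back ref is found, *ref_ret is set
 * to the address of inline back ref, and 0 is returned.
 *
 * if back ref isn't found, *ref_ret is set to the address where it
 * should be inserted, and -ENOENT is returned.
 *
 * if insert is true and there are too many inline back refs, the path
 * points to the extent item, and -EAGAIN is returned.
 *
 * NOTE: inline back refs are ordered in the same way that back ref
 *	 items in the tree are ordered.
 */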
static noinline_for_stack
int lookup_inline_extent_backref(struct btrfs_trans_handle *trans,
				 struct btrfs_path *path,
				 struct btrfs_extent_inline_ref **ref_ret,
				 u64 bytenr, u64 num_bytes,
				 u64 parent, u64 root_objectid,
				 u64 owner, u64 offset, int insert)
{
	struct btrfs_fs_info *fs_info = trans->fs_info;
	struct btrfs_root *root = btrfs_extent_root(fs_info, bytenr);
	struct btrfs_key key;
	struct extent_buffer *leaf;
	struct btrfs_extent_item *ei;
	struct btrfs_extent_inline_ref *iref;
	u64 flags;
	u64 item_size;
	unsigned long ptr;
	unsigned long end;
	int extra_size;
	int type;
	int want;
	int ret;
	int err = 0;
	bool skinny_metadata = btrfs_fs_incompat(fs_info, SKINNY_METADATA);
	int needed;

	key.objectid = bytenr;
	key.type = BTRFS_EXTENT_ITEM_KEY;
	key.offset = num_bytes;

	want = extent_ref_type(parent, owner);
	if (insert) {
		extra_size = btrfs_extent_inline_ref_size(want);
		path->search_for_extension = 1;
		path->keep_locks = 1;
	} else
		extra_size = -1;
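
	/*
	 * Owner is our level, so we can just add one to get the level for the
	 * block we are interested in.
	 */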
	if (skinny_metadata && owner < BTRFS_FIRST_FREE_OBJECTID) {
		key.type = BTRFS_METADATA_ITEM_KEY;
		key.offset = owner;
	}

again:
	ret = btrfs_search_slot(trans, root, &key, path, extra_size, 1);
	if (ret < 0) {
		err = ret;
		goto out;
	}
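
	/*
	 * We may be a newly converted file system which still has the old fat
	 * extent entries for metadata, so try and see if we have one of those.
	 */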
	if (ret > 0 && skinny_metadata) {
		skinny_metadata = false;
		if (path->slots[0]) {
			path->slots[0]--;
			btrfs_item_key_to_cpu(path->nodes[0], &key,
					      path->slots[0]);
			if (key.objectid == bytenr &&
			    key.type == BTRFS_EXTENT_ITEM_KEY &&
			    key.offset == num_bytes)
				ret = 0;
		}
		if (ret) {
			key.objectid = bytenr;
			key.type = BTRFS_EXTENT_ITEM_KEY;
			key.offset = num_bytes;
			btrfs_release_path(path);
			goto again;
		}
	}

	if (ret && !insert) {
		err = -ENOENT;
		goto out;
	} else if (WARN_ON(ret)) {
		err = -EIO;
		goto out;
	}

	leaf = path->nodes[0];
	item_size = btrfs_item_size(leaf, path->slots[0]);
	if (unlikely(item_size < sizeof(*ei))) {
		err = -EINVAL;
		btrfs_print_v0_err(fs_info);
		btrfs_abort_transaction(trans, err);
		goto out;
	}

	ei = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item);
	flags = btrfs_extent_flags(leaf, ei);

	ptr = (unsigned long)(ei + 1);
	end = (unsigned long)ei + item_size;

	if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK && !skinny_metadata) {
		ptr += sizeof(struct btrfs_tree_block_info);
		BUG_ON(ptr > end);
	}

	if (owner >= BTRFS_FIRST_FREE_OBJECTID)
		needed = BTRFS_REF_TYPE_DATA;
	else
		needed = BTRFS_REF_TYPE_BLOCK;

	err = -ENOENT;
	while (1) {
		if (ptr >= end) {
			if (ptr > end) {
				err = -EUCLEAN;
				btrfs_print_leaf(path->nodes[0]);
				btrfs_crit(fs_info,
"overrun extent record at slot %d while looking for inline extent for root %llu owner %llu offset %llu parent %llu",
					   path->slots[0], root_objectid, owner, offset, parent);
			}
			break;
		}
		iref = (struct btrfs_extent_inline_ref *)ptr;
		type = btrfs_get_extent_inline_ref_type(leaf, iref, needed);
		if (type == BTRFS_REF_TYPE_INVALID) {
			err = -EUCLEAN;
			goto out;
		}

		if (want < type)
			break;
		if (want > type) {
			ptr += btrfs_extent_inline_ref_size(type);
			continue;
		}

		if (type == BTRFS_EXTENT_DATA_REF_KEY) {
			struct btrfs_extent_data_ref *dref;
			dref = (struct btrfs_extent_data_ref *)(&iref->offset);
			if (match_extent_data_ref(leaf, dref, root_objectid,
						  owner, offset)) {
				err = 0;
				break;
			}
			if (hash_extent_data_ref_item(leaf, dref) <
			    hash_extent_data_ref(root_objectid, owner, offset))
				break;
		} else {
			u64 ref_offset;
			ref_offset = btrfs_extent_inline_ref_offset(leaf, iref);
			if (parent > 0) {
				if (parent == ref_offset) {
					err = 0;
					break;
				}
				if (ref_offset < parent)
					break;
			} else {
				if (root_objectid == ref_offset) {
					err = 0;
					break;
				}
				if (ref_offset < root_objectid)
					break;
			}
		}
		ptr += btrfs_extent_inline_ref_size(type);
	}
	if (err == -ENOENT && insert) {
		if (item_size + extra_size >=
		    BTRFS_MAX_EXTENT_ITEM_SIZE(root)) {
			err = -EAGAIN;
			goto out;
		}
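		/*
		 * To add new inline back ref, we have to make sure
		 * there is no corresponding back ref item.
		 * For simplicity, we just do not add new inline back
		 * ref if there is any kind of item for this block
		 */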
		if (find_next_key(path, 0, &key) == 0 &&
		    key.objectid == bytenr &&
		    key.type < BTRFS_BLOCK_GROUP_ITEM_KEY) {
			err = -EAGAIN;
			goto out;
		}
	}
	*ref_ret = (struct btrfs_extent_inline_ref *)ptr;
out:
	if (insert) {
		path->keep_locks = 0;
		path->search_for_extension = 0;
		btrfs_unlock_up_safe(path, 1);
	}
	return err;
}
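
/*
 * helper to add new inline back ref
 */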
static noinline_for_stack
void setup_inline_extent_backref(struct btrfs_fs_info *fs_info,
				 struct btrfs_path *path,
				 struct btrfs_extent_inline_ref *iref,
				 u64 parent, u64 root_objectid,
				 u64 owner, u64 offset, int refs_to_add,
				 struct btrfs_delayed_extent_op *extent_op)
{
	struct extent_buffer *leaf;
	struct btrfs_extent_item *ei;
	unsigned long ptr;
	unsigned long end;
	unsigned long item_offset;
	u64 refs;
	int size;
	int type;

	leaf = path->nodes[0];
	ei = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item);
	item_offset = (unsigned long)iref - (unsigned long)ei;

	type = extent_ref_type(parent, owner);
	size = btrfs_extent_inline_ref_size(type);

	btrfs_extend_item(path, size);

	ei = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item);
	refs = btrfs_extent_refs(leaf, ei);
	refs += refs_to_add;
	btrfs_set_extent_refs(leaf, ei, refs);
	if (extent_op)
		__run_delayed_extent_op(extent_op, leaf, ei);

	ptr = (unsigned long)ei + item_offset;
	end = (unsigned long)ei + btrfs_item_size(leaf, path->slots[0]);
	if (ptr < end - size)
		memmove_extent_buffer(leaf, ptr + size, ptr,
				      end - size - ptr);

	iref = (struct btrfs_extent_inline_ref *)ptr;
	btrfs_set_extent_inline_ref_type(leaf, iref, type);
	if (type == BTRFS_EXTENT_DATA_REF_KEY) {
		struct btrfs_extent_data_ref *dref;
		dref = (struct btrfs_extent_data_ref *)(&iref->offset);
		btrfs_set_extent_data_ref_root(leaf, dref, root_objectid);
		btrfs_set_extent_data_ref_objectid(leaf, dref, owner);
		btrfs_set_extent_data_ref_offset(leaf, dref, offset);
		btrfs_set_extent_data_ref_count(leaf, dref, refs_to_add);
	} else if (type == BTRFS_SHARED_DATA_REF_KEY) {
		struct btrfs_shared_data_ref *sref;
		sref = (struct btrfs_shared_data_ref *)(iref + 1);
		btrfs_set_shared_data_ref_count(leaf, sref, refs_to_add);
		btrfs_set_extent_inline_ref_offset(leaf, iref, parent);
	} else if (type == BTRFS_SHARED_BLOCK_REF_KEY) {
		btrfs_set_extent_inline_ref_offset(leaf, iref, parent);
	} else {
		btrfs_set_extent_inline_ref_offset(leaf, iref, root_objectid);
	}
	btrfs_mark_buffer_dirty(leaf);
}

static int lookup_extent_backref(struct btrfs_trans_handle *trans,
				 struct btrfs_path *path,
				 struct btrfs_extent_inline_ref **ref_ret,
				 u64 bytenr, u64 num_bytes, u64 parent,
				 u64 root_objectid, u64 owner, u64 offset)
{
	int ret;

	ret = lookup_inline_extent_backref(trans, path, ref_ret, bytenr,
					   num_bytes, parent, root_objectid,
					   owner, offset, 0);
	if (ret != -ENOENT)
		return ret;

	btrfs_release_path(path);
	*ref_ret = NULL;

	if (owner < BTRFS_FIRST_FREE_OBJECTID) {
		ret = lookup_tree_block_ref(trans, path, bytenr, parent,
					    root_objectid);
	} else {
		ret = lookup_extent_data_ref(trans, path, bytenr, parent,
					     root_objectid, owner, offset);
	}
	return ret;
}
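
/*
 * helper to update/remove inline back ref
 */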
static noinline_for_stack
void update_inline_extent_backref(struct btrfs_path *path,
				  struct btrfs_extent_inline_ref *iref,
				  int refs_to_mod,
				  struct btrfs_delayed_extent_op *extent_op)
{
	struct extent_buffer *leaf = path->nodes[0];
	struct btrfs_extent_item *ei;
	struct btrfs_extent_data_ref *dref = NULL;
	struct btrfs_shared_data_ref *sref = NULL;
	unsigned long ptr;
	unsigned long end;
	u32 item_size;
	int size;
	int type;
	u64 refs;

	ei = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item);
	refs = btrfs_extent_refs(leaf, ei);
	WARN_ON(refs_to_mod < 0 && refs + refs_to_mod <= 0);
	refs += refs_to_mod;
	btrfs_set_extent_refs(leaf, ei, refs);
	if (extent_op)
		__run_delayed_extent_op(extent_op, leaf, ei);
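
	/*
	 * If type is invalid, we should have bailed out after
	 * lookup_inline_extent_backref().
	 */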
	type = btrfs_get_extent_inline_ref_type(leaf, iref, BTRFS_REF_TYPE_ANY);
	ASSERT(type != BTRFS_REF_TYPE_INVALID);

	if (type == BTRFS_EXTENT_DATA_REF_KEY) {
		dref = (struct btrfs_extent_data_ref *)(&iref->offset);
		refs = btrfs_extent_data_ref_count(leaf, dref);
	} else if (type == BTRFS_SHARED_DATA_REF_KEY) {
		sref = (struct btrfs_shared_data_ref *)(iref + 1);
		refs = btrfs_shared_data_ref_count(leaf, sref);
	} else {
		refs = 1;
		BUG_ON(refs_to_mod != -1);
	}

	BUG_ON(refs_to_mod < 0 && refs < -refs_to_mod);
	refs += refs_to_mod;

	if (refs > 0) {
		if (type == BTRFS_EXTENT_DATA_REF_KEY)
			btrfs_set_extent_data_ref_count(leaf, dref, refs);
		else
			btrfs_set_shared_data_ref_count(leaf, sref, refs);
	} else {
		size = btrfs_extent_inline_ref_size(type);
		item_size = btrfs_item_size(leaf, path->slots[0]);
		ptr = (unsigned long)iref;
		end = (unsigned long)ei + item_size;
		if (ptr + size < end)
			memmove_extent_buffer(leaf, ptr, ptr + size,
					      end - ptr - size);
		item_size -= size;
		btrfs_truncate_item(path, item_size, 1);
	}
	btrfs_mark_buffer_dirty(leaf);
}

static noinline_for_stack
int insert_inline_extent_backref(struct btrfs_trans_handle *trans,
				 struct btrfs_path *path,
				 u64 bytenr, u64 num_bytes, u64 parent,
				 u64 root_objectid, u64 owner,
				 u64 offset, int refs_to_add,
				 struct btrfs_delayed_extent_op *extent_op)
{
	struct btrfs_extent_inline_ref *iref;
	int ret;

	ret = lookup_inline_extent_backref(trans, path, &iref, bytenr,
					   num_bytes, parent, root_objectid,
					   owner, offset, 1);
	if (ret == 0) {
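		/*
		 * We're adding refs to a tree block we already own, this
		 * should not happen at all.
		 */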
		if (owner < BTRFS_FIRST_FREE_OBJECTID) {
			btrfs_crit(trans->fs_info,
"adding refs to an existing tree ref, bytenr %llu num_bytes %llu root_objectid %llu",
				   bytenr, num_bytes, root_objectid);
			if (IS_ENABLED(CONFIG_BTRFS_DEBUG)) {
				WARN_ON(1);
				btrfs_crit(trans->fs_info,
			"path->slots[0]=%d path->nodes[0]:", path->slots[0]);
				btrfs_print_leaf(path->nodes[0]);
			}
			return -EUCLEAN;
		}
		update_inline_extent_backref(path, iref, refs_to_add, extent_op);
	} else if (ret == -ENOENT) {
		setup_inline_extent_backref(trans->fs_info, path, iref, parent,
					    root_objectid, owner, offset,
					    refs_to_add, extent_op);
		ret = 0;
	}
	return ret;
}

static int remove_extent_backref(struct btrfs_trans_handle *trans,
				 struct btrfs_root *root,
				 struct btrfs_path *path,
				 struct btrfs_extent_inline_ref *iref,
				 int refs_to_drop, int is_data)
{
	int ret = 0;

	BUG_ON(!is_data && refs_to_drop != 1);
	if (iref)
		update_inline_extent_backref(path, iref, -refs_to_drop, NULL);
	else if (is_data)
		ret = remove_extent_data_ref(trans, root, path, refs_to_drop);
	else
		ret = btrfs_del_item(trans, root, path);
	return ret;
}

static int btrfs_issue_discard(struct block_device *bdev, u64 start, u64 len,
			       u64 *discarded_bytes)
{
	int j, ret = 0;
	u64 bytes_left, end;
	u64 aligned_start = ALIGN(start, 1 << 9);

	if (WARN_ON(start != aligned_start)) {
		len -= aligned_start - start;
		len = round_down(len, 1 << 9);
		start = aligned_start;
	}

	*discarded_bytes = 0;

	if (!len)
		return 0;

	end = start + len;
	bytes_left = len;
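
	/* Skip any superblocks on this device. */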
	for (j = 0; j < BTRFS_SUPER_MIRROR_MAX; j++) {
		u64 sb_start = btrfs_sb_offset(j);
		u64 sb_end = sb_start + BTRFS_SUPER_INFO_SIZE;
		u64 size = sb_start - start;

		if (!in_range(sb_start, start, bytes_left) &&
		    !in_range(sb_end, start, bytes_left) &&
		    !in_range(start, sb_start, BTRFS_SUPER_INFO_SIZE))
			continue;
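
		/*
		 * Superblock spans beginning of range.  Adjust start and
		 * try again.
		 */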
		if (sb_start <= start) {
			start += sb_end - start;
			if (start > end) {
				bytes_left = 0;
				break;
			}
			bytes_left = end - start;
			continue;
		}

		if (size) {
			ret = blkdev_issue_discard(bdev, start >> 9, size >> 9,
						   GFP_NOFS);
			if (!ret)
				*discarded_bytes += size;
			else if (ret != -EOPNOTSUPP)
				return ret;
		}

		start = sb_end;
		if (start > end) {
			bytes_left = 0;
			break;
		}
		bytes_left = end - start;
	}

	if (bytes_left) {
		ret = blkdev_issue_discard(bdev, start >> 9, bytes_left >> 9,
					   GFP_NOFS);
		if (!ret)
			*discarded_bytes += bytes_left;
	}
	return ret;
}

static int do_discard_extent(struct btrfs_discard_stripe *stripe, u64 *bytes)
{
	struct btrfs_device *dev = stripe->dev;
	struct btrfs_fs_info *fs_info = dev->fs_info;
	struct btrfs_dev_replace *dev_replace = &fs_info->dev_replace;
	u64 phys = stripe->physical;
	u64 len = stripe->length;
	u64 discarded = 0;
	int ret = 0;
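
	/* Zone reset on a zoned filesystem */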
	if (btrfs_can_zone_reset(dev, phys, len)) {
		u64 src_disc;

		ret = btrfs_reset_device_zone(dev, phys, len, &discarded);
		if (ret)
			goto out;

		if (!btrfs_dev_replace_is_ongoing(dev_replace) ||
		    dev != dev_replace->srcdev)
			goto out;

		src_disc = discarded;
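
		/* Send to replace target as well */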
		ret = btrfs_reset_device_zone(dev_replace->tgtdev, phys, len,
					      &discarded);
		discarded += src_disc;
	} else if (bdev_max_discard_sectors(stripe->dev->bdev)) {
		ret = btrfs_issue_discard(dev->bdev, phys, len, &discarded);
	} else {
		ret = 0;
		*bytes = 0;
	}

out:
	*bytes = discarded;
	return ret;
}

int btrfs_discard_extent(struct btrfs_fs_info *fs_info, u64 bytenr,
			 u64 num_bytes, u64 *actual_bytes)
{
	int ret = 0;
	u64 discarded_bytes = 0;
	u64 end = bytenr + num_bytes;
	u64 cur = bytenr;
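
	/*
	 * Avoid races with device replace and make sure the devices in the
	 * stripes don't go away while we are discarding.
	 */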
	btrfs_bio_counter_inc_blocked(fs_info);
	while (cur < end) {
		struct btrfs_discard_stripe *stripes;
		unsigned int num_stripes;
		int i;

		num_bytes = end - cur;
		stripes = btrfs_map_discard(fs_info, cur, &num_bytes, &num_stripes);
		if (IS_ERR(stripes)) {
			ret = PTR_ERR(stripes);
			if (ret == -EOPNOTSUPP)
				ret = 0;
			break;
		}

		for (i = 0; i < num_stripes; i++) {
			struct btrfs_discard_stripe *stripe = stripes + i;
			u64 bytes;

			if (!stripe->dev->bdev) {
				ASSERT(btrfs_test_opt(fs_info, DEGRADED));
				continue;
			}

			if (!test_bit(BTRFS_DEV_STATE_WRITEABLE,
					&stripe->dev->dev_state))
				continue;

			ret = do_discard_extent(stripe, &bytes);
			if (ret) {
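				/*
				 * Keep going if discard is not supported by
				 * the device.
				 */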
				if (ret != -EOPNOTSUPP)
					break;
				ret = 0;
			} else {
				discarded_bytes += bytes;
			}
		}
		kfree(stripes);
		if (ret)
			break;
		cur += num_bytes;
	}
	btrfs_bio_counter_dec(fs_info);
	if (actual_bytes)
		*actual_bytes = discarded_bytes;
	return ret;
}
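
/* Can return -ENOMEM */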
int btrfs_inc_extent_ref(struct btrfs_trans_handle *trans,
			 struct btrfs_ref *generic_ref)
{
	struct btrfs_fs_info *fs_info = trans->fs_info;
	int ret;

	ASSERT(generic_ref->type != BTRFS_REF_NOT_SET &&
	       generic_ref->action);
	BUG_ON(generic_ref->type == BTRFS_REF_METADATA &&
	       generic_ref->tree_ref.owning_root == BTRFS_TREE_LOG_OBJECTID);

	if (generic_ref->type == BTRFS_REF_METADATA)
		ret = btrfs_add_delayed_tree_ref(trans, generic_ref, NULL);
	else
		ret = btrfs_add_delayed_data_ref(trans, generic_ref, 0);

	btrfs_ref_tree_mod(fs_info, generic_ref);

	return ret;
}
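
/*
 * __btrfs_inc_extent_ref - insert backreference for a given extent
 *
 * The counterpart is in __btrfs_free_extent(), with examples and more details
 * how it works.
 *
 * @trans:	    Handle of transaction
 *
 * @node:	    The delayed ref node used to get the bytenr/length for
 *		    extent whose references are incremented.
 *
 * @parent:	    If this is a shared extent (BTRFS_SHARED_DATA_REF_KEY/
 *		    BTRFS_SHARED_BLOCK_REF_KEY) then it holds the logical
 *		    bytenr of the parent block. Since new extents are always
 *		    created with indirect references, this will only be the
 *		    case when relocating a shared extent. In that case,
 *		    root_objectid will be BTRFS_TREE_RELOC_OBJECTID. Otherwise,
 *		    parent must be 0.
 *
 * @root_objectid:  The id of the root where this modification has originated,
 *		    this can be either one of the well-known metadata trees or
 *		    the subvolume id which references this extent.
 *
 * @owner:	    For data extents it is the inode number of the owning file.
 *		    For metadata extents this parameter holds the level in the
 *		    tree of the extent.
 *
 * @offset:	    For metadata extents the offset is ignored and is currently
 *		    always passed as 0. For data extents it is the fileoffset
 *		    this extent belongs to.
 *
 * @refs_to_add:    Number of references to add
 *
 * @extent_op:	    Pointer to a structure, holding information necessary when
 *		    updating a tree block's flags
 */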
static int __btrfs_inc_extent_ref(struct btrfs_trans_handle *trans,
				  struct btrfs_delayed_ref_node *node,
				  u64 parent, u64 root_objectid,
				  u64 owner, u64 offset, int refs_to_add,
				  struct btrfs_delayed_extent_op *extent_op)
{
	struct btrfs_path *path;
	struct extent_buffer *leaf;
	struct btrfs_extent_item *item;
	struct btrfs_key key;
	u64 bytenr = node->bytenr;
	u64 num_bytes = node->num_bytes;
	u64 refs;
	int ret;

	path = btrfs_alloc_path();
	if (!path)
		return -ENOMEM;
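
	/* this will setup the path even if it fails to insert the back ref */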
	ret = insert_inline_extent_backref(trans, path, bytenr, num_bytes,
					   parent, root_objectid, owner,
					   offset, refs_to_add, extent_op);
	if ((ret < 0 && ret != -EAGAIN) || !ret)
		goto out;
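
	/*
	 * Ok we had -EAGAIN which means we didn't have space to insert an
	 * inline extent ref, so just update the reference count and add a
	 * normal backref.
	 */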
	leaf = path->nodes[0];
	btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
	item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item);
	refs = btrfs_extent_refs(leaf, item);
	btrfs_set_extent_refs(leaf, item, refs + refs_to_add);
	if (extent_op)
		__run_delayed_extent_op(extent_op, leaf, item);

	btrfs_mark_buffer_dirty(leaf);
	btrfs_release_path(path);
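
	/* now insert the actual backref */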
	if (owner < BTRFS_FIRST_FREE_OBJECTID) {
		BUG_ON(refs_to_add != 1);
		ret = insert_tree_block_ref(trans, path, bytenr, parent,
					    root_objectid);
	} else {
		ret = insert_extent_data_ref(trans, path, bytenr, parent,
					     root_objectid, owner, offset,
					     refs_to_add);
	}
	if (ret)
		btrfs_abort_transaction(trans, ret);
out:
	btrfs_free_path(path);
	return ret;
}

static int run_delayed_data_ref(struct btrfs_trans_handle *trans,
				struct btrfs_delayed_ref_node *node,
				struct btrfs_delayed_extent_op *extent_op,
				int insert_reserved)
{
	int ret = 0;
	struct btrfs_delayed_data_ref *ref;
	struct btrfs_key ins;
	u64 parent = 0;
	u64 ref_root = 0;
	u64 flags = 0;

	ins.objectid = node->bytenr;
	ins.offset = node->num_bytes;
	ins.type = BTRFS_EXTENT_ITEM_KEY;

	ref = btrfs_delayed_node_to_data_ref(node);
	trace_run_delayed_data_ref(trans->fs_info, node, ref, node->action);

	if (node->type == BTRFS_SHARED_DATA_REF_KEY)
		parent = ref->parent;
	ref_root = ref->root;

	if (node->action == BTRFS_ADD_DELAYED_REF && insert_reserved) {
		if (extent_op)
			flags |= extent_op->flags_to_set;
		ret = alloc_reserved_file_extent(trans, parent, ref_root,
						 flags, ref->objectid,
						 ref->offset, &ins,
						 node->ref_mod);
	} else if (node->action == BTRFS_ADD_DELAYED_REF) {
		ret = __btrfs_inc_extent_ref(trans, node, parent, ref_root,
					     ref->objectid, ref->offset,
					     node->ref_mod, extent_op);
	} else if (node->action == BTRFS_DROP_DELAYED_REF) {
		ret = __btrfs_free_extent(trans, node, parent,
					  ref_root, ref->objectid,
					  ref->offset, node->ref_mod,
					  extent_op);
	} else {
		BUG();
	}
	return ret;
}

static void __run_delayed_extent_op(struct btrfs_delayed_extent_op *extent_op,
				    struct extent_buffer *leaf,
				    struct btrfs_extent_item *ei)
{
	u64 flags = btrfs_extent_flags(leaf, ei);
	if (extent_op->update_flags) {
		flags |= extent_op->flags_to_set;
		btrfs_set_extent_flags(leaf, ei, flags);
	}

	if (extent_op->update_key) {
		struct btrfs_tree_block_info *bi;
		BUG_ON(!(flags & BTRFS_EXTENT_FLAG_TREE_BLOCK));
		bi = (struct btrfs_tree_block_info *)(ei + 1);
		btrfs_set_tree_block_key(leaf, bi, &extent_op->key);
	}
}

static int run_delayed_extent_op(struct btrfs_trans_handle *trans,
				 struct btrfs_delayed_ref_head *head,
				 struct btrfs_delayed_extent_op *extent_op)
{
	struct btrfs_fs_info *fs_info = trans->fs_info;
	struct btrfs_root *root;
	struct btrfs_key key;
	struct btrfs_path *path;
	struct btrfs_extent_item *ei;
	struct extent_buffer *leaf;
	u32 item_size;
	int ret;
	int err = 0;
	int metadata = 1;

	if (TRANS_ABORTED(trans))
		return 0;

	if (!btrfs_fs_incompat(fs_info, SKINNY_METADATA))
		metadata = 0;

	path = btrfs_alloc_path();
	if (!path)
		return -ENOMEM;

	key.objectid = head->bytenr;

	if (metadata) {
		key.type = BTRFS_METADATA_ITEM_KEY;
		key.offset = extent_op->level;
	} else {
		key.type = BTRFS_EXTENT_ITEM_KEY;
		key.offset = head->num_bytes;
	}

	root = btrfs_extent_root(fs_info, key.objectid);
again:
	ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
	if (ret < 0) {
		err = ret;
		goto out;
	}
	if (ret > 0) {
		if (metadata) {
			if (path->slots[0] > 0) {
				path->slots[0]--;
				btrfs_item_key_to_cpu(path->nodes[0], &key,
						      path->slots[0]);
				if (key.objectid == head->bytenr &&
				    key.type == BTRFS_EXTENT_ITEM_KEY &&
				    key.offset == head->num_bytes)
					ret = 0;
			}
			if (ret > 0) {
				btrfs_release_path(path);
				metadata = 0;

				key.objectid = head->bytenr;
				key.offset = head->num_bytes;
				key.type = BTRFS_EXTENT_ITEM_KEY;
				goto again;
			}
		} else {
			err = -EIO;
			goto out;
		}
	}

	leaf = path->nodes[0];
	item_size = btrfs_item_size(leaf, path->slots[0]);

	if (unlikely(item_size < sizeof(*ei))) {
		err = -EINVAL;
		btrfs_print_v0_err(fs_info);
		btrfs_abort_transaction(trans, err);
		goto out;
	}

	ei = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item);
	__run_delayed_extent_op(extent_op, leaf, ei);

	btrfs_mark_buffer_dirty(leaf);
out:
	btrfs_free_path(path);
	return err;
}

static int run_delayed_tree_ref(struct btrfs_trans_handle *trans,
				struct btrfs_delayed_ref_node *node,
				struct btrfs_delayed_extent_op *extent_op,
				int insert_reserved)
{
	int ret = 0;
	struct btrfs_delayed_tree_ref *ref;
	u64 parent = 0;
	u64 ref_root = 0;

	ref = btrfs_delayed_node_to_tree_ref(node);
	trace_run_delayed_tree_ref(trans->fs_info, node, ref, node->action);

	if (node->type == BTRFS_SHARED_BLOCK_REF_KEY)
		parent = ref->parent;
	ref_root = ref->root;

	if (node->ref_mod != 1) {
		btrfs_err(trans->fs_info,
	"btree block(%llu) has %d references rather than 1: action %d ref_root %llu parent %llu",
			  node->bytenr, node->ref_mod, node->action, ref_root,
			  parent);
		return -EIO;
	}
	if (node->action == BTRFS_ADD_DELAYED_REF && insert_reserved) {
		BUG_ON(!extent_op || !extent_op->update_flags);
		ret = alloc_reserved_tree_block(trans, node, extent_op);
	} else if (node->action == BTRFS_ADD_DELAYED_REF) {
		ret = __btrfs_inc_extent_ref(trans, node, parent, ref_root,
					     ref->level, 0, 1, extent_op);
	} else if (node->action == BTRFS_DROP_DELAYED_REF) {
		ret = __btrfs_free_extent(trans, node, parent, ref_root,
					  ref->level, 0, 1, extent_op);
	} else {
		BUG();
	}
	return ret;
}
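
/* helper function to actually process a single delayed ref entry */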
static int run_one_delayed_ref(struct btrfs_trans_handle *trans,
			       struct btrfs_delayed_ref_node *node,
			       struct btrfs_delayed_extent_op *extent_op,
			       int insert_reserved)
{
	int ret = 0;

	if (TRANS_ABORTED(trans)) {
		if (insert_reserved)
			btrfs_pin_extent(trans, node->bytenr, node->num_bytes, 1);
		return 0;
	}

	if (node->type == BTRFS_TREE_BLOCK_REF_KEY ||
	    node->type == BTRFS_SHARED_BLOCK_REF_KEY)
		ret = run_delayed_tree_ref(trans, node, extent_op,
					   insert_reserved);
	else if (node->type == BTRFS_EXTENT_DATA_REF_KEY ||
		 node->type == BTRFS_SHARED_DATA_REF_KEY)
		ret = run_delayed_data_ref(trans, node, extent_op,
					   insert_reserved);
	else
		BUG();
	if (ret && insert_reserved)
		btrfs_pin_extent(trans, node->bytenr, node->num_bytes, 1);
	return ret;
}

static inline struct btrfs_delayed_ref_node *
select_delayed_ref(struct btrfs_delayed_ref_head *head)
{
	struct btrfs_delayed_ref_node *ref;

	if (RB_EMPTY_ROOT(&head->ref_tree.rb_root))
		return NULL;
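
	/*
	 * Select a delayed ref of type BTRFS_ADD_DELAYED_REF first.
	 * This is to prevent a ref count from going down to zero, which deletes
	 * the extent item from the extent tree, when there still are references
	 * to add, which would fail because they would not find the extent item.
	 */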
	if (!list_empty(&head->ref_add_list))
		return list_first_entry(&head->ref_add_list,
					struct btrfs_delayed_ref_node,
					add_list);

	ref = rb_entry(rb_first_cached(&head->ref_tree),
		       struct btrfs_delayed_ref_node, ref_node);
	ASSERT(list_empty(&ref->add_list));
	return ref;
}

static void unselect_delayed_ref_head(struct btrfs_delayed_ref_root *delayed_refs,
				      struct btrfs_delayed_ref_head *head)
{
	spin_lock(&delayed_refs->lock);
	head->processing = 0;
	delayed_refs->num_heads_ready++;
	spin_unlock(&delayed_refs->lock);
	btrfs_delayed_ref_unlock(head);
}

static struct btrfs_delayed_extent_op *cleanup_extent_op(
				struct btrfs_delayed_ref_head *head)
{
	struct btrfs_delayed_extent_op *extent_op = head->extent_op;

	if (!extent_op)
		return NULL;

	if (head->must_insert_reserved) {
		head->extent_op = NULL;
		btrfs_free_delayed_extent_op(extent_op);
		return NULL;
	}
	return extent_op;
}

static int run_and_cleanup_extent_op(struct btrfs_trans_handle *trans,
				     struct btrfs_delayed_ref_head *head)
{
	struct btrfs_delayed_extent_op *extent_op;
	int ret;

	extent_op = cleanup_extent_op(head);
	if (!extent_op)
		return 0;
	head->extent_op = NULL;
	spin_unlock(&head->lock);
	ret = run_delayed_extent_op(trans, head, extent_op);
	btrfs_free_delayed_extent_op(extent_op);
	return ret ? ret : 1;
}

void btrfs_cleanup_ref_head_accounting(struct btrfs_fs_info *fs_info,
				  struct btrfs_delayed_ref_root *delayed_refs,
				  struct btrfs_delayed_ref_head *head)
{
	int nr_items = 1;
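
	/*
	 * We had csum deletions accounted for in our delayed refs rsv, we need
	 * to drop the csum leaves for this update from our delayed_refs_rsv.
	 */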
	if (head->total_ref_mod < 0 && head->is_data) {
		spin_lock(&delayed_refs->lock);
		delayed_refs->pending_csums -= head->num_bytes;
		spin_unlock(&delayed_refs->lock);
		nr_items += btrfs_csum_bytes_to_leaves(fs_info, head->num_bytes);
	}

	btrfs_delayed_refs_rsv_release(fs_info, nr_items);
}

static int cleanup_ref_head(struct btrfs_trans_handle *trans,
			    struct btrfs_delayed_ref_head *head)
{
	struct btrfs_fs_info *fs_info = trans->fs_info;
	struct btrfs_delayed_ref_root *delayed_refs;
	int ret;

	delayed_refs = &trans->transaction->delayed_refs;

	ret = run_and_cleanup_extent_op(trans, head);
	if (ret < 0) {
		unselect_delayed_ref_head(delayed_refs, head);
		btrfs_debug(fs_info, "run_delayed_extent_op returned %d", ret);
		return ret;
	} else if (ret) {
		return ret;
	}
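
	/*
	 * Need to drop our head ref lock and re-acquire the delayed ref lock
	 * and then re-check to make sure nobody got added.
	 */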
	spin_unlock(&head->lock);
	spin_lock(&delayed_refs->lock);
	spin_lock(&head->lock);
	if (!RB_EMPTY_ROOT(&head->ref_tree.rb_root) || head->extent_op) {
		spin_unlock(&head->lock);
		spin_unlock(&delayed_refs->lock);
		return 1;
	}
	btrfs_delete_ref_head(delayed_refs, head);
	spin_unlock(&head->lock);
	spin_unlock(&delayed_refs->lock);

	if (head->must_insert_reserved) {
		btrfs_pin_extent(trans, head->bytenr, head->num_bytes, 1);
		if (head->is_data) {
			struct btrfs_root *csum_root;

			csum_root = btrfs_csum_root(fs_info, head->bytenr);
			ret = btrfs_del_csums(trans, csum_root, head->bytenr,
					      head->num_bytes);
		}
	}

	btrfs_cleanup_ref_head_accounting(fs_info, delayed_refs, head);

	trace_run_delayed_ref_head(fs_info, head, 0);
	btrfs_delayed_ref_unlock(head);
	btrfs_put_delayed_ref_head(head);
	return ret;
}

static struct btrfs_delayed_ref_head *btrfs_obtain_ref_head(
					struct btrfs_trans_handle *trans)
{
	struct btrfs_delayed_ref_root *delayed_refs =
		&trans->transaction->delayed_refs;
	struct btrfs_delayed_ref_head *head = NULL;
	int ret;

	spin_lock(&delayed_refs->lock);
	head = btrfs_select_ref_head(delayed_refs);
	if (!head) {
		spin_unlock(&delayed_refs->lock);
		return head;
	}
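
	/*
	 * Grab the lock that says we are going to process all the refs for
	 * this head
	 */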
	ret = btrfs_delayed_ref_lock(delayed_refs, head);
	spin_unlock(&delayed_refs->lock);
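
	/*
	 * We may have dropped the spin lock to get the head mutex lock, and
	 * that might have given someone else time to free the head.  If that's
	 * true, it has been removed from our list and we can move on.
	 */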
	if (ret == -EAGAIN)
		head = ERR_PTR(-EAGAIN);

	return head;
}

static int btrfs_run_delayed_refs_for_head(struct btrfs_trans_handle *trans,
					   struct btrfs_delayed_ref_head *locked_ref,
					   unsigned long *run_refs)
{
	struct btrfs_fs_info *fs_info = trans->fs_info;
	struct btrfs_delayed_ref_root *delayed_refs;
	struct btrfs_delayed_extent_op *extent_op;
	struct btrfs_delayed_ref_node *ref;
	int must_insert_reserved = 0;
	int ret;

	delayed_refs = &trans->transaction->delayed_refs;

	lockdep_assert_held(&locked_ref->mutex);
	lockdep_assert_held(&locked_ref->lock);

	while ((ref = select_delayed_ref(locked_ref))) {
		if (ref->seq &&
		    btrfs_check_delayed_seq(fs_info, ref->seq)) {
			spin_unlock(&locked_ref->lock);
			unselect_delayed_ref_head(delayed_refs, locked_ref);
			return -EAGAIN;
		}

		(*run_refs)++;
		ref->in_tree = 0;
		rb_erase_cached(&ref->ref_node, &locked_ref->ref_tree);
		RB_CLEAR_NODE(&ref->ref_node);
		if (!list_empty(&ref->add_list))
			list_del(&ref->add_list);
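		/*
		 * When we play the delayed ref, also correct the ref_mod on
		 * head
		 */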
		switch (ref->action) {
		case BTRFS_ADD_DELAYED_REF:
		case BTRFS_ADD_DELAYED_EXTENT:
			locked_ref->ref_mod -= ref->ref_mod;
			break;
		case BTRFS_DROP_DELAYED_REF:
			locked_ref->ref_mod += ref->ref_mod;
			break;
		default:
			WARN_ON(1);
		}
		atomic_dec(&delayed_refs->num_entries);
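
		/*
		 * Record the must_insert_reserved flag before we drop the
		 * spin lock.
		 */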
		must_insert_reserved = locked_ref->must_insert_reserved;
		locked_ref->must_insert_reserved = 0;

		extent_op = locked_ref->extent_op;
		locked_ref->extent_op = NULL;
		spin_unlock(&locked_ref->lock);

		ret = run_one_delayed_ref(trans, ref, extent_op,
					  must_insert_reserved);

		btrfs_free_delayed_extent_op(extent_op);
		if (ret) {
			unselect_delayed_ref_head(delayed_refs, locked_ref);
			btrfs_put_delayed_ref(ref);
			btrfs_debug(fs_info, "run_one_delayed_ref returned %d",
				    ret);
			return ret;
		}

		btrfs_put_delayed_ref(ref);
		cond_resched();

		spin_lock(&locked_ref->lock);
		btrfs_merge_delayed_refs(trans, delayed_refs, locked_ref);
	}

	return 0;
}
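
/*
 * Returns 0 on success or if called with an already aborted transaction.
 * Returns -ENOMEM or -EIO on failure and will abort the transaction.
 */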
static noinline int __btrfs_run_delayed_refs(struct btrfs_trans_handle *trans,
					     unsigned long nr)
{
	struct btrfs_fs_info *fs_info = trans->fs_info;
	struct btrfs_delayed_ref_root *delayed_refs;
	struct btrfs_delayed_ref_head *locked_ref = NULL;
	ktime_t start = ktime_get();
	int ret;
	unsigned long count = 0;
	unsigned long actual_count = 0;

	delayed_refs = &trans->transaction->delayed_refs;
	do {
		if (!locked_ref) {
			locked_ref = btrfs_obtain_ref_head(trans);
			if (IS_ERR_OR_NULL(locked_ref)) {
				if (PTR_ERR(locked_ref) == -EAGAIN) {
					continue;
				} else {
					break;
				}
			}
			count++;
		}
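
		/*
		 * We need to try and merge add/drops of the same ref since we
		 * can run into issues with relocate dropping the implicit ref
		 * and then it being added back again before the drop can
		 * finish.  If we merged anything we need to re-loop so we can
		 * get a good ref.
		 * Or we can get node references of the same type that weren't
		 * merged when created due to bumps in the tree mod seq, and
		 * we need to merge them to prevent adding an inline extent
		 * backref before dropping it (triggering a BUG_ON at the time
		 * it is not possible).
		 */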
		spin_lock(&locked_ref->lock);
		btrfs_merge_delayed_refs(trans, delayed_refs, locked_ref);

		ret = btrfs_run_delayed_refs_for_head(trans, locked_ref,
						      &actual_count);
		if (ret < 0 && ret != -EAGAIN) {
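			/*
			 * Error, btrfs_run_delayed_refs_for_head already
			 * unlocked everything so just bail out
			 */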
			return ret;
		} else if (!ret) {
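			/*
			 * Success, perform the usual cleanup of a processed
			 * head
			 */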
			ret = cleanup_ref_head(trans, locked_ref);
			if (ret > 0) {
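				/* We dropped our lock, we need to loop. */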
				ret = 0;
				continue;
			} else if (ret) {
				return ret;
			}
		}
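
		/*
		 * Either success case or btrfs_run_delayed_refs_for_head
		 * returned -EAGAIN, meaning we need to select another head
		 */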
		locked_ref = NULL;
		cond_resched();
	} while ((nr != -1 && count < nr) || locked_ref);
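
	/*
	 * We don't want to include ref heads since we can have empty ref heads
	 * and those will drastically skew our runtime down since we just do
	 * accounting, no actual extent tree updates.
	 */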
	if (actual_count > 0) {
		u64 runtime = ktime_to_ns(ktime_sub(ktime_get(), start));
		u64 avg;
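
		/*
		 * We weigh the current average higher than our current runtime
		 * to avoid large swings in the average.
		 */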
		spin_lock(&delayed_refs->lock);
		avg = fs_info->avg_delayed_ref_runtime * 3 + runtime;
		fs_info->avg_delayed_ref_runtime = avg >> 2;	/* divide by 4 */
		spin_unlock(&delayed_refs->lock);
	}
	return 0;
}

#ifdef SCRAMBLE_DELAYED_REFS
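/*
 * Normally delayed refs get processed in ascending bytenr order. This
 * correlates in most cases to the order added. To expose dependencies on this
 * order, we start to process the tree in the middle instead of the beginning
 */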
static u64 find_middle(struct rb_root *root)
{
	struct rb_node *n = root->rb_node;
	struct btrfs_delayed_ref_node *entry;
	int alt = 1;
	u64 middle;
	u64 first = 0, last = 0;

	n = rb_first(root);
	if (n) {
		entry = rb_entry(n, struct btrfs_delayed_ref_node, rb_node);
		first = entry->bytenr;
	}
	n = rb_last(root);
	if (n) {
		entry = rb_entry(n, struct btrfs_delayed_ref_node, rb_node);
		last = entry->bytenr;
	}
	n = root->rb_node;

	while (n) {
		entry = rb_entry(n, struct btrfs_delayed_ref_node, rb_node);
		WARN_ON(!entry->in_tree);

		middle = entry->bytenr;

		if (alt)
			n = n->rb_left;
		else
			n = n->rb_right;

		alt = 1 - alt;
	}
	return middle;
}
#endif
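
/*
 * this starts processing the delayed reference count updates and
 * extent insertions we have queued up so far.  count can be
 * 0, which means to process everything in the tree at the start
 * of the run (but not newly added entries), or it can be some target
 * number you'd like to process.
 *
 * Returns 0 on success or if called with an aborted transaction
 * Returns <0 on error and aborts the transaction
 */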
int btrfs_run_delayed_refs(struct btrfs_trans_handle *trans,
			   unsigned long count)
{
	struct btrfs_fs_info *fs_info = trans->fs_info;
	struct rb_node *node;
	struct btrfs_delayed_ref_root *delayed_refs;
	struct btrfs_delayed_ref_head *head;
	int ret;
	int run_all = count == (unsigned long)-1;
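
	/* We'll clean this up in btrfs_cleanup_transaction */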
	if (TRANS_ABORTED(trans))
		return 0;

	if (test_bit(BTRFS_FS_CREATING_FREE_SPACE_TREE, &fs_info->flags))
		return 0;

	delayed_refs = &trans->transaction->delayed_refs;
	if (count == 0)
		count = delayed_refs->num_heads_ready;

again:
#ifdef SCRAMBLE_DELAYED_REFS
	delayed_refs->run_delayed_start = find_middle(&delayed_refs->root);
#endif
	ret = __btrfs_run_delayed_refs(trans, count);
	if (ret < 0) {
		btrfs_abort_transaction(trans, ret);
		return ret;
	}

	if (run_all) {
		btrfs_create_pending_block_groups(trans);

		spin_lock(&delayed_refs->lock);
		node = rb_first_cached(&delayed_refs->href_root);
		if (!node) {
			spin_unlock(&delayed_refs->lock);
			goto out;
		}
		head = rb_entry(node, struct btrfs_delayed_ref_head,
				href_node);
		refcount_inc(&head->refs);
		spin_unlock(&delayed_refs->lock);
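
		/* Mutex was contended, block until it's released and retry. */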
		mutex_lock(&head->mutex);
		mutex_unlock(&head->mutex);

		btrfs_put_delayed_ref_head(head);
		cond_resched();
		goto again;
	}
out:
	return 0;
}

int btrfs_set_disk_extent_flags(struct btrfs_trans_handle *trans,
				struct extent_buffer *eb, u64 flags,
				int level)
{
	struct btrfs_delayed_extent_op *extent_op;
	int ret;

	extent_op = btrfs_alloc_delayed_extent_op();
	if (!extent_op)
		return -ENOMEM;

	extent_op->flags_to_set = flags;
	extent_op->update_flags = true;
	extent_op->update_key = false;
	extent_op->level = level;

	ret = btrfs_add_delayed_extent_op(trans, eb->start, eb->len, extent_op);
	if (ret)
		btrfs_free_delayed_extent_op(extent_op);
	return ret;
}

static noinline int check_delayed_ref(struct btrfs_root *root,
				      struct btrfs_path *path,
				      u64 objectid, u64 offset, u64 bytenr)
{
	struct btrfs_delayed_ref_head *head;
	struct btrfs_delayed_ref_node *ref;
	struct btrfs_delayed_data_ref *data_ref;
	struct btrfs_delayed_ref_root *delayed_refs;
	struct btrfs_transaction *cur_trans;
	struct rb_node *node;
	int ret = 0;

	spin_lock(&root->fs_info->trans_lock);
	cur_trans = root->fs_info->running_transaction;
	if (cur_trans)
		refcount_inc(&cur_trans->use_count);
	spin_unlock(&root->fs_info->trans_lock);
	if (!cur_trans)
		return 0;

	delayed_refs = &cur_trans->delayed_refs;
	spin_lock(&delayed_refs->lock);
	head = btrfs_find_delayed_ref_head(delayed_refs, bytenr);
	if (!head) {
		spin_unlock(&delayed_refs->lock);
		btrfs_put_transaction(cur_trans);
		return 0;
	}

	if (!mutex_trylock(&head->mutex)) {
		refcount_inc(&head->refs);
		spin_unlock(&delayed_refs->lock);

		btrfs_release_path(path);
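
		/*
		 * Mutex was contended, block until it's released and let
		 * caller try again
		 */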
		mutex_lock(&head->mutex);
		mutex_unlock(&head->mutex);
		btrfs_put_delayed_ref_head(head);
		btrfs_put_transaction(cur_trans);
		return -EAGAIN;
	}
	spin_unlock(&delayed_refs->lock);

	spin_lock(&head->lock);
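	/*
	 * XXX: We should replace this with a proper search function in the
	 * future.
	 */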
	for (node = rb_first_cached(&head->ref_tree); node;
	     node = rb_next(node)) {
		ref = rb_entry(node, struct btrfs_delayed_ref_node, ref_node);
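		/* If it's a shared ref we know a cross reference exists */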
		if (ref->type != BTRFS_EXTENT_DATA_REF_KEY) {
			ret = 1;
			break;
		}

		data_ref = btrfs_delayed_node_to_data_ref(ref);
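
		/*
		 * If our ref doesn't match the one we're currently looking at
		 * then we have a cross reference.
		 */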
2260 if (data_ref->root != root->root_key.objectid ||
2261 data_ref->objectid != objectid ||
2262 data_ref->offset != offset) {
2263 ret = 1;
2264 break;
2265 }
2266 }
2267 spin_unlock(&head->lock);
2268 mutex_unlock(&head->mutex);
2269 btrfs_put_transaction(cur_trans);
2270 return ret;
2271 }
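/*
 * Return values of check_delayed_ref(): 0 if every delayed ref on the head
 * belongs to (root, objectid, offset), 1 if any other ref exists (a cross
 * reference), and -EAGAIN if the head mutex was contended and the caller
 * must retry the whole check.
 */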
2272
2273 static noinline int check_committed_ref(struct btrfs_root *root,
2274 struct btrfs_path *path,
2275 u64 objectid, u64 offset, u64 bytenr,
2276 bool strict)
2277 {
2278 struct btrfs_fs_info *fs_info = root->fs_info;
2279 struct btrfs_root *extent_root = btrfs_extent_root(fs_info, bytenr);
2280 struct extent_buffer *leaf;
2281 struct btrfs_extent_data_ref *ref;
2282 struct btrfs_extent_inline_ref *iref;
2283 struct btrfs_extent_item *ei;
2284 struct btrfs_key key;
2285 u32 item_size;
2286 int type;
2287 int ret;
2288
2289 key.objectid = bytenr;
2290 key.offset = (u64)-1;
2291 key.type = BTRFS_EXTENT_ITEM_KEY;
2292
2293 ret = btrfs_search_slot(NULL, extent_root, &key, path, 0, 0);
2294 if (ret < 0)
2295 goto out;
2296 BUG_ON(ret == 0); /* Corruption */
2297
2298 ret = -ENOENT;
2299 if (path->slots[0] == 0)
2300 goto out;
2301
2302 path->slots[0]--;
2303 leaf = path->nodes[0];
2304 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
2305
2306 if (key.objectid != bytenr || key.type != BTRFS_EXTENT_ITEM_KEY)
2307 goto out;
2308
2309 ret = 1;
2310 item_size = btrfs_item_size(leaf, path->slots[0]);
2311 ei = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item);
2312
2313 /* If extent item has more than 1 inline ref then it's shared */
2314 if (item_size != sizeof(*ei) +
2315 btrfs_extent_inline_ref_size(BTRFS_EXTENT_DATA_REF_KEY))
2316 goto out;
2317
2318 /*
2319  * If extent created before last snapshot => it's shared unless the
2320  * snapshot has been deleted. Use the heuristic if strict is false.
2321  */
2322 if (!strict &&
2323 (btrfs_extent_generation(leaf, ei) <=
2324 btrfs_root_last_snapshot(&root->root_item)))
2325 goto out;
2326
2327 iref = (struct btrfs_extent_inline_ref *)(ei + 1);
2328
2329 /* If this extent has SHARED_DATA_REF then it's shared */
2330 type = btrfs_get_extent_inline_ref_type(leaf, iref, BTRFS_REF_TYPE_DATA);
2331 if (type != BTRFS_EXTENT_DATA_REF_KEY)
2332 goto out;
2333
2334 ref = (struct btrfs_extent_data_ref *)(&iref->offset);
2335 if (btrfs_extent_refs(leaf, ei) !=
2336 btrfs_extent_data_ref_count(leaf, ref) ||
2337 btrfs_extent_data_ref_root(leaf, ref) !=
2338 root->root_key.objectid ||
2339 btrfs_extent_data_ref_objectid(leaf, ref) != objectid ||
2340 btrfs_extent_data_ref_offset(leaf, ref) != offset)
2341 goto out;
2342
2343 ret = 0;
2344 out:
2345 return ret;
2346 }
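/*
 * check_committed_ref() is the committed-tree counterpart of the check
 * above: 0 means the extent item holds exactly one inline EXTENT_DATA_REF
 * owned by (root, objectid, offset); 1 or -ENOENT means the extent is, or
 * may be, shared and the caller must assume a cross reference exists.
 */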
2347
2348 int btrfs_cross_ref_exist(struct btrfs_root *root, u64 objectid, u64 offset,
2349 u64 bytenr, bool strict, struct btrfs_path *path)
2350 {
2351 int ret;
2352
2353 do {
2354 ret = check_committed_ref(root, path, objectid,
2355 offset, bytenr, strict);
2356 if (ret && ret != -ENOENT)
2357 goto out;
2358
2359 ret = check_delayed_ref(root, path, objectid, offset, bytenr);
2360 } while (ret == -EAGAIN);
2361
2362 out:
2363 btrfs_release_path(path);
2364 if (btrfs_is_data_reloc_root(root))
2365 WARN_ON(ret > 0);
2366 return ret;
2367 }
2368
2369 static int __btrfs_mod_ref(struct btrfs_trans_handle *trans,
2370 struct btrfs_root *root,
2371 struct extent_buffer *buf,
2372 int full_backref, int inc)
2373 {
2374 struct btrfs_fs_info *fs_info = root->fs_info;
2375 u64 bytenr;
2376 u64 num_bytes;
2377 u64 parent;
2378 u64 ref_root;
2379 u32 nritems;
2380 struct btrfs_key key;
2381 struct btrfs_file_extent_item *fi;
2382 struct btrfs_ref generic_ref = { 0 };
2383 bool for_reloc = btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC);
2384 int i;
2385 int action;
2386 int level;
2387 int ret = 0;
2388
2389 if (btrfs_is_testing(fs_info))
2390 return 0;
2391
2392 ref_root = btrfs_header_owner(buf);
2393 nritems = btrfs_header_nritems(buf);
2394 level = btrfs_header_level(buf);
2395
2396 if (!test_bit(BTRFS_ROOT_SHAREABLE, &root->state) && level == 0)
2397 return 0;
2398
2399 if (full_backref)
2400 parent = buf->start;
2401 else
2402 parent = 0;
2403 if (inc)
2404 action = BTRFS_ADD_DELAYED_REF;
2405 else
2406 action = BTRFS_DROP_DELAYED_REF;
2407
2408 for (i = 0; i < nritems; i++) {
2409 if (level == 0) {
2410 btrfs_item_key_to_cpu(buf, &key, i);
2411 if (key.type != BTRFS_EXTENT_DATA_KEY)
2412 continue;
2413 fi = btrfs_item_ptr(buf, i,
2414 struct btrfs_file_extent_item);
2415 if (btrfs_file_extent_type(buf, fi) ==
2416 BTRFS_FILE_EXTENT_INLINE)
2417 continue;
2418 bytenr = btrfs_file_extent_disk_bytenr(buf, fi);
2419 if (bytenr == 0)
2420 continue;
2421
2422 num_bytes = btrfs_file_extent_disk_num_bytes(buf, fi);
2423 key.offset -= btrfs_file_extent_offset(buf, fi);
2424 btrfs_init_generic_ref(&generic_ref, action, bytenr,
2425 num_bytes, parent);
2426 btrfs_init_data_ref(&generic_ref, ref_root, key.objectid,
2427 key.offset, root->root_key.objectid,
2428 for_reloc);
2429 if (inc)
2430 ret = btrfs_inc_extent_ref(trans, &generic_ref);
2431 else
2432 ret = btrfs_free_extent(trans, &generic_ref);
2433 if (ret)
2434 goto fail;
2435 } else {
2436 bytenr = btrfs_node_blockptr(buf, i);
2437 num_bytes = fs_info->nodesize;
2438 btrfs_init_generic_ref(&generic_ref, action, bytenr,
2439 num_bytes, parent);
2440 btrfs_init_tree_ref(&generic_ref, level - 1, ref_root,
2441 root->root_key.objectid, for_reloc);
2442 if (inc)
2443 ret = btrfs_inc_extent_ref(trans, &generic_ref);
2444 else
2445 ret = btrfs_free_extent(trans, &generic_ref);
2446 if (ret)
2447 goto fail;
2448 }
2449 }
2450 return 0;
2451 fail:
2452 return ret;
2453 }
2454
2455 int btrfs_inc_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root,
2456 struct extent_buffer *buf, int full_backref)
2457 {
2458 return __btrfs_mod_ref(trans, root, buf, full_backref, 1);
2459 }
2460
2461 int btrfs_dec_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root,
2462 struct extent_buffer *buf, int full_backref)
2463 {
2464 return __btrfs_mod_ref(trans, root, buf, full_backref, 0);
2465 }
2466
2467 static u64 get_alloc_profile_by_root(struct btrfs_root *root, int data)
2468 {
2469 struct btrfs_fs_info *fs_info = root->fs_info;
2470 u64 flags;
2471 u64 ret;
2472
2473 if (data)
2474 flags = BTRFS_BLOCK_GROUP_DATA;
2475 else if (root == fs_info->chunk_root)
2476 flags = BTRFS_BLOCK_GROUP_SYSTEM;
2477 else
2478 flags = BTRFS_BLOCK_GROUP_METADATA;
2479
2480 ret = btrfs_get_alloc_profile(fs_info, flags);
2481 return ret;
2482 }
2483
2484 static u64 first_logical_byte(struct btrfs_fs_info *fs_info)
2485 {
2486 struct rb_node *leftmost;
2487 u64 bytenr = 0;
2488
2489 read_lock(&fs_info->block_group_cache_lock);
2490
2491 leftmost = rb_first_cached(&fs_info->block_group_cache_tree);
2492 if (leftmost) {
2493 struct btrfs_block_group *bg;
2494
2495 bg = rb_entry(leftmost, struct btrfs_block_group, cache_node);
2496 bytenr = bg->start;
2497 }
2498 read_unlock(&fs_info->block_group_cache_lock);
2499
2500 return bytenr;
2501 }
2502
2503 static int pin_down_extent(struct btrfs_trans_handle *trans,
2504 struct btrfs_block_group *cache,
2505 u64 bytenr, u64 num_bytes, int reserved)
2506 {
2507 struct btrfs_fs_info *fs_info = cache->fs_info;
2508
2509 spin_lock(&cache->space_info->lock);
2510 spin_lock(&cache->lock);
2511 cache->pinned += num_bytes;
2512 btrfs_space_info_update_bytes_pinned(fs_info, cache->space_info,
2513 num_bytes);
2514 if (reserved) {
2515 cache->reserved -= num_bytes;
2516 cache->space_info->bytes_reserved -= num_bytes;
2517 }
2518 spin_unlock(&cache->lock);
2519 spin_unlock(&cache->space_info->lock);
2520
2521 set_extent_dirty(&trans->transaction->pinned_extents, bytenr,
2522 bytenr + num_bytes - 1, GFP_NOFS | __GFP_NOFAIL);
2523 return 0;
2524 }
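/*
 * Accounting note: pinning moves @num_bytes from "reserved" (when
 * @reserved is set) to "pinned" in both the block group and its
 * space_info, and records the range in the transaction's pinned_extents
 * tree so that unpin_extent_range() returns it to the free space cache at
 * commit time.
 */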
2525
2526 int btrfs_pin_extent(struct btrfs_trans_handle *trans,
2527 u64 bytenr, u64 num_bytes, int reserved)
2528 {
2529 struct btrfs_block_group *cache;
2530
2531 cache = btrfs_lookup_block_group(trans->fs_info, bytenr);
2532 BUG_ON(!cache);
2533
2534 pin_down_extent(trans, cache, bytenr, num_bytes, reserved);
2535
2536 btrfs_put_block_group(cache);
2537 return 0;
2538 }
2539
2540 /*
2541  * This function must be called within a transaction.
2542  */
2543 int btrfs_pin_extent_for_log_replay(struct btrfs_trans_handle *trans,
2544 u64 bytenr, u64 num_bytes)
2545 {
2546 struct btrfs_block_group *cache;
2547 int ret;
2548
2549 cache = btrfs_lookup_block_group(trans->fs_info, bytenr);
2550 if (!cache)
2551 return -EINVAL;
2552
2553 /*
2554  * Fully cache the free space first so that our pin removes the free
2555  * space from the cache.
2556  */
2557 ret = btrfs_cache_block_group(cache, true);
2558 if (ret)
2559 goto out;
2560
2561 pin_down_extent(trans, cache, bytenr, num_bytes, 0);
2562
2563 /* Remove us from the free space cache (if we're there at all) */
2564 ret = btrfs_remove_free_space(cache, bytenr, num_bytes);
2565 out:
2566 btrfs_put_block_group(cache);
2567 return ret;
2568 }
2569
2570 static int __exclude_logged_extent(struct btrfs_fs_info *fs_info,
2571 u64 start, u64 num_bytes)
2572 {
2573 int ret;
2574 struct btrfs_block_group *block_group;
2575
2576 block_group = btrfs_lookup_block_group(fs_info, start);
2577 if (!block_group)
2578 return -EINVAL;
2579
2580 ret = btrfs_cache_block_group(block_group, true);
2581 if (ret)
2582 goto out;
2583
2584 ret = btrfs_remove_free_space(block_group, start, num_bytes);
2585 out:
2586 btrfs_put_block_group(block_group);
2587 return ret;
2588 }
2589
2590 int btrfs_exclude_logged_extents(struct extent_buffer *eb)
2591 {
2592 struct btrfs_fs_info *fs_info = eb->fs_info;
2593 struct btrfs_file_extent_item *item;
2594 struct btrfs_key key;
2595 int found_type;
2596 int i;
2597 int ret = 0;
2598
2599 if (!btrfs_fs_incompat(fs_info, MIXED_GROUPS))
2600 return 0;
2601
2602 for (i = 0; i < btrfs_header_nritems(eb); i++) {
2603 btrfs_item_key_to_cpu(eb, &key, i);
2604 if (key.type != BTRFS_EXTENT_DATA_KEY)
2605 continue;
2606 item = btrfs_item_ptr(eb, i, struct btrfs_file_extent_item);
2607 found_type = btrfs_file_extent_type(eb, item);
2608 if (found_type == BTRFS_FILE_EXTENT_INLINE)
2609 continue;
2610 if (btrfs_file_extent_disk_bytenr(eb, item) == 0)
2611 continue;
2612 key.objectid = btrfs_file_extent_disk_bytenr(eb, item);
2613 key.offset = btrfs_file_extent_disk_num_bytes(eb, item);
2614 ret = __exclude_logged_extent(fs_info, key.objectid, key.offset);
2615 if (ret)
2616 break;
2617 }
2618
2619 return ret;
2620 }
2621
2622 static void
2623 btrfs_inc_block_group_reservations(struct btrfs_block_group *bg)
2624 {
2625 atomic_inc(&bg->reservations);
2626 }
2627
2628 /*
2629  * Returns the free cluster for the given space info and sets empty_cluster
2630  * to what it should be based on the mount options.
2631  */
2632 static struct btrfs_free_cluster *
2633 fetch_cluster_info(struct btrfs_fs_info *fs_info,
2634 struct btrfs_space_info *space_info, u64 *empty_cluster)
2635 {
2636 struct btrfs_free_cluster *ret = NULL;
2637
2638 *empty_cluster = 0;
2639 if (btrfs_mixed_space_info(space_info))
2640 return ret;
2641
2642 if (space_info->flags & BTRFS_BLOCK_GROUP_METADATA) {
2643 ret = &fs_info->meta_alloc_cluster;
2644 if (btrfs_test_opt(fs_info, SSD))
2645 *empty_cluster = SZ_2M;
2646 else
2647 *empty_cluster = SZ_64K;
2648 } else if ((space_info->flags & BTRFS_BLOCK_GROUP_DATA) &&
2649 btrfs_test_opt(fs_info, SSD_SPREAD)) {
2650 *empty_cluster = SZ_2M;
2651 ret = &fs_info->data_alloc_cluster;
2652 }
2653
2654 return ret;
2655 }
2656
2657 static int unpin_extent_range(struct btrfs_fs_info *fs_info,
2658 u64 start, u64 end,
2659 const bool return_free_space)
2660 {
2661 struct btrfs_block_group *cache = NULL;
2662 struct btrfs_space_info *space_info;
2663 struct btrfs_block_rsv *global_rsv = &fs_info->global_block_rsv;
2664 struct btrfs_free_cluster *cluster = NULL;
2665 u64 len;
2666 u64 total_unpinned = 0;
2667 u64 empty_cluster = 0;
2668 bool readonly;
2669
2670 while (start <= end) {
2671 readonly = false;
2672 if (!cache ||
2673 start >= cache->start + cache->length) {
2674 if (cache)
2675 btrfs_put_block_group(cache);
2676 total_unpinned = 0;
2677 cache = btrfs_lookup_block_group(fs_info, start);
2678 BUG_ON(!cache);
2679
2680 cluster = fetch_cluster_info(fs_info,
2681 cache->space_info,
2682 &empty_cluster);
2683 empty_cluster <<= 1;
2684 }
2685
2686 len = cache->start + cache->length - start;
2687 len = min(len, end + 1 - start);
2688
2689 down_read(&fs_info->commit_root_sem);
2690 if (start < cache->last_byte_to_unpin && return_free_space) {
2691 u64 add_len = min(len, cache->last_byte_to_unpin - start);
2692
2693 btrfs_add_free_space(cache, start, add_len);
2694 }
2695 up_read(&fs_info->commit_root_sem);
2696
2697 start += len;
2698 total_unpinned += len;
2699 space_info = cache->space_info;
2700
2701 /*
2702  * If this space cluster has been marked as fragmented and we've
2703  * unpinned enough in this block group to potentially allow a
2704  * cluster to be created inside of it go ahead and see if that's
2705  * happened
2706  */
2707 if (cluster && cluster->fragmented &&
2708 total_unpinned > empty_cluster) {
2709 spin_lock(&cluster->lock);
2710 cluster->fragmented = 0;
2711 spin_unlock(&cluster->lock);
2712 }
2713
2714 spin_lock(&space_info->lock);
2715 spin_lock(&cache->lock);
2716 cache->pinned -= len;
2717 btrfs_space_info_update_bytes_pinned(fs_info, space_info, -len);
2718 space_info->max_extent_size = 0;
2719 if (cache->ro) {
2720 space_info->bytes_readonly += len;
2721 readonly = true;
2722 } else if (btrfs_is_zoned(fs_info)) {
2723 /* Need reset before reusing in a zoned block group */
2724 space_info->bytes_zone_unusable += len;
2725 readonly = true;
2726 }
2727 spin_unlock(&cache->lock);
2728 if (!readonly && return_free_space &&
2729 global_rsv->space_info == space_info) {
2730 spin_lock(&global_rsv->lock);
2731 if (!global_rsv->full) {
2732 u64 to_add = min(len, global_rsv->size -
2733 global_rsv->reserved);
2734
2735 global_rsv->reserved += to_add;
2736 btrfs_space_info_update_bytes_may_use(fs_info,
2737 space_info, to_add);
2738 if (global_rsv->reserved >= global_rsv->size)
2739 global_rsv->full = 1;
2740 len -= to_add;
2741 }
2742 spin_unlock(&global_rsv->lock);
2743 }
2744
2745 if (!readonly && return_free_space && len)
2746 btrfs_try_granting_tickets(fs_info, space_info);
2747 spin_unlock(&space_info->lock);
2748 }
2749
2750 if (cache)
2751 btrfs_put_block_group(cache);
2752 return 0;
2753 }
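/*
 * unpin_extent_range() walks [start, end] one block group at a time:
 * free space below last_byte_to_unpin is re-added to the free space cache,
 * pinned counters are decreased, the global block reservation is refilled
 * first, and whatever is left is offered to waiting reservation tickets
 * via btrfs_try_granting_tickets().
 */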
2754
2755 int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans)
2756 {
2757 struct btrfs_fs_info *fs_info = trans->fs_info;
2758 struct btrfs_block_group *block_group, *tmp;
2759 struct list_head *deleted_bgs;
2760 struct extent_io_tree *unpin;
2761 u64 start;
2762 u64 end;
2763 int ret;
2764
2765 unpin = &trans->transaction->pinned_extents;
2766
2767 while (!TRANS_ABORTED(trans)) {
2768 struct extent_state *cached_state = NULL;
2769
2770 mutex_lock(&fs_info->unused_bg_unpin_mutex);
2771 ret = find_first_extent_bit(unpin, 0, &start, &end,
2772 EXTENT_DIRTY, &cached_state);
2773 if (ret) {
2774 mutex_unlock(&fs_info->unused_bg_unpin_mutex);
2775 break;
2776 }
2777
2778 if (btrfs_test_opt(fs_info, DISCARD_SYNC))
2779 ret = btrfs_discard_extent(fs_info, start,
2780 end + 1 - start, NULL);
2781
2782 clear_extent_dirty(unpin, start, end, &cached_state);
2783 unpin_extent_range(fs_info, start, end, true);
2784 mutex_unlock(&fs_info->unused_bg_unpin_mutex);
2785 free_extent_state(cached_state);
2786 cond_resched();
2787 }
2788
2789 if (btrfs_test_opt(fs_info, DISCARD_ASYNC)) {
2790 btrfs_discard_calc_delay(&fs_info->discard_ctl);
2791 btrfs_discard_schedule_work(&fs_info->discard_ctl, true);
2792 }
2793
2794 /*
2795  * Transaction is finished.  We don't need the lock anymore.  We
2796  * do need to clean up the block groups in case of a transaction
2797  * abort.
2798  */
2799 deleted_bgs = &trans->transaction->deleted_bgs;
2800 list_for_each_entry_safe(block_group, tmp, deleted_bgs, bg_list) {
2801 u64 trimmed = 0;
2802
2803 ret = -EROFS;
2804 if (!TRANS_ABORTED(trans))
2805 ret = btrfs_discard_extent(fs_info,
2806 block_group->start,
2807 block_group->length,
2808 &trimmed);
2809
2810 list_del_init(&block_group->bg_list);
2811 btrfs_unfreeze_block_group(block_group);
2812 btrfs_put_block_group(block_group);
2813
2814 if (ret) {
2815 const char *errstr = btrfs_decode_error(ret);
2816 btrfs_warn(fs_info,
2817 "discard failed while removing blockgroup: errno=%d %s",
2818 ret, errstr);
2819 }
2820 }
2821
2822 return 0;
2823 }
2824
2825 static int do_free_extent_accounting(struct btrfs_trans_handle *trans,
2826 u64 bytenr, u64 num_bytes, bool is_data)
2827 {
2828 int ret;
2829
2830 if (is_data) {
2831 struct btrfs_root *csum_root;
2832
2833 csum_root = btrfs_csum_root(trans->fs_info, bytenr);
2834 ret = btrfs_del_csums(trans, csum_root, bytenr, num_bytes);
2835 if (ret) {
2836 btrfs_abort_transaction(trans, ret);
2837 return ret;
2838 }
2839 }
2840
2841 ret = add_to_free_space_tree(trans, bytenr, num_bytes);
2842 if (ret) {
2843 btrfs_abort_transaction(trans, ret);
2844 return ret;
2845 }
2846
2847 ret = btrfs_update_block_group(trans, bytenr, num_bytes, false);
2848 if (ret)
2849 btrfs_abort_transaction(trans, ret);
2850
2851 return ret;
2852 }
2853
2892 /*
2893  * Drop one or more refs of @node.
2894  *
2895  * 1. Locate the extent refs.
2896  *    It's either inline in the EXTENT/METADATA_ITEM or a keyed SHARED_* item.
2897  *    Locate it, then reduce the refs number or remove the ref line completely.
2898  *
2899  * 2. Update the refs count in the EXTENT/METADATA_ITEM.
2900  *
2901  * Inline backref case:
2902  *    The backref lives inside the extent item itself. Dropping the last
2903  *    ref of a root removes the inline ref via remove_extent_backref() and
2904  *    decreases the refs count of the extent item.
2905  *
2906  * Keyed backref case:
2907  *    The backref is a separate SHARED_DATA_REF/EXTENT_DATA_REF item
2908  *    following the extent item. When its count reaches zero the backref
2909  *    item is deleted, and once the extent item refs reach zero the extent
2910  *    item itself is deleted and the space is returned via
2911  *    do_free_extent_accounting().
2912  */
2913 static int __btrfs_free_extent(struct btrfs_trans_handle *trans,
2914 struct btrfs_delayed_ref_node *node, u64 parent,
2915 u64 root_objectid, u64 owner_objectid,
2916 u64 owner_offset, int refs_to_drop,
2917 struct btrfs_delayed_extent_op *extent_op)
2918 {
2919 struct btrfs_fs_info *info = trans->fs_info;
2920 struct btrfs_key key;
2921 struct btrfs_path *path;
2922 struct btrfs_root *extent_root;
2923 struct extent_buffer *leaf;
2924 struct btrfs_extent_item *ei;
2925 struct btrfs_extent_inline_ref *iref;
2926 int ret;
2927 int is_data;
2928 int extent_slot = 0;
2929 int found_extent = 0;
2930 int num_to_del = 1;
2931 u32 item_size;
2932 u64 refs;
2933 u64 bytenr = node->bytenr;
2934 u64 num_bytes = node->num_bytes;
2935 bool skinny_metadata = btrfs_fs_incompat(info, SKINNY_METADATA);
2936
2937 extent_root = btrfs_extent_root(info, bytenr);
2938 ASSERT(extent_root);
2939
2940 path = btrfs_alloc_path();
2941 if (!path)
2942 return -ENOMEM;
2943
2944 is_data = owner_objectid >= BTRFS_FIRST_FREE_OBJECTID;
2945
2946 if (!is_data && refs_to_drop != 1) {
2947 btrfs_crit(info,
2948 "invalid refs_to_drop, dropping more than 1 refs for tree block %llu refs_to_drop %u",
2949 node->bytenr, refs_to_drop);
2950 ret = -EINVAL;
2951 btrfs_abort_transaction(trans, ret);
2952 goto out;
2953 }
2954
2955 if (is_data)
2956 skinny_metadata = false;
2957
2958 ret = lookup_extent_backref(trans, path, &iref, bytenr, num_bytes,
2959 parent, root_objectid, owner_objectid,
2960 owner_offset);
2961 if (ret == 0) {
2962 /*
2963  * Either the inline backref or the SHARED_DATA_REF/
2964  * SHARED_BLOCK_REF is found
2965  *
2966  * Here is a quick path to locate EXTENT/METADATA_ITEM.
2967  * It's possible the EXTENT/METADATA_ITEM is near current slot.
2968  */
2969 extent_slot = path->slots[0];
2970 while (extent_slot >= 0) {
2971 btrfs_item_key_to_cpu(path->nodes[0], &key,
2972 extent_slot);
2973 if (key.objectid != bytenr)
2974 break;
2975 if (key.type == BTRFS_EXTENT_ITEM_KEY &&
2976 key.offset == num_bytes) {
2977 found_extent = 1;
2978 break;
2979 }
2980 if (key.type == BTRFS_METADATA_ITEM_KEY &&
2981 key.offset == owner_objectid) {
2982 found_extent = 1;
2983 break;
2984 }
2985
2986 /* Quick path didn't find the EXTENT/METADATA_ITEM */
2987 if (path->slots[0] - extent_slot > 5)
2988 break;
2989 extent_slot--;
2990 }
2991
2992 if (!found_extent) {
2993 if (iref) {
2994 btrfs_crit(info,
2995 "invalid iref, no EXTENT/METADATA_ITEM found but has inline extent ref");
2996 btrfs_abort_transaction(trans, -EUCLEAN);
2997 goto err_dump;
2998 }
2999
3000 ret = remove_extent_backref(trans, extent_root, path,
3001 NULL, refs_to_drop, is_data);
3002 if (ret) {
3003 btrfs_abort_transaction(trans, ret);
3004 goto out;
3005 }
3006 btrfs_release_path(path);
3007
3008 /* Slow path to locate EXTENT/METADATA_ITEM */
3009 key.objectid = bytenr;
3010 key.type = BTRFS_EXTENT_ITEM_KEY;
3011 key.offset = num_bytes;
3012
3013 if (!is_data && skinny_metadata) {
3014 key.type = BTRFS_METADATA_ITEM_KEY;
3015 key.offset = owner_objectid;
3016 }
3017
3018 ret = btrfs_search_slot(trans, extent_root,
3019 &key, path, -1, 1);
3020 if (ret > 0 && skinny_metadata && path->slots[0]) {
3021 /*
3022  * Couldn't find our skinny metadata item,
3023  * see if we have ye olde extent item.
3024  */
3025 path->slots[0]--;
3026 btrfs_item_key_to_cpu(path->nodes[0], &key,
3027 path->slots[0]);
3028 if (key.objectid == bytenr &&
3029 key.type == BTRFS_EXTENT_ITEM_KEY &&
3030 key.offset == num_bytes)
3031 ret = 0;
3032 }
3033
3034 if (ret > 0 && skinny_metadata) {
3035 skinny_metadata = false;
3036 key.objectid = bytenr;
3037 key.type = BTRFS_EXTENT_ITEM_KEY;
3038 key.offset = num_bytes;
3039 btrfs_release_path(path);
3040 ret = btrfs_search_slot(trans, extent_root,
3041 &key, path, -1, 1);
3042 }
3043
3044 if (ret) {
3045 btrfs_err(info,
3046 "umm, got %d back from search, was looking for %llu",
3047 ret, bytenr);
3048 if (ret > 0)
3049 btrfs_print_leaf(path->nodes[0]);
3050 }
3051 if (ret < 0) {
3052 btrfs_abort_transaction(trans, ret);
3053 goto out;
3054 }
3055 extent_slot = path->slots[0];
3056 }
3057 } else if (WARN_ON(ret == -ENOENT)) {
3058 btrfs_print_leaf(path->nodes[0]);
3059 btrfs_err(info,
3060 "unable to find ref byte nr %llu parent %llu root %llu owner %llu offset %llu",
3061 bytenr, parent, root_objectid, owner_objectid,
3062 owner_offset);
3063 btrfs_abort_transaction(trans, ret);
3064 goto out;
3065 } else {
3066 btrfs_abort_transaction(trans, ret);
3067 goto out;
3068 }
3069
3070 leaf = path->nodes[0];
3071 item_size = btrfs_item_size(leaf, extent_slot);
3072 if (unlikely(item_size < sizeof(*ei))) {
3073 ret = -EINVAL;
3074 btrfs_print_v0_err(info);
3075 btrfs_abort_transaction(trans, ret);
3076 goto out;
3077 }
3078 ei = btrfs_item_ptr(leaf, extent_slot,
3079 struct btrfs_extent_item);
3080 if (owner_objectid < BTRFS_FIRST_FREE_OBJECTID &&
3081 key.type == BTRFS_EXTENT_ITEM_KEY) {
3082 struct btrfs_tree_block_info *bi;
3083 if (item_size < sizeof(*ei) + sizeof(*bi)) {
3084 btrfs_crit(info,
3085 "invalid extent item size for key (%llu, %u, %llu) owner %llu, has %u expect >= %zu",
3086 key.objectid, key.type, key.offset,
3087 owner_objectid, item_size,
3088 sizeof(*ei) + sizeof(*bi));
3089 btrfs_abort_transaction(trans, -EUCLEAN);
3090 goto err_dump;
3091 }
3092 bi = (struct btrfs_tree_block_info *)(ei + 1);
3093 WARN_ON(owner_objectid != btrfs_tree_block_level(leaf, bi));
3094 }
3095
3096 refs = btrfs_extent_refs(leaf, ei);
3097 if (refs < refs_to_drop) {
3098 btrfs_crit(info,
3099 "trying to drop %d refs but we only have %llu for bytenr %llu",
3100 refs_to_drop, refs, bytenr);
3101 btrfs_abort_transaction(trans, -EUCLEAN);
3102 goto err_dump;
3103 }
3104 refs -= refs_to_drop;
3105
3106 if (refs > 0) {
3107 if (extent_op)
3108 __run_delayed_extent_op(extent_op, leaf, ei);
3109 /*
3110  * In the case of an inline backref the reference count will be
3111  * updated by remove_extent_backref().
3112  */
3113 if (iref) {
3114 if (!found_extent) {
3115 btrfs_crit(info,
3116 "invalid iref, got inlined extent ref but no EXTENT/METADATA_ITEM found");
3117 btrfs_abort_transaction(trans, -EUCLEAN);
3118 goto err_dump;
3119 }
3120 } else {
3121 btrfs_set_extent_refs(leaf, ei, refs);
3122 btrfs_mark_buffer_dirty(leaf);
3123 }
3124 if (found_extent) {
3125 ret = remove_extent_backref(trans, extent_root, path,
3126 iref, refs_to_drop, is_data);
3127 if (ret) {
3128 btrfs_abort_transaction(trans, ret);
3129 goto out;
3130 }
3131 }
3132 } else {
3133 /* In this branch refs == 1 */
3134 if (found_extent) {
3135 if (is_data && refs_to_drop !=
3136 extent_data_ref_count(path, iref)) {
3137 btrfs_crit(info,
3138 "invalid refs_to_drop, current refs %u refs_to_drop %u",
3139 extent_data_ref_count(path, iref),
3140 refs_to_drop);
3141 btrfs_abort_transaction(trans, -EUCLEAN);
3142 goto err_dump;
3143 }
3144 if (iref) {
3145 if (path->slots[0] != extent_slot) {
3146 btrfs_crit(info,
3147 "invalid iref, extent item key (%llu %u %llu) doesn't have wanted iref",
3148 key.objectid, key.type,
3149 key.offset);
3150 btrfs_abort_transaction(trans, -EUCLEAN);
3151 goto err_dump;
3152 }
3153 } else {
3154 /*
3155  * No inline ref, we must be at SHARED_* item,
3156  * And it's single ref, it must be:
3157  * |	extent_slot	  ||extent_slot + 1|
3158  * [ EXTENT/METADATA_ITEM ][ SHARED_* ITEM ]
3159  */
3160 if (path->slots[0] != extent_slot + 1) {
3161 btrfs_crit(info,
3162 "invalid SHARED_* item, previous item is not EXTENT/METADATA_ITEM");
3163 btrfs_abort_transaction(trans, -EUCLEAN);
3164 goto err_dump;
3165 }
3166 path->slots[0] = extent_slot;
3167 num_to_del = 2;
3168 }
3169 }
3170
3171 ret = btrfs_del_items(trans, extent_root, path, path->slots[0],
3172 num_to_del);
3173 if (ret) {
3174 btrfs_abort_transaction(trans, ret);
3175 goto out;
3176 }
3177 btrfs_release_path(path);
3178
3179 ret = do_free_extent_accounting(trans, bytenr, num_bytes, is_data);
3180 }
3181 btrfs_release_path(path);
3182
3183 out:
3184 btrfs_free_path(path);
3185 return ret;
3186 err_dump:
3187 /*
3188  * Leaf dump can take up a lot of log buffer, so we only do full leaf
3189  * dump for debug builds.
3190  */
3191 if (IS_ENABLED(CONFIG_BTRFS_DEBUG)) {
3192 btrfs_crit(info, "path->slots[0]=%d extent_slot=%d",
3193 path->slots[0], extent_slot);
3194 btrfs_print_leaf(path->nodes[0]);
3195 }
3196
3197 btrfs_free_path(path);
3198 return -EUCLEAN;
3199 }
3200
3201 /*
3202  * When we free a block, it is possible (and likely) that we free the last
3203  * delayed ref for that extent as well.  This searches the delayed ref tree
3204  * for a given extent, and if there are no other delayed refs to be
3205  * processed, it removes it from the tree.
3206  */
3207 static noinline int check_ref_cleanup(struct btrfs_trans_handle *trans,
3208 u64 bytenr)
3209 {
3210 struct btrfs_delayed_ref_head *head;
3211 struct btrfs_delayed_ref_root *delayed_refs;
3212 int ret = 0;
3213
3214 delayed_refs = &trans->transaction->delayed_refs;
3215 spin_lock(&delayed_refs->lock);
3216 head = btrfs_find_delayed_ref_head(delayed_refs, bytenr);
3217 if (!head)
3218 goto out_delayed_unlock;
3219
3220 spin_lock(&head->lock);
3221 if (!RB_EMPTY_ROOT(&head->ref_tree.rb_root))
3222 goto out;
3223
3224 if (cleanup_extent_op(head) != NULL)
3225 goto out;
3226
3227 /*
3228  * Waiting for the lock here would deadlock.  If someone else has it
3229  * locked they are already in the process of dropping it anyway.
3230  */
3231 if (!mutex_trylock(&head->mutex))
3232 goto out;
3233
3234 btrfs_delete_ref_head(delayed_refs, head);
3235 head->processing = 0;
3236
3237 spin_unlock(&head->lock);
3238 spin_unlock(&delayed_refs->lock);
3239
3240 BUG_ON(head->extent_op);
3241 if (head->must_insert_reserved)
3242 ret = 1;
3243
3244 btrfs_cleanup_ref_head_accounting(trans->fs_info, delayed_refs, head);
3245 mutex_unlock(&head->mutex);
3246 btrfs_put_delayed_ref_head(head);
3247 return ret;
3248 out:
3249 spin_unlock(&head->lock);
3250
3251 out_delayed_unlock:
3252 spin_unlock(&delayed_refs->lock);
3253 return 0;
3254 }
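/*
 * Only a return of 1 (the head existed, had no more refs queued, and was
 * must_insert_reserved) lets btrfs_free_tree_block() below free the space
 * immediately; 0 leaves the block to regular delayed ref processing.
 */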
3255
3256 void btrfs_free_tree_block(struct btrfs_trans_handle *trans,
3257 u64 root_id,
3258 struct extent_buffer *buf,
3259 u64 parent, int last_ref)
3260 {
3261 struct btrfs_fs_info *fs_info = trans->fs_info;
3262 struct btrfs_ref generic_ref = { 0 };
3263 int ret;
3264
3265 btrfs_init_generic_ref(&generic_ref, BTRFS_DROP_DELAYED_REF,
3266 buf->start, buf->len, parent);
3267 btrfs_init_tree_ref(&generic_ref, btrfs_header_level(buf),
3268 root_id, 0, false);
3269
3270 if (root_id != BTRFS_TREE_LOG_OBJECTID) {
3271 btrfs_ref_tree_mod(fs_info, &generic_ref);
3272 ret = btrfs_add_delayed_tree_ref(trans, &generic_ref, NULL);
3273 BUG_ON(ret);
3274 }
3275
3276 if (last_ref && btrfs_header_generation(buf) == trans->transid) {
3277 struct btrfs_block_group *cache;
3278 bool must_pin = false;
3279
3280 if (root_id != BTRFS_TREE_LOG_OBJECTID) {
3281 ret = check_ref_cleanup(trans, buf->start);
3282 if (!ret) {
3283 btrfs_redirty_list_add(trans->transaction, buf);
3284 goto out;
3285 }
3286 }
3287
3288 cache = btrfs_lookup_block_group(fs_info, buf->start);
3289
3290 if (btrfs_header_flag(buf, BTRFS_HEADER_FLAG_WRITTEN)) {
3291 pin_down_extent(trans, cache, buf->start, buf->len, 1);
3292 btrfs_put_block_group(cache);
3293 goto out;
3294 }
3295
3296 /*
3297  * If there are tree mod log users we may have recorded mod log
3298  * operations for this node.  If we re-allocate this node we could
3299  * replay operations on it that happened when it existed in a
3300  * completely different root.  For example, if it was part of root A,
3301  * then was reallocated to root B, and we are doing a
3302  * btrfs_old_search_slot(root B), we could replay operations that
3303  * happened when the block was part of root A, giving us an
3304  * inconsistent view of the btree.
3305  *
3306  * We are safe from races here because at this point no other node or
3307  * root points to this extent buffer, so a new tree mod log user
3308  * joining after this check cannot record operations on this leaf.
3309  */
3310 if (btrfs_header_level(buf) == 0 &&
3311 test_bit(BTRFS_FS_TREE_MOD_LOG_USERS, &fs_info->flags))
3312 must_pin = true;
3313
3314 if (must_pin || btrfs_is_zoned(fs_info)) {
3315 btrfs_redirty_list_add(trans->transaction, buf);
3316 pin_down_extent(trans, cache, buf->start, buf->len, 1);
3317 btrfs_put_block_group(cache);
3318 goto out;
3319 }
3320
3321 WARN_ON(test_bit(EXTENT_BUFFER_DIRTY, &buf->bflags));
3322
3323 btrfs_add_free_space(cache, buf->start, buf->len);
3324 btrfs_free_reserved_bytes(cache, buf->len, 0);
3325 btrfs_put_block_group(cache);
3326 trace_btrfs_reserved_extent_free(fs_info, buf->start, buf->len);
3327 }
3328 out:
3329 if (last_ref) {
3330 /*
3331  * Deleting the buffer, clear the corrupt flag since it doesn't
3332  * matter anymore.
3333  */
3334 clear_bit(EXTENT_BUFFER_CORRUPT, &buf->bflags);
3335 }
3336 }
3337
3338 /* Can return -ENOMEM */
3339 int btrfs_free_extent(struct btrfs_trans_handle *trans, struct btrfs_ref *ref)
3340 {
3341 struct btrfs_fs_info *fs_info = trans->fs_info;
3342 int ret;
3343
3344 if (btrfs_is_testing(fs_info))
3345 return 0;
3346
3347 /*
3348  * Tree log blocks never actually go into the extent allocation tree,
3349  * just accounting, so pin them directly instead of queuing delayed refs.
3350  */
3351 if ((ref->type == BTRFS_REF_METADATA &&
3352 ref->tree_ref.owning_root == BTRFS_TREE_LOG_OBJECTID) ||
3353 (ref->type == BTRFS_REF_DATA &&
3354 ref->data_ref.owning_root == BTRFS_TREE_LOG_OBJECTID)) {
3355 /* Pin the extent instead of queuing a delayed ref for it */
3356 btrfs_pin_extent(trans, ref->bytenr, ref->len, 1);
3357 ret = 0;
3358 } else if (ref->type == BTRFS_REF_METADATA) {
3359 ret = btrfs_add_delayed_tree_ref(trans, ref, NULL);
3360 } else {
3361 ret = btrfs_add_delayed_data_ref(trans, ref, 0);
3362 }
3363
3364 if (!((ref->type == BTRFS_REF_METADATA &&
3365 ref->tree_ref.owning_root == BTRFS_TREE_LOG_OBJECTID) ||
3366 (ref->type == BTRFS_REF_DATA &&
3367 ref->data_ref.owning_root == BTRFS_TREE_LOG_OBJECTID)))
3368 btrfs_ref_tree_mod(fs_info, ref);
3369
3370 return ret;
3371 }
3372
3373 enum btrfs_loop_type {
3374 LOOP_CACHING_NOWAIT,
3375 LOOP_CACHING_WAIT,
3376 LOOP_ALLOC_CHUNK,
3377 LOOP_NO_EMPTY_SIZE,
3378 };
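/*
 * The stages above escalate in order when a search comes up empty:
 * CACHING_NOWAIT scans only cached block groups, CACHING_WAIT blocks on
 * caching progress, ALLOC_CHUNK forces a new chunk, and NO_EMPTY_SIZE
 * retries with empty_size/empty_cluster dropped to zero (clustered policy
 * only).  See find_free_extent_update_loop().
 */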
3379
3380 static inline void
3381 btrfs_lock_block_group(struct btrfs_block_group *cache,
3382 int delalloc)
3383 {
3384 if (delalloc)
3385 down_read(&cache->data_rwsem);
3386 }
3387
3388 static inline void btrfs_grab_block_group(struct btrfs_block_group *cache,
3389 int delalloc)
3390 {
3391 btrfs_get_block_group(cache);
3392 if (delalloc)
3393 down_read(&cache->data_rwsem);
3394 }
3395
3396 static struct btrfs_block_group *btrfs_lock_cluster(
3397 struct btrfs_block_group *block_group,
3398 struct btrfs_free_cluster *cluster,
3399 int delalloc)
3400 __acquires(&cluster->refill_lock)
3401 {
3402 struct btrfs_block_group *used_bg = NULL;
3403
3404 spin_lock(&cluster->refill_lock);
3405 while (1) {
3406 used_bg = cluster->block_group;
3407 if (!used_bg)
3408 return NULL;
3409
3410 if (used_bg == block_group)
3411 return used_bg;
3412
3413 btrfs_get_block_group(used_bg);
3414
3415 if (!delalloc)
3416 return used_bg;
3417
3418 if (down_read_trylock(&used_bg->data_rwsem))
3419 return used_bg;
3420
3421 spin_unlock(&cluster->refill_lock);
3422
3423 /* We should only have one-level nested. */
3424 down_read_nested(&used_bg->data_rwsem, SINGLE_DEPTH_NESTING);
3425
3426 spin_lock(&cluster->refill_lock);
3427 if (used_bg == cluster->block_group)
3428 return used_bg;
3429
3430 up_read(&used_bg->data_rwsem);
3431 btrfs_put_block_group(used_bg);
3432 }
3433 }
3434
3435 static inline void
3436 btrfs_release_block_group(struct btrfs_block_group *cache,
3437 int delalloc)
3438 {
3439 if (delalloc)
3440 up_read(&cache->data_rwsem);
3441 btrfs_put_block_group(cache);
3442 }
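/*
 * The helpers above pair up: btrfs_grab_block_group() takes a reference
 * plus data_rwsem when @delalloc is set, btrfs_lock_block_group() takes
 * only the rwsem, and btrfs_release_block_group() undoes either one,
 * dropping the rwsem before the reference.
 */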
3443
3444 enum btrfs_extent_allocation_policy {
3445 BTRFS_EXTENT_ALLOC_CLUSTERED,
3446 BTRFS_EXTENT_ALLOC_ZONED,
3447 };
3448
3449 /*
3450  * Structure used internally for find_free_extent() function.  Wraps needed
3451  * parameters.
3452  */
3453 struct find_free_extent_ctl {
3454 /* Basic allocation info */
3455 u64 ram_bytes;
3456 u64 num_bytes;
3457 u64 min_alloc_size;
3458 u64 empty_size;
3459 u64 flags;
3460 int delalloc;
3461
3462 /* Where to start the search inside the bg */
3463 u64 search_start;
3464
3465 /* For clustered allocation */
3466 u64 empty_cluster;
3467 struct btrfs_free_cluster *last_ptr;
3468 bool use_cluster;
3469
3470 bool have_caching_bg;
3471 bool orig_have_caching_bg;
3472
3473 /* Allocation is called for tree-log */
3474 bool for_treelog;
3475
3476 /* Allocation is called for data relocation */
3477 bool for_data_reloc;
3478
3479 /* RAID index, converted from flags */
3480 int index;
3481
3482 /*
3483  * Current loop number, check find_free_extent_update_loop() for details
3484  */
3485 int loop;
3486
3487 /*
3488  * Whether we're refilling a cluster, if true we need to re-search the
3489  * current block group but don't try to refill the cluster again.
3490  */
3491 bool retry_clustered;
3492
3493 /*
3494  * Whether we're updating the free space cache, if true we need to
3495  * re-search the current block group but don't try updating it again.
3496  */
3497 bool retry_unclustered;
3498
3499 /* If current block group is cached */
3500 int cached;
3501
3502 /* Max contiguous hole found */
3503 u64 max_extent_size;
3504
3505 /* Total free space from free space cache, not always contiguous */
3506 u64 total_free_space;
3507
3508 /* Found result */
3509 u64 found_offset;
3510
3511 /* Hint where to start looking for an empty space */
3512 u64 hint_byte;
3513
3514 /* Allocation policy */
3515 enum btrfs_extent_allocation_policy policy;
3516 };
3517
3518
3519 /*
3520  * Helper function for find_free_extent().
3521  *
3522  * Return -ENOENT to inform caller that we need fallback to unclustered mode.
3523  * Return -EAGAIN to inform caller that we need to re-search this block group.
3524  * Return >0 to inform caller that we find nothing.
3525  * Return 0 means we have found a location and set ffe_ctl->found_offset.
3526  */
3527 static int find_free_extent_clustered(struct btrfs_block_group *bg,
3528 struct find_free_extent_ctl *ffe_ctl,
3529 struct btrfs_block_group **cluster_bg_ret)
3530 {
3531 struct btrfs_block_group *cluster_bg;
3532 struct btrfs_free_cluster *last_ptr = ffe_ctl->last_ptr;
3533 u64 aligned_cluster;
3534 u64 offset;
3535 int ret;
3536
3537 cluster_bg = btrfs_lock_cluster(bg, last_ptr, ffe_ctl->delalloc);
3538 if (!cluster_bg)
3539 goto refill_cluster;
3540 if (cluster_bg != bg && (cluster_bg->ro ||
3541 !block_group_bits(cluster_bg, ffe_ctl->flags)))
3542 goto release_cluster;
3543
3544 offset = btrfs_alloc_from_cluster(cluster_bg, last_ptr,
3545 ffe_ctl->num_bytes, cluster_bg->start,
3546 &ffe_ctl->max_extent_size);
3547 if (offset) {
3548 /* We have a block, we're done */
3549 spin_unlock(&last_ptr->refill_lock);
3550 trace_btrfs_reserve_extent_cluster(cluster_bg,
3551 ffe_ctl->search_start, ffe_ctl->num_bytes);
3552 *cluster_bg_ret = cluster_bg;
3553 ffe_ctl->found_offset = offset;
3554 return 0;
3555 }
3556 WARN_ON(last_ptr->block_group != cluster_bg);
3557
3558 release_cluster:
3559 /*
3560  * If we are on LOOP_NO_EMPTY_SIZE, we can't set up new clusters, so
3561  * let's just skip it and let the allocator find whatever block it can
3562  * find. If we reach this point, we will have tried the cluster
3563  * allocator plenty of times and not have found anything, so we are
3564  * likely way too fragmented for the clustering stuff to find anything.
3565  *
3566  * However, if the cluster is taken from the current block group,
3567  * release the cluster first, so that we stand a better chance of
3568  * succeeding in the unclustered allocation.
3569  */
3570 if (ffe_ctl->loop >= LOOP_NO_EMPTY_SIZE && cluster_bg != bg) {
3571 spin_unlock(&last_ptr->refill_lock);
3572 btrfs_release_block_group(cluster_bg, ffe_ctl->delalloc);
3573 return -ENOENT;
3574 }
3575
3576 /* This cluster didn't work out, free it and start over */
3577 btrfs_return_cluster_to_free_space(NULL, last_ptr);
3578
3579 if (cluster_bg != bg)
3580 btrfs_release_block_group(cluster_bg, ffe_ctl->delalloc);
3581
3582 refill_cluster:
3583 if (ffe_ctl->loop >= LOOP_NO_EMPTY_SIZE) {
3584 spin_unlock(&last_ptr->refill_lock);
3585 return -ENOENT;
3586 }
3587
3588 aligned_cluster = max_t(u64,
3589 ffe_ctl->empty_cluster + ffe_ctl->empty_size,
3590 bg->full_stripe_len);
3591 ret = btrfs_find_space_cluster(bg, last_ptr, ffe_ctl->search_start,
3592 ffe_ctl->num_bytes, aligned_cluster);
3593 if (ret == 0) {
3594 /* Now pull our allocation out of this cluster */
3595 offset = btrfs_alloc_from_cluster(bg, last_ptr,
3596 ffe_ctl->num_bytes, ffe_ctl->search_start,
3597 &ffe_ctl->max_extent_size);
3598 if (offset) {
3599 /* We found one, proceed */
3600 spin_unlock(&last_ptr->refill_lock);
3601 trace_btrfs_reserve_extent_cluster(bg,
3602 ffe_ctl->search_start,
3603 ffe_ctl->num_bytes);
3604 ffe_ctl->found_offset = offset;
3605 return 0;
3606 }
3607 } else if (!ffe_ctl->cached && ffe_ctl->loop > LOOP_CACHING_NOWAIT &&
3608 !ffe_ctl->retry_clustered) {
3609 spin_unlock(&last_ptr->refill_lock);
3610
3611 ffe_ctl->retry_clustered = true;
3612 btrfs_wait_block_group_cache_progress(bg, ffe_ctl->num_bytes +
3613 ffe_ctl->empty_cluster + ffe_ctl->empty_size);
3614 return -EAGAIN;
3615 }
3616
3617 /*
3618  * We didn't find a cluster or couldn't allocate from it.  Free the
3619  * cluster we've been trying to use and go to the next block group.
3620  */
3621 btrfs_return_cluster_to_free_space(NULL, last_ptr);
3622 spin_unlock(&last_ptr->refill_lock);
3623 return 1;
3624 }
3625
3626 /*
3627  * Return >0 to inform caller that we find nothing
3628  * Return 0 when we found a free extent and set ffe_ctl->found_offset
3629  * Return -EAGAIN to inform caller that we need to re-search this block group
3630  */
3631 static int find_free_extent_unclustered(struct btrfs_block_group *bg,
3632 struct find_free_extent_ctl *ffe_ctl)
3633 {
3634 struct btrfs_free_cluster *last_ptr = ffe_ctl->last_ptr;
3635 u64 offset;
3636
3637 /*
3638  * We are doing an unclustered allocation, set the fragmented flag so
3639  * we don't bother trying to set up a cluster again until we get more
3640  * space.
3641  */
3642 if (unlikely(last_ptr)) {
3643 spin_lock(&last_ptr->lock);
3644 last_ptr->fragmented = 1;
3645 spin_unlock(&last_ptr->lock);
3646 }
3647 if (ffe_ctl->cached) {
3648 struct btrfs_free_space_ctl *free_space_ctl;
3649
3650 free_space_ctl = bg->free_space_ctl;
3651 spin_lock(&free_space_ctl->tree_lock);
3652 if (free_space_ctl->free_space <
3653 ffe_ctl->num_bytes + ffe_ctl->empty_cluster +
3654 ffe_ctl->empty_size) {
3655 ffe_ctl->total_free_space = max_t(u64,
3656 ffe_ctl->total_free_space,
3657 free_space_ctl->free_space);
3658 spin_unlock(&free_space_ctl->tree_lock);
3659 return 1;
3660 }
3661 spin_unlock(&free_space_ctl->tree_lock);
3662 }
3663
3664 offset = btrfs_find_space_for_alloc(bg, ffe_ctl->search_start,
3665 ffe_ctl->num_bytes, ffe_ctl->empty_size,
3666 &ffe_ctl->max_extent_size);
3667
3668 /*
3669  * If we didn't find a chunk, and we haven't failed on this block group
3670  * before, and this block group is in the middle of caching and we are
3671  * ok with waiting, then go ahead and wait for progress to be made, and
3672  * set @retry_unclustered to true.
3673  *
3674  * If @retry_unclustered is true then we've already waited on this
3675  * block group once and should move on to the next block group.
3676  */
3677 if (!offset && !ffe_ctl->retry_unclustered && !ffe_ctl->cached &&
3678 ffe_ctl->loop > LOOP_CACHING_NOWAIT) {
3679 btrfs_wait_block_group_cache_progress(bg, ffe_ctl->num_bytes +
3680 ffe_ctl->empty_size);
3681 ffe_ctl->retry_unclustered = true;
3682 return -EAGAIN;
3683 } else if (!offset) {
3684 return 1;
3685 }
3686 ffe_ctl->found_offset = offset;
3687 return 0;
3688 }
3689
3690 static int do_allocation_clustered(struct btrfs_block_group *block_group,
3691 struct find_free_extent_ctl *ffe_ctl,
3692 struct btrfs_block_group **bg_ret)
3693 {
3694 int ret;
3695
3696 /* We want to try and use the cluster allocator, so let's look there */
3697 if (ffe_ctl->last_ptr && ffe_ctl->use_cluster) {
3698 ret = find_free_extent_clustered(block_group, ffe_ctl, bg_ret);
3699 if (ret >= 0 || ret == -EAGAIN)
3700 return ret;
3701 /* ret == -ENOENT case falls through */
3702 }
3703
3704 return find_free_extent_unclustered(block_group, ffe_ctl);
3705 }
3706
3707 /*
3708  * Tree-log block group locking
3709  * ============================
3710  *
3711  * fs_info::treelog_bg_lock protects the fs_info::treelog_bg which
3712  * indicates the starting address of a block group, which is reserved
3713  * only for tree-log metadata.
3714  *
3715  * Lock nesting
3716  * ============
3717  *
3718  * space_info::lock
3719  *   block_group::lock
3720  *     fs_info::treelog_bg_lock
3721  */
3722
3723 /*
3724  * Simple allocator for sequential-only block group. It only allows
3725  * sequential allocation. No need to play with trees. This function
3726  * also reserves the bytes as in btrfs_add_reserved_bytes.
3727  */
3728 static int do_allocation_zoned(struct btrfs_block_group *block_group,
3729 struct find_free_extent_ctl *ffe_ctl,
3730 struct btrfs_block_group **bg_ret)
3731 {
3732 struct btrfs_fs_info *fs_info = block_group->fs_info;
3733 struct btrfs_space_info *space_info = block_group->space_info;
3734 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
3735 u64 start = block_group->start;
3736 u64 num_bytes = ffe_ctl->num_bytes;
3737 u64 avail;
3738 u64 bytenr = block_group->start;
3739 u64 log_bytenr;
3740 u64 data_reloc_bytenr;
3741 int ret = 0;
3742 bool skip = false;
3743
3744 ASSERT(btrfs_is_zoned(block_group->fs_info));
3745
3746 /*
3747  * Do not allow non-tree-log blocks in the dedicated tree-log block
3748  * group, and vice versa.
3749  */
3750 spin_lock(&fs_info->treelog_bg_lock);
3751 log_bytenr = fs_info->treelog_bg;
3752 if (log_bytenr && ((ffe_ctl->for_treelog && bytenr != log_bytenr) ||
3753 (!ffe_ctl->for_treelog && bytenr == log_bytenr)))
3754 skip = true;
3755 spin_unlock(&fs_info->treelog_bg_lock);
3756 if (skip)
3757 return 1;
3758
3759 /*
3760  * Do not allow non-relocation blocks in the dedicated relocation block
3761  * group, and vice versa.
3762  */
3763 spin_lock(&fs_info->relocation_bg_lock);
3764 data_reloc_bytenr = fs_info->data_reloc_bg;
3765 if (data_reloc_bytenr &&
3766 ((ffe_ctl->for_data_reloc && bytenr != data_reloc_bytenr) ||
3767 (!ffe_ctl->for_data_reloc && bytenr == data_reloc_bytenr)))
3768 skip = true;
3769 spin_unlock(&fs_info->relocation_bg_lock);
3770 if (skip)
3771 return 1;
3772
3773 /* Check RO and no space case before trying to activate it */
3774 spin_lock(&block_group->lock);
3775 if (block_group->ro || btrfs_zoned_bg_is_full(block_group)) {
3776 ret = 1;
3777 /*
3778  * May need to clear fs_info->{treelog,data_reloc}_bg.
3779  * Return the error after taking the locks.
3780  */
3781 }
3782 spin_unlock(&block_group->lock);
3783
3784 if (!ret && !btrfs_zone_activate(block_group)) {
3785 ret = 1;
3786 /*
3787  * May need to clear fs_info->{treelog,data_reloc}_bg.
3788  * Return the error after taking the locks.
3789  */
3790 }
3791
3792 spin_lock(&space_info->lock);
3793 spin_lock(&block_group->lock);
3794 spin_lock(&fs_info->treelog_bg_lock);
3795 spin_lock(&fs_info->relocation_bg_lock);
3796
3797 if (ret)
3798 goto out;
3799
3800 ASSERT(!ffe_ctl->for_treelog ||
3801 block_group->start == fs_info->treelog_bg ||
3802 fs_info->treelog_bg == 0);
3803 ASSERT(!ffe_ctl->for_data_reloc ||
3804 block_group->start == fs_info->data_reloc_bg ||
3805 fs_info->data_reloc_bg == 0);
3806
3807 if (block_group->ro || block_group->zoned_data_reloc_ongoing) {
3808 ret = 1;
3809 goto out;
3810 }
3811
3812 /*
3813  * Do not allow a currently used block group to become the tree-log
3814  * dedicated block group.
3815  */
3816 if (ffe_ctl->for_treelog && !fs_info->treelog_bg &&
3817 (block_group->used || block_group->reserved)) {
3818 ret = 1;
3819 goto out;
3820 }
3821
3822 /*
3823  * Do not allow a currently used block group to become the data
3824  * relocation dedicated block group.
3825  */
3826 if (ffe_ctl->for_data_reloc && !fs_info->data_reloc_bg &&
3827 (block_group->used || block_group->reserved)) {
3828 ret = 1;
3829 goto out;
3830 }
3831
3832 WARN_ON_ONCE(block_group->alloc_offset > block_group->zone_capacity);
3833 avail = block_group->zone_capacity - block_group->alloc_offset;
3834 if (avail < num_bytes) {
3835 if (ffe_ctl->max_extent_size < avail) {
3836 /*
3837  * With the sequential allocator, free space is always
3838  * contiguous.
3839  */
3840 ffe_ctl->max_extent_size = avail;
3841 ffe_ctl->total_free_space = avail;
3842 }
3843 ret = 1;
3844 goto out;
3845 }
3846
3847 if (ffe_ctl->for_treelog && !fs_info->treelog_bg)
3848 fs_info->treelog_bg = block_group->start;
3849
3850 if (ffe_ctl->for_data_reloc && !fs_info->data_reloc_bg)
3851 fs_info->data_reloc_bg = block_group->start;
3852
3853 ffe_ctl->found_offset = start + block_group->alloc_offset;
3854 block_group->alloc_offset += num_bytes;
3855 spin_lock(&ctl->tree_lock);
3856 ctl->free_space -= num_bytes;
3857 spin_unlock(&ctl->tree_lock);
3858
3859 /*
3860  * We do not check if found_offset is aligned to stripesize. The
3861  * address is anyway rewritten when using zone append writing.
3862  */
3863
3864 ffe_ctl->search_start = ffe_ctl->found_offset;
3865
3866 out:
3867 if (ret && ffe_ctl->for_treelog)
3868 fs_info->treelog_bg = 0;
3869 if (ret && ffe_ctl->for_data_reloc &&
3870 fs_info->data_reloc_bg == block_group->start) {
3871 /*
3872  * Do not allow further allocations from this block group.
3873  * Compared to increasing the ->ro, setting the
3874  * ->zoned_data_reloc_ongoing flag still allows nocow
3875  * writers to come in. See btrfs_inc_nocow_writers().
3876  *
3877  * We need to disable an allocation to avoid an allocation of
3878  * a new block group to come in. Writing a new data extent
3879  * into this block group after the relocation wrote its last
3880  * extent would land behind the relocated data in the
3881  * sequential zone, breaking the expected write order on
3882  * zoned devices.
3883  */
3884 block_group->zoned_data_reloc_ongoing = 1;
3885 fs_info->data_reloc_bg = 0;
3886 }
3887 spin_unlock(&fs_info->relocation_bg_lock);
3888 spin_unlock(&fs_info->treelog_bg_lock);
3889 spin_unlock(&block_group->lock);
3890 spin_unlock(&space_info->lock);
3891 return ret;
3892 }
3893
3894 static int do_allocation(struct btrfs_block_group *block_group,
3895 struct find_free_extent_ctl *ffe_ctl,
3896 struct btrfs_block_group **bg_ret)
3897 {
3898 switch (ffe_ctl->policy) {
3899 case BTRFS_EXTENT_ALLOC_CLUSTERED:
3900 return do_allocation_clustered(block_group, ffe_ctl, bg_ret);
3901 case BTRFS_EXTENT_ALLOC_ZONED:
3902 return do_allocation_zoned(block_group, ffe_ctl, bg_ret);
3903 default:
3904 BUG();
3905 }
3906 }
3907
3908 static void release_block_group(struct btrfs_block_group *block_group,
3909 struct find_free_extent_ctl *ffe_ctl,
3910 int delalloc)
3911 {
3912 switch (ffe_ctl->policy) {
3913 case BTRFS_EXTENT_ALLOC_CLUSTERED:
3914 ffe_ctl->retry_clustered = false;
3915 ffe_ctl->retry_unclustered = false;
3916 break;
3917 case BTRFS_EXTENT_ALLOC_ZONED:
3918 /* Nothing to do */
3919 break;
3920 default:
3921 BUG();
3922 }
3923
3924 BUG_ON(btrfs_bg_flags_to_raid_index(block_group->flags) !=
3925 ffe_ctl->index);
3926 btrfs_release_block_group(block_group, delalloc);
3927 }
3928
3929 static void found_extent_clustered(struct find_free_extent_ctl *ffe_ctl,
3930 struct btrfs_key *ins)
3931 {
3932 struct btrfs_free_cluster *last_ptr = ffe_ctl->last_ptr;
3933
3934 if (!ffe_ctl->use_cluster && last_ptr) {
3935 spin_lock(&last_ptr->lock);
3936 last_ptr->window_start = ins->objectid;
3937 spin_unlock(&last_ptr->lock);
3938 }
3939 }
3940
3941 static void found_extent(struct find_free_extent_ctl *ffe_ctl,
3942 struct btrfs_key *ins)
3943 {
3944 switch (ffe_ctl->policy) {
3945 case BTRFS_EXTENT_ALLOC_CLUSTERED:
3946 found_extent_clustered(ffe_ctl, ins);
3947 break;
3948 case BTRFS_EXTENT_ALLOC_ZONED:
3949 /* Nothing to do */
3950 break;
3951 default:
3952 BUG();
3953 }
3954 }
3955
3956 static int can_allocate_chunk_zoned(struct btrfs_fs_info *fs_info,
3957 struct find_free_extent_ctl *ffe_ctl)
3958 {
3959 /* If we can activate a new zone, just allocate a chunk and use it */
3960 if (btrfs_can_activate_zone(fs_info->fs_devices, ffe_ctl->flags))
3961 return 0;
3962
3963 /*
3964  * We already reached the max active zones. Try to finish one block
3965  * group to make a room for a new block group. This is only possible
3966  * for a data block group because btrfs_zone_finish() may need to wait
3967  * for a running transaction which can cause a deadlock for metadata
3968  * allocation.
3969  */
3970 if (ffe_ctl->flags & BTRFS_BLOCK_GROUP_DATA) {
3971 int ret = btrfs_zone_finish_one_bg(fs_info);
3972
3973 if (ret == 1)
3974 return 0;
3975 else if (ret < 0)
3976 return ret;
3977 }
3978
3979 /*
3980  * If we have enough free space left in an already active block group
3981  * and we can't activate any other zone now, do not allow allocating a
3982  * new chunk and let find_free_extent() retry with a smaller size.
3983  */
3984 if (ffe_ctl->max_extent_size >= ffe_ctl->min_alloc_size)
3985 return -ENOSPC;
3986
3987 /*
3988  * Even min_alloc_size is not left in any block groups. Since we cannot
3989  * activate a new block group, allocating it may not help. Let's tell a
3990  * caller to try again and hope it progresses something by writing some
3991  * parts of the region. That is only possible for data block groups,
3992  * where a part of the region can be written.
3993  */
3994 if (ffe_ctl->flags & BTRFS_BLOCK_GROUP_DATA)
3995 return -EAGAIN;
3996
3997 /*
3998  * We cannot activate a new block group and there is not enough space
3999  * left in any block groups, so allocating a new block group may not
4000  * help. But there is nothing to do anyway, so let's go with it.
4001  */
4002 return 0;
4003 }
4004
4005 static int can_allocate_chunk(struct btrfs_fs_info *fs_info,
4006 struct find_free_extent_ctl *ffe_ctl)
4007 {
4008 switch (ffe_ctl->policy) {
4009 case BTRFS_EXTENT_ALLOC_CLUSTERED:
4010 return 0;
4011 case BTRFS_EXTENT_ALLOC_ZONED:
4012 return can_allocate_chunk_zoned(fs_info, ffe_ctl);
4013 default:
4014 BUG();
4015 }
4016 }
4017
4018 static int chunk_allocation_failed(struct find_free_extent_ctl *ffe_ctl)
4019 {
4020 switch (ffe_ctl->policy) {
4021 case BTRFS_EXTENT_ALLOC_CLUSTERED:
4022 /*
4023  * If we can't allocate a new chunk we've already looped through
4024  * at least once, move on to the NO_EMPTY_SIZE case.
4025  */
4026 ffe_ctl->loop = LOOP_NO_EMPTY_SIZE;
4027 return 0;
4028 case BTRFS_EXTENT_ALLOC_ZONED:
4029 /* Give up here */
4030 return -ENOSPC;
4031 default:
4032 BUG();
4033 }
4034 }
4035
4036 /*
4037  * Return >0 means caller needs to re-search for free extent
4038  * Return 0 means we have the needed free extent.
4039  * Return <0 means we failed to locate free extent.
4040  */
4041 static int find_free_extent_update_loop(struct btrfs_fs_info *fs_info,
4042 struct btrfs_key *ins,
4043 struct find_free_extent_ctl *ffe_ctl,
4044 bool full_search)
4045 {
4046 struct btrfs_root *root = fs_info->chunk_root;
4047 int ret;
4048
4049 if ((ffe_ctl->loop == LOOP_CACHING_NOWAIT) &&
4050 ffe_ctl->have_caching_bg && !ffe_ctl->orig_have_caching_bg)
4051 ffe_ctl->orig_have_caching_bg = true;
4052
4053 if (ins->objectid) {
4054 found_extent(ffe_ctl, ins);
4055 return 0;
4056 }
4057
4058 if (ffe_ctl->loop >= LOOP_CACHING_WAIT && ffe_ctl->have_caching_bg)
4059 return 1;
4060
4061 ffe_ctl->index++;
4062 if (ffe_ctl->index < BTRFS_NR_RAID_TYPES)
4063 return 1;
4064
4065 /*
4066  * LOOP_CACHING_NOWAIT, search partially cached block groups, kicking
4067  *			caching kthreads as we move along
4068  * LOOP_CACHING_WAIT, search everything, and wait if our bg is caching
4069  * LOOP_ALLOC_CHUNK, force a chunk allocation and try again
4070  * LOOP_NO_EMPTY_SIZE, set empty_size and empty_cluster to 0 and try
4071  *			again
4072  */
4073 if (ffe_ctl->loop < LOOP_NO_EMPTY_SIZE) {
4074 ffe_ctl->index = 0;
4075 if (ffe_ctl->loop == LOOP_CACHING_NOWAIT) {
4076 /*
4077  * We want to skip the LOOP_CACHING_WAIT step if we
4078  * don't have any uncached bgs and we've already done a
4079  * full search through.
4080  */
4081 if (ffe_ctl->orig_have_caching_bg || !full_search)
4082 ffe_ctl->loop = LOOP_CACHING_WAIT;
4083 else
4084 ffe_ctl->loop = LOOP_ALLOC_CHUNK;
4085 } else {
4086 ffe_ctl->loop++;
4087 }
4088
4089 if (ffe_ctl->loop == LOOP_ALLOC_CHUNK) {
4090 struct btrfs_trans_handle *trans;
4091 int exist = 0;
4092
4093 /* Check if allocation policy allows to create a new chunk */
4094 ret = can_allocate_chunk(fs_info, ffe_ctl);
4095 if (ret)
4096 return ret;
4097
4098 trans = current->journal_info;
4099 if (trans)
4100 exist = 1;
4101 else
4102 trans = btrfs_join_transaction(root);
4103
4104 if (IS_ERR(trans)) {
4105 ret = PTR_ERR(trans);
4106 return ret;
4107 }
4108
4109 ret = btrfs_chunk_alloc(trans, ffe_ctl->flags,
4110 CHUNK_ALLOC_FORCE_FOR_EXTENT);
4111
4112 /* Do not bail out on ENOSPC since we can do more. */
4113 if (ret == -ENOSPC)
4114 ret = chunk_allocation_failed(ffe_ctl);
4115 else if (ret < 0)
4116 btrfs_abort_transaction(trans, ret);
4117 else
4118 ret = 0;
4119 if (!exist)
4120 btrfs_end_transaction(trans);
4121 if (ret)
4122 return ret;
4123 }
4124
4125 if (ffe_ctl->loop == LOOP_NO_EMPTY_SIZE) {
4126 if (ffe_ctl->policy != BTRFS_EXTENT_ALLOC_CLUSTERED)
4127 return -ENOSPC;
4128
4129 /*
4130  * Don't loop again if we already have no empty_size and
4131  * no empty_cluster.
4132  */
4133 if (ffe_ctl->empty_size == 0 &&
4134 ffe_ctl->empty_cluster == 0)
4135 return -ENOSPC;
4136 ffe_ctl->empty_size = 0;
4137 ffe_ctl->empty_cluster = 0;
4138 }
4139 return 1;
4140 }
4141 return -ENOSPC;
4142 }
4143
4144 static int prepare_allocation_clustered(struct btrfs_fs_info *fs_info,
4145 struct find_free_extent_ctl *ffe_ctl,
4146 struct btrfs_space_info *space_info,
4147 struct btrfs_key *ins)
4148 {
4149 /*
4150  * If our free space is heavily fragmented we may not be able to make
4151  * big contiguous allocations, so instead of doing the expensive search
4152  * for free space, simply return ENOSPC with our max_extent_size so we
4153  * can go ahead and search for a more manageable chunk.
4154  *
4155  * If our max_extent_size is large enough for our allocation simply
4156  * disable clustering since we will likely not be able to find enough
4157  * space to create a cluster and induce latency trying.
4158  */
4159 if (space_info->max_extent_size) {
4160 spin_lock(&space_info->lock);
4161 if (space_info->max_extent_size &&
4162 ffe_ctl->num_bytes > space_info->max_extent_size) {
4163 ins->offset = space_info->max_extent_size;
4164 spin_unlock(&space_info->lock);
4165 return -ENOSPC;
4166 } else if (space_info->max_extent_size) {
4167 ffe_ctl->use_cluster = false;
4168 }
4169 spin_unlock(&space_info->lock);
4170 }
4171
4172 ffe_ctl->last_ptr = fetch_cluster_info(fs_info, space_info,
4173 &ffe_ctl->empty_cluster);
4174 if (ffe_ctl->last_ptr) {
4175 struct btrfs_free_cluster *last_ptr = ffe_ctl->last_ptr;
4176
4177 spin_lock(&last_ptr->lock);
4178 if (last_ptr->block_group)
4179 ffe_ctl->hint_byte = last_ptr->window_start;
4180 if (last_ptr->fragmented) {
4181 /*
4182  * We still set window_start so we can keep track of the
4183  * last place we found an allocation to try and save
4184  * some time.
4185  */
4186 ffe_ctl->hint_byte = last_ptr->window_start;
4187 ffe_ctl->use_cluster = false;
4188 }
4189 spin_unlock(&last_ptr->lock);
4190 }
4191
4192 return 0;
4193 }
4194
4195 static int prepare_allocation(struct btrfs_fs_info *fs_info,
4196 struct find_free_extent_ctl *ffe_ctl,
4197 struct btrfs_space_info *space_info,
4198 struct btrfs_key *ins)
4199 {
4200 switch (ffe_ctl->policy) {
4201 case BTRFS_EXTENT_ALLOC_CLUSTERED:
4202 return prepare_allocation_clustered(fs_info, ffe_ctl,
4203 space_info, ins);
4204 case BTRFS_EXTENT_ALLOC_ZONED:
4205 if (ffe_ctl->for_treelog) {
4206 spin_lock(&fs_info->treelog_bg_lock);
4207 if (fs_info->treelog_bg)
4208 ffe_ctl->hint_byte = fs_info->treelog_bg;
4209 spin_unlock(&fs_info->treelog_bg_lock);
4210 }
4211 if (ffe_ctl->for_data_reloc) {
4212 spin_lock(&fs_info->relocation_bg_lock);
4213 if (fs_info->data_reloc_bg)
4214 ffe_ctl->hint_byte = fs_info->data_reloc_bg;
4215 spin_unlock(&fs_info->relocation_bg_lock);
4216 }
4217 return 0;
4218 default:
4219 BUG();
4220 }
4221 }
4222
4223 /*
4224  * walks the btree of allocated extents and finds a hole of a given size.
4225  * The key ins is changed to record the hole:
4226  * ins->objectid == start position
4227  * ins->flags = BTRFS_EXTENT_ITEM_KEY
4228  * ins->offset == the size of the hole.
4229  * Any available blocks before search_start are skipped.
4230  *
4231  * If there is no suitable free space, we will record the max size of
4232  * the free space extent currently.
4233  *
4234  * The overall logic and call chain:
4235  *
4236  * find_free_extent()
4237  * |- Iterate through all block groups
4238  * |  |- Get a valid block group
4239  * |  |- Try to do clustered allocation in that block group
4240  * |  |- Try to do unclustered allocation in that block group
4241  * |  |- Check if the result is valid
4242  * |  |  |- If valid, then exit
4243  * |  |- Jump to next block group
4244  * |
4245  * |- Push harder to find free extents
4246  *    |- If not found, re-iterate all block groups
4247  */
4248 static noinline int find_free_extent(struct btrfs_root *root,
4249 struct btrfs_key *ins,
4250 struct find_free_extent_ctl *ffe_ctl)
4251 {
4252 struct btrfs_fs_info *fs_info = root->fs_info;
4253 int ret = 0;
4254 int cache_block_group_error = 0;
4255 struct btrfs_block_group *block_group = NULL;
4256 struct btrfs_space_info *space_info;
4257 bool full_search = false;
4258
4259 WARN_ON(ffe_ctl->num_bytes < fs_info->sectorsize);
4260
4261 ffe_ctl->search_start = 0;
4262 /* For clustered allocation */
4263 ffe_ctl->empty_cluster = 0;
4264 ffe_ctl->last_ptr = NULL;
4265 ffe_ctl->use_cluster = true;
4266 ffe_ctl->have_caching_bg = false;
4267 ffe_ctl->orig_have_caching_bg = false;
4268 ffe_ctl->index = btrfs_bg_flags_to_raid_index(ffe_ctl->flags);
4269 ffe_ctl->loop = 0;
4270 /* For clustered allocation */
4271 ffe_ctl->retry_clustered = false;
4272 ffe_ctl->retry_unclustered = false;
4273 ffe_ctl->cached = 0;
4274 ffe_ctl->max_extent_size = 0;
4275 ffe_ctl->total_free_space = 0;
4276 ffe_ctl->found_offset = 0;
4277 ffe_ctl->policy = BTRFS_EXTENT_ALLOC_CLUSTERED;
4278
4279 if (btrfs_is_zoned(fs_info))
4280 ffe_ctl->policy = BTRFS_EXTENT_ALLOC_ZONED;
4281
4282 ins->type = BTRFS_EXTENT_ITEM_KEY;
4283 ins->objectid = 0;
4284 ins->offset = 0;
4285
4286 trace_find_free_extent(root, ffe_ctl->num_bytes, ffe_ctl->empty_size,
4287 ffe_ctl->flags);
4288
4289 space_info = btrfs_find_space_info(fs_info, ffe_ctl->flags);
4290 if (!space_info) {
4291 btrfs_err(fs_info, "No space info for %llu", ffe_ctl->flags);
4292 return -ENOSPC;
4293 }
4294
4295 ret = prepare_allocation(fs_info, ffe_ctl, space_info, ins);
4296 if (ret < 0)
4297 return ret;
4298
4299 ffe_ctl->search_start = max(ffe_ctl->search_start,
4300 first_logical_byte(fs_info));
4301 ffe_ctl->search_start = max(ffe_ctl->search_start, ffe_ctl->hint_byte);
4302 if (ffe_ctl->search_start == ffe_ctl->hint_byte) {
4303 block_group = btrfs_lookup_block_group(fs_info,
4304 ffe_ctl->search_start);
4305 /*
4306  * we don't want to use the block group if it doesn't match our
4307  * allocation bits, or if it's not cached.
4308  *
4309  * However if we are re-searching with an ideal block group
4310  * picked out then we don't care that the block group is cached.
4311  */
4312 if (block_group && block_group_bits(block_group, ffe_ctl->flags) &&
4313 block_group->cached != BTRFS_CACHE_NO) {
4314 down_read(&space_info->groups_sem);
4315 if (list_empty(&block_group->list) ||
4316 block_group->ro) {
4317 /*
4318  * someone is removing this block group,
4319  * we can't jump into the have_block_group
4320  * target because our list pointers are not
4321  * valid
4322  */
4323 btrfs_put_block_group(block_group);
4324 up_read(&space_info->groups_sem);
4325 } else {
4326 ffe_ctl->index = btrfs_bg_flags_to_raid_index(
4327 block_group->flags);
4328 btrfs_lock_block_group(block_group,
4329 ffe_ctl->delalloc);
4330 goto have_block_group;
4331 }
4332 } else if (block_group) {
4333 btrfs_put_block_group(block_group);
4334 }
4335 }
4336 search:
4337 ffe_ctl->have_caching_bg = false;
4338 if (ffe_ctl->index == btrfs_bg_flags_to_raid_index(ffe_ctl->flags) ||
4339 ffe_ctl->index == 0)
4340 full_search = true;
4341 down_read(&space_info->groups_sem);
4342 list_for_each_entry(block_group,
4343 &space_info->block_groups[ffe_ctl->index], list) {
4344 struct btrfs_block_group *bg_ret;
4345
4346 /* If the block group is read-only, we can skip it entirely. */
4347 if (unlikely(block_group->ro)) {
4348 if (ffe_ctl->for_treelog)
4349 btrfs_clear_treelog_bg(block_group);
4350 if (ffe_ctl->for_data_reloc)
4351 btrfs_clear_data_reloc_bg(block_group);
4352 continue;
4353 }
4354
4355 btrfs_grab_block_group(block_group, ffe_ctl->delalloc);
4356 ffe_ctl->search_start = block_group->start;
4357
4358 /*
4359  * this can happen if we end up cycling through all the
4360  * raid types, but we want to make sure we only allocate
4361  * for the proper type.
4362  */
4363 if (!block_group_bits(block_group, ffe_ctl->flags)) {
4364 u64 extra = BTRFS_BLOCK_GROUP_DUP |
4365 BTRFS_BLOCK_GROUP_RAID1_MASK |
4366 BTRFS_BLOCK_GROUP_RAID56_MASK |
4367 BTRFS_BLOCK_GROUP_RAID10;
4368
4369 /*
4370  * if they asked for extra copies and this block group
4371  * doesn't provide them, bail.  This does allow us to
4372  * fill raid0 from raid1.
4373  */
4374 if ((ffe_ctl->flags & extra) && !(block_group->flags & extra))
4375 goto loop;
4376
4377 /*
4378  * This block group has different flags than we want.
4379  * It's possible that we have MIXED_GROUP flag but no
4380  * block group is mixed.  Just skip such block group.
4381  */
4382 btrfs_release_block_group(block_group, ffe_ctl->delalloc);
4383 continue;
4384 }
4385
4386 have_block_group:
4387 ffe_ctl->cached = btrfs_block_group_done(block_group);
4388 if (unlikely(!ffe_ctl->cached)) {
4389 ffe_ctl->have_caching_bg = true;
4390 ret = btrfs_cache_block_group(block_group, false);
4391
4392 /*
4393  * If we get ENOMEM here or something else we want to
4394  * try other block groups, because it may not be fatal.
4395  * However if we can't find anything else we need to
4396  * save our return here so that we return the actual
4397  * error that caused problems, not ENOSPC.
4398  */
4399 if (ret < 0) {
4400 if (!cache_block_group_error)
4401 cache_block_group_error = ret;
4402 ret = 0;
4403 goto loop;
4404 }
4405 ret = 0;
4406 }
4407
4408 if (unlikely(block_group->cached == BTRFS_CACHE_ERROR))
4409 goto loop;
4410
4411 bg_ret = NULL;
4412 ret = do_allocation(block_group, ffe_ctl, &bg_ret);
4413 if (ret == 0) {
4414 if (bg_ret && bg_ret != block_group) {
4415 btrfs_release_block_group(block_group,
4416 ffe_ctl->delalloc);
4417 block_group = bg_ret;
4418 }
4419 } else if (ret == -EAGAIN) {
4420 goto have_block_group;
4421 } else if (ret > 0) {
4422 goto loop;
4423 }
4424
4425 /* Checks */
4426 ffe_ctl->search_start = round_up(ffe_ctl->found_offset,
4427 fs_info->stripesize);
4428
4429 /* move on to the next group */
4430 if (ffe_ctl->search_start + ffe_ctl->num_bytes >
4431 block_group->start + block_group->length) {
4432 btrfs_add_free_space_unused(block_group,
4433 ffe_ctl->found_offset,
4434 ffe_ctl->num_bytes);
4435 goto loop;
4436 }
4437
4438 if (ffe_ctl->found_offset < ffe_ctl->search_start)
4439 btrfs_add_free_space_unused(block_group,
4440 ffe_ctl->found_offset,
4441 ffe_ctl->search_start - ffe_ctl->found_offset);
4442
4443 ret = btrfs_add_reserved_bytes(block_group, ffe_ctl->ram_bytes,
4444 ffe_ctl->num_bytes,
4445 ffe_ctl->delalloc);
4446 if (ret == -EAGAIN) {
4447 btrfs_add_free_space_unused(block_group,
4448 ffe_ctl->found_offset,
4449 ffe_ctl->num_bytes);
4450 goto loop;
4451 }
4452 btrfs_inc_block_group_reservations(block_group);
4453
4454 /* We are all good, let's return */
4455 ins->objectid = ffe_ctl->search_start;
4456 ins->offset = ffe_ctl->num_bytes;
4457
4458 trace_btrfs_reserve_extent(block_group, ffe_ctl->search_start,
4459 ffe_ctl->num_bytes);
4460 btrfs_release_block_group(block_group, ffe_ctl->delalloc);
4461 break;
4462 loop:
4463 release_block_group(block_group, ffe_ctl, ffe_ctl->delalloc);
4464 cond_resched();
4465 }
4466 up_read(&space_info->groups_sem);
4467
4468 ret = find_free_extent_update_loop(fs_info, ins, ffe_ctl, full_search);
4469 if (ret > 0)
4470 goto search;
4471
4472 if (ret == -ENOSPC && !cache_block_group_error) {
4473 /*
4474  * Use ffe_ctl->total_free_space as fallback if we can't find
4475  * any contiguous hole.
4476  */
4477 if (!ffe_ctl->max_extent_size)
4478 ffe_ctl->max_extent_size = ffe_ctl->total_free_space;
4479 spin_lock(&space_info->lock);
4480 space_info->max_extent_size = ffe_ctl->max_extent_size;
4481 spin_unlock(&space_info->lock);
4482 ins->offset = ffe_ctl->max_extent_size;
4483 } else if (ret == -ENOSPC) {
4484 ret = cache_block_group_error;
4485 }
4486 return ret;
4487 }
4488
4489
4490 /*
4491  * btrfs_reserve_extent - entry point to the extent allocator. Tries to find a
4492  *			  hole that is at least as big as @num_bytes.
4493  *
4494  * @root           -	The root that will contain this extent
4495  *
4496  * @ram_bytes      -	The amount of space in ram that @num_bytes take. This
4497  *			is used for accounting purposes. This value differs
4498  *			from @num_bytes only in the case of compressed extents.
4499  *
4500  * @num_bytes      -	Number of bytes to allocate on-disk.
4501  *
4502  * @min_alloc_size -	Indicates the minimum amount of space that the
4503  *			allocator should try to satisfy. In some cases
4504  *			@num_bytes may be larger than what is required and if
4505  *			the filesystem is fragmented then allocation fails.
4506  *			However, the presence of @min_alloc_size gives a
4507  *			chance to try and satisfy the smaller allocation.
4508  *
4509  * @empty_size     -	A hint that you plan on doing more COW. This is the
4510  *			size in bytes the allocator should try to find free
4511  *			next to the block it returns.  This is just a hint and
4512  *			may be ignored by the allocator.
4513  *
4514  * @hint_byte      -	Hint to the allocator to start searching above the byte
4515  *			address passed. It might be ignored.
4516  *
4517  * @ins            -	This key is modified to record the found hole. It will
4518  *			have the following values:
4519  *			ins->objectid == start position
4520  *			ins->flags = BTRFS_EXTENT_ITEM_KEY
4521  *			ins->offset == the size of the hole.
4522  *
4523  * @is_data        -	Boolean flag indicating whether an extent is
4524  *			allocated for data (true) or metadata (false)
4525  *
4526  * @delalloc       -	Boolean flag indicating whether this allocation is for
4527  *			delalloc or not. If 'true' data_rwsem of block groups
4528  *			is going to be acquired.
4529  *
4530  * Returns 0 when an allocation succeeded or < 0 when an error occurred. In
4531  * case -ENOSPC is returned then @ins->offset will contain the size of the
4532  * largest available hole the allocator managed to find.
4533  */
4534 int btrfs_reserve_extent(struct btrfs_root *root, u64 ram_bytes,
4535 u64 num_bytes, u64 min_alloc_size,
4536 u64 empty_size, u64 hint_byte,
4537 struct btrfs_key *ins, int is_data, int delalloc)
4538 {
4539 struct btrfs_fs_info *fs_info = root->fs_info;
4540 struct find_free_extent_ctl ffe_ctl = {};
4541 bool final_tried = num_bytes == min_alloc_size;
4542 u64 flags;
4543 int ret;
4544 bool for_treelog = (root->root_key.objectid == BTRFS_TREE_LOG_OBJECTID);
4545 bool for_data_reloc = (btrfs_is_data_reloc_root(root) && is_data);
4546
4547 flags = get_alloc_profile_by_root(root, is_data);
4548 again:
4549 WARN_ON(num_bytes < fs_info->sectorsize);
4550
4551 ffe_ctl.ram_bytes = ram_bytes;
4552 ffe_ctl.num_bytes = num_bytes;
4553 ffe_ctl.min_alloc_size = min_alloc_size;
4554 ffe_ctl.empty_size = empty_size;
4555 ffe_ctl.flags = flags;
4556 ffe_ctl.delalloc = delalloc;
4557 ffe_ctl.hint_byte = hint_byte;
4558 ffe_ctl.for_treelog = for_treelog;
4559 ffe_ctl.for_data_reloc = for_data_reloc;
4560
4561 ret = find_free_extent(root, ins, &ffe_ctl);
4562 if (!ret && !is_data) {
4563 btrfs_dec_block_group_reservations(fs_info, ins->objectid);
4564 } else if (ret == -ENOSPC) {
4565 if (!final_tried && ins->offset) {
4566 num_bytes = min(num_bytes >> 1, ins->offset);
4567 num_bytes = round_down(num_bytes,
4568 fs_info->sectorsize);
4569 num_bytes = max(num_bytes, min_alloc_size);
4570 ram_bytes = num_bytes;
4571 if (num_bytes == min_alloc_size)
4572 final_tried = true;
4573 goto again;
4574 } else if (btrfs_test_opt(fs_info, ENOSPC_DEBUG)) {
4575 struct btrfs_space_info *sinfo;
4576
4577 sinfo = btrfs_find_space_info(fs_info, flags);
4578 btrfs_err(fs_info,
4579 "allocation failed flags %llu, wanted %llu tree-log %d, relocation: %d",
4580 flags, num_bytes, for_treelog, for_data_reloc);
4581 if (sinfo)
4582 btrfs_dump_space_info(fs_info, sinfo,
4583 num_bytes, 1);
4584 }
4585 }
4586
4587 return ret;
4588 }
4589
4590 int btrfs_free_reserved_extent(struct btrfs_fs_info *fs_info,
4591 u64 start, u64 len, int delalloc)
4592 {
4593 struct btrfs_block_group *cache;
4594
4595 cache = btrfs_lookup_block_group(fs_info, start);
4596 if (!cache) {
4597 btrfs_err(fs_info, "Unable to find block group for %llu",
4598 start);
4599 return -ENOSPC;
4600 }
4601
4602 btrfs_add_free_space(cache, start, len);
4603 btrfs_free_reserved_bytes(cache, len, delalloc);
4604 trace_btrfs_reserved_extent_free(fs_info, start, len);
4605
4606 btrfs_put_block_group(cache);
4607 return 0;
4608 }
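/*
 * btrfs_free_reserved_extent() is the error-path counterpart of
 * btrfs_reserve_extent(): it hands a still-unused reservation back to the
 * free space cache and reverses the bytes_reserved accounting done by
 * btrfs_add_reserved_bytes().
 */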
4609
4610 int btrfs_pin_reserved_extent(struct btrfs_trans_handle *trans, u64 start,
4611 u64 len)
4612 {
4613 struct btrfs_block_group *cache;
4614 int ret = 0;
4615
4616 cache = btrfs_lookup_block_group(trans->fs_info, start);
4617 if (!cache) {
4618 btrfs_err(trans->fs_info, "unable to find block group for %llu",
4619 start);
4620 return -ENOSPC;
4621 }
4622
4623 ret = pin_down_extent(trans, cache, start, len, 1);
4624 btrfs_put_block_group(cache);
4625 return ret;
4626 }
4627
4628 static int alloc_reserved_extent(struct btrfs_trans_handle *trans, u64 bytenr,
4629 u64 num_bytes)
4630 {
4631 struct btrfs_fs_info *fs_info = trans->fs_info;
4632 int ret;
4633
4634 ret = remove_from_free_space_tree(trans, bytenr, num_bytes);
4635 if (ret)
4636 return ret;
4637
4638 ret = btrfs_update_block_group(trans, bytenr, num_bytes, true);
4639 if (ret) { /* -ENOENT, logic error */
4640 ASSERT(!ret);
4641 btrfs_err(fs_info, "update block group failed for %llu %llu",
4642 bytenr, num_bytes);
4643 return ret;
4644 }
4645
4646 trace_btrfs_reserved_extent_alloc(fs_info, bytenr, num_bytes);
4647 return 0;
4648 }
4649
4650 static int alloc_reserved_file_extent(struct btrfs_trans_handle *trans,
4651 u64 parent, u64 root_objectid,
4652 u64 flags, u64 owner, u64 offset,
4653 struct btrfs_key *ins, int ref_mod)
4654 {
4655 struct btrfs_fs_info *fs_info = trans->fs_info;
4656 struct btrfs_root *extent_root;
4657 int ret;
4658 struct btrfs_extent_item *extent_item;
4659 struct btrfs_extent_inline_ref *iref;
4660 struct btrfs_path *path;
4661 struct extent_buffer *leaf;
4662 int type;
4663 u32 size;
4664
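/*
 * A back reference with a parent is a shared data ref (the extent is only
 * referenced through a specific tree block); without a parent it is a
 * normal data ref keyed by root/objectid/offset.
 */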
4665 if (parent > 0)
4666 type = BTRFS_SHARED_DATA_REF_KEY;
4667 else
4668 type = BTRFS_EXTENT_DATA_REF_KEY;
4669
4670 size = sizeof(*extent_item) + btrfs_extent_inline_ref_size(type);
4671
4672 path = btrfs_alloc_path();
4673 if (!path)
4674 return -ENOMEM;
4675
4676 extent_root = btrfs_extent_root(fs_info, ins->objectid);
4677 ret = btrfs_insert_empty_item(trans, extent_root, path, ins, size);
4678 if (ret) {
4679 btrfs_free_path(path);
4680 return ret;
4681 }
4682
4683 leaf = path->nodes[0];
4684 extent_item = btrfs_item_ptr(leaf, path->slots[0],
4685 struct btrfs_extent_item);
4686 btrfs_set_extent_refs(leaf, extent_item, ref_mod);
4687 btrfs_set_extent_generation(leaf, extent_item, trans->transid);
4688 btrfs_set_extent_flags(leaf, extent_item,
4689 flags | BTRFS_EXTENT_FLAG_DATA);
4690
4691 iref = (struct btrfs_extent_inline_ref *)(extent_item + 1);
4692 btrfs_set_extent_inline_ref_type(leaf, iref, type);
4693 if (parent > 0) {
4694 struct btrfs_shared_data_ref *ref;
4695 ref = (struct btrfs_shared_data_ref *)(iref + 1);
4696 btrfs_set_extent_inline_ref_offset(leaf, iref, parent);
4697 btrfs_set_shared_data_ref_count(leaf, ref, ref_mod);
4698 } else {
4699 struct btrfs_extent_data_ref *ref;
4700 ref = (struct btrfs_extent_data_ref *)(&iref->offset);
4701 btrfs_set_extent_data_ref_root(leaf, ref, root_objectid);
4702 btrfs_set_extent_data_ref_objectid(leaf, ref, owner);
4703 btrfs_set_extent_data_ref_offset(leaf, ref, offset);
4704 btrfs_set_extent_data_ref_count(leaf, ref, ref_mod);
4705 }
4706
4707 btrfs_mark_buffer_dirty(path->nodes[0]);
4708 btrfs_free_path(path);
4709
4710 return alloc_reserved_extent(trans, ins->objectid, ins->offset);
4711 }
4712
4713 static int alloc_reserved_tree_block(struct btrfs_trans_handle *trans,
4714 struct btrfs_delayed_ref_node *node,
4715 struct btrfs_delayed_extent_op *extent_op)
4716 {
4717 struct btrfs_fs_info *fs_info = trans->fs_info;
4718 struct btrfs_root *extent_root;
4719 int ret;
4720 struct btrfs_extent_item *extent_item;
4721 struct btrfs_key extent_key;
4722 struct btrfs_tree_block_info *block_info;
4723 struct btrfs_extent_inline_ref *iref;
4724 struct btrfs_path *path;
4725 struct extent_buffer *leaf;
4726 struct btrfs_delayed_tree_ref *ref;
4727 u32 size = sizeof(*extent_item) + sizeof(*iref);
4728 u64 flags = extent_op->flags_to_set;
4729 bool skinny_metadata = btrfs_fs_incompat(fs_info, SKINNY_METADATA);
4730
4731 ref = btrfs_delayed_node_to_tree_ref(node);
4732
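/*
 * With the SKINNY_METADATA incompat feature the block's level is stored in
 * the key offset and no btrfs_tree_block_info is embedded, keeping the
 * extent item smaller.
 */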
4733 extent_key.objectid = node->bytenr;
4734 if (skinny_metadata) {
4735 extent_key.offset = ref->level;
4736 extent_key.type = BTRFS_METADATA_ITEM_KEY;
4737 } else {
4738 extent_key.offset = node->num_bytes;
4739 extent_key.type = BTRFS_EXTENT_ITEM_KEY;
4740 size += sizeof(*block_info);
4741 }
4742
4743 path = btrfs_alloc_path();
4744 if (!path)
4745 return -ENOMEM;
4746
4747 extent_root = btrfs_extent_root(fs_info, extent_key.objectid);
4748 ret = btrfs_insert_empty_item(trans, extent_root, path, &extent_key,
4749 size);
4750 if (ret) {
4751 btrfs_free_path(path);
4752 return ret;
4753 }
4754
4755 leaf = path->nodes[0];
4756 extent_item = btrfs_item_ptr(leaf, path->slots[0],
4757 struct btrfs_extent_item);
4758 btrfs_set_extent_refs(leaf, extent_item, 1);
4759 btrfs_set_extent_generation(leaf, extent_item, trans->transid);
4760 btrfs_set_extent_flags(leaf, extent_item,
4761 flags | BTRFS_EXTENT_FLAG_TREE_BLOCK);
4762
4763 if (skinny_metadata) {
4764 iref = (struct btrfs_extent_inline_ref *)(extent_item + 1);
4765 } else {
4766 block_info = (struct btrfs_tree_block_info *)(extent_item + 1);
4767 btrfs_set_tree_block_key(leaf, block_info, &extent_op->key);
4768 btrfs_set_tree_block_level(leaf, block_info, ref->level);
4769 iref = (struct btrfs_extent_inline_ref *)(block_info + 1);
4770 }
4771
4772 if (node->type == BTRFS_SHARED_BLOCK_REF_KEY) {
4773 btrfs_set_extent_inline_ref_type(leaf, iref,
4774 BTRFS_SHARED_BLOCK_REF_KEY);
4775 btrfs_set_extent_inline_ref_offset(leaf, iref, ref->parent);
4776 } else {
4777 btrfs_set_extent_inline_ref_type(leaf, iref,
4778 BTRFS_TREE_BLOCK_REF_KEY);
4779 btrfs_set_extent_inline_ref_offset(leaf, iref, ref->root);
4780 }
4781
4782 btrfs_mark_buffer_dirty(leaf);
4783 btrfs_free_path(path);
4784
4785 return alloc_reserved_extent(trans, node->bytenr, fs_info->nodesize);
4786 }
4787
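/*
 * Queue a delayed ref adding the first reference for a freshly reserved
 * data extent; the extent item itself is inserted when the delayed refs
 * are run.
 */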
4788 int btrfs_alloc_reserved_file_extent(struct btrfs_trans_handle *trans,
4789 struct btrfs_root *root, u64 owner,
4790 u64 offset, u64 ram_bytes,
4791 struct btrfs_key *ins)
4792 {
4793 struct btrfs_ref generic_ref = { 0 };
4794
4795 BUG_ON(root->root_key.objectid == BTRFS_TREE_LOG_OBJECTID);
4796
4797 btrfs_init_generic_ref(&generic_ref, BTRFS_ADD_DELAYED_EXTENT,
4798 ins->objectid, ins->offset, 0);
4799 btrfs_init_data_ref(&generic_ref, root->root_key.objectid, owner,
4800 offset, 0, false);
4801 btrfs_ref_tree_mod(root->fs_info, &generic_ref);
4802
4803 return btrfs_add_delayed_data_ref(trans, &generic_ref, ram_bytes);
4804 }
4805
4806 /*
4807  * this is used by the tree logging recovery code. It re-reserves the
4808  * space for an extent recorded in the log tree and re-inserts the
4809  * extent item for it.
4810  */
4811 int btrfs_alloc_logged_file_extent(struct btrfs_trans_handle *trans,
4812 u64 root_objectid, u64 owner, u64 offset,
4813 struct btrfs_key *ins)
4814 {
4815 struct btrfs_fs_info *fs_info = trans->fs_info;
4816 int ret;
4817 struct btrfs_block_group *block_group;
4818 struct btrfs_space_info *space_info;
4819
4820 /*
4821  * Mixed block groups will exclude before processing the log so we only
4822  * need to do the exclude dance if this fs isn't mixed.
4823  */
4824 if (!btrfs_fs_incompat(fs_info, MIXED_GROUPS)) {
4825 ret = __exclude_logged_extent(fs_info, ins->objectid,
4826 ins->offset);
4827 if (ret)
4828 return ret;
4829 }
4830
4831 block_group = btrfs_lookup_block_group(fs_info, ins->objectid);
4832 if (!block_group)
4833 return -EINVAL;
4834
4835 space_info = block_group->space_info;
4836 spin_lock(&space_info->lock);
4837 spin_lock(&block_group->lock);
4838 space_info->bytes_reserved += ins->offset;
4839 block_group->reserved += ins->offset;
4840 spin_unlock(&block_group->lock);
4841 spin_unlock(&space_info->lock);
4842
4843 ret = alloc_reserved_file_extent(trans, 0, root_objectid, 0, owner,
4844 offset, ins, 1);
4845 if (ret)
4846 btrfs_pin_extent(trans, ins->objectid, ins->offset, 1);
4847 btrfs_put_block_group(block_group);
4848 return ret;
4849 }
4850
4851 static struct extent_buffer *
4852 btrfs_init_new_buffer(struct btrfs_trans_handle *trans, struct btrfs_root *root,
4853 u64 bytenr, int level, u64 owner,
4854 enum btrfs_lock_nesting nest)
4855 {
4856 struct btrfs_fs_info *fs_info = root->fs_info;
4857 struct extent_buffer *buf;
4858 u64 lockdep_owner = owner;
4859
4860 buf = btrfs_find_create_tree_block(fs_info, bytenr, owner, level);
4861 if (IS_ERR(buf))
4862 return buf;
4863
4864 /*
4865  * Extra safety check in case the extent tree is corrupted and the extent
4866  * allocator chooses to use a tree block which is already used and
4867  * locked.
4868  */
4869 if (buf->lock_owner == current->pid) {
4870 btrfs_err_rl(fs_info,
4871 "tree block %llu owner %llu already locked by pid=%d, extent tree corruption detected",
4872 buf->start, btrfs_header_owner(buf), current->pid);
4873 free_extent_buffer(buf);
4874 return ERR_PTR(-EUCLEAN);
4875 }
4876
4877 /*
4878  * The reloc trees are just snapshots, so we need them to appear to be
4879  * just like any other fs tree WRT lockdep.
4880  *
4881  * The exception however is in replace_path() in relocation, where we
4882  * hold the lock on the original fs root and then search for the reloc
4883  * root. At that point we need to make sure any reloc root buffers are
4884  * set to the BTRFS_TREE_RELOC_OBJECTID lockdep class in order to make
4885  * sure lockdep knows we're doing a lock inversion.
4886  */
4887 if (lockdep_owner == BTRFS_TREE_RELOC_OBJECTID &&
4888 !test_bit(BTRFS_ROOT_RESET_LOCKDEP_CLASS, &root->state))
4889 lockdep_owner = BTRFS_FS_TREE_OBJECTID;
4890
4891 /*
4892  * This needs to stay, because we could allocate a freed block from an
4893  * old tree into a new tree, so we need to make sure this new block is
4894  * set to the appropriate level and owner.
4895  */
4896 btrfs_set_buffer_lockdep_class(lockdep_owner, buf, level);
4897
4898 __btrfs_tree_lock(buf, nest);
4899 btrfs_clean_tree_block(buf);
4900 clear_bit(EXTENT_BUFFER_STALE, &buf->bflags);
4901 clear_bit(EXTENT_BUFFER_NO_CHECK, &buf->bflags);
4902
4903 set_extent_buffer_uptodate(buf);
4904
4905 memzero_extent_buffer(buf, 0, sizeof(struct btrfs_header));
4906 btrfs_set_header_level(buf, level);
4907 btrfs_set_header_bytenr(buf, buf->start);
4908 btrfs_set_header_generation(buf, trans->transid);
4909 btrfs_set_header_backref_rev(buf, BTRFS_MIXED_BACKREF_REV);
4910 btrfs_set_header_owner(buf, owner);
4911 write_extent_buffer_fsid(buf, fs_info->fs_devices->metadata_uuid);
4912 write_extent_buffer_chunk_tree_uuid(buf, fs_info->chunk_tree_uuid);
4913 if (root->root_key.objectid == BTRFS_TREE_LOG_OBJECTID) {
4914 buf->log_index = root->log_transid % 2;
4915 /*
4916  * we allow two log transactions at a time, use different
4917  * EXTENT bit to differentiate dirty pages.
4918  */
4919 if (buf->log_index == 0)
4920 set_extent_dirty(&root->dirty_log_pages, buf->start,
4921 buf->start + buf->len - 1, GFP_NOFS);
4922 else
4923 set_extent_new(&root->dirty_log_pages, buf->start,
4924 buf->start + buf->len - 1);
4925 } else {
4926 buf->log_index = -1;
4927 set_extent_dirty(&trans->transaction->dirty_pages, buf->start,
4928 buf->start + buf->len - 1, GFP_NOFS);
4929 }
4930
4931 return buf;
4932 }
4933
4934 /*
4935  * finds a free extent and does all the dirty work required for allocation
4936  * returns the tree buffer or an ERR_PTR on error.
4937  */
4938 struct extent_buffer *btrfs_alloc_tree_block(struct btrfs_trans_handle *trans,
4939 struct btrfs_root *root,
4940 u64 parent, u64 root_objectid,
4941 const struct btrfs_disk_key *key,
4942 int level, u64 hint,
4943 u64 empty_size,
4944 enum btrfs_lock_nesting nest)
4945 {
4946 struct btrfs_fs_info *fs_info = root->fs_info;
4947 struct btrfs_key ins;
4948 struct btrfs_block_rsv *block_rsv;
4949 struct extent_buffer *buf;
4950 struct btrfs_delayed_extent_op *extent_op;
4951 struct btrfs_ref generic_ref = { 0 };
4952 u64 flags = 0;
4953 int ret;
4954 u32 blocksize = fs_info->nodesize;
4955 bool skinny_metadata = btrfs_fs_incompat(fs_info, SKINNY_METADATA);
4956
4957 #ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS
4958 if (btrfs_is_testing(fs_info)) {
4959 buf = btrfs_init_new_buffer(trans, root, root->alloc_bytenr,
4960 level, root_objectid, nest);
4961 if (!IS_ERR(buf))
4962 root->alloc_bytenr += blocksize;
4963 return buf;
4964 }
4965 #endif
4966
4967 block_rsv = btrfs_use_block_rsv(trans, root, blocksize);
4968 if (IS_ERR(block_rsv))
4969 return ERR_CAST(block_rsv);
4970
4971 ret = btrfs_reserve_extent(root, blocksize, blocksize, blocksize,
4972 empty_size, hint, &ins, 0, 0);
4973 if (ret)
4974 goto out_unuse;
4975
4976 buf = btrfs_init_new_buffer(trans, root, ins.objectid, level,
4977 root_objectid, nest);
4978 if (IS_ERR(buf)) {
4979 ret = PTR_ERR(buf);
4980 goto out_free_reserved;
4981 }
4982
4983 if (root_objectid == BTRFS_TREE_RELOC_OBJECTID) {
4984 if (parent == 0)
4985 parent = ins.objectid;
4986 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
4987 } else
4988 BUG_ON(parent > 0);
4989
4990 if (root_objectid != BTRFS_TREE_LOG_OBJECTID) {
4991 extent_op = btrfs_alloc_delayed_extent_op();
4992 if (!extent_op) {
4993 ret = -ENOMEM;
4994 goto out_free_buf;
4995 }
4996 if (key)
4997 memcpy(&extent_op->key, key, sizeof(extent_op->key));
4998 else
4999 memset(&extent_op->key, 0, sizeof(extent_op->key));
5000 extent_op->flags_to_set = flags;
5001 extent_op->update_key = !skinny_metadata;
5002 extent_op->update_flags = true;
5003 extent_op->level = level;
5004
5005 btrfs_init_generic_ref(&generic_ref, BTRFS_ADD_DELAYED_EXTENT,
5006 ins.objectid, ins.offset, parent);
5007 btrfs_init_tree_ref(&generic_ref, level, root_objectid,
5008 root->root_key.objectid, false);
5009 btrfs_ref_tree_mod(fs_info, &generic_ref);
5010 ret = btrfs_add_delayed_tree_ref(trans, &generic_ref, extent_op);
5011 if (ret)
5012 goto out_free_delayed;
5013 }
5014 return buf;
5015
5016 out_free_delayed:
5017 btrfs_free_delayed_extent_op(extent_op);
5018 out_free_buf:
5019 btrfs_tree_unlock(buf);
5020 free_extent_buffer(buf);
5021 out_free_reserved:
5022 btrfs_free_reserved_extent(fs_info, ins.objectid, ins.offset, 0);
5023 out_unuse:
5024 btrfs_unuse_block_rsv(fs_info, block_rsv, blocksize);
5025 return ERR_PTR(ret);
5026 }
5027
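/*
 * State for walking a subvolume tree during deletion. The walk has two
 * stages: DROP_REFERENCE drops the reference count of each visited block,
 * while UPDATE_BACKREF is entered for a shared subtree whose back refs must
 * be converted before the drop can continue. drop_progress/drop_level
 * checkpoint how far the deletion has gotten so that it can be resumed
 * after a transaction commit or a remount.
 */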
5028 struct walk_control {
5029 u64 refs[BTRFS_MAX_LEVEL];
5030 u64 flags[BTRFS_MAX_LEVEL];
5031 struct btrfs_key update_progress;
5032 struct btrfs_key drop_progress;
5033 int drop_level;
5034 int stage;
5035 int level;
5036 int shared_level;
5037 int update_ref;
5038 int keep_locks;
5039 int reada_slot;
5040 int reada_count;
5041 int restarted;
5042 };
5043
5044 #define DROP_REFERENCE 1
5045 #define UPDATE_BACKREF 2
5046
5047 static noinline void reada_walk_down(struct btrfs_trans_handle *trans,
5048 struct btrfs_root *root,
5049 struct walk_control *wc,
5050 struct btrfs_path *path)
5051 {
5052 struct btrfs_fs_info *fs_info = root->fs_info;
5053 u64 bytenr;
5054 u64 generation;
5055 u64 refs;
5056 u64 flags;
5057 u32 nritems;
5058 struct btrfs_key key;
5059 struct extent_buffer *eb;
5060 int ret;
5061 int slot;
5062 int nread = 0;
5063
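/*
 * Adapt the readahead window: shrink it (by 2/3, at least 2) while we are
 * still inside the previously issued window, and grow it (by 3/2, capped at
 * one node's worth of pointers) once we have moved past it.
 */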
5064 if (path->slots[wc->level] < wc->reada_slot) {
5065 wc->reada_count = wc->reada_count * 2 / 3;
5066 wc->reada_count = max(wc->reada_count, 2);
5067 } else {
5068 wc->reada_count = wc->reada_count * 3 / 2;
5069 wc->reada_count = min_t(int, wc->reada_count,
5070 BTRFS_NODEPTRS_PER_BLOCK(fs_info));
5071 }
5072
5073 eb = path->nodes[wc->level];
5074 nritems = btrfs_header_nritems(eb);
5075
5076 for (slot = path->slots[wc->level]; slot < nritems; slot++) {
5077 if (nread >= wc->reada_count)
5078 break;
5079
5080 cond_resched();
5081 bytenr = btrfs_node_blockptr(eb, slot);
5082 generation = btrfs_node_ptr_generation(eb, slot);
5083
5084 if (slot == path->slots[wc->level])
5085 goto reada;
5086
5087 if (wc->stage == UPDATE_BACKREF &&
5088 generation <= root->root_key.offset)
5089 continue;
5090
5091 /* We don't lock the tree block, it's OK to be racy here */
5092 ret = btrfs_lookup_extent_info(trans, fs_info, bytenr,
5093 wc->level - 1, 1, &refs,
5094 &flags);
5095
5096 if (ret < 0)
5097 continue;
5098 BUG_ON(refs == 0);
5099
5100 if (wc->stage == DROP_REFERENCE) {
5101 if (refs == 1)
5102 goto reada;
5103
5104 if (wc->level == 1 &&
5105 (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF))
5106 continue;
5107 if (!wc->update_ref ||
5108 generation <= root->root_key.offset)
5109 continue;
5110 btrfs_node_key_to_cpu(eb, &key, slot);
5111 ret = btrfs_comp_cpu_keys(&key,
5112 &wc->update_progress);
5113 if (ret < 0)
5114 continue;
5115 } else {
5116 if (wc->level == 1 &&
5117 (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF))
5118 continue;
5119 }
5120 reada:
5121 btrfs_readahead_node_child(eb, slot);
5122 nread++;
5123 }
5124 wc->reada_slot = slot;
5125 }
5126
5127 /*
5128  * helper to process tree block while walking down the tree.
5129  *
5130  * when wc->stage == UPDATE_BACKREF, this function updates
5131  * back refs for pointers in the block.
5132  *
5133  * NOTE: return value 1 means we should stop walking down.
5134  */
5135 static noinline int walk_down_proc(struct btrfs_trans_handle *trans,
5136 struct btrfs_root *root,
5137 struct btrfs_path *path,
5138 struct walk_control *wc, int lookup_info)
5139 {
5140 struct btrfs_fs_info *fs_info = root->fs_info;
5141 int level = wc->level;
5142 struct extent_buffer *eb = path->nodes[level];
5143 u64 flag = BTRFS_BLOCK_FLAG_FULL_BACKREF;
5144 int ret;
5145
5146 if (wc->stage == UPDATE_BACKREF &&
5147 btrfs_header_owner(eb) != root->root_key.objectid)
5148 return 1;
5149
5150 /*
5151  * when the reference count of a tree block is 1, it won't increase
5152  * again. once the full backref flag is set, we never clear it.
5153  */
5154 if (lookup_info &&
5155 ((wc->stage == DROP_REFERENCE && wc->refs[level] != 1) ||
5156 (wc->stage == UPDATE_BACKREF && !(wc->flags[level] & flag)))) {
5157 BUG_ON(!path->locks[level]);
5158 ret = btrfs_lookup_extent_info(trans, fs_info,
5159 eb->start, level, 1,
5160 &wc->refs[level],
5161 &wc->flags[level]);
5162 BUG_ON(ret == -ENOMEM);
5163 if (ret)
5164 return ret;
5165 BUG_ON(wc->refs[level] == 0);
5166 }
5167
5168 if (wc->stage == DROP_REFERENCE) {
5169 if (wc->refs[level] > 1)
5170 return 1;
5171
5172 if (path->locks[level] && !wc->keep_locks) {
5173 btrfs_tree_unlock_rw(eb, path->locks[level]);
5174 path->locks[level] = 0;
5175 }
5176 return 0;
5177 }
5178
5179 /* wc->stage == UPDATE_BACKREF */
5180 if (!(wc->flags[level] & flag)) {
5181 BUG_ON(!path->locks[level]);
5182 ret = btrfs_inc_ref(trans, root, eb, 1);
5183 BUG_ON(ret);
5184 ret = btrfs_dec_ref(trans, root, eb, 0);
5185 BUG_ON(ret);
5186 ret = btrfs_set_disk_extent_flags(trans, eb, flag,
5187 btrfs_header_level(eb));
5188 BUG_ON(ret);
5189 wc->flags[level] |= flag;
5190 }
5191
5192 /*
5193  * the block is shared by multiple trees, so it's not good to
5194  * keep the tree lock
5195  */
5196 if (path->locks[level] && level > 0) {
5197 btrfs_tree_unlock_rw(eb, path->locks[level]);
5198 path->locks[level] = 0;
5199 }
5200 return 0;
5201 }
5202
5203 /*
5204  * Check whether our root still holds a reference (shared or normal) on the
5205  * tree block at @bytenr: returns 1 if it does, 0 if not, < 0 on error.
5206  */
5207 static int check_ref_exists(struct btrfs_trans_handle *trans,
5208 struct btrfs_root *root, u64 bytenr, u64 parent,
5209 int level)
5210 {
5211 struct btrfs_path *path;
5212 struct btrfs_extent_inline_ref *iref;
5213 int ret;
5214
5215 path = btrfs_alloc_path();
5216 if (!path)
5217 return -ENOMEM;
5218
5219 ret = lookup_extent_backref(trans, path, &iref, bytenr,
5220 root->fs_info->nodesize, parent,
5221 root->root_key.objectid, level, 0);
5222 btrfs_free_path(path);
5223 if (ret == -ENOENT)
5224 return 0;
5225 if (ret < 0)
5226 return ret;
5227 return 1;
5228 }
5229
5230 /*
5231  * helper to process tree block pointer.
5232  *
5233  * when wc->stage == DROP_REFERENCE, this function checks the
5234  * reference count of the block pointed to. if the block is shared
5235  * and we need to update back refs for the subtree rooted at the
5236  * block, this function changes wc->stage to UPDATE_BACKREF. if the
5237  * block is shared and there is no need to update back refs, the
5238  * subtree rooted at the block is skipped.
5239  *
5240  * NOTE: return value 1 means we should stop walking down.
5241  */
5243 static noinline int do_walk_down(struct btrfs_trans_handle *trans,
5244 struct btrfs_root *root,
5245 struct btrfs_path *path,
5246 struct walk_control *wc, int *lookup_info)
5247 {
5248 struct btrfs_fs_info *fs_info = root->fs_info;
5249 u64 bytenr;
5250 u64 generation;
5251 u64 parent;
5252 struct btrfs_key key;
5253 struct btrfs_key first_key;
5254 struct btrfs_ref ref = { 0 };
5255 struct extent_buffer *next;
5256 int level = wc->level;
5257 int reada = 0;
5258 int ret = 0;
5259 bool need_account = false;
5260
5261 generation = btrfs_node_ptr_generation(path->nodes[level],
5262 path->slots[level]);
5263
5264 /*
5265  * if the lower level block was created before the snapshot was
5266  * created, we know there is no need to update back refs for it
5267  */
5268 if (wc->stage == UPDATE_BACKREF &&
5269 generation <= root->root_key.offset) {
5270 *lookup_info = 1;
5271 return 1;
5272 }
5273
5274 bytenr = btrfs_node_blockptr(path->nodes[level], path->slots[level]);
5275 btrfs_node_key_to_cpu(path->nodes[level], &first_key,
5276 path->slots[level]);
5277
5278 next = find_extent_buffer(fs_info, bytenr);
5279 if (!next) {
5280 next = btrfs_find_create_tree_block(fs_info, bytenr,
5281 root->root_key.objectid, level - 1);
5282 if (IS_ERR(next))
5283 return PTR_ERR(next);
5284 reada = 1;
5285 }
5286 btrfs_tree_lock(next);
5287
5288 ret = btrfs_lookup_extent_info(trans, fs_info, bytenr, level - 1, 1,
5289 &wc->refs[level - 1],
5290 &wc->flags[level - 1]);
5291 if (ret < 0)
5292 goto out_unlock;
5293
5294 if (unlikely(wc->refs[level - 1] == 0)) {
5295 btrfs_err(fs_info, "Missing references.");
5296 ret = -EIO;
5297 goto out_unlock;
5298 }
5299 *lookup_info = 0;
5300
5301 if (wc->stage == DROP_REFERENCE) {
5302 if (wc->refs[level - 1] > 1) {
5303 need_account = true;
5304 if (level == 1 &&
5305 (wc->flags[0] & BTRFS_BLOCK_FLAG_FULL_BACKREF))
5306 goto skip;
5307
5308 if (!wc->update_ref ||
5309 generation <= root->root_key.offset)
5310 goto skip;
5311
5312 btrfs_node_key_to_cpu(path->nodes[level], &key,
5313 path->slots[level]);
5314 ret = btrfs_comp_cpu_keys(&key, &wc->update_progress);
5315 if (ret < 0)
5316 goto skip;
5317
5318 wc->stage = UPDATE_BACKREF;
5319 wc->shared_level = level - 1;
5320 }
5321 } else {
5322 if (level == 1 &&
5323 (wc->flags[0] & BTRFS_BLOCK_FLAG_FULL_BACKREF))
5324 goto skip;
5325 }
5326
5327 if (!btrfs_buffer_uptodate(next, generation, 0)) {
5328 btrfs_tree_unlock(next);
5329 free_extent_buffer(next);
5330 next = NULL;
5331 *lookup_info = 1;
5332 }
5333
5334 if (!next) {
5335 if (reada && level == 1)
5336 reada_walk_down(trans, root, wc, path);
5337 next = read_tree_block(fs_info, bytenr, root->root_key.objectid,
5338 generation, level - 1, &first_key);
5339 if (IS_ERR(next)) {
5340 return PTR_ERR(next);
5341 } else if (!extent_buffer_uptodate(next)) {
5342 free_extent_buffer(next);
5343 return -EIO;
5344 }
5345 btrfs_tree_lock(next);
5346 }
5347
5348 level--;
5349 ASSERT(level == btrfs_header_level(next));
5350 if (level != btrfs_header_level(next)) {
5351 btrfs_err(root->fs_info, "mismatched level");
5352 ret = -EIO;
5353 goto out_unlock;
5354 }
5355 path->nodes[level] = next;
5356 path->slots[level] = 0;
5357 path->locks[level] = BTRFS_WRITE_LOCK;
5358 wc->level = level;
5359 if (wc->level == 1)
5360 wc->reada_slot = 0;
5361 return 0;
5362 skip:
5363 wc->refs[level - 1] = 0;
5364 wc->flags[level - 1] = 0;
5365 if (wc->stage == DROP_REFERENCE) {
5366 if (wc->flags[level] & BTRFS_BLOCK_FLAG_FULL_BACKREF) {
5367 parent = path->nodes[level]->start;
5368 } else {
5369 ASSERT(root->root_key.objectid ==
5370 btrfs_header_owner(path->nodes[level]));
5371 if (root->root_key.objectid !=
5372 btrfs_header_owner(path->nodes[level])) {
5373 btrfs_err(root->fs_info,
5374 "mismatched block owner");
5375 ret = -EIO;
5376 goto out_unlock;
5377 }
5378 parent = 0;
5379 }
5380
5381 /*
5382  * If we had a drop_progress we need to verify the refs are set
5383  * as expected. If we find our ref then we know that from here
5384  * on out everything should be in order, and we can clear the
5385  * ->restarted flag.
5386  */
5387 if (wc->restarted) {
5388 ret = check_ref_exists(trans, root, bytenr, parent,
5389 level - 1);
5390 if (ret < 0)
5391 goto out_unlock;
5392 if (ret == 0)
5393 goto no_delete;
5394 ret = 0;
5395 wc->restarted = 0;
5396 }
5397
5398 /*
5399  * Reloc tree doesn't contribute to qgroup numbers, and we have
5400  * already accounted them at merge time (replace_path),
5401  * thus we could skip expensive subtree trace here.
5402  */
5403 if (root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID &&
5404 need_account) {
5405 ret = btrfs_qgroup_trace_subtree(trans, next,
5406 generation, level - 1);
5407 if (ret) {
5408 btrfs_err_rl(fs_info,
5409 "Error %d accounting shared subtree. Quota is out of sync, rescan required.",
5410 ret);
5411 }
5412 }
5413
5414 /*
5415  * We need to update the next key in our walk control so we can
5416  * update the drop_progress key accordingly. We don't care if
5417  * find_next_key doesn't find a key because that means we're at
5418  * the end and are going to clean up now.
5419  */
5420 wc->drop_level = level;
5421 find_next_key(path, level, &wc->drop_progress);
5422
5423 btrfs_init_generic_ref(&ref, BTRFS_DROP_DELAYED_REF, bytenr,
5424 fs_info->nodesize, parent);
5425 btrfs_init_tree_ref(&ref, level - 1, root->root_key.objectid,
5426 0, false);
5427 ret = btrfs_free_extent(trans, &ref);
5428 if (ret)
5429 goto out_unlock;
5430 }
5431 no_delete:
5432 *lookup_info = 1;
5433 ret = 1;
5434
5435 out_unlock:
5436 btrfs_tree_unlock(next);
5437 free_extent_buffer(next);
5438
5439 return ret;
5440 }
5441
5442 /*
5443  * helper to process tree block while walking up the tree.
5444  *
5445  * when wc->stage == DROP_REFERENCE, this function drops
5446  * reference count on the block.
5447  *
5448  * when wc->stage == UPDATE_BACKREF, this function changes
5449  * wc->stage back to DROP_REFERENCE if we changed wc->stage
5450  * to UPDATE_BACKREF previously while processing the block.
5451  *
5452  * NOTE: return value 1 means we should stop walking up.
5453  */
5454 static noinline int walk_up_proc(struct btrfs_trans_handle *trans,
5455 struct btrfs_root *root,
5456 struct btrfs_path *path,
5457 struct walk_control *wc)
5458 {
5459 struct btrfs_fs_info *fs_info = root->fs_info;
5460 int ret;
5461 int level = wc->level;
5462 struct extent_buffer *eb = path->nodes[level];
5463 u64 parent = 0;
5464
5465 if (wc->stage == UPDATE_BACKREF) {
5466 BUG_ON(wc->shared_level < level);
5467 if (level < wc->shared_level)
5468 goto out;
5469
5470 ret = find_next_key(path, level + 1, &wc->update_progress);
5471 if (ret > 0)
5472 wc->update_ref = 0;
5473
5474 wc->stage = DROP_REFERENCE;
5475 wc->shared_level = -1;
5476 path->slots[level] = 0;
5477
5478 /*
5479  * check the reference count again if the block isn't locked.
5480  * we should start walking down the tree again if the reference
5481  * count is one.
5482  */
5483 if (!path->locks[level]) {
5484 BUG_ON(level == 0);
5485 btrfs_tree_lock(eb);
5486 path->locks[level] = BTRFS_WRITE_LOCK;
5487
5488 ret = btrfs_lookup_extent_info(trans, fs_info,
5489 eb->start, level, 1,
5490 &wc->refs[level],
5491 &wc->flags[level]);
5492 if (ret < 0) {
5493 btrfs_tree_unlock_rw(eb, path->locks[level]);
5494 path->locks[level] = 0;
5495 return ret;
5496 }
5497 BUG_ON(wc->refs[level] == 0);
5498 if (wc->refs[level] == 1) {
5499 btrfs_tree_unlock_rw(eb, path->locks[level]);
5500 path->locks[level] = 0;
5501 return 1;
5502 }
5503 }
5504 }
5505
5506 /* wc->stage == DROP_REFERENCE */
5507 BUG_ON(wc->refs[level] > 1 && !path->locks[level]);
5508
5509 if (wc->refs[level] == 1) {
5510 if (level == 0) {
5511 if (wc->flags[level] & BTRFS_BLOCK_FLAG_FULL_BACKREF)
5512 ret = btrfs_dec_ref(trans, root, eb, 1);
5513 else
5514 ret = btrfs_dec_ref(trans, root, eb, 0);
5515 BUG_ON(ret);
5516 if (is_fstree(root->root_key.objectid)) {
5517 ret = btrfs_qgroup_trace_leaf_items(trans, eb);
5518 if (ret) {
5519 btrfs_err_rl(fs_info,
5520 "error %d accounting leaf items, quota is out of sync, rescan required",
5521 ret);
5522 }
5523 }
5524 }
5525
5526 if (!path->locks[level] &&
5527 btrfs_header_generation(eb) == trans->transid) {
5528 btrfs_tree_lock(eb);
5529 path->locks[level] = BTRFS_WRITE_LOCK;
5530 }
5531 btrfs_clean_tree_block(eb);
5532 }
5533
5534 if (eb == root->node) {
5535 if (wc->flags[level] & BTRFS_BLOCK_FLAG_FULL_BACKREF)
5536 parent = eb->start;
5537 else if (root->root_key.objectid != btrfs_header_owner(eb))
5538 goto owner_mismatch;
5539 } else {
5540 if (wc->flags[level + 1] & BTRFS_BLOCK_FLAG_FULL_BACKREF)
5541 parent = path->nodes[level + 1]->start;
5542 else if (root->root_key.objectid !=
5543 btrfs_header_owner(path->nodes[level + 1]))
5544 goto owner_mismatch;
5545 }
5546
5547 btrfs_free_tree_block(trans, btrfs_root_id(root), eb, parent,
5548 wc->refs[level] == 1);
5549 out:
5550 wc->refs[level] = 0;
5551 wc->flags[level] = 0;
5552 return 0;
5553
5554 owner_mismatch:
5555 btrfs_err_rl(fs_info, "unexpected tree owner, have %llu expect %llu",
5556 btrfs_header_owner(eb), root->root_key.objectid);
5557 return -EUCLEAN;
5558 }
5559
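/*
 * Walk down from wc->level towards the leaves, processing each block with
 * walk_down_proc and descending via do_walk_down, until a block is fully
 * handled (ret > 0), a leaf is reached, or an error occurs.
 */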
5560 static noinline int walk_down_tree(struct btrfs_trans_handle *trans,
5561 struct btrfs_root *root,
5562 struct btrfs_path *path,
5563 struct walk_control *wc)
5564 {
5565 int level = wc->level;
5566 int lookup_info = 1;
5567 int ret;
5568
5569 while (level >= 0) {
5570 ret = walk_down_proc(trans, root, path, wc, lookup_info);
5571 if (ret > 0)
5572 break;
5573
5574 if (level == 0)
5575 break;
5576
5577 if (path->slots[level] >=
5578 btrfs_header_nritems(path->nodes[level]))
5579 break;
5580
5581 ret = do_walk_down(trans, root, path, wc, &lookup_info);
5582 if (ret > 0) {
5583 path->slots[level]++;
5584 continue;
5585 } else if (ret < 0)
5586 return ret;
5587 level = wc->level;
5588 }
5589 return 0;
5590 }
5591
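/*
 * Walk back up the tree, finishing each node with walk_up_proc, until
 * either a sibling slot to descend into is found (returns 0) or everything
 * up to max_level has been processed (returns 1).
 */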
5592 static noinline int walk_up_tree(struct btrfs_trans_handle *trans,
5593 struct btrfs_root *root,
5594 struct btrfs_path *path,
5595 struct walk_control *wc, int max_level)
5596 {
5597 int level = wc->level;
5598 int ret;
5599
5600 path->slots[level] = btrfs_header_nritems(path->nodes[level]);
5601 while (level < max_level && path->nodes[level]) {
5602 wc->level = level;
5603 if (path->slots[level] + 1 <
5604 btrfs_header_nritems(path->nodes[level])) {
5605 path->slots[level]++;
5606 return 0;
5607 } else {
5608 ret = walk_up_proc(trans, root, path, wc);
5609 if (ret > 0)
5610 return 0;
5611 if (ret < 0)
5612 return ret;
5613
5614 if (path->locks[level]) {
5615 btrfs_tree_unlock_rw(path->nodes[level],
5616 path->locks[level]);
5617 path->locks[level] = 0;
5618 }
5619 free_extent_buffer(path->nodes[level]);
5620 path->nodes[level] = NULL;
5621 level++;
5622 }
5623 }
5624 return 1;
5625 }
5626
5627 /*
5628  * drop a subvolume tree.
5629  *
5630  * this function traverses the tree freeing any blocks that are only
5631  * referenced by the tree.
5632  *
5633  * when a shared tree block is found, this function decreases its
5634  * reference count by one. if update_ref is true, it also makes sure
5635  * backrefs for the shared block and all lower level blocks are
5636  * properly updated.
5637  *
5638  * If called with for_reloc == 0, this may exit early with -EAGAIN.
5639  */
5640 int btrfs_drop_snapshot(struct btrfs_root *root, int update_ref, int for_reloc)
5641 {
5642 struct btrfs_fs_info *fs_info = root->fs_info;
5643 struct btrfs_path *path;
5644 struct btrfs_trans_handle *trans;
5645 struct btrfs_root *tree_root = fs_info->tree_root;
5646 struct btrfs_root_item *root_item = &root->root_item;
5647 struct walk_control *wc;
5648 struct btrfs_key key;
5649 int err = 0;
5650 int ret;
5651 int level;
5652 bool root_dropped = false;
5653 bool unfinished_drop = false;
5654
5655 btrfs_debug(fs_info, "Drop subvolume %llu", root->root_key.objectid);
5656
5657 path = btrfs_alloc_path();
5658 if (!path) {
5659 err = -ENOMEM;
5660 goto out;
5661 }
5662
5663 wc = kzalloc(sizeof(*wc), GFP_NOFS);
5664 if (!wc) {
5665 btrfs_free_path(path);
5666 err = -ENOMEM;
5667 goto out;
5668 }
5669
5670 /*
5671  * Use join to avoid potential EINTR from transaction start. See
5672  * wait_reserve_ticket and the whole reservation callchain.
5673  */
5674 if (for_reloc)
5675 trans = btrfs_join_transaction(tree_root);
5676 else
5677 trans = btrfs_start_transaction(tree_root, 0);
5678 if (IS_ERR(trans)) {
5679 err = PTR_ERR(trans);
5680 goto out_free;
5681 }
5682
5683 err = btrfs_run_delayed_items(trans);
5684 if (err)
5685 goto out_end_trans;
5686
5687 /*
5688  * This will help us catch people modifying the fs tree while we're
5689  * dropping it. It is unsafe to mess with the fs tree while it's being
5690  * dropped as we unlock the root node and parent nodes as we walk down
5691  * the tree, assuming nothing will change. If something does change
5692  * then we'll have stale information and drop references to blocks
5693  * we've already dropped.
5694  */
5695 set_bit(BTRFS_ROOT_DELETING, &root->state);
5696 unfinished_drop = test_bit(BTRFS_ROOT_UNFINISHED_DROP, &root->state);
5697
5698 if (btrfs_disk_key_objectid(&root_item->drop_progress) == 0) {
5699 level = btrfs_header_level(root->node);
5700 path->nodes[level] = btrfs_lock_root_node(root);
5701 path->slots[level] = 0;
5702 path->locks[level] = BTRFS_WRITE_LOCK;
5703 memset(&wc->update_progress, 0,
5704 sizeof(wc->update_progress));
5705 } else {
5706 btrfs_disk_key_to_cpu(&key, &root_item->drop_progress);
5707 memcpy(&wc->update_progress, &key,
5708 sizeof(wc->update_progress));
5709
5710 level = btrfs_root_drop_level(root_item);
5711 BUG_ON(level == 0);
5712 path->lowest_level = level;
5713 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
5714 path->lowest_level = 0;
5715 if (ret < 0) {
5716 err = ret;
5717 goto out_end_trans;
5718 }
5719 WARN_ON(ret > 0);
5720
5721 /*
5722  * unlock our path, this is safe because only this
5723  * function is allowed to delete this snapshot
5724  */
5725 btrfs_unlock_up_safe(path, 0);
5726
5727 level = btrfs_header_level(root->node);
5728 while (1) {
5729 btrfs_tree_lock(path->nodes[level]);
5730 path->locks[level] = BTRFS_WRITE_LOCK;
5731
5732 ret = btrfs_lookup_extent_info(trans, fs_info,
5733 path->nodes[level]->start,
5734 level, 1, &wc->refs[level],
5735 &wc->flags[level]);
5736 if (ret < 0) {
5737 err = ret;
5738 goto out_end_trans;
5739 }
5740 BUG_ON(wc->refs[level] == 0);
5741
5742 if (level == btrfs_root_drop_level(root_item))
5743 break;
5744
5745 btrfs_tree_unlock(path->nodes[level]);
5746 path->locks[level] = 0;
5747 WARN_ON(wc->refs[level] != 1);
5748 level--;
5749 }
5750 }
5751
5752 wc->restarted = test_bit(BTRFS_ROOT_DEAD_TREE, &root->state);
5753 wc->level = level;
5754 wc->shared_level = -1;
5755 wc->stage = DROP_REFERENCE;
5756 wc->update_ref = update_ref;
5757 wc->keep_locks = 0;
5758 wc->reada_count = BTRFS_NODEPTRS_PER_BLOCK(fs_info);
5759
5760 while (1) {
5761
5762 ret = walk_down_tree(trans, root, path, wc);
5763 if (ret < 0) {
5764 err = ret;
5765 break;
5766 }
5767
5768 ret = walk_up_tree(trans, root, path, wc, BTRFS_MAX_LEVEL);
5769 if (ret < 0) {
5770 err = ret;
5771 break;
5772 }
5773
5774 if (ret > 0) {
5775 BUG_ON(wc->stage != DROP_REFERENCE);
5776 break;
5777 }
5778
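/*
 * Checkpoint how far the drop has gotten so that the deletion can resume
 * after a commit or a crash: the current key is saved in the root item's
 * drop_progress and written back with btrfs_update_root() below.
 */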
5779 if (wc->stage == DROP_REFERENCE) {
5780 wc->drop_level = wc->level;
5781 btrfs_node_key_to_cpu(path->nodes[wc->drop_level],
5782 &wc->drop_progress,
5783 path->slots[wc->drop_level]);
5784 }
5785 btrfs_cpu_key_to_disk(&root_item->drop_progress,
5786 &wc->drop_progress);
5787 btrfs_set_root_drop_level(root_item, wc->drop_level);
5788
5789 BUG_ON(wc->level == 0);
5790 if (btrfs_should_end_transaction(trans) ||
5791 (!for_reloc && btrfs_need_cleaner_sleep(fs_info))) {
5792 ret = btrfs_update_root(trans, tree_root,
5793 &root->root_key,
5794 root_item);
5795 if (ret) {
5796 btrfs_abort_transaction(trans, ret);
5797 err = ret;
5798 goto out_end_trans;
5799 }
5800
5801 btrfs_end_transaction_throttle(trans);
5802 if (!for_reloc && btrfs_need_cleaner_sleep(fs_info)) {
5803 btrfs_debug(fs_info,
5804 "drop snapshot early exit");
5805 err = -EAGAIN;
5806 goto out_free;
5807 }
5808
5809 /*
5810  * Use join to avoid potential EINTR from transaction
5811  * start. See wait_reserve_ticket and the whole
5812  * reservation callchain.
5813  */
5814 if (for_reloc)
5815 trans = btrfs_join_transaction(tree_root);
5816 else
5817 trans = btrfs_start_transaction(tree_root, 0);
5818 if (IS_ERR(trans)) {
5819 err = PTR_ERR(trans);
5820 goto out_free;
5821 }
5822 }
5823 }
5824 btrfs_release_path(path);
5825 if (err)
5826 goto out_end_trans;
5827
5828 ret = btrfs_del_root(trans, &root->root_key);
5829 if (ret) {
5830 btrfs_abort_transaction(trans, ret);
5831 err = ret;
5832 goto out_end_trans;
5833 }
5834
5835 if (root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
5836 ret = btrfs_find_root(tree_root, &root->root_key, path,
5837 NULL, NULL);
5838 if (ret < 0) {
5839 btrfs_abort_transaction(trans, ret);
5840 err = ret;
5841 goto out_end_trans;
5842 } else if (ret > 0) {
5843 /* if we fail to delete the orphan item this time
5844  * around, it'll get picked up the next time.
5845  *
5846  * The most common failure here is just -ENOENT.
5847  */
5848 btrfs_del_orphan_item(trans, tree_root,
5849 root->root_key.objectid);
5850 }
5851 }
5852
5853 /*
5854  * This subvolume is going to be completely dropped, and won't be
5855  * recorded as dirty roots, thus the pertrans meta rsv will not be
5856  * freed at commit transaction time. So free it here manually.
5857  */
5858 btrfs_qgroup_convert_reserved_meta(root, INT_MAX);
5859 btrfs_qgroup_free_meta_all_pertrans(root);
5860
5861 if (test_bit(BTRFS_ROOT_IN_RADIX, &root->state))
5862 btrfs_add_dropped_root(trans, root);
5863 else
5864 btrfs_put_root(root);
5865 root_dropped = true;
5866 out_end_trans:
5867 btrfs_end_transaction_throttle(trans);
5868 out_free:
5869 kfree(wc);
5870 btrfs_free_path(path);
5871 out:
5872 /*
5873  * We were an unfinished drop root, check to see if there are any
5874  * pending, and if not clear and wake up any waiters.
5875  */
5876 if (!err && unfinished_drop)
5877 btrfs_maybe_wake_unfinished_drop(fs_info);
5878
5879 /*
5880  * So if we need to stop dropping the snapshot for whatever reason we
5881  * need to make sure to add it back to the dead root list so that we
5882  * keep trying to do the work later. This also cleans up roots if we
5883  * don't have it in the radix (like when we recover after a power fail
5884  * or unmount) so we don't leak memory.
5885  */
5886 if (!for_reloc && !root_dropped)
5887 btrfs_add_dead_root(root);
5888 return err;
5889 }
5890
5891 /*
5892  * drop the subtree rooted at tree block 'node'.
5893  *
5894  * NOTE: this function will unlock and release tree block 'node'
5895  * only used by relocation code
5896  */
5897 int btrfs_drop_subtree(struct btrfs_trans_handle *trans,
5898 struct btrfs_root *root,
5899 struct extent_buffer *node,
5900 struct extent_buffer *parent)
5901 {
5902 struct btrfs_fs_info *fs_info = root->fs_info;
5903 struct btrfs_path *path;
5904 struct walk_control *wc;
5905 int level;
5906 int parent_level;
5907 int ret = 0;
5908 int wret;
5909
5910 BUG_ON(root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID);
5911
5912 path = btrfs_alloc_path();
5913 if (!path)
5914 return -ENOMEM;
5915
5916 wc = kzalloc(sizeof(*wc), GFP_NOFS);
5917 if (!wc) {
5918 btrfs_free_path(path);
5919 return -ENOMEM;
5920 }
5921
5922 btrfs_assert_tree_write_locked(parent);
5923 parent_level = btrfs_header_level(parent);
5924 atomic_inc(&parent->refs);
5925 path->nodes[parent_level] = parent;
5926 path->slots[parent_level] = btrfs_header_nritems(parent);
5927
5928 btrfs_assert_tree_write_locked(node);
5929 level = btrfs_header_level(node);
5930 path->nodes[level] = node;
5931 path->slots[level] = 0;
5932 path->locks[level] = BTRFS_WRITE_LOCK;
5933
5934 wc->refs[parent_level] = 1;
5935 wc->flags[parent_level] = BTRFS_BLOCK_FLAG_FULL_BACKREF;
5936 wc->level = level;
5937 wc->shared_level = -1;
5938 wc->stage = DROP_REFERENCE;
5939 wc->update_ref = 0;
5940 wc->keep_locks = 1;
5941 wc->reada_count = BTRFS_NODEPTRS_PER_BLOCK(fs_info);
5942
5943 while (1) {
5944 wret = walk_down_tree(trans, root, path, wc);
5945 if (wret < 0) {
5946 ret = wret;
5947 break;
5948 }
5949
5950 wret = walk_up_tree(trans, root, path, wc, parent_level);
5951 if (wret < 0)
5952 ret = wret;
5953 if (wret != 0)
5954 break;
5955 }
5956
5957 kfree(wc);
5958 btrfs_free_path(path);
5959 return ret;
5960 }
5961
5962 /*
5963  * helper to account the unused space of all the readonly block groups in
5964  * the space_info. takes mirrors into account.
5965  */
5966 u64 btrfs_account_ro_block_groups_free_space(struct btrfs_space_info *sinfo)
5967 {
5968 struct btrfs_block_group *block_group;
5969 u64 free_bytes = 0;
5970 int factor;
5971
5972 /* It's df, we don't care if it's racy */
5973 if (list_empty(&sinfo->ro_bgs))
5974 return 0;
5975
5976 spin_lock(&sinfo->lock);
5977 list_for_each_entry(block_group, &sinfo->ro_bgs, ro_list) {
5978 spin_lock(&block_group->lock);
5979
5980 if (!block_group->ro) {
5981 spin_unlock(&block_group->lock);
5982 continue;
5983 }
5984
5985 factor = btrfs_bg_type_to_factor(block_group->flags);
5986 free_bytes += (block_group->length -
5987 block_group->used) * factor;
5988
5989 spin_unlock(&block_group->lock);
5990 }
5991 spin_unlock(&sinfo->lock);
5992
5993 return free_bytes;
5994 }
5995
5996 int btrfs_error_unpin_extent_range(struct btrfs_fs_info *fs_info,
5997 u64 start, u64 end)
5998 {
5999 return unpin_extent_range(fs_info, start, end, false);
6000 }
6001
6002 /*
6003  * It used to be that old block groups would be left around forever.
6004  * Iterating over them would be enough to trim unused space. Since we
6005  * now automatically remove them, we also need to iterate over unallocated
6006  * space.
6007  *
6008  * We don't want a transaction for this since the discard may take a
6009  * substantial amount of time. We don't require that a transaction be
6010  * running, but we do need to take a running transaction into account
6011  * to ensure that we're not discarding chunks that were released or
6012  * allocated in the current transaction.
6013  *
6014  * Holding the chunks lock will prevent other threads from allocating
6015  * or releasing chunks, but it won't prevent a running transaction
6016  * from committing and releasing the memory that the pending chunks
6017  * list head uses. For that, we need to take a reference to the
6018  * transaction and hold the commit root sem. We only need to hold
6019  * it while performing the free space search since we have already
6020  * held back allocations.
6021  */
6022 static int btrfs_trim_free_extents(struct btrfs_device *device, u64 *trimmed)
6023 {
6024 u64 start = BTRFS_DEVICE_RANGE_RESERVED, len = 0, end = 0;
6025 int ret;
6026
6027 *trimmed = 0;
6028
6029 /* Discard not supported = nothing to do. */
6030 if (!bdev_max_discard_sectors(device->bdev))
6031 return 0;
6032
6033 /* Not writable = nothing to do. */
6034 if (!test_bit(BTRFS_DEV_STATE_WRITEABLE, &device->dev_state))
6035 return 0;
6036
6037 /* No free space = nothing to do. */
6038 if (device->total_bytes <= device->bytes_used)
6039 return 0;
6040
6041 ret = 0;
6042
6043 while (1) {
6044 struct btrfs_fs_info *fs_info = device->fs_info;
6045 u64 bytes;
6046
6047 ret = mutex_lock_interruptible(&fs_info->chunk_mutex);
6048 if (ret)
6049 break;
6050
6051 find_first_clear_extent_bit(&device->alloc_state, start,
6052 &start, &end,
6053 CHUNK_TRIMMED | CHUNK_ALLOCATED);
6054
6055 /* Check if there are any CHUNK_* bits left */
6056 if (start > device->total_bytes) {
6057 WARN_ON(IS_ENABLED(CONFIG_BTRFS_DEBUG));
6058 btrfs_warn_in_rcu(fs_info,
6059 "ignoring attempt to trim beyond device size: offset %llu length %llu device %s device size %llu",
6060 start, end - start + 1,
6061 rcu_str_deref(device->name),
6062 device->total_bytes);
6063 mutex_unlock(&fs_info->chunk_mutex);
6064 ret = 0;
6065 break;
6066 }
6067
6068 /* Ensure we skip the reserved space on each device. */
6069 start = max_t(u64, start, BTRFS_DEVICE_RANGE_RESERVED);
6070
6071 /*
6072  * If find_first_clear_extent_bit finds a range that spans the
6073  * end of the device, it will set end to -1, in which case it's up
6074  * to the caller to trim the value to the size of the device.
6075  */
6076 end = min(end, device->total_bytes - 1);
6077
6078 len = end - start + 1;
6079
6080 /* We didn't find any extents */
6081 if (!len) {
6082 mutex_unlock(&fs_info->chunk_mutex);
6083 ret = 0;
6084 break;
6085 }
6086
6087 ret = btrfs_issue_discard(device->bdev, start, len,
6088 &bytes);
6089 if (!ret)
6090 set_extent_bits(&device->alloc_state, start,
6091 start + bytes - 1,
6092 CHUNK_TRIMMED);
6093 mutex_unlock(&fs_info->chunk_mutex);
6094
6095 if (ret)
6096 break;
6097
6098 start += len;
6099 *trimmed += bytes;
6100
6101 if (fatal_signal_pending(current)) {
6102 ret = -ERESTARTSYS;
6103 break;
6104 }
6105
6106 cond_resched();
6107 }
6108
6109 return ret;
6110 }
6111
6112
6113 /*
6114  * Trim the whole filesystem by:
6115  * 1) trimming the free space in each block group
6116  * 2) trimming the unallocated space on each device
6117  *
6118  * This will also continue trimming even if a block group or device encounters
6119  * an error. The return value will be the last error, or 0 if nothing bad happens.
6120  */
6121 int btrfs_trim_fs(struct btrfs_fs_info *fs_info, struct fstrim_range *range)
6122 {
6123 struct btrfs_fs_devices *fs_devices = fs_info->fs_devices;
6124 struct btrfs_block_group *cache = NULL;
6125 struct btrfs_device *device;
6126 u64 group_trimmed;
6127 u64 range_end = U64_MAX;
6128 u64 start;
6129 u64 end;
6130 u64 trimmed = 0;
6131 u64 bg_failed = 0;
6132 u64 dev_failed = 0;
6133 int bg_ret = 0;
6134 int dev_ret = 0;
6135 int ret = 0;
6136
6137 if (range->start == U64_MAX)
6138 return -EINVAL;
6139
6140 /*
6141  * Check range overflow if range->len is set.
6142  * The default range->len is U64_MAX.
6143  */
6144 if (range->len != U64_MAX &&
6145 check_add_overflow(range->start, range->len, &range_end))
6146 return -EINVAL;
6147
6148 cache = btrfs_lookup_first_block_group(fs_info, range->start);
6149 for (; cache; cache = btrfs_next_block_group(cache)) {
6150 if (cache->start >= range_end) {
6151 btrfs_put_block_group(cache);
6152 break;
6153 }
6154
6155 start = max(range->start, cache->start);
6156 end = min(range_end, cache->start + cache->length);
6157
6158 if (end - start >= range->minlen) {
6159 if (!btrfs_block_group_done(cache)) {
6160 ret = btrfs_cache_block_group(cache, true);
6161 if (ret) {
6162 bg_failed++;
6163 bg_ret = ret;
6164 continue;
6165 }
6166 }
6167 ret = btrfs_trim_block_group(cache,
6168 &group_trimmed,
6169 start,
6170 end,
6171 range->minlen);
6172
6173 trimmed += group_trimmed;
6174 if (ret) {
6175 bg_failed++;
6176 bg_ret = ret;
6177 continue;
6178 }
6179 }
6180 }
6181
6182 if (bg_failed)
6183 btrfs_warn(fs_info,
6184 "failed to trim %llu block group(s), last error %d",
6185 bg_failed, bg_ret);
6186
6187 mutex_lock(&fs_devices->device_list_mutex);
6188 list_for_each_entry(device, &fs_devices->devices, dev_list) {
6189 if (test_bit(BTRFS_DEV_STATE_MISSING, &device->dev_state))
6190 continue;
6191
6192 ret = btrfs_trim_free_extents(device, &group_trimmed);
6193 if (ret) {
6194 dev_failed++;
6195 dev_ret = ret;
6196 break;
6197 }
6198
6199 trimmed += group_trimmed;
6200 }
6201 mutex_unlock(&fs_devices->device_list_mutex);
6202
6203 if (dev_failed)
6204 btrfs_warn(fs_info,
6205 "failed to trim %llu device(s), last error %d",
6206 dev_failed, dev_ret);
6207 range->len = trimmed;
6208 if (bg_ret)
6209 return bg_ret;
6210 return dev_ret;
6211 }