// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright (C) 2007,2008 Oracle.  All rights reserved.
 */

0006 #include <linux/sched.h>
0007 #include <linux/slab.h>
0008 #include <linux/rbtree.h>
0009 #include <linux/mm.h>
0010 #include <linux/error-injection.h>
0011 #include "ctree.h"
0012 #include "disk-io.h"
0013 #include "transaction.h"
0014 #include "print-tree.h"
0015 #include "locking.h"
0016 #include "volumes.h"
0017 #include "qgroup.h"
0018 #include "tree-mod-log.h"
0019 #include "tree-checker.h"
0020
0021 static int split_node(struct btrfs_trans_handle *trans, struct btrfs_root
0022 *root, struct btrfs_path *path, int level);
0023 static int split_leaf(struct btrfs_trans_handle *trans, struct btrfs_root *root,
0024 const struct btrfs_key *ins_key, struct btrfs_path *path,
0025 int data_size, int extend);
0026 static int push_node_left(struct btrfs_trans_handle *trans,
0027 struct extent_buffer *dst,
0028 struct extent_buffer *src, int empty);
0029 static int balance_node_right(struct btrfs_trans_handle *trans,
0030 struct extent_buffer *dst_buf,
0031 struct extent_buffer *src_buf);
0032 static void del_ptr(struct btrfs_root *root, struct btrfs_path *path,
0033 int level, int slot);
0034
0035 static const struct btrfs_csums {
0036 u16 size;
0037 const char name[10];
0038 const char driver[12];
0039 } btrfs_csums[] = {
0040 [BTRFS_CSUM_TYPE_CRC32] = { .size = 4, .name = "crc32c" },
0041 [BTRFS_CSUM_TYPE_XXHASH] = { .size = 8, .name = "xxhash64" },
0042 [BTRFS_CSUM_TYPE_SHA256] = { .size = 32, .name = "sha256" },
0043 [BTRFS_CSUM_TYPE_BLAKE2] = { .size = 32, .name = "blake2b",
0044 .driver = "blake2b-256" },
0045 };
0046
0047 int btrfs_super_csum_size(const struct btrfs_super_block *s)
0048 {
0049 u16 t = btrfs_super_csum_type(s);

	/* csum type is validated at mount time */
0053 return btrfs_csums[t].size;
0054 }
0055
0056 const char *btrfs_super_csum_name(u16 csum_type)
0057 {
	/* csum type is validated at mount time */
0059 return btrfs_csums[csum_type].name;
0060 }
0061

/*
 * Return the crypto API driver name for the checksum type, falling back to
 * the generic checksum name when no explicit driver string is set.
 */
0066 const char *btrfs_super_csum_driver(u16 csum_type)
0067 {
	/* csum type is validated at mount time */
0069 return btrfs_csums[csum_type].driver[0] ?
0070 btrfs_csums[csum_type].driver :
0071 btrfs_csums[csum_type].name;
0072 }
0073
0074 size_t __attribute_const__ btrfs_get_num_csums(void)
0075 {
0076 return ARRAY_SIZE(btrfs_csums);
0077 }
0078
0079 struct btrfs_path *btrfs_alloc_path(void)
0080 {
0081 return kmem_cache_zalloc(btrfs_path_cachep, GFP_NOFS);
0082 }
0083
/* this also releases the path */
0085 void btrfs_free_path(struct btrfs_path *p)
0086 {
0087 if (!p)
0088 return;
0089 btrfs_release_path(p);
0090 kmem_cache_free(btrfs_path_cachep, p);
0091 }

/*
 * path release drops references on the extent buffers in the path
 * and it drops any locks held by this path.
 *
 * It is safe to call this on paths that hold no locks or extent buffers.
 */
0099 noinline void btrfs_release_path(struct btrfs_path *p)
0100 {
0101 int i;
0102
0103 for (i = 0; i < BTRFS_MAX_LEVEL; i++) {
0104 p->slots[i] = 0;
0105 if (!p->nodes[i])
0106 continue;
0107 if (p->locks[i]) {
0108 btrfs_tree_unlock_rw(p->nodes[i], p->locks[i]);
0109 p->locks[i] = 0;
0110 }
0111 free_extent_buffer(p->nodes[i]);
0112 p->nodes[i] = NULL;
0113 }
0114 }

/*
 * Safely get a reference on the root node of a tree.  No lock is taken, so a
 * concurrent writer may install a different node as the root while we run.
 * The returned extent buffer has a reference held, so it will not disappear,
 * but it may stop being the root of the tree at any time.
 */
0126 struct extent_buffer *btrfs_root_node(struct btrfs_root *root)
0127 {
0128 struct extent_buffer *eb;
0129
0130 while (1) {
0131 rcu_read_lock();
0132 eb = rcu_dereference(root->node);

		/*
		 * The root may be freed once we drop the RCU read lock, so
		 * only keep it if we can still take a reference on it;
		 * otherwise wait for a grace period and retry.
		 */
0140 if (atomic_inc_not_zero(&eb->refs)) {
0141 rcu_read_unlock();
0142 break;
0143 }
0144 rcu_read_unlock();
0145 synchronize_rcu();
0146 }
0147 return eb;
0148 }

/*
 * Add a root to the transaction's list of dirty roots so its root item gets
 * updated at commit time.  Only roots with dirty tracking enabled are added,
 * and only once per transaction.
 */
0155 static void add_root_to_dirty_list(struct btrfs_root *root)
0156 {
0157 struct btrfs_fs_info *fs_info = root->fs_info;
0158
0159 if (test_bit(BTRFS_ROOT_DIRTY, &root->state) ||
0160 !test_bit(BTRFS_ROOT_TRACK_DIRTY, &root->state))
0161 return;
0162
0163 spin_lock(&fs_info->trans_lock);
0164 if (!test_and_set_bit(BTRFS_ROOT_DIRTY, &root->state)) {
		/* Want the extent tree to be the last on the list */
0166 if (root->root_key.objectid == BTRFS_EXTENT_TREE_OBJECTID)
0167 list_move_tail(&root->dirty_list,
0168 &fs_info->dirty_cowonly_roots);
0169 else
0170 list_move(&root->dirty_list,
0171 &fs_info->dirty_cowonly_roots);
0172 }
0173 spin_unlock(&fs_info->trans_lock);
0174 }

/*
 * used by snapshot creation to make a copy of a root for a tree with
 * a given objectid.  The buffer with the new root node is returned in
 * cow_ret, and this func returns zero on success or a negative error code.
 */
0181 int btrfs_copy_root(struct btrfs_trans_handle *trans,
0182 struct btrfs_root *root,
0183 struct extent_buffer *buf,
0184 struct extent_buffer **cow_ret, u64 new_root_objectid)
0185 {
0186 struct btrfs_fs_info *fs_info = root->fs_info;
0187 struct extent_buffer *cow;
0188 int ret = 0;
0189 int level;
0190 struct btrfs_disk_key disk_key;
0191
0192 WARN_ON(test_bit(BTRFS_ROOT_SHAREABLE, &root->state) &&
0193 trans->transid != fs_info->running_transaction->transid);
0194 WARN_ON(test_bit(BTRFS_ROOT_SHAREABLE, &root->state) &&
0195 trans->transid != root->last_trans);
0196
0197 level = btrfs_header_level(buf);
0198 if (level == 0)
0199 btrfs_item_key(buf, &disk_key, 0);
0200 else
0201 btrfs_node_key(buf, &disk_key, 0);
0202
0203 cow = btrfs_alloc_tree_block(trans, root, 0, new_root_objectid,
0204 &disk_key, level, buf->start, 0,
0205 BTRFS_NESTING_NEW_ROOT);
0206 if (IS_ERR(cow))
0207 return PTR_ERR(cow);
0208
0209 copy_extent_buffer_full(cow, buf);
0210 btrfs_set_header_bytenr(cow, cow->start);
0211 btrfs_set_header_generation(cow, trans->transid);
0212 btrfs_set_header_backref_rev(cow, BTRFS_MIXED_BACKREF_REV);
0213 btrfs_clear_header_flag(cow, BTRFS_HEADER_FLAG_WRITTEN |
0214 BTRFS_HEADER_FLAG_RELOC);
0215 if (new_root_objectid == BTRFS_TREE_RELOC_OBJECTID)
0216 btrfs_set_header_flag(cow, BTRFS_HEADER_FLAG_RELOC);
0217 else
0218 btrfs_set_header_owner(cow, new_root_objectid);
0219
0220 write_extent_buffer_fsid(cow, fs_info->fs_devices->metadata_uuid);
0221
0222 WARN_ON(btrfs_header_generation(buf) > trans->transid);
0223 if (new_root_objectid == BTRFS_TREE_RELOC_OBJECTID)
0224 ret = btrfs_inc_ref(trans, root, cow, 1);
0225 else
0226 ret = btrfs_inc_ref(trans, root, cow, 0);
0227 if (ret) {
0228 btrfs_tree_unlock(cow);
0229 free_extent_buffer(cow);
0230 btrfs_abort_transaction(trans, ret);
0231 return ret;
0232 }
0233
0234 btrfs_mark_buffer_dirty(cow);
0235 *cow_ret = cow;
0236 return 0;
0237 }

/*
 * check if the tree block can be shared by multiple trees
 */
0242 int btrfs_block_can_be_shared(struct btrfs_root *root,
0243 struct extent_buffer *buf)
0244 {
	/*
	 * Tree blocks not in shareable trees and tree roots are never shared.
	 * If a block was allocated after the last snapshot and the block was
	 * not allocated by tree relocation, we know the block is not shared.
	 */
0250 if (test_bit(BTRFS_ROOT_SHAREABLE, &root->state) &&
0251 buf != root->node && buf != root->commit_root &&
0252 (btrfs_header_generation(buf) <=
0253 btrfs_root_last_snapshot(&root->root_item) ||
0254 btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC)))
0255 return 1;
0256
0257 return 0;
0258 }
0259
0260 static noinline int update_ref_for_cow(struct btrfs_trans_handle *trans,
0261 struct btrfs_root *root,
0262 struct extent_buffer *buf,
0263 struct extent_buffer *cow,
0264 int *last_ref)
0265 {
0266 struct btrfs_fs_info *fs_info = root->fs_info;
0267 u64 refs;
0268 u64 owner;
0269 u64 flags;
0270 u64 new_flags = 0;
0271 int ret;
0272

	/*
	 * Backrefs update rules:
	 *
	 * Always use full backrefs for extent pointers in tree blocks
	 * allocated by tree relocation.
	 *
	 * If a shared tree block is no longer referenced by its owner
	 * tree (btrfs_header_owner(buf) == root->root_key.objectid),
	 * use full backrefs for extent pointers in the tree block.
	 *
	 * If a tree block is being relocated
	 * (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID),
	 * use full backrefs for extent pointers in the tree block.
	 * The reason for this is that some operations (such as drop tree)
	 * are only allowed for blocks that use full backrefs.
	 */
0290 if (btrfs_block_can_be_shared(root, buf)) {
0291 ret = btrfs_lookup_extent_info(trans, fs_info, buf->start,
0292 btrfs_header_level(buf), 1,
0293 &refs, &flags);
0294 if (ret)
0295 return ret;
0296 if (refs == 0) {
0297 ret = -EROFS;
0298 btrfs_handle_fs_error(fs_info, ret, NULL);
0299 return ret;
0300 }
0301 } else {
0302 refs = 1;
0303 if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID ||
0304 btrfs_header_backref_rev(buf) < BTRFS_MIXED_BACKREF_REV)
0305 flags = BTRFS_BLOCK_FLAG_FULL_BACKREF;
0306 else
0307 flags = 0;
0308 }
0309
0310 owner = btrfs_header_owner(buf);
0311 BUG_ON(owner == BTRFS_TREE_RELOC_OBJECTID &&
0312 !(flags & BTRFS_BLOCK_FLAG_FULL_BACKREF));
0313
0314 if (refs > 1) {
0315 if ((owner == root->root_key.objectid ||
0316 root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID) &&
0317 !(flags & BTRFS_BLOCK_FLAG_FULL_BACKREF)) {
0318 ret = btrfs_inc_ref(trans, root, buf, 1);
0319 if (ret)
0320 return ret;
0321
0322 if (root->root_key.objectid ==
0323 BTRFS_TREE_RELOC_OBJECTID) {
0324 ret = btrfs_dec_ref(trans, root, buf, 0);
0325 if (ret)
0326 return ret;
0327 ret = btrfs_inc_ref(trans, root, cow, 1);
0328 if (ret)
0329 return ret;
0330 }
0331 new_flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
0332 } else {
0333
0334 if (root->root_key.objectid ==
0335 BTRFS_TREE_RELOC_OBJECTID)
0336 ret = btrfs_inc_ref(trans, root, cow, 1);
0337 else
0338 ret = btrfs_inc_ref(trans, root, cow, 0);
0339 if (ret)
0340 return ret;
0341 }
0342 if (new_flags != 0) {
0343 int level = btrfs_header_level(buf);
0344
0345 ret = btrfs_set_disk_extent_flags(trans, buf,
0346 new_flags, level);
0347 if (ret)
0348 return ret;
0349 }
0350 } else {
0351 if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) {
0352 if (root->root_key.objectid ==
0353 BTRFS_TREE_RELOC_OBJECTID)
0354 ret = btrfs_inc_ref(trans, root, cow, 1);
0355 else
0356 ret = btrfs_inc_ref(trans, root, cow, 0);
0357 if (ret)
0358 return ret;
0359 ret = btrfs_dec_ref(trans, root, buf, 1);
0360 if (ret)
0361 return ret;
0362 }
0363 btrfs_clean_tree_block(buf);
0364 *last_ref = 1;
0365 }
0366 return 0;
0367 }

/*
 * does the dirty work in cow of a single block.  The parent block (if
 * supplied) is updated to point to the new cow copy.  The new buffer is
 * marked dirty and returned locked.  If you modify the block it needs to
 * be marked dirty again.
 *
 * search_start -- an allocation hint for the new block
 *
 * empty_size -- a hint that you plan on doing more cow.  This is the size in
 * bytes the allocator should try to find free next to the block it returns.
 * This is just a hint and may be ignored by the allocator.
 */
0381 static noinline int __btrfs_cow_block(struct btrfs_trans_handle *trans,
0382 struct btrfs_root *root,
0383 struct extent_buffer *buf,
0384 struct extent_buffer *parent, int parent_slot,
0385 struct extent_buffer **cow_ret,
0386 u64 search_start, u64 empty_size,
0387 enum btrfs_lock_nesting nest)
0388 {
0389 struct btrfs_fs_info *fs_info = root->fs_info;
0390 struct btrfs_disk_key disk_key;
0391 struct extent_buffer *cow;
0392 int level, ret;
0393 int last_ref = 0;
0394 int unlock_orig = 0;
0395 u64 parent_start = 0;
0396
0397 if (*cow_ret == buf)
0398 unlock_orig = 1;
0399
0400 btrfs_assert_tree_write_locked(buf);
0401
0402 WARN_ON(test_bit(BTRFS_ROOT_SHAREABLE, &root->state) &&
0403 trans->transid != fs_info->running_transaction->transid);
0404 WARN_ON(test_bit(BTRFS_ROOT_SHAREABLE, &root->state) &&
0405 trans->transid != root->last_trans);
0406
0407 level = btrfs_header_level(buf);
0408
0409 if (level == 0)
0410 btrfs_item_key(buf, &disk_key, 0);
0411 else
0412 btrfs_node_key(buf, &disk_key, 0);
0413
0414 if ((root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID) && parent)
0415 parent_start = parent->start;
0416
0417 cow = btrfs_alloc_tree_block(trans, root, parent_start,
0418 root->root_key.objectid, &disk_key, level,
0419 search_start, empty_size, nest);
0420 if (IS_ERR(cow))
0421 return PTR_ERR(cow);

	/* cow is set to blocking by btrfs_init_new_buffer */

0425 copy_extent_buffer_full(cow, buf);
0426 btrfs_set_header_bytenr(cow, cow->start);
0427 btrfs_set_header_generation(cow, trans->transid);
0428 btrfs_set_header_backref_rev(cow, BTRFS_MIXED_BACKREF_REV);
0429 btrfs_clear_header_flag(cow, BTRFS_HEADER_FLAG_WRITTEN |
0430 BTRFS_HEADER_FLAG_RELOC);
0431 if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID)
0432 btrfs_set_header_flag(cow, BTRFS_HEADER_FLAG_RELOC);
0433 else
0434 btrfs_set_header_owner(cow, root->root_key.objectid);
0435
0436 write_extent_buffer_fsid(cow, fs_info->fs_devices->metadata_uuid);
0437
0438 ret = update_ref_for_cow(trans, root, buf, cow, &last_ref);
0439 if (ret) {
0440 btrfs_tree_unlock(cow);
0441 free_extent_buffer(cow);
0442 btrfs_abort_transaction(trans, ret);
0443 return ret;
0444 }
0445
0446 if (test_bit(BTRFS_ROOT_SHAREABLE, &root->state)) {
0447 ret = btrfs_reloc_cow_block(trans, root, buf, cow);
0448 if (ret) {
0449 btrfs_tree_unlock(cow);
0450 free_extent_buffer(cow);
0451 btrfs_abort_transaction(trans, ret);
0452 return ret;
0453 }
0454 }
0455
0456 if (buf == root->node) {
0457 WARN_ON(parent && parent != buf);
0458 if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID ||
0459 btrfs_header_backref_rev(buf) < BTRFS_MIXED_BACKREF_REV)
0460 parent_start = buf->start;
0461
0462 atomic_inc(&cow->refs);
0463 ret = btrfs_tree_mod_log_insert_root(root->node, cow, true);
0464 BUG_ON(ret < 0);
0465 rcu_assign_pointer(root->node, cow);
0466
0467 btrfs_free_tree_block(trans, btrfs_root_id(root), buf,
0468 parent_start, last_ref);
0469 free_extent_buffer(buf);
0470 add_root_to_dirty_list(root);
0471 } else {
0472 WARN_ON(trans->transid != btrfs_header_generation(parent));
0473 btrfs_tree_mod_log_insert_key(parent, parent_slot,
0474 BTRFS_MOD_LOG_KEY_REPLACE, GFP_NOFS);
0475 btrfs_set_node_blockptr(parent, parent_slot,
0476 cow->start);
0477 btrfs_set_node_ptr_generation(parent, parent_slot,
0478 trans->transid);
0479 btrfs_mark_buffer_dirty(parent);
0480 if (last_ref) {
0481 ret = btrfs_tree_mod_log_free_eb(buf);
0482 if (ret) {
0483 btrfs_tree_unlock(cow);
0484 free_extent_buffer(cow);
0485 btrfs_abort_transaction(trans, ret);
0486 return ret;
0487 }
0488 }
0489 btrfs_free_tree_block(trans, btrfs_root_id(root), buf,
0490 parent_start, last_ref);
0491 }
0492 if (unlock_orig)
0493 btrfs_tree_unlock(buf);
0494 free_extent_buffer_stale(buf);
0495 btrfs_mark_buffer_dirty(cow);
0496 *cow_ret = cow;
0497 return 0;
0498 }
0499
0500 static inline int should_cow_block(struct btrfs_trans_handle *trans,
0501 struct btrfs_root *root,
0502 struct extent_buffer *buf)
0503 {
0504 if (btrfs_is_testing(root->fs_info))
0505 return 0;

	/* Ensure we can see the FORCE_COW bit */
	smp_mb__before_atomic();

	/*
	 * We do not need to cow a block if
	 * 1) this block is not created or changed in this transaction;
	 * 2) this block does not belong to TREE_RELOC tree;
	 * 3) the root is not forced COW.
	 *
	 * What is forced COW:
	 *    when we create snapshot during committing the transaction,
	 *    after we've finished copying src root, we must COW the shared
	 *    block to ensure the metadata consistency.
	 */
0521 if (btrfs_header_generation(buf) == trans->transid &&
0522 !btrfs_header_flag(buf, BTRFS_HEADER_FLAG_WRITTEN) &&
0523 !(root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID &&
0524 btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC)) &&
0525 !test_bit(BTRFS_ROOT_FORCE_COW, &root->state))
0526 return 0;
0527 return 1;
0528 }

/*
 * cows a single block, see __btrfs_cow_block for the real work.
 * This version of it has extra checks so that a block isn't COWed more than
 * once per transaction, as long as it hasn't been written yet.
 */
0535 noinline int btrfs_cow_block(struct btrfs_trans_handle *trans,
0536 struct btrfs_root *root, struct extent_buffer *buf,
0537 struct extent_buffer *parent, int parent_slot,
0538 struct extent_buffer **cow_ret,
0539 enum btrfs_lock_nesting nest)
0540 {
0541 struct btrfs_fs_info *fs_info = root->fs_info;
0542 u64 search_start;
0543 int ret;
0544
0545 if (test_bit(BTRFS_ROOT_DELETING, &root->state))
0546 btrfs_err(fs_info,
0547 "COW'ing blocks on a fs root that's being dropped");
0548
0549 if (trans->transaction != fs_info->running_transaction)
0550 WARN(1, KERN_CRIT "trans %llu running %llu\n",
0551 trans->transid,
0552 fs_info->running_transaction->transid);
0553
0554 if (trans->transid != fs_info->generation)
0555 WARN(1, KERN_CRIT "trans %llu running %llu\n",
0556 trans->transid, fs_info->generation);
0557
0558 if (!should_cow_block(trans, root, buf)) {
0559 *cow_ret = buf;
0560 return 0;
0561 }
0562
0563 search_start = buf->start & ~((u64)SZ_1G - 1);

	/*
	 * Before CoWing this block for later modification, check if it's
	 * the subtree root and do the delayed subtree trace if needed.
	 *
	 * Also, we don't care about the error, as it's handled internally.
	 */
0571 btrfs_qgroup_trace_subtree_after_cow(trans, root, buf);
0572 ret = __btrfs_cow_block(trans, root, buf, parent,
0573 parent_slot, cow_ret, search_start, 0, nest);
0574
0575 trace_btrfs_cow_block(root, buf, *cow_ret);
0576
0577 return ret;
0578 }
0579 ALLOW_ERROR_INJECTION(btrfs_cow_block, ERRNO);

/*
 * helper function for defrag to decide if two blocks pointed to by a
 * node are actually close by
 */
0585 static int close_blocks(u64 blocknr, u64 other, u32 blocksize)
0586 {
0587 if (blocknr < other && other - (blocknr + blocksize) < 32768)
0588 return 1;
0589 if (blocknr > other && blocknr - (other + blocksize) < 32768)
0590 return 1;
0591 return 0;
0592 }
0593
0594 #ifdef __LITTLE_ENDIAN

/*
 * Compare two keys.  On little-endian the disk key order is the same as the
 * CPU key order, so we can avoid the conversion.
 */
0600 static int comp_keys(const struct btrfs_disk_key *disk_key,
0601 const struct btrfs_key *k2)
0602 {
0603 const struct btrfs_key *k1 = (const struct btrfs_key *)disk_key;
0604
0605 return btrfs_comp_cpu_keys(k1, k2);
0606 }
0607
0608 #else

/*
 * Compare two keys in a memcmp fashion.
 */
0613 static int comp_keys(const struct btrfs_disk_key *disk,
0614 const struct btrfs_key *k2)
0615 {
0616 struct btrfs_key k1;
0617
0618 btrfs_disk_key_to_cpu(&k1, disk);
0619
0620 return btrfs_comp_cpu_keys(&k1, k2);
0621 }
0622 #endif

/*
 * The same as comp_keys(), but operating on two struct btrfs_key.
 */
0627 int __pure btrfs_comp_cpu_keys(const struct btrfs_key *k1, const struct btrfs_key *k2)
0628 {
0629 if (k1->objectid > k2->objectid)
0630 return 1;
0631 if (k1->objectid < k2->objectid)
0632 return -1;
0633 if (k1->type > k2->type)
0634 return 1;
0635 if (k1->type < k2->type)
0636 return -1;
0637 if (k1->offset > k2->offset)
0638 return 1;
0639 if (k1->offset < k2->offset)
0640 return -1;
0641 return 0;
0642 }
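/*
 * A short sketch of the ordering implemented above: keys sort by objectid
 * first, then type, then offset.  The sample values below are made up purely
 * for illustration and the helper is compiled out.
 */
#if 0
static void key_order_example(void)
{
	struct btrfs_key a = { .objectid = 256, .type = BTRFS_INODE_ITEM_KEY,
			       .offset = 0 };
	struct btrfs_key b = { .objectid = 256, .type = BTRFS_INODE_REF_KEY,
			       .offset = 256 };

	/* Same objectid; INODE_ITEM sorts before INODE_REF, so a < b. */
	WARN_ON(btrfs_comp_cpu_keys(&a, &b) >= 0);
}
#endif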

/*
 * This is used by the defrag code to go through all the leaves pointed to by
 * a node and reallocate them so that disk order is close to key order.
 */
0649 int btrfs_realloc_node(struct btrfs_trans_handle *trans,
0650 struct btrfs_root *root, struct extent_buffer *parent,
0651 int start_slot, u64 *last_ret,
0652 struct btrfs_key *progress)
0653 {
0654 struct btrfs_fs_info *fs_info = root->fs_info;
0655 struct extent_buffer *cur;
0656 u64 blocknr;
0657 u64 search_start = *last_ret;
0658 u64 last_block = 0;
0659 u64 other;
0660 u32 parent_nritems;
0661 int end_slot;
0662 int i;
0663 int err = 0;
0664 u32 blocksize;
0665 int progress_passed = 0;
0666 struct btrfs_disk_key disk_key;
0667
0668 WARN_ON(trans->transaction != fs_info->running_transaction);
0669 WARN_ON(trans->transid != fs_info->generation);
0670
0671 parent_nritems = btrfs_header_nritems(parent);
0672 blocksize = fs_info->nodesize;
0673 end_slot = parent_nritems - 1;
0674
0675 if (parent_nritems <= 1)
0676 return 0;
0677
0678 for (i = start_slot; i <= end_slot; i++) {
0679 int close = 1;
0680
0681 btrfs_node_key(parent, &disk_key, i);
0682 if (!progress_passed && comp_keys(&disk_key, progress) < 0)
0683 continue;
0684
0685 progress_passed = 1;
0686 blocknr = btrfs_node_blockptr(parent, i);
0687 if (last_block == 0)
0688 last_block = blocknr;
0689
0690 if (i > 0) {
0691 other = btrfs_node_blockptr(parent, i - 1);
0692 close = close_blocks(blocknr, other, blocksize);
0693 }
0694 if (!close && i < end_slot) {
0695 other = btrfs_node_blockptr(parent, i + 1);
0696 close = close_blocks(blocknr, other, blocksize);
0697 }
0698 if (close) {
0699 last_block = blocknr;
0700 continue;
0701 }
0702
0703 cur = btrfs_read_node_slot(parent, i);
0704 if (IS_ERR(cur))
0705 return PTR_ERR(cur);
0706 if (search_start == 0)
0707 search_start = last_block;
0708
0709 btrfs_tree_lock(cur);
0710 err = __btrfs_cow_block(trans, root, cur, parent, i,
0711 &cur, search_start,
0712 min(16 * blocksize,
0713 (end_slot - i) * blocksize),
0714 BTRFS_NESTING_COW);
0715 if (err) {
0716 btrfs_tree_unlock(cur);
0717 free_extent_buffer(cur);
0718 break;
0719 }
0720 search_start = cur->start;
0721 last_block = cur->start;
0722 *last_ret = search_start;
0723 btrfs_tree_unlock(cur);
0724 free_extent_buffer(cur);
0725 }
0726 return err;
0727 }

/*
 * Search for @key in the extent buffer @eb, starting at slot @low.
 *
 * The slot where the key should go is returned via @slot: it points to the
 * first slot with a key >= @key, which is where you would insert @key if it
 * is not found.  @slot may be equal to the number of items when @key is
 * bigger than all of the existing keys.
 *
 * Returns 0 if the key was found, 1 if not, and a negative errno on failure.
 */
0742 static noinline int generic_bin_search(struct extent_buffer *eb, int low,
0743 const struct btrfs_key *key, int *slot)
0744 {
0745 unsigned long p;
0746 int item_size;
0747 int high = btrfs_header_nritems(eb);
0748 int ret;
0749 const int key_size = sizeof(struct btrfs_disk_key);
0750
0751 if (low > high) {
0752 btrfs_err(eb->fs_info,
0753 "%s: low (%d) > high (%d) eb %llu owner %llu level %d",
0754 __func__, low, high, eb->start,
0755 btrfs_header_owner(eb), btrfs_header_level(eb));
0756 return -EINVAL;
0757 }
0758
0759 if (btrfs_header_level(eb) == 0) {
0760 p = offsetof(struct btrfs_leaf, items);
0761 item_size = sizeof(struct btrfs_item);
0762 } else {
0763 p = offsetof(struct btrfs_node, ptrs);
0764 item_size = sizeof(struct btrfs_key_ptr);
0765 }
0766
0767 while (low < high) {
0768 unsigned long oip;
0769 unsigned long offset;
0770 struct btrfs_disk_key *tmp;
0771 struct btrfs_disk_key unaligned;
0772 int mid;
0773
0774 mid = (low + high) / 2;
0775 offset = p + mid * item_size;
0776 oip = offset_in_page(offset);
0777
0778 if (oip + key_size <= PAGE_SIZE) {
0779 const unsigned long idx = get_eb_page_index(offset);
0780 char *kaddr = page_address(eb->pages[idx]);
0781
0782 oip = get_eb_offset_in_page(eb, offset);
0783 tmp = (struct btrfs_disk_key *)(kaddr + oip);
0784 } else {
0785 read_extent_buffer(eb, &unaligned, offset, key_size);
0786 tmp = &unaligned;
0787 }
0788
0789 ret = comp_keys(tmp, key);
0790
0791 if (ret < 0)
0792 low = mid + 1;
0793 else if (ret > 0)
0794 high = mid;
0795 else {
0796 *slot = mid;
0797 return 0;
0798 }
0799 }
0800 *slot = low;
0801 return 1;
0802 }

/*
 * Simple binary search on an extent buffer.  Works for both leaves and nodes.
 */
0808 int btrfs_bin_search(struct extent_buffer *eb, const struct btrfs_key *key,
0809 int *slot)
0810 {
0811 return generic_bin_search(eb, 0, key, slot);
0812 }
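/*
 * Sketch of how the return convention above is consumed: 0 means an exact
 * match at *slot, 1 means the key was not found and *slot is where it would
 * be inserted (possibly equal to the item count).  Illustrative only, the
 * helper below is an assumption and compiled out.
 */
#if 0
static void bin_search_example(struct extent_buffer *eb,
			       const struct btrfs_key *key)
{
	int slot;
	int ret;

	ret = btrfs_bin_search(eb, key, &slot);
	if (ret == 0) {
		/* @key exists at @slot */
	} else if (slot < btrfs_header_nritems(eb)) {
		/* the first key greater than @key lives at @slot */
	} else {
		/* @key is greater than every key in this buffer */
	}
}
#endif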
0813
0814 static void root_add_used(struct btrfs_root *root, u32 size)
0815 {
0816 spin_lock(&root->accounting_lock);
0817 btrfs_set_root_used(&root->root_item,
0818 btrfs_root_used(&root->root_item) + size);
0819 spin_unlock(&root->accounting_lock);
0820 }
0821
0822 static void root_sub_used(struct btrfs_root *root, u32 size)
0823 {
0824 spin_lock(&root->accounting_lock);
0825 btrfs_set_root_used(&root->root_item,
0826 btrfs_root_used(&root->root_item) - size);
0827 spin_unlock(&root->accounting_lock);
0828 }
0829
/*
 * Read the child tree block at @slot of the node @parent and verify it is up
 * to date.  Returns the extent buffer with a reference held, or an ERR_PTR.
 */
0833 struct extent_buffer *btrfs_read_node_slot(struct extent_buffer *parent,
0834 int slot)
0835 {
0836 int level = btrfs_header_level(parent);
0837 struct extent_buffer *eb;
0838 struct btrfs_key first_key;
0839
0840 if (slot < 0 || slot >= btrfs_header_nritems(parent))
0841 return ERR_PTR(-ENOENT);
0842
0843 BUG_ON(level == 0);
0844
0845 btrfs_node_key_to_cpu(parent, &first_key, slot);
0846 eb = read_tree_block(parent->fs_info, btrfs_node_blockptr(parent, slot),
0847 btrfs_header_owner(parent),
0848 btrfs_node_ptr_generation(parent, slot),
0849 level - 1, &first_key);
0850 if (IS_ERR(eb))
0851 return eb;
0852 if (!extent_buffer_uptodate(eb)) {
0853 free_extent_buffer(eb);
0854 return ERR_PTR(-EIO);
0855 }
0856
0857 return eb;
0858 }
0859
/*
 * node level balancing, used to make sure nodes are in proper order for
 * item deletion.  We balance from the top down, so we have to make sure
 * that a deletion won't leave a node completely empty later on.
 */
0865 static noinline int balance_level(struct btrfs_trans_handle *trans,
0866 struct btrfs_root *root,
0867 struct btrfs_path *path, int level)
0868 {
0869 struct btrfs_fs_info *fs_info = root->fs_info;
0870 struct extent_buffer *right = NULL;
0871 struct extent_buffer *mid;
0872 struct extent_buffer *left = NULL;
0873 struct extent_buffer *parent = NULL;
0874 int ret = 0;
0875 int wret;
0876 int pslot;
0877 int orig_slot = path->slots[level];
0878 u64 orig_ptr;
0879
0880 ASSERT(level > 0);
0881
0882 mid = path->nodes[level];
0883
0884 WARN_ON(path->locks[level] != BTRFS_WRITE_LOCK);
0885 WARN_ON(btrfs_header_generation(mid) != trans->transid);
0886
0887 orig_ptr = btrfs_node_blockptr(mid, orig_slot);
0888
0889 if (level < BTRFS_MAX_LEVEL - 1) {
0890 parent = path->nodes[level + 1];
0891 pslot = path->slots[level + 1];
0892 }
0893

	/*
	 * Deal with the case where there is only one pointer in the root by
	 * promoting the node below to a root.
	 */
0898 if (!parent) {
0899 struct extent_buffer *child;
0900
0901 if (btrfs_header_nritems(mid) != 1)
0902 return 0;
0903
		/* promote the child to a root */
0905 child = btrfs_read_node_slot(mid, 0);
0906 if (IS_ERR(child)) {
0907 ret = PTR_ERR(child);
0908 btrfs_handle_fs_error(fs_info, ret, NULL);
0909 goto enospc;
0910 }
0911
0912 btrfs_tree_lock(child);
0913 ret = btrfs_cow_block(trans, root, child, mid, 0, &child,
0914 BTRFS_NESTING_COW);
0915 if (ret) {
0916 btrfs_tree_unlock(child);
0917 free_extent_buffer(child);
0918 goto enospc;
0919 }
0920
0921 ret = btrfs_tree_mod_log_insert_root(root->node, child, true);
0922 BUG_ON(ret < 0);
0923 rcu_assign_pointer(root->node, child);
0924
0925 add_root_to_dirty_list(root);
0926 btrfs_tree_unlock(child);
0927
0928 path->locks[level] = 0;
0929 path->nodes[level] = NULL;
0930 btrfs_clean_tree_block(mid);
0931 btrfs_tree_unlock(mid);
0932
0933 free_extent_buffer(mid);
0934
0935 root_sub_used(root, mid->len);
0936 btrfs_free_tree_block(trans, btrfs_root_id(root), mid, 0, 1);
0937
0938 free_extent_buffer_stale(mid);
0939 return 0;
0940 }
0941 if (btrfs_header_nritems(mid) >
0942 BTRFS_NODEPTRS_PER_BLOCK(fs_info) / 4)
0943 return 0;
0944
0945 left = btrfs_read_node_slot(parent, pslot - 1);
0946 if (IS_ERR(left))
0947 left = NULL;
0948
0949 if (left) {
0950 __btrfs_tree_lock(left, BTRFS_NESTING_LEFT);
0951 wret = btrfs_cow_block(trans, root, left,
0952 parent, pslot - 1, &left,
0953 BTRFS_NESTING_LEFT_COW);
0954 if (wret) {
0955 ret = wret;
0956 goto enospc;
0957 }
0958 }
0959
0960 right = btrfs_read_node_slot(parent, pslot + 1);
0961 if (IS_ERR(right))
0962 right = NULL;
0963
0964 if (right) {
0965 __btrfs_tree_lock(right, BTRFS_NESTING_RIGHT);
0966 wret = btrfs_cow_block(trans, root, right,
0967 parent, pslot + 1, &right,
0968 BTRFS_NESTING_RIGHT_COW);
0969 if (wret) {
0970 ret = wret;
0971 goto enospc;
0972 }
0973 }
0974
	/* first, try to make some room in the middle buffer */
0976 if (left) {
0977 orig_slot += btrfs_header_nritems(left);
0978 wret = push_node_left(trans, left, mid, 1);
0979 if (wret < 0)
0980 ret = wret;
0981 }

	/*
	 * then try to empty the right most buffer into the middle
	 */
0986 if (right) {
0987 wret = push_node_left(trans, mid, right, 1);
0988 if (wret < 0 && wret != -ENOSPC)
0989 ret = wret;
0990 if (btrfs_header_nritems(right) == 0) {
0991 btrfs_clean_tree_block(right);
0992 btrfs_tree_unlock(right);
0993 del_ptr(root, path, level + 1, pslot + 1);
0994 root_sub_used(root, right->len);
0995 btrfs_free_tree_block(trans, btrfs_root_id(root), right,
0996 0, 1);
0997 free_extent_buffer_stale(right);
0998 right = NULL;
0999 } else {
1000 struct btrfs_disk_key right_key;
1001 btrfs_node_key(right, &right_key, 0);
1002 ret = btrfs_tree_mod_log_insert_key(parent, pslot + 1,
1003 BTRFS_MOD_LOG_KEY_REPLACE, GFP_NOFS);
1004 BUG_ON(ret < 0);
1005 btrfs_set_node_key(parent, &right_key, pslot + 1);
1006 btrfs_mark_buffer_dirty(parent);
1007 }
1008 }
1009 if (btrfs_header_nritems(mid) == 1) {
		/*
		 * We're not allowed to leave a node with only one item in
		 * the tree during a delete.  A deletion lower in the tree
		 * could try to delete the only pointer in this node, which
		 * can't be done because the deletion can't adjust the
		 * pointers above it.  So pull items from the left neighbor;
		 * if there is no left neighbor the tree is corrupted.
		 */
1019 if (!left) {
1020 ret = -EROFS;
1021 btrfs_handle_fs_error(fs_info, ret, NULL);
1022 goto enospc;
1023 }
1024 wret = balance_node_right(trans, mid, left);
1025 if (wret < 0) {
1026 ret = wret;
1027 goto enospc;
1028 }
1029 if (wret == 1) {
1030 wret = push_node_left(trans, left, mid, 1);
1031 if (wret < 0)
1032 ret = wret;
1033 }
1034 BUG_ON(wret == 1);
1035 }
1036 if (btrfs_header_nritems(mid) == 0) {
1037 btrfs_clean_tree_block(mid);
1038 btrfs_tree_unlock(mid);
1039 del_ptr(root, path, level + 1, pslot);
1040 root_sub_used(root, mid->len);
1041 btrfs_free_tree_block(trans, btrfs_root_id(root), mid, 0, 1);
1042 free_extent_buffer_stale(mid);
1043 mid = NULL;
1044 } else {
1045
1046 struct btrfs_disk_key mid_key;
1047 btrfs_node_key(mid, &mid_key, 0);
1048 ret = btrfs_tree_mod_log_insert_key(parent, pslot,
1049 BTRFS_MOD_LOG_KEY_REPLACE, GFP_NOFS);
1050 BUG_ON(ret < 0);
1051 btrfs_set_node_key(parent, &mid_key, pslot);
1052 btrfs_mark_buffer_dirty(parent);
1053 }
1054
1055
1056 if (left) {
1057 if (btrfs_header_nritems(left) > orig_slot) {
1058 atomic_inc(&left->refs);
1059
1060 path->nodes[level] = left;
1061 path->slots[level + 1] -= 1;
1062 path->slots[level] = orig_slot;
1063 if (mid) {
1064 btrfs_tree_unlock(mid);
1065 free_extent_buffer(mid);
1066 }
1067 } else {
1068 orig_slot -= btrfs_header_nritems(left);
1069 path->slots[level] = orig_slot;
1070 }
1071 }
1072
1073 if (orig_ptr !=
1074 btrfs_node_blockptr(path->nodes[level], path->slots[level]))
1075 BUG();
1076 enospc:
1077 if (right) {
1078 btrfs_tree_unlock(right);
1079 free_extent_buffer(right);
1080 }
1081 if (left) {
1082 if (path->nodes[level] != left)
1083 btrfs_tree_unlock(left);
1084 free_extent_buffer(left);
1085 }
1086 return ret;
1087 }

/* Node balancing for insertion.  Here we only split or push nodes around
 * when they are completely full.  This is also done top down, so we
 * have to be pessimistic.
 */
1093 static noinline int push_nodes_for_insert(struct btrfs_trans_handle *trans,
1094 struct btrfs_root *root,
1095 struct btrfs_path *path, int level)
1096 {
1097 struct btrfs_fs_info *fs_info = root->fs_info;
1098 struct extent_buffer *right = NULL;
1099 struct extent_buffer *mid;
1100 struct extent_buffer *left = NULL;
1101 struct extent_buffer *parent = NULL;
1102 int ret = 0;
1103 int wret;
1104 int pslot;
1105 int orig_slot = path->slots[level];
1106
1107 if (level == 0)
1108 return 1;
1109
1110 mid = path->nodes[level];
1111 WARN_ON(btrfs_header_generation(mid) != trans->transid);
1112
1113 if (level < BTRFS_MAX_LEVEL - 1) {
1114 parent = path->nodes[level + 1];
1115 pslot = path->slots[level + 1];
1116 }
1117
1118 if (!parent)
1119 return 1;
1120
1121 left = btrfs_read_node_slot(parent, pslot - 1);
1122 if (IS_ERR(left))
1123 left = NULL;
1124
1125
1126 if (left) {
1127 u32 left_nr;
1128
1129 __btrfs_tree_lock(left, BTRFS_NESTING_LEFT);
1130
1131 left_nr = btrfs_header_nritems(left);
1132 if (left_nr >= BTRFS_NODEPTRS_PER_BLOCK(fs_info) - 1) {
1133 wret = 1;
1134 } else {
1135 ret = btrfs_cow_block(trans, root, left, parent,
1136 pslot - 1, &left,
1137 BTRFS_NESTING_LEFT_COW);
1138 if (ret)
1139 wret = 1;
1140 else {
1141 wret = push_node_left(trans, left, mid, 0);
1142 }
1143 }
1144 if (wret < 0)
1145 ret = wret;
1146 if (wret == 0) {
1147 struct btrfs_disk_key disk_key;
1148 orig_slot += left_nr;
1149 btrfs_node_key(mid, &disk_key, 0);
1150 ret = btrfs_tree_mod_log_insert_key(parent, pslot,
1151 BTRFS_MOD_LOG_KEY_REPLACE, GFP_NOFS);
1152 BUG_ON(ret < 0);
1153 btrfs_set_node_key(parent, &disk_key, pslot);
1154 btrfs_mark_buffer_dirty(parent);
1155 if (btrfs_header_nritems(left) > orig_slot) {
1156 path->nodes[level] = left;
1157 path->slots[level + 1] -= 1;
1158 path->slots[level] = orig_slot;
1159 btrfs_tree_unlock(mid);
1160 free_extent_buffer(mid);
1161 } else {
1162 orig_slot -=
1163 btrfs_header_nritems(left);
1164 path->slots[level] = orig_slot;
1165 btrfs_tree_unlock(left);
1166 free_extent_buffer(left);
1167 }
1168 return 0;
1169 }
1170 btrfs_tree_unlock(left);
1171 free_extent_buffer(left);
1172 }
1173 right = btrfs_read_node_slot(parent, pslot + 1);
1174 if (IS_ERR(right))
1175 right = NULL;
1176
1177
1178
1179
1180 if (right) {
1181 u32 right_nr;
1182
1183 __btrfs_tree_lock(right, BTRFS_NESTING_RIGHT);
1184
1185 right_nr = btrfs_header_nritems(right);
1186 if (right_nr >= BTRFS_NODEPTRS_PER_BLOCK(fs_info) - 1) {
1187 wret = 1;
1188 } else {
1189 ret = btrfs_cow_block(trans, root, right,
1190 parent, pslot + 1,
1191 &right, BTRFS_NESTING_RIGHT_COW);
1192 if (ret)
1193 wret = 1;
1194 else {
1195 wret = balance_node_right(trans, right, mid);
1196 }
1197 }
1198 if (wret < 0)
1199 ret = wret;
1200 if (wret == 0) {
1201 struct btrfs_disk_key disk_key;
1202
1203 btrfs_node_key(right, &disk_key, 0);
1204 ret = btrfs_tree_mod_log_insert_key(parent, pslot + 1,
1205 BTRFS_MOD_LOG_KEY_REPLACE, GFP_NOFS);
1206 BUG_ON(ret < 0);
1207 btrfs_set_node_key(parent, &disk_key, pslot + 1);
1208 btrfs_mark_buffer_dirty(parent);
1209
1210 if (btrfs_header_nritems(mid) <= orig_slot) {
1211 path->nodes[level] = right;
1212 path->slots[level + 1] += 1;
1213 path->slots[level] = orig_slot -
1214 btrfs_header_nritems(mid);
1215 btrfs_tree_unlock(mid);
1216 free_extent_buffer(mid);
1217 } else {
1218 btrfs_tree_unlock(right);
1219 free_extent_buffer(right);
1220 }
1221 return 0;
1222 }
1223 btrfs_tree_unlock(right);
1224 free_extent_buffer(right);
1225 }
1226 return 1;
1227 }
1228
/*
 * readahead one full node of leaves, finding things that are close
 * to the block in 'slot', and triggering ra on them.
 */
1233 static void reada_for_search(struct btrfs_fs_info *fs_info,
1234 struct btrfs_path *path,
1235 int level, int slot, u64 objectid)
1236 {
1237 struct extent_buffer *node;
1238 struct btrfs_disk_key disk_key;
1239 u32 nritems;
1240 u64 search;
1241 u64 target;
1242 u64 nread = 0;
1243 u64 nread_max;
1244 u32 nr;
1245 u32 blocksize;
1246 u32 nscan = 0;
1247
1248 if (level != 1 && path->reada != READA_FORWARD_ALWAYS)
1249 return;
1250
1251 if (!path->nodes[level])
1252 return;
1253
1254 node = path->nodes[level];

	/*
	 * Since the time between visiting leaves is much shorter than the
	 * time between visiting nodes, limit read ahead of nodes to 1, to
	 * avoid too much IO at once (possibly random).
	 */
1261 if (path->reada == READA_FORWARD_ALWAYS) {
1262 if (level > 1)
1263 nread_max = node->fs_info->nodesize;
1264 else
1265 nread_max = SZ_128K;
1266 } else {
1267 nread_max = SZ_64K;
1268 }
1269
1270 search = btrfs_node_blockptr(node, slot);
1271 blocksize = fs_info->nodesize;
1272 if (path->reada != READA_FORWARD_ALWAYS) {
1273 struct extent_buffer *eb;
1274
1275 eb = find_extent_buffer(fs_info, search);
1276 if (eb) {
1277 free_extent_buffer(eb);
1278 return;
1279 }
1280 }
1281
1282 target = search;
1283
1284 nritems = btrfs_header_nritems(node);
1285 nr = slot;
1286
1287 while (1) {
1288 if (path->reada == READA_BACK) {
1289 if (nr == 0)
1290 break;
1291 nr--;
1292 } else if (path->reada == READA_FORWARD ||
1293 path->reada == READA_FORWARD_ALWAYS) {
1294 nr++;
1295 if (nr >= nritems)
1296 break;
1297 }
1298 if (path->reada == READA_BACK && objectid) {
1299 btrfs_node_key(node, &disk_key, nr);
1300 if (btrfs_disk_key_objectid(&disk_key) != objectid)
1301 break;
1302 }
1303 search = btrfs_node_blockptr(node, nr);
1304 if (path->reada == READA_FORWARD_ALWAYS ||
1305 (search <= target && target - search <= 65536) ||
1306 (search > target && search - target <= 65536)) {
1307 btrfs_readahead_node_child(node, nr);
1308 nread += blocksize;
1309 }
1310 nscan++;
1311 if (nread > nread_max || nscan > 32)
1312 break;
1313 }
1314 }
1315
1316 static noinline void reada_for_balance(struct btrfs_path *path, int level)
1317 {
1318 struct extent_buffer *parent;
1319 int slot;
1320 int nritems;
1321
1322 parent = path->nodes[level + 1];
1323 if (!parent)
1324 return;
1325
1326 nritems = btrfs_header_nritems(parent);
1327 slot = path->slots[level + 1];
1328
1329 if (slot > 0)
1330 btrfs_readahead_node_child(parent, slot - 1);
1331 if (slot + 1 < nritems)
1332 btrfs_readahead_node_child(parent, slot + 1);
1333 }
1334

/*
 * when we walk down the tree, it is usually safe to unlock the higher layers
 * in the tree.  The exceptions are when our path goes through slot 0, because
 * operations on the tree might require changing key pointers higher up in
 * the tree.
 *
 * callers might also have set path->keep_locks, which tells this code to keep
 * the lock if the path points to the last slot in the block.  This is part of
 * walking through the tree, and selecting the next slot in the higher block.
 *
 * lowest_unlock sets the lowest level in the tree we're allowed to unlock.
 * So if lowest_unlock is 1, level 0 won't be unlocked.
 */
1349 static noinline void unlock_up(struct btrfs_path *path, int level,
1350 int lowest_unlock, int min_write_lock_level,
1351 int *write_lock_level)
1352 {
1353 int i;
1354 int skip_level = level;
1355 bool check_skip = true;
1356
1357 for (i = level; i < BTRFS_MAX_LEVEL; i++) {
1358 if (!path->nodes[i])
1359 break;
1360 if (!path->locks[i])
1361 break;
1362
1363 if (check_skip) {
1364 if (path->slots[i] == 0) {
1365 skip_level = i + 1;
1366 continue;
1367 }
1368
1369 if (path->keep_locks) {
1370 u32 nritems;
1371
1372 nritems = btrfs_header_nritems(path->nodes[i]);
1373 if (nritems < 1 || path->slots[i] >= nritems - 1) {
1374 skip_level = i + 1;
1375 continue;
1376 }
1377 }
1378 }
1379
1380 if (i >= lowest_unlock && i > skip_level) {
1381 check_skip = false;
1382 btrfs_tree_unlock_rw(path->nodes[i], path->locks[i]);
1383 path->locks[i] = 0;
1384 if (write_lock_level &&
1385 i > min_write_lock_level &&
1386 i <= *write_lock_level) {
1387 *write_lock_level = i - 1;
1388 }
1389 }
1390 }
1391 }
1392

/*
 * Helper for btrfs_search_slot(): fetch the child block referenced by @slot
 * of the node at @level.  If the block is cached and up to date it is
 * returned directly; otherwise it is read from disk, possibly after dropping
 * upper level locks, in which case -EAGAIN is returned (with the path
 * released) so the caller can restart the search.
 */
1402 static int
1403 read_block_for_search(struct btrfs_root *root, struct btrfs_path *p,
1404 struct extent_buffer **eb_ret, int level, int slot,
1405 const struct btrfs_key *key)
1406 {
1407 struct btrfs_fs_info *fs_info = root->fs_info;
1408 u64 blocknr;
1409 u64 gen;
1410 struct extent_buffer *tmp;
1411 struct btrfs_key first_key;
1412 int ret;
1413 int parent_level;
1414 bool unlock_up;
1415
1416 unlock_up = ((level + 1 < BTRFS_MAX_LEVEL) && p->locks[level + 1]);
1417 blocknr = btrfs_node_blockptr(*eb_ret, slot);
1418 gen = btrfs_node_ptr_generation(*eb_ret, slot);
1419 parent_level = btrfs_header_level(*eb_ret);
1420 btrfs_node_key_to_cpu(*eb_ret, &first_key, slot);

	/*
	 * If we need to read an extent buffer from disk and we are holding
	 * locks on upper level nodes, we unlock all the upper nodes before
	 * reading the extent buffer, and then return -EAGAIN to the caller
	 * as it needs to restart the search.  We don't release the lock on
	 * the current level because we need to walk this node to figure out
	 * which blocks to read.
	 */
1429 tmp = find_extent_buffer(fs_info, blocknr);
1430 if (tmp) {
1431 if (p->reada == READA_FORWARD_ALWAYS)
1432 reada_for_search(fs_info, p, level, slot, key->objectid);

		/* first we do an atomic uptodate check */
1435 if (btrfs_buffer_uptodate(tmp, gen, 1) > 0) {
			/*
			 * Do extra check for first_key, eb can be stale due to
			 * being cached, read from scrub, or have multiple
			 * parities. Need to check first_key.
			 */
1441 if (btrfs_verify_level_key(tmp,
1442 parent_level - 1, &first_key, gen)) {
1443 free_extent_buffer(tmp);
1444 return -EUCLEAN;
1445 }
1446 *eb_ret = tmp;
1447 return 0;
1448 }
1449
1450 if (unlock_up)
1451 btrfs_unlock_up_safe(p, level + 1);
1452
		/* now we're allowed to do a blocking uptodate check */
1454 ret = btrfs_read_extent_buffer(tmp, gen, parent_level - 1, &first_key);
1455 if (ret) {
1456 free_extent_buffer(tmp);
1457 btrfs_release_path(p);
1458 return -EIO;
1459 }
1460 if (btrfs_check_eb_owner(tmp, root->root_key.objectid)) {
1461 free_extent_buffer(tmp);
1462 btrfs_release_path(p);
1463 return -EUCLEAN;
1464 }
1465
1466 if (unlock_up)
1467 ret = -EAGAIN;
1468
1469 goto out;
1470 }
1471
1472 if (unlock_up) {
1473 btrfs_unlock_up_safe(p, level + 1);
1474 ret = -EAGAIN;
1475 } else {
1476 ret = 0;
1477 }
1478
1479 if (p->reada != READA_NONE)
1480 reada_for_search(fs_info, p, level, slot, key->objectid);
1481
1482 tmp = read_tree_block(fs_info, blocknr, root->root_key.objectid,
1483 gen, parent_level - 1, &first_key);
1484 if (IS_ERR(tmp)) {
1485 btrfs_release_path(p);
1486 return PTR_ERR(tmp);
1487 }

	/*
	 * If the read above didn't mark this buffer up to date,
	 * it will never end up being up to date.  Set ret to EIO now
	 * and give up so that our caller doesn't loop forever
	 * on our EAGAINs.
	 */
1494 if (!extent_buffer_uptodate(tmp))
1495 ret = -EIO;
1496
1497 out:
1498 if (ret == 0) {
1499 *eb_ret = tmp;
1500 } else {
1501 free_extent_buffer(tmp);
1502 btrfs_release_path(p);
1503 }
1504
1505 return ret;
1506 }

/*
 * Helper for btrfs_search_slot(): make sure the node at @level has room for
 * the upcoming modification.  Nodes that are too full are split and nodes
 * that are too empty are balanced with their neighbours.  Returns -EAGAIN
 * (with the path released) when the caller must restart the search with more
 * write locks held.
 */
1517 static int
1518 setup_nodes_for_search(struct btrfs_trans_handle *trans,
1519 struct btrfs_root *root, struct btrfs_path *p,
1520 struct extent_buffer *b, int level, int ins_len,
1521 int *write_lock_level)
1522 {
1523 struct btrfs_fs_info *fs_info = root->fs_info;
1524 int ret = 0;
1525
1526 if ((p->search_for_split || ins_len > 0) && btrfs_header_nritems(b) >=
1527 BTRFS_NODEPTRS_PER_BLOCK(fs_info) - 3) {
1528
1529 if (*write_lock_level < level + 1) {
1530 *write_lock_level = level + 1;
1531 btrfs_release_path(p);
1532 return -EAGAIN;
1533 }
1534
1535 reada_for_balance(p, level);
1536 ret = split_node(trans, root, p, level);
1537
1538 b = p->nodes[level];
1539 } else if (ins_len < 0 && btrfs_header_nritems(b) <
1540 BTRFS_NODEPTRS_PER_BLOCK(fs_info) / 2) {
1541
1542 if (*write_lock_level < level + 1) {
1543 *write_lock_level = level + 1;
1544 btrfs_release_path(p);
1545 return -EAGAIN;
1546 }
1547
1548 reada_for_balance(p, level);
1549 ret = balance_level(trans, root, p, level);
1550 if (ret)
1551 return ret;
1552
1553 b = p->nodes[level];
1554 if (!b) {
1555 btrfs_release_path(p);
1556 return -EAGAIN;
1557 }
1558 BUG_ON(btrfs_header_nritems(b) == 1);
1559 }
1560 return ret;
1561 }
1562
1563 int btrfs_find_item(struct btrfs_root *fs_root, struct btrfs_path *path,
1564 u64 iobjectid, u64 ioff, u8 key_type,
1565 struct btrfs_key *found_key)
1566 {
1567 int ret;
1568 struct btrfs_key key;
1569 struct extent_buffer *eb;
1570
1571 ASSERT(path);
1572 ASSERT(found_key);
1573
1574 key.type = key_type;
1575 key.objectid = iobjectid;
1576 key.offset = ioff;
1577
1578 ret = btrfs_search_slot(NULL, fs_root, &key, path, 0, 0);
1579 if (ret < 0)
1580 return ret;
1581
1582 eb = path->nodes[0];
1583 if (ret && path->slots[0] >= btrfs_header_nritems(eb)) {
1584 ret = btrfs_next_leaf(fs_root, path);
1585 if (ret)
1586 return ret;
1587 eb = path->nodes[0];
1588 }
1589
1590 btrfs_item_key_to_cpu(eb, found_key, path->slots[0]);
1591 if (found_key->type != key.type ||
1592 found_key->objectid != key.objectid)
1593 return 1;
1594
1595 return 0;
1596 }
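/*
 * Sketch of a caller of btrfs_find_item(): find an item with the given
 * objectid and type while tolerating a mismatching offset.  The key type and
 * the find_item_example() wrapper are assumptions for the example; the block
 * is compiled out.
 */
#if 0
static int find_item_example(struct btrfs_root *root, u64 ino)
{
	struct btrfs_path *path;
	struct btrfs_key found;
	int ret;

	path = btrfs_alloc_path();
	if (!path)
		return -ENOMEM;

	/* ioff == 0: accept the first BTRFS_INODE_REF_KEY item for @ino. */
	ret = btrfs_find_item(root, path, ino, 0, BTRFS_INODE_REF_KEY, &found);
	if (ret == 0)
		pr_info("found ref at offset %llu\n", found.offset);

	btrfs_free_path(path);
	return ret;
}
#endif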
1597
1598 static struct extent_buffer *btrfs_search_slot_get_root(struct btrfs_root *root,
1599 struct btrfs_path *p,
1600 int write_lock_level)
1601 {
1602 struct extent_buffer *b;
1603 int root_lock = 0;
1604 int level = 0;
1605
1606 if (p->search_commit_root) {
1607 b = root->commit_root;
1608 atomic_inc(&b->refs);
1609 level = btrfs_header_level(b);

		/*
		 * Ensure that all callers have set skip_locking when
		 * p->search_commit_root = 1.
		 */
1614 ASSERT(p->skip_locking == 1);
1615
1616 goto out;
1617 }
1618
1619 if (p->skip_locking) {
1620 b = btrfs_root_node(root);
1621 level = btrfs_header_level(b);
1622 goto out;
1623 }
1624
1625
1626 root_lock = BTRFS_READ_LOCK;
1627
	/*
	 * If the level is set to maximum, we can skip trying to get the read
	 * lock.
	 */
1632 if (write_lock_level < BTRFS_MAX_LEVEL) {
		/*
		 * We don't know the level of the root node until we actually
		 * have it read locked
		 */
1637 b = btrfs_read_lock_root_node(root);
1638 level = btrfs_header_level(b);
1639 if (level > write_lock_level)
1640 goto out;
1641
1642
1643 btrfs_tree_read_unlock(b);
1644 free_extent_buffer(b);
1645 }
1646
1647 b = btrfs_lock_root_node(root);
1648 root_lock = BTRFS_WRITE_LOCK;
1649
1650
1651 level = btrfs_header_level(b);
1652
1653 out:
	/*
	 * The root may have failed to write out at some point, and thus is no
	 * longer valid, return an error in this case.
	 */
1658 if (!extent_buffer_uptodate(b)) {
1659 if (root_lock)
1660 btrfs_tree_unlock_rw(b, root_lock);
1661 free_extent_buffer(b);
1662 return ERR_PTR(-EIO);
1663 }
1664
1665 p->nodes[level] = b;
1666 if (!p->skip_locking)
1667 p->locks[level] = root_lock;
1668
1669
1670
1671 return b;
1672 }
1673

/*
 * Replace the extent buffer at the lowest level of the path with a cloned
 * version. The purpose is to be able to use it safely, after releasing the
 * commit root semaphore, even if relocation is happening in parallel, the
 * transaction used for relocation is committed and the extent buffer is
 * reallocated in the next transaction.
 *
 * This is used in a context where the caller does not prevent transaction
 * commits from happening, either by holding a transaction handle or holding
 * some lock, while it's doing searches through a commit root.
 * At the moment it's only used for send operations.
 */
1686 static int finish_need_commit_sem_search(struct btrfs_path *path)
1687 {
1688 const int i = path->lowest_level;
1689 const int slot = path->slots[i];
1690 struct extent_buffer *lowest = path->nodes[i];
1691 struct extent_buffer *clone;
1692
1693 ASSERT(path->need_commit_sem);
1694
1695 if (!lowest)
1696 return 0;
1697
1698 lockdep_assert_held_read(&lowest->fs_info->commit_root_sem);
1699
1700 clone = btrfs_clone_extent_buffer(lowest);
1701 if (!clone)
1702 return -ENOMEM;
1703
1704 btrfs_release_path(path);
1705 path->nodes[i] = clone;
1706 path->slots[i] = slot;
1707
1708 return 0;
1709 }
1710
1711 static inline int search_for_key_slot(struct extent_buffer *eb,
1712 int search_low_slot,
1713 const struct btrfs_key *key,
1714 int prev_cmp,
1715 int *slot)
1716 {
	/*
	 * If a previous call to btrfs_bin_search() on a parent node returned
	 * an exact match (prev_cmp == 0), we can safely assume the target key
	 * will always be at slot 0 on lower levels, since each key pointer
	 * (struct btrfs_key_ptr) refers to the lowest key accessible from the
	 * subtree it points to.  Thus we can skip searching lower levels.
	 */
1724 if (prev_cmp == 0) {
1725 *slot = 0;
1726 return 0;
1727 }
1728
1729 return generic_bin_search(eb, search_low_slot, key, slot);
1730 }
1731
1732 static int search_leaf(struct btrfs_trans_handle *trans,
1733 struct btrfs_root *root,
1734 const struct btrfs_key *key,
1735 struct btrfs_path *path,
1736 int ins_len,
1737 int prev_cmp)
1738 {
1739 struct extent_buffer *leaf = path->nodes[0];
1740 int leaf_free_space = -1;
1741 int search_low_slot = 0;
1742 int ret;
1743 bool do_bin_search = true;
1744
	/*
	 * If we are doing an insertion, the leaf has enough free space and
	 * the destination slot for the key is not slot 0, then we can unlock
	 * our write lock on the parent, and any other upper nodes, before
	 * doing the binary search on the leaf (with search_for_key_slot()),
	 * allowing other tasks to lock the parent and any other upper nodes.
	 */
1752 if (ins_len > 0) {
		/*
		 * Cache the leaf free space, since we will need it later and
		 * it will not change until then.
		 */
1757 leaf_free_space = btrfs_leaf_free_space(leaf);
1758
		/*
		 * !path->locks[1] means we have a single node tree, the leaf
		 * is the root of the tree.
		 */
1763 if (path->locks[1] && leaf_free_space >= ins_len) {
1764 struct btrfs_disk_key first_key;
1765
1766 ASSERT(btrfs_header_nritems(leaf) > 0);
1767 btrfs_item_key(leaf, &first_key, 0);
1768
			/*
			 * Doing the extra comparison with the first key is
			 * cheap, taking into account that the first key is
			 * very likely already in a cache line because it
			 * immediately follows the extent buffer's header and
			 * we have recently accessed the header's level field.
			 */
1776 ret = comp_keys(&first_key, key);
1777 if (ret < 0) {
				/*
				 * The first key is smaller than the key we
				 * want to insert, so we are safe to unlock
				 * all upper nodes and we have to do the
				 * binary search.
				 *
				 * We use btrfs_unlock_up_safe() and not
				 * unlock_up() because the later does not
				 * unlock nodes with a slot of 0.  We can
				 * safely unlock any node even if its slot is
				 * 0 since, in this case, the key does not end
				 * up at slot 0 of the unlocked node and we
				 * always insert keys into non-full leaves.
				 */
1790 btrfs_unlock_up_safe(path, 1);
1791 search_low_slot = 1;
1792 } else {
				/*
				 * The first key is >= the key we want to
				 * insert, so the target ends up at slot 0 and
				 * we can skip the binary search.
				 *
				 * We can only unlock the upper nodes when the
				 * keys are equal: if the new key were smaller
				 * it would become the new first key, and we
				 * would have to update the key at slot 0 of
				 * the parent node and possibly of other upper
				 * nodes too.
				 */
1807 if (ret == 0)
1808 btrfs_unlock_up_safe(path, 1);
1809
1810
1811
1812
1813
1814 do_bin_search = false;
1815 path->slots[0] = 0;
1816 }
1817 }
1818 }
1819
1820 if (do_bin_search) {
1821 ret = search_for_key_slot(leaf, search_low_slot, key,
1822 prev_cmp, &path->slots[0]);
1823 if (ret < 0)
1824 return ret;
1825 }
1826
1827 if (ins_len > 0) {
		/*
		 * Item key already exists. In this case, if we are allowed to
		 * insert the item (for example, in dir_item case, item key
		 * collision is allowed), it will be merged with the original
		 * item. Only the item size grows, no new btrfs item will be
		 * added. If search_for_extension is not set, ins_len already
		 * accounts the size of btrfs_item, deduct it here so the leaf
		 * space check will be correct.
		 */
1837 if (ret == 0 && !path->search_for_extension) {
1838 ASSERT(ins_len >= sizeof(struct btrfs_item));
1839 ins_len -= sizeof(struct btrfs_item);
1840 }
1841
1842 ASSERT(leaf_free_space >= 0);
1843
1844 if (leaf_free_space < ins_len) {
1845 int err;
1846
1847 err = split_leaf(trans, root, key, path, ins_len,
1848 (ret == 0));
1849 ASSERT(err <= 0);
1850 if (WARN_ON(err > 0))
1851 err = -EUCLEAN;
1852 if (err)
1853 ret = err;
1854 }
1855 }
1856
1857 return ret;
1858 }
1859

/*
 * btrfs_search_slot - look for a key in a tree and perform necessary
 * modifications to preserve tree invariants.
 *
 * @trans:	Handle of transaction, used when modifying the tree
 * @p:		Holds all btree nodes along the search path
 * @root:	The root node of the tree
 * @key:	The key we are looking for
 * @ins_len:	Indicates purpose of search:
 *              >0  for inserts it's size of item inserted (*)
 *              <0  for deletions
 *               0  for plain searches, not modifying the tree
 *
 *              (*) If size of item inserted doesn't include
 *              sizeof(struct btrfs_item), then p->search_for_extension must
 *              be set.
 * @cow:	boolean should CoW operations be performed. Must always be 1
 *		when modifying the tree.
 *
 * If @ins_len > 0, nodes and leaves will be split as we walk down the tree.
 * If @ins_len < 0, nodes will be merged as we walk down the tree (if possible)
 *
 * If @key is found, 0 is returned and you can find the item in the leaf level
 * of the path (level 0)
 *
 * If @key isn't found, 1 is returned and the leaf level of the path (level 0)
 * points to the slot where it should be inserted
 *
 * If an error is encountered while searching the tree a negative error number
 * is returned
 */
1891 int btrfs_search_slot(struct btrfs_trans_handle *trans, struct btrfs_root *root,
1892 const struct btrfs_key *key, struct btrfs_path *p,
1893 int ins_len, int cow)
1894 {
1895 struct btrfs_fs_info *fs_info = root->fs_info;
1896 struct extent_buffer *b;
1897 int slot;
1898 int ret;
1899 int err;
1900 int level;
1901 int lowest_unlock = 1;
1902
1903 int write_lock_level = 0;
1904 u8 lowest_level = 0;
1905 int min_write_lock_level;
1906 int prev_cmp;
1907
1908 lowest_level = p->lowest_level;
1909 WARN_ON(lowest_level && ins_len > 0);
1910 WARN_ON(p->nodes[0] != NULL);
1911 BUG_ON(!cow && ins_len);
1912
1913 if (ins_len < 0) {
1914 lowest_unlock = 2;
		/*
		 * When we are removing items, we might have to go up to level
		 * two as we update tree pointers.  Make sure we keep write
		 * locks for those levels as well.
		 */
1920 write_lock_level = 2;
1921 } else if (ins_len > 0) {
		/*
		 * for inserting items, make sure we have a write lock on
		 * level 1 so we can update keys
		 */
1926 write_lock_level = 1;
1927 }
1928
1929 if (!cow)
1930 write_lock_level = -1;
1931
1932 if (cow && (p->keep_locks || p->lowest_level))
1933 write_lock_level = BTRFS_MAX_LEVEL;
1934
1935 min_write_lock_level = write_lock_level;
1936
1937 if (p->need_commit_sem) {
1938 ASSERT(p->search_commit_root);
1939 down_read(&fs_info->commit_root_sem);
1940 }
1941
1942 again:
1943 prev_cmp = -1;
1944 b = btrfs_search_slot_get_root(root, p, write_lock_level);
1945 if (IS_ERR(b)) {
1946 ret = PTR_ERR(b);
1947 goto done;
1948 }
1949
1950 while (b) {
1951 int dec = 0;
1952
1953 level = btrfs_header_level(b);
1954
1955 if (cow) {
1956 bool last_level = (level == (BTRFS_MAX_LEVEL - 1));
1957
			/*
			 * If we don't really need to cow this block then we
			 * don't want to set the path blocking, so we test it
			 * here.
			 */
1963 if (!should_cow_block(trans, root, b))
1964 goto cow_done;
1965
			/*
			 * must have write locks on this node and the
			 * parent
			 */
1970 if (level > write_lock_level ||
1971 (level + 1 > write_lock_level &&
1972 level + 1 < BTRFS_MAX_LEVEL &&
1973 p->nodes[level + 1])) {
1974 write_lock_level = level + 1;
1975 btrfs_release_path(p);
1976 goto again;
1977 }
1978
1979 if (last_level)
1980 err = btrfs_cow_block(trans, root, b, NULL, 0,
1981 &b,
1982 BTRFS_NESTING_COW);
1983 else
1984 err = btrfs_cow_block(trans, root, b,
1985 p->nodes[level + 1],
1986 p->slots[level + 1], &b,
1987 BTRFS_NESTING_COW);
1988 if (err) {
1989 ret = err;
1990 goto done;
1991 }
1992 }
1993 cow_done:
1994 p->nodes[level] = b;
1995
		/*
		 * We have a lock on b and as long as we aren't changing
		 * the tree, there is no way for the items in b to change.
		 * It is safe to drop the lock on our parent before we
		 * go through the expensive btree search on b.
		 *
		 * If we're inserting or deleting (ins_len != 0), then we might
		 * be changing slot zero, which may require changing the parent.
		 * So, we can't drop the lock until after we know which slot
		 * we're operating on.
		 */
2007 if (!ins_len && !p->keep_locks) {
2008 int u = level + 1;
2009
2010 if (u < BTRFS_MAX_LEVEL && p->locks[u]) {
2011 btrfs_tree_unlock_rw(p->nodes[u], p->locks[u]);
2012 p->locks[u] = 0;
2013 }
2014 }
2015
2016 if (level == 0) {
2017 if (ins_len > 0)
2018 ASSERT(write_lock_level >= 1);
2019
2020 ret = search_leaf(trans, root, key, p, ins_len, prev_cmp);
2021 if (!p->search_for_split)
2022 unlock_up(p, level, lowest_unlock,
2023 min_write_lock_level, NULL);
2024 goto done;
2025 }
2026
2027 ret = search_for_key_slot(b, 0, key, prev_cmp, &slot);
2028 if (ret < 0)
2029 goto done;
2030 prev_cmp = ret;
2031
2032 if (ret && slot > 0) {
2033 dec = 1;
2034 slot--;
2035 }
2036 p->slots[level] = slot;
2037 err = setup_nodes_for_search(trans, root, p, b, level, ins_len,
2038 &write_lock_level);
2039 if (err == -EAGAIN)
2040 goto again;
2041 if (err) {
2042 ret = err;
2043 goto done;
2044 }
2045 b = p->nodes[level];
2046 slot = p->slots[level];
2047
		/*
		 * Slot 0 is special, if we change the key we have to update
		 * the parent pointer which means we must have a write lock on
		 * the parent
		 */
2053 if (slot == 0 && ins_len && write_lock_level < level + 1) {
2054 write_lock_level = level + 1;
2055 btrfs_release_path(p);
2056 goto again;
2057 }
2058
2059 unlock_up(p, level, lowest_unlock, min_write_lock_level,
2060 &write_lock_level);
2061
2062 if (level == lowest_level) {
2063 if (dec)
2064 p->slots[level]++;
2065 goto done;
2066 }
2067
2068 err = read_block_for_search(root, p, &b, level, slot, key);
2069 if (err == -EAGAIN)
2070 goto again;
2071 if (err) {
2072 ret = err;
2073 goto done;
2074 }
2075
2076 if (!p->skip_locking) {
2077 level = btrfs_header_level(b);
2078
2079 btrfs_maybe_reset_lockdep_class(root, b);
2080
2081 if (level <= write_lock_level) {
2082 btrfs_tree_lock(b);
2083 p->locks[level] = BTRFS_WRITE_LOCK;
2084 } else {
2085 btrfs_tree_read_lock(b);
2086 p->locks[level] = BTRFS_READ_LOCK;
2087 }
2088 p->nodes[level] = b;
2089 }
2090 }
2091 ret = 1;
2092 done:
2093 if (ret < 0 && !p->skip_release_on_error)
2094 btrfs_release_path(p);
2095
2096 if (p->need_commit_sem) {
2097 int ret2;
2098
2099 ret2 = finish_need_commit_sem_search(p);
2100 up_read(&fs_info->commit_root_sem);
2101 if (ret2)
2102 ret = ret2;
2103 }
2104
2105 return ret;
2106 }
2107 ALLOW_ERROR_INJECTION(btrfs_search_slot, ERRNO);
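/*
 * A compact sketch of the two common calling modes of btrfs_search_slot(): a
 * read-only lookup (no transaction, cow == 0) and a search that prepares room
 * for an insertion (ins_len > 0, cow == 1).  The wrapper and the item size
 * are assumptions for the example; the block is compiled out.
 */
#if 0
static int search_slot_example(struct btrfs_trans_handle *trans,
			       struct btrfs_root *root,
			       const struct btrfs_key *key,
			       struct btrfs_path *path)
{
	int ret;

	/* Plain lookup: never modifies the tree. */
	ret = btrfs_search_slot(NULL, root, key, path, 0, 0);
	if (ret < 0)
		return ret;
	btrfs_release_path(path);

	/*
	 * Insert preparation: ins_len is the space the new item needs in the
	 * leaf, conventionally the data size plus sizeof(struct btrfs_item)
	 * unless path->search_for_extension is set.  64 is an example value.
	 */
	return btrfs_search_slot(trans, root, key, path, 64, 1);
}
#endif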
2108

/*
 * Like btrfs_search_slot, this looks for a key in the given tree. It uses the
 * current state of the tree together with the operations recorded in the tree
 * modification log to search for the key in a previous version of this tree,
 * as denoted by the time_seq parameter.
 *
 * Naturally, there is no support for insertion of new items in those trees.
 *
 * The resulting path and return value will be set up as if we called
 * btrfs_search_slot at that point in time with ins_len and cow both set to 0.
 */
2120 int btrfs_search_old_slot(struct btrfs_root *root, const struct btrfs_key *key,
2121 struct btrfs_path *p, u64 time_seq)
2122 {
2123 struct btrfs_fs_info *fs_info = root->fs_info;
2124 struct extent_buffer *b;
2125 int slot;
2126 int ret;
2127 int err;
2128 int level;
2129 int lowest_unlock = 1;
2130 u8 lowest_level = 0;
2131
2132 lowest_level = p->lowest_level;
2133 WARN_ON(p->nodes[0] != NULL);
2134
2135 if (p->search_commit_root) {
2136 BUG_ON(time_seq);
2137 return btrfs_search_slot(NULL, root, key, p, 0, 0);
2138 }
2139
2140 again:
2141 b = btrfs_get_old_root(root, time_seq);
2142 if (!b) {
2143 ret = -EIO;
2144 goto done;
2145 }
2146 level = btrfs_header_level(b);
2147 p->locks[level] = BTRFS_READ_LOCK;
2148
2149 while (b) {
2150 int dec = 0;
2151
2152 level = btrfs_header_level(b);
2153 p->nodes[level] = b;
2154
		/*
		 * We have a lock on b and as long as we aren't changing
		 * the tree, there is no way for the items in b to change.
		 * It is safe to drop the lock on our parent before we
		 * go through the expensive btree search on b.
		 */
2161 btrfs_unlock_up_safe(p, level + 1);
2162
2163 ret = btrfs_bin_search(b, key, &slot);
2164 if (ret < 0)
2165 goto done;
2166
2167 if (level == 0) {
2168 p->slots[level] = slot;
2169 unlock_up(p, level, lowest_unlock, 0, NULL);
2170 goto done;
2171 }
2172
2173 if (ret && slot > 0) {
2174 dec = 1;
2175 slot--;
2176 }
2177 p->slots[level] = slot;
2178 unlock_up(p, level, lowest_unlock, 0, NULL);
2179
2180 if (level == lowest_level) {
2181 if (dec)
2182 p->slots[level]++;
2183 goto done;
2184 }
2185
2186 err = read_block_for_search(root, p, &b, level, slot, key);
2187 if (err == -EAGAIN)
2188 goto again;
2189 if (err) {
2190 ret = err;
2191 goto done;
2192 }
2193
2194 level = btrfs_header_level(b);
2195 btrfs_tree_read_lock(b);
2196 b = btrfs_tree_mod_log_rewind(fs_info, p, b, time_seq);
2197 if (!b) {
2198 ret = -ENOMEM;
2199 goto done;
2200 }
2201 p->locks[level] = BTRFS_READ_LOCK;
2202 p->nodes[level] = b;
2203 }
2204 ret = 1;
2205 done:
2206 if (ret < 0)
2207 btrfs_release_path(p);
2208
2209 return ret;
2210 }
2211
/*
 * helper to use instead of search slot if no exact match is needed but
 * instead the next or previous item should be returned.
 * When find_higher is true, the next higher item is returned, the next lower
 * otherwise.
 * When return_any and find_higher are both true, and no higher item is found,
 * return the next lower instead.
 * When return_any is true and find_higher is false, and no lower item is
 * found, return the next higher instead.
 * It returns 0 if any item is found, 1 if none is found (tree empty), and
 * < 0 on error
 */
2224 int btrfs_search_slot_for_read(struct btrfs_root *root,
2225 const struct btrfs_key *key,
2226 struct btrfs_path *p, int find_higher,
2227 int return_any)
2228 {
2229 int ret;
2230 struct extent_buffer *leaf;
2231
2232 again:
2233 ret = btrfs_search_slot(NULL, root, key, p, 0, 0);
2234 if (ret <= 0)
2235 return ret;
2236
	/*
	 * A return value of 1 means the path is at the position where the
	 * item should be inserted. Normally this is the next bigger item,
	 * but in case the previous item is the last in a leaf, path points
	 * to the first free slot in the previous leaf, i.e. at an invalid
	 * item.
	 */
2243 leaf = p->nodes[0];
2244
2245 if (find_higher) {
2246 if (p->slots[0] >= btrfs_header_nritems(leaf)) {
2247 ret = btrfs_next_leaf(root, p);
2248 if (ret <= 0)
2249 return ret;
2250 if (!return_any)
2251 return 1;
			/*
			 * no higher item found, return the next
			 * lower instead
			 */
2256 return_any = 0;
2257 find_higher = 0;
2258 btrfs_release_path(p);
2259 goto again;
2260 }
2261 } else {
2262 if (p->slots[0] == 0) {
2263 ret = btrfs_prev_leaf(root, p);
2264 if (ret < 0)
2265 return ret;
2266 if (!ret) {
2267 leaf = p->nodes[0];
2268 if (p->slots[0] == btrfs_header_nritems(leaf))
2269 p->slots[0]--;
2270 return 0;
2271 }
2272 if (!return_any)
2273 return 1;
			/*
			 * no lower item found, return the next
			 * higher instead
			 */
2278 return_any = 0;
2279 find_higher = 1;
2280 btrfs_release_path(p);
2281 goto again;
2282 } else {
2283 --p->slots[0];
2284 }
2285 }
2286 return 0;
2287 }
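/*
 * Sketch of using the helper above to position a path at the first item at or
 * after a key, falling back to the previous item if nothing higher exists.
 * Purely illustrative and compiled out; the wrapper name is an assumption.
 */
#if 0
static int next_or_prev_example(struct btrfs_root *root,
				const struct btrfs_key *key,
				struct btrfs_path *path)
{
	/*
	 * find_higher == 1, return_any == 1: only returns 1 when the tree
	 * has no items at all.
	 */
	return btrfs_search_slot_for_read(root, key, path, 1, 1);
}
#endif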
2288

/*
 * Execute search and call btrfs_previous_item to traverse backwards if the
 * item was not found.
 *
 * Return 0 if found, 1 if not found and < 0 if error.
 */
2295 int btrfs_search_backwards(struct btrfs_root *root, struct btrfs_key *key,
2296 struct btrfs_path *path)
2297 {
2298 int ret;
2299
2300 ret = btrfs_search_slot(NULL, root, key, path, 0, 0);
2301 if (ret > 0)
2302 ret = btrfs_previous_item(root, path, key->objectid, key->type);
2303
2304 if (ret == 0)
2305 btrfs_item_key_to_cpu(path->nodes[0], key, path->slots[0]);
2306
2307 return ret;
2308 }
2309

/*
 * Search for a valid slot for the given path.
 *
 * @root:	The root node of the tree.
 * @key:	Will contain a valid item if found.
 * @path:	The starting point to validate the slot.
 *
 * Return: 0  if the item is valid
 *         1  if not found
 *         <0 if error.
 */
2321 int btrfs_get_next_valid_item(struct btrfs_root *root, struct btrfs_key *key,
2322 struct btrfs_path *path)
2323 {
2324 while (1) {
2325 int ret;
2326 const int slot = path->slots[0];
2327 const struct extent_buffer *leaf = path->nodes[0];
2328
2329
2330 if (slot >= btrfs_header_nritems(leaf)) {
2331
2332
2333
2334
2335 ret = btrfs_next_leaf(root, path);
2336 if (ret)
2337 return ret;
2338 continue;
2339 }
2340
2341 btrfs_item_key_to_cpu(leaf, key, slot);
2342 break;
2343 }
2344 return 0;
2345 }
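/*
 * Sketch of the iteration pattern this helper enables: position a path with a
 * search, then walk forward item by item, letting the helper cross leaf
 * boundaries.  The stop condition and wrapper name are assumptions for the
 * example; the block is compiled out.
 */
#if 0
static int iterate_items_example(struct btrfs_root *root,
				 struct btrfs_path *path, u64 objectid)
{
	struct btrfs_key key = { .objectid = objectid, .type = 0, .offset = 0 };
	int ret;

	ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
	if (ret < 0)
		return ret;

	while (1) {
		ret = btrfs_get_next_valid_item(root, &key, path);
		if (ret)
			break;		/* error or no more leaves */
		if (key.objectid != objectid)
			break;		/* walked past the range of interest */
		/* process the item at path->nodes[0], path->slots[0] here */
		path->slots[0]++;
	}
	btrfs_release_path(path);
	return ret < 0 ? ret : 0;
}
#endif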
2346

/*
 * adjust the pointers going up the tree, starting at level
 * making sure the right key of each node is points to 'key'.
 * This is used after shifting pointers to the left, so it stops
 * fixing up pointers when a given leaf/node is not in slot 0 of the
 * higher levels
 */
2355 static void fixup_low_keys(struct btrfs_path *path,
2356 struct btrfs_disk_key *key, int level)
2357 {
2358 int i;
2359 struct extent_buffer *t;
2360 int ret;
2361
2362 for (i = level; i < BTRFS_MAX_LEVEL; i++) {
2363 int tslot = path->slots[i];
2364
2365 if (!path->nodes[i])
2366 break;
2367 t = path->nodes[i];
2368 ret = btrfs_tree_mod_log_insert_key(t, tslot,
2369 BTRFS_MOD_LOG_KEY_REPLACE, GFP_ATOMIC);
2370 BUG_ON(ret < 0);
2371 btrfs_set_node_key(t, key, tslot);
2372 btrfs_mark_buffer_dirty(path->nodes[i]);
2373 if (tslot != 0)
2374 break;
2375 }
2376 }
2377

/*
 * update item key.
 *
 * This function isn't completely safe. It's the caller's responsibility
 * that the new key won't break the order
 */
2384 void btrfs_set_item_key_safe(struct btrfs_fs_info *fs_info,
2385 struct btrfs_path *path,
2386 const struct btrfs_key *new_key)
2387 {
2388 struct btrfs_disk_key disk_key;
2389 struct extent_buffer *eb;
2390 int slot;
2391
2392 eb = path->nodes[0];
2393 slot = path->slots[0];
2394 if (slot > 0) {
2395 btrfs_item_key(eb, &disk_key, slot - 1);
2396 if (unlikely(comp_keys(&disk_key, new_key) >= 0)) {
2397 btrfs_crit(fs_info,
2398 "slot %u key (%llu %u %llu) new key (%llu %u %llu)",
2399 slot, btrfs_disk_key_objectid(&disk_key),
2400 btrfs_disk_key_type(&disk_key),
2401 btrfs_disk_key_offset(&disk_key),
2402 new_key->objectid, new_key->type,
2403 new_key->offset);
2404 btrfs_print_leaf(eb);
2405 BUG();
2406 }
2407 }
2408 if (slot < btrfs_header_nritems(eb) - 1) {
2409 btrfs_item_key(eb, &disk_key, slot + 1);
2410 if (unlikely(comp_keys(&disk_key, new_key) <= 0)) {
2411 btrfs_crit(fs_info,
2412 "slot %u key (%llu %u %llu) new key (%llu %u %llu)",
2413 slot, btrfs_disk_key_objectid(&disk_key),
2414 btrfs_disk_key_type(&disk_key),
2415 btrfs_disk_key_offset(&disk_key),
2416 new_key->objectid, new_key->type,
2417 new_key->offset);
2418 btrfs_print_leaf(eb);
2419 BUG();
2420 }
2421 }
2422
2423 btrfs_cpu_key_to_disk(&disk_key, new_key);
2424 btrfs_set_item_key(eb, &disk_key, slot);
2425 btrfs_mark_buffer_dirty(eb);
2426 if (slot == 0)
2427 fixup_low_keys(path, &disk_key, 1);
2428 }
2429

/*
 * Check key order of two sibling extent buffers.
 *
 * Return true if something is wrong.
 * Return false if everything is fine.
 *
 * Tree-checker only works inside one tree block, thus the following
 * corruption can not be detected by tree-checker: the last key of @left is
 * valid inside @left itself, but not valid when the first key of @right is
 * smaller than or equal to it.  This can only be checked at tree block merge
 * time, and since the tree checker has ensured all key order inside each
 * tree block is correct, we only need to compare the last key of @left and
 * the first key of @right.
 */
2450 static bool check_sibling_keys(struct extent_buffer *left,
2451 struct extent_buffer *right)
2452 {
2453 struct btrfs_key left_last;
2454 struct btrfs_key right_first;
2455 int level = btrfs_header_level(left);
2456 int nr_left = btrfs_header_nritems(left);
2457 int nr_right = btrfs_header_nritems(right);
2458
2459
2460 if (!nr_left || !nr_right)
2461 return false;
2462
2463 if (level) {
2464 btrfs_node_key_to_cpu(left, &left_last, nr_left - 1);
2465 btrfs_node_key_to_cpu(right, &right_first, 0);
2466 } else {
2467 btrfs_item_key_to_cpu(left, &left_last, nr_left - 1);
2468 btrfs_item_key_to_cpu(right, &right_first, 0);
2469 }
2470
2471 if (btrfs_comp_cpu_keys(&left_last, &right_first) >= 0) {
2472 btrfs_crit(left->fs_info,
2473 "bad key order, sibling blocks, left last (%llu %u %llu) right first (%llu %u %llu)",
2474 left_last.objectid, left_last.type,
2475 left_last.offset, right_first.objectid,
2476 right_first.type, right_first.offset);
2477 return true;
2478 }
2479 return false;
2480 }
2481

/*
 * try to push data from one node into the next node left in the
 * tree.
 *
 * returns 0 if some ptrs were pushed left, < 0 if there was some horrible
 * error, and > 0 if there was no room in the left hand block.
 */
2489 static int push_node_left(struct btrfs_trans_handle *trans,
2490 struct extent_buffer *dst,
2491 struct extent_buffer *src, int empty)
2492 {
2493 struct btrfs_fs_info *fs_info = trans->fs_info;
2494 int push_items = 0;
2495 int src_nritems;
2496 int dst_nritems;
2497 int ret = 0;
2498
2499 src_nritems = btrfs_header_nritems(src);
2500 dst_nritems = btrfs_header_nritems(dst);
2501 push_items = BTRFS_NODEPTRS_PER_BLOCK(fs_info) - dst_nritems;
2502 WARN_ON(btrfs_header_generation(src) != trans->transid);
2503 WARN_ON(btrfs_header_generation(dst) != trans->transid);
2504
2505 if (!empty && src_nritems <= 8)
2506 return 1;
2507
2508 if (push_items <= 0)
2509 return 1;
2510
2511 if (empty) {
2512 push_items = min(src_nritems, push_items);
2513 if (push_items < src_nritems) {
			/* leave at least 8 pointers in the node if
			 * we aren't going to empty it
			 */
2517 if (src_nritems - push_items < 8) {
2518 if (push_items <= 8)
2519 return 1;
2520 push_items -= 8;
2521 }
2522 }
2523 } else
2524 push_items = min(src_nritems - 8, push_items);
2525
2526
2527 if (check_sibling_keys(dst, src)) {
2528 ret = -EUCLEAN;
2529 btrfs_abort_transaction(trans, ret);
2530 return ret;
2531 }
2532 ret = btrfs_tree_mod_log_eb_copy(dst, src, dst_nritems, 0, push_items);
2533 if (ret) {
2534 btrfs_abort_transaction(trans, ret);
2535 return ret;
2536 }
2537 copy_extent_buffer(dst, src,
2538 btrfs_node_key_ptr_offset(dst_nritems),
2539 btrfs_node_key_ptr_offset(0),
2540 push_items * sizeof(struct btrfs_key_ptr));
2541
2542 if (push_items < src_nritems) {
		/*
		 * Don't call btrfs_tree_mod_log_insert_move() here, key removal
		 * was already fully logged by btrfs_tree_mod_log_eb_copy() above.
		 */
2547 memmove_extent_buffer(src, btrfs_node_key_ptr_offset(0),
2548 btrfs_node_key_ptr_offset(push_items),
2549 (src_nritems - push_items) *
2550 sizeof(struct btrfs_key_ptr));
2551 }
2552 btrfs_set_header_nritems(src, src_nritems - push_items);
2553 btrfs_set_header_nritems(dst, dst_nritems + push_items);
2554 btrfs_mark_buffer_dirty(src);
2555 btrfs_mark_buffer_dirty(dst);
2556
2557 return ret;
2558 }
2559

/*
 * try to push data from one node into the next node right in the
 * tree.
 *
 * returns 0 if some ptrs were pushed, < 0 if there was some horrible
 * error, and > 0 if there was no room in the right hand block.
 *
 * this will only push up to 1/2 the contents of the left node over
 */
2569 static int balance_node_right(struct btrfs_trans_handle *trans,
2570 struct extent_buffer *dst,
2571 struct extent_buffer *src)
2572 {
2573 struct btrfs_fs_info *fs_info = trans->fs_info;
2574 int push_items = 0;
2575 int max_push;
2576 int src_nritems;
2577 int dst_nritems;
2578 int ret = 0;
2579
2580 WARN_ON(btrfs_header_generation(src) != trans->transid);
2581 WARN_ON(btrfs_header_generation(dst) != trans->transid);
2582
2583 src_nritems = btrfs_header_nritems(src);
2584 dst_nritems = btrfs_header_nritems(dst);
2585 push_items = BTRFS_NODEPTRS_PER_BLOCK(fs_info) - dst_nritems;
2586 if (push_items <= 0)
2587 return 1;
2588
2589 if (src_nritems < 4)
2590 return 1;
2591
2592 max_push = src_nritems / 2 + 1;
2593
2594 if (max_push >= src_nritems)
2595 return 1;
2596
2597 if (max_push < push_items)
2598 push_items = max_push;
2599
2600
2601 if (check_sibling_keys(src, dst)) {
2602 ret = -EUCLEAN;
2603 btrfs_abort_transaction(trans, ret);
2604 return ret;
2605 }
2606 ret = btrfs_tree_mod_log_insert_move(dst, push_items, 0, dst_nritems);
2607 BUG_ON(ret < 0);
2608 memmove_extent_buffer(dst, btrfs_node_key_ptr_offset(push_items),
2609 btrfs_node_key_ptr_offset(0),
2610 (dst_nritems) *
2611 sizeof(struct btrfs_key_ptr));
2612
2613 ret = btrfs_tree_mod_log_eb_copy(dst, src, 0, src_nritems - push_items,
2614 push_items);
2615 if (ret) {
2616 btrfs_abort_transaction(trans, ret);
2617 return ret;
2618 }
2619 copy_extent_buffer(dst, src,
2620 btrfs_node_key_ptr_offset(0),
2621 btrfs_node_key_ptr_offset(src_nritems - push_items),
2622 push_items * sizeof(struct btrfs_key_ptr));
2623
2624 btrfs_set_header_nritems(src, src_nritems - push_items);
2625 btrfs_set_header_nritems(dst, dst_nritems + push_items);
2626
2627 btrfs_mark_buffer_dirty(src);
2628 btrfs_mark_buffer_dirty(dst);
2629
2630 return ret;
2631 }
2632
2633 /*
2634  * Helper to insert a new root level in the tree: a new node is allocated
2635  * and a single pointer to the existing root is inserted into it.  The path
2636  * is updated so that path->nodes[level] points at the new root.
2637  *
2638  * Returns zero on success or < 0 on failure.
2639  */
2640 static noinline int insert_new_root(struct btrfs_trans_handle *trans,
2641 struct btrfs_root *root,
2642 struct btrfs_path *path, int level)
2643 {
2644 struct btrfs_fs_info *fs_info = root->fs_info;
2645 u64 lower_gen;
2646 struct extent_buffer *lower;
2647 struct extent_buffer *c;
2648 struct extent_buffer *old;
2649 struct btrfs_disk_key lower_key;
2650 int ret;
2651
2652 BUG_ON(path->nodes[level]);
2653 BUG_ON(path->nodes[level-1] != root->node);
2654
2655 lower = path->nodes[level-1];
2656 if (level == 1)
2657 btrfs_item_key(lower, &lower_key, 0);
2658 else
2659 btrfs_node_key(lower, &lower_key, 0);
2660
2661 c = btrfs_alloc_tree_block(trans, root, 0, root->root_key.objectid,
2662 &lower_key, level, root->node->start, 0,
2663 BTRFS_NESTING_NEW_ROOT);
2664 if (IS_ERR(c))
2665 return PTR_ERR(c);
2666
2667 root_add_used(root, fs_info->nodesize);
2668
2669 btrfs_set_header_nritems(c, 1);
2670 btrfs_set_node_key(c, &lower_key, 0);
2671 btrfs_set_node_blockptr(c, 0, lower->start);
2672 lower_gen = btrfs_header_generation(lower);
2673 WARN_ON(lower_gen != trans->transid);
2674
2675 btrfs_set_node_ptr_generation(c, 0, lower_gen);
2676
2677 btrfs_mark_buffer_dirty(c);
2678
2679 old = root->node;
2680 ret = btrfs_tree_mod_log_insert_root(root->node, c, false);
2681 BUG_ON(ret < 0);
2682 rcu_assign_pointer(root->node, c);
2683
2684
2685 free_extent_buffer(old);
2686
2687 add_root_to_dirty_list(root);
2688 atomic_inc(&c->refs);
2689 path->nodes[level] = c;
2690 path->locks[level] = BTRFS_WRITE_LOCK;
2691 path->slots[level] = 0;
2692 return 0;
2693 }
2694
2695 /*
2696  * Worker to insert a single key and block pointer into a node.  The node
2697  * must already have enough room for the new pointer.
2698  *
2699  * @slot and @level say where the new pointer goes; existing pointers at and
2700  * after @slot are shifted right to make room.
2701  */
2702 static void insert_ptr(struct btrfs_trans_handle *trans,
2703 struct btrfs_path *path,
2704 struct btrfs_disk_key *key, u64 bytenr,
2705 int slot, int level)
2706 {
2707 struct extent_buffer *lower;
2708 int nritems;
2709 int ret;
2710
2711 BUG_ON(!path->nodes[level]);
2712 btrfs_assert_tree_write_locked(path->nodes[level]);
2713 lower = path->nodes[level];
2714 nritems = btrfs_header_nritems(lower);
2715 BUG_ON(slot > nritems);
2716 BUG_ON(nritems == BTRFS_NODEPTRS_PER_BLOCK(trans->fs_info));
2717 if (slot != nritems) {
2718 if (level) {
2719 ret = btrfs_tree_mod_log_insert_move(lower, slot + 1,
2720 slot, nritems - slot);
2721 BUG_ON(ret < 0);
2722 }
2723 memmove_extent_buffer(lower,
2724 btrfs_node_key_ptr_offset(slot + 1),
2725 btrfs_node_key_ptr_offset(slot),
2726 (nritems - slot) * sizeof(struct btrfs_key_ptr));
2727 }
2728 if (level) {
2729 ret = btrfs_tree_mod_log_insert_key(lower, slot,
2730 BTRFS_MOD_LOG_KEY_ADD, GFP_NOFS);
2731 BUG_ON(ret < 0);
2732 }
2733 btrfs_set_node_key(lower, key, slot);
2734 btrfs_set_node_blockptr(lower, slot, bytenr);
2735 WARN_ON(trans->transid == 0);
2736 btrfs_set_node_ptr_generation(lower, slot, trans->transid);
2737 btrfs_set_header_nritems(lower, nritems + 1);
2738 btrfs_mark_buffer_dirty(lower);
2739 }
2740
2741
2742 /*
2743  * Split the node at the given level of the path in two.  The path is
2744  * corrected to point at the appropriate node after the split.
2745  *
2746  * Before splitting, this tries to make room by pushing items into the left
2747  * and right siblings; if that frees enough space it returns right away.
2748  * Returns 0 on success and < 0 on failure.
2749  */
2750 static noinline int split_node(struct btrfs_trans_handle *trans,
2751 struct btrfs_root *root,
2752 struct btrfs_path *path, int level)
2753 {
2754 struct btrfs_fs_info *fs_info = root->fs_info;
2755 struct extent_buffer *c;
2756 struct extent_buffer *split;
2757 struct btrfs_disk_key disk_key;
2758 int mid;
2759 int ret;
2760 u32 c_nritems;
2761
2762 c = path->nodes[level];
2763 WARN_ON(btrfs_header_generation(c) != trans->transid);
2764 if (c == root->node) {
2765 /*
2766  * We are splitting the root: allocate a new root node one level higher
2767  * and hang the old root under it.
2768  *
2769  * Tree mod log: we don't log removal of the old root in
2770  * insert_new_root(), because that root buffer is kept around as a
2771  * normal node.  Removal of half of its elements is logged below by
2772  * btrfs_tree_mod_log_eb_copy().  We hold a tree lock on the buffer,
2773  * which is why we cannot race with other tree mod log users.
2774  */
2775 ret = insert_new_root(trans, root, path, level + 1);
2776 if (ret)
2777 return ret;
2778 } else {
2779 ret = push_nodes_for_insert(trans, root, path, level);
2780 c = path->nodes[level];
2781 if (!ret && btrfs_header_nritems(c) <
2782 BTRFS_NODEPTRS_PER_BLOCK(fs_info) - 3)
2783 return 0;
2784 if (ret < 0)
2785 return ret;
2786 }
2787
2788 c_nritems = btrfs_header_nritems(c);
2789 mid = (c_nritems + 1) / 2;
2790 btrfs_node_key(c, &disk_key, mid);
2791
2792 split = btrfs_alloc_tree_block(trans, root, 0, root->root_key.objectid,
2793 &disk_key, level, c->start, 0,
2794 BTRFS_NESTING_SPLIT);
2795 if (IS_ERR(split))
2796 return PTR_ERR(split);
2797
2798 root_add_used(root, fs_info->nodesize);
2799 ASSERT(btrfs_header_level(c) == level);
2800
2801 ret = btrfs_tree_mod_log_eb_copy(split, c, 0, mid, c_nritems - mid);
2802 if (ret) {
2803 btrfs_abort_transaction(trans, ret);
2804 return ret;
2805 }
2806 copy_extent_buffer(split, c,
2807 btrfs_node_key_ptr_offset(0),
2808 btrfs_node_key_ptr_offset(mid),
2809 (c_nritems - mid) * sizeof(struct btrfs_key_ptr));
2810 btrfs_set_header_nritems(split, c_nritems - mid);
2811 btrfs_set_header_nritems(c, mid);
2812
2813 btrfs_mark_buffer_dirty(c);
2814 btrfs_mark_buffer_dirty(split);
2815
2816 insert_ptr(trans, path, &disk_key, split->start,
2817 path->slots[level + 1] + 1, level + 1);
2818
2819 if (path->slots[level] >= mid) {
2820 path->slots[level] -= mid;
2821 btrfs_tree_unlock(c);
2822 free_extent_buffer(c);
2823 path->nodes[level] = split;
2824 path->slots[level + 1] += 1;
2825 } else {
2826 btrfs_tree_unlock(split);
2827 free_extent_buffer(split);
2828 }
2829 return 0;
2830 }
2831
2832 /*
2833  * How many bytes are required to store the items in a leaf.  @start and
2834  * @nr select which items to count; the total covers both the item headers
2835  * and the item data.
2836  */
2837 static int leaf_space_used(struct extent_buffer *l, int start, int nr)
2838 {
2839 int data_len;
2840 int nritems = btrfs_header_nritems(l);
2841 int end = min(nritems, start + nr) - 1;
2842
2843 if (!nr)
2844 return 0;
2845 data_len = btrfs_item_offset(l, start) + btrfs_item_size(l, start);
2846 data_len = data_len - btrfs_item_offset(l, end);
2847 data_len += sizeof(struct btrfs_item) * nr;
2848 WARN_ON(data_len < 0);
2849 return data_len;
2850 }
2851
2852
2853 /*
2854  * The space between the end of the leaf item headers and the start of the
2855  * leaf data, i.e. how much room the leaf has left for both items and data.
2856  */
2857 noinline int btrfs_leaf_free_space(struct extent_buffer *leaf)
2858 {
2859 struct btrfs_fs_info *fs_info = leaf->fs_info;
2860 int nritems = btrfs_header_nritems(leaf);
2861 int ret;
2862
2863 ret = BTRFS_LEAF_DATA_SIZE(fs_info) - leaf_space_used(leaf, 0, nritems);
2864 if (ret < 0) {
2865 btrfs_crit(fs_info,
2866 "leaf free space ret %d, leaf data size %lu, used %d nritems %d",
2867 ret,
2868 (unsigned long) BTRFS_LEAF_DATA_SIZE(fs_info),
2869 leaf_space_used(leaf, 0, nritems), nritems);
2870 }
2871 return ret;
2872 }
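
/*
 * Usage sketch (illustrative only, not part of the original file): deciding
 * whether @nr new items carrying @total_data bytes of data would still fit
 * in a leaf.  The helper name is hypothetical.
 */
#if 0	/* usage sketch only, never compiled */
static bool example_leaf_has_room(struct extent_buffer *leaf, u32 nr,
				  u32 total_data)
{
	/* Every new item costs its data plus one struct btrfs_item header. */
	u32 needed = total_data + nr * sizeof(struct btrfs_item);

	return btrfs_leaf_free_space(leaf) >= (int)needed;
}
#endif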
2873
2874 /*
2875  * Push items from the left leaf (path->nodes[0]) into @right.  @min_slot is
2876  * the lowest slot we are willing to push; lower slots always stay put.
2877  */
2878 static noinline int __push_leaf_right(struct btrfs_path *path,
2879 int data_size, int empty,
2880 struct extent_buffer *right,
2881 int free_space, u32 left_nritems,
2882 u32 min_slot)
2883 {
2884 struct btrfs_fs_info *fs_info = right->fs_info;
2885 struct extent_buffer *left = path->nodes[0];
2886 struct extent_buffer *upper = path->nodes[1];
2887 struct btrfs_map_token token;
2888 struct btrfs_disk_key disk_key;
2889 int slot;
2890 u32 i;
2891 int push_space = 0;
2892 int push_items = 0;
2893 u32 nr;
2894 u32 right_nritems;
2895 u32 data_end;
2896 u32 this_item_size;
2897
2898 if (empty)
2899 nr = 0;
2900 else
2901 nr = max_t(u32, 1, min_slot);
2902
2903 if (path->slots[0] >= left_nritems)
2904 push_space += data_size;
2905
2906 slot = path->slots[1];
2907 i = left_nritems - 1;
2908 while (i >= nr) {
2909 if (!empty && push_items > 0) {
2910 if (path->slots[0] > i)
2911 break;
2912 if (path->slots[0] == i) {
2913 int space = btrfs_leaf_free_space(left);
2914
2915 if (space + push_space * 2 > free_space)
2916 break;
2917 }
2918 }
2919
2920 if (path->slots[0] == i)
2921 push_space += data_size;
2922
2923 this_item_size = btrfs_item_size(left, i);
2924 if (this_item_size + sizeof(struct btrfs_item) +
2925 push_space > free_space)
2926 break;
2927
2928 push_items++;
2929 push_space += this_item_size + sizeof(struct btrfs_item);
2930 if (i == 0)
2931 break;
2932 i--;
2933 }
2934
2935 if (push_items == 0)
2936 goto out_unlock;
2937
2938 WARN_ON(!empty && push_items == left_nritems);
2939
2940
2941 right_nritems = btrfs_header_nritems(right);
2942
2943 push_space = btrfs_item_data_end(left, left_nritems - push_items);
2944 push_space -= leaf_data_end(left);
2945
2946
2947 data_end = leaf_data_end(right);
2948 memmove_extent_buffer(right,
2949 BTRFS_LEAF_DATA_OFFSET + data_end - push_space,
2950 BTRFS_LEAF_DATA_OFFSET + data_end,
2951 BTRFS_LEAF_DATA_SIZE(fs_info) - data_end);
2952
2953
2954 copy_extent_buffer(right, left, BTRFS_LEAF_DATA_OFFSET +
2955 BTRFS_LEAF_DATA_SIZE(fs_info) - push_space,
2956 BTRFS_LEAF_DATA_OFFSET + leaf_data_end(left),
2957 push_space);
2958
2959 memmove_extent_buffer(right, btrfs_item_nr_offset(push_items),
2960 btrfs_item_nr_offset(0),
2961 right_nritems * sizeof(struct btrfs_item));
2962
2963
2964 copy_extent_buffer(right, left, btrfs_item_nr_offset(0),
2965 btrfs_item_nr_offset(left_nritems - push_items),
2966 push_items * sizeof(struct btrfs_item));
2967
2968
2969 btrfs_init_map_token(&token, right);
2970 right_nritems += push_items;
2971 btrfs_set_header_nritems(right, right_nritems);
2972 push_space = BTRFS_LEAF_DATA_SIZE(fs_info);
2973 for (i = 0; i < right_nritems; i++) {
2974 push_space -= btrfs_token_item_size(&token, i);
2975 btrfs_set_token_item_offset(&token, i, push_space);
2976 }
2977
2978 left_nritems -= push_items;
2979 btrfs_set_header_nritems(left, left_nritems);
2980
2981 if (left_nritems)
2982 btrfs_mark_buffer_dirty(left);
2983 else
2984 btrfs_clean_tree_block(left);
2985
2986 btrfs_mark_buffer_dirty(right);
2987
2988 btrfs_item_key(right, &disk_key, 0);
2989 btrfs_set_node_key(upper, &disk_key, slot + 1);
2990 btrfs_mark_buffer_dirty(upper);
2991
2992
2993 if (path->slots[0] >= left_nritems) {
2994 path->slots[0] -= left_nritems;
2995 if (btrfs_header_nritems(path->nodes[0]) == 0)
2996 btrfs_clean_tree_block(path->nodes[0]);
2997 btrfs_tree_unlock(path->nodes[0]);
2998 free_extent_buffer(path->nodes[0]);
2999 path->nodes[0] = right;
3000 path->slots[1] += 1;
3001 } else {
3002 btrfs_tree_unlock(right);
3003 free_extent_buffer(right);
3004 }
3005 return 0;
3006
3007 out_unlock:
3008 btrfs_tree_unlock(right);
3009 free_extent_buffer(right);
3010 return 1;
3011 }
3012
3013 /*
3014  * Push some data in the path leaf to the right, trying to free up at least
3015  * @data_size bytes.
3016  *
3017  * Returns 1 if the push failed because the right sibling did not have
3018  * enough room, 0 if everything worked out and < 0 on major errors.
3019  *
3020  * This pushes items from @min_slot to the end of the leaf; it never pushes
3021  * any slot lower than @min_slot.
3022  */
3023 static int push_leaf_right(struct btrfs_trans_handle *trans, struct btrfs_root
3024 *root, struct btrfs_path *path,
3025 int min_data_size, int data_size,
3026 int empty, u32 min_slot)
3027 {
3028 struct extent_buffer *left = path->nodes[0];
3029 struct extent_buffer *right;
3030 struct extent_buffer *upper;
3031 int slot;
3032 int free_space;
3033 u32 left_nritems;
3034 int ret;
3035
3036 if (!path->nodes[1])
3037 return 1;
3038
3039 slot = path->slots[1];
3040 upper = path->nodes[1];
3041 if (slot >= btrfs_header_nritems(upper) - 1)
3042 return 1;
3043
3044 btrfs_assert_tree_write_locked(path->nodes[1]);
3045
3046 right = btrfs_read_node_slot(upper, slot + 1);
3047 /*
3048  * If reading the right sibling fails it is no big deal: just return 1
3049  * and let the caller fall back to splitting the leaf.
3050  */
3051 if (IS_ERR(right))
3052 return 1;
3053
3054 __btrfs_tree_lock(right, BTRFS_NESTING_RIGHT);
3055
3056 free_space = btrfs_leaf_free_space(right);
3057 if (free_space < data_size)
3058 goto out_unlock;
3059
3060 ret = btrfs_cow_block(trans, root, right, upper,
3061 slot + 1, &right, BTRFS_NESTING_RIGHT_COW);
3062 if (ret)
3063 goto out_unlock;
3064
3065 left_nritems = btrfs_header_nritems(left);
3066 if (left_nritems == 0)
3067 goto out_unlock;
3068
3069 if (check_sibling_keys(left, right)) {
3070 ret = -EUCLEAN;
3071 btrfs_tree_unlock(right);
3072 free_extent_buffer(right);
3073 return ret;
3074 }
3075 if (path->slots[0] == left_nritems && !empty) {
3076
3077
3078
3079
3080 btrfs_tree_unlock(left);
3081 free_extent_buffer(left);
3082 path->nodes[0] = right;
3083 path->slots[0] = 0;
3084 path->slots[1]++;
3085 return 0;
3086 }
3087
3088 return __push_leaf_right(path, min_data_size, empty,
3089 right, free_space, left_nritems, min_slot);
3090 out_unlock:
3091 btrfs_tree_unlock(right);
3092 free_extent_buffer(right);
3093 return 1;
3094 }
3095
3096
3097 /*
3098  * Push some data from the path leaf (path->nodes[0]) into @left, trying to
3099  * free up at least @data_size bytes.  Returns zero if the push worked and
3100  * nonzero otherwise.
3101  *
3102  * @max_slot limits how far into the leaf we go: only lower slots may move.
3103  */
3104 static noinline int __push_leaf_left(struct btrfs_path *path, int data_size,
3105 int empty, struct extent_buffer *left,
3106 int free_space, u32 right_nritems,
3107 u32 max_slot)
3108 {
3109 struct btrfs_fs_info *fs_info = left->fs_info;
3110 struct btrfs_disk_key disk_key;
3111 struct extent_buffer *right = path->nodes[0];
3112 int i;
3113 int push_space = 0;
3114 int push_items = 0;
3115 u32 old_left_nritems;
3116 u32 nr;
3117 int ret = 0;
3118 u32 this_item_size;
3119 u32 old_left_item_size;
3120 struct btrfs_map_token token;
3121
3122 if (empty)
3123 nr = min(right_nritems, max_slot);
3124 else
3125 nr = min(right_nritems - 1, max_slot);
3126
3127 for (i = 0; i < nr; i++) {
3128 if (!empty && push_items > 0) {
3129 if (path->slots[0] < i)
3130 break;
3131 if (path->slots[0] == i) {
3132 int space = btrfs_leaf_free_space(right);
3133
3134 if (space + push_space * 2 > free_space)
3135 break;
3136 }
3137 }
3138
3139 if (path->slots[0] == i)
3140 push_space += data_size;
3141
3142 this_item_size = btrfs_item_size(right, i);
3143 if (this_item_size + sizeof(struct btrfs_item) + push_space >
3144 free_space)
3145 break;
3146
3147 push_items++;
3148 push_space += this_item_size + sizeof(struct btrfs_item);
3149 }
3150
3151 if (push_items == 0) {
3152 ret = 1;
3153 goto out;
3154 }
3155 WARN_ON(!empty && push_items == btrfs_header_nritems(right));
3156
3157
3158 copy_extent_buffer(left, right,
3159 btrfs_item_nr_offset(btrfs_header_nritems(left)),
3160 btrfs_item_nr_offset(0),
3161 push_items * sizeof(struct btrfs_item));
3162
3163 push_space = BTRFS_LEAF_DATA_SIZE(fs_info) -
3164 btrfs_item_offset(right, push_items - 1);
3165
3166 copy_extent_buffer(left, right, BTRFS_LEAF_DATA_OFFSET +
3167 leaf_data_end(left) - push_space,
3168 BTRFS_LEAF_DATA_OFFSET +
3169 btrfs_item_offset(right, push_items - 1),
3170 push_space);
3171 old_left_nritems = btrfs_header_nritems(left);
3172 BUG_ON(old_left_nritems <= 0);
3173
3174 btrfs_init_map_token(&token, left);
3175 old_left_item_size = btrfs_item_offset(left, old_left_nritems - 1);
3176 for (i = old_left_nritems; i < old_left_nritems + push_items; i++) {
3177 u32 ioff;
3178
3179 ioff = btrfs_token_item_offset(&token, i);
3180 btrfs_set_token_item_offset(&token, i,
3181 ioff - (BTRFS_LEAF_DATA_SIZE(fs_info) - old_left_item_size));
3182 }
3183 btrfs_set_header_nritems(left, old_left_nritems + push_items);
3184
3185
3186 if (push_items > right_nritems)
3187 WARN(1, KERN_CRIT "push items %d nr %u\n", push_items,
3188 right_nritems);
3189
3190 if (push_items < right_nritems) {
3191 push_space = btrfs_item_offset(right, push_items - 1) -
3192 leaf_data_end(right);
3193 memmove_extent_buffer(right, BTRFS_LEAF_DATA_OFFSET +
3194 BTRFS_LEAF_DATA_SIZE(fs_info) - push_space,
3195 BTRFS_LEAF_DATA_OFFSET +
3196 leaf_data_end(right), push_space);
3197
3198 memmove_extent_buffer(right, btrfs_item_nr_offset(0),
3199 btrfs_item_nr_offset(push_items),
3200 (btrfs_header_nritems(right) - push_items) *
3201 sizeof(struct btrfs_item));
3202 }
3203
3204 btrfs_init_map_token(&token, right);
3205 right_nritems -= push_items;
3206 btrfs_set_header_nritems(right, right_nritems);
3207 push_space = BTRFS_LEAF_DATA_SIZE(fs_info);
3208 for (i = 0; i < right_nritems; i++) {
3209 push_space = push_space - btrfs_token_item_size(&token, i);
3210 btrfs_set_token_item_offset(&token, i, push_space);
3211 }
3212
3213 btrfs_mark_buffer_dirty(left);
3214 if (right_nritems)
3215 btrfs_mark_buffer_dirty(right);
3216 else
3217 btrfs_clean_tree_block(right);
3218
3219 btrfs_item_key(right, &disk_key, 0);
3220 fixup_low_keys(path, &disk_key, 1);
3221
3222
3223 if (path->slots[0] < push_items) {
3224 path->slots[0] += old_left_nritems;
3225 btrfs_tree_unlock(path->nodes[0]);
3226 free_extent_buffer(path->nodes[0]);
3227 path->nodes[0] = left;
3228 path->slots[1] -= 1;
3229 } else {
3230 btrfs_tree_unlock(left);
3231 free_extent_buffer(left);
3232 path->slots[0] -= push_items;
3233 }
3234 BUG_ON(path->slots[0] < 0);
3235 return ret;
3236 out:
3237 btrfs_tree_unlock(left);
3238 free_extent_buffer(left);
3239 return ret;
3240 }
3241
3242 /*
3243  * Push some data in the path leaf to the left, trying to free up at least
3244  * @data_size bytes.
3245  *
3246  * Returns 1 if the push failed because the left sibling did not have enough
3247  * room, 0 if everything worked out and < 0 on major errors.  @max_slot
3248  * limits how far into the leaf items may be pushed.
3249  */
3250 static int push_leaf_left(struct btrfs_trans_handle *trans, struct btrfs_root
3251 *root, struct btrfs_path *path, int min_data_size,
3252 int data_size, int empty, u32 max_slot)
3253 {
3254 struct extent_buffer *right = path->nodes[0];
3255 struct extent_buffer *left;
3256 int slot;
3257 int free_space;
3258 u32 right_nritems;
3259 int ret = 0;
3260
3261 slot = path->slots[1];
3262 if (slot == 0)
3263 return 1;
3264 if (!path->nodes[1])
3265 return 1;
3266
3267 right_nritems = btrfs_header_nritems(right);
3268 if (right_nritems == 0)
3269 return 1;
3270
3271 btrfs_assert_tree_write_locked(path->nodes[1]);
3272
3273 left = btrfs_read_node_slot(path->nodes[1], slot - 1);
3274 /*
3275  * If reading the left sibling fails it is no big deal: just return 1
3276  * and let the caller fall back to splitting the leaf.
3277  */
3278 if (IS_ERR(left))
3279 return 1;
3280
3281 __btrfs_tree_lock(left, BTRFS_NESTING_LEFT);
3282
3283 free_space = btrfs_leaf_free_space(left);
3284 if (free_space < data_size) {
3285 ret = 1;
3286 goto out;
3287 }
3288
3289 ret = btrfs_cow_block(trans, root, left,
3290 path->nodes[1], slot - 1, &left,
3291 BTRFS_NESTING_LEFT_COW);
3292 if (ret) {
3293
3294 if (ret == -ENOSPC)
3295 ret = 1;
3296 goto out;
3297 }
3298
3299 if (check_sibling_keys(left, right)) {
3300 ret = -EUCLEAN;
3301 goto out;
3302 }
3303 return __push_leaf_left(path, min_data_size,
3304 empty, left, free_space, right_nritems,
3305 max_slot);
3306 out:
3307 btrfs_tree_unlock(left);
3308 free_extent_buffer(left);
3309 return ret;
3310 }
3311
3312 /*
3313  * Copy the items from slot @mid onward out of leaf @l into the freshly
3314  * allocated leaf @right, and link @right into the parent node.
3315  */
3316 static noinline void copy_for_split(struct btrfs_trans_handle *trans,
3317 struct btrfs_path *path,
3318 struct extent_buffer *l,
3319 struct extent_buffer *right,
3320 int slot, int mid, int nritems)
3321 {
3322 struct btrfs_fs_info *fs_info = trans->fs_info;
3323 int data_copy_size;
3324 int rt_data_off;
3325 int i;
3326 struct btrfs_disk_key disk_key;
3327 struct btrfs_map_token token;
3328
3329 nritems = nritems - mid;
3330 btrfs_set_header_nritems(right, nritems);
3331 data_copy_size = btrfs_item_data_end(l, mid) - leaf_data_end(l);
3332
3333 copy_extent_buffer(right, l, btrfs_item_nr_offset(0),
3334 btrfs_item_nr_offset(mid),
3335 nritems * sizeof(struct btrfs_item));
3336
3337 copy_extent_buffer(right, l,
3338 BTRFS_LEAF_DATA_OFFSET + BTRFS_LEAF_DATA_SIZE(fs_info) -
3339 data_copy_size, BTRFS_LEAF_DATA_OFFSET +
3340 leaf_data_end(l), data_copy_size);
3341
3342 rt_data_off = BTRFS_LEAF_DATA_SIZE(fs_info) - btrfs_item_data_end(l, mid);
3343
3344 btrfs_init_map_token(&token, right);
3345 for (i = 0; i < nritems; i++) {
3346 u32 ioff;
3347
3348 ioff = btrfs_token_item_offset(&token, i);
3349 btrfs_set_token_item_offset(&token, i, ioff + rt_data_off);
3350 }
3351
3352 btrfs_set_header_nritems(l, mid);
3353 btrfs_item_key(right, &disk_key, 0);
3354 insert_ptr(trans, path, &disk_key, right->start, path->slots[1] + 1, 1);
3355
3356 btrfs_mark_buffer_dirty(right);
3357 btrfs_mark_buffer_dirty(l);
3358 BUG_ON(path->slots[0] != slot);
3359
3360 if (mid <= slot) {
3361 btrfs_tree_unlock(path->nodes[0]);
3362 free_extent_buffer(path->nodes[0]);
3363 path->nodes[0] = right;
3364 path->slots[0] -= mid;
3365 path->slots[1] += 1;
3366 } else {
3367 btrfs_tree_unlock(right);
3368 free_extent_buffer(right);
3369 }
3370
3371 BUG_ON(path->slots[0] < 0);
3372 }
3373
3374 /*
3375  * Double splits happen when we need to insert a big item in the middle of
3376  * a leaf.  A double split can leave us with three mostly empty leaves:
3377  *
3378  *	leaf: [ slots 0 - N ] [ our target ] [ slots N + 1 - total ]
3379  *	            A               B                  C
3380  *
3381  * We avoid this by pushing the items on either side of our target into the
3382  * adjacent leaves; if that frees enough space we can avoid the double split.
3383  */
3384 static noinline int push_for_double_split(struct btrfs_trans_handle *trans,
3385 struct btrfs_root *root,
3386 struct btrfs_path *path,
3387 int data_size)
3388 {
3389 int ret;
3390 int progress = 0;
3391 int slot;
3392 u32 nritems;
3393 int space_needed = data_size;
3394
3395 slot = path->slots[0];
3396 if (slot < btrfs_header_nritems(path->nodes[0]))
3397 space_needed -= btrfs_leaf_free_space(path->nodes[0]);
3398
3399
3400
3401
3402
3403 ret = push_leaf_right(trans, root, path, 1, space_needed, 0, slot);
3404 if (ret < 0)
3405 return ret;
3406
3407 if (ret == 0)
3408 progress++;
3409
3410 nritems = btrfs_header_nritems(path->nodes[0]);
3411
3412
3413
3414
3415 if (path->slots[0] == 0 || path->slots[0] == nritems)
3416 return 0;
3417
3418 if (btrfs_leaf_free_space(path->nodes[0]) >= data_size)
3419 return 0;
3420
3421
3422 slot = path->slots[0];
3423 space_needed = data_size;
3424 if (slot > 0)
3425 space_needed -= btrfs_leaf_free_space(path->nodes[0]);
3426 ret = push_leaf_left(trans, root, path, 1, space_needed, 0, slot);
3427 if (ret < 0)
3428 return ret;
3429
3430 if (ret == 0)
3431 progress++;
3432
3433 if (progress)
3434 return 0;
3435 return 1;
3436 }
3437
3438 /*
3439  * Split the path's leaf in two, making sure there is at least @data_size
3440  * bytes available for the resulting leaf level of the path.
3441  *
3442  * Returns 0 if all went well and < 0 on failure.
3443  */
3444 static noinline int split_leaf(struct btrfs_trans_handle *trans,
3445 struct btrfs_root *root,
3446 const struct btrfs_key *ins_key,
3447 struct btrfs_path *path, int data_size,
3448 int extend)
3449 {
3450 struct btrfs_disk_key disk_key;
3451 struct extent_buffer *l;
3452 u32 nritems;
3453 int mid;
3454 int slot;
3455 struct extent_buffer *right;
3456 struct btrfs_fs_info *fs_info = root->fs_info;
3457 int ret = 0;
3458 int wret;
3459 int split;
3460 int num_doubles = 0;
3461 int tried_avoid_double = 0;
3462
3463 l = path->nodes[0];
3464 slot = path->slots[0];
3465 if (extend && data_size + btrfs_item_size(l, slot) +
3466 sizeof(struct btrfs_item) > BTRFS_LEAF_DATA_SIZE(fs_info))
3467 return -EOVERFLOW;
3468
3469
3470 if (data_size && path->nodes[1]) {
3471 int space_needed = data_size;
3472
3473 if (slot < btrfs_header_nritems(l))
3474 space_needed -= btrfs_leaf_free_space(l);
3475
3476 wret = push_leaf_right(trans, root, path, space_needed,
3477 space_needed, 0, 0);
3478 if (wret < 0)
3479 return wret;
3480 if (wret) {
3481 space_needed = data_size;
3482 if (slot > 0)
3483 space_needed -= btrfs_leaf_free_space(l);
3484 wret = push_leaf_left(trans, root, path, space_needed,
3485 space_needed, 0, (u32)-1);
3486 if (wret < 0)
3487 return wret;
3488 }
3489 l = path->nodes[0];
3490
3491
3492 if (btrfs_leaf_free_space(l) >= data_size)
3493 return 0;
3494 }
3495
3496 if (!path->nodes[1]) {
3497 ret = insert_new_root(trans, root, path, 1);
3498 if (ret)
3499 return ret;
3500 }
3501 again:
3502 split = 1;
3503 l = path->nodes[0];
3504 slot = path->slots[0];
3505 nritems = btrfs_header_nritems(l);
3506 mid = (nritems + 1) / 2;
3507
3508 if (mid <= slot) {
3509 if (nritems == 1 ||
3510 leaf_space_used(l, mid, nritems - mid) + data_size >
3511 BTRFS_LEAF_DATA_SIZE(fs_info)) {
3512 if (slot >= nritems) {
3513 split = 0;
3514 } else {
3515 mid = slot;
3516 if (mid != nritems &&
3517 leaf_space_used(l, mid, nritems - mid) +
3518 data_size > BTRFS_LEAF_DATA_SIZE(fs_info)) {
3519 if (data_size && !tried_avoid_double)
3520 goto push_for_double;
3521 split = 2;
3522 }
3523 }
3524 }
3525 } else {
3526 if (leaf_space_used(l, 0, mid) + data_size >
3527 BTRFS_LEAF_DATA_SIZE(fs_info)) {
3528 if (!extend && data_size && slot == 0) {
3529 split = 0;
3530 } else if ((extend || !data_size) && slot == 0) {
3531 mid = 1;
3532 } else {
3533 mid = slot;
3534 if (mid != nritems &&
3535 leaf_space_used(l, mid, nritems - mid) +
3536 data_size > BTRFS_LEAF_DATA_SIZE(fs_info)) {
3537 if (data_size && !tried_avoid_double)
3538 goto push_for_double;
3539 split = 2;
3540 }
3541 }
3542 }
3543 }
3544
3545 if (split == 0)
3546 btrfs_cpu_key_to_disk(&disk_key, ins_key);
3547 else
3548 btrfs_item_key(l, &disk_key, mid);
3549
3550
3551
3552
3553
3554
3555
3556
3557
3558 right = btrfs_alloc_tree_block(trans, root, 0, root->root_key.objectid,
3559 &disk_key, 0, l->start, 0,
3560 num_doubles ? BTRFS_NESTING_NEW_ROOT :
3561 BTRFS_NESTING_SPLIT);
3562 if (IS_ERR(right))
3563 return PTR_ERR(right);
3564
3565 root_add_used(root, fs_info->nodesize);
3566
3567 if (split == 0) {
3568 if (mid <= slot) {
3569 btrfs_set_header_nritems(right, 0);
3570 insert_ptr(trans, path, &disk_key,
3571 right->start, path->slots[1] + 1, 1);
3572 btrfs_tree_unlock(path->nodes[0]);
3573 free_extent_buffer(path->nodes[0]);
3574 path->nodes[0] = right;
3575 path->slots[0] = 0;
3576 path->slots[1] += 1;
3577 } else {
3578 btrfs_set_header_nritems(right, 0);
3579 insert_ptr(trans, path, &disk_key,
3580 right->start, path->slots[1], 1);
3581 btrfs_tree_unlock(path->nodes[0]);
3582 free_extent_buffer(path->nodes[0]);
3583 path->nodes[0] = right;
3584 path->slots[0] = 0;
3585 if (path->slots[1] == 0)
3586 fixup_low_keys(path, &disk_key, 1);
3587 }
3588
3589
3590
3591
3592
3593 return ret;
3594 }
3595
3596 copy_for_split(trans, path, l, right, slot, mid, nritems);
3597
3598 if (split == 2) {
3599 BUG_ON(num_doubles != 0);
3600 num_doubles++;
3601 goto again;
3602 }
3603
3604 return 0;
3605
3606 push_for_double:
3607 push_for_double_split(trans, root, path, data_size);
3608 tried_avoid_double = 1;
3609 if (btrfs_leaf_free_space(path->nodes[0]) >= data_size)
3610 return 0;
3611 goto again;
3612 }
3613
3614 static noinline int setup_leaf_for_split(struct btrfs_trans_handle *trans,
3615 struct btrfs_root *root,
3616 struct btrfs_path *path, int ins_len)
3617 {
3618 struct btrfs_key key;
3619 struct extent_buffer *leaf;
3620 struct btrfs_file_extent_item *fi;
3621 u64 extent_len = 0;
3622 u32 item_size;
3623 int ret;
3624
3625 leaf = path->nodes[0];
3626 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
3627
3628 BUG_ON(key.type != BTRFS_EXTENT_DATA_KEY &&
3629 key.type != BTRFS_EXTENT_CSUM_KEY);
3630
3631 if (btrfs_leaf_free_space(leaf) >= ins_len)
3632 return 0;
3633
3634 item_size = btrfs_item_size(leaf, path->slots[0]);
3635 if (key.type == BTRFS_EXTENT_DATA_KEY) {
3636 fi = btrfs_item_ptr(leaf, path->slots[0],
3637 struct btrfs_file_extent_item);
3638 extent_len = btrfs_file_extent_num_bytes(leaf, fi);
3639 }
3640 btrfs_release_path(path);
3641
3642 path->keep_locks = 1;
3643 path->search_for_split = 1;
3644 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
3645 path->search_for_split = 0;
3646 if (ret > 0)
3647 ret = -EAGAIN;
3648 if (ret < 0)
3649 goto err;
3650
3651 ret = -EAGAIN;
3652 leaf = path->nodes[0];
3653
3654 if (item_size != btrfs_item_size(leaf, path->slots[0]))
3655 goto err;
3656
3657
3658 if (btrfs_leaf_free_space(path->nodes[0]) >= ins_len)
3659 goto err;
3660
3661 if (key.type == BTRFS_EXTENT_DATA_KEY) {
3662 fi = btrfs_item_ptr(leaf, path->slots[0],
3663 struct btrfs_file_extent_item);
3664 if (extent_len != btrfs_file_extent_num_bytes(leaf, fi))
3665 goto err;
3666 }
3667
3668 ret = split_leaf(trans, root, &key, path, ins_len, 1);
3669 if (ret)
3670 goto err;
3671
3672 path->keep_locks = 0;
3673 btrfs_unlock_up_safe(path, 1);
3674 return 0;
3675 err:
3676 path->keep_locks = 0;
3677 return ret;
3678 }
3679
3680 static noinline int split_item(struct btrfs_path *path,
3681 const struct btrfs_key *new_key,
3682 unsigned long split_offset)
3683 {
3684 struct extent_buffer *leaf;
3685 int orig_slot, slot;
3686 char *buf;
3687 u32 nritems;
3688 u32 item_size;
3689 u32 orig_offset;
3690 struct btrfs_disk_key disk_key;
3691
3692 leaf = path->nodes[0];
3693 BUG_ON(btrfs_leaf_free_space(leaf) < sizeof(struct btrfs_item));
3694
3695 orig_slot = path->slots[0];
3696 orig_offset = btrfs_item_offset(leaf, path->slots[0]);
3697 item_size = btrfs_item_size(leaf, path->slots[0]);
3698
3699 buf = kmalloc(item_size, GFP_NOFS);
3700 if (!buf)
3701 return -ENOMEM;
3702
3703 read_extent_buffer(leaf, buf, btrfs_item_ptr_offset(leaf,
3704 path->slots[0]), item_size);
3705
3706 slot = path->slots[0] + 1;
3707 nritems = btrfs_header_nritems(leaf);
3708 if (slot != nritems) {
3709
3710 memmove_extent_buffer(leaf, btrfs_item_nr_offset(slot + 1),
3711 btrfs_item_nr_offset(slot),
3712 (nritems - slot) * sizeof(struct btrfs_item));
3713 }
3714
3715 btrfs_cpu_key_to_disk(&disk_key, new_key);
3716 btrfs_set_item_key(leaf, &disk_key, slot);
3717
3718 btrfs_set_item_offset(leaf, slot, orig_offset);
3719 btrfs_set_item_size(leaf, slot, item_size - split_offset);
3720
3721 btrfs_set_item_offset(leaf, orig_slot,
3722 orig_offset + item_size - split_offset);
3723 btrfs_set_item_size(leaf, orig_slot, split_offset);
3724
3725 btrfs_set_header_nritems(leaf, nritems + 1);
3726
3727
3728 write_extent_buffer(leaf, buf,
3729 btrfs_item_ptr_offset(leaf, path->slots[0]),
3730 split_offset);
3731
3732
3733 write_extent_buffer(leaf, buf + split_offset,
3734 btrfs_item_ptr_offset(leaf, slot),
3735 item_size - split_offset);
3736 btrfs_mark_buffer_dirty(leaf);
3737
3738 BUG_ON(btrfs_leaf_free_space(leaf) < 0);
3739 kfree(buf);
3740 return 0;
3741 }
3742
3743
3744 /*
3745  * This function splits a single item into two items, giving @new_key to
3746  * the new item and splitting the old one at @split_offset (from the start
3747  * of the item).
3748  *
3749  * The path may be released by this operation.  After the split, the path
3750  * is pointing to the old item; the new item lives in the same leaf.
3751  *
3752  * Note: the item being split must be small enough to live alone on a tree
3753  * block with room for one extra struct btrfs_item.
3754  *
3755  * This allows us to split the item in place, keeping a lock on the leaf
3756  * the entire time.
3757  */
3758 int btrfs_split_item(struct btrfs_trans_handle *trans,
3759 struct btrfs_root *root,
3760 struct btrfs_path *path,
3761 const struct btrfs_key *new_key,
3762 unsigned long split_offset)
3763 {
3764 int ret;
3765 ret = setup_leaf_for_split(trans, root, path,
3766 sizeof(struct btrfs_item));
3767 if (ret)
3768 return ret;
3769
3770 ret = split_item(path, new_key, split_offset);
3771 return ret;
3772 }
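
/*
 * Usage sketch (illustrative only, not part of the original file): splitting
 * the csum item the path currently points at, re-keying its second half at
 * @new_offset.  The function and parameter names are hypothetical; note that
 * setup_leaf_for_split() only accepts EXTENT_DATA and EXTENT_CSUM keys.
 */
#if 0	/* usage sketch only, never compiled */
static int example_split_csum_item(struct btrfs_trans_handle *trans,
				   struct btrfs_root *csum_root,
				   struct btrfs_path *path,
				   u64 new_offset, unsigned long split_bytes)
{
	struct btrfs_key new_key;

	new_key.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
	new_key.type = BTRFS_EXTENT_CSUM_KEY;
	new_key.offset = new_offset;

	/* The second half of the old item ends up under @new_key. */
	return btrfs_split_item(trans, csum_root, path, &new_key, split_bytes);
}
#endif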
3773
3774
3775 /*
3776  * Make the item pointed to by the path smaller.  @new_size indicates how
3777  * small to make it, and @from_end tells us whether to drop bytes from the
3778  * end of the item's data or from the front (which also adjusts the key).
3779  */
3780 void btrfs_truncate_item(struct btrfs_path *path, u32 new_size, int from_end)
3781 {
3782 int slot;
3783 struct extent_buffer *leaf;
3784 u32 nritems;
3785 unsigned int data_end;
3786 unsigned int old_data_start;
3787 unsigned int old_size;
3788 unsigned int size_diff;
3789 int i;
3790 struct btrfs_map_token token;
3791
3792 leaf = path->nodes[0];
3793 slot = path->slots[0];
3794
3795 old_size = btrfs_item_size(leaf, slot);
3796 if (old_size == new_size)
3797 return;
3798
3799 nritems = btrfs_header_nritems(leaf);
3800 data_end = leaf_data_end(leaf);
3801
3802 old_data_start = btrfs_item_offset(leaf, slot);
3803
3804 size_diff = old_size - new_size;
3805
3806 BUG_ON(slot < 0);
3807 BUG_ON(slot >= nritems);
3808
3809
3810
3811
3812
3813 btrfs_init_map_token(&token, leaf);
3814 for (i = slot; i < nritems; i++) {
3815 u32 ioff;
3816
3817 ioff = btrfs_token_item_offset(&token, i);
3818 btrfs_set_token_item_offset(&token, i, ioff + size_diff);
3819 }
3820
3821
3822 if (from_end) {
3823 memmove_extent_buffer(leaf, BTRFS_LEAF_DATA_OFFSET +
3824 data_end + size_diff, BTRFS_LEAF_DATA_OFFSET +
3825 data_end, old_data_start + new_size - data_end);
3826 } else {
3827 struct btrfs_disk_key disk_key;
3828 u64 offset;
3829
3830 btrfs_item_key(leaf, &disk_key, slot);
3831
3832 if (btrfs_disk_key_type(&disk_key) == BTRFS_EXTENT_DATA_KEY) {
3833 unsigned long ptr;
3834 struct btrfs_file_extent_item *fi;
3835
3836 fi = btrfs_item_ptr(leaf, slot,
3837 struct btrfs_file_extent_item);
3838 fi = (struct btrfs_file_extent_item *)(
3839 (unsigned long)fi - size_diff);
3840
3841 if (btrfs_file_extent_type(leaf, fi) ==
3842 BTRFS_FILE_EXTENT_INLINE) {
3843 ptr = btrfs_item_ptr_offset(leaf, slot);
3844 memmove_extent_buffer(leaf, ptr,
3845 (unsigned long)fi,
3846 BTRFS_FILE_EXTENT_INLINE_DATA_START);
3847 }
3848 }
3849
3850 memmove_extent_buffer(leaf, BTRFS_LEAF_DATA_OFFSET +
3851 data_end + size_diff, BTRFS_LEAF_DATA_OFFSET +
3852 data_end, old_data_start - data_end);
3853
3854 offset = btrfs_disk_key_offset(&disk_key);
3855 btrfs_set_disk_key_offset(&disk_key, offset + size_diff);
3856 btrfs_set_item_key(leaf, &disk_key, slot);
3857 if (slot == 0)
3858 fixup_low_keys(path, &disk_key, 1);
3859 }
3860
3861 btrfs_set_item_size(leaf, slot, new_size);
3862 btrfs_mark_buffer_dirty(leaf);
3863
3864 if (btrfs_leaf_free_space(leaf) < 0) {
3865 btrfs_print_leaf(leaf);
3866 BUG();
3867 }
3868 }
3869
3870 /*
3871  * Make the item pointed to by the path bigger; @data_size is the added size.
3872  */
3873 void btrfs_extend_item(struct btrfs_path *path, u32 data_size)
3874 {
3875 int slot;
3876 struct extent_buffer *leaf;
3877 u32 nritems;
3878 unsigned int data_end;
3879 unsigned int old_data;
3880 unsigned int old_size;
3881 int i;
3882 struct btrfs_map_token token;
3883
3884 leaf = path->nodes[0];
3885
3886 nritems = btrfs_header_nritems(leaf);
3887 data_end = leaf_data_end(leaf);
3888
3889 if (btrfs_leaf_free_space(leaf) < data_size) {
3890 btrfs_print_leaf(leaf);
3891 BUG();
3892 }
3893 slot = path->slots[0];
3894 old_data = btrfs_item_data_end(leaf, slot);
3895
3896 BUG_ON(slot < 0);
3897 if (slot >= nritems) {
3898 btrfs_print_leaf(leaf);
3899 btrfs_crit(leaf->fs_info, "slot %d too large, nritems %d",
3900 slot, nritems);
3901 BUG();
3902 }
3903
3904
3905
3906
3907
3908 btrfs_init_map_token(&token, leaf);
3909 for (i = slot; i < nritems; i++) {
3910 u32 ioff;
3911
3912 ioff = btrfs_token_item_offset(&token, i);
3913 btrfs_set_token_item_offset(&token, i, ioff - data_size);
3914 }
3915
3916
3917 memmove_extent_buffer(leaf, BTRFS_LEAF_DATA_OFFSET +
3918 data_end - data_size, BTRFS_LEAF_DATA_OFFSET +
3919 data_end, old_data - data_end);
3920
3921 data_end = old_data;
3922 old_size = btrfs_item_size(leaf, slot);
3923 btrfs_set_item_size(leaf, slot, old_size + data_size);
3924 btrfs_mark_buffer_dirty(leaf);
3925
3926 if (btrfs_leaf_free_space(leaf) < 0) {
3927 btrfs_print_leaf(leaf);
3928 BUG();
3929 }
3930 }
3931
3932
3933 /*
3934  * Make space in the leaf and set up the item headers before inserting one
3935  * or more items.  The caller must have made sure the leaf has enough room.
3936  *
3937  * @root:	root we are inserting items into
3938  * @path:	points to the leaf/slot where the new items go
3939  * @batch:	information about the batch of items to insert
3940  */
3941 static void setup_items_for_insert(struct btrfs_root *root, struct btrfs_path *path,
3942 const struct btrfs_item_batch *batch)
3943 {
3944 struct btrfs_fs_info *fs_info = root->fs_info;
3945 int i;
3946 u32 nritems;
3947 unsigned int data_end;
3948 struct btrfs_disk_key disk_key;
3949 struct extent_buffer *leaf;
3950 int slot;
3951 struct btrfs_map_token token;
3952 u32 total_size;
3953
3954
3955
3956
3957
3958
3959 if (path->slots[0] == 0) {
3960 btrfs_cpu_key_to_disk(&disk_key, &batch->keys[0]);
3961 fixup_low_keys(path, &disk_key, 1);
3962 }
3963 btrfs_unlock_up_safe(path, 1);
3964
3965 leaf = path->nodes[0];
3966 slot = path->slots[0];
3967
3968 nritems = btrfs_header_nritems(leaf);
3969 data_end = leaf_data_end(leaf);
3970 total_size = batch->total_data_size + (batch->nr * sizeof(struct btrfs_item));
3971
3972 if (btrfs_leaf_free_space(leaf) < total_size) {
3973 btrfs_print_leaf(leaf);
3974 btrfs_crit(fs_info, "not enough freespace need %u have %d",
3975 total_size, btrfs_leaf_free_space(leaf));
3976 BUG();
3977 }
3978
3979 btrfs_init_map_token(&token, leaf);
3980 if (slot != nritems) {
3981 unsigned int old_data = btrfs_item_data_end(leaf, slot);
3982
3983 if (old_data < data_end) {
3984 btrfs_print_leaf(leaf);
3985 btrfs_crit(fs_info,
3986 "item at slot %d with data offset %u beyond data end of leaf %u",
3987 slot, old_data, data_end);
3988 BUG();
3989 }
3990
3991
3992
3993
3994 for (i = slot; i < nritems; i++) {
3995 u32 ioff;
3996
3997 ioff = btrfs_token_item_offset(&token, i);
3998 btrfs_set_token_item_offset(&token, i,
3999 ioff - batch->total_data_size);
4000 }
4001
4002 memmove_extent_buffer(leaf, btrfs_item_nr_offset(slot + batch->nr),
4003 btrfs_item_nr_offset(slot),
4004 (nritems - slot) * sizeof(struct btrfs_item));
4005
4006
4007 memmove_extent_buffer(leaf, BTRFS_LEAF_DATA_OFFSET +
4008 data_end - batch->total_data_size,
4009 BTRFS_LEAF_DATA_OFFSET + data_end,
4010 old_data - data_end);
4011 data_end = old_data;
4012 }
4013
4014
4015 for (i = 0; i < batch->nr; i++) {
4016 btrfs_cpu_key_to_disk(&disk_key, &batch->keys[i]);
4017 btrfs_set_item_key(leaf, &disk_key, slot + i);
4018 data_end -= batch->data_sizes[i];
4019 btrfs_set_token_item_offset(&token, slot + i, data_end);
4020 btrfs_set_token_item_size(&token, slot + i, batch->data_sizes[i]);
4021 }
4022
4023 btrfs_set_header_nritems(leaf, nritems + batch->nr);
4024 btrfs_mark_buffer_dirty(leaf);
4025
4026 if (btrfs_leaf_free_space(leaf) < 0) {
4027 btrfs_print_leaf(leaf);
4028 BUG();
4029 }
4030 }
4031
4032 /*
4033  * Insert a new item into a leaf.
4034  *
4035  * @root:	the root of the btree
4036  * @path:	a path pointing to the target leaf and slot
4037  * @key:	the key of the new item
4038  * @data_size:	the size of the data associated with the new key
4039  */
4040 void btrfs_setup_item_for_insert(struct btrfs_root *root,
4041 struct btrfs_path *path,
4042 const struct btrfs_key *key,
4043 u32 data_size)
4044 {
4045 struct btrfs_item_batch batch;
4046
4047 batch.keys = key;
4048 batch.data_sizes = &data_size;
4049 batch.total_data_size = data_size;
4050 batch.nr = 1;
4051
4052 setup_items_for_insert(root, path, &batch);
4053 }
4054
4055 /*
4056  * Given a key and some data, insert items into the tree.  This does all the
4057  * path init required, making room in the tree if needed.
4058  */
4059 int btrfs_insert_empty_items(struct btrfs_trans_handle *trans,
4060 struct btrfs_root *root,
4061 struct btrfs_path *path,
4062 const struct btrfs_item_batch *batch)
4063 {
4064 int ret = 0;
4065 int slot;
4066 u32 total_size;
4067
4068 total_size = batch->total_data_size + (batch->nr * sizeof(struct btrfs_item));
4069 ret = btrfs_search_slot(trans, root, &batch->keys[0], path, total_size, 1);
4070 if (ret == 0)
4071 return -EEXIST;
4072 if (ret < 0)
4073 return ret;
4074
4075 slot = path->slots[0];
4076 BUG_ON(slot < 0);
4077
4078 setup_items_for_insert(root, path, batch);
4079 return 0;
4080 }
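
/*
 * Usage sketch (illustrative only, not part of the original file): inserting
 * two empty items in a single batch.  The keys and data sizes are made up;
 * after the call the caller fills in the item data through the path.
 */
#if 0	/* usage sketch only, never compiled */
static int example_insert_two_items(struct btrfs_trans_handle *trans,
				    struct btrfs_root *root,
				    struct btrfs_path *path,
				    const struct btrfs_key keys[2])
{
	const u32 sizes[2] = { 16, 32 };	/* hypothetical data sizes */
	struct btrfs_item_batch batch = {
		.keys = keys,
		.data_sizes = sizes,
		.total_data_size = 16 + 32,
		.nr = 2,
	};

	/* On success the path points at the first of the new (empty) items. */
	return btrfs_insert_empty_items(trans, root, path, &batch);
}
#endif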
4081
4082 /*
4083  * Given a key and some data, insert an item into the tree.  This does all
4084  * the path init required, making room in the tree if needed.
4085  */
4086 int btrfs_insert_item(struct btrfs_trans_handle *trans, struct btrfs_root *root,
4087 const struct btrfs_key *cpu_key, void *data,
4088 u32 data_size)
4089 {
4090 int ret = 0;
4091 struct btrfs_path *path;
4092 struct extent_buffer *leaf;
4093 unsigned long ptr;
4094
4095 path = btrfs_alloc_path();
4096 if (!path)
4097 return -ENOMEM;
4098 ret = btrfs_insert_empty_item(trans, root, path, cpu_key, data_size);
4099 if (!ret) {
4100 leaf = path->nodes[0];
4101 ptr = btrfs_item_ptr_offset(leaf, path->slots[0]);
4102 write_extent_buffer(leaf, data, ptr, data_size);
4103 btrfs_mark_buffer_dirty(leaf);
4104 }
4105 btrfs_free_path(path);
4106 return ret;
4107 }
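
/*
 * Usage sketch (illustrative only, not part of the original file): inserting
 * a small, fully formed item with btrfs_insert_item(), which allocates and
 * frees its own path.  The key type chosen here is a hypothetical example.
 */
#if 0	/* usage sketch only, never compiled */
static int example_insert_blob(struct btrfs_trans_handle *trans,
			       struct btrfs_root *root, u64 objectid,
			       void *blob, u32 blob_len)
{
	struct btrfs_key key;

	key.objectid = objectid;
	key.type = BTRFS_TEMPORARY_ITEM_KEY;	/* hypothetical choice */
	key.offset = 0;

	/* Fails with -EEXIST if an item with this exact key already exists. */
	return btrfs_insert_item(trans, root, &key, blob, blob_len);
}
#endif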
4108
4109 /*
4110  * This function duplicates an item, giving @new_key to the new item.  It
4111  * guarantees that both items live in the same tree leaf and that the new
4112  * item is contiguous with the original item.
4113  *
4114  * This allows us to split a file extent in place, keeping a lock on the
4115  * leaf the entire time.
4116  */
4117 int btrfs_duplicate_item(struct btrfs_trans_handle *trans,
4118 struct btrfs_root *root,
4119 struct btrfs_path *path,
4120 const struct btrfs_key *new_key)
4121 {
4122 struct extent_buffer *leaf;
4123 int ret;
4124 u32 item_size;
4125
4126 leaf = path->nodes[0];
4127 item_size = btrfs_item_size(leaf, path->slots[0]);
4128 ret = setup_leaf_for_split(trans, root, path,
4129 item_size + sizeof(struct btrfs_item));
4130 if (ret)
4131 return ret;
4132
4133 path->slots[0]++;
4134 btrfs_setup_item_for_insert(root, path, new_key, item_size);
4135 leaf = path->nodes[0];
4136 memcpy_extent_buffer(leaf,
4137 btrfs_item_ptr_offset(leaf, path->slots[0]),
4138 btrfs_item_ptr_offset(leaf, path->slots[0] - 1),
4139 item_size);
4140 return 0;
4141 }
4142
4143 /*
4144  * Delete the pointer at @slot from the node at @level of the path.
4145  *
4146  * The tree should have been previously balanced so the deletion does not
4147  * empty a node.
4148  */
4149 static void del_ptr(struct btrfs_root *root, struct btrfs_path *path,
4150 int level, int slot)
4151 {
4152 struct extent_buffer *parent = path->nodes[level];
4153 u32 nritems;
4154 int ret;
4155
4156 nritems = btrfs_header_nritems(parent);
4157 if (slot != nritems - 1) {
4158 if (level) {
4159 ret = btrfs_tree_mod_log_insert_move(parent, slot,
4160 slot + 1, nritems - slot - 1);
4161 BUG_ON(ret < 0);
4162 }
4163 memmove_extent_buffer(parent,
4164 btrfs_node_key_ptr_offset(slot),
4165 btrfs_node_key_ptr_offset(slot + 1),
4166 sizeof(struct btrfs_key_ptr) *
4167 (nritems - slot - 1));
4168 } else if (level) {
4169 ret = btrfs_tree_mod_log_insert_key(parent, slot,
4170 BTRFS_MOD_LOG_KEY_REMOVE, GFP_NOFS);
4171 BUG_ON(ret < 0);
4172 }
4173
4174 nritems--;
4175 btrfs_set_header_nritems(parent, nritems);
4176 if (nritems == 0 && parent == root->node) {
4177 BUG_ON(btrfs_header_level(root->node) != 1);
4178
4179 btrfs_set_header_level(root->node, 0);
4180 } else if (slot == 0) {
4181 struct btrfs_disk_key disk_key;
4182
4183 btrfs_node_key(parent, &disk_key, 0);
4184 fixup_low_keys(path, &disk_key, level + 1);
4185 }
4186 btrfs_mark_buffer_dirty(parent);
4187 }
4188
4189
4190
4191 /*
4192  * A helper to delete the leaf pointed to by path->slots[1] and
4193  * path->nodes[1].
4194  *
4195  * This removes the pointer in path->nodes[1] and then frees the leaf
4196  * block itself.
4197  * The path must already hold a write lock on the leaf's parent.
4198  */
4199 static noinline void btrfs_del_leaf(struct btrfs_trans_handle *trans,
4200 struct btrfs_root *root,
4201 struct btrfs_path *path,
4202 struct extent_buffer *leaf)
4203 {
4204 WARN_ON(btrfs_header_generation(leaf) != trans->transid);
4205 del_ptr(root, path, 1, path->slots[1]);
4206
4207
4208
4209
4210
4211 btrfs_unlock_up_safe(path, 0);
4212
4213 root_sub_used(root, leaf->len);
4214
4215 atomic_inc(&leaf->refs);
4216 btrfs_free_tree_block(trans, btrfs_root_id(root), leaf, 0, 1);
4217 free_extent_buffer_stale(leaf);
4218 }
4219
4220 /*
4221  * Delete @nr items starting at @slot in the path's leaf; if that empties
4222  * the leaf, remove the leaf from the tree as well. */
4223 int btrfs_del_items(struct btrfs_trans_handle *trans, struct btrfs_root *root,
4224 struct btrfs_path *path, int slot, int nr)
4225 {
4226 struct btrfs_fs_info *fs_info = root->fs_info;
4227 struct extent_buffer *leaf;
4228 int ret = 0;
4229 int wret;
4230 u32 nritems;
4231
4232 leaf = path->nodes[0];
4233 nritems = btrfs_header_nritems(leaf);
4234
4235 if (slot + nr != nritems) {
4236 const u32 last_off = btrfs_item_offset(leaf, slot + nr - 1);
4237 const int data_end = leaf_data_end(leaf);
4238 struct btrfs_map_token token;
4239 u32 dsize = 0;
4240 int i;
4241
4242 for (i = 0; i < nr; i++)
4243 dsize += btrfs_item_size(leaf, slot + i);
4244
4245 memmove_extent_buffer(leaf, BTRFS_LEAF_DATA_OFFSET +
4246 data_end + dsize,
4247 BTRFS_LEAF_DATA_OFFSET + data_end,
4248 last_off - data_end);
4249
4250 btrfs_init_map_token(&token, leaf);
4251 for (i = slot + nr; i < nritems; i++) {
4252 u32 ioff;
4253
4254 ioff = btrfs_token_item_offset(&token, i);
4255 btrfs_set_token_item_offset(&token, i, ioff + dsize);
4256 }
4257
4258 memmove_extent_buffer(leaf, btrfs_item_nr_offset(slot),
4259 btrfs_item_nr_offset(slot + nr),
4260 sizeof(struct btrfs_item) *
4261 (nritems - slot - nr));
4262 }
4263 btrfs_set_header_nritems(leaf, nritems - nr);
4264 nritems -= nr;
4265
4266
4267 if (nritems == 0) {
4268 if (leaf == root->node) {
4269 btrfs_set_header_level(leaf, 0);
4270 } else {
4271 btrfs_clean_tree_block(leaf);
4272 btrfs_del_leaf(trans, root, path, leaf);
4273 }
4274 } else {
4275 int used = leaf_space_used(leaf, 0, nritems);
4276 if (slot == 0) {
4277 struct btrfs_disk_key disk_key;
4278
4279 btrfs_item_key(leaf, &disk_key, 0);
4280 fixup_low_keys(path, &disk_key, 1);
4281 }
4282
4283
4284
4285 /*
4286  * The leaf uses less than a third of its space: try to compact the tree
4287  * by pushing its items into the left and then the right neighbour.  If
4288  * the leaf ends up completely empty it is deleted; otherwise it is kept
4289  * and simply marked dirty.
4290  */
4291 if (used < BTRFS_LEAF_DATA_SIZE(fs_info) / 3) {
4292 u32 min_push_space;
4293
4294
4295
4296
4297
4298 slot = path->slots[1];
4299 atomic_inc(&leaf->refs);
4300
4301
4302
4303
4304 min_push_space = sizeof(struct btrfs_item) +
4305 btrfs_item_size(leaf, 0);
4306 wret = push_leaf_left(trans, root, path, 0,
4307 min_push_space, 1, (u32)-1);
4308 if (wret < 0 && wret != -ENOSPC)
4309 ret = wret;
4310
4311 if (path->nodes[0] == leaf &&
4312 btrfs_header_nritems(leaf)) {
4313
4314
4315
4316
4317
4318
4319
4320
4321
4322
4323 nritems = btrfs_header_nritems(leaf);
4324 min_push_space = leaf_space_used(leaf, 0, nritems);
4325 wret = push_leaf_right(trans, root, path, 0,
4326 min_push_space, 1, 0);
4327 if (wret < 0 && wret != -ENOSPC)
4328 ret = wret;
4329 }
4330
4331 if (btrfs_header_nritems(leaf) == 0) {
4332 path->slots[1] = slot;
4333 btrfs_del_leaf(trans, root, path, leaf);
4334 free_extent_buffer(leaf);
4335 ret = 0;
4336 } else {
4337
4338
4339
4340
4341
4342 if (path->nodes[0] == leaf)
4343 btrfs_mark_buffer_dirty(leaf);
4344 free_extent_buffer(leaf);
4345 }
4346 } else {
4347 btrfs_mark_buffer_dirty(leaf);
4348 }
4349 }
4350 return ret;
4351 }
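
/*
 * Usage sketch (illustrative only, not part of the original file): looking
 * up an exact key and deleting that single item.  Names are hypothetical.
 */
#if 0	/* usage sketch only, never compiled */
static int example_delete_one_item(struct btrfs_trans_handle *trans,
				   struct btrfs_root *root,
				   const struct btrfs_key *key)
{
	struct btrfs_path *path;
	int ret;

	path = btrfs_alloc_path();
	if (!path)
		return -ENOMEM;

	/* ins_len == -1 tells the search we intend to delete from the leaf. */
	ret = btrfs_search_slot(trans, root, key, path, -1, 1);
	if (ret == 0)
		ret = btrfs_del_items(trans, root, path, path->slots[0], 1);
	else if (ret > 0)
		ret = -ENOENT;

	btrfs_free_path(path);
	return ret;
}
#endif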
4352
4353 /*
4354  * Search the tree again to find a leaf with lesser keys.
4355  *
4356  * Returns 0 if it found something, 1 if there are no lesser leaves and
4357  * < 0 on I/O errors.
4358  *
4359  * This may release the path, so any locks held when calling it can be lost.
4360  */
4361 int btrfs_prev_leaf(struct btrfs_root *root, struct btrfs_path *path)
4362 {
4363 struct btrfs_key key;
4364 struct btrfs_disk_key found_key;
4365 int ret;
4366
4367 btrfs_item_key_to_cpu(path->nodes[0], &key, 0);
4368
4369 if (key.offset > 0) {
4370 key.offset--;
4371 } else if (key.type > 0) {
4372 key.type--;
4373 key.offset = (u64)-1;
4374 } else if (key.objectid > 0) {
4375 key.objectid--;
4376 key.type = (u8)-1;
4377 key.offset = (u64)-1;
4378 } else {
4379 return 1;
4380 }
4381
4382 btrfs_release_path(path);
4383 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
4384 if (ret < 0)
4385 return ret;
4386 btrfs_item_key(path->nodes[0], &found_key, 0);
4387 ret = comp_keys(&found_key, &key);
4388
4389
4390
4391
4392
4393
4394
4395
4396
4397
4398 if (ret <= 0)
4399 return 0;
4400 return 1;
4401 }
4402
4403
4404 /*
4405  * A helper function to walk down the tree starting at @min_key, looking
4406  * for nodes or leaves that have a minimum transaction id.  This is used by
4407  * the btree defrag code and by tree logging.
4408  *
4409  * This does not COW, but it does stuff the starting key it finds back into
4410  * @min_key, so you can call btrfs_search_slot with cow=1 on @min_key and
4411  * walk down the tree.
4412  *
4413  * This honors path->lowest_level to prevent descent past a given level of
4414  * the tree.
4415  *
4416  * @min_trans indicates the oldest transaction you are interested in; nodes
4417  * and leaves older than @min_trans are skipped over without being read.
4418  *
4419  * Returns zero if something useful was found, < 0 on error and 1 if there
4420  * was nothing in the tree that matched the search criteria.
4421  */
4422 int btrfs_search_forward(struct btrfs_root *root, struct btrfs_key *min_key,
4423 struct btrfs_path *path,
4424 u64 min_trans)
4425 {
4426 struct extent_buffer *cur;
4427 struct btrfs_key found_key;
4428 int slot;
4429 int sret;
4430 u32 nritems;
4431 int level;
4432 int ret = 1;
4433 int keep_locks = path->keep_locks;
4434
4435 path->keep_locks = 1;
4436 again:
4437 cur = btrfs_read_lock_root_node(root);
4438 level = btrfs_header_level(cur);
4439 WARN_ON(path->nodes[level]);
4440 path->nodes[level] = cur;
4441 path->locks[level] = BTRFS_READ_LOCK;
4442
4443 if (btrfs_header_generation(cur) < min_trans) {
4444 ret = 1;
4445 goto out;
4446 }
4447 while (1) {
4448 nritems = btrfs_header_nritems(cur);
4449 level = btrfs_header_level(cur);
4450 sret = btrfs_bin_search(cur, min_key, &slot);
4451 if (sret < 0) {
4452 ret = sret;
4453 goto out;
4454 }
4455
4456
4457 if (level == path->lowest_level) {
4458 if (slot >= nritems)
4459 goto find_next_key;
4460 ret = 0;
4461 path->slots[level] = slot;
4462 btrfs_item_key_to_cpu(cur, &found_key, slot);
4463 goto out;
4464 }
4465 if (sret && slot > 0)
4466 slot--;
4467
4468
4469
4470
4471 while (slot < nritems) {
4472 u64 gen;
4473
4474 gen = btrfs_node_ptr_generation(cur, slot);
4475 if (gen < min_trans) {
4476 slot++;
4477 continue;
4478 }
4479 break;
4480 }
4481 find_next_key:
4482
4483
4484
4485
4486 if (slot >= nritems) {
4487 path->slots[level] = slot;
4488 sret = btrfs_find_next_key(root, path, min_key, level,
4489 min_trans);
4490 if (sret == 0) {
4491 btrfs_release_path(path);
4492 goto again;
4493 } else {
4494 goto out;
4495 }
4496 }
4497
4498 btrfs_node_key_to_cpu(cur, &found_key, slot);
4499 path->slots[level] = slot;
4500 if (level == path->lowest_level) {
4501 ret = 0;
4502 goto out;
4503 }
4504 cur = btrfs_read_node_slot(cur, slot);
4505 if (IS_ERR(cur)) {
4506 ret = PTR_ERR(cur);
4507 goto out;
4508 }
4509
4510 btrfs_tree_read_lock(cur);
4511
4512 path->locks[level - 1] = BTRFS_READ_LOCK;
4513 path->nodes[level - 1] = cur;
4514 unlock_up(path, level, 1, 0, NULL);
4515 }
4516 out:
4517 path->keep_locks = keep_locks;
4518 if (ret == 0) {
4519 btrfs_unlock_up_safe(path, path->lowest_level + 1);
4520 memcpy(min_key, &found_key, sizeof(found_key));
4521 }
4522 return ret;
4523 }
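
/*
 * Usage sketch (illustrative only, not part of the original file): walking
 * all leaf items in blocks newer than @min_trans, the way the defrag code
 * drives btrfs_search_forward().  Names are hypothetical and the key
 * increment is simplified (it ignores key.offset wrap-around).
 */
#if 0	/* usage sketch only, never compiled */
static int example_walk_new_items(struct btrfs_root *root, u64 min_trans)
{
	struct btrfs_path *path;
	struct btrfs_key key = { 0 };	/* start from the smallest key */
	int ret;

	path = btrfs_alloc_path();
	if (!path)
		return -ENOMEM;

	while (1) {
		ret = btrfs_search_forward(root, &key, path, min_trans);
		if (ret)	/* 1 means nothing newer, < 0 means error */
			break;

		/* ... inspect path->nodes[0] at path->slots[0] here ... */

		/* Step just past the key we processed and search again. */
		key.offset++;
		btrfs_release_path(path);
	}

	btrfs_free_path(path);
	return ret < 0 ? ret : 0;
}
#endif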
4524
4525
4526 /*
4527  * This is similar to btrfs_next_leaf, but does not try to preserve and fix
4528  * up the path.  It looks for and returns the next key in the tree based on
4529  * the current path and the @min_trans parameter.
4530  *
4531  * 0 is returned if another key is found, < 0 if there are any errors and 1
4532  * is returned if there are no higher keys in the tree.
4533  *
4534  * path->keep_locks should be set to 1 on the search made before calling this.
4535  */
4536 int btrfs_find_next_key(struct btrfs_root *root, struct btrfs_path *path,
4537 struct btrfs_key *key, int level, u64 min_trans)
4538 {
4539 int slot;
4540 struct extent_buffer *c;
4541
4542 WARN_ON(!path->keep_locks && !path->skip_locking);
4543 while (level < BTRFS_MAX_LEVEL) {
4544 if (!path->nodes[level])
4545 return 1;
4546
4547 slot = path->slots[level] + 1;
4548 c = path->nodes[level];
4549 next:
4550 if (slot >= btrfs_header_nritems(c)) {
4551 int ret;
4552 int orig_lowest;
4553 struct btrfs_key cur_key;
4554 if (level + 1 >= BTRFS_MAX_LEVEL ||
4555 !path->nodes[level + 1])
4556 return 1;
4557
4558 if (path->locks[level + 1] || path->skip_locking) {
4559 level++;
4560 continue;
4561 }
4562
4563 slot = btrfs_header_nritems(c) - 1;
4564 if (level == 0)
4565 btrfs_item_key_to_cpu(c, &cur_key, slot);
4566 else
4567 btrfs_node_key_to_cpu(c, &cur_key, slot);
4568
4569 orig_lowest = path->lowest_level;
4570 btrfs_release_path(path);
4571 path->lowest_level = level;
4572 ret = btrfs_search_slot(NULL, root, &cur_key, path,
4573 0, 0);
4574 path->lowest_level = orig_lowest;
4575 if (ret < 0)
4576 return ret;
4577
4578 c = path->nodes[level];
4579 slot = path->slots[level];
4580 if (ret == 0)
4581 slot++;
4582 goto next;
4583 }
4584
4585 if (level == 0)
4586 btrfs_item_key_to_cpu(c, key, slot);
4587 else {
4588 u64 gen = btrfs_node_ptr_generation(c, slot);
4589
4590 if (gen < min_trans) {
4591 slot++;
4592 goto next;
4593 }
4594 btrfs_node_key_to_cpu(c, key, slot);
4595 }
4596 return 0;
4597 }
4598 return 1;
4599 }
4600
4601 int btrfs_next_old_leaf(struct btrfs_root *root, struct btrfs_path *path,
4602 u64 time_seq)
4603 {
4604 int slot;
4605 int level;
4606 struct extent_buffer *c;
4607 struct extent_buffer *next;
4608 struct btrfs_fs_info *fs_info = root->fs_info;
4609 struct btrfs_key key;
4610 bool need_commit_sem = false;
4611 u32 nritems;
4612 int ret;
4613 int i;
4614
4615 nritems = btrfs_header_nritems(path->nodes[0]);
4616 if (nritems == 0)
4617 return 1;
4618
4619 btrfs_item_key_to_cpu(path->nodes[0], &key, nritems - 1);
4620 again:
4621 level = 1;
4622 next = NULL;
4623 btrfs_release_path(path);
4624
4625 path->keep_locks = 1;
4626
4627 if (time_seq) {
4628 ret = btrfs_search_old_slot(root, &key, path, time_seq);
4629 } else {
4630 if (path->need_commit_sem) {
4631 path->need_commit_sem = 0;
4632 need_commit_sem = true;
4633 down_read(&fs_info->commit_root_sem);
4634 }
4635 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
4636 }
4637 path->keep_locks = 0;
4638
4639 if (ret < 0)
4640 goto done;
4641
4642 nritems = btrfs_header_nritems(path->nodes[0]);
4643
4644 /*
4645  * By releasing the path above we dropped all our locks.  A balance could
4646  * have added more items next to the key that used to be at the very end
4647  * of the block, so check again whether there are now more items available.
4648  */
4649 if (nritems > 0 && path->slots[0] < nritems - 1) {
4650 if (ret == 0)
4651 path->slots[0]++;
4652 ret = 0;
4653 goto done;
4654 }
4655
4656 /*
4657  * The check above misses one case: after releasing the path, someone may
4658  * have removed the item that used to be at the very end of the block, and
4659  * balancing between leaves moved in another item with a bigger key.offset
4660  * to replace it.
4661  *
4662  * That item should be returned as well, otherwise we can end up with leaf
4663  * corruption later (e.g. in __btrfs_drop_extents()).
4664  *
4665  * A bit more explanation about this check: with ret > 0 the key was not
4666  * found, the path points at the slot where it would be inserted, so the
4667  * item at path->slots[0] must be the bigger one.
4668  */
4669 if (nritems > 0 && ret > 0 && path->slots[0] == nritems - 1) {
4670 ret = 0;
4671 goto done;
4672 }
4673
4674 while (level < BTRFS_MAX_LEVEL) {
4675 if (!path->nodes[level]) {
4676 ret = 1;
4677 goto done;
4678 }
4679
4680 slot = path->slots[level] + 1;
4681 c = path->nodes[level];
4682 if (slot >= btrfs_header_nritems(c)) {
4683 level++;
4684 if (level == BTRFS_MAX_LEVEL) {
4685 ret = 1;
4686 goto done;
4687 }
4688 continue;
4689 }
4690
4691
4692 /*
4693  * Our current level is where we are going to start from, and to make sure
4694  * lockdep doesn't complain we need to drop our locks and nodes from level 0
4695  * up to our current level.
4696  */
4697 for (i = 0; i < level; i++) {
4698 if (path->locks[level]) {
4699 btrfs_tree_read_unlock(path->nodes[i]);
4700 path->locks[i] = 0;
4701 }
4702 free_extent_buffer(path->nodes[i]);
4703 path->nodes[i] = NULL;
4704 }
4705
4706 next = c;
4707 ret = read_block_for_search(root, path, &next, level,
4708 slot, &key);
4709 if (ret == -EAGAIN)
4710 goto again;
4711
4712 if (ret < 0) {
4713 btrfs_release_path(path);
4714 goto done;
4715 }
4716
4717 if (!path->skip_locking) {
4718 ret = btrfs_try_tree_read_lock(next);
4719 if (!ret && time_seq) {
4720 /*
4721  * If we don't get the lock and a time_seq search is in progress, we
4722  * may be racing with push_leaf_left(), which holds that lock while
4723  * itself waiting for the leaf we have currently locked.  To resolve
4724  * the deadlock, give up our lock, release the path and restart the
4725  * whole search.
4726  */
4727 free_extent_buffer(next);
4728 btrfs_release_path(path);
4729 cond_resched();
4730 goto again;
4731 }
4732 if (!ret)
4733 btrfs_tree_read_lock(next);
4734 }
4735 break;
4736 }
4737 path->slots[level] = slot;
4738 while (1) {
4739 level--;
4740 path->nodes[level] = next;
4741 path->slots[level] = 0;
4742 if (!path->skip_locking)
4743 path->locks[level] = BTRFS_READ_LOCK;
4744 if (!level)
4745 break;
4746
4747 ret = read_block_for_search(root, path, &next, level,
4748 0, &key);
4749 if (ret == -EAGAIN)
4750 goto again;
4751
4752 if (ret < 0) {
4753 btrfs_release_path(path);
4754 goto done;
4755 }
4756
4757 if (!path->skip_locking)
4758 btrfs_tree_read_lock(next);
4759 }
4760 ret = 0;
4761 done:
4762 unlock_up(path, 0, 1, 0, NULL);
4763 if (need_commit_sem) {
4764 int ret2;
4765
4766 path->need_commit_sem = 1;
4767 ret2 = finish_need_commit_sem_search(path);
4768 up_read(&fs_info->commit_root_sem);
4769 if (ret2)
4770 ret = ret2;
4771 }
4772
4773 return ret;
4774 }
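
/*
 * Usage sketch (illustrative only, not part of the original file): iterating
 * forward over every item of a root starting at @first_key.  It assumes the
 * btrfs_next_leaf() helper from ctree.h, which calls btrfs_next_old_leaf()
 * with time_seq == 0; the other names are hypothetical.
 */
#if 0	/* usage sketch only, never compiled */
static int example_iterate_items(struct btrfs_root *root,
				 const struct btrfs_key *first_key)
{
	struct btrfs_path *path;
	struct btrfs_key found;
	int ret;

	path = btrfs_alloc_path();
	if (!path)
		return -ENOMEM;

	ret = btrfs_search_slot(NULL, root, first_key, path, 0, 0);
	if (ret < 0)
		goto out;

	while (1) {
		struct extent_buffer *leaf = path->nodes[0];

		if (path->slots[0] >= btrfs_header_nritems(leaf)) {
			ret = btrfs_next_leaf(root, path);
			if (ret)	/* 1 == no more leaves, < 0 == error */
				break;
			continue;
		}

		btrfs_item_key_to_cpu(leaf, &found, path->slots[0]);
		/* ... process the item at (leaf, path->slots[0]) here ... */
		path->slots[0]++;
	}
out:
	btrfs_free_path(path);
	return ret < 0 ? ret : 0;
}
#endif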
4775
4776 /*
4777  * This uses btrfs_prev_leaf() to walk backwards in the tree, and keeps
4778  * searching until it gets past @min_objectid or finds an item of @type.
4779  *
4780  * Returns 0 if something is found, 1 if nothing was found and < 0 on error.
4781  */
4782 int btrfs_previous_item(struct btrfs_root *root,
4783 struct btrfs_path *path, u64 min_objectid,
4784 int type)
4785 {
4786 struct btrfs_key found_key;
4787 struct extent_buffer *leaf;
4788 u32 nritems;
4789 int ret;
4790
4791 while (1) {
4792 if (path->slots[0] == 0) {
4793 ret = btrfs_prev_leaf(root, path);
4794 if (ret != 0)
4795 return ret;
4796 } else {
4797 path->slots[0]--;
4798 }
4799 leaf = path->nodes[0];
4800 nritems = btrfs_header_nritems(leaf);
4801 if (nritems == 0)
4802 return 1;
4803 if (path->slots[0] == nritems)
4804 path->slots[0]--;
4805
4806 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
4807 if (found_key.objectid < min_objectid)
4808 break;
4809 if (found_key.type == type)
4810 return 0;
4811 if (found_key.objectid == min_objectid &&
4812 found_key.type < type)
4813 break;
4814 }
4815 return 1;
4816 }
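
/*
 * Usage sketch (illustrative only, not part of the original file): stepping
 * backwards from wherever @path currently points to the previous INODE_ITEM
 * at or above @min_objectid.  The helper name is hypothetical.
 */
#if 0	/* usage sketch only, never compiled */
static int example_find_prev_inode_item(struct btrfs_root *root,
					struct btrfs_path *path,
					u64 min_objectid)
{
	struct btrfs_key key;
	int ret;

	ret = btrfs_previous_item(root, path, min_objectid,
				  BTRFS_INODE_ITEM_KEY);
	if (ret == 0)	/* 0: found, 1: nothing there, < 0: error */
		btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
	/* On success, @key now describes the inode item that was found. */

	return ret;
}
#endif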
4817
4818 /*
4819  * Search the extent tree for a previous metadata or data extent item with
4820  * an objectid no smaller than @min_objectid.
4821  *
4822  * Returns 0 if something is found, 1 if nothing was found and < 0 on error.
4823  */
4824 int btrfs_previous_extent_item(struct btrfs_root *root,
4825 struct btrfs_path *path, u64 min_objectid)
4826 {
4827 struct btrfs_key found_key;
4828 struct extent_buffer *leaf;
4829 u32 nritems;
4830 int ret;
4831
4832 while (1) {
4833 if (path->slots[0] == 0) {
4834 ret = btrfs_prev_leaf(root, path);
4835 if (ret != 0)
4836 return ret;
4837 } else {
4838 path->slots[0]--;
4839 }
4840 leaf = path->nodes[0];
4841 nritems = btrfs_header_nritems(leaf);
4842 if (nritems == 0)
4843 return 1;
4844 if (path->slots[0] == nritems)
4845 path->slots[0]--;
4846
4847 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
4848 if (found_key.objectid < min_objectid)
4849 break;
4850 if (found_key.type == BTRFS_EXTENT_ITEM_KEY ||
4851 found_key.type == BTRFS_METADATA_ITEM_KEY)
4852 return 0;
4853 if (found_key.objectid == min_objectid &&
4854 found_key.type < BTRFS_EXTENT_ITEM_KEY)
4855 break;
4856 }
4857 return 1;
4858 }