// SPDX-License-Identifier: GPL-2.0
/*
 *  fs/ext4/extent_status.c
 *
 * Ext4 extents status tree core functions.
 */
#include <linux/fs.h>
#include <linux/list_sort.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
#include "ext4.h"

#include <trace/events/ext4.h>

/*
 * The extent status tree is an in-memory cache recording the status of
 * each extent in an inode: written, unwritten, delayed (delalloc blocks
 * with no physical allocation yet), or a hole.  Entries are kept in an
 * rbtree indexed by logical block and are used to avoid repeated lookups
 * in the on-disk extent tree, to track delayed extents, and to support
 * operations such as fiemap, SEEK_HOLE/SEEK_DATA, and bigalloc cluster
 * reservation accounting.
 *
 * Entries that are not delayed are reclaimable under memory pressure via
 * a registered shrinker; delayed entries must be retained until their
 * blocks are allocated.
 */

static struct kmem_cache *ext4_es_cachep;
static struct kmem_cache *ext4_pending_cachep;

static int __es_insert_extent(struct inode *inode, struct extent_status *newes);
static int __es_remove_extent(struct inode *inode, ext4_lblk_t lblk,
                              ext4_lblk_t end, int *reserved);
static int es_reclaim_extents(struct ext4_inode_info *ei, int *nr_to_scan);
static int __es_shrink(struct ext4_sb_info *sbi, int nr_to_scan,
                       struct ext4_inode_info *locked_ei);
static void __revise_pending(struct inode *inode, ext4_lblk_t lblk,
                             ext4_lblk_t len);

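/*
 * Slab cache for extent status entries, created at module init and
 * destroyed at module exit.
 */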
int __init ext4_init_es(void)
{
        ext4_es_cachep = kmem_cache_create("ext4_extent_status",
                                           sizeof(struct extent_status),
                                           0, (SLAB_RECLAIM_ACCOUNT), NULL);
        if (ext4_es_cachep == NULL)
                return -ENOMEM;
        return 0;
}

void ext4_exit_es(void)
{
        kmem_cache_destroy(ext4_es_cachep);
}

void ext4_es_init_tree(struct ext4_es_tree *tree)
{
        tree->root = RB_ROOT;
        tree->cache_es = NULL;
}

#ifdef ES_DEBUG__
static void ext4_es_print_tree(struct inode *inode)
{
        struct ext4_es_tree *tree;
        struct rb_node *node;

        printk(KERN_DEBUG "status extents for inode %lu:", inode->i_ino);
        tree = &EXT4_I(inode)->i_es_tree;
        node = rb_first(&tree->root);
        while (node) {
                struct extent_status *es;

                es = rb_entry(node, struct extent_status, rb_node);
                printk(KERN_DEBUG " [%u/%u) %llu %x",
                       es->es_lblk, es->es_len,
                       ext4_es_pblock(es), ext4_es_status(es));
                node = rb_next(node);
        }
        printk(KERN_DEBUG "\n");
}
#else
#define ext4_es_print_tree(inode)
#endif

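/* Return the last logical block covered by the extent. */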
static inline ext4_lblk_t ext4_es_end(struct extent_status *es)
{
        BUG_ON(es->es_lblk + es->es_len < es->es_lblk);
        return es->es_lblk + es->es_len - 1;
}

/*
 * Search through the tree for an extent_status containing the given
 * logical block.  If it can't be found, return the next extent if any,
 * and NULL if there is none.
 */
static struct extent_status *__es_tree_search(struct rb_root *root,
                                              ext4_lblk_t lblk)
{
        struct rb_node *node = root->rb_node;
        struct extent_status *es = NULL;

        while (node) {
                es = rb_entry(node, struct extent_status, rb_node);
                if (lblk < es->es_lblk)
                        node = node->rb_left;
                else if (lblk > ext4_es_end(es))
                        node = node->rb_right;
                else
                        return es;
        }

        if (es && lblk < es->es_lblk)
                return es;

        if (es && lblk > ext4_es_end(es)) {
                node = rb_next(&es->rb_node);
                return node ? rb_entry(node, struct extent_status, rb_node) :
                              NULL;
        }

        return NULL;
}

/*
 * __es_find_extent_range - find the first extent with the specified status
 *                          within a block range, or the next matching extent
 *                          following the range, in the extents status tree
 *
 * @inode - file containing the range
 * @matching_fn - pointer to function that matches extents with desired status
 * @lblk - logical block defining start of range
 * @end - logical block defining end of range
 * @es - extent found, if any
 *
 * Finds the first extent in the range [@lblk, @end] that satisfies
 * @matching_fn and returns it in @es.  If no match is found within the
 * range but a matching extent exists beyond it, that extent is returned
 * instead.  If nothing is found, @es is returned with es_lblk, es_len,
 * and es_pblk set to 0.  Must be called while holding i_es_lock.
 */
static void __es_find_extent_range(struct inode *inode,
                                   int (*matching_fn)(struct extent_status *es),
                                   ext4_lblk_t lblk, ext4_lblk_t end,
                                   struct extent_status *es)
{
        struct ext4_es_tree *tree = NULL;
        struct extent_status *es1 = NULL;
        struct rb_node *node;

        WARN_ON(es == NULL);
        WARN_ON(end < lblk);

        tree = &EXT4_I(inode)->i_es_tree;

        /* see if the extent has been cached */
        es->es_lblk = es->es_len = es->es_pblk = 0;
        if (tree->cache_es) {
                es1 = tree->cache_es;
                if (in_range(lblk, es1->es_lblk, es1->es_len)) {
                        es_debug("%u cached by [%u/%u) %llu %x\n",
                                 lblk, es1->es_lblk, es1->es_len,
                                 ext4_es_pblock(es1), ext4_es_status(es1));
                        goto out;
                }
        }

        es1 = __es_tree_search(&tree->root, lblk);

out:
        if (es1 && !matching_fn(es1)) {
                while ((node = rb_next(&es1->rb_node)) != NULL) {
                        es1 = rb_entry(node, struct extent_status, rb_node);
                        if (es1->es_lblk > end) {
                                es1 = NULL;
                                break;
                        }
                        if (matching_fn(es1))
                                break;
                }
        }

        if (es1 && matching_fn(es1)) {
                tree->cache_es = es1;
                es->es_lblk = es1->es_lblk;
                es->es_len = es1->es_len;
                es->es_pblk = es1->es_pblk;
        }
}

/*
 * Locking for __es_find_extent_range() for external use
 */
void ext4_es_find_extent_range(struct inode *inode,
                               int (*matching_fn)(struct extent_status *es),
                               ext4_lblk_t lblk, ext4_lblk_t end,
                               struct extent_status *es)
{
        if (EXT4_SB(inode->i_sb)->s_mount_state & EXT4_FC_REPLAY)
                return;

        trace_ext4_es_find_extent_range_enter(inode, lblk);

        read_lock(&EXT4_I(inode)->i_es_lock);
        __es_find_extent_range(inode, matching_fn, lblk, end, es);
        read_unlock(&EXT4_I(inode)->i_es_lock);

        trace_ext4_es_find_extent_range_exit(inode, es);
}

/*
 * __es_scan_range - search block range for block with specified status
 *                   in extents status tree
 *
 * @inode - file containing the range
 * @matching_fn - pointer to function that matches extents with desired status
 * @start - logical block defining start of range
 * @end - logical block defining end of range
 *
 * Returns true if at least one block in the specified block range satisfies
 * the criterion specified by @matching_fn, and false if not.  Should only
 * be called by code that has taken i_es_lock.
 */
static bool __es_scan_range(struct inode *inode,
                            int (*matching_fn)(struct extent_status *es),
                            ext4_lblk_t start, ext4_lblk_t end)
{
        struct extent_status es;

        __es_find_extent_range(inode, matching_fn, start, end, &es);
        if (es.es_len == 0)
                return false;   /* no matching extent in the tree */
        else if (es.es_lblk <= start &&
                 start < es.es_lblk + es.es_len)
                return true;
        else if (start <= es.es_lblk && es.es_lblk <= end)
                return true;
        else
                return false;
}

/*
 * Locking for __es_scan_range() for external use
 */
bool ext4_es_scan_range(struct inode *inode,
                        int (*matching_fn)(struct extent_status *es),
                        ext4_lblk_t lblk, ext4_lblk_t end)
{
        bool ret;

        if (EXT4_SB(inode->i_sb)->s_mount_state & EXT4_FC_REPLAY)
                return false;

        read_lock(&EXT4_I(inode)->i_es_lock);
        ret = __es_scan_range(inode, matching_fn, lblk, end);
        read_unlock(&EXT4_I(inode)->i_es_lock);

        return ret;
}

/*
 * __es_scan_clu - search cluster for block with specified status in
 *                 extents status tree
 *
 * @inode - file containing the cluster
 * @matching_fn - pointer to function that matches extents with desired status
 * @lblk - logical block in cluster to be searched
 *
 * Returns true if at least one block in the cluster containing @lblk
 * satisfies the criterion specified by @matching_fn, and false if not.
 * Should only be called by code that has taken i_es_lock.
 */
static bool __es_scan_clu(struct inode *inode,
                          int (*matching_fn)(struct extent_status *es),
                          ext4_lblk_t lblk)
{
        struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
        ext4_lblk_t lblk_start, lblk_end;

        lblk_start = EXT4_LBLK_CMASK(sbi, lblk);
        lblk_end = lblk_start + sbi->s_cluster_ratio - 1;

        return __es_scan_range(inode, matching_fn, lblk_start, lblk_end);
}

/*
 * Locking for __es_scan_clu() for external use
 */
bool ext4_es_scan_clu(struct inode *inode,
                      int (*matching_fn)(struct extent_status *es),
                      ext4_lblk_t lblk)
{
        bool ret;

        if (EXT4_SB(inode->i_sb)->s_mount_state & EXT4_FC_REPLAY)
                return false;

        read_lock(&EXT4_I(inode)->i_es_lock);
        ret = __es_scan_clu(inode, matching_fn, lblk);
        read_unlock(&EXT4_I(inode)->i_es_lock);

        return ret;
}

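/*
 * Add the inode to the superblock's list of inodes with reclaimable
 * extent status entries, used by the shrinker.  The unlocked check of
 * i_es_list lets callers skip taking s_es_lock when the inode is
 * already on the list; the check is repeated under the lock.
 */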
static void ext4_es_list_add(struct inode *inode)
{
        struct ext4_inode_info *ei = EXT4_I(inode);
        struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);

        if (!list_empty(&ei->i_es_list))
                return;

        spin_lock(&sbi->s_es_lock);
        if (list_empty(&ei->i_es_list)) {
                list_add_tail(&ei->i_es_list, &sbi->s_es_list);
                sbi->s_es_nr_inode++;
        }
        spin_unlock(&sbi->s_es_lock);
}

static void ext4_es_list_del(struct inode *inode)
{
        struct ext4_inode_info *ei = EXT4_I(inode);
        struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);

        spin_lock(&sbi->s_es_lock);
        if (!list_empty(&ei->i_es_list)) {
                list_del_init(&ei->i_es_list);
                sbi->s_es_nr_inode--;
                WARN_ON_ONCE(sbi->s_es_nr_inode < 0);
        }
        spin_unlock(&sbi->s_es_lock);
}

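/*
 * Allocate a new extent status entry and update the per-inode and
 * per-filesystem counters used by the shrinker and the statistics file.
 */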
static struct extent_status *
ext4_es_alloc_extent(struct inode *inode, ext4_lblk_t lblk, ext4_lblk_t len,
                     ext4_fsblk_t pblk)
{
        struct extent_status *es;

        es = kmem_cache_alloc(ext4_es_cachep, GFP_ATOMIC);
        if (es == NULL)
                return NULL;
        es->es_lblk = lblk;
        es->es_len = len;
        es->es_pblk = pblk;

        /*
         * We don't count delayed extents because we never try to reclaim
         * them.
         */
        if (!ext4_es_is_delayed(es)) {
                if (!EXT4_I(inode)->i_es_shk_nr++)
                        ext4_es_list_add(inode);
                percpu_counter_inc(&EXT4_SB(inode->i_sb)->
                                        s_es_stats.es_stats_shk_cnt);
        }

        EXT4_I(inode)->i_es_all_nr++;
        percpu_counter_inc(&EXT4_SB(inode->i_sb)->s_es_stats.es_stats_all_cnt);

        return es;
}

static void ext4_es_free_extent(struct inode *inode, struct extent_status *es)
{
        EXT4_I(inode)->i_es_all_nr--;
        percpu_counter_dec(&EXT4_SB(inode->i_sb)->s_es_stats.es_stats_all_cnt);

        /* Decrease the shrink counter when this es is not delayed */
        if (!ext4_es_is_delayed(es)) {
                BUG_ON(EXT4_I(inode)->i_es_shk_nr == 0);
                if (!--EXT4_I(inode)->i_es_shk_nr)
                        ext4_es_list_del(inode);
                percpu_counter_dec(&EXT4_SB(inode->i_sb)->
                                        s_es_stats.es_stats_shk_cnt);
        }

        kmem_cache_free(ext4_es_cachep, es);
}

/*
 * Check whether or not two extents can be merged.
 * Conditions:
 *  - logical block numbers are contiguous
 *  - physical block numbers are contiguous
 *  - status is equal
 */
static int ext4_es_can_be_merged(struct extent_status *es1,
                                 struct extent_status *es2)
{
        if (ext4_es_type(es1) != ext4_es_type(es2))
                return 0;

        if (((__u64) es1->es_len) + es2->es_len > EXT_MAX_BLOCKS) {
                pr_warn("ES assertion failed when merging extents. "
                        "The sum of lengths of es1 (%d) and es2 (%d) "
                        "is bigger than allowed file size (%d)\n",
                        es1->es_len, es2->es_len, EXT_MAX_BLOCKS);
                WARN_ON(1);
                return 0;
        }

        if (((__u64) es1->es_lblk) + es1->es_len != es2->es_lblk)
                return 0;

        if ((ext4_es_is_written(es1) || ext4_es_is_unwritten(es1)) &&
            (ext4_es_pblock(es1) + es1->es_len == ext4_es_pblock(es2)))
                return 1;

        if (ext4_es_is_hole(es1))
                return 1;

        /* we need to check that a delayed extent is without unwritten status */
        if (ext4_es_is_delayed(es1) && !ext4_es_is_unwritten(es1))
                return 1;

        return 0;
}

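/*
 * Try to merge the extent with its left/right neighbor in the tree.  On
 * a successful merge the absorbed entry is erased and freed, and the
 * surviving entry is returned.
 */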
static struct extent_status *
ext4_es_try_to_merge_left(struct inode *inode, struct extent_status *es)
{
        struct ext4_es_tree *tree = &EXT4_I(inode)->i_es_tree;
        struct extent_status *es1;
        struct rb_node *node;

        node = rb_prev(&es->rb_node);
        if (!node)
                return es;

        es1 = rb_entry(node, struct extent_status, rb_node);
        if (ext4_es_can_be_merged(es1, es)) {
                es1->es_len += es->es_len;
                if (ext4_es_is_referenced(es))
                        ext4_es_set_referenced(es1);
                rb_erase(&es->rb_node, &tree->root);
                ext4_es_free_extent(inode, es);
                es = es1;
        }

        return es;
}

static struct extent_status *
ext4_es_try_to_merge_right(struct inode *inode, struct extent_status *es)
{
        struct ext4_es_tree *tree = &EXT4_I(inode)->i_es_tree;
        struct extent_status *es1;
        struct rb_node *node;

        node = rb_next(&es->rb_node);
        if (!node)
                return es;

        es1 = rb_entry(node, struct extent_status, rb_node);
        if (ext4_es_can_be_merged(es, es1)) {
                es->es_len += es1->es_len;
                if (ext4_es_is_referenced(es1))
                        ext4_es_set_referenced(es);
                rb_erase(node, &tree->root);
                ext4_es_free_extent(inode, es1);
        }

        return es;
}

#ifdef ES_AGGRESSIVE_TEST
#include "ext4_extents.h"       /* Needed when ES_AGGRESSIVE_TEST is defined */

static void ext4_es_insert_extent_ext_check(struct inode *inode,
                                            struct extent_status *es)
{
        struct ext4_ext_path *path = NULL;
        struct ext4_extent *ex;
        ext4_lblk_t ee_block;
        ext4_fsblk_t ee_start;
        unsigned short ee_len;
        int depth, ee_status, es_status;

        path = ext4_find_extent(inode, es->es_lblk, NULL, EXT4_EX_NOCACHE);
        if (IS_ERR(path))
                return;

        depth = ext_depth(inode);
        ex = path[depth].p_ext;

        if (ex) {
                ee_block = le32_to_cpu(ex->ee_block);
                ee_start = ext4_ext_pblock(ex);
                ee_len = ext4_ext_get_actual_len(ex);

                ee_status = ext4_ext_is_unwritten(ex) ? 1 : 0;
                es_status = ext4_es_is_unwritten(es) ? 1 : 0;

                /*
                 * Make sure ex and es do not overlap when we try to insert
                 * a delayed/hole extent.
                 */
                if (!ext4_es_is_written(es) && !ext4_es_is_unwritten(es)) {
                        if (in_range(es->es_lblk, ee_block, ee_len)) {
                                pr_warn("ES insert assertion failed for "
                                        "inode: %lu we can find an extent "
                                        "at block [%d/%d/%llu/%c], but we "
                                        "want to add a delayed/hole extent "
                                        "[%d/%d/%llu/%x]\n",
                                        inode->i_ino, ee_block, ee_len,
                                        ee_start, ee_status ? 'u' : 'w',
                                        es->es_lblk, es->es_len,
                                        ext4_es_pblock(es), ext4_es_status(es));
                        }
                        goto out;
                }

                /*
                 * We don't check ee_block == es->es_lblk etc. because es
                 * might be a part of the whole extent, and vice versa.
                 */
                if (es->es_lblk < ee_block ||
                    ext4_es_pblock(es) != ee_start + es->es_lblk - ee_block) {
                        pr_warn("ES insert assertion failed for inode: %lu "
                                "ex_status [%d/%d/%llu/%c] != "
                                "es_status [%d/%d/%llu/%c]\n", inode->i_ino,
                                ee_block, ee_len, ee_start,
                                ee_status ? 'u' : 'w', es->es_lblk, es->es_len,
                                ext4_es_pblock(es), es_status ? 'u' : 'w');
                        goto out;
                }

                if (ee_status ^ es_status) {
                        pr_warn("ES insert assertion failed for inode: %lu "
                                "ex_status [%d/%d/%llu/%c] != "
                                "es_status [%d/%d/%llu/%c]\n", inode->i_ino,
                                ee_block, ee_len, ee_start,
                                ee_status ? 'u' : 'w', es->es_lblk, es->es_len,
                                ext4_es_pblock(es), es_status ? 'u' : 'w');
                }
        } else {
                /*
                 * We can't find an extent on disk.  So we need to make sure
                 * that we don't want to add a written/unwritten extent.
                 */
                if (!ext4_es_is_delayed(es) && !ext4_es_is_hole(es)) {
                        pr_warn("ES insert assertion failed for inode: %lu "
                                "can't find an extent at block %d but we want "
                                "to add a written/unwritten extent "
                                "[%d/%d/%llu/%x]\n", inode->i_ino,
                                es->es_lblk, es->es_lblk, es->es_len,
                                ext4_es_pblock(es), ext4_es_status(es));
                }
        }
out:
        ext4_ext_drop_refs(path);
        kfree(path);
}

static void ext4_es_insert_extent_ind_check(struct inode *inode,
                                            struct extent_status *es)
{
        struct ext4_map_blocks map;
        int retval;

        /*
         * Here we call ext4_ind_map_blocks to look up a block mapping.
         * The 'Indirect' structure is defined in indirect.c, so we can't
         * access the direct/indirect tree from outside, and defining this
         * function in indirect.c would be too intrusive.
         */
        map.m_lblk = es->es_lblk;
        map.m_len = es->es_len;

        retval = ext4_ind_map_blocks(NULL, inode, &map, 0);
        if (retval > 0) {
                if (ext4_es_is_delayed(es) || ext4_es_is_hole(es)) {
                        /*
                         * We want to add a delayed/hole extent but this
                         * block has been allocated.
                         */
                        pr_warn("ES insert assertion failed for inode: %lu "
                                "We can find blocks but we want to add a "
                                "delayed/hole extent [%d/%d/%llu/%x]\n",
                                inode->i_ino, es->es_lblk, es->es_len,
                                ext4_es_pblock(es), ext4_es_status(es));
                        return;
                } else if (ext4_es_is_written(es)) {
                        if (retval != es->es_len) {
                                pr_warn("ES insert assertion failed for "
                                        "inode: %lu retval %d != es_len %d\n",
                                        inode->i_ino, retval, es->es_len);
                                return;
                        }
                        if (map.m_pblk != ext4_es_pblock(es)) {
                                pr_warn("ES insert assertion failed for "
                                        "inode: %lu m_pblk %llu != "
                                        "es_pblk %llu\n",
                                        inode->i_ino, map.m_pblk,
                                        ext4_es_pblock(es));
                                return;
                        }
                } else {
                        /*
                         * We don't need to check unwritten extents because
                         * indirect-based files don't have them.
                         */
                        BUG();
                }
        } else if (retval == 0) {
                if (ext4_es_is_written(es)) {
                        pr_warn("ES insert assertion failed for inode: %lu "
                                "We can't find the block but we want to add "
                                "a written extent [%d/%d/%llu/%x]\n",
                                inode->i_ino, es->es_lblk, es->es_len,
                                ext4_es_pblock(es), ext4_es_status(es));
                        return;
                }
        }
}

static inline void ext4_es_insert_extent_check(struct inode *inode,
                                               struct extent_status *es)
{
        /*
         * We don't need to worry about races because the caller holds
         * i_data_sem.
         */
        BUG_ON(!rwsem_is_locked(&EXT4_I(inode)->i_data_sem));
        if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))
                ext4_es_insert_extent_ext_check(inode, es);
        else
                ext4_es_insert_extent_ind_check(inode, es);
}
#else
static inline void ext4_es_insert_extent_check(struct inode *inode,
                                               struct extent_status *es)
{
}
#endif

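/*
 * Insert @newes into the tree, merging it with an existing neighbor when
 * the new range is contiguous and compatible with it.  The caller must
 * have already removed any overlapping entries and must hold i_es_lock
 * for writing.
 */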
static int __es_insert_extent(struct inode *inode, struct extent_status *newes)
{
        struct ext4_es_tree *tree = &EXT4_I(inode)->i_es_tree;
        struct rb_node **p = &tree->root.rb_node;
        struct rb_node *parent = NULL;
        struct extent_status *es;

        while (*p) {
                parent = *p;
                es = rb_entry(parent, struct extent_status, rb_node);

                if (newes->es_lblk < es->es_lblk) {
                        if (ext4_es_can_be_merged(newes, es)) {
                                /*
                                 * Here we can modify es_lblk directly
                                 * because it isn't overlapped.
                                 */
                                es->es_lblk = newes->es_lblk;
                                es->es_len += newes->es_len;
                                if (ext4_es_is_written(es) ||
                                    ext4_es_is_unwritten(es))
                                        ext4_es_store_pblock(es,
                                                             newes->es_pblk);

                                es = ext4_es_try_to_merge_left(inode, es);
                                goto out;
                        }
                        p = &(*p)->rb_left;
                } else if (newes->es_lblk > ext4_es_end(es)) {
                        if (ext4_es_can_be_merged(es, newes)) {
                                es->es_len += newes->es_len;
                                es = ext4_es_try_to_merge_right(inode, es);
                                goto out;
                        }
                        p = &(*p)->rb_right;
                } else {
                        BUG();
                        return -EINVAL;
                }
        }

        es = ext4_es_alloc_extent(inode, newes->es_lblk, newes->es_len,
                                  newes->es_pblk);
        if (!es)
                return -ENOMEM;
        rb_link_node(&es->rb_node, parent, p);
        rb_insert_color(&es->rb_node, &tree->root);

out:
        tree->cache_es = es;
        return 0;
}

/*
 * ext4_es_insert_extent() adds information to an inode's extent
 * status tree.
 *
 * Return 0 on success, error code on failure.
 */
int ext4_es_insert_extent(struct inode *inode, ext4_lblk_t lblk,
                          ext4_lblk_t len, ext4_fsblk_t pblk,
                          unsigned int status)
{
        struct extent_status newes;
        ext4_lblk_t end = lblk + len - 1;
        int err = 0;
        struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);

        if (EXT4_SB(inode->i_sb)->s_mount_state & EXT4_FC_REPLAY)
                return 0;

        es_debug("add [%u/%u) %llu %x to extent status tree of inode %lu\n",
                 lblk, len, pblk, status, inode->i_ino);

        if (!len)
                return 0;

        BUG_ON(end < lblk);

        if ((status & EXTENT_STATUS_DELAYED) &&
            (status & EXTENT_STATUS_WRITTEN)) {
                ext4_warning(inode->i_sb, "Inserting extent [%u/%u] as "
                             "delayed and written, which can potentially "
                             "cause data loss.", lblk, len);
                WARN_ON(1);
        }

        newes.es_lblk = lblk;
        newes.es_len = len;
        ext4_es_store_pblock_status(&newes, pblk, status);
        trace_ext4_es_insert_extent(inode, &newes);

        ext4_es_insert_extent_check(inode, &newes);

        write_lock(&EXT4_I(inode)->i_es_lock);
        err = __es_remove_extent(inode, lblk, end, NULL);
        if (err != 0)
                goto error;
retry:
        err = __es_insert_extent(inode, &newes);
        if (err == -ENOMEM && __es_shrink(EXT4_SB(inode->i_sb),
                                          128, EXT4_I(inode)))
                goto retry;
        if (err == -ENOMEM && !ext4_es_is_delayed(&newes))
                err = 0;

        if (sbi->s_cluster_ratio > 1 && test_opt(inode->i_sb, DELALLOC) &&
            (status & EXTENT_STATUS_WRITTEN ||
             status & EXTENT_STATUS_UNWRITTEN))
                __revise_pending(inode, lblk, len);

error:
        write_unlock(&EXT4_I(inode)->i_es_lock);

        ext4_es_print_tree(inode);

        return err;
}

/*
 * ext4_es_cache_extent() inserts information into the extent status
 * tree if and only if there isn't information about the range in
 * question already.
 */
void ext4_es_cache_extent(struct inode *inode, ext4_lblk_t lblk,
                          ext4_lblk_t len, ext4_fsblk_t pblk,
                          unsigned int status)
{
        struct extent_status *es;
        struct extent_status newes;
        ext4_lblk_t end = lblk + len - 1;

        if (EXT4_SB(inode->i_sb)->s_mount_state & EXT4_FC_REPLAY)
                return;

        newes.es_lblk = lblk;
        newes.es_len = len;
        ext4_es_store_pblock_status(&newes, pblk, status);
        trace_ext4_es_cache_extent(inode, &newes);

        if (!len)
                return;

        BUG_ON(end < lblk);

        write_lock(&EXT4_I(inode)->i_es_lock);

        es = __es_tree_search(&EXT4_I(inode)->i_es_tree.root, lblk);
        if (!es || es->es_lblk > end)
                __es_insert_extent(inode, &newes);
        write_unlock(&EXT4_I(inode)->i_es_lock);
}

/*
 * ext4_es_lookup_extent() looks up an extent in the extent status tree.
 *
 * ext4_es_lookup_extent is called by ext4_map_blocks/ext4_da_map_blocks.
 *
 * Return: 1 on found, 0 on not
 */
int ext4_es_lookup_extent(struct inode *inode, ext4_lblk_t lblk,
                          ext4_lblk_t *next_lblk,
                          struct extent_status *es)
{
        struct ext4_es_tree *tree;
        struct ext4_es_stats *stats;
        struct extent_status *es1 = NULL;
        struct rb_node *node;
        int found = 0;

        if (EXT4_SB(inode->i_sb)->s_mount_state & EXT4_FC_REPLAY)
                return 0;

        trace_ext4_es_lookup_extent_enter(inode, lblk);
        es_debug("lookup extent in block %u\n", lblk);

        tree = &EXT4_I(inode)->i_es_tree;
        read_lock(&EXT4_I(inode)->i_es_lock);

        /* find extent in cache firstly */
        es->es_lblk = es->es_len = es->es_pblk = 0;
        if (tree->cache_es) {
                es1 = tree->cache_es;
                if (in_range(lblk, es1->es_lblk, es1->es_len)) {
                        es_debug("%u cached by [%u/%u)\n",
                                 lblk, es1->es_lblk, es1->es_len);
                        found = 1;
                        goto out;
                }
        }

        node = tree->root.rb_node;
        while (node) {
                es1 = rb_entry(node, struct extent_status, rb_node);
                if (lblk < es1->es_lblk)
                        node = node->rb_left;
                else if (lblk > ext4_es_end(es1))
                        node = node->rb_right;
                else {
                        found = 1;
                        break;
                }
        }

out:
        stats = &EXT4_SB(inode->i_sb)->s_es_stats;
        if (found) {
                BUG_ON(!es1);
                es->es_lblk = es1->es_lblk;
                es->es_len = es1->es_len;
                es->es_pblk = es1->es_pblk;
                if (!ext4_es_is_referenced(es1))
                        ext4_es_set_referenced(es1);
                percpu_counter_inc(&stats->es_stats_cache_hits);
                if (next_lblk) {
                        node = rb_next(&es1->rb_node);
                        if (node) {
                                es1 = rb_entry(node, struct extent_status,
                                               rb_node);
                                *next_lblk = es1->es_lblk;
                        } else
                                *next_lblk = 0;
                }
        } else {
                percpu_counter_inc(&stats->es_stats_cache_misses);
        }

        read_unlock(&EXT4_I(inode)->i_es_lock);

        trace_ext4_es_lookup_extent_exit(inode, es, found);
        return found;
}

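/*
 * Bookkeeping used while removing a block range from the tree to count
 * how many cluster reservations can be released: the running count of
 * delayed-only clusters, the first and last delayed-only blocks seen,
 * the extent to the left of the removed range, and a partial cluster
 * being tracked at the current scan position.
 */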
struct rsvd_count {
        int ndelonly;
        bool first_do_lblk_found;
        ext4_lblk_t first_do_lblk;
        ext4_lblk_t last_do_lblk;
        struct extent_status *left_es;
        bool partial;
        ext4_lblk_t lclu;
};

/*
 * init_rsvd - initialize reserved count data before removing block range
 *             in file from extent status tree
 *
 * @inode - file containing range
 * @lblk - first block in range
 * @es - pointer to first extent in range
 * @rc - pointer to reserved count data
 *
 * Assumes es is not NULL
 */
static void init_rsvd(struct inode *inode, ext4_lblk_t lblk,
                      struct extent_status *es, struct rsvd_count *rc)
{
        struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
        struct rb_node *node;

        rc->ndelonly = 0;

        /*
         * for bigalloc, note the first delonly block in the range has not
         * been found, record the extent containing the block to the left of
         * the region to be removed, if any, and note that there's no partial
         * cluster penultimate to the region
         */
        if (sbi->s_cluster_ratio > 1) {
                rc->first_do_lblk_found = false;
                if (lblk > es->es_lblk) {
                        rc->left_es = es;
                } else {
                        node = rb_prev(&es->rb_node);
                        rc->left_es = node ? rb_entry(node,
                                                      struct extent_status,
                                                      rb_node) : NULL;
                }
                rc->partial = false;
        }
}

/*
 * count_rsvd - count the clusters containing delayed and not unwritten
 *              (delonly) blocks in a range within an extent and add to
 *              the running tally in rsvd_count
 *
 * @inode - file containing extent
 * @lblk - first block in range
 * @len - length of range in blocks
 * @es - pointer to extent containing clusters to be counted
 * @rc - pointer to reserved count data
 *
 * Tracks partial clusters found at the beginning and end of extents so
 * they aren't overcounted.
 */
static void count_rsvd(struct inode *inode, ext4_lblk_t lblk, long len,
                       struct extent_status *es, struct rsvd_count *rc)
{
        struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
        ext4_lblk_t i, end, nclu;

        if (!ext4_es_is_delonly(es))
                return;

        WARN_ON(len <= 0);

        if (sbi->s_cluster_ratio == 1) {
                rc->ndelonly += (int) len;
                return;
        }

        /* bigalloc */

        i = (lblk < es->es_lblk) ? es->es_lblk : lblk;
        end = lblk + (ext4_lblk_t) len - 1;
        end = (end > ext4_es_end(es)) ? ext4_es_end(es) : end;

        /* record the first block of the first delonly extent seen */
        if (!rc->first_do_lblk_found) {
                rc->first_do_lblk = i;
                rc->first_do_lblk_found = true;
        }

        /* update the last lblk in the region seen so far */
        rc->last_do_lblk = end;

        /*
         * if we're tracking a partial cluster and the current extent
         * doesn't start with it, count it and stop tracking
         */
        if (rc->partial && (rc->lclu != EXT4_B2C(sbi, i))) {
                rc->ndelonly++;
                rc->partial = false;
        }

        /*
         * if the first cluster doesn't start on a cluster boundary but
         * ends on one, count it
         */
        if (EXT4_LBLK_COFF(sbi, i) != 0) {
                if (end >= EXT4_LBLK_CFILL(sbi, i)) {
                        rc->ndelonly++;
                        rc->partial = false;
                        i = EXT4_LBLK_CFILL(sbi, i) + 1;
                }
        }

        /*
         * if the current cluster starts on a cluster boundary, count the
         * number of whole delonly clusters in the extent
         */
        if ((i + sbi->s_cluster_ratio - 1) <= end) {
                nclu = (end - i + 1) >> sbi->s_cluster_bits;
                rc->ndelonly += nclu;
                i += nclu << sbi->s_cluster_bits;
        }

        /*
         * start tracking a partial cluster if there's a partial at the end
         * of the current extent and we're not already tracking one
         */
        if (!rc->partial && i <= end) {
                rc->partial = true;
                rc->lclu = EXT4_B2C(sbi, i);
        }
}

/*
 * __pr_tree_search - search for a pending cluster reservation
 *
 * @root - root of pending reservation tree
 * @lclu - logical cluster to search for
 *
 * Returns the pending reservation for the cluster identified by @lclu
 * if found.  If not found, returns a reservation for the next cluster
 * if any, and if not, returns NULL.
 */
static struct pending_reservation *__pr_tree_search(struct rb_root *root,
                                                    ext4_lblk_t lclu)
{
        struct rb_node *node = root->rb_node;
        struct pending_reservation *pr = NULL;

        while (node) {
                pr = rb_entry(node, struct pending_reservation, rb_node);
                if (lclu < pr->lclu)
                        node = node->rb_left;
                else if (lclu > pr->lclu)
                        node = node->rb_right;
                else
                        return pr;
        }
        if (pr && lclu < pr->lclu)
                return pr;
        if (pr && lclu > pr->lclu) {
                node = rb_next(&pr->rb_node);
                return node ? rb_entry(node, struct pending_reservation,
                                       rb_node) : NULL;
        }
        return NULL;
}

/*
 * get_rsvd - calculates and returns the number of cluster reservations to be
 *            released when removing a block range from the extent status tree
 *            and releases any pending reservations within the range
 *
 * @inode - file containing block range
 * @end - last block in range
 * @right_es - pointer to extent containing next block beyond end or NULL
 * @rc - pointer to reserved count data
 *
 * The number of reservations to be released is equal to the number of
 * clusters containing delayed and not unwritten (delonly) blocks within
 * the range, minus the number of clusters still containing delonly blocks
 * at the ends of the range, and minus the number of pending reservations
 * within the range.
 */
static unsigned int get_rsvd(struct inode *inode, ext4_lblk_t end,
                             struct extent_status *right_es,
                             struct rsvd_count *rc)
{
        struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
        struct pending_reservation *pr;
        struct ext4_pending_tree *tree = &EXT4_I(inode)->i_pending_tree;
        struct rb_node *node;
        ext4_lblk_t first_lclu, last_lclu;
        bool left_delonly, right_delonly, count_pending;
        struct extent_status *es;

        if (sbi->s_cluster_ratio > 1) {
                /* count any remaining partial cluster */
                if (rc->partial)
                        rc->ndelonly++;

                if (rc->ndelonly == 0)
                        return 0;

                first_lclu = EXT4_B2C(sbi, rc->first_do_lblk);
                last_lclu = EXT4_B2C(sbi, rc->last_do_lblk);

                /*
                 * decrease the delonly count by the number of clusters at the
                 * ends of the range that still contain delonly blocks -
                 * these clusters still need to be reserved
                 */
                left_delonly = right_delonly = false;

                es = rc->left_es;
                while (es && ext4_es_end(es) >=
                       EXT4_LBLK_CMASK(sbi, rc->first_do_lblk)) {
                        if (ext4_es_is_delonly(es)) {
                                rc->ndelonly--;
                                left_delonly = true;
                                break;
                        }
                        node = rb_prev(&es->rb_node);
                        if (!node)
                                break;
                        es = rb_entry(node, struct extent_status, rb_node);
                }
                if (right_es && (!left_delonly || first_lclu != last_lclu)) {
                        if (end < ext4_es_end(right_es)) {
                                es = right_es;
                        } else {
                                node = rb_next(&right_es->rb_node);
                                es = node ? rb_entry(node,
                                                     struct extent_status,
                                                     rb_node) : NULL;
                        }
                        while (es && es->es_lblk <=
                               EXT4_LBLK_CFILL(sbi, rc->last_do_lblk)) {
                                if (ext4_es_is_delonly(es)) {
                                        rc->ndelonly--;
                                        right_delonly = true;
                                        break;
                                }
                                node = rb_next(&es->rb_node);
                                if (!node)
                                        break;
                                es = rb_entry(node, struct extent_status,
                                              rb_node);
                        }
                }

                /*
                 * Determine the block range that should be searched for
                 * pending reservations, if any.  Clusters on the ends of the
                 * original removed range containing delonly blocks are
                 * excluded.  They've already been accounted for and it's not
                 * possible to determine if an associated pending reservation
                 * should be released with the information available in the
                 * extents status tree.
                 */
                if (first_lclu == last_lclu) {
                        if (left_delonly | right_delonly)
                                count_pending = false;
                        else
                                count_pending = true;
                } else {
                        if (left_delonly)
                                first_lclu++;
                        if (right_delonly)
                                last_lclu--;
                        if (first_lclu <= last_lclu)
                                count_pending = true;
                        else
                                count_pending = false;
                }

                /*
                 * a pending reservation found between first_lclu and last_lclu
                 * represents an allocated cluster that contained at least one
                 * delonly block, so the delonly total must be reduced by one
                 * for each pending reservation found and released
                 */
                if (count_pending) {
                        pr = __pr_tree_search(&tree->root, first_lclu);
                        while (pr && pr->lclu <= last_lclu) {
                                rc->ndelonly--;
                                node = rb_next(&pr->rb_node);
                                rb_erase(&pr->rb_node, &tree->root);
                                kmem_cache_free(ext4_pending_cachep, pr);
                                if (!node)
                                        break;
                                pr = rb_entry(node, struct pending_reservation,
                                              rb_node);
                        }
                }
        }
        return rc->ndelonly;
}

/*
 * __es_remove_extent - removes block range from extent status tree
 *
 * @inode - file containing range
 * @lblk - first block in range
 * @end - last block in range
 * @reserved - number of cluster reservations released
 *
 * If @reserved is not NULL and delayed allocation is enabled, counts
 * block/cluster reservations freed by removing the range and, if bigalloc
 * is enabled, cancels pending reservations as needed.  Returns 0 on
 * success, error code on failure.
 */
static int __es_remove_extent(struct inode *inode, ext4_lblk_t lblk,
                              ext4_lblk_t end, int *reserved)
{
        struct ext4_es_tree *tree = &EXT4_I(inode)->i_es_tree;
        struct rb_node *node;
        struct extent_status *es;
        struct extent_status orig_es;
        ext4_lblk_t len1, len2;
        ext4_fsblk_t block;
        int err;
        bool count_reserved = true;
        struct rsvd_count rc;

        if (reserved == NULL || !test_opt(inode->i_sb, DELALLOC))
                count_reserved = false;
retry:
        err = 0;

        es = __es_tree_search(&tree->root, lblk);
        if (!es)
                goto out;
        if (es->es_lblk > end)
                goto out;

        /* Simply invalidate cache_es. */
        tree->cache_es = NULL;
        if (count_reserved)
                init_rsvd(inode, lblk, es, &rc);

        orig_es.es_lblk = es->es_lblk;
        orig_es.es_len = es->es_len;
        orig_es.es_pblk = es->es_pblk;

        len1 = lblk > es->es_lblk ? lblk - es->es_lblk : 0;
        len2 = ext4_es_end(es) > end ? ext4_es_end(es) - end : 0;
        if (len1 > 0)
                es->es_len = len1;
        if (len2 > 0) {
                if (len1 > 0) {
                        struct extent_status newes;

                        newes.es_lblk = end + 1;
                        newes.es_len = len2;
                        block = 0x7FDEADBEEFULL;
                        if (ext4_es_is_written(&orig_es) ||
                            ext4_es_is_unwritten(&orig_es))
                                block = ext4_es_pblock(&orig_es) +
                                        orig_es.es_len - len2;
                        ext4_es_store_pblock_status(&newes, block,
                                                    ext4_es_status(&orig_es));
                        err = __es_insert_extent(inode, &newes);
                        if (err) {
                                es->es_lblk = orig_es.es_lblk;
                                es->es_len = orig_es.es_len;
                                if ((err == -ENOMEM) &&
                                    __es_shrink(EXT4_SB(inode->i_sb),
                                                128, EXT4_I(inode)))
                                        goto retry;
                                goto out;
                        }
                } else {
                        es->es_lblk = end + 1;
                        es->es_len = len2;
                        if (ext4_es_is_written(es) ||
                            ext4_es_is_unwritten(es)) {
                                block = orig_es.es_pblk + orig_es.es_len - len2;
                                ext4_es_store_pblock(es, block);
                        }
                }
                if (count_reserved)
                        count_rsvd(inode, lblk, orig_es.es_len - len1 - len2,
                                   &orig_es, &rc);
                goto out;
        }

        if (len1 > 0) {
                if (count_reserved)
                        count_rsvd(inode, lblk, orig_es.es_len - len1,
                                   &orig_es, &rc);
                node = rb_next(&es->rb_node);
                if (node)
                        es = rb_entry(node, struct extent_status, rb_node);
                else
                        es = NULL;
        }

        while (es && ext4_es_end(es) <= end) {
                if (count_reserved)
                        count_rsvd(inode, es->es_lblk, es->es_len, es, &rc);
                node = rb_next(&es->rb_node);
                rb_erase(&es->rb_node, &tree->root);
                ext4_es_free_extent(inode, es);
                if (!node) {
                        es = NULL;
                        break;
                }
                es = rb_entry(node, struct extent_status, rb_node);
        }

        if (es && es->es_lblk < end + 1) {
                ext4_lblk_t orig_len = es->es_len;

                len1 = ext4_es_end(es) - end;
                if (count_reserved)
                        count_rsvd(inode, es->es_lblk, orig_len - len1,
                                   es, &rc);
                es->es_lblk = end + 1;
                es->es_len = len1;
                if (ext4_es_is_written(es) || ext4_es_is_unwritten(es)) {
                        block = es->es_pblk + orig_len - len1;
                        ext4_es_store_pblock(es, block);
                }
        }

        if (count_reserved)
                *reserved = get_rsvd(inode, end, es, &rc);
out:
        return err;
}

/*
 * ext4_es_remove_extent - removes block range from extent status tree
 *
 * @inode - file containing range
 * @lblk - first block in range
 * @len - number of blocks to remove
 *
 * Reduces the block/cluster reservation count and, for bigalloc, cancels
 * pending reservations as needed.  Returns 0 on success, error code on
 * failure.
 */
int ext4_es_remove_extent(struct inode *inode, ext4_lblk_t lblk,
                          ext4_lblk_t len)
{
        ext4_lblk_t end;
        int err = 0;
        int reserved = 0;

        if (EXT4_SB(inode->i_sb)->s_mount_state & EXT4_FC_REPLAY)
                return 0;

        trace_ext4_es_remove_extent(inode, lblk, len);
        es_debug("remove [%u/%u) from extent status tree of inode %lu\n",
                 lblk, len, inode->i_ino);

        if (!len)
                return err;

        end = lblk + len - 1;
        BUG_ON(end < lblk);

        /*
         * ext4_clear_inode() depends on us taking i_es_lock unconditionally
         * so that we are sure __es_shrink() is done with the inode before it
         * is reclaimed.
         */
        write_lock(&EXT4_I(inode)->i_es_lock);
        err = __es_remove_extent(inode, lblk, end, &reserved);
        write_unlock(&EXT4_I(inode)->i_es_lock);
        ext4_es_print_tree(inode);
        ext4_da_release_space(inode, reserved);
        return err;
}

static int __es_shrink(struct ext4_sb_info *sbi, int nr_to_scan,
                       struct ext4_inode_info *locked_ei)
{
        struct ext4_inode_info *ei;
        struct ext4_es_stats *es_stats;
        ktime_t start_time;
        u64 scan_time;
        int nr_to_walk;
        int nr_shrunk = 0;
        int retried = 0, nr_skipped = 0;

        es_stats = &sbi->s_es_stats;
        start_time = ktime_get();

retry:
        spin_lock(&sbi->s_es_lock);
        nr_to_walk = sbi->s_es_nr_inode;
        while (nr_to_walk-- > 0) {
                if (list_empty(&sbi->s_es_list)) {
                        spin_unlock(&sbi->s_es_lock);
                        goto out;
                }
                ei = list_first_entry(&sbi->s_es_list, struct ext4_inode_info,
                                      i_es_list);
                /* Move the inode to the tail */
                list_move_tail(&ei->i_es_list, &sbi->s_es_list);

                /*
                 * Normally we try hard to avoid shrinking precached inodes,
                 * but we will as a last resort.
                 */
                if (!retried && ext4_test_inode_state(&ei->vfs_inode,
                                                EXT4_STATE_EXT_PRECACHED)) {
                        nr_skipped++;
                        continue;
                }

                if (ei == locked_ei || !write_trylock(&ei->i_es_lock)) {
                        nr_skipped++;
                        continue;
                }

                /*
                 * Now we hold i_es_lock which protects us from inode
                 * reclaim.  It is safe to drop s_es_lock.
                 */
                spin_unlock(&sbi->s_es_lock);

                nr_shrunk += es_reclaim_extents(ei, &nr_to_scan);
                write_unlock(&ei->i_es_lock);

                if (nr_to_scan <= 0)
                        goto out;
                spin_lock(&sbi->s_es_lock);
        }
        spin_unlock(&sbi->s_es_lock);

        /*
         * If we skipped any inodes, and we weren't able to make any
         * forward progress, try again to scan precached inodes.
         */
        if ((nr_shrunk == 0) && nr_skipped && !retried) {
                retried++;
                goto retry;
        }

        if (locked_ei && nr_shrunk == 0)
                nr_shrunk = es_reclaim_extents(locked_ei, &nr_to_scan);

out:
        scan_time = ktime_to_ns(ktime_sub(ktime_get(), start_time));
        if (likely(es_stats->es_stats_scan_time))
                es_stats->es_stats_scan_time = (scan_time +
                                es_stats->es_stats_scan_time*3) / 4;
        else
                es_stats->es_stats_scan_time = scan_time;
        if (scan_time > es_stats->es_stats_max_scan_time)
                es_stats->es_stats_max_scan_time = scan_time;
        if (likely(es_stats->es_stats_shrunk))
                es_stats->es_stats_shrunk = (nr_shrunk +
                                es_stats->es_stats_shrunk*3) / 4;
        else
                es_stats->es_stats_shrunk = nr_shrunk;

        trace_ext4_es_shrink(sbi->s_sb, nr_shrunk, scan_time,
                             nr_skipped, retried);
        return nr_shrunk;
}

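/*
 * Shrinker callbacks: report the number of reclaimable (non-delayed)
 * extent status entries, and reclaim up to sc->nr_to_scan of them under
 * memory pressure.
 */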
static unsigned long ext4_es_count(struct shrinker *shrink,
                                   struct shrink_control *sc)
{
        unsigned long nr;
        struct ext4_sb_info *sbi;

        sbi = container_of(shrink, struct ext4_sb_info, s_es_shrinker);
        nr = percpu_counter_read_positive(&sbi->s_es_stats.es_stats_shk_cnt);
        trace_ext4_es_shrink_count(sbi->s_sb, sc->nr_to_scan, nr);
        return nr;
}

static unsigned long ext4_es_scan(struct shrinker *shrink,
                                  struct shrink_control *sc)
{
        struct ext4_sb_info *sbi = container_of(shrink,
                                struct ext4_sb_info, s_es_shrinker);
        int nr_to_scan = sc->nr_to_scan;
        int ret, nr_shrunk;

        ret = percpu_counter_read_positive(&sbi->s_es_stats.es_stats_shk_cnt);
        trace_ext4_es_shrink_scan_enter(sbi->s_sb, nr_to_scan, ret);

        nr_shrunk = __es_shrink(sbi, nr_to_scan, NULL);

        ret = percpu_counter_read_positive(&sbi->s_es_stats.es_stats_shk_cnt);
        trace_ext4_es_shrink_scan_exit(sbi->s_sb, nr_shrunk, ret);
        return nr_shrunk;
}

int ext4_seq_es_shrinker_info_show(struct seq_file *seq, void *v)
{
        struct ext4_sb_info *sbi = EXT4_SB((struct super_block *) seq->private);
        struct ext4_es_stats *es_stats = &sbi->s_es_stats;
        struct ext4_inode_info *ei, *max = NULL;
        unsigned int inode_cnt = 0;

        if (v != SEQ_START_TOKEN)
                return 0;

        /* here we just find an inode that has the max nr. of objects */
        spin_lock(&sbi->s_es_lock);
        list_for_each_entry(ei, &sbi->s_es_list, i_es_list) {
                inode_cnt++;
                if (max && max->i_es_all_nr < ei->i_es_all_nr)
                        max = ei;
                else if (!max)
                        max = ei;
        }
        spin_unlock(&sbi->s_es_lock);

        seq_printf(seq, "stats:\n  %lld objects\n  %lld reclaimable objects\n",
                   percpu_counter_sum_positive(&es_stats->es_stats_all_cnt),
                   percpu_counter_sum_positive(&es_stats->es_stats_shk_cnt));
        seq_printf(seq, "  %lld/%lld cache hits/misses\n",
                   percpu_counter_sum_positive(&es_stats->es_stats_cache_hits),
                   percpu_counter_sum_positive(&es_stats->es_stats_cache_misses));
        if (inode_cnt)
                seq_printf(seq, "  %d inodes on list\n", inode_cnt);

        seq_printf(seq, "average:\n  %llu us scan time\n",
                   div_u64(es_stats->es_stats_scan_time, 1000));
        seq_printf(seq, "  %lu shrunk objects\n", es_stats->es_stats_shrunk);
        if (inode_cnt)
                seq_printf(seq,
                           "maximum:\n  %lu inode (%u objects, %u reclaimable)\n"
                           "  %llu us max scan time\n",
                           max->vfs_inode.i_ino, max->i_es_all_nr,
                           max->i_es_shk_nr,
                           div_u64(es_stats->es_stats_max_scan_time, 1000));

        return 0;
}

int ext4_es_register_shrinker(struct ext4_sb_info *sbi)
{
        int err;

        /* Make sure we have enough bits for physical block number */
        BUILD_BUG_ON(ES_SHIFT < 48);
        INIT_LIST_HEAD(&sbi->s_es_list);
        sbi->s_es_nr_inode = 0;
        spin_lock_init(&sbi->s_es_lock);
        sbi->s_es_stats.es_stats_shrunk = 0;
        err = percpu_counter_init(&sbi->s_es_stats.es_stats_cache_hits, 0,
                                  GFP_KERNEL);
        if (err)
                return err;
        err = percpu_counter_init(&sbi->s_es_stats.es_stats_cache_misses, 0,
                                  GFP_KERNEL);
        if (err)
                goto err1;
        sbi->s_es_stats.es_stats_scan_time = 0;
        sbi->s_es_stats.es_stats_max_scan_time = 0;
        err = percpu_counter_init(&sbi->s_es_stats.es_stats_all_cnt, 0,
                                  GFP_KERNEL);
        if (err)
                goto err2;
        err = percpu_counter_init(&sbi->s_es_stats.es_stats_shk_cnt, 0,
                                  GFP_KERNEL);
        if (err)
                goto err3;

        sbi->s_es_shrinker.scan_objects = ext4_es_scan;
        sbi->s_es_shrinker.count_objects = ext4_es_count;
        sbi->s_es_shrinker.seeks = DEFAULT_SEEKS;
        err = register_shrinker(&sbi->s_es_shrinker, "ext4-es:%s",
                                sbi->s_sb->s_id);
        if (err)
                goto err4;

        return 0;
err4:
        percpu_counter_destroy(&sbi->s_es_stats.es_stats_shk_cnt);
err3:
        percpu_counter_destroy(&sbi->s_es_stats.es_stats_all_cnt);
err2:
        percpu_counter_destroy(&sbi->s_es_stats.es_stats_cache_misses);
err1:
        percpu_counter_destroy(&sbi->s_es_stats.es_stats_cache_hits);
        return err;
}

void ext4_es_unregister_shrinker(struct ext4_sb_info *sbi)
{
        percpu_counter_destroy(&sbi->s_es_stats.es_stats_cache_hits);
        percpu_counter_destroy(&sbi->s_es_stats.es_stats_cache_misses);
        percpu_counter_destroy(&sbi->s_es_stats.es_stats_all_cnt);
        percpu_counter_destroy(&sbi->s_es_stats.es_stats_shk_cnt);
        unregister_shrinker(&sbi->s_es_shrinker);
}

/*
 * Shrink extents in given inode from ei->i_es_shrink_lblk till end. Scan at
 * most *nr_to_scan extents, update *nr_to_scan accordingly.
 *
 * Return 0 if we hit end of tree / interval, 1 if we exhausted nr_to_scan.
 * Increment *nr_shrunk by the number of reclaimed extents. Also update
 * ei->i_es_shrink_lblk to where we should continue scanning.
 */
static int es_do_reclaim_extents(struct ext4_inode_info *ei, ext4_lblk_t end,
                                 int *nr_to_scan, int *nr_shrunk)
{
        struct inode *inode = &ei->vfs_inode;
        struct ext4_es_tree *tree = &ei->i_es_tree;
        struct extent_status *es;
        struct rb_node *node;

        es = __es_tree_search(&tree->root, ei->i_es_shrink_lblk);
        if (!es)
                goto out_wrap;

        while (*nr_to_scan > 0) {
                if (es->es_lblk > end) {
                        ei->i_es_shrink_lblk = end + 1;
                        return 0;
                }

                (*nr_to_scan)--;
                node = rb_next(&es->rb_node);
                /*
                 * We can't reclaim delayed extents from the status tree
                 * because fiemap, bigalloc, and seek_data/hole need to
                 * use it.
                 */
                if (ext4_es_is_delayed(es))
                        goto next;
                if (ext4_es_is_referenced(es)) {
                        ext4_es_clear_referenced(es);
                        goto next;
                }

                rb_erase(&es->rb_node, &tree->root);
                ext4_es_free_extent(inode, es);
                (*nr_shrunk)++;
next:
                if (!node)
                        goto out_wrap;
                es = rb_entry(node, struct extent_status, rb_node);
        }
        ei->i_es_shrink_lblk = es->es_lblk;
        return 1;
out_wrap:
        ei->i_es_shrink_lblk = 0;
        return 0;
}

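/*
 * Reclaim extents from the inode, scanning from the saved shrink
 * position to the end of the file and wrapping around to the start if
 * the scan budget allows.  Invalidates cache_es since it may be freed.
 */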
static int es_reclaim_extents(struct ext4_inode_info *ei, int *nr_to_scan)
{
        struct inode *inode = &ei->vfs_inode;
        int nr_shrunk = 0;
        ext4_lblk_t start = ei->i_es_shrink_lblk;
        static DEFINE_RATELIMIT_STATE(_rs, DEFAULT_RATELIMIT_INTERVAL,
                                      DEFAULT_RATELIMIT_BURST);

        if (ei->i_es_shk_nr == 0)
                return 0;

        if (ext4_test_inode_state(inode, EXT4_STATE_EXT_PRECACHED) &&
            __ratelimit(&_rs))
                ext4_warning(inode->i_sb, "forced shrink of precached extents");

        if (!es_do_reclaim_extents(ei, EXT_MAX_BLOCKS, nr_to_scan, &nr_shrunk) &&
            start != 0)
                es_do_reclaim_extents(ei, start - 1, nr_to_scan, &nr_shrunk);

        ei->i_es_tree.cache_es = NULL;
        return nr_shrunk;
}

/*
 * Called to support EXT4_IOC_CLEAR_ES_CACHE.  We can only remove
 * discretionary entries from the extent status cache.  (Some entries
 * must be present for proper operation.)
 */
void ext4_clear_inode_es(struct inode *inode)
{
        struct ext4_inode_info *ei = EXT4_I(inode);
        struct extent_status *es;
        struct ext4_es_tree *tree;
        struct rb_node *node;

        write_lock(&ei->i_es_lock);
        tree = &EXT4_I(inode)->i_es_tree;
        tree->cache_es = NULL;
        node = rb_first(&tree->root);
        while (node) {
                es = rb_entry(node, struct extent_status, rb_node);
                node = rb_next(node);
                if (!ext4_es_is_delayed(es)) {
                        rb_erase(&es->rb_node, &tree->root);
                        ext4_es_free_extent(inode, es);
                }
        }
        ext4_clear_inode_state(inode, EXT4_STATE_EXT_PRECACHED);
        write_unlock(&ei->i_es_lock);
}

#ifdef ES_DEBUG__
static void ext4_print_pending_tree(struct inode *inode)
{
        struct ext4_pending_tree *tree;
        struct rb_node *node;
        struct pending_reservation *pr;

        printk(KERN_DEBUG "pending reservations for inode %lu:", inode->i_ino);
        tree = &EXT4_I(inode)->i_pending_tree;
        node = rb_first(&tree->root);
        while (node) {
                pr = rb_entry(node, struct pending_reservation, rb_node);
                printk(KERN_DEBUG " %u", pr->lclu);
                node = rb_next(node);
        }
        printk(KERN_DEBUG "\n");
}
#else
#define ext4_print_pending_tree(inode)
#endif

int __init ext4_init_pending(void)
{
        ext4_pending_cachep = kmem_cache_create("ext4_pending_reservation",
                                        sizeof(struct pending_reservation),
                                        0, (SLAB_RECLAIM_ACCOUNT), NULL);
        if (ext4_pending_cachep == NULL)
                return -ENOMEM;
        return 0;
}

void ext4_exit_pending(void)
{
        kmem_cache_destroy(ext4_pending_cachep);
}

void ext4_init_pending_tree(struct ext4_pending_tree *tree)
{
        tree->root = RB_ROOT;
}

/*
 * __get_pending - retrieve a pointer to a pending reservation
 *
 * @inode - file containing the pending cluster reservation
 * @lclu - logical cluster of interest
 *
 * Returns a pointer to a pending reservation if it's a member of
 * the set, and NULL if not.  Must be called while holding i_es_lock.
 */
static struct pending_reservation *__get_pending(struct inode *inode,
                                                 ext4_lblk_t lclu)
{
        struct ext4_pending_tree *tree;
        struct rb_node *node;
        struct pending_reservation *pr = NULL;

        tree = &EXT4_I(inode)->i_pending_tree;
        node = (&tree->root)->rb_node;

        while (node) {
                pr = rb_entry(node, struct pending_reservation, rb_node);
                if (lclu < pr->lclu)
                        node = node->rb_left;
                else if (lclu > pr->lclu)
                        node = node->rb_right;
                else if (lclu == pr->lclu)
                        return pr;
        }
        return NULL;
}

/*
 * __insert_pending - adds a pending cluster reservation to the set of
 *                    pending reservations
 *
 * @inode - file containing the cluster
 * @lblk - logical block in the cluster to be added
 *
 * Returns 0 on successful insertion and -ENOMEM on failure.  If the
 * pending reservation is already in the set, returns successfully.
 */
static int __insert_pending(struct inode *inode, ext4_lblk_t lblk)
{
        struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
        struct ext4_pending_tree *tree = &EXT4_I(inode)->i_pending_tree;
        struct rb_node **p = &tree->root.rb_node;
        struct rb_node *parent = NULL;
        struct pending_reservation *pr;
        ext4_lblk_t lclu;
        int ret = 0;

        lclu = EXT4_B2C(sbi, lblk);

        while (*p) {
                parent = *p;
                pr = rb_entry(parent, struct pending_reservation, rb_node);

                if (lclu < pr->lclu) {
                        p = &(*p)->rb_left;
                } else if (lclu > pr->lclu) {
                        p = &(*p)->rb_right;
                } else {
                        /* pending reservation already inserted */
                        goto out;
                }
        }

        pr = kmem_cache_alloc(ext4_pending_cachep, GFP_ATOMIC);
        if (pr == NULL) {
                ret = -ENOMEM;
                goto out;
        }
        pr->lclu = lclu;

        rb_link_node(&pr->rb_node, parent, p);
        rb_insert_color(&pr->rb_node, &tree->root);

out:
        return ret;
}

/*
 * __remove_pending - removes a pending cluster reservation from the set
 *                    of pending reservations
 *
 * @inode - file containing the cluster
 * @lblk - logical block in the pending cluster reservation to be removed
 *
 * Returns successfully whether or not the pending reservation is found.
 */
static void __remove_pending(struct inode *inode, ext4_lblk_t lblk)
{
        struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
        struct pending_reservation *pr;
        struct ext4_pending_tree *tree;

        pr = __get_pending(inode, EXT4_B2C(sbi, lblk));
        if (pr != NULL) {
                tree = &EXT4_I(inode)->i_pending_tree;
                rb_erase(&pr->rb_node, &tree->root);
                kmem_cache_free(ext4_pending_cachep, pr);
        }
}

/*
 * ext4_remove_pending - removes a pending cluster reservation from the set
 *                       of pending reservations
 *
 * @inode - file containing the cluster
 * @lblk - logical block in the pending cluster reservation to be removed
 *
 * Locking for external use of __remove_pending.
 */
void ext4_remove_pending(struct inode *inode, ext4_lblk_t lblk)
{
        struct ext4_inode_info *ei = EXT4_I(inode);

        write_lock(&ei->i_es_lock);
        __remove_pending(inode, lblk);
        write_unlock(&ei->i_es_lock);
}

/*
 * ext4_is_pending - determine whether a cluster has a pending reservation
 *                   on it
 *
 * @inode - file containing the cluster
 * @lblk - logical block in the cluster
 *
 * Returns true if there's a pending reservation for the cluster in the
 * set of pending reservations, and false if not.
 */
bool ext4_is_pending(struct inode *inode, ext4_lblk_t lblk)
{
        struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
        struct ext4_inode_info *ei = EXT4_I(inode);
        bool ret;

        read_lock(&ei->i_es_lock);
        ret = (bool)(__get_pending(inode, EXT4_B2C(sbi, lblk)) != NULL);
        read_unlock(&ei->i_es_lock);

        return ret;
}

/*
 * ext4_es_insert_delayed_block - adds a delayed block to the extents status
 *                                tree, adding a pending reservation where
 *                                needed
 *
 * @inode - file containing the newly added block
 * @lblk - logical block to be added
 * @allocated - indicates whether a physical cluster has been allocated for
 *              the logical cluster that contains the block
 *
 * Returns 0 on success, negative error code on failure.
 */
int ext4_es_insert_delayed_block(struct inode *inode, ext4_lblk_t lblk,
                                 bool allocated)
{
        struct extent_status newes;
        int err = 0;

        if (EXT4_SB(inode->i_sb)->s_mount_state & EXT4_FC_REPLAY)
                return 0;

        es_debug("add [%u/1) delayed to extent status tree of inode %lu\n",
                 lblk, inode->i_ino);

        newes.es_lblk = lblk;
        newes.es_len = 1;
        ext4_es_store_pblock_status(&newes, ~0, EXTENT_STATUS_DELAYED);
        trace_ext4_es_insert_delayed_block(inode, &newes, allocated);

        ext4_es_insert_extent_check(inode, &newes);

        write_lock(&EXT4_I(inode)->i_es_lock);

        err = __es_remove_extent(inode, lblk, lblk, NULL);
        if (err != 0)
                goto error;
retry:
        err = __es_insert_extent(inode, &newes);
        if (err == -ENOMEM && __es_shrink(EXT4_SB(inode->i_sb),
                                          128, EXT4_I(inode)))
                goto retry;
        if (err != 0)
                goto error;

        if (allocated)
                __insert_pending(inode, lblk);

error:
        write_unlock(&EXT4_I(inode)->i_es_lock);

        ext4_es_print_tree(inode);
        ext4_print_pending_tree(inode);

        return err;
}

/*
 * __es_delayed_clu - count number of clusters containing blocks that
 *                    are delayed only
 *
 * @inode - file containing block range
 * @start - logical block defining start of range
 * @end - logical block defining end of range
 *
 * Returns the number of clusters containing only delayed (not delayed
 * and unwritten) blocks in the range specified by @start and @end.  Any
 * cluster or part of a cluster within the range and containing a delayed
 * and not unwritten block within the range is counted as a whole cluster.
 */
static unsigned int __es_delayed_clu(struct inode *inode, ext4_lblk_t start,
                                     ext4_lblk_t end)
{
        struct ext4_es_tree *tree = &EXT4_I(inode)->i_es_tree;
        struct extent_status *es;
        struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
        struct rb_node *node;
        ext4_lblk_t first_lclu, last_lclu;
        unsigned long long last_counted_lclu;
        unsigned int n = 0;

        /* guaranteed to be unequal to any ext4_lblk_t value */
        last_counted_lclu = ~0ULL;

        es = __es_tree_search(&tree->root, start);

        while (es && (es->es_lblk <= end)) {
                if (ext4_es_is_delonly(es)) {
                        if (es->es_lblk <= start)
                                first_lclu = EXT4_B2C(sbi, start);
                        else
                                first_lclu = EXT4_B2C(sbi, es->es_lblk);

                        if (ext4_es_end(es) >= end)
                                last_lclu = EXT4_B2C(sbi, end);
                        else
                                last_lclu = EXT4_B2C(sbi, ext4_es_end(es));

                        if (first_lclu == last_counted_lclu)
                                n += last_lclu - first_lclu;
                        else
                                n += last_lclu - first_lclu + 1;
                        last_counted_lclu = last_lclu;
                }
                node = rb_next(&es->rb_node);
                if (!node)
                        break;
                es = rb_entry(node, struct extent_status, rb_node);
        }

        return n;
}

/*
 * ext4_es_delayed_clu - count number of clusters containing blocks that
 *                       are delayed only
 *
 * @inode - file containing block range
 * @lblk - logical block defining start of range
 * @len - number of blocks in range
 *
 * Locking for external use of __es_delayed_clu().
 */
unsigned int ext4_es_delayed_clu(struct inode *inode, ext4_lblk_t lblk,
                                 ext4_lblk_t len)
{
        struct ext4_inode_info *ei = EXT4_I(inode);
        ext4_lblk_t end;
        unsigned int n;

        if (len == 0)
                return 0;

        end = lblk + len - 1;
        WARN_ON(end < lblk);

        read_lock(&ei->i_es_lock);

        n = __es_delayed_clu(inode, lblk, end);

        read_unlock(&ei->i_es_lock);

        return n;
}

/*
 * __revise_pending - makes, cancels, or leaves unchanged pending cluster
 *                    reservations for a logical block range
 *
 * @inode - file containing the range
 * @lblk - logical block defining the start of range
 * @len  - length of range in blocks
 *
 * Used after a newly allocated extent is added to the extents status tree.
 * Requires that the extents in the range have either written or unwritten
 * status.  Must be called while holding i_es_lock.
 */
static void __revise_pending(struct inode *inode, ext4_lblk_t lblk,
                             ext4_lblk_t len)
{
        struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
        ext4_lblk_t end = lblk + len - 1;
        ext4_lblk_t first, last;
        bool f_del = false, l_del = false;

        if (len == 0)
                return;

        /*
         * Two cases - block range within single cluster and block range
         * spanning two or more clusters.  Note that a cluster belonging
         * to a range starting and/or ending on a cluster boundary is treated
         * as if it does not contain a delayed extent.  The new range may
         * have allocated space for previously delayed blocks out to the
         * cluster boundary, requiring that any pre-existing pending
         * reservation be canceled.  Because this code only looks at blocks
         * outside the range, it should revise pending reservations
         * correctly even if the extent represented by the range can't be
         * inserted in the extents status tree due to ENOSPC.
         */
        if (EXT4_B2C(sbi, lblk) == EXT4_B2C(sbi, end)) {
                first = EXT4_LBLK_CMASK(sbi, lblk);
                if (first != lblk)
                        f_del = __es_scan_range(inode, &ext4_es_is_delonly,
                                                first, lblk - 1);
                if (f_del) {
                        __insert_pending(inode, first);
                } else {
                        last = EXT4_LBLK_CMASK(sbi, end) +
                               sbi->s_cluster_ratio - 1;
                        if (last != end)
                                l_del = __es_scan_range(inode,
                                                        &ext4_es_is_delonly,
                                                        end + 1, last);
                        if (l_del)
                                __insert_pending(inode, last);
                        else
                                __remove_pending(inode, last);
                }
        } else {
                first = EXT4_LBLK_CMASK(sbi, lblk);
                if (first != lblk)
                        f_del = __es_scan_range(inode, &ext4_es_is_delonly,
                                                first, lblk - 1);
                if (f_del)
                        __insert_pending(inode, first);
                else
                        __remove_pending(inode, first);

                last = EXT4_LBLK_CMASK(sbi, end) + sbi->s_cluster_ratio - 1;
                if (last != end)
                        l_del = __es_scan_range(inode, &ext4_es_is_delonly,
                                                end + 1, last);
                if (l_del)
                        __insert_pending(inode, last);
                else
                        __remove_pending(inode, last);
        }
}