#include "ext4_jbd2.h"
#include "mballoc.h"
#include <linux/log2.h>
#include <linux/module.h>
#include <linux/slab.h>
#include <linux/nospec.h>
#include <linux/backing-dev.h>
#include <trace/events/ext4.h>

static struct kmem_cache *ext4_pspace_cachep;
static struct kmem_cache *ext4_ac_cachep;
static struct kmem_cache *ext4_free_data_cachep;

#define NR_GRPINFO_CACHES 8
static struct kmem_cache *ext4_groupinfo_caches[NR_GRPINFO_CACHES];

static const char * const ext4_groupinfo_slab_names[NR_GRPINFO_CACHES] = {
        "ext4_groupinfo_1k", "ext4_groupinfo_2k", "ext4_groupinfo_4k",
        "ext4_groupinfo_8k", "ext4_groupinfo_16k", "ext4_groupinfo_32k",
        "ext4_groupinfo_64k", "ext4_groupinfo_128k"
};

static void ext4_mb_generate_from_pa(struct super_block *sb, void *bitmap,
                                     ext4_group_t group);
static void ext4_mb_generate_from_freelist(struct super_block *sb, void *bitmap,
                                           ext4_group_t group);
static void ext4_mb_new_preallocation(struct ext4_allocation_context *ac);

static bool ext4_mb_good_group(struct ext4_allocation_context *ac,
                               ext4_group_t group, int cr);

static int ext4_try_to_trim_range(struct super_block *sb,
                                  struct ext4_buddy *e4b, ext4_grpblk_t start,
                                  ext4_grpblk_t max, ext4_grpblk_t minblocks);
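/*
 * Per-cpu sequence counter, bumped via this_cpu_inc() in mb_free_blocks()
 * and mb_mark_used() below.  ext4_get_discard_pa_seq_sum() adds the
 * per-cpu values up into a cheap, lock-free global sequence number, so
 * that callers can sample it around an allocation attempt and detect
 * that blocks were freed or preallocations touched in the meantime.
 */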
static DEFINE_PER_CPU(u64, discard_pa_seq);
static inline u64 ext4_get_discard_pa_seq_sum(void)
{
        int __cpu;
        u64 __seq = 0;

        for_each_possible_cpu(__cpu)
                __seq += per_cpu(discard_pa_seq, __cpu);
        return __seq;
}

static inline void *mb_correct_addr_and_bit(int *bit, void *addr)
{
#if BITS_PER_LONG == 64
        *bit += ((unsigned long) addr & 7UL) << 3;
        addr = (void *) ((unsigned long) addr & ~7UL);
#elif BITS_PER_LONG == 32
        *bit += ((unsigned long) addr & 3UL) << 3;
        addr = (void *) ((unsigned long) addr & ~3UL);
#else
#error "how many bits you are?!"
#endif
        return addr;
}
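/*
 * Bitmap helpers: some architectures require the address passed to the
 * bit operations to be unsigned-long aligned, so mb_correct_addr_and_bit()
 * above rounds the address down to the nearest long boundary and folds
 * the dropped low bytes into the bit offset before calling ext4_*_bit().
 */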
static inline int mb_test_bit(int bit, void *addr)
{
        addr = mb_correct_addr_and_bit(&bit, addr);
        return ext4_test_bit(bit, addr);
}

static inline void mb_set_bit(int bit, void *addr)
{
        addr = mb_correct_addr_and_bit(&bit, addr);
        ext4_set_bit(bit, addr);
}

static inline void mb_clear_bit(int bit, void *addr)
{
        addr = mb_correct_addr_and_bit(&bit, addr);
        ext4_clear_bit(bit, addr);
}

static inline int mb_test_and_clear_bit(int bit, void *addr)
{
        addr = mb_correct_addr_and_bit(&bit, addr);
        return ext4_test_and_clear_bit(bit, addr);
}

static inline int mb_find_next_zero_bit(void *addr, int max, int start)
{
        int fix = 0, ret, tmpmax;
        addr = mb_correct_addr_and_bit(&fix, addr);
        tmpmax = max + fix;
        start += fix;

        ret = ext4_find_next_zero_bit(addr, tmpmax, start) - fix;
        if (ret > max)
                return max;
        return ret;
}

static inline int mb_find_next_bit(void *addr, int max, int start)
{
        int fix = 0, ret, tmpmax;
        addr = mb_correct_addr_and_bit(&fix, addr);
        tmpmax = max + fix;
        start += fix;

        ret = ext4_find_next_bit(addr, tmpmax, start) - fix;
        if (ret > max)
                return max;
        return ret;
}
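/*
 * Return the bitmap for a given buddy order of the group.  Order 0 is
 * simply the in-core copy of the block bitmap, i.e. 1 << (bd_blkbits + 3)
 * bits; higher orders live in the buddy block at offset
 * s_mb_offsets[order] and have s_mb_maxs[order] valid bits (both arrays
 * are set up at mount time, outside this excerpt).  For example, with
 * 4 KiB blocks order 0 has 32768 bits, and each further order covers
 * chunks twice as large, so it needs half as many bits.
 */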
static void *mb_find_buddy(struct ext4_buddy *e4b, int order, int *max)
{
        char *bb;

        BUG_ON(e4b->bd_bitmap == e4b->bd_buddy);
        BUG_ON(max == NULL);

        if (order > e4b->bd_blkbits + 1) {
                *max = 0;
                return NULL;
        }

        if (order == 0) {
                *max = 1 << (e4b->bd_blkbits + 3);
                return e4b->bd_bitmap;
        }

        bb = e4b->bd_buddy + EXT4_SB(e4b->bd_sb)->s_mb_offsets[order];
        *max = EXT4_SB(e4b->bd_sb)->s_mb_maxs[order];

        return bb;
}
0538
0539 #ifdef DOUBLE_CHECK
0540 static void mb_free_blocks_double(struct inode *inode, struct ext4_buddy *e4b,
0541 int first, int count)
0542 {
0543 int i;
0544 struct super_block *sb = e4b->bd_sb;
0545
0546 if (unlikely(e4b->bd_info->bb_bitmap == NULL))
0547 return;
0548 assert_spin_locked(ext4_group_lock_ptr(sb, e4b->bd_group));
0549 for (i = 0; i < count; i++) {
0550 if (!mb_test_bit(first + i, e4b->bd_info->bb_bitmap)) {
0551 ext4_fsblk_t blocknr;
0552
0553 blocknr = ext4_group_first_block_no(sb, e4b->bd_group);
0554 blocknr += EXT4_C2B(EXT4_SB(sb), first + i);
0555 ext4_grp_locked_error(sb, e4b->bd_group,
0556 inode ? inode->i_ino : 0,
0557 blocknr,
0558 "freeing block already freed "
0559 "(bit %u)",
0560 first + i);
0561 ext4_mark_group_bitmap_corrupted(sb, e4b->bd_group,
0562 EXT4_GROUP_INFO_BBITMAP_CORRUPT);
0563 }
0564 mb_clear_bit(first + i, e4b->bd_info->bb_bitmap);
0565 }
0566 }
0567
0568 static void mb_mark_used_double(struct ext4_buddy *e4b, int first, int count)
0569 {
0570 int i;
0571
0572 if (unlikely(e4b->bd_info->bb_bitmap == NULL))
0573 return;
0574 assert_spin_locked(ext4_group_lock_ptr(e4b->bd_sb, e4b->bd_group));
0575 for (i = 0; i < count; i++) {
0576 BUG_ON(mb_test_bit(first + i, e4b->bd_info->bb_bitmap));
0577 mb_set_bit(first + i, e4b->bd_info->bb_bitmap);
0578 }
0579 }
0580
0581 static void mb_cmp_bitmaps(struct ext4_buddy *e4b, void *bitmap)
0582 {
0583 if (unlikely(e4b->bd_info->bb_bitmap == NULL))
0584 return;
0585 if (memcmp(e4b->bd_info->bb_bitmap, bitmap, e4b->bd_sb->s_blocksize)) {
0586 unsigned char *b1, *b2;
0587 int i;
0588 b1 = (unsigned char *) e4b->bd_info->bb_bitmap;
0589 b2 = (unsigned char *) bitmap;
0590 for (i = 0; i < e4b->bd_sb->s_blocksize; i++) {
0591 if (b1[i] != b2[i]) {
0592 ext4_msg(e4b->bd_sb, KERN_ERR,
0593 "corruption in group %u "
0594 "at byte %u(%u): %x in copy != %x "
0595 "on disk/prealloc",
0596 e4b->bd_group, i, i * 8, b1[i], b2[i]);
0597 BUG();
0598 }
0599 }
0600 }
0601 }
0602
0603 static void mb_group_bb_bitmap_alloc(struct super_block *sb,
0604 struct ext4_group_info *grp, ext4_group_t group)
0605 {
0606 struct buffer_head *bh;
0607
0608 grp->bb_bitmap = kmalloc(sb->s_blocksize, GFP_NOFS);
0609 if (!grp->bb_bitmap)
0610 return;
0611
0612 bh = ext4_read_block_bitmap(sb, group);
0613 if (IS_ERR_OR_NULL(bh)) {
0614 kfree(grp->bb_bitmap);
0615 grp->bb_bitmap = NULL;
0616 return;
0617 }
0618
0619 memcpy(grp->bb_bitmap, bh->b_data, sb->s_blocksize);
0620 put_bh(bh);
0621 }
0622
0623 static void mb_group_bb_bitmap_free(struct ext4_group_info *grp)
0624 {
0625 kfree(grp->bb_bitmap);
0626 }
0627
0628 #else
0629 static inline void mb_free_blocks_double(struct inode *inode,
0630 struct ext4_buddy *e4b, int first, int count)
0631 {
0632 return;
0633 }
0634 static inline void mb_mark_used_double(struct ext4_buddy *e4b,
0635 int first, int count)
0636 {
0637 return;
0638 }
0639 static inline void mb_cmp_bitmaps(struct ext4_buddy *e4b, void *bitmap)
0640 {
0641 return;
0642 }
0643
0644 static inline void mb_group_bb_bitmap_alloc(struct super_block *sb,
0645 struct ext4_group_info *grp, ext4_group_t group)
0646 {
0647 return;
0648 }
0649
0650 static inline void mb_group_bb_bitmap_free(struct ext4_group_info *grp)
0651 {
0652 return;
0653 }
0654 #endif
0655
0656 #ifdef AGGRESSIVE_CHECK
0657
0658 #define MB_CHECK_ASSERT(assert) \
0659 do { \
0660 if (!(assert)) { \
0661 printk(KERN_EMERG \
0662 "Assertion failure in %s() at %s:%d: \"%s\"\n", \
0663 function, file, line, # assert); \
0664 BUG(); \
0665 } \
0666 } while (0)
0667
0668 static int __mb_check_buddy(struct ext4_buddy *e4b, char *file,
0669 const char *function, int line)
0670 {
0671 struct super_block *sb = e4b->bd_sb;
0672 int order = e4b->bd_blkbits + 1;
0673 int max;
0674 int max2;
0675 int i;
0676 int j;
0677 int k;
0678 int count;
0679 struct ext4_group_info *grp;
0680 int fragments = 0;
0681 int fstart;
0682 struct list_head *cur;
0683 void *buddy;
0684 void *buddy2;
0685
0686 if (e4b->bd_info->bb_check_counter++ % 10)
0687 return 0;
0688
0689 while (order > 1) {
0690 buddy = mb_find_buddy(e4b, order, &max);
0691 MB_CHECK_ASSERT(buddy);
0692 buddy2 = mb_find_buddy(e4b, order - 1, &max2);
0693 MB_CHECK_ASSERT(buddy2);
0694 MB_CHECK_ASSERT(buddy != buddy2);
0695 MB_CHECK_ASSERT(max * 2 == max2);
0696
0697 count = 0;
0698 for (i = 0; i < max; i++) {
0699
0700 if (mb_test_bit(i, buddy)) {
0701
0702 if (!mb_test_bit(i << 1, buddy2)) {
0703 MB_CHECK_ASSERT(
0704 mb_test_bit((i<<1)+1, buddy2));
0705 }
0706 continue;
0707 }
0708
0709
0710 MB_CHECK_ASSERT(mb_test_bit(i << 1, buddy2));
0711 MB_CHECK_ASSERT(mb_test_bit((i << 1) + 1, buddy2));
0712
0713 for (j = 0; j < (1 << order); j++) {
0714 k = (i * (1 << order)) + j;
0715 MB_CHECK_ASSERT(
0716 !mb_test_bit(k, e4b->bd_bitmap));
0717 }
0718 count++;
0719 }
0720 MB_CHECK_ASSERT(e4b->bd_info->bb_counters[order] == count);
0721 order--;
0722 }
0723
0724 fstart = -1;
0725 buddy = mb_find_buddy(e4b, 0, &max);
0726 for (i = 0; i < max; i++) {
0727 if (!mb_test_bit(i, buddy)) {
0728 MB_CHECK_ASSERT(i >= e4b->bd_info->bb_first_free);
0729 if (fstart == -1) {
0730 fragments++;
0731 fstart = i;
0732 }
0733 continue;
0734 }
0735 fstart = -1;
0736
0737 for (j = 0; j < e4b->bd_blkbits + 1; j++) {
0738 buddy2 = mb_find_buddy(e4b, j, &max2);
0739 k = i >> j;
0740 MB_CHECK_ASSERT(k < max2);
0741 MB_CHECK_ASSERT(mb_test_bit(k, buddy2));
0742 }
0743 }
0744 MB_CHECK_ASSERT(!EXT4_MB_GRP_NEED_INIT(e4b->bd_info));
0745 MB_CHECK_ASSERT(e4b->bd_info->bb_fragments == fragments);
0746
0747 grp = ext4_get_group_info(sb, e4b->bd_group);
0748 list_for_each(cur, &grp->bb_prealloc_list) {
0749 ext4_group_t groupnr;
0750 struct ext4_prealloc_space *pa;
0751 pa = list_entry(cur, struct ext4_prealloc_space, pa_group_list);
0752 ext4_get_group_no_and_offset(sb, pa->pa_pstart, &groupnr, &k);
0753 MB_CHECK_ASSERT(groupnr == e4b->bd_group);
0754 for (i = 0; i < pa->pa_len; i++)
0755 MB_CHECK_ASSERT(mb_test_bit(k + i, buddy));
0756 }
0757 return 0;
0758 }
0759 #undef MB_CHECK_ASSERT
0760 #define mb_check_buddy(e4b) __mb_check_buddy(e4b, \
0761 __FILE__, __func__, __LINE__)
0762 #else
0763 #define mb_check_buddy(e4b)
0764 #endif
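/*
 * Mark the free range [first, first + len) in the buddy bitmaps: the
 * range is carved into the largest power-of-two chunks permitted by the
 * current alignment (ffs(first | border)) and the remaining length
 * (fls(len)), bumping bb_counters[] and clearing the matching bit in the
 * buddy bitmap of that order for every chunk.
 */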
0772 static void ext4_mb_mark_free_simple(struct super_block *sb,
0773 void *buddy, ext4_grpblk_t first, ext4_grpblk_t len,
0774 struct ext4_group_info *grp)
0775 {
0776 struct ext4_sb_info *sbi = EXT4_SB(sb);
0777 ext4_grpblk_t min;
0778 ext4_grpblk_t max;
0779 ext4_grpblk_t chunk;
0780 unsigned int border;
0781
0782 BUG_ON(len > EXT4_CLUSTERS_PER_GROUP(sb));
0783
0784 border = 2 << sb->s_blocksize_bits;
0785
0786 while (len > 0) {
0787
0788 max = ffs(first | border) - 1;
0789
0790
0791 min = fls(len) - 1;
0792
0793 if (max < min)
0794 min = max;
0795 chunk = 1 << min;
0796
0797
0798 grp->bb_counters[min]++;
0799 if (min > 0)
0800 mb_clear_bit(first >> min,
0801 buddy + sbi->s_mb_offsets[min]);
0802
0803 len -= chunk;
0804 first += chunk;
0805 }
0806 }
0807
0808 static int mb_avg_fragment_size_order(struct super_block *sb, ext4_grpblk_t len)
0809 {
0810 int order;
0811
0812
0813
0814
0815
0816 order = fls(len) - 2;
0817 if (order < 0)
0818 return 0;
0819 if (order == MB_NUM_ORDERS(sb))
0820 order--;
0821 return order;
0822 }
0823
0824
0825 static void
0826 mb_update_avg_fragment_size(struct super_block *sb, struct ext4_group_info *grp)
0827 {
0828 struct ext4_sb_info *sbi = EXT4_SB(sb);
0829 int new_order;
0830
0831 if (!test_opt2(sb, MB_OPTIMIZE_SCAN) || grp->bb_free == 0)
0832 return;
0833
0834 new_order = mb_avg_fragment_size_order(sb,
0835 grp->bb_free / grp->bb_fragments);
0836 if (new_order == grp->bb_avg_fragment_size_order)
0837 return;
0838
0839 if (grp->bb_avg_fragment_size_order != -1) {
0840 write_lock(&sbi->s_mb_avg_fragment_size_locks[
0841 grp->bb_avg_fragment_size_order]);
0842 list_del(&grp->bb_avg_fragment_size_node);
0843 write_unlock(&sbi->s_mb_avg_fragment_size_locks[
0844 grp->bb_avg_fragment_size_order]);
0845 }
0846 grp->bb_avg_fragment_size_order = new_order;
0847 write_lock(&sbi->s_mb_avg_fragment_size_locks[
0848 grp->bb_avg_fragment_size_order]);
0849 list_add_tail(&grp->bb_avg_fragment_size_node,
0850 &sbi->s_mb_avg_fragment_size[grp->bb_avg_fragment_size_order]);
0851 write_unlock(&sbi->s_mb_avg_fragment_size_locks[
0852 grp->bb_avg_fragment_size_order]);
0853 }
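/*
 * CR0 group selection for the mb_optimize_scan path: walk the
 * s_mb_largest_free_orders lists, starting at the order of the request
 * (ac_2order), and pick the first group that passes ext4_mb_good_group().
 * If no list yields a usable group, fall through to criteria 1.
 */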
0859 static void ext4_mb_choose_next_group_cr0(struct ext4_allocation_context *ac,
0860 int *new_cr, ext4_group_t *group, ext4_group_t ngroups)
0861 {
0862 struct ext4_sb_info *sbi = EXT4_SB(ac->ac_sb);
0863 struct ext4_group_info *iter, *grp;
0864 int i;
0865
0866 if (ac->ac_status == AC_STATUS_FOUND)
0867 return;
0868
0869 if (unlikely(sbi->s_mb_stats && ac->ac_flags & EXT4_MB_CR0_OPTIMIZED))
0870 atomic_inc(&sbi->s_bal_cr0_bad_suggestions);
0871
0872 grp = NULL;
0873 for (i = ac->ac_2order; i < MB_NUM_ORDERS(ac->ac_sb); i++) {
0874 if (list_empty(&sbi->s_mb_largest_free_orders[i]))
0875 continue;
0876 read_lock(&sbi->s_mb_largest_free_orders_locks[i]);
0877 if (list_empty(&sbi->s_mb_largest_free_orders[i])) {
0878 read_unlock(&sbi->s_mb_largest_free_orders_locks[i]);
0879 continue;
0880 }
0881 grp = NULL;
0882 list_for_each_entry(iter, &sbi->s_mb_largest_free_orders[i],
0883 bb_largest_free_order_node) {
0884 if (sbi->s_mb_stats)
0885 atomic64_inc(&sbi->s_bal_cX_groups_considered[0]);
0886 if (likely(ext4_mb_good_group(ac, iter->bb_group, 0))) {
0887 grp = iter;
0888 break;
0889 }
0890 }
0891 read_unlock(&sbi->s_mb_largest_free_orders_locks[i]);
0892 if (grp)
0893 break;
0894 }
0895
0896 if (!grp) {
0897
0898 *new_cr = 1;
0899 } else {
0900 *group = grp->bb_group;
0901 ac->ac_flags |= EXT4_MB_CR0_OPTIMIZED;
0902 }
0903 }
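/*
 * CR1 group selection: walk the s_mb_avg_fragment_size lists, starting
 * at the order that matches the goal length, and pick the first group
 * that passes ext4_mb_good_group().  If none is found, fall through to
 * criteria 2.
 */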
0909 static void ext4_mb_choose_next_group_cr1(struct ext4_allocation_context *ac,
0910 int *new_cr, ext4_group_t *group, ext4_group_t ngroups)
0911 {
0912 struct ext4_sb_info *sbi = EXT4_SB(ac->ac_sb);
0913 struct ext4_group_info *grp = NULL, *iter;
0914 int i;
0915
0916 if (unlikely(ac->ac_flags & EXT4_MB_CR1_OPTIMIZED)) {
0917 if (sbi->s_mb_stats)
0918 atomic_inc(&sbi->s_bal_cr1_bad_suggestions);
0919 }
0920
0921 for (i = mb_avg_fragment_size_order(ac->ac_sb, ac->ac_g_ex.fe_len);
0922 i < MB_NUM_ORDERS(ac->ac_sb); i++) {
0923 if (list_empty(&sbi->s_mb_avg_fragment_size[i]))
0924 continue;
0925 read_lock(&sbi->s_mb_avg_fragment_size_locks[i]);
0926 if (list_empty(&sbi->s_mb_avg_fragment_size[i])) {
0927 read_unlock(&sbi->s_mb_avg_fragment_size_locks[i]);
0928 continue;
0929 }
0930 list_for_each_entry(iter, &sbi->s_mb_avg_fragment_size[i],
0931 bb_avg_fragment_size_node) {
0932 if (sbi->s_mb_stats)
0933 atomic64_inc(&sbi->s_bal_cX_groups_considered[1]);
0934 if (likely(ext4_mb_good_group(ac, iter->bb_group, 1))) {
0935 grp = iter;
0936 break;
0937 }
0938 }
0939 read_unlock(&sbi->s_mb_avg_fragment_size_locks[i]);
0940 if (grp)
0941 break;
0942 }
0943
0944 if (grp) {
0945 *group = grp->bb_group;
0946 ac->ac_flags |= EXT4_MB_CR1_OPTIMIZED;
0947 } else {
0948 *new_cr = 2;
0949 }
0950 }
0951
0952 static inline int should_optimize_scan(struct ext4_allocation_context *ac)
0953 {
0954 if (unlikely(!test_opt2(ac->ac_sb, MB_OPTIMIZE_SCAN)))
0955 return 0;
0956 if (ac->ac_criteria >= 2)
0957 return 0;
0958 if (!ext4_test_inode_flag(ac->ac_inode, EXT4_INODE_EXTENTS))
0959 return 0;
0960 return 1;
0961 }
0962
0963
0964
0965
0966
0967 static int
0968 next_linear_group(struct ext4_allocation_context *ac, int group, int ngroups)
0969 {
0970 if (!should_optimize_scan(ac))
0971 goto inc_and_return;
0972
0973 if (ac->ac_groups_linear_remaining) {
0974 ac->ac_groups_linear_remaining--;
0975 goto inc_and_return;
0976 }
0977
0978 return group;
0979 inc_and_return:
0980
0981
0982
0983
0984 return group + 1 >= ngroups ? 0 : group + 1;
0985 }
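/*
 * Pick the next group to scan.  While linear scanning is still allowed
 * (mb_optimize_scan off, or linear budget left), simply advance to the
 * next group; otherwise dispatch to the CR0/CR1 list-based choosers
 * above, which may also bump *new_cr when they run out of candidates.
 */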
1000 static void ext4_mb_choose_next_group(struct ext4_allocation_context *ac,
1001 int *new_cr, ext4_group_t *group, ext4_group_t ngroups)
1002 {
1003 *new_cr = ac->ac_criteria;
1004
1005 if (!should_optimize_scan(ac) || ac->ac_groups_linear_remaining) {
1006 *group = next_linear_group(ac, *group, ngroups);
1007 return;
1008 }
1009
1010 if (*new_cr == 0) {
1011 ext4_mb_choose_next_group_cr0(ac, new_cr, group, ngroups);
1012 } else if (*new_cr == 1) {
1013 ext4_mb_choose_next_group_cr1(ac, new_cr, group, ngroups);
1014 } else {
1015
1016
1017
1018
1019 WARN_ON(1);
1020 }
1021 }
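/*
 * Recompute bb_largest_free_order for the group and, when
 * mb_optimize_scan is enabled, move the group onto the matching
 * s_mb_largest_free_orders list under the per-order rwlock.
 */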
1027 static void
1028 mb_set_largest_free_order(struct super_block *sb, struct ext4_group_info *grp)
1029 {
1030 struct ext4_sb_info *sbi = EXT4_SB(sb);
1031 int i;
1032
1033 for (i = MB_NUM_ORDERS(sb) - 1; i >= 0; i--)
1034 if (grp->bb_counters[i] > 0)
1035 break;
1036
1037 if (!test_opt2(sb, MB_OPTIMIZE_SCAN) ||
1038 i == grp->bb_largest_free_order) {
1039 grp->bb_largest_free_order = i;
1040 return;
1041 }
1042
1043 if (grp->bb_largest_free_order >= 0) {
1044 write_lock(&sbi->s_mb_largest_free_orders_locks[
1045 grp->bb_largest_free_order]);
1046 list_del_init(&grp->bb_largest_free_order_node);
1047 write_unlock(&sbi->s_mb_largest_free_orders_locks[
1048 grp->bb_largest_free_order]);
1049 }
1050 grp->bb_largest_free_order = i;
1051 if (grp->bb_largest_free_order >= 0 && grp->bb_free) {
1052 write_lock(&sbi->s_mb_largest_free_orders_locks[
1053 grp->bb_largest_free_order]);
1054 list_add_tail(&grp->bb_largest_free_order_node,
1055 &sbi->s_mb_largest_free_orders[grp->bb_largest_free_order]);
1056 write_unlock(&sbi->s_mb_largest_free_orders_locks[
1057 grp->bb_largest_free_order]);
1058 }
1059 }
1060
1061 static noinline_for_stack
1062 void ext4_mb_generate_buddy(struct super_block *sb,
1063 void *buddy, void *bitmap, ext4_group_t group)
1064 {
1065 struct ext4_group_info *grp = ext4_get_group_info(sb, group);
1066 struct ext4_sb_info *sbi = EXT4_SB(sb);
1067 ext4_grpblk_t max = EXT4_CLUSTERS_PER_GROUP(sb);
1068 ext4_grpblk_t i = 0;
1069 ext4_grpblk_t first;
1070 ext4_grpblk_t len;
1071 unsigned free = 0;
1072 unsigned fragments = 0;
1073 unsigned long long period = get_cycles();
1074
1075
1076
1077 i = mb_find_next_zero_bit(bitmap, max, 0);
1078 grp->bb_first_free = i;
1079 while (i < max) {
1080 fragments++;
1081 first = i;
1082 i = mb_find_next_bit(bitmap, max, i);
1083 len = i - first;
1084 free += len;
1085 if (len > 1)
1086 ext4_mb_mark_free_simple(sb, buddy, first, len, grp);
1087 else
1088 grp->bb_counters[0]++;
1089 if (i < max)
1090 i = mb_find_next_zero_bit(bitmap, max, i);
1091 }
1092 grp->bb_fragments = fragments;
1093
1094 if (free != grp->bb_free) {
1095 ext4_grp_locked_error(sb, group, 0, 0,
1096 "block bitmap and bg descriptor "
1097 "inconsistent: %u vs %u free clusters",
1098 free, grp->bb_free);
1099
1100
1101
1102
1103 grp->bb_free = free;
1104 ext4_mark_group_bitmap_corrupted(sb, group,
1105 EXT4_GROUP_INFO_BBITMAP_CORRUPT);
1106 }
1107 mb_set_largest_free_order(sb, grp);
1108 mb_update_avg_fragment_size(sb, grp);
1109
1110 clear_bit(EXT4_GROUP_INFO_NEED_INIT_BIT, &(grp->bb_state));
1111
1112 period = get_cycles() - period;
1113 atomic_inc(&sbi->s_mb_buddies_generated);
1114 atomic64_add(period, &sbi->s_mb_generation_time);
1115 }
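/*
 * Fill one page of the buddy cache.  Each group owns two consecutive
 * blocks in the buddy cache inode: an even block holding a copy of the
 * on-disk block bitmap and the following odd block holding the generated
 * buddy bitmap (see the (first_block + i) & 1 test below).  @incore
 * carries the freshly copied bitmap from the even block over to the
 * buddy generation for the odd block.
 */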
1137 static int ext4_mb_init_cache(struct page *page, char *incore, gfp_t gfp)
1138 {
1139 ext4_group_t ngroups;
1140 int blocksize;
1141 int blocks_per_page;
1142 int groups_per_page;
1143 int err = 0;
1144 int i;
1145 ext4_group_t first_group, group;
1146 int first_block;
1147 struct super_block *sb;
1148 struct buffer_head *bhs;
1149 struct buffer_head **bh = NULL;
1150 struct inode *inode;
1151 char *data;
1152 char *bitmap;
1153 struct ext4_group_info *grinfo;
1154
1155 inode = page->mapping->host;
1156 sb = inode->i_sb;
1157 ngroups = ext4_get_groups_count(sb);
1158 blocksize = i_blocksize(inode);
1159 blocks_per_page = PAGE_SIZE / blocksize;
1160
1161 mb_debug(sb, "init page %lu\n", page->index);
1162
1163 groups_per_page = blocks_per_page >> 1;
1164 if (groups_per_page == 0)
1165 groups_per_page = 1;
1166
1167
1168 if (groups_per_page > 1) {
1169 i = sizeof(struct buffer_head *) * groups_per_page;
1170 bh = kzalloc(i, gfp);
1171 if (bh == NULL) {
1172 err = -ENOMEM;
1173 goto out;
1174 }
1175 } else
1176 bh = &bhs;
1177
1178 first_group = page->index * blocks_per_page / 2;
1179
1180
1181 for (i = 0, group = first_group; i < groups_per_page; i++, group++) {
1182 if (group >= ngroups)
1183 break;
1184
1185 grinfo = ext4_get_group_info(sb, group);
1186
1187
1188
1189
1190
1191
1192 if (PageUptodate(page) && !EXT4_MB_GRP_NEED_INIT(grinfo)) {
1193 bh[i] = NULL;
1194 continue;
1195 }
1196 bh[i] = ext4_read_block_bitmap_nowait(sb, group, false);
1197 if (IS_ERR(bh[i])) {
1198 err = PTR_ERR(bh[i]);
1199 bh[i] = NULL;
1200 goto out;
1201 }
1202 mb_debug(sb, "read bitmap for group %u\n", group);
1203 }
1204
1205
1206 for (i = 0, group = first_group; i < groups_per_page; i++, group++) {
1207 int err2;
1208
1209 if (!bh[i])
1210 continue;
1211 err2 = ext4_wait_block_bitmap(sb, group, bh[i]);
1212 if (!err)
1213 err = err2;
1214 }
1215
1216 first_block = page->index * blocks_per_page;
1217 for (i = 0; i < blocks_per_page; i++) {
1218 group = (first_block + i) >> 1;
1219 if (group >= ngroups)
1220 break;
1221
1222 if (!bh[group - first_group])
1223
1224 continue;
1225
1226 if (!buffer_verified(bh[group - first_group]))
1227
1228 continue;
1229 err = 0;
1230
1231
1232
1233
1234
1235
1236
1237 data = page_address(page) + (i * blocksize);
1238 bitmap = bh[group - first_group]->b_data;
1239
1240
1241
1242
1243
1244 if ((first_block + i) & 1) {
1245
1246 BUG_ON(incore == NULL);
1247 mb_debug(sb, "put buddy for group %u in page %lu/%x\n",
1248 group, page->index, i * blocksize);
1249 trace_ext4_mb_buddy_bitmap_load(sb, group);
1250 grinfo = ext4_get_group_info(sb, group);
1251 grinfo->bb_fragments = 0;
1252 memset(grinfo->bb_counters, 0,
1253 sizeof(*grinfo->bb_counters) *
1254 (MB_NUM_ORDERS(sb)));
1255
1256
1257
1258 ext4_lock_group(sb, group);
1259
1260 memset(data, 0xff, blocksize);
1261 ext4_mb_generate_buddy(sb, data, incore, group);
1262 ext4_unlock_group(sb, group);
1263 incore = NULL;
1264 } else {
1265
1266 BUG_ON(incore != NULL);
1267 mb_debug(sb, "put bitmap for group %u in page %lu/%x\n",
1268 group, page->index, i * blocksize);
1269 trace_ext4_mb_bitmap_load(sb, group);
1270
1271
1272 ext4_lock_group(sb, group);
1273 memcpy(data, bitmap, blocksize);
1274
1275
1276 ext4_mb_generate_from_pa(sb, data, group);
1277 ext4_mb_generate_from_freelist(sb, data, group);
1278 ext4_unlock_group(sb, group);
1279
1280
1281
1282
1283 incore = data;
1284 }
1285 }
1286 SetPageUptodate(page);
1287
1288 out:
1289 if (bh) {
1290 for (i = 0; i < groups_per_page; i++)
1291 brelse(bh[i]);
1292 if (bh != &bhs)
1293 kfree(bh);
1294 }
1295 return err;
1296 }
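/*
 * Lock the buddy-cache page that holds the group's bitmap block and, if
 * the buddy block falls on a different page (blocksize == PAGE_SIZE),
 * lock that page as well.  The locked pages are stashed in @e4b for the
 * caller and released by ext4_mb_put_buddy_page_lock().
 */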
1304 static int ext4_mb_get_buddy_page_lock(struct super_block *sb,
1305 ext4_group_t group, struct ext4_buddy *e4b, gfp_t gfp)
1306 {
1307 struct inode *inode = EXT4_SB(sb)->s_buddy_cache;
1308 int block, pnum, poff;
1309 int blocks_per_page;
1310 struct page *page;
1311
1312 e4b->bd_buddy_page = NULL;
1313 e4b->bd_bitmap_page = NULL;
1314
1315 blocks_per_page = PAGE_SIZE / sb->s_blocksize;
1316
1317
1318
1319
1320
1321 block = group * 2;
1322 pnum = block / blocks_per_page;
1323 poff = block % blocks_per_page;
1324 page = find_or_create_page(inode->i_mapping, pnum, gfp);
1325 if (!page)
1326 return -ENOMEM;
1327 BUG_ON(page->mapping != inode->i_mapping);
1328 e4b->bd_bitmap_page = page;
1329 e4b->bd_bitmap = page_address(page) + (poff * sb->s_blocksize);
1330
1331 if (blocks_per_page >= 2) {
1332
1333 return 0;
1334 }
1335
1336 block++;
1337 pnum = block / blocks_per_page;
1338 page = find_or_create_page(inode->i_mapping, pnum, gfp);
1339 if (!page)
1340 return -ENOMEM;
1341 BUG_ON(page->mapping != inode->i_mapping);
1342 e4b->bd_buddy_page = page;
1343 return 0;
1344 }
1345
1346 static void ext4_mb_put_buddy_page_lock(struct ext4_buddy *e4b)
1347 {
1348 if (e4b->bd_bitmap_page) {
1349 unlock_page(e4b->bd_bitmap_page);
1350 put_page(e4b->bd_bitmap_page);
1351 }
1352 if (e4b->bd_buddy_page) {
1353 unlock_page(e4b->bd_buddy_page);
1354 put_page(e4b->bd_buddy_page);
1355 }
1356 }
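/*
 * Initialize the buddy cache for one group: lock its bitmap/buddy pages,
 * then run ext4_mb_init_cache() on the bitmap page and, if it is
 * separate, on the buddy page.  Holding the page locks for the whole
 * operation keeps a concurrent initializer from seeing a half-built
 * buddy.
 */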
1363 static noinline_for_stack
1364 int ext4_mb_init_group(struct super_block *sb, ext4_group_t group, gfp_t gfp)
1365 {
1366
1367 struct ext4_group_info *this_grp;
1368 struct ext4_buddy e4b;
1369 struct page *page;
1370 int ret = 0;
1371
1372 might_sleep();
1373 mb_debug(sb, "init group %u\n", group);
1374 this_grp = ext4_get_group_info(sb, group);
1375
1376
1377
1378
1379
1380
1381
1382
1383
1384 ret = ext4_mb_get_buddy_page_lock(sb, group, &e4b, gfp);
1385 if (ret || !EXT4_MB_GRP_NEED_INIT(this_grp)) {
1386
1387
1388
1389
1390 goto err;
1391 }
1392
1393 page = e4b.bd_bitmap_page;
1394 ret = ext4_mb_init_cache(page, NULL, gfp);
1395 if (ret)
1396 goto err;
1397 if (!PageUptodate(page)) {
1398 ret = -EIO;
1399 goto err;
1400 }
1401
1402 if (e4b.bd_buddy_page == NULL) {
1403
1404
1405
1406
1407
1408 ret = 0;
1409 goto err;
1410 }
1411
1412 page = e4b.bd_buddy_page;
1413 ret = ext4_mb_init_cache(page, e4b.bd_bitmap, gfp);
1414 if (ret)
1415 goto err;
1416 if (!PageUptodate(page)) {
1417 ret = -EIO;
1418 goto err;
1419 }
1420 err:
1421 ext4_mb_put_buddy_page_lock(&e4b);
1422 return ret;
1423 }
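/*
 * Look up (and build, if still missing) the cached bitmap and buddy
 * pages for @group, take a reference on both and fill in the ext4_buddy
 * descriptor.  The pages are dropped again by ext4_mb_unload_buddy().
 */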
1430 static noinline_for_stack int
1431 ext4_mb_load_buddy_gfp(struct super_block *sb, ext4_group_t group,
1432 struct ext4_buddy *e4b, gfp_t gfp)
1433 {
1434 int blocks_per_page;
1435 int block;
1436 int pnum;
1437 int poff;
1438 struct page *page;
1439 int ret;
1440 struct ext4_group_info *grp;
1441 struct ext4_sb_info *sbi = EXT4_SB(sb);
1442 struct inode *inode = sbi->s_buddy_cache;
1443
1444 might_sleep();
1445 mb_debug(sb, "load group %u\n", group);
1446
1447 blocks_per_page = PAGE_SIZE / sb->s_blocksize;
1448 grp = ext4_get_group_info(sb, group);
1449
1450 e4b->bd_blkbits = sb->s_blocksize_bits;
1451 e4b->bd_info = grp;
1452 e4b->bd_sb = sb;
1453 e4b->bd_group = group;
1454 e4b->bd_buddy_page = NULL;
1455 e4b->bd_bitmap_page = NULL;
1456
1457 if (unlikely(EXT4_MB_GRP_NEED_INIT(grp))) {
1458
1459
1460
1461
1462 ret = ext4_mb_init_group(sb, group, gfp);
1463 if (ret)
1464 return ret;
1465 }
1466
1467
1468
1469
1470
1471
1472 block = group * 2;
1473 pnum = block / blocks_per_page;
1474 poff = block % blocks_per_page;
1475
1476
1477
1478 page = find_get_page_flags(inode->i_mapping, pnum, FGP_ACCESSED);
1479 if (page == NULL || !PageUptodate(page)) {
1480 if (page)
1481
1482
1483
1484
1485
1486
1487
1488
1489 put_page(page);
1490 page = find_or_create_page(inode->i_mapping, pnum, gfp);
1491 if (page) {
1492 BUG_ON(page->mapping != inode->i_mapping);
1493 if (!PageUptodate(page)) {
1494 ret = ext4_mb_init_cache(page, NULL, gfp);
1495 if (ret) {
1496 unlock_page(page);
1497 goto err;
1498 }
1499 mb_cmp_bitmaps(e4b, page_address(page) +
1500 (poff * sb->s_blocksize));
1501 }
1502 unlock_page(page);
1503 }
1504 }
1505 if (page == NULL) {
1506 ret = -ENOMEM;
1507 goto err;
1508 }
1509 if (!PageUptodate(page)) {
1510 ret = -EIO;
1511 goto err;
1512 }
1513
1514
1515 e4b->bd_bitmap_page = page;
1516 e4b->bd_bitmap = page_address(page) + (poff * sb->s_blocksize);
1517
1518 block++;
1519 pnum = block / blocks_per_page;
1520 poff = block % blocks_per_page;
1521
1522 page = find_get_page_flags(inode->i_mapping, pnum, FGP_ACCESSED);
1523 if (page == NULL || !PageUptodate(page)) {
1524 if (page)
1525 put_page(page);
1526 page = find_or_create_page(inode->i_mapping, pnum, gfp);
1527 if (page) {
1528 BUG_ON(page->mapping != inode->i_mapping);
1529 if (!PageUptodate(page)) {
1530 ret = ext4_mb_init_cache(page, e4b->bd_bitmap,
1531 gfp);
1532 if (ret) {
1533 unlock_page(page);
1534 goto err;
1535 }
1536 }
1537 unlock_page(page);
1538 }
1539 }
1540 if (page == NULL) {
1541 ret = -ENOMEM;
1542 goto err;
1543 }
1544 if (!PageUptodate(page)) {
1545 ret = -EIO;
1546 goto err;
1547 }
1548
1549
1550 e4b->bd_buddy_page = page;
1551 e4b->bd_buddy = page_address(page) + (poff * sb->s_blocksize);
1552
1553 return 0;
1554
1555 err:
1556 if (page)
1557 put_page(page);
1558 if (e4b->bd_bitmap_page)
1559 put_page(e4b->bd_bitmap_page);
1560 if (e4b->bd_buddy_page)
1561 put_page(e4b->bd_buddy_page);
1562 e4b->bd_buddy = NULL;
1563 e4b->bd_bitmap = NULL;
1564 return ret;
1565 }
1566
1567 static int ext4_mb_load_buddy(struct super_block *sb, ext4_group_t group,
1568 struct ext4_buddy *e4b)
1569 {
1570 return ext4_mb_load_buddy_gfp(sb, group, e4b, GFP_NOFS);
1571 }
1572
1573 static void ext4_mb_unload_buddy(struct ext4_buddy *e4b)
1574 {
1575 if (e4b->bd_bitmap_page)
1576 put_page(e4b->bd_bitmap_page);
1577 if (e4b->bd_buddy_page)
1578 put_page(e4b->bd_buddy_page);
1579 }
1580
1581
1582 static int mb_find_order_for_block(struct ext4_buddy *e4b, int block)
1583 {
1584 int order = 1, max;
1585 void *bb;
1586
1587 BUG_ON(e4b->bd_bitmap == e4b->bd_buddy);
1588 BUG_ON(block >= (1 << (e4b->bd_blkbits + 3)));
1589
1590 while (order <= e4b->bd_blkbits + 1) {
1591 bb = mb_find_buddy(e4b, order, &max);
1592 if (!mb_test_bit(block >> order, bb)) {
1593
1594 return order;
1595 }
1596 order++;
1597 }
1598 return 0;
1599 }
1600
1601 static void mb_clear_bits(void *bm, int cur, int len)
1602 {
1603 __u32 *addr;
1604
1605 len = cur + len;
1606 while (cur < len) {
1607 if ((cur & 31) == 0 && (len - cur) >= 32) {
1608
1609 addr = bm + (cur >> 3);
1610 *addr = 0;
1611 cur += 32;
1612 continue;
1613 }
1614 mb_clear_bit(cur, bm);
1615 cur++;
1616 }
1617 }
1618
1619
1620
1621
1622 static int mb_test_and_clear_bits(void *bm, int cur, int len)
1623 {
1624 __u32 *addr;
1625 int zero_bit = -1;
1626
1627 len = cur + len;
1628 while (cur < len) {
1629 if ((cur & 31) == 0 && (len - cur) >= 32) {
1630
1631 addr = bm + (cur >> 3);
1632 if (*addr != (__u32)(-1) && zero_bit == -1)
1633 zero_bit = cur + mb_find_next_zero_bit(addr, 32, 0);
1634 *addr = 0;
1635 cur += 32;
1636 continue;
1637 }
1638 if (!mb_test_and_clear_bit(cur, bm) && zero_bit == -1)
1639 zero_bit = cur;
1640 cur++;
1641 }
1642
1643 return zero_bit;
1644 }
1645
1646 void mb_set_bits(void *bm, int cur, int len)
1647 {
1648 __u32 *addr;
1649
1650 len = cur + len;
1651 while (cur < len) {
1652 if ((cur & 31) == 0 && (len - cur) >= 32) {
1653
1654 addr = bm + (cur >> 3);
1655 *addr = 0xffffffff;
1656 cur += 32;
1657 continue;
1658 }
1659 mb_set_bit(cur, bm);
1660 cur++;
1661 }
1662 }
1663
1664 static inline int mb_buddy_adjust_border(int* bit, void* bitmap, int side)
1665 {
1666 if (mb_test_bit(*bit + side, bitmap)) {
1667 mb_clear_bit(*bit, bitmap);
1668 (*bit) -= side;
1669 return 1;
1670 }
1671 else {
1672 (*bit) += side;
1673 mb_set_bit(*bit, bitmap);
1674 return -1;
1675 }
1676 }
1677
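/*
 * Free the buddy range [first, last] starting at order 1 (the order-0
 * bits were already handled by the caller).  At each order the odd
 * "first" / even "last" borders go through mb_buddy_adjust_border(),
 * which either merges with an already-free neighbour or records this
 * half as free at the current order; the part that stays fully covered
 * is then propagated to the next order.
 */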
1678 static void mb_buddy_mark_free(struct ext4_buddy *e4b, int first, int last)
1679 {
1680 int max;
1681 int order = 1;
1682 void *buddy = mb_find_buddy(e4b, order, &max);
1683
1684 while (buddy) {
1685 void *buddy2;
1716 if (first & 1)
1717 e4b->bd_info->bb_counters[order] += mb_buddy_adjust_border(&first, buddy, -1);
1718 if (!(last & 1))
1719 e4b->bd_info->bb_counters[order] += mb_buddy_adjust_border(&last, buddy, 1);
1720 if (first > last)
1721 break;
1722 order++;
1723
1724 if (first == last || !(buddy2 = mb_find_buddy(e4b, order, &max))) {
1725 mb_clear_bits(buddy, first, last - first + 1);
1726 e4b->bd_info->bb_counters[order - 1] += last - first + 1;
1727 break;
1728 }
1729 first >>= 1;
1730 last >>= 1;
1731 buddy = buddy2;
1732 }
1733 }
1734
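/*
 * Return the clusters [first, first + count) to the free pool: update
 * bb_free/bb_first_free and the fragment count, clear the bits in the
 * in-core bitmap (flagging the group corrupt if any bit was already
 * clear), handle the unaligned order-0 ends and let mb_buddy_mark_free()
 * merge the rest up through the buddy orders.
 */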
1735 static void mb_free_blocks(struct inode *inode, struct ext4_buddy *e4b,
1736 int first, int count)
1737 {
1738 int left_is_free = 0;
1739 int right_is_free = 0;
1740 int block;
1741 int last = first + count - 1;
1742 struct super_block *sb = e4b->bd_sb;
1743
1744 if (WARN_ON(count == 0))
1745 return;
1746 BUG_ON(last >= (sb->s_blocksize << 3));
1747 assert_spin_locked(ext4_group_lock_ptr(sb, e4b->bd_group));
1748
1749 if (unlikely(EXT4_MB_GRP_BBITMAP_CORRUPT(e4b->bd_info)))
1750 return;
1751
1752 mb_check_buddy(e4b);
1753 mb_free_blocks_double(inode, e4b, first, count);
1754
1755 this_cpu_inc(discard_pa_seq);
1756 e4b->bd_info->bb_free += count;
1757 if (first < e4b->bd_info->bb_first_free)
1758 e4b->bd_info->bb_first_free = first;
1759
1760
1761
1762
1763 if (first != 0)
1764 left_is_free = !mb_test_bit(first - 1, e4b->bd_bitmap);
1765 block = mb_test_and_clear_bits(e4b->bd_bitmap, first, count);
1766 if (last + 1 < EXT4_SB(sb)->s_mb_maxs[0])
1767 right_is_free = !mb_test_bit(last + 1, e4b->bd_bitmap);
1768
1769 if (unlikely(block != -1)) {
1770 struct ext4_sb_info *sbi = EXT4_SB(sb);
1771 ext4_fsblk_t blocknr;
1772
1773 blocknr = ext4_group_first_block_no(sb, e4b->bd_group);
1774 blocknr += EXT4_C2B(sbi, block);
1775 if (!(sbi->s_mount_state & EXT4_FC_REPLAY)) {
1776 ext4_grp_locked_error(sb, e4b->bd_group,
1777 inode ? inode->i_ino : 0,
1778 blocknr,
1779 "freeing already freed block (bit %u); block bitmap corrupt.",
1780 block);
1781 ext4_mark_group_bitmap_corrupted(
1782 sb, e4b->bd_group,
1783 EXT4_GROUP_INFO_BBITMAP_CORRUPT);
1784 }
1785 goto done;
1786 }
1787
1788
1789 if (left_is_free && right_is_free)
1790 e4b->bd_info->bb_fragments--;
1791 else if (!left_is_free && !right_is_free)
1792 e4b->bd_info->bb_fragments++;
1793
1794
1795
1796
1797
1798
1799
1800 if (first & 1) {
1801 first += !left_is_free;
1802 e4b->bd_info->bb_counters[0] += left_is_free ? -1 : 1;
1803 }
1804 if (!(last & 1)) {
1805 last -= !right_is_free;
1806 e4b->bd_info->bb_counters[0] += right_is_free ? -1 : 1;
1807 }
1808
1809 if (first <= last)
1810 mb_buddy_mark_free(e4b, first >> 1, last >> 1);
1811
1812 done:
1813 mb_set_largest_free_order(sb, e4b->bd_info);
1814 mb_update_avg_fragment_size(sb, e4b->bd_info);
1815 mb_check_buddy(e4b);
1816 }
1817
1818 static int mb_find_extent(struct ext4_buddy *e4b, int block,
1819 int needed, struct ext4_free_extent *ex)
1820 {
1821 int next = block;
1822 int max, order;
1823 void *buddy;
1824
1825 assert_spin_locked(ext4_group_lock_ptr(e4b->bd_sb, e4b->bd_group));
1826 BUG_ON(ex == NULL);
1827
1828 buddy = mb_find_buddy(e4b, 0, &max);
1829 BUG_ON(buddy == NULL);
1830 BUG_ON(block >= max);
1831 if (mb_test_bit(block, buddy)) {
1832 ex->fe_len = 0;
1833 ex->fe_start = 0;
1834 ex->fe_group = 0;
1835 return 0;
1836 }
1837
1838
1839 order = mb_find_order_for_block(e4b, block);
1840 block = block >> order;
1841
1842 ex->fe_len = 1 << order;
1843 ex->fe_start = block << order;
1844 ex->fe_group = e4b->bd_group;
1845
1846
1847 next = next - ex->fe_start;
1848 ex->fe_len -= next;
1849 ex->fe_start += next;
1850
1851 while (needed > ex->fe_len &&
1852 mb_find_buddy(e4b, order, &max)) {
1853
1854 if (block + 1 >= max)
1855 break;
1856
1857 next = (block + 1) * (1 << order);
1858 if (mb_test_bit(next, e4b->bd_bitmap))
1859 break;
1860
1861 order = mb_find_order_for_block(e4b, next);
1862
1863 block = next >> order;
1864 ex->fe_len += 1 << order;
1865 }
1866
1867 if (ex->fe_start + ex->fe_len > EXT4_CLUSTERS_PER_GROUP(e4b->bd_sb)) {
1868
1869 WARN_ON(1);
1870 ext4_grp_locked_error(e4b->bd_sb, e4b->bd_group, 0, 0,
1871 "corruption or bug in mb_find_extent "
1872 "block=%d, order=%d needed=%d ex=%u/%d/%d@%u",
1873 block, order, needed, ex->fe_group, ex->fe_start,
1874 ex->fe_len, ex->fe_logical);
1875 ex->fe_len = 0;
1876 ex->fe_start = 0;
1877 ex->fe_group = 0;
1878 }
1879 return ex->fe_len;
1880 }
1881
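/*
 * Mark the extent @ex as in use: adjust bb_free, bb_first_free and the
 * fragment count, walk the range splitting larger buddies whenever the
 * current position or remaining length is not buddy-aligned, and finally
 * set the order-0 bits.  The first time a buddy has to be split, the
 * remaining length and the split order are packed into the return value
 * as (len | ord << 16); 0 means the extent covered whole buddies only.
 */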
1882 static int mb_mark_used(struct ext4_buddy *e4b, struct ext4_free_extent *ex)
1883 {
1884 int ord;
1885 int mlen = 0;
1886 int max = 0;
1887 int cur;
1888 int start = ex->fe_start;
1889 int len = ex->fe_len;
1890 unsigned ret = 0;
1891 int len0 = len;
1892 void *buddy;
1893 bool split = false;
1894
1895 BUG_ON(start + len > (e4b->bd_sb->s_blocksize << 3));
1896 BUG_ON(e4b->bd_group != ex->fe_group);
1897 assert_spin_locked(ext4_group_lock_ptr(e4b->bd_sb, e4b->bd_group));
1898 mb_check_buddy(e4b);
1899 mb_mark_used_double(e4b, start, len);
1900
1901 this_cpu_inc(discard_pa_seq);
1902 e4b->bd_info->bb_free -= len;
1903 if (e4b->bd_info->bb_first_free == start)
1904 e4b->bd_info->bb_first_free += len;
1905
1906
1907 if (start != 0)
1908 mlen = !mb_test_bit(start - 1, e4b->bd_bitmap);
1909 if (start + len < EXT4_SB(e4b->bd_sb)->s_mb_maxs[0])
1910 max = !mb_test_bit(start + len, e4b->bd_bitmap);
1911 if (mlen && max)
1912 e4b->bd_info->bb_fragments++;
1913 else if (!mlen && !max)
1914 e4b->bd_info->bb_fragments--;
1915
1916
1917 while (len) {
1918 if (!split)
1919 ord = mb_find_order_for_block(e4b, start);
1920
1921 if (((start >> ord) << ord) == start && len >= (1 << ord)) {
1922
1923 mlen = 1 << ord;
1924 if (!split)
1925 buddy = mb_find_buddy(e4b, ord, &max);
1926 else
1927 split = false;
1928 BUG_ON((start >> ord) >= max);
1929 mb_set_bit(start >> ord, buddy);
1930 e4b->bd_info->bb_counters[ord]--;
1931 start += mlen;
1932 len -= mlen;
1933 BUG_ON(len < 0);
1934 continue;
1935 }
1936
1937
1938 if (ret == 0)
1939 ret = len | (ord << 16);
1940
1941
1942 BUG_ON(ord <= 0);
1943 buddy = mb_find_buddy(e4b, ord, &max);
1944 mb_set_bit(start >> ord, buddy);
1945 e4b->bd_info->bb_counters[ord]--;
1946
1947 ord--;
1948 cur = (start >> ord) & ~1U;
1949 buddy = mb_find_buddy(e4b, ord, &max);
1950 mb_clear_bit(cur, buddy);
1951 mb_clear_bit(cur + 1, buddy);
1952 e4b->bd_info->bb_counters[ord]++;
1953 e4b->bd_info->bb_counters[ord]++;
1954 split = true;
1955 }
1956 mb_set_largest_free_order(e4b->bd_sb, e4b->bd_info);
1957
1958 mb_update_avg_fragment_size(e4b->bd_sb, e4b->bd_info);
1959 mb_set_bits(e4b->bd_bitmap, ex->fe_start, len0);
1960 mb_check_buddy(e4b);
1961
1962 return ret;
1963 }
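/*
 * Commit the best-found extent: clamp it to the goal length, mark it
 * used in the buddy, remember the pages backing the bitmap and buddy,
 * record the stream-allocation hint (s_mb_last_group/start) and, if we
 * got more than was originally asked for, turn the surplus into a
 * preallocation via ext4_mb_new_preallocation().
 */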
1968 static void ext4_mb_use_best_found(struct ext4_allocation_context *ac,
1969 struct ext4_buddy *e4b)
1970 {
1971 struct ext4_sb_info *sbi = EXT4_SB(ac->ac_sb);
1972 int ret;
1973
1974 BUG_ON(ac->ac_b_ex.fe_group != e4b->bd_group);
1975 BUG_ON(ac->ac_status == AC_STATUS_FOUND);
1976
1977 ac->ac_b_ex.fe_len = min(ac->ac_b_ex.fe_len, ac->ac_g_ex.fe_len);
1978 ac->ac_b_ex.fe_logical = ac->ac_g_ex.fe_logical;
1979 ret = mb_mark_used(e4b, &ac->ac_b_ex);
1980
1981
1982
1983 ac->ac_f_ex = ac->ac_b_ex;
1984
1985 ac->ac_status = AC_STATUS_FOUND;
1986 ac->ac_tail = ret & 0xffff;
1987 ac->ac_buddy = ret >> 16;
1988
1989
1990
1991
1992
1993
1994
1995
1996 ac->ac_bitmap_page = e4b->bd_bitmap_page;
1997 get_page(ac->ac_bitmap_page);
1998 ac->ac_buddy_page = e4b->bd_buddy_page;
1999 get_page(ac->ac_buddy_page);
2000
2001 if (ac->ac_flags & EXT4_MB_STREAM_ALLOC) {
2002 spin_lock(&sbi->s_md_lock);
2003 sbi->s_mb_last_group = ac->ac_f_ex.fe_group;
2004 sbi->s_mb_last_start = ac->ac_f_ex.fe_start;
2005 spin_unlock(&sbi->s_md_lock);
2006 }
2007
2008
2009
2010
2011
2012 if (ac->ac_o_ex.fe_len < ac->ac_b_ex.fe_len)
2013 ext4_mb_new_preallocation(ac);
2014
2015 }
2016
2017 static void ext4_mb_check_limits(struct ext4_allocation_context *ac,
2018 struct ext4_buddy *e4b,
2019 int finish_group)
2020 {
2021 struct ext4_sb_info *sbi = EXT4_SB(ac->ac_sb);
2022 struct ext4_free_extent *bex = &ac->ac_b_ex;
2023 struct ext4_free_extent *gex = &ac->ac_g_ex;
2024 struct ext4_free_extent ex;
2025 int max;
2026
2027 if (ac->ac_status == AC_STATUS_FOUND)
2028 return;
2029
2030
2031
2032 if (ac->ac_found > sbi->s_mb_max_to_scan &&
2033 !(ac->ac_flags & EXT4_MB_HINT_FIRST)) {
2034 ac->ac_status = AC_STATUS_BREAK;
2035 return;
2036 }
2037
2038
2039
2040
2041 if (bex->fe_len < gex->fe_len)
2042 return;
2043
2044 if ((finish_group || ac->ac_found > sbi->s_mb_min_to_scan)
2045 && bex->fe_group == e4b->bd_group) {
2046
2047
2048
2049 max = mb_find_extent(e4b, bex->fe_start, gex->fe_len, &ex);
2050 if (max >= gex->fe_len) {
2051 ext4_mb_use_best_found(ac, e4b);
2052 return;
2053 }
2054 }
2055 }
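/*
 * Decide whether the found extent @ex is good enough: take it right away
 * if EXT4_MB_HINT_FIRST is set or it matches the goal length exactly,
 * otherwise keep it as the best candidate so far (preferring extents
 * that reach the goal length, and the smallest of those) and let
 * ext4_mb_check_limits() decide when to stop scanning.
 */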
2067 static void ext4_mb_measure_extent(struct ext4_allocation_context *ac,
2068 struct ext4_free_extent *ex,
2069 struct ext4_buddy *e4b)
2070 {
2071 struct ext4_free_extent *bex = &ac->ac_b_ex;
2072 struct ext4_free_extent *gex = &ac->ac_g_ex;
2073
2074 BUG_ON(ex->fe_len <= 0);
2075 BUG_ON(ex->fe_len > EXT4_CLUSTERS_PER_GROUP(ac->ac_sb));
2076 BUG_ON(ex->fe_start >= EXT4_CLUSTERS_PER_GROUP(ac->ac_sb));
2077 BUG_ON(ac->ac_status != AC_STATUS_CONTINUE);
2078
2079 ac->ac_found++;
2080
2081
2082
2083
2084 if (unlikely(ac->ac_flags & EXT4_MB_HINT_FIRST)) {
2085 *bex = *ex;
2086 ext4_mb_use_best_found(ac, e4b);
2087 return;
2088 }
2089
2090
2091
2092
2093 if (ex->fe_len == gex->fe_len) {
2094 *bex = *ex;
2095 ext4_mb_use_best_found(ac, e4b);
2096 return;
2097 }
2098
2099
2100
2101
2102 if (bex->fe_len == 0) {
2103 *bex = *ex;
2104 return;
2105 }
2106
2107
2108
2109
2110 if (bex->fe_len < gex->fe_len) {
2111
2112
2113 if (ex->fe_len > bex->fe_len)
2114 *bex = *ex;
2115 } else if (ex->fe_len > gex->fe_len) {
2116
2117
2118
2119 if (ex->fe_len < bex->fe_len)
2120 *bex = *ex;
2121 }
2122
2123 ext4_mb_check_limits(ac, e4b, 0);
2124 }
2125
2126 static noinline_for_stack
2127 int ext4_mb_try_best_found(struct ext4_allocation_context *ac,
2128 struct ext4_buddy *e4b)
2129 {
2130 struct ext4_free_extent ex = ac->ac_b_ex;
2131 ext4_group_t group = ex.fe_group;
2132 int max;
2133 int err;
2134
2135 BUG_ON(ex.fe_len <= 0);
2136 err = ext4_mb_load_buddy(ac->ac_sb, group, e4b);
2137 if (err)
2138 return err;
2139
2140 ext4_lock_group(ac->ac_sb, group);
2141 max = mb_find_extent(e4b, ex.fe_start, ex.fe_len, &ex);
2142
2143 if (max > 0) {
2144 ac->ac_b_ex = ex;
2145 ext4_mb_use_best_found(ac, e4b);
2146 }
2147
2148 ext4_unlock_group(ac->ac_sb, group);
2149 ext4_mb_unload_buddy(e4b);
2150
2151 return 0;
2152 }
2153
2154 static noinline_for_stack
2155 int ext4_mb_find_by_goal(struct ext4_allocation_context *ac,
2156 struct ext4_buddy *e4b)
2157 {
2158 ext4_group_t group = ac->ac_g_ex.fe_group;
2159 int max;
2160 int err;
2161 struct ext4_sb_info *sbi = EXT4_SB(ac->ac_sb);
2162 struct ext4_group_info *grp = ext4_get_group_info(ac->ac_sb, group);
2163 struct ext4_free_extent ex;
2164
2165 if (!(ac->ac_flags & EXT4_MB_HINT_TRY_GOAL))
2166 return 0;
2167 if (grp->bb_free == 0)
2168 return 0;
2169
2170 err = ext4_mb_load_buddy(ac->ac_sb, group, e4b);
2171 if (err)
2172 return err;
2173
2174 if (unlikely(EXT4_MB_GRP_BBITMAP_CORRUPT(e4b->bd_info))) {
2175 ext4_mb_unload_buddy(e4b);
2176 return 0;
2177 }
2178
2179 ext4_lock_group(ac->ac_sb, group);
2180 max = mb_find_extent(e4b, ac->ac_g_ex.fe_start,
2181 ac->ac_g_ex.fe_len, &ex);
2182 ex.fe_logical = 0xDEADFA11;
2183
2184 if (max >= ac->ac_g_ex.fe_len && ac->ac_g_ex.fe_len == sbi->s_stripe) {
2185 ext4_fsblk_t start;
2186
2187 start = ext4_group_first_block_no(ac->ac_sb, e4b->bd_group) +
2188 ex.fe_start;
2189
2190 if (do_div(start, sbi->s_stripe) == 0) {
2191 ac->ac_found++;
2192 ac->ac_b_ex = ex;
2193 ext4_mb_use_best_found(ac, e4b);
2194 }
2195 } else if (max >= ac->ac_g_ex.fe_len) {
2196 BUG_ON(ex.fe_len <= 0);
2197 BUG_ON(ex.fe_group != ac->ac_g_ex.fe_group);
2198 BUG_ON(ex.fe_start != ac->ac_g_ex.fe_start);
2199 ac->ac_found++;
2200 ac->ac_b_ex = ex;
2201 ext4_mb_use_best_found(ac, e4b);
2202 } else if (max > 0 && (ac->ac_flags & EXT4_MB_HINT_MERGE)) {
2203
2204
2205 BUG_ON(ex.fe_len <= 0);
2206 BUG_ON(ex.fe_group != ac->ac_g_ex.fe_group);
2207 BUG_ON(ex.fe_start != ac->ac_g_ex.fe_start);
2208 ac->ac_found++;
2209 ac->ac_b_ex = ex;
2210 ext4_mb_use_best_found(ac, e4b);
2211 }
2212 ext4_unlock_group(ac->ac_sb, group);
2213 ext4_mb_unload_buddy(e4b);
2214
2215 return 0;
2216 }
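/*
 * CR0 scan: look only at the buddy bitmaps, starting at order ac_2order,
 * and grab the first whole free chunk of sufficient order.  A non-zero
 * bb_counters[] entry with no corresponding free bit means the group
 * info is corrupt.
 */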
2222 static noinline_for_stack
2223 void ext4_mb_simple_scan_group(struct ext4_allocation_context *ac,
2224 struct ext4_buddy *e4b)
2225 {
2226 struct super_block *sb = ac->ac_sb;
2227 struct ext4_group_info *grp = e4b->bd_info;
2228 void *buddy;
2229 int i;
2230 int k;
2231 int max;
2232
2233 BUG_ON(ac->ac_2order <= 0);
2234 for (i = ac->ac_2order; i < MB_NUM_ORDERS(sb); i++) {
2235 if (grp->bb_counters[i] == 0)
2236 continue;
2237
2238 buddy = mb_find_buddy(e4b, i, &max);
2239 BUG_ON(buddy == NULL);
2240
2241 k = mb_find_next_zero_bit(buddy, max, 0);
2242 if (k >= max) {
2243 ext4_grp_locked_error(ac->ac_sb, e4b->bd_group, 0, 0,
2244 "%d free clusters of order %d. But found 0",
2245 grp->bb_counters[i], i);
2246 ext4_mark_group_bitmap_corrupted(ac->ac_sb,
2247 e4b->bd_group,
2248 EXT4_GROUP_INFO_BBITMAP_CORRUPT);
2249 break;
2250 }
2251 ac->ac_found++;
2252
2253 ac->ac_b_ex.fe_len = 1 << i;
2254 ac->ac_b_ex.fe_start = k << i;
2255 ac->ac_b_ex.fe_group = e4b->bd_group;
2256
2257 ext4_mb_use_best_found(ac, e4b);
2258
2259 BUG_ON(ac->ac_f_ex.fe_len != ac->ac_g_ex.fe_len);
2260
2261 if (EXT4_SB(sb)->s_mb_stats)
2262 atomic_inc(&EXT4_SB(sb)->s_bal_2orders);
2263
2264 break;
2265 }
2266 }
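/*
 * Generic scan over the in-core block bitmap: walk the free extents
 * starting at bb_first_free, measure each with ext4_mb_measure_extent()
 * and cross-check the running free count against bb_free, marking the
 * group corrupt on any mismatch.
 */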
2273 static noinline_for_stack
2274 void ext4_mb_complex_scan_group(struct ext4_allocation_context *ac,
2275 struct ext4_buddy *e4b)
2276 {
2277 struct super_block *sb = ac->ac_sb;
2278 void *bitmap = e4b->bd_bitmap;
2279 struct ext4_free_extent ex;
2280 int i;
2281 int free;
2282
2283 free = e4b->bd_info->bb_free;
2284 if (WARN_ON(free <= 0))
2285 return;
2286
2287 i = e4b->bd_info->bb_first_free;
2288
2289 while (free && ac->ac_status == AC_STATUS_CONTINUE) {
2290 i = mb_find_next_zero_bit(bitmap,
2291 EXT4_CLUSTERS_PER_GROUP(sb), i);
2292 if (i >= EXT4_CLUSTERS_PER_GROUP(sb)) {
2293
2294
2295
2296
2297
2298 ext4_grp_locked_error(sb, e4b->bd_group, 0, 0,
2299 "%d free clusters as per "
2300 "group info. But bitmap says 0",
2301 free);
2302 ext4_mark_group_bitmap_corrupted(sb, e4b->bd_group,
2303 EXT4_GROUP_INFO_BBITMAP_CORRUPT);
2304 break;
2305 }
2306
2307 mb_find_extent(e4b, i, ac->ac_g_ex.fe_len, &ex);
2308 if (WARN_ON(ex.fe_len <= 0))
2309 break;
2310 if (free < ex.fe_len) {
2311 ext4_grp_locked_error(sb, e4b->bd_group, 0, 0,
2312 "%d free clusters as per "
2313 "group info. But got %d blocks",
2314 free, ex.fe_len);
2315 ext4_mark_group_bitmap_corrupted(sb, e4b->bd_group,
2316 EXT4_GROUP_INFO_BBITMAP_CORRUPT);
2317
2318
2319
2320
2321
2322 break;
2323 }
2324 ex.fe_logical = 0xDEADC0DE;
2325 ext4_mb_measure_extent(ac, &ex, e4b);
2326
2327 i += ex.fe_len;
2328 free -= ex.fe_len;
2329 }
2330
2331 ext4_mb_check_limits(ac, e4b, 1);
2332 }
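/*
 * Stripe-aligned scan (used when s_stripe is set and the request is a
 * multiple of it): probe only offsets that are stripe-aligned relative
 * to the start of the filesystem and accept the first free extent of at
 * least one stripe.
 */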
2338 static noinline_for_stack
2339 void ext4_mb_scan_aligned(struct ext4_allocation_context *ac,
2340 struct ext4_buddy *e4b)
2341 {
2342 struct super_block *sb = ac->ac_sb;
2343 struct ext4_sb_info *sbi = EXT4_SB(sb);
2344 void *bitmap = e4b->bd_bitmap;
2345 struct ext4_free_extent ex;
2346 ext4_fsblk_t first_group_block;
2347 ext4_fsblk_t a;
2348 ext4_grpblk_t i;
2349 int max;
2350
2351 BUG_ON(sbi->s_stripe == 0);
2352
2353
2354 first_group_block = ext4_group_first_block_no(sb, e4b->bd_group);
2355
2356 a = first_group_block + sbi->s_stripe - 1;
2357 do_div(a, sbi->s_stripe);
2358 i = (a * sbi->s_stripe) - first_group_block;
2359
2360 while (i < EXT4_CLUSTERS_PER_GROUP(sb)) {
2361 if (!mb_test_bit(i, bitmap)) {
2362 max = mb_find_extent(e4b, i, sbi->s_stripe, &ex);
2363 if (max >= sbi->s_stripe) {
2364 ac->ac_found++;
2365 ex.fe_logical = 0xDEADF00D;
2366 ac->ac_b_ex = ex;
2367 ext4_mb_use_best_found(ac, e4b);
2368 break;
2369 }
2370 }
2371 i += sbi->s_stripe;
2372 }
2373 }
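/*
 * Cheap check (caller holds the group lock) whether a group can satisfy
 * the request under the given criteria: cr 0 wants a large enough buddy
 * order, cr 1 a good free/fragments ratio, cr 2 merely enough free
 * clusters, and cr 3 takes any group with free space.
 */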
2380 static bool ext4_mb_good_group(struct ext4_allocation_context *ac,
2381 ext4_group_t group, int cr)
2382 {
2383 ext4_grpblk_t free, fragments;
2384 int flex_size = ext4_flex_bg_size(EXT4_SB(ac->ac_sb));
2385 struct ext4_group_info *grp = ext4_get_group_info(ac->ac_sb, group);
2386
2387 BUG_ON(cr < 0 || cr >= 4);
2388
2389 if (unlikely(EXT4_MB_GRP_BBITMAP_CORRUPT(grp)))
2390 return false;
2391
2392 free = grp->bb_free;
2393 if (free == 0)
2394 return false;
2395
2396 fragments = grp->bb_fragments;
2397 if (fragments == 0)
2398 return false;
2399
2400 switch (cr) {
2401 case 0:
2402 BUG_ON(ac->ac_2order == 0);
2403
2404
2405 if ((ac->ac_flags & EXT4_MB_HINT_DATA) &&
2406 (flex_size >= EXT4_FLEX_SIZE_DIR_ALLOC_SCHEME) &&
2407 ((group % flex_size) == 0))
2408 return false;
2409
2410 if (free < ac->ac_g_ex.fe_len)
2411 return false;
2412
2413 if (ac->ac_2order >= MB_NUM_ORDERS(ac->ac_sb))
2414 return true;
2415
2416 if (grp->bb_largest_free_order < ac->ac_2order)
2417 return false;
2418
2419 return true;
2420 case 1:
2421 if ((free / fragments) >= ac->ac_g_ex.fe_len)
2422 return true;
2423 break;
2424 case 2:
2425 if (free >= ac->ac_g_ex.fe_len)
2426 return true;
2427 break;
2428 case 3:
2429 return true;
2430 default:
2431 BUG();
2432 }
2433
2434 return false;
2435 }
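/*
 * Lockless wrapper around ext4_mb_good_group(): do the cheap rejects
 * without taking the group lock (it is only taken for
 * EXT4_MB_STRICT_CHECK allocations), initialize the buddy cache for
 * still-uninitialized groups where that looks worthwhile, and return
 * > 0 if the group deserves a locked scan, 0 to skip it, or a negative
 * error.
 */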
2448 static int ext4_mb_good_group_nolock(struct ext4_allocation_context *ac,
2449 ext4_group_t group, int cr)
2450 {
2451 struct ext4_group_info *grp = ext4_get_group_info(ac->ac_sb, group);
2452 struct super_block *sb = ac->ac_sb;
2453 struct ext4_sb_info *sbi = EXT4_SB(sb);
2454 bool should_lock = ac->ac_flags & EXT4_MB_STRICT_CHECK;
2455 ext4_grpblk_t free;
2456 int ret = 0;
2457
2458 if (sbi->s_mb_stats)
2459 atomic64_inc(&sbi->s_bal_cX_groups_considered[ac->ac_criteria]);
2460 if (should_lock) {
2461 ext4_lock_group(sb, group);
2462 __release(ext4_group_lock_ptr(sb, group));
2463 }
2464 free = grp->bb_free;
2465 if (free == 0)
2466 goto out;
2467 if (cr <= 2 && free < ac->ac_g_ex.fe_len)
2468 goto out;
2469 if (unlikely(EXT4_MB_GRP_BBITMAP_CORRUPT(grp)))
2470 goto out;
2471 if (should_lock) {
2472 __acquire(ext4_group_lock_ptr(sb, group));
2473 ext4_unlock_group(sb, group);
2474 }
2475
2476
2477 if (unlikely(EXT4_MB_GRP_NEED_INIT(grp))) {
2478 struct ext4_group_desc *gdp =
2479 ext4_get_group_desc(sb, group, NULL);
2480 int ret;
2481
2482
2483
2484
2485
2486
2487
2488
2489
2490 if (cr < 2 &&
2491 (!sbi->s_log_groups_per_flex ||
2492 ((group & ((1 << sbi->s_log_groups_per_flex) - 1)) != 0)) &&
2493 !(ext4_has_group_desc_csum(sb) &&
2494 (gdp->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT))))
2495 return 0;
2496 ret = ext4_mb_init_group(sb, group, GFP_NOFS);
2497 if (ret)
2498 return ret;
2499 }
2500
2501 if (should_lock) {
2502 ext4_lock_group(sb, group);
2503 __release(ext4_group_lock_ptr(sb, group));
2504 }
2505 ret = ext4_mb_good_group(ac, group, cr);
2506 out:
2507 if (should_lock) {
2508 __acquire(ext4_group_lock_ptr(sb, group));
2509 ext4_unlock_group(sb, group);
2510 }
2511 return ret;
2512 }
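/*
 * Start readahead of the block bitmaps for up to @nr groups beginning at
 * @group (wrapping at the end of the filesystem), bumping *cnt for each
 * read actually submitted.  Returns the group at which a later call
 * should continue.
 */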
2518 ext4_group_t ext4_mb_prefetch(struct super_block *sb, ext4_group_t group,
2519 unsigned int nr, int *cnt)
2520 {
2521 ext4_group_t ngroups = ext4_get_groups_count(sb);
2522 struct buffer_head *bh;
2523 struct blk_plug plug;
2524
2525 blk_start_plug(&plug);
2526 while (nr-- > 0) {
2527 struct ext4_group_desc *gdp = ext4_get_group_desc(sb, group,
2528 NULL);
2529 struct ext4_group_info *grp = ext4_get_group_info(sb, group);
2530
2531
2532
2533
2534
2535
2536
2537
2538 if (!EXT4_MB_GRP_TEST_AND_SET_READ(grp) &&
2539 EXT4_MB_GRP_NEED_INIT(grp) &&
2540 ext4_free_group_clusters(sb, gdp) > 0 &&
2541 !(ext4_has_group_desc_csum(sb) &&
2542 (gdp->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT)))) {
2543 bh = ext4_read_block_bitmap_nowait(sb, group, true);
2544 if (bh && !IS_ERR(bh)) {
2545 if (!buffer_uptodate(bh) && cnt)
2546 (*cnt)++;
2547 brelse(bh);
2548 }
2549 }
2550 if (++group >= ngroups)
2551 group = 0;
2552 }
2553 blk_finish_plug(&plug);
2554 return group;
2555 }
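/*
 * Second phase of prefetch: walk the same @nr groups backwards from
 * @group and build their buddy caches now that the bitmap reads issued
 * by ext4_mb_prefetch() should have completed.
 */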
2569 void ext4_mb_prefetch_fini(struct super_block *sb, ext4_group_t group,
2570 unsigned int nr)
2571 {
2572 while (nr-- > 0) {
2573 struct ext4_group_desc *gdp = ext4_get_group_desc(sb, group,
2574 NULL);
2575 struct ext4_group_info *grp = ext4_get_group_info(sb, group);
2576
2577 if (!group)
2578 group = ext4_get_groups_count(sb);
2579 group--;
2580 grp = ext4_get_group_info(sb, group);
2581
2582 if (EXT4_MB_GRP_NEED_INIT(grp) &&
2583 ext4_free_group_clusters(sb, gdp) > 0 &&
2584 !(ext4_has_group_desc_csum(sb) &&
2585 (gdp->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT)))) {
2586 if (ext4_mb_init_group(sb, group, GFP_NOFS))
2587 break;
2588 }
2589 }
2590 }
2591
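/*
 * Main allocation loop.  Try the goal block first, derive ac_2order for
 * power-of-two requests, optionally resume from the last stream
 * allocation, then iterate the criteria cr = 0..3, scanning candidate
 * groups (with bitmap prefetch) until an extent is found.  If scanning
 * ends with a usable but unclaimed best extent, retry it; if even that
 * was lost to a racing allocation, restart at cr = 3 with
 * EXT4_MB_HINT_FIRST set.
 */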
2592 static noinline_for_stack int
2593 ext4_mb_regular_allocator(struct ext4_allocation_context *ac)
2594 {
2595 ext4_group_t prefetch_grp = 0, ngroups, group, i;
2596 int cr = -1, new_cr;
2597 int err = 0, first_err = 0;
2598 unsigned int nr = 0, prefetch_ios = 0;
2599 struct ext4_sb_info *sbi;
2600 struct super_block *sb;
2601 struct ext4_buddy e4b;
2602 int lost;
2603
2604 sb = ac->ac_sb;
2605 sbi = EXT4_SB(sb);
2606 ngroups = ext4_get_groups_count(sb);
2607
2608 if (!(ext4_test_inode_flag(ac->ac_inode, EXT4_INODE_EXTENTS)))
2609 ngroups = sbi->s_blockfile_groups;
2610
2611 BUG_ON(ac->ac_status == AC_STATUS_FOUND);
2612
2613
2614 err = ext4_mb_find_by_goal(ac, &e4b);
2615 if (err || ac->ac_status == AC_STATUS_FOUND)
2616 goto out;
2617
2618 if (unlikely(ac->ac_flags & EXT4_MB_HINT_GOAL_ONLY))
2619 goto out;
2620
2621
2622
2623
2624
2625
2626 i = fls(ac->ac_g_ex.fe_len);
2627 ac->ac_2order = 0;
2628
2629
2630
2631
2632
2633
2634
2635 if (i >= sbi->s_mb_order2_reqs && i <= MB_NUM_ORDERS(sb)) {
2636
2637
2638
2639 if ((ac->ac_g_ex.fe_len & (~(1 << (i - 1)))) == 0)
2640 ac->ac_2order = array_index_nospec(i - 1,
2641 MB_NUM_ORDERS(sb));
2642 }
2643
2644
2645 if (ac->ac_flags & EXT4_MB_STREAM_ALLOC) {
2646
2647 spin_lock(&sbi->s_md_lock);
2648 ac->ac_g_ex.fe_group = sbi->s_mb_last_group;
2649 ac->ac_g_ex.fe_start = sbi->s_mb_last_start;
2650 spin_unlock(&sbi->s_md_lock);
2651 }
2652
2653
2654 cr = ac->ac_2order ? 0 : 1;
2655
2656
2657
2658
2659 repeat:
2660 for (; cr < 4 && ac->ac_status == AC_STATUS_CONTINUE; cr++) {
2661 ac->ac_criteria = cr;
2662
2663
2664
2665
2666 group = ac->ac_g_ex.fe_group;
2667 ac->ac_groups_linear_remaining = sbi->s_mb_max_linear_groups;
2668 prefetch_grp = group;
2669
2670 for (i = 0, new_cr = cr; i < ngroups; i++,
2671 ext4_mb_choose_next_group(ac, &new_cr, &group, ngroups)) {
2672 int ret = 0;
2673
2674 cond_resched();
2675 if (new_cr != cr) {
2676 cr = new_cr;
2677 goto repeat;
2678 }
2679
2680
2681
2682
2683
2684
2685
2686 if ((prefetch_grp == group) &&
2687 (cr > 1 ||
2688 prefetch_ios < sbi->s_mb_prefetch_limit)) {
2689 unsigned int curr_ios = prefetch_ios;
2690
2691 nr = sbi->s_mb_prefetch;
2692 if (ext4_has_feature_flex_bg(sb)) {
2693 nr = 1 << sbi->s_log_groups_per_flex;
2694 nr -= group & (nr - 1);
2695 nr = min(nr, sbi->s_mb_prefetch);
2696 }
2697 prefetch_grp = ext4_mb_prefetch(sb, group,
2698 nr, &prefetch_ios);
2699 if (prefetch_ios == curr_ios)
2700 nr = 0;
2701 }
2702
2703
2704 ret = ext4_mb_good_group_nolock(ac, group, cr);
2705 if (ret <= 0) {
2706 if (!first_err)
2707 first_err = ret;
2708 continue;
2709 }
2710
2711 err = ext4_mb_load_buddy(sb, group, &e4b);
2712 if (err)
2713 goto out;
2714
2715 ext4_lock_group(sb, group);
2716
2717
2718
2719
2720
2721 ret = ext4_mb_good_group(ac, group, cr);
2722 if (ret == 0) {
2723 ext4_unlock_group(sb, group);
2724 ext4_mb_unload_buddy(&e4b);
2725 continue;
2726 }
2727
2728 ac->ac_groups_scanned++;
2729 if (cr == 0)
2730 ext4_mb_simple_scan_group(ac, &e4b);
2731 else if (cr == 1 && sbi->s_stripe &&
2732 !(ac->ac_g_ex.fe_len % sbi->s_stripe))
2733 ext4_mb_scan_aligned(ac, &e4b);
2734 else
2735 ext4_mb_complex_scan_group(ac, &e4b);
2736
2737 ext4_unlock_group(sb, group);
2738 ext4_mb_unload_buddy(&e4b);
2739
2740 if (ac->ac_status != AC_STATUS_CONTINUE)
2741 break;
2742 }
2743
2744 if (sbi->s_mb_stats && i == ngroups)
2745 atomic64_inc(&sbi->s_bal_cX_failed[cr]);
2746 }
2747
2748 if (ac->ac_b_ex.fe_len > 0 && ac->ac_status != AC_STATUS_FOUND &&
2749 !(ac->ac_flags & EXT4_MB_HINT_FIRST)) {
2750
2751
2752
2753
2754 ext4_mb_try_best_found(ac, &e4b);
2755 if (ac->ac_status != AC_STATUS_FOUND) {
2756
2757
2758
2759
2760
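/*
 * The best extent found during the scan could not be allocated after
 * all (the group changed under us).  Count it as a lost chunk, forget
 * the stale best extent and retry at cr 3 with EXT4_MB_HINT_FIRST set,
 * taking the first extent that fits.
 */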
2761 lost = atomic_inc_return(&sbi->s_mb_lost_chunks);
2762 mb_debug(sb, "lost chunk, group: %u, start: %d, len: %d, lost: %d\n",
2763 ac->ac_b_ex.fe_group, ac->ac_b_ex.fe_start,
2764 ac->ac_b_ex.fe_len, lost);
2765
2766 ac->ac_b_ex.fe_group = 0;
2767 ac->ac_b_ex.fe_start = 0;
2768 ac->ac_b_ex.fe_len = 0;
2769 ac->ac_status = AC_STATUS_CONTINUE;
2770 ac->ac_flags |= EXT4_MB_HINT_FIRST;
2771 cr = 3;
2772 goto repeat;
2773 }
2774 }
2775
2776 if (sbi->s_mb_stats && ac->ac_status == AC_STATUS_FOUND)
2777 atomic64_inc(&sbi->s_bal_cX_hits[ac->ac_criteria]);
2778 out:
2779 if (!err && ac->ac_status != AC_STATUS_FOUND && first_err)
2780 err = first_err;
2781
2782 mb_debug(sb, "Best len %d, origin len %d, ac_status %u, ac_flags 0x%x, cr %d ret %d\n",
2783 ac->ac_b_ex.fe_len, ac->ac_o_ex.fe_len, ac->ac_status,
2784 ac->ac_flags, cr, err);
2785
2786 if (nr)
2787 ext4_mb_prefetch_fini(sb, prefetch_grp, nr);
2788
2789 return err;
2790 }
2791
2792 static void *ext4_mb_seq_groups_start(struct seq_file *seq, loff_t *pos)
2793 {
2794 struct super_block *sb = pde_data(file_inode(seq->file));
2795 ext4_group_t group;
2796
2797 if (*pos < 0 || *pos >= ext4_get_groups_count(sb))
2798 return NULL;
2799 group = *pos + 1;
2800 return (void *) ((unsigned long) group);
2801 }
2802
2803 static void *ext4_mb_seq_groups_next(struct seq_file *seq, void *v, loff_t *pos)
2804 {
2805 struct super_block *sb = pde_data(file_inode(seq->file));
2806 ext4_group_t group;
2807
2808 ++*pos;
2809 if (*pos < 0 || *pos >= ext4_get_groups_count(sb))
2810 return NULL;
2811 group = *pos + 1;
2812 return (void *) ((unsigned long) group);
2813 }
2814
2815 static int ext4_mb_seq_groups_show(struct seq_file *seq, void *v)
2816 {
2817 struct super_block *sb = pde_data(file_inode(seq->file));
2818 ext4_group_t group = (ext4_group_t) ((unsigned long) v);
2819 int i;
2820 int err, buddy_loaded = 0;
2821 struct ext4_buddy e4b;
2822 struct ext4_group_info *grinfo;
2823 unsigned char blocksize_bits = min_t(unsigned char,
2824 sb->s_blocksize_bits,
2825 EXT4_MAX_BLOCK_LOG_SIZE);
2826 struct sg {
2827 struct ext4_group_info info;
2828 ext4_grpblk_t counters[EXT4_MAX_BLOCK_LOG_SIZE + 2];
2829 } sg;
2830
2831 group--;
2832 if (group == 0)
2833 seq_puts(seq, "#group: free frags first ["
2834 " 2^0 2^1 2^2 2^3 2^4 2^5 2^6 "
2835 " 2^7 2^8 2^9 2^10 2^11 2^12 2^13 ]\n");
2836
2837 i = (blocksize_bits + 2) * sizeof(sg.info.bb_counters[0]) +
2838 sizeof(struct ext4_group_info);
2839
2840 grinfo = ext4_get_group_info(sb, group);
2841
2842 if (unlikely(EXT4_MB_GRP_NEED_INIT(grinfo))) {
2843 err = ext4_mb_load_buddy(sb, group, &e4b);
2844 if (err) {
2845 seq_printf(seq, "#%-5u: I/O error\n", group);
2846 return 0;
2847 }
2848 buddy_loaded = 1;
2849 }
2850
2851 memcpy(&sg, ext4_get_group_info(sb, group), i);
2852
2853 if (buddy_loaded)
2854 ext4_mb_unload_buddy(&e4b);
2855
2856 seq_printf(seq, "#%-5u: %-5u %-5u %-5u [", group, sg.info.bb_free,
2857 sg.info.bb_fragments, sg.info.bb_first_free);
2858 for (i = 0; i <= 13; i++)
2859 seq_printf(seq, " %-5u", i <= blocksize_bits + 1 ?
2860 sg.info.bb_counters[i] : 0);
2861 seq_puts(seq, " ]\n");
2862
2863 return 0;
2864 }
2865
2866 static void ext4_mb_seq_groups_stop(struct seq_file *seq, void *v)
2867 {
2868 }
2869
2870 const struct seq_operations ext4_mb_seq_groups_ops = {
2871 .start = ext4_mb_seq_groups_start,
2872 .next = ext4_mb_seq_groups_next,
2873 .stop = ext4_mb_seq_groups_stop,
2874 .show = ext4_mb_seq_groups_show,
2875 };
2876
2877 int ext4_seq_mb_stats_show(struct seq_file *seq, void *offset)
2878 {
2879 struct super_block *sb = seq->private;
2880 struct ext4_sb_info *sbi = EXT4_SB(sb);
2881
2882 seq_puts(seq, "mballoc:\n");
2883 if (!sbi->s_mb_stats) {
2884 seq_puts(seq, "\tmb stats collection turned off.\n");
2885 seq_puts(seq, "\tTo enable, please write \"1\" to sysfs file mb_stats.\n");
2886 return 0;
2887 }
2888 seq_printf(seq, "\treqs: %u\n", atomic_read(&sbi->s_bal_reqs));
2889 seq_printf(seq, "\tsuccess: %u\n", atomic_read(&sbi->s_bal_success));
2890
2891 seq_printf(seq, "\tgroups_scanned: %u\n", atomic_read(&sbi->s_bal_groups_scanned));
2892
2893 seq_puts(seq, "\tcr0_stats:\n");
2894 seq_printf(seq, "\t\thits: %llu\n", atomic64_read(&sbi->s_bal_cX_hits[0]));
2895 seq_printf(seq, "\t\tgroups_considered: %llu\n",
2896 atomic64_read(&sbi->s_bal_cX_groups_considered[0]));
2897 seq_printf(seq, "\t\tuseless_loops: %llu\n",
2898 atomic64_read(&sbi->s_bal_cX_failed[0]));
2899 seq_printf(seq, "\t\tbad_suggestions: %u\n",
2900 atomic_read(&sbi->s_bal_cr0_bad_suggestions));
2901
2902 seq_puts(seq, "\tcr1_stats:\n");
2903 seq_printf(seq, "\t\thits: %llu\n", atomic64_read(&sbi->s_bal_cX_hits[1]));
2904 seq_printf(seq, "\t\tgroups_considered: %llu\n",
2905 atomic64_read(&sbi->s_bal_cX_groups_considered[1]));
2906 seq_printf(seq, "\t\tuseless_loops: %llu\n",
2907 atomic64_read(&sbi->s_bal_cX_failed[1]));
2908 seq_printf(seq, "\t\tbad_suggestions: %u\n",
2909 atomic_read(&sbi->s_bal_cr1_bad_suggestions));
2910
2911 seq_puts(seq, "\tcr2_stats:\n");
2912 seq_printf(seq, "\t\thits: %llu\n", atomic64_read(&sbi->s_bal_cX_hits[2]));
2913 seq_printf(seq, "\t\tgroups_considered: %llu\n",
2914 atomic64_read(&sbi->s_bal_cX_groups_considered[2]));
2915 seq_printf(seq, "\t\tuseless_loops: %llu\n",
2916 atomic64_read(&sbi->s_bal_cX_failed[2]));
2917
2918 seq_puts(seq, "\tcr3_stats:\n");
2919 seq_printf(seq, "\t\thits: %llu\n", atomic64_read(&sbi->s_bal_cX_hits[3]));
2920 seq_printf(seq, "\t\tgroups_considered: %llu\n",
2921 atomic64_read(&sbi->s_bal_cX_groups_considered[3]));
2922 seq_printf(seq, "\t\tuseless_loops: %llu\n",
2923 atomic64_read(&sbi->s_bal_cX_failed[3]));
2924 seq_printf(seq, "\textents_scanned: %u\n", atomic_read(&sbi->s_bal_ex_scanned));
2925 seq_printf(seq, "\t\tgoal_hits: %u\n", atomic_read(&sbi->s_bal_goals));
2926 seq_printf(seq, "\t\t2^n_hits: %u\n", atomic_read(&sbi->s_bal_2orders));
2927 seq_printf(seq, "\t\tbreaks: %u\n", atomic_read(&sbi->s_bal_breaks));
2928 seq_printf(seq, "\t\tlost: %u\n", atomic_read(&sbi->s_mb_lost_chunks));
2929
2930 seq_printf(seq, "\tbuddies_generated: %u/%u\n",
2931 atomic_read(&sbi->s_mb_buddies_generated),
2932 ext4_get_groups_count(sb));
2933 seq_printf(seq, "\tbuddies_time_used: %llu\n",
2934 atomic64_read(&sbi->s_mb_generation_time));
2935 seq_printf(seq, "\tpreallocated: %u\n",
2936 atomic_read(&sbi->s_mb_preallocated));
2937 seq_printf(seq, "\tdiscarded: %u\n",
2938 atomic_read(&sbi->s_mb_discarded));
2939 return 0;
2940 }
2941
2942 static void *ext4_mb_seq_structs_summary_start(struct seq_file *seq, loff_t *pos)
2943
2944 {
2945 struct super_block *sb = pde_data(file_inode(seq->file));
2946 unsigned long position;
2947
2948 if (*pos < 0 || *pos >= 2*MB_NUM_ORDERS(sb))
2949 return NULL;
2950 position = *pos + 1;
2951 return (void *) ((unsigned long) position);
2952 }
2953
2954 static void *ext4_mb_seq_structs_summary_next(struct seq_file *seq, void *v, loff_t *pos)
2955 {
2956 struct super_block *sb = pde_data(file_inode(seq->file));
2957 unsigned long position;
2958
2959 ++*pos;
2960 if (*pos < 0 || *pos >= 2*MB_NUM_ORDERS(sb))
2961 return NULL;
2962 position = *pos + 1;
2963 return (void *) ((unsigned long) position);
2964 }
2965
2966 static int ext4_mb_seq_structs_summary_show(struct seq_file *seq, void *v)
2967 {
2968 struct super_block *sb = pde_data(file_inode(seq->file));
2969 struct ext4_sb_info *sbi = EXT4_SB(sb);
2970 unsigned long position = ((unsigned long) v);
2971 struct ext4_group_info *grp;
2972 unsigned int count;
2973
2974 position--;
2975 if (position >= MB_NUM_ORDERS(sb)) {
2976 position -= MB_NUM_ORDERS(sb);
2977 if (position == 0)
2978 seq_puts(seq, "avg_fragment_size_lists:\n");
2979
2980 count = 0;
2981 read_lock(&sbi->s_mb_avg_fragment_size_locks[position]);
2982 list_for_each_entry(grp, &sbi->s_mb_avg_fragment_size[position],
2983 bb_avg_fragment_size_node)
2984 count++;
2985 read_unlock(&sbi->s_mb_avg_fragment_size_locks[position]);
2986 seq_printf(seq, "\tlist_order_%u_groups: %u\n",
2987 (unsigned int)position, count);
2988 return 0;
2989 }
2990
2991 if (position == 0) {
2992 seq_printf(seq, "optimize_scan: %d\n",
2993 test_opt2(sb, MB_OPTIMIZE_SCAN) ? 1 : 0);
2994 seq_puts(seq, "max_free_order_lists:\n");
2995 }
2996 count = 0;
2997 read_lock(&sbi->s_mb_largest_free_orders_locks[position]);
2998 list_for_each_entry(grp, &sbi->s_mb_largest_free_orders[position],
2999 bb_largest_free_order_node)
3000 count++;
3001 read_unlock(&sbi->s_mb_largest_free_orders_locks[position]);
3002 seq_printf(seq, "\tlist_order_%u_groups: %u\n",
3003 (unsigned int)position, count);
3004
3005 return 0;
3006 }
3007
3008 static void ext4_mb_seq_structs_summary_stop(struct seq_file *seq, void *v)
3009 {
3010 }
3011
3012 const struct seq_operations ext4_mb_seq_structs_summary_ops = {
3013 .start = ext4_mb_seq_structs_summary_start,
3014 .next = ext4_mb_seq_structs_summary_next,
3015 .stop = ext4_mb_seq_structs_summary_stop,
3016 .show = ext4_mb_seq_structs_summary_show,
3017 };
3018
3019 static struct kmem_cache *get_groupinfo_cache(int blocksize_bits)
3020 {
3021 int cache_index = blocksize_bits - EXT4_MIN_BLOCK_LOG_SIZE;
3022 struct kmem_cache *cachep = ext4_groupinfo_caches[cache_index];
3023
3024 BUG_ON(!cachep);
3025 return cachep;
3026 }
3027
3028
3029
3030
3031
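/*
 * Allocate (or grow) the top-level s_group_info array.  One slot covers
 * EXT4_DESC_PER_BLOCK groups; the array size is rounded up to a power
 * of two and the old array, if any, is copied over and freed via RCU.
 */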
3032 int ext4_mb_alloc_groupinfo(struct super_block *sb, ext4_group_t ngroups)
3033 {
3034 struct ext4_sb_info *sbi = EXT4_SB(sb);
3035 unsigned size;
3036 struct ext4_group_info ***old_groupinfo, ***new_groupinfo;
3037
3038 size = (ngroups + EXT4_DESC_PER_BLOCK(sb) - 1) >>
3039 EXT4_DESC_PER_BLOCK_BITS(sb);
3040 if (size <= sbi->s_group_info_size)
3041 return 0;
3042
3043 size = roundup_pow_of_two(sizeof(*sbi->s_group_info) * size);
3044 new_groupinfo = kvzalloc(size, GFP_KERNEL);
3045 if (!new_groupinfo) {
3046 ext4_msg(sb, KERN_ERR, "can't allocate buddy meta group");
3047 return -ENOMEM;
3048 }
3049 rcu_read_lock();
3050 old_groupinfo = rcu_dereference(sbi->s_group_info);
3051 if (old_groupinfo)
3052 memcpy(new_groupinfo, old_groupinfo,
3053 sbi->s_group_info_size * sizeof(*sbi->s_group_info));
3054 rcu_read_unlock();
3055 rcu_assign_pointer(sbi->s_group_info, new_groupinfo);
3056 sbi->s_group_info_size = size / sizeof(*sbi->s_group_info);
3057 if (old_groupinfo)
3058 ext4_kvfree_array_rcu(old_groupinfo);
3059 ext4_debug("allocated s_groupinfo array for %d meta_bg's\n",
3060 sbi->s_group_info_size);
3061 return 0;
3062 }
3063
3064
3065 int ext4_mb_add_groupinfo(struct super_block *sb, ext4_group_t group,
3066 struct ext4_group_desc *desc)
3067 {
3068 int i;
3069 int metalen = 0;
3070 int idx = group >> EXT4_DESC_PER_BLOCK_BITS(sb);
3071 struct ext4_sb_info *sbi = EXT4_SB(sb);
3072 struct ext4_group_info **meta_group_info;
3073 struct kmem_cache *cachep = get_groupinfo_cache(sb->s_blocksize_bits);
3074
3075
3076
3077
3078
3079
3080 if (group % EXT4_DESC_PER_BLOCK(sb) == 0) {
3081 metalen = sizeof(*meta_group_info) <<
3082 EXT4_DESC_PER_BLOCK_BITS(sb);
3083 meta_group_info = kmalloc(metalen, GFP_NOFS);
3084 if (meta_group_info == NULL) {
3085 ext4_msg(sb, KERN_ERR, "can't allocate mem "
3086 "for a buddy group");
3087 goto exit_meta_group_info;
3088 }
3089 rcu_read_lock();
3090 rcu_dereference(sbi->s_group_info)[idx] = meta_group_info;
3091 rcu_read_unlock();
3092 }
3093
3094 meta_group_info = sbi_array_rcu_deref(sbi, s_group_info, idx);
3095 i = group & (EXT4_DESC_PER_BLOCK(sb) - 1);
3096
3097 meta_group_info[i] = kmem_cache_zalloc(cachep, GFP_NOFS);
3098 if (meta_group_info[i] == NULL) {
3099 ext4_msg(sb, KERN_ERR, "can't allocate buddy mem");
3100 goto exit_group_info;
3101 }
3102 set_bit(EXT4_GROUP_INFO_NEED_INIT_BIT,
3103 &(meta_group_info[i]->bb_state));
3104
3105
3106
3107
3108
3109 if (ext4_has_group_desc_csum(sb) &&
3110 (desc->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT))) {
3111 meta_group_info[i]->bb_free =
3112 ext4_free_clusters_after_init(sb, group, desc);
3113 } else {
3114 meta_group_info[i]->bb_free =
3115 ext4_free_group_clusters(sb, desc);
3116 }
3117
3118 INIT_LIST_HEAD(&meta_group_info[i]->bb_prealloc_list);
3119 init_rwsem(&meta_group_info[i]->alloc_sem);
3120 meta_group_info[i]->bb_free_root = RB_ROOT;
3121 INIT_LIST_HEAD(&meta_group_info[i]->bb_largest_free_order_node);
3122 INIT_LIST_HEAD(&meta_group_info[i]->bb_avg_fragment_size_node);
3123 meta_group_info[i]->bb_largest_free_order = -1;
3124 meta_group_info[i]->bb_avg_fragment_size_order = -1;
3125 meta_group_info[i]->bb_group = group;
3126
3127 mb_group_bb_bitmap_alloc(sb, meta_group_info[i], group);
3128 return 0;
3129
3130 exit_group_info:
3131
3132 if (group % EXT4_DESC_PER_BLOCK(sb) == 0) {
3133 struct ext4_group_info ***group_info;
3134
3135 rcu_read_lock();
3136 group_info = rcu_dereference(sbi->s_group_info);
3137 kfree(group_info[idx]);
3138 group_info[idx] = NULL;
3139 rcu_read_unlock();
3140 }
3141 exit_meta_group_info:
3142 return -ENOMEM;
3143 }
3144
3145 static int ext4_mb_init_backend(struct super_block *sb)
3146 {
3147 ext4_group_t ngroups = ext4_get_groups_count(sb);
3148 ext4_group_t i;
3149 struct ext4_sb_info *sbi = EXT4_SB(sb);
3150 int err;
3151 struct ext4_group_desc *desc;
3152 struct ext4_group_info ***group_info;
3153 struct kmem_cache *cachep;
3154
3155 err = ext4_mb_alloc_groupinfo(sb, ngroups);
3156 if (err)
3157 return err;
3158
3159 sbi->s_buddy_cache = new_inode(sb);
3160 if (sbi->s_buddy_cache == NULL) {
3161 ext4_msg(sb, KERN_ERR, "can't get new inode");
3162 goto err_freesgi;
3163 }
3164
3165
3166
3167
3168 sbi->s_buddy_cache->i_ino = EXT4_BAD_INO;
3169 EXT4_I(sbi->s_buddy_cache)->i_disksize = 0;
3170 for (i = 0; i < ngroups; i++) {
3171 cond_resched();
3172 desc = ext4_get_group_desc(sb, i, NULL);
3173 if (desc == NULL) {
3174 ext4_msg(sb, KERN_ERR, "can't read descriptor %u", i);
3175 goto err_freebuddy;
3176 }
3177 if (ext4_mb_add_groupinfo(sb, i, desc) != 0)
3178 goto err_freebuddy;
3179 }
3180
3181 if (ext4_has_feature_flex_bg(sb)) {
3182
3183
3184
3185
3186 if (sbi->s_es->s_log_groups_per_flex >= 32) {
3187 ext4_msg(sb, KERN_ERR, "too many log groups per flexible block group");
3188 goto err_freebuddy;
3189 }
3190 sbi->s_mb_prefetch = min_t(uint, 1 << sbi->s_es->s_log_groups_per_flex,
3191 BLK_MAX_SEGMENT_SIZE >> (sb->s_blocksize_bits - 9));
3192 sbi->s_mb_prefetch *= 8;
3193 } else {
3194 sbi->s_mb_prefetch = 32;
3195 }
3196 if (sbi->s_mb_prefetch > ext4_get_groups_count(sb))
3197 sbi->s_mb_prefetch = ext4_get_groups_count(sb);
3198
3199
3200
3201
3202
3203
3204
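/*
 * Let the scanner run up to four prefetch batches ahead of the groups
 * it is actually examining, but never more than the total number of
 * groups in the filesystem.
 */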
3205 sbi->s_mb_prefetch_limit = sbi->s_mb_prefetch * 4;
3206 if (sbi->s_mb_prefetch_limit > ext4_get_groups_count(sb))
3207 sbi->s_mb_prefetch_limit = ext4_get_groups_count(sb);
3208
3209 return 0;
3210
3211 err_freebuddy:
3212 cachep = get_groupinfo_cache(sb->s_blocksize_bits);
3213 while (i-- > 0)
3214 kmem_cache_free(cachep, ext4_get_group_info(sb, i));
3215 i = sbi->s_group_info_size;
3216 rcu_read_lock();
3217 group_info = rcu_dereference(sbi->s_group_info);
3218 while (i-- > 0)
3219 kfree(group_info[i]);
3220 rcu_read_unlock();
3221 iput(sbi->s_buddy_cache);
3222 err_freesgi:
3223 rcu_read_lock();
3224 kvfree(rcu_dereference(sbi->s_group_info));
3225 rcu_read_unlock();
3226 return -ENOMEM;
3227 }
3228
3229 static void ext4_groupinfo_destroy_slabs(void)
3230 {
3231 int i;
3232
3233 for (i = 0; i < NR_GRPINFO_CACHES; i++) {
3234 kmem_cache_destroy(ext4_groupinfo_caches[i]);
3235 ext4_groupinfo_caches[i] = NULL;
3236 }
3237 }
3238
3239 static int ext4_groupinfo_create_slab(size_t size)
3240 {
3241 static DEFINE_MUTEX(ext4_grpinfo_slab_create_mutex);
3242 int slab_size;
3243 int blocksize_bits = order_base_2(size);
3244 int cache_index = blocksize_bits - EXT4_MIN_BLOCK_LOG_SIZE;
3245 struct kmem_cache *cachep;
3246
3247 if (cache_index >= NR_GRPINFO_CACHES)
3248 return -EINVAL;
3249
3250 if (unlikely(cache_index < 0))
3251 cache_index = 0;
3252
3253 mutex_lock(&ext4_grpinfo_slab_create_mutex);
3254 if (ext4_groupinfo_caches[cache_index]) {
3255 mutex_unlock(&ext4_grpinfo_slab_create_mutex);
3256 return 0;
3257 }
3258
3259 slab_size = offsetof(struct ext4_group_info,
3260 bb_counters[blocksize_bits + 2]);
3261
3262 cachep = kmem_cache_create(ext4_groupinfo_slab_names[cache_index],
3263 slab_size, 0, SLAB_RECLAIM_ACCOUNT,
3264 NULL);
3265
3266 ext4_groupinfo_caches[cache_index] = cachep;
3267
3268 mutex_unlock(&ext4_grpinfo_slab_create_mutex);
3269 if (!cachep) {
3270 printk(KERN_EMERG
3271 "EXT4-fs: no memory for groupinfo slab cache\n");
3272 return -ENOMEM;
3273 }
3274
3275 return 0;
3276 }
3277
3278 static void ext4_discard_work(struct work_struct *work)
3279 {
3280 struct ext4_sb_info *sbi = container_of(work,
3281 struct ext4_sb_info, s_discard_work);
3282 struct super_block *sb = sbi->s_sb;
3283 struct ext4_free_data *fd, *nfd;
3284 struct ext4_buddy e4b;
3285 struct list_head discard_list;
3286 ext4_group_t grp, load_grp;
3287 int err = 0;
3288
3289 INIT_LIST_HEAD(&discard_list);
3290 spin_lock(&sbi->s_md_lock);
3291 list_splice_init(&sbi->s_discard_list, &discard_list);
3292 spin_unlock(&sbi->s_md_lock);
3293
3294 load_grp = UINT_MAX;
3295 list_for_each_entry_safe(fd, nfd, &discard_list, efd_list) {
3296
3297
3298
3299
3300 if ((sb->s_flags & SB_ACTIVE) && !err &&
3301 !atomic_read(&sbi->s_retry_alloc_pending)) {
3302 grp = fd->efd_group;
3303 if (grp != load_grp) {
3304 if (load_grp != UINT_MAX)
3305 ext4_mb_unload_buddy(&e4b);
3306
3307 err = ext4_mb_load_buddy(sb, grp, &e4b);
3308 if (err) {
3309 kmem_cache_free(ext4_free_data_cachep, fd);
3310 load_grp = UINT_MAX;
3311 continue;
3312 } else {
3313 load_grp = grp;
3314 }
3315 }
3316
3317 ext4_lock_group(sb, grp);
3318 ext4_try_to_trim_range(sb, &e4b, fd->efd_start_cluster,
3319 fd->efd_start_cluster + fd->efd_count - 1, 1);
3320 ext4_unlock_group(sb, grp);
3321 }
3322 kmem_cache_free(ext4_free_data_cachep, fd);
3323 }
3324
3325 if (load_grp != UINT_MAX)
3326 ext4_mb_unload_buddy(&e4b);
3327 }
3328
3329 int ext4_mb_init(struct super_block *sb)
3330 {
3331 struct ext4_sb_info *sbi = EXT4_SB(sb);
3332 unsigned i, j;
3333 unsigned offset, offset_incr;
3334 unsigned max;
3335 int ret;
3336
3337 i = MB_NUM_ORDERS(sb) * sizeof(*sbi->s_mb_offsets);
3338
3339 sbi->s_mb_offsets = kmalloc(i, GFP_KERNEL);
3340 if (sbi->s_mb_offsets == NULL) {
3341 ret = -ENOMEM;
3342 goto out;
3343 }
3344
3345 i = MB_NUM_ORDERS(sb) * sizeof(*sbi->s_mb_maxs);
3346 sbi->s_mb_maxs = kmalloc(i, GFP_KERNEL);
3347 if (sbi->s_mb_maxs == NULL) {
3348 ret = -ENOMEM;
3349 goto out;
3350 }
3351
3352 ret = ext4_groupinfo_create_slab(sb->s_blocksize);
3353 if (ret < 0)
3354 goto out;
3355
3356
3357 sbi->s_mb_maxs[0] = sb->s_blocksize << 3;
3358 sbi->s_mb_offsets[0] = 0;
3359
3360 i = 1;
3361 offset = 0;
3362 offset_incr = 1 << (sb->s_blocksize_bits - 1);
3363 max = sb->s_blocksize << 2;
3364 do {
3365 sbi->s_mb_offsets[i] = offset;
3366 sbi->s_mb_maxs[i] = max;
3367 offset += offset_incr;
3368 offset_incr = offset_incr >> 1;
3369 max = max >> 1;
3370 i++;
3371 } while (i < MB_NUM_ORDERS(sb));
3372
3373 sbi->s_mb_avg_fragment_size =
3374 kmalloc_array(MB_NUM_ORDERS(sb), sizeof(struct list_head),
3375 GFP_KERNEL);
3376 if (!sbi->s_mb_avg_fragment_size) {
3377 ret = -ENOMEM;
3378 goto out;
3379 }
3380 sbi->s_mb_avg_fragment_size_locks =
3381 kmalloc_array(MB_NUM_ORDERS(sb), sizeof(rwlock_t),
3382 GFP_KERNEL);
3383 if (!sbi->s_mb_avg_fragment_size_locks) {
3384 ret = -ENOMEM;
3385 goto out;
3386 }
3387 for (i = 0; i < MB_NUM_ORDERS(sb); i++) {
3388 INIT_LIST_HEAD(&sbi->s_mb_avg_fragment_size[i]);
3389 rwlock_init(&sbi->s_mb_avg_fragment_size_locks[i]);
3390 }
3391 sbi->s_mb_largest_free_orders =
3392 kmalloc_array(MB_NUM_ORDERS(sb), sizeof(struct list_head),
3393 GFP_KERNEL);
3394 if (!sbi->s_mb_largest_free_orders) {
3395 ret = -ENOMEM;
3396 goto out;
3397 }
3398 sbi->s_mb_largest_free_orders_locks =
3399 kmalloc_array(MB_NUM_ORDERS(sb), sizeof(rwlock_t),
3400 GFP_KERNEL);
3401 if (!sbi->s_mb_largest_free_orders_locks) {
3402 ret = -ENOMEM;
3403 goto out;
3404 }
3405 for (i = 0; i < MB_NUM_ORDERS(sb); i++) {
3406 INIT_LIST_HEAD(&sbi->s_mb_largest_free_orders[i]);
3407 rwlock_init(&sbi->s_mb_largest_free_orders_locks[i]);
3408 }
3409
3410 spin_lock_init(&sbi->s_md_lock);
3411 sbi->s_mb_free_pending = 0;
3412 INIT_LIST_HEAD(&sbi->s_freed_data_list);
3413 INIT_LIST_HEAD(&sbi->s_discard_list);
3414 INIT_WORK(&sbi->s_discard_work, ext4_discard_work);
3415 atomic_set(&sbi->s_retry_alloc_pending, 0);
3416
3417 sbi->s_mb_max_to_scan = MB_DEFAULT_MAX_TO_SCAN;
3418 sbi->s_mb_min_to_scan = MB_DEFAULT_MIN_TO_SCAN;
3419 sbi->s_mb_stats = MB_DEFAULT_STATS;
3420 sbi->s_mb_stream_request = MB_DEFAULT_STREAM_THRESHOLD;
3421 sbi->s_mb_order2_reqs = MB_DEFAULT_ORDER2_REQS;
3422 sbi->s_mb_max_inode_prealloc = MB_DEFAULT_MAX_INODE_PREALLOC;
3423
3424
3425
3426
3427
3428
3429
3430
3431
3432
3433
3434
3435 sbi->s_mb_group_prealloc = max(MB_DEFAULT_GROUP_PREALLOC >>
3436 sbi->s_cluster_bits, 32);
3437
3438
3439
3440
3441
3442
3443
3444
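/*
 * If a stripe size is configured (s_stripe > 1), round the per-group
 * preallocation size up to a multiple of the stripe so that group
 * preallocations stay stripe aligned.
 */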
3445 if (sbi->s_stripe > 1) {
3446 sbi->s_mb_group_prealloc = roundup(
3447 sbi->s_mb_group_prealloc, sbi->s_stripe);
3448 }
3449
3450 sbi->s_locality_groups = alloc_percpu(struct ext4_locality_group);
3451 if (sbi->s_locality_groups == NULL) {
3452 ret = -ENOMEM;
3453 goto out;
3454 }
3455 for_each_possible_cpu(i) {
3456 struct ext4_locality_group *lg;
3457 lg = per_cpu_ptr(sbi->s_locality_groups, i);
3458 mutex_init(&lg->lg_mutex);
3459 for (j = 0; j < PREALLOC_TB_SIZE; j++)
3460 INIT_LIST_HEAD(&lg->lg_prealloc_list[j]);
3461 spin_lock_init(&lg->lg_prealloc_lock);
3462 }
3463
3464 if (bdev_nonrot(sb->s_bdev))
3465 sbi->s_mb_max_linear_groups = 0;
3466 else
3467 sbi->s_mb_max_linear_groups = MB_DEFAULT_LINEAR_LIMIT;
3468
3469 ret = ext4_mb_init_backend(sb);
3470 if (ret != 0)
3471 goto out_free_locality_groups;
3472
3473 return 0;
3474
3475 out_free_locality_groups:
3476 free_percpu(sbi->s_locality_groups);
3477 sbi->s_locality_groups = NULL;
3478 out:
3479 kfree(sbi->s_mb_avg_fragment_size);
3480 kfree(sbi->s_mb_avg_fragment_size_locks);
3481 kfree(sbi->s_mb_largest_free_orders);
3482 kfree(sbi->s_mb_largest_free_orders_locks);
3483 kfree(sbi->s_mb_offsets);
3484 sbi->s_mb_offsets = NULL;
3485 kfree(sbi->s_mb_maxs);
3486 sbi->s_mb_maxs = NULL;
3487 return ret;
3488 }
3489
3490
3491 static int ext4_mb_cleanup_pa(struct ext4_group_info *grp)
3492 {
3493 struct ext4_prealloc_space *pa;
3494 struct list_head *cur, *tmp;
3495 int count = 0;
3496
3497 list_for_each_safe(cur, tmp, &grp->bb_prealloc_list) {
3498 pa = list_entry(cur, struct ext4_prealloc_space, pa_group_list);
3499 list_del(&pa->pa_group_list);
3500 count++;
3501 kmem_cache_free(ext4_pspace_cachep, pa);
3502 }
3503 return count;
3504 }
3505
3506 int ext4_mb_release(struct super_block *sb)
3507 {
3508 ext4_group_t ngroups = ext4_get_groups_count(sb);
3509 ext4_group_t i;
3510 int num_meta_group_infos;
3511 struct ext4_group_info *grinfo, ***group_info;
3512 struct ext4_sb_info *sbi = EXT4_SB(sb);
3513 struct kmem_cache *cachep = get_groupinfo_cache(sb->s_blocksize_bits);
3514 int count;
3515
3516 if (test_opt(sb, DISCARD)) {
3517
3518
3519
3520 flush_work(&sbi->s_discard_work);
3521 WARN_ON_ONCE(!list_empty(&sbi->s_discard_list));
3522 }
3523
3524 if (sbi->s_group_info) {
3525 for (i = 0; i < ngroups; i++) {
3526 cond_resched();
3527 grinfo = ext4_get_group_info(sb, i);
3528 mb_group_bb_bitmap_free(grinfo);
3529 ext4_lock_group(sb, i);
3530 count = ext4_mb_cleanup_pa(grinfo);
3531 if (count)
3532 mb_debug(sb, "mballoc: %d PAs left\n",
3533 count);
3534 ext4_unlock_group(sb, i);
3535 kmem_cache_free(cachep, grinfo);
3536 }
3537 num_meta_group_infos = (ngroups +
3538 EXT4_DESC_PER_BLOCK(sb) - 1) >>
3539 EXT4_DESC_PER_BLOCK_BITS(sb);
3540 rcu_read_lock();
3541 group_info = rcu_dereference(sbi->s_group_info);
3542 for (i = 0; i < num_meta_group_infos; i++)
3543 kfree(group_info[i]);
3544 kvfree(group_info);
3545 rcu_read_unlock();
3546 }
3547 kfree(sbi->s_mb_avg_fragment_size);
3548 kfree(sbi->s_mb_avg_fragment_size_locks);
3549 kfree(sbi->s_mb_largest_free_orders);
3550 kfree(sbi->s_mb_largest_free_orders_locks);
3551 kfree(sbi->s_mb_offsets);
3552 kfree(sbi->s_mb_maxs);
3553 iput(sbi->s_buddy_cache);
3554 if (sbi->s_mb_stats) {
3555 ext4_msg(sb, KERN_INFO,
3556 "mballoc: %u blocks %u reqs (%u success)",
3557 atomic_read(&sbi->s_bal_allocated),
3558 atomic_read(&sbi->s_bal_reqs),
3559 atomic_read(&sbi->s_bal_success));
3560 ext4_msg(sb, KERN_INFO,
3561 "mballoc: %u extents scanned, %u groups scanned, %u goal hits, "
3562 "%u 2^N hits, %u breaks, %u lost",
3563 atomic_read(&sbi->s_bal_ex_scanned),
3564 atomic_read(&sbi->s_bal_groups_scanned),
3565 atomic_read(&sbi->s_bal_goals),
3566 atomic_read(&sbi->s_bal_2orders),
3567 atomic_read(&sbi->s_bal_breaks),
3568 atomic_read(&sbi->s_mb_lost_chunks));
3569 ext4_msg(sb, KERN_INFO,
3570 "mballoc: %u generated and it took %llu",
3571 atomic_read(&sbi->s_mb_buddies_generated),
3572 atomic64_read(&sbi->s_mb_generation_time));
3573 ext4_msg(sb, KERN_INFO,
3574 "mballoc: %u preallocated, %u discarded",
3575 atomic_read(&sbi->s_mb_preallocated),
3576 atomic_read(&sbi->s_mb_discarded));
3577 }
3578
3579 free_percpu(sbi->s_locality_groups);
3580
3581 return 0;
3582 }
3583
3584 static inline int ext4_issue_discard(struct super_block *sb,
3585 ext4_group_t block_group, ext4_grpblk_t cluster, int count,
3586 struct bio **biop)
3587 {
3588 ext4_fsblk_t discard_block;
3589
3590 discard_block = (EXT4_C2B(EXT4_SB(sb), cluster) +
3591 ext4_group_first_block_no(sb, block_group));
3592 count = EXT4_C2B(EXT4_SB(sb), count);
3593 trace_ext4_discard_blocks(sb,
3594 (unsigned long long) discard_block, count);
3595 if (biop) {
3596 return __blkdev_issue_discard(sb->s_bdev,
3597 (sector_t)discard_block << (sb->s_blocksize_bits - 9),
3598 (sector_t)count << (sb->s_blocksize_bits - 9),
3599 GFP_NOFS, biop);
3600 } else
3601 return sb_issue_discard(sb, discard_block, count, GFP_NOFS, 0);
3602 }
3603
3604 static void ext4_free_data_in_buddy(struct super_block *sb,
3605 struct ext4_free_data *entry)
3606 {
3607 struct ext4_buddy e4b;
3608 struct ext4_group_info *db;
3609 int err, count = 0, count2 = 0;
3610
3611 mb_debug(sb, "gonna free %u blocks in group %u (0x%p):",
3612 entry->efd_count, entry->efd_group, entry);
3613
3614 err = ext4_mb_load_buddy(sb, entry->efd_group, &e4b);
3615
3616 BUG_ON(err != 0);
3617
3618 spin_lock(&EXT4_SB(sb)->s_md_lock);
3619 EXT4_SB(sb)->s_mb_free_pending -= entry->efd_count;
3620 spin_unlock(&EXT4_SB(sb)->s_md_lock);
3621
3622 db = e4b.bd_info;
3623
3624 count += entry->efd_count;
3625 count2++;
3626 ext4_lock_group(sb, entry->efd_group);
3627
3628 rb_erase(&entry->efd_node, &(db->bb_free_root));
3629 mb_free_blocks(NULL, &e4b, entry->efd_start_cluster, entry->efd_count);
3630
3631
3632
3633
3634
3635
3636
3637 if (!test_opt(sb, DISCARD))
3638 EXT4_MB_GRP_CLEAR_TRIMMED(db);
3639
3640 if (!db->bb_free_root.rb_node) {
3641
3642
3643
3644 put_page(e4b.bd_buddy_page);
3645 put_page(e4b.bd_bitmap_page);
3646 }
3647 ext4_unlock_group(sb, entry->efd_group);
3648 ext4_mb_unload_buddy(&e4b);
3649
3650 mb_debug(sb, "freed %d blocks in %d structures\n", count,
3651 count2);
3652 }
3653
3654
3655
3656
3657
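/*
 * Intended to run once the transaction identified by @commit_tid has
 * committed: every ext4_free_data entry recorded for that tid is
 * returned to the buddy cache, and with -o discard the entries are
 * handed to the discard worker instead of being freed immediately.
 */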
3658 void ext4_process_freed_data(struct super_block *sb, tid_t commit_tid)
3659 {
3660 struct ext4_sb_info *sbi = EXT4_SB(sb);
3661 struct ext4_free_data *entry, *tmp;
3662 struct list_head freed_data_list;
3663 struct list_head *cut_pos = NULL;
3664 bool wake;
3665
3666 INIT_LIST_HEAD(&freed_data_list);
3667
3668 spin_lock(&sbi->s_md_lock);
3669 list_for_each_entry(entry, &sbi->s_freed_data_list, efd_list) {
3670 if (entry->efd_tid != commit_tid)
3671 break;
3672 cut_pos = &entry->efd_list;
3673 }
3674 if (cut_pos)
3675 list_cut_position(&freed_data_list, &sbi->s_freed_data_list,
3676 cut_pos);
3677 spin_unlock(&sbi->s_md_lock);
3678
3679 list_for_each_entry(entry, &freed_data_list, efd_list)
3680 ext4_free_data_in_buddy(sb, entry);
3681
3682 if (test_opt(sb, DISCARD)) {
3683 spin_lock(&sbi->s_md_lock);
3684 wake = list_empty(&sbi->s_discard_list);
3685 list_splice_tail(&freed_data_list, &sbi->s_discard_list);
3686 spin_unlock(&sbi->s_md_lock);
3687 if (wake)
3688 queue_work(system_unbound_wq, &sbi->s_discard_work);
3689 } else {
3690 list_for_each_entry_safe(entry, tmp, &freed_data_list, efd_list)
3691 kmem_cache_free(ext4_free_data_cachep, entry);
3692 }
3693 }
3694
3695 int __init ext4_init_mballoc(void)
3696 {
3697 ext4_pspace_cachep = KMEM_CACHE(ext4_prealloc_space,
3698 SLAB_RECLAIM_ACCOUNT);
3699 if (ext4_pspace_cachep == NULL)
3700 goto out;
3701
3702 ext4_ac_cachep = KMEM_CACHE(ext4_allocation_context,
3703 SLAB_RECLAIM_ACCOUNT);
3704 if (ext4_ac_cachep == NULL)
3705 goto out_pa_free;
3706
3707 ext4_free_data_cachep = KMEM_CACHE(ext4_free_data,
3708 SLAB_RECLAIM_ACCOUNT);
3709 if (ext4_free_data_cachep == NULL)
3710 goto out_ac_free;
3711
3712 return 0;
3713
3714 out_ac_free:
3715 kmem_cache_destroy(ext4_ac_cachep);
3716 out_pa_free:
3717 kmem_cache_destroy(ext4_pspace_cachep);
3718 out:
3719 return -ENOMEM;
3720 }
3721
3722 void ext4_exit_mballoc(void)
3723 {
3724
3725
3726
3727
3728 rcu_barrier();
3729 kmem_cache_destroy(ext4_pspace_cachep);
3730 kmem_cache_destroy(ext4_ac_cachep);
3731 kmem_cache_destroy(ext4_free_data_cachep);
3732 ext4_groupinfo_destroy_slabs();
3733 }
3734
3735
3736
3737
3738
3739
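/*
 * Commit the chosen extent to disk: set the bits in the on-disk block
 * bitmap, update the group descriptor's free cluster count and
 * checksums, and adjust the superblock and flex-group counters, all
 * against the caller's journal handle.  If the extent overlaps
 * filesystem metadata, the blocks are marked in-use in the bitmap and
 * -EFSCORRUPTED is returned.
 */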
3740 static noinline_for_stack int
3741 ext4_mb_mark_diskspace_used(struct ext4_allocation_context *ac,
3742 handle_t *handle, unsigned int reserv_clstrs)
3743 {
3744 struct buffer_head *bitmap_bh = NULL;
3745 struct ext4_group_desc *gdp;
3746 struct buffer_head *gdp_bh;
3747 struct ext4_sb_info *sbi;
3748 struct super_block *sb;
3749 ext4_fsblk_t block;
3750 int err, len;
3751
3752 BUG_ON(ac->ac_status != AC_STATUS_FOUND);
3753 BUG_ON(ac->ac_b_ex.fe_len <= 0);
3754
3755 sb = ac->ac_sb;
3756 sbi = EXT4_SB(sb);
3757
3758 bitmap_bh = ext4_read_block_bitmap(sb, ac->ac_b_ex.fe_group);
3759 if (IS_ERR(bitmap_bh)) {
3760 err = PTR_ERR(bitmap_bh);
3761 bitmap_bh = NULL;
3762 goto out_err;
3763 }
3764
3765 BUFFER_TRACE(bitmap_bh, "getting write access");
3766 err = ext4_journal_get_write_access(handle, sb, bitmap_bh,
3767 EXT4_JTR_NONE);
3768 if (err)
3769 goto out_err;
3770
3771 err = -EIO;
3772 gdp = ext4_get_group_desc(sb, ac->ac_b_ex.fe_group, &gdp_bh);
3773 if (!gdp)
3774 goto out_err;
3775
3776 ext4_debug("using block group %u(%d)\n", ac->ac_b_ex.fe_group,
3777 ext4_free_group_clusters(sb, gdp));
3778
3779 BUFFER_TRACE(gdp_bh, "get_write_access");
3780 err = ext4_journal_get_write_access(handle, sb, gdp_bh, EXT4_JTR_NONE);
3781 if (err)
3782 goto out_err;
3783
3784 block = ext4_grp_offs_to_block(sb, &ac->ac_b_ex);
3785
3786 len = EXT4_C2B(sbi, ac->ac_b_ex.fe_len);
3787 if (!ext4_inode_block_valid(ac->ac_inode, block, len)) {
3788 ext4_error(sb, "Allocating blocks %llu-%llu which overlap "
3789 "fs metadata", block, block+len);
3790
3791
3792
3793
3794 ext4_lock_group(sb, ac->ac_b_ex.fe_group);
3795 mb_set_bits(bitmap_bh->b_data, ac->ac_b_ex.fe_start,
3796 ac->ac_b_ex.fe_len);
3797 ext4_unlock_group(sb, ac->ac_b_ex.fe_group);
3798 err = ext4_handle_dirty_metadata(handle, NULL, bitmap_bh);
3799 if (!err)
3800 err = -EFSCORRUPTED;
3801 goto out_err;
3802 }
3803
3804 ext4_lock_group(sb, ac->ac_b_ex.fe_group);
3805 #ifdef AGGRESSIVE_CHECK
3806 {
3807 int i;
3808 for (i = 0; i < ac->ac_b_ex.fe_len; i++) {
3809 BUG_ON(mb_test_bit(ac->ac_b_ex.fe_start + i,
3810 bitmap_bh->b_data));
3811 }
3812 }
3813 #endif
3814 mb_set_bits(bitmap_bh->b_data, ac->ac_b_ex.fe_start,
3815 ac->ac_b_ex.fe_len);
3816 if (ext4_has_group_desc_csum(sb) &&
3817 (gdp->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT))) {
3818 gdp->bg_flags &= cpu_to_le16(~EXT4_BG_BLOCK_UNINIT);
3819 ext4_free_group_clusters_set(sb, gdp,
3820 ext4_free_clusters_after_init(sb,
3821 ac->ac_b_ex.fe_group, gdp));
3822 }
3823 len = ext4_free_group_clusters(sb, gdp) - ac->ac_b_ex.fe_len;
3824 ext4_free_group_clusters_set(sb, gdp, len);
3825 ext4_block_bitmap_csum_set(sb, ac->ac_b_ex.fe_group, gdp, bitmap_bh);
3826 ext4_group_desc_csum_set(sb, ac->ac_b_ex.fe_group, gdp);
3827
3828 ext4_unlock_group(sb, ac->ac_b_ex.fe_group);
3829 percpu_counter_sub(&sbi->s_freeclusters_counter, ac->ac_b_ex.fe_len);
3830
3831
3832
3833 if (!(ac->ac_flags & EXT4_MB_DELALLOC_RESERVED))
3834
3835 percpu_counter_sub(&sbi->s_dirtyclusters_counter,
3836 reserv_clstrs);
3837
3838 if (sbi->s_log_groups_per_flex) {
3839 ext4_group_t flex_group = ext4_flex_group(sbi,
3840 ac->ac_b_ex.fe_group);
3841 atomic64_sub(ac->ac_b_ex.fe_len,
3842 &sbi_array_rcu_deref(sbi, s_flex_groups,
3843 flex_group)->free_clusters);
3844 }
3845
3846 err = ext4_handle_dirty_metadata(handle, NULL, bitmap_bh);
3847 if (err)
3848 goto out_err;
3849 err = ext4_handle_dirty_metadata(handle, NULL, gdp_bh);
3850
3851 out_err:
3852 brelse(bitmap_bh);
3853 return err;
3854 }
3855
3856
3857
3858
3859
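/*
 * Set or clear @len blocks starting at @block directly in the on-disk
 * block bitmaps, without a journal handle.  The range is processed one
 * block group at a time; free cluster counts, bitmap checksums and
 * flex-group counters are updated to match.
 */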
3860 void ext4_mb_mark_bb(struct super_block *sb, ext4_fsblk_t block,
3861 int len, int state)
3862 {
3863 struct buffer_head *bitmap_bh = NULL;
3864 struct ext4_group_desc *gdp;
3865 struct buffer_head *gdp_bh;
3866 struct ext4_sb_info *sbi = EXT4_SB(sb);
3867 ext4_group_t group;
3868 ext4_grpblk_t blkoff;
3869 int i, err = 0;
3870 int already;
3871 unsigned int clen, clen_changed, thisgrp_len;
3872
3873 while (len > 0) {
3874 ext4_get_group_no_and_offset(sb, block, &group, &blkoff);
3875
3876
3877
3878
3879
3880
3881
3882
3883
3884 thisgrp_len = min_t(unsigned int, (unsigned int)len,
3885 EXT4_BLOCKS_PER_GROUP(sb) - EXT4_C2B(sbi, blkoff));
3886 clen = EXT4_NUM_B2C(sbi, thisgrp_len);
3887
3888 if (!ext4_sb_block_valid(sb, NULL, block, thisgrp_len)) {
3889 ext4_error(sb, "Marking blocks in system zone - "
3890 "Block = %llu, len = %u",
3891 block, thisgrp_len);
3892 bitmap_bh = NULL;
3893 break;
3894 }
3895
3896 bitmap_bh = ext4_read_block_bitmap(sb, group);
3897 if (IS_ERR(bitmap_bh)) {
3898 err = PTR_ERR(bitmap_bh);
3899 bitmap_bh = NULL;
3900 break;
3901 }
3902
3903 err = -EIO;
3904 gdp = ext4_get_group_desc(sb, group, &gdp_bh);
3905 if (!gdp)
3906 break;
3907
3908 ext4_lock_group(sb, group);
3909 already = 0;
3910 for (i = 0; i < clen; i++)
3911 if (!mb_test_bit(blkoff + i, bitmap_bh->b_data) ==
3912 !state)
3913 already++;
3914
3915 clen_changed = clen - already;
3916 if (state)
3917 mb_set_bits(bitmap_bh->b_data, blkoff, clen);
3918 else
3919 mb_clear_bits(bitmap_bh->b_data, blkoff, clen);
3920 if (ext4_has_group_desc_csum(sb) &&
3921 (gdp->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT))) {
3922 gdp->bg_flags &= cpu_to_le16(~EXT4_BG_BLOCK_UNINIT);
3923 ext4_free_group_clusters_set(sb, gdp,
3924 ext4_free_clusters_after_init(sb, group, gdp));
3925 }
3926 if (state)
3927 clen = ext4_free_group_clusters(sb, gdp) - clen_changed;
3928 else
3929 clen = ext4_free_group_clusters(sb, gdp) + clen_changed;
3930
3931 ext4_free_group_clusters_set(sb, gdp, clen);
3932 ext4_block_bitmap_csum_set(sb, group, gdp, bitmap_bh);
3933 ext4_group_desc_csum_set(sb, group, gdp);
3934
3935 ext4_unlock_group(sb, group);
3936
3937 if (sbi->s_log_groups_per_flex) {
3938 ext4_group_t flex_group = ext4_flex_group(sbi, group);
3939 struct flex_groups *fg = sbi_array_rcu_deref(sbi,
3940 s_flex_groups, flex_group);
3941
3942 if (state)
3943 atomic64_sub(clen_changed, &fg->free_clusters);
3944 else
3945 atomic64_add(clen_changed, &fg->free_clusters);
3946
3947 }
3948
3949 err = ext4_handle_dirty_metadata(NULL, NULL, bitmap_bh);
3950 if (err)
3951 break;
3952 sync_dirty_buffer(bitmap_bh);
3953 err = ext4_handle_dirty_metadata(NULL, NULL, gdp_bh);
3954 sync_dirty_buffer(gdp_bh);
3955 if (err)
3956 break;
3957
3958 block += thisgrp_len;
3959 len -= thisgrp_len;
3960 brelse(bitmap_bh);
3961 BUG_ON(len < 0);
3962 }
3963
3964 if (err)
3965 brelse(bitmap_bh);
3966 }
3967
3968
3969
3970
3971
3972
3973
3974
3975
3976
3977 static void ext4_mb_normalize_group_request(struct ext4_allocation_context *ac)
3978 {
3979 struct super_block *sb = ac->ac_sb;
3980 struct ext4_locality_group *lg = ac->ac_lg;
3981
3982 BUG_ON(lg == NULL);
3983 ac->ac_g_ex.fe_len = EXT4_SB(sb)->s_mb_group_prealloc;
3984 mb_debug(sb, "goal %u blocks for locality group\n", ac->ac_g_ex.fe_len);
3985 }
3986
3987
3988
3989
3990
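/*
 * Normalization rounds the requested allocation up to a preallocation
 * friendly size: small files are padded to the next bucket between 16K
 * and 1M, larger ones are aligned to 2M/4M/8M chunks, and anything
 * bigger keeps its original size.  The result is then trimmed against
 * the group size, neighbouring extents and existing preallocations
 * before it becomes the goal extent.
 */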
3991 static noinline_for_stack void
3992 ext4_mb_normalize_request(struct ext4_allocation_context *ac,
3993 struct ext4_allocation_request *ar)
3994 {
3995 struct ext4_sb_info *sbi = EXT4_SB(ac->ac_sb);
3996 int bsbits, max;
3997 ext4_lblk_t end;
3998 loff_t size, start_off;
3999 loff_t orig_size __maybe_unused;
4000 ext4_lblk_t start;
4001 struct ext4_inode_info *ei = EXT4_I(ac->ac_inode);
4002 struct ext4_prealloc_space *pa;
4003
4004
4005
4006 if (!(ac->ac_flags & EXT4_MB_HINT_DATA))
4007 return;
4008
4009
4010 if (unlikely(ac->ac_flags & EXT4_MB_HINT_GOAL_ONLY))
4011 return;
4012
4013
4014
4015 if (ac->ac_flags & EXT4_MB_HINT_NOPREALLOC)
4016 return;
4017
4018 if (ac->ac_flags & EXT4_MB_HINT_GROUP_ALLOC) {
4019 ext4_mb_normalize_group_request(ac);
4020 return ;
4021 }
4022
4023 bsbits = ac->ac_sb->s_blocksize_bits;
4024
4025
4026
4027 size = ac->ac_o_ex.fe_logical + EXT4_C2B(sbi, ac->ac_o_ex.fe_len);
4028 size = size << bsbits;
4029 if (size < i_size_read(ac->ac_inode))
4030 size = i_size_read(ac->ac_inode);
4031 orig_size = size;
4032
4033
4034 max = 2 << bsbits;
4035
4036 #define NRL_CHECK_SIZE(req, size, max, chunk_size) \
4037 (req <= (size) || max <= (chunk_size))
4038
4039
4040
4041 start_off = 0;
4042 if (size <= 16 * 1024) {
4043 size = 16 * 1024;
4044 } else if (size <= 32 * 1024) {
4045 size = 32 * 1024;
4046 } else if (size <= 64 * 1024) {
4047 size = 64 * 1024;
4048 } else if (size <= 128 * 1024) {
4049 size = 128 * 1024;
4050 } else if (size <= 256 * 1024) {
4051 size = 256 * 1024;
4052 } else if (size <= 512 * 1024) {
4053 size = 512 * 1024;
4054 } else if (size <= 1024 * 1024) {
4055 size = 1024 * 1024;
4056 } else if (NRL_CHECK_SIZE(size, 4 * 1024 * 1024, max, 2 * 1024)) {
4057 start_off = ((loff_t)ac->ac_o_ex.fe_logical >>
4058 (21 - bsbits)) << 21;
4059 size = 2 * 1024 * 1024;
4060 } else if (NRL_CHECK_SIZE(size, 8 * 1024 * 1024, max, 4 * 1024)) {
4061 start_off = ((loff_t)ac->ac_o_ex.fe_logical >>
4062 (22 - bsbits)) << 22;
4063 size = 4 * 1024 * 1024;
4064 } else if (NRL_CHECK_SIZE(ac->ac_o_ex.fe_len,
4065 (8<<20)>>bsbits, max, 8 * 1024)) {
4066 start_off = ((loff_t)ac->ac_o_ex.fe_logical >>
4067 (23 - bsbits)) << 23;
4068 size = 8 * 1024 * 1024;
4069 } else {
4070 start_off = (loff_t) ac->ac_o_ex.fe_logical << bsbits;
4071 size = (loff_t) EXT4_C2B(EXT4_SB(ac->ac_sb),
4072 ac->ac_o_ex.fe_len) << bsbits;
4073 }
4074 size = size >> bsbits;
4075 start = start_off >> bsbits;
4076
4077
4078
4079
4080
4081
4082
4083 start = max(start, rounddown(ac->ac_o_ex.fe_logical,
4084 (ext4_lblk_t)EXT4_BLOCKS_PER_GROUP(ac->ac_sb)));
4085
4086
4087 if (ar->pleft && start <= ar->lleft) {
4088 size -= ar->lleft + 1 - start;
4089 start = ar->lleft + 1;
4090 }
4091 if (ar->pright && start + size - 1 >= ar->lright)
4092 size -= start + size - ar->lright;
4093
4094
4095
4096
4097
4098 if (size > EXT4_BLOCKS_PER_GROUP(ac->ac_sb))
4099 size = EXT4_BLOCKS_PER_GROUP(ac->ac_sb);
4100
4101 end = start + size;
4102
4103
4104 rcu_read_lock();
4105 list_for_each_entry_rcu(pa, &ei->i_prealloc_list, pa_inode_list) {
4106 ext4_lblk_t pa_end;
4107
4108 if (pa->pa_deleted)
4109 continue;
4110 spin_lock(&pa->pa_lock);
4111 if (pa->pa_deleted) {
4112 spin_unlock(&pa->pa_lock);
4113 continue;
4114 }
4115
4116 pa_end = pa->pa_lstart + EXT4_C2B(EXT4_SB(ac->ac_sb),
4117 pa->pa_len);
4118
4119
4120 BUG_ON(!(ac->ac_o_ex.fe_logical >= pa_end ||
4121 ac->ac_o_ex.fe_logical < pa->pa_lstart));
4122
4123
4124 if (pa->pa_lstart >= end || pa_end <= start) {
4125 spin_unlock(&pa->pa_lock);
4126 continue;
4127 }
4128 BUG_ON(pa->pa_lstart <= start && pa_end >= end);
4129
4130
4131 if (pa_end <= ac->ac_o_ex.fe_logical) {
4132 BUG_ON(pa_end < start);
4133 start = pa_end;
4134 } else if (pa->pa_lstart > ac->ac_o_ex.fe_logical) {
4135 BUG_ON(pa->pa_lstart > end);
4136 end = pa->pa_lstart;
4137 }
4138 spin_unlock(&pa->pa_lock);
4139 }
4140 rcu_read_unlock();
4141 size = end - start;
4142
4143
4144 rcu_read_lock();
4145 list_for_each_entry_rcu(pa, &ei->i_prealloc_list, pa_inode_list) {
4146 ext4_lblk_t pa_end;
4147
4148 spin_lock(&pa->pa_lock);
4149 if (pa->pa_deleted == 0) {
4150 pa_end = pa->pa_lstart + EXT4_C2B(EXT4_SB(ac->ac_sb),
4151 pa->pa_len);
4152 BUG_ON(!(start >= pa_end || end <= pa->pa_lstart));
4153 }
4154 spin_unlock(&pa->pa_lock);
4155 }
4156 rcu_read_unlock();
4157
4158
4159
4160
4161
4162
4163
4164
4165
4166
4167
4168
4169
4170
4171
4172
4173 if (start + size <= ac->ac_o_ex.fe_logical ||
4174 start > ac->ac_o_ex.fe_logical) {
4175 ext4_msg(ac->ac_sb, KERN_ERR,
4176 "start %lu, size %lu, fe_logical %lu",
4177 (unsigned long) start, (unsigned long) size,
4178 (unsigned long) ac->ac_o_ex.fe_logical);
4179 BUG();
4180 }
4181 BUG_ON(size <= 0 || size > EXT4_BLOCKS_PER_GROUP(ac->ac_sb));
4182
4183
4184
4185
4186
4187 ac->ac_g_ex.fe_logical = start;
4188 ac->ac_g_ex.fe_len = EXT4_NUM_B2C(sbi, size);
4189
4190
4191 if (ar->pright && (ar->lright == (start + size))) {
4192
4193 ext4_get_group_no_and_offset(ac->ac_sb, ar->pright - size,
4194 &ac->ac_f_ex.fe_group,
4195 &ac->ac_f_ex.fe_start);
4196 ac->ac_flags |= EXT4_MB_HINT_TRY_GOAL;
4197 }
4198 if (ar->pleft && (ar->lleft + 1 == start)) {
4199
4200 ext4_get_group_no_and_offset(ac->ac_sb, ar->pleft + 1,
4201 &ac->ac_f_ex.fe_group,
4202 &ac->ac_f_ex.fe_start);
4203 ac->ac_flags |= EXT4_MB_HINT_TRY_GOAL;
4204 }
4205
4206 mb_debug(ac->ac_sb, "goal: %lld(was %lld) blocks at %u\n", size,
4207 orig_size, start);
4208 }
4209
4210 static void ext4_mb_collect_stats(struct ext4_allocation_context *ac)
4211 {
4212 struct ext4_sb_info *sbi = EXT4_SB(ac->ac_sb);
4213
4214 if (sbi->s_mb_stats && ac->ac_g_ex.fe_len >= 1) {
4215 atomic_inc(&sbi->s_bal_reqs);
4216 atomic_add(ac->ac_b_ex.fe_len, &sbi->s_bal_allocated);
4217 if (ac->ac_b_ex.fe_len >= ac->ac_o_ex.fe_len)
4218 atomic_inc(&sbi->s_bal_success);
4219 atomic_add(ac->ac_found, &sbi->s_bal_ex_scanned);
4220 atomic_add(ac->ac_groups_scanned, &sbi->s_bal_groups_scanned);
4221 if (ac->ac_g_ex.fe_start == ac->ac_b_ex.fe_start &&
4222 ac->ac_g_ex.fe_group == ac->ac_b_ex.fe_group)
4223 atomic_inc(&sbi->s_bal_goals);
4224 if (ac->ac_found > sbi->s_mb_max_to_scan)
4225 atomic_inc(&sbi->s_bal_breaks);
4226 }
4227
4228 if (ac->ac_op == EXT4_MB_HISTORY_ALLOC)
4229 trace_ext4_mballoc_alloc(ac);
4230 else
4231 trace_ext4_mballoc_prealloc(ac);
4232 }
4233
4234
4235
4236
4237
4238
4239
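/*
 * Undo an allocation that will not be used by the caller.  Without a
 * preallocation the blocks are released straight back into the buddy;
 * for an inode PA the consumed length is simply returned to pa_free
 * (group PAs need no adjustment here).
 */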
4240 static void ext4_discard_allocated_blocks(struct ext4_allocation_context *ac)
4241 {
4242 struct ext4_prealloc_space *pa = ac->ac_pa;
4243 struct ext4_buddy e4b;
4244 int err;
4245
4246 if (pa == NULL) {
4247 if (ac->ac_f_ex.fe_len == 0)
4248 return;
4249 err = ext4_mb_load_buddy(ac->ac_sb, ac->ac_f_ex.fe_group, &e4b);
4250 if (err) {
4251
4252
4253
4254
4255
4256 WARN(1, "mb_load_buddy failed (%d)", err);
4257 return;
4258 }
4259 ext4_lock_group(ac->ac_sb, ac->ac_f_ex.fe_group);
4260 mb_free_blocks(ac->ac_inode, &e4b, ac->ac_f_ex.fe_start,
4261 ac->ac_f_ex.fe_len);
4262 ext4_unlock_group(ac->ac_sb, ac->ac_f_ex.fe_group);
4263 ext4_mb_unload_buddy(&e4b);
4264 return;
4265 }
4266 if (pa->pa_type == MB_INODE_PA)
4267 pa->pa_free += ac->ac_b_ex.fe_len;
4268 }
4269
4270
4271
4272
4273 static void ext4_mb_use_inode_pa(struct ext4_allocation_context *ac,
4274 struct ext4_prealloc_space *pa)
4275 {
4276 struct ext4_sb_info *sbi = EXT4_SB(ac->ac_sb);
4277 ext4_fsblk_t start;
4278 ext4_fsblk_t end;
4279 int len;
4280
4281
4282 start = pa->pa_pstart + (ac->ac_o_ex.fe_logical - pa->pa_lstart);
4283 end = min(pa->pa_pstart + EXT4_C2B(sbi, pa->pa_len),
4284 start + EXT4_C2B(sbi, ac->ac_o_ex.fe_len));
4285 len = EXT4_NUM_B2C(sbi, end - start);
4286 ext4_get_group_no_and_offset(ac->ac_sb, start, &ac->ac_b_ex.fe_group,
4287 &ac->ac_b_ex.fe_start);
4288 ac->ac_b_ex.fe_len = len;
4289 ac->ac_status = AC_STATUS_FOUND;
4290 ac->ac_pa = pa;
4291
4292 BUG_ON(start < pa->pa_pstart);
4293 BUG_ON(end > pa->pa_pstart + EXT4_C2B(sbi, pa->pa_len));
4294 BUG_ON(pa->pa_free < len);
4295 pa->pa_free -= len;
4296
4297 mb_debug(ac->ac_sb, "use %llu/%d from inode pa %p\n", start, len, pa);
4298 }
4299
4300
4301
4302
4303 static void ext4_mb_use_group_pa(struct ext4_allocation_context *ac,
4304 struct ext4_prealloc_space *pa)
4305 {
4306 unsigned int len = ac->ac_o_ex.fe_len;
4307
4308 ext4_get_group_no_and_offset(ac->ac_sb, pa->pa_pstart,
4309 &ac->ac_b_ex.fe_group,
4310 &ac->ac_b_ex.fe_start);
4311 ac->ac_b_ex.fe_len = len;
4312 ac->ac_status = AC_STATUS_FOUND;
4313 ac->ac_pa = pa;
4314
4315
4316
4317
4318
4319
4320
4321 mb_debug(ac->ac_sb, "use %u/%u from group pa %p\n",
4322 pa->pa_lstart-len, len, pa);
4323 }
4324
4325
4326
4327
4328
4329
4330
4331 static struct ext4_prealloc_space *
4332 ext4_mb_check_group_pa(ext4_fsblk_t goal_block,
4333 struct ext4_prealloc_space *pa,
4334 struct ext4_prealloc_space *cpa)
4335 {
4336 ext4_fsblk_t cur_distance, new_distance;
4337
4338 if (cpa == NULL) {
4339 atomic_inc(&pa->pa_count);
4340 return pa;
4341 }
4342 cur_distance = abs(goal_block - cpa->pa_pstart);
4343 new_distance = abs(goal_block - pa->pa_pstart);
4344
4345 if (cur_distance <= new_distance)
4346 return cpa;
4347
4348
4349 atomic_dec(&cpa->pa_count);
4350 atomic_inc(&pa->pa_count);
4351 return pa;
4352 }
4353
4354
4355
4356
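/*
 * Try to satisfy the request from existing preallocated space: first
 * the inode's own PA list (the PA must cover the requested logical
 * block), then, for group allocations, the locality group lists of
 * order >= the request size, picking the candidate closest to the
 * goal block.
 */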
4357 static noinline_for_stack bool
4358 ext4_mb_use_preallocated(struct ext4_allocation_context *ac)
4359 {
4360 struct ext4_sb_info *sbi = EXT4_SB(ac->ac_sb);
4361 int order, i;
4362 struct ext4_inode_info *ei = EXT4_I(ac->ac_inode);
4363 struct ext4_locality_group *lg;
4364 struct ext4_prealloc_space *pa, *cpa = NULL;
4365 ext4_fsblk_t goal_block;
4366
4367
4368 if (!(ac->ac_flags & EXT4_MB_HINT_DATA))
4369 return false;
4370
4371
4372 rcu_read_lock();
4373 list_for_each_entry_rcu(pa, &ei->i_prealloc_list, pa_inode_list) {
4374
4375
4376
4377 if (ac->ac_o_ex.fe_logical < pa->pa_lstart ||
4378 ac->ac_o_ex.fe_logical >= (pa->pa_lstart +
4379 EXT4_C2B(sbi, pa->pa_len)))
4380 continue;
4381
4382
4383 if (!(ext4_test_inode_flag(ac->ac_inode, EXT4_INODE_EXTENTS)) &&
4384 (pa->pa_pstart + EXT4_C2B(sbi, pa->pa_len) >
4385 EXT4_MAX_BLOCK_FILE_PHYS))
4386 continue;
4387
4388
4389 spin_lock(&pa->pa_lock);
4390 if (pa->pa_deleted == 0 && pa->pa_free) {
4391 atomic_inc(&pa->pa_count);
4392 ext4_mb_use_inode_pa(ac, pa);
4393 spin_unlock(&pa->pa_lock);
4394 ac->ac_criteria = 10;
4395 rcu_read_unlock();
4396 return true;
4397 }
4398 spin_unlock(&pa->pa_lock);
4399 }
4400 rcu_read_unlock();
4401
4402
4403 if (!(ac->ac_flags & EXT4_MB_HINT_GROUP_ALLOC))
4404 return false;
4405
4406
4407 lg = ac->ac_lg;
4408 if (lg == NULL)
4409 return false;
4410 order = fls(ac->ac_o_ex.fe_len) - 1;
4411 if (order > PREALLOC_TB_SIZE - 1)
4412
4413 order = PREALLOC_TB_SIZE - 1;
4414
4415 goal_block = ext4_grp_offs_to_block(ac->ac_sb, &ac->ac_g_ex);
4416
4417
4418
4419
4420 for (i = order; i < PREALLOC_TB_SIZE; i++) {
4421 rcu_read_lock();
4422 list_for_each_entry_rcu(pa, &lg->lg_prealloc_list[i],
4423 pa_inode_list) {
4424 spin_lock(&pa->pa_lock);
4425 if (pa->pa_deleted == 0 &&
4426 pa->pa_free >= ac->ac_o_ex.fe_len) {
4427
4428 cpa = ext4_mb_check_group_pa(goal_block,
4429 pa, cpa);
4430 }
4431 spin_unlock(&pa->pa_lock);
4432 }
4433 rcu_read_unlock();
4434 }
4435 if (cpa) {
4436 ext4_mb_use_group_pa(ac, cpa);
4437 ac->ac_criteria = 20;
4438 return true;
4439 }
4440 return false;
4441 }
4442
4443
4444
4445
4446
4447
4448
4449 static void ext4_mb_generate_from_freelist(struct super_block *sb, void *bitmap,
4450 ext4_group_t group)
4451 {
4452 struct rb_node *n;
4453 struct ext4_group_info *grp;
4454 struct ext4_free_data *entry;
4455
4456 grp = ext4_get_group_info(sb, group);
4457 n = rb_first(&(grp->bb_free_root));
4458
4459 while (n) {
4460 entry = rb_entry(n, struct ext4_free_data, efd_node);
4461 mb_set_bits(bitmap, entry->efd_start_cluster, entry->efd_count);
4462 n = rb_next(n);
4463 }
4464 return;
4465 }
4466
4467
4468
4469
4470
4471
4472 static noinline_for_stack
4473 void ext4_mb_generate_from_pa(struct super_block *sb, void *bitmap,
4474 ext4_group_t group)
4475 {
4476 struct ext4_group_info *grp = ext4_get_group_info(sb, group);
4477 struct ext4_prealloc_space *pa;
4478 struct list_head *cur;
4479 ext4_group_t groupnr;
4480 ext4_grpblk_t start;
4481 int preallocated = 0;
4482 int len;
4483
4484
4485
4486
4487
4488
4489
4490
4491
4492 list_for_each(cur, &grp->bb_prealloc_list) {
4493 pa = list_entry(cur, struct ext4_prealloc_space, pa_group_list);
4494 spin_lock(&pa->pa_lock);
4495 ext4_get_group_no_and_offset(sb, pa->pa_pstart,
4496 &groupnr, &start);
4497 len = pa->pa_len;
4498 spin_unlock(&pa->pa_lock);
4499 if (unlikely(len == 0))
4500 continue;
4501 BUG_ON(groupnr != group);
4502 mb_set_bits(bitmap, start, len);
4503 preallocated += len;
4504 }
4505 mb_debug(sb, "preallocated %d for group %u\n", preallocated, group);
4506 }
4507
4508 static void ext4_mb_mark_pa_deleted(struct super_block *sb,
4509 struct ext4_prealloc_space *pa)
4510 {
4511 struct ext4_inode_info *ei;
4512
4513 if (pa->pa_deleted) {
4514 ext4_warning(sb, "deleted pa, type:%d, pblk:%llu, lblk:%u, len:%d\n",
4515 pa->pa_type, pa->pa_pstart, pa->pa_lstart,
4516 pa->pa_len);
4517 return;
4518 }
4519
4520 pa->pa_deleted = 1;
4521
4522 if (pa->pa_type == MB_INODE_PA) {
4523 ei = EXT4_I(pa->pa_inode);
4524 atomic_dec(&ei->i_prealloc_active);
4525 }
4526 }
4527
4528 static void ext4_mb_pa_callback(struct rcu_head *head)
4529 {
4530 struct ext4_prealloc_space *pa;
4531 pa = container_of(head, struct ext4_prealloc_space, u.pa_rcu);
4532
4533 BUG_ON(atomic_read(&pa->pa_count));
4534 BUG_ON(pa->pa_deleted == 0);
4535 kmem_cache_free(ext4_pspace_cachep, pa);
4536 }
4537
4538
4539
4540
4541
4542 static void ext4_mb_put_pa(struct ext4_allocation_context *ac,
4543 struct super_block *sb, struct ext4_prealloc_space *pa)
4544 {
4545 ext4_group_t grp;
4546 ext4_fsblk_t grp_blk;
4547
4548
4549 spin_lock(&pa->pa_lock);
4550 if (!atomic_dec_and_test(&pa->pa_count) || pa->pa_free != 0) {
4551 spin_unlock(&pa->pa_lock);
4552 return;
4553 }
4554
4555 if (pa->pa_deleted == 1) {
4556 spin_unlock(&pa->pa_lock);
4557 return;
4558 }
4559
4560 ext4_mb_mark_pa_deleted(sb, pa);
4561 spin_unlock(&pa->pa_lock);
4562
4563 grp_blk = pa->pa_pstart;
4564
4565
4566
4567
4568 if (pa->pa_type == MB_GROUP_PA)
4569 grp_blk--;
4570
4571 grp = ext4_get_group_number(sb, grp_blk);
4572
4573
4574
4575
4576
4577
4578
4579
4580
4581
4582
4583
4584
4585
4586
4587 ext4_lock_group(sb, grp);
4588 list_del(&pa->pa_group_list);
4589 ext4_unlock_group(sb, grp);
4590
4591 spin_lock(pa->pa_obj_lock);
4592 list_del_rcu(&pa->pa_inode_list);
4593 spin_unlock(pa->pa_obj_lock);
4594
4595 call_rcu(&(pa)->u.pa_rcu, ext4_mb_pa_callback);
4596 }
4597
4598
4599
4600
4601 static noinline_for_stack void
4602 ext4_mb_new_inode_pa(struct ext4_allocation_context *ac)
4603 {
4604 struct super_block *sb = ac->ac_sb;
4605 struct ext4_sb_info *sbi = EXT4_SB(sb);
4606 struct ext4_prealloc_space *pa;
4607 struct ext4_group_info *grp;
4608 struct ext4_inode_info *ei;
4609
4610
4611 BUG_ON(ac->ac_o_ex.fe_len >= ac->ac_b_ex.fe_len);
4612 BUG_ON(ac->ac_status != AC_STATUS_FOUND);
4613 BUG_ON(!S_ISREG(ac->ac_inode->i_mode));
4614 BUG_ON(ac->ac_pa == NULL);
4615
4616 pa = ac->ac_pa;
4617
4618 if (ac->ac_b_ex.fe_len < ac->ac_g_ex.fe_len) {
4619 int winl;
4620 int wins;
4621 int win;
4622 int offs;
4623
4624
4625
4626
4627 BUG_ON(ac->ac_g_ex.fe_logical > ac->ac_o_ex.fe_logical);
4628 BUG_ON(ac->ac_g_ex.fe_len < ac->ac_o_ex.fe_len);
4629
4630
4631
4632
4633 winl = ac->ac_o_ex.fe_logical - ac->ac_g_ex.fe_logical;
4634
4635
4636 wins = EXT4_C2B(sbi, ac->ac_b_ex.fe_len - ac->ac_o_ex.fe_len);
4637
4638
4639 win = min(winl, wins);
4640
4641 offs = ac->ac_o_ex.fe_logical %
4642 EXT4_C2B(sbi, ac->ac_b_ex.fe_len);
4643 if (offs && offs < win)
4644 win = offs;
4645
4646 ac->ac_b_ex.fe_logical = ac->ac_o_ex.fe_logical -
4647 EXT4_NUM_B2C(sbi, win);
4648 BUG_ON(ac->ac_o_ex.fe_logical < ac->ac_b_ex.fe_logical);
4649 BUG_ON(ac->ac_o_ex.fe_len > ac->ac_b_ex.fe_len);
4650 }
4651
4652
4653
4654 ac->ac_f_ex = ac->ac_b_ex;
4655
4656 pa->pa_lstart = ac->ac_b_ex.fe_logical;
4657 pa->pa_pstart = ext4_grp_offs_to_block(sb, &ac->ac_b_ex);
4658 pa->pa_len = ac->ac_b_ex.fe_len;
4659 pa->pa_free = pa->pa_len;
4660 spin_lock_init(&pa->pa_lock);
4661 INIT_LIST_HEAD(&pa->pa_inode_list);
4662 INIT_LIST_HEAD(&pa->pa_group_list);
4663 pa->pa_deleted = 0;
4664 pa->pa_type = MB_INODE_PA;
4665
4666 mb_debug(sb, "new inode pa %p: %llu/%d for %u\n", pa, pa->pa_pstart,
4667 pa->pa_len, pa->pa_lstart);
4668 trace_ext4_mb_new_inode_pa(ac, pa);
4669
4670 ext4_mb_use_inode_pa(ac, pa);
4671 atomic_add(pa->pa_free, &sbi->s_mb_preallocated);
4672
4673 ei = EXT4_I(ac->ac_inode);
4674 grp = ext4_get_group_info(sb, ac->ac_b_ex.fe_group);
4675
4676 pa->pa_obj_lock = &ei->i_prealloc_lock;
4677 pa->pa_inode = ac->ac_inode;
4678
4679 list_add(&pa->pa_group_list, &grp->bb_prealloc_list);
4680
4681 spin_lock(pa->pa_obj_lock);
4682 list_add_rcu(&pa->pa_inode_list, &ei->i_prealloc_list);
4683 spin_unlock(pa->pa_obj_lock);
4684 atomic_inc(&ei->i_prealloc_active);
4685 }
4686
4687
4688
4689
4690 static noinline_for_stack void
4691 ext4_mb_new_group_pa(struct ext4_allocation_context *ac)
4692 {
4693 struct super_block *sb = ac->ac_sb;
4694 struct ext4_locality_group *lg;
4695 struct ext4_prealloc_space *pa;
4696 struct ext4_group_info *grp;
4697
4698
4699 BUG_ON(ac->ac_o_ex.fe_len >= ac->ac_b_ex.fe_len);
4700 BUG_ON(ac->ac_status != AC_STATUS_FOUND);
4701 BUG_ON(!S_ISREG(ac->ac_inode->i_mode));
4702 BUG_ON(ac->ac_pa == NULL);
4703
4704 pa = ac->ac_pa;
4705
4706
4707
4708 ac->ac_f_ex = ac->ac_b_ex;
4709
4710 pa->pa_pstart = ext4_grp_offs_to_block(sb, &ac->ac_b_ex);
4711 pa->pa_lstart = pa->pa_pstart;
4712 pa->pa_len = ac->ac_b_ex.fe_len;
4713 pa->pa_free = pa->pa_len;
4714 spin_lock_init(&pa->pa_lock);
4715 INIT_LIST_HEAD(&pa->pa_inode_list);
4716 INIT_LIST_HEAD(&pa->pa_group_list);
4717 pa->pa_deleted = 0;
4718 pa->pa_type = MB_GROUP_PA;
4719
4720 mb_debug(sb, "new group pa %p: %llu/%d for %u\n", pa, pa->pa_pstart,
4721 pa->pa_len, pa->pa_lstart);
4722 trace_ext4_mb_new_group_pa(ac, pa);
4723
4724 ext4_mb_use_group_pa(ac, pa);
4725 atomic_add(pa->pa_free, &EXT4_SB(sb)->s_mb_preallocated);
4726
4727 grp = ext4_get_group_info(sb, ac->ac_b_ex.fe_group);
4728 lg = ac->ac_lg;
4729 BUG_ON(lg == NULL);
4730
4731 pa->pa_obj_lock = &lg->lg_prealloc_lock;
4732 pa->pa_inode = NULL;
4733
4734 list_add(&pa->pa_group_list, &grp->bb_prealloc_list);
4735
4736
4737
4738
4739
4740 }
4741
4742 static void ext4_mb_new_preallocation(struct ext4_allocation_context *ac)
4743 {
4744 if (ac->ac_flags & EXT4_MB_HINT_GROUP_ALLOC)
4745 ext4_mb_new_group_pa(ac);
4746 else
4747 ext4_mb_new_inode_pa(ac);
4748 }
4749
4750
4751
4752
4753
4754
4755
4756
4757
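/*
 * Release an inode preallocation: every cluster in the PA's range that
 * is still marked free in the on-disk bitmap is returned to the buddy,
 * and the total is cross-checked against pa_free, complaining if the
 * two disagree.
 */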
4758 static noinline_for_stack int
4759 ext4_mb_release_inode_pa(struct ext4_buddy *e4b, struct buffer_head *bitmap_bh,
4760 struct ext4_prealloc_space *pa)
4761 {
4762 struct super_block *sb = e4b->bd_sb;
4763 struct ext4_sb_info *sbi = EXT4_SB(sb);
4764 unsigned int end;
4765 unsigned int next;
4766 ext4_group_t group;
4767 ext4_grpblk_t bit;
4768 unsigned long long grp_blk_start;
4769 int free = 0;
4770
4771 BUG_ON(pa->pa_deleted == 0);
4772 ext4_get_group_no_and_offset(sb, pa->pa_pstart, &group, &bit);
4773 grp_blk_start = pa->pa_pstart - EXT4_C2B(sbi, bit);
4774 BUG_ON(group != e4b->bd_group && pa->pa_len != 0);
4775 end = bit + pa->pa_len;
4776
4777 while (bit < end) {
4778 bit = mb_find_next_zero_bit(bitmap_bh->b_data, end, bit);
4779 if (bit >= end)
4780 break;
4781 next = mb_find_next_bit(bitmap_bh->b_data, end, bit);
4782 mb_debug(sb, "free preallocated %u/%u in group %u\n",
4783 (unsigned) ext4_group_first_block_no(sb, group) + bit,
4784 (unsigned) next - bit, (unsigned) group);
4785 free += next - bit;
4786
4787 trace_ext4_mballoc_discard(sb, NULL, group, bit, next - bit);
4788 trace_ext4_mb_release_inode_pa(pa, (grp_blk_start +
4789 EXT4_C2B(sbi, bit)),
4790 next - bit);
4791 mb_free_blocks(pa->pa_inode, e4b, bit, next - bit);
4792 bit = next + 1;
4793 }
4794 if (free != pa->pa_free) {
4795 ext4_msg(e4b->bd_sb, KERN_CRIT,
4796 "pa %p: logic %lu, phys. %lu, len %d",
4797 pa, (unsigned long) pa->pa_lstart,
4798 (unsigned long) pa->pa_pstart,
4799 pa->pa_len);
4800 ext4_grp_locked_error(sb, group, 0, 0, "free %u, pa_free %u",
4801 free, pa->pa_free);
4802
4803
4804
4805
4806 }
4807 atomic_add(free, &sbi->s_mb_discarded);
4808
4809 return 0;
4810 }
4811
4812 static noinline_for_stack int
4813 ext4_mb_release_group_pa(struct ext4_buddy *e4b,
4814 struct ext4_prealloc_space *pa)
4815 {
4816 struct super_block *sb = e4b->bd_sb;
4817 ext4_group_t group;
4818 ext4_grpblk_t bit;
4819
4820 trace_ext4_mb_release_group_pa(sb, pa);
4821 BUG_ON(pa->pa_deleted == 0);
4822 ext4_get_group_no_and_offset(sb, pa->pa_pstart, &group, &bit);
4823 BUG_ON(group != e4b->bd_group && pa->pa_len != 0);
4824 mb_free_blocks(pa->pa_inode, e4b, bit, pa->pa_len);
4825 atomic_add(pa->pa_len, &EXT4_SB(sb)->s_mb_discarded);
4826 trace_ext4_mballoc_discard(sb, NULL, group, bit, pa->pa_len);
4827
4828 return 0;
4829 }
4830
4831
4832
4833
4834
4835
4836
4837
4838
4839
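/*
 * releases all unused preallocations in a block group; returns the
 * number of clusters freed and sets *busy if a PA was still in use
 * and had to be skipped
 */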
4840 static noinline_for_stack int
4841 ext4_mb_discard_group_preallocations(struct super_block *sb,
4842 ext4_group_t group, int *busy)
4843 {
4844 struct ext4_group_info *grp = ext4_get_group_info(sb, group);
4845 struct buffer_head *bitmap_bh = NULL;
4846 struct ext4_prealloc_space *pa, *tmp;
4847 struct list_head list;
4848 struct ext4_buddy e4b;
4849 int err;
4850 int free = 0;
4851
4852 mb_debug(sb, "discard preallocation for group %u\n", group);
4853 if (list_empty(&grp->bb_prealloc_list))
4854 goto out_dbg;
4855
4856 bitmap_bh = ext4_read_block_bitmap(sb, group);
4857 if (IS_ERR(bitmap_bh)) {
4858 err = PTR_ERR(bitmap_bh);
4859 ext4_error_err(sb, -err,
4860 "Error %d reading block bitmap for %u",
4861 err, group);
4862 goto out_dbg;
4863 }
4864
4865 err = ext4_mb_load_buddy(sb, group, &e4b);
4866 if (err) {
4867 ext4_warning(sb, "Error %d loading buddy information for %u",
4868 err, group);
4869 put_bh(bitmap_bh);
4870 goto out_dbg;
4871 }
4872
4873 INIT_LIST_HEAD(&list);
4874 ext4_lock_group(sb, group);
4875 list_for_each_entry_safe(pa, tmp,
4876 &grp->bb_prealloc_list, pa_group_list) {
4877 spin_lock(&pa->pa_lock);
4878 if (atomic_read(&pa->pa_count)) {
4879 spin_unlock(&pa->pa_lock);
4880 *busy = 1;
4881 continue;
4882 }
4883 if (pa->pa_deleted) {
4884 spin_unlock(&pa->pa_lock);
4885 continue;
4886 }
4887
4888
4889 ext4_mb_mark_pa_deleted(sb, pa);
4890
4891 if (!free)
4892 this_cpu_inc(discard_pa_seq);
4893
4894
4895 free += pa->pa_free;
4896
4897 spin_unlock(&pa->pa_lock);
4898
4899 list_del(&pa->pa_group_list);
4900 list_add(&pa->u.pa_tmp_list, &list);
4901 }
4902
4903
4904 list_for_each_entry_safe(pa, tmp, &list, u.pa_tmp_list) {
4905
4906
4907 spin_lock(pa->pa_obj_lock);
4908 list_del_rcu(&pa->pa_inode_list);
4909 spin_unlock(pa->pa_obj_lock);
4910
4911 if (pa->pa_type == MB_GROUP_PA)
4912 ext4_mb_release_group_pa(&e4b, pa);
4913 else
4914 ext4_mb_release_inode_pa(&e4b, bitmap_bh, pa);
4915
4916 list_del(&pa->u.pa_tmp_list);
4917 call_rcu(&(pa)->u.pa_rcu, ext4_mb_pa_callback);
4918 }
4919
4920 ext4_unlock_group(sb, group);
4921 ext4_mb_unload_buddy(&e4b);
4922 put_bh(bitmap_bh);
4923 out_dbg:
4924 mb_debug(sb, "discarded (%d) blocks preallocated for group %u bb_free (%d)\n",
4925 free, group, grp->bb_free);
4926 return free;
4927 }
4928
4929
4930
4931
4932
4933
4934
4935
4936
4937
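/*
 * releases all (or up to @needed) inode preallocations for @inode;
 * @needed == 0 means release everything
 */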
4938 void ext4_discard_preallocations(struct inode *inode, unsigned int needed)
4939 {
4940 struct ext4_inode_info *ei = EXT4_I(inode);
4941 struct super_block *sb = inode->i_sb;
4942 struct buffer_head *bitmap_bh = NULL;
4943 struct ext4_prealloc_space *pa, *tmp;
4944 ext4_group_t group = 0;
4945 struct list_head list;
4946 struct ext4_buddy e4b;
4947 int err;
4948
4949 if (!S_ISREG(inode->i_mode)) {
4950
4951 return;
4952 }
4953
4954 if (EXT4_SB(sb)->s_mount_state & EXT4_FC_REPLAY)
4955 return;
4956
4957 mb_debug(sb, "discard preallocation for inode %lu\n",
4958 inode->i_ino);
4959 trace_ext4_discard_preallocations(inode,
4960 atomic_read(&ei->i_prealloc_active), needed);
4961
4962 INIT_LIST_HEAD(&list);
4963
4964 if (needed == 0)
4965 needed = UINT_MAX;
4966
4967 repeat:
4968
4969 spin_lock(&ei->i_prealloc_lock);
4970 while (!list_empty(&ei->i_prealloc_list) && needed) {
4971 pa = list_entry(ei->i_prealloc_list.prev,
4972 struct ext4_prealloc_space, pa_inode_list);
4973 BUG_ON(pa->pa_obj_lock != &ei->i_prealloc_lock);
4974 spin_lock(&pa->pa_lock);
4975 if (atomic_read(&pa->pa_count)) {
4976
4977
4978 spin_unlock(&pa->pa_lock);
4979 spin_unlock(&ei->i_prealloc_lock);
4980 ext4_msg(sb, KERN_ERR,
4981 "uh-oh! used pa while discarding");
4982 WARN_ON(1);
4983 schedule_timeout_uninterruptible(HZ);
4984 goto repeat;
4985
4986 }
4987 if (pa->pa_deleted == 0) {
4988 ext4_mb_mark_pa_deleted(sb, pa);
4989 spin_unlock(&pa->pa_lock);
4990 list_del_rcu(&pa->pa_inode_list);
4991 list_add(&pa->u.pa_tmp_list, &list);
4992 needed--;
4993 continue;
4994 }
4995
4996
4997 spin_unlock(&pa->pa_lock);
4998 spin_unlock(&ei->i_prealloc_lock);
4999
5000
5001
5002
5003
5004
5005
5006
5007
5008
5009
5010
5011
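/*
 * pa_deleted is set but the PA may not yet be unlinked from the inode
 * list by the thread that is freeing it, so wait briefly and rescan
 * instead of racing with that teardown.
 */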
5012 schedule_timeout_uninterruptible(HZ);
5013 goto repeat;
5014 }
5015 spin_unlock(&ei->i_prealloc_lock);
5016
5017 list_for_each_entry_safe(pa, tmp, &list, u.pa_tmp_list) {
5018 BUG_ON(pa->pa_type != MB_INODE_PA);
5019 group = ext4_get_group_number(sb, pa->pa_pstart);
5020
5021 err = ext4_mb_load_buddy_gfp(sb, group, &e4b,
5022 GFP_NOFS|__GFP_NOFAIL);
5023 if (err) {
5024 ext4_error_err(sb, -err, "Error %d loading buddy information for %u",
5025 err, group);
5026 continue;
5027 }
5028
5029 bitmap_bh = ext4_read_block_bitmap(sb, group);
5030 if (IS_ERR(bitmap_bh)) {
5031 err = PTR_ERR(bitmap_bh);
5032 ext4_error_err(sb, -err, "Error %d reading block bitmap for %u",
5033 err, group);
5034 ext4_mb_unload_buddy(&e4b);
5035 continue;
5036 }
5037
5038 ext4_lock_group(sb, group);
5039 list_del(&pa->pa_group_list);
5040 ext4_mb_release_inode_pa(&e4b, bitmap_bh, pa);
5041 ext4_unlock_group(sb, group);
5042
5043 ext4_mb_unload_buddy(&e4b);
5044 put_bh(bitmap_bh);
5045
5046 list_del(&pa->u.pa_tmp_list);
5047 call_rcu(&(pa)->u.pa_rcu, ext4_mb_pa_callback);
5048 }
5049 }
5050
5051 static int ext4_mb_pa_alloc(struct ext4_allocation_context *ac)
5052 {
5053 struct ext4_prealloc_space *pa;
5054
5055 BUG_ON(ext4_pspace_cachep == NULL);
5056 pa = kmem_cache_zalloc(ext4_pspace_cachep, GFP_NOFS);
5057 if (!pa)
5058 return -ENOMEM;
5059 atomic_set(&pa->pa_count, 1);
5060 ac->ac_pa = pa;
5061 return 0;
5062 }
5063
5064 static void ext4_mb_pa_free(struct ext4_allocation_context *ac)
5065 {
5066 struct ext4_prealloc_space *pa = ac->ac_pa;
5067
5068 BUG_ON(!pa);
5069 ac->ac_pa = NULL;
5070 WARN_ON(!atomic_dec_and_test(&pa->pa_count));
5071 kmem_cache_free(ext4_pspace_cachep, pa);
5072 }
5073
5074 #ifdef CONFIG_EXT4_DEBUG
5075 static inline void ext4_mb_show_pa(struct super_block *sb)
5076 {
5077 ext4_group_t i, ngroups;
5078
5079 if (ext4_test_mount_flag(sb, EXT4_MF_FS_ABORTED))
5080 return;
5081
5082 ngroups = ext4_get_groups_count(sb);
5083 mb_debug(sb, "groups: ");
5084 for (i = 0; i < ngroups; i++) {
5085 struct ext4_group_info *grp = ext4_get_group_info(sb, i);
5086 struct ext4_prealloc_space *pa;
5087 ext4_grpblk_t start;
5088 struct list_head *cur;
5089 ext4_lock_group(sb, i);
5090 list_for_each(cur, &grp->bb_prealloc_list) {
5091 pa = list_entry(cur, struct ext4_prealloc_space,
5092 pa_group_list);
5093 spin_lock(&pa->pa_lock);
5094 ext4_get_group_no_and_offset(sb, pa->pa_pstart,
5095 NULL, &start);
5096 spin_unlock(&pa->pa_lock);
5097 mb_debug(sb, "PA:%u:%d:%d\n", i, start,
5098 pa->pa_len);
5099 }
5100 ext4_unlock_group(sb, i);
5101 mb_debug(sb, "%u: %d/%d\n", i, grp->bb_free,
5102 grp->bb_fragments);
5103 }
5104 }
5105
5106 static void ext4_mb_show_ac(struct ext4_allocation_context *ac)
5107 {
5108 struct super_block *sb = ac->ac_sb;
5109
5110 if (ext4_test_mount_flag(sb, EXT4_MF_FS_ABORTED))
5111 return;
5112
5113 mb_debug(sb, "Can't allocate:"
5114 " Allocation context details:");
5115 mb_debug(sb, "status %u flags 0x%x",
5116 ac->ac_status, ac->ac_flags);
5117 mb_debug(sb, "orig %lu/%lu/%lu@%lu, "
5118 "goal %lu/%lu/%lu@%lu, "
5119 "best %lu/%lu/%lu@%lu cr %d",
5120 (unsigned long)ac->ac_o_ex.fe_group,
5121 (unsigned long)ac->ac_o_ex.fe_start,
5122 (unsigned long)ac->ac_o_ex.fe_len,
5123 (unsigned long)ac->ac_o_ex.fe_logical,
5124 (unsigned long)ac->ac_g_ex.fe_group,
5125 (unsigned long)ac->ac_g_ex.fe_start,
5126 (unsigned long)ac->ac_g_ex.fe_len,
5127 (unsigned long)ac->ac_g_ex.fe_logical,
5128 (unsigned long)ac->ac_b_ex.fe_group,
5129 (unsigned long)ac->ac_b_ex.fe_start,
5130 (unsigned long)ac->ac_b_ex.fe_len,
5131 (unsigned long)ac->ac_b_ex.fe_logical,
5132 (int)ac->ac_criteria);
5133 mb_debug(sb, "%u found", ac->ac_found);
5134 ext4_mb_show_pa(sb);
5135 }
5136 #else
5137 static inline void ext4_mb_show_pa(struct super_block *sb)
5138 {
5139 return;
5140 }
5141 static inline void ext4_mb_show_ac(struct ext4_allocation_context *ac)
5142 {
5143 ext4_mb_show_pa(ac->ac_sb);
5144 return;
5145 }
5146 #endif
5147
5148
5149
5150
5151
5152
5153
5154
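/*
 * decide whether this request should use per-inode preallocation or
 * the per-CPU locality-group preallocation used for small requests
 */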
5155 static void ext4_mb_group_or_file(struct ext4_allocation_context *ac)
5156 {
5157 struct ext4_sb_info *sbi = EXT4_SB(ac->ac_sb);
5158 int bsbits = ac->ac_sb->s_blocksize_bits;
5159 loff_t size, isize;
5160 bool inode_pa_eligible, group_pa_eligible;
5161
5162 if (!(ac->ac_flags & EXT4_MB_HINT_DATA))
5163 return;
5164
5165 if (unlikely(ac->ac_flags & EXT4_MB_HINT_GOAL_ONLY))
5166 return;
5167
5168 group_pa_eligible = sbi->s_mb_group_prealloc > 0;
5169 inode_pa_eligible = true;
5170 size = ac->ac_o_ex.fe_logical + EXT4_C2B(sbi, ac->ac_o_ex.fe_len);
5171 isize = (i_size_read(ac->ac_inode) + ac->ac_sb->s_blocksize - 1)
5172 >> bsbits;
5173
5174
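/* no point in inode preallocation for a closed file that is not growing */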
5175 if ((size == isize) && !ext4_fs_is_busy(sbi) &&
5176 !inode_is_open_for_write(ac->ac_inode))
5177 inode_pa_eligible = false;
5178
5179 size = max(size, isize);
5180
5181 if (size > sbi->s_mb_stream_request)
5182 group_pa_eligible = false;
5183
5184 if (!group_pa_eligible) {
5185 if (inode_pa_eligible)
5186 ac->ac_flags |= EXT4_MB_STREAM_ALLOC;
5187 else
5188 ac->ac_flags |= EXT4_MB_HINT_NOPREALLOC;
5189 return;
5190 }
5191
5192 BUG_ON(ac->ac_lg != NULL);
5193
5194
5195
5196
5197
5198 ac->ac_lg = raw_cpu_ptr(sbi->s_locality_groups);
5199
5200
5201 ac->ac_flags |= EXT4_MB_HINT_GROUP_ALLOC;
5202
5203
5204 mutex_lock(&ac->ac_lg->lg_mutex);
5205 }
5206
5207 static noinline_for_stack int
5208 ext4_mb_initialize_context(struct ext4_allocation_context *ac,
5209 struct ext4_allocation_request *ar)
5210 {
5211 struct super_block *sb = ar->inode->i_sb;
5212 struct ext4_sb_info *sbi = EXT4_SB(sb);
5213 struct ext4_super_block *es = sbi->s_es;
5214 ext4_group_t group;
5215 unsigned int len;
5216 ext4_fsblk_t goal;
5217 ext4_grpblk_t block;
5218
5219
5220 len = ar->len;
5221
5222
5223 if (len >= EXT4_CLUSTERS_PER_GROUP(sb))
5224 len = EXT4_CLUSTERS_PER_GROUP(sb);
5225
5226
5227 goal = ar->goal;
5228 if (goal < le32_to_cpu(es->s_first_data_block) ||
5229 goal >= ext4_blocks_count(es))
5230 goal = le32_to_cpu(es->s_first_data_block);
5231 ext4_get_group_no_and_offset(sb, goal, &group, &block);
5232
5233
5234 ac->ac_b_ex.fe_logical = EXT4_LBLK_CMASK(sbi, ar->logical);
5235 ac->ac_status = AC_STATUS_CONTINUE;
5236 ac->ac_sb = sb;
5237 ac->ac_inode = ar->inode;
5238 ac->ac_o_ex.fe_logical = ac->ac_b_ex.fe_logical;
5239 ac->ac_o_ex.fe_group = group;
5240 ac->ac_o_ex.fe_start = block;
5241 ac->ac_o_ex.fe_len = len;
5242 ac->ac_g_ex = ac->ac_o_ex;
5243 ac->ac_flags = ar->flags;
5244
5245
5246
5247 ext4_mb_group_or_file(ac);
5248
5249 mb_debug(sb, "init ac: %u blocks @ %u, goal %u, flags 0x%x, 2^%d, "
5250 "left: %u/%u, right %u/%u to %swritable\n",
5251 (unsigned) ar->len, (unsigned) ar->logical,
5252 (unsigned) ar->goal, ac->ac_flags, ac->ac_2order,
5253 (unsigned) ar->lleft, (unsigned) ar->pleft,
5254 (unsigned) ar->lright, (unsigned) ar->pright,
5255 inode_is_open_for_write(ar->inode) ? "" : "non-");
5256 return 0;
5257
5258 }
5259
5260 static noinline_for_stack void
5261 ext4_mb_discard_lg_preallocations(struct super_block *sb,
5262 struct ext4_locality_group *lg,
5263 int order, int total_entries)
5264 {
5265 ext4_group_t group = 0;
5266 struct ext4_buddy e4b;
5267 struct list_head discard_list;
5268 struct ext4_prealloc_space *pa, *tmp;
5269
5270 mb_debug(sb, "discard locality group preallocation\n");
5271
5272 INIT_LIST_HEAD(&discard_list);
5273
5274 spin_lock(&lg->lg_prealloc_lock);
5275 list_for_each_entry_rcu(pa, &lg->lg_prealloc_list[order],
5276 pa_inode_list,
5277 lockdep_is_held(&lg->lg_prealloc_lock)) {
5278 spin_lock(&pa->pa_lock);
5279 if (atomic_read(&pa->pa_count)) {
5280
5281
5282
5283
5284
5285 spin_unlock(&pa->pa_lock);
5286 continue;
5287 }
5288 if (pa->pa_deleted) {
5289 spin_unlock(&pa->pa_lock);
5290 continue;
5291 }
5292
5293 BUG_ON(pa->pa_type != MB_GROUP_PA);
5294
5295
5296 ext4_mb_mark_pa_deleted(sb, pa);
5297 spin_unlock(&pa->pa_lock);
5298
5299 list_del_rcu(&pa->pa_inode_list);
5300 list_add(&pa->u.pa_tmp_list, &discard_list);
5301
5302 total_entries--;
5303 if (total_entries <= 5) {
5304
5305
5306
5307
5308
5309
5310 break;
5311 }
5312 }
5313 spin_unlock(&lg->lg_prealloc_lock);
5314
5315 list_for_each_entry_safe(pa, tmp, &discard_list, u.pa_tmp_list) {
5316 int err;
5317
5318 group = ext4_get_group_number(sb, pa->pa_pstart);
5319 err = ext4_mb_load_buddy_gfp(sb, group, &e4b,
5320 GFP_NOFS|__GFP_NOFAIL);
5321 if (err) {
5322 ext4_error_err(sb, -err, "Error %d loading buddy information for %u",
5323 err, group);
5324 continue;
5325 }
5326 ext4_lock_group(sb, group);
5327 list_del(&pa->pa_group_list);
5328 ext4_mb_release_group_pa(&e4b, pa);
5329 ext4_unlock_group(sb, group);
5330
5331 ext4_mb_unload_buddy(&e4b);
5332 list_del(&pa->u.pa_tmp_list);
5333 call_rcu(&(pa)->u.pa_rcu, ext4_mb_pa_callback);
5334 }
5335 }
5336
5337
5338
5339
5340
5341
5342
5343
5344
5345
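/*
 * Re-insert a (partially used) group PA into the locality group's
 * per-order list, keeping each list roughly sorted by pa_free, and
 * shrink the list when it grows too long.
 */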
5346 static void ext4_mb_add_n_trim(struct ext4_allocation_context *ac)
5347 {
5348 int order, added = 0, lg_prealloc_count = 1;
5349 struct super_block *sb = ac->ac_sb;
5350 struct ext4_locality_group *lg = ac->ac_lg;
5351 struct ext4_prealloc_space *tmp_pa, *pa = ac->ac_pa;
5352
5353 order = fls(pa->pa_free) - 1;
5354 if (order > PREALLOC_TB_SIZE - 1)
5355
5356 order = PREALLOC_TB_SIZE - 1;
5357
5358 spin_lock(&lg->lg_prealloc_lock);
5359 list_for_each_entry_rcu(tmp_pa, &lg->lg_prealloc_list[order],
5360 pa_inode_list,
5361 lockdep_is_held(&lg->lg_prealloc_lock)) {
5362 spin_lock(&tmp_pa->pa_lock);
5363 if (tmp_pa->pa_deleted) {
5364 spin_unlock(&tmp_pa->pa_lock);
5365 continue;
5366 }
5367 if (!added && pa->pa_free < tmp_pa->pa_free) {
5368
5369 list_add_tail_rcu(&pa->pa_inode_list,
5370 &tmp_pa->pa_inode_list);
5371 added = 1;
5372
5373
5374
5375
5376 }
5377 spin_unlock(&tmp_pa->pa_lock);
5378 lg_prealloc_count++;
5379 }
5380 if (!added)
5381 list_add_tail_rcu(&pa->pa_inode_list,
5382 &lg->lg_prealloc_list[order]);
5383 spin_unlock(&lg->lg_prealloc_lock);
5384
5385
5386 if (lg_prealloc_count > 8) {
5387 ext4_mb_discard_lg_preallocations(sb, lg,
5388 order, lg_prealloc_count);
5389 return;
5390 }
5391 return;
5392 }
5393
5394
5395
5396
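/*
 * if the per-inode prealloc list has grown past s_mb_max_inode_prealloc
 * (plus some slack), discard the excess preallocations
 */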
5397 static void ext4_mb_trim_inode_pa(struct inode *inode)
5398 {
5399 struct ext4_inode_info *ei = EXT4_I(inode);
5400 struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
5401 int count, delta;
5402
5403 count = atomic_read(&ei->i_prealloc_active);
5404 delta = (sbi->s_mb_max_inode_prealloc >> 2) + 1;
5405 if (count > sbi->s_mb_max_inode_prealloc + delta) {
5406 count -= sbi->s_mb_max_inode_prealloc;
5407 ext4_discard_preallocations(inode, count);
5408 }
5409 }
5410
5411
5412
5413
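/*
 * release all resources used by the allocation context
 */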
5414 static int ext4_mb_release_context(struct ext4_allocation_context *ac)
5415 {
5416 struct inode *inode = ac->ac_inode;
5417 struct ext4_inode_info *ei = EXT4_I(inode);
5418 struct ext4_sb_info *sbi = EXT4_SB(ac->ac_sb);
5419 struct ext4_prealloc_space *pa = ac->ac_pa;
5420 if (pa) {
5421 if (pa->pa_type == MB_GROUP_PA) {
5422
5423 spin_lock(&pa->pa_lock);
5424 pa->pa_pstart += EXT4_C2B(sbi, ac->ac_b_ex.fe_len);
5425 pa->pa_lstart += EXT4_C2B(sbi, ac->ac_b_ex.fe_len);
5426 pa->pa_free -= ac->ac_b_ex.fe_len;
5427 pa->pa_len -= ac->ac_b_ex.fe_len;
5428 spin_unlock(&pa->pa_lock);
5429
5430
5431
5432
5433
5434
5435
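/*
 * Put the PA back on the correct locality-group bucket: unlink it here
 * and re-add it via ext4_mb_add_n_trim(), which also trims the list if
 * it has grown too long.
 */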
5436 if (likely(pa->pa_free)) {
5437 spin_lock(pa->pa_obj_lock);
5438 list_del_rcu(&pa->pa_inode_list);
5439 spin_unlock(pa->pa_obj_lock);
5440 ext4_mb_add_n_trim(ac);
5441 }
5442 }
5443
5444 if (pa->pa_type == MB_INODE_PA) {
5445
5446
5447
5448
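/*
 * treat the per-inode prealloc list as an LRU: move this PA to the
 * head so the least recently used entries end up at the tail
 */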
5449 spin_lock(pa->pa_obj_lock);
5450 list_move(&pa->pa_inode_list, &ei->i_prealloc_list);
5451 spin_unlock(pa->pa_obj_lock);
5452 }
5453
5454 ext4_mb_put_pa(ac, ac->ac_sb, pa);
5455 }
5456 if (ac->ac_bitmap_page)
5457 put_page(ac->ac_bitmap_page);
5458 if (ac->ac_buddy_page)
5459 put_page(ac->ac_buddy_page);
5460 if (ac->ac_flags & EXT4_MB_HINT_GROUP_ALLOC)
5461 mutex_unlock(&ac->ac_lg->lg_mutex);
5462 ext4_mb_collect_stats(ac);
5463 ext4_mb_trim_inode_pa(inode);
5464 return 0;
5465 }
5466
5467 static int ext4_mb_discard_preallocations(struct super_block *sb, int needed)
5468 {
5469 ext4_group_t i, ngroups = ext4_get_groups_count(sb);
5470 int ret;
5471 int freed = 0, busy = 0;
5472 int retry = 0;
5473
5474 trace_ext4_mb_discard_preallocations(sb, needed);
5475
5476 if (needed == 0)
5477 needed = EXT4_CLUSTERS_PER_GROUP(sb) + 1;
5478 repeat:
5479 for (i = 0; i < ngroups && needed > 0; i++) {
5480 ret = ext4_mb_discard_group_preallocations(sb, i, &busy);
5481 freed += ret;
5482 needed -= ret;
5483 cond_resched();
5484 }
5485
5486 if (needed > 0 && busy && ++retry < 3) {
5487 busy = 0;
5488 goto repeat;
5489 }
5490
5491 return freed;
5492 }
5493
5494 static bool ext4_mb_discard_preallocations_should_retry(struct super_block *sb,
5495 struct ext4_allocation_context *ac, u64 *seq)
5496 {
5497 int freed;
5498 u64 seq_retry = 0;
5499 bool ret = false;
5500
5501 freed = ext4_mb_discard_preallocations(sb, ac->ac_o_ex.fe_len);
5502 if (freed) {
5503 ret = true;
5504 goto out_dbg;
5505 }
5506 seq_retry = ext4_get_discard_pa_seq_sum();
5507 if (!(ac->ac_flags & EXT4_MB_STRICT_CHECK) || seq_retry != *seq) {
5508 ac->ac_flags |= EXT4_MB_STRICT_CHECK;
5509 *seq = seq_retry;
5510 ret = true;
5511 }
5512
5513 out_dbg:
5514 mb_debug(sb, "freed %d, retry ? %s\n", freed, ret ? "yes" : "no");
5515 return ret;
5516 }
5517
5518 static ext4_fsblk_t ext4_mb_new_blocks_simple(handle_t *handle,
5519 struct ext4_allocation_request *ar, int *errp);
5520
5521
5522
5523
5524
5525
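/*
 * Main entry point into mballoc: reserve clusters and quota, try to
 * satisfy the request from preallocated space, and fall back to the
 * regular allocator otherwise.
 */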
5526 ext4_fsblk_t ext4_mb_new_blocks(handle_t *handle,
5527 struct ext4_allocation_request *ar, int *errp)
5528 {
5529 struct ext4_allocation_context *ac = NULL;
5530 struct ext4_sb_info *sbi;
5531 struct super_block *sb;
5532 ext4_fsblk_t block = 0;
5533 unsigned int inquota = 0;
5534 unsigned int reserv_clstrs = 0;
5535 int retries = 0;
5536 u64 seq;
5537
5538 might_sleep();
5539 sb = ar->inode->i_sb;
5540 sbi = EXT4_SB(sb);
5541
5542 trace_ext4_request_blocks(ar);
5543 if (sbi->s_mount_state & EXT4_FC_REPLAY)
5544 return ext4_mb_new_blocks_simple(handle, ar, errp);
5545
5546
5547 if (ext4_is_quota_file(ar->inode))
5548 ar->flags |= EXT4_MB_USE_ROOT_BLOCKS;
5549
5550 if ((ar->flags & EXT4_MB_DELALLOC_RESERVED) == 0) {
5551
5552
5553
5554
5555 while (ar->len &&
5556 ext4_claim_free_clusters(sbi, ar->len, ar->flags)) {
5557
5558
5559 cond_resched();
5560 ar->len = ar->len >> 1;
5561 }
5562 if (!ar->len) {
5563 ext4_mb_show_pa(sb);
5564 *errp = -ENOSPC;
5565 return 0;
5566 }
5567 reserv_clstrs = ar->len;
5568 if (ar->flags & EXT4_MB_USE_ROOT_BLOCKS) {
5569 dquot_alloc_block_nofail(ar->inode,
5570 EXT4_C2B(sbi, ar->len));
5571 } else {
5572 while (ar->len &&
5573 dquot_alloc_block(ar->inode,
5574 EXT4_C2B(sbi, ar->len))) {
5575
5576 ar->flags |= EXT4_MB_HINT_NOPREALLOC;
5577 ar->len--;
5578 }
5579 }
5580 inquota = ar->len;
5581 if (ar->len == 0) {
5582 *errp = -EDQUOT;
5583 goto out;
5584 }
5585 }
5586
5587 ac = kmem_cache_zalloc(ext4_ac_cachep, GFP_NOFS);
5588 if (!ac) {
5589 ar->len = 0;
5590 *errp = -ENOMEM;
5591 goto out;
5592 }
5593
5594 *errp = ext4_mb_initialize_context(ac, ar);
5595 if (*errp) {
5596 ar->len = 0;
5597 goto out;
5598 }
5599
5600 ac->ac_op = EXT4_MB_HISTORY_PREALLOC;
5601 seq = this_cpu_read(discard_pa_seq);
5602 if (!ext4_mb_use_preallocated(ac)) {
5603 ac->ac_op = EXT4_MB_HISTORY_ALLOC;
5604 ext4_mb_normalize_request(ac, ar);
5605
5606 *errp = ext4_mb_pa_alloc(ac);
5607 if (*errp)
5608 goto errout;
5609 repeat:
5610
5611 *errp = ext4_mb_regular_allocator(ac);
5612
5613
5614
5615
5616
5617
5618
5619 if (*errp) {
5620 ext4_mb_pa_free(ac);
5621 ext4_discard_allocated_blocks(ac);
5622 goto errout;
5623 }
5624 if (ac->ac_status == AC_STATUS_FOUND &&
5625 ac->ac_o_ex.fe_len >= ac->ac_f_ex.fe_len)
5626 ext4_mb_pa_free(ac);
5627 }
5628 if (likely(ac->ac_status == AC_STATUS_FOUND)) {
5629 *errp = ext4_mb_mark_diskspace_used(ac, handle, reserv_clstrs);
5630 if (*errp) {
5631 ext4_discard_allocated_blocks(ac);
5632 goto errout;
5633 } else {
5634 block = ext4_grp_offs_to_block(sb, &ac->ac_b_ex);
5635 ar->len = ac->ac_b_ex.fe_len;
5636 }
5637 } else {
5638 if (++retries < 3 &&
5639 ext4_mb_discard_preallocations_should_retry(sb, ac, &seq))
5640 goto repeat;
5641
5642
5643
5644
5645 ext4_mb_pa_free(ac);
5646 *errp = -ENOSPC;
5647 }
5648
5649 errout:
5650 if (*errp) {
5651 ac->ac_b_ex.fe_len = 0;
5652 ar->len = 0;
5653 ext4_mb_show_ac(ac);
5654 }
5655 ext4_mb_release_context(ac);
5656 out:
5657 if (ac)
5658 kmem_cache_free(ext4_ac_cachep, ac);
5659 if (inquota && ar->len < inquota)
5660 dquot_free_block(ar->inode, EXT4_C2B(sbi, inquota - ar->len));
5661 if (!ar->len) {
5662 if ((ar->flags & EXT4_MB_DELALLOC_RESERVED) == 0)
5663
5664 percpu_counter_sub(&sbi->s_dirtyclusters_counter,
5665 reserv_clstrs);
5666 }
5667
5668 trace_ext4_allocate_blocks(ar, (unsigned long long)block);
5669
5670 return block;
5671 }
5672
5673
5674
5675
5676
5677
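/*
 * Two to-be-freed extents can only be merged if they belong to the
 * same transaction and the same block group.
 */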
5678 static void ext4_try_merge_freed_extent(struct ext4_sb_info *sbi,
5679 struct ext4_free_data *entry,
5680 struct ext4_free_data *new_entry,
5681 struct rb_root *entry_rb_root)
5682 {
5683 if ((entry->efd_tid != new_entry->efd_tid) ||
5684 (entry->efd_group != new_entry->efd_group))
5685 return;
5686 if (entry->efd_start_cluster + entry->efd_count ==
5687 new_entry->efd_start_cluster) {
5688 new_entry->efd_start_cluster = entry->efd_start_cluster;
5689 new_entry->efd_count += entry->efd_count;
5690 } else if (new_entry->efd_start_cluster + new_entry->efd_count ==
5691 entry->efd_start_cluster) {
5692 new_entry->efd_count += entry->efd_count;
5693 } else
5694 return;
5695 spin_lock(&sbi->s_md_lock);
5696 list_del(&entry->efd_list);
5697 spin_unlock(&sbi->s_md_lock);
5698 rb_erase(&entry->efd_node, entry_rb_root);
5699 kmem_cache_free(ext4_free_data_cachep, entry);
5700 }
5701
5702 static noinline_for_stack int
5703 ext4_mb_free_metadata(handle_t *handle, struct ext4_buddy *e4b,
5704 struct ext4_free_data *new_entry)
5705 {
5706 ext4_group_t group = e4b->bd_group;
5707 ext4_grpblk_t cluster;
5708 ext4_grpblk_t clusters = new_entry->efd_count;
5709 struct ext4_free_data *entry;
5710 struct ext4_group_info *db = e4b->bd_info;
5711 struct super_block *sb = e4b->bd_sb;
5712 struct ext4_sb_info *sbi = EXT4_SB(sb);
5713 struct rb_node **n = &db->bb_free_root.rb_node, *node;
5714 struct rb_node *parent = NULL, *new_node;
5715
5716 BUG_ON(!ext4_handle_valid(handle));
5717 BUG_ON(e4b->bd_bitmap_page == NULL);
5718 BUG_ON(e4b->bd_buddy_page == NULL);
5719
5720 new_node = &new_entry->efd_node;
5721 cluster = new_entry->efd_start_cluster;
5722
5723 if (!*n) {
5724
5725
5726
5727
5728
5729 get_page(e4b->bd_buddy_page);
5730 get_page(e4b->bd_bitmap_page);
5731 }
5732 while (*n) {
5733 parent = *n;
5734 entry = rb_entry(parent, struct ext4_free_data, efd_node);
5735 if (cluster < entry->efd_start_cluster)
5736 n = &(*n)->rb_left;
5737 else if (cluster >= (entry->efd_start_cluster + entry->efd_count))
5738 n = &(*n)->rb_right;
5739 else {
5740 ext4_grp_locked_error(sb, group, 0,
5741 ext4_group_first_block_no(sb, group) +
5742 EXT4_C2B(sbi, cluster),
5743 "Block already on to-be-freed list");
5744 kmem_cache_free(ext4_free_data_cachep, new_entry);
5745 return 0;
5746 }
5747 }
5748
5749 rb_link_node(new_node, parent, n);
5750 rb_insert_color(new_node, &db->bb_free_root);
5751
5752
5753 node = rb_prev(new_node);
5754 if (node) {
5755 entry = rb_entry(node, struct ext4_free_data, efd_node);
5756 ext4_try_merge_freed_extent(sbi, entry, new_entry,
5757 &(db->bb_free_root));
5758 }
5759
5760 node = rb_next(new_node);
5761 if (node) {
5762 entry = rb_entry(node, struct ext4_free_data, efd_node);
5763 ext4_try_merge_freed_extent(sbi, entry, new_entry,
5764 &(db->bb_free_root));
5765 }
5766
5767 spin_lock(&sbi->s_md_lock);
5768 list_add_tail(&new_entry->efd_list, &sbi->s_freed_data_list);
5769 sbi->s_mb_free_pending += clusters;
5770 spin_unlock(&sbi->s_md_lock);
5771 return 0;
5772 }
5773
5774
5775
5776
5777
5778
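/*
 * Simple allocator used during fast-commit replay: scan the block
 * bitmaps for a single free cluster, skipping blocks excluded by the
 * replay code.
 */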
5779 static ext4_fsblk_t ext4_mb_new_blocks_simple(handle_t *handle,
5780 struct ext4_allocation_request *ar, int *errp)
5781 {
5782 struct buffer_head *bitmap_bh;
5783 struct super_block *sb = ar->inode->i_sb;
5784 ext4_group_t group;
5785 ext4_grpblk_t blkoff;
5786 ext4_grpblk_t max = EXT4_CLUSTERS_PER_GROUP(sb);
5787 ext4_grpblk_t i = 0;
5788 ext4_fsblk_t goal, block;
5789 struct ext4_super_block *es = EXT4_SB(sb)->s_es;
5790
5791 goal = ar->goal;
5792 if (goal < le32_to_cpu(es->s_first_data_block) ||
5793 goal >= ext4_blocks_count(es))
5794 goal = le32_to_cpu(es->s_first_data_block);
5795
5796 ar->len = 0;
5797 ext4_get_group_no_and_offset(sb, goal, &group, &blkoff);
5798 for (; group < ext4_get_groups_count(sb); group++) {
5799 bitmap_bh = ext4_read_block_bitmap(sb, group);
5800 if (IS_ERR(bitmap_bh)) {
5801 *errp = PTR_ERR(bitmap_bh);
5802 pr_warn("Failed to read block bitmap\n");
5803 return 0;
5804 }
5805
5806 ext4_get_group_no_and_offset(sb,
5807 max(ext4_group_first_block_no(sb, group), goal),
5808 NULL, &blkoff);
5809 while (1) {
5810 i = mb_find_next_zero_bit(bitmap_bh->b_data, max,
5811 blkoff);
5812 if (i >= max)
5813 break;
5814 if (ext4_fc_replay_check_excluded(sb,
5815 ext4_group_first_block_no(sb, group) + i)) {
5816 blkoff = i + 1;
5817 } else
5818 break;
5819 }
5820 brelse(bitmap_bh);
5821 if (i < max)
5822 break;
5823 }
5824
5825 if (group >= ext4_get_groups_count(sb) || i >= max) {
5826 *errp = -ENOSPC;
5827 return 0;
5828 }
5829
5830 block = ext4_group_first_block_no(sb, group) + i;
5831 ext4_mb_mark_bb(sb, block, 1, 1);
5832 ar->len = 1;
5833
5834 return block;
5835 }
5836
5837 static void ext4_free_blocks_simple(struct inode *inode, ext4_fsblk_t block,
5838 unsigned long count)
5839 {
5840 struct buffer_head *bitmap_bh;
5841 struct super_block *sb = inode->i_sb;
5842 struct ext4_group_desc *gdp;
5843 struct buffer_head *gdp_bh;
5844 ext4_group_t group;
5845 ext4_grpblk_t blkoff;
5846 int already_freed = 0, err, i;
5847
5848 ext4_get_group_no_and_offset(sb, block, &group, &blkoff);
5849 bitmap_bh = ext4_read_block_bitmap(sb, group);
5850 if (IS_ERR(bitmap_bh)) {
5851 err = PTR_ERR(bitmap_bh);
5852 pr_warn("Failed to read block bitmap\n");
5853 return;
5854 }
5855 gdp = ext4_get_group_desc(sb, group, &gdp_bh);
5856 if (!gdp) {
5857 brelse(bitmap_bh);
return;
}
5858
5859 for (i = 0; i < count; i++) {
5860 if (!mb_test_bit(blkoff + i, bitmap_bh->b_data))
5861 already_freed++;
5862 }
5863 mb_clear_bits(bitmap_bh->b_data, blkoff, count);
5864 err = ext4_handle_dirty_metadata(NULL, NULL, bitmap_bh);
5865 if (err) {
5866 brelse(bitmap_bh);
return;
}
5867 ext4_free_group_clusters_set(
5868 sb, gdp, ext4_free_group_clusters(sb, gdp) +
5869 count - already_freed);
5870 ext4_block_bitmap_csum_set(sb, group, gdp, bitmap_bh);
5871 ext4_group_desc_csum_set(sb, group, gdp);
5872 ext4_handle_dirty_metadata(NULL, NULL, gdp_bh);
5873 sync_dirty_buffer(bitmap_bh);
5874 sync_dirty_buffer(gdp_bh);
5875 brelse(bitmap_bh);
5876 }
5877
5878
5879
5880
5881
5882
5883
5884
5885
5886
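/*
 * ext4_mb_clear_bb() -- helper for ext4_free_blocks(): clear the bits
 * for @count blocks starting at @block in the block bitmap and update
 * the group descriptor and free-cluster accounting.
 */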
5887 static void ext4_mb_clear_bb(handle_t *handle, struct inode *inode,
5888 ext4_fsblk_t block, unsigned long count,
5889 int flags)
5890 {
5891 struct buffer_head *bitmap_bh = NULL;
5892 struct super_block *sb = inode->i_sb;
5893 struct ext4_group_desc *gdp;
5894 unsigned int overflow;
5895 ext4_grpblk_t bit;
5896 struct buffer_head *gd_bh;
5897 ext4_group_t block_group;
5898 struct ext4_sb_info *sbi;
5899 struct ext4_buddy e4b;
5900 unsigned int count_clusters;
5901 int err = 0;
5902 int ret;
5903
5904 sbi = EXT4_SB(sb);
5905
5906 if (!(flags & EXT4_FREE_BLOCKS_VALIDATED) &&
5907 !ext4_inode_block_valid(inode, block, count)) {
5908 ext4_error(sb, "Freeing blocks in system zone - "
5909 "Block = %llu, count = %lu", block, count);
5910
5911 goto error_return;
5912 }
5913 flags |= EXT4_FREE_BLOCKS_VALIDATED;
5914
5915 do_more:
5916 overflow = 0;
5917 ext4_get_group_no_and_offset(sb, block, &block_group, &bit);
5918
5919 if (unlikely(EXT4_MB_GRP_BBITMAP_CORRUPT(
5920 ext4_get_group_info(sb, block_group))))
5921 return;
5922
5923
5924
5925
5926
5927 if (EXT4_C2B(sbi, bit) + count > EXT4_BLOCKS_PER_GROUP(sb)) {
5928 overflow = EXT4_C2B(sbi, bit) + count -
5929 EXT4_BLOCKS_PER_GROUP(sb);
5930 count -= overflow;
5931
5932 flags &= ~EXT4_FREE_BLOCKS_VALIDATED;
5933 }
5934 count_clusters = EXT4_NUM_B2C(sbi, count);
5935 bitmap_bh = ext4_read_block_bitmap(sb, block_group);
5936 if (IS_ERR(bitmap_bh)) {
5937 err = PTR_ERR(bitmap_bh);
5938 bitmap_bh = NULL;
5939 goto error_return;
5940 }
5941 gdp = ext4_get_group_desc(sb, block_group, &gd_bh);
5942 if (!gdp) {
5943 err = -EIO;
5944 goto error_return;
5945 }
5946
5947 if (!(flags & EXT4_FREE_BLOCKS_VALIDATED) &&
5948 !ext4_inode_block_valid(inode, block, count)) {
5949 ext4_error(sb, "Freeing blocks in system zone - "
5950 "Block = %llu, count = %lu", block, count);
5951
5952 goto error_return;
5953 }
5954
5955 BUFFER_TRACE(bitmap_bh, "getting write access");
5956 err = ext4_journal_get_write_access(handle, sb, bitmap_bh,
5957 EXT4_JTR_NONE);
5958 if (err)
5959 goto error_return;
5960
5961
5962
5963
5964
5965
5966 BUFFER_TRACE(gd_bh, "get_write_access");
5967 err = ext4_journal_get_write_access(handle, sb, gd_bh, EXT4_JTR_NONE);
5968 if (err)
5969 goto error_return;
5970 #ifdef AGGRESSIVE_CHECK
5971 {
5972 int i;
5973 for (i = 0; i < count_clusters; i++)
5974 BUG_ON(!mb_test_bit(bit + i, bitmap_bh->b_data));
5975 }
5976 #endif
5977 trace_ext4_mballoc_free(sb, inode, block_group, bit, count_clusters);
5978
5979
5980 err = ext4_mb_load_buddy_gfp(sb, block_group, &e4b,
5981 GFP_NOFS|__GFP_NOFAIL);
5982 if (err)
5983 goto error_return;
5984
5985
5986
5987
5988
5989
5990
5991 if (ext4_handle_valid(handle) &&
5992 ((flags & EXT4_FREE_BLOCKS_METADATA) ||
5993 !ext4_should_writeback_data(inode))) {
5994 struct ext4_free_data *new_entry;
5995
5996
5997
5998
5999 new_entry = kmem_cache_alloc(ext4_free_data_cachep,
6000 GFP_NOFS|__GFP_NOFAIL);
6001 new_entry->efd_start_cluster = bit;
6002 new_entry->efd_group = block_group;
6003 new_entry->efd_count = count_clusters;
6004 new_entry->efd_tid = handle->h_transaction->t_tid;
6005
6006 ext4_lock_group(sb, block_group);
6007 mb_clear_bits(bitmap_bh->b_data, bit, count_clusters);
6008 ext4_mb_free_metadata(handle, &e4b, new_entry);
6009 } else {
6010
6011
6012
6013
6014 if (test_opt(sb, DISCARD)) {
6015 err = ext4_issue_discard(sb, block_group, bit, count,
6016 NULL);
6017 if (err && err != -EOPNOTSUPP)
6018 ext4_msg(sb, KERN_WARNING, "discard request in"
6019 " group:%u block:%d count:%lu failed"
6020 " with %d", block_group, bit, count,
6021 err);
6022 } else
6023 EXT4_MB_GRP_CLEAR_TRIMMED(e4b.bd_info);
6024
6025 ext4_lock_group(sb, block_group);
6026 mb_clear_bits(bitmap_bh->b_data, bit, count_clusters);
6027 mb_free_blocks(inode, &e4b, bit, count_clusters);
6028 }
6029
6030 ret = ext4_free_group_clusters(sb, gdp) + count_clusters;
6031 ext4_free_group_clusters_set(sb, gdp, ret);
6032 ext4_block_bitmap_csum_set(sb, block_group, gdp, bitmap_bh);
6033 ext4_group_desc_csum_set(sb, block_group, gdp);
6034 ext4_unlock_group(sb, block_group);
6035
6036 if (sbi->s_log_groups_per_flex) {
6037 ext4_group_t flex_group = ext4_flex_group(sbi, block_group);
6038 atomic64_add(count_clusters,
6039 &sbi_array_rcu_deref(sbi, s_flex_groups,
6040 flex_group)->free_clusters);
6041 }
6042
6043
6044
6045
6046
6047
6048 if (!(flags & EXT4_FREE_BLOCKS_RERESERVE_CLUSTER)) {
6049 if (!(flags & EXT4_FREE_BLOCKS_NO_QUOT_UPDATE))
6050 dquot_free_block(inode, EXT4_C2B(sbi, count_clusters));
6051 percpu_counter_add(&sbi->s_freeclusters_counter,
6052 count_clusters);
6053 }
6054
6055 ext4_mb_unload_buddy(&e4b);
6056
6057
6058 BUFFER_TRACE(bitmap_bh, "dirtied bitmap block");
6059 err = ext4_handle_dirty_metadata(handle, NULL, bitmap_bh);
6060
6061
6062 BUFFER_TRACE(gd_bh, "dirtied group descriptor block");
6063 ret = ext4_handle_dirty_metadata(handle, NULL, gd_bh);
6064 if (!err)
6065 err = ret;
6066
6067 if (overflow && !err) {
6068 block += count;
6069 count = overflow;
6070 put_bh(bitmap_bh);
6071
6072 flags &= ~EXT4_FREE_BLOCKS_VALIDATED;
6073 goto do_more;
6074 }
6075 error_return:
6076 brelse(bitmap_bh);
6077 ext4_std_error(sb, err);
6078 return;
6079 }
6080
6081
6082
6083
6084
6085
6086
6087
6088
6089
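/**
 * ext4_free_blocks() -- Free given blocks and update quota
 * @handle:	handle for this transaction
 * @inode:	inode
 * @bh:		optional buffer of the block to be freed
 * @block:	starting physical block to be freed
 * @count:	number of blocks to be freed
 * @flags:	flags used by ext4_free_blocks
 */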
6090 void ext4_free_blocks(handle_t *handle, struct inode *inode,
6091 struct buffer_head *bh, ext4_fsblk_t block,
6092 unsigned long count, int flags)
6093 {
6094 struct super_block *sb = inode->i_sb;
6095 unsigned int overflow;
6096 struct ext4_sb_info *sbi;
6097
6098 sbi = EXT4_SB(sb);
6099
6100 if (sbi->s_mount_state & EXT4_FC_REPLAY) {
6101 ext4_free_blocks_simple(inode, block, count);
6102 return;
6103 }
6104
6105 might_sleep();
6106 if (bh) {
6107 if (block)
6108 BUG_ON(block != bh->b_blocknr);
6109 else
6110 block = bh->b_blocknr;
6111 }
6112
6113 if (!(flags & EXT4_FREE_BLOCKS_VALIDATED) &&
6114 !ext4_inode_block_valid(inode, block, count)) {
6115 ext4_error(sb, "Freeing blocks not in datazone - "
6116 "block = %llu, count = %lu", block, count);
6117 return;
6118 }
6119 flags |= EXT4_FREE_BLOCKS_VALIDATED;
6120
6121 ext4_debug("freeing block %llu\n", block);
6122 trace_ext4_free_blocks(inode, block, count, flags);
6123
6124 if (bh && (flags & EXT4_FREE_BLOCKS_FORGET)) {
6125 BUG_ON(count > 1);
6126
6127 ext4_forget(handle, flags & EXT4_FREE_BLOCKS_METADATA,
6128 inode, bh, block);
6129 }
6130
6131
6132
6133
6134
6135
6136
6137
6138 overflow = EXT4_PBLK_COFF(sbi, block);
6139 if (overflow) {
6140 if (flags & EXT4_FREE_BLOCKS_NOFREE_FIRST_CLUSTER) {
6141 overflow = sbi->s_cluster_ratio - overflow;
6142 block += overflow;
6143 if (count > overflow)
6144 count -= overflow;
6145 else
6146 return;
6147 } else {
6148 block -= overflow;
6149 count += overflow;
6150 }
6151
6152 flags &= ~EXT4_FREE_BLOCKS_VALIDATED;
6153 }
6154 overflow = EXT4_LBLK_COFF(sbi, count);
6155 if (overflow) {
6156 if (flags & EXT4_FREE_BLOCKS_NOFREE_LAST_CLUSTER) {
6157 if (count > overflow)
6158 count -= overflow;
6159 else
6160 return;
6161 } else
6162 count += sbi->s_cluster_ratio - overflow;
6163
6164 flags &= ~EXT4_FREE_BLOCKS_VALIDATED;
6165 }
6166
6167 if (!bh && (flags & EXT4_FREE_BLOCKS_FORGET)) {
6168 int i;
6169 int is_metadata = flags & EXT4_FREE_BLOCKS_METADATA;
6170
6171 for (i = 0; i < count; i++) {
6172 cond_resched();
6173 if (is_metadata)
6174 bh = sb_find_get_block(inode->i_sb, block + i);
6175 ext4_forget(handle, is_metadata, inode, bh, block + i);
6176 }
6177 }
6178
6179 ext4_mb_clear_bb(handle, inode, block, count, flags);
6180 return;
6181 }
6182
6183
6184
6185
6186
6187
6188
6189
6190
6191
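/**
 * ext4_group_add_blocks() -- Add given blocks to an existing group
 * @handle:	handle to this transaction
 * @sb:		super block
 * @block:	start physical block to add to the block group
 * @count:	number of blocks to free
 *
 * This marks the blocks as free in the bitmap and buddy.
 */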
6192 int ext4_group_add_blocks(handle_t *handle, struct super_block *sb,
6193 ext4_fsblk_t block, unsigned long count)
6194 {
6195 struct buffer_head *bitmap_bh = NULL;
6196 struct buffer_head *gd_bh;
6197 ext4_group_t block_group;
6198 ext4_grpblk_t bit;
6199 unsigned int i;
6200 struct ext4_group_desc *desc;
6201 struct ext4_sb_info *sbi = EXT4_SB(sb);
6202 struct ext4_buddy e4b;
6203 int err = 0, ret, free_clusters_count;
6204 ext4_grpblk_t clusters_freed;
6205 ext4_fsblk_t first_cluster = EXT4_B2C(sbi, block);
6206 ext4_fsblk_t last_cluster = EXT4_B2C(sbi, block + count - 1);
6207 unsigned long cluster_count = last_cluster - first_cluster + 1;
6208
6209 ext4_debug("Adding block(s) %llu-%llu\n", block, block + count - 1);
6210
6211 if (count == 0)
6212 return 0;
6213
6214 ext4_get_group_no_and_offset(sb, block, &block_group, &bit);
6215
6216
6217
6218
6219 if (bit + cluster_count > EXT4_CLUSTERS_PER_GROUP(sb)) {
6220 ext4_warning(sb, "too many blocks added to group %u",
6221 block_group);
6222 err = -EINVAL;
6223 goto error_return;
6224 }
6225
6226 bitmap_bh = ext4_read_block_bitmap(sb, block_group);
6227 if (IS_ERR(bitmap_bh)) {
6228 err = PTR_ERR(bitmap_bh);
6229 bitmap_bh = NULL;
6230 goto error_return;
6231 }
6232
6233 desc = ext4_get_group_desc(sb, block_group, &gd_bh);
6234 if (!desc) {
6235 err = -EIO;
6236 goto error_return;
6237 }
6238
6239 if (!ext4_sb_block_valid(sb, NULL, block, count)) {
6240 ext4_error(sb, "Adding blocks in system zones - "
6241 "Block = %llu, count = %lu",
6242 block, count);
6243 err = -EINVAL;
6244 goto error_return;
6245 }
6246
6247 BUFFER_TRACE(bitmap_bh, "getting write access");
6248 err = ext4_journal_get_write_access(handle, sb, bitmap_bh,
6249 EXT4_JTR_NONE);
6250 if (err)
6251 goto error_return;
6252
6253
6254
6255
6256
6257
6258 BUFFER_TRACE(gd_bh, "get_write_access");
6259 err = ext4_journal_get_write_access(handle, sb, gd_bh, EXT4_JTR_NONE);
6260 if (err)
6261 goto error_return;
6262
6263 for (i = 0, clusters_freed = 0; i < cluster_count; i++) {
6264 BUFFER_TRACE(bitmap_bh, "clear bit");
6265 if (!mb_test_bit(bit + i, bitmap_bh->b_data)) {
6266 ext4_error(sb, "bit already cleared for block %llu",
6267 (ext4_fsblk_t)(block + i));
6268 BUFFER_TRACE(bitmap_bh, "bit already cleared");
6269 } else {
6270 clusters_freed++;
6271 }
6272 }
6273
6274 err = ext4_mb_load_buddy(sb, block_group, &e4b);
6275 if (err)
6276 goto error_return;
6277
6278
6279
6280
6281
6282
6283 ext4_lock_group(sb, block_group);
6284 mb_clear_bits(bitmap_bh->b_data, bit, cluster_count);
6285 mb_free_blocks(NULL, &e4b, bit, cluster_count);
6286 free_clusters_count = clusters_freed +
6287 ext4_free_group_clusters(sb, desc);
6288 ext4_free_group_clusters_set(sb, desc, free_clusters_count);
6289 ext4_block_bitmap_csum_set(sb, block_group, desc, bitmap_bh);
6290 ext4_group_desc_csum_set(sb, block_group, desc);
6291 ext4_unlock_group(sb, block_group);
6292 percpu_counter_add(&sbi->s_freeclusters_counter,
6293 clusters_freed);
6294
6295 if (sbi->s_log_groups_per_flex) {
6296 ext4_group_t flex_group = ext4_flex_group(sbi, block_group);
6297 atomic64_add(clusters_freed,
6298 &sbi_array_rcu_deref(sbi, s_flex_groups,
6299 flex_group)->free_clusters);
6300 }
6301
6302 ext4_mb_unload_buddy(&e4b);
6303
6304
6305 BUFFER_TRACE(bitmap_bh, "dirtied bitmap block");
6306 err = ext4_handle_dirty_metadata(handle, NULL, bitmap_bh);
6307
6308
6309 BUFFER_TRACE(gd_bh, "dirtied group descriptor block");
6310 ret = ext4_handle_dirty_metadata(handle, NULL, gd_bh);
6311 if (!err)
6312 err = ret;
6313
6314 error_return:
6315 brelse(bitmap_bh);
6316 ext4_std_error(sb, err);
6317 return err;
6318 }
6319
6320
6321
6322
6323
6324
6325
6326
6327
6328
6329
6330
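/*
 * ext4_trim_extent -- trim one free extent in the group
 * @sb:		super block for the file system
 * @start:	starting block of the free extent in the alloc. group
 * @count:	number of blocks to TRIM
 * @e4b:	ext4 buddy for the group
 */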
6331 static int ext4_trim_extent(struct super_block *sb,
6332 int start, int count, struct ext4_buddy *e4b)
6333 __releases(bitlock)
6334 __acquires(bitlock)
6335 {
6336 struct ext4_free_extent ex;
6337 ext4_group_t group = e4b->bd_group;
6338 int ret = 0;
6339
6340 trace_ext4_trim_extent(sb, group, start, count);
6341
6342 assert_spin_locked(ext4_group_lock_ptr(sb, group));
6343
6344 ex.fe_start = start;
6345 ex.fe_group = group;
6346 ex.fe_len = count;
6347
6348
6349
6350
6351
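/*
 * Mark the extent used so it cannot be allocated while the group lock
 * is dropped around ext4_issue_discard(); free it back afterwards.
 */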
6352 mb_mark_used(e4b, &ex);
6353 ext4_unlock_group(sb, group);
6354 ret = ext4_issue_discard(sb, group, start, count, NULL);
6355 ext4_lock_group(sb, group);
6356 mb_free_blocks(NULL, e4b, start, ex.fe_len);
6357 return ret;
6358 }
6359
6360 static int ext4_try_to_trim_range(struct super_block *sb,
6361 struct ext4_buddy *e4b, ext4_grpblk_t start,
6362 ext4_grpblk_t max, ext4_grpblk_t minblocks)
6363 __acquires(ext4_group_lock_ptr(sb, e4b->bd_group))
6364 __releases(ext4_group_lock_ptr(sb, e4b->bd_group))
6365 {
6366 ext4_grpblk_t next, count, free_count;
6367 void *bitmap;
6368
6369 bitmap = e4b->bd_bitmap;
6370 start = (e4b->bd_info->bb_first_free > start) ?
6371 e4b->bd_info->bb_first_free : start;
6372 count = 0;
6373 free_count = 0;
6374
6375 while (start <= max) {
6376 start = mb_find_next_zero_bit(bitmap, max + 1, start);
6377 if (start > max)
6378 break;
6379 next = mb_find_next_bit(bitmap, max + 1, start);
6380
6381 if ((next - start) >= minblocks) {
6382 int ret = ext4_trim_extent(sb, start, next - start, e4b);
6383
6384 if (ret && ret != -EOPNOTSUPP)
6385 break;
6386 count += next - start;
6387 }
6388 free_count += next - start;
6389 start = next + 1;
6390
6391 if (fatal_signal_pending(current)) {
6392 count = -ERESTARTSYS;
6393 break;
6394 }
6395
6396 if (need_resched()) {
6397 ext4_unlock_group(sb, e4b->bd_group);
6398 cond_resched();
6399 ext4_lock_group(sb, e4b->bd_group);
6400 }
6401
6402 if ((e4b->bd_info->bb_free - free_count) < minblocks)
6403 break;
6404 }
6405
6406 return count;
6407 }
6408
6409
6410
6411
6412
6413
6414
6415
6416
6417
6418
6419
6420
6421
6422
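/**
 * ext4_trim_all_free -- trim all free space in an allocation group
 * @sb:			super block for file system
 * @group:		group to be trimmed
 * @start:		first group block to examine
 * @max:		last group block to examine
 * @minblocks:		minimum extent block count
 * @set_trimmed:	set the trimmed flag if at least one block is trimmed
 *
 * Walks the group's bitmap and issues a discard for every free extent
 * of at least @minblocks clusters between @start and @max.
 */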
6423 static ext4_grpblk_t
6424 ext4_trim_all_free(struct super_block *sb, ext4_group_t group,
6425 ext4_grpblk_t start, ext4_grpblk_t max,
6426 ext4_grpblk_t minblocks, bool set_trimmed)
6427 {
6428 struct ext4_buddy e4b;
6429 int ret;
6430
6431 trace_ext4_trim_all_free(sb, group, start, max);
6432
6433 ret = ext4_mb_load_buddy(sb, group, &e4b);
6434 if (ret) {
6435 ext4_warning(sb, "Error %d loading buddy information for %u",
6436 ret, group);
6437 return ret;
6438 }
6439
6440 ext4_lock_group(sb, group);
6441
6442 if (!EXT4_MB_GRP_WAS_TRIMMED(e4b.bd_info) ||
6443 minblocks < EXT4_SB(sb)->s_last_trim_minblks) {
6444 ret = ext4_try_to_trim_range(sb, &e4b, start, max, minblocks);
6445 if (ret >= 0 && set_trimmed)
6446 EXT4_MB_GRP_SET_TRIMMED(e4b.bd_info);
6447 } else {
6448 ret = 0;
6449 }
6450
6451 ext4_unlock_group(sb, group);
6452 ext4_mb_unload_buddy(&e4b);
6453
6454 ext4_debug("trimmed %d blocks in the group %d\n",
6455 ret, group);
6456
6457 return ret;
6458 }
6459
6460
6461
6462
6463
6464
6465
6466
6467
6468
6469
6470
6471
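/**
 * ext4_trim_fs() -- trim ioctl handler
 * @sb:		superblock for filesystem
 * @range:	fstrim_range structure
 *
 * start:	first byte to trim
 * len:		number of bytes to trim from start
 * minlen:	minimum extent length in bytes
 */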
6472 int ext4_trim_fs(struct super_block *sb, struct fstrim_range *range)
6473 {
6474 unsigned int discard_granularity = bdev_discard_granularity(sb->s_bdev);
6475 struct ext4_group_info *grp;
6476 ext4_group_t group, first_group, last_group;
6477 ext4_grpblk_t cnt = 0, first_cluster, last_cluster;
6478 uint64_t start, end, minlen, trimmed = 0;
6479 ext4_fsblk_t first_data_blk =
6480 le32_to_cpu(EXT4_SB(sb)->s_es->s_first_data_block);
6481 ext4_fsblk_t max_blks = ext4_blocks_count(EXT4_SB(sb)->s_es);
6482 bool whole_group, eof = false;
6483 int ret = 0;
6484
6485 start = range->start >> sb->s_blocksize_bits;
6486 end = start + (range->len >> sb->s_blocksize_bits) - 1;
6487 minlen = EXT4_NUM_B2C(EXT4_SB(sb),
6488 range->minlen >> sb->s_blocksize_bits);
6489
6490 if (minlen > EXT4_CLUSTERS_PER_GROUP(sb) ||
6491 start >= max_blks ||
6492 range->len < sb->s_blocksize)
6493 return -EINVAL;
6494
6495 if (range->minlen < discard_granularity) {
6496 minlen = EXT4_NUM_B2C(EXT4_SB(sb),
6497 discard_granularity >> sb->s_blocksize_bits);
6498 if (minlen > EXT4_CLUSTERS_PER_GROUP(sb))
6499 goto out;
6500 }
6501 if (end >= max_blks - 1) {
6502 end = max_blks - 1;
6503 eof = true;
6504 }
6505 if (end <= first_data_blk)
6506 goto out;
6507 if (start < first_data_blk)
6508 start = first_data_blk;
6509
6510
6511 ext4_get_group_no_and_offset(sb, (ext4_fsblk_t) start,
6512 &first_group, &first_cluster);
6513 ext4_get_group_no_and_offset(sb, (ext4_fsblk_t) end,
6514 &last_group, &last_cluster);
6515
6516
6517 end = EXT4_CLUSTERS_PER_GROUP(sb) - 1;
6518 whole_group = true;
6519
6520 for (group = first_group; group <= last_group; group++) {
6521 grp = ext4_get_group_info(sb, group);
6522
6523 if (unlikely(EXT4_MB_GRP_NEED_INIT(grp))) {
6524 ret = ext4_mb_init_group(sb, group, GFP_NOFS);
6525 if (ret)
6526 break;
6527 }
6528
6529
6530
6531
6532
6533
6534
6535 if (group == last_group) {
6536 end = last_cluster;
6537 whole_group = eof ? true : end == EXT4_CLUSTERS_PER_GROUP(sb) - 1;
6538 }
6539 if (grp->bb_free >= minlen) {
6540 cnt = ext4_trim_all_free(sb, group, first_cluster,
6541 end, minlen, whole_group);
6542 if (cnt < 0) {
6543 ret = cnt;
6544 break;
6545 }
6546 trimmed += cnt;
6547 }
6548
6549
6550
6551
6552
6553 first_cluster = 0;
6554 }
6555
6556 if (!ret)
6557 EXT4_SB(sb)->s_last_trim_minblks = minlen;
6558
6559 out:
6560 range->len = EXT4_C2B(EXT4_SB(sb), trimmed) << sb->s_blocksize_bits;
6561 return ret;
6562 }
6563
6564
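/*
 * Iterate over all free extents of @group between @start and @end,
 * calling @formatter for each one (the group lock is dropped around
 * the callback).
 */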
6565 int
6566 ext4_mballoc_query_range(
6567 struct super_block *sb,
6568 ext4_group_t group,
6569 ext4_grpblk_t start,
6570 ext4_grpblk_t end,
6571 ext4_mballoc_query_range_fn formatter,
6572 void *priv)
6573 {
6574 void *bitmap;
6575 ext4_grpblk_t next;
6576 struct ext4_buddy e4b;
6577 int error;
6578
6579 error = ext4_mb_load_buddy(sb, group, &e4b);
6580 if (error)
6581 return error;
6582 bitmap = e4b.bd_bitmap;
6583
6584 ext4_lock_group(sb, group);
6585
6586 start = (e4b.bd_info->bb_first_free > start) ?
6587 e4b.bd_info->bb_first_free : start;
6588 if (end >= EXT4_CLUSTERS_PER_GROUP(sb))
6589 end = EXT4_CLUSTERS_PER_GROUP(sb) - 1;
6590
6591 while (start <= end) {
6592 start = mb_find_next_zero_bit(bitmap, end + 1, start);
6593 if (start > end)
6594 break;
6595 next = mb_find_next_bit(bitmap, end + 1, start);
6596
6597 ext4_unlock_group(sb, group);
6598 error = formatter(sb, group, start, next - start, priv);
6599 if (error)
6600 goto out_unload;
6601 ext4_lock_group(sb, group);
6602
6603 start = next + 1;
6604 }
6605
6606 ext4_unlock_group(sb, group);
6607 out_unload:
6608 ext4_mb_unload_buddy(&e4b);
6609
6610 return error;
6611 }