fs/ext4/resize.c

0001 // SPDX-License-Identifier: GPL-2.0
0002 /*
0003  *  linux/fs/ext4/resize.c
0004  *
0005  * Support for resizing an ext4 filesystem while it is mounted.
0006  *
0007  * Copyright (C) 2001, 2002 Andreas Dilger <adilger@clusterfs.com>
0008  *
0009  * This could probably be made into a module, because it is not often in use.
0010  */
0011
0012
0013 #define EXT4FS_DEBUG
0014
0015 #include <linux/errno.h>
0016 #include <linux/slab.h>
0017 #include <linux/jiffies.h>
0018
0019 #include "ext4_jbd2.h"
0020
0021 struct ext4_rcu_ptr {
0022     struct rcu_head rcu;
0023     void *ptr;
0024 };
0025
0026 static void ext4_rcu_ptr_callback(struct rcu_head *head)
0027 {
0028     struct ext4_rcu_ptr *ptr;
0029
0030     ptr = container_of(head, struct ext4_rcu_ptr, rcu);
0031     kvfree(ptr->ptr);
0032     kfree(ptr);
0033 }
0034
0035 void ext4_kvfree_array_rcu(void *to_free)
0036 {
0037     struct ext4_rcu_ptr *ptr = kzalloc(sizeof(*ptr), GFP_KERNEL);
0038
0039     if (ptr) {
0040         ptr->ptr = to_free;
0041         call_rcu(&ptr->rcu, ext4_rcu_ptr_callback);
0042         return;
0043     }
0044     synchronize_rcu();
0045     kvfree(to_free);
0046 }
0047
0048 int ext4_resize_begin(struct super_block *sb)
0049 {
0050     struct ext4_sb_info *sbi = EXT4_SB(sb);
0051     int ret = 0;
0052
0053     if (!capable(CAP_SYS_RESOURCE))
0054         return -EPERM;
0055
0056     /*
0057      * If the reserved GDT blocks is non-zero, the resize_inode feature
0058      * should always be set.
0059      */
0060     if (EXT4_SB(sb)->s_es->s_reserved_gdt_blocks &&
0061         !ext4_has_feature_resize_inode(sb)) {
0062         ext4_error(sb, "resize_inode disabled but reserved GDT blocks non-zero");
0063         return -EFSCORRUPTED;
0064     }
0065
0066     /*
0067      * If we are not using the primary superblock/GDT copy don't resize,
0068          * because the user tools have no way of handling this.  Probably a
0069          * bad time to do it anyways.
0070          */
0071     if (EXT4_B2C(sbi, sbi->s_sbh->b_blocknr) !=
0072         le32_to_cpu(EXT4_SB(sb)->s_es->s_first_data_block)) {
0073         ext4_warning(sb, "won't resize using backup superblock at %llu",
0074             (unsigned long long)EXT4_SB(sb)->s_sbh->b_blocknr);
0075         return -EPERM;
0076     }
0077
0078     /*
0079      * We are not allowed to do online-resizing on a filesystem mounted
0080      * with error, because it can destroy the filesystem easily.
0081      */
0082     if (EXT4_SB(sb)->s_mount_state & EXT4_ERROR_FS) {
0083         ext4_warning(sb, "There are errors in the filesystem, "
0084                  "so online resizing is not allowed");
0085         return -EPERM;
0086     }
0087
0088     if (ext4_has_feature_sparse_super2(sb)) {
0089         ext4_msg(sb, KERN_ERR, "Online resizing not supported with sparse_super2");
0090         return -EOPNOTSUPP;
0091     }
0092
0093     if (test_and_set_bit_lock(EXT4_FLAGS_RESIZING,
0094                   &EXT4_SB(sb)->s_ext4_flags))
0095         ret = -EBUSY;
0096
0097     return ret;
0098 }
0099
0100 int ext4_resize_end(struct super_block *sb, bool update_backups)
0101 {
0102     clear_bit_unlock(EXT4_FLAGS_RESIZING, &EXT4_SB(sb)->s_ext4_flags);
0103     smp_mb__after_atomic();
0104     if (update_backups)
0105         return ext4_update_overhead(sb, true);
0106     return 0;
0107 }
0108
0109 static ext4_group_t ext4_meta_bg_first_group(struct super_block *sb,
0110                          ext4_group_t group) {
0111     return (group >> EXT4_DESC_PER_BLOCK_BITS(sb)) <<
0112            EXT4_DESC_PER_BLOCK_BITS(sb);
0113 }
0114
0115 static ext4_fsblk_t ext4_meta_bg_first_block_no(struct super_block *sb,
0116                          ext4_group_t group) {
0117     group = ext4_meta_bg_first_group(sb, group);
0118     return ext4_group_first_block_no(sb, group);
0119 }
0120
0121 static ext4_grpblk_t ext4_group_overhead_blocks(struct super_block *sb,
0122                         ext4_group_t group) {
0123     ext4_grpblk_t overhead;
0124     overhead = ext4_bg_num_gdb(sb, group);
0125     if (ext4_bg_has_super(sb, group))
0126         overhead += 1 +
0127               le16_to_cpu(EXT4_SB(sb)->s_es->s_reserved_gdt_blocks);
0128     return overhead;
0129 }
0130
/* Half-open range tests: [first, last) contains first but not last. */
#define inside(b, first, last)  ((b) >= (first) && (b) < (last))
#define outside(b, first, last) (!inside(b, first, last))
0133
0134 static int verify_group_input(struct super_block *sb,
0135                   struct ext4_new_group_data *input)
0136 {
0137     struct ext4_sb_info *sbi = EXT4_SB(sb);
0138     struct ext4_super_block *es = sbi->s_es;
0139     ext4_fsblk_t start = ext4_blocks_count(es);
0140     ext4_fsblk_t end = start + input->blocks_count;
0141     ext4_group_t group = input->group;
0142     ext4_fsblk_t itend = input->inode_table + sbi->s_itb_per_group;
0143     unsigned overhead;
0144     ext4_fsblk_t metaend;
0145     struct buffer_head *bh = NULL;
0146     ext4_grpblk_t free_blocks_count, offset;
0147     int err = -EINVAL;
0148
0149     if (group != sbi->s_groups_count) {
0150         ext4_warning(sb, "Cannot add at group %u (only %u groups)",
0151                  input->group, sbi->s_groups_count);
0152         return -EINVAL;
0153     }
0154
0155     overhead = ext4_group_overhead_blocks(sb, group);
0156     metaend = start + overhead;
0157     input->free_clusters_count = free_blocks_count =
0158         input->blocks_count - 2 - overhead - sbi->s_itb_per_group;
0159
0160     if (test_opt(sb, DEBUG))
0161         printk(KERN_DEBUG "EXT4-fs: adding %s group %u: %u blocks "
0162                "(%d free, %u reserved)\n",
0163                ext4_bg_has_super(sb, input->group) ? "normal" :
0164                "no-super", input->group, input->blocks_count,
0165                free_blocks_count, input->reserved_blocks);
0166
0167     ext4_get_group_no_and_offset(sb, start, NULL, &offset);
0168     if (offset != 0)
0169             ext4_warning(sb, "Last group not full");
0170     else if (input->reserved_blocks > input->blocks_count / 5)
0171         ext4_warning(sb, "Reserved blocks too high (%u)",
0172                  input->reserved_blocks);
0173     else if (free_blocks_count < 0)
0174         ext4_warning(sb, "Bad blocks count %u",
0175                  input->blocks_count);
0176     else if (IS_ERR(bh = ext4_sb_bread(sb, end - 1, 0))) {
0177         err = PTR_ERR(bh);
0178         bh = NULL;
0179         ext4_warning(sb, "Cannot read last block (%llu)",
0180                  end - 1);
0181     } else if (outside(input->block_bitmap, start, end))
0182         ext4_warning(sb, "Block bitmap not in group (block %llu)",
0183                  (unsigned long long)input->block_bitmap);
0184     else if (outside(input->inode_bitmap, start, end))
0185         ext4_warning(sb, "Inode bitmap not in group (block %llu)",
0186                  (unsigned long long)input->inode_bitmap);
0187     else if (outside(input->inode_table, start, end) ||
0188          outside(itend - 1, start, end))
0189         ext4_warning(sb, "Inode table not in group (blocks %llu-%llu)",
0190                  (unsigned long long)input->inode_table, itend - 1);
0191     else if (input->inode_bitmap == input->block_bitmap)
0192         ext4_warning(sb, "Block bitmap same as inode bitmap (%llu)",
0193                  (unsigned long long)input->block_bitmap);
0194     else if (inside(input->block_bitmap, input->inode_table, itend))
0195         ext4_warning(sb, "Block bitmap (%llu) in inode table "
0196                  "(%llu-%llu)",
0197                  (unsigned long long)input->block_bitmap,
0198                  (unsigned long long)input->inode_table, itend - 1);
0199     else if (inside(input->inode_bitmap, input->inode_table, itend))
0200         ext4_warning(sb, "Inode bitmap (%llu) in inode table "
0201                  "(%llu-%llu)",
0202                  (unsigned long long)input->inode_bitmap,
0203                  (unsigned long long)input->inode_table, itend - 1);
0204     else if (inside(input->block_bitmap, start, metaend))
0205         ext4_warning(sb, "Block bitmap (%llu) in GDT table (%llu-%llu)",
0206                  (unsigned long long)input->block_bitmap,
0207                  start, metaend - 1);
0208     else if (inside(input->inode_bitmap, start, metaend))
0209         ext4_warning(sb, "Inode bitmap (%llu) in GDT table (%llu-%llu)",
0210                  (unsigned long long)input->inode_bitmap,
0211                  start, metaend - 1);
0212     else if (inside(input->inode_table, start, metaend) ||
0213          inside(itend - 1, start, metaend))
0214         ext4_warning(sb, "Inode table (%llu-%llu) overlaps GDT table "
0215                  "(%llu-%llu)",
0216                  (unsigned long long)input->inode_table,
0217                  itend - 1, start, metaend - 1);
0218     else
0219         err = 0;
0220     brelse(bh);
0221
0222     return err;
0223 }
0224
0225 /*
0226  * ext4_new_flex_group_data is used by 64bit-resize interface to add a flex
0227  * group each time.
0228  */
0229 struct ext4_new_flex_group_data {
0230     struct ext4_new_group_data *groups; /* new_group_data for groups
0231                            in the flex group */
0232     __u16 *bg_flags;            /* block group flags of groups
0233                            in @groups */
0234     ext4_group_t count;         /* number of groups in @groups
0235                          */
0236 };
0237
0238 /*
0239  * alloc_flex_gd() allocates a ext4_new_flex_group_data with size of
0240  * @flexbg_size.
0241  *
0242  * Returns NULL on failure otherwise address of the allocated structure.
0243  */
0244 static struct ext4_new_flex_group_data *alloc_flex_gd(unsigned long flexbg_size)
0245 {
0246     struct ext4_new_flex_group_data *flex_gd;
0247
0248     flex_gd = kmalloc(sizeof(*flex_gd), GFP_NOFS);
0249     if (flex_gd == NULL)
0250         goto out3;
0251
0252     if (flexbg_size >= UINT_MAX / sizeof(struct ext4_new_group_data))
0253         goto out2;
0254     flex_gd->count = flexbg_size;
0255
0256     flex_gd->groups = kmalloc_array(flexbg_size,
0257                     sizeof(struct ext4_new_group_data),
0258                     GFP_NOFS);
0259     if (flex_gd->groups == NULL)
0260         goto out2;
0261
0262     flex_gd->bg_flags = kmalloc_array(flexbg_size, sizeof(__u16),
0263                       GFP_NOFS);
0264     if (flex_gd->bg_flags == NULL)
0265         goto out1;
0266
0267     return flex_gd;
0268
0269 out1:
0270     kfree(flex_gd->groups);
0271 out2:
0272     kfree(flex_gd);
0273 out3:
0274     return NULL;
0275 }
0276
0277 static void free_flex_gd(struct ext4_new_flex_group_data *flex_gd)
0278 {
0279     kfree(flex_gd->bg_flags);
0280     kfree(flex_gd->groups);
0281     kfree(flex_gd);
0282 }
0283
0284 /*
0285  * ext4_alloc_group_tables() allocates block bitmaps, inode bitmaps
0286  * and inode tables for a flex group.
0287  *
0288  * This function is used by 64bit-resize.  Note that this function allocates
0289  * group tables from the 1st group of groups contained by @flexgd, which may
0290  * be a partial of a flex group.
0291  *
0292  * @sb: super block of fs to which the groups belongs
0293  *
0294  * Returns 0 on a successful allocation of the metadata blocks in the
0295  * block group.
0296  */
0297 static int ext4_alloc_group_tables(struct super_block *sb,
0298                 struct ext4_new_flex_group_data *flex_gd,
0299                 int flexbg_size)
0300 {
0301     struct ext4_new_group_data *group_data = flex_gd->groups;
0302     ext4_fsblk_t start_blk;
0303     ext4_fsblk_t last_blk;
0304     ext4_group_t src_group;
0305     ext4_group_t bb_index = 0;
0306     ext4_group_t ib_index = 0;
0307     ext4_group_t it_index = 0;
0308     ext4_group_t group;
0309     ext4_group_t last_group;
0310     unsigned overhead;
0311     __u16 uninit_mask = (flexbg_size > 1) ? ~EXT4_BG_BLOCK_UNINIT : ~0;
0312     int i;
0313
0314     BUG_ON(flex_gd->count == 0 || group_data == NULL);
0315
0316     src_group = group_data[0].group;
0317     last_group  = src_group + flex_gd->count - 1;
0318
0319     BUG_ON((flexbg_size > 1) && ((src_group & ~(flexbg_size - 1)) !=
0320            (last_group & ~(flexbg_size - 1))));
0321 next_group:
0322     group = group_data[0].group;
0323     if (src_group >= group_data[0].group + flex_gd->count)
0324         return -ENOSPC;
0325     start_blk = ext4_group_first_block_no(sb, src_group);
0326     last_blk = start_blk + group_data[src_group - group].blocks_count;
0327
0328     overhead = ext4_group_overhead_blocks(sb, src_group);
0329
0330     start_blk += overhead;
0331
0332     /* We collect contiguous blocks as much as possible. */
0333     src_group++;
0334     for (; src_group <= last_group; src_group++) {
0335         overhead = ext4_group_overhead_blocks(sb, src_group);
0336         if (overhead == 0)
0337             last_blk += group_data[src_group - group].blocks_count;
0338         else
0339             break;
0340     }
0341
0342     /* Allocate block bitmaps */
0343     for (; bb_index < flex_gd->count; bb_index++) {
0344         if (start_blk >= last_blk)
0345             goto next_group;
0346         group_data[bb_index].block_bitmap = start_blk++;
0347         group = ext4_get_group_number(sb, start_blk - 1);
0348         group -= group_data[0].group;
0349         group_data[group].mdata_blocks++;
0350         flex_gd->bg_flags[group] &= uninit_mask;
0351     }
0352
0353     /* Allocate inode bitmaps */
0354     for (; ib_index < flex_gd->count; ib_index++) {
0355         if (start_blk >= last_blk)
0356             goto next_group;
0357         group_data[ib_index].inode_bitmap = start_blk++;
0358         group = ext4_get_group_number(sb, start_blk - 1);
0359         group -= group_data[0].group;
0360         group_data[group].mdata_blocks++;
0361         flex_gd->bg_flags[group] &= uninit_mask;
0362     }
0363
0364     /* Allocate inode tables */
0365     for (; it_index < flex_gd->count; it_index++) {
0366         unsigned int itb = EXT4_SB(sb)->s_itb_per_group;
0367         ext4_fsblk_t next_group_start;
0368
0369         if (start_blk + itb > last_blk)
0370             goto next_group;
0371         group_data[it_index].inode_table = start_blk;
0372         group = ext4_get_group_number(sb, start_blk);
0373         next_group_start = ext4_group_first_block_no(sb, group + 1);
0374         group -= group_data[0].group;
0375
0376         if (start_blk + itb > next_group_start) {
0377             flex_gd->bg_flags[group + 1] &= uninit_mask;
0378             overhead = start_blk + itb - next_group_start;
0379             group_data[group + 1].mdata_blocks += overhead;
0380             itb -= overhead;
0381         }
0382
0383         group_data[group].mdata_blocks += itb;
0384         flex_gd->bg_flags[group] &= uninit_mask;
0385         start_blk += EXT4_SB(sb)->s_itb_per_group;
0386     }
0387
0388     /* Update free clusters count to exclude metadata blocks */
0389     for (i = 0; i < flex_gd->count; i++) {
0390         group_data[i].free_clusters_count -=
0391                 EXT4_NUM_B2C(EXT4_SB(sb),
0392                          group_data[i].mdata_blocks);
0393     }
0394
0395     if (test_opt(sb, DEBUG)) {
0396         int i;
0397         group = group_data[0].group;
0398
0399         printk(KERN_DEBUG "EXT4-fs: adding a flex group with "
0400                "%d groups, flexbg size is %d:\n", flex_gd->count,
0401                flexbg_size);
0402
0403         for (i = 0; i < flex_gd->count; i++) {
0404             ext4_debug(
0405                    "adding %s group %u: %u blocks (%d free, %d mdata blocks)\n",
0406                    ext4_bg_has_super(sb, group + i) ? "normal" :
0407                    "no-super", group + i,
0408                    group_data[i].blocks_count,
0409                    group_data[i].free_clusters_count,
0410                    group_data[i].mdata_blocks);
0411         }
0412     }
0413     return 0;
0414 }
0415
0416 static struct buffer_head *bclean(handle_t *handle, struct super_block *sb,
0417                   ext4_fsblk_t blk)
0418 {
0419     struct buffer_head *bh;
0420     int err;
0421
0422     bh = sb_getblk(sb, blk);
0423     if (unlikely(!bh))
0424         return ERR_PTR(-ENOMEM);
0425     BUFFER_TRACE(bh, "get_write_access");
0426     err = ext4_journal_get_write_access(handle, sb, bh, EXT4_JTR_NONE);
0427     if (err) {
0428         brelse(bh);
0429         bh = ERR_PTR(err);
0430     } else {
0431         memset(bh->b_data, 0, sb->s_blocksize);
0432         set_buffer_uptodate(bh);
0433     }
0434
0435     return bh;
0436 }
0437
0438 static int ext4_resize_ensure_credits_batch(handle_t *handle, int credits)
0439 {
0440     return ext4_journal_ensure_credits_fn(handle, credits,
0441         EXT4_MAX_TRANS_DATA, 0, 0);
0442 }
0443
0444 /*
0445  * set_flexbg_block_bitmap() mark clusters [@first_cluster, @last_cluster] used.
0446  *
0447  * Helper function for ext4_setup_new_group_blocks() which set .
0448  *
0449  * @sb: super block
0450  * @handle: journal handle
0451  * @flex_gd: flex group data
0452  */
0453 static int set_flexbg_block_bitmap(struct super_block *sb, handle_t *handle,
0454             struct ext4_new_flex_group_data *flex_gd,
0455             ext4_fsblk_t first_cluster, ext4_fsblk_t last_cluster)
0456 {
0457     struct ext4_sb_info *sbi = EXT4_SB(sb);
0458     ext4_group_t count = last_cluster - first_cluster + 1;
0459     ext4_group_t count2;
0460
0461     ext4_debug("mark clusters [%llu-%llu] used\n", first_cluster,
0462            last_cluster);
0463     for (count2 = count; count > 0;
0464          count -= count2, first_cluster += count2) {
0465         ext4_fsblk_t start;
0466         struct buffer_head *bh;
0467         ext4_group_t group;
0468         int err;
0469
0470         group = ext4_get_group_number(sb, EXT4_C2B(sbi, first_cluster));
0471         start = EXT4_B2C(sbi, ext4_group_first_block_no(sb, group));
0472         group -= flex_gd->groups[0].group;
0473
0474         count2 = EXT4_CLUSTERS_PER_GROUP(sb) - (first_cluster - start);
0475         if (count2 > count)
0476             count2 = count;
0477
0478         if (flex_gd->bg_flags[group] & EXT4_BG_BLOCK_UNINIT) {
0479             BUG_ON(flex_gd->count > 1);
0480             continue;
0481         }
0482
0483         err = ext4_resize_ensure_credits_batch(handle, 1);
0484         if (err < 0)
0485             return err;
0486
0487         bh = sb_getblk(sb, flex_gd->groups[group].block_bitmap);
0488         if (unlikely(!bh))
0489             return -ENOMEM;
0490
0491         BUFFER_TRACE(bh, "get_write_access");
0492         err = ext4_journal_get_write_access(handle, sb, bh,
0493                             EXT4_JTR_NONE);
0494         if (err) {
0495             brelse(bh);
0496             return err;
0497         }
0498         ext4_debug("mark block bitmap %#04llx (+%llu/%u)\n",
0499                first_cluster, first_cluster - start, count2);
0500         mb_set_bits(bh->b_data, first_cluster - start, count2);
0501
0502         err = ext4_handle_dirty_metadata(handle, NULL, bh);
0503         brelse(bh);
0504         if (unlikely(err))
0505             return err;
0506     }
0507
0508     return 0;
0509 }
0510
0511 /*
0512  * Set up the block and inode bitmaps, and the inode table for the new groups.
0513  * This doesn't need to be part of the main transaction, since we are only
0514  * changing blocks outside the actual filesystem.  We still do journaling to
0515  * ensure the recovery is correct in case of a failure just after resize.
0516  * If any part of this fails, we simply abort the resize.
0517  *
0518  * setup_new_flex_group_blocks handles a flex group as follow:
0519  *  1. copy super block and GDT, and initialize group tables if necessary.
0520  *     In this step, we only set bits in blocks bitmaps for blocks taken by
0521  *     super block and GDT.
0522  *  2. allocate group tables in block bitmaps, that is, set bits in block
0523  *     bitmap for blocks taken by group tables.
0524  */
0525 static int setup_new_flex_group_blocks(struct super_block *sb,
0526                 struct ext4_new_flex_group_data *flex_gd)
0527 {
0528     int group_table_count[] = {1, 1, EXT4_SB(sb)->s_itb_per_group};
0529     ext4_fsblk_t start;
0530     ext4_fsblk_t block;
0531     struct ext4_sb_info *sbi = EXT4_SB(sb);
0532     struct ext4_super_block *es = sbi->s_es;
0533     struct ext4_new_group_data *group_data = flex_gd->groups;
0534     __u16 *bg_flags = flex_gd->bg_flags;
0535     handle_t *handle;
0536     ext4_group_t group, count;
0537     struct buffer_head *bh = NULL;
0538     int reserved_gdb, i, j, err = 0, err2;
0539     int meta_bg;
0540
0541     BUG_ON(!flex_gd->count || !group_data ||
0542            group_data[0].group != sbi->s_groups_count);
0543
0544     reserved_gdb = le16_to_cpu(es->s_reserved_gdt_blocks);
0545     meta_bg = ext4_has_feature_meta_bg(sb);
0546
0547     /* This transaction may be extended/restarted along the way */
0548     handle = ext4_journal_start_sb(sb, EXT4_HT_RESIZE, EXT4_MAX_TRANS_DATA);
0549     if (IS_ERR(handle))
0550         return PTR_ERR(handle);
0551
0552     group = group_data[0].group;
0553     for (i = 0; i < flex_gd->count; i++, group++) {
0554         unsigned long gdblocks;
0555         ext4_grpblk_t overhead;
0556
0557         gdblocks = ext4_bg_num_gdb(sb, group);
0558         start = ext4_group_first_block_no(sb, group);
0559
0560         if (meta_bg == 0 && !ext4_bg_has_super(sb, group))
0561             goto handle_itb;
0562
0563         if (meta_bg == 1) {
0564             ext4_group_t first_group;
0565             first_group = ext4_meta_bg_first_group(sb, group);
0566             if (first_group != group + 1 &&
0567                 first_group != group + EXT4_DESC_PER_BLOCK(sb) - 1)
0568                 goto handle_itb;
0569         }
0570
0571         block = start + ext4_bg_has_super(sb, group);
0572         /* Copy all of the GDT blocks into the backup in this group */
0573         for (j = 0; j < gdblocks; j++, block++) {
0574             struct buffer_head *gdb;
0575
0576             ext4_debug("update backup group %#04llx\n", block);
0577             err = ext4_resize_ensure_credits_batch(handle, 1);
0578             if (err < 0)
0579                 goto out;
0580
0581             gdb = sb_getblk(sb, block);
0582             if (unlikely(!gdb)) {
0583                 err = -ENOMEM;
0584                 goto out;
0585             }
0586
0587             BUFFER_TRACE(gdb, "get_write_access");
0588             err = ext4_journal_get_write_access(handle, sb, gdb,
0589                                 EXT4_JTR_NONE);
0590             if (err) {
0591                 brelse(gdb);
0592                 goto out;
0593             }
0594             memcpy(gdb->b_data, sbi_array_rcu_deref(sbi,
0595                 s_group_desc, j)->b_data, gdb->b_size);
0596             set_buffer_uptodate(gdb);
0597
0598             err = ext4_handle_dirty_metadata(handle, NULL, gdb);
0599             if (unlikely(err)) {
0600                 brelse(gdb);
0601                 goto out;
0602             }
0603             brelse(gdb);
0604         }
0605
0606         /* Zero out all of the reserved backup group descriptor
0607          * table blocks
0608          */
0609         if (ext4_bg_has_super(sb, group)) {
0610             err = sb_issue_zeroout(sb, gdblocks + start + 1,
0611                     reserved_gdb, GFP_NOFS);
0612             if (err)
0613                 goto out;
0614         }
0615
0616 handle_itb:
0617         /* Initialize group tables of the grop @group */
0618         if (!(bg_flags[i] & EXT4_BG_INODE_ZEROED))
0619             goto handle_bb;
0620
0621         /* Zero out all of the inode table blocks */
0622         block = group_data[i].inode_table;
0623         ext4_debug("clear inode table blocks %#04llx -> %#04lx\n",
0624                block, sbi->s_itb_per_group);
0625         err = sb_issue_zeroout(sb, block, sbi->s_itb_per_group,
0626                        GFP_NOFS);
0627         if (err)
0628             goto out;
0629
0630 handle_bb:
0631         if (bg_flags[i] & EXT4_BG_BLOCK_UNINIT)
0632             goto handle_ib;
0633
0634         /* Initialize block bitmap of the @group */
0635         block = group_data[i].block_bitmap;
0636         err = ext4_resize_ensure_credits_batch(handle, 1);
0637         if (err < 0)
0638             goto out;
0639
0640         bh = bclean(handle, sb, block);
0641         if (IS_ERR(bh)) {
0642             err = PTR_ERR(bh);
0643             goto out;
0644         }
0645         overhead = ext4_group_overhead_blocks(sb, group);
0646         if (overhead != 0) {
0647             ext4_debug("mark backup superblock %#04llx (+0)\n",
0648                    start);
0649             mb_set_bits(bh->b_data, 0,
0650                       EXT4_NUM_B2C(sbi, overhead));
0651         }
0652         ext4_mark_bitmap_end(EXT4_B2C(sbi, group_data[i].blocks_count),
0653                      sb->s_blocksize * 8, bh->b_data);
0654         err = ext4_handle_dirty_metadata(handle, NULL, bh);
0655         brelse(bh);
0656         if (err)
0657             goto out;
0658
0659 handle_ib:
0660         if (bg_flags[i] & EXT4_BG_INODE_UNINIT)
0661             continue;
0662
0663         /* Initialize inode bitmap of the @group */
0664         block = group_data[i].inode_bitmap;
0665         err = ext4_resize_ensure_credits_batch(handle, 1);
0666         if (err < 0)
0667             goto out;
0668         /* Mark unused entries in inode bitmap used */
0669         bh = bclean(handle, sb, block);
0670         if (IS_ERR(bh)) {
0671             err = PTR_ERR(bh);
0672             goto out;
0673         }
0674
0675         ext4_mark_bitmap_end(EXT4_INODES_PER_GROUP(sb),
0676                      sb->s_blocksize * 8, bh->b_data);
0677         err = ext4_handle_dirty_metadata(handle, NULL, bh);
0678         brelse(bh);
0679         if (err)
0680             goto out;
0681     }
0682
0683     /* Mark group tables in block bitmap */
0684     for (j = 0; j < GROUP_TABLE_COUNT; j++) {
0685         count = group_table_count[j];
0686         start = (&group_data[0].block_bitmap)[j];
0687         block = start;
0688         for (i = 1; i < flex_gd->count; i++) {
0689             block += group_table_count[j];
0690             if (block == (&group_data[i].block_bitmap)[j]) {
0691                 count += group_table_count[j];
0692                 continue;
0693             }
0694             err = set_flexbg_block_bitmap(sb, handle,
0695                               flex_gd,
0696                               EXT4_B2C(sbi, start),
0697                               EXT4_B2C(sbi,
0698                                    start + count
0699                                    - 1));
0700             if (err)
0701                 goto out;
0702             count = group_table_count[j];
0703             start = (&group_data[i].block_bitmap)[j];
0704             block = start;
0705         }
0706
0707         if (count) {
0708             err = set_flexbg_block_bitmap(sb, handle,
0709                               flex_gd,
0710                               EXT4_B2C(sbi, start),
0711                               EXT4_B2C(sbi,
0712                                    start + count
0713                                    - 1));
0714             if (err)
0715                 goto out;
0716         }
0717     }
0718
0719 out:
0720     err2 = ext4_journal_stop(handle);
0721     if (err2 && !err)
0722         err = err2;
0723
0724     return err;
0725 }
0726
0727 /*
0728  * Iterate through the groups which hold BACKUP superblock/GDT copies in an
0729  * ext4 filesystem.  The counters should be initialized to 1, 5, and 7 before
0730  * calling this for the first time.  In a sparse filesystem it will be the
0731  * sequence of powers of 3, 5, and 7: 1, 3, 5, 7, 9, 25, 27, 49, 81, ...
0732  * For a non-sparse filesystem it will be every group: 1, 2, 3, 4, ...
0733  */
0734 unsigned int ext4_list_backups(struct super_block *sb, unsigned int *three,
0735                    unsigned int *five, unsigned int *seven)
0736 {
0737     struct ext4_super_block *es = EXT4_SB(sb)->s_es;
0738     unsigned int *min = three;
0739     int mult = 3;
0740     unsigned int ret;
0741
0742     if (ext4_has_feature_sparse_super2(sb)) {
0743         do {
0744             if (*min > 2)
0745                 return UINT_MAX;
0746             ret = le32_to_cpu(es->s_backup_bgs[*min - 1]);
0747             *min += 1;
0748         } while (!ret);
0749         return ret;
0750     }
0751
0752     if (!ext4_has_feature_sparse_super(sb)) {
0753         ret = *min;
0754         *min += 1;
0755         return ret;
0756     }
0757
0758     if (*five < *min) {
0759         min = five;
0760         mult = 5;
0761     }
0762     if (*seven < *min) {
0763         min = seven;
0764         mult = 7;
0765     }
0766
0767     ret = *min;
0768     *min *= mult;
0769
0770     return ret;
0771 }
0772
0773 /*
0774  * Check that all of the backup GDT blocks are held in the primary GDT block.
0775  * It is assumed that they are stored in group order.  Returns the number of
0776  * groups in current filesystem that have BACKUPS, or -ve error code.
0777  */
0778 static int verify_reserved_gdb(struct super_block *sb,
0779                    ext4_group_t end,
0780                    struct buffer_head *primary)
0781 {
0782     const ext4_fsblk_t blk = primary->b_blocknr;
0783     unsigned three = 1;
0784     unsigned five = 5;
0785     unsigned seven = 7;
0786     unsigned grp;
0787     __le32 *p = (__le32 *)primary->b_data;
0788     int gdbackups = 0;
0789
0790     while ((grp = ext4_list_backups(sb, &three, &five, &seven)) < end) {
0791         if (le32_to_cpu(*p++) !=
0792             grp * EXT4_BLOCKS_PER_GROUP(sb) + blk){
0793             ext4_warning(sb, "reserved GDT %llu"
0794                      " missing grp %d (%llu)",
0795                      blk, grp,
0796                      grp *
0797                      (ext4_fsblk_t)EXT4_BLOCKS_PER_GROUP(sb) +
0798                      blk);
0799             return -EINVAL;
0800         }
0801         if (++gdbackups > EXT4_ADDR_PER_BLOCK(sb))
0802             return -EFBIG;
0803     }
0804
0805     return gdbackups;
0806 }
0807
0808 /*
0809  * Called when we need to bring a reserved group descriptor table block into
0810  * use from the resize inode.  The primary copy of the new GDT block currently
0811  * is an indirect block (under the double indirect block in the resize inode).
0812  * The new backup GDT blocks will be stored as leaf blocks in this indirect
0813  * block, in group order.  Even though we know all the block numbers we need,
0814  * we check to ensure that the resize inode has actually reserved these blocks.
0815  *
0816  * Don't need to update the block bitmaps because the blocks are still in use.
0817  *
0818  * We get all of the error cases out of the way, so that we are sure to not
0819  * fail once we start modifying the data on disk, because JBD has no rollback.
0820  */
0821 static int add_new_gdb(handle_t *handle, struct inode *inode,
0822                ext4_group_t group)
0823 {
0824     struct super_block *sb = inode->i_sb;
0825     struct ext4_super_block *es = EXT4_SB(sb)->s_es;
0826     unsigned long gdb_num = group / EXT4_DESC_PER_BLOCK(sb);
0827     ext4_fsblk_t gdblock = EXT4_SB(sb)->s_sbh->b_blocknr + 1 + gdb_num;
0828     struct buffer_head **o_group_desc, **n_group_desc = NULL;
0829     struct buffer_head *dind = NULL;
0830     struct buffer_head *gdb_bh = NULL;
0831     int gdbackups;
0832     struct ext4_iloc iloc = { .bh = NULL };
0833     __le32 *data;
0834     int err;
0835
0836     if (test_opt(sb, DEBUG))
0837         printk(KERN_DEBUG
0838                "EXT4-fs: ext4_add_new_gdb: adding group block %lu\n",
0839                gdb_num);
0840
0841     gdb_bh = ext4_sb_bread(sb, gdblock, 0);
0842     if (IS_ERR(gdb_bh))
0843         return PTR_ERR(gdb_bh);
0844
0845     gdbackups = verify_reserved_gdb(sb, group, gdb_bh);
0846     if (gdbackups < 0) {
0847         err = gdbackups;
0848         goto errout;
0849     }
0850
0851     data = EXT4_I(inode)->i_data + EXT4_DIND_BLOCK;
0852     dind = ext4_sb_bread(sb, le32_to_cpu(*data), 0);
0853     if (IS_ERR(dind)) {
0854         err = PTR_ERR(dind);
0855         dind = NULL;
0856         goto errout;
0857     }
0858
0859     data = (__le32 *)dind->b_data;
0860     if (le32_to_cpu(data[gdb_num % EXT4_ADDR_PER_BLOCK(sb)]) != gdblock) {
0861         ext4_warning(sb, "new group %u GDT block %llu not reserved",
0862                  group, gdblock);
0863         err = -EINVAL;
0864         goto errout;
0865     }
0866
0867     BUFFER_TRACE(EXT4_SB(sb)->s_sbh, "get_write_access");
0868     err = ext4_journal_get_write_access(handle, sb, EXT4_SB(sb)->s_sbh,
0869                         EXT4_JTR_NONE);
0870     if (unlikely(err))
0871         goto errout;
0872
0873     BUFFER_TRACE(gdb_bh, "get_write_access");
0874     err = ext4_journal_get_write_access(handle, sb, gdb_bh, EXT4_JTR_NONE);
0875     if (unlikely(err))
0876         goto errout;
0877
0878     BUFFER_TRACE(dind, "get_write_access");
0879     err = ext4_journal_get_write_access(handle, sb, dind, EXT4_JTR_NONE);
0880     if (unlikely(err)) {
0881         ext4_std_error(sb, err);
0882         goto errout;
0883     }
0884
0885     /* ext4_reserve_inode_write() gets a reference on the iloc */
0886     err = ext4_reserve_inode_write(handle, inode, &iloc);
0887     if (unlikely(err))
0888         goto errout;
0889
0890     n_group_desc = kvmalloc((gdb_num + 1) * sizeof(struct buffer_head *),
0891                 GFP_KERNEL);
0892     if (!n_group_desc) {
0893         err = -ENOMEM;
0894         ext4_warning(sb, "not enough memory for %lu groups",
0895                  gdb_num + 1);
0896         goto errout;
0897     }
0898
0899     /*
0900      * Finally, we have all of the possible failures behind us...
0901      *
0902      * Remove new GDT block from inode double-indirect block and clear out
0903      * the new GDT block for use (which also "frees" the backup GDT blocks
0904      * from the reserved inode).  We don't need to change the bitmaps for
0905      * these blocks, because they are marked as in-use from being in the
0906      * reserved inode, and will become GDT blocks (primary and backup).
0907      */
0908     data[gdb_num % EXT4_ADDR_PER_BLOCK(sb)] = 0;
0909     err = ext4_handle_dirty_metadata(handle, NULL, dind);
0910     if (unlikely(err)) {
0911         ext4_std_error(sb, err);
0912         goto errout;
0913     }
0914     inode->i_blocks -= (gdbackups + 1) * sb->s_blocksize >>
0915                (9 - EXT4_SB(sb)->s_cluster_bits);
0916     ext4_mark_iloc_dirty(handle, inode, &iloc);
0917     memset(gdb_bh->b_data, 0, sb->s_blocksize);
0918     err = ext4_handle_dirty_metadata(handle, NULL, gdb_bh);
0919     if (unlikely(err)) {
0920         ext4_std_error(sb, err);
0921         iloc.bh = NULL;
0922         goto errout;
0923     }
0924     brelse(dind);
0925
0926     rcu_read_lock();
0927     o_group_desc = rcu_dereference(EXT4_SB(sb)->s_group_desc);
0928     memcpy(n_group_desc, o_group_desc,
0929            EXT4_SB(sb)->s_gdb_count * sizeof(struct buffer_head *));
0930     rcu_read_unlock();
0931     n_group_desc[gdb_num] = gdb_bh;
0932     rcu_assign_pointer(EXT4_SB(sb)->s_group_desc, n_group_desc);
0933     EXT4_SB(sb)->s_gdb_count++;
0934     ext4_kvfree_array_rcu(o_group_desc);
0935
0936     lock_buffer(EXT4_SB(sb)->s_sbh);
0937     le16_add_cpu(&es->s_reserved_gdt_blocks, -1);
0938     ext4_superblock_csum_set(sb);
0939     unlock_buffer(EXT4_SB(sb)->s_sbh);
0940     err = ext4_handle_dirty_metadata(handle, NULL, EXT4_SB(sb)->s_sbh);
0941     if (err)
0942         ext4_std_error(sb, err);
0943     return err;
0944 errout:
0945     kvfree(n_group_desc);
0946     brelse(iloc.bh);
0947     brelse(dind);
0948     brelse(gdb_bh);
0949
0950     ext4_debug("leaving with error %d\n", err);
0951     return err;
0952 }
0953
0954 /*
0955  * add_new_gdb_meta_bg is the sister of add_new_gdb.
0956  */
0957 static int add_new_gdb_meta_bg(struct super_block *sb,
0958                    handle_t *handle, ext4_group_t group) {
0959     ext4_fsblk_t gdblock;
0960     struct buffer_head *gdb_bh;
0961     struct buffer_head **o_group_desc, **n_group_desc;
0962     unsigned long gdb_num = group / EXT4_DESC_PER_BLOCK(sb);
0963     int err;
0964
0965     gdblock = ext4_meta_bg_first_block_no(sb, group) +
0966            ext4_bg_has_super(sb, group);
0967     gdb_bh = ext4_sb_bread(sb, gdblock, 0);
0968     if (IS_ERR(gdb_bh))
0969         return PTR_ERR(gdb_bh);
0970     n_group_desc = kvmalloc((gdb_num + 1) * sizeof(struct buffer_head *),
0971                 GFP_KERNEL);
0972     if (!n_group_desc) {
0973         brelse(gdb_bh);
0974         err = -ENOMEM;
0975         ext4_warning(sb, "not enough memory for %lu groups",
0976                  gdb_num + 1);
0977         return err;
0978     }
0979
0980     rcu_read_lock();
0981     o_group_desc = rcu_dereference(EXT4_SB(sb)->s_group_desc);
0982     memcpy(n_group_desc, o_group_desc,
0983            EXT4_SB(sb)->s_gdb_count * sizeof(struct buffer_head *));
0984     rcu_read_unlock();
0985     n_group_desc[gdb_num] = gdb_bh;
0986
0987     BUFFER_TRACE(gdb_bh, "get_write_access");
0988     err = ext4_journal_get_write_access(handle, sb, gdb_bh, EXT4_JTR_NONE);
0989     if (err) {
0990         kvfree(n_group_desc);
0991         brelse(gdb_bh);
0992         return err;
0993     }
0994
0995     rcu_assign_pointer(EXT4_SB(sb)->s_group_desc, n_group_desc);
0996     EXT4_SB(sb)->s_gdb_count++;
0997     ext4_kvfree_array_rcu(o_group_desc);
0998     return err;
0999 }
1000
1001 /*
1002  * Called when we are adding a new group which has a backup copy of each of
1003  * the GDT blocks (i.e. sparse group) and there are reserved GDT blocks.
1004  * We need to add these reserved backup GDT blocks to the resize inode, so
1005  * that they are kept for future resizing and not allocated to files.
1006  *
1007  * Each reserved backup GDT block will go into a different indirect block.
1008  * The indirect blocks are actually the primary reserved GDT blocks,
1009  * so we know in advance what their block numbers are.  We only get the
1010  * double-indirect block to verify it is pointing to the primary reserved
1011  * GDT blocks so we don't overwrite a data block by accident.  The reserved
1012  * backup GDT blocks are stored in their reserved primary GDT block.
1013  */
1014 static int reserve_backup_gdb(handle_t *handle, struct inode *inode,
1015                   ext4_group_t group)
1016 {
1017     struct super_block *sb = inode->i_sb;
1018     int reserved_gdb =le16_to_cpu(EXT4_SB(sb)->s_es->s_reserved_gdt_blocks);
1019     int cluster_bits = EXT4_SB(sb)->s_cluster_bits;
1020     struct buffer_head **primary;
1021     struct buffer_head *dind;
1022     struct ext4_iloc iloc;
1023     ext4_fsblk_t blk;
1024     __le32 *data, *end;
1025     int gdbackups = 0;
1026     int res, i;
1027     int err;
1028
1029     primary = kmalloc_array(reserved_gdb, sizeof(*primary), GFP_NOFS);
1030     if (!primary)
1031         return -ENOMEM;
1032
1033     data = EXT4_I(inode)->i_data + EXT4_DIND_BLOCK;
1034     dind = ext4_sb_bread(sb, le32_to_cpu(*data), 0);
1035     if (IS_ERR(dind)) {
1036         err = PTR_ERR(dind);
1037         dind = NULL;
1038         goto exit_free;
1039     }
1040
1041     blk = EXT4_SB(sb)->s_sbh->b_blocknr + 1 + EXT4_SB(sb)->s_gdb_count;
1042     data = (__le32 *)dind->b_data + (EXT4_SB(sb)->s_gdb_count %
1043                      EXT4_ADDR_PER_BLOCK(sb));
1044     end = (__le32 *)dind->b_data + EXT4_ADDR_PER_BLOCK(sb);
1045
1046     /* Get each reserved primary GDT block and verify it holds backups */
1047     for (res = 0; res < reserved_gdb; res++, blk++) {
1048         if (le32_to_cpu(*data) != blk) {
1049             ext4_warning(sb, "reserved block %llu"
1050                      " not at offset %ld",
1051                      blk,
1052                      (long)(data - (__le32 *)dind->b_data));
1053             err = -EINVAL;
1054             goto exit_bh;
1055         }
1056         primary[res] = ext4_sb_bread(sb, blk, 0);
1057         if (IS_ERR(primary[res])) {
1058             err = PTR_ERR(primary[res]);
1059             primary[res] = NULL;
1060             goto exit_bh;
1061         }
1062         gdbackups = verify_reserved_gdb(sb, group, primary[res]);
1063         if (gdbackups < 0) {
1064             brelse(primary[res]);
1065             err = gdbackups;
1066             goto exit_bh;
1067         }
1068         if (++data >= end)
1069             data = (__le32 *)dind->b_data;
1070     }
1071
1072     for (i = 0; i < reserved_gdb; i++) {
1073         BUFFER_TRACE(primary[i], "get_write_access");
1074         if ((err = ext4_journal_get_write_access(handle, sb, primary[i],
1075                              EXT4_JTR_NONE)))
1076             goto exit_bh;
1077     }
1078
1079     if ((err = ext4_reserve_inode_write(handle, inode, &iloc)))
1080         goto exit_bh;
1081
1082     /*
1083      * Finally we can add each of the reserved backup GDT blocks from
1084      * the new group to its reserved primary GDT block.
1085      */
1086     blk = group * EXT4_BLOCKS_PER_GROUP(sb);
1087     for (i = 0; i < reserved_gdb; i++) {
1088         int err2;
1089         data = (__le32 *)primary[i]->b_data;
1090         /* printk("reserving backup %lu[%u] = %lu\n",
1091                primary[i]->b_blocknr, gdbackups,
1092                blk + primary[i]->b_blocknr); */
1093         data[gdbackups] = cpu_to_le32(blk + primary[i]->b_blocknr);
1094         err2 = ext4_handle_dirty_metadata(handle, NULL, primary[i]);
1095         if (!err)
1096             err = err2;
1097     }
1098
1099     inode->i_blocks += reserved_gdb * sb->s_blocksize >> (9 - cluster_bits);
1100     ext4_mark_iloc_dirty(handle, inode, &iloc);
1101
1102 exit_bh:
1103     while (--res >= 0)
1104         brelse(primary[res]);
1105     brelse(dind);
1106
1107 exit_free:
1108     kfree(primary);
1109
1110     return err;
1111 }
1112
1113 /*
1114  * Update the backup copies of the ext4 metadata.  These don't need to be part
1115  * of the main resize transaction, because e2fsck will re-write them if there
1116  * is a problem (basically only OOM will cause a problem).  However, we
1117  * _should_ update the backups if possible, in case the primary gets trashed
1118  * for some reason and we need to run e2fsck from a backup superblock.  The
1119  * important part is that the new block and inode counts are in the backup
1120  * superblocks, and the location of the new group metadata in the GDT backups.
1121  *
1122  * We do not need take the s_resize_lock for this, because these
1123  * blocks are not otherwise touched by the filesystem code when it is
1124  * mounted.  We don't need to worry about last changing from
1125  * sbi->s_groups_count, because the worst that can happen is that we
1126  * do not copy the full number of backups at this time.  The resize
1127  * which changed s_groups_count will backup again.
1128  */
1129 static void update_backups(struct super_block *sb, sector_t blk_off, char *data,
1130                int size, int meta_bg)
1131 {
1132     struct ext4_sb_info *sbi = EXT4_SB(sb);
1133     ext4_group_t last;
1134     const int bpg = EXT4_BLOCKS_PER_GROUP(sb);
1135     unsigned three = 1;
1136     unsigned five = 5;
1137     unsigned seven = 7;
1138     ext4_group_t group = 0;
1139     int rest = sb->s_blocksize - size;
1140     handle_t *handle;
1141     int err = 0, err2;
1142
1143     handle = ext4_journal_start_sb(sb, EXT4_HT_RESIZE, EXT4_MAX_TRANS_DATA);
1144     if (IS_ERR(handle)) {
1145         group = 1;
1146         err = PTR_ERR(handle);
1147         goto exit_err;
1148     }
1149
1150     if (meta_bg == 0) {
1151         group = ext4_list_backups(sb, &three, &five, &seven);
1152         last = sbi->s_groups_count;
1153     } else {
1154         group = ext4_get_group_number(sb, blk_off) + 1;
1155         last = (ext4_group_t)(group + EXT4_DESC_PER_BLOCK(sb) - 2);
1156     }
1157
1158     while (group < sbi->s_groups_count) {
1159         struct buffer_head *bh;
1160         ext4_fsblk_t backup_block;
1161
1162         /* Out of journal space, and can't get more - abort - so sad */
1163         err = ext4_resize_ensure_credits_batch(handle, 1);
1164         if (err < 0)
1165             break;
1166
1167         if (meta_bg == 0)
1168             backup_block = ((ext4_fsblk_t)group) * bpg + blk_off;
1169         else
1170             backup_block = (ext4_group_first_block_no(sb, group) +
1171                     ext4_bg_has_super(sb, group));
1172
1173         bh = sb_getblk(sb, backup_block);
1174         if (unlikely(!bh)) {
1175             err = -ENOMEM;
1176             break;
1177         }
1178         ext4_debug("update metadata backup %llu(+%llu)\n",
1179                backup_block, backup_block -
1180                ext4_group_first_block_no(sb, group));
1181         BUFFER_TRACE(bh, "get_write_access");
1182         if ((err = ext4_journal_get_write_access(handle, sb, bh,
1183                              EXT4_JTR_NONE)))
1184             break;
1185         lock_buffer(bh);
1186         memcpy(bh->b_data, data, size);
1187         if (rest)
1188             memset(bh->b_data + size, 0, rest);
1189         set_buffer_uptodate(bh);
1190         unlock_buffer(bh);
1191         err = ext4_handle_dirty_metadata(handle, NULL, bh);
1192         if (unlikely(err))
1193             ext4_std_error(sb, err);
1194         brelse(bh);
1195
1196         if (meta_bg == 0)
1197             group = ext4_list_backups(sb, &three, &five, &seven);
1198         else if (group == last)
1199             break;
1200         else
1201             group = last;
1202     }
1203     if ((err2 = ext4_journal_stop(handle)) && !err)
1204         err = err2;
1205
1206     /*
1207      * Ugh! Need to have e2fsck write the backup copies.  It is too
1208      * late to revert the resize, we shouldn't fail just because of
1209      * the backup copies (they are only needed in case of corruption).
1210      *
1211      * However, if we got here we have a journal problem too, so we
1212      * can't really start a transaction to mark the superblock.
1213      * Chicken out and just set the flag on the hope it will be written
1214      * to disk, and if not - we will simply wait until next fsck.
1215      */
1216 exit_err:
1217     if (err) {
1218         ext4_warning(sb, "can't update backup for group %u (err %d), "
1219                  "forcing fsck on next reboot", group, err);
1220         sbi->s_mount_state &= ~EXT4_VALID_FS;
1221         sbi->s_es->s_state &= cpu_to_le16(~EXT4_VALID_FS);
1222         mark_buffer_dirty(sbi->s_sbh);
1223     }
1224 }
1225
1226 /*
1227  * ext4_add_new_descs() adds @count group descriptor of groups
1228  * starting at @group
1229  *
1230  * @handle: journal handle
1231  * @sb: super block
1232  * @group: the group no. of the first group desc to be added
1233  * @resize_inode: the resize inode
1234  * @count: number of group descriptors to be added
1235  */
1236 static int ext4_add_new_descs(handle_t *handle, struct super_block *sb,
1237                   ext4_group_t group, struct inode *resize_inode,
1238                   ext4_group_t count)
1239 {
1240     struct ext4_sb_info *sbi = EXT4_SB(sb);
1241     struct ext4_super_block *es = sbi->s_es;
1242     struct buffer_head *gdb_bh;
1243     int i, gdb_off, gdb_num, err = 0;
1244     int meta_bg;
1245
1246     meta_bg = ext4_has_feature_meta_bg(sb);
1247     for (i = 0; i < count; i++, group++) {
1248         int reserved_gdb = ext4_bg_has_super(sb, group) ?
1249             le16_to_cpu(es->s_reserved_gdt_blocks) : 0;
1250
1251         gdb_off = group % EXT4_DESC_PER_BLOCK(sb);
1252         gdb_num = group / EXT4_DESC_PER_BLOCK(sb);
1253
1254         /*
1255          * We will only either add reserved group blocks to a backup group
1256          * or remove reserved blocks for the first group in a new group block.
1257          * Doing both would be mean more complex code, and sane people don't
1258          * use non-sparse filesystems anymore.  This is already checked above.
1259          */
1260         if (gdb_off) {
1261             gdb_bh = sbi_array_rcu_deref(sbi, s_group_desc,
1262                              gdb_num);
1263             BUFFER_TRACE(gdb_bh, "get_write_access");
1264             err = ext4_journal_get_write_access(handle, sb, gdb_bh,
1265                                 EXT4_JTR_NONE);
1266
1267             if (!err && reserved_gdb && ext4_bg_num_gdb(sb, group))
1268                 err = reserve_backup_gdb(handle, resize_inode, group);
1269         } else if (meta_bg != 0) {
1270             err = add_new_gdb_meta_bg(sb, handle, group);
1271         } else {
1272             err = add_new_gdb(handle, resize_inode, group);
1273         }
1274         if (err)
1275             break;
1276     }
1277     return err;
1278 }
1279
1280 static struct buffer_head *ext4_get_bitmap(struct super_block *sb, __u64 block)
1281 {
1282     struct buffer_head *bh = sb_getblk(sb, block);
1283     if (unlikely(!bh))
1284         return NULL;
1285     if (!bh_uptodate_or_lock(bh)) {
1286         if (ext4_read_bh(bh, 0, NULL) < 0) {
1287             brelse(bh);
1288             return NULL;
1289         }
1290     }
1291
1292     return bh;
1293 }
1294
1295 static int ext4_set_bitmap_checksums(struct super_block *sb,
1296                      ext4_group_t group,
1297                      struct ext4_group_desc *gdp,
1298                      struct ext4_new_group_data *group_data)
1299 {
1300     struct buffer_head *bh;
1301
1302     if (!ext4_has_metadata_csum(sb))
1303         return 0;
1304
1305     bh = ext4_get_bitmap(sb, group_data->inode_bitmap);
1306     if (!bh)
1307         return -EIO;
1308     ext4_inode_bitmap_csum_set(sb, group, gdp, bh,
1309                    EXT4_INODES_PER_GROUP(sb) / 8);
1310     brelse(bh);
1311
1312     bh = ext4_get_bitmap(sb, group_data->block_bitmap);
1313     if (!bh)
1314         return -EIO;
1315     ext4_block_bitmap_csum_set(sb, group, gdp, bh);
1316     brelse(bh);
1317
1318     return 0;
1319 }
1320
1321 /*
1322  * ext4_setup_new_descs() will set up the group descriptor descriptors of a flex bg
1323  */
1324 static int ext4_setup_new_descs(handle_t *handle, struct super_block *sb,
1325                 struct ext4_new_flex_group_data *flex_gd)
1326 {
1327     struct ext4_new_group_data  *group_data = flex_gd->groups;
1328     struct ext4_group_desc      *gdp;
1329     struct ext4_sb_info     *sbi = EXT4_SB(sb);
1330     struct buffer_head      *gdb_bh;
1331     ext4_group_t            group;
1332     __u16               *bg_flags = flex_gd->bg_flags;
1333     int             i, gdb_off, gdb_num, err = 0;
1334
1335
1336     for (i = 0; i < flex_gd->count; i++, group_data++, bg_flags++) {
1337         group = group_data->group;
1338
1339         gdb_off = group % EXT4_DESC_PER_BLOCK(sb);
1340         gdb_num = group / EXT4_DESC_PER_BLOCK(sb);
1341
1342         /*
1343          * get_write_access() has been called on gdb_bh by ext4_add_new_desc().
1344          */
1345         gdb_bh = sbi_array_rcu_deref(sbi, s_group_desc, gdb_num);
1346         /* Update group descriptor block for new group */
1347         gdp = (struct ext4_group_desc *)(gdb_bh->b_data +
1348                          gdb_off * EXT4_DESC_SIZE(sb));
1349
1350         memset(gdp, 0, EXT4_DESC_SIZE(sb));
1351         ext4_block_bitmap_set(sb, gdp, group_data->block_bitmap);
1352         ext4_inode_bitmap_set(sb, gdp, group_data->inode_bitmap);
1353         err = ext4_set_bitmap_checksums(sb, group, gdp, group_data);
1354         if (err) {
1355             ext4_std_error(sb, err);
1356             break;
1357         }
1358
1359         ext4_inode_table_set(sb, gdp, group_data->inode_table);
1360         ext4_free_group_clusters_set(sb, gdp,
1361                          group_data->free_clusters_count);
1362         ext4_free_inodes_set(sb, gdp, EXT4_INODES_PER_GROUP(sb));
1363         if (ext4_has_group_desc_csum(sb))
1364             ext4_itable_unused_set(sb, gdp,
1365                            EXT4_INODES_PER_GROUP(sb));
1366         gdp->bg_flags = cpu_to_le16(*bg_flags);
1367         ext4_group_desc_csum_set(sb, group, gdp);
1368
1369         err = ext4_handle_dirty_metadata(handle, NULL, gdb_bh);
1370         if (unlikely(err)) {
1371             ext4_std_error(sb, err);
1372             break;
1373         }
1374
1375         /*
1376          * We can allocate memory for mb_alloc based on the new group
1377          * descriptor
1378          */
1379         err = ext4_mb_add_groupinfo(sb, group, gdp);
1380         if (err)
1381             break;
1382     }
1383     return err;
1384 }
1385
1386 static void ext4_add_overhead(struct super_block *sb,
1387                               const ext4_fsblk_t overhead)
1388 {
1389        struct ext4_sb_info *sbi = EXT4_SB(sb);
1390        struct ext4_super_block *es = sbi->s_es;
1391
1392        sbi->s_overhead += overhead;
1393        es->s_overhead_clusters = cpu_to_le32(sbi->s_overhead);
1394        smp_wmb();
1395 }
1396
1397 /*
1398  * ext4_update_super() updates the super block so that the newly added
1399  * groups can be seen by the filesystem.
1400  *
1401  * @sb: super block
1402  * @flex_gd: new added groups
1403  */
1404 static void ext4_update_super(struct super_block *sb,
1405                  struct ext4_new_flex_group_data *flex_gd)
1406 {
1407     ext4_fsblk_t blocks_count = 0;
1408     ext4_fsblk_t free_blocks = 0;
1409     ext4_fsblk_t reserved_blocks = 0;
1410     struct ext4_new_group_data *group_data = flex_gd->groups;
1411     struct ext4_sb_info *sbi = EXT4_SB(sb);
1412     struct ext4_super_block *es = sbi->s_es;
1413     int i;
1414
1415     BUG_ON(flex_gd->count == 0 || group_data == NULL);
1416     /*
1417      * Make the new blocks and inodes valid next.  We do this before
1418      * increasing the group count so that once the group is enabled,
1419      * all of its blocks and inodes are already valid.
1420      *
1421      * We always allocate group-by-group, then block-by-block or
1422      * inode-by-inode within a group, so enabling these
1423      * blocks/inodes before the group is live won't actually let us
1424      * allocate the new space yet.
1425      */
1426     for (i = 0; i < flex_gd->count; i++) {
1427         blocks_count += group_data[i].blocks_count;
1428         free_blocks += EXT4_C2B(sbi, group_data[i].free_clusters_count);
1429     }
1430
1431     reserved_blocks = ext4_r_blocks_count(es) * 100;
1432     reserved_blocks = div64_u64(reserved_blocks, ext4_blocks_count(es));
1433     reserved_blocks *= blocks_count;
1434     do_div(reserved_blocks, 100);
1435
1436     lock_buffer(sbi->s_sbh);
1437     ext4_blocks_count_set(es, ext4_blocks_count(es) + blocks_count);
1438     ext4_free_blocks_count_set(es, ext4_free_blocks_count(es) + free_blocks);
1439     le32_add_cpu(&es->s_inodes_count, EXT4_INODES_PER_GROUP(sb) *
1440              flex_gd->count);
1441     le32_add_cpu(&es->s_free_inodes_count, EXT4_INODES_PER_GROUP(sb) *
1442              flex_gd->count);
1443
1444     ext4_debug("free blocks count %llu", ext4_free_blocks_count(es));
1445     /*
1446      * We need to protect s_groups_count against other CPUs seeing
1447      * inconsistent state in the superblock.
1448      *
1449      * The precise rules we use are:
1450      *
1451      * * Writers must perform a smp_wmb() after updating all
1452      *   dependent data and before modifying the groups count
1453      *
1454      * * Readers must perform an smp_rmb() after reading the groups
1455      *   count and before reading any dependent data.
1456      *
1457      * NB. These rules can be relaxed when checking the group count
1458      * while freeing data, as we can only allocate from a block
1459      * group after serialising against the group count, and we can
1460      * only then free after serialising in turn against that
1461      * allocation.
1462      */
1463     smp_wmb();
1464
1465     /* Update the global fs size fields */
1466     sbi->s_groups_count += flex_gd->count;
1467     sbi->s_blockfile_groups = min_t(ext4_group_t, sbi->s_groups_count,
1468             (EXT4_MAX_BLOCK_FILE_PHYS / EXT4_BLOCKS_PER_GROUP(sb)));
1469
1470     /* Update the reserved block counts only once the new group is
1471      * active. */
1472     ext4_r_blocks_count_set(es, ext4_r_blocks_count(es) +
1473                 reserved_blocks);
1474     ext4_superblock_csum_set(sb);
1475     unlock_buffer(sbi->s_sbh);
1476
1477     /* Update the free space counts */
1478     percpu_counter_add(&sbi->s_freeclusters_counter,
1479                EXT4_NUM_B2C(sbi, free_blocks));
1480     percpu_counter_add(&sbi->s_freeinodes_counter,
1481                EXT4_INODES_PER_GROUP(sb) * flex_gd->count);
1482
1483     ext4_debug("free blocks count %llu",
1484            percpu_counter_read(&sbi->s_freeclusters_counter));
1485     if (ext4_has_feature_flex_bg(sb) && sbi->s_log_groups_per_flex) {
1486         ext4_group_t flex_group;
1487         struct flex_groups *fg;
1488
1489         flex_group = ext4_flex_group(sbi, group_data[0].group);
1490         fg = sbi_array_rcu_deref(sbi, s_flex_groups, flex_group);
1491         atomic64_add(EXT4_NUM_B2C(sbi, free_blocks),
1492                  &fg->free_clusters);
1493         atomic_add(EXT4_INODES_PER_GROUP(sb) * flex_gd->count,
1494                &fg->free_inodes);
1495     }
1496
1497     /*
1498      * Update the fs overhead information.
1499      *
1500      * For bigalloc, if the superblock already has a properly calculated
1501      * overhead, update it with a value based on numbers already computed
1502      * above for the newly allocated capacity.
1503      */
1504     if (ext4_has_feature_bigalloc(sb) && (sbi->s_overhead != 0))
1505         ext4_add_overhead(sb,
1506             EXT4_NUM_B2C(sbi, blocks_count - free_blocks));
1507     else
1508         ext4_calculate_overhead(sb);
1509     es->s_overhead_clusters = cpu_to_le32(sbi->s_overhead);
1510
1511     if (test_opt(sb, DEBUG))
1512         printk(KERN_DEBUG "EXT4-fs: added group %u:"
1513                "%llu blocks(%llu free %llu reserved)\n", flex_gd->count,
1514                blocks_count, free_blocks, reserved_blocks);
1515 }
1516
1517 /* Add a flex group to an fs. Ensure we handle all possible error conditions
1518  * _before_ we start modifying the filesystem, because we cannot abort the
1519  * transaction and not have it write the data to disk.
1520  */
1521 static int ext4_flex_group_add(struct super_block *sb,
1522                    struct inode *resize_inode,
1523                    struct ext4_new_flex_group_data *flex_gd)
1524 {
1525     struct ext4_sb_info *sbi = EXT4_SB(sb);
1526     struct ext4_super_block *es = sbi->s_es;
1527     ext4_fsblk_t o_blocks_count;
1528     ext4_grpblk_t last;
1529     ext4_group_t group;
1530     handle_t *handle;
1531     unsigned reserved_gdb;
1532     int err = 0, err2 = 0, credit;
1533
1534     BUG_ON(!flex_gd->count || !flex_gd->groups || !flex_gd->bg_flags);
1535
1536     reserved_gdb = le16_to_cpu(es->s_reserved_gdt_blocks);
1537     o_blocks_count = ext4_blocks_count(es);
1538     ext4_get_group_no_and_offset(sb, o_blocks_count, &group, &last);
1539     BUG_ON(last);
1540
1541     err = setup_new_flex_group_blocks(sb, flex_gd);
1542     if (err)
1543         goto exit;
1544     /*
1545      * We will always be modifying at least the superblock and  GDT
1546      * blocks.  If we are adding a group past the last current GDT block,
1547      * we will also modify the inode and the dindirect block.  If we
1548      * are adding a group with superblock/GDT backups  we will also
1549      * modify each of the reserved GDT dindirect blocks.
1550      */
1551     credit = 3; /* sb, resize inode, resize inode dindirect */
1552     /* GDT blocks */
1553     credit += 1 + DIV_ROUND_UP(flex_gd->count, EXT4_DESC_PER_BLOCK(sb));
1554     credit += reserved_gdb; /* Reserved GDT dindirect blocks */
1555     handle = ext4_journal_start_sb(sb, EXT4_HT_RESIZE, credit);
1556     if (IS_ERR(handle)) {
1557         err = PTR_ERR(handle);
1558         goto exit;
1559     }
1560
1561     BUFFER_TRACE(sbi->s_sbh, "get_write_access");
1562     err = ext4_journal_get_write_access(handle, sb, sbi->s_sbh,
1563                         EXT4_JTR_NONE);
1564     if (err)
1565         goto exit_journal;
1566
1567     group = flex_gd->groups[0].group;
1568     BUG_ON(group != sbi->s_groups_count);
1569     err = ext4_add_new_descs(handle, sb, group,
1570                 resize_inode, flex_gd->count);
1571     if (err)
1572         goto exit_journal;
1573
1574     err = ext4_setup_new_descs(handle, sb, flex_gd);
1575     if (err)
1576         goto exit_journal;
1577
1578     ext4_update_super(sb, flex_gd);
1579
1580     err = ext4_handle_dirty_metadata(handle, NULL, sbi->s_sbh);
1581
1582 exit_journal:
1583     err2 = ext4_journal_stop(handle);
1584     if (!err)
1585         err = err2;
1586
1587     if (!err) {
1588         int gdb_num = group / EXT4_DESC_PER_BLOCK(sb);
1589         int gdb_num_end = ((group + flex_gd->count - 1) /
1590                    EXT4_DESC_PER_BLOCK(sb));
1591         int meta_bg = ext4_has_feature_meta_bg(sb);
1592         sector_t old_gdb = 0;
1593
1594         update_backups(sb, sbi->s_sbh->b_blocknr, (char *)es,
1595                    sizeof(struct ext4_super_block), 0);
1596         for (; gdb_num <= gdb_num_end; gdb_num++) {
1597             struct buffer_head *gdb_bh;
1598
1599             gdb_bh = sbi_array_rcu_deref(sbi, s_group_desc,
1600                              gdb_num);
1601             if (old_gdb == gdb_bh->b_blocknr)
1602                 continue;
1603             update_backups(sb, gdb_bh->b_blocknr, gdb_bh->b_data,
1604                        gdb_bh->b_size, meta_bg);
1605             old_gdb = gdb_bh->b_blocknr;
1606         }
1607     }
1608 exit:
1609     return err;
1610 }
1611
1612 static int ext4_setup_next_flex_gd(struct super_block *sb,
1613                     struct ext4_new_flex_group_data *flex_gd,
1614                     ext4_fsblk_t n_blocks_count,
1615                     unsigned long flexbg_size)
1616 {
1617     struct ext4_sb_info *sbi = EXT4_SB(sb);
1618     struct ext4_super_block *es = sbi->s_es;
1619     struct ext4_new_group_data *group_data = flex_gd->groups;
1620     ext4_fsblk_t o_blocks_count;
1621     ext4_group_t n_group;
1622     ext4_group_t group;
1623     ext4_group_t last_group;
1624     ext4_grpblk_t last;
1625     ext4_grpblk_t clusters_per_group;
1626     unsigned long i;
1627
1628     clusters_per_group = EXT4_CLUSTERS_PER_GROUP(sb);
1629
1630     o_blocks_count = ext4_blocks_count(es);
1631
1632     if (o_blocks_count == n_blocks_count)
1633         return 0;
1634
1635     ext4_get_group_no_and_offset(sb, o_blocks_count, &group, &last);
1636     BUG_ON(last);
1637     ext4_get_group_no_and_offset(sb, n_blocks_count - 1, &n_group, &last);
1638
1639     last_group = group | (flexbg_size - 1);
1640     if (last_group > n_group)
1641         last_group = n_group;
1642
1643     flex_gd->count = last_group - group + 1;
1644
1645     for (i = 0; i < flex_gd->count; i++) {
1646         int overhead;
1647
1648         group_data[i].group = group + i;
1649         group_data[i].blocks_count = EXT4_BLOCKS_PER_GROUP(sb);
1650         overhead = ext4_group_overhead_blocks(sb, group + i);
1651         group_data[i].mdata_blocks = overhead;
1652         group_data[i].free_clusters_count = EXT4_CLUSTERS_PER_GROUP(sb);
1653         if (ext4_has_group_desc_csum(sb)) {
1654             flex_gd->bg_flags[i] = EXT4_BG_BLOCK_UNINIT |
1655                            EXT4_BG_INODE_UNINIT;
1656             if (!test_opt(sb, INIT_INODE_TABLE))
1657                 flex_gd->bg_flags[i] |= EXT4_BG_INODE_ZEROED;
1658         } else
1659             flex_gd->bg_flags[i] = EXT4_BG_INODE_ZEROED;
1660     }
1661
1662     if (last_group == n_group && ext4_has_group_desc_csum(sb))
1663         /* We need to initialize block bitmap of last group. */
1664         flex_gd->bg_flags[i - 1] &= ~EXT4_BG_BLOCK_UNINIT;
1665
1666     if ((last_group == n_group) && (last != clusters_per_group - 1)) {
1667         group_data[i - 1].blocks_count = EXT4_C2B(sbi, last + 1);
1668         group_data[i - 1].free_clusters_count -= clusters_per_group -
1669                                last - 1;
1670     }
1671
1672     return 1;
1673 }
1674
1675 /* Add group descriptor data to an existing or new group descriptor block.
1676  * Ensure we handle all possible error conditions _before_ we start modifying
1677  * the filesystem, because we cannot abort the transaction and not have it
1678  * write the data to disk.
1679  *
1680  * If we are on a GDT block boundary, we need to get the reserved GDT block.
1681  * Otherwise, we may need to add backup GDT blocks for a sparse group.
1682  *
1683  * We only need to hold the superblock lock while we are actually adding
1684  * in the new group's counts to the superblock.  Prior to that we have
1685  * not really "added" the group at all.  We re-check that we are still
1686  * adding in the last group in case things have changed since verifying.
1687  */
1688 int ext4_group_add(struct super_block *sb, struct ext4_new_group_data *input)
1689 {
1690     struct ext4_new_flex_group_data flex_gd;
1691     struct ext4_sb_info *sbi = EXT4_SB(sb);
1692     struct ext4_super_block *es = sbi->s_es;
1693     int reserved_gdb = ext4_bg_has_super(sb, input->group) ?
1694         le16_to_cpu(es->s_reserved_gdt_blocks) : 0;
1695     struct inode *inode = NULL;
1696     int gdb_off;
1697     int err;
1698     __u16 bg_flags = 0;
1699
1700     gdb_off = input->group % EXT4_DESC_PER_BLOCK(sb);
1701
1702     if (gdb_off == 0 && !ext4_has_feature_sparse_super(sb)) {
1703         ext4_warning(sb, "Can't resize non-sparse filesystem further");
1704         return -EPERM;
1705     }
1706
1707     if (ext4_blocks_count(es) + input->blocks_count <
1708         ext4_blocks_count(es)) {
1709         ext4_warning(sb, "blocks_count overflow");
1710         return -EINVAL;
1711     }
1712
1713     if (le32_to_cpu(es->s_inodes_count) + EXT4_INODES_PER_GROUP(sb) <
1714         le32_to_cpu(es->s_inodes_count)) {
1715         ext4_warning(sb, "inodes_count overflow");
1716         return -EINVAL;
1717     }
1718
1719     if (reserved_gdb || gdb_off == 0) {
1720         if (!ext4_has_feature_resize_inode(sb) ||
1721             !le16_to_cpu(es->s_reserved_gdt_blocks)) {
1722             ext4_warning(sb,
1723                      "No reserved GDT blocks, can't resize");
1724             return -EPERM;
1725         }
1726         inode = ext4_iget(sb, EXT4_RESIZE_INO, EXT4_IGET_SPECIAL);
1727         if (IS_ERR(inode)) {
1728             ext4_warning(sb, "Error opening resize inode");
1729             return PTR_ERR(inode);
1730         }
1731     }
1732
1733
1734     err = verify_group_input(sb, input);
1735     if (err)
1736         goto out;
1737
1738     err = ext4_alloc_flex_bg_array(sb, input->group + 1);
1739     if (err)
1740         goto out;
1741
1742     err = ext4_mb_alloc_groupinfo(sb, input->group + 1);
1743     if (err)
1744         goto out;
1745
1746     flex_gd.count = 1;
1747     flex_gd.groups = input;
1748     flex_gd.bg_flags = &bg_flags;
1749     err = ext4_flex_group_add(sb, inode, &flex_gd);
1750 out:
1751     iput(inode);
1752     return err;
1753 } /* ext4_group_add */
1754
1755 /*
1756  * extend a group without checking assuming that checking has been done.
1757  */
1758 static int ext4_group_extend_no_check(struct super_block *sb,
1759                       ext4_fsblk_t o_blocks_count, ext4_grpblk_t add)
1760 {
1761     struct ext4_super_block *es = EXT4_SB(sb)->s_es;
1762     handle_t *handle;
1763     int err = 0, err2;
1764
1765     /* We will update the superblock, one block bitmap, and
1766      * one group descriptor via ext4_group_add_blocks().
1767      */
1768     handle = ext4_journal_start_sb(sb, EXT4_HT_RESIZE, 3);
1769     if (IS_ERR(handle)) {
1770         err = PTR_ERR(handle);
1771         ext4_warning(sb, "error %d on journal start", err);
1772         return err;
1773     }
1774
1775     BUFFER_TRACE(EXT4_SB(sb)->s_sbh, "get_write_access");
1776     err = ext4_journal_get_write_access(handle, sb, EXT4_SB(sb)->s_sbh,
1777                         EXT4_JTR_NONE);
1778     if (err) {
1779         ext4_warning(sb, "error %d on journal write access", err);
1780         goto errout;
1781     }
1782
1783     lock_buffer(EXT4_SB(sb)->s_sbh);
1784     ext4_blocks_count_set(es, o_blocks_count + add);
1785     ext4_free_blocks_count_set(es, ext4_free_blocks_count(es) + add);
1786     ext4_superblock_csum_set(sb);
1787     unlock_buffer(EXT4_SB(sb)->s_sbh);
1788     ext4_debug("freeing blocks %llu through %llu\n", o_blocks_count,
1789            o_blocks_count + add);
1790     /* We add the blocks to the bitmap and set the group need init bit */
1791     err = ext4_group_add_blocks(handle, sb, o_blocks_count, add);
1792     if (err)
1793         goto errout;
1794     ext4_handle_dirty_metadata(handle, NULL, EXT4_SB(sb)->s_sbh);
1795     ext4_debug("freed blocks %llu through %llu\n", o_blocks_count,
1796            o_blocks_count + add);
1797 errout:
1798     err2 = ext4_journal_stop(handle);
1799     if (err2 && !err)
1800         err = err2;
1801
1802     if (!err) {
1803         if (test_opt(sb, DEBUG))
1804             printk(KERN_DEBUG "EXT4-fs: extended group to %llu "
1805                    "blocks\n", ext4_blocks_count(es));
1806         update_backups(sb, EXT4_SB(sb)->s_sbh->b_blocknr,
1807                    (char *)es, sizeof(struct ext4_super_block), 0);
1808     }
1809     return err;
1810 }
1811
1812 /*
1813  * Extend the filesystem to the new number of blocks specified.  This entry
1814  * point is only used to extend the current filesystem to the end of the last
1815  * existing group.  It can be accessed via ioctl, or by "remount,resize=<size>"
1816  * for emergencies (because it has no dependencies on reserved blocks).
1817  *
1818  * If we _really_ wanted, we could use default values to call ext4_group_add()
1819  * allow the "remount" trick to work for arbitrary resizing, assuming enough
1820  * GDT blocks are reserved to grow to the desired size.
1821  */
1822 int ext4_group_extend(struct super_block *sb, struct ext4_super_block *es,
1823               ext4_fsblk_t n_blocks_count)
1824 {
1825     ext4_fsblk_t o_blocks_count;
1826     ext4_grpblk_t last;
1827     ext4_grpblk_t add;
1828     struct buffer_head *bh;
1829     int err;
1830     ext4_group_t group;
1831
1832     o_blocks_count = ext4_blocks_count(es);
1833
1834     if (test_opt(sb, DEBUG))
1835         ext4_msg(sb, KERN_DEBUG,
1836              "extending last group from %llu to %llu blocks",
1837              o_blocks_count, n_blocks_count);
1838
1839     if (n_blocks_count == 0 || n_blocks_count == o_blocks_count)
1840         return 0;
1841
1842     if (n_blocks_count > (sector_t)(~0ULL) >> (sb->s_blocksize_bits - 9)) {
1843         ext4_msg(sb, KERN_ERR,
1844              "filesystem too large to resize to %llu blocks safely",
1845              n_blocks_count);
1846         return -EINVAL;
1847     }
1848
1849     if (n_blocks_count < o_blocks_count) {
1850         ext4_warning(sb, "can't shrink FS - resize aborted");
1851         return -EINVAL;
1852     }
1853
1854     /* Handle the remaining blocks in the last group only. */
1855     ext4_get_group_no_and_offset(sb, o_blocks_count, &group, &last);
1856
1857     if (last == 0) {
1858         ext4_warning(sb, "need to use ext2online to resize further");
1859         return -EPERM;
1860     }
1861
1862     add = EXT4_BLOCKS_PER_GROUP(sb) - last;
1863
1864     if (o_blocks_count + add < o_blocks_count) {
1865         ext4_warning(sb, "blocks_count overflow");
1866         return -EINVAL;
1867     }
1868
1869     if (o_blocks_count + add > n_blocks_count)
1870         add = n_blocks_count - o_blocks_count;
1871
1872     if (o_blocks_count + add < n_blocks_count)
1873         ext4_warning(sb, "will only finish group (%llu blocks, %u new)",
1874                  o_blocks_count + add, add);
1875
1876     /* See if the device is actually as big as what was requested */
1877     bh = ext4_sb_bread(sb, o_blocks_count + add - 1, 0);
1878     if (IS_ERR(bh)) {
1879         ext4_warning(sb, "can't read last block, resize aborted");
1880         return -ENOSPC;
1881     }
1882     brelse(bh);
1883
1884     err = ext4_group_extend_no_check(sb, o_blocks_count, add);
1885     return err;
1886 } /* ext4_group_extend */
1887
1888
1889 static int num_desc_blocks(struct super_block *sb, ext4_group_t groups)
1890 {
1891     return (groups + EXT4_DESC_PER_BLOCK(sb) - 1) / EXT4_DESC_PER_BLOCK(sb);
1892 }
1893
1894 /*
1895  * Release the resize inode and drop the resize_inode feature if there
1896  * are no more reserved gdt blocks, and then convert the file system
1897  * to enable meta_bg
1898  */
1899 static int ext4_convert_meta_bg(struct super_block *sb, struct inode *inode)
1900 {
1901     handle_t *handle;
1902     struct ext4_sb_info *sbi = EXT4_SB(sb);
1903     struct ext4_super_block *es = sbi->s_es;
1904     struct ext4_inode_info *ei = EXT4_I(inode);
1905     ext4_fsblk_t nr;
1906     int i, ret, err = 0;
1907     int credits = 1;
1908
1909     ext4_msg(sb, KERN_INFO, "Converting file system to meta_bg");
1910     if (inode) {
1911         if (es->s_reserved_gdt_blocks) {
1912             ext4_error(sb, "Unexpected non-zero "
1913                    "s_reserved_gdt_blocks");
1914             return -EPERM;
1915         }
1916
1917         /* Do a quick sanity check of the resize inode */
1918         if (inode->i_blocks != 1 << (inode->i_blkbits -
1919                          (9 - sbi->s_cluster_bits)))
1920             goto invalid_resize_inode;
1921         for (i = 0; i < EXT4_N_BLOCKS; i++) {
1922             if (i == EXT4_DIND_BLOCK) {
1923                 if (ei->i_data[i])
1924                     continue;
1925                 else
1926                     goto invalid_resize_inode;
1927             }
1928             if (ei->i_data[i])
1929                 goto invalid_resize_inode;
1930         }
1931         credits += 3;   /* block bitmap, bg descriptor, resize inode */
1932     }
1933
1934     handle = ext4_journal_start_sb(sb, EXT4_HT_RESIZE, credits);
1935     if (IS_ERR(handle))
1936         return PTR_ERR(handle);
1937
1938     BUFFER_TRACE(sbi->s_sbh, "get_write_access");
1939     err = ext4_journal_get_write_access(handle, sb, sbi->s_sbh,
1940                         EXT4_JTR_NONE);
1941     if (err)
1942         goto errout;
1943
1944     lock_buffer(sbi->s_sbh);
1945     ext4_clear_feature_resize_inode(sb);
1946     ext4_set_feature_meta_bg(sb);
1947     sbi->s_es->s_first_meta_bg =
1948         cpu_to_le32(num_desc_blocks(sb, sbi->s_groups_count));
1949     ext4_superblock_csum_set(sb);
1950     unlock_buffer(sbi->s_sbh);
1951
1952     err = ext4_handle_dirty_metadata(handle, NULL, sbi->s_sbh);
1953     if (err) {
1954         ext4_std_error(sb, err);
1955         goto errout;
1956     }
1957
1958     if (inode) {
1959         nr = le32_to_cpu(ei->i_data[EXT4_DIND_BLOCK]);
1960         ext4_free_blocks(handle, inode, NULL, nr, 1,
1961                  EXT4_FREE_BLOCKS_METADATA |
1962                  EXT4_FREE_BLOCKS_FORGET);
1963         ei->i_data[EXT4_DIND_BLOCK] = 0;
1964         inode->i_blocks = 0;
1965
1966         err = ext4_mark_inode_dirty(handle, inode);
1967         if (err)
1968             ext4_std_error(sb, err);
1969     }
1970
1971 errout:
1972     ret = ext4_journal_stop(handle);
1973     if (!err)
1974         err = ret;
1975     return ret;
1976
1977 invalid_resize_inode:
1978     ext4_error(sb, "corrupted/inconsistent resize inode");
1979     return -EINVAL;
1980 }
1981
1982 /*
1983  * ext4_resize_fs() resizes a fs to new size specified by @n_blocks_count
1984  *
1985  * @sb: super block of the fs to be resized
1986  * @n_blocks_count: the number of blocks resides in the resized fs
1987  */
1988 int ext4_resize_fs(struct super_block *sb, ext4_fsblk_t n_blocks_count)
1989 {
1990     struct ext4_new_flex_group_data *flex_gd = NULL;
1991     struct ext4_sb_info *sbi = EXT4_SB(sb);
1992     struct ext4_super_block *es = sbi->s_es;
1993     struct buffer_head *bh;
1994     struct inode *resize_inode = NULL;
1995     ext4_grpblk_t add, offset;
1996     unsigned long n_desc_blocks;
1997     unsigned long o_desc_blocks;
1998     ext4_group_t o_group;
1999     ext4_group_t n_group;
2000     ext4_fsblk_t o_blocks_count;
2001     ext4_fsblk_t n_blocks_count_retry = 0;
2002     unsigned long last_update_time = 0;
2003     int err = 0, flexbg_size = 1 << sbi->s_log_groups_per_flex;
2004     int meta_bg;
2005
2006     /* See if the device is actually as big as what was requested */
2007     bh = ext4_sb_bread(sb, n_blocks_count - 1, 0);
2008     if (IS_ERR(bh)) {
2009         ext4_warning(sb, "can't read last block, resize aborted");
2010         return -ENOSPC;
2011     }
2012     brelse(bh);
2013
2014     /*
2015      * For bigalloc, trim the requested size to the nearest cluster
2016      * boundary to avoid creating an unusable filesystem. We do this
2017      * silently, instead of returning an error, to avoid breaking
2018      * callers that blindly resize the filesystem to the full size of
2019      * the underlying block device.
2020      */
2021     if (ext4_has_feature_bigalloc(sb))
2022         n_blocks_count &= ~((1 << EXT4_CLUSTER_BITS(sb)) - 1);
2023
2024 retry:
2025     o_blocks_count = ext4_blocks_count(es);
2026
2027     ext4_msg(sb, KERN_INFO, "resizing filesystem from %llu "
2028          "to %llu blocks", o_blocks_count, n_blocks_count);
2029
2030     if (n_blocks_count < o_blocks_count) {
2031         /* On-line shrinking not supported */
2032         ext4_warning(sb, "can't shrink FS - resize aborted");
2033         return -EINVAL;
2034     }
2035
2036     if (n_blocks_count == o_blocks_count)
2037         /* Nothing need to do */
2038         return 0;
2039
2040     n_group = ext4_get_group_number(sb, n_blocks_count - 1);
2041     if (n_group >= (0xFFFFFFFFUL / EXT4_INODES_PER_GROUP(sb))) {
2042         ext4_warning(sb, "resize would cause inodes_count overflow");
2043         return -EINVAL;
2044     }
2045     ext4_get_group_no_and_offset(sb, o_blocks_count - 1, &o_group, &offset);
2046
2047     n_desc_blocks = num_desc_blocks(sb, n_group + 1);
2048     o_desc_blocks = num_desc_blocks(sb, sbi->s_groups_count);
2049
2050     meta_bg = ext4_has_feature_meta_bg(sb);
2051
2052     if (ext4_has_feature_resize_inode(sb)) {
2053         if (meta_bg) {
2054             ext4_error(sb, "resize_inode and meta_bg enabled "
2055                    "simultaneously");
2056             return -EINVAL;
2057         }
2058         if (n_desc_blocks > o_desc_blocks +
2059             le16_to_cpu(es->s_reserved_gdt_blocks)) {
2060             n_blocks_count_retry = n_blocks_count;
2061             n_desc_blocks = o_desc_blocks +
2062                 le16_to_cpu(es->s_reserved_gdt_blocks);
2063             n_group = n_desc_blocks * EXT4_DESC_PER_BLOCK(sb);
2064             n_blocks_count = (ext4_fsblk_t)n_group *
2065                 EXT4_BLOCKS_PER_GROUP(sb) +
2066                 le32_to_cpu(es->s_first_data_block);
2067             n_group--; /* set to last group number */
2068         }
2069
2070         if (!resize_inode)
2071             resize_inode = ext4_iget(sb, EXT4_RESIZE_INO,
2072                          EXT4_IGET_SPECIAL);
2073         if (IS_ERR(resize_inode)) {
2074             ext4_warning(sb, "Error opening resize inode");
2075             return PTR_ERR(resize_inode);
2076         }
2077     }
2078
2079     if ((!resize_inode && !meta_bg) || n_blocks_count == o_blocks_count) {
2080         err = ext4_convert_meta_bg(sb, resize_inode);
2081         if (err)
2082             goto out;
2083         if (resize_inode) {
2084             iput(resize_inode);
2085             resize_inode = NULL;
2086         }
2087         if (n_blocks_count_retry) {
2088             n_blocks_count = n_blocks_count_retry;
2089             n_blocks_count_retry = 0;
2090             goto retry;
2091         }
2092     }
2093
2094     /*
2095      * Make sure the last group has enough space so that it's
2096      * guaranteed to have enough space for all metadata blocks
2097      * that it might need to hold.  (We might not need to store
2098      * the inode table blocks in the last block group, but there
2099      * will be cases where this might be needed.)
2100      */
2101     if ((ext4_group_first_block_no(sb, n_group) +
2102          ext4_group_overhead_blocks(sb, n_group) + 2 +
2103          sbi->s_itb_per_group + sbi->s_cluster_ratio) >= n_blocks_count) {
2104         n_blocks_count = ext4_group_first_block_no(sb, n_group);
2105         n_group--;
2106         n_blocks_count_retry = 0;
2107         if (resize_inode) {
2108             iput(resize_inode);
2109             resize_inode = NULL;
2110         }
2111         goto retry;
2112     }
2113
2114     /* extend the last group */
2115     if (n_group == o_group)
2116         add = n_blocks_count - o_blocks_count;
2117     else
2118         add = EXT4_C2B(sbi, EXT4_CLUSTERS_PER_GROUP(sb) - (offset + 1));
2119     if (add > 0) {
2120         err = ext4_group_extend_no_check(sb, o_blocks_count, add);
2121         if (err)
2122             goto out;
2123     }
2124
2125     if (ext4_blocks_count(es) == n_blocks_count)
2126         goto out;
2127
2128     err = ext4_alloc_flex_bg_array(sb, n_group + 1);
2129     if (err)
2130         goto out;
2131
2132     err = ext4_mb_alloc_groupinfo(sb, n_group + 1);
2133     if (err)
2134         goto out;
2135
2136     flex_gd = alloc_flex_gd(flexbg_size);
2137     if (flex_gd == NULL) {
2138         err = -ENOMEM;
2139         goto out;
2140     }
2141
2142     /* Add flex groups. Note that a regular group is a
2143      * flex group with 1 group.
2144      */
2145     while (ext4_setup_next_flex_gd(sb, flex_gd, n_blocks_count,
2146                           flexbg_size)) {
2147         if (time_is_before_jiffies(last_update_time + HZ * 10)) {
2148             if (last_update_time)
2149                 ext4_msg(sb, KERN_INFO,
2150                      "resized to %llu blocks",
2151                      ext4_blocks_count(es));
2152             last_update_time = jiffies;
2153         }
2154         if (ext4_alloc_group_tables(sb, flex_gd, flexbg_size) != 0)
2155             break;
2156         err = ext4_flex_group_add(sb, resize_inode, flex_gd);
2157         if (unlikely(err))
2158             break;
2159     }
2160
2161     if (!err && n_blocks_count_retry) {
2162         n_blocks_count = n_blocks_count_retry;
2163         n_blocks_count_retry = 0;
2164         free_flex_gd(flex_gd);
2165         flex_gd = NULL;
2166         if (resize_inode) {
2167             iput(resize_inode);
2168             resize_inode = NULL;
2169         }
2170         goto retry;
2171     }
2172
2173 out:
2174     if (flex_gd)
2175         free_flex_gd(flex_gd);
2176     if (resize_inode != NULL)
2177         iput(resize_inode);
2178     if (err)
2179         ext4_warning(sb, "error (%d) occurred during "
2180                  "file system resize", err);
2181     ext4_msg(sb, KERN_INFO, "resized filesystem to %llu",
2182          ext4_blocks_count(es));
2183     return err;
2184 }