Back to home page

OSCL-LXR

 
 

    


0001 // SPDX-License-Identifier: GPL-2.0
0002 /*
0003  *  linux/fs/ext4/indirect.c
0004  *
0005  *  from
0006  *
0007  *  linux/fs/ext4/inode.c
0008  *
0009  * Copyright (C) 1992, 1993, 1994, 1995
0010  * Remy Card (card@masi.ibp.fr)
0011  * Laboratoire MASI - Institut Blaise Pascal
0012  * Universite Pierre et Marie Curie (Paris VI)
0013  *
0014  *  from
0015  *
0016  *  linux/fs/minix/inode.c
0017  *
0018  *  Copyright (C) 1991, 1992  Linus Torvalds
0019  *
0020  *  Goal-directed block allocation by Stephen Tweedie
0021  *  (sct@redhat.com), 1993, 1998
0022  */
0023 
0024 #include "ext4_jbd2.h"
0025 #include "truncate.h"
0026 #include <linux/dax.h>
0027 #include <linux/uio.h>
0028 
0029 #include <trace/events/ext4.h>
0030 
0031 typedef struct {
0032     __le32  *p;
0033     __le32  key;
0034     struct buffer_head *bh;
0035 } Indirect;
0036 
0037 static inline void add_chain(Indirect *p, struct buffer_head *bh, __le32 *v)
0038 {
0039     p->key = *(p->p = v);
0040     p->bh = bh;
0041 }
0042 
0043 /**
0044  *  ext4_block_to_path - parse the block number into array of offsets
0045  *  @inode: inode in question (we are only interested in its superblock)
0046  *  @i_block: block number to be parsed
0047  *  @offsets: array to store the offsets in
0048  *  @boundary: set this non-zero if the referred-to block is likely to be
0049  *         followed (on disk) by an indirect block.
0050  *
0051  *  To store the locations of file's data ext4 uses a data structure common
0052  *  for UNIX filesystems - tree of pointers anchored in the inode, with
0053  *  data blocks at leaves and indirect blocks in intermediate nodes.
0054  *  This function translates the block number into path in that tree -
0055  *  return value is the path length and @offsets[n] is the offset of
0056  *  pointer to (n+1)th node in the nth one. If @block is out of range
0057  *  (negative or too large) warning is printed and zero returned.
0058  *
0059  *  Note: function doesn't find node addresses, so no IO is needed. All
0060  *  we need to know is the capacity of indirect blocks (taken from the
0061  *  inode->i_sb).
0062  */
0063 
0064 /*
0065  * Portability note: the last comparison (check that we fit into triple
0066  * indirect block) is spelled differently, because otherwise on an
0067  * architecture with 32-bit longs and 8Kb pages we might get into trouble
0068  * if our filesystem had 8Kb blocks. We might use long long, but that would
0069  * kill us on x86. Oh, well, at least the sign propagation does not matter -
0070  * i_block would have to be negative in the very beginning, so we would not
0071  * get there at all.
0072  */
0073 
0074 static int ext4_block_to_path(struct inode *inode,
0075                   ext4_lblk_t i_block,
0076                   ext4_lblk_t offsets[4], int *boundary)
0077 {
0078     int ptrs = EXT4_ADDR_PER_BLOCK(inode->i_sb);
0079     int ptrs_bits = EXT4_ADDR_PER_BLOCK_BITS(inode->i_sb);
0080     const long direct_blocks = EXT4_NDIR_BLOCKS,
0081         indirect_blocks = ptrs,
0082         double_blocks = (1 << (ptrs_bits * 2));
0083     int n = 0;
0084     int final = 0;
0085 
0086     if (i_block < direct_blocks) {
0087         offsets[n++] = i_block;
0088         final = direct_blocks;
0089     } else if ((i_block -= direct_blocks) < indirect_blocks) {
0090         offsets[n++] = EXT4_IND_BLOCK;
0091         offsets[n++] = i_block;
0092         final = ptrs;
0093     } else if ((i_block -= indirect_blocks) < double_blocks) {
0094         offsets[n++] = EXT4_DIND_BLOCK;
0095         offsets[n++] = i_block >> ptrs_bits;
0096         offsets[n++] = i_block & (ptrs - 1);
0097         final = ptrs;
0098     } else if (((i_block -= double_blocks) >> (ptrs_bits * 2)) < ptrs) {
0099         offsets[n++] = EXT4_TIND_BLOCK;
0100         offsets[n++] = i_block >> (ptrs_bits * 2);
0101         offsets[n++] = (i_block >> ptrs_bits) & (ptrs - 1);
0102         offsets[n++] = i_block & (ptrs - 1);
0103         final = ptrs;
0104     } else {
0105         ext4_warning(inode->i_sb, "block %lu > max in inode %lu",
0106                  i_block + direct_blocks +
0107                  indirect_blocks + double_blocks, inode->i_ino);
0108     }
0109     if (boundary)
0110         *boundary = final - 1 - (i_block & (ptrs - 1));
0111     return n;
0112 }
0113 
0114 /**
0115  *  ext4_get_branch - read the chain of indirect blocks leading to data
0116  *  @inode: inode in question
0117  *  @depth: depth of the chain (1 - direct pointer, etc.)
0118  *  @offsets: offsets of pointers in inode/indirect blocks
0119  *  @chain: place to store the result
0120  *  @err: here we store the error value
0121  *
0122  *  Function fills the array of triples <key, p, bh> and returns %NULL
0123  *  if everything went OK or the pointer to the last filled triple
0124  *  (incomplete one) otherwise. Upon the return chain[i].key contains
0125  *  the number of (i+1)-th block in the chain (as it is stored in memory,
0126  *  i.e. little-endian 32-bit), chain[i].p contains the address of that
0127  *  number (it points into struct inode for i==0 and into the bh->b_data
0128  *  for i>0) and chain[i].bh points to the buffer_head of i-th indirect
0129  *  block for i>0 and NULL for i==0. In other words, it holds the block
0130  *  numbers of the chain, addresses they were taken from (and where we can
0131  *  verify that chain did not change) and buffer_heads hosting these
0132  *  numbers.
0133  *
0134  *  Function stops when it stumbles upon zero pointer (absent block)
0135  *      (pointer to last triple returned, *@err == 0)
0136  *  or when it gets an IO error reading an indirect block
0137  *      (ditto, *@err == -EIO)
0138  *  or when it reads all @depth-1 indirect blocks successfully and finds
0139  *  the whole chain, all way to the data (returns %NULL, *err == 0).
0140  *
0141  *      Need to be called with
0142  *      down_read(&EXT4_I(inode)->i_data_sem)
0143  */
0144 static Indirect *ext4_get_branch(struct inode *inode, int depth,
0145                  ext4_lblk_t  *offsets,
0146                  Indirect chain[4], int *err)
0147 {
0148     struct super_block *sb = inode->i_sb;
0149     Indirect *p = chain;
0150     struct buffer_head *bh;
0151     int ret = -EIO;
0152 
0153     *err = 0;
0154     /* i_data is not going away, no lock needed */
0155     add_chain(chain, NULL, EXT4_I(inode)->i_data + *offsets);
0156     if (!p->key)
0157         goto no_block;
0158     while (--depth) {
0159         bh = sb_getblk(sb, le32_to_cpu(p->key));
0160         if (unlikely(!bh)) {
0161             ret = -ENOMEM;
0162             goto failure;
0163         }
0164 
0165         if (!bh_uptodate_or_lock(bh)) {
0166             if (ext4_read_bh(bh, 0, NULL) < 0) {
0167                 put_bh(bh);
0168                 goto failure;
0169             }
0170             /* validate block references */
0171             if (ext4_check_indirect_blockref(inode, bh)) {
0172                 put_bh(bh);
0173                 goto failure;
0174             }
0175         }
0176 
0177         add_chain(++p, bh, (__le32 *)bh->b_data + *++offsets);
0178         /* Reader: end */
0179         if (!p->key)
0180             goto no_block;
0181     }
0182     return NULL;
0183 
0184 failure:
0185     *err = ret;
0186 no_block:
0187     return p;
0188 }
0189 
0190 /**
0191  *  ext4_find_near - find a place for allocation with sufficient locality
0192  *  @inode: owner
0193  *  @ind: descriptor of indirect block.
0194  *
0195  *  This function returns the preferred place for block allocation.
0196  *  It is used when heuristic for sequential allocation fails.
0197  *  Rules are:
0198  *    + if there is a block to the left of our position - allocate near it.
0199  *    + if pointer will live in indirect block - allocate near that block.
0200  *    + if pointer will live in inode - allocate in the same
0201  *      cylinder group.
0202  *
0203  * In the latter case we colour the starting block by the callers PID to
0204  * prevent it from clashing with concurrent allocations for a different inode
0205  * in the same block group.   The PID is used here so that functionally related
0206  * files will be close-by on-disk.
0207  *
0208  *  Caller must make sure that @ind is valid and will stay that way.
0209  */
0210 static ext4_fsblk_t ext4_find_near(struct inode *inode, Indirect *ind)
0211 {
0212     struct ext4_inode_info *ei = EXT4_I(inode);
0213     __le32 *start = ind->bh ? (__le32 *) ind->bh->b_data : ei->i_data;
0214     __le32 *p;
0215 
0216     /* Try to find previous block */
0217     for (p = ind->p - 1; p >= start; p--) {
0218         if (*p)
0219             return le32_to_cpu(*p);
0220     }
0221 
0222     /* No such thing, so let's try location of indirect block */
0223     if (ind->bh)
0224         return ind->bh->b_blocknr;
0225 
0226     /*
0227      * It is going to be referred to from the inode itself? OK, just put it
0228      * into the same cylinder group then.
0229      */
0230     return ext4_inode_to_goal_block(inode);
0231 }
0232 
0233 /**
0234  *  ext4_find_goal - find a preferred place for allocation.
0235  *  @inode: owner
0236  *  @block:  block we want
0237  *  @partial: pointer to the last triple within a chain
0238  *
0239  *  Normally this function find the preferred place for block allocation,
0240  *  returns it.
0241  *  Because this is only used for non-extent files, we limit the block nr
0242  *  to 32 bits.
0243  */
0244 static ext4_fsblk_t ext4_find_goal(struct inode *inode, ext4_lblk_t block,
0245                    Indirect *partial)
0246 {
0247     ext4_fsblk_t goal;
0248 
0249     /*
0250      * XXX need to get goal block from mballoc's data structures
0251      */
0252 
0253     goal = ext4_find_near(inode, partial);
0254     goal = goal & EXT4_MAX_BLOCK_FILE_PHYS;
0255     return goal;
0256 }
0257 
0258 /**
0259  *  ext4_blks_to_allocate - Look up the block map and count the number
0260  *  of direct blocks need to be allocated for the given branch.
0261  *
0262  *  @branch: chain of indirect blocks
0263  *  @k: number of blocks need for indirect blocks
0264  *  @blks: number of data blocks to be mapped.
0265  *  @blocks_to_boundary:  the offset in the indirect block
0266  *
0267  *  return the total number of blocks to be allocate, including the
0268  *  direct and indirect blocks.
0269  */
0270 static int ext4_blks_to_allocate(Indirect *branch, int k, unsigned int blks,
0271                  int blocks_to_boundary)
0272 {
0273     unsigned int count = 0;
0274 
0275     /*
0276      * Simple case, [t,d]Indirect block(s) has not allocated yet
0277      * then it's clear blocks on that path have not allocated
0278      */
0279     if (k > 0) {
0280         /* right now we don't handle cross boundary allocation */
0281         if (blks < blocks_to_boundary + 1)
0282             count += blks;
0283         else
0284             count += blocks_to_boundary + 1;
0285         return count;
0286     }
0287 
0288     count++;
0289     while (count < blks && count <= blocks_to_boundary &&
0290         le32_to_cpu(*(branch[0].p + count)) == 0) {
0291         count++;
0292     }
0293     return count;
0294 }
0295 
0296 /**
0297  * ext4_alloc_branch() - allocate and set up a chain of blocks
0298  * @handle: handle for this transaction
0299  * @ar: structure describing the allocation request
0300  * @indirect_blks: number of allocated indirect blocks
0301  * @offsets: offsets (in the blocks) to store the pointers to next.
0302  * @branch: place to store the chain in.
0303  *
0304  *  This function allocates blocks, zeroes out all but the last one,
0305  *  links them into chain and (if we are synchronous) writes them to disk.
0306  *  In other words, it prepares a branch that can be spliced onto the
0307  *  inode. It stores the information about that chain in the branch[], in
0308  *  the same format as ext4_get_branch() would do. We are calling it after
0309  *  we had read the existing part of chain and partial points to the last
0310  *  triple of that (one with zero ->key). Upon the exit we have the same
0311  *  picture as after the successful ext4_get_block(), except that in one
0312  *  place chain is disconnected - *branch->p is still zero (we did not
0313  *  set the last link), but branch->key contains the number that should
0314  *  be placed into *branch->p to fill that gap.
0315  *
0316  *  If allocation fails we free all blocks we've allocated (and forget
0317  *  their buffer_heads) and return the error value the from failed
0318  *  ext4_alloc_block() (normally -ENOSPC). Otherwise we set the chain
0319  *  as described above and return 0.
0320  */
0321 static int ext4_alloc_branch(handle_t *handle,
0322                  struct ext4_allocation_request *ar,
0323                  int indirect_blks, ext4_lblk_t *offsets,
0324                  Indirect *branch)
0325 {
0326     struct buffer_head *        bh;
0327     ext4_fsblk_t            b, new_blocks[4];
0328     __le32              *p;
0329     int             i, j, err, len = 1;
0330 
0331     for (i = 0; i <= indirect_blks; i++) {
0332         if (i == indirect_blks) {
0333             new_blocks[i] = ext4_mb_new_blocks(handle, ar, &err);
0334         } else {
0335             ar->goal = new_blocks[i] = ext4_new_meta_blocks(handle,
0336                     ar->inode, ar->goal,
0337                     ar->flags & EXT4_MB_DELALLOC_RESERVED,
0338                     NULL, &err);
0339             /* Simplify error cleanup... */
0340             branch[i+1].bh = NULL;
0341         }
0342         if (err) {
0343             i--;
0344             goto failed;
0345         }
0346         branch[i].key = cpu_to_le32(new_blocks[i]);
0347         if (i == 0)
0348             continue;
0349 
0350         bh = branch[i].bh = sb_getblk(ar->inode->i_sb, new_blocks[i-1]);
0351         if (unlikely(!bh)) {
0352             err = -ENOMEM;
0353             goto failed;
0354         }
0355         lock_buffer(bh);
0356         BUFFER_TRACE(bh, "call get_create_access");
0357         err = ext4_journal_get_create_access(handle, ar->inode->i_sb,
0358                              bh, EXT4_JTR_NONE);
0359         if (err) {
0360             unlock_buffer(bh);
0361             goto failed;
0362         }
0363 
0364         memset(bh->b_data, 0, bh->b_size);
0365         p = branch[i].p = (__le32 *) bh->b_data + offsets[i];
0366         b = new_blocks[i];
0367 
0368         if (i == indirect_blks)
0369             len = ar->len;
0370         for (j = 0; j < len; j++)
0371             *p++ = cpu_to_le32(b++);
0372 
0373         BUFFER_TRACE(bh, "marking uptodate");
0374         set_buffer_uptodate(bh);
0375         unlock_buffer(bh);
0376 
0377         BUFFER_TRACE(bh, "call ext4_handle_dirty_metadata");
0378         err = ext4_handle_dirty_metadata(handle, ar->inode, bh);
0379         if (err)
0380             goto failed;
0381     }
0382     return 0;
0383 failed:
0384     if (i == indirect_blks) {
0385         /* Free data blocks */
0386         ext4_free_blocks(handle, ar->inode, NULL, new_blocks[i],
0387                  ar->len, 0);
0388         i--;
0389     }
0390     for (; i >= 0; i--) {
0391         /*
0392          * We want to ext4_forget() only freshly allocated indirect
0393          * blocks. Buffer for new_blocks[i] is at branch[i+1].bh
0394          * (buffer at branch[0].bh is indirect block / inode already
0395          * existing before ext4_alloc_branch() was called). Also
0396          * because blocks are freshly allocated, we don't need to
0397          * revoke them which is why we don't set
0398          * EXT4_FREE_BLOCKS_METADATA.
0399          */
0400         ext4_free_blocks(handle, ar->inode, branch[i+1].bh,
0401                  new_blocks[i], 1,
0402                  branch[i+1].bh ? EXT4_FREE_BLOCKS_FORGET : 0);
0403     }
0404     return err;
0405 }
0406 
0407 /**
0408  * ext4_splice_branch() - splice the allocated branch onto inode.
0409  * @handle: handle for this transaction
0410  * @ar: structure describing the allocation request
0411  * @where: location of missing link
0412  * @num:   number of indirect blocks we are adding
0413  *
0414  * This function fills the missing link and does all housekeeping needed in
0415  * inode (->i_blocks, etc.). In case of success we end up with the full
0416  * chain to new block and return 0.
0417  */
0418 static int ext4_splice_branch(handle_t *handle,
0419                   struct ext4_allocation_request *ar,
0420                   Indirect *where, int num)
0421 {
0422     int i;
0423     int err = 0;
0424     ext4_fsblk_t current_block;
0425 
0426     /*
0427      * If we're splicing into a [td]indirect block (as opposed to the
0428      * inode) then we need to get write access to the [td]indirect block
0429      * before the splice.
0430      */
0431     if (where->bh) {
0432         BUFFER_TRACE(where->bh, "get_write_access");
0433         err = ext4_journal_get_write_access(handle, ar->inode->i_sb,
0434                             where->bh, EXT4_JTR_NONE);
0435         if (err)
0436             goto err_out;
0437     }
0438     /* That's it */
0439 
0440     *where->p = where->key;
0441 
0442     /*
0443      * Update the host buffer_head or inode to point to more just allocated
0444      * direct blocks blocks
0445      */
0446     if (num == 0 && ar->len > 1) {
0447         current_block = le32_to_cpu(where->key) + 1;
0448         for (i = 1; i < ar->len; i++)
0449             *(where->p + i) = cpu_to_le32(current_block++);
0450     }
0451 
0452     /* We are done with atomic stuff, now do the rest of housekeeping */
0453     /* had we spliced it onto indirect block? */
0454     if (where->bh) {
0455         /*
0456          * If we spliced it onto an indirect block, we haven't
0457          * altered the inode.  Note however that if it is being spliced
0458          * onto an indirect block at the very end of the file (the
0459          * file is growing) then we *will* alter the inode to reflect
0460          * the new i_size.  But that is not done here - it is done in
0461          * generic_commit_write->__mark_inode_dirty->ext4_dirty_inode.
0462          */
0463         ext4_debug("splicing indirect only\n");
0464         BUFFER_TRACE(where->bh, "call ext4_handle_dirty_metadata");
0465         err = ext4_handle_dirty_metadata(handle, ar->inode, where->bh);
0466         if (err)
0467             goto err_out;
0468     } else {
0469         /*
0470          * OK, we spliced it into the inode itself on a direct block.
0471          */
0472         err = ext4_mark_inode_dirty(handle, ar->inode);
0473         if (unlikely(err))
0474             goto err_out;
0475         ext4_debug("splicing direct\n");
0476     }
0477     return err;
0478 
0479 err_out:
0480     for (i = 1; i <= num; i++) {
0481         /*
0482          * branch[i].bh is newly allocated, so there is no
0483          * need to revoke the block, which is why we don't
0484          * need to set EXT4_FREE_BLOCKS_METADATA.
0485          */
0486         ext4_free_blocks(handle, ar->inode, where[i].bh, 0, 1,
0487                  EXT4_FREE_BLOCKS_FORGET);
0488     }
0489     ext4_free_blocks(handle, ar->inode, NULL, le32_to_cpu(where[num].key),
0490              ar->len, 0);
0491 
0492     return err;
0493 }
0494 
0495 /*
0496  * The ext4_ind_map_blocks() function handles non-extents inodes
0497  * (i.e., using the traditional indirect/double-indirect i_blocks
0498  * scheme) for ext4_map_blocks().
0499  *
0500  * Allocation strategy is simple: if we have to allocate something, we will
0501  * have to go the whole way to leaf. So let's do it before attaching anything
0502  * to tree, set linkage between the newborn blocks, write them if sync is
0503  * required, recheck the path, free and repeat if check fails, otherwise
0504  * set the last missing link (that will protect us from any truncate-generated
0505  * removals - all blocks on the path are immune now) and possibly force the
0506  * write on the parent block.
0507  * That has a nice additional property: no special recovery from the failed
0508  * allocations is needed - we simply release blocks and do not touch anything
0509  * reachable from inode.
0510  *
0511  * `handle' can be NULL if create == 0.
0512  *
0513  * return > 0, # of blocks mapped or allocated.
0514  * return = 0, if plain lookup failed.
0515  * return < 0, error case.
0516  *
0517  * The ext4_ind_get_blocks() function should be called with
0518  * down_write(&EXT4_I(inode)->i_data_sem) if allocating filesystem
0519  * blocks (i.e., flags has EXT4_GET_BLOCKS_CREATE set) or
0520  * down_read(&EXT4_I(inode)->i_data_sem) if not allocating file system
0521  * blocks.
0522  */
0523 int ext4_ind_map_blocks(handle_t *handle, struct inode *inode,
0524             struct ext4_map_blocks *map,
0525             int flags)
0526 {
0527     struct ext4_allocation_request ar;
0528     int err = -EIO;
0529     ext4_lblk_t offsets[4];
0530     Indirect chain[4];
0531     Indirect *partial;
0532     int indirect_blks;
0533     int blocks_to_boundary = 0;
0534     int depth;
0535     int count = 0;
0536     ext4_fsblk_t first_block = 0;
0537 
0538     trace_ext4_ind_map_blocks_enter(inode, map->m_lblk, map->m_len, flags);
0539     ASSERT(!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)));
0540     ASSERT(handle != NULL || (flags & EXT4_GET_BLOCKS_CREATE) == 0);
0541     depth = ext4_block_to_path(inode, map->m_lblk, offsets,
0542                    &blocks_to_boundary);
0543 
0544     if (depth == 0)
0545         goto out;
0546 
0547     partial = ext4_get_branch(inode, depth, offsets, chain, &err);
0548 
0549     /* Simplest case - block found, no allocation needed */
0550     if (!partial) {
0551         first_block = le32_to_cpu(chain[depth - 1].key);
0552         count++;
0553         /*map more blocks*/
0554         while (count < map->m_len && count <= blocks_to_boundary) {
0555             ext4_fsblk_t blk;
0556 
0557             blk = le32_to_cpu(*(chain[depth-1].p + count));
0558 
0559             if (blk == first_block + count)
0560                 count++;
0561             else
0562                 break;
0563         }
0564         goto got_it;
0565     }
0566 
0567     /* Next simple case - plain lookup failed */
0568     if ((flags & EXT4_GET_BLOCKS_CREATE) == 0) {
0569         unsigned epb = inode->i_sb->s_blocksize / sizeof(u32);
0570         int i;
0571 
0572         /*
0573          * Count number blocks in a subtree under 'partial'. At each
0574          * level we count number of complete empty subtrees beyond
0575          * current offset and then descend into the subtree only
0576          * partially beyond current offset.
0577          */
0578         count = 0;
0579         for (i = partial - chain + 1; i < depth; i++)
0580             count = count * epb + (epb - offsets[i] - 1);
0581         count++;
0582         /* Fill in size of a hole we found */
0583         map->m_pblk = 0;
0584         map->m_len = min_t(unsigned int, map->m_len, count);
0585         goto cleanup;
0586     }
0587 
0588     /* Failed read of indirect block */
0589     if (err == -EIO)
0590         goto cleanup;
0591 
0592     /*
0593      * Okay, we need to do block allocation.
0594     */
0595     if (ext4_has_feature_bigalloc(inode->i_sb)) {
0596         EXT4_ERROR_INODE(inode, "Can't allocate blocks for "
0597                  "non-extent mapped inodes with bigalloc");
0598         err = -EFSCORRUPTED;
0599         goto out;
0600     }
0601 
0602     /* Set up for the direct block allocation */
0603     memset(&ar, 0, sizeof(ar));
0604     ar.inode = inode;
0605     ar.logical = map->m_lblk;
0606     if (S_ISREG(inode->i_mode))
0607         ar.flags = EXT4_MB_HINT_DATA;
0608     if (flags & EXT4_GET_BLOCKS_DELALLOC_RESERVE)
0609         ar.flags |= EXT4_MB_DELALLOC_RESERVED;
0610     if (flags & EXT4_GET_BLOCKS_METADATA_NOFAIL)
0611         ar.flags |= EXT4_MB_USE_RESERVED;
0612 
0613     ar.goal = ext4_find_goal(inode, map->m_lblk, partial);
0614 
0615     /* the number of blocks need to allocate for [d,t]indirect blocks */
0616     indirect_blks = (chain + depth) - partial - 1;
0617 
0618     /*
0619      * Next look up the indirect map to count the totoal number of
0620      * direct blocks to allocate for this branch.
0621      */
0622     ar.len = ext4_blks_to_allocate(partial, indirect_blks,
0623                        map->m_len, blocks_to_boundary);
0624 
0625     /*
0626      * Block out ext4_truncate while we alter the tree
0627      */
0628     err = ext4_alloc_branch(handle, &ar, indirect_blks,
0629                 offsets + (partial - chain), partial);
0630 
0631     /*
0632      * The ext4_splice_branch call will free and forget any buffers
0633      * on the new chain if there is a failure, but that risks using
0634      * up transaction credits, especially for bitmaps where the
0635      * credits cannot be returned.  Can we handle this somehow?  We
0636      * may need to return -EAGAIN upwards in the worst case.  --sct
0637      */
0638     if (!err)
0639         err = ext4_splice_branch(handle, &ar, partial, indirect_blks);
0640     if (err)
0641         goto cleanup;
0642 
0643     map->m_flags |= EXT4_MAP_NEW;
0644 
0645     ext4_update_inode_fsync_trans(handle, inode, 1);
0646     count = ar.len;
0647 got_it:
0648     map->m_flags |= EXT4_MAP_MAPPED;
0649     map->m_pblk = le32_to_cpu(chain[depth-1].key);
0650     map->m_len = count;
0651     if (count > blocks_to_boundary)
0652         map->m_flags |= EXT4_MAP_BOUNDARY;
0653     err = count;
0654     /* Clean up and exit */
0655     partial = chain + depth - 1;    /* the whole chain */
0656 cleanup:
0657     while (partial > chain) {
0658         BUFFER_TRACE(partial->bh, "call brelse");
0659         brelse(partial->bh);
0660         partial--;
0661     }
0662 out:
0663     trace_ext4_ind_map_blocks_exit(inode, flags, map, err);
0664     return err;
0665 }
0666 
0667 /*
0668  * Calculate number of indirect blocks touched by mapping @nrblocks logically
0669  * contiguous blocks
0670  */
0671 int ext4_ind_trans_blocks(struct inode *inode, int nrblocks)
0672 {
0673     /*
0674      * With N contiguous data blocks, we need at most
0675      * N/EXT4_ADDR_PER_BLOCK(inode->i_sb) + 1 indirect blocks,
0676      * 2 dindirect blocks, and 1 tindirect block
0677      */
0678     return DIV_ROUND_UP(nrblocks, EXT4_ADDR_PER_BLOCK(inode->i_sb)) + 4;
0679 }
0680 
0681 static int ext4_ind_trunc_restart_fn(handle_t *handle, struct inode *inode,
0682                      struct buffer_head *bh, int *dropped)
0683 {
0684     int err;
0685 
0686     if (bh) {
0687         BUFFER_TRACE(bh, "call ext4_handle_dirty_metadata");
0688         err = ext4_handle_dirty_metadata(handle, inode, bh);
0689         if (unlikely(err))
0690             return err;
0691     }
0692     err = ext4_mark_inode_dirty(handle, inode);
0693     if (unlikely(err))
0694         return err;
0695     /*
0696      * Drop i_data_sem to avoid deadlock with ext4_map_blocks.  At this
0697      * moment, get_block can be called only for blocks inside i_size since
0698      * page cache has been already dropped and writes are blocked by
0699      * i_rwsem. So we can safely drop the i_data_sem here.
0700      */
0701     BUG_ON(EXT4_JOURNAL(inode) == NULL);
0702     ext4_discard_preallocations(inode, 0);
0703     up_write(&EXT4_I(inode)->i_data_sem);
0704     *dropped = 1;
0705     return 0;
0706 }
0707 
0708 /*
0709  * Truncate transactions can be complex and absolutely huge.  So we need to
0710  * be able to restart the transaction at a convenient checkpoint to make
0711  * sure we don't overflow the journal.
0712  *
0713  * Try to extend this transaction for the purposes of truncation.  If
0714  * extend fails, we restart transaction.
0715  */
0716 static int ext4_ind_truncate_ensure_credits(handle_t *handle,
0717                         struct inode *inode,
0718                         struct buffer_head *bh,
0719                         int revoke_creds)
0720 {
0721     int ret;
0722     int dropped = 0;
0723 
0724     ret = ext4_journal_ensure_credits_fn(handle, EXT4_RESERVE_TRANS_BLOCKS,
0725             ext4_blocks_for_truncate(inode), revoke_creds,
0726             ext4_ind_trunc_restart_fn(handle, inode, bh, &dropped));
0727     if (dropped)
0728         down_write(&EXT4_I(inode)->i_data_sem);
0729     if (ret <= 0)
0730         return ret;
0731     if (bh) {
0732         BUFFER_TRACE(bh, "retaking write access");
0733         ret = ext4_journal_get_write_access(handle, inode->i_sb, bh,
0734                             EXT4_JTR_NONE);
0735         if (unlikely(ret))
0736             return ret;
0737     }
0738     return 0;
0739 }
0740 
0741 /*
0742  * Probably it should be a library function... search for first non-zero word
0743  * or memcmp with zero_page, whatever is better for particular architecture.
0744  * Linus?
0745  */
0746 static inline int all_zeroes(__le32 *p, __le32 *q)
0747 {
0748     while (p < q)
0749         if (*p++)
0750             return 0;
0751     return 1;
0752 }
0753 
0754 /**
0755  *  ext4_find_shared - find the indirect blocks for partial truncation.
0756  *  @inode:   inode in question
0757  *  @depth:   depth of the affected branch
0758  *  @offsets: offsets of pointers in that branch (see ext4_block_to_path)
0759  *  @chain:   place to store the pointers to partial indirect blocks
0760  *  @top:     place to the (detached) top of branch
0761  *
0762  *  This is a helper function used by ext4_truncate().
0763  *
0764  *  When we do truncate() we may have to clean the ends of several
0765  *  indirect blocks but leave the blocks themselves alive. Block is
0766  *  partially truncated if some data below the new i_size is referred
0767  *  from it (and it is on the path to the first completely truncated
0768  *  data block, indeed).  We have to free the top of that path along
0769  *  with everything to the right of the path. Since no allocation
0770  *  past the truncation point is possible until ext4_truncate()
0771  *  finishes, we may safely do the latter, but top of branch may
0772  *  require special attention - pageout below the truncation point
0773  *  might try to populate it.
0774  *
0775  *  We atomically detach the top of branch from the tree, store the
0776  *  block number of its root in *@top, pointers to buffer_heads of
0777  *  partially truncated blocks - in @chain[].bh and pointers to
0778  *  their last elements that should not be removed - in
0779  *  @chain[].p. Return value is the pointer to last filled element
0780  *  of @chain.
0781  *
0782  *  The work left to caller to do the actual freeing of subtrees:
0783  *      a) free the subtree starting from *@top
0784  *      b) free the subtrees whose roots are stored in
0785  *          (@chain[i].p+1 .. end of @chain[i].bh->b_data)
0786  *      c) free the subtrees growing from the inode past the @chain[0].
0787  *          (no partially truncated stuff there).  */
0788 
0789 static Indirect *ext4_find_shared(struct inode *inode, int depth,
0790                   ext4_lblk_t offsets[4], Indirect chain[4],
0791                   __le32 *top)
0792 {
0793     Indirect *partial, *p;
0794     int k, err;
0795 
0796     *top = 0;
0797     /* Make k index the deepest non-null offset + 1 */
0798     for (k = depth; k > 1 && !offsets[k-1]; k--)
0799         ;
0800     partial = ext4_get_branch(inode, k, offsets, chain, &err);
0801     /* Writer: pointers */
0802     if (!partial)
0803         partial = chain + k-1;
0804     /*
0805      * If the branch acquired continuation since we've looked at it -
0806      * fine, it should all survive and (new) top doesn't belong to us.
0807      */
0808     if (!partial->key && *partial->p)
0809         /* Writer: end */
0810         goto no_top;
0811     for (p = partial; (p > chain) && all_zeroes((__le32 *) p->bh->b_data, p->p); p--)
0812         ;
0813     /*
0814      * OK, we've found the last block that must survive. The rest of our
0815      * branch should be detached before unlocking. However, if that rest
0816      * of branch is all ours and does not grow immediately from the inode
0817      * it's easier to cheat and just decrement partial->p.
0818      */
0819     if (p == chain + k - 1 && p > chain) {
0820         p->p--;
0821     } else {
0822         *top = *p->p;
0823         /* Nope, don't do this in ext4.  Must leave the tree intact */
0824 #if 0
0825         *p->p = 0;
0826 #endif
0827     }
0828     /* Writer: end */
0829 
0830     while (partial > p) {
0831         brelse(partial->bh);
0832         partial--;
0833     }
0834 no_top:
0835     return partial;
0836 }
0837 
0838 /*
0839  * Zero a number of block pointers in either an inode or an indirect block.
0840  * If we restart the transaction we must again get write access to the
0841  * indirect block for further modification.
0842  *
0843  * We release `count' blocks on disk, but (last - first) may be greater
0844  * than `count' because there can be holes in there.
0845  *
0846  * Return 0 on success, 1 on invalid block range
0847  * and < 0 on fatal error.
0848  */
0849 static int ext4_clear_blocks(handle_t *handle, struct inode *inode,
0850                  struct buffer_head *bh,
0851                  ext4_fsblk_t block_to_free,
0852                  unsigned long count, __le32 *first,
0853                  __le32 *last)
0854 {
0855     __le32 *p;
0856     int flags = EXT4_FREE_BLOCKS_VALIDATED;
0857     int err;
0858 
0859     if (S_ISDIR(inode->i_mode) || S_ISLNK(inode->i_mode) ||
0860         ext4_test_inode_flag(inode, EXT4_INODE_EA_INODE))
0861         flags |= EXT4_FREE_BLOCKS_FORGET | EXT4_FREE_BLOCKS_METADATA;
0862     else if (ext4_should_journal_data(inode))
0863         flags |= EXT4_FREE_BLOCKS_FORGET;
0864 
0865     if (!ext4_inode_block_valid(inode, block_to_free, count)) {
0866         EXT4_ERROR_INODE(inode, "attempt to clear invalid "
0867                  "blocks %llu len %lu",
0868                  (unsigned long long) block_to_free, count);
0869         return 1;
0870     }
0871 
0872     err = ext4_ind_truncate_ensure_credits(handle, inode, bh,
0873                 ext4_free_data_revoke_credits(inode, count));
0874     if (err < 0)
0875         goto out_err;
0876 
0877     for (p = first; p < last; p++)
0878         *p = 0;
0879 
0880     ext4_free_blocks(handle, inode, NULL, block_to_free, count, flags);
0881     return 0;
0882 out_err:
0883     ext4_std_error(inode->i_sb, err);
0884     return err;
0885 }
0886 
0887 /**
0888  * ext4_free_data - free a list of data blocks
0889  * @handle: handle for this transaction
0890  * @inode:  inode we are dealing with
0891  * @this_bh:    indirect buffer_head which contains *@first and *@last
0892  * @first:  array of block numbers
0893  * @last:   points immediately past the end of array
0894  *
0895  * We are freeing all blocks referred from that array (numbers are stored as
0896  * little-endian 32-bit) and updating @inode->i_blocks appropriately.
0897  *
0898  * We accumulate contiguous runs of blocks to free.  Conveniently, if these
0899  * blocks are contiguous then releasing them at one time will only affect one
0900  * or two bitmap blocks (+ group descriptor(s) and superblock) and we won't
0901  * actually use a lot of journal space.
0902  *
0903  * @this_bh will be %NULL if @first and @last point into the inode's direct
0904  * block pointers.
0905  */
0906 static void ext4_free_data(handle_t *handle, struct inode *inode,
0907                struct buffer_head *this_bh,
0908                __le32 *first, __le32 *last)
0909 {
0910     ext4_fsblk_t block_to_free = 0;    /* Starting block # of a run */
0911     unsigned long count = 0;        /* Number of blocks in the run */
0912     __le32 *block_to_free_p = NULL;     /* Pointer into inode/ind
0913                            corresponding to
0914                            block_to_free */
0915     ext4_fsblk_t nr;            /* Current block # */
0916     __le32 *p;              /* Pointer into inode/ind
0917                            for current block */
0918     int err = 0;
0919 
0920     if (this_bh) {              /* For indirect block */
0921         BUFFER_TRACE(this_bh, "get_write_access");
0922         err = ext4_journal_get_write_access(handle, inode->i_sb,
0923                             this_bh, EXT4_JTR_NONE);
0924         /* Important: if we can't update the indirect pointers
0925          * to the blocks, we can't free them. */
0926         if (err)
0927             return;
0928     }
0929 
0930     for (p = first; p < last; p++) {
0931         nr = le32_to_cpu(*p);
0932         if (nr) {
0933             /* accumulate blocks to free if they're contiguous */
0934             if (count == 0) {
0935                 block_to_free = nr;
0936                 block_to_free_p = p;
0937                 count = 1;
0938             } else if (nr == block_to_free + count) {
0939                 count++;
0940             } else {
0941                 err = ext4_clear_blocks(handle, inode, this_bh,
0942                                 block_to_free, count,
0943                                 block_to_free_p, p);
0944                 if (err)
0945                     break;
0946                 block_to_free = nr;
0947                 block_to_free_p = p;
0948                 count = 1;
0949             }
0950         }
0951     }
0952 
0953     if (!err && count > 0)
0954         err = ext4_clear_blocks(handle, inode, this_bh, block_to_free,
0955                     count, block_to_free_p, p);
0956     if (err < 0)
0957         /* fatal error */
0958         return;
0959 
0960     if (this_bh) {
0961         BUFFER_TRACE(this_bh, "call ext4_handle_dirty_metadata");
0962 
0963         /*
0964          * The buffer head should have an attached journal head at this
0965          * point. However, if the data is corrupted and an indirect
0966          * block pointed to itself, it would have been detached when
0967          * the block was cleared. Check for this instead of OOPSing.
0968          */
0969         if ((EXT4_JOURNAL(inode) == NULL) || bh2jh(this_bh))
0970             ext4_handle_dirty_metadata(handle, inode, this_bh);
0971         else
0972             EXT4_ERROR_INODE(inode,
0973                      "circular indirect block detected at "
0974                      "block %llu",
0975                 (unsigned long long) this_bh->b_blocknr);
0976     }
0977 }
0978 
0979 /**
0980  *  ext4_free_branches - free an array of branches
0981  *  @handle: JBD handle for this transaction
0982  *  @inode: inode we are dealing with
0983  *  @parent_bh: the buffer_head which contains *@first and *@last
0984  *  @first: array of block numbers
0985  *  @last:  pointer immediately past the end of array
0986  *  @depth: depth of the branches to free
0987  *
0988  *  We are freeing all blocks referred from these branches (numbers are
0989  *  stored as little-endian 32-bit) and updating @inode->i_blocks
0990  *  appropriately.
0991  */
0992 static void ext4_free_branches(handle_t *handle, struct inode *inode,
0993                    struct buffer_head *parent_bh,
0994                    __le32 *first, __le32 *last, int depth)
0995 {
0996     ext4_fsblk_t nr;
0997     __le32 *p;
0998 
0999     if (ext4_handle_is_aborted(handle))
1000         return;
1001 
1002     if (depth--) {
1003         struct buffer_head *bh;
1004         int addr_per_block = EXT4_ADDR_PER_BLOCK(inode->i_sb);
1005         p = last;
1006         while (--p >= first) {
1007             nr = le32_to_cpu(*p);
1008             if (!nr)
1009                 continue;       /* A hole */
1010 
1011             if (!ext4_inode_block_valid(inode, nr, 1)) {
1012                 EXT4_ERROR_INODE(inode,
1013                          "invalid indirect mapped "
1014                          "block %lu (level %d)",
1015                          (unsigned long) nr, depth);
1016                 break;
1017             }
1018 
1019             /* Go read the buffer for the next level down */
1020             bh = ext4_sb_bread(inode->i_sb, nr, 0);
1021 
1022             /*
1023              * A read failure? Report error and clear slot
1024              * (should be rare).
1025              */
1026             if (IS_ERR(bh)) {
1027                 ext4_error_inode_block(inode, nr, -PTR_ERR(bh),
1028                                "Read failure");
1029                 continue;
1030             }
1031 
1032             /* This zaps the entire block.  Bottom up. */
1033             BUFFER_TRACE(bh, "free child branches");
1034             ext4_free_branches(handle, inode, bh,
1035                     (__le32 *) bh->b_data,
1036                     (__le32 *) bh->b_data + addr_per_block,
1037                     depth);
1038             brelse(bh);
1039 
1040             /*
1041              * Everything below this pointer has been
1042              * released.  Now let this top-of-subtree go.
1043              *
1044              * We want the freeing of this indirect block to be
1045              * atomic in the journal with the updating of the
1046              * bitmap block which owns it.  So make some room in
1047              * the journal.
1048              *
1049              * We zero the parent pointer *after* freeing its
1050              * pointee in the bitmaps, so if extend_transaction()
1051              * for some reason fails to put the bitmap changes and
1052              * the release into the same transaction, recovery
1053              * will merely complain about releasing a free block,
1054              * rather than leaking blocks.
1055              */
1056             if (ext4_handle_is_aborted(handle))
1057                 return;
1058             if (ext4_ind_truncate_ensure_credits(handle, inode,
1059                     NULL,
1060                     ext4_free_metadata_revoke_credits(
1061                             inode->i_sb, 1)) < 0)
1062                 return;
1063 
1064             /*
1065              * The forget flag here is critical because if
1066              * we are journaling (and not doing data
1067              * journaling), we have to make sure a revoke
1068              * record is written to prevent the journal
1069              * replay from overwriting the (former)
1070              * indirect block if it gets reallocated as a
1071              * data block.  This must happen in the same
1072              * transaction where the data blocks are
1073              * actually freed.
1074              */
1075             ext4_free_blocks(handle, inode, NULL, nr, 1,
1076                      EXT4_FREE_BLOCKS_METADATA|
1077                      EXT4_FREE_BLOCKS_FORGET);
1078 
1079             if (parent_bh) {
1080                 /*
1081                  * The block which we have just freed is
1082                  * pointed to by an indirect block: journal it
1083                  */
1084                 BUFFER_TRACE(parent_bh, "get_write_access");
1085                 if (!ext4_journal_get_write_access(handle,
1086                         inode->i_sb, parent_bh,
1087                         EXT4_JTR_NONE)) {
1088                     *p = 0;
1089                     BUFFER_TRACE(parent_bh,
1090                     "call ext4_handle_dirty_metadata");
1091                     ext4_handle_dirty_metadata(handle,
1092                                    inode,
1093                                    parent_bh);
1094                 }
1095             }
1096         }
1097     } else {
1098         /* We have reached the bottom of the tree. */
1099         BUFFER_TRACE(parent_bh, "free data blocks");
1100         ext4_free_data(handle, inode, parent_bh, first, last);
1101     }
1102 }
1103 
1104 void ext4_ind_truncate(handle_t *handle, struct inode *inode)
1105 {
1106     struct ext4_inode_info *ei = EXT4_I(inode);
1107     __le32 *i_data = ei->i_data;
1108     int addr_per_block = EXT4_ADDR_PER_BLOCK(inode->i_sb);
1109     ext4_lblk_t offsets[4];
1110     Indirect chain[4];
1111     Indirect *partial;
1112     __le32 nr = 0;
1113     int n = 0;
1114     ext4_lblk_t last_block, max_block;
1115     unsigned blocksize = inode->i_sb->s_blocksize;
1116 
1117     last_block = (inode->i_size + blocksize-1)
1118                     >> EXT4_BLOCK_SIZE_BITS(inode->i_sb);
1119     max_block = (EXT4_SB(inode->i_sb)->s_bitmap_maxbytes + blocksize-1)
1120                     >> EXT4_BLOCK_SIZE_BITS(inode->i_sb);
1121 
1122     if (last_block != max_block) {
1123         n = ext4_block_to_path(inode, last_block, offsets, NULL);
1124         if (n == 0)
1125             return;
1126     }
1127 
1128     ext4_es_remove_extent(inode, last_block, EXT_MAX_BLOCKS - last_block);
1129 
1130     /*
1131      * The orphan list entry will now protect us from any crash which
1132      * occurs before the truncate completes, so it is now safe to propagate
1133      * the new, shorter inode size (held for now in i_size) into the
1134      * on-disk inode. We do this via i_disksize, which is the value which
1135      * ext4 *really* writes onto the disk inode.
1136      */
1137     ei->i_disksize = inode->i_size;
1138 
1139     if (last_block == max_block) {
1140         /*
1141          * It is unnecessary to free any data blocks if last_block is
1142          * equal to the indirect block limit.
1143          */
1144         return;
1145     } else if (n == 1) {        /* direct blocks */
1146         ext4_free_data(handle, inode, NULL, i_data+offsets[0],
1147                    i_data + EXT4_NDIR_BLOCKS);
1148         goto do_indirects;
1149     }
1150 
1151     partial = ext4_find_shared(inode, n, offsets, chain, &nr);
1152     /* Kill the top of shared branch (not detached) */
1153     if (nr) {
1154         if (partial == chain) {
1155             /* Shared branch grows from the inode */
1156             ext4_free_branches(handle, inode, NULL,
1157                        &nr, &nr+1, (chain+n-1) - partial);
1158             *partial->p = 0;
1159             /*
1160              * We mark the inode dirty prior to restart,
1161              * and prior to stop.  No need for it here.
1162              */
1163         } else {
1164             /* Shared branch grows from an indirect block */
1165             BUFFER_TRACE(partial->bh, "get_write_access");
1166             ext4_free_branches(handle, inode, partial->bh,
1167                     partial->p,
1168                     partial->p+1, (chain+n-1) - partial);
1169         }
1170     }
1171     /* Clear the ends of indirect blocks on the shared branch */
1172     while (partial > chain) {
1173         ext4_free_branches(handle, inode, partial->bh, partial->p + 1,
1174                    (__le32*)partial->bh->b_data+addr_per_block,
1175                    (chain+n-1) - partial);
1176         BUFFER_TRACE(partial->bh, "call brelse");
1177         brelse(partial->bh);
1178         partial--;
1179     }
1180 do_indirects:
1181     /* Kill the remaining (whole) subtrees */
1182     switch (offsets[0]) {
1183     default:
1184         nr = i_data[EXT4_IND_BLOCK];
1185         if (nr) {
1186             ext4_free_branches(handle, inode, NULL, &nr, &nr+1, 1);
1187             i_data[EXT4_IND_BLOCK] = 0;
1188         }
1189         fallthrough;
1190     case EXT4_IND_BLOCK:
1191         nr = i_data[EXT4_DIND_BLOCK];
1192         if (nr) {
1193             ext4_free_branches(handle, inode, NULL, &nr, &nr+1, 2);
1194             i_data[EXT4_DIND_BLOCK] = 0;
1195         }
1196         fallthrough;
1197     case EXT4_DIND_BLOCK:
1198         nr = i_data[EXT4_TIND_BLOCK];
1199         if (nr) {
1200             ext4_free_branches(handle, inode, NULL, &nr, &nr+1, 3);
1201             i_data[EXT4_TIND_BLOCK] = 0;
1202         }
1203         fallthrough;
1204     case EXT4_TIND_BLOCK:
1205         ;
1206     }
1207 }
1208 
1209 /**
1210  *  ext4_ind_remove_space - remove space from the range
1211  *  @handle: JBD handle for this transaction
1212  *  @inode: inode we are dealing with
1213  *  @start: First block to remove
1214  *  @end:   One block after the last block to remove (exclusive)
1215  *
1216  *  Free the blocks in the defined range (end is exclusive endpoint of
1217  *  range). This is used by ext4_punch_hole().
1218  */
1219 int ext4_ind_remove_space(handle_t *handle, struct inode *inode,
1220               ext4_lblk_t start, ext4_lblk_t end)
1221 {
1222     struct ext4_inode_info *ei = EXT4_I(inode);
1223     __le32 *i_data = ei->i_data;
1224     int addr_per_block = EXT4_ADDR_PER_BLOCK(inode->i_sb);
1225     ext4_lblk_t offsets[4], offsets2[4];
1226     Indirect chain[4], chain2[4];
1227     Indirect *partial, *partial2;
1228     Indirect *p = NULL, *p2 = NULL;
1229     ext4_lblk_t max_block;
1230     __le32 nr = 0, nr2 = 0;
1231     int n = 0, n2 = 0;
1232     unsigned blocksize = inode->i_sb->s_blocksize;
1233 
1234     max_block = (EXT4_SB(inode->i_sb)->s_bitmap_maxbytes + blocksize-1)
1235                     >> EXT4_BLOCK_SIZE_BITS(inode->i_sb);
1236     if (end >= max_block)
1237         end = max_block;
1238     if ((start >= end) || (start > max_block))
1239         return 0;
1240 
1241     n = ext4_block_to_path(inode, start, offsets, NULL);
1242     n2 = ext4_block_to_path(inode, end, offsets2, NULL);
1243 
1244     BUG_ON(n > n2);
1245 
1246     if ((n == 1) && (n == n2)) {
1247         /* We're punching only within direct block range */
1248         ext4_free_data(handle, inode, NULL, i_data + offsets[0],
1249                    i_data + offsets2[0]);
1250         return 0;
1251     } else if (n2 > n) {
1252         /*
1253          * Start and end are on a different levels so we're going to
1254          * free partial block at start, and partial block at end of
1255          * the range. If there are some levels in between then
1256          * do_indirects label will take care of that.
1257          */
1258 
1259         if (n == 1) {
1260             /*
1261              * Start is at the direct block level, free
1262              * everything to the end of the level.
1263              */
1264             ext4_free_data(handle, inode, NULL, i_data + offsets[0],
1265                        i_data + EXT4_NDIR_BLOCKS);
1266             goto end_range;
1267         }
1268 
1269 
1270         partial = p = ext4_find_shared(inode, n, offsets, chain, &nr);
1271         if (nr) {
1272             if (partial == chain) {
1273                 /* Shared branch grows from the inode */
1274                 ext4_free_branches(handle, inode, NULL,
1275                        &nr, &nr+1, (chain+n-1) - partial);
1276                 *partial->p = 0;
1277             } else {
1278                 /* Shared branch grows from an indirect block */
1279                 BUFFER_TRACE(partial->bh, "get_write_access");
1280                 ext4_free_branches(handle, inode, partial->bh,
1281                     partial->p,
1282                     partial->p+1, (chain+n-1) - partial);
1283             }
1284         }
1285 
1286         /*
1287          * Clear the ends of indirect blocks on the shared branch
1288          * at the start of the range
1289          */
1290         while (partial > chain) {
1291             ext4_free_branches(handle, inode, partial->bh,
1292                 partial->p + 1,
1293                 (__le32 *)partial->bh->b_data+addr_per_block,
1294                 (chain+n-1) - partial);
1295             partial--;
1296         }
1297 
1298 end_range:
1299         partial2 = p2 = ext4_find_shared(inode, n2, offsets2, chain2, &nr2);
1300         if (nr2) {
1301             if (partial2 == chain2) {
1302                 /*
1303                  * Remember, end is exclusive so here we're at
1304                  * the start of the next level we're not going
1305                  * to free. Everything was covered by the start
1306                  * of the range.
1307                  */
1308                 goto do_indirects;
1309             }
1310         } else {
1311             /*
1312              * ext4_find_shared returns Indirect structure which
1313              * points to the last element which should not be
1314              * removed by truncate. But this is end of the range
1315              * in punch_hole so we need to point to the next element
1316              */
1317             partial2->p++;
1318         }
1319 
1320         /*
1321          * Clear the ends of indirect blocks on the shared branch
1322          * at the end of the range
1323          */
1324         while (partial2 > chain2) {
1325             ext4_free_branches(handle, inode, partial2->bh,
1326                        (__le32 *)partial2->bh->b_data,
1327                        partial2->p,
1328                        (chain2+n2-1) - partial2);
1329             partial2--;
1330         }
1331         goto do_indirects;
1332     }
1333 
1334     /* Punch happened within the same level (n == n2) */
1335     partial = p = ext4_find_shared(inode, n, offsets, chain, &nr);
1336     partial2 = p2 = ext4_find_shared(inode, n2, offsets2, chain2, &nr2);
1337 
1338     /* Free top, but only if partial2 isn't its subtree. */
1339     if (nr) {
1340         int level = min(partial - chain, partial2 - chain2);
1341         int i;
1342         int subtree = 1;
1343 
1344         for (i = 0; i <= level; i++) {
1345             if (offsets[i] != offsets2[i]) {
1346                 subtree = 0;
1347                 break;
1348             }
1349         }
1350 
1351         if (!subtree) {
1352             if (partial == chain) {
1353                 /* Shared branch grows from the inode */
1354                 ext4_free_branches(handle, inode, NULL,
1355                            &nr, &nr+1,
1356                            (chain+n-1) - partial);
1357                 *partial->p = 0;
1358             } else {
1359                 /* Shared branch grows from an indirect block */
1360                 BUFFER_TRACE(partial->bh, "get_write_access");
1361                 ext4_free_branches(handle, inode, partial->bh,
1362                            partial->p,
1363                            partial->p+1,
1364                            (chain+n-1) - partial);
1365             }
1366         }
1367     }
1368 
1369     if (!nr2) {
1370         /*
1371          * ext4_find_shared returns Indirect structure which
1372          * points to the last element which should not be
1373          * removed by truncate. But this is end of the range
1374          * in punch_hole so we need to point to the next element
1375          */
1376         partial2->p++;
1377     }
1378 
1379     while (partial > chain || partial2 > chain2) {
1380         int depth = (chain+n-1) - partial;
1381         int depth2 = (chain2+n2-1) - partial2;
1382 
1383         if (partial > chain && partial2 > chain2 &&
1384             partial->bh->b_blocknr == partial2->bh->b_blocknr) {
1385             /*
1386              * We've converged on the same block. Clear the range,
1387              * then we're done.
1388              */
1389             ext4_free_branches(handle, inode, partial->bh,
1390                        partial->p + 1,
1391                        partial2->p,
1392                        (chain+n-1) - partial);
1393             goto cleanup;
1394         }
1395 
1396         /*
1397          * The start and end partial branches may not be at the same
1398          * level even though the punch happened within one level. So, we
1399          * give them a chance to arrive at the same level, then walk
1400          * them in step with each other until we converge on the same
1401          * block.
1402          */
1403         if (partial > chain && depth <= depth2) {
1404             ext4_free_branches(handle, inode, partial->bh,
1405                        partial->p + 1,
1406                        (__le32 *)partial->bh->b_data+addr_per_block,
1407                        (chain+n-1) - partial);
1408             partial--;
1409         }
1410         if (partial2 > chain2 && depth2 <= depth) {
1411             ext4_free_branches(handle, inode, partial2->bh,
1412                        (__le32 *)partial2->bh->b_data,
1413                        partial2->p,
1414                        (chain2+n2-1) - partial2);
1415             partial2--;
1416         }
1417     }
1418 
1419 cleanup:
1420     while (p && p > chain) {
1421         BUFFER_TRACE(p->bh, "call brelse");
1422         brelse(p->bh);
1423         p--;
1424     }
1425     while (p2 && p2 > chain2) {
1426         BUFFER_TRACE(p2->bh, "call brelse");
1427         brelse(p2->bh);
1428         p2--;
1429     }
1430     return 0;
1431 
1432 do_indirects:
1433     /* Kill the remaining (whole) subtrees */
1434     switch (offsets[0]) {
1435     default:
1436         if (++n >= n2)
1437             break;
1438         nr = i_data[EXT4_IND_BLOCK];
1439         if (nr) {
1440             ext4_free_branches(handle, inode, NULL, &nr, &nr+1, 1);
1441             i_data[EXT4_IND_BLOCK] = 0;
1442         }
1443         fallthrough;
1444     case EXT4_IND_BLOCK:
1445         if (++n >= n2)
1446             break;
1447         nr = i_data[EXT4_DIND_BLOCK];
1448         if (nr) {
1449             ext4_free_branches(handle, inode, NULL, &nr, &nr+1, 2);
1450             i_data[EXT4_DIND_BLOCK] = 0;
1451         }
1452         fallthrough;
1453     case EXT4_DIND_BLOCK:
1454         if (++n >= n2)
1455             break;
1456         nr = i_data[EXT4_TIND_BLOCK];
1457         if (nr) {
1458             ext4_free_branches(handle, inode, NULL, &nr, &nr+1, 3);
1459             i_data[EXT4_TIND_BLOCK] = 0;
1460         }
1461         fallthrough;
1462     case EXT4_TIND_BLOCK:
1463         ;
1464     }
1465     goto cleanup;
1466 }