Back to home page

OSCL-LXR

 
 

    


0001 // SPDX-License-Identifier: LGPL-2.1
0002 /*
0003  * Copyright (c) 2008,2009 NEC Software Tohoku, Ltd.
0004  * Written by Takashi Sato <t-sato@yk.jp.nec.com>
0005  *            Akira Fujita <a-fujita@rs.jp.nec.com>
0006  */
0007 
0008 #include <linux/fs.h>
0009 #include <linux/quotaops.h>
0010 #include <linux/slab.h>
0011 #include <linux/sched/mm.h>
0012 #include "ext4_jbd2.h"
0013 #include "ext4.h"
0014 #include "ext4_extents.h"
0015 
0016 /**
0017  * get_ext_path() - Find an extent path for designated logical block number.
0018  * @inode:  inode to be searched
0019  * @lblock: logical block number to find an extent path
0020  * @ppath:  pointer to an extent path pointer (for output)
0021  *
0022  * ext4_find_extent wrapper. Return 0 on success, or a negative error value
0023  * on failure.
0024  */
0025 static inline int
0026 get_ext_path(struct inode *inode, ext4_lblk_t lblock,
0027         struct ext4_ext_path **ppath)
0028 {
0029     struct ext4_ext_path *path;
0030 
0031     path = ext4_find_extent(inode, lblock, ppath, EXT4_EX_NOCACHE);
0032     if (IS_ERR(path))
0033         return PTR_ERR(path);
0034     if (path[ext_depth(inode)].p_ext == NULL) {
0035         ext4_ext_drop_refs(path);
0036         kfree(path);
0037         *ppath = NULL;
0038         return -ENODATA;
0039     }
0040     *ppath = path;
0041     return 0;
0042 }
0043 
0044 /**
0045  * ext4_double_down_write_data_sem() - write lock two inodes's i_data_sem
0046  * @first: inode to be locked
0047  * @second: inode to be locked
0048  *
0049  * Acquire write lock of i_data_sem of the two inodes
0050  */
0051 void
0052 ext4_double_down_write_data_sem(struct inode *first, struct inode *second)
0053 {
0054     if (first < second) {
0055         down_write(&EXT4_I(first)->i_data_sem);
0056         down_write_nested(&EXT4_I(second)->i_data_sem, I_DATA_SEM_OTHER);
0057     } else {
0058         down_write(&EXT4_I(second)->i_data_sem);
0059         down_write_nested(&EXT4_I(first)->i_data_sem, I_DATA_SEM_OTHER);
0060 
0061     }
0062 }
0063 
0064 /**
0065  * ext4_double_up_write_data_sem - Release two inodes' write lock of i_data_sem
0066  *
0067  * @orig_inode:     original inode structure to be released its lock first
0068  * @donor_inode:    donor inode structure to be released its lock second
0069  * Release write lock of i_data_sem of two inodes (orig and donor).
0070  */
0071 void
0072 ext4_double_up_write_data_sem(struct inode *orig_inode,
0073                   struct inode *donor_inode)
0074 {
0075     up_write(&EXT4_I(orig_inode)->i_data_sem);
0076     up_write(&EXT4_I(donor_inode)->i_data_sem);
0077 }
0078 
0079 /**
0080  * mext_check_coverage - Check that all extents in range has the same type
0081  *
0082  * @inode:      inode in question
0083  * @from:       block offset of inode
0084  * @count:      block count to be checked
0085  * @unwritten:      extents expected to be unwritten
0086  * @err:        pointer to save error value
0087  *
0088  * Return 1 if all extents in range has expected type, and zero otherwise.
0089  */
0090 static int
0091 mext_check_coverage(struct inode *inode, ext4_lblk_t from, ext4_lblk_t count,
0092             int unwritten, int *err)
0093 {
0094     struct ext4_ext_path *path = NULL;
0095     struct ext4_extent *ext;
0096     int ret = 0;
0097     ext4_lblk_t last = from + count;
0098     while (from < last) {
0099         *err = get_ext_path(inode, from, &path);
0100         if (*err)
0101             goto out;
0102         ext = path[ext_depth(inode)].p_ext;
0103         if (unwritten != ext4_ext_is_unwritten(ext))
0104             goto out;
0105         from += ext4_ext_get_actual_len(ext);
0106         ext4_ext_drop_refs(path);
0107     }
0108     ret = 1;
0109 out:
0110     ext4_ext_drop_refs(path);
0111     kfree(path);
0112     return ret;
0113 }
0114 
0115 /**
0116  * mext_page_double_lock - Grab and lock pages on both @inode1 and @inode2
0117  *
0118  * @inode1: the inode structure
0119  * @inode2: the inode structure
0120  * @index1: page index
0121  * @index2: page index
0122  * @page:   result page vector
0123  *
0124  * Grab two locked pages for inode's by inode order
0125  */
0126 static int
0127 mext_page_double_lock(struct inode *inode1, struct inode *inode2,
0128               pgoff_t index1, pgoff_t index2, struct page *page[2])
0129 {
0130     struct address_space *mapping[2];
0131     unsigned int flags;
0132 
0133     BUG_ON(!inode1 || !inode2);
0134     if (inode1 < inode2) {
0135         mapping[0] = inode1->i_mapping;
0136         mapping[1] = inode2->i_mapping;
0137     } else {
0138         swap(index1, index2);
0139         mapping[0] = inode2->i_mapping;
0140         mapping[1] = inode1->i_mapping;
0141     }
0142 
0143     flags = memalloc_nofs_save();
0144     page[0] = grab_cache_page_write_begin(mapping[0], index1);
0145     if (!page[0]) {
0146         memalloc_nofs_restore(flags);
0147         return -ENOMEM;
0148     }
0149 
0150     page[1] = grab_cache_page_write_begin(mapping[1], index2);
0151     memalloc_nofs_restore(flags);
0152     if (!page[1]) {
0153         unlock_page(page[0]);
0154         put_page(page[0]);
0155         return -ENOMEM;
0156     }
0157     /*
0158      * grab_cache_page_write_begin() may not wait on page's writeback if
0159      * BDI not demand that. But it is reasonable to be very conservative
0160      * here and explicitly wait on page's writeback
0161      */
0162     wait_on_page_writeback(page[0]);
0163     wait_on_page_writeback(page[1]);
0164     if (inode1 > inode2)
0165         swap(page[0], page[1]);
0166 
0167     return 0;
0168 }
0169 
0170 /* Force page buffers uptodate w/o dropping page's lock */
0171 static int
0172 mext_page_mkuptodate(struct page *page, unsigned from, unsigned to)
0173 {
0174     struct inode *inode = page->mapping->host;
0175     sector_t block;
0176     struct buffer_head *bh, *head, *arr[MAX_BUF_PER_PAGE];
0177     unsigned int blocksize, block_start, block_end;
0178     int i, err,  nr = 0, partial = 0;
0179     BUG_ON(!PageLocked(page));
0180     BUG_ON(PageWriteback(page));
0181 
0182     if (PageUptodate(page))
0183         return 0;
0184 
0185     blocksize = i_blocksize(inode);
0186     if (!page_has_buffers(page))
0187         create_empty_buffers(page, blocksize, 0);
0188 
0189     head = page_buffers(page);
0190     block = (sector_t)page->index << (PAGE_SHIFT - inode->i_blkbits);
0191     for (bh = head, block_start = 0; bh != head || !block_start;
0192          block++, block_start = block_end, bh = bh->b_this_page) {
0193         block_end = block_start + blocksize;
0194         if (block_end <= from || block_start >= to) {
0195             if (!buffer_uptodate(bh))
0196                 partial = 1;
0197             continue;
0198         }
0199         if (buffer_uptodate(bh))
0200             continue;
0201         if (!buffer_mapped(bh)) {
0202             err = ext4_get_block(inode, block, bh, 0);
0203             if (err) {
0204                 SetPageError(page);
0205                 return err;
0206             }
0207             if (!buffer_mapped(bh)) {
0208                 zero_user(page, block_start, blocksize);
0209                 set_buffer_uptodate(bh);
0210                 continue;
0211             }
0212         }
0213         BUG_ON(nr >= MAX_BUF_PER_PAGE);
0214         arr[nr++] = bh;
0215     }
0216     /* No io required */
0217     if (!nr)
0218         goto out;
0219 
0220     for (i = 0; i < nr; i++) {
0221         bh = arr[i];
0222         if (!bh_uptodate_or_lock(bh)) {
0223             err = ext4_read_bh(bh, 0, NULL);
0224             if (err)
0225                 return err;
0226         }
0227     }
0228 out:
0229     if (!partial)
0230         SetPageUptodate(page);
0231     return 0;
0232 }
0233 
0234 /**
0235  * move_extent_per_page - Move extent data per page
0236  *
0237  * @o_filp:         file structure of original file
0238  * @donor_inode:        donor inode
0239  * @orig_page_offset:       page index on original file
0240  * @donor_page_offset:      page index on donor file
0241  * @data_offset_in_page:    block index where data swapping starts
0242  * @block_len_in_page:      the number of blocks to be swapped
0243  * @unwritten:          orig extent is unwritten or not
0244  * @err:            pointer to save return value
0245  *
0246  * Save the data in original inode blocks and replace original inode extents
0247  * with donor inode extents by calling ext4_swap_extents().
0248  * Finally, write out the saved data in new original inode blocks. Return
0249  * replaced block count.
0250  */
0251 static int
0252 move_extent_per_page(struct file *o_filp, struct inode *donor_inode,
0253              pgoff_t orig_page_offset, pgoff_t donor_page_offset,
0254              int data_offset_in_page,
0255              int block_len_in_page, int unwritten, int *err)
0256 {
0257     struct inode *orig_inode = file_inode(o_filp);
0258     struct page *pagep[2] = {NULL, NULL};
0259     handle_t *handle;
0260     ext4_lblk_t orig_blk_offset, donor_blk_offset;
0261     unsigned long blocksize = orig_inode->i_sb->s_blocksize;
0262     unsigned int tmp_data_size, data_size, replaced_size;
0263     int i, err2, jblocks, retries = 0;
0264     int replaced_count = 0;
0265     int from = data_offset_in_page << orig_inode->i_blkbits;
0266     int blocks_per_page = PAGE_SIZE >> orig_inode->i_blkbits;
0267     struct super_block *sb = orig_inode->i_sb;
0268     struct buffer_head *bh = NULL;
0269 
0270     /*
0271      * It needs twice the amount of ordinary journal buffers because
0272      * inode and donor_inode may change each different metadata blocks.
0273      */
0274 again:
0275     *err = 0;
0276     jblocks = ext4_writepage_trans_blocks(orig_inode) * 2;
0277     handle = ext4_journal_start(orig_inode, EXT4_HT_MOVE_EXTENTS, jblocks);
0278     if (IS_ERR(handle)) {
0279         *err = PTR_ERR(handle);
0280         return 0;
0281     }
0282 
0283     orig_blk_offset = orig_page_offset * blocks_per_page +
0284         data_offset_in_page;
0285 
0286     donor_blk_offset = donor_page_offset * blocks_per_page +
0287         data_offset_in_page;
0288 
0289     /* Calculate data_size */
0290     if ((orig_blk_offset + block_len_in_page - 1) ==
0291         ((orig_inode->i_size - 1) >> orig_inode->i_blkbits)) {
0292         /* Replace the last block */
0293         tmp_data_size = orig_inode->i_size & (blocksize - 1);
0294         /*
0295          * If data_size equal zero, it shows data_size is multiples of
0296          * blocksize. So we set appropriate value.
0297          */
0298         if (tmp_data_size == 0)
0299             tmp_data_size = blocksize;
0300 
0301         data_size = tmp_data_size +
0302             ((block_len_in_page - 1) << orig_inode->i_blkbits);
0303     } else
0304         data_size = block_len_in_page << orig_inode->i_blkbits;
0305 
0306     replaced_size = data_size;
0307 
0308     *err = mext_page_double_lock(orig_inode, donor_inode, orig_page_offset,
0309                      donor_page_offset, pagep);
0310     if (unlikely(*err < 0))
0311         goto stop_journal;
0312     /*
0313      * If orig extent was unwritten it can become initialized
0314      * at any time after i_data_sem was dropped, in order to
0315      * serialize with delalloc we have recheck extent while we
0316      * hold page's lock, if it is still the case data copy is not
0317      * necessary, just swap data blocks between orig and donor.
0318      */
0319     if (unwritten) {
0320         ext4_double_down_write_data_sem(orig_inode, donor_inode);
0321         /* If any of extents in range became initialized we have to
0322          * fallback to data copying */
0323         unwritten = mext_check_coverage(orig_inode, orig_blk_offset,
0324                         block_len_in_page, 1, err);
0325         if (*err)
0326             goto drop_data_sem;
0327 
0328         unwritten &= mext_check_coverage(donor_inode, donor_blk_offset,
0329                          block_len_in_page, 1, err);
0330         if (*err)
0331             goto drop_data_sem;
0332 
0333         if (!unwritten) {
0334             ext4_double_up_write_data_sem(orig_inode, donor_inode);
0335             goto data_copy;
0336         }
0337         if ((page_has_private(pagep[0]) &&
0338              !try_to_release_page(pagep[0], 0)) ||
0339             (page_has_private(pagep[1]) &&
0340              !try_to_release_page(pagep[1], 0))) {
0341             *err = -EBUSY;
0342             goto drop_data_sem;
0343         }
0344         replaced_count = ext4_swap_extents(handle, orig_inode,
0345                            donor_inode, orig_blk_offset,
0346                            donor_blk_offset,
0347                            block_len_in_page, 1, err);
0348     drop_data_sem:
0349         ext4_double_up_write_data_sem(orig_inode, donor_inode);
0350         goto unlock_pages;
0351     }
0352 data_copy:
0353     *err = mext_page_mkuptodate(pagep[0], from, from + replaced_size);
0354     if (*err)
0355         goto unlock_pages;
0356 
0357     /* At this point all buffers in range are uptodate, old mapping layout
0358      * is no longer required, try to drop it now. */
0359     if ((page_has_private(pagep[0]) && !try_to_release_page(pagep[0], 0)) ||
0360         (page_has_private(pagep[1]) && !try_to_release_page(pagep[1], 0))) {
0361         *err = -EBUSY;
0362         goto unlock_pages;
0363     }
0364     ext4_double_down_write_data_sem(orig_inode, donor_inode);
0365     replaced_count = ext4_swap_extents(handle, orig_inode, donor_inode,
0366                            orig_blk_offset, donor_blk_offset,
0367                        block_len_in_page, 1, err);
0368     ext4_double_up_write_data_sem(orig_inode, donor_inode);
0369     if (*err) {
0370         if (replaced_count) {
0371             block_len_in_page = replaced_count;
0372             replaced_size =
0373                 block_len_in_page << orig_inode->i_blkbits;
0374         } else
0375             goto unlock_pages;
0376     }
0377     /* Perform all necessary steps similar write_begin()/write_end()
0378      * but keeping in mind that i_size will not change */
0379     if (!page_has_buffers(pagep[0]))
0380         create_empty_buffers(pagep[0], 1 << orig_inode->i_blkbits, 0);
0381     bh = page_buffers(pagep[0]);
0382     for (i = 0; i < data_offset_in_page; i++)
0383         bh = bh->b_this_page;
0384     for (i = 0; i < block_len_in_page; i++) {
0385         *err = ext4_get_block(orig_inode, orig_blk_offset + i, bh, 0);
0386         if (*err < 0)
0387             break;
0388         bh = bh->b_this_page;
0389     }
0390     if (!*err)
0391         *err = block_commit_write(pagep[0], from, from + replaced_size);
0392 
0393     if (unlikely(*err < 0))
0394         goto repair_branches;
0395 
0396     /* Even in case of data=writeback it is reasonable to pin
0397      * inode to transaction, to prevent unexpected data loss */
0398     *err = ext4_jbd2_inode_add_write(handle, orig_inode,
0399             (loff_t)orig_page_offset << PAGE_SHIFT, replaced_size);
0400 
0401 unlock_pages:
0402     unlock_page(pagep[0]);
0403     put_page(pagep[0]);
0404     unlock_page(pagep[1]);
0405     put_page(pagep[1]);
0406 stop_journal:
0407     ext4_journal_stop(handle);
0408     if (*err == -ENOSPC &&
0409         ext4_should_retry_alloc(sb, &retries))
0410         goto again;
0411     /* Buffer was busy because probably is pinned to journal transaction,
0412      * force transaction commit may help to free it. */
0413     if (*err == -EBUSY && retries++ < 4 && EXT4_SB(sb)->s_journal &&
0414         jbd2_journal_force_commit_nested(EXT4_SB(sb)->s_journal))
0415         goto again;
0416     return replaced_count;
0417 
0418 repair_branches:
0419     /*
0420      * This should never ever happen!
0421      * Extents are swapped already, but we are not able to copy data.
0422      * Try to swap extents to it's original places
0423      */
0424     ext4_double_down_write_data_sem(orig_inode, donor_inode);
0425     replaced_count = ext4_swap_extents(handle, donor_inode, orig_inode,
0426                            orig_blk_offset, donor_blk_offset,
0427                        block_len_in_page, 0, &err2);
0428     ext4_double_up_write_data_sem(orig_inode, donor_inode);
0429     if (replaced_count != block_len_in_page) {
0430         ext4_error_inode_block(orig_inode, (sector_t)(orig_blk_offset),
0431                        EIO, "Unable to copy data block,"
0432                        " data will be lost.");
0433         *err = -EIO;
0434     }
0435     replaced_count = 0;
0436     goto unlock_pages;
0437 }
0438 
0439 /**
0440  * mext_check_arguments - Check whether move extent can be done
0441  *
0442  * @orig_inode:     original inode
0443  * @donor_inode:    donor inode
0444  * @orig_start:     logical start offset in block for orig
0445  * @donor_start:    logical start offset in block for donor
0446  * @len:        the number of blocks to be moved
0447  *
0448  * Check the arguments of ext4_move_extents() whether the files can be
0449  * exchanged with each other.
0450  * Return 0 on success, or a negative error value on failure.
0451  */
0452 static int
0453 mext_check_arguments(struct inode *orig_inode,
0454              struct inode *donor_inode, __u64 orig_start,
0455              __u64 donor_start, __u64 *len)
0456 {
0457     __u64 orig_eof, donor_eof;
0458     unsigned int blkbits = orig_inode->i_blkbits;
0459     unsigned int blocksize = 1 << blkbits;
0460 
0461     orig_eof = (i_size_read(orig_inode) + blocksize - 1) >> blkbits;
0462     donor_eof = (i_size_read(donor_inode) + blocksize - 1) >> blkbits;
0463 
0464 
0465     if (donor_inode->i_mode & (S_ISUID|S_ISGID)) {
0466         ext4_debug("ext4 move extent: suid or sgid is set"
0467                " to donor file [ino:orig %lu, donor %lu]\n",
0468                orig_inode->i_ino, donor_inode->i_ino);
0469         return -EINVAL;
0470     }
0471 
0472     if (IS_IMMUTABLE(donor_inode) || IS_APPEND(donor_inode))
0473         return -EPERM;
0474 
0475     /* Ext4 move extent does not support swapfile */
0476     if (IS_SWAPFILE(orig_inode) || IS_SWAPFILE(donor_inode)) {
0477         ext4_debug("ext4 move extent: The argument files should "
0478             "not be swapfile [ino:orig %lu, donor %lu]\n",
0479             orig_inode->i_ino, donor_inode->i_ino);
0480         return -EBUSY;
0481     }
0482 
0483     if (ext4_is_quota_file(orig_inode) && ext4_is_quota_file(donor_inode)) {
0484         ext4_debug("ext4 move extent: The argument files should "
0485             "not be quota files [ino:orig %lu, donor %lu]\n",
0486             orig_inode->i_ino, donor_inode->i_ino);
0487         return -EBUSY;
0488     }
0489 
0490     /* Ext4 move extent supports only extent based file */
0491     if (!(ext4_test_inode_flag(orig_inode, EXT4_INODE_EXTENTS))) {
0492         ext4_debug("ext4 move extent: orig file is not extents "
0493             "based file [ino:orig %lu]\n", orig_inode->i_ino);
0494         return -EOPNOTSUPP;
0495     } else if (!(ext4_test_inode_flag(donor_inode, EXT4_INODE_EXTENTS))) {
0496         ext4_debug("ext4 move extent: donor file is not extents "
0497             "based file [ino:donor %lu]\n", donor_inode->i_ino);
0498         return -EOPNOTSUPP;
0499     }
0500 
0501     if ((!orig_inode->i_size) || (!donor_inode->i_size)) {
0502         ext4_debug("ext4 move extent: File size is 0 byte\n");
0503         return -EINVAL;
0504     }
0505 
0506     /* Start offset should be same */
0507     if ((orig_start & ~(PAGE_MASK >> orig_inode->i_blkbits)) !=
0508         (donor_start & ~(PAGE_MASK >> orig_inode->i_blkbits))) {
0509         ext4_debug("ext4 move extent: orig and donor's start "
0510             "offsets are not aligned [ino:orig %lu, donor %lu]\n",
0511             orig_inode->i_ino, donor_inode->i_ino);
0512         return -EINVAL;
0513     }
0514 
0515     if ((orig_start >= EXT_MAX_BLOCKS) ||
0516         (donor_start >= EXT_MAX_BLOCKS) ||
0517         (*len > EXT_MAX_BLOCKS) ||
0518         (donor_start + *len >= EXT_MAX_BLOCKS) ||
0519         (orig_start + *len >= EXT_MAX_BLOCKS))  {
0520         ext4_debug("ext4 move extent: Can't handle over [%u] blocks "
0521             "[ino:orig %lu, donor %lu]\n", EXT_MAX_BLOCKS,
0522             orig_inode->i_ino, donor_inode->i_ino);
0523         return -EINVAL;
0524     }
0525     if (orig_eof <= orig_start)
0526         *len = 0;
0527     else if (orig_eof < orig_start + *len - 1)
0528         *len = orig_eof - orig_start;
0529     if (donor_eof <= donor_start)
0530         *len = 0;
0531     else if (donor_eof < donor_start + *len - 1)
0532         *len = donor_eof - donor_start;
0533     if (!*len) {
0534         ext4_debug("ext4 move extent: len should not be 0 "
0535             "[ino:orig %lu, donor %lu]\n", orig_inode->i_ino,
0536             donor_inode->i_ino);
0537         return -EINVAL;
0538     }
0539 
0540     return 0;
0541 }
0542 
0543 /**
0544  * ext4_move_extents - Exchange the specified range of a file
0545  *
0546  * @o_filp:     file structure of the original file
0547  * @d_filp:     file structure of the donor file
0548  * @orig_blk:       start offset in block for orig
0549  * @donor_blk:      start offset in block for donor
0550  * @len:        the number of blocks to be moved
0551  * @moved_len:      moved block length
0552  *
0553  * This function returns 0 and moved block length is set in moved_len
0554  * if succeed, otherwise returns error value.
0555  *
0556  */
0557 int
0558 ext4_move_extents(struct file *o_filp, struct file *d_filp, __u64 orig_blk,
0559           __u64 donor_blk, __u64 len, __u64 *moved_len)
0560 {
0561     struct inode *orig_inode = file_inode(o_filp);
0562     struct inode *donor_inode = file_inode(d_filp);
0563     struct ext4_ext_path *path = NULL;
0564     int blocks_per_page = PAGE_SIZE >> orig_inode->i_blkbits;
0565     ext4_lblk_t o_end, o_start = orig_blk;
0566     ext4_lblk_t d_start = donor_blk;
0567     int ret;
0568 
0569     if (orig_inode->i_sb != donor_inode->i_sb) {
0570         ext4_debug("ext4 move extent: The argument files "
0571             "should be in same FS [ino:orig %lu, donor %lu]\n",
0572             orig_inode->i_ino, donor_inode->i_ino);
0573         return -EINVAL;
0574     }
0575 
0576     /* orig and donor should be different inodes */
0577     if (orig_inode == donor_inode) {
0578         ext4_debug("ext4 move extent: The argument files should not "
0579             "be same inode [ino:orig %lu, donor %lu]\n",
0580             orig_inode->i_ino, donor_inode->i_ino);
0581         return -EINVAL;
0582     }
0583 
0584     /* Regular file check */
0585     if (!S_ISREG(orig_inode->i_mode) || !S_ISREG(donor_inode->i_mode)) {
0586         ext4_debug("ext4 move extent: The argument files should be "
0587             "regular file [ino:orig %lu, donor %lu]\n",
0588             orig_inode->i_ino, donor_inode->i_ino);
0589         return -EINVAL;
0590     }
0591 
0592     /* TODO: it's not obvious how to swap blocks for inodes with full
0593        journaling enabled */
0594     if (ext4_should_journal_data(orig_inode) ||
0595         ext4_should_journal_data(donor_inode)) {
0596         ext4_msg(orig_inode->i_sb, KERN_ERR,
0597              "Online defrag not supported with data journaling");
0598         return -EOPNOTSUPP;
0599     }
0600 
0601     if (IS_ENCRYPTED(orig_inode) || IS_ENCRYPTED(donor_inode)) {
0602         ext4_msg(orig_inode->i_sb, KERN_ERR,
0603              "Online defrag not supported for encrypted files");
0604         return -EOPNOTSUPP;
0605     }
0606 
0607     /* Protect orig and donor inodes against a truncate */
0608     lock_two_nondirectories(orig_inode, donor_inode);
0609 
0610     /* Wait for all existing dio workers */
0611     inode_dio_wait(orig_inode);
0612     inode_dio_wait(donor_inode);
0613 
0614     /* Protect extent tree against block allocations via delalloc */
0615     ext4_double_down_write_data_sem(orig_inode, donor_inode);
0616     /* Check the filesystem environment whether move_extent can be done */
0617     ret = mext_check_arguments(orig_inode, donor_inode, orig_blk,
0618                     donor_blk, &len);
0619     if (ret)
0620         goto out;
0621     o_end = o_start + len;
0622 
0623     while (o_start < o_end) {
0624         struct ext4_extent *ex;
0625         ext4_lblk_t cur_blk, next_blk;
0626         pgoff_t orig_page_index, donor_page_index;
0627         int offset_in_page;
0628         int unwritten, cur_len;
0629 
0630         ret = get_ext_path(orig_inode, o_start, &path);
0631         if (ret)
0632             goto out;
0633         ex = path[path->p_depth].p_ext;
0634         next_blk = ext4_ext_next_allocated_block(path);
0635         cur_blk = le32_to_cpu(ex->ee_block);
0636         cur_len = ext4_ext_get_actual_len(ex);
0637         /* Check hole before the start pos */
0638         if (cur_blk + cur_len - 1 < o_start) {
0639             if (next_blk == EXT_MAX_BLOCKS) {
0640                 ret = -ENODATA;
0641                 goto out;
0642             }
0643             d_start += next_blk - o_start;
0644             o_start = next_blk;
0645             continue;
0646         /* Check hole after the start pos */
0647         } else if (cur_blk > o_start) {
0648             /* Skip hole */
0649             d_start += cur_blk - o_start;
0650             o_start = cur_blk;
0651             /* Extent inside requested range ?*/
0652             if (cur_blk >= o_end)
0653                 goto out;
0654         } else { /* in_range(o_start, o_blk, o_len) */
0655             cur_len += cur_blk - o_start;
0656         }
0657         unwritten = ext4_ext_is_unwritten(ex);
0658         if (o_end - o_start < cur_len)
0659             cur_len = o_end - o_start;
0660 
0661         orig_page_index = o_start >> (PAGE_SHIFT -
0662                            orig_inode->i_blkbits);
0663         donor_page_index = d_start >> (PAGE_SHIFT -
0664                            donor_inode->i_blkbits);
0665         offset_in_page = o_start % blocks_per_page;
0666         if (cur_len > blocks_per_page- offset_in_page)
0667             cur_len = blocks_per_page - offset_in_page;
0668         /*
0669          * Up semaphore to avoid following problems:
0670          * a. transaction deadlock among ext4_journal_start,
0671          *    ->write_begin via pagefault, and jbd2_journal_commit
0672          * b. racing with ->read_folio, ->write_begin, and
0673          *    ext4_get_block in move_extent_per_page
0674          */
0675         ext4_double_up_write_data_sem(orig_inode, donor_inode);
0676         /* Swap original branches with new branches */
0677         move_extent_per_page(o_filp, donor_inode,
0678                      orig_page_index, donor_page_index,
0679                      offset_in_page, cur_len,
0680                      unwritten, &ret);
0681         ext4_double_down_write_data_sem(orig_inode, donor_inode);
0682         if (ret < 0)
0683             break;
0684         o_start += cur_len;
0685         d_start += cur_len;
0686     }
0687     *moved_len = o_start - orig_blk;
0688     if (*moved_len > len)
0689         *moved_len = len;
0690 
0691 out:
0692     if (*moved_len) {
0693         ext4_discard_preallocations(orig_inode, 0);
0694         ext4_discard_preallocations(donor_inode, 0);
0695     }
0696 
0697     ext4_ext_drop_refs(path);
0698     kfree(path);
0699     ext4_double_up_write_data_sem(orig_inode, donor_inode);
0700     unlock_two_nondirectories(orig_inode, donor_inode);
0701 
0702     return ret;
0703 }