0001 // SPDX-License-Identifier: GPL-2.0
0002 /*
0003  * fs/f2fs/data.c
0004  *
0005  * Copyright (c) 2012 Samsung Electronics Co., Ltd.
0006  *             http://www.samsung.com/
0007  */
0008 #include <linux/fs.h>
0009 #include <linux/f2fs_fs.h>
0010 #include <linux/buffer_head.h>
0011 #include <linux/sched/mm.h>
0012 #include <linux/mpage.h>
0013 #include <linux/writeback.h>
0014 #include <linux/pagevec.h>
0015 #include <linux/blkdev.h>
0016 #include <linux/bio.h>
0017 #include <linux/blk-crypto.h>
0018 #include <linux/swap.h>
0019 #include <linux/prefetch.h>
0020 #include <linux/uio.h>
0021 #include <linux/sched/signal.h>
0022 #include <linux/fiemap.h>
0023 #include <linux/iomap.h>
0024 
0025 #include "f2fs.h"
0026 #include "node.h"
0027 #include "segment.h"
0028 #include "iostat.h"
0029 #include <trace/events/f2fs.h>
0030 
0031 #define NUM_PREALLOC_POST_READ_CTXS 128
0032 
0033 static struct kmem_cache *bio_post_read_ctx_cache;
0034 static struct kmem_cache *bio_entry_slab;
0035 static mempool_t *bio_post_read_ctx_pool;
0036 static struct bio_set f2fs_bioset;
0037 
0038 #define F2FS_BIO_POOL_SIZE  NR_CURSEG_TYPE
0039 
0040 int __init f2fs_init_bioset(void)
0041 {
0042     if (bioset_init(&f2fs_bioset, F2FS_BIO_POOL_SIZE,
0043                     0, BIOSET_NEED_BVECS))
0044         return -ENOMEM;
0045     return 0;
0046 }
0047 
0048 void f2fs_destroy_bioset(void)
0049 {
0050     bioset_exit(&f2fs_bioset);
0051 }
0052 
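/*
 * Decide whether writeback of @page must be covered by checkpoint:
 * pages of the meta/node inodes, directory data, quota file data and
 * pages marked for GC writeback all qualify; compressed pages are
 * handled separately and never do.
 */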
0053 static bool __is_cp_guaranteed(struct page *page)
0054 {
0055     struct address_space *mapping = page->mapping;
0056     struct inode *inode;
0057     struct f2fs_sb_info *sbi;
0058 
0059     if (!mapping)
0060         return false;
0061 
0062     inode = mapping->host;
0063     sbi = F2FS_I_SB(inode);
0064 
0065     if (inode->i_ino == F2FS_META_INO(sbi) ||
0066             inode->i_ino == F2FS_NODE_INO(sbi) ||
0067             S_ISDIR(inode->i_mode))
0068         return true;
0069 
0070     if (f2fs_is_compressed_page(page))
0071         return false;
0072     if ((S_ISREG(inode->i_mode) && IS_NOQUOTA(inode)) ||
0073             page_private_gcing(page))
0074         return true;
0075     return false;
0076 }
0077 
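/*
 * Classify a read page for the per-type read counters: meta inode
 * pages count as F2FS_RD_META, node inode pages as F2FS_RD_NODE, and
 * everything else as F2FS_RD_DATA.
 */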
0078 static enum count_type __read_io_type(struct page *page)
0079 {
0080     struct address_space *mapping = page_file_mapping(page);
0081 
0082     if (mapping) {
0083         struct inode *inode = mapping->host;
0084         struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
0085 
0086         if (inode->i_ino == F2FS_META_INO(sbi))
0087             return F2FS_RD_META;
0088 
0089         if (inode->i_ino == F2FS_NODE_INO(sbi))
0090             return F2FS_RD_NODE;
0091     }
0092     return F2FS_RD_DATA;
0093 }
0094 
0095 /* postprocessing steps for read bios */
0096 enum bio_post_read_step {
0097 #ifdef CONFIG_FS_ENCRYPTION
0098     STEP_DECRYPT    = 1 << 0,
0099 #else
0100     STEP_DECRYPT    = 0,    /* compile out the decryption-related code */
0101 #endif
0102 #ifdef CONFIG_F2FS_FS_COMPRESSION
0103     STEP_DECOMPRESS = 1 << 1,
0104 #else
0105     STEP_DECOMPRESS = 0,    /* compile out the decompression-related code */
0106 #endif
0107 #ifdef CONFIG_FS_VERITY
0108     STEP_VERITY = 1 << 2,
0109 #else
0110     STEP_VERITY = 0,    /* compile out the verity-related code */
0111 #endif
0112 };
0113 
0114 struct bio_post_read_ctx {
0115     struct bio *bio;
0116     struct f2fs_sb_info *sbi;
0117     struct work_struct work;
0118     unsigned int enabled_steps;
0119     block_t fs_blkaddr;
0120 };
0121 
0122 static void f2fs_finish_read_bio(struct bio *bio, bool in_task)
0123 {
0124     struct bio_vec *bv;
0125     struct bvec_iter_all iter_all;
0126 
0127     /*
0128      * Update and unlock the bio's pagecache pages, and put the
0129      * decompression context for any compressed pages.
0130      */
0131     bio_for_each_segment_all(bv, bio, iter_all) {
0132         struct page *page = bv->bv_page;
0133 
0134         if (f2fs_is_compressed_page(page)) {
0135             if (bio->bi_status)
0136                 f2fs_end_read_compressed_page(page, true, 0,
0137                             in_task);
0138             f2fs_put_page_dic(page, in_task);
0139             continue;
0140         }
0141 
0142         /* PG_error was set if decryption or verity failed. */
0143         if (bio->bi_status || PageError(page)) {
0144             ClearPageUptodate(page);
0145             /* will re-read again later */
0146             ClearPageError(page);
0147         } else {
0148             SetPageUptodate(page);
0149         }
0150         dec_page_count(F2FS_P_SB(page), __read_io_type(page));
0151         unlock_page(page);
0152     }
0153 
0154     if (bio->bi_private)
0155         mempool_free(bio->bi_private, bio_post_read_ctx_pool);
0156     bio_put(bio);
0157 }
0158 
0159 static void f2fs_verify_bio(struct work_struct *work)
0160 {
0161     struct bio_post_read_ctx *ctx =
0162         container_of(work, struct bio_post_read_ctx, work);
0163     struct bio *bio = ctx->bio;
0164     bool may_have_compressed_pages = (ctx->enabled_steps & STEP_DECOMPRESS);
0165 
0166     /*
0167      * fsverity_verify_bio() may call readahead() again, and while verity
0168      * will be disabled for this, decryption and/or decompression may still
0169      * be needed, resulting in another bio_post_read_ctx being allocated.
0170      * So to prevent deadlocks we need to release the current ctx to the
0171      * mempool first.  This assumes that verity is the last post-read step.
0172      */
0173     mempool_free(ctx, bio_post_read_ctx_pool);
0174     bio->bi_private = NULL;
0175 
0176     /*
0177      * Verify the bio's pages with fs-verity.  Exclude compressed pages,
0178      * as those were handled separately by f2fs_end_read_compressed_page().
0179      */
0180     if (may_have_compressed_pages) {
0181         struct bio_vec *bv;
0182         struct bvec_iter_all iter_all;
0183 
0184         bio_for_each_segment_all(bv, bio, iter_all) {
0185             struct page *page = bv->bv_page;
0186 
0187             if (!f2fs_is_compressed_page(page) &&
0188                 !PageError(page) && !fsverity_verify_page(page))
0189                 SetPageError(page);
0190         }
0191     } else {
0192         fsverity_verify_bio(bio);
0193     }
0194 
0195     f2fs_finish_read_bio(bio, true);
0196 }
0197 
0198 /*
0199  * If the bio's data needs to be verified with fs-verity, then enqueue the
0200  * verity work for the bio.  Otherwise finish the bio now.
0201  *
0202  * Note that to avoid deadlocks, the verity work can't be done on the
0203  * decryption/decompression workqueue.  This is because verifying the data pages
0204  * can involve reading verity metadata pages from the file, and these verity
0205  * metadata pages may be encrypted and/or compressed.
0206  */
0207 static void f2fs_verify_and_finish_bio(struct bio *bio, bool in_task)
0208 {
0209     struct bio_post_read_ctx *ctx = bio->bi_private;
0210 
0211     if (ctx && (ctx->enabled_steps & STEP_VERITY)) {
0212         INIT_WORK(&ctx->work, f2fs_verify_bio);
0213         fsverity_enqueue_verify_work(&ctx->work);
0214     } else {
0215         f2fs_finish_read_bio(bio, in_task);
0216     }
0217 }
0218 
0219 /*
0220  * Handle STEP_DECOMPRESS by decompressing any compressed clusters whose last
0221  * remaining page was read by @ctx->bio.
0222  *
0223  * Note that a bio may span clusters (even a mix of compressed and uncompressed
0224  * clusters) or be for just part of a cluster.  STEP_DECOMPRESS just indicates
0225  * that the bio includes at least one compressed page.  The actual decompression
0226  * is done on a per-cluster basis, not a per-bio basis.
0227  */
0228 static void f2fs_handle_step_decompress(struct bio_post_read_ctx *ctx,
0229         bool in_task)
0230 {
0231     struct bio_vec *bv;
0232     struct bvec_iter_all iter_all;
0233     bool all_compressed = true;
0234     block_t blkaddr = ctx->fs_blkaddr;
0235 
0236     bio_for_each_segment_all(bv, ctx->bio, iter_all) {
0237         struct page *page = bv->bv_page;
0238 
0239         /* PG_error was set if decryption failed. */
0240         if (f2fs_is_compressed_page(page))
0241             f2fs_end_read_compressed_page(page, PageError(page),
0242                         blkaddr, in_task);
0243         else
0244             all_compressed = false;
0245 
0246         blkaddr++;
0247     }
0248 
0249     /*
0250      * Optimization: if all the bio's pages are compressed, then scheduling
0251      * the per-bio verity work is unnecessary, as verity will be fully
0252      * handled at the compression cluster level.
0253      */
0254     if (all_compressed)
0255         ctx->enabled_steps &= ~STEP_VERITY;
0256 }
0257 
0258 static void f2fs_post_read_work(struct work_struct *work)
0259 {
0260     struct bio_post_read_ctx *ctx =
0261         container_of(work, struct bio_post_read_ctx, work);
0262 
0263     if (ctx->enabled_steps & STEP_DECRYPT)
0264         fscrypt_decrypt_bio(ctx->bio);
0265 
0266     if (ctx->enabled_steps & STEP_DECOMPRESS)
0267         f2fs_handle_step_decompress(ctx, true);
0268 
0269     f2fs_verify_and_finish_bio(ctx->bio, true);
0270 }
0271 
0272 static void f2fs_read_end_io(struct bio *bio)
0273 {
0274     struct f2fs_sb_info *sbi = F2FS_P_SB(bio_first_page_all(bio));
0275     struct bio_post_read_ctx *ctx;
0276     bool intask = in_task();
0277 
0278     iostat_update_and_unbind_ctx(bio, 0);
0279     ctx = bio->bi_private;
0280 
0281     if (time_to_inject(sbi, FAULT_READ_IO)) {
0282         f2fs_show_injection_info(sbi, FAULT_READ_IO);
0283         bio->bi_status = BLK_STS_IOERR;
0284     }
0285 
0286     if (bio->bi_status) {
0287         f2fs_finish_read_bio(bio, intask);
0288         return;
0289     }
0290 
0291     if (ctx) {
0292         unsigned int enabled_steps = ctx->enabled_steps &
0293                     (STEP_DECRYPT | STEP_DECOMPRESS);
0294 
0295         /*
0296          * If decompression is the only post-read step needed and we
0297          * are not in low-memory mode, handle it right here.
0298          */
0299         if (enabled_steps == STEP_DECOMPRESS &&
0300                 !f2fs_low_mem_mode(sbi)) {
0301             f2fs_handle_step_decompress(ctx, intask);
0302         } else if (enabled_steps) {
0303             INIT_WORK(&ctx->work, f2fs_post_read_work);
0304             queue_work(ctx->sbi->post_read_wq, &ctx->work);
0305             return;
0306         }
0307     }
0308 
0309     f2fs_verify_and_finish_bio(bio, intask);
0310 }
0311 
0312 static void f2fs_write_end_io(struct bio *bio)
0313 {
0314     struct f2fs_sb_info *sbi;
0315     struct bio_vec *bvec;
0316     struct bvec_iter_all iter_all;
0317 
0318     iostat_update_and_unbind_ctx(bio, 1);
0319     sbi = bio->bi_private;
0320 
0321     if (time_to_inject(sbi, FAULT_WRITE_IO)) {
0322         f2fs_show_injection_info(sbi, FAULT_WRITE_IO);
0323         bio->bi_status = BLK_STS_IOERR;
0324     }
0325 
0326     bio_for_each_segment_all(bvec, bio, iter_all) {
0327         struct page *page = bvec->bv_page;
0328         enum count_type type = WB_DATA_TYPE(page);
0329 
0330         if (page_private_dummy(page)) {
0331             clear_page_private_dummy(page);
0332             unlock_page(page);
0333             mempool_free(page, sbi->write_io_dummy);
0334 
0335             if (unlikely(bio->bi_status))
0336                 f2fs_stop_checkpoint(sbi, true);
0337             continue;
0338         }
0339 
0340         fscrypt_finalize_bounce_page(&page);
0341 
0342 #ifdef CONFIG_F2FS_FS_COMPRESSION
0343         if (f2fs_is_compressed_page(page)) {
0344             f2fs_compress_write_end_io(bio, page);
0345             continue;
0346         }
0347 #endif
0348 
0349         if (unlikely(bio->bi_status)) {
0350             mapping_set_error(page->mapping, -EIO);
0351             if (type == F2FS_WB_CP_DATA)
0352                 f2fs_stop_checkpoint(sbi, true);
0353         }
0354 
0355         f2fs_bug_on(sbi, page->mapping == NODE_MAPPING(sbi) &&
0356                     page->index != nid_of_node(page));
0357 
0358         dec_page_count(sbi, type);
0359         if (f2fs_in_warm_node_list(sbi, page))
0360             f2fs_del_fsync_node_entry(sbi, page);
0361         clear_page_private_gcing(page);
0362         end_page_writeback(page);
0363     }
0364     if (!get_pages(sbi, F2FS_WB_CP_DATA) &&
0365                 wq_has_sleeper(&sbi->cp_wait))
0366         wake_up(&sbi->cp_wait);
0367 
0368     bio_put(bio);
0369 }
0370 
0371 struct block_device *f2fs_target_device(struct f2fs_sb_info *sbi,
0372         block_t blk_addr, sector_t *sector)
0373 {
0374     struct block_device *bdev = sbi->sb->s_bdev;
0375     int i;
0376 
0377     if (f2fs_is_multi_device(sbi)) {
0378         for (i = 0; i < sbi->s_ndevs; i++) {
0379             if (FDEV(i).start_blk <= blk_addr &&
0380                 FDEV(i).end_blk >= blk_addr) {
0381                 blk_addr -= FDEV(i).start_blk;
0382                 bdev = FDEV(i).bdev;
0383                 break;
0384             }
0385         }
0386     }
0387 
0388     if (sector)
0389         *sector = SECTOR_FROM_BLOCK(blk_addr);
0390     return bdev;
0391 }
0392 
0393 int f2fs_target_device_index(struct f2fs_sb_info *sbi, block_t blkaddr)
0394 {
0395     int i;
0396 
0397     if (!f2fs_is_multi_device(sbi))
0398         return 0;
0399 
0400     for (i = 0; i < sbi->s_ndevs; i++)
0401         if (FDEV(i).start_blk <= blkaddr && FDEV(i).end_blk >= blkaddr)
0402             return i;
0403     return 0;
0404 }
0405 
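/*
 * Derive extra op flags for DATA/NODE writes from the per-temperature
 * bits in sbi->data_io_flag / sbi->node_io_flag: the low bits select
 * REQ_FUA and the next bits select REQ_META (see the table below).
 */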
0406 static blk_opf_t f2fs_io_flags(struct f2fs_io_info *fio)
0407 {
0408     unsigned int temp_mask = (1 << NR_TEMP_TYPE) - 1;
0409     unsigned int fua_flag, meta_flag, io_flag;
0410     blk_opf_t op_flags = 0;
0411 
0412     if (fio->op != REQ_OP_WRITE)
0413         return 0;
0414     if (fio->type == DATA)
0415         io_flag = fio->sbi->data_io_flag;
0416     else if (fio->type == NODE)
0417         io_flag = fio->sbi->node_io_flag;
0418     else
0419         return 0;
0420 
0421     fua_flag = io_flag & temp_mask;
0422     meta_flag = (io_flag >> NR_TEMP_TYPE) & temp_mask;
0423 
0424     /*
0425      * data/node io flag bits per temp:
0426      *      REQ_META     |      REQ_FUA      |
0427      *    5 |    4 |   3 |    2 |    1 |   0 |
0428      * Cold | Warm | Hot | Cold | Warm | Hot |
0429      */
0430     if ((1 << fio->temp) & meta_flag)
0431         op_flags |= REQ_META;
0432     if ((1 << fio->temp) & fua_flag)
0433         op_flags |= REQ_FUA;
0434     return op_flags;
0435 }
0436 
0437 static struct bio *__bio_alloc(struct f2fs_io_info *fio, int npages)
0438 {
0439     struct f2fs_sb_info *sbi = fio->sbi;
0440     struct block_device *bdev;
0441     sector_t sector;
0442     struct bio *bio;
0443 
0444     bdev = f2fs_target_device(sbi, fio->new_blkaddr, &sector);
0445     bio = bio_alloc_bioset(bdev, npages,
0446                 fio->op | fio->op_flags | f2fs_io_flags(fio),
0447                 GFP_NOIO, &f2fs_bioset);
0448     bio->bi_iter.bi_sector = sector;
0449     if (is_read_io(fio->op)) {
0450         bio->bi_end_io = f2fs_read_end_io;
0451         bio->bi_private = NULL;
0452     } else {
0453         bio->bi_end_io = f2fs_write_end_io;
0454         bio->bi_private = sbi;
0455     }
0456     iostat_alloc_and_bind_ctx(sbi, bio, NULL);
0457 
0458     if (fio->io_wbc)
0459         wbc_init_bio(fio->io_wbc, bio);
0460 
0461     return bio;
0462 }
0463 
0464 static void f2fs_set_bio_crypt_ctx(struct bio *bio, const struct inode *inode,
0465                   pgoff_t first_idx,
0466                   const struct f2fs_io_info *fio,
0467                   gfp_t gfp_mask)
0468 {
0469     /*
0470      * The f2fs garbage collector sets ->encrypted_page when it wants to
0471      * read/write raw data without encryption.
0472      */
0473     if (!fio || !fio->encrypted_page)
0474         fscrypt_set_bio_crypt_ctx(bio, inode, first_idx, gfp_mask);
0475 }
0476 
0477 static bool f2fs_crypt_mergeable_bio(struct bio *bio, const struct inode *inode,
0478                      pgoff_t next_idx,
0479                      const struct f2fs_io_info *fio)
0480 {
0481     /*
0482      * The f2fs garbage collector sets ->encrypted_page when it wants to
0483      * read/write raw data without encryption.
0484      */
0485     if (fio && fio->encrypted_page)
0486         return !bio_has_crypt_ctx(bio);
0487 
0488     return fscrypt_mergeable_bio(bio, inode, next_idx);
0489 }
0490 
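/*
 * Submit a bio.  For DATA/NODE writes with IO alignment enabled, the
 * bio is first padded with zero-filled dummy pages so that its size
 * becomes a multiple of F2FS_IO_SIZE blocks.
 */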
0491 static inline void __submit_bio(struct f2fs_sb_info *sbi,
0492                 struct bio *bio, enum page_type type)
0493 {
0494     if (!is_read_io(bio_op(bio))) {
0495         unsigned int start;
0496 
0497         if (type != DATA && type != NODE)
0498             goto submit_io;
0499 
0500         if (f2fs_lfs_mode(sbi) && current->plug)
0501             blk_finish_plug(current->plug);
0502 
0503         if (!F2FS_IO_ALIGNED(sbi))
0504             goto submit_io;
0505 
0506         start = bio->bi_iter.bi_size >> F2FS_BLKSIZE_BITS;
0507         start %= F2FS_IO_SIZE(sbi);
0508 
0509         if (start == 0)
0510             goto submit_io;
0511 
0512         /* fill dummy pages */
0513         for (; start < F2FS_IO_SIZE(sbi); start++) {
0514             struct page *page =
0515                 mempool_alloc(sbi->write_io_dummy,
0516                           GFP_NOIO | __GFP_NOFAIL);
0517             f2fs_bug_on(sbi, !page);
0518 
0519             lock_page(page);
0520 
0521             zero_user_segment(page, 0, PAGE_SIZE);
0522             set_page_private_dummy(page);
0523 
0524             if (bio_add_page(bio, page, PAGE_SIZE, 0) < PAGE_SIZE)
0525                 f2fs_bug_on(sbi, 1);
0526         }
0527         /*
0528          * In the NODE case, we lose the next block address chain. So, we
0529          * need to do a checkpoint in f2fs_sync_file.
0530          */
0531         if (type == NODE)
0532             set_sbi_flag(sbi, SBI_NEED_CP);
0533     }
0534 submit_io:
0535     if (is_read_io(bio_op(bio)))
0536         trace_f2fs_submit_read_bio(sbi->sb, type, bio);
0537     else
0538         trace_f2fs_submit_write_bio(sbi->sb, type, bio);
0539 
0540     iostat_update_submit_ctx(bio, type);
0541     submit_bio(bio);
0542 }
0543 
0544 void f2fs_submit_bio(struct f2fs_sb_info *sbi,
0545                 struct bio *bio, enum page_type type)
0546 {
0547     __submit_bio(sbi, bio, type);
0548 }
0549 
0550 static void __submit_merged_bio(struct f2fs_bio_info *io)
0551 {
0552     struct f2fs_io_info *fio = &io->fio;
0553 
0554     if (!io->bio)
0555         return;
0556 
0557     if (is_read_io(fio->op))
0558         trace_f2fs_prepare_read_bio(io->sbi->sb, fio->type, io->bio);
0559     else
0560         trace_f2fs_prepare_write_bio(io->sbi->sb, fio->type, io->bio);
0561 
0562     __submit_bio(io->sbi, io->bio, fio->type);
0563     io->bio = NULL;
0564 }
0565 
0566 static bool __has_merged_page(struct bio *bio, struct inode *inode,
0567                         struct page *page, nid_t ino)
0568 {
0569     struct bio_vec *bvec;
0570     struct bvec_iter_all iter_all;
0571 
0572     if (!bio)
0573         return false;
0574 
0575     if (!inode && !page && !ino)
0576         return true;
0577 
0578     bio_for_each_segment_all(bvec, bio, iter_all) {
0579         struct page *target = bvec->bv_page;
0580 
0581         if (fscrypt_is_bounce_page(target)) {
0582             target = fscrypt_pagecache_page(target);
0583             if (IS_ERR(target))
0584                 continue;
0585         }
0586         if (f2fs_is_compressed_page(target)) {
0587             target = f2fs_compress_control_page(target);
0588             if (IS_ERR(target))
0589                 continue;
0590         }
0591 
0592         if (inode && inode == target->mapping->host)
0593             return true;
0594         if (page && page == target)
0595             return true;
0596         if (ino && ino == ino_of_node(target))
0597             return true;
0598     }
0599 
0600     return false;
0601 }
0602 
0603 int f2fs_init_write_merge_io(struct f2fs_sb_info *sbi)
0604 {
0605     int i;
0606 
0607     for (i = 0; i < NR_PAGE_TYPE; i++) {
0608         int n = (i == META) ? 1 : NR_TEMP_TYPE;
0609         int j;
0610 
0611         sbi->write_io[i] = f2fs_kmalloc(sbi,
0612                 array_size(n, sizeof(struct f2fs_bio_info)),
0613                 GFP_KERNEL);
0614         if (!sbi->write_io[i])
0615             return -ENOMEM;
0616 
0617         for (j = HOT; j < n; j++) {
0618             init_f2fs_rwsem(&sbi->write_io[i][j].io_rwsem);
0619             sbi->write_io[i][j].sbi = sbi;
0620             sbi->write_io[i][j].bio = NULL;
0621             spin_lock_init(&sbi->write_io[i][j].io_lock);
0622             INIT_LIST_HEAD(&sbi->write_io[i][j].io_list);
0623             INIT_LIST_HEAD(&sbi->write_io[i][j].bio_list);
0624             init_f2fs_rwsem(&sbi->write_io[i][j].bio_list_lock);
0625         }
0626     }
0627 
0628     return 0;
0629 }
0630 
0631 static void __f2fs_submit_merged_write(struct f2fs_sb_info *sbi,
0632                 enum page_type type, enum temp_type temp)
0633 {
0634     enum page_type btype = PAGE_TYPE_OF_BIO(type);
0635     struct f2fs_bio_info *io = sbi->write_io[btype] + temp;
0636 
0637     f2fs_down_write(&io->io_rwsem);
0638 
0639     /* change META to META_FLUSH in the checkpoint procedure */
0640     if (type >= META_FLUSH) {
0641         io->fio.type = META_FLUSH;
0642         io->bio->bi_opf |= REQ_META | REQ_PRIO | REQ_SYNC;
0643         if (!test_opt(sbi, NOBARRIER))
0644             io->bio->bi_opf |= REQ_PREFLUSH | REQ_FUA;
0645     }
0646     __submit_merged_bio(io);
0647     f2fs_up_write(&io->io_rwsem);
0648 }
0649 
0650 static void __submit_merged_write_cond(struct f2fs_sb_info *sbi,
0651                 struct inode *inode, struct page *page,
0652                 nid_t ino, enum page_type type, bool force)
0653 {
0654     enum temp_type temp;
0655     bool ret = true;
0656 
0657     for (temp = HOT; temp < NR_TEMP_TYPE; temp++) {
0658         if (!force) {
0659             enum page_type btype = PAGE_TYPE_OF_BIO(type);
0660             struct f2fs_bio_info *io = sbi->write_io[btype] + temp;
0661 
0662             f2fs_down_read(&io->io_rwsem);
0663             ret = __has_merged_page(io->bio, inode, page, ino);
0664             f2fs_up_read(&io->io_rwsem);
0665         }
0666         if (ret)
0667             __f2fs_submit_merged_write(sbi, type, temp);
0668 
0669         /* TODO: use HOT temp only for meta pages now. */
0670         if (type >= META)
0671             break;
0672     }
0673 }
0674 
0675 void f2fs_submit_merged_write(struct f2fs_sb_info *sbi, enum page_type type)
0676 {
0677     __submit_merged_write_cond(sbi, NULL, NULL, 0, type, true);
0678 }
0679 
0680 void f2fs_submit_merged_write_cond(struct f2fs_sb_info *sbi,
0681                 struct inode *inode, struct page *page,
0682                 nid_t ino, enum page_type type)
0683 {
0684     __submit_merged_write_cond(sbi, inode, page, ino, type, false);
0685 }
0686 
0687 void f2fs_flush_merged_writes(struct f2fs_sb_info *sbi)
0688 {
0689     f2fs_submit_merged_write(sbi, DATA);
0690     f2fs_submit_merged_write(sbi, NODE);
0691     f2fs_submit_merged_write(sbi, META);
0692 }
0693 
0694 /*
0695  * Fill the locked page with data located in the block address.
0696  * A caller needs to unlock the page on failure.
0697  */
0698 int f2fs_submit_page_bio(struct f2fs_io_info *fio)
0699 {
0700     struct bio *bio;
0701     struct page *page = fio->encrypted_page ?
0702             fio->encrypted_page : fio->page;
0703 
0704     if (!f2fs_is_valid_blkaddr(fio->sbi, fio->new_blkaddr,
0705             fio->is_por ? META_POR : (__is_meta_io(fio) ?
0706             META_GENERIC : DATA_GENERIC_ENHANCE)))
0707         return -EFSCORRUPTED;
0708 
0709     trace_f2fs_submit_page_bio(page, fio);
0710 
0711     /* Allocate a new bio */
0712     bio = __bio_alloc(fio, 1);
0713 
0714     f2fs_set_bio_crypt_ctx(bio, fio->page->mapping->host,
0715                    fio->page->index, fio, GFP_NOIO);
0716 
0717     if (bio_add_page(bio, page, PAGE_SIZE, 0) < PAGE_SIZE) {
0718         bio_put(bio);
0719         return -EFAULT;
0720     }
0721 
0722     if (fio->io_wbc && !is_read_io(fio->op))
0723         wbc_account_cgroup_owner(fio->io_wbc, page, PAGE_SIZE);
0724 
0725     inc_page_count(fio->sbi, is_read_io(fio->op) ?
0726             __read_io_type(page): WB_DATA_TYPE(fio->page));
0727 
0728     __submit_bio(fio->sbi, bio, fio->type);
0729     return 0;
0730 }
0731 
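/*
 * A block can be merged into @bio only if the bio has not yet reached
 * sbi->max_io_bytes, the new block immediately follows the last one,
 * and both blocks live on the same block device.
 */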
0732 static bool page_is_mergeable(struct f2fs_sb_info *sbi, struct bio *bio,
0733                 block_t last_blkaddr, block_t cur_blkaddr)
0734 {
0735     if (unlikely(sbi->max_io_bytes &&
0736             bio->bi_iter.bi_size >= sbi->max_io_bytes))
0737         return false;
0738     if (last_blkaddr + 1 != cur_blkaddr)
0739         return false;
0740     return bio->bi_bdev == f2fs_target_device(sbi, cur_blkaddr, NULL);
0741 }
0742 
0743 static bool io_type_is_mergeable(struct f2fs_bio_info *io,
0744                         struct f2fs_io_info *fio)
0745 {
0746     if (io->fio.op != fio->op)
0747         return false;
0748     return io->fio.op_flags == fio->op_flags;
0749 }
0750 
0751 static bool io_is_mergeable(struct f2fs_sb_info *sbi, struct bio *bio,
0752                     struct f2fs_bio_info *io,
0753                     struct f2fs_io_info *fio,
0754                     block_t last_blkaddr,
0755                     block_t cur_blkaddr)
0756 {
0757     if (F2FS_IO_ALIGNED(sbi) && (fio->type == DATA || fio->type == NODE)) {
0758         unsigned int filled_blocks =
0759                 F2FS_BYTES_TO_BLK(bio->bi_iter.bi_size);
0760         unsigned int io_size = F2FS_IO_SIZE(sbi);
0761         unsigned int left_vecs = bio->bi_max_vecs - bio->bi_vcnt;
0762 
0763         /* IO in the bio is already aligned, but not enough bio vecs are left */
0764         if (!(filled_blocks % io_size) && left_vecs < io_size)
0765             return false;
0766     }
0767     if (!page_is_mergeable(sbi, bio, last_blkaddr, cur_blkaddr))
0768         return false;
0769     return io_type_is_mergeable(io, fio);
0770 }
0771 
0772 static void add_bio_entry(struct f2fs_sb_info *sbi, struct bio *bio,
0773                 struct page *page, enum temp_type temp)
0774 {
0775     struct f2fs_bio_info *io = sbi->write_io[DATA] + temp;
0776     struct bio_entry *be;
0777 
0778     be = f2fs_kmem_cache_alloc(bio_entry_slab, GFP_NOFS, true, NULL);
0779     be->bio = bio;
0780     bio_get(bio);
0781 
0782     if (bio_add_page(bio, page, PAGE_SIZE, 0) != PAGE_SIZE)
0783         f2fs_bug_on(sbi, 1);
0784 
0785     f2fs_down_write(&io->bio_list_lock);
0786     list_add_tail(&be->list, &io->bio_list);
0787     f2fs_up_write(&io->bio_list_lock);
0788 }
0789 
0790 static void del_bio_entry(struct bio_entry *be)
0791 {
0792     list_del(&be->list);
0793     kmem_cache_free(bio_entry_slab, be);
0794 }
0795 
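/*
 * Try to merge @page into the in-place-update bio tracked by *bio on
 * one of the per-temperature bio lists.  If the page cannot be merged
 * (crypto context mismatch or the bio is full), the bio is submitted
 * instead and -EAGAIN is returned with *bio reset to NULL, so the
 * caller can allocate a new bio and retry.
 */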
0796 static int add_ipu_page(struct f2fs_io_info *fio, struct bio **bio,
0797                             struct page *page)
0798 {
0799     struct f2fs_sb_info *sbi = fio->sbi;
0800     enum temp_type temp;
0801     bool found = false;
0802     int ret = -EAGAIN;
0803 
0804     for (temp = HOT; temp < NR_TEMP_TYPE && !found; temp++) {
0805         struct f2fs_bio_info *io = sbi->write_io[DATA] + temp;
0806         struct list_head *head = &io->bio_list;
0807         struct bio_entry *be;
0808 
0809         f2fs_down_write(&io->bio_list_lock);
0810         list_for_each_entry(be, head, list) {
0811             if (be->bio != *bio)
0812                 continue;
0813 
0814             found = true;
0815 
0816             f2fs_bug_on(sbi, !page_is_mergeable(sbi, *bio,
0817                                 *fio->last_block,
0818                                 fio->new_blkaddr));
0819             if (f2fs_crypt_mergeable_bio(*bio,
0820                     fio->page->mapping->host,
0821                     fio->page->index, fio) &&
0822                 bio_add_page(*bio, page, PAGE_SIZE, 0) ==
0823                     PAGE_SIZE) {
0824                 ret = 0;
0825                 break;
0826             }
0827 
0828             /* page can't be merged into bio; submit the bio */
0829             del_bio_entry(be);
0830             __submit_bio(sbi, *bio, DATA);
0831             break;
0832         }
0833         f2fs_up_write(&io->bio_list_lock);
0834     }
0835 
0836     if (ret) {
0837         bio_put(*bio);
0838         *bio = NULL;
0839     }
0840 
0841     return ret;
0842 }
0843 
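/*
 * Find and submit a pending in-place-update bio: either the one passed
 * in via @bio, or, when no target bio is given, whichever queued bio
 * already contains @page.  The lookup is repeated under the write lock
 * before the matching entry is removed, and any remaining caller
 * reference in *bio is dropped at the end.
 */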
0844 void f2fs_submit_merged_ipu_write(struct f2fs_sb_info *sbi,
0845                     struct bio **bio, struct page *page)
0846 {
0847     enum temp_type temp;
0848     bool found = false;
0849     struct bio *target = bio ? *bio : NULL;
0850 
0851     for (temp = HOT; temp < NR_TEMP_TYPE && !found; temp++) {
0852         struct f2fs_bio_info *io = sbi->write_io[DATA] + temp;
0853         struct list_head *head = &io->bio_list;
0854         struct bio_entry *be;
0855 
0856         if (list_empty(head))
0857             continue;
0858 
0859         f2fs_down_read(&io->bio_list_lock);
0860         list_for_each_entry(be, head, list) {
0861             if (target)
0862                 found = (target == be->bio);
0863             else
0864                 found = __has_merged_page(be->bio, NULL,
0865                                 page, 0);
0866             if (found)
0867                 break;
0868         }
0869         f2fs_up_read(&io->bio_list_lock);
0870 
0871         if (!found)
0872             continue;
0873 
0874         found = false;
0875 
0876         f2fs_down_write(&io->bio_list_lock);
0877         list_for_each_entry(be, head, list) {
0878             if (target)
0879                 found = (target == be->bio);
0880             else
0881                 found = __has_merged_page(be->bio, NULL,
0882                                 page, 0);
0883             if (found) {
0884                 target = be->bio;
0885                 del_bio_entry(be);
0886                 break;
0887             }
0888         }
0889         f2fs_up_write(&io->bio_list_lock);
0890     }
0891 
0892     if (found)
0893         __submit_bio(sbi, target, DATA);
0894     if (bio && *bio) {
0895         bio_put(*bio);
0896         *bio = NULL;
0897     }
0898 }
0899 
0900 int f2fs_merge_page_bio(struct f2fs_io_info *fio)
0901 {
0902     struct bio *bio = *fio->bio;
0903     struct page *page = fio->encrypted_page ?
0904             fio->encrypted_page : fio->page;
0905 
0906     if (!f2fs_is_valid_blkaddr(fio->sbi, fio->new_blkaddr,
0907             __is_meta_io(fio) ? META_GENERIC : DATA_GENERIC))
0908         return -EFSCORRUPTED;
0909 
0910     trace_f2fs_submit_page_bio(page, fio);
0911 
0912     if (bio && !page_is_mergeable(fio->sbi, bio, *fio->last_block,
0913                         fio->new_blkaddr))
0914         f2fs_submit_merged_ipu_write(fio->sbi, &bio, NULL);
0915 alloc_new:
0916     if (!bio) {
0917         bio = __bio_alloc(fio, BIO_MAX_VECS);
0918         f2fs_set_bio_crypt_ctx(bio, fio->page->mapping->host,
0919                        fio->page->index, fio, GFP_NOIO);
0920 
0921         add_bio_entry(fio->sbi, bio, page, fio->temp);
0922     } else {
0923         if (add_ipu_page(fio, &bio, page))
0924             goto alloc_new;
0925     }
0926 
0927     if (fio->io_wbc)
0928         wbc_account_cgroup_owner(fio->io_wbc, page, PAGE_SIZE);
0929 
0930     inc_page_count(fio->sbi, WB_DATA_TYPE(page));
0931 
0932     *fio->last_block = fio->new_blkaddr;
0933     *fio->bio = bio;
0934 
0935     return 0;
0936 }
0937 
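/*
 * Merged write path: append the page to the in-flight bio of the
 * matching page type and temperature, submitting the old bio first
 * whenever the new block cannot be merged (non-contiguous address,
 * different op flags or crypto context).  When fio->in_list is set,
 * pages are pulled from the per-temperature io_list one by one.
 */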
0938 void f2fs_submit_page_write(struct f2fs_io_info *fio)
0939 {
0940     struct f2fs_sb_info *sbi = fio->sbi;
0941     enum page_type btype = PAGE_TYPE_OF_BIO(fio->type);
0942     struct f2fs_bio_info *io = sbi->write_io[btype] + fio->temp;
0943     struct page *bio_page;
0944 
0945     f2fs_bug_on(sbi, is_read_io(fio->op));
0946 
0947     f2fs_down_write(&io->io_rwsem);
0948 next:
0949     if (fio->in_list) {
0950         spin_lock(&io->io_lock);
0951         if (list_empty(&io->io_list)) {
0952             spin_unlock(&io->io_lock);
0953             goto out;
0954         }
0955         fio = list_first_entry(&io->io_list,
0956                         struct f2fs_io_info, list);
0957         list_del(&fio->list);
0958         spin_unlock(&io->io_lock);
0959     }
0960 
0961     verify_fio_blkaddr(fio);
0962 
0963     if (fio->encrypted_page)
0964         bio_page = fio->encrypted_page;
0965     else if (fio->compressed_page)
0966         bio_page = fio->compressed_page;
0967     else
0968         bio_page = fio->page;
0969 
0970     /* set submitted = true as a return value */
0971     fio->submitted = true;
0972 
0973     inc_page_count(sbi, WB_DATA_TYPE(bio_page));
0974 
0975     if (io->bio &&
0976         (!io_is_mergeable(sbi, io->bio, io, fio, io->last_block_in_bio,
0977                   fio->new_blkaddr) ||
0978          !f2fs_crypt_mergeable_bio(io->bio, fio->page->mapping->host,
0979                        bio_page->index, fio)))
0980         __submit_merged_bio(io);
0981 alloc_new:
0982     if (io->bio == NULL) {
0983         if (F2FS_IO_ALIGNED(sbi) &&
0984                 (fio->type == DATA || fio->type == NODE) &&
0985                 fio->new_blkaddr & F2FS_IO_SIZE_MASK(sbi)) {
0986             dec_page_count(sbi, WB_DATA_TYPE(bio_page));
0987             fio->retry = true;
0988             goto skip;
0989         }
0990         io->bio = __bio_alloc(fio, BIO_MAX_VECS);
0991         f2fs_set_bio_crypt_ctx(io->bio, fio->page->mapping->host,
0992                        bio_page->index, fio, GFP_NOIO);
0993         io->fio = *fio;
0994     }
0995 
0996     if (bio_add_page(io->bio, bio_page, PAGE_SIZE, 0) < PAGE_SIZE) {
0997         __submit_merged_bio(io);
0998         goto alloc_new;
0999     }
1000 
1001     if (fio->io_wbc)
1002         wbc_account_cgroup_owner(fio->io_wbc, bio_page, PAGE_SIZE);
1003 
1004     io->last_block_in_bio = fio->new_blkaddr;
1005 
1006     trace_f2fs_submit_page_write(fio->page, fio);
1007 skip:
1008     if (fio->in_list)
1009         goto next;
1010 out:
1011     if (is_sbi_flag_set(sbi, SBI_IS_SHUTDOWN) ||
1012                 !f2fs_is_checkpoint_ready(sbi))
1013         __submit_merged_bio(io);
1014     f2fs_up_write(&io->io_rwsem);
1015 }
1016 
1017 static struct bio *f2fs_grab_read_bio(struct inode *inode, block_t blkaddr,
1018                       unsigned nr_pages, blk_opf_t op_flag,
1019                       pgoff_t first_idx, bool for_write)
1020 {
1021     struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
1022     struct bio *bio;
1023     struct bio_post_read_ctx *ctx = NULL;
1024     unsigned int post_read_steps = 0;
1025     sector_t sector;
1026     struct block_device *bdev = f2fs_target_device(sbi, blkaddr, &sector);
1027 
1028     bio = bio_alloc_bioset(bdev, bio_max_segs(nr_pages),
1029                    REQ_OP_READ | op_flag,
1030                    for_write ? GFP_NOIO : GFP_KERNEL, &f2fs_bioset);
1031     if (!bio)
1032         return ERR_PTR(-ENOMEM);
1033     bio->bi_iter.bi_sector = sector;
1034     f2fs_set_bio_crypt_ctx(bio, inode, first_idx, NULL, GFP_NOFS);
1035     bio->bi_end_io = f2fs_read_end_io;
1036 
1037     if (fscrypt_inode_uses_fs_layer_crypto(inode))
1038         post_read_steps |= STEP_DECRYPT;
1039 
1040     if (f2fs_need_verity(inode, first_idx))
1041         post_read_steps |= STEP_VERITY;
1042 
1043     /*
1044      * STEP_DECOMPRESS is handled specially, since a compressed file might
1045      * contain both compressed and uncompressed clusters.  We'll allocate a
1046      * bio_post_read_ctx if the file is compressed, but the caller is
1047      * responsible for enabling STEP_DECOMPRESS if it's actually needed.
1048      */
1049 
1050     if (post_read_steps || f2fs_compressed_file(inode)) {
1051         /* Due to the mempool, this never fails. */
1052         ctx = mempool_alloc(bio_post_read_ctx_pool, GFP_NOFS);
1053         ctx->bio = bio;
1054         ctx->sbi = sbi;
1055         ctx->enabled_steps = post_read_steps;
1056         ctx->fs_blkaddr = blkaddr;
1057         bio->bi_private = ctx;
1058     }
1059     iostat_alloc_and_bind_ctx(sbi, bio, ctx);
1060 
1061     return bio;
1062 }
1063 
1064 /* This can handle encrypted pages as well */
1065 static int f2fs_submit_page_read(struct inode *inode, struct page *page,
1066                  block_t blkaddr, blk_opf_t op_flags,
1067                  bool for_write)
1068 {
1069     struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
1070     struct bio *bio;
1071 
1072     bio = f2fs_grab_read_bio(inode, blkaddr, 1, op_flags,
1073                     page->index, for_write);
1074     if (IS_ERR(bio))
1075         return PTR_ERR(bio);
1076 
1077     /* wait for GCed page writeback via META_MAPPING */
1078     f2fs_wait_on_block_writeback(inode, blkaddr);
1079 
1080     if (bio_add_page(bio, page, PAGE_SIZE, 0) < PAGE_SIZE) {
1081         bio_put(bio);
1082         return -EFAULT;
1083     }
1084     ClearPageError(page);
1085     inc_page_count(sbi, F2FS_RD_DATA);
1086     f2fs_update_iostat(sbi, FS_DATA_READ_IO, F2FS_BLKSIZE);
1087     __submit_bio(sbi, bio, DATA);
1088     return 0;
1089 }
1090 
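/*
 * Store dn->data_blkaddr into the block address array of the dnode
 * page, taking the extra attribute area into account when the dnode
 * is the inode itself.
 */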
1091 static void __set_data_blkaddr(struct dnode_of_data *dn)
1092 {
1093     struct f2fs_node *rn = F2FS_NODE(dn->node_page);
1094     __le32 *addr_array;
1095     int base = 0;
1096 
1097     if (IS_INODE(dn->node_page) && f2fs_has_extra_attr(dn->inode))
1098         base = get_extra_isize(dn->inode);
1099 
1100     /* Get physical address of data block */
1101     addr_array = blkaddr_in_node(rn);
1102     addr_array[base + dn->ofs_in_node] = cpu_to_le32(dn->data_blkaddr);
1103 }
1104 
1105 /*
1106  * Lock ordering for the change of data block address:
1107  * ->data_page
1108  *  ->node_page
1109  *    update block addresses in the node page
1110  */
1111 void f2fs_set_data_blkaddr(struct dnode_of_data *dn)
1112 {
1113     f2fs_wait_on_page_writeback(dn->node_page, NODE, true, true);
1114     __set_data_blkaddr(dn);
1115     if (set_page_dirty(dn->node_page))
1116         dn->node_changed = true;
1117 }
1118 
1119 void f2fs_update_data_blkaddr(struct dnode_of_data *dn, block_t blkaddr)
1120 {
1121     dn->data_blkaddr = blkaddr;
1122     f2fs_set_data_blkaddr(dn);
1123     f2fs_update_extent_cache(dn);
1124 }
1125 
1126 /* dn->ofs_in_node is returned pointing just past the last reserved block */
1127 int f2fs_reserve_new_blocks(struct dnode_of_data *dn, blkcnt_t count)
1128 {
1129     struct f2fs_sb_info *sbi = F2FS_I_SB(dn->inode);
1130     int err;
1131 
1132     if (!count)
1133         return 0;
1134 
1135     if (unlikely(is_inode_flag_set(dn->inode, FI_NO_ALLOC)))
1136         return -EPERM;
1137     if (unlikely((err = inc_valid_block_count(sbi, dn->inode, &count))))
1138         return err;
1139 
1140     trace_f2fs_reserve_new_blocks(dn->inode, dn->nid,
1141                         dn->ofs_in_node, count);
1142 
1143     f2fs_wait_on_page_writeback(dn->node_page, NODE, true, true);
1144 
1145     for (; count > 0; dn->ofs_in_node++) {
1146         block_t blkaddr = f2fs_data_blkaddr(dn);
1147 
1148         if (blkaddr == NULL_ADDR) {
1149             dn->data_blkaddr = NEW_ADDR;
1150             __set_data_blkaddr(dn);
1151             count--;
1152         }
1153     }
1154 
1155     if (set_page_dirty(dn->node_page))
1156         dn->node_changed = true;
1157     return 0;
1158 }
1159 
1160 /* Should keep dn->ofs_in_node unchanged */
1161 int f2fs_reserve_new_block(struct dnode_of_data *dn)
1162 {
1163     unsigned int ofs_in_node = dn->ofs_in_node;
1164     int ret;
1165 
1166     ret = f2fs_reserve_new_blocks(dn, 1);
1167     dn->ofs_in_node = ofs_in_node;
1168     return ret;
1169 }
1170 
1171 int f2fs_reserve_block(struct dnode_of_data *dn, pgoff_t index)
1172 {
1173     bool need_put = dn->inode_page ? false : true;
1174     int err;
1175 
1176     err = f2fs_get_dnode_of_data(dn, index, ALLOC_NODE);
1177     if (err)
1178         return err;
1179 
1180     if (dn->data_blkaddr == NULL_ADDR)
1181         err = f2fs_reserve_new_block(dn);
1182     if (err || need_put)
1183         f2fs_put_dnode(dn);
1184     return err;
1185 }
1186 
1187 int f2fs_get_block(struct dnode_of_data *dn, pgoff_t index)
1188 {
1189     struct extent_info ei = {0, };
1190     struct inode *inode = dn->inode;
1191 
1192     if (f2fs_lookup_extent_cache(inode, index, &ei)) {
1193         dn->data_blkaddr = ei.blk + index - ei.fofs;
1194         return 0;
1195     }
1196 
1197     return f2fs_reserve_block(dn, index);
1198 }
1199 
1200 struct page *f2fs_get_read_data_page(struct inode *inode, pgoff_t index,
1201                      blk_opf_t op_flags, bool for_write)
1202 {
1203     struct address_space *mapping = inode->i_mapping;
1204     struct dnode_of_data dn;
1205     struct page *page;
1206     struct extent_info ei = {0, };
1207     int err;
1208 
1209     page = f2fs_grab_cache_page(mapping, index, for_write);
1210     if (!page)
1211         return ERR_PTR(-ENOMEM);
1212 
1213     if (f2fs_lookup_extent_cache(inode, index, &ei)) {
1214         dn.data_blkaddr = ei.blk + index - ei.fofs;
1215         if (!f2fs_is_valid_blkaddr(F2FS_I_SB(inode), dn.data_blkaddr,
1216                         DATA_GENERIC_ENHANCE_READ)) {
1217             err = -EFSCORRUPTED;
1218             goto put_err;
1219         }
1220         goto got_it;
1221     }
1222 
1223     set_new_dnode(&dn, inode, NULL, NULL, 0);
1224     err = f2fs_get_dnode_of_data(&dn, index, LOOKUP_NODE);
1225     if (err)
1226         goto put_err;
1227     f2fs_put_dnode(&dn);
1228 
1229     if (unlikely(dn.data_blkaddr == NULL_ADDR)) {
1230         err = -ENOENT;
1231         goto put_err;
1232     }
1233     if (dn.data_blkaddr != NEW_ADDR &&
1234             !f2fs_is_valid_blkaddr(F2FS_I_SB(inode),
1235                         dn.data_blkaddr,
1236                         DATA_GENERIC_ENHANCE)) {
1237         err = -EFSCORRUPTED;
1238         goto put_err;
1239     }
1240 got_it:
1241     if (PageUptodate(page)) {
1242         unlock_page(page);
1243         return page;
1244     }
1245 
1246     /*
1247      * A new dentry page is allocated but cannot be written, since its
1248      * new inode page couldn't be allocated due to -ENOSPC.
1249      * In such a case, its blkaddr remains NEW_ADDR.
1250      * See f2fs_add_link -> f2fs_get_new_data_page ->
1251      * f2fs_init_inode_metadata.
1252      */
1253     if (dn.data_blkaddr == NEW_ADDR) {
1254         zero_user_segment(page, 0, PAGE_SIZE);
1255         if (!PageUptodate(page))
1256             SetPageUptodate(page);
1257         unlock_page(page);
1258         return page;
1259     }
1260 
1261     err = f2fs_submit_page_read(inode, page, dn.data_blkaddr,
1262                         op_flags, for_write);
1263     if (err)
1264         goto put_err;
1265     return page;
1266 
1267 put_err:
1268     f2fs_put_page(page, 1);
1269     return ERR_PTR(err);
1270 }
1271 
1272 struct page *f2fs_find_data_page(struct inode *inode, pgoff_t index)
1273 {
1274     struct address_space *mapping = inode->i_mapping;
1275     struct page *page;
1276 
1277     page = find_get_page(mapping, index);
1278     if (page && PageUptodate(page))
1279         return page;
1280     f2fs_put_page(page, 0);
1281 
1282     page = f2fs_get_read_data_page(inode, index, 0, false);
1283     if (IS_ERR(page))
1284         return page;
1285 
1286     if (PageUptodate(page))
1287         return page;
1288 
1289     wait_on_page_locked(page);
1290     if (unlikely(!PageUptodate(page))) {
1291         f2fs_put_page(page, 0);
1292         return ERR_PTR(-EIO);
1293     }
1294     return page;
1295 }
1296 
1297 /*
1298  * If it tries to access a hole, return an error, because the callers
1299  * (functions in dir.c and GC) need to know whether this page exists
1300  * or not.
1301  */
1302 struct page *f2fs_get_lock_data_page(struct inode *inode, pgoff_t index,
1303                             bool for_write)
1304 {
1305     struct address_space *mapping = inode->i_mapping;
1306     struct page *page;
1307 repeat:
1308     page = f2fs_get_read_data_page(inode, index, 0, for_write);
1309     if (IS_ERR(page))
1310         return page;
1311 
1312     /* wait for read completion */
1313     lock_page(page);
1314     if (unlikely(page->mapping != mapping)) {
1315         f2fs_put_page(page, 1);
1316         goto repeat;
1317     }
1318     if (unlikely(!PageUptodate(page))) {
1319         f2fs_put_page(page, 1);
1320         return ERR_PTR(-EIO);
1321     }
1322     return page;
1323 }
1324 
1325 /*
1326  * Caller ensures that this data page is never allocated.
1327  * A new zero-filled data page is allocated in the page cache.
1328  *
1329  * Also, caller should grab and release a rwsem by calling f2fs_lock_op() and
1330  * f2fs_unlock_op().
1331  * Note that ipage is set only by make_empty_dir, and if any error occurs,
1332  * ipage should be released by this function.
1333  */
1334 struct page *f2fs_get_new_data_page(struct inode *inode,
1335         struct page *ipage, pgoff_t index, bool new_i_size)
1336 {
1337     struct address_space *mapping = inode->i_mapping;
1338     struct page *page;
1339     struct dnode_of_data dn;
1340     int err;
1341 
1342     page = f2fs_grab_cache_page(mapping, index, true);
1343     if (!page) {
1344         /*
1345          * before exiting, we should make sure ipage will be released
1346          * if any error occurs.
1347          */
1348         f2fs_put_page(ipage, 1);
1349         return ERR_PTR(-ENOMEM);
1350     }
1351 
1352     set_new_dnode(&dn, inode, ipage, NULL, 0);
1353     err = f2fs_reserve_block(&dn, index);
1354     if (err) {
1355         f2fs_put_page(page, 1);
1356         return ERR_PTR(err);
1357     }
1358     if (!ipage)
1359         f2fs_put_dnode(&dn);
1360 
1361     if (PageUptodate(page))
1362         goto got_it;
1363 
1364     if (dn.data_blkaddr == NEW_ADDR) {
1365         zero_user_segment(page, 0, PAGE_SIZE);
1366         if (!PageUptodate(page))
1367             SetPageUptodate(page);
1368     } else {
1369         f2fs_put_page(page, 1);
1370 
1371         /* if ipage exists, blkaddr should be NEW_ADDR */
1372         f2fs_bug_on(F2FS_I_SB(inode), ipage);
1373         page = f2fs_get_lock_data_page(inode, index, true);
1374         if (IS_ERR(page))
1375             return page;
1376     }
1377 got_it:
1378     if (new_i_size && i_size_read(inode) <
1379                 ((loff_t)(index + 1) << PAGE_SHIFT))
1380         f2fs_i_size_write(inode, ((loff_t)(index + 1) << PAGE_SHIFT));
1381     return page;
1382 }
1383 
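/*
 * Allocate an on-disk block for this dnode slot: charge one block to
 * the inode if the slot was NULL_ADDR, pick a new block address via
 * f2fs_allocate_data_block(), invalidate cached pages of the old
 * address, and record the new address in the dnode and extent cache.
 */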
1384 static int __allocate_data_block(struct dnode_of_data *dn, int seg_type)
1385 {
1386     struct f2fs_sb_info *sbi = F2FS_I_SB(dn->inode);
1387     struct f2fs_summary sum;
1388     struct node_info ni;
1389     block_t old_blkaddr;
1390     blkcnt_t count = 1;
1391     int err;
1392 
1393     if (unlikely(is_inode_flag_set(dn->inode, FI_NO_ALLOC)))
1394         return -EPERM;
1395 
1396     err = f2fs_get_node_info(sbi, dn->nid, &ni, false);
1397     if (err)
1398         return err;
1399 
1400     dn->data_blkaddr = f2fs_data_blkaddr(dn);
1401     if (dn->data_blkaddr != NULL_ADDR)
1402         goto alloc;
1403 
1404     if (unlikely((err = inc_valid_block_count(sbi, dn->inode, &count))))
1405         return err;
1406 
1407 alloc:
1408     set_summary(&sum, dn->nid, dn->ofs_in_node, ni.version);
1409     old_blkaddr = dn->data_blkaddr;
1410     f2fs_allocate_data_block(sbi, NULL, old_blkaddr, &dn->data_blkaddr,
1411                 &sum, seg_type, NULL);
1412     if (GET_SEGNO(sbi, old_blkaddr) != NULL_SEGNO) {
1413         invalidate_mapping_pages(META_MAPPING(sbi),
1414                     old_blkaddr, old_blkaddr);
1415         f2fs_invalidate_compress_page(sbi, old_blkaddr);
1416     }
1417     f2fs_update_data_blkaddr(dn, dn->data_blkaddr);
1418     return 0;
1419 }
1420 
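/*
 * F2FS_GET_BLOCK_PRE_AIO only reserves blocks in the dnode (see
 * f2fs_map_blocks()), so taking sbi->node_change is enough; every
 * other flag may allocate blocks and uses f2fs_lock_op() /
 * f2fs_unlock_op() instead.
 */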
1421 void f2fs_do_map_lock(struct f2fs_sb_info *sbi, int flag, bool lock)
1422 {
1423     if (flag == F2FS_GET_BLOCK_PRE_AIO) {
1424         if (lock)
1425             f2fs_down_read(&sbi->node_change);
1426         else
1427             f2fs_up_read(&sbi->node_change);
1428     } else {
1429         if (lock)
1430             f2fs_lock_op(sbi);
1431         else
1432             f2fs_unlock_op(sbi);
1433     }
1434 }
1435 
1436 /*
1437  * f2fs_map_blocks() tries to find or build a mapping that maps
1438  * contiguous logical blocks to physical blocks, and returns such
1439  * info via the f2fs_map_blocks structure.
1440  */
1441 int f2fs_map_blocks(struct inode *inode, struct f2fs_map_blocks *map,
1442                         int create, int flag)
1443 {
1444     unsigned int maxblocks = map->m_len;
1445     struct dnode_of_data dn;
1446     struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
1447     int mode = map->m_may_create ? ALLOC_NODE : LOOKUP_NODE;
1448     pgoff_t pgofs, end_offset, end;
1449     int err = 0, ofs = 1;
1450     unsigned int ofs_in_node, last_ofs_in_node;
1451     blkcnt_t prealloc;
1452     struct extent_info ei = {0, };
1453     block_t blkaddr;
1454     unsigned int start_pgofs;
1455     int bidx = 0;
1456 
1457     if (!maxblocks)
1458         return 0;
1459 
1460     map->m_bdev = inode->i_sb->s_bdev;
1461     map->m_multidev_dio =
1462         f2fs_allow_multi_device_dio(F2FS_I_SB(inode), flag);
1463 
1464     map->m_len = 0;
1465     map->m_flags = 0;
1466 
1467     /* it only supports block size == page size */
1468     pgofs = (pgoff_t)map->m_lblk;
1469     end = pgofs + maxblocks;
1470 
1471     if (!create && f2fs_lookup_extent_cache(inode, pgofs, &ei)) {
1472         if (f2fs_lfs_mode(sbi) && flag == F2FS_GET_BLOCK_DIO &&
1473                             map->m_may_create)
1474             goto next_dnode;
1475 
1476         map->m_pblk = ei.blk + pgofs - ei.fofs;
1477         map->m_len = min((pgoff_t)maxblocks, ei.fofs + ei.len - pgofs);
1478         map->m_flags = F2FS_MAP_MAPPED;
1479         if (map->m_next_extent)
1480             *map->m_next_extent = pgofs + map->m_len;
1481 
1482         /* only needed for hardware encryption, but done to avoid potential future issues */
1483         if (flag == F2FS_GET_BLOCK_DIO)
1484             f2fs_wait_on_block_writeback_range(inode,
1485                         map->m_pblk, map->m_len);
1486 
1487         if (map->m_multidev_dio) {
1488             block_t blk_addr = map->m_pblk;
1489 
1490             bidx = f2fs_target_device_index(sbi, map->m_pblk);
1491 
1492             map->m_bdev = FDEV(bidx).bdev;
1493             map->m_pblk -= FDEV(bidx).start_blk;
1494             map->m_len = min(map->m_len,
1495                 FDEV(bidx).end_blk + 1 - map->m_pblk);
1496 
1497             if (map->m_may_create)
1498                 f2fs_update_device_state(sbi, inode->i_ino,
1499                             blk_addr, map->m_len);
1500         }
1501         goto out;
1502     }
1503 
1504 next_dnode:
1505     if (map->m_may_create)
1506         f2fs_do_map_lock(sbi, flag, true);
1507 
1508     /* When reading holes, we need the corresponding node page */
1509     set_new_dnode(&dn, inode, NULL, NULL, 0);
1510     err = f2fs_get_dnode_of_data(&dn, pgofs, mode);
1511     if (err) {
1512         if (flag == F2FS_GET_BLOCK_BMAP)
1513             map->m_pblk = 0;
1514 
1515         if (err == -ENOENT) {
1516             /*
1517              * There is one exceptional case: read_node_page()
1518              * may return -ENOENT because the filesystem has been
1519              * shut down or hit cp_error, so force the error
1520              * number to EIO in that case.
1521              */
1522             if (map->m_may_create &&
1523                 (is_sbi_flag_set(sbi, SBI_IS_SHUTDOWN) ||
1524                 f2fs_cp_error(sbi))) {
1525                 err = -EIO;
1526                 goto unlock_out;
1527             }
1528 
1529             err = 0;
1530             if (map->m_next_pgofs)
1531                 *map->m_next_pgofs =
1532                     f2fs_get_next_page_offset(&dn, pgofs);
1533             if (map->m_next_extent)
1534                 *map->m_next_extent =
1535                     f2fs_get_next_page_offset(&dn, pgofs);
1536         }
1537         goto unlock_out;
1538     }
1539 
1540     start_pgofs = pgofs;
1541     prealloc = 0;
1542     last_ofs_in_node = ofs_in_node = dn.ofs_in_node;
1543     end_offset = ADDRS_PER_PAGE(dn.node_page, inode);
1544 
1545 next_block:
1546     blkaddr = f2fs_data_blkaddr(&dn);
1547 
1548     if (__is_valid_data_blkaddr(blkaddr) &&
1549         !f2fs_is_valid_blkaddr(sbi, blkaddr, DATA_GENERIC_ENHANCE)) {
1550         err = -EFSCORRUPTED;
1551         goto sync_out;
1552     }
1553 
1554     if (__is_valid_data_blkaddr(blkaddr)) {
1555         /* use out-of-place update for direct IO under LFS mode */
1556         if (f2fs_lfs_mode(sbi) && flag == F2FS_GET_BLOCK_DIO &&
1557                             map->m_may_create) {
1558             err = __allocate_data_block(&dn, map->m_seg_type);
1559             if (err)
1560                 goto sync_out;
1561             blkaddr = dn.data_blkaddr;
1562             set_inode_flag(inode, FI_APPEND_WRITE);
1563         }
1564     } else {
1565         if (create) {
1566             if (unlikely(f2fs_cp_error(sbi))) {
1567                 err = -EIO;
1568                 goto sync_out;
1569             }
1570             if (flag == F2FS_GET_BLOCK_PRE_AIO) {
1571                 if (blkaddr == NULL_ADDR) {
1572                     prealloc++;
1573                     last_ofs_in_node = dn.ofs_in_node;
1574                 }
1575             } else {
1576                 WARN_ON(flag != F2FS_GET_BLOCK_PRE_DIO &&
1577                     flag != F2FS_GET_BLOCK_DIO);
1578                 err = __allocate_data_block(&dn,
1579                             map->m_seg_type);
1580                 if (!err) {
1581                     if (flag == F2FS_GET_BLOCK_PRE_DIO)
1582                         file_need_truncate(inode);
1583                     set_inode_flag(inode, FI_APPEND_WRITE);
1584                 }
1585             }
1586             if (err)
1587                 goto sync_out;
1588             map->m_flags |= F2FS_MAP_NEW;
1589             blkaddr = dn.data_blkaddr;
1590         } else {
1591             if (f2fs_compressed_file(inode) &&
1592                     f2fs_sanity_check_cluster(&dn) &&
1593                     (flag != F2FS_GET_BLOCK_FIEMAP ||
1594                     IS_ENABLED(CONFIG_F2FS_CHECK_FS))) {
1595                 err = -EFSCORRUPTED;
1596                 goto sync_out;
1597             }
1598             if (flag == F2FS_GET_BLOCK_BMAP) {
1599                 map->m_pblk = 0;
1600                 goto sync_out;
1601             }
1602             if (flag == F2FS_GET_BLOCK_PRECACHE)
1603                 goto sync_out;
1604             if (flag == F2FS_GET_BLOCK_FIEMAP &&
1605                         blkaddr == NULL_ADDR) {
1606                 if (map->m_next_pgofs)
1607                     *map->m_next_pgofs = pgofs + 1;
1608                 goto sync_out;
1609             }
1610             if (flag != F2FS_GET_BLOCK_FIEMAP) {
1611                 /* for defragment case */
1612                 if (map->m_next_pgofs)
1613                     *map->m_next_pgofs = pgofs + 1;
1614                 goto sync_out;
1615             }
1616         }
1617     }
1618 
1619     if (flag == F2FS_GET_BLOCK_PRE_AIO)
1620         goto skip;
1621 
1622     if (map->m_multidev_dio)
1623         bidx = f2fs_target_device_index(sbi, blkaddr);
1624 
1625     if (map->m_len == 0) {
1626         /* preallocated unwritten block should be mapped for fiemap. */
1627         if (blkaddr == NEW_ADDR)
1628             map->m_flags |= F2FS_MAP_UNWRITTEN;
1629         map->m_flags |= F2FS_MAP_MAPPED;
1630 
1631         map->m_pblk = blkaddr;
1632         map->m_len = 1;
1633 
1634         if (map->m_multidev_dio)
1635             map->m_bdev = FDEV(bidx).bdev;
1636     } else if ((map->m_pblk != NEW_ADDR &&
1637             blkaddr == (map->m_pblk + ofs)) ||
1638             (map->m_pblk == NEW_ADDR && blkaddr == NEW_ADDR) ||
1639             flag == F2FS_GET_BLOCK_PRE_DIO) {
1640         if (map->m_multidev_dio && map->m_bdev != FDEV(bidx).bdev)
1641             goto sync_out;
1642         ofs++;
1643         map->m_len++;
1644     } else {
1645         goto sync_out;
1646     }
1647 
1648 skip:
1649     dn.ofs_in_node++;
1650     pgofs++;
1651 
1652     /* preallocate blocks in batch for one dnode page */
1653     if (flag == F2FS_GET_BLOCK_PRE_AIO &&
1654             (pgofs == end || dn.ofs_in_node == end_offset)) {
1655 
1656         dn.ofs_in_node = ofs_in_node;
1657         err = f2fs_reserve_new_blocks(&dn, prealloc);
1658         if (err)
1659             goto sync_out;
1660 
1661         map->m_len += dn.ofs_in_node - ofs_in_node;
1662         if (prealloc && dn.ofs_in_node != last_ofs_in_node + 1) {
1663             err = -ENOSPC;
1664             goto sync_out;
1665         }
1666         dn.ofs_in_node = end_offset;
1667     }
1668 
1669     if (pgofs >= end)
1670         goto sync_out;
1671     else if (dn.ofs_in_node < end_offset)
1672         goto next_block;
1673 
1674     if (flag == F2FS_GET_BLOCK_PRECACHE) {
1675         if (map->m_flags & F2FS_MAP_MAPPED) {
1676             unsigned int ofs = start_pgofs - map->m_lblk;
1677 
1678             f2fs_update_extent_cache_range(&dn,
1679                 start_pgofs, map->m_pblk + ofs,
1680                 map->m_len - ofs);
1681         }
1682     }
1683 
1684     f2fs_put_dnode(&dn);
1685 
1686     if (map->m_may_create) {
1687         f2fs_do_map_lock(sbi, flag, false);
1688         f2fs_balance_fs(sbi, dn.node_changed);
1689     }
1690     goto next_dnode;
1691 
1692 sync_out:
1693 
1694     if (flag == F2FS_GET_BLOCK_DIO && map->m_flags & F2FS_MAP_MAPPED) {
1695         /*
1696          * for hardware encryption, but also to avoid potential
1697          * issues in the future
1698          */
1699         f2fs_wait_on_block_writeback_range(inode,
1700                         map->m_pblk, map->m_len);
1701 
1702         if (map->m_multidev_dio) {
1703             block_t blk_addr = map->m_pblk;
1704 
1705             bidx = f2fs_target_device_index(sbi, map->m_pblk);
1706 
1707             map->m_bdev = FDEV(bidx).bdev;
1708             map->m_pblk -= FDEV(bidx).start_blk;
1709 
1710             if (map->m_may_create)
1711                 f2fs_update_device_state(sbi, inode->i_ino,
1712                             blk_addr, map->m_len);
1713 
1714             f2fs_bug_on(sbi, blk_addr + map->m_len >
1715                         FDEV(bidx).end_blk + 1);
1716         }
1717     }
1718 
1719     if (flag == F2FS_GET_BLOCK_PRECACHE) {
1720         if (map->m_flags & F2FS_MAP_MAPPED) {
1721             unsigned int ofs = start_pgofs - map->m_lblk;
1722 
1723             f2fs_update_extent_cache_range(&dn,
1724                 start_pgofs, map->m_pblk + ofs,
1725                 map->m_len - ofs);
1726         }
1727         if (map->m_next_extent)
1728             *map->m_next_extent = pgofs + 1;
1729     }
1730     f2fs_put_dnode(&dn);
1731 unlock_out:
1732     if (map->m_may_create) {
1733         f2fs_do_map_lock(sbi, flag, false);
1734         f2fs_balance_fs(sbi, dn.node_changed);
1735     }
1736 out:
1737     trace_f2fs_map_blocks(inode, map, create, flag, err);
1738     return err;
1739 }
1740 
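/*
 * Check whether the byte range [pos, pos + len) is entirely backed by
 * already-allocated blocks, i.e. a write to it would be a pure overwrite
 * needing no new allocation.  The range is walked with read-only
 * f2fs_map_blocks() calls (m_may_create == false); any error or hole
 * (m_len == 0) makes this return false.
 */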
1741 bool f2fs_overwrite_io(struct inode *inode, loff_t pos, size_t len)
1742 {
1743     struct f2fs_map_blocks map;
1744     block_t last_lblk;
1745     int err;
1746 
1747     if (pos + len > i_size_read(inode))
1748         return false;
1749 
1750     map.m_lblk = F2FS_BYTES_TO_BLK(pos);
1751     map.m_next_pgofs = NULL;
1752     map.m_next_extent = NULL;
1753     map.m_seg_type = NO_CHECK_TYPE;
1754     map.m_may_create = false;
1755     last_lblk = F2FS_BLK_ALIGN(pos + len);
1756 
1757     while (map.m_lblk < last_lblk) {
1758         map.m_len = last_lblk - map.m_lblk;
1759         err = f2fs_map_blocks(inode, &map, 0, F2FS_GET_BLOCK_DEFAULT);
1760         if (err || map.m_len == 0)
1761             return false;
1762         map.m_lblk += map.m_len;
1763     }
1764     return true;
1765 }
1766 
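/*
 * Byte/block conversion helpers based on the inode's block size.  With the
 * default 4KB block size (i_blkbits == 12), for example,
 * bytes_to_blks(inode, 8192) == 2 and blks_to_bytes(inode, 3) == 12288.
 */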
1767 static inline u64 bytes_to_blks(struct inode *inode, u64 bytes)
1768 {
1769     return (bytes >> inode->i_blkbits);
1770 }
1771 
1772 static inline u64 blks_to_bytes(struct inode *inode, u64 blks)
1773 {
1774     return (blks << inode->i_blkbits);
1775 }
1776 
1777 static int f2fs_xattr_fiemap(struct inode *inode,
1778                 struct fiemap_extent_info *fieinfo)
1779 {
1780     struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
1781     struct page *page;
1782     struct node_info ni;
1783     __u64 phys = 0, len;
1784     __u32 flags;
1785     nid_t xnid = F2FS_I(inode)->i_xattr_nid;
1786     int err = 0;
1787 
1788     if (f2fs_has_inline_xattr(inode)) {
1789         int offset;
1790 
1791         page = f2fs_grab_cache_page(NODE_MAPPING(sbi),
1792                         inode->i_ino, false);
1793         if (!page)
1794             return -ENOMEM;
1795 
1796         err = f2fs_get_node_info(sbi, inode->i_ino, &ni, false);
1797         if (err) {
1798             f2fs_put_page(page, 1);
1799             return err;
1800         }
1801 
1802         phys = blks_to_bytes(inode, ni.blk_addr);
1803         offset = offsetof(struct f2fs_inode, i_addr) +
1804                     sizeof(__le32) * (DEF_ADDRS_PER_INODE -
1805                     get_inline_xattr_addrs(inode));
1806 
1807         phys += offset;
1808         len = inline_xattr_size(inode);
1809 
1810         f2fs_put_page(page, 1);
1811 
1812         flags = FIEMAP_EXTENT_DATA_INLINE | FIEMAP_EXTENT_NOT_ALIGNED;
1813 
1814         if (!xnid)
1815             flags |= FIEMAP_EXTENT_LAST;
1816 
1817         err = fiemap_fill_next_extent(fieinfo, 0, phys, len, flags);
1818         trace_f2fs_fiemap(inode, 0, phys, len, flags, err);
1819         if (err || err == 1)
1820             return err;
1821     }
1822 
1823     if (xnid) {
1824         page = f2fs_grab_cache_page(NODE_MAPPING(sbi), xnid, false);
1825         if (!page)
1826             return -ENOMEM;
1827 
1828         err = f2fs_get_node_info(sbi, xnid, &ni, false);
1829         if (err) {
1830             f2fs_put_page(page, 1);
1831             return err;
1832         }
1833 
1834         phys = blks_to_bytes(inode, ni.blk_addr);
1835         len = inode->i_sb->s_blocksize;
1836 
1837         f2fs_put_page(page, 1);
1838 
1839         flags = FIEMAP_EXTENT_LAST;
1840     }
1841 
1842     if (phys) {
1843         err = fiemap_fill_next_extent(fieinfo, 0, phys, len, flags);
1844         trace_f2fs_fiemap(inode, 0, phys, len, flags, err);
1845     }
1846 
1847     return (err < 0 ? err : 0);
1848 }
1849 
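/*
 * Rough upper bound on the number of data blocks addressable from a single
 * inode: the direct pointers in the inode itself, two direct node blocks,
 * two indirect node blocks and one double-indirect node block, i.e.
 *
 *   ADDRS_PER_INODE
 *   + 2 * ADDRS_PER_BLOCK
 *   + 2 * ADDRS_PER_BLOCK * NIDS_PER_BLOCK
 *   + ADDRS_PER_BLOCK * NIDS_PER_BLOCK * NIDS_PER_BLOCK
 *
 * The exact per-block counts depend on the block size and on the inode's
 * inline/extra attribute layout.
 */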
1850 static loff_t max_inode_blocks(struct inode *inode)
1851 {
1852     loff_t result = ADDRS_PER_INODE(inode);
1853     loff_t leaf_count = ADDRS_PER_BLOCK(inode);
1854 
1855     /* two direct node blocks */
1856     result += (leaf_count * 2);
1857 
1858     /* two indirect node blocks */
1859     leaf_count *= NIDS_PER_BLOCK;
1860     result += (leaf_count * 2);
1861 
1862     /* one double indirect node block */
1863     leaf_count *= NIDS_PER_BLOCK;
1864     result += leaf_count;
1865 
1866     return result;
1867 }
1868 
1869 int f2fs_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
1870         u64 start, u64 len)
1871 {
1872     struct f2fs_map_blocks map;
1873     sector_t start_blk, last_blk;
1874     pgoff_t next_pgofs;
1875     u64 logical = 0, phys = 0, size = 0;
1876     u32 flags = 0;
1877     int ret = 0;
1878     bool compr_cluster = false, compr_appended;
1879     unsigned int cluster_size = F2FS_I(inode)->i_cluster_size;
1880     unsigned int count_in_cluster = 0;
1881     loff_t maxbytes;
1882 
1883     if (fieinfo->fi_flags & FIEMAP_FLAG_CACHE) {
1884         ret = f2fs_precache_extents(inode);
1885         if (ret)
1886             return ret;
1887     }
1888 
1889     ret = fiemap_prep(inode, fieinfo, start, &len, FIEMAP_FLAG_XATTR);
1890     if (ret)
1891         return ret;
1892 
1893     inode_lock(inode);
1894 
1895     maxbytes = max_file_blocks(inode) << F2FS_BLKSIZE_BITS;
1896     if (start > maxbytes) {
1897         ret = -EFBIG;
1898         goto out;
1899     }
1900 
1901     if (len > maxbytes || (maxbytes - len) < start)
1902         len = maxbytes - start;
1903 
1904     if (fieinfo->fi_flags & FIEMAP_FLAG_XATTR) {
1905         ret = f2fs_xattr_fiemap(inode, fieinfo);
1906         goto out;
1907     }
1908 
1909     if (f2fs_has_inline_data(inode) || f2fs_has_inline_dentry(inode)) {
1910         ret = f2fs_inline_data_fiemap(inode, fieinfo, start, len);
1911         if (ret != -EAGAIN)
1912             goto out;
1913     }
1914 
1915     if (bytes_to_blks(inode, len) == 0)
1916         len = blks_to_bytes(inode, 1);
1917 
1918     start_blk = bytes_to_blks(inode, start);
1919     last_blk = bytes_to_blks(inode, start + len - 1);
1920 
1921 next:
1922     memset(&map, 0, sizeof(map));
1923     map.m_lblk = start_blk;
1924     map.m_len = bytes_to_blks(inode, len);
1925     map.m_next_pgofs = &next_pgofs;
1926     map.m_seg_type = NO_CHECK_TYPE;
1927 
1928     if (compr_cluster) {
1929         map.m_lblk += 1;
1930         map.m_len = cluster_size - count_in_cluster;
1931     }
1932 
1933     ret = f2fs_map_blocks(inode, &map, 0, F2FS_GET_BLOCK_FIEMAP);
1934     if (ret)
1935         goto out;
1936 
1937     /* HOLE */
1938     if (!compr_cluster && !(map.m_flags & F2FS_MAP_FLAGS)) {
1939         start_blk = next_pgofs;
1940 
1941         if (blks_to_bytes(inode, start_blk) < blks_to_bytes(inode,
1942                         max_inode_blocks(inode)))
1943             goto prep_next;
1944 
1945         flags |= FIEMAP_EXTENT_LAST;
1946     }
1947 
1948     compr_appended = false;
1949     /* In the case of a compressed cluster, append this to the last extent */
1950     if (compr_cluster && ((map.m_flags & F2FS_MAP_UNWRITTEN) ||
1951             !(map.m_flags & F2FS_MAP_FLAGS))) {
1952         compr_appended = true;
1953         goto skip_fill;
1954     }
1955 
1956     if (size) {
1957         flags |= FIEMAP_EXTENT_MERGED;
1958         if (IS_ENCRYPTED(inode))
1959             flags |= FIEMAP_EXTENT_DATA_ENCRYPTED;
1960 
1961         ret = fiemap_fill_next_extent(fieinfo, logical,
1962                 phys, size, flags);
1963         trace_f2fs_fiemap(inode, logical, phys, size, flags, ret);
1964         if (ret)
1965             goto out;
1966         size = 0;
1967     }
1968 
1969     if (start_blk > last_blk)
1970         goto out;
1971 
1972 skip_fill:
1973     if (map.m_pblk == COMPRESS_ADDR) {
1974         compr_cluster = true;
1975         count_in_cluster = 1;
1976     } else if (compr_appended) {
1977         unsigned int appended_blks = cluster_size -
1978                         count_in_cluster + 1;
1979         size += blks_to_bytes(inode, appended_blks);
1980         start_blk += appended_blks;
1981         compr_cluster = false;
1982     } else {
1983         logical = blks_to_bytes(inode, start_blk);
1984         phys = __is_valid_data_blkaddr(map.m_pblk) ?
1985             blks_to_bytes(inode, map.m_pblk) : 0;
1986         size = blks_to_bytes(inode, map.m_len);
1987         flags = 0;
1988 
1989         if (compr_cluster) {
1990             flags = FIEMAP_EXTENT_ENCODED;
1991             count_in_cluster += map.m_len;
1992             if (count_in_cluster == cluster_size) {
1993                 compr_cluster = false;
1994                 size += blks_to_bytes(inode, 1);
1995             }
1996         } else if (map.m_flags & F2FS_MAP_UNWRITTEN) {
1997             flags = FIEMAP_EXTENT_UNWRITTEN;
1998         }
1999 
2000         start_blk += bytes_to_blks(inode, size);
2001     }
2002 
2003 prep_next:
2004     cond_resched();
2005     if (fatal_signal_pending(current))
2006         ret = -EINTR;
2007     else
2008         goto next;
2009 out:
2010     if (ret == 1)
2011         ret = 0;
2012 
2013     inode_unlock(inode);
2014     return ret;
2015 }
2016 
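/*
 * Reads are normally bounded by i_size, but while fs-verity is enabled (or
 * is being enabled) the Merkle tree blocks live past EOF in the same
 * address space, so the read path must be allowed to go up to s_maxbytes.
 */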
2017 static inline loff_t f2fs_readpage_limit(struct inode *inode)
2018 {
2019     if (IS_ENABLED(CONFIG_FS_VERITY) &&
2020         (IS_VERITY(inode) || f2fs_verity_in_progress(inode)))
2021         return inode->i_sb->s_maxbytes;
2022 
2023     return i_size_read(inode);
2024 }
2025 
2026 static int f2fs_read_single_page(struct inode *inode, struct page *page,
2027                     unsigned nr_pages,
2028                     struct f2fs_map_blocks *map,
2029                     struct bio **bio_ret,
2030                     sector_t *last_block_in_bio,
2031                     bool is_readahead)
2032 {
2033     struct bio *bio = *bio_ret;
2034     const unsigned blocksize = blks_to_bytes(inode, 1);
2035     sector_t block_in_file;
2036     sector_t last_block;
2037     sector_t last_block_in_file;
2038     sector_t block_nr;
2039     int ret = 0;
2040 
2041     block_in_file = (sector_t)page_index(page);
2042     last_block = block_in_file + nr_pages;
2043     last_block_in_file = bytes_to_blks(inode,
2044             f2fs_readpage_limit(inode) + blocksize - 1);
2045     if (last_block > last_block_in_file)
2046         last_block = last_block_in_file;
2047 
2048     /* just zero out the page which is beyond EOF */
2049     if (block_in_file >= last_block)
2050         goto zero_out;
2051     /*
2052      * Map blocks using the previous result first.
2053      */
2054     if ((map->m_flags & F2FS_MAP_MAPPED) &&
2055             block_in_file > map->m_lblk &&
2056             block_in_file < (map->m_lblk + map->m_len))
2057         goto got_it;
2058 
2059     /*
2060      * Then do more f2fs_map_blocks() calls until we are
2061      * done with this page.
2062      */
2063     map->m_lblk = block_in_file;
2064     map->m_len = last_block - block_in_file;
2065 
2066     ret = f2fs_map_blocks(inode, map, 0, F2FS_GET_BLOCK_DEFAULT);
2067     if (ret)
2068         goto out;
2069 got_it:
2070     if ((map->m_flags & F2FS_MAP_MAPPED)) {
2071         block_nr = map->m_pblk + block_in_file - map->m_lblk;
2072         SetPageMappedToDisk(page);
2073 
2074         if (!f2fs_is_valid_blkaddr(F2FS_I_SB(inode), block_nr,
2075                         DATA_GENERIC_ENHANCE_READ)) {
2076             ret = -EFSCORRUPTED;
2077             goto out;
2078         }
2079     } else {
2080 zero_out:
2081         zero_user_segment(page, 0, PAGE_SIZE);
2082         if (f2fs_need_verity(inode, page->index) &&
2083             !fsverity_verify_page(page)) {
2084             ret = -EIO;
2085             goto out;
2086         }
2087         if (!PageUptodate(page))
2088             SetPageUptodate(page);
2089         unlock_page(page);
2090         goto out;
2091     }
2092 
2093     /*
2094      * This page will go to BIO.  Do we need to send this
2095      * BIO off first?
2096      */
2097     if (bio && (!page_is_mergeable(F2FS_I_SB(inode), bio,
2098                        *last_block_in_bio, block_nr) ||
2099             !f2fs_crypt_mergeable_bio(bio, inode, page->index, NULL))) {
2100 submit_and_realloc:
2101         __submit_bio(F2FS_I_SB(inode), bio, DATA);
2102         bio = NULL;
2103     }
2104     if (bio == NULL) {
2105         bio = f2fs_grab_read_bio(inode, block_nr, nr_pages,
2106                 is_readahead ? REQ_RAHEAD : 0, page->index,
2107                 false);
2108         if (IS_ERR(bio)) {
2109             ret = PTR_ERR(bio);
2110             bio = NULL;
2111             goto out;
2112         }
2113     }
2114 
2115     /*
2116      * If the page is under writeback, we need to wait for
2117      * its completion to see the correct decrypted data.
2118      */
2119     f2fs_wait_on_block_writeback(inode, block_nr);
2120 
2121     if (bio_add_page(bio, page, blocksize, 0) < blocksize)
2122         goto submit_and_realloc;
2123 
2124     inc_page_count(F2FS_I_SB(inode), F2FS_RD_DATA);
2125     f2fs_update_iostat(F2FS_I_SB(inode), FS_DATA_READ_IO, F2FS_BLKSIZE);
2126     ClearPageError(page);
2127     *last_block_in_bio = block_nr;
2128     goto out;
2129 out:
2130     *bio_ret = bio;
2131     return ret;
2132 }
2133 
2134 #ifdef CONFIG_F2FS_FS_COMPRESSION
2135 int f2fs_read_multi_pages(struct compress_ctx *cc, struct bio **bio_ret,
2136                 unsigned nr_pages, sector_t *last_block_in_bio,
2137                 bool is_readahead, bool for_write)
2138 {
2139     struct dnode_of_data dn;
2140     struct inode *inode = cc->inode;
2141     struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
2142     struct bio *bio = *bio_ret;
2143     unsigned int start_idx = cc->cluster_idx << cc->log_cluster_size;
2144     sector_t last_block_in_file;
2145     const unsigned blocksize = blks_to_bytes(inode, 1);
2146     struct decompress_io_ctx *dic = NULL;
2147     struct extent_info ei = {0, };
2148     bool from_dnode = true;
2149     int i;
2150     int ret = 0;
2151 
2152     f2fs_bug_on(sbi, f2fs_cluster_is_empty(cc));
2153 
2154     last_block_in_file = bytes_to_blks(inode,
2155             f2fs_readpage_limit(inode) + blocksize - 1);
2156 
2157     /* get rid of pages beyond EOF */
2158     for (i = 0; i < cc->cluster_size; i++) {
2159         struct page *page = cc->rpages[i];
2160 
2161         if (!page)
2162             continue;
2163         if ((sector_t)page->index >= last_block_in_file) {
2164             zero_user_segment(page, 0, PAGE_SIZE);
2165             if (!PageUptodate(page))
2166                 SetPageUptodate(page);
2167         } else if (!PageUptodate(page)) {
2168             continue;
2169         }
2170         unlock_page(page);
2171         if (for_write)
2172             put_page(page);
2173         cc->rpages[i] = NULL;
2174         cc->nr_rpages--;
2175     }
2176 
2177     /* we are done since all pages are beyond EOF */
2178     if (f2fs_cluster_is_empty(cc))
2179         goto out;
2180 
2181     if (f2fs_lookup_extent_cache(inode, start_idx, &ei))
2182         from_dnode = false;
2183 
2184     if (!from_dnode)
2185         goto skip_reading_dnode;
2186 
2187     set_new_dnode(&dn, inode, NULL, NULL, 0);
2188     ret = f2fs_get_dnode_of_data(&dn, start_idx, LOOKUP_NODE);
2189     if (ret)
2190         goto out;
2191 
2192     f2fs_bug_on(sbi, dn.data_blkaddr != COMPRESS_ADDR);
2193 
2194 skip_reading_dnode:
2195     for (i = 1; i < cc->cluster_size; i++) {
2196         block_t blkaddr;
2197 
2198         blkaddr = from_dnode ? data_blkaddr(dn.inode, dn.node_page,
2199                     dn.ofs_in_node + i) :
2200                     ei.blk + i - 1;
2201 
2202         if (!__is_valid_data_blkaddr(blkaddr))
2203             break;
2204 
2205         if (!f2fs_is_valid_blkaddr(sbi, blkaddr, DATA_GENERIC)) {
2206             ret = -EFAULT;
2207             goto out_put_dnode;
2208         }
2209         cc->nr_cpages++;
2210 
2211         if (!from_dnode && i >= ei.c_len)
2212             break;
2213     }
2214 
2215     /* nothing to decompress */
2216     if (cc->nr_cpages == 0) {
2217         ret = 0;
2218         goto out_put_dnode;
2219     }
2220 
2221     dic = f2fs_alloc_dic(cc);
2222     if (IS_ERR(dic)) {
2223         ret = PTR_ERR(dic);
2224         goto out_put_dnode;
2225     }
2226 
2227     for (i = 0; i < cc->nr_cpages; i++) {
2228         struct page *page = dic->cpages[i];
2229         block_t blkaddr;
2230         struct bio_post_read_ctx *ctx;
2231 
2232         blkaddr = from_dnode ? data_blkaddr(dn.inode, dn.node_page,
2233                     dn.ofs_in_node + i + 1) :
2234                     ei.blk + i;
2235 
2236         f2fs_wait_on_block_writeback(inode, blkaddr);
2237 
2238         if (f2fs_load_compressed_page(sbi, page, blkaddr)) {
2239             if (atomic_dec_and_test(&dic->remaining_pages))
2240                 f2fs_decompress_cluster(dic, true);
2241             continue;
2242         }
2243 
2244         if (bio && (!page_is_mergeable(sbi, bio,
2245                     *last_block_in_bio, blkaddr) ||
2246             !f2fs_crypt_mergeable_bio(bio, inode, page->index, NULL))) {
2247 submit_and_realloc:
2248             __submit_bio(sbi, bio, DATA);
2249             bio = NULL;
2250         }
2251 
2252         if (!bio) {
2253             bio = f2fs_grab_read_bio(inode, blkaddr, nr_pages,
2254                     is_readahead ? REQ_RAHEAD : 0,
2255                     page->index, for_write);
2256             if (IS_ERR(bio)) {
2257                 ret = PTR_ERR(bio);
2258                 f2fs_decompress_end_io(dic, ret, true);
2259                 f2fs_put_dnode(&dn);
2260                 *bio_ret = NULL;
2261                 return ret;
2262             }
2263         }
2264 
2265         if (bio_add_page(bio, page, blocksize, 0) < blocksize)
2266             goto submit_and_realloc;
2267 
2268         ctx = get_post_read_ctx(bio);
2269         ctx->enabled_steps |= STEP_DECOMPRESS;
2270         refcount_inc(&dic->refcnt);
2271 
2272         inc_page_count(sbi, F2FS_RD_DATA);
2273         f2fs_update_iostat(sbi, FS_DATA_READ_IO, F2FS_BLKSIZE);
2274         f2fs_update_iostat(sbi, FS_CDATA_READ_IO, F2FS_BLKSIZE);
2275         ClearPageError(page);
2276         *last_block_in_bio = blkaddr;
2277     }
2278 
2279     if (from_dnode)
2280         f2fs_put_dnode(&dn);
2281 
2282     *bio_ret = bio;
2283     return 0;
2284 
2285 out_put_dnode:
2286     if (from_dnode)
2287         f2fs_put_dnode(&dn);
2288 out:
2289     for (i = 0; i < cc->cluster_size; i++) {
2290         if (cc->rpages[i]) {
2291             ClearPageUptodate(cc->rpages[i]);
2292             ClearPageError(cc->rpages[i]);
2293             unlock_page(cc->rpages[i]);
2294         }
2295     }
2296     *bio_ret = bio;
2297     return ret;
2298 }
2299 #endif
2300 
2301 /*
2302  * This function was originally taken from fs/mpage.c and customized for f2fs.
2303  * The major change comes from f2fs using block_size == page_size by default.
2304  */
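/*
 * Called either with a readahead_control (rac != NULL, from
 * f2fs_readahead()) for a batch of pages, or with a single page
 * (rac == NULL, from f2fs_read_data_folio()).  Compressed inodes are
 * handled cluster by cluster via f2fs_read_multi_pages(); everything else
 * goes through f2fs_read_single_page(), and any partially filled bio is
 * submitted at the end.
 */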
2305 static int f2fs_mpage_readpages(struct inode *inode,
2306         struct readahead_control *rac, struct page *page)
2307 {
2308     struct bio *bio = NULL;
2309     sector_t last_block_in_bio = 0;
2310     struct f2fs_map_blocks map;
2311 #ifdef CONFIG_F2FS_FS_COMPRESSION
2312     struct compress_ctx cc = {
2313         .inode = inode,
2314         .log_cluster_size = F2FS_I(inode)->i_log_cluster_size,
2315         .cluster_size = F2FS_I(inode)->i_cluster_size,
2316         .cluster_idx = NULL_CLUSTER,
2317         .rpages = NULL,
2318         .cpages = NULL,
2319         .nr_rpages = 0,
2320         .nr_cpages = 0,
2321     };
2322     pgoff_t nc_cluster_idx = NULL_CLUSTER;
2323 #endif
2324     unsigned nr_pages = rac ? readahead_count(rac) : 1;
2325     unsigned max_nr_pages = nr_pages;
2326     int ret = 0;
2327 
2328     map.m_pblk = 0;
2329     map.m_lblk = 0;
2330     map.m_len = 0;
2331     map.m_flags = 0;
2332     map.m_next_pgofs = NULL;
2333     map.m_next_extent = NULL;
2334     map.m_seg_type = NO_CHECK_TYPE;
2335     map.m_may_create = false;
2336 
2337     for (; nr_pages; nr_pages--) {
2338         if (rac) {
2339             page = readahead_page(rac);
2340             prefetchw(&page->flags);
2341         }
2342 
2343 #ifdef CONFIG_F2FS_FS_COMPRESSION
2344         if (f2fs_compressed_file(inode)) {
2345             /* there are remaining compressed pages, submit them */
2346             if (!f2fs_cluster_can_merge_page(&cc, page->index)) {
2347                 ret = f2fs_read_multi_pages(&cc, &bio,
2348                             max_nr_pages,
2349                             &last_block_in_bio,
2350                             rac != NULL, false);
2351                 f2fs_destroy_compress_ctx(&cc, false);
2352                 if (ret)
2353                     goto set_error_page;
2354             }
2355             if (cc.cluster_idx == NULL_CLUSTER) {
2356                 if (nc_cluster_idx ==
2357                     page->index >> cc.log_cluster_size) {
2358                     goto read_single_page;
2359                 }
2360 
2361                 ret = f2fs_is_compressed_cluster(inode, page->index);
2362                 if (ret < 0)
2363                     goto set_error_page;
2364                 else if (!ret) {
2365                     nc_cluster_idx =
2366                         page->index >> cc.log_cluster_size;
2367                     goto read_single_page;
2368                 }
2369 
2370                 nc_cluster_idx = NULL_CLUSTER;
2371             }
2372             ret = f2fs_init_compress_ctx(&cc);
2373             if (ret)
2374                 goto set_error_page;
2375 
2376             f2fs_compress_ctx_add_page(&cc, page);
2377 
2378             goto next_page;
2379         }
2380 read_single_page:
2381 #endif
2382 
2383         ret = f2fs_read_single_page(inode, page, max_nr_pages, &map,
2384                     &bio, &last_block_in_bio, rac);
2385         if (ret) {
2386 #ifdef CONFIG_F2FS_FS_COMPRESSION
2387 set_error_page:
2388 #endif
2389             SetPageError(page);
2390             zero_user_segment(page, 0, PAGE_SIZE);
2391             unlock_page(page);
2392         }
2393 #ifdef CONFIG_F2FS_FS_COMPRESSION
2394 next_page:
2395 #endif
2396         if (rac)
2397             put_page(page);
2398 
2399 #ifdef CONFIG_F2FS_FS_COMPRESSION
2400         if (f2fs_compressed_file(inode)) {
2401             /* last page */
2402             if (nr_pages == 1 && !f2fs_cluster_is_empty(&cc)) {
2403                 ret = f2fs_read_multi_pages(&cc, &bio,
2404                             max_nr_pages,
2405                             &last_block_in_bio,
2406                             rac != NULL, false);
2407                 f2fs_destroy_compress_ctx(&cc, false);
2408             }
2409         }
2410 #endif
2411     }
2412     if (bio)
2413         __submit_bio(F2FS_I_SB(inode), bio, DATA);
2414     return ret;
2415 }
2416 
2417 static int f2fs_read_data_folio(struct file *file, struct folio *folio)
2418 {
2419     struct page *page = &folio->page;
2420     struct inode *inode = page_file_mapping(page)->host;
2421     int ret = -EAGAIN;
2422 
2423     trace_f2fs_readpage(page, DATA);
2424 
2425     if (!f2fs_is_compress_backend_ready(inode)) {
2426         unlock_page(page);
2427         return -EOPNOTSUPP;
2428     }
2429 
2430     /* If the file has inline data, try to read it directly */
2431     if (f2fs_has_inline_data(inode))
2432         ret = f2fs_read_inline_data(inode, page);
2433     if (ret == -EAGAIN)
2434         ret = f2fs_mpage_readpages(inode, NULL, page);
2435     return ret;
2436 }
2437 
2438 static void f2fs_readahead(struct readahead_control *rac)
2439 {
2440     struct inode *inode = rac->mapping->host;
2441 
2442     trace_f2fs_readpages(inode, readahead_index(rac), readahead_count(rac));
2443 
2444     if (!f2fs_is_compress_backend_ready(inode))
2445         return;
2446 
2447     /* If the file has inline data, skip readahead */
2448     if (f2fs_has_inline_data(inode))
2449         return;
2450 
2451     f2fs_mpage_readpages(inode, rac, NULL);
2452 }
2453 
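/*
 * f2fs_read_data_folio() and f2fs_readahead() above are the buffered-read
 * entry points hooked into the data address_space_operations
 * (f2fs_dblock_aops, defined later in this file).
 */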
2454 int f2fs_encrypt_one_page(struct f2fs_io_info *fio)
2455 {
2456     struct inode *inode = fio->page->mapping->host;
2457     struct page *mpage, *page;
2458     gfp_t gfp_flags = GFP_NOFS;
2459 
2460     if (!f2fs_encrypted_file(inode))
2461         return 0;
2462 
2463     page = fio->compressed_page ? fio->compressed_page : fio->page;
2464 
2465     /* wait for GCed page writeback via META_MAPPING */
2466     f2fs_wait_on_block_writeback(inode, fio->old_blkaddr);
2467 
2468     if (fscrypt_inode_uses_inline_crypto(inode))
2469         return 0;
2470 
2471 retry_encrypt:
2472     fio->encrypted_page = fscrypt_encrypt_pagecache_blocks(page,
2473                     PAGE_SIZE, 0, gfp_flags);
2474     if (IS_ERR(fio->encrypted_page)) {
2475         /* flush pending IOs and wait for a while in the ENOMEM case */
2476         if (PTR_ERR(fio->encrypted_page) == -ENOMEM) {
2477             f2fs_flush_merged_writes(fio->sbi);
2478             memalloc_retry_wait(GFP_NOFS);
2479             gfp_flags |= __GFP_NOFAIL;
2480             goto retry_encrypt;
2481         }
2482         return PTR_ERR(fio->encrypted_page);
2483     }
2484 
2485     mpage = find_lock_page(META_MAPPING(fio->sbi), fio->old_blkaddr);
2486     if (mpage) {
2487         if (PageUptodate(mpage))
2488             memcpy(page_address(mpage),
2489                 page_address(fio->encrypted_page), PAGE_SIZE);
2490         f2fs_put_page(mpage, 1);
2491     }
2492     return 0;
2493 }
2494 
2495 static inline bool check_inplace_update_policy(struct inode *inode,
2496                 struct f2fs_io_info *fio)
2497 {
2498     struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
2499     unsigned int policy = SM_I(sbi)->ipu_policy;
2500 
2501     if (policy & (0x1 << F2FS_IPU_HONOR_OPU_WRITE) &&
2502             is_inode_flag_set(inode, FI_OPU_WRITE))
2503         return false;
2504     if (policy & (0x1 << F2FS_IPU_FORCE))
2505         return true;
2506     if (policy & (0x1 << F2FS_IPU_SSR) && f2fs_need_SSR(sbi))
2507         return true;
2508     if (policy & (0x1 << F2FS_IPU_UTIL) &&
2509             utilization(sbi) > SM_I(sbi)->min_ipu_util)
2510         return true;
2511     if (policy & (0x1 << F2FS_IPU_SSR_UTIL) && f2fs_need_SSR(sbi) &&
2512             utilization(sbi) > SM_I(sbi)->min_ipu_util)
2513         return true;
2514 
2515     /*
2516      * Use IPU when rewriting async pages
2517      */
2518     if (policy & (0x1 << F2FS_IPU_ASYNC) &&
2519             fio && fio->op == REQ_OP_WRITE &&
2520             !(fio->op_flags & REQ_SYNC) &&
2521             !IS_ENCRYPTED(inode))
2522         return true;
2523 
2524     /* this is only set during fdatasync */
2525     if (policy & (0x1 << F2FS_IPU_FSYNC) &&
2526             is_inode_flag_set(inode, FI_NEED_IPU))
2527         return true;
2528 
2529     if (unlikely(fio && is_sbi_flag_set(sbi, SBI_CP_DISABLED) &&
2530             !f2fs_is_checkpointed_data(sbi, fio->old_blkaddr)))
2531         return true;
2532 
2533     return false;
2534 }
2535 
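/*
 * ipu_policy is a bitmask (tunable through the ipu_policy sysfs node); each
 * F2FS_IPU_* bit enables one condition under which in-place update is
 * chosen.  For example, a policy of (1 << F2FS_IPU_UTIL) allows IPU only
 * once utilization(sbi) exceeds min_ipu_util.
 */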
2536 bool f2fs_should_update_inplace(struct inode *inode, struct f2fs_io_info *fio)
2537 {
2538     /* swap file is migrating in aligned write mode */
2539     if (is_inode_flag_set(inode, FI_ALIGNED_WRITE))
2540         return false;
2541 
2542     if (f2fs_is_pinned_file(inode))
2543         return true;
2544 
2545     /* if this is a cold file, we should overwrite to avoid fragmentation */
2546     if (file_is_cold(inode))
2547         return true;
2548 
2549     return check_inplace_update_policy(inode, fio);
2550 }
2551 
2552 bool f2fs_should_update_outplace(struct inode *inode, struct f2fs_io_info *fio)
2553 {
2554     struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
2555 
2556     /* The below cases were checked when setting it. */
2557     if (f2fs_is_pinned_file(inode))
2558         return false;
2559     if (fio && is_sbi_flag_set(sbi, SBI_NEED_FSCK))
2560         return true;
2561     if (f2fs_lfs_mode(sbi))
2562         return true;
2563     if (S_ISDIR(inode->i_mode))
2564         return true;
2565     if (IS_NOQUOTA(inode))
2566         return true;
2567     if (f2fs_is_atomic_file(inode))
2568         return true;
2569 
2570     /* swap file is migrating in aligned write mode */
2571     if (is_inode_flag_set(inode, FI_ALIGNED_WRITE))
2572         return true;
2573 
2574     if (is_inode_flag_set(inode, FI_OPU_WRITE))
2575         return true;
2576 
2577     if (fio) {
2578         if (page_private_gcing(fio->page))
2579             return true;
2580         if (page_private_dummy(fio->page))
2581             return true;
2582         if (unlikely(is_sbi_flag_set(sbi, SBI_CP_DISABLED) &&
2583             f2fs_is_checkpointed_data(sbi, fio->old_blkaddr)))
2584             return true;
2585     }
2586     return false;
2587 }
2588 
2589 static inline bool need_inplace_update(struct f2fs_io_info *fio)
2590 {
2591     struct inode *inode = fio->page->mapping->host;
2592 
2593     if (f2fs_should_update_outplace(inode, fio))
2594         return false;
2595 
2596     return f2fs_should_update_inplace(inode, fio);
2597 }
2598 
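/*
 * Write one data page: when need_inplace_update() holds and the old block
 * address is valid, the page is rewritten in place (IPU) via
 * f2fs_inplace_write_data(); otherwise a new block is allocated and the
 * page is written out of place (OPU) via f2fs_outplace_write_data(),
 * setting FI_APPEND_WRITE.  Atomic writes build the dnode from the COW
 * inode instead of the original inode.
 */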
2599 int f2fs_do_write_data_page(struct f2fs_io_info *fio)
2600 {
2601     struct page *page = fio->page;
2602     struct inode *inode = page->mapping->host;
2603     struct dnode_of_data dn;
2604     struct extent_info ei = {0, };
2605     struct node_info ni;
2606     bool ipu_force = false;
2607     int err = 0;
2608 
2609     /* Use COW inode to make dnode_of_data for atomic write */
2610     if (f2fs_is_atomic_file(inode))
2611         set_new_dnode(&dn, F2FS_I(inode)->cow_inode, NULL, NULL, 0);
2612     else
2613         set_new_dnode(&dn, inode, NULL, NULL, 0);
2614 
2615     if (need_inplace_update(fio) &&
2616             f2fs_lookup_extent_cache(inode, page->index, &ei)) {
2617         fio->old_blkaddr = ei.blk + page->index - ei.fofs;
2618 
2619         if (!f2fs_is_valid_blkaddr(fio->sbi, fio->old_blkaddr,
2620                         DATA_GENERIC_ENHANCE))
2621             return -EFSCORRUPTED;
2622 
2623         ipu_force = true;
2624         fio->need_lock = LOCK_DONE;
2625         goto got_it;
2626     }
2627 
2628     /* Avoid deadlock between page->lock and f2fs_lock_op */
2629     if (fio->need_lock == LOCK_REQ && !f2fs_trylock_op(fio->sbi))
2630         return -EAGAIN;
2631 
2632     err = f2fs_get_dnode_of_data(&dn, page->index, LOOKUP_NODE);
2633     if (err)
2634         goto out;
2635 
2636     fio->old_blkaddr = dn.data_blkaddr;
2637 
2638     /* This page is already truncated */
2639     if (fio->old_blkaddr == NULL_ADDR) {
2640         ClearPageUptodate(page);
2641         clear_page_private_gcing(page);
2642         goto out_writepage;
2643     }
2644 got_it:
2645     if (__is_valid_data_blkaddr(fio->old_blkaddr) &&
2646         !f2fs_is_valid_blkaddr(fio->sbi, fio->old_blkaddr,
2647                         DATA_GENERIC_ENHANCE)) {
2648         err = -EFSCORRUPTED;
2649         goto out_writepage;
2650     }
2651 
2652     /*
2653      * If the current allocation needs SSR,
2654      * in-place writes are preferred for the updated data.
2655      */
2656     if (ipu_force ||
2657         (__is_valid_data_blkaddr(fio->old_blkaddr) &&
2658                     need_inplace_update(fio))) {
2659         err = f2fs_encrypt_one_page(fio);
2660         if (err)
2661             goto out_writepage;
2662 
2663         set_page_writeback(page);
2664         ClearPageError(page);
2665         f2fs_put_dnode(&dn);
2666         if (fio->need_lock == LOCK_REQ)
2667             f2fs_unlock_op(fio->sbi);
2668         err = f2fs_inplace_write_data(fio);
2669         if (err) {
2670             if (fscrypt_inode_uses_fs_layer_crypto(inode))
2671                 fscrypt_finalize_bounce_page(&fio->encrypted_page);
2672             if (PageWriteback(page))
2673                 end_page_writeback(page);
2674         } else {
2675             set_inode_flag(inode, FI_UPDATE_WRITE);
2676         }
2677         trace_f2fs_do_write_data_page(fio->page, IPU);
2678         return err;
2679     }
2680 
2681     if (fio->need_lock == LOCK_RETRY) {
2682         if (!f2fs_trylock_op(fio->sbi)) {
2683             err = -EAGAIN;
2684             goto out_writepage;
2685         }
2686         fio->need_lock = LOCK_REQ;
2687     }
2688 
2689     err = f2fs_get_node_info(fio->sbi, dn.nid, &ni, false);
2690     if (err)
2691         goto out_writepage;
2692 
2693     fio->version = ni.version;
2694 
2695     err = f2fs_encrypt_one_page(fio);
2696     if (err)
2697         goto out_writepage;
2698 
2699     set_page_writeback(page);
2700     ClearPageError(page);
2701 
2702     if (fio->compr_blocks && fio->old_blkaddr == COMPRESS_ADDR)
2703         f2fs_i_compr_blocks_update(inode, fio->compr_blocks - 1, false);
2704 
2705     /* LFS mode write path */
2706     f2fs_outplace_write_data(&dn, fio);
2707     trace_f2fs_do_write_data_page(page, OPU);
2708     set_inode_flag(inode, FI_APPEND_WRITE);
2709     if (page->index == 0)
2710         set_inode_flag(inode, FI_FIRST_BLOCK_WRITTEN);
2711 out_writepage:
2712     f2fs_put_dnode(&dn);
2713 out:
2714     if (fio->need_lock == LOCK_REQ)
2715         f2fs_unlock_op(fio->sbi);
2716     return err;
2717 }
2718 
2719 int f2fs_write_single_data_page(struct page *page, int *submitted,
2720                 struct bio **bio,
2721                 sector_t *last_block,
2722                 struct writeback_control *wbc,
2723                 enum iostat_type io_type,
2724                 int compr_blocks,
2725                 bool allow_balance)
2726 {
2727     struct inode *inode = page->mapping->host;
2728     struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
2729     loff_t i_size = i_size_read(inode);
2730     const pgoff_t end_index = ((unsigned long long)i_size)
2731                             >> PAGE_SHIFT;
2732     loff_t psize = (loff_t)(page->index + 1) << PAGE_SHIFT;
2733     unsigned offset = 0;
2734     bool need_balance_fs = false;
2735     int err = 0;
2736     struct f2fs_io_info fio = {
2737         .sbi = sbi,
2738         .ino = inode->i_ino,
2739         .type = DATA,
2740         .op = REQ_OP_WRITE,
2741         .op_flags = wbc_to_write_flags(wbc),
2742         .old_blkaddr = NULL_ADDR,
2743         .page = page,
2744         .encrypted_page = NULL,
2745         .submitted = false,
2746         .compr_blocks = compr_blocks,
2747         .need_lock = LOCK_RETRY,
2748         .post_read = f2fs_post_read_required(inode),
2749         .io_type = io_type,
2750         .io_wbc = wbc,
2751         .bio = bio,
2752         .last_block = last_block,
2753     };
2754 
2755     trace_f2fs_writepage(page, DATA);
2756 
2757     /* we should bypass data pages to let the kworker jobs proceed */
2758     if (unlikely(f2fs_cp_error(sbi))) {
2759         mapping_set_error(page->mapping, -EIO);
2760         /*
2761          * don't drop any dirty dentry pages, to keep the latest
2762          * directory structure.
2763          */
2764         if (S_ISDIR(inode->i_mode))
2765             goto redirty_out;
2766         goto out;
2767     }
2768 
2769     if (unlikely(is_sbi_flag_set(sbi, SBI_POR_DOING)))
2770         goto redirty_out;
2771 
2772     if (page->index < end_index ||
2773             f2fs_verity_in_progress(inode) ||
2774             compr_blocks)
2775         goto write;
2776 
2777     /*
2778      * If the offset is out-of-range of file size,
2779      * this page does not have to be written to disk.
2780      */
2781     offset = i_size & (PAGE_SIZE - 1);
2782     if ((page->index >= end_index + 1) || !offset)
2783         goto out;
2784 
2785     zero_user_segment(page, offset, PAGE_SIZE);
2786 write:
2787     if (f2fs_is_drop_cache(inode))
2788         goto out;
2789 
2790     /* Dentry/quota blocks are controlled by checkpoint */
2791     if (S_ISDIR(inode->i_mode) || IS_NOQUOTA(inode)) {
2792         /*
2793          * We need to wait for node_write to avoid block allocation during
2794          * checkpoint. This can only happen to quota writes which can cause
2795          * the below discard race condition.
2796          */
2797         if (IS_NOQUOTA(inode))
2798             f2fs_down_read(&sbi->node_write);
2799 
2800         fio.need_lock = LOCK_DONE;
2801         err = f2fs_do_write_data_page(&fio);
2802 
2803         if (IS_NOQUOTA(inode))
2804             f2fs_up_read(&sbi->node_write);
2805 
2806         goto done;
2807     }
2808 
2809     if (!wbc->for_reclaim)
2810         need_balance_fs = true;
2811     else if (has_not_enough_free_secs(sbi, 0, 0))
2812         goto redirty_out;
2813     else
2814         set_inode_flag(inode, FI_HOT_DATA);
2815 
2816     err = -EAGAIN;
2817     if (f2fs_has_inline_data(inode)) {
2818         err = f2fs_write_inline_data(inode, page);
2819         if (!err)
2820             goto out;
2821     }
2822 
2823     if (err == -EAGAIN) {
2824         err = f2fs_do_write_data_page(&fio);
2825         if (err == -EAGAIN) {
2826             fio.need_lock = LOCK_REQ;
2827             err = f2fs_do_write_data_page(&fio);
2828         }
2829     }
2830 
2831     if (err) {
2832         file_set_keep_isize(inode);
2833     } else {
2834         spin_lock(&F2FS_I(inode)->i_size_lock);
2835         if (F2FS_I(inode)->last_disk_size < psize)
2836             F2FS_I(inode)->last_disk_size = psize;
2837         spin_unlock(&F2FS_I(inode)->i_size_lock);
2838     }
2839 
2840 done:
2841     if (err && err != -ENOENT)
2842         goto redirty_out;
2843 
2844 out:
2845     inode_dec_dirty_pages(inode);
2846     if (err) {
2847         ClearPageUptodate(page);
2848         clear_page_private_gcing(page);
2849     }
2850 
2851     if (wbc->for_reclaim) {
2852         f2fs_submit_merged_write_cond(sbi, NULL, page, 0, DATA);
2853         clear_inode_flag(inode, FI_HOT_DATA);
2854         f2fs_remove_dirty_inode(inode);
2855         submitted = NULL;
2856     }
2857     unlock_page(page);
2858     if (!S_ISDIR(inode->i_mode) && !IS_NOQUOTA(inode) &&
2859             !F2FS_I(inode)->cp_task && allow_balance)
2860         f2fs_balance_fs(sbi, need_balance_fs);
2861 
2862     if (unlikely(f2fs_cp_error(sbi))) {
2863         f2fs_submit_merged_write(sbi, DATA);
2864         f2fs_submit_merged_ipu_write(sbi, bio, NULL);
2865         submitted = NULL;
2866     }
2867 
2868     if (submitted)
2869         *submitted = fio.submitted ? 1 : 0;
2870 
2871     return 0;
2872 
2873 redirty_out:
2874     redirty_page_for_writepage(wbc, page);
2875     /*
2876      * pageout() in MM translates EAGAIN, so calls handle_write_error()
2877      * -> mapping_set_error() -> set_bit(AS_EIO, ...).
2878      * file_write_and_wait_range() will then see the EIO error, which is
2879      * critical for fsync() to report the atomic_write failure to the user.
2880      */
2881     if (!err || wbc->for_reclaim)
2882         return AOP_WRITEPAGE_ACTIVATE;
2883     unlock_page(page);
2884     return err;
2885 }
2886 
2887 static int f2fs_write_data_page(struct page *page,
2888                     struct writeback_control *wbc)
2889 {
2890 #ifdef CONFIG_F2FS_FS_COMPRESSION
2891     struct inode *inode = page->mapping->host;
2892 
2893     if (unlikely(f2fs_cp_error(F2FS_I_SB(inode))))
2894         goto out;
2895 
2896     if (f2fs_compressed_file(inode)) {
2897         if (f2fs_is_compressed_cluster(inode, page->index)) {
2898             redirty_page_for_writepage(wbc, page);
2899             return AOP_WRITEPAGE_ACTIVATE;
2900         }
2901     }
2902 out:
2903 #endif
2904 
2905     return f2fs_write_single_data_page(page, NULL, NULL, NULL,
2906                         wbc, FS_DATA_IO, 0, true);
2907 }
2908 
2909 /*
2910  * This function was copied from write_cache_pages() in mm/page-writeback.c.
2911  * The major change is that the write step for cold data pages is handled
2912  * separately from warm/hot data pages.
2913  */
2914 static int f2fs_write_cache_pages(struct address_space *mapping,
2915                     struct writeback_control *wbc,
2916                     enum iostat_type io_type)
2917 {
2918     int ret = 0;
2919     int done = 0, retry = 0;
2920     struct page *pages[F2FS_ONSTACK_PAGES];
2921     struct f2fs_sb_info *sbi = F2FS_M_SB(mapping);
2922     struct bio *bio = NULL;
2923     sector_t last_block;
2924 #ifdef CONFIG_F2FS_FS_COMPRESSION
2925     struct inode *inode = mapping->host;
2926     struct compress_ctx cc = {
2927         .inode = inode,
2928         .log_cluster_size = F2FS_I(inode)->i_log_cluster_size,
2929         .cluster_size = F2FS_I(inode)->i_cluster_size,
2930         .cluster_idx = NULL_CLUSTER,
2931         .rpages = NULL,
2932         .nr_rpages = 0,
2933         .cpages = NULL,
2934         .valid_nr_cpages = 0,
2935         .rbuf = NULL,
2936         .cbuf = NULL,
2937         .rlen = PAGE_SIZE * F2FS_I(inode)->i_cluster_size,
2938         .private = NULL,
2939     };
2940 #endif
2941     int nr_pages;
2942     pgoff_t index;
2943     pgoff_t end;        /* Inclusive */
2944     pgoff_t done_index;
2945     int range_whole = 0;
2946     xa_mark_t tag;
2947     int nwritten = 0;
2948     int submitted = 0;
2949     int i;
2950 
2951     if (get_dirty_pages(mapping->host) <=
2952                 SM_I(F2FS_M_SB(mapping))->min_hot_blocks)
2953         set_inode_flag(mapping->host, FI_HOT_DATA);
2954     else
2955         clear_inode_flag(mapping->host, FI_HOT_DATA);
2956 
2957     if (wbc->range_cyclic) {
2958         index = mapping->writeback_index; /* prev offset */
2959         end = -1;
2960     } else {
2961         index = wbc->range_start >> PAGE_SHIFT;
2962         end = wbc->range_end >> PAGE_SHIFT;
2963         if (wbc->range_start == 0 && wbc->range_end == LLONG_MAX)
2964             range_whole = 1;
2965     }
2966     if (wbc->sync_mode == WB_SYNC_ALL || wbc->tagged_writepages)
2967         tag = PAGECACHE_TAG_TOWRITE;
2968     else
2969         tag = PAGECACHE_TAG_DIRTY;
2970 retry:
2971     retry = 0;
2972     if (wbc->sync_mode == WB_SYNC_ALL || wbc->tagged_writepages)
2973         tag_pages_for_writeback(mapping, index, end);
2974     done_index = index;
2975     while (!done && !retry && (index <= end)) {
2976         nr_pages = find_get_pages_range_tag(mapping, &index, end,
2977                 tag, F2FS_ONSTACK_PAGES, pages);
2978         if (nr_pages == 0)
2979             break;
2980 
2981         for (i = 0; i < nr_pages; i++) {
2982             struct page *page = pages[i];
2983             bool need_readd;
2984 readd:
2985             need_readd = false;
2986 #ifdef CONFIG_F2FS_FS_COMPRESSION
2987             if (f2fs_compressed_file(inode)) {
2988                 void *fsdata = NULL;
2989                 struct page *pagep;
2990                 int ret2;
2991 
2992                 ret = f2fs_init_compress_ctx(&cc);
2993                 if (ret) {
2994                     done = 1;
2995                     break;
2996                 }
2997 
2998                 if (!f2fs_cluster_can_merge_page(&cc,
2999                                 page->index)) {
3000                     ret = f2fs_write_multi_pages(&cc,
3001                         &submitted, wbc, io_type);
3002                     if (!ret)
3003                         need_readd = true;
3004                     goto result;
3005                 }
3006 
3007                 if (unlikely(f2fs_cp_error(sbi)))
3008                     goto lock_page;
3009 
3010                 if (!f2fs_cluster_is_empty(&cc))
3011                     goto lock_page;
3012 
3013                 if (f2fs_all_cluster_page_ready(&cc,
3014                     pages, i, nr_pages, true))
3015                     goto lock_page;
3016 
3017                 ret2 = f2fs_prepare_compress_overwrite(
3018                             inode, &pagep,
3019                             page->index, &fsdata);
3020                 if (ret2 < 0) {
3021                     ret = ret2;
3022                     done = 1;
3023                     break;
3024                 } else if (ret2 &&
3025                     (!f2fs_compress_write_end(inode,
3026                         fsdata, page->index, 1) ||
3027                      !f2fs_all_cluster_page_ready(&cc,
3028                         pages, i, nr_pages, false))) {
3029                     retry = 1;
3030                     break;
3031                 }
3032             }
3033 #endif
3034             /* give priority to WB_SYNC threads */
3035             if (atomic_read(&sbi->wb_sync_req[DATA]) &&
3036                     wbc->sync_mode == WB_SYNC_NONE) {
3037                 done = 1;
3038                 break;
3039             }
3040 #ifdef CONFIG_F2FS_FS_COMPRESSION
3041 lock_page:
3042 #endif
3043             done_index = page->index;
3044 retry_write:
3045             lock_page(page);
3046 
3047             if (unlikely(page->mapping != mapping)) {
3048 continue_unlock:
3049                 unlock_page(page);
3050                 continue;
3051             }
3052 
3053             if (!PageDirty(page)) {
3054                 /* someone wrote it for us */
3055                 goto continue_unlock;
3056             }
3057 
3058             if (PageWriteback(page)) {
3059                 if (wbc->sync_mode != WB_SYNC_NONE)
3060                     f2fs_wait_on_page_writeback(page,
3061                             DATA, true, true);
3062                 else
3063                     goto continue_unlock;
3064             }
3065 
3066             if (!clear_page_dirty_for_io(page))
3067                 goto continue_unlock;
3068 
3069 #ifdef CONFIG_F2FS_FS_COMPRESSION
3070             if (f2fs_compressed_file(inode)) {
3071                 get_page(page);
3072                 f2fs_compress_ctx_add_page(&cc, page);
3073                 continue;
3074             }
3075 #endif
3076             ret = f2fs_write_single_data_page(page, &submitted,
3077                     &bio, &last_block, wbc, io_type,
3078                     0, true);
3079             if (ret == AOP_WRITEPAGE_ACTIVATE)
3080                 unlock_page(page);
3081 #ifdef CONFIG_F2FS_FS_COMPRESSION
3082 result:
3083 #endif
3084             nwritten += submitted;
3085             wbc->nr_to_write -= submitted;
3086 
3087             if (unlikely(ret)) {
3088                 /*
3089                  * keep nr_to_write, since vfs uses this to
3090                  * get # of written pages.
3091                  */
3092                 if (ret == AOP_WRITEPAGE_ACTIVATE) {
3093                     ret = 0;
3094                     goto next;
3095                 } else if (ret == -EAGAIN) {
3096                     ret = 0;
3097                     if (wbc->sync_mode == WB_SYNC_ALL) {
3098                         f2fs_io_schedule_timeout(
3099                             DEFAULT_IO_TIMEOUT);
3100                         goto retry_write;
3101                     }
3102                     goto next;
3103                 }
3104                 done_index = page->index + 1;
3105                 done = 1;
3106                 break;
3107             }
3108 
3109             if (wbc->nr_to_write <= 0 &&
3110                     wbc->sync_mode == WB_SYNC_NONE) {
3111                 done = 1;
3112                 break;
3113             }
3114 next:
3115             if (need_readd)
3116                 goto readd;
3117         }
3118         release_pages(pages, nr_pages);
3119         cond_resched();
3120     }
3121 #ifdef CONFIG_F2FS_FS_COMPRESSION
3122     /* flush remaining pages in the compress cluster */
3123     if (f2fs_compressed_file(inode) && !f2fs_cluster_is_empty(&cc)) {
3124         ret = f2fs_write_multi_pages(&cc, &submitted, wbc, io_type);
3125         nwritten += submitted;
3126         wbc->nr_to_write -= submitted;
3127         if (ret) {
3128             done = 1;
3129             retry = 0;
3130         }
3131     }
3132     if (f2fs_compressed_file(inode))
3133         f2fs_destroy_compress_ctx(&cc, false);
3134 #endif
3135     if (retry) {
3136         index = 0;
3137         end = -1;
3138         goto retry;
3139     }
3140     if (wbc->range_cyclic && !done)
3141         done_index = 0;
3142     if (wbc->range_cyclic || (range_whole && wbc->nr_to_write > 0))
3143         mapping->writeback_index = done_index;
3144 
3145     if (nwritten)
3146         f2fs_submit_merged_write_cond(F2FS_M_SB(mapping), mapping->host,
3147                                 NULL, 0, DATA);
3148     /* submit cached bio of IPU write */
3149     if (bio)
3150         f2fs_submit_merged_ipu_write(sbi, &bio, NULL);
3151 
3152     return ret;
3153 }
3154 
3155 static inline bool __should_serialize_io(struct inode *inode,
3156                     struct writeback_control *wbc)
3157 {
3158     /* to avoid deadlock in the data flush path */
3159     if (F2FS_I(inode)->cp_task)
3160         return false;
3161 
3162     if (!S_ISREG(inode->i_mode))
3163         return false;
3164     if (IS_NOQUOTA(inode))
3165         return false;
3166 
3167     if (f2fs_need_compress_data(inode))
3168         return true;
3169     if (wbc->sync_mode != WB_SYNC_ALL)
3170         return true;
3171     if (get_dirty_pages(inode) >= SM_I(F2FS_I_SB(inode))->min_seq_blocks)
3172         return true;
3173     return false;
3174 }
3175 
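/*
 * When __should_serialize_io() returns true, __f2fs_write_data_pages()
 * below takes sbi->writepages around f2fs_write_cache_pages(), serializing
 * writeback across inodes (which helps keep their data blocks from
 * interleaving).
 */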
3176 static int __f2fs_write_data_pages(struct address_space *mapping,
3177                         struct writeback_control *wbc,
3178                         enum iostat_type io_type)
3179 {
3180     struct inode *inode = mapping->host;
3181     struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
3182     struct blk_plug plug;
3183     int ret;
3184     bool locked = false;
3185 
3186     /* deal with chardevs and other special files */
3187     if (!mapping->a_ops->writepage)
3188         return 0;
3189 
3190     /* skip writing if there is no dirty page in this inode */
3191     if (!get_dirty_pages(inode) && wbc->sync_mode == WB_SYNC_NONE)
3192         return 0;
3193 
3194     /* during POR, we don't need to trigger writepage at all. */
3195     if (unlikely(is_sbi_flag_set(sbi, SBI_POR_DOING)))
3196         goto skip_write;
3197 
3198     if ((S_ISDIR(inode->i_mode) || IS_NOQUOTA(inode)) &&
3199             wbc->sync_mode == WB_SYNC_NONE &&
3200             get_dirty_pages(inode) < nr_pages_to_skip(sbi, DATA) &&
3201             f2fs_available_free_memory(sbi, DIRTY_DENTS))
3202         goto skip_write;
3203 
3204     /* skip writing during the file defragment preparation stage */
3205     if (is_inode_flag_set(inode, FI_SKIP_WRITES))
3206         goto skip_write;
3207 
3208     trace_f2fs_writepages(mapping->host, wbc, DATA);
3209 
3210     /* to avoid splitting IOs due to mixed WB_SYNC_ALL and WB_SYNC_NONE */
3211     if (wbc->sync_mode == WB_SYNC_ALL)
3212         atomic_inc(&sbi->wb_sync_req[DATA]);
3213     else if (atomic_read(&sbi->wb_sync_req[DATA])) {
3214         /* to avoid potential deadlock */
3215         if (current->plug)
3216             blk_finish_plug(current->plug);
3217         goto skip_write;
3218     }
3219 
3220     if (__should_serialize_io(inode, wbc)) {
3221         mutex_lock(&sbi->writepages);
3222         locked = true;
3223     }
3224 
3225     blk_start_plug(&plug);
3226     ret = f2fs_write_cache_pages(mapping, wbc, io_type);
3227     blk_finish_plug(&plug);
3228 
3229     if (locked)
3230         mutex_unlock(&sbi->writepages);
3231 
3232     if (wbc->sync_mode == WB_SYNC_ALL)
3233         atomic_dec(&sbi->wb_sync_req[DATA]);
3234     /*
3235      * if some pages were truncated, we cannot rely on mapping->host
3236      * to detect pending bios.
3237      */
3238 
3239     f2fs_remove_dirty_inode(inode);
3240     return ret;
3241 
3242 skip_write:
3243     wbc->pages_skipped += get_dirty_pages(inode);
3244     trace_f2fs_writepages(mapping->host, wbc, DATA);
3245     return 0;
3246 }
3247 
3248 static int f2fs_write_data_pages(struct address_space *mapping,
3249                 struct writeback_control *wbc)
3250 {
3251     struct inode *inode = mapping->host;
3252 
3253     return __f2fs_write_data_pages(mapping, wbc,
3254             F2FS_I(inode)->cp_task == current ?
3255             FS_CP_DATA_IO : FS_DATA_IO);
3256 }
3257 
3258 void f2fs_write_failed(struct inode *inode, loff_t to)
3259 {
3260     loff_t i_size = i_size_read(inode);
3261 
3262     if (IS_NOQUOTA(inode))
3263         return;
3264 
3265     /* In the fs-verity case, f2fs_end_enable_verity() does the truncate */
3266     if (to > i_size && !f2fs_verity_in_progress(inode)) {
3267         f2fs_down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
3268         filemap_invalidate_lock(inode->i_mapping);
3269 
3270         truncate_pagecache(inode, i_size);
3271         f2fs_truncate_blocks(inode, i_size, true);
3272 
3273         filemap_invalidate_unlock(inode->i_mapping);
3274         f2fs_up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
3275     }
3276 }
3277 
3278 static int prepare_write_begin(struct f2fs_sb_info *sbi,
3279             struct page *page, loff_t pos, unsigned len,
3280             block_t *blk_addr, bool *node_changed)
3281 {
3282     struct inode *inode = page->mapping->host;
3283     pgoff_t index = page->index;
3284     struct dnode_of_data dn;
3285     struct page *ipage;
3286     bool locked = false;
3287     struct extent_info ei = {0, };
3288     int err = 0;
3289     int flag;
3290 
3291     /*
3292      * If a whole page is being written and we already preallocated all the
3293      * blocks, then there is no need to get a block address now.
3294      */
3295     if (len == PAGE_SIZE && is_inode_flag_set(inode, FI_PREALLOCATED_ALL))
3296         return 0;
3297 
3298     /* f2fs_lock_op avoids race between write CP and convert_inline_page */
3299     if (f2fs_has_inline_data(inode) && pos + len > MAX_INLINE_DATA(inode))
3300         flag = F2FS_GET_BLOCK_DEFAULT;
3301     else
3302         flag = F2FS_GET_BLOCK_PRE_AIO;
3303 
3304     if (f2fs_has_inline_data(inode) ||
3305             (pos & PAGE_MASK) >= i_size_read(inode)) {
3306         f2fs_do_map_lock(sbi, flag, true);
3307         locked = true;
3308     }
3309 
3310 restart:
3311     /* check inline_data */
3312     ipage = f2fs_get_node_page(sbi, inode->i_ino);
3313     if (IS_ERR(ipage)) {
3314         err = PTR_ERR(ipage);
3315         goto unlock_out;
3316     }
3317 
3318     set_new_dnode(&dn, inode, ipage, ipage, 0);
3319 
3320     if (f2fs_has_inline_data(inode)) {
3321         if (pos + len <= MAX_INLINE_DATA(inode)) {
3322             f2fs_do_read_inline_data(page, ipage);
3323             set_inode_flag(inode, FI_DATA_EXIST);
3324             if (inode->i_nlink)
3325                 set_page_private_inline(ipage);
3326         } else {
3327             err = f2fs_convert_inline_page(&dn, page);
3328             if (err)
3329                 goto out;
3330             if (dn.data_blkaddr == NULL_ADDR)
3331                 err = f2fs_get_block(&dn, index);
3332         }
3333     } else if (locked) {
3334         err = f2fs_get_block(&dn, index);
3335     } else {
3336         if (f2fs_lookup_extent_cache(inode, index, &ei)) {
3337             dn.data_blkaddr = ei.blk + index - ei.fofs;
3338         } else {
3339             /* hole case */
3340             err = f2fs_get_dnode_of_data(&dn, index, LOOKUP_NODE);
3341             if (err || dn.data_blkaddr == NULL_ADDR) {
3342                 f2fs_put_dnode(&dn);
3343                 f2fs_do_map_lock(sbi, F2FS_GET_BLOCK_PRE_AIO,
3344                                 true);
3345                 WARN_ON(flag != F2FS_GET_BLOCK_PRE_AIO);
3346                 locked = true;
3347                 goto restart;
3348             }
3349         }
3350     }
3351 
3352     /* convert_inline_page can make node_changed */
3353     *blk_addr = dn.data_blkaddr;
3354     *node_changed = dn.node_changed;
3355 out:
3356     f2fs_put_dnode(&dn);
3357 unlock_out:
3358     if (locked)
3359         f2fs_do_map_lock(sbi, flag, false);
3360     return err;
3361 }
3362 
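     /*
      * Look up the block address for @index without allocating anything.
      * A hole is reported as NULL_ADDR rather than as an error.
      */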
3363 static int __find_data_block(struct inode *inode, pgoff_t index,
3364                 block_t *blk_addr)
3365 {
3366     struct dnode_of_data dn;
3367     struct page *ipage;
3368     struct extent_info ei = {0, };
3369     int err = 0;
3370 
3371     ipage = f2fs_get_node_page(F2FS_I_SB(inode), inode->i_ino);
3372     if (IS_ERR(ipage))
3373         return PTR_ERR(ipage);
3374 
3375     set_new_dnode(&dn, inode, ipage, ipage, 0);
3376 
3377     if (f2fs_lookup_extent_cache(inode, index, &ei)) {
3378         dn.data_blkaddr = ei.blk + index - ei.fofs;
3379     } else {
3380         /* hole case */
3381         err = f2fs_get_dnode_of_data(&dn, index, LOOKUP_NODE);
3382         if (err) {
3383             dn.data_blkaddr = NULL_ADDR;
3384             err = 0;
3385         }
3386     }
3387     *blk_addr = dn.data_blkaddr;
3388     f2fs_put_dnode(&dn);
3389     return err;
3390 }
3391 
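     /*
      * Reserve (allocating if necessary) a block for @index under the
      * F2FS_GET_BLOCK_PRE_AIO map lock, and report whether a node page
      * was changed in the process.
      */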
3392 static int __reserve_data_block(struct inode *inode, pgoff_t index,
3393                 block_t *blk_addr, bool *node_changed)
3394 {
3395     struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
3396     struct dnode_of_data dn;
3397     struct page *ipage;
3398     int err = 0;
3399 
3400     f2fs_do_map_lock(sbi, F2FS_GET_BLOCK_PRE_AIO, true);
3401 
3402     ipage = f2fs_get_node_page(sbi, inode->i_ino);
3403     if (IS_ERR(ipage)) {
3404         err = PTR_ERR(ipage);
3405         goto unlock_out;
3406     }
3407     set_new_dnode(&dn, inode, ipage, ipage, 0);
3408 
3409     err = f2fs_get_block(&dn, index);
3410 
3411     *blk_addr = dn.data_blkaddr;
3412     *node_changed = dn.node_changed;
3413     f2fs_put_dnode(&dn);
3414 
3415 unlock_out:
3416     f2fs_do_map_lock(sbi, F2FS_GET_BLOCK_PRE_AIO, false);
3417     return err;
3418 }
3419 
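     /*
      * write_begin preparation for atomic-write files: updates are staged in
      * the COW inode.  Reuse a block already reserved there if one exists;
      * otherwise reserve a new one, remembering the original inode's block
      * address so the existing data can be read into the page first.
      */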
3420 static int prepare_atomic_write_begin(struct f2fs_sb_info *sbi,
3421             struct page *page, loff_t pos, unsigned int len,
3422             block_t *blk_addr, bool *node_changed)
3423 {
3424     struct inode *inode = page->mapping->host;
3425     struct inode *cow_inode = F2FS_I(inode)->cow_inode;
3426     pgoff_t index = page->index;
3427     int err = 0;
3428     block_t ori_blk_addr = NULL_ADDR;
3429 
3430     /* If pos is beyond the end of file, reserve a new block in COW inode */
3431     if ((pos & PAGE_MASK) >= i_size_read(inode))
3432         goto reserve_block;
3433 
3434     /* Look for the block in COW inode first */
3435     err = __find_data_block(cow_inode, index, blk_addr);
3436     if (err)
3437         return err;
3438     else if (*blk_addr != NULL_ADDR)
3439         return 0;
3440 
3441     /* Look for the block in the original inode */
3442     err = __find_data_block(inode, index, &ori_blk_addr);
3443     if (err)
3444         return err;
3445 
3446 reserve_block:
3447     /* Finally, we should reserve a new block in COW inode for the update */
3448     err = __reserve_data_block(cow_inode, index, blk_addr, node_changed);
3449     if (err)
3450         return err;
3451     inc_atomic_write_cnt(inode);
3452 
3453     if (ori_blk_addr != NULL_ADDR)
3454         *blk_addr = ori_blk_addr;
3455     return 0;
3456 }
3457 
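     /*
      * .write_begin: lock the target page, make sure a block address is
      * prepared for it (normal or atomic/COW path), and bring the page
      * uptodate when only part of it is about to be overwritten.
      */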
3458 static int f2fs_write_begin(struct file *file, struct address_space *mapping,
3459         loff_t pos, unsigned len, struct page **pagep, void **fsdata)
3460 {
3461     struct inode *inode = mapping->host;
3462     struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
3463     struct page *page = NULL;
3464     pgoff_t index = ((unsigned long long) pos) >> PAGE_SHIFT;
3465     bool need_balance = false;
3466     block_t blkaddr = NULL_ADDR;
3467     int err = 0;
3468 
3469     trace_f2fs_write_begin(inode, pos, len);
3470 
3471     if (!f2fs_is_checkpoint_ready(sbi)) {
3472         err = -ENOSPC;
3473         goto fail;
3474     }
3475 
3476     /*
3477      * Convert inline data here, before locking any data page, to avoid a
3478      * deadlock between the inode page and page #0. The locking order for
3479      * inline_data conversion must be: lock_page(page #0) -> lock_page(inode_page)
3480      */
3481     if (index != 0) {
3482         err = f2fs_convert_inline_inode(inode);
3483         if (err)
3484             goto fail;
3485     }
3486 
3487 #ifdef CONFIG_F2FS_FS_COMPRESSION
3488     if (f2fs_compressed_file(inode)) {
3489         int ret;
3490 
3491         *fsdata = NULL;
3492 
3493         if (len == PAGE_SIZE && !(f2fs_is_atomic_file(inode)))
3494             goto repeat;
3495 
3496         ret = f2fs_prepare_compress_overwrite(inode, pagep,
3497                             index, fsdata);
3498         if (ret < 0) {
3499             err = ret;
3500             goto fail;
3501         } else if (ret) {
3502             return 0;
3503         }
3504     }
3505 #endif
3506 
3507 repeat:
3508     /*
3509      * Do not use grab_cache_page_write_begin() to avoid a deadlock on
3510      * wait_for_stable_page(); we wait for writeback below under our own I/O control.
3511      */
3512     page = f2fs_pagecache_get_page(mapping, index,
3513                 FGP_LOCK | FGP_WRITE | FGP_CREAT, GFP_NOFS);
3514     if (!page) {
3515         err = -ENOMEM;
3516         goto fail;
3517     }
3518 
3519     /* TODO: cluster can be compressed due to race with .writepage */
3520 
3521     *pagep = page;
3522 
3523     if (f2fs_is_atomic_file(inode))
3524         err = prepare_atomic_write_begin(sbi, page, pos, len,
3525                     &blkaddr, &need_balance);
3526     else
3527         err = prepare_write_begin(sbi, page, pos, len,
3528                     &blkaddr, &need_balance);
3529     if (err)
3530         goto fail;
3531 
3532     if (need_balance && !IS_NOQUOTA(inode) &&
3533             has_not_enough_free_secs(sbi, 0, 0)) {
3534         unlock_page(page);
3535         f2fs_balance_fs(sbi, true);
3536         lock_page(page);
3537         if (page->mapping != mapping) {
3538             /* The page got truncated from under us */
3539             f2fs_put_page(page, 1);
3540             goto repeat;
3541         }
3542     }
3543 
3544     f2fs_wait_on_page_writeback(page, DATA, false, true);
3545 
3546     if (len == PAGE_SIZE || PageUptodate(page))
3547         return 0;
3548 
3549     if (!(pos & (PAGE_SIZE - 1)) && (pos + len) >= i_size_read(inode) &&
3550         !f2fs_verity_in_progress(inode)) {
3551         zero_user_segment(page, len, PAGE_SIZE);
3552         return 0;
3553     }
3554 
3555     if (blkaddr == NEW_ADDR) {
3556         zero_user_segment(page, 0, PAGE_SIZE);
3557         SetPageUptodate(page);
3558     } else {
3559         if (!f2fs_is_valid_blkaddr(sbi, blkaddr,
3560                 DATA_GENERIC_ENHANCE_READ)) {
3561             err = -EFSCORRUPTED;
3562             goto fail;
3563         }
3564         err = f2fs_submit_page_read(inode, page, blkaddr, 0, true);
3565         if (err)
3566             goto fail;
3567 
3568         lock_page(page);
3569         if (unlikely(page->mapping != mapping)) {
3570             f2fs_put_page(page, 1);
3571             goto repeat;
3572         }
3573         if (unlikely(!PageUptodate(page))) {
3574             err = -EIO;
3575             goto fail;
3576         }
3577     }
3578     return 0;
3579 
3580 fail:
3581     f2fs_put_page(page, 1);
3582     f2fs_write_failed(inode, pos + len);
3583     return err;
3584 }
3585 
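     /*
      * .write_end: mark the copied range dirty, extend i_size when the write
      * goes past the current end of file, and release the page reference
      * taken by f2fs_write_begin().
      */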
3586 static int f2fs_write_end(struct file *file,
3587             struct address_space *mapping,
3588             loff_t pos, unsigned len, unsigned copied,
3589             struct page *page, void *fsdata)
3590 {
3591     struct inode *inode = page->mapping->host;
3592 
3593     trace_f2fs_write_end(inode, pos, len, copied);
3594 
3595     /*
3596      * A non-uptodate page here can only come from len == PAGE_SIZE, so we
3597      * expect copied to be PAGE_SIZE as well. Otherwise, treat it as a zero-
3598      * byte copy and let generic_perform_write() retry via copied = 0.
3599      */
3600     if (!PageUptodate(page)) {
3601         if (unlikely(copied != len))
3602             copied = 0;
3603         else
3604             SetPageUptodate(page);
3605     }
3606 
3607 #ifdef CONFIG_F2FS_FS_COMPRESSION
3608     /* overwrite compressed file */
3609     if (f2fs_compressed_file(inode) && fsdata) {
3610         f2fs_compress_write_end(inode, fsdata, page->index, copied);
3611         f2fs_update_time(F2FS_I_SB(inode), REQ_TIME);
3612 
3613         if (pos + copied > i_size_read(inode) &&
3614                 !f2fs_verity_in_progress(inode))
3615             f2fs_i_size_write(inode, pos + copied);
3616         return copied;
3617     }
3618 #endif
3619 
3620     if (!copied)
3621         goto unlock_out;
3622 
3623     set_page_dirty(page);
3624 
3625     if (pos + copied > i_size_read(inode) &&
3626         !f2fs_verity_in_progress(inode)) {
3627         f2fs_i_size_write(inode, pos + copied);
3628         if (f2fs_is_atomic_file(inode))
3629             f2fs_i_size_write(F2FS_I(inode)->cow_inode,
3630                     pos + copied);
3631     }
3632 unlock_out:
3633     f2fs_put_page(page, 1);
3634     f2fs_update_time(F2FS_I_SB(inode), REQ_TIME);
3635     return copied;
3636 }
3637 
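     /*
      * Drop dirty-page accounting and per-folio private state on folio
      * invalidation; for non-metadata inodes, a partial invalidation is
      * ignored and only a whole-folio invalidation is handled.
      */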
3638 void f2fs_invalidate_folio(struct folio *folio, size_t offset, size_t length)
3639 {
3640     struct inode *inode = folio->mapping->host;
3641     struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
3642 
3643     if (inode->i_ino >= F2FS_ROOT_INO(sbi) &&
3644                 (offset || length != folio_size(folio)))
3645         return;
3646 
3647     if (folio_test_dirty(folio)) {
3648         if (inode->i_ino == F2FS_META_INO(sbi)) {
3649             dec_page_count(sbi, F2FS_DIRTY_META);
3650         } else if (inode->i_ino == F2FS_NODE_INO(sbi)) {
3651             dec_page_count(sbi, F2FS_DIRTY_NODES);
3652         } else {
3653             inode_dec_dirty_pages(inode);
3654             f2fs_remove_dirty_inode(inode);
3655         }
3656     }
3657 
3658     clear_page_private_gcing(&folio->page);
3659 
3660     if (test_opt(sbi, COMPRESS_CACHE) &&
3661             inode->i_ino == F2FS_COMPRESS_INO(sbi))
3662         clear_page_private_data(&folio->page);
3663 
3664     folio_detach_private(folio);
3665 }
3666 
3667 bool f2fs_release_folio(struct folio *folio, gfp_t wait)
3668 {
3669     struct f2fs_sb_info *sbi;
3670 
3671     /* If this is a dirty folio, keep its private data */
3672     if (folio_test_dirty(folio))
3673         return false;
3674 
3675     sbi = F2FS_M_SB(folio->mapping);
3676     if (test_opt(sbi, COMPRESS_CACHE)) {
3677         struct inode *inode = folio->mapping->host;
3678 
3679         if (inode->i_ino == F2FS_COMPRESS_INO(sbi))
3680             clear_page_private_data(&folio->page);
3681     }
3682 
3683     clear_page_private_gcing(&folio->page);
3684 
3685     folio_detach_private(folio);
3686     return true;
3687 }
3688 
3689 static bool f2fs_dirty_data_folio(struct address_space *mapping,
3690         struct folio *folio)
3691 {
3692     struct inode *inode = mapping->host;
3693 
3694     trace_f2fs_set_page_dirty(&folio->page, DATA);
3695 
3696     if (!folio_test_uptodate(folio))
3697         folio_mark_uptodate(folio);
3698     BUG_ON(folio_test_swapcache(folio));
3699 
3700     if (!folio_test_dirty(folio)) {
3701         filemap_dirty_folio(mapping, folio);
3702         f2fs_update_dirty_folio(inode, folio);
3703         return true;
3704     }
3705     return false;
3706 }
3707 
3708 
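     /*
      * For a compressed file, map a logical block inside its cluster to a
      * physical block number; returns 0 if the cluster is stored compressed
      * or the block is otherwise not directly addressable.
      */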
3709 static sector_t f2fs_bmap_compress(struct inode *inode, sector_t block)
3710 {
3711 #ifdef CONFIG_F2FS_FS_COMPRESSION
3712     struct dnode_of_data dn;
3713     sector_t start_idx, blknr = 0;
3714     int ret;
3715 
3716     start_idx = round_down(block, F2FS_I(inode)->i_cluster_size);
3717 
3718     set_new_dnode(&dn, inode, NULL, NULL, 0);
3719     ret = f2fs_get_dnode_of_data(&dn, start_idx, LOOKUP_NODE);
3720     if (ret)
3721         return 0;
3722 
3723     if (dn.data_blkaddr != COMPRESS_ADDR) {
3724         dn.ofs_in_node += block - start_idx;
3725         blknr = f2fs_data_blkaddr(&dn);
3726         if (!__is_valid_data_blkaddr(blknr))
3727             blknr = 0;
3728     }
3729 
3730     f2fs_put_dnode(&dn);
3731     return blknr;
3732 #else
3733     return 0;
3734 #endif
3735 }
3736 
3737 
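     /*
      * .bmap: translate a file block number to an on-disk block number,
      * flushing dirty pages first so the mapping is stable.  Inline-data
      * files and out-of-range blocks report 0.
      */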
3738 static sector_t f2fs_bmap(struct address_space *mapping, sector_t block)
3739 {
3740     struct inode *inode = mapping->host;
3741     sector_t blknr = 0;
3742 
3743     if (f2fs_has_inline_data(inode))
3744         goto out;
3745 
3746     /* flush dirty pages first so that all blocks are actually allocated */
3747     if (mapping_tagged(mapping, PAGECACHE_TAG_DIRTY))
3748         filemap_write_and_wait(mapping);
3749 
3750     /* the block number must be below the f2fs maximum file block count */
3751     if (unlikely(block >= max_file_blocks(inode)))
3752         goto out;
3753 
3754     if (f2fs_compressed_file(inode)) {
3755         blknr = f2fs_bmap_compress(inode, block);
3756     } else {
3757         struct f2fs_map_blocks map;
3758 
3759         memset(&map, 0, sizeof(map));
3760         map.m_lblk = block;
3761         map.m_len = 1;
3762         map.m_next_pgofs = NULL;
3763         map.m_seg_type = NO_CHECK_TYPE;
3764 
3765         if (!f2fs_map_blocks(inode, &map, 0, F2FS_GET_BLOCK_BMAP))
3766             blknr = map.m_pblk;
3767     }
3768 out:
3769     trace_f2fs_bmap(inode, block, blknr);
3770     return blknr;
3771 }
3772 
3773 #ifdef CONFIG_SWAP
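     /*
      * Rewrite @blkcnt blocks starting at @start_blk, one section at a time,
      * into freshly allocated pinned cold-data sections.  This is used to
      * realign swapfile extents that are not section-aligned.
      */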
3774 static int f2fs_migrate_blocks(struct inode *inode, block_t start_blk,
3775                             unsigned int blkcnt)
3776 {
3777     struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
3778     unsigned int blkofs;
3779     unsigned int blk_per_sec = BLKS_PER_SEC(sbi);
3780     unsigned int secidx = start_blk / blk_per_sec;
3781     unsigned int end_sec = secidx + blkcnt / blk_per_sec;
3782     int ret = 0;
3783 
3784     f2fs_down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
3785     filemap_invalidate_lock(inode->i_mapping);
3786 
3787     set_inode_flag(inode, FI_ALIGNED_WRITE);
3788     set_inode_flag(inode, FI_OPU_WRITE);
3789 
3790     for (; secidx < end_sec; secidx++) {
3791         f2fs_down_write(&sbi->pin_sem);
3792 
3793         f2fs_lock_op(sbi);
3794         f2fs_allocate_new_section(sbi, CURSEG_COLD_DATA_PINNED, false);
3795         f2fs_unlock_op(sbi);
3796 
3797         set_inode_flag(inode, FI_SKIP_WRITES);
3798 
3799         for (blkofs = 0; blkofs < blk_per_sec; blkofs++) {
3800             struct page *page;
3801             unsigned int blkidx = secidx * blk_per_sec + blkofs;
3802 
3803             page = f2fs_get_lock_data_page(inode, blkidx, true);
3804             if (IS_ERR(page)) {
3805                 f2fs_up_write(&sbi->pin_sem);
3806                 ret = PTR_ERR(page);
3807                 goto done;
3808             }
3809 
3810             set_page_dirty(page);
3811             f2fs_put_page(page, 1);
3812         }
3813 
3814         clear_inode_flag(inode, FI_SKIP_WRITES);
3815 
3816         ret = filemap_fdatawrite(inode->i_mapping);
3817 
3818         f2fs_up_write(&sbi->pin_sem);
3819 
3820         if (ret)
3821             break;
3822     }
3823 
3824 done:
3825     clear_inode_flag(inode, FI_SKIP_WRITES);
3826     clear_inode_flag(inode, FI_OPU_WRITE);
3827     clear_inode_flag(inode, FI_ALIGNED_WRITE);
3828 
3829     filemap_invalidate_unlock(inode->i_mapping);
3830     f2fs_up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
3831 
3832     return ret;
3833 }
3834 
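     /*
      * Build the swap extent list for a prospective swapfile.  Every extent
      * must be fully allocated (no holes) and section-aligned; unaligned
      * extents are migrated with f2fs_migrate_blocks() and remapped.
      */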
3835 static int check_swap_activate(struct swap_info_struct *sis,
3836                 struct file *swap_file, sector_t *span)
3837 {
3838     struct address_space *mapping = swap_file->f_mapping;
3839     struct inode *inode = mapping->host;
3840     struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
3841     sector_t cur_lblock;
3842     sector_t last_lblock;
3843     sector_t pblock;
3844     sector_t lowest_pblock = -1;
3845     sector_t highest_pblock = 0;
3846     int nr_extents = 0;
3847     unsigned long nr_pblocks;
3848     unsigned int blks_per_sec = BLKS_PER_SEC(sbi);
3849     unsigned int sec_blks_mask = BLKS_PER_SEC(sbi) - 1;
3850     unsigned int not_aligned = 0;
3851     int ret = 0;
3852 
3853     /*
3854      * Map all the blocks into the extent list.  This code doesn't try
3855      * to be very smart.
3856      */
3857     cur_lblock = 0;
3858     last_lblock = bytes_to_blks(inode, i_size_read(inode));
3859 
3860     while (cur_lblock < last_lblock && cur_lblock < sis->max) {
3861         struct f2fs_map_blocks map;
3862 retry:
3863         cond_resched();
3864 
3865         memset(&map, 0, sizeof(map));
3866         map.m_lblk = cur_lblock;
3867         map.m_len = last_lblock - cur_lblock;
3868         map.m_next_pgofs = NULL;
3869         map.m_next_extent = NULL;
3870         map.m_seg_type = NO_CHECK_TYPE;
3871         map.m_may_create = false;
3872 
3873         ret = f2fs_map_blocks(inode, &map, 0, F2FS_GET_BLOCK_FIEMAP);
3874         if (ret)
3875             goto out;
3876 
3877         /* hole */
3878         if (!(map.m_flags & F2FS_MAP_FLAGS)) {
3879             f2fs_err(sbi, "Swapfile has holes");
3880             ret = -EINVAL;
3881             goto out;
3882         }
3883 
3884         pblock = map.m_pblk;
3885         nr_pblocks = map.m_len;
3886 
3887         if ((pblock - SM_I(sbi)->main_blkaddr) & sec_blks_mask ||
3888                 nr_pblocks & sec_blks_mask) {
3889             not_aligned++;
3890 
3891             nr_pblocks = roundup(nr_pblocks, blks_per_sec);
3892             if (cur_lblock + nr_pblocks > sis->max)
3893                 nr_pblocks -= blks_per_sec;
3894 
3895             if (!nr_pblocks) {
3896                 /* this is the last extent */
3897                 nr_pblocks = map.m_len;
3898                 f2fs_warn(sbi, "Swapfile: last extent is not aligned to section");
3899                 goto next;
3900             }
3901 
3902             ret = f2fs_migrate_blocks(inode, cur_lblock,
3903                             nr_pblocks);
3904             if (ret)
3905                 goto out;
3906             goto retry;
3907         }
3908 next:
3909         if (cur_lblock + nr_pblocks >= sis->max)
3910             nr_pblocks = sis->max - cur_lblock;
3911 
3912         if (cur_lblock) {   /* exclude the header page */
3913             if (pblock < lowest_pblock)
3914                 lowest_pblock = pblock;
3915             if (pblock + nr_pblocks - 1 > highest_pblock)
3916                 highest_pblock = pblock + nr_pblocks - 1;
3917         }
3918 
3919         /*
3920          * We found a contiguous run of blocks; record it as a swap extent.
3921          */
3922         ret = add_swap_extent(sis, cur_lblock, nr_pblocks, pblock);
3923         if (ret < 0)
3924             goto out;
3925         nr_extents += ret;
3926         cur_lblock += nr_pblocks;
3927     }
3928     ret = nr_extents;
3929     *span = 1 + highest_pblock - lowest_pblock;
3930     if (cur_lblock == 0)
3931         cur_lblock = 1; /* force Empty message */
3932     sis->max = cur_lblock;
3933     sis->pages = cur_lblock - 1;
3934     sis->highest_bit = cur_lblock - 1;
3935 out:
3936     if (not_aligned)
3937         f2fs_warn(sbi, "Swapfile (%u) is not aligned to section: 1) creat(), 2) ioctl(F2FS_IOC_SET_PIN_FILE), 3) fallocate(%u * N)",
3938               not_aligned, blks_per_sec * F2FS_BLKSIZE);
3939     return ret;
3940 }
3941 
3942 static int f2fs_swap_activate(struct swap_info_struct *sis, struct file *file,
3943                 sector_t *span)
3944 {
3945     struct inode *inode = file_inode(file);
3946     int ret;
3947 
3948     if (!S_ISREG(inode->i_mode))
3949         return -EINVAL;
3950 
3951     if (f2fs_readonly(F2FS_I_SB(inode)->sb))
3952         return -EROFS;
3953 
3954     if (f2fs_lfs_mode(F2FS_I_SB(inode))) {
3955         f2fs_err(F2FS_I_SB(inode),
3956             "Swapfile not supported in LFS mode");
3957         return -EINVAL;
3958     }
3959 
3960     ret = f2fs_convert_inline_inode(inode);
3961     if (ret)
3962         return ret;
3963 
3964     if (!f2fs_disable_compressed_file(inode))
3965         return -EINVAL;
3966 
3967     f2fs_precache_extents(inode);
3968 
3969     ret = check_swap_activate(sis, file, span);
3970     if (ret < 0)
3971         return ret;
3972 
3973     set_inode_flag(inode, FI_PIN_FILE);
3974     f2fs_update_time(F2FS_I_SB(inode), REQ_TIME);
3975     return ret;
3976 }
3977 
3978 static void f2fs_swap_deactivate(struct file *file)
3979 {
3980     struct inode *inode = file_inode(file);
3981 
3982     clear_inode_flag(inode, FI_PIN_FILE);
3983 }
3984 #else
3985 static int f2fs_swap_activate(struct swap_info_struct *sis, struct file *file,
3986                 sector_t *span)
3987 {
3988     return -EOPNOTSUPP;
3989 }
3990 
3991 static void f2fs_swap_deactivate(struct file *file)
3992 {
3993 }
3994 #endif
3995 
3996 const struct address_space_operations f2fs_dblock_aops = {
3997     .read_folio = f2fs_read_data_folio,
3998     .readahead  = f2fs_readahead,
3999     .writepage  = f2fs_write_data_page,
4000     .writepages = f2fs_write_data_pages,
4001     .write_begin    = f2fs_write_begin,
4002     .write_end  = f2fs_write_end,
4003     .dirty_folio    = f2fs_dirty_data_folio,
4004     .migrate_folio  = filemap_migrate_folio,
4005     .invalidate_folio = f2fs_invalidate_folio,
4006     .release_folio  = f2fs_release_folio,
4007     .direct_IO  = noop_direct_IO,
4008     .bmap       = f2fs_bmap,
4009     .swap_activate  = f2fs_swap_activate,
4010     .swap_deactivate = f2fs_swap_deactivate,
4011 };
4012 
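     /*
      * Clear the PAGECACHE_TAG_DIRTY xarray tag for @page without touching
      * the page's own dirty flag.
      */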
4013 void f2fs_clear_page_cache_dirty_tag(struct page *page)
4014 {
4015     struct address_space *mapping = page_mapping(page);
4016     unsigned long flags;
4017 
4018     xa_lock_irqsave(&mapping->i_pages, flags);
4019     __xa_clear_mark(&mapping->i_pages, page_index(page),
4020                         PAGECACHE_TAG_DIRTY);
4021     xa_unlock_irqrestore(&mapping->i_pages, flags);
4022 }
4023 
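     /*
      * Set up the slab cache and mempool from which post-read contexts
      * (decryption, decompression, verity) are allocated for read bios.
      */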
4024 int __init f2fs_init_post_read_processing(void)
4025 {
4026     bio_post_read_ctx_cache =
4027         kmem_cache_create("f2fs_bio_post_read_ctx",
4028                   sizeof(struct bio_post_read_ctx), 0, 0, NULL);
4029     if (!bio_post_read_ctx_cache)
4030         goto fail;
4031     bio_post_read_ctx_pool =
4032         mempool_create_slab_pool(NUM_PREALLOC_POST_READ_CTXS,
4033                      bio_post_read_ctx_cache);
4034     if (!bio_post_read_ctx_pool)
4035         goto fail_free_cache;
4036     return 0;
4037 
4038 fail_free_cache:
4039     kmem_cache_destroy(bio_post_read_ctx_cache);
4040 fail:
4041     return -ENOMEM;
4042 }
4043 
4044 void f2fs_destroy_post_read_processing(void)
4045 {
4046     mempool_destroy(bio_post_read_ctx_pool);
4047     kmem_cache_destroy(bio_post_read_ctx_cache);
4048 }
4049 
4050 int f2fs_init_post_read_wq(struct f2fs_sb_info *sbi)
4051 {
4052     if (!f2fs_sb_has_encrypt(sbi) &&
4053         !f2fs_sb_has_verity(sbi) &&
4054         !f2fs_sb_has_compression(sbi))
4055         return 0;
4056 
4057     sbi->post_read_wq = alloc_workqueue("f2fs_post_read_wq",
4058                          WQ_UNBOUND | WQ_HIGHPRI,
4059                          num_online_cpus());
4060     if (!sbi->post_read_wq)
4061         return -ENOMEM;
4062     return 0;
4063 }
4064 
4065 void f2fs_destroy_post_read_wq(struct f2fs_sb_info *sbi)
4066 {
4067     if (sbi->post_read_wq)
4068         destroy_workqueue(sbi->post_read_wq);
4069 }
4070 
4071 int __init f2fs_init_bio_entry_cache(void)
4072 {
4073     bio_entry_slab = f2fs_kmem_cache_create("f2fs_bio_entry_slab",
4074             sizeof(struct bio_entry));
4075     if (!bio_entry_slab)
4076         return -ENOMEM;
4077     return 0;
4078 }
4079 
4080 void f2fs_destroy_bio_entry_cache(void)
4081 {
4082     kmem_cache_destroy(bio_entry_slab);
4083 }
4084 
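     /*
      * iomap_begin callback: translate an f2fs_map_blocks() result into a
      * struct iomap (mapped, unwritten, or hole) for the iomap infrastructure,
      * which drives e.g. the direct I/O path via f2fs_iomap_ops below.
      */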
4085 static int f2fs_iomap_begin(struct inode *inode, loff_t offset, loff_t length,
4086                 unsigned int flags, struct iomap *iomap,
4087                 struct iomap *srcmap)
4088 {
4089     struct f2fs_map_blocks map = {};
4090     pgoff_t next_pgofs = 0;
4091     int err;
4092 
4093     map.m_lblk = bytes_to_blks(inode, offset);
4094     map.m_len = bytes_to_blks(inode, offset + length - 1) - map.m_lblk + 1;
4095     map.m_next_pgofs = &next_pgofs;
4096     map.m_seg_type = f2fs_rw_hint_to_seg_type(inode->i_write_hint);
4097     if (flags & IOMAP_WRITE)
4098         map.m_may_create = true;
4099 
4100     err = f2fs_map_blocks(inode, &map, flags & IOMAP_WRITE,
4101                   F2FS_GET_BLOCK_DIO);
4102     if (err)
4103         return err;
4104 
4105     iomap->offset = blks_to_bytes(inode, map.m_lblk);
4106 
4107     /*
4108      * When inline encryption is enabled, sometimes I/O to an encrypted file
4109      * has to be broken up to guarantee DUN contiguity.  Handle this by
4110      * limiting the length of the mapping returned.
4111      */
4112     map.m_len = fscrypt_limit_io_blocks(inode, map.m_lblk, map.m_len);
4113 
4114     if (map.m_flags & (F2FS_MAP_MAPPED | F2FS_MAP_UNWRITTEN)) {
4115         iomap->length = blks_to_bytes(inode, map.m_len);
4116         if (map.m_flags & F2FS_MAP_MAPPED) {
4117             iomap->type = IOMAP_MAPPED;
4118             iomap->flags |= IOMAP_F_MERGED;
4119         } else {
4120             iomap->type = IOMAP_UNWRITTEN;
4121         }
4122         if (WARN_ON_ONCE(!__is_valid_data_blkaddr(map.m_pblk)))
4123             return -EINVAL;
4124 
4125         iomap->bdev = map.m_bdev;
4126         iomap->addr = blks_to_bytes(inode, map.m_pblk);
4127     } else {
4128         iomap->length = blks_to_bytes(inode, next_pgofs) -
4129                 iomap->offset;
4130         iomap->type = IOMAP_HOLE;
4131         iomap->addr = IOMAP_NULL_ADDR;
4132     }
4133 
4134     if (map.m_flags & F2FS_MAP_NEW)
4135         iomap->flags |= IOMAP_F_NEW;
4136     if ((inode->i_state & I_DIRTY_DATASYNC) ||
4137         offset + length > i_size_read(inode))
4138         iomap->flags |= IOMAP_F_DIRTY;
4139 
4140     return 0;
4141 }
4142 
4143 const struct iomap_ops f2fs_iomap_ops = {
4144     .iomap_begin    = f2fs_iomap_begin,
4145 };