0001 // SPDX-License-Identifier: GPL-2.0
0002 /*
0003  * fs/f2fs/checkpoint.c
0004  *
0005  * Copyright (c) 2012 Samsung Electronics Co., Ltd.
0006  *             http://www.samsung.com/
0007  */
0008 #include <linux/fs.h>
0009 #include <linux/bio.h>
0010 #include <linux/mpage.h>
0011 #include <linux/writeback.h>
0012 #include <linux/blkdev.h>
0013 #include <linux/f2fs_fs.h>
0014 #include <linux/pagevec.h>
0015 #include <linux/swap.h>
0016 #include <linux/kthread.h>
0017 
0018 #include "f2fs.h"
0019 #include "node.h"
0020 #include "segment.h"
0021 #include "iostat.h"
0022 #include <trace/events/f2fs.h>
0023 
0024 #define DEFAULT_CHECKPOINT_IOPRIO (IOPRIO_PRIO_VALUE(IOPRIO_CLASS_BE, 3))
0025 
0026 static struct kmem_cache *ino_entry_slab;
0027 struct kmem_cache *f2fs_inode_entry_slab;
0028 
0029 void f2fs_stop_checkpoint(struct f2fs_sb_info *sbi, bool end_io)
0030 {
0031     f2fs_build_fault_attr(sbi, 0, 0);
0032     set_ckpt_flags(sbi, CP_ERROR_FLAG);
0033     if (!end_io)
0034         f2fs_flush_merged_writes(sbi);
0035 }
0036 
0037 /*
0038  * We guarantee no failure on the returned page.
0039  */
0040 struct page *f2fs_grab_meta_page(struct f2fs_sb_info *sbi, pgoff_t index)
0041 {
0042     struct address_space *mapping = META_MAPPING(sbi);
0043     struct page *page;
0044 repeat:
0045     page = f2fs_grab_cache_page(mapping, index, false);
0046     if (!page) {
0047         cond_resched();
0048         goto repeat;
0049     }
0050     f2fs_wait_on_page_writeback(page, META, true, true);
0051     if (!PageUptodate(page))
0052         SetPageUptodate(page);
0053     return page;
0054 }
0055 
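/*
 * Read the meta block at @index into the meta-inode page cache.  Page
 * allocation is retried until it succeeds; on a submission or read failure
 * the page is dropped and an ERR_PTR is returned.  For POR (!is_meta) the
 * REQ_META flag is cleared from the request.
 */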
0056 static struct page *__get_meta_page(struct f2fs_sb_info *sbi, pgoff_t index,
0057                             bool is_meta)
0058 {
0059     struct address_space *mapping = META_MAPPING(sbi);
0060     struct page *page;
0061     struct f2fs_io_info fio = {
0062         .sbi = sbi,
0063         .type = META,
0064         .op = REQ_OP_READ,
0065         .op_flags = REQ_META | REQ_PRIO,
0066         .old_blkaddr = index,
0067         .new_blkaddr = index,
0068         .encrypted_page = NULL,
0069         .is_por = !is_meta,
0070     };
0071     int err;
0072 
0073     if (unlikely(!is_meta))
0074         fio.op_flags &= ~REQ_META;
0075 repeat:
0076     page = f2fs_grab_cache_page(mapping, index, false);
0077     if (!page) {
0078         cond_resched();
0079         goto repeat;
0080     }
0081     if (PageUptodate(page))
0082         goto out;
0083 
0084     fio.page = page;
0085 
0086     err = f2fs_submit_page_bio(&fio);
0087     if (err) {
0088         f2fs_put_page(page, 1);
0089         return ERR_PTR(err);
0090     }
0091 
0092     f2fs_update_iostat(sbi, FS_META_READ_IO, F2FS_BLKSIZE);
0093 
0094     lock_page(page);
0095     if (unlikely(page->mapping != mapping)) {
0096         f2fs_put_page(page, 1);
0097         goto repeat;
0098     }
0099 
0100     if (unlikely(!PageUptodate(page))) {
0101         f2fs_handle_page_eio(sbi, page->index, META);
0102         f2fs_put_page(page, 1);
0103         return ERR_PTR(-EIO);
0104     }
0105 out:
0106     return page;
0107 }
0108 
0109 struct page *f2fs_get_meta_page(struct f2fs_sb_info *sbi, pgoff_t index)
0110 {
0111     return __get_meta_page(sbi, index, true);
0112 }
0113 
0114 struct page *f2fs_get_meta_page_retry(struct f2fs_sb_info *sbi, pgoff_t index)
0115 {
0116     struct page *page;
0117     int count = 0;
0118 
0119 retry:
0120     page = __get_meta_page(sbi, index, true);
0121     if (IS_ERR(page)) {
0122         if (PTR_ERR(page) == -EIO &&
0123                 ++count <= DEFAULT_RETRY_IO_COUNT)
0124             goto retry;
0125         f2fs_stop_checkpoint(sbi, false);
0126     }
0127     return page;
0128 }
0129 
0130 /* for POR only */
0131 struct page *f2fs_get_tmp_page(struct f2fs_sb_info *sbi, pgoff_t index)
0132 {
0133     return __get_meta_page(sbi, index, false);
0134 }
0135 
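/*
 * For DATA_GENERIC_ENHANCE and DATA_GENERIC_ENHANCE_READ, check that the
 * block is marked valid in the current SIT bitmap of its segment; for
 * DATA_GENERIC_ENHANCE a mismatch is reported and SBI_NEED_FSCK is set.
 */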
0136 static bool __is_bitmap_valid(struct f2fs_sb_info *sbi, block_t blkaddr,
0137                             int type)
0138 {
0139     struct seg_entry *se;
0140     unsigned int segno, offset;
0141     bool exist;
0142 
0143     if (type != DATA_GENERIC_ENHANCE && type != DATA_GENERIC_ENHANCE_READ)
0144         return true;
0145 
0146     segno = GET_SEGNO(sbi, blkaddr);
0147     offset = GET_BLKOFF_FROM_SEG0(sbi, blkaddr);
0148     se = get_seg_entry(sbi, segno);
0149 
0150     exist = f2fs_test_bit(offset, se->cur_valid_map);
0151     if (!exist && type == DATA_GENERIC_ENHANCE) {
0152         f2fs_err(sbi, "Inconsistent error blkaddr:%u, sit bitmap:%d",
0153              blkaddr, exist);
0154         set_sbi_flag(sbi, SBI_NEED_FSCK);
0155         dump_stack();
0156     }
0157     return exist;
0158 }
0159 
0160 bool f2fs_is_valid_blkaddr(struct f2fs_sb_info *sbi,
0161                     block_t blkaddr, int type)
0162 {
0163     switch (type) {
0164     case META_NAT:
0165         break;
0166     case META_SIT:
0167         if (unlikely(blkaddr >= SIT_BLK_CNT(sbi)))
0168             return false;
0169         break;
0170     case META_SSA:
0171         if (unlikely(blkaddr >= MAIN_BLKADDR(sbi) ||
0172             blkaddr < SM_I(sbi)->ssa_blkaddr))
0173             return false;
0174         break;
0175     case META_CP:
0176         if (unlikely(blkaddr >= SIT_I(sbi)->sit_base_addr ||
0177             blkaddr < __start_cp_addr(sbi)))
0178             return false;
0179         break;
0180     case META_POR:
0181         if (unlikely(blkaddr >= MAX_BLKADDR(sbi) ||
0182             blkaddr < MAIN_BLKADDR(sbi)))
0183             return false;
0184         break;
0185     case DATA_GENERIC:
0186     case DATA_GENERIC_ENHANCE:
0187     case DATA_GENERIC_ENHANCE_READ:
0188         if (unlikely(blkaddr >= MAX_BLKADDR(sbi) ||
0189                 blkaddr < MAIN_BLKADDR(sbi))) {
0190             f2fs_warn(sbi, "access invalid blkaddr:%u",
0191                   blkaddr);
0192             set_sbi_flag(sbi, SBI_NEED_FSCK);
0193             dump_stack();
0194             return false;
0195         } else {
0196             return __is_bitmap_valid(sbi, blkaddr, type);
0197         }
0198         break;
0199     case META_GENERIC:
0200         if (unlikely(blkaddr < SEG0_BLKADDR(sbi) ||
0201             blkaddr >= MAIN_BLKADDR(sbi)))
0202             return false;
0203         break;
0204     default:
0205         BUG();
0206     }
0207 
0208     return true;
0209 }
0210 
0211 /*
0212  * Readahead CP/NAT/SIT/SSA/POR pages
0213  */
0214 int f2fs_ra_meta_pages(struct f2fs_sb_info *sbi, block_t start, int nrpages,
0215                             int type, bool sync)
0216 {
0217     struct page *page;
0218     block_t blkno = start;
0219     struct f2fs_io_info fio = {
0220         .sbi = sbi,
0221         .type = META,
0222         .op = REQ_OP_READ,
0223         .op_flags = sync ? (REQ_META | REQ_PRIO) : REQ_RAHEAD,
0224         .encrypted_page = NULL,
0225         .in_list = false,
0226         .is_por = (type == META_POR),
0227     };
0228     struct blk_plug plug;
0229     int err;
0230 
0231     if (unlikely(type == META_POR))
0232         fio.op_flags &= ~REQ_META;
0233 
0234     blk_start_plug(&plug);
0235     for (; nrpages-- > 0; blkno++) {
0236 
0237         if (!f2fs_is_valid_blkaddr(sbi, blkno, type))
0238             goto out;
0239 
0240         switch (type) {
0241         case META_NAT:
0242             if (unlikely(blkno >=
0243                     NAT_BLOCK_OFFSET(NM_I(sbi)->max_nid)))
0244                 blkno = 0;
0245             /* get nat block addr */
0246             fio.new_blkaddr = current_nat_addr(sbi,
0247                     blkno * NAT_ENTRY_PER_BLOCK);
0248             break;
0249         case META_SIT:
0250             if (unlikely(blkno >= TOTAL_SEGS(sbi)))
0251                 goto out;
0252             /* get sit block addr */
0253             fio.new_blkaddr = current_sit_addr(sbi,
0254                     blkno * SIT_ENTRY_PER_BLOCK);
0255             break;
0256         case META_SSA:
0257         case META_CP:
0258         case META_POR:
0259             fio.new_blkaddr = blkno;
0260             break;
0261         default:
0262             BUG();
0263         }
0264 
0265         page = f2fs_grab_cache_page(META_MAPPING(sbi),
0266                         fio.new_blkaddr, false);
0267         if (!page)
0268             continue;
0269         if (PageUptodate(page)) {
0270             f2fs_put_page(page, 1);
0271             continue;
0272         }
0273 
0274         fio.page = page;
0275         err = f2fs_submit_page_bio(&fio);
0276         f2fs_put_page(page, err ? 1 : 0);
0277 
0278         if (!err)
0279             f2fs_update_iostat(sbi, FS_META_READ_IO, F2FS_BLKSIZE);
0280     }
0281 out:
0282     blk_finish_plug(&plug);
0283     return blkno - start;
0284 }
0285 
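/*
 * Conditionally readahead @ra_blocks recovery meta pages starting at @index,
 * but only when the target page is not already cached and uptodate.
 */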
0286 void f2fs_ra_meta_pages_cond(struct f2fs_sb_info *sbi, pgoff_t index,
0287                             unsigned int ra_blocks)
0288 {
0289     struct page *page;
0290     bool readahead = false;
0291 
0292     if (ra_blocks == RECOVERY_MIN_RA_BLOCKS)
0293         return;
0294 
0295     page = find_get_page(META_MAPPING(sbi), index);
0296     if (!page || !PageUptodate(page))
0297         readahead = true;
0298     f2fs_put_page(page, 0);
0299 
0300     if (readahead)
0301         f2fs_ra_meta_pages(sbi, index, ra_blocks, META_POR, true);
0302 }
0303 
0304 static int __f2fs_write_meta_page(struct page *page,
0305                 struct writeback_control *wbc,
0306                 enum iostat_type io_type)
0307 {
0308     struct f2fs_sb_info *sbi = F2FS_P_SB(page);
0309 
0310     trace_f2fs_writepage(page, META);
0311 
0312     if (unlikely(f2fs_cp_error(sbi)))
0313         goto redirty_out;
0314     if (unlikely(is_sbi_flag_set(sbi, SBI_POR_DOING)))
0315         goto redirty_out;
0316     if (wbc->for_reclaim && page->index < GET_SUM_BLOCK(sbi, 0))
0317         goto redirty_out;
0318 
0319     f2fs_do_write_meta_page(sbi, page, io_type);
0320     dec_page_count(sbi, F2FS_DIRTY_META);
0321 
0322     if (wbc->for_reclaim)
0323         f2fs_submit_merged_write_cond(sbi, NULL, page, 0, META);
0324 
0325     unlock_page(page);
0326 
0327     if (unlikely(f2fs_cp_error(sbi)))
0328         f2fs_submit_merged_write(sbi, META);
0329 
0330     return 0;
0331 
0332 redirty_out:
0333     redirty_page_for_writepage(wbc, page);
0334     return AOP_WRITEPAGE_ACTIVATE;
0335 }
0336 
0337 static int f2fs_write_meta_page(struct page *page,
0338                 struct writeback_control *wbc)
0339 {
0340     return __f2fs_write_meta_page(page, wbc, FS_META_IO);
0341 }
0342 
0343 static int f2fs_write_meta_pages(struct address_space *mapping,
0344                 struct writeback_control *wbc)
0345 {
0346     struct f2fs_sb_info *sbi = F2FS_M_SB(mapping);
0347     long diff, written;
0348 
0349     if (unlikely(is_sbi_flag_set(sbi, SBI_POR_DOING)))
0350         goto skip_write;
0351 
0352     /* collect a number of dirty meta pages and write them together */
0353     if (wbc->sync_mode != WB_SYNC_ALL &&
0354             get_pages(sbi, F2FS_DIRTY_META) <
0355                     nr_pages_to_skip(sbi, META))
0356         goto skip_write;
0357 
0358     /* if taking the lock failed, cp will flush dirty pages instead */
0359     if (!f2fs_down_write_trylock(&sbi->cp_global_sem))
0360         goto skip_write;
0361 
0362     trace_f2fs_writepages(mapping->host, wbc, META);
0363     diff = nr_pages_to_write(sbi, META, wbc);
0364     written = f2fs_sync_meta_pages(sbi, META, wbc->nr_to_write, FS_META_IO);
0365     f2fs_up_write(&sbi->cp_global_sem);
0366     wbc->nr_to_write = max((long)0, wbc->nr_to_write - written - diff);
0367     return 0;
0368 
0369 skip_write:
0370     wbc->pages_skipped += get_pages(sbi, F2FS_DIRTY_META);
0371     trace_f2fs_writepages(mapping->host, wbc, META);
0372     return 0;
0373 }
0374 
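/*
 * Write back dirty meta pages in ascending index order.  Unless @nr_to_write
 * is LONG_MAX, writeback stops at the first gap in the dirty-page indexes.
 */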
0375 long f2fs_sync_meta_pages(struct f2fs_sb_info *sbi, enum page_type type,
0376                 long nr_to_write, enum iostat_type io_type)
0377 {
0378     struct address_space *mapping = META_MAPPING(sbi);
0379     pgoff_t index = 0, prev = ULONG_MAX;
0380     struct pagevec pvec;
0381     long nwritten = 0;
0382     int nr_pages;
0383     struct writeback_control wbc = {
0384         .for_reclaim = 0,
0385     };
0386     struct blk_plug plug;
0387 
0388     pagevec_init(&pvec);
0389 
0390     blk_start_plug(&plug);
0391 
0392     while ((nr_pages = pagevec_lookup_tag(&pvec, mapping, &index,
0393                 PAGECACHE_TAG_DIRTY))) {
0394         int i;
0395 
0396         for (i = 0; i < nr_pages; i++) {
0397             struct page *page = pvec.pages[i];
0398 
0399             if (prev == ULONG_MAX)
0400                 prev = page->index - 1;
0401             if (nr_to_write != LONG_MAX && page->index != prev + 1) {
0402                 pagevec_release(&pvec);
0403                 goto stop;
0404             }
0405 
0406             lock_page(page);
0407 
0408             if (unlikely(page->mapping != mapping)) {
0409 continue_unlock:
0410                 unlock_page(page);
0411                 continue;
0412             }
0413             if (!PageDirty(page)) {
0414                 /* someone wrote it for us */
0415                 goto continue_unlock;
0416             }
0417 
0418             f2fs_wait_on_page_writeback(page, META, true, true);
0419 
0420             if (!clear_page_dirty_for_io(page))
0421                 goto continue_unlock;
0422 
0423             if (__f2fs_write_meta_page(page, &wbc, io_type)) {
0424                 unlock_page(page);
0425                 break;
0426             }
0427             nwritten++;
0428             prev = page->index;
0429             if (unlikely(nwritten >= nr_to_write))
0430                 break;
0431         }
0432         pagevec_release(&pvec);
0433         cond_resched();
0434     }
0435 stop:
0436     if (nwritten)
0437         f2fs_submit_merged_write(sbi, type);
0438 
0439     blk_finish_plug(&plug);
0440 
0441     return nwritten;
0442 }
0443 
0444 static bool f2fs_dirty_meta_folio(struct address_space *mapping,
0445         struct folio *folio)
0446 {
0447     trace_f2fs_set_page_dirty(&folio->page, META);
0448 
0449     if (!folio_test_uptodate(folio))
0450         folio_mark_uptodate(folio);
0451     if (!folio_test_dirty(folio)) {
0452         filemap_dirty_folio(mapping, folio);
0453         inc_page_count(F2FS_M_SB(mapping), F2FS_DIRTY_META);
0454         set_page_private_reference(&folio->page);
0455         return true;
0456     }
0457     return false;
0458 }
0459 
0460 const struct address_space_operations f2fs_meta_aops = {
0461     .writepage  = f2fs_write_meta_page,
0462     .writepages = f2fs_write_meta_pages,
0463     .dirty_folio    = f2fs_dirty_meta_folio,
0464     .invalidate_folio = f2fs_invalidate_folio,
0465     .release_folio  = f2fs_release_folio,
0466     .migrate_folio  = filemap_migrate_folio,
0467 };
0468 
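/*
 * Add an ino entry of @type for @ino if one does not exist yet; for FLUSH_INO
 * the @devidx bit is also set in the entry's dirty_device map.  The slab
 * allocation is done outside im->ino_lock and is retried if another thread
 * inserts the entry first.
 */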
0469 static void __add_ino_entry(struct f2fs_sb_info *sbi, nid_t ino,
0470                         unsigned int devidx, int type)
0471 {
0472     struct inode_management *im = &sbi->im[type];
0473     struct ino_entry *e = NULL, *new = NULL;
0474 
0475     if (type == FLUSH_INO) {
0476         rcu_read_lock();
0477         e = radix_tree_lookup(&im->ino_root, ino);
0478         rcu_read_unlock();
0479     }
0480 
0481 retry:
0482     if (!e)
0483         new = f2fs_kmem_cache_alloc(ino_entry_slab,
0484                         GFP_NOFS, true, NULL);
0485 
0486     radix_tree_preload(GFP_NOFS | __GFP_NOFAIL);
0487 
0488     spin_lock(&im->ino_lock);
0489     e = radix_tree_lookup(&im->ino_root, ino);
0490     if (!e) {
0491         if (!new) {
0492             spin_unlock(&im->ino_lock);
0493             goto retry;
0494         }
0495         e = new;
0496         if (unlikely(radix_tree_insert(&im->ino_root, ino, e)))
0497             f2fs_bug_on(sbi, 1);
0498 
0499         memset(e, 0, sizeof(struct ino_entry));
0500         e->ino = ino;
0501 
0502         list_add_tail(&e->list, &im->ino_list);
0503         if (type != ORPHAN_INO)
0504             im->ino_num++;
0505     }
0506 
0507     if (type == FLUSH_INO)
0508         f2fs_set_bit(devidx, (char *)&e->dirty_device);
0509 
0510     spin_unlock(&im->ino_lock);
0511     radix_tree_preload_end();
0512 
0513     if (new && e != new)
0514         kmem_cache_free(ino_entry_slab, new);
0515 }
0516 
0517 static void __remove_ino_entry(struct f2fs_sb_info *sbi, nid_t ino, int type)
0518 {
0519     struct inode_management *im = &sbi->im[type];
0520     struct ino_entry *e;
0521 
0522     spin_lock(&im->ino_lock);
0523     e = radix_tree_lookup(&im->ino_root, ino);
0524     if (e) {
0525         list_del(&e->list);
0526         radix_tree_delete(&im->ino_root, ino);
0527         im->ino_num--;
0528         spin_unlock(&im->ino_lock);
0529         kmem_cache_free(ino_entry_slab, e);
0530         return;
0531     }
0532     spin_unlock(&im->ino_lock);
0533 }
0534 
0535 void f2fs_add_ino_entry(struct f2fs_sb_info *sbi, nid_t ino, int type)
0536 {
0537     /* add new dirty ino entry into list */
0538     __add_ino_entry(sbi, ino, 0, type);
0539 }
0540 
0541 void f2fs_remove_ino_entry(struct f2fs_sb_info *sbi, nid_t ino, int type)
0542 {
0543     /* remove dirty ino entry from list */
0544     __remove_ino_entry(sbi, ino, type);
0545 }
0546 
0547 /* mode should be APPEND_INO, UPDATE_INO or TRANS_DIR_INO */
0548 bool f2fs_exist_written_data(struct f2fs_sb_info *sbi, nid_t ino, int mode)
0549 {
0550     struct inode_management *im = &sbi->im[mode];
0551     struct ino_entry *e;
0552 
0553     spin_lock(&im->ino_lock);
0554     e = radix_tree_lookup(&im->ino_root, ino);
0555     spin_unlock(&im->ino_lock);
0556     return e ? true : false;
0557 }
0558 
0559 void f2fs_release_ino_entry(struct f2fs_sb_info *sbi, bool all)
0560 {
0561     struct ino_entry *e, *tmp;
0562     int i;
0563 
0564     for (i = all ? ORPHAN_INO : APPEND_INO; i < MAX_INO_ENTRY; i++) {
0565         struct inode_management *im = &sbi->im[i];
0566 
0567         spin_lock(&im->ino_lock);
0568         list_for_each_entry_safe(e, tmp, &im->ino_list, list) {
0569             list_del(&e->list);
0570             radix_tree_delete(&im->ino_root, e->ino);
0571             kmem_cache_free(ino_entry_slab, e);
0572             im->ino_num--;
0573         }
0574         spin_unlock(&im->ino_lock);
0575     }
0576 }
0577 
0578 void f2fs_set_dirty_device(struct f2fs_sb_info *sbi, nid_t ino,
0579                     unsigned int devidx, int type)
0580 {
0581     __add_ino_entry(sbi, ino, devidx, type);
0582 }
0583 
0584 bool f2fs_is_dirty_device(struct f2fs_sb_info *sbi, nid_t ino,
0585                     unsigned int devidx, int type)
0586 {
0587     struct inode_management *im = &sbi->im[type];
0588     struct ino_entry *e;
0589     bool is_dirty = false;
0590 
0591     spin_lock(&im->ino_lock);
0592     e = radix_tree_lookup(&im->ino_root, ino);
0593     if (e && f2fs_test_bit(devidx, (char *)&e->dirty_device))
0594         is_dirty = true;
0595     spin_unlock(&im->ino_lock);
0596     return is_dirty;
0597 }
0598 
0599 int f2fs_acquire_orphan_inode(struct f2fs_sb_info *sbi)
0600 {
0601     struct inode_management *im = &sbi->im[ORPHAN_INO];
0602     int err = 0;
0603 
0604     spin_lock(&im->ino_lock);
0605 
0606     if (time_to_inject(sbi, FAULT_ORPHAN)) {
0607         spin_unlock(&im->ino_lock);
0608         f2fs_show_injection_info(sbi, FAULT_ORPHAN);
0609         return -ENOSPC;
0610     }
0611 
0612     if (unlikely(im->ino_num >= sbi->max_orphans))
0613         err = -ENOSPC;
0614     else
0615         im->ino_num++;
0616     spin_unlock(&im->ino_lock);
0617 
0618     return err;
0619 }
0620 
0621 void f2fs_release_orphan_inode(struct f2fs_sb_info *sbi)
0622 {
0623     struct inode_management *im = &sbi->im[ORPHAN_INO];
0624 
0625     spin_lock(&im->ino_lock);
0626     f2fs_bug_on(sbi, im->ino_num == 0);
0627     im->ino_num--;
0628     spin_unlock(&im->ino_lock);
0629 }
0630 
0631 void f2fs_add_orphan_inode(struct inode *inode)
0632 {
0633     /* add new orphan ino entry into list */
0634     __add_ino_entry(F2FS_I_SB(inode), inode->i_ino, 0, ORPHAN_INO);
0635     f2fs_update_inode_page(inode);
0636 }
0637 
0638 void f2fs_remove_orphan_inode(struct f2fs_sb_info *sbi, nid_t ino)
0639 {
0640     /* remove orphan entry from orphan list */
0641     __remove_ino_entry(sbi, ino, ORPHAN_INO);
0642 }
0643 
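/*
 * Recover one orphan inode: load it, drop its last link and let iput()
 * truncate and evict it, then verify that its node block was actually freed
 * (NULL_ADDR).  Any failure marks the filesystem for fsck.
 */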
0644 static int recover_orphan_inode(struct f2fs_sb_info *sbi, nid_t ino)
0645 {
0646     struct inode *inode;
0647     struct node_info ni;
0648     int err;
0649 
0650     inode = f2fs_iget_retry(sbi->sb, ino);
0651     if (IS_ERR(inode)) {
0652         /*
0653          * it is a bug if we cannot find the inode
0654          * for this orphan entry.
0655          */
0656         f2fs_bug_on(sbi, PTR_ERR(inode) == -ENOENT);
0657         return PTR_ERR(inode);
0658     }
0659 
0660     err = f2fs_dquot_initialize(inode);
0661     if (err) {
0662         iput(inode);
0663         goto err_out;
0664     }
0665 
0666     clear_nlink(inode);
0667 
0668     /* truncate all the data during iput */
0669     iput(inode);
0670 
0671     err = f2fs_get_node_info(sbi, ino, &ni, false);
0672     if (err)
0673         goto err_out;
0674 
0675     /* ENOMEM was fully retried in f2fs_evict_inode. */
0676     if (ni.blk_addr != NULL_ADDR) {
0677         err = -EIO;
0678         goto err_out;
0679     }
0680     return 0;
0681 
0682 err_out:
0683     set_sbi_flag(sbi, SBI_NEED_FSCK);
0684     f2fs_warn(sbi, "%s: orphan failed (ino=%x), run fsck to fix.",
0685           __func__, ino);
0686     return err;
0687 }
0688 
0689 int f2fs_recover_orphan_inodes(struct f2fs_sb_info *sbi)
0690 {
0691     block_t start_blk, orphan_blocks, i, j;
0692     unsigned int s_flags = sbi->sb->s_flags;
0693     int err = 0;
0694 #ifdef CONFIG_QUOTA
0695     int quota_enabled;
0696 #endif
0697 
0698     if (!is_set_ckpt_flags(sbi, CP_ORPHAN_PRESENT_FLAG))
0699         return 0;
0700 
0701     if (bdev_read_only(sbi->sb->s_bdev)) {
0702         f2fs_info(sbi, "write access unavailable, skipping orphan cleanup");
0703         return 0;
0704     }
0705 
0706     if (s_flags & SB_RDONLY) {
0707         f2fs_info(sbi, "orphan cleanup on readonly fs");
0708         sbi->sb->s_flags &= ~SB_RDONLY;
0709     }
0710 
0711 #ifdef CONFIG_QUOTA
0712     /*
0713      * Turn on quotas that were not enabled for read-only mounts if the
0714      * filesystem has the quota feature, so that they are updated correctly.
0715      */
0716     quota_enabled = f2fs_enable_quota_files(sbi, s_flags & SB_RDONLY);
0717 #endif
0718 
0719     start_blk = __start_cp_addr(sbi) + 1 + __cp_payload(sbi);
0720     orphan_blocks = __start_sum_addr(sbi) - 1 - __cp_payload(sbi);
0721 
0722     f2fs_ra_meta_pages(sbi, start_blk, orphan_blocks, META_CP, true);
0723 
0724     for (i = 0; i < orphan_blocks; i++) {
0725         struct page *page;
0726         struct f2fs_orphan_block *orphan_blk;
0727 
0728         page = f2fs_get_meta_page(sbi, start_blk + i);
0729         if (IS_ERR(page)) {
0730             err = PTR_ERR(page);
0731             goto out;
0732         }
0733 
0734         orphan_blk = (struct f2fs_orphan_block *)page_address(page);
0735         for (j = 0; j < le32_to_cpu(orphan_blk->entry_count); j++) {
0736             nid_t ino = le32_to_cpu(orphan_blk->ino[j]);
0737 
0738             err = recover_orphan_inode(sbi, ino);
0739             if (err) {
0740                 f2fs_put_page(page, 1);
0741                 goto out;
0742             }
0743         }
0744         f2fs_put_page(page, 1);
0745     }
0746     /* clear the orphan present flag */
0747     clear_ckpt_flags(sbi, CP_ORPHAN_PRESENT_FLAG);
0748 out:
0749     set_sbi_flag(sbi, SBI_IS_RECOVERED);
0750 
0751 #ifdef CONFIG_QUOTA
0752     /* Turn quotas off */
0753     if (quota_enabled)
0754         f2fs_quota_off_umount(sbi->sb);
0755 #endif
0756     sbi->sb->s_flags = s_flags; /* Restore SB_RDONLY status */
0757 
0758     return err;
0759 }
0760 
0761 static void write_orphan_inodes(struct f2fs_sb_info *sbi, block_t start_blk)
0762 {
0763     struct list_head *head;
0764     struct f2fs_orphan_block *orphan_blk = NULL;
0765     unsigned int nentries = 0;
0766     unsigned short index = 1;
0767     unsigned short orphan_blocks;
0768     struct page *page = NULL;
0769     struct ino_entry *orphan = NULL;
0770     struct inode_management *im = &sbi->im[ORPHAN_INO];
0771 
0772     orphan_blocks = GET_ORPHAN_BLOCKS(im->ino_num);
0773 
0774     /*
0775      * we don't need to do spin_lock(&im->ino_lock) here, since all the
0776      * orphan inode operations are covered under f2fs_lock_op().
0777      * Also, the spin_lock should be avoided because of the page operations below.
0778      */
0779     head = &im->ino_list;
0780 
0781     /* loop over each orphan inode entry and write it into the journal block */
0782     list_for_each_entry(orphan, head, list) {
0783         if (!page) {
0784             page = f2fs_grab_meta_page(sbi, start_blk++);
0785             orphan_blk =
0786                 (struct f2fs_orphan_block *)page_address(page);
0787             memset(orphan_blk, 0, sizeof(*orphan_blk));
0788         }
0789 
0790         orphan_blk->ino[nentries++] = cpu_to_le32(orphan->ino);
0791 
0792         if (nentries == F2FS_ORPHANS_PER_BLOCK) {
0793             /*
0794              * when an orphan block is full (1020 entries),
0795              * we need to flush the current orphan block
0796              * and start a new one in memory
0797              */
0798             orphan_blk->blk_addr = cpu_to_le16(index);
0799             orphan_blk->blk_count = cpu_to_le16(orphan_blocks);
0800             orphan_blk->entry_count = cpu_to_le32(nentries);
0801             set_page_dirty(page);
0802             f2fs_put_page(page, 1);
0803             index++;
0804             nentries = 0;
0805             page = NULL;
0806         }
0807     }
0808 
0809     if (page) {
0810         orphan_blk->blk_addr = cpu_to_le16(index);
0811         orphan_blk->blk_count = cpu_to_le16(orphan_blocks);
0812         orphan_blk->entry_count = cpu_to_le32(nentries);
0813         set_page_dirty(page);
0814         f2fs_put_page(page, 1);
0815     }
0816 }
0817 
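/*
 * Checksum the checkpoint block while skipping the 4-byte checksum field
 * itself: CRC bytes [0, checksum_offset), and when the checksum is not stored
 * at the end of the block, also the bytes after the field.
 */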
0818 static __u32 f2fs_checkpoint_chksum(struct f2fs_sb_info *sbi,
0819                         struct f2fs_checkpoint *ckpt)
0820 {
0821     unsigned int chksum_ofs = le32_to_cpu(ckpt->checksum_offset);
0822     __u32 chksum;
0823 
0824     chksum = f2fs_crc32(sbi, ckpt, chksum_ofs);
0825     if (chksum_ofs < CP_CHKSUM_OFFSET) {
0826         chksum_ofs += sizeof(chksum);
0827         chksum = f2fs_chksum(sbi, chksum, (__u8 *)ckpt + chksum_ofs,
0828                         F2FS_BLKSIZE - chksum_ofs);
0829     }
0830     return chksum;
0831 }
0832 
0833 static int get_checkpoint_version(struct f2fs_sb_info *sbi, block_t cp_addr,
0834         struct f2fs_checkpoint **cp_block, struct page **cp_page,
0835         unsigned long long *version)
0836 {
0837     size_t crc_offset = 0;
0838     __u32 crc;
0839 
0840     *cp_page = f2fs_get_meta_page(sbi, cp_addr);
0841     if (IS_ERR(*cp_page))
0842         return PTR_ERR(*cp_page);
0843 
0844     *cp_block = (struct f2fs_checkpoint *)page_address(*cp_page);
0845 
0846     crc_offset = le32_to_cpu((*cp_block)->checksum_offset);
0847     if (crc_offset < CP_MIN_CHKSUM_OFFSET ||
0848             crc_offset > CP_CHKSUM_OFFSET) {
0849         f2fs_put_page(*cp_page, 1);
0850         f2fs_warn(sbi, "invalid crc_offset: %zu", crc_offset);
0851         return -EINVAL;
0852     }
0853 
0854     crc = f2fs_checkpoint_chksum(sbi, *cp_block);
0855     if (crc != cur_cp_crc(*cp_block)) {
0856         f2fs_put_page(*cp_page, 1);
0857         f2fs_warn(sbi, "invalid crc value");
0858         return -EINVAL;
0859     }
0860 
0861     *version = cur_cp_version(*cp_block);
0862     return 0;
0863 }
0864 
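/*
 * Validate one checkpoint pack by checking the CRC and version of both its
 * first and last blocks.  The pack is valid only when the two versions match;
 * in that case the first block's page is returned, otherwise NULL.
 */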
0865 static struct page *validate_checkpoint(struct f2fs_sb_info *sbi,
0866                 block_t cp_addr, unsigned long long *version)
0867 {
0868     struct page *cp_page_1 = NULL, *cp_page_2 = NULL;
0869     struct f2fs_checkpoint *cp_block = NULL;
0870     unsigned long long cur_version = 0, pre_version = 0;
0871     unsigned int cp_blocks;
0872     int err;
0873 
0874     err = get_checkpoint_version(sbi, cp_addr, &cp_block,
0875                     &cp_page_1, version);
0876     if (err)
0877         return NULL;
0878 
0879     cp_blocks = le32_to_cpu(cp_block->cp_pack_total_block_count);
0880 
0881     if (cp_blocks > sbi->blocks_per_seg || cp_blocks <= F2FS_CP_PACKS) {
0882         f2fs_warn(sbi, "invalid cp_pack_total_block_count:%u",
0883               le32_to_cpu(cp_block->cp_pack_total_block_count));
0884         goto invalid_cp;
0885     }
0886     pre_version = *version;
0887 
0888     cp_addr += cp_blocks - 1;
0889     err = get_checkpoint_version(sbi, cp_addr, &cp_block,
0890                     &cp_page_2, version);
0891     if (err)
0892         goto invalid_cp;
0893     cur_version = *version;
0894 
0895     if (cur_version == pre_version) {
0896         *version = cur_version;
0897         f2fs_put_page(cp_page_2, 1);
0898         return cp_page_1;
0899     }
0900     f2fs_put_page(cp_page_2, 1);
0901 invalid_cp:
0902     f2fs_put_page(cp_page_1, 1);
0903     return NULL;
0904 }
0905 
0906 int f2fs_get_valid_checkpoint(struct f2fs_sb_info *sbi)
0907 {
0908     struct f2fs_checkpoint *cp_block;
0909     struct f2fs_super_block *fsb = sbi->raw_super;
0910     struct page *cp1, *cp2, *cur_page;
0911     unsigned long blk_size = sbi->blocksize;
0912     unsigned long long cp1_version = 0, cp2_version = 0;
0913     unsigned long long cp_start_blk_no;
0914     unsigned int cp_blks = 1 + __cp_payload(sbi);
0915     block_t cp_blk_no;
0916     int i;
0917     int err;
0918 
0919     sbi->ckpt = f2fs_kvzalloc(sbi, array_size(blk_size, cp_blks),
0920                   GFP_KERNEL);
0921     if (!sbi->ckpt)
0922         return -ENOMEM;
0923     /*
0924      * Finding the valid cp block involves reading both
0925      * sets (cp pack 1 and cp pack 2).
0926      */
0927     cp_start_blk_no = le32_to_cpu(fsb->cp_blkaddr);
0928     cp1 = validate_checkpoint(sbi, cp_start_blk_no, &cp1_version);
0929 
0930     /* The second checkpoint pack should start at the next segment */
0931     cp_start_blk_no += ((unsigned long long)1) <<
0932                 le32_to_cpu(fsb->log_blocks_per_seg);
0933     cp2 = validate_checkpoint(sbi, cp_start_blk_no, &cp2_version);
0934 
0935     if (cp1 && cp2) {
0936         if (ver_after(cp2_version, cp1_version))
0937             cur_page = cp2;
0938         else
0939             cur_page = cp1;
0940     } else if (cp1) {
0941         cur_page = cp1;
0942     } else if (cp2) {
0943         cur_page = cp2;
0944     } else {
0945         err = -EFSCORRUPTED;
0946         goto fail_no_cp;
0947     }
0948 
0949     cp_block = (struct f2fs_checkpoint *)page_address(cur_page);
0950     memcpy(sbi->ckpt, cp_block, blk_size);
0951 
0952     if (cur_page == cp1)
0953         sbi->cur_cp_pack = 1;
0954     else
0955         sbi->cur_cp_pack = 2;
0956 
0957     /* Sanity checking of checkpoint */
0958     if (f2fs_sanity_check_ckpt(sbi)) {
0959         err = -EFSCORRUPTED;
0960         goto free_fail_no_cp;
0961     }
0962 
0963     if (cp_blks <= 1)
0964         goto done;
0965 
0966     cp_blk_no = le32_to_cpu(fsb->cp_blkaddr);
0967     if (cur_page == cp2)
0968         cp_blk_no += 1 << le32_to_cpu(fsb->log_blocks_per_seg);
0969 
0970     for (i = 1; i < cp_blks; i++) {
0971         void *sit_bitmap_ptr;
0972         unsigned char *ckpt = (unsigned char *)sbi->ckpt;
0973 
0974         cur_page = f2fs_get_meta_page(sbi, cp_blk_no + i);
0975         if (IS_ERR(cur_page)) {
0976             err = PTR_ERR(cur_page);
0977             goto free_fail_no_cp;
0978         }
0979         sit_bitmap_ptr = page_address(cur_page);
0980         memcpy(ckpt + i * blk_size, sit_bitmap_ptr, blk_size);
0981         f2fs_put_page(cur_page, 1);
0982     }
0983 done:
0984     f2fs_put_page(cp1, 1);
0985     f2fs_put_page(cp2, 1);
0986     return 0;
0987 
0988 free_fail_no_cp:
0989     f2fs_put_page(cp1, 1);
0990     f2fs_put_page(cp2, 1);
0991 fail_no_cp:
0992     kvfree(sbi->ckpt);
0993     return err;
0994 }
0995 
0996 static void __add_dirty_inode(struct inode *inode, enum inode_type type)
0997 {
0998     struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
0999     int flag = (type == DIR_INODE) ? FI_DIRTY_DIR : FI_DIRTY_FILE;
1000 
1001     if (is_inode_flag_set(inode, flag))
1002         return;
1003 
1004     set_inode_flag(inode, flag);
1005     list_add_tail(&F2FS_I(inode)->dirty_list, &sbi->inode_list[type]);
1006     stat_inc_dirty_inode(sbi, type);
1007 }
1008 
1009 static void __remove_dirty_inode(struct inode *inode, enum inode_type type)
1010 {
1011     int flag = (type == DIR_INODE) ? FI_DIRTY_DIR : FI_DIRTY_FILE;
1012 
1013     if (get_dirty_pages(inode) || !is_inode_flag_set(inode, flag))
1014         return;
1015 
1016     list_del_init(&F2FS_I(inode)->dirty_list);
1017     clear_inode_flag(inode, flag);
1018     stat_dec_dirty_inode(F2FS_I_SB(inode), type);
1019 }
1020 
1021 void f2fs_update_dirty_folio(struct inode *inode, struct folio *folio)
1022 {
1023     struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
1024     enum inode_type type = S_ISDIR(inode->i_mode) ? DIR_INODE : FILE_INODE;
1025 
1026     if (!S_ISDIR(inode->i_mode) && !S_ISREG(inode->i_mode) &&
1027             !S_ISLNK(inode->i_mode))
1028         return;
1029 
1030     spin_lock(&sbi->inode_lock[type]);
1031     if (type != FILE_INODE || test_opt(sbi, DATA_FLUSH))
1032         __add_dirty_inode(inode, type);
1033     inode_inc_dirty_pages(inode);
1034     spin_unlock(&sbi->inode_lock[type]);
1035 
1036     set_page_private_reference(&folio->page);
1037 }
1038 
1039 void f2fs_remove_dirty_inode(struct inode *inode)
1040 {
1041     struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
1042     enum inode_type type = S_ISDIR(inode->i_mode) ? DIR_INODE : FILE_INODE;
1043 
1044     if (!S_ISDIR(inode->i_mode) && !S_ISREG(inode->i_mode) &&
1045             !S_ISLNK(inode->i_mode))
1046         return;
1047 
1048     if (type == FILE_INODE && !test_opt(sbi, DATA_FLUSH))
1049         return;
1050 
1051     spin_lock(&sbi->inode_lock[type]);
1052     __remove_dirty_inode(inode, type);
1053     spin_unlock(&sbi->inode_lock[type]);
1054 }
1055 
1056 int f2fs_sync_dirty_inodes(struct f2fs_sb_info *sbi, enum inode_type type)
1057 {
1058     struct list_head *head;
1059     struct inode *inode;
1060     struct f2fs_inode_info *fi;
1061     bool is_dir = (type == DIR_INODE);
1062     unsigned long ino = 0;
1063 
1064     trace_f2fs_sync_dirty_inodes_enter(sbi->sb, is_dir,
1065                 get_pages(sbi, is_dir ?
1066                 F2FS_DIRTY_DENTS : F2FS_DIRTY_DATA));
1067 retry:
1068     if (unlikely(f2fs_cp_error(sbi))) {
1069         trace_f2fs_sync_dirty_inodes_exit(sbi->sb, is_dir,
1070                 get_pages(sbi, is_dir ?
1071                 F2FS_DIRTY_DENTS : F2FS_DIRTY_DATA));
1072         return -EIO;
1073     }
1074 
1075     spin_lock(&sbi->inode_lock[type]);
1076 
1077     head = &sbi->inode_list[type];
1078     if (list_empty(head)) {
1079         spin_unlock(&sbi->inode_lock[type]);
1080         trace_f2fs_sync_dirty_inodes_exit(sbi->sb, is_dir,
1081                 get_pages(sbi, is_dir ?
1082                 F2FS_DIRTY_DENTS : F2FS_DIRTY_DATA));
1083         return 0;
1084     }
1085     fi = list_first_entry(head, struct f2fs_inode_info, dirty_list);
1086     inode = igrab(&fi->vfs_inode);
1087     spin_unlock(&sbi->inode_lock[type]);
1088     if (inode) {
1089         unsigned long cur_ino = inode->i_ino;
1090 
1091         F2FS_I(inode)->cp_task = current;
1092 
1093         filemap_fdatawrite(inode->i_mapping);
1094 
1095         F2FS_I(inode)->cp_task = NULL;
1096 
1097         iput(inode);
1098         /* We need to yield the CPU to other writers. */
1099         if (ino == cur_ino)
1100             cond_resched();
1101         else
1102             ino = cur_ino;
1103     } else {
1104         /*
1105          * We should submit the bio, since there may be several
1106          * dentry pages under writeback in the freeing inode.
1107          */
1108         f2fs_submit_merged_write(sbi, DATA);
1109         cond_resched();
1110     }
1111     goto retry;
1112 }
1113 
1114 int f2fs_sync_inode_meta(struct f2fs_sb_info *sbi)
1115 {
1116     struct list_head *head = &sbi->inode_list[DIRTY_META];
1117     struct inode *inode;
1118     struct f2fs_inode_info *fi;
1119     s64 total = get_pages(sbi, F2FS_DIRTY_IMETA);
1120 
1121     while (total--) {
1122         if (unlikely(f2fs_cp_error(sbi)))
1123             return -EIO;
1124 
1125         spin_lock(&sbi->inode_lock[DIRTY_META]);
1126         if (list_empty(head)) {
1127             spin_unlock(&sbi->inode_lock[DIRTY_META]);
1128             return 0;
1129         }
1130         fi = list_first_entry(head, struct f2fs_inode_info,
1131                             gdirty_list);
1132         inode = igrab(&fi->vfs_inode);
1133         spin_unlock(&sbi->inode_lock[DIRTY_META]);
1134         if (inode) {
1135             sync_inode_metadata(inode, 0);
1136 
1137             /* it's on eviction */
1138             if (is_inode_flag_set(inode, FI_DIRTY_INODE))
1139                 f2fs_update_inode_page(inode);
1140             iput(inode);
1141         }
1142     }
1143     return 0;
1144 }
1145 
1146 static void __prepare_cp_block(struct f2fs_sb_info *sbi)
1147 {
1148     struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi);
1149     struct f2fs_nm_info *nm_i = NM_I(sbi);
1150     nid_t last_nid = nm_i->next_scan_nid;
1151 
1152     next_free_nid(sbi, &last_nid);
1153     ckpt->valid_block_count = cpu_to_le64(valid_user_blocks(sbi));
1154     ckpt->valid_node_count = cpu_to_le32(valid_node_count(sbi));
1155     ckpt->valid_inode_count = cpu_to_le32(valid_inode_count(sbi));
1156     ckpt->next_free_nid = cpu_to_le32(last_nid);
1157 }
1158 
1159 static bool __need_flush_quota(struct f2fs_sb_info *sbi)
1160 {
1161     bool ret = false;
1162 
1163     if (!is_journalled_quota(sbi))
1164         return false;
1165 
1166     if (!f2fs_down_write_trylock(&sbi->quota_sem))
1167         return true;
1168     if (is_sbi_flag_set(sbi, SBI_QUOTA_SKIP_FLUSH)) {
1169         ret = false;
1170     } else if (is_sbi_flag_set(sbi, SBI_QUOTA_NEED_REPAIR)) {
1171         ret = false;
1172     } else if (is_sbi_flag_set(sbi, SBI_QUOTA_NEED_FLUSH)) {
1173         clear_sbi_flag(sbi, SBI_QUOTA_NEED_FLUSH);
1174         ret = true;
1175     } else if (get_pages(sbi, F2FS_DIRTY_QDATA)) {
1176         ret = true;
1177     }
1178     f2fs_up_write(&sbi->quota_sem);
1179     return ret;
1180 }
1181 
1182 /*
1183  * Freeze all the FS-operations for checkpoint.
1184  */
1185 static int block_operations(struct f2fs_sb_info *sbi)
1186 {
1187     struct writeback_control wbc = {
1188         .sync_mode = WB_SYNC_ALL,
1189         .nr_to_write = LONG_MAX,
1190         .for_reclaim = 0,
1191     };
1192     int err = 0, cnt = 0;
1193 
1194     /*
1195      * Let's flush inline_data in dirty node pages.
1196      */
1197     f2fs_flush_inline_data(sbi);
1198 
1199 retry_flush_quotas:
1200     f2fs_lock_all(sbi);
1201     if (__need_flush_quota(sbi)) {
1202         int locked;
1203 
1204         if (++cnt > DEFAULT_RETRY_QUOTA_FLUSH_COUNT) {
1205             set_sbi_flag(sbi, SBI_QUOTA_SKIP_FLUSH);
1206             set_sbi_flag(sbi, SBI_QUOTA_NEED_FLUSH);
1207             goto retry_flush_dents;
1208         }
1209         f2fs_unlock_all(sbi);
1210 
1211         /* the trylock only fails during mount/umount/freeze/quotactl */
1212         locked = down_read_trylock(&sbi->sb->s_umount);
1213         f2fs_quota_sync(sbi->sb, -1);
1214         if (locked)
1215             up_read(&sbi->sb->s_umount);
1216         cond_resched();
1217         goto retry_flush_quotas;
1218     }
1219 
1220 retry_flush_dents:
1221     /* write all the dirty dentry pages */
1222     if (get_pages(sbi, F2FS_DIRTY_DENTS)) {
1223         f2fs_unlock_all(sbi);
1224         err = f2fs_sync_dirty_inodes(sbi, DIR_INODE);
1225         if (err)
1226             return err;
1227         cond_resched();
1228         goto retry_flush_quotas;
1229     }
1230 
1231     /*
1232      * POR: we should ensure that there are no dirty node pages
1233      * until finishing nat/sit flush. inode->i_blocks can be updated.
1234      */
1235     f2fs_down_write(&sbi->node_change);
1236 
1237     if (get_pages(sbi, F2FS_DIRTY_IMETA)) {
1238         f2fs_up_write(&sbi->node_change);
1239         f2fs_unlock_all(sbi);
1240         err = f2fs_sync_inode_meta(sbi);
1241         if (err)
1242             return err;
1243         cond_resched();
1244         goto retry_flush_quotas;
1245     }
1246 
1247 retry_flush_nodes:
1248     f2fs_down_write(&sbi->node_write);
1249 
1250     if (get_pages(sbi, F2FS_DIRTY_NODES)) {
1251         f2fs_up_write(&sbi->node_write);
1252         atomic_inc(&sbi->wb_sync_req[NODE]);
1253         err = f2fs_sync_node_pages(sbi, &wbc, false, FS_CP_NODE_IO);
1254         atomic_dec(&sbi->wb_sync_req[NODE]);
1255         if (err) {
1256             f2fs_up_write(&sbi->node_change);
1257             f2fs_unlock_all(sbi);
1258             return err;
1259         }
1260         cond_resched();
1261         goto retry_flush_nodes;
1262     }
1263 
1264     /*
1265      * sbi->node_change is used only for the AIO write_begin path, which produces
1266      * dirty node blocks and some checkpoint values by block allocation.
1267      */
1268     __prepare_cp_block(sbi);
1269     f2fs_up_write(&sbi->node_change);
1270     return err;
1271 }
1272 
1273 static void unblock_operations(struct f2fs_sb_info *sbi)
1274 {
1275     f2fs_up_write(&sbi->node_write);
1276     f2fs_unlock_all(sbi);
1277 }
1278 
1279 void f2fs_wait_on_all_pages(struct f2fs_sb_info *sbi, int type)
1280 {
1281     DEFINE_WAIT(wait);
1282 
1283     for (;;) {
1284         if (!get_pages(sbi, type))
1285             break;
1286 
1287         if (unlikely(f2fs_cp_error(sbi)))
1288             break;
1289 
1290         if (type == F2FS_DIRTY_META)
1291             f2fs_sync_meta_pages(sbi, META, LONG_MAX,
1292                             FS_CP_META_IO);
1293         else if (type == F2FS_WB_CP_DATA)
1294             f2fs_submit_merged_write(sbi, DATA);
1295 
1296         prepare_to_wait(&sbi->cp_wait, &wait, TASK_UNINTERRUPTIBLE);
1297         io_schedule_timeout(DEFAULT_IO_TIMEOUT);
1298     }
1299     finish_wait(&sbi->cp_wait, &wait);
1300 }
1301 
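/*
 * Translate the current sbi state and the checkpoint reason (@cpc->reason)
 * into the ckpt_flags of the checkpoint block, e.g. CP_UMOUNT_FLAG,
 * CP_ORPHAN_PRESENT_FLAG, CP_FSCK_FLAG, and the nat_bits handling.
 */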
1302 static void update_ckpt_flags(struct f2fs_sb_info *sbi, struct cp_control *cpc)
1303 {
1304     unsigned long orphan_num = sbi->im[ORPHAN_INO].ino_num;
1305     struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi);
1306     unsigned long flags;
1307 
1308     if (cpc->reason & CP_UMOUNT) {
1309         if (le32_to_cpu(ckpt->cp_pack_total_block_count) +
1310             NM_I(sbi)->nat_bits_blocks > sbi->blocks_per_seg) {
1311             clear_ckpt_flags(sbi, CP_NAT_BITS_FLAG);
1312             f2fs_notice(sbi, "Disable nat_bits due to no space");
1313         } else if (!is_set_ckpt_flags(sbi, CP_NAT_BITS_FLAG) &&
1314                         f2fs_nat_bitmap_enabled(sbi)) {
1315             f2fs_enable_nat_bits(sbi);
1316             set_ckpt_flags(sbi, CP_NAT_BITS_FLAG);
1317             f2fs_notice(sbi, "Rebuild and enable nat_bits");
1318         }
1319     }
1320 
1321     spin_lock_irqsave(&sbi->cp_lock, flags);
1322 
1323     if (cpc->reason & CP_TRIMMED)
1324         __set_ckpt_flags(ckpt, CP_TRIMMED_FLAG);
1325     else
1326         __clear_ckpt_flags(ckpt, CP_TRIMMED_FLAG);
1327 
1328     if (cpc->reason & CP_UMOUNT)
1329         __set_ckpt_flags(ckpt, CP_UMOUNT_FLAG);
1330     else
1331         __clear_ckpt_flags(ckpt, CP_UMOUNT_FLAG);
1332 
1333     if (cpc->reason & CP_FASTBOOT)
1334         __set_ckpt_flags(ckpt, CP_FASTBOOT_FLAG);
1335     else
1336         __clear_ckpt_flags(ckpt, CP_FASTBOOT_FLAG);
1337 
1338     if (orphan_num)
1339         __set_ckpt_flags(ckpt, CP_ORPHAN_PRESENT_FLAG);
1340     else
1341         __clear_ckpt_flags(ckpt, CP_ORPHAN_PRESENT_FLAG);
1342 
1343     if (is_sbi_flag_set(sbi, SBI_NEED_FSCK))
1344         __set_ckpt_flags(ckpt, CP_FSCK_FLAG);
1345 
1346     if (is_sbi_flag_set(sbi, SBI_IS_RESIZEFS))
1347         __set_ckpt_flags(ckpt, CP_RESIZEFS_FLAG);
1348     else
1349         __clear_ckpt_flags(ckpt, CP_RESIZEFS_FLAG);
1350 
1351     if (is_sbi_flag_set(sbi, SBI_CP_DISABLED))
1352         __set_ckpt_flags(ckpt, CP_DISABLED_FLAG);
1353     else
1354         __clear_ckpt_flags(ckpt, CP_DISABLED_FLAG);
1355 
1356     if (is_sbi_flag_set(sbi, SBI_CP_DISABLED_QUICK))
1357         __set_ckpt_flags(ckpt, CP_DISABLED_QUICK_FLAG);
1358     else
1359         __clear_ckpt_flags(ckpt, CP_DISABLED_QUICK_FLAG);
1360 
1361     if (is_sbi_flag_set(sbi, SBI_QUOTA_SKIP_FLUSH))
1362         __set_ckpt_flags(ckpt, CP_QUOTA_NEED_FSCK_FLAG);
1363     else
1364         __clear_ckpt_flags(ckpt, CP_QUOTA_NEED_FSCK_FLAG);
1365 
1366     if (is_sbi_flag_set(sbi, SBI_QUOTA_NEED_REPAIR))
1367         __set_ckpt_flags(ckpt, CP_QUOTA_NEED_FSCK_FLAG);
1368 
1369     /* set this flag to activate crc|cp_ver for recovery */
1370     __set_ckpt_flags(ckpt, CP_CRC_RECOVERY_FLAG);
1371     __clear_ckpt_flags(ckpt, CP_NOCRC_RECOVERY_FLAG);
1372 
1373     spin_unlock_irqrestore(&sbi->cp_lock, flags);
1374 }
1375 
1376 static void commit_checkpoint(struct f2fs_sb_info *sbi,
1377     void *src, block_t blk_addr)
1378 {
1379     struct writeback_control wbc = {
1380         .for_reclaim = 0,
1381     };
1382 
1383     /*
1384      * pagevec_lookup_tag and lock_page again will take
1385      * some extra time. Therefore, f2fs_update_meta_pages and
1386      * f2fs_sync_meta_pages are combined in this function.
1387      */
1388     struct page *page = f2fs_grab_meta_page(sbi, blk_addr);
1389     int err;
1390 
1391     f2fs_wait_on_page_writeback(page, META, true, true);
1392 
1393     memcpy(page_address(page), src, PAGE_SIZE);
1394 
1395     set_page_dirty(page);
1396     if (unlikely(!clear_page_dirty_for_io(page)))
1397         f2fs_bug_on(sbi, 1);
1398 
1399     /* writeout cp pack 2 page */
1400     err = __f2fs_write_meta_page(page, &wbc, FS_CP_META_IO);
1401     if (unlikely(err && f2fs_cp_error(sbi))) {
1402         f2fs_put_page(page, 1);
1403         return;
1404     }
1405 
1406     f2fs_bug_on(sbi, err);
1407     f2fs_put_page(page, 0);
1408 
1409     /* submit checkpoint (with barrier if NOBARRIER is not set) */
1410     f2fs_submit_merged_write(sbi, META_FLUSH);
1411 }
1412 
1413 static inline u64 get_sectors_written(struct block_device *bdev)
1414 {
1415     return (u64)part_stat_read(bdev, sectors[STAT_WRITE]);
1416 }
1417 
1418 u64 f2fs_get_sectors_written(struct f2fs_sb_info *sbi)
1419 {
1420     if (f2fs_is_multi_device(sbi)) {
1421         u64 sectors = 0;
1422         int i;
1423 
1424         for (i = 0; i < sbi->s_ndevs; i++)
1425             sectors += get_sectors_written(FDEV(i).bdev);
1426 
1427         return sectors;
1428     }
1429 
1430     return get_sectors_written(sbi->sb->s_bdev);
1431 }
1432 
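/*
 * Write one checkpoint pack: the checkpoint block and its payload, orphan
 * blocks, data summaries, optional node summaries and nat_bits, and finally
 * the cp pack 2 page, flushing device caches and waiting for all writeback
 * to finish in between.
 */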
1433 static int do_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc)
1434 {
1435     struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi);
1436     struct f2fs_nm_info *nm_i = NM_I(sbi);
1437     unsigned long orphan_num = sbi->im[ORPHAN_INO].ino_num, flags;
1438     block_t start_blk;
1439     unsigned int data_sum_blocks, orphan_blocks;
1440     __u32 crc32 = 0;
1441     int i;
1442     int cp_payload_blks = __cp_payload(sbi);
1443     struct curseg_info *seg_i = CURSEG_I(sbi, CURSEG_HOT_NODE);
1444     u64 kbytes_written;
1445     int err;
1446 
1447     /* Flush all the NAT/SIT pages */
1448     f2fs_sync_meta_pages(sbi, META, LONG_MAX, FS_CP_META_IO);
1449 
1450     /* start to update checkpoint, cp ver is already updated previously */
1451     ckpt->elapsed_time = cpu_to_le64(get_mtime(sbi, true));
1452     ckpt->free_segment_count = cpu_to_le32(free_segments(sbi));
1453     for (i = 0; i < NR_CURSEG_NODE_TYPE; i++) {
1454         ckpt->cur_node_segno[i] =
1455             cpu_to_le32(curseg_segno(sbi, i + CURSEG_HOT_NODE));
1456         ckpt->cur_node_blkoff[i] =
1457             cpu_to_le16(curseg_blkoff(sbi, i + CURSEG_HOT_NODE));
1458         ckpt->alloc_type[i + CURSEG_HOT_NODE] =
1459                 curseg_alloc_type(sbi, i + CURSEG_HOT_NODE);
1460     }
1461     for (i = 0; i < NR_CURSEG_DATA_TYPE; i++) {
1462         ckpt->cur_data_segno[i] =
1463             cpu_to_le32(curseg_segno(sbi, i + CURSEG_HOT_DATA));
1464         ckpt->cur_data_blkoff[i] =
1465             cpu_to_le16(curseg_blkoff(sbi, i + CURSEG_HOT_DATA));
1466         ckpt->alloc_type[i + CURSEG_HOT_DATA] =
1467                 curseg_alloc_type(sbi, i + CURSEG_HOT_DATA);
1468     }
1469 
1470     /* 2 cp + n data seg summary + orphan inode blocks */
1471     data_sum_blocks = f2fs_npages_for_summary_flush(sbi, false);
1472     spin_lock_irqsave(&sbi->cp_lock, flags);
1473     if (data_sum_blocks < NR_CURSEG_DATA_TYPE)
1474         __set_ckpt_flags(ckpt, CP_COMPACT_SUM_FLAG);
1475     else
1476         __clear_ckpt_flags(ckpt, CP_COMPACT_SUM_FLAG);
1477     spin_unlock_irqrestore(&sbi->cp_lock, flags);
1478 
1479     orphan_blocks = GET_ORPHAN_BLOCKS(orphan_num);
1480     ckpt->cp_pack_start_sum = cpu_to_le32(1 + cp_payload_blks +
1481             orphan_blocks);
1482 
1483     if (__remain_node_summaries(cpc->reason))
1484         ckpt->cp_pack_total_block_count = cpu_to_le32(F2FS_CP_PACKS +
1485                 cp_payload_blks + data_sum_blocks +
1486                 orphan_blocks + NR_CURSEG_NODE_TYPE);
1487     else
1488         ckpt->cp_pack_total_block_count = cpu_to_le32(F2FS_CP_PACKS +
1489                 cp_payload_blks + data_sum_blocks +
1490                 orphan_blocks);
1491 
1492     /* update ckpt flag for checkpoint */
1493     update_ckpt_flags(sbi, cpc);
1494 
1495     /* update SIT/NAT bitmap */
1496     get_sit_bitmap(sbi, __bitmap_ptr(sbi, SIT_BITMAP));
1497     get_nat_bitmap(sbi, __bitmap_ptr(sbi, NAT_BITMAP));
1498 
1499     crc32 = f2fs_checkpoint_chksum(sbi, ckpt);
1500     *((__le32 *)((unsigned char *)ckpt +
1501                 le32_to_cpu(ckpt->checksum_offset)))
1502                 = cpu_to_le32(crc32);
1503 
1504     start_blk = __start_cp_next_addr(sbi);
1505 
1506     /* write nat bits */
1507     if ((cpc->reason & CP_UMOUNT) &&
1508             is_set_ckpt_flags(sbi, CP_NAT_BITS_FLAG)) {
1509         __u64 cp_ver = cur_cp_version(ckpt);
1510         block_t blk;
1511 
1512         cp_ver |= ((__u64)crc32 << 32);
1513         *(__le64 *)nm_i->nat_bits = cpu_to_le64(cp_ver);
1514 
1515         blk = start_blk + sbi->blocks_per_seg - nm_i->nat_bits_blocks;
1516         for (i = 0; i < nm_i->nat_bits_blocks; i++)
1517             f2fs_update_meta_page(sbi, nm_i->nat_bits +
1518                     (i << F2FS_BLKSIZE_BITS), blk + i);
1519     }
1520 
1521     /* write out checkpoint buffer at block 0 */
1522     f2fs_update_meta_page(sbi, ckpt, start_blk++);
1523 
1524     for (i = 1; i < 1 + cp_payload_blks; i++)
1525         f2fs_update_meta_page(sbi, (char *)ckpt + i * F2FS_BLKSIZE,
1526                             start_blk++);
1527 
1528     if (orphan_num) {
1529         write_orphan_inodes(sbi, start_blk);
1530         start_blk += orphan_blocks;
1531     }
1532 
1533     f2fs_write_data_summaries(sbi, start_blk);
1534     start_blk += data_sum_blocks;
1535 
1536     /* Record write statistics in the hot node summary */
1537     kbytes_written = sbi->kbytes_written;
1538     kbytes_written += (f2fs_get_sectors_written(sbi) -
1539                 sbi->sectors_written_start) >> 1;
1540     seg_i->journal->info.kbytes_written = cpu_to_le64(kbytes_written);
1541 
1542     if (__remain_node_summaries(cpc->reason)) {
1543         f2fs_write_node_summaries(sbi, start_blk);
1544         start_blk += NR_CURSEG_NODE_TYPE;
1545     }
1546 
1547     /* update user_block_counts */
1548     sbi->last_valid_block_count = sbi->total_valid_block_count;
1549     percpu_counter_set(&sbi->alloc_valid_block_count, 0);
1550     percpu_counter_set(&sbi->rf_node_block_count, 0);
1551 
1552     /* Here, we have one bio containing the CP pack except the cp pack 2 page */
1553     f2fs_sync_meta_pages(sbi, META, LONG_MAX, FS_CP_META_IO);
1554     /* Wait for all dirty meta pages to be submitted for IO */
1555     f2fs_wait_on_all_pages(sbi, F2FS_DIRTY_META);
1556 
1557     /* wait for writeback of previously submitted meta pages */
1558     f2fs_wait_on_all_pages(sbi, F2FS_WB_CP_DATA);
1559 
1560     /* flush all device cache */
1561     err = f2fs_flush_device_cache(sbi);
1562     if (err)
1563         return err;
1564 
1565     /* write the checkpoint cp pack 2 page, with a barrier unless NOBARRIER is set */
1566     commit_checkpoint(sbi, ckpt, start_blk);
1567     f2fs_wait_on_all_pages(sbi, F2FS_WB_CP_DATA);
1568 
1569     /*
1570      * invalidate intermediate page-cache pages borrowed from the meta inode,
1571      * which are used for migration of encrypted, verity or compressed inodes' blocks.
1572      */
1573     if (f2fs_sb_has_encrypt(sbi) || f2fs_sb_has_verity(sbi) ||
1574         f2fs_sb_has_compression(sbi))
1575         invalidate_mapping_pages(META_MAPPING(sbi),
1576                 MAIN_BLKADDR(sbi), MAX_BLKADDR(sbi) - 1);
1577 
1578     f2fs_release_ino_entry(sbi, false);
1579 
1580     f2fs_reset_fsync_node_info(sbi);
1581 
1582     clear_sbi_flag(sbi, SBI_IS_DIRTY);
1583     clear_sbi_flag(sbi, SBI_NEED_CP);
1584     clear_sbi_flag(sbi, SBI_QUOTA_SKIP_FLUSH);
1585 
1586     spin_lock(&sbi->stat_lock);
1587     sbi->unusable_block_count = 0;
1588     spin_unlock(&sbi->stat_lock);
1589 
1590     __set_cp_next_pack(sbi);
1591 
1592     /*
1593      * redirty the superblock if metadata such as node pages or the inode
1594      * cache was updated while writing the checkpoint.
1595      */
1596     if (get_pages(sbi, F2FS_DIRTY_NODES) ||
1597             get_pages(sbi, F2FS_DIRTY_IMETA))
1598         set_sbi_flag(sbi, SBI_IS_DIRTY);
1599 
1600     f2fs_bug_on(sbi, get_pages(sbi, F2FS_DIRTY_DENTS));
1601 
1602     return unlikely(f2fs_cp_error(sbi)) ? -EIO : 0;
1603 }
1604 
1605 int f2fs_write_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc)
1606 {
1607     struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi);
1608     unsigned long long ckpt_ver;
1609     int err = 0;
1610 
1611     if (f2fs_readonly(sbi->sb) || f2fs_hw_is_readonly(sbi))
1612         return -EROFS;
1613 
1614     if (unlikely(is_sbi_flag_set(sbi, SBI_CP_DISABLED))) {
1615         if (cpc->reason != CP_PAUSE)
1616             return 0;
1617         f2fs_warn(sbi, "Start checkpoint disabled!");
1618     }
1619     if (cpc->reason != CP_RESIZE)
1620         f2fs_down_write(&sbi->cp_global_sem);
1621 
1622     if (!is_sbi_flag_set(sbi, SBI_IS_DIRTY) &&
1623         ((cpc->reason & CP_FASTBOOT) || (cpc->reason & CP_SYNC) ||
1624         ((cpc->reason & CP_DISCARD) && !sbi->discard_blks)))
1625         goto out;
1626     if (unlikely(f2fs_cp_error(sbi))) {
1627         err = -EIO;
1628         goto out;
1629     }
1630 
1631     trace_f2fs_write_checkpoint(sbi->sb, cpc->reason, "start block_ops");
1632 
1633     err = block_operations(sbi);
1634     if (err)
1635         goto out;
1636 
1637     trace_f2fs_write_checkpoint(sbi->sb, cpc->reason, "finish block_ops");
1638 
1639     f2fs_flush_merged_writes(sbi);
1640 
1641     /* this is the case of multiple fstrims without any changes */
1642     if (cpc->reason & CP_DISCARD) {
1643         if (!f2fs_exist_trim_candidates(sbi, cpc)) {
1644             unblock_operations(sbi);
1645             goto out;
1646         }
1647 
1648         if (NM_I(sbi)->nat_cnt[DIRTY_NAT] == 0 &&
1649                 SIT_I(sbi)->dirty_sentries == 0 &&
1650                 prefree_segments(sbi) == 0) {
1651             f2fs_flush_sit_entries(sbi, cpc);
1652             f2fs_clear_prefree_segments(sbi, cpc);
1653             unblock_operations(sbi);
1654             goto out;
1655         }
1656     }
1657 
1658     /*
1659      * update the checkpoint pack index:
1660      * increase the version number so that
1661      * SIT entries and seg summaries are written to the correct place
1662      */
1663     ckpt_ver = cur_cp_version(ckpt);
1664     ckpt->checkpoint_ver = cpu_to_le64(++ckpt_ver);
1665 
1666     /* write cached NAT/SIT entries to NAT/SIT area */
1667     err = f2fs_flush_nat_entries(sbi, cpc);
1668     if (err) {
1669         f2fs_err(sbi, "f2fs_flush_nat_entries failed err:%d, stop checkpoint", err);
1670         f2fs_bug_on(sbi, !f2fs_cp_error(sbi));
1671         goto stop;
1672     }
1673 
1674     f2fs_flush_sit_entries(sbi, cpc);
1675 
1676     /* save inmem log status */
1677     f2fs_save_inmem_curseg(sbi);
1678 
1679     err = do_checkpoint(sbi, cpc);
1680     if (err) {
1681         f2fs_err(sbi, "do_checkpoint failed err:%d, stop checkpoint", err);
1682         f2fs_bug_on(sbi, !f2fs_cp_error(sbi));
1683         f2fs_release_discard_addrs(sbi);
1684     } else {
1685         f2fs_clear_prefree_segments(sbi, cpc);
1686     }
1687 
1688     f2fs_restore_inmem_curseg(sbi);
1689 stop:
1690     unblock_operations(sbi);
1691     stat_inc_cp_count(sbi->stat_info);
1692 
1693     if (cpc->reason & CP_RECOVERY)
1694         f2fs_notice(sbi, "checkpoint: version = %llx", ckpt_ver);
1695 
1696     /* update CP_TIME to trigger checkpoint periodically */
1697     f2fs_update_time(sbi, CP_TIME);
1698     trace_f2fs_write_checkpoint(sbi->sb, cpc->reason, "finish checkpoint");
1699 out:
1700     if (cpc->reason != CP_RESIZE)
1701         f2fs_up_write(&sbi->cp_global_sem);
1702     return err;
1703 }
1704 
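/*
 * f2fs_init_ino_entry_info - set up per-type inode entry tracking.
 *
 * Each inode_management slot gets an empty radix tree, list and lock.
 * max_orphans is derived from the space left in one checkpoint pack segment
 * after the CP blocks, the persistent curseg summaries and the extra cp
 * payload are reserved; every remaining block can hold
 * F2FS_ORPHANS_PER_BLOCK orphan ino entries.
 */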
1705 void f2fs_init_ino_entry_info(struct f2fs_sb_info *sbi)
1706 {
1707     int i;
1708 
1709     for (i = 0; i < MAX_INO_ENTRY; i++) {
1710         struct inode_management *im = &sbi->im[i];
1711 
1712         INIT_RADIX_TREE(&im->ino_root, GFP_ATOMIC);
1713         spin_lock_init(&im->ino_lock);
1714         INIT_LIST_HEAD(&im->ino_list);
1715         im->ino_num = 0;
1716     }
1717 
1718     sbi->max_orphans = (sbi->blocks_per_seg - F2FS_CP_PACKS -
1719             NR_CURSEG_PERSIST_TYPE - __cp_payload(sbi)) *
1720                 F2FS_ORPHANS_PER_BLOCK;
1721 }
1722 
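/*
 * Slab caches for checkpoint bookkeeping: ino_entry objects back the
 * sbi->im[] ino lists (orphans, fsync ino tracking), while inode_entry
 * objects back the dirty inode/dir lists.  If the second cache cannot be
 * created, the first is torn down again so the caller only ever sees
 * -ENOMEM or full success.
 */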
1723 int __init f2fs_create_checkpoint_caches(void)
1724 {
1725     ino_entry_slab = f2fs_kmem_cache_create("f2fs_ino_entry",
1726             sizeof(struct ino_entry));
1727     if (!ino_entry_slab)
1728         return -ENOMEM;
1729     f2fs_inode_entry_slab = f2fs_kmem_cache_create("f2fs_inode_entry",
1730             sizeof(struct inode_entry));
1731     if (!f2fs_inode_entry_slab) {
1732         kmem_cache_destroy(ino_entry_slab);
1733         return -ENOMEM;
1734     }
1735     return 0;
1736 }
1737 
1738 void f2fs_destroy_checkpoint_caches(void)
1739 {
1740     kmem_cache_destroy(ino_entry_slab);
1741     kmem_cache_destroy(f2fs_inode_entry_slab);
1742 }
1743 
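/* Issue one synchronous CP_SYNC checkpoint while holding gc_lock. */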
1744 static int __write_checkpoint_sync(struct f2fs_sb_info *sbi)
1745 {
1746     struct cp_control cpc = { .reason = CP_SYNC, };
1747     int err;
1748 
1749     f2fs_down_write(&sbi->gc_lock);
1750     err = f2fs_write_checkpoint(sbi, &cpc);
1751     f2fs_up_write(&sbi->gc_lock);
1752 
1753     return err;
1754 }
1755 
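/*
 * Drain the lock-less issue_list and satisfy every queued request with a
 * single synchronous checkpoint: each waiter is completed with the same
 * return code, and the average/peak queueing latency stats are updated
 * under stat_lock.
 */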
1756 static void __checkpoint_and_complete_reqs(struct f2fs_sb_info *sbi)
1757 {
1758     struct ckpt_req_control *cprc = &sbi->cprc_info;
1759     struct ckpt_req *req, *next;
1760     struct llist_node *dispatch_list;
1761     u64 sum_diff = 0, diff, count = 0;
1762     int ret;
1763 
1764     dispatch_list = llist_del_all(&cprc->issue_list);
1765     if (!dispatch_list)
1766         return;
1767     dispatch_list = llist_reverse_order(dispatch_list);
1768 
1769     ret = __write_checkpoint_sync(sbi);
1770     atomic_inc(&cprc->issued_ckpt);
1771 
1772     llist_for_each_entry_safe(req, next, dispatch_list, llnode) {
1773         diff = (u64)ktime_ms_delta(ktime_get(), req->queue_time);
1774         req->ret = ret;
1775         complete(&req->wait);
1776 
1777         sum_diff += diff;
1778         count++;
1779     }
1780     atomic_sub(count, &cprc->queued_ckpt);
1781     atomic_add(count, &cprc->total_ckpt);
1782 
1783     spin_lock(&cprc->stat_lock);
1784     cprc->cur_time = (unsigned int)div64_u64(sum_diff, count);
1785     if (cprc->peak_time < cprc->cur_time)
1786         cprc->peak_time = cprc->cur_time;
1787     spin_unlock(&cprc->stat_lock);
1788 }
1789 
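/*
 * Checkpoint-merge kthread: sleeps on ckpt_wait_queue and batches all
 * requests found on issue_list into one checkpoint per wakeup.
 */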
1790 static int issue_checkpoint_thread(void *data)
1791 {
1792     struct f2fs_sb_info *sbi = data;
1793     struct ckpt_req_control *cprc = &sbi->cprc_info;
1794     wait_queue_head_t *q = &cprc->ckpt_wait_queue;
1795 repeat:
1796     if (kthread_should_stop())
1797         return 0;
1798 
1799     if (!llist_empty(&cprc->issue_list))
1800         __checkpoint_and_complete_reqs(sbi);
1801 
1802     wait_event_interruptible(*q,
1803         kthread_should_stop() || !llist_empty(&cprc->issue_list));
1804     goto repeat;
1805 }
1806 
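/*
 * Flush whatever is still sitting on issue_list.  If the list is already
 * empty, the request (if any) has been picked up by the kthread, so just
 * wait for its completion.
 */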
1807 static void flush_remained_ckpt_reqs(struct f2fs_sb_info *sbi,
1808         struct ckpt_req *wait_req)
1809 {
1810     struct ckpt_req_control *cprc = &sbi->cprc_info;
1811 
1812     if (!llist_empty(&cprc->issue_list)) {
1813         __checkpoint_and_complete_reqs(sbi);
1814     } else {
1815         /* already dispatched by issue_checkpoint_thread */
1816         if (wait_req)
1817             wait_for_completion(&wait_req->wait);
1818     }
1819 }
1820 
1821 static void init_ckpt_req(struct ckpt_req *req)
1822 {
1823     memset(req, 0, sizeof(struct ckpt_req));
1824 
1825     init_completion(&req->wait);
1826     req->queue_time = ktime_get();
1827 }
1828 
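/*
 * Front end for checkpoint requests.  When checkpoint merging is disabled,
 * the reason is not a plain CP_SYNC, or the merge kthread is not running,
 * fall back to a direct synchronous checkpoint; otherwise queue the request
 * on issue_list, kick the kthread and wait for completion.
 *
 * Illustrative caller only (hypothetical sketch, not a path defined here),
 * e.g. a sync-style path might simply do:
 *
 *	err = f2fs_issue_checkpoint(sbi);
 *	if (err)
 *		return err;
 */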
1829 int f2fs_issue_checkpoint(struct f2fs_sb_info *sbi)
1830 {
1831     struct ckpt_req_control *cprc = &sbi->cprc_info;
1832     struct ckpt_req req;
1833     struct cp_control cpc;
1834 
1835     cpc.reason = __get_cp_reason(sbi);
1836     if (!test_opt(sbi, MERGE_CHECKPOINT) || cpc.reason != CP_SYNC) {
1837         int ret;
1838 
1839         f2fs_down_write(&sbi->gc_lock);
1840         ret = f2fs_write_checkpoint(sbi, &cpc);
1841         f2fs_up_write(&sbi->gc_lock);
1842 
1843         return ret;
1844     }
1845 
1846     if (!cprc->f2fs_issue_ckpt)
1847         return __write_checkpoint_sync(sbi);
1848 
1849     init_ckpt_req(&req);
1850 
1851     llist_add(&req.llnode, &cprc->issue_list);
1852     atomic_inc(&cprc->queued_ckpt);
1853 
1854     /*
1855      * update issue_list before we wake up issue_checkpoint thread,
1856      * this smp_mb() pairs with another barrier in ___wait_event(),
1857      * see more details in comments of waitqueue_active().
1858      */
1859     smp_mb();
1860 
1861     if (waitqueue_active(&cprc->ckpt_wait_queue))
1862         wake_up(&cprc->ckpt_wait_queue);
1863 
1864     if (cprc->f2fs_issue_ckpt)
1865         wait_for_completion(&req.wait);
1866     else
1867         flush_remained_ckpt_reqs(sbi, &req);
1868 
1869     return req.ret;
1870 }
1871 
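/*
 * Start the per-filesystem "f2fs_ckpt-major:minor" kthread (typically when
 * the checkpoint_merge option is in effect) and apply the configured I/O
 * priority.  Safe to call when the thread already exists.
 */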
1872 int f2fs_start_ckpt_thread(struct f2fs_sb_info *sbi)
1873 {
1874     dev_t dev = sbi->sb->s_bdev->bd_dev;
1875     struct ckpt_req_control *cprc = &sbi->cprc_info;
1876 
1877     if (cprc->f2fs_issue_ckpt)
1878         return 0;
1879 
1880     cprc->f2fs_issue_ckpt = kthread_run(issue_checkpoint_thread, sbi,
1881             "f2fs_ckpt-%u:%u", MAJOR(dev), MINOR(dev));
1882     if (IS_ERR(cprc->f2fs_issue_ckpt)) {
1883         cprc->f2fs_issue_ckpt = NULL;
1884         return -ENOMEM;
1885     }
1886 
1887     set_task_ioprio(cprc->f2fs_issue_ckpt, cprc->ckpt_thread_ioprio);
1888 
1889     return 0;
1890 }
1891 
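/*
 * Stop the merge kthread.  The task pointer is cleared first so that new
 * f2fs_issue_checkpoint() callers fall back to the synchronous path, then
 * any requests still queued are flushed on their behalf.
 */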
1892 void f2fs_stop_ckpt_thread(struct f2fs_sb_info *sbi)
1893 {
1894     struct ckpt_req_control *cprc = &sbi->cprc_info;
1895 
1896     if (cprc->f2fs_issue_ckpt) {
1897         struct task_struct *ckpt_task = cprc->f2fs_issue_ckpt;
1898 
1899         cprc->f2fs_issue_ckpt = NULL;
1900         kthread_stop(ckpt_task);
1901 
1902         flush_remained_ckpt_reqs(sbi, NULL);
1903     }
1904 }
1905 
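/*
 * Reset the checkpoint-merge counters, initialize the waitqueue, request
 * list and stat lock, and set the default checkpoint thread I/O priority.
 */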
1906 void f2fs_init_ckpt_req_control(struct f2fs_sb_info *sbi)
1907 {
1908     struct ckpt_req_control *cprc = &sbi->cprc_info;
1909 
1910     atomic_set(&cprc->issued_ckpt, 0);
1911     atomic_set(&cprc->total_ckpt, 0);
1912     atomic_set(&cprc->queued_ckpt, 0);
1913     cprc->ckpt_thread_ioprio = DEFAULT_CHECKPOINT_IOPRIO;
1914     init_waitqueue_head(&cprc->ckpt_wait_queue);
1915     init_llist_head(&cprc->issue_list);
1916     spin_lock_init(&cprc->stat_lock);
1917 }