// SPDX-License-Identifier: GPL-2.0+
/*
 * NILFS segment constructor.
 *
 * Copyright (C) 2005-2008 Nippon Telegraph and Telephone Corporation.
 *
 * Written by Ryusuke Konishi.
 */

#include <linux/pagemap.h>
#include <linux/buffer_head.h>
#include <linux/writeback.h>
#include <linux/bitops.h>
#include <linux/bio.h>
#include <linux/completion.h>
#include <linux/blkdev.h>
#include <linux/backing-dev.h>
#include <linux/freezer.h>
#include <linux/kthread.h>
#include <linux/crc32.h>
#include <linux/pagevec.h>
#include <linux/slab.h>
#include <linux/sched/signal.h>

#include "nilfs.h"
#include "btnode.h"
#include "page.h"
#include "segment.h"
#include "sufile.h"
#include "cpfile.h"
#include "ifile.h"
#include "segbuf.h"


/*
 * Segment constructor
 */
#define SC_N_INODEVEC   16   /* Size of locally allocated inode vector */

#define SC_MAX_SEGDELTA 64   /*
                              * Upper limit of the number of segments
                              * appended in the collection retry loop
                              */

/* Construction mode */
enum {
    SC_LSEG_SR = 1, /* Make a logical segment having a super root */
    SC_LSEG_DSYNC,  /*
                     * Flush data blocks of a given file and make
                     * a logical segment without a super root.
                     */
    SC_FLUSH_FILE,  /*
                     * Flush data files; this leads to segment writes
                     * without creating a checkpoint.
                     */
    SC_FLUSH_DAT,   /*
                     * Flush the DAT file.  This also creates segments
                     * without a checkpoint.
                     */
};

/* Stage numbers of dirty block collection */
enum {
    NILFS_ST_INIT = 0,
    NILFS_ST_GC,        /* Collecting dirty blocks for GC */
    NILFS_ST_FILE,
    NILFS_ST_IFILE,
    NILFS_ST_CPFILE,
    NILFS_ST_SUFILE,
    NILFS_ST_DAT,
    NILFS_ST_SR,        /* Super root */
    NILFS_ST_DSYNC,     /* Data sync blocks */
    NILFS_ST_DONE,
};

#define CREATE_TRACE_POINTS
#include <trace/events/nilfs2.h>

/*
 * nilfs_sc_cstage_inc(), nilfs_sc_cstage_set(), nilfs_sc_cstage_get() are
 * wrapper functions for the stage count (nilfs_sc_info->sc_stage.scnt).
 * Users of the variable must go through them because every transition of
 * the stage count must emit a trace event
 * (trace_nilfs2_collection_stage_transition).
 *
 * nilfs_sc_cstage_get() isn't required for that purpose because it doesn't
 * produce tracepoint events.  It is provided just to make the intention
 * clear.
 */
static inline void nilfs_sc_cstage_inc(struct nilfs_sc_info *sci)
{
    sci->sc_stage.scnt++;
    trace_nilfs2_collection_stage_transition(sci);
}

static inline void nilfs_sc_cstage_set(struct nilfs_sc_info *sci, int next_scnt)
{
    sci->sc_stage.scnt = next_scnt;
    trace_nilfs2_collection_stage_transition(sci);
}

static inline int nilfs_sc_cstage_get(struct nilfs_sc_info *sci)
{
    return sci->sc_stage.scnt;
}

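/*
 * Illustrative sketch (not part of the original file): stage transitions
 * must go through the wrappers above so that the tracepoint fires.
 * Assigning sci->sc_stage.scnt directly would skip
 * trace_nilfs2_collection_stage_transition():
 *
 *     nilfs_sc_cstage_set(sci, NILFS_ST_INIT);      // traced
 *     while (nilfs_sc_cstage_get(sci) != NILFS_ST_DONE)
 *         nilfs_sc_cstage_inc(sci);                 // each step traced
 */
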
/* State flags of collection */
#define NILFS_CF_NODE           0x0001  /* Collecting node blocks */
#define NILFS_CF_IFILE_STARTED  0x0002  /* IFILE stage has started */
#define NILFS_CF_SUFREED        0x0004  /* Segment usages have been freed */
#define NILFS_CF_HISTORY_MASK   (NILFS_CF_IFILE_STARTED | NILFS_CF_SUFREED)

/* Operations depending on the construction mode and file type */
struct nilfs_sc_operations {
    int (*collect_data)(struct nilfs_sc_info *, struct buffer_head *,
                struct inode *);
    int (*collect_node)(struct nilfs_sc_info *, struct buffer_head *,
                struct inode *);
    int (*collect_bmap)(struct nilfs_sc_info *, struct buffer_head *,
                struct inode *);
    void (*write_data_binfo)(struct nilfs_sc_info *,
                 struct nilfs_segsum_pointer *,
                 union nilfs_binfo *);
    void (*write_node_binfo)(struct nilfs_sc_info *,
                 struct nilfs_segsum_pointer *,
                 union nilfs_binfo *);
};

/*
 * Other definitions
 */
static void nilfs_segctor_start_timer(struct nilfs_sc_info *);
static void nilfs_segctor_do_flush(struct nilfs_sc_info *, int);
static void nilfs_segctor_do_immediate_flush(struct nilfs_sc_info *);
static void nilfs_dispose_list(struct the_nilfs *, struct list_head *, int);

#define nilfs_cnt32_ge(a, b)   \
    (typecheck(__u32, a) && typecheck(__u32, b) && \
     ((__s32)(a) - (__s32)(b) >= 0))

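/*
 * Illustrative note (not part of the original file): nilfs_cnt32_ge()
 * compares 32-bit sequence counters safely across wraparound by testing
 * the sign of the signed difference, e.g.:
 *
 *     nilfs_cnt32_ge(5, 0xfffffffb)    // true:  5 - (-5) = 10 >= 0
 *     nilfs_cnt32_ge(0xfffffffb, 5)    // false: -5 - 5 = -10 < 0
 *
 * A plain unsigned "a >= b" would get both of these wrong near the wrap
 * point.
 */
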
static int nilfs_prepare_segment_lock(struct super_block *sb,
                      struct nilfs_transaction_info *ti)
{
    struct nilfs_transaction_info *cur_ti = current->journal_info;
    void *save = NULL;

    if (cur_ti) {
        if (cur_ti->ti_magic == NILFS_TI_MAGIC)
            return ++cur_ti->ti_count;

        /*
         * If the journal_info field is occupied by another FS,
         * it is saved and will be restored in
         * nilfs_transaction_commit().
         */
        nilfs_warn(sb, "journal info from a different FS");
        save = current->journal_info;
    }
    if (!ti) {
        ti = kmem_cache_alloc(nilfs_transaction_cachep, GFP_NOFS);
        if (!ti)
            return -ENOMEM;
        ti->ti_flags = NILFS_TI_DYNAMIC_ALLOC;
    } else {
        ti->ti_flags = 0;
    }
    ti->ti_count = 0;
    ti->ti_save = save;
    ti->ti_magic = NILFS_TI_MAGIC;
    current->journal_info = ti;
    return 0;
}

/**
 * nilfs_transaction_begin - start indivisible file operations.
 * @sb: super block
 * @ti: nilfs_transaction_info
 * @vacancy_check: flags for vacancy rate checks
 *
 * nilfs_transaction_begin() acquires a reader/writer semaphore, called
 * the segment semaphore, to make segment construction and write tasks
 * exclusive.  The function is used with nilfs_transaction_commit() in pairs.
 * The region enclosed by these two functions can be nested.  To avoid a
 * deadlock, the semaphore is only acquired or released in the outermost call.
 *
 * This function allocates a nilfs_transaction_info struct to keep context
 * information on it.  It is initialized and hooked onto the current task in
 * the outermost call.  If a pre-allocated struct is given to @ti, it is used
 * instead; otherwise a new struct is assigned from a slab.
 *
 * When the @vacancy_check flag is set, this function will check the amount
 * of free space, and will wait for the GC to reclaim disk space if the
 * capacity is low.
 *
 * Return Value: On success, 0 is returned.  On error, one of the following
 * negative error codes is returned.
 *
 * %-ENOMEM - Insufficient memory available.
 *
 * %-ENOSPC - No space left on device.
 */
int nilfs_transaction_begin(struct super_block *sb,
                struct nilfs_transaction_info *ti,
                int vacancy_check)
{
    struct the_nilfs *nilfs;
    int ret = nilfs_prepare_segment_lock(sb, ti);
    struct nilfs_transaction_info *trace_ti;

    if (unlikely(ret < 0))
        return ret;
    if (ret > 0) {
        trace_ti = current->journal_info;

        trace_nilfs2_transaction_transition(sb, trace_ti,
                    trace_ti->ti_count, trace_ti->ti_flags,
                    TRACE_NILFS2_TRANSACTION_BEGIN);
        return 0;
    }

    sb_start_intwrite(sb);

    nilfs = sb->s_fs_info;
    down_read(&nilfs->ns_segctor_sem);
    if (vacancy_check && nilfs_near_disk_full(nilfs)) {
        up_read(&nilfs->ns_segctor_sem);
        ret = -ENOSPC;
        goto failed;
    }

    trace_ti = current->journal_info;
    trace_nilfs2_transaction_transition(sb, trace_ti, trace_ti->ti_count,
                        trace_ti->ti_flags,
                        TRACE_NILFS2_TRANSACTION_BEGIN);
    return 0;

 failed:
    ti = current->journal_info;
    current->journal_info = ti->ti_save;
    if (ti->ti_flags & NILFS_TI_DYNAMIC_ALLOC)
        kmem_cache_free(nilfs_transaction_cachep, ti);
    sb_end_intwrite(sb);
    return ret;
}

/**
 * nilfs_transaction_commit - commit indivisible file operations.
 * @sb: super block
 *
 * nilfs_transaction_commit() releases the read semaphore which is
 * acquired by nilfs_transaction_begin().  This is only performed
 * in the outermost call of this function.  If a commit flag is set,
 * nilfs_transaction_commit() sets a timer to start the segment
 * constructor.  If a sync flag is set, it starts construction
 * directly.
 */
int nilfs_transaction_commit(struct super_block *sb)
{
    struct nilfs_transaction_info *ti = current->journal_info;
    struct the_nilfs *nilfs = sb->s_fs_info;
    int err = 0;

    BUG_ON(ti == NULL || ti->ti_magic != NILFS_TI_MAGIC);
    ti->ti_flags |= NILFS_TI_COMMIT;
    if (ti->ti_count > 0) {
        ti->ti_count--;
        trace_nilfs2_transaction_transition(sb, ti, ti->ti_count,
                ti->ti_flags, TRACE_NILFS2_TRANSACTION_COMMIT);
        return 0;
    }
    if (nilfs->ns_writer) {
        struct nilfs_sc_info *sci = nilfs->ns_writer;

        if (ti->ti_flags & NILFS_TI_COMMIT)
            nilfs_segctor_start_timer(sci);
        if (atomic_read(&nilfs->ns_ndirtyblks) > sci->sc_watermark)
            nilfs_segctor_do_flush(sci, 0);
    }
    up_read(&nilfs->ns_segctor_sem);
    trace_nilfs2_transaction_transition(sb, ti, ti->ti_count,
                ti->ti_flags, TRACE_NILFS2_TRANSACTION_COMMIT);

    current->journal_info = ti->ti_save;

    if (ti->ti_flags & NILFS_TI_SYNC)
        err = nilfs_construct_segment(sb);
    if (ti->ti_flags & NILFS_TI_DYNAMIC_ALLOC)
        kmem_cache_free(nilfs_transaction_cachep, ti);
    sb_end_intwrite(sb);
    return err;
}

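/*
 * Illustrative usage sketch (not part of the original file): callers
 * bracket an indivisible update with the begin/commit pair and fall back
 * to nilfs_transaction_abort() on failure.  The surrounding function and
 * do_some_dirtying() are hypothetical; the transaction calls are the
 * APIs defined above.
 *
 *     static int example_op(struct super_block *sb)
 *     {
 *         struct nilfs_transaction_info ti;
 *         int err;
 *
 *         err = nilfs_transaction_begin(sb, &ti, 1);
 *         if (err)
 *             return err;     // e.g. -ENOSPC with vacancy check set
 *
 *         err = do_some_dirtying();      // hypothetical update
 *         if (err) {
 *             nilfs_transaction_abort(sb);
 *             return err;
 *         }
 *         return nilfs_transaction_commit(sb);
 *     }
 */
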
void nilfs_transaction_abort(struct super_block *sb)
{
    struct nilfs_transaction_info *ti = current->journal_info;
    struct the_nilfs *nilfs = sb->s_fs_info;

    BUG_ON(ti == NULL || ti->ti_magic != NILFS_TI_MAGIC);
    if (ti->ti_count > 0) {
        ti->ti_count--;
        trace_nilfs2_transaction_transition(sb, ti, ti->ti_count,
                ti->ti_flags, TRACE_NILFS2_TRANSACTION_ABORT);
        return;
    }
    up_read(&nilfs->ns_segctor_sem);

    trace_nilfs2_transaction_transition(sb, ti, ti->ti_count,
            ti->ti_flags, TRACE_NILFS2_TRANSACTION_ABORT);

    current->journal_info = ti->ti_save;
    if (ti->ti_flags & NILFS_TI_DYNAMIC_ALLOC)
        kmem_cache_free(nilfs_transaction_cachep, ti);
    sb_end_intwrite(sb);
}

void nilfs_relax_pressure_in_lock(struct super_block *sb)
{
    struct the_nilfs *nilfs = sb->s_fs_info;
    struct nilfs_sc_info *sci = nilfs->ns_writer;

    if (!sci || !sci->sc_flush_request)
        return;

    set_bit(NILFS_SC_PRIOR_FLUSH, &sci->sc_flags);
    up_read(&nilfs->ns_segctor_sem);

    down_write(&nilfs->ns_segctor_sem);
    if (sci->sc_flush_request &&
        test_bit(NILFS_SC_PRIOR_FLUSH, &sci->sc_flags)) {
        struct nilfs_transaction_info *ti = current->journal_info;

        ti->ti_flags |= NILFS_TI_WRITER;
        nilfs_segctor_do_immediate_flush(sci);
        ti->ti_flags &= ~NILFS_TI_WRITER;
    }
    downgrade_write(&nilfs->ns_segctor_sem);
}

static void nilfs_transaction_lock(struct super_block *sb,
                   struct nilfs_transaction_info *ti,
                   int gcflag)
{
    struct nilfs_transaction_info *cur_ti = current->journal_info;
    struct the_nilfs *nilfs = sb->s_fs_info;
    struct nilfs_sc_info *sci = nilfs->ns_writer;

    WARN_ON(cur_ti);
    ti->ti_flags = NILFS_TI_WRITER;
    ti->ti_count = 0;
    ti->ti_save = cur_ti;
    ti->ti_magic = NILFS_TI_MAGIC;
    current->journal_info = ti;

    for (;;) {
        trace_nilfs2_transaction_transition(sb, ti, ti->ti_count,
                ti->ti_flags, TRACE_NILFS2_TRANSACTION_TRYLOCK);

        down_write(&nilfs->ns_segctor_sem);
        if (!test_bit(NILFS_SC_PRIOR_FLUSH, &sci->sc_flags))
            break;

        nilfs_segctor_do_immediate_flush(sci);

        up_write(&nilfs->ns_segctor_sem);
        cond_resched();
    }
    if (gcflag)
        ti->ti_flags |= NILFS_TI_GC;

    trace_nilfs2_transaction_transition(sb, ti, ti->ti_count,
                ti->ti_flags, TRACE_NILFS2_TRANSACTION_LOCK);
}

static void nilfs_transaction_unlock(struct super_block *sb)
{
    struct nilfs_transaction_info *ti = current->journal_info;
    struct the_nilfs *nilfs = sb->s_fs_info;

    BUG_ON(ti == NULL || ti->ti_magic != NILFS_TI_MAGIC);
    BUG_ON(ti->ti_count > 0);

    up_write(&nilfs->ns_segctor_sem);
    current->journal_info = ti->ti_save;

    trace_nilfs2_transaction_transition(sb, ti, ti->ti_count,
                ti->ti_flags, TRACE_NILFS2_TRANSACTION_UNLOCK);
}

static void *nilfs_segctor_map_segsum_entry(struct nilfs_sc_info *sci,
                        struct nilfs_segsum_pointer *ssp,
                        unsigned int bytes)
{
    struct nilfs_segment_buffer *segbuf = sci->sc_curseg;
    unsigned int blocksize = sci->sc_super->s_blocksize;
    void *p;

    if (unlikely(ssp->offset + bytes > blocksize)) {
        ssp->offset = 0;
        BUG_ON(NILFS_SEGBUF_BH_IS_LAST(ssp->bh,
                           &segbuf->sb_segsum_buffers));
        ssp->bh = NILFS_SEGBUF_NEXT_BH(ssp->bh);
    }
    p = ssp->bh->b_data + ssp->offset;
    ssp->offset += bytes;
    return p;
}

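/*
 * Illustrative note (not part of the original file): the segment summary
 * can span several buffer heads.  When an entry of @bytes would cross the
 * current block boundary, the pointer rolls over to the start of the next
 * summary buffer.  Assuming a 4096-byte block size:
 *
 *     ssp->offset == 4090, bytes == 24
 *      -> 4090 + 24 > 4096, so ssp->bh advances, offset restarts at 0,
 *         and the entry is mapped at bytes 0..23 of the next buffer.
 */
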
/**
 * nilfs_segctor_reset_segment_buffer - reset the current segment buffer
 * @sci: nilfs_sc_info
 */
static int nilfs_segctor_reset_segment_buffer(struct nilfs_sc_info *sci)
{
    struct nilfs_segment_buffer *segbuf = sci->sc_curseg;
    struct buffer_head *sumbh;
    unsigned int sumbytes;
    unsigned int flags = 0;
    int err;

    if (nilfs_doing_gc())
        flags = NILFS_SS_GC;
    err = nilfs_segbuf_reset(segbuf, flags, sci->sc_seg_ctime, sci->sc_cno);
    if (unlikely(err))
        return err;

    sumbh = NILFS_SEGBUF_FIRST_BH(&segbuf->sb_segsum_buffers);
    sumbytes = segbuf->sb_sum.sumbytes;
    sci->sc_finfo_ptr.bh = sumbh;  sci->sc_finfo_ptr.offset = sumbytes;
    sci->sc_binfo_ptr.bh = sumbh;  sci->sc_binfo_ptr.offset = sumbytes;
    sci->sc_blk_cnt = sci->sc_datablk_cnt = 0;
    return 0;
}

static int nilfs_segctor_feed_segment(struct nilfs_sc_info *sci)
{
    sci->sc_nblk_this_inc += sci->sc_curseg->sb_sum.nblocks;
    if (NILFS_SEGBUF_IS_LAST(sci->sc_curseg, &sci->sc_segbufs))
        return -E2BIG; /*
                        * The current segment is filled up
                        * (internal code)
                        */
    sci->sc_curseg = NILFS_NEXT_SEGBUF(sci->sc_curseg);
    return nilfs_segctor_reset_segment_buffer(sci);
}

static int nilfs_segctor_add_super_root(struct nilfs_sc_info *sci)
{
    struct nilfs_segment_buffer *segbuf = sci->sc_curseg;
    int err;

    if (segbuf->sb_sum.nblocks >= segbuf->sb_rest_blocks) {
        err = nilfs_segctor_feed_segment(sci);
        if (err)
            return err;
        segbuf = sci->sc_curseg;
    }
    err = nilfs_segbuf_extend_payload(segbuf, &segbuf->sb_super_root);
    if (likely(!err))
        segbuf->sb_sum.flags |= NILFS_SS_SR;
    return err;
}

/*
 * Functions for making the segment summary and payloads
 */
static int nilfs_segctor_segsum_block_required(
    struct nilfs_sc_info *sci, const struct nilfs_segsum_pointer *ssp,
    unsigned int binfo_size)
{
    unsigned int blocksize = sci->sc_super->s_blocksize;
    /* The sizes of finfo and binfo are small enough compared with blocksize */

    return ssp->offset + binfo_size +
        (!sci->sc_blk_cnt ? sizeof(struct nilfs_finfo) : 0) >
        blocksize;
}

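/*
 * Illustrative arithmetic (not part of the original file; sizes assumed):
 * with a 4096-byte block, a 24-byte finfo and a 16-byte binfo, the first
 * binfo of a file (sc_blk_cnt == 0) being added at offset 4060 needs a
 * new summary block:
 *
 *     4060 + 16 + 24 = 4100 > 4096  ->  returns true
 *
 * whereas a follow-up binfo (sc_blk_cnt != 0, no finfo prepended) at the
 * same offset still fits: 4060 + 16 = 4076 <= 4096.
 */
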
static void nilfs_segctor_begin_finfo(struct nilfs_sc_info *sci,
                      struct inode *inode)
{
    sci->sc_curseg->sb_sum.nfinfo++;
    sci->sc_binfo_ptr = sci->sc_finfo_ptr;
    nilfs_segctor_map_segsum_entry(
        sci, &sci->sc_binfo_ptr, sizeof(struct nilfs_finfo));

    if (NILFS_I(inode)->i_root &&
        !test_bit(NILFS_SC_HAVE_DELTA, &sci->sc_flags))
        set_bit(NILFS_SC_HAVE_DELTA, &sci->sc_flags);
    /* skip finfo */
}

static void nilfs_segctor_end_finfo(struct nilfs_sc_info *sci,
                    struct inode *inode)
{
    struct nilfs_finfo *finfo;
    struct nilfs_inode_info *ii;
    struct nilfs_segment_buffer *segbuf;
    __u64 cno;

    if (sci->sc_blk_cnt == 0)
        return;

    ii = NILFS_I(inode);

    if (test_bit(NILFS_I_GCINODE, &ii->i_state))
        cno = ii->i_cno;
    else if (NILFS_ROOT_METADATA_FILE(inode->i_ino))
        cno = 0;
    else
        cno = sci->sc_cno;

    finfo = nilfs_segctor_map_segsum_entry(sci, &sci->sc_finfo_ptr,
                         sizeof(*finfo));
    finfo->fi_ino = cpu_to_le64(inode->i_ino);
    finfo->fi_nblocks = cpu_to_le32(sci->sc_blk_cnt);
    finfo->fi_ndatablk = cpu_to_le32(sci->sc_datablk_cnt);
    finfo->fi_cno = cpu_to_le64(cno);

    segbuf = sci->sc_curseg;
    segbuf->sb_sum.sumbytes = sci->sc_binfo_ptr.offset +
        sci->sc_super->s_blocksize * (segbuf->sb_sum.nsumblk - 1);
    sci->sc_finfo_ptr = sci->sc_binfo_ptr;
    sci->sc_blk_cnt = sci->sc_datablk_cnt = 0;
}

static int nilfs_segctor_add_file_block(struct nilfs_sc_info *sci,
                    struct buffer_head *bh,
                    struct inode *inode,
                    unsigned int binfo_size)
{
    struct nilfs_segment_buffer *segbuf;
    int required, err = 0;

 retry:
    segbuf = sci->sc_curseg;
    required = nilfs_segctor_segsum_block_required(
        sci, &sci->sc_binfo_ptr, binfo_size);
    if (segbuf->sb_sum.nblocks + required + 1 > segbuf->sb_rest_blocks) {
        nilfs_segctor_end_finfo(sci, inode);
        err = nilfs_segctor_feed_segment(sci);
        if (err)
            return err;
        goto retry;
    }
    if (unlikely(required)) {
        err = nilfs_segbuf_extend_segsum(segbuf);
        if (unlikely(err))
            goto failed;
    }
    if (sci->sc_blk_cnt == 0)
        nilfs_segctor_begin_finfo(sci, inode);

    nilfs_segctor_map_segsum_entry(sci, &sci->sc_binfo_ptr, binfo_size);
    /* Substitution to vblocknr is delayed until update_blocknr() */
    nilfs_segbuf_add_file_buffer(segbuf, bh);
    sci->sc_blk_cnt++;
 failed:
    return err;
}

/*
 * Callback functions that enumerate, mark, and collect dirty blocks
 */
static int nilfs_collect_file_data(struct nilfs_sc_info *sci,
                   struct buffer_head *bh, struct inode *inode)
{
    int err;

    err = nilfs_bmap_propagate(NILFS_I(inode)->i_bmap, bh);
    if (err < 0)
        return err;

    err = nilfs_segctor_add_file_block(sci, bh, inode,
                       sizeof(struct nilfs_binfo_v));
    if (!err)
        sci->sc_datablk_cnt++;
    return err;
}

static int nilfs_collect_file_node(struct nilfs_sc_info *sci,
                   struct buffer_head *bh,
                   struct inode *inode)
{
    return nilfs_bmap_propagate(NILFS_I(inode)->i_bmap, bh);
}

static int nilfs_collect_file_bmap(struct nilfs_sc_info *sci,
                   struct buffer_head *bh,
                   struct inode *inode)
{
    WARN_ON(!buffer_dirty(bh));
    return nilfs_segctor_add_file_block(sci, bh, inode, sizeof(__le64));
}

static void nilfs_write_file_data_binfo(struct nilfs_sc_info *sci,
                    struct nilfs_segsum_pointer *ssp,
                    union nilfs_binfo *binfo)
{
    struct nilfs_binfo_v *binfo_v = nilfs_segctor_map_segsum_entry(
        sci, ssp, sizeof(*binfo_v));
    *binfo_v = binfo->bi_v;
}

static void nilfs_write_file_node_binfo(struct nilfs_sc_info *sci,
                    struct nilfs_segsum_pointer *ssp,
                    union nilfs_binfo *binfo)
{
    __le64 *vblocknr = nilfs_segctor_map_segsum_entry(
        sci, ssp, sizeof(*vblocknr));
    *vblocknr = binfo->bi_v.bi_vblocknr;
}

static const struct nilfs_sc_operations nilfs_sc_file_ops = {
    .collect_data = nilfs_collect_file_data,
    .collect_node = nilfs_collect_file_node,
    .collect_bmap = nilfs_collect_file_bmap,
    .write_data_binfo = nilfs_write_file_data_binfo,
    .write_node_binfo = nilfs_write_file_node_binfo,
};

static int nilfs_collect_dat_data(struct nilfs_sc_info *sci,
                  struct buffer_head *bh, struct inode *inode)
{
    int err;

    err = nilfs_bmap_propagate(NILFS_I(inode)->i_bmap, bh);
    if (err < 0)
        return err;

    err = nilfs_segctor_add_file_block(sci, bh, inode, sizeof(__le64));
    if (!err)
        sci->sc_datablk_cnt++;
    return err;
}

static int nilfs_collect_dat_bmap(struct nilfs_sc_info *sci,
                  struct buffer_head *bh, struct inode *inode)
{
    WARN_ON(!buffer_dirty(bh));
    return nilfs_segctor_add_file_block(sci, bh, inode,
                        sizeof(struct nilfs_binfo_dat));
}

static void nilfs_write_dat_data_binfo(struct nilfs_sc_info *sci,
                       struct nilfs_segsum_pointer *ssp,
                       union nilfs_binfo *binfo)
{
    __le64 *blkoff = nilfs_segctor_map_segsum_entry(sci, ssp,
                              sizeof(*blkoff));
    *blkoff = binfo->bi_dat.bi_blkoff;
}

static void nilfs_write_dat_node_binfo(struct nilfs_sc_info *sci,
                       struct nilfs_segsum_pointer *ssp,
                       union nilfs_binfo *binfo)
{
    struct nilfs_binfo_dat *binfo_dat =
        nilfs_segctor_map_segsum_entry(sci, ssp, sizeof(*binfo_dat));
    *binfo_dat = binfo->bi_dat;
}

static const struct nilfs_sc_operations nilfs_sc_dat_ops = {
    .collect_data = nilfs_collect_dat_data,
    .collect_node = nilfs_collect_file_node,
    .collect_bmap = nilfs_collect_dat_bmap,
    .write_data_binfo = nilfs_write_dat_data_binfo,
    .write_node_binfo = nilfs_write_dat_node_binfo,
};

static const struct nilfs_sc_operations nilfs_sc_dsync_ops = {
    .collect_data = nilfs_collect_file_data,
    .collect_node = NULL,
    .collect_bmap = NULL,
    .write_data_binfo = nilfs_write_file_data_binfo,
    .write_node_binfo = NULL,
};

static size_t nilfs_lookup_dirty_data_buffers(struct inode *inode,
                          struct list_head *listp,
                          size_t nlimit,
                          loff_t start, loff_t end)
{
    struct address_space *mapping = inode->i_mapping;
    struct pagevec pvec;
    pgoff_t index = 0, last = ULONG_MAX;
    size_t ndirties = 0;
    int i;

    if (unlikely(start != 0 || end != LLONG_MAX)) {
        /*
         * A valid range is given for sync-ing data pages.  The
         * range is rounded to page boundaries; extra dirty buffers
         * may be included if the block size is smaller than the
         * page size.
         */
        index = start >> PAGE_SHIFT;
        last = end >> PAGE_SHIFT;
    }
    pagevec_init(&pvec);
 repeat:
    if (unlikely(index > last) ||
        !pagevec_lookup_range_tag(&pvec, mapping, &index, last,
                PAGECACHE_TAG_DIRTY))
        return ndirties;

    for (i = 0; i < pagevec_count(&pvec); i++) {
        struct buffer_head *bh, *head;
        struct page *page = pvec.pages[i];

        lock_page(page);
        if (!page_has_buffers(page))
            create_empty_buffers(page, i_blocksize(inode), 0);
        unlock_page(page);

        bh = head = page_buffers(page);
        do {
            if (!buffer_dirty(bh) || buffer_async_write(bh))
                continue;
            get_bh(bh);
            list_add_tail(&bh->b_assoc_buffers, listp);
            ndirties++;
            if (unlikely(ndirties >= nlimit)) {
                pagevec_release(&pvec);
                cond_resched();
                return ndirties;
            }
        } while (bh = bh->b_this_page, bh != head);
    }
    pagevec_release(&pvec);
    cond_resched();
    goto repeat;
}

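/*
 * Illustrative arithmetic (not part of the original file): with assumed
 * 4096-byte pages (PAGE_SHIFT == 12) and 1024-byte blocks, a dsync range
 * of start = 5000, end = 9000 is widened to whole pages:
 *
 *     index = 5000 >> 12 = 1,  last = 9000 >> 12 = 2
 *
 * i.e. bytes 4096..12287 are scanned, so dirty 1 KiB buffers outside the
 * requested byte range but inside those pages are collected too.
 */
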
static void nilfs_lookup_dirty_node_buffers(struct inode *inode,
                        struct list_head *listp)
{
    struct nilfs_inode_info *ii = NILFS_I(inode);
    struct inode *btnc_inode = ii->i_assoc_inode;
    struct pagevec pvec;
    struct buffer_head *bh, *head;
    unsigned int i;
    pgoff_t index = 0;

    if (!btnc_inode)
        return;

    pagevec_init(&pvec);

    while (pagevec_lookup_tag(&pvec, btnc_inode->i_mapping, &index,
                    PAGECACHE_TAG_DIRTY)) {
        for (i = 0; i < pagevec_count(&pvec); i++) {
            bh = head = page_buffers(pvec.pages[i]);
            do {
                if (buffer_dirty(bh) &&
                        !buffer_async_write(bh)) {
                    get_bh(bh);
                    list_add_tail(&bh->b_assoc_buffers,
                              listp);
                }
                bh = bh->b_this_page;
            } while (bh != head);
        }
        pagevec_release(&pvec);
        cond_resched();
    }
}

static void nilfs_dispose_list(struct the_nilfs *nilfs,
                   struct list_head *head, int force)
{
    struct nilfs_inode_info *ii, *n;
    struct nilfs_inode_info *ivec[SC_N_INODEVEC], **pii;
    unsigned int nv = 0;

    while (!list_empty(head)) {
        spin_lock(&nilfs->ns_inode_lock);
        list_for_each_entry_safe(ii, n, head, i_dirty) {
            list_del_init(&ii->i_dirty);
            if (force) {
                if (unlikely(ii->i_bh)) {
                    brelse(ii->i_bh);
                    ii->i_bh = NULL;
                }
            } else if (test_bit(NILFS_I_DIRTY, &ii->i_state)) {
                set_bit(NILFS_I_QUEUED, &ii->i_state);
                list_add_tail(&ii->i_dirty,
                          &nilfs->ns_dirty_files);
                continue;
            }
            ivec[nv++] = ii;
            if (nv == SC_N_INODEVEC)
                break;
        }
        spin_unlock(&nilfs->ns_inode_lock);

        for (pii = ivec; nv > 0; pii++, nv--)
            iput(&(*pii)->vfs_inode);
    }
}

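/*
 * Illustrative note (not part of the original file): nilfs_dispose_list()
 * drops inode references in batches of SC_N_INODEVEC (16).  The list is
 * drained under ns_inode_lock, but iput() can sleep, so inodes are
 * stashed in the local vector and released only after the spinlock is
 * dropped; the outer while loop repeats until the list is empty.
 */
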
static void nilfs_iput_work_func(struct work_struct *work)
{
    struct nilfs_sc_info *sci = container_of(work, struct nilfs_sc_info,
                         sc_iput_work);
    struct the_nilfs *nilfs = sci->sc_super->s_fs_info;

    nilfs_dispose_list(nilfs, &sci->sc_iput_queue, 0);
}

static int nilfs_test_metadata_dirty(struct the_nilfs *nilfs,
                     struct nilfs_root *root)
{
    int ret = 0;

    if (nilfs_mdt_fetch_dirty(root->ifile))
        ret++;
    if (nilfs_mdt_fetch_dirty(nilfs->ns_cpfile))
        ret++;
    if (nilfs_mdt_fetch_dirty(nilfs->ns_sufile))
        ret++;
    if ((ret || nilfs_doing_gc()) && nilfs_mdt_fetch_dirty(nilfs->ns_dat))
        ret++;
    return ret;
}

static int nilfs_segctor_clean(struct nilfs_sc_info *sci)
{
    return list_empty(&sci->sc_dirty_files) &&
        !test_bit(NILFS_SC_DIRTY, &sci->sc_flags) &&
        sci->sc_nfreesegs == 0 &&
        (!nilfs_doing_gc() || list_empty(&sci->sc_gc_inodes));
}

static int nilfs_segctor_confirm(struct nilfs_sc_info *sci)
{
    struct the_nilfs *nilfs = sci->sc_super->s_fs_info;
    int ret = 0;

    if (nilfs_test_metadata_dirty(nilfs, sci->sc_root))
        set_bit(NILFS_SC_DIRTY, &sci->sc_flags);

    spin_lock(&nilfs->ns_inode_lock);
    if (list_empty(&nilfs->ns_dirty_files) && nilfs_segctor_clean(sci))
        ret++;
    spin_unlock(&nilfs->ns_inode_lock);
    return ret;
}

static void nilfs_segctor_clear_metadata_dirty(struct nilfs_sc_info *sci)
{
    struct the_nilfs *nilfs = sci->sc_super->s_fs_info;

    nilfs_mdt_clear_dirty(sci->sc_root->ifile);
    nilfs_mdt_clear_dirty(nilfs->ns_cpfile);
    nilfs_mdt_clear_dirty(nilfs->ns_sufile);
    nilfs_mdt_clear_dirty(nilfs->ns_dat);
}

static int nilfs_segctor_create_checkpoint(struct nilfs_sc_info *sci)
{
    struct the_nilfs *nilfs = sci->sc_super->s_fs_info;
    struct buffer_head *bh_cp;
    struct nilfs_checkpoint *raw_cp;
    int err;

    /* XXX: this interface will be changed */
    err = nilfs_cpfile_get_checkpoint(nilfs->ns_cpfile, nilfs->ns_cno, 1,
                      &raw_cp, &bh_cp);
    if (likely(!err)) {
        /*
         * The following code is duplicated in cpfile, but it is
         * needed here so that the checkpoint is collected even if
         * it was not newly created.
         */
        mark_buffer_dirty(bh_cp);
        nilfs_mdt_mark_dirty(nilfs->ns_cpfile);
        nilfs_cpfile_put_checkpoint(
            nilfs->ns_cpfile, nilfs->ns_cno, bh_cp);
    } else
        WARN_ON(err == -EINVAL || err == -ENOENT);

    return err;
}

static int nilfs_segctor_fill_in_checkpoint(struct nilfs_sc_info *sci)
{
    struct the_nilfs *nilfs = sci->sc_super->s_fs_info;
    struct buffer_head *bh_cp;
    struct nilfs_checkpoint *raw_cp;
    int err;

    err = nilfs_cpfile_get_checkpoint(nilfs->ns_cpfile, nilfs->ns_cno, 0,
                      &raw_cp, &bh_cp);
    if (unlikely(err)) {
        WARN_ON(err == -EINVAL || err == -ENOENT);
        goto failed_ibh;
    }
    raw_cp->cp_snapshot_list.ssl_next = 0;
    raw_cp->cp_snapshot_list.ssl_prev = 0;
    raw_cp->cp_inodes_count =
        cpu_to_le64(atomic64_read(&sci->sc_root->inodes_count));
    raw_cp->cp_blocks_count =
        cpu_to_le64(atomic64_read(&sci->sc_root->blocks_count));
    raw_cp->cp_nblk_inc =
        cpu_to_le64(sci->sc_nblk_inc + sci->sc_nblk_this_inc);
    raw_cp->cp_create = cpu_to_le64(sci->sc_seg_ctime);
    raw_cp->cp_cno = cpu_to_le64(nilfs->ns_cno);

    if (test_bit(NILFS_SC_HAVE_DELTA, &sci->sc_flags))
        nilfs_checkpoint_clear_minor(raw_cp);
    else
        nilfs_checkpoint_set_minor(raw_cp);

    nilfs_write_inode_common(sci->sc_root->ifile,
                 &raw_cp->cp_ifile_inode, 1);
    nilfs_cpfile_put_checkpoint(nilfs->ns_cpfile, nilfs->ns_cno, bh_cp);
    return 0;

 failed_ibh:
    return err;
}

static void nilfs_fill_in_file_bmap(struct inode *ifile,
                    struct nilfs_inode_info *ii)
{
    struct buffer_head *ibh;
    struct nilfs_inode *raw_inode;

    if (test_bit(NILFS_I_BMAP, &ii->i_state)) {
        ibh = ii->i_bh;
        BUG_ON(!ibh);
        raw_inode = nilfs_ifile_map_inode(ifile, ii->vfs_inode.i_ino,
                          ibh);
        nilfs_bmap_write(ii->i_bmap, raw_inode);
        nilfs_ifile_unmap_inode(ifile, ii->vfs_inode.i_ino, ibh);
    }
}

static void nilfs_segctor_fill_in_file_bmap(struct nilfs_sc_info *sci)
{
    struct nilfs_inode_info *ii;

    list_for_each_entry(ii, &sci->sc_dirty_files, i_dirty) {
        nilfs_fill_in_file_bmap(sci->sc_root->ifile, ii);
        set_bit(NILFS_I_COLLECTED, &ii->i_state);
    }
}

static void nilfs_segctor_fill_in_super_root(struct nilfs_sc_info *sci,
                         struct the_nilfs *nilfs)
{
    struct buffer_head *bh_sr;
    struct nilfs_super_root *raw_sr;
    unsigned int isz, srsz;

    bh_sr = NILFS_LAST_SEGBUF(&sci->sc_segbufs)->sb_super_root;
    raw_sr = (struct nilfs_super_root *)bh_sr->b_data;
    isz = nilfs->ns_inode_size;
    srsz = NILFS_SR_BYTES(isz);

    raw_sr->sr_bytes = cpu_to_le16(srsz);
    raw_sr->sr_nongc_ctime
        = cpu_to_le64(nilfs_doing_gc() ?
                  nilfs->ns_nongc_ctime : sci->sc_seg_ctime);
    raw_sr->sr_flags = 0;

    nilfs_write_inode_common(nilfs->ns_dat, (void *)raw_sr +
                 NILFS_SR_DAT_OFFSET(isz), 1);
    nilfs_write_inode_common(nilfs->ns_cpfile, (void *)raw_sr +
                 NILFS_SR_CPFILE_OFFSET(isz), 1);
    nilfs_write_inode_common(nilfs->ns_sufile, (void *)raw_sr +
                 NILFS_SR_SUFILE_OFFSET(isz), 1);
    memset((void *)raw_sr + srsz, 0, nilfs->ns_blocksize - srsz);
}

static void nilfs_redirty_inodes(struct list_head *head)
{
    struct nilfs_inode_info *ii;

    list_for_each_entry(ii, head, i_dirty) {
        if (test_bit(NILFS_I_COLLECTED, &ii->i_state))
            clear_bit(NILFS_I_COLLECTED, &ii->i_state);
    }
}

static void nilfs_drop_collected_inodes(struct list_head *head)
{
    struct nilfs_inode_info *ii;

    list_for_each_entry(ii, head, i_dirty) {
        if (!test_and_clear_bit(NILFS_I_COLLECTED, &ii->i_state))
            continue;

        clear_bit(NILFS_I_INODE_SYNC, &ii->i_state);
        set_bit(NILFS_I_UPDATED, &ii->i_state);
    }
}

static int nilfs_segctor_apply_buffers(struct nilfs_sc_info *sci,
                       struct inode *inode,
                       struct list_head *listp,
                       int (*collect)(struct nilfs_sc_info *,
                              struct buffer_head *,
                              struct inode *))
{
    struct buffer_head *bh, *n;
    int err = 0;

    if (collect) {
        list_for_each_entry_safe(bh, n, listp, b_assoc_buffers) {
            list_del_init(&bh->b_assoc_buffers);
            err = collect(sci, bh, inode);
            brelse(bh);
            if (unlikely(err))
                goto dispose_buffers;
        }
        return 0;
    }

 dispose_buffers:
    while (!list_empty(listp)) {
        bh = list_first_entry(listp, struct buffer_head,
                      b_assoc_buffers);
        list_del_init(&bh->b_assoc_buffers);
        brelse(bh);
    }
    return err;
}

static size_t nilfs_segctor_buffer_rest(struct nilfs_sc_info *sci)
{
    /* Remaining number of blocks within the segment buffer */
    return sci->sc_segbuf_nblocks -
        (sci->sc_nblk_this_inc + sci->sc_curseg->sb_sum.nblocks);
}

static int nilfs_segctor_scan_file(struct nilfs_sc_info *sci,
                   struct inode *inode,
                   const struct nilfs_sc_operations *sc_ops)
{
    LIST_HEAD(data_buffers);
    LIST_HEAD(node_buffers);
    int err;

    if (!(sci->sc_stage.flags & NILFS_CF_NODE)) {
        size_t n, rest = nilfs_segctor_buffer_rest(sci);

        n = nilfs_lookup_dirty_data_buffers(
            inode, &data_buffers, rest + 1, 0, LLONG_MAX);
        if (n > rest) {
            err = nilfs_segctor_apply_buffers(
                sci, inode, &data_buffers,
                sc_ops->collect_data);
            BUG_ON(!err); /* always receive -E2BIG or true error */
            goto break_or_fail;
        }
    }
    nilfs_lookup_dirty_node_buffers(inode, &node_buffers);

    if (!(sci->sc_stage.flags & NILFS_CF_NODE)) {
        err = nilfs_segctor_apply_buffers(
            sci, inode, &data_buffers, sc_ops->collect_data);
        if (unlikely(err)) {
            /* dispose node list */
            nilfs_segctor_apply_buffers(
                sci, inode, &node_buffers, NULL);
            goto break_or_fail;
        }
        sci->sc_stage.flags |= NILFS_CF_NODE;
    }
    /* Collect node */
    err = nilfs_segctor_apply_buffers(
        sci, inode, &node_buffers, sc_ops->collect_node);
    if (unlikely(err))
        goto break_or_fail;

    nilfs_bmap_lookup_dirty_buffers(NILFS_I(inode)->i_bmap, &node_buffers);
    err = nilfs_segctor_apply_buffers(
        sci, inode, &node_buffers, sc_ops->collect_bmap);
    if (unlikely(err))
        goto break_or_fail;

    nilfs_segctor_end_finfo(sci, inode);
    sci->sc_stage.flags &= ~NILFS_CF_NODE;

 break_or_fail:
    return err;
}

static int nilfs_segctor_scan_file_dsync(struct nilfs_sc_info *sci,
                     struct inode *inode)
{
    LIST_HEAD(data_buffers);
    size_t n, rest = nilfs_segctor_buffer_rest(sci);
    int err;

    n = nilfs_lookup_dirty_data_buffers(inode, &data_buffers, rest + 1,
                        sci->sc_dsync_start,
                        sci->sc_dsync_end);

    err = nilfs_segctor_apply_buffers(sci, inode, &data_buffers,
                      nilfs_collect_file_data);
    if (!err) {
        nilfs_segctor_end_finfo(sci, inode);
        BUG_ON(n > rest);
        /* always receive -E2BIG or true error if n > rest */
    }
    return err;
}

static int nilfs_segctor_collect_blocks(struct nilfs_sc_info *sci, int mode)
{
    struct the_nilfs *nilfs = sci->sc_super->s_fs_info;
    struct list_head *head;
    struct nilfs_inode_info *ii;
    size_t ndone;
    int err = 0;

    switch (nilfs_sc_cstage_get(sci)) {
    case NILFS_ST_INIT:
        /* Pre-processes */
        sci->sc_stage.flags = 0;

        if (!test_bit(NILFS_SC_UNCLOSED, &sci->sc_flags)) {
            sci->sc_nblk_inc = 0;
            sci->sc_curseg->sb_sum.flags = NILFS_SS_LOGBGN;
            if (mode == SC_LSEG_DSYNC) {
                nilfs_sc_cstage_set(sci, NILFS_ST_DSYNC);
                goto dsync_mode;
            }
        }

        sci->sc_stage.dirty_file_ptr = NULL;
        sci->sc_stage.gc_inode_ptr = NULL;
        if (mode == SC_FLUSH_DAT) {
            nilfs_sc_cstage_set(sci, NILFS_ST_DAT);
            goto dat_stage;
        }
        nilfs_sc_cstage_inc(sci);
        fallthrough;
    case NILFS_ST_GC:
        if (nilfs_doing_gc()) {
            head = &sci->sc_gc_inodes;
            ii = list_prepare_entry(sci->sc_stage.gc_inode_ptr,
                        head, i_dirty);
            list_for_each_entry_continue(ii, head, i_dirty) {
                err = nilfs_segctor_scan_file(
                    sci, &ii->vfs_inode,
                    &nilfs_sc_file_ops);
                if (unlikely(err)) {
                    sci->sc_stage.gc_inode_ptr = list_entry(
                        ii->i_dirty.prev,
                        struct nilfs_inode_info,
                        i_dirty);
                    goto break_or_fail;
                }
                set_bit(NILFS_I_COLLECTED, &ii->i_state);
            }
            sci->sc_stage.gc_inode_ptr = NULL;
        }
        nilfs_sc_cstage_inc(sci);
        fallthrough;
    case NILFS_ST_FILE:
        head = &sci->sc_dirty_files;
        ii = list_prepare_entry(sci->sc_stage.dirty_file_ptr, head,
                    i_dirty);
        list_for_each_entry_continue(ii, head, i_dirty) {
            clear_bit(NILFS_I_DIRTY, &ii->i_state);

            err = nilfs_segctor_scan_file(sci, &ii->vfs_inode,
                              &nilfs_sc_file_ops);
            if (unlikely(err)) {
                sci->sc_stage.dirty_file_ptr =
                    list_entry(ii->i_dirty.prev,
                           struct nilfs_inode_info,
                           i_dirty);
                goto break_or_fail;
            }
            /* sci->sc_stage.dirty_file_ptr = NILFS_I(inode); */
            /* XXX: required ? */
        }
        sci->sc_stage.dirty_file_ptr = NULL;
        if (mode == SC_FLUSH_FILE) {
            nilfs_sc_cstage_set(sci, NILFS_ST_DONE);
            return 0;
        }
        nilfs_sc_cstage_inc(sci);
        sci->sc_stage.flags |= NILFS_CF_IFILE_STARTED;
        fallthrough;
    case NILFS_ST_IFILE:
        err = nilfs_segctor_scan_file(sci, sci->sc_root->ifile,
                          &nilfs_sc_file_ops);
        if (unlikely(err))
            break;
        nilfs_sc_cstage_inc(sci);
        /* Creating a checkpoint */
        err = nilfs_segctor_create_checkpoint(sci);
        if (unlikely(err))
            break;
        fallthrough;
    case NILFS_ST_CPFILE:
        err = nilfs_segctor_scan_file(sci, nilfs->ns_cpfile,
                          &nilfs_sc_file_ops);
        if (unlikely(err))
            break;
        nilfs_sc_cstage_inc(sci);
        fallthrough;
    case NILFS_ST_SUFILE:
        err = nilfs_sufile_freev(nilfs->ns_sufile, sci->sc_freesegs,
                     sci->sc_nfreesegs, &ndone);
        if (unlikely(err)) {
            nilfs_sufile_cancel_freev(nilfs->ns_sufile,
                          sci->sc_freesegs, ndone,
                          NULL);
            break;
        }
        sci->sc_stage.flags |= NILFS_CF_SUFREED;

        err = nilfs_segctor_scan_file(sci, nilfs->ns_sufile,
                          &nilfs_sc_file_ops);
        if (unlikely(err))
            break;
        nilfs_sc_cstage_inc(sci);
        fallthrough;
    case NILFS_ST_DAT:
 dat_stage:
        err = nilfs_segctor_scan_file(sci, nilfs->ns_dat,
                          &nilfs_sc_dat_ops);
        if (unlikely(err))
            break;
        if (mode == SC_FLUSH_DAT) {
            nilfs_sc_cstage_set(sci, NILFS_ST_DONE);
            return 0;
        }
        nilfs_sc_cstage_inc(sci);
        fallthrough;
    case NILFS_ST_SR:
        if (mode == SC_LSEG_SR) {
            /* Appending a super root */
            err = nilfs_segctor_add_super_root(sci);
            if (unlikely(err))
                break;
        }
        /* End of a logical segment */
        sci->sc_curseg->sb_sum.flags |= NILFS_SS_LOGEND;
        nilfs_sc_cstage_set(sci, NILFS_ST_DONE);
        return 0;
    case NILFS_ST_DSYNC:
 dsync_mode:
        sci->sc_curseg->sb_sum.flags |= NILFS_SS_SYNDT;
        ii = sci->sc_dsync_inode;
        if (!test_bit(NILFS_I_BUSY, &ii->i_state))
            break;

        err = nilfs_segctor_scan_file_dsync(sci, &ii->vfs_inode);
        if (unlikely(err))
            break;
        sci->sc_curseg->sb_sum.flags |= NILFS_SS_LOGEND;
        nilfs_sc_cstage_set(sci, NILFS_ST_DONE);
        return 0;
    case NILFS_ST_DONE:
        return 0;
    default:
        BUG();
    }

 break_or_fail:
    return err;
}

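/*
 * Illustrative summary (not part of the original file): for a full
 * construction (SC_LSEG_SR) the stages above advance as
 *
 *     INIT -> GC -> FILE -> IFILE -> CPFILE -> SUFILE -> DAT -> SR -> DONE
 *
 * while the other modes short-circuit: SC_LSEG_DSYNC jumps from INIT to
 * DSYNC, SC_FLUSH_DAT jumps from INIT to DAT, and SC_FLUSH_FILE stops
 * after FILE.  An -E2BIG from any stage means the segment filled up; the
 * collection loop below then extends the segment list and retries from
 * the saved stage.
 */
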
/**
 * nilfs_segctor_begin_construction - setup segment buffer to make a new log
 * @sci: nilfs_sc_info
 * @nilfs: nilfs object
 */
static int nilfs_segctor_begin_construction(struct nilfs_sc_info *sci,
                        struct the_nilfs *nilfs)
{
    struct nilfs_segment_buffer *segbuf, *prev;
    __u64 nextnum;
    int err, alloc = 0;

    segbuf = nilfs_segbuf_new(sci->sc_super);
    if (unlikely(!segbuf))
        return -ENOMEM;

    if (list_empty(&sci->sc_write_logs)) {
        nilfs_segbuf_map(segbuf, nilfs->ns_segnum,
                 nilfs->ns_pseg_offset, nilfs);
        if (segbuf->sb_rest_blocks < NILFS_PSEG_MIN_BLOCKS) {
            nilfs_shift_to_next_segment(nilfs);
            nilfs_segbuf_map(segbuf, nilfs->ns_segnum, 0, nilfs);
        }

        segbuf->sb_sum.seg_seq = nilfs->ns_seg_seq;
        nextnum = nilfs->ns_nextnum;

        if (nilfs->ns_segnum == nilfs->ns_nextnum)
            /* Start from the head of a new full segment */
            alloc++;
    } else {
        /* Continue logs */
        prev = NILFS_LAST_SEGBUF(&sci->sc_write_logs);
        nilfs_segbuf_map_cont(segbuf, prev);
        segbuf->sb_sum.seg_seq = prev->sb_sum.seg_seq;
        nextnum = prev->sb_nextnum;

        if (segbuf->sb_rest_blocks < NILFS_PSEG_MIN_BLOCKS) {
            nilfs_segbuf_map(segbuf, prev->sb_nextnum, 0, nilfs);
            segbuf->sb_sum.seg_seq++;
            alloc++;
        }
    }

    err = nilfs_sufile_mark_dirty(nilfs->ns_sufile, segbuf->sb_segnum);
    if (err)
        goto failed;

    if (alloc) {
        err = nilfs_sufile_alloc(nilfs->ns_sufile, &nextnum);
        if (err)
            goto failed;
    }
    nilfs_segbuf_set_next_segnum(segbuf, nextnum, nilfs);

    BUG_ON(!list_empty(&sci->sc_segbufs));
    list_add_tail(&segbuf->sb_list, &sci->sc_segbufs);
    sci->sc_segbuf_nblocks = segbuf->sb_rest_blocks;
    return 0;

 failed:
    nilfs_segbuf_free(segbuf);
    return err;
}

static int nilfs_segctor_extend_segments(struct nilfs_sc_info *sci,
                     struct the_nilfs *nilfs, int nadd)
{
    struct nilfs_segment_buffer *segbuf, *prev;
    struct inode *sufile = nilfs->ns_sufile;
    __u64 nextnextnum;
    LIST_HEAD(list);
    int err, ret, i;

    prev = NILFS_LAST_SEGBUF(&sci->sc_segbufs);
    /*
     * Since the segment specified with nextnum might be allocated during
     * the previous construction, the buffer including its segusage may
     * not be dirty.  The following call ensures that the buffer is dirty
     * and will pin the buffer in memory until the sufile is written.
     */
    err = nilfs_sufile_mark_dirty(sufile, prev->sb_nextnum);
    if (unlikely(err))
        return err;

    for (i = 0; i < nadd; i++) {
        /* extend segment info */
        err = -ENOMEM;
        segbuf = nilfs_segbuf_new(sci->sc_super);
        if (unlikely(!segbuf))
            goto failed;

        /* map this buffer to the region of the segment on-disk */
        nilfs_segbuf_map(segbuf, prev->sb_nextnum, 0, nilfs);
        sci->sc_segbuf_nblocks += segbuf->sb_rest_blocks;

        /* allocate the next next full segment */
        err = nilfs_sufile_alloc(sufile, &nextnextnum);
        if (unlikely(err))
            goto failed_segbuf;

        segbuf->sb_sum.seg_seq = prev->sb_sum.seg_seq + 1;
        nilfs_segbuf_set_next_segnum(segbuf, nextnextnum, nilfs);

        list_add_tail(&segbuf->sb_list, &list);
        prev = segbuf;
    }
    list_splice_tail(&list, &sci->sc_segbufs);
    return 0;

 failed_segbuf:
    nilfs_segbuf_free(segbuf);
 failed:
    list_for_each_entry(segbuf, &list, sb_list) {
        ret = nilfs_sufile_free(sufile, segbuf->sb_nextnum);
        WARN_ON(ret); /* never fails */
    }
    nilfs_destroy_logs(&list);
    return err;
}

static void nilfs_free_incomplete_logs(struct list_head *logs,
                       struct the_nilfs *nilfs)
{
    struct nilfs_segment_buffer *segbuf, *prev;
    struct inode *sufile = nilfs->ns_sufile;
    int ret;

    segbuf = NILFS_FIRST_SEGBUF(logs);
    if (nilfs->ns_nextnum != segbuf->sb_nextnum) {
        ret = nilfs_sufile_free(sufile, segbuf->sb_nextnum);
        WARN_ON(ret); /* never fails */
    }
    if (atomic_read(&segbuf->sb_err)) {
        /* Case 1: The first segment failed */
        if (segbuf->sb_pseg_start != segbuf->sb_fseg_start)
            /*
             * Case 1a:  Partial segment appended into an existing
             * segment
             */
            nilfs_terminate_segment(nilfs, segbuf->sb_fseg_start,
                        segbuf->sb_fseg_end);
        else /* Case 1b:  New full segment */
            set_nilfs_discontinued(nilfs);
    }

    prev = segbuf;
    list_for_each_entry_continue(segbuf, logs, sb_list) {
        if (prev->sb_nextnum != segbuf->sb_nextnum) {
            ret = nilfs_sufile_free(sufile, segbuf->sb_nextnum);
            WARN_ON(ret); /* never fails */
        }
        if (atomic_read(&segbuf->sb_err) &&
            segbuf->sb_segnum != nilfs->ns_nextnum)
            /* Case 2: extended segment (!= next) failed */
            nilfs_sufile_set_error(sufile, segbuf->sb_segnum);
        prev = segbuf;
    }
}

static void nilfs_segctor_update_segusage(struct nilfs_sc_info *sci,
                      struct inode *sufile)
{
    struct nilfs_segment_buffer *segbuf;
    unsigned long live_blocks;
    int ret;

    list_for_each_entry(segbuf, &sci->sc_segbufs, sb_list) {
        live_blocks = segbuf->sb_sum.nblocks +
            (segbuf->sb_pseg_start - segbuf->sb_fseg_start);
        ret = nilfs_sufile_set_segment_usage(sufile, segbuf->sb_segnum,
                             live_blocks,
                             sci->sc_seg_ctime);
        WARN_ON(ret); /* always succeeds because the segusage is dirty */
    }
}

static void nilfs_cancel_segusage(struct list_head *logs, struct inode *sufile)
{
    struct nilfs_segment_buffer *segbuf;
    int ret;

    segbuf = NILFS_FIRST_SEGBUF(logs);
    ret = nilfs_sufile_set_segment_usage(sufile, segbuf->sb_segnum,
                         segbuf->sb_pseg_start -
                         segbuf->sb_fseg_start, 0);
    WARN_ON(ret); /* always succeeds because the segusage is dirty */

    list_for_each_entry_continue(segbuf, logs, sb_list) {
        ret = nilfs_sufile_set_segment_usage(sufile, segbuf->sb_segnum,
                             0, 0);
        WARN_ON(ret); /* always succeeds */
    }
}

static void nilfs_segctor_truncate_segments(struct nilfs_sc_info *sci,
                        struct nilfs_segment_buffer *last,
                        struct inode *sufile)
{
    struct nilfs_segment_buffer *segbuf = last;
    int ret;

    list_for_each_entry_continue(segbuf, &sci->sc_segbufs, sb_list) {
        sci->sc_segbuf_nblocks -= segbuf->sb_rest_blocks;
        ret = nilfs_sufile_free(sufile, segbuf->sb_nextnum);
        WARN_ON(ret);
    }
    nilfs_truncate_logs(&sci->sc_segbufs, last);
}

static int nilfs_segctor_collect(struct nilfs_sc_info *sci,
                 struct the_nilfs *nilfs, int mode)
{
    struct nilfs_cstage prev_stage = sci->sc_stage;
    int err, nadd = 1;

    /* Collection retry loop */
    for (;;) {
        sci->sc_nblk_this_inc = 0;
        sci->sc_curseg = NILFS_FIRST_SEGBUF(&sci->sc_segbufs);

        err = nilfs_segctor_reset_segment_buffer(sci);
        if (unlikely(err))
            goto failed;

        err = nilfs_segctor_collect_blocks(sci, mode);
        sci->sc_nblk_this_inc += sci->sc_curseg->sb_sum.nblocks;
        if (!err)
            break;

        if (unlikely(err != -E2BIG))
            goto failed;

        /* The current segment is filled up */
        if (mode != SC_LSEG_SR ||
            nilfs_sc_cstage_get(sci) < NILFS_ST_CPFILE)
            break;

        nilfs_clear_logs(&sci->sc_segbufs);

        if (sci->sc_stage.flags & NILFS_CF_SUFREED) {
            err = nilfs_sufile_cancel_freev(nilfs->ns_sufile,
                            sci->sc_freesegs,
                            sci->sc_nfreesegs,
                            NULL);
            WARN_ON(err); /* should not happen */
            sci->sc_stage.flags &= ~NILFS_CF_SUFREED;
        }

        err = nilfs_segctor_extend_segments(sci, nilfs, nadd);
        if (unlikely(err))
            return err;

        nadd = min_t(int, nadd << 1, SC_MAX_SEGDELTA);
        sci->sc_stage = prev_stage;
    }
    nilfs_segctor_truncate_segments(sci, sci->sc_curseg, nilfs->ns_sufile);
    return 0;

 failed:
    return err;
}

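/*
 * Illustrative note (not part of the original file): on repeated -E2BIG
 * the retry loop above doubles the number of segments appended per
 * attempt, capped by SC_MAX_SEGDELTA:
 *
 *     nadd: 1, 2, 4, 8, 16, 32, 64, 64, ...
 *
 * so a construction that keeps overflowing grows its segment buffer list
 * geometrically instead of one segment at a time.
 */
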
1535 static void nilfs_list_replace_buffer(struct buffer_head *old_bh,
1536                       struct buffer_head *new_bh)
1537 {
1538     BUG_ON(!list_empty(&new_bh->b_assoc_buffers));
1539 
1540     list_replace_init(&old_bh->b_assoc_buffers, &new_bh->b_assoc_buffers);
1541     /* The caller must release old_bh */
1542 }
1543 
1544 static int
1545 nilfs_segctor_update_payload_blocknr(struct nilfs_sc_info *sci,
1546                      struct nilfs_segment_buffer *segbuf,
1547                      int mode)
1548 {
1549     struct inode *inode = NULL;
1550     sector_t blocknr;
1551     unsigned long nfinfo = segbuf->sb_sum.nfinfo;
1552     unsigned long nblocks = 0, ndatablk = 0;
1553     const struct nilfs_sc_operations *sc_op = NULL;
1554     struct nilfs_segsum_pointer ssp;
1555     struct nilfs_finfo *finfo = NULL;
1556     union nilfs_binfo binfo;
1557     struct buffer_head *bh, *bh_org;
1558     ino_t ino = 0;
1559     int err = 0;
1560 
1561     if (!nfinfo)
1562         goto out;
1563 
1564     blocknr = segbuf->sb_pseg_start + segbuf->sb_sum.nsumblk;
1565     ssp.bh = NILFS_SEGBUF_FIRST_BH(&segbuf->sb_segsum_buffers);
1566     ssp.offset = sizeof(struct nilfs_segment_summary);
1567 
1568     list_for_each_entry(bh, &segbuf->sb_payload_buffers, b_assoc_buffers) {
1569         if (bh == segbuf->sb_super_root)
1570             break;
1571         if (!finfo) {
1572             finfo = nilfs_segctor_map_segsum_entry(
1573                 sci, &ssp, sizeof(*finfo));
1574             ino = le64_to_cpu(finfo->fi_ino);
1575             nblocks = le32_to_cpu(finfo->fi_nblocks);
1576             ndatablk = le32_to_cpu(finfo->fi_ndatablk);
1577 
1578             inode = bh->b_page->mapping->host;
1579 
1580             if (mode == SC_LSEG_DSYNC)
1581                 sc_op = &nilfs_sc_dsync_ops;
1582             else if (ino == NILFS_DAT_INO)
1583                 sc_op = &nilfs_sc_dat_ops;
1584             else /* file blocks */
1585                 sc_op = &nilfs_sc_file_ops;
1586         }
1587         bh_org = bh;
1588         get_bh(bh_org);
1589         err = nilfs_bmap_assign(NILFS_I(inode)->i_bmap, &bh, blocknr,
1590                     &binfo);
1591         if (bh != bh_org)
1592             nilfs_list_replace_buffer(bh_org, bh);
1593         brelse(bh_org);
1594         if (unlikely(err))
1595             goto failed_bmap;
1596 
1597         if (ndatablk > 0)
1598             sc_op->write_data_binfo(sci, &ssp, &binfo);
1599         else
1600             sc_op->write_node_binfo(sci, &ssp, &binfo);
1601 
1602         blocknr++;
1603         if (--nblocks == 0) {
1604             finfo = NULL;
1605             if (--nfinfo == 0)
1606                 break;
1607         } else if (ndatablk > 0) {
1608             ndatablk--;
             }
1609     }
1610  out:
1611     return 0;
1612 
1613  failed_bmap:
1614     return err;
1615 }
1616 
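     /**
      * nilfs_segctor_assign - assign block numbers and complete summaries
      * @sci: segment constructor object
      * @mode: mode of log forming
      *
      * Applies nilfs_segctor_update_payload_blocknr() to every segment
      * buffer and fills in the remaining segment summary fields.
      */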
1617 static int nilfs_segctor_assign(struct nilfs_sc_info *sci, int mode)
1618 {
1619     struct nilfs_segment_buffer *segbuf;
1620     int err;
1621 
1622     list_for_each_entry(segbuf, &sci->sc_segbufs, sb_list) {
1623         err = nilfs_segctor_update_payload_blocknr(sci, segbuf, mode);
1624         if (unlikely(err))
1625             return err;
1626         nilfs_segbuf_fill_in_segsum(segbuf);
1627     }
1628     return 0;
1629 }
1630 
1631 static void nilfs_begin_page_io(struct page *page)
1632 {
1633     if (!page || PageWriteback(page))
1634         /*
1635          * For split b-tree node pages, this function may be called
1636          * twice.  We ignore the second and later calls with this check.
1637          */
1638         return;
1639 
1640     lock_page(page);
1641     clear_page_dirty_for_io(page);
1642     set_page_writeback(page);
1643     unlock_page(page);
1644 }
1645 
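     /**
      * nilfs_segctor_prepare_write - put collected pages under writeback
      * @sci: segment constructor object
      *
      * Marks the pages of the segment summary and payload buffers for
      * writeback and sets the async-write flag on the payload buffers.
      * A page is processed only when bh->b_page changes, since buffers
      * belonging to the same page are adjacent on the lists.
      */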
1646 static void nilfs_segctor_prepare_write(struct nilfs_sc_info *sci)
1647 {
1648     struct nilfs_segment_buffer *segbuf;
1649     struct page *bd_page = NULL, *fs_page = NULL;
1650 
1651     list_for_each_entry(segbuf, &sci->sc_segbufs, sb_list) {
1652         struct buffer_head *bh;
1653 
1654         list_for_each_entry(bh, &segbuf->sb_segsum_buffers,
1655                     b_assoc_buffers) {
1656             if (bh->b_page != bd_page) {
1657                 if (bd_page) {
1658                     lock_page(bd_page);
1659                     clear_page_dirty_for_io(bd_page);
1660                     set_page_writeback(bd_page);
1661                     unlock_page(bd_page);
1662                 }
1663                 bd_page = bh->b_page;
1664             }
1665         }
1666 
1667         list_for_each_entry(bh, &segbuf->sb_payload_buffers,
1668                     b_assoc_buffers) {
1669             set_buffer_async_write(bh);
1670             if (bh == segbuf->sb_super_root) {
1671                 if (bh->b_page != bd_page) {
1672                     lock_page(bd_page);
1673                     clear_page_dirty_for_io(bd_page);
1674                     set_page_writeback(bd_page);
1675                     unlock_page(bd_page);
1676                     bd_page = bh->b_page;
1677                 }
1678                 break;
1679             }
1680             if (bh->b_page != fs_page) {
1681                 nilfs_begin_page_io(fs_page);
1682                 fs_page = bh->b_page;
1683             }
1684         }
1685     }
1686     if (bd_page) {
1687         lock_page(bd_page);
1688         clear_page_dirty_for_io(bd_page);
1689         set_page_writeback(bd_page);
1690         unlock_page(bd_page);
1691     }
1692     nilfs_begin_page_io(fs_page);
1693 }
1694 
1695 static int nilfs_segctor_write(struct nilfs_sc_info *sci,
1696                    struct the_nilfs *nilfs)
1697 {
1698     int ret;
1699 
1700     ret = nilfs_write_logs(&sci->sc_segbufs, nilfs);
1701     list_splice_tail_init(&sci->sc_segbufs, &sci->sc_write_logs);
1702     return ret;
1703 }
1704 
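     /**
      * nilfs_end_page_io - finish writeback of a page
      * @page: page to finish (ignored when NULL)
      * @err: I/O error code, or zero on success
      *
      * Ends writeback of @page, redirtying it on failure so that the
      * blocks are written again by a later segment construction.  Pages of
      * b-tree node buffers get special handling because they may be split
      * across logs.
      */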
1705 static void nilfs_end_page_io(struct page *page, int err)
1706 {
1707     if (!page)
1708         return;
1709 
1710     if (buffer_nilfs_node(page_buffers(page)) && !PageWriteback(page)) {
1711         /*
1712          * For b-tree node pages, this function may be called twice
1713          * or more because they might be split in a segment.
1714          */
1715         if (PageDirty(page)) {
1716             /*
1717              * For pages holding split b-tree node buffers, the dirty
1718              * flags of the buffers may be cleared at different times.
1719              * In that case, the page is redirtied once for the
1720              * remaining buffers, and this redirtying must be
1721              * cancelled if all the buffers get cleaned later.
1722              */
1723             lock_page(page);
1724             if (nilfs_page_buffers_clean(page))
1725                 __nilfs_clear_page_dirty(page);
1726             unlock_page(page);
1727         }
1728         return;
1729     }
1730 
1731     if (!err) {
1732         if (!nilfs_page_buffers_clean(page))
1733             __set_page_dirty_nobuffers(page);
1734         ClearPageError(page);
1735     } else {
1736         __set_page_dirty_nobuffers(page);
1737         SetPageError(page);
1738     }
1739 
1740     end_page_writeback(page);
1741 }
1742 
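     /**
      * nilfs_abort_logs - abort logs that were being written
      * @logs: list of segment buffers to abort
      * @err: error code to hand to the page cleanup
      *
      * Clears the async-write flag of the payload buffers and ends the
      * writeback state of every page covered by the logs in @logs.
      */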
1743 static void nilfs_abort_logs(struct list_head *logs, int err)
1744 {
1745     struct nilfs_segment_buffer *segbuf;
1746     struct page *bd_page = NULL, *fs_page = NULL;
1747     struct buffer_head *bh;
1748 
1749     if (list_empty(logs))
1750         return;
1751 
1752     list_for_each_entry(segbuf, logs, sb_list) {
1753         list_for_each_entry(bh, &segbuf->sb_segsum_buffers,
1754                     b_assoc_buffers) {
1755             if (bh->b_page != bd_page) {
1756                 if (bd_page)
1757                     end_page_writeback(bd_page);
1758                 bd_page = bh->b_page;
1759             }
1760         }
1761 
1762         list_for_each_entry(bh, &segbuf->sb_payload_buffers,
1763                     b_assoc_buffers) {
1764             clear_buffer_async_write(bh);
1765             if (bh == segbuf->sb_super_root) {
1766                 if (bh->b_page != bd_page) {
1767                     end_page_writeback(bd_page);
1768                     bd_page = bh->b_page;
1769                 }
1770                 break;
1771             }
1772             if (bh->b_page != fs_page) {
1773                 nilfs_end_page_io(fs_page, err);
1774                 fs_page = bh->b_page;
1775             }
1776         }
1777     }
1778     if (bd_page)
1779         end_page_writeback(bd_page);
1780 
1781     nilfs_end_page_io(fs_page, err);
1782 }
1783 
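     /**
      * nilfs_segctor_abort_construction - roll back a failed construction
      * @sci: segment constructor object
      * @nilfs: nilfs object
      * @err: error code that caused the abort
      *
      * Waits for in-flight logs and aborts them, cancels the segment usage
      * updates, frees incomplete segments, and undoes the segment
      * deallocations made during the collection stage, then destroys the
      * log list.
      */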
1784 static void nilfs_segctor_abort_construction(struct nilfs_sc_info *sci,
1785                          struct the_nilfs *nilfs, int err)
1786 {
1787     LIST_HEAD(logs);
1788     int ret;
1789 
1790     list_splice_tail_init(&sci->sc_write_logs, &logs);
1791     ret = nilfs_wait_on_logs(&logs);
1792     nilfs_abort_logs(&logs, ret ? : err);
1793 
1794     list_splice_tail_init(&sci->sc_segbufs, &logs);
1795     nilfs_cancel_segusage(&logs, nilfs->ns_sufile);
1796     nilfs_free_incomplete_logs(&logs, nilfs);
1797 
1798     if (sci->sc_stage.flags & NILFS_CF_SUFREED) {
1799         ret = nilfs_sufile_cancel_freev(nilfs->ns_sufile,
1800                         sci->sc_freesegs,
1801                         sci->sc_nfreesegs,
1802                         NULL);
1803         WARN_ON(ret); /* should not happen */
1804     }
1805 
1806     nilfs_destroy_logs(&logs);
1807 }
1808 
1809 static void nilfs_set_next_segment(struct the_nilfs *nilfs,
1810                    struct nilfs_segment_buffer *segbuf)
1811 {
1812     nilfs->ns_segnum = segbuf->sb_segnum;
1813     nilfs->ns_nextnum = segbuf->sb_nextnum;
1814     nilfs->ns_pseg_offset = segbuf->sb_pseg_start - segbuf->sb_fseg_start
1815         + segbuf->sb_sum.nblocks;
1816     nilfs->ns_seg_seq = segbuf->sb_sum.seg_seq;
1817     nilfs->ns_ctime = segbuf->sb_sum.ctime;
1818 }
1819 
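     /**
      * nilfs_segctor_complete_write - finish a successful log write
      * @sci: segment constructor object
      *
      * Cleans up the buffer and page states of the written logs, updates
      * the position of the next segment, and, if a super root was written,
      * records the new last segment and increments the checkpoint number.
      */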
1820 static void nilfs_segctor_complete_write(struct nilfs_sc_info *sci)
1821 {
1822     struct nilfs_segment_buffer *segbuf;
1823     struct page *bd_page = NULL, *fs_page = NULL;
1824     struct the_nilfs *nilfs = sci->sc_super->s_fs_info;
1825     int update_sr = false;
1826 
1827     list_for_each_entry(segbuf, &sci->sc_write_logs, sb_list) {
1828         struct buffer_head *bh;
1829 
1830         list_for_each_entry(bh, &segbuf->sb_segsum_buffers,
1831                     b_assoc_buffers) {
1832             set_buffer_uptodate(bh);
1833             clear_buffer_dirty(bh);
1834             if (bh->b_page != bd_page) {
1835                 if (bd_page)
1836                     end_page_writeback(bd_page);
1837                 bd_page = bh->b_page;
1838             }
1839         }
1840         /*
1841          * We assume that buffers belonging to the same page are
1842          * contiguous in the buffer list.
1843          * Under this assumption, the last buffer of each page is
1844          * identifiable by a discontinuity of bh->b_page
1845          * (page != fs_page).
1846          *
1847          * For B-tree node blocks, however, this assumption is not
1848          * guaranteed.  The cleanup code of B-tree node pages needs
1849          * special care.
1850          */
1851         list_for_each_entry(bh, &segbuf->sb_payload_buffers,
1852                     b_assoc_buffers) {
1853             const unsigned long set_bits = BIT(BH_Uptodate);
1854             const unsigned long clear_bits =
1855                 (BIT(BH_Dirty) | BIT(BH_Async_Write) |
1856                  BIT(BH_Delay) | BIT(BH_NILFS_Volatile) |
1857                  BIT(BH_NILFS_Redirected));
1858 
1859             set_mask_bits(&bh->b_state, clear_bits, set_bits);
1860             if (bh == segbuf->sb_super_root) {
1861                 if (bh->b_page != bd_page) {
1862                     end_page_writeback(bd_page);
1863                     bd_page = bh->b_page;
1864                 }
1865                 update_sr = true;
1866                 break;
1867             }
1868             if (bh->b_page != fs_page) {
1869                 nilfs_end_page_io(fs_page, 0);
1870                 fs_page = bh->b_page;
1871             }
1872         }
1873 
1874         if (!nilfs_segbuf_simplex(segbuf)) {
1875             if (segbuf->sb_sum.flags & NILFS_SS_LOGBGN) {
1876                 set_bit(NILFS_SC_UNCLOSED, &sci->sc_flags);
1877                 sci->sc_lseg_stime = jiffies;
1878             }
1879             if (segbuf->sb_sum.flags & NILFS_SS_LOGEND)
1880                 clear_bit(NILFS_SC_UNCLOSED, &sci->sc_flags);
1881         }
1882     }
1883     /*
1884      * Since pages may continue over multiple segment buffers,
1885      * the end of the last page must be checked outside the loop.
1886      */
1887     if (bd_page)
1888         end_page_writeback(bd_page);
1889 
1890     nilfs_end_page_io(fs_page, 0);
1891 
1892     nilfs_drop_collected_inodes(&sci->sc_dirty_files);
1893 
1894     if (nilfs_doing_gc())
1895         nilfs_drop_collected_inodes(&sci->sc_gc_inodes);
1896     else
1897         nilfs->ns_nongc_ctime = sci->sc_seg_ctime;
1898 
1899     sci->sc_nblk_inc += sci->sc_nblk_this_inc;
1900 
1901     segbuf = NILFS_LAST_SEGBUF(&sci->sc_write_logs);
1902     nilfs_set_next_segment(nilfs, segbuf);
1903 
1904     if (update_sr) {
1905         nilfs->ns_flushed_device = 0;
1906         nilfs_set_last_segment(nilfs, segbuf->sb_pseg_start,
1907                        segbuf->sb_sum.seg_seq, nilfs->ns_cno++);
1908 
1909         clear_bit(NILFS_SC_HAVE_DELTA, &sci->sc_flags);
1910         clear_bit(NILFS_SC_DIRTY, &sci->sc_flags);
1911         set_bit(NILFS_SC_SUPER_ROOT, &sci->sc_flags);
1912         nilfs_segctor_clear_metadata_dirty(sci);
1913     } else {
1914         clear_bit(NILFS_SC_SUPER_ROOT, &sci->sc_flags);
         }
1915 }
1916 
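     /**
      * nilfs_segctor_wait - wait for completion of the requested log writes
      * @sci: segment constructor object
      */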
1917 static int nilfs_segctor_wait(struct nilfs_sc_info *sci)
1918 {
1919     int ret;
1920 
1921     ret = nilfs_wait_on_logs(&sci->sc_write_logs);
1922     if (!ret) {
1923         nilfs_segctor_complete_write(sci);
1924         nilfs_destroy_logs(&sci->sc_write_logs);
1925     }
1926     return ret;
1927 }
1928 
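     /**
      * nilfs_segctor_collect_dirty_files - take over dirty inodes
      * @sci: segment constructor object
      * @nilfs: nilfs object
      *
      * Attaches an inode block buffer to each queued dirty inode, marking
      * it dirty, and moves the inode from the ns_dirty_files list to the
      * writer's sc_dirty_files list.  The list scan restarts after the
      * inode lock has been dropped to read an inode block.
      */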
1929 static int nilfs_segctor_collect_dirty_files(struct nilfs_sc_info *sci,
1930                          struct the_nilfs *nilfs)
1931 {
1932     struct nilfs_inode_info *ii, *n;
1933     struct inode *ifile = sci->sc_root->ifile;
1934 
1935     spin_lock(&nilfs->ns_inode_lock);
1936  retry:
1937     list_for_each_entry_safe(ii, n, &nilfs->ns_dirty_files, i_dirty) {
1938         if (!ii->i_bh) {
1939             struct buffer_head *ibh;
1940             int err;
1941 
1942             spin_unlock(&nilfs->ns_inode_lock);
1943             err = nilfs_ifile_get_inode_block(
1944                 ifile, ii->vfs_inode.i_ino, &ibh);
1945             if (unlikely(err)) {
1946                 nilfs_warn(sci->sc_super,
1947                        "log writer: error %d getting inode block (ino=%lu)",
1948                        err, ii->vfs_inode.i_ino);
1949                 return err;
1950             }
1951             spin_lock(&nilfs->ns_inode_lock);
1952             if (likely(!ii->i_bh))
1953                 ii->i_bh = ibh;
1954             else
1955                 brelse(ibh);
1956             goto retry;
1957         }
1958 
1959         /* Always redirty the buffer to avoid a race condition */
1960         mark_buffer_dirty(ii->i_bh);
1961         nilfs_mdt_mark_dirty(ifile);
1962 
1963         clear_bit(NILFS_I_QUEUED, &ii->i_state);
1964         set_bit(NILFS_I_BUSY, &ii->i_state);
1965         list_move_tail(&ii->i_dirty, &sci->sc_dirty_files);
1966     }
1967     spin_unlock(&nilfs->ns_inode_lock);
1968 
1969     return 0;
1970 }
1971 
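     /**
      * nilfs_segctor_drop_written_files - release inodes whose blocks were written
      * @sci: segment constructor object
      * @nilfs: nilfs object
      *
      * Drops fully written inodes from the sc_dirty_files list.  iput() is
      * deferred to a work item for unlinked inodes and during mount to
      * avoid deadlocks.
      */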
1972 static void nilfs_segctor_drop_written_files(struct nilfs_sc_info *sci,
1973                          struct the_nilfs *nilfs)
1974 {
1975     struct nilfs_inode_info *ii, *n;
1976     int during_mount = !(sci->sc_super->s_flags & SB_ACTIVE);
1977     int defer_iput = false;
1978 
1979     spin_lock(&nilfs->ns_inode_lock);
1980     list_for_each_entry_safe(ii, n, &sci->sc_dirty_files, i_dirty) {
1981         if (!test_and_clear_bit(NILFS_I_UPDATED, &ii->i_state) ||
1982             test_bit(NILFS_I_DIRTY, &ii->i_state))
1983             continue;
1984 
1985         clear_bit(NILFS_I_BUSY, &ii->i_state);
1986         brelse(ii->i_bh);
1987         ii->i_bh = NULL;
1988         list_del_init(&ii->i_dirty);
1989         if (!ii->vfs_inode.i_nlink || during_mount) {
1990             /*
1991              * Defer calling iput() to avoid deadlocks if
1992              * i_nlink == 0 or mount is not yet finished.
1993              */
1994             list_add_tail(&ii->i_dirty, &sci->sc_iput_queue);
1995             defer_iput = true;
1996         } else {
1997             spin_unlock(&nilfs->ns_inode_lock);
1998             iput(&ii->vfs_inode);
1999             spin_lock(&nilfs->ns_inode_lock);
2000         }
2001     }
2002     spin_unlock(&nilfs->ns_inode_lock);
2003 
2004     if (defer_iput)
2005         schedule_work(&sci->sc_iput_work);
2006 }
2007 
2008 /**
2009  * nilfs_segctor_do_construct - main procedure of the segment constructor
2010  * @sci: segment constructor object
      * @mode: mode of log forming
      */
2011 static int nilfs_segctor_do_construct(struct nilfs_sc_info *sci, int mode)
2012 {
2013     struct the_nilfs *nilfs = sci->sc_super->s_fs_info;
2014     int err;
2015 
2016     nilfs_sc_cstage_set(sci, NILFS_ST_INIT);
2017     sci->sc_cno = nilfs->ns_cno;
2018 
2019     err = nilfs_segctor_collect_dirty_files(sci, nilfs);
2020     if (unlikely(err))
2021         goto out;
2022 
2023     if (nilfs_test_metadata_dirty(nilfs, sci->sc_root))
2024         set_bit(NILFS_SC_DIRTY, &sci->sc_flags);
2025 
2026     if (nilfs_segctor_clean(sci))
2027         goto out;
2028 
2029     do {
2030         sci->sc_stage.flags &= ~NILFS_CF_HISTORY_MASK;
2031 
2032         err = nilfs_segctor_begin_construction(sci, nilfs);
2033         if (unlikely(err))
2034             goto out;
2035 
2036         /* Update time stamp */
2037         sci->sc_seg_ctime = ktime_get_real_seconds();
2038 
2039         err = nilfs_segctor_collect(sci, nilfs, mode);
2040         if (unlikely(err))
2041             goto failed;
2042 
2043         /* Avoid empty segment */
2044         if (nilfs_sc_cstage_get(sci) == NILFS_ST_DONE &&
2045             nilfs_segbuf_empty(sci->sc_curseg)) {
2046             nilfs_segctor_abort_construction(sci, nilfs, 1);
2047             goto out;
2048         }
2049 
2050         err = nilfs_segctor_assign(sci, mode);
2051         if (unlikely(err))
2052             goto failed;
2053 
2054         if (sci->sc_stage.flags & NILFS_CF_IFILE_STARTED)
2055             nilfs_segctor_fill_in_file_bmap(sci);
2056 
2057         if (mode == SC_LSEG_SR &&
2058             nilfs_sc_cstage_get(sci) >= NILFS_ST_CPFILE) {
2059             err = nilfs_segctor_fill_in_checkpoint(sci);
2060             if (unlikely(err))
2061                 goto failed_to_write;
2062 
2063             nilfs_segctor_fill_in_super_root(sci, nilfs);
2064         }
2065         nilfs_segctor_update_segusage(sci, nilfs->ns_sufile);
2066 
2067         /* Write partial segments */
2068         nilfs_segctor_prepare_write(sci);
2069 
2070         nilfs_add_checksums_on_logs(&sci->sc_segbufs,
2071                         nilfs->ns_crc_seed);
2072 
2073         err = nilfs_segctor_write(sci, nilfs);
2074         if (unlikely(err))
2075             goto failed_to_write;
2076 
2077         if (nilfs_sc_cstage_get(sci) == NILFS_ST_DONE ||
2078             nilfs->ns_blocksize_bits != PAGE_SHIFT) {
2079             /*
2080              * At this point, we avoid double buffering
2081              * for blocksize < pagesize because the page dirty
2082              * flag is turned off during the write and dirty
2083              * buffers are not properly collected for pages
2084              * crossing over segments.
2085              */
2086             err = nilfs_segctor_wait(sci);
2087             if (err)
2088                 goto failed_to_write;
2089         }
2090     } while (nilfs_sc_cstage_get(sci) != NILFS_ST_DONE);
2091 
2092  out:
2093     nilfs_segctor_drop_written_files(sci, nilfs);
2094     return err;
2095 
2096  failed_to_write:
2097     if (sci->sc_stage.flags & NILFS_CF_IFILE_STARTED)
2098         nilfs_redirty_inodes(&sci->sc_dirty_files);
2099 
2100  failed:
2101     if (nilfs_doing_gc())
2102         nilfs_redirty_inodes(&sci->sc_gc_inodes);
2103     nilfs_segctor_abort_construction(sci, nilfs, err);
2104     goto out;
2105 }
2106 
2107 /**
2108  * nilfs_segctor_start_timer - set timer of background write
2109  * @sci: nilfs_sc_info
2110  *
2111  * If the timer has already been set, it ignores the new request.
2112  * This function MUST be called within a section locking the segment
2113  * semaphore.
2114  */
2115 static void nilfs_segctor_start_timer(struct nilfs_sc_info *sci)
2116 {
2117     spin_lock(&sci->sc_state_lock);
2118     if (!(sci->sc_state & NILFS_SEGCTOR_COMMIT)) {
2119         sci->sc_timer.expires = jiffies + sci->sc_interval;
2120         add_timer(&sci->sc_timer);
2121         sci->sc_state |= NILFS_SEGCTOR_COMMIT;
2122     }
2123     spin_unlock(&sci->sc_state_lock);
2124 }
2125 
2126 static void nilfs_segctor_do_flush(struct nilfs_sc_info *sci, int bn)
2127 {
2128     spin_lock(&sci->sc_state_lock);
2129     if (!(sci->sc_flush_request & BIT(bn))) {
2130         unsigned long prev_req = sci->sc_flush_request;
2131 
2132         sci->sc_flush_request |= BIT(bn);
2133         if (!prev_req)
2134             wake_up(&sci->sc_wait_daemon);
2135     }
2136     spin_unlock(&sci->sc_state_lock);
2137 }
2138 
2139 /**
2140  * nilfs_flush_segment - trigger a segment construction for resource control
2141  * @sb: super block
2142  * @ino: inode number of the file to be flushed out.
2143  */
2144 void nilfs_flush_segment(struct super_block *sb, ino_t ino)
2145 {
2146     struct the_nilfs *nilfs = sb->s_fs_info;
2147     struct nilfs_sc_info *sci = nilfs->ns_writer;
2148 
2149     if (!sci || nilfs_doing_construction())
2150         return;
2151     nilfs_segctor_do_flush(sci, NILFS_MDT_INODE(sb, ino) ? ino : 0);
2152                     /* assign bit 0 to data files */
2153 }
2154 
2155 struct nilfs_segctor_wait_request {
2156     wait_queue_entry_t  wq;
2157     __u32       seq;
2158     int     err;
2159     atomic_t    done;
2160 };
2161 
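     /**
      * nilfs_segctor_sync - request a full construction and wait for it
      * @sci: segment constructor object
      *
      * Queues a log-write request, wakes up the segctord thread, and sleeps
      * until the request is served or a signal arrives (-ERESTARTSYS).
      */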
2162 static int nilfs_segctor_sync(struct nilfs_sc_info *sci)
2163 {
2164     struct nilfs_segctor_wait_request wait_req;
2165     int err = 0;
2166 
2167     spin_lock(&sci->sc_state_lock);
2168     init_wait(&wait_req.wq);
2169     wait_req.err = 0;
2170     atomic_set(&wait_req.done, 0);
2171     wait_req.seq = ++sci->sc_seq_request;
2172     spin_unlock(&sci->sc_state_lock);
2173 
2174     init_waitqueue_entry(&wait_req.wq, current);
2175     add_wait_queue(&sci->sc_wait_request, &wait_req.wq);
2176     set_current_state(TASK_INTERRUPTIBLE);
2177     wake_up(&sci->sc_wait_daemon);
2178 
2179     for (;;) {
2180         if (atomic_read(&wait_req.done)) {
2181             err = wait_req.err;
2182             break;
2183         }
2184         if (!signal_pending(current)) {
2185             schedule();
2186             continue;
2187         }
2188         err = -ERESTARTSYS;
2189         break;
2190     }
2191     finish_wait(&sci->sc_wait_request, &wait_req.wq);
2192     return err;
2193 }
2194 
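     /**
      * nilfs_segctor_wakeup - wake up threads waiting for log-write results
      * @sci: segment constructor object
      * @err: error code to report to the waiters
      *
      * Completes every queued wait request whose sequence number has been
      * served (sc_seq_done) and wakes up the waiting thread.
      */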
2195 static void nilfs_segctor_wakeup(struct nilfs_sc_info *sci, int err)
2196 {
2197     struct nilfs_segctor_wait_request *wrq, *n;
2198     unsigned long flags;
2199 
2200     spin_lock_irqsave(&sci->sc_wait_request.lock, flags);
2201     list_for_each_entry_safe(wrq, n, &sci->sc_wait_request.head, wq.entry) {
2202         if (!atomic_read(&wrq->done) &&
2203             nilfs_cnt32_ge(sci->sc_seq_done, wrq->seq)) {
2204             wrq->err = err;
2205             atomic_set(&wrq->done, 1);
2206         }
2207         if (atomic_read(&wrq->done)) {
2208             wrq->wq.func(&wrq->wq,
2209                      TASK_UNINTERRUPTIBLE | TASK_INTERRUPTIBLE,
2210                      0, NULL);
2211         }
2212     }
2213     spin_unlock_irqrestore(&sci->sc_wait_request.lock, flags);
2214 }
2215 
2216 /**
2217  * nilfs_construct_segment - construct a logical segment
2218  * @sb: super block
2219  *
2220  * Return Value: On success, 0 is returned. On errors, one of the following
2221  * negative error codes is returned.
2222  *
2223  * %-EROFS - Read only filesystem.
2224  *
2225  * %-EIO - I/O error
2226  *
2227  * %-ENOSPC - No space left on device (only in a panic state).
2228  *
2229  * %-ERESTARTSYS - Interrupted.
2230  *
2231  * %-ENOMEM - Insufficient memory available.
2232  */
2233 int nilfs_construct_segment(struct super_block *sb)
2234 {
2235     struct the_nilfs *nilfs = sb->s_fs_info;
2236     struct nilfs_sc_info *sci = nilfs->ns_writer;
2237     struct nilfs_transaction_info *ti;
2238     int err;
2239 
2240     if (!sci)
2241         return -EROFS;
2242 
2243     /* A call inside transactions causes a deadlock. */
2244     BUG_ON((ti = current->journal_info) && ti->ti_magic == NILFS_TI_MAGIC);
2245 
2246     err = nilfs_segctor_sync(sci);
2247     return err;
2248 }
2249 
2250 /**
2251  * nilfs_construct_dsync_segment - construct a data-only logical segment
2252  * @sb: super block
2253  * @inode: inode whose data blocks should be written out
2254  * @start: start byte offset
2255  * @end: end byte offset (inclusive)
2256  *
2257  * Return Value: On success, 0 is returned. On errors, one of the following
2258  * negative error codes is returned.
2259  *
2260  * %-EROFS - Read only filesystem.
2261  *
2262  * %-EIO - I/O error
2263  *
2264  * %-ENOSPC - No space left on device (only in a panic state).
2265  *
2266  * %-ERESTARTSYS - Interrupted.
2267  *
2268  * %-ENOMEM - Insufficient memory available.
2269  */
2270 int nilfs_construct_dsync_segment(struct super_block *sb, struct inode *inode,
2271                   loff_t start, loff_t end)
2272 {
2273     struct the_nilfs *nilfs = sb->s_fs_info;
2274     struct nilfs_sc_info *sci = nilfs->ns_writer;
2275     struct nilfs_inode_info *ii;
2276     struct nilfs_transaction_info ti;
2277     int err = 0;
2278 
2279     if (!sci)
2280         return -EROFS;
2281 
2282     nilfs_transaction_lock(sb, &ti, 0);
2283 
2284     ii = NILFS_I(inode);
2285     if (test_bit(NILFS_I_INODE_SYNC, &ii->i_state) ||
2286         nilfs_test_opt(nilfs, STRICT_ORDER) ||
2287         test_bit(NILFS_SC_UNCLOSED, &sci->sc_flags) ||
2288         nilfs_discontinued(nilfs)) {
2289         nilfs_transaction_unlock(sb);
2290         err = nilfs_segctor_sync(sci);
2291         return err;
2292     }
2293 
2294     spin_lock(&nilfs->ns_inode_lock);
2295     if (!test_bit(NILFS_I_QUEUED, &ii->i_state) &&
2296         !test_bit(NILFS_I_BUSY, &ii->i_state)) {
2297         spin_unlock(&nilfs->ns_inode_lock);
2298         nilfs_transaction_unlock(sb);
2299         return 0;
2300     }
2301     spin_unlock(&nilfs->ns_inode_lock);
2302     sci->sc_dsync_inode = ii;
2303     sci->sc_dsync_start = start;
2304     sci->sc_dsync_end = end;
2305 
2306     err = nilfs_segctor_do_construct(sci, SC_LSEG_DSYNC);
2307     if (!err)
2308         nilfs->ns_flushed_device = 0;
2309 
2310     nilfs_transaction_unlock(sb);
2311     return err;
2312 }
2313 
2314 #define FLUSH_FILE_BIT  (0x1) /* data file only */
2315 #define FLUSH_DAT_BIT   BIT(NILFS_DAT_INO) /* DAT only */
2316 
2317 /**
2318  * nilfs_segctor_accept - record accepted sequence count of log-write requests
2319  * @sci: segment constructor object
2320  */
2321 static void nilfs_segctor_accept(struct nilfs_sc_info *sci)
2322 {
2323     spin_lock(&sci->sc_state_lock);
2324     sci->sc_seq_accepted = sci->sc_seq_request;
2325     spin_unlock(&sci->sc_state_lock);
2326     del_timer_sync(&sci->sc_timer);
2327 }
2328 
2329 /**
2330  * nilfs_segctor_notify - notify the result of request to caller threads
2331  * @sci: segment constructor object
2332  * @mode: mode of log forming
2333  * @err: error code to be notified
2334  */
2335 static void nilfs_segctor_notify(struct nilfs_sc_info *sci, int mode, int err)
2336 {
2337     /* Clear requests (even when the construction failed) */
2338     spin_lock(&sci->sc_state_lock);
2339 
2340     if (mode == SC_LSEG_SR) {
2341         sci->sc_state &= ~NILFS_SEGCTOR_COMMIT;
2342         sci->sc_seq_done = sci->sc_seq_accepted;
2343         nilfs_segctor_wakeup(sci, err);
2344         sci->sc_flush_request = 0;
2345     } else {
2346         if (mode == SC_FLUSH_FILE)
2347             sci->sc_flush_request &= ~FLUSH_FILE_BIT;
2348         else if (mode == SC_FLUSH_DAT)
2349             sci->sc_flush_request &= ~FLUSH_DAT_BIT;
2350 
2351         /* re-enable timer if checkpoint creation was not done */
2352         if ((sci->sc_state & NILFS_SEGCTOR_COMMIT) &&
2353             time_before(jiffies, sci->sc_timer.expires))
2354             add_timer(&sci->sc_timer);
2355     }
2356     spin_unlock(&sci->sc_state_lock);
2357 }
2358 
2359 /**
2360  * nilfs_segctor_construct - form logs and write them to disk
2361  * @sci: segment constructor object
2362  * @mode: mode of log forming
2363  */
2364 static int nilfs_segctor_construct(struct nilfs_sc_info *sci, int mode)
2365 {
2366     struct the_nilfs *nilfs = sci->sc_super->s_fs_info;
2367     struct nilfs_super_block **sbp;
2368     int err = 0;
2369 
2370     nilfs_segctor_accept(sci);
2371 
2372     if (nilfs_discontinued(nilfs))
2373         mode = SC_LSEG_SR;
2374     if (!nilfs_segctor_confirm(sci))
2375         err = nilfs_segctor_do_construct(sci, mode);
2376 
2377     if (likely(!err)) {
2378         if (mode != SC_FLUSH_DAT)
2379             atomic_set(&nilfs->ns_ndirtyblks, 0);
2380         if (test_bit(NILFS_SC_SUPER_ROOT, &sci->sc_flags) &&
2381             nilfs_discontinued(nilfs)) {
2382             down_write(&nilfs->ns_sem);
2383             err = -EIO;
2384             sbp = nilfs_prepare_super(sci->sc_super,
2385                           nilfs_sb_will_flip(nilfs));
2386             if (likely(sbp)) {
2387                 nilfs_set_log_cursor(sbp[0], nilfs);
2388                 err = nilfs_commit_super(sci->sc_super,
2389                              NILFS_SB_COMMIT);
2390             }
2391             up_write(&nilfs->ns_sem);
2392         }
2393     }
2394 
2395     nilfs_segctor_notify(sci, mode, err);
2396     return err;
2397 }
2398 
2399 static void nilfs_construction_timeout(struct timer_list *t)
2400 {
2401     struct nilfs_sc_info *sci = from_timer(sci, t, sc_timer);
2402 
2403     wake_up_process(sci->sc_timer_task);
2404 }
2405 
2406 static void
2407 nilfs_remove_written_gcinodes(struct the_nilfs *nilfs, struct list_head *head)
2408 {
2409     struct nilfs_inode_info *ii, *n;
2410 
2411     list_for_each_entry_safe(ii, n, head, i_dirty) {
2412         if (!test_bit(NILFS_I_UPDATED, &ii->i_state))
2413             continue;
2414         list_del_init(&ii->i_dirty);
2415         truncate_inode_pages(&ii->vfs_inode.i_data, 0);
2416         nilfs_btnode_cache_clear(ii->i_assoc_inode->i_mapping);
2417         iput(&ii->vfs_inode);
2418     }
2419 }
2420 
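     /**
      * nilfs_clean_segments - reclaim segments for garbage collection
      * @sb: super block instance
      * @argv: argument vector passed from the cleaner ioctl
      * @kbufs: array of kernel buffers holding the ioctl payloads
      *
      * Saves the DAT to its shadow map so that it can be restored if the
      * preparation fails, retries the full segment construction until it
      * succeeds, and finally issues discard requests for the freed
      * segments when the DISCARD mount option is set.
      */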
2421 int nilfs_clean_segments(struct super_block *sb, struct nilfs_argv *argv,
2422              void **kbufs)
2423 {
2424     struct the_nilfs *nilfs = sb->s_fs_info;
2425     struct nilfs_sc_info *sci = nilfs->ns_writer;
2426     struct nilfs_transaction_info ti;
2427     int err;
2428 
2429     if (unlikely(!sci))
2430         return -EROFS;
2431 
2432     nilfs_transaction_lock(sb, &ti, 1);
2433 
2434     err = nilfs_mdt_save_to_shadow_map(nilfs->ns_dat);
2435     if (unlikely(err))
2436         goto out_unlock;
2437 
2438     err = nilfs_ioctl_prepare_clean_segments(nilfs, argv, kbufs);
2439     if (unlikely(err)) {
2440         nilfs_mdt_restore_from_shadow_map(nilfs->ns_dat);
2441         goto out_unlock;
2442     }
2443 
2444     sci->sc_freesegs = kbufs[4];
2445     sci->sc_nfreesegs = argv[4].v_nmembs;
2446     list_splice_tail_init(&nilfs->ns_gc_inodes, &sci->sc_gc_inodes);
2447 
2448     for (;;) {
2449         err = nilfs_segctor_construct(sci, SC_LSEG_SR);
2450         nilfs_remove_written_gcinodes(nilfs, &sci->sc_gc_inodes);
2451 
2452         if (likely(!err))
2453             break;
2454 
2455         nilfs_warn(sb, "error %d cleaning segments", err);
2456         set_current_state(TASK_INTERRUPTIBLE);
2457         schedule_timeout(sci->sc_interval);
2458     }
2459     if (nilfs_test_opt(nilfs, DISCARD)) {
2460         int ret = nilfs_discard_segments(nilfs, sci->sc_freesegs,
2461                          sci->sc_nfreesegs);
2462         if (ret) {
2463             nilfs_warn(sb,
2464                    "error %d on discard request, turning discards off for the device",
2465                    ret);
2466             nilfs_clear_opt(nilfs, DISCARD);
2467         }
2468     }
2469 
2470  out_unlock:
2471     sci->sc_freesegs = NULL;
2472     sci->sc_nfreesegs = 0;
2473     nilfs_mdt_clear_shadow_map(nilfs->ns_dat);
2474     nilfs_transaction_unlock(sb);
2475     return err;
2476 }
2477 
2478 static void nilfs_segctor_thread_construct(struct nilfs_sc_info *sci, int mode)
2479 {
2480     struct nilfs_transaction_info ti;
2481 
2482     nilfs_transaction_lock(sci->sc_super, &ti, 0);
2483     nilfs_segctor_construct(sci, mode);
2484 
2485     /*
2486      * An unclosed segment should be retried.  We do this using sc_timer.
2487      * A timeout of sc_timer invokes a complete construction, which
2488      * closes the current logical segment.
2489      */
2490     if (test_bit(NILFS_SC_UNCLOSED, &sci->sc_flags))
2491         nilfs_segctor_start_timer(sci);
2492 
2493     nilfs_transaction_unlock(sci->sc_super);
2494 }
2495 
2496 static void nilfs_segctor_do_immediate_flush(struct nilfs_sc_info *sci)
2497 {
2498     int mode = 0;
2499 
2500     spin_lock(&sci->sc_state_lock);
2501     mode = (sci->sc_flush_request & FLUSH_DAT_BIT) ?
2502         SC_FLUSH_DAT : SC_FLUSH_FILE;
2503     spin_unlock(&sci->sc_state_lock);
2504 
2505     if (mode) {
2506         nilfs_segctor_do_construct(sci, mode);
2507 
2508         spin_lock(&sci->sc_state_lock);
2509         sci->sc_flush_request &= (mode == SC_FLUSH_FILE) ?
2510             ~FLUSH_FILE_BIT : ~FLUSH_DAT_BIT;
2511         spin_unlock(&sci->sc_state_lock);
2512     }
2513     clear_bit(NILFS_SC_PRIOR_FLUSH, &sci->sc_flags);
2514 }
2515 
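     /**
      * nilfs_segctor_flush_mode - choose a construction mode for flush requests
      * @sci: segment constructor object
      *
      * Returns a partial flush mode (SC_FLUSH_FILE or SC_FLUSH_DAT) while
      * the current logical segment is still young; otherwise falls back to
      * a full construction with a checkpoint (SC_LSEG_SR).
      */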
2516 static int nilfs_segctor_flush_mode(struct nilfs_sc_info *sci)
2517 {
2518     if (!test_bit(NILFS_SC_UNCLOSED, &sci->sc_flags) ||
2519         time_before(jiffies, sci->sc_lseg_stime + sci->sc_mjcp_freq)) {
2520         if (!(sci->sc_flush_request & ~FLUSH_FILE_BIT))
2521             return SC_FLUSH_FILE;
2522         else if (!(sci->sc_flush_request & ~FLUSH_DAT_BIT))
2523             return SC_FLUSH_DAT;
2524     }
2525     return SC_LSEG_SR;
2526 }
2527 
2528 /**
2529  * nilfs_segctor_thread - main loop of the segment constructor thread.
2530  * @arg: pointer to a struct nilfs_sc_info.
2531  *
2532  * nilfs_segctor_thread() initializes a timer and serves as a daemon
2533  * to execute segment constructions.
2534  */
2535 static int nilfs_segctor_thread(void *arg)
2536 {
2537     struct nilfs_sc_info *sci = (struct nilfs_sc_info *)arg;
2538     struct the_nilfs *nilfs = sci->sc_super->s_fs_info;
2539     int timeout = 0;
2540 
2541     sci->sc_timer_task = current;
2542 
2543     /* start sync. */
2544     sci->sc_task = current;
2545     wake_up(&sci->sc_wait_task); /* for nilfs_segctor_start_thread() */
2546     nilfs_info(sci->sc_super,
2547            "segctord starting. Construction interval = %lu seconds, CP frequency < %lu seconds",
2548            sci->sc_interval / HZ, sci->sc_mjcp_freq / HZ);
2549 
2550     spin_lock(&sci->sc_state_lock);
2551  loop:
2552     for (;;) {
2553         int mode;
2554 
2555         if (sci->sc_state & NILFS_SEGCTOR_QUIT)
2556             goto end_thread;
2557 
2558         if (timeout || sci->sc_seq_request != sci->sc_seq_done)
2559             mode = SC_LSEG_SR;
2560         else if (sci->sc_flush_request)
2561             mode = nilfs_segctor_flush_mode(sci);
2562         else
2563             break;
2564 
2565         spin_unlock(&sci->sc_state_lock);
2566         nilfs_segctor_thread_construct(sci, mode);
2567         spin_lock(&sci->sc_state_lock);
2568         timeout = 0;
2569     }
2570 
2572     if (freezing(current)) {
2573         spin_unlock(&sci->sc_state_lock);
2574         try_to_freeze();
2575         spin_lock(&sci->sc_state_lock);
2576     } else {
2577         DEFINE_WAIT(wait);
2578         int should_sleep = 1;
2579 
2580         prepare_to_wait(&sci->sc_wait_daemon, &wait,
2581                 TASK_INTERRUPTIBLE);
2582 
2583         if (sci->sc_seq_request != sci->sc_seq_done)
2584             should_sleep = 0;
2585         else if (sci->sc_flush_request)
2586             should_sleep = 0;
2587         else if (sci->sc_state & NILFS_SEGCTOR_COMMIT)
2588             should_sleep = time_before(jiffies,
2589                     sci->sc_timer.expires);
2590 
2591         if (should_sleep) {
2592             spin_unlock(&sci->sc_state_lock);
2593             schedule();
2594             spin_lock(&sci->sc_state_lock);
2595         }
2596         finish_wait(&sci->sc_wait_daemon, &wait);
2597         timeout = ((sci->sc_state & NILFS_SEGCTOR_COMMIT) &&
2598                time_after_eq(jiffies, sci->sc_timer.expires));
2599 
2600         if (nilfs_sb_dirty(nilfs) && nilfs_sb_need_update(nilfs))
2601             set_nilfs_discontinued(nilfs);
2602     }
2603     goto loop;
2604 
2605  end_thread:
2606     spin_unlock(&sci->sc_state_lock);
2607 
2608     /* end sync. */
2609     sci->sc_task = NULL;
2610     wake_up(&sci->sc_wait_task); /* for nilfs_segctor_kill_thread() */
2611     return 0;
2612 }
2613 
2614 static int nilfs_segctor_start_thread(struct nilfs_sc_info *sci)
2615 {
2616     struct task_struct *t;
2617 
2618     t = kthread_run(nilfs_segctor_thread, sci, "segctord");
2619     if (IS_ERR(t)) {
2620         int err = PTR_ERR(t);
2621 
2622         nilfs_err(sci->sc_super, "error %d creating segctord thread",
2623               err);
2624         return err;
2625     }
2626     wait_event(sci->sc_wait_task, sci->sc_task != NULL);
2627     return 0;
2628 }
2629 
2630 static void nilfs_segctor_kill_thread(struct nilfs_sc_info *sci)
2631     __acquires(&sci->sc_state_lock)
2632     __releases(&sci->sc_state_lock)
2633 {
2634     sci->sc_state |= NILFS_SEGCTOR_QUIT;
2635 
2636     while (sci->sc_task) {
2637         wake_up(&sci->sc_wait_daemon);
2638         spin_unlock(&sci->sc_state_lock);
2639         wait_event(sci->sc_wait_task, sci->sc_task == NULL);
2640         spin_lock(&sci->sc_state_lock);
2641     }
2642 }
2643 
2644 /*
2645  * Setup & clean-up functions
2646  */
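     
     /**
      * nilfs_segctor_new - allocate and initialize a log writer object
      * @sb: super block instance
      * @root: root object of the current filesystem tree
      *
      * Return: the new segment constructor object, or NULL on allocation
      * failure.
      */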
2647 static struct nilfs_sc_info *nilfs_segctor_new(struct super_block *sb,
2648                            struct nilfs_root *root)
2649 {
2650     struct the_nilfs *nilfs = sb->s_fs_info;
2651     struct nilfs_sc_info *sci;
2652 
2653     sci = kzalloc(sizeof(*sci), GFP_KERNEL);
2654     if (!sci)
2655         return NULL;
2656 
2657     sci->sc_super = sb;
2658 
2659     nilfs_get_root(root);
2660     sci->sc_root = root;
2661 
2662     init_waitqueue_head(&sci->sc_wait_request);
2663     init_waitqueue_head(&sci->sc_wait_daemon);
2664     init_waitqueue_head(&sci->sc_wait_task);
2665     spin_lock_init(&sci->sc_state_lock);
2666     INIT_LIST_HEAD(&sci->sc_dirty_files);
2667     INIT_LIST_HEAD(&sci->sc_segbufs);
2668     INIT_LIST_HEAD(&sci->sc_write_logs);
2669     INIT_LIST_HEAD(&sci->sc_gc_inodes);
2670     INIT_LIST_HEAD(&sci->sc_iput_queue);
2671     INIT_WORK(&sci->sc_iput_work, nilfs_iput_work_func);
2672     timer_setup(&sci->sc_timer, nilfs_construction_timeout, 0);
2673 
2674     sci->sc_interval = HZ * NILFS_SC_DEFAULT_TIMEOUT;
2675     sci->sc_mjcp_freq = HZ * NILFS_SC_DEFAULT_SR_FREQ;
2676     sci->sc_watermark = NILFS_SC_DEFAULT_WATERMARK;
2677 
2678     if (nilfs->ns_interval)
2679         sci->sc_interval = HZ * nilfs->ns_interval;
2680     if (nilfs->ns_watermark)
2681         sci->sc_watermark = nilfs->ns_watermark;
2682     return sci;
2683 }
2684 
2685 static void nilfs_segctor_write_out(struct nilfs_sc_info *sci)
2686 {
2687     int ret, retrycount = NILFS_SC_CLEANUP_RETRY;
2688 
2689     /*
2690      * The segctord thread was stopped and its timer was removed.
2691      * But some tasks remain, so flush them out with bounded retries.
2692      */
2693     do {
2694         struct nilfs_transaction_info ti;
2695 
2696         nilfs_transaction_lock(sci->sc_super, &ti, 0);
2697         ret = nilfs_segctor_construct(sci, SC_LSEG_SR);
2698         nilfs_transaction_unlock(sci->sc_super);
2699 
2700         flush_work(&sci->sc_iput_work);
2701 
2702     } while (ret && retrycount-- > 0);
2703 }
2704 
2705 /**
2706  * nilfs_segctor_destroy - destroy the segment constructor.
2707  * @sci: nilfs_sc_info
2708  *
2709  * nilfs_segctor_destroy() kills the segctord thread and frees
2710  * the nilfs_sc_info struct.
2711  * Caller must hold the segment semaphore.
2712  */
2713 static void nilfs_segctor_destroy(struct nilfs_sc_info *sci)
2714 {
2715     struct the_nilfs *nilfs = sci->sc_super->s_fs_info;
2716     int flag;
2717 
2718     up_write(&nilfs->ns_segctor_sem);
2719 
2720     spin_lock(&sci->sc_state_lock);
2721     nilfs_segctor_kill_thread(sci);
2722     flag = ((sci->sc_state & NILFS_SEGCTOR_COMMIT) || sci->sc_flush_request
2723         || sci->sc_seq_request != sci->sc_seq_done);
2724     spin_unlock(&sci->sc_state_lock);
2725 
2726     if (flush_work(&sci->sc_iput_work))
2727         flag = true;
2728 
2729     if (flag || !nilfs_segctor_confirm(sci))
2730         nilfs_segctor_write_out(sci);
2731 
2732     if (!list_empty(&sci->sc_dirty_files)) {
2733         nilfs_warn(sci->sc_super,
2734                "disposed unprocessed dirty file(s) when stopping log writer");
2735         nilfs_dispose_list(nilfs, &sci->sc_dirty_files, 1);
2736     }
2737 
2738     if (!list_empty(&sci->sc_iput_queue)) {
2739         nilfs_warn(sci->sc_super,
2740                "disposed unprocessed inode(s) in iput queue when stopping log writer");
2741         nilfs_dispose_list(nilfs, &sci->sc_iput_queue, 1);
2742     }
2743 
2744     WARN_ON(!list_empty(&sci->sc_segbufs));
2745     WARN_ON(!list_empty(&sci->sc_write_logs));
2746 
2747     nilfs_put_root(sci->sc_root);
2748 
2749     down_write(&nilfs->ns_segctor_sem);
2750 
2751     del_timer_sync(&sci->sc_timer);
2752     kfree(sci);
2753 }
2754 
2755 /**
2756  * nilfs_attach_log_writer - attach log writer
2757  * @sb: super block instance
2758  * @root: root object of the current filesystem tree
2759  *
2760  * This allocates a log writer object, initializes it, and starts the
2761  * log writer.
2762  *
2763  * Return Value: On success, 0 is returned. On error, one of the following
2764  * negative error codes is returned.
2765  *
2766  * %-ENOMEM - Insufficient memory available.
2767  */
2768 int nilfs_attach_log_writer(struct super_block *sb, struct nilfs_root *root)
2769 {
2770     struct the_nilfs *nilfs = sb->s_fs_info;
2771     int err;
2772 
2773     if (nilfs->ns_writer) {
2774         /*
2775          * This happens if the filesystem was remounted
2776          * read/write after nilfs_error degenerated it into a
2777          * read-only mount.
2778          */
2779         nilfs_detach_log_writer(sb);
2780     }
2781 
2782     nilfs->ns_writer = nilfs_segctor_new(sb, root);
2783     if (!nilfs->ns_writer)
2784         return -ENOMEM;
2785 
2786     inode_attach_wb(nilfs->ns_bdev->bd_inode, NULL);
2787 
2788     err = nilfs_segctor_start_thread(nilfs->ns_writer);
2789     if (err) {
2790         kfree(nilfs->ns_writer);
2791         nilfs->ns_writer = NULL;
2792     }
2793     return err;
2794 }
2795 
2796 /**
2797  * nilfs_detach_log_writer - destroy log writer
2798  * @sb: super block instance
2799  *
2800  * This kills log writer daemon, frees the log writer object, and
2801  * destroys list of dirty files.
2802  */
2803 void nilfs_detach_log_writer(struct super_block *sb)
2804 {
2805     struct the_nilfs *nilfs = sb->s_fs_info;
2806     LIST_HEAD(garbage_list);
2807 
2808     down_write(&nilfs->ns_segctor_sem);
2809     if (nilfs->ns_writer) {
2810         nilfs_segctor_destroy(nilfs->ns_writer);
2811         nilfs->ns_writer = NULL;
2812     }
2813 
2814     /* Forcibly free the list of dirty files */
2815     spin_lock(&nilfs->ns_inode_lock);
2816     if (!list_empty(&nilfs->ns_dirty_files)) {
2817         list_splice_init(&nilfs->ns_dirty_files, &garbage_list);
2818         nilfs_warn(sb,
2819                "disposed unprocessed dirty file(s) when detaching log writer");
2820     }
2821     spin_unlock(&nilfs->ns_inode_lock);
2822     up_write(&nilfs->ns_segctor_sem);
2823 
2824     nilfs_dispose_list(nilfs, &garbage_list, 1);
2825 }