fs/jbd2/checkpoint.c

0001 // SPDX-License-Identifier: GPL-2.0+
0002 /*
0003  * linux/fs/jbd2/checkpoint.c
0004  *
0005  * Written by Stephen C. Tweedie <sct@redhat.com>, 1999
0006  *
0007  * Copyright 1999 Red Hat Software --- All Rights Reserved
0008  *
0009  * Checkpoint routines for the generic filesystem journaling code.
0010  * Part of the ext2fs journaling system.
0011  *
0012  * Checkpointing is the process of ensuring that a section of the log is
0013  * committed fully to disk, so that this portion of the log can be
0014  * reused.
0015  */
0016
0017 #include <linux/time.h>
0018 #include <linux/fs.h>
0019 #include <linux/jbd2.h>
0020 #include <linux/errno.h>
0021 #include <linux/slab.h>
0022 #include <linux/blkdev.h>
0023 #include <trace/events/jbd2.h>
0024
0025 /*
0026  * Unlink a buffer from a transaction checkpoint list.
0027  *
0028  * Called with j_list_lock held.
0029  */
0030 static inline void __buffer_unlink_first(struct journal_head *jh)
0031 {
0032     transaction_t *transaction = jh->b_cp_transaction;
0033
0034     jh->b_cpnext->b_cpprev = jh->b_cpprev;
0035     jh->b_cpprev->b_cpnext = jh->b_cpnext;
0036     if (transaction->t_checkpoint_list == jh) {
0037         transaction->t_checkpoint_list = jh->b_cpnext;
0038         if (transaction->t_checkpoint_list == jh)
0039             transaction->t_checkpoint_list = NULL;
0040     }
0041 }
0042
0043 /*
0044  * Unlink a buffer from a transaction checkpoint(io) list.
0045  *
0046  * Called with j_list_lock held.
0047  */
0048 static inline void __buffer_unlink(struct journal_head *jh)
0049 {
0050     transaction_t *transaction = jh->b_cp_transaction;
0051
0052     __buffer_unlink_first(jh);
0053     if (transaction->t_checkpoint_io_list == jh) {
0054         transaction->t_checkpoint_io_list = jh->b_cpnext;
0055         if (transaction->t_checkpoint_io_list == jh)
0056             transaction->t_checkpoint_io_list = NULL;
0057     }
0058 }
0059
0060 /*
0061  * Move a buffer from the checkpoint list to the checkpoint io list
0062  *
0063  * Called with j_list_lock held
0064  */
0065 static inline void __buffer_relink_io(struct journal_head *jh)
0066 {
0067     transaction_t *transaction = jh->b_cp_transaction;
0068
0069     __buffer_unlink_first(jh);
0070
0071     if (!transaction->t_checkpoint_io_list) {
0072         jh->b_cpnext = jh->b_cpprev = jh;
0073     } else {
0074         jh->b_cpnext = transaction->t_checkpoint_io_list;
0075         jh->b_cpprev = transaction->t_checkpoint_io_list->b_cpprev;
0076         jh->b_cpprev->b_cpnext = jh;
0077         jh->b_cpnext->b_cpprev = jh;
0078     }
0079     transaction->t_checkpoint_io_list = jh;
0080 }
0081
0082 /*
0083  * Check whether a checkpoint buffer is busy, i.e. cannot be released yet.
0084  *
0085  * Requires j_list_lock
0086  */
0087 static inline bool __cp_buffer_busy(struct journal_head *jh)
0088 {
0089     struct buffer_head *bh = jh2bh(jh);
0090
0091     return (jh->b_transaction || buffer_locked(bh) || buffer_dirty(bh));
0092 }
0093
0094 /*
0095  * __jbd2_log_wait_for_space: wait until there is space in the journal.
0096  *
0097  * Called under j_state_lock *only*.  It will be unlocked if we have to wait
0098  * for a checkpoint to free up some space in the log.
0099  */
0100 void __jbd2_log_wait_for_space(journal_t *journal)
0101 __acquires(&journal->j_state_lock)
0102 __releases(&journal->j_state_lock)
0103 {
0104     int nblocks, space_left;
0105     /* assert_spin_locked(&journal->j_state_lock); */
0106
0107     nblocks = journal->j_max_transaction_buffers;
0108     while (jbd2_log_space_left(journal) < nblocks) {
0109         write_unlock(&journal->j_state_lock);
0110         mutex_lock_io(&journal->j_checkpoint_mutex);
0111
0112         /*
0113          * Test again, another process may have checkpointed while we
0114          * were waiting for the checkpoint lock. If there are no
0115          * transactions ready to be checkpointed, try to recover
0116          * journal space by calling cleanup_journal_tail(), and if
0117          * that doesn't work, by waiting for the currently committing
0118          * transaction to complete.  If there is absolutely no way
0119          * to make progress, this is either a BUG or corrupted
0120          * filesystem, so abort the journal and leave a stack
0121          * trace for forensic evidence.
0122          */
0123         write_lock(&journal->j_state_lock);
0124         if (journal->j_flags & JBD2_ABORT) {
0125             mutex_unlock(&journal->j_checkpoint_mutex);
0126             return;
0127         }
0128         spin_lock(&journal->j_list_lock);
0129         space_left = jbd2_log_space_left(journal);
0130         if (space_left < nblocks) {
0131             int chkpt = journal->j_checkpoint_transactions != NULL;
0132             tid_t tid = 0;
0133
0134             if (journal->j_committing_transaction)
0135                 tid = journal->j_committing_transaction->t_tid;
0136             spin_unlock(&journal->j_list_lock);
0137             write_unlock(&journal->j_state_lock);
0138             if (chkpt) {
0139                 jbd2_log_do_checkpoint(journal);
0140             } else if (jbd2_cleanup_journal_tail(journal) == 0) {
0141                 /* We were able to recover space; yay! */
0142                 ;
0143             } else if (tid) {
0144                 /*
0145                  * jbd2_journal_commit_transaction() may want
0146                  * to take the checkpoint_mutex if JBD2_FLUSHED
0147                  * is set.  So we need to temporarily drop it.
0148                  */
0149                 mutex_unlock(&journal->j_checkpoint_mutex);
0150                 jbd2_log_wait_commit(journal, tid);
0151                 write_lock(&journal->j_state_lock);
0152                 continue;
0153             } else {
0154                 printk(KERN_ERR "%s: needed %d blocks and "
0155                        "only had %d space available\n",
0156                        __func__, nblocks, space_left);
0157                 printk(KERN_ERR "%s: no way to get more "
0158                        "journal space in %s\n", __func__,
0159                        journal->j_devname);
0160                 WARN_ON(1);
0161                 jbd2_journal_abort(journal, -EIO);
0162             }
0163             write_lock(&journal->j_state_lock);
0164         } else {
0165             spin_unlock(&journal->j_list_lock);
0166         }
0167         mutex_unlock(&journal->j_checkpoint_mutex);
0168     }
0169 }
0170
0171 static void
0172 __flush_batch(journal_t *journal, int *batch_count)
0173 {
0174     int i;
0175     struct blk_plug plug;
0176
0177     blk_start_plug(&plug);
0178     for (i = 0; i < *batch_count; i++)
0179         write_dirty_buffer(journal->j_chkpt_bhs[i], REQ_SYNC);
0180     blk_finish_plug(&plug);
0181
0182     for (i = 0; i < *batch_count; i++) {
0183         struct buffer_head *bh = journal->j_chkpt_bhs[i];
0184         BUFFER_TRACE(bh, "brelse");
0185         __brelse(bh);
0186     }
0187     *batch_count = 0;
0188 }
0189
0190 /*
0191  * Perform an actual checkpoint. We take the first transaction on the
0192  * list of transactions to be checkpointed and send all its buffers
0193  * to disk. We submit larger chunks of data at once.
0194  *
0195  * The journal should be locked before calling this function.
0196  * Called with j_checkpoint_mutex held.
0197  */
0198 int jbd2_log_do_checkpoint(journal_t *journal)
0199 {
0200     struct journal_head *jh;
0201     struct buffer_head  *bh;
0202     transaction_t       *transaction;
0203     tid_t           this_tid;
0204     int         result, batch_count = 0;
0205
0206     jbd2_debug(1, "Start checkpoint\n");
0207
0208     /*
0209      * First thing: if there are any transactions in the log which
0210      * don't need checkpointing, just eliminate them from the
0211      * journal straight away.
0212      */
0213     result = jbd2_cleanup_journal_tail(journal);
0214     trace_jbd2_checkpoint(journal, result);
0215     jbd2_debug(1, "cleanup_journal_tail returned %d\n", result);
0216     if (result <= 0)
0217         return result;
0218
0219     /*
0220      * OK, we need to start writing disk blocks.  Take one transaction
0221      * and write it.
0222      */
0223     spin_lock(&journal->j_list_lock);
0224     if (!journal->j_checkpoint_transactions)
0225         goto out;
0226     transaction = journal->j_checkpoint_transactions;
0227     if (transaction->t_chp_stats.cs_chp_time == 0)
0228         transaction->t_chp_stats.cs_chp_time = jiffies;
0229     this_tid = transaction->t_tid;
0230 restart:
0231     /*
0232      * If someone cleaned up this transaction while we slept, we're
0233      * done (maybe it's a new transaction, but it fell at the same
0234      * address).
0235      */
0236     if (journal->j_checkpoint_transactions != transaction ||
0237         transaction->t_tid != this_tid)
0238         goto out;
0239
0240     /* checkpoint all of the transaction's buffers */
0241     while (transaction->t_checkpoint_list) {
0242         jh = transaction->t_checkpoint_list;
0243         bh = jh2bh(jh);
0244
0245         if (buffer_locked(bh)) {
0246             get_bh(bh);
0247             spin_unlock(&journal->j_list_lock);
0248             wait_on_buffer(bh);
0249             /* the journal_head may have gone by now */
0250             BUFFER_TRACE(bh, "brelse");
0251             __brelse(bh);
0252             goto retry;
0253         }
0254         if (jh->b_transaction != NULL) {
0255             transaction_t *t = jh->b_transaction;
0256             tid_t tid = t->t_tid;
0257
0258             transaction->t_chp_stats.cs_forced_to_close++;
0259             spin_unlock(&journal->j_list_lock);
0260             if (unlikely(journal->j_flags & JBD2_UNMOUNT))
0261                 /*
0262                  * The journal thread is dead; so
0263                  * starting and waiting for a commit
0264                  * to finish will cause us to wait for
0265                  * a _very_ long time.
0266                  */
0267                 printk(KERN_ERR
0268         "JBD2: %s: Waiting for Godot: block %llu\n",
0269         journal->j_devname, (unsigned long long) bh->b_blocknr);
0270
0271             if (batch_count)
0272                 __flush_batch(journal, &batch_count);
0273             jbd2_log_start_commit(journal, tid);
0274             /*
0275              * jbd2_journal_commit_transaction() may want
0276              * to take the checkpoint_mutex if JBD2_FLUSHED
0277              * is set, jbd2_update_log_tail() called by
0278              * jbd2_journal_commit_transaction() may also take
0279              * checkpoint_mutex.  So we need to temporarily
0280              * drop it.
0281              */
0282             mutex_unlock(&journal->j_checkpoint_mutex);
0283             jbd2_log_wait_commit(journal, tid);
0284             mutex_lock_io(&journal->j_checkpoint_mutex);
0285             spin_lock(&journal->j_list_lock);
0286             goto restart;
0287         }
0288         if (!buffer_dirty(bh)) {
0289             BUFFER_TRACE(bh, "remove from checkpoint");
0290             if (__jbd2_journal_remove_checkpoint(jh))
0291                 /* The transaction was released; we're done */
0292                 goto out;
0293             continue;
0294         }
0295         /*
0296          * Important: we are about to write the buffer, and
0297          * possibly block, while still holding the journal
0298          * lock.  We cannot afford to let the transaction
0299          * logic start messing around with this buffer before
0300          * we write it to disk, as that would break
0301          * recoverability.
0302          */
0303         BUFFER_TRACE(bh, "queue");
0304         get_bh(bh);
0305         J_ASSERT_BH(bh, !buffer_jwrite(bh));
0306         journal->j_chkpt_bhs[batch_count++] = bh;
0307         __buffer_relink_io(jh);
0308         transaction->t_chp_stats.cs_written++;
0309         if ((batch_count == JBD2_NR_BATCH) ||
0310             need_resched() ||
0311             spin_needbreak(&journal->j_list_lock))
0312             goto unlock_and_flush;
0313     }
0314
0315     if (batch_count) {
0316         unlock_and_flush:
0317             spin_unlock(&journal->j_list_lock);
0318         retry:
0319             if (batch_count)
0320                 __flush_batch(journal, &batch_count);
0321             spin_lock(&journal->j_list_lock);
0322             goto restart;
0323     }
0324
0325     /*
0326      * Now we issued all of the transaction's buffers, let's deal
0327      * with the buffers that are out for I/O.
0328      */
0329 restart2:
0330     /* Did somebody clean up the transaction in the meanwhile? */
0331     if (journal->j_checkpoint_transactions != transaction ||
0332         transaction->t_tid != this_tid)
0333         goto out;
0334
0335     while (transaction->t_checkpoint_io_list) {
0336         jh = transaction->t_checkpoint_io_list;
0337         bh = jh2bh(jh);
0338         if (buffer_locked(bh)) {
0339             get_bh(bh);
0340             spin_unlock(&journal->j_list_lock);
0341             wait_on_buffer(bh);
0342             /* the journal_head may have gone by now */
0343             BUFFER_TRACE(bh, "brelse");
0344             __brelse(bh);
0345             spin_lock(&journal->j_list_lock);
0346             goto restart2;
0347         }
0348
0349         /*
0350          * Now in whatever state the buffer currently is, we
0351          * know that it has been written out and so we can
0352          * drop it from the list
0353          */
0354         if (__jbd2_journal_remove_checkpoint(jh))
0355             break;
0356     }
0357 out:
0358     spin_unlock(&journal->j_list_lock);
0359     result = jbd2_cleanup_journal_tail(journal);
0360
0361     return (result < 0) ? result : 0;
0362 }
0363
0364 /*
0365  * Check the list of checkpoint transactions for the journal to see if
0366  * we have already got rid of any since the last update of the log tail
0367  * in the journal superblock.  If so, we can instantly roll the
0368  * superblock forward to remove those transactions from the log.
0369  *
0370  * Return <0 on error, 0 on success, 1 if there was nothing to clean up.
0371  *
0372  * Called with the journal lock held.
0373  *
0374  * This is the only part of the journaling code which really needs to be
0375  * aware of transaction aborts.  Checkpointing involves writing to the
0376  * main filesystem area rather than to the journal, so it can proceed
0377  * even in abort state, but we must not update the super block if
0378  * checkpointing may have failed.  Otherwise, we would lose some metadata
0379  * buffers which should be written-back to the filesystem.
0380  */
0381
0382 int jbd2_cleanup_journal_tail(journal_t *journal)
0383 {
0384     tid_t       first_tid;
0385     unsigned long   blocknr;
0386
0387     if (is_journal_aborted(journal))
0388         return -EIO;
0389
0390     if (!jbd2_journal_get_log_tail(journal, &first_tid, &blocknr))
0391         return 1;
0392     J_ASSERT(blocknr != 0);
0393
0394     /*
0395      * We need to make sure that any blocks that were recently written out
0396      * --- perhaps by jbd2_log_do_checkpoint() --- are flushed out before
0397      * we drop the transactions from the journal. It's unlikely this will
0398      * be necessary, especially with an appropriately sized journal, but we
0399      * need this to guarantee correctness.  Fortunately
0400      * jbd2_cleanup_journal_tail() doesn't get called all that often.
0401      */
0402     if (journal->j_flags & JBD2_BARRIER)
0403         blkdev_issue_flush(journal->j_fs_dev);
0404
0405     return __jbd2_update_log_tail(journal, first_tid, blocknr);
0406 }
0407
0408
0409 /* Checkpoint list management */
0410
0411 /*
0412  * journal_clean_one_cp_list
0413  *
0414  * Find all the written-back checkpoint buffers in the given list and
0415  * release them. If 'destroy' is set, clean all buffers unconditionally.
0416  *
0417  * Called with j_list_lock held.
0418  * Returns 1 if we freed the transaction, 0 otherwise.
0419  */
0420 static int journal_clean_one_cp_list(struct journal_head *jh, bool destroy)
0421 {
0422     struct journal_head *last_jh;
0423     struct journal_head *next_jh = jh;
0424
0425     if (!jh)
0426         return 0;
0427
0428     last_jh = jh->b_cpprev;
0429     do {
0430         jh = next_jh;
0431         next_jh = jh->b_cpnext;
0432
0433         if (!destroy && __cp_buffer_busy(jh))
0434             return 0;
0435
0436         if (__jbd2_journal_remove_checkpoint(jh))
0437             return 1;
0438         /*
0439          * This function only frees up some memory
0440          * if possible so we dont have an obligation
0441          * to finish processing. Bail out if preemption
0442          * requested:
0443          */
0444         if (need_resched())
0445             return 0;
0446     } while (jh != last_jh);
0447
0448     return 0;
0449 }
0450
0451 /*
0452  * journal_shrink_one_cp_list
0453  *
0454  * Find 'nr_to_scan' written-back checkpoint buffers in the given list
0455  * and try to release them. If the whole transaction is released, set
0456  * the 'released' parameter. Return the number of released checkpointed
0457  * buffers.
0458  *
0459  * Called with j_list_lock held.
0460  */
0461 static unsigned long journal_shrink_one_cp_list(struct journal_head *jh,
0462                         unsigned long *nr_to_scan,
0463                         bool *released)
0464 {
0465     struct journal_head *last_jh;
0466     struct journal_head *next_jh = jh;
0467     unsigned long nr_freed = 0;
0468     int ret;
0469
0470     if (!jh || *nr_to_scan == 0)
0471         return 0;
0472
0473     last_jh = jh->b_cpprev;
0474     do {
0475         jh = next_jh;
0476         next_jh = jh->b_cpnext;
0477
0478         (*nr_to_scan)--;
0479         if (__cp_buffer_busy(jh))
0480             continue;
0481
0482         nr_freed++;
0483         ret = __jbd2_journal_remove_checkpoint(jh);
0484         if (ret) {
0485             *released = true;
0486             break;
0487         }
0488
0489         if (need_resched())
0490             break;
0491     } while (jh != last_jh && *nr_to_scan);
0492
0493     return nr_freed;
0494 }
0495
0496 /*
0497  * jbd2_journal_shrink_checkpoint_list
0498  *
0499  * Find 'nr_to_scan' written-back checkpoint buffers in the journal
0500  * and try to release them. Return the number of released checkpointed
0501  * buffers.
0502  *
0503  * Takes j_list_lock internally; must be called without j_list_lock held.
0504  */
0505 unsigned long jbd2_journal_shrink_checkpoint_list(journal_t *journal,
0506                           unsigned long *nr_to_scan)
0507 {
0508     transaction_t *transaction, *last_transaction, *next_transaction;
0509     bool released;
0510     tid_t first_tid = 0, last_tid = 0, next_tid = 0;
0511     tid_t tid = 0;
0512     unsigned long nr_freed = 0;
0513     unsigned long nr_scanned = *nr_to_scan;
0514
0515 again:
0516     spin_lock(&journal->j_list_lock);
0517     if (!journal->j_checkpoint_transactions) {
0518         spin_unlock(&journal->j_list_lock);
0519         goto out;
0520     }
0521
0522     /*
0523      * Get next shrink transaction, resume previous scan or start
0524      * over again. If some others do checkpoint and drop transaction
0525      * from the checkpoint list, we ignore saved j_shrink_transaction
0526      * and start over unconditionally.
0527      */
0528     if (journal->j_shrink_transaction)
0529         transaction = journal->j_shrink_transaction;
0530     else
0531         transaction = journal->j_checkpoint_transactions;
0532
0533     if (!first_tid)
0534         first_tid = transaction->t_tid;
0535     last_transaction = journal->j_checkpoint_transactions->t_cpprev;
0536     next_transaction = transaction;
0537     last_tid = last_transaction->t_tid;
0538     do {
0539         transaction = next_transaction;
0540         next_transaction = transaction->t_cpnext;
0541         tid = transaction->t_tid;
0542         released = false;
0543
0544         nr_freed += journal_shrink_one_cp_list(transaction->t_checkpoint_list,
0545                                nr_to_scan, &released);
0546         if (*nr_to_scan == 0)
0547             break;
0548         if (need_resched() || spin_needbreak(&journal->j_list_lock))
0549             break;
0550         if (released)
0551             continue;
0552
0553         nr_freed += journal_shrink_one_cp_list(transaction->t_checkpoint_io_list,
0554                                nr_to_scan, &released);
0555         if (*nr_to_scan == 0)
0556             break;
0557         if (need_resched() || spin_needbreak(&journal->j_list_lock))
0558             break;
0559     } while (transaction != last_transaction);
0560
0561     if (transaction != last_transaction) {
0562         journal->j_shrink_transaction = next_transaction;
0563         next_tid = next_transaction->t_tid;
0564     } else {
0565         journal->j_shrink_transaction = NULL;
0566         next_tid = 0;
0567     }
0568
0569     spin_unlock(&journal->j_list_lock);
0570     cond_resched();
0571
0572     if (*nr_to_scan && next_tid)
0573         goto again;
0574 out:
0575     nr_scanned -= *nr_to_scan;
0576     trace_jbd2_shrink_checkpoint_list(journal, first_tid, tid, last_tid,
0577                       nr_freed, nr_scanned, next_tid);
0578
0579     return nr_freed;
0580 }
0581
0582 /*
0583  * journal_clean_checkpoint_list
0584  *
0585  * Find all the written-back checkpoint buffers in the journal and release them.
0586  * If 'destroy' is set, release all buffers unconditionally.
0587  *
0588  * Called with j_list_lock held.
0589  */
0590 void __jbd2_journal_clean_checkpoint_list(journal_t *journal, bool destroy)
0591 {
0592     transaction_t *transaction, *last_transaction, *next_transaction;
0593     int ret;
0594
0595     transaction = journal->j_checkpoint_transactions;
0596     if (!transaction)
0597         return;
0598
0599     last_transaction = transaction->t_cpprev;
0600     next_transaction = transaction;
0601     do {
0602         transaction = next_transaction;
0603         next_transaction = transaction->t_cpnext;
0604         ret = journal_clean_one_cp_list(transaction->t_checkpoint_list,
0605                         destroy);
0606         /*
0607          * This function only frees up some memory if possible so we
0608          * dont have an obligation to finish processing. Bail out if
0609          * preemption requested:
0610          */
0611         if (need_resched())
0612             return;
0613         if (ret)
0614             continue;
0615         /*
0616          * It is essential that we are as careful as in the case of
0617          * t_checkpoint_list with removing the buffer from the list as
0618          * we can possibly see not yet submitted buffers on io_list
0619          */
0620         ret = journal_clean_one_cp_list(transaction->
0621                 t_checkpoint_io_list, destroy);
0622         if (need_resched())
0623             return;
0624         /*
0625          * Stop scanning if we couldn't free the transaction. This
0626          * avoids pointless scanning of transactions which still
0627          * weren't checkpointed.
0628          */
0629         if (!ret)
0630             return;
0631     } while (transaction != last_transaction);
0632 }
0633
0634 /*
0635  * Remove buffers from all checkpoint lists as journal is aborted and we just
0636  * need to free memory
0637  */
0638 void jbd2_journal_destroy_checkpoint(journal_t *journal)
0639 {
0640     /*
0641      * We loop because __jbd2_journal_clean_checkpoint_list() may abort
0642      * early due to a need of rescheduling.
0643      */
0644     while (1) {
0645         spin_lock(&journal->j_list_lock);
0646         if (!journal->j_checkpoint_transactions) {
0647             spin_unlock(&journal->j_list_lock);
0648             break;
0649         }
0650         __jbd2_journal_clean_checkpoint_list(journal, true);
0651         spin_unlock(&journal->j_list_lock);
0652         cond_resched();
0653     }
0654 }
0655
0656 /*
0657  * journal_remove_checkpoint: called after a buffer has been committed
0658  * to disk (either by being write-back flushed to disk, or being
0659  * committed to the log).
0660  *
0661  * We cannot safely clean a transaction out of the log until all of the
0662  * buffer updates committed in that transaction have safely been stored
0663  * elsewhere on disk.  To achieve this, all of the buffers in a
0664  * transaction need to be maintained on the transaction's checkpoint
0665  * lists until they have been rewritten, at which point this function is
0666  * called to remove the buffer from the existing transaction's
0667  * checkpoint lists.
0668  *
0669  * The function returns 1 if it frees the transaction, 0 otherwise.
0670  * The function can free jh and bh.
0671  *
0672  * This function is called with j_list_lock held.
0673  */
0674 int __jbd2_journal_remove_checkpoint(struct journal_head *jh)
0675 {
0676     struct transaction_chp_stats_s *stats;
0677     transaction_t *transaction;
0678     journal_t *journal;
0679     struct buffer_head *bh = jh2bh(jh);
0680
0681     JBUFFER_TRACE(jh, "entry");
0682
0683     transaction = jh->b_cp_transaction;
0684     if (!transaction) {
0685         JBUFFER_TRACE(jh, "not on transaction");
0686         return 0;
0687     }
0688     journal = transaction->t_journal;
0689
0690     JBUFFER_TRACE(jh, "removing from transaction");
0691
0692     /*
0693      * If we have failed to write the buffer out to disk, the filesystem
0694      * may become inconsistent. We cannot abort the journal here since
0695      * we hold j_list_lock and we have to be careful about races with
0696      * jbd2_journal_destroy(). So mark the writeback IO error in the
0697      * journal here and we abort the journal later from a better context.
0698      */
0699     if (buffer_write_io_error(bh))
0700         set_bit(JBD2_CHECKPOINT_IO_ERROR, &journal->j_atomic_flags);
0701
0702     __buffer_unlink(jh);
0703     jh->b_cp_transaction = NULL;
0704     percpu_counter_dec(&journal->j_checkpoint_jh_count);
0705     jbd2_journal_put_journal_head(jh);
0706
0707     /* Is this transaction empty? */
0708     if (transaction->t_checkpoint_list || transaction->t_checkpoint_io_list)
0709         return 0;
0710
0711     /*
0712      * There is one special case to worry about: if we have just pulled the
0713      * buffer off a running or committing transaction's checkpoing list,
0714      * then even if the checkpoint list is empty, the transaction obviously
0715      * cannot be dropped!
0716      *
0717      * The locking here around t_state is a bit sleazy.
0718      * See the comment at the end of jbd2_journal_commit_transaction().
0719      */
0720     if (transaction->t_state != T_FINISHED)
0721         return 0;
0722
0723     /*
0724      * OK, that was the last buffer for the transaction, we can now
0725      * safely remove this transaction from the log.
0726      */
0727     stats = &transaction->t_chp_stats;
0728     if (stats->cs_chp_time)
0729         stats->cs_chp_time = jbd2_time_diff(stats->cs_chp_time,
0730                             jiffies);
0731     trace_jbd2_checkpoint_stats(journal->j_fs_dev->bd_dev,
0732                     transaction->t_tid, stats);
0733
0734     __jbd2_journal_drop_transaction(journal, transaction);
0735     jbd2_journal_free_transaction(transaction);
0736     return 1;
0737 }
0738
0739 /*
0740  * journal_insert_checkpoint: put a committed buffer onto a checkpoint
0741  * list so that we know when it is safe to clean the transaction out of
0742  * the log.
0743  *
0744  * Called with the journal locked.
0745  * Called with j_list_lock held.
0746  */
0747 void __jbd2_journal_insert_checkpoint(struct journal_head *jh,
0748                    transaction_t *transaction)
0749 {
0750     JBUFFER_TRACE(jh, "entry");
0751     J_ASSERT_JH(jh, buffer_dirty(jh2bh(jh)) || buffer_jbddirty(jh2bh(jh)));
0752     J_ASSERT_JH(jh, jh->b_cp_transaction == NULL);
0753
0754     /* Get reference for checkpointing transaction */
0755     jbd2_journal_grab_journal_head(jh2bh(jh));
0756     jh->b_cp_transaction = transaction;
0757
0758     if (!transaction->t_checkpoint_list) {
0759         jh->b_cpnext = jh->b_cpprev = jh;
0760     } else {
0761         jh->b_cpnext = transaction->t_checkpoint_list;
0762         jh->b_cpprev = transaction->t_checkpoint_list->b_cpprev;
0763         jh->b_cpprev->b_cpnext = jh;
0764         jh->b_cpnext->b_cpprev = jh;
0765     }
0766     transaction->t_checkpoint_list = jh;
0767     percpu_counter_inc(&transaction->t_journal->j_checkpoint_jh_count);
0768 }
0769
0770 /*
0771  * We've finished with this transaction structure: adios...
0772  *
0773  * The transaction must have no links except for the checkpoint by this
0774  * point.
0775  *
0776  * Called with the journal locked.
0777  * Called with j_list_lock held.
0778  */
0779
0780 void __jbd2_journal_drop_transaction(journal_t *journal, transaction_t *transaction)
0781 {
0782     assert_spin_locked(&journal->j_list_lock);
0783
0784     journal->j_shrink_transaction = NULL;
0785     if (transaction->t_cpnext) {
0786         transaction->t_cpnext->t_cpprev = transaction->t_cpprev;
0787         transaction->t_cpprev->t_cpnext = transaction->t_cpnext;
0788         if (journal->j_checkpoint_transactions == transaction)
0789             journal->j_checkpoint_transactions =
0790                 transaction->t_cpnext;
0791         if (journal->j_checkpoint_transactions == transaction)
0792             journal->j_checkpoint_transactions = NULL;
0793     }
0794
0795     J_ASSERT(transaction->t_state == T_FINISHED);
0796     J_ASSERT(transaction->t_buffers == NULL);
0797     J_ASSERT(transaction->t_forget == NULL);
0798     J_ASSERT(transaction->t_shadow_list == NULL);
0799     J_ASSERT(transaction->t_checkpoint_list == NULL);
0800     J_ASSERT(transaction->t_checkpoint_io_list == NULL);
0801     J_ASSERT(atomic_read(&transaction->t_updates) == 0);
0802     J_ASSERT(journal->j_committing_transaction != transaction);
0803     J_ASSERT(journal->j_running_transaction != transaction);
0804
0805     trace_jbd2_drop_transaction(journal, transaction);
0806
0807     jbd2_debug(1, "Dropping transaction %d, all done\n", transaction->t_tid);
0808 }