Back to home page

OSCL-LXR

 
 

    


0001 // SPDX-License-Identifier: GPL-2.0+
0002 /*
0003  * linux/fs/jbd2/recovery.c
0004  *
0005  * Written by Stephen C. Tweedie <sct@redhat.com>, 1999
0006  *
0007  * Copyright 1999-2000 Red Hat Software --- All Rights Reserved
0008  *
0009  * Journal recovery routines for the generic filesystem journaling code;
0010  * part of the ext2fs journaling system.
0011  */
0012 
0013 #ifndef __KERNEL__
0014 #include "jfs_user.h"
0015 #else
0016 #include <linux/time.h>
0017 #include <linux/fs.h>
0018 #include <linux/jbd2.h>
0019 #include <linux/errno.h>
0020 #include <linux/crc32.h>
0021 #include <linux/blkdev.h>
0022 #endif
0023 
0024 /*
0025  * Maintain information about the progress of the recovery job, so that
0026  * the different passes can carry information between them.
0027  */
0028 struct recovery_info
0029 {
0030     tid_t       start_transaction;
0031     tid_t       end_transaction;
0032 
0033     int     nr_replays;
0034     int     nr_revokes;
0035     int     nr_revoke_hits;
0036 };
0037 
0038 static int do_one_pass(journal_t *journal,
0039                 struct recovery_info *info, enum passtype pass);
0040 static int scan_revoke_records(journal_t *, struct buffer_head *,
0041                 tid_t, struct recovery_info *);
0042 
0043 #ifdef __KERNEL__
0044 
/* Drop the references on the first @n readahead buffers in @b, last first. */
static void journal_brelse_array(struct buffer_head *b[], int n)
{
    int i;

    for (i = n - 1; i >= 0; i--)
        brelse(b[i]);
}
0051 
0052 
0053 /*
0054  * When reading from the journal, we are going through the block device
0055  * layer directly and so there is no readahead being done for us.  We
0056  * need to implement any readahead ourselves if we want it to happen at
0057  * all.  Recovery is basically one long sequential read, so make sure we
0058  * do the IO in reasonably large chunks.
0059  *
0060  * This is not so critical that we need to be enormously clever about
0061  * the readahead size, though.  128K is a purely arbitrary, good-enough
0062  * fixed value.
0063  */
0064 
0065 #define MAXBUF 8
0066 static int do_readahead(journal_t *journal, unsigned int start)
0067 {
0068     int err;
0069     unsigned int max, nbufs, next;
0070     unsigned long long blocknr;
0071     struct buffer_head *bh;
0072 
0073     struct buffer_head * bufs[MAXBUF];
0074 
0075     /* Do up to 128K of readahead */
0076     max = start + (128 * 1024 / journal->j_blocksize);
0077     if (max > journal->j_total_len)
0078         max = journal->j_total_len;
0079 
0080     /* Do the readahead itself.  We'll submit MAXBUF buffer_heads at
0081      * a time to the block device IO layer. */
0082 
0083     nbufs = 0;
0084 
0085     for (next = start; next < max; next++) {
0086         err = jbd2_journal_bmap(journal, next, &blocknr);
0087 
0088         if (err) {
0089             printk(KERN_ERR "JBD2: bad block at offset %u\n",
0090                 next);
0091             goto failed;
0092         }
0093 
0094         bh = __getblk(journal->j_dev, blocknr, journal->j_blocksize);
0095         if (!bh) {
0096             err = -ENOMEM;
0097             goto failed;
0098         }
0099 
0100         if (!buffer_uptodate(bh) && !buffer_locked(bh)) {
0101             bufs[nbufs++] = bh;
0102             if (nbufs == MAXBUF) {
0103                 ll_rw_block(REQ_OP_READ, nbufs, bufs);
0104                 journal_brelse_array(bufs, nbufs);
0105                 nbufs = 0;
0106             }
0107         } else
0108             brelse(bh);
0109     }
0110 
0111     if (nbufs)
0112         ll_rw_block(REQ_OP_READ, nbufs, bufs);
0113     err = 0;
0114 
0115 failed:
0116     if (nbufs)
0117         journal_brelse_array(bufs, nbufs);
0118     return err;
0119 }
0120 
0121 #endif /* __KERNEL__ */
0122 
0123 
0124 /*
0125  * Read a block from the journal
0126  */
0127 
0128 static int jread(struct buffer_head **bhp, journal_t *journal,
0129          unsigned int offset)
0130 {
0131     int err;
0132     unsigned long long blocknr;
0133     struct buffer_head *bh;
0134 
0135     *bhp = NULL;
0136 
0137     if (offset >= journal->j_total_len) {
0138         printk(KERN_ERR "JBD2: corrupted journal superblock\n");
0139         return -EFSCORRUPTED;
0140     }
0141 
0142     err = jbd2_journal_bmap(journal, offset, &blocknr);
0143 
0144     if (err) {
0145         printk(KERN_ERR "JBD2: bad block at offset %u\n",
0146             offset);
0147         return err;
0148     }
0149 
0150     bh = __getblk(journal->j_dev, blocknr, journal->j_blocksize);
0151     if (!bh)
0152         return -ENOMEM;
0153 
0154     if (!buffer_uptodate(bh)) {
0155         /* If this is a brand new buffer, start readahead.
0156                    Otherwise, we assume we are already reading it.  */
0157         if (!buffer_req(bh))
0158             do_readahead(journal, offset);
0159         wait_on_buffer(bh);
0160     }
0161 
0162     if (!buffer_uptodate(bh)) {
0163         printk(KERN_ERR "JBD2: Failed to read block at offset %u\n",
0164             offset);
0165         brelse(bh);
0166         return -EIO;
0167     }
0168 
0169     *bhp = bh;
0170     return 0;
0171 }
0172 
0173 static int jbd2_descriptor_block_csum_verify(journal_t *j, void *buf)
0174 {
0175     struct jbd2_journal_block_tail *tail;
0176     __be32 provided;
0177     __u32 calculated;
0178 
0179     if (!jbd2_journal_has_csum_v2or3(j))
0180         return 1;
0181 
0182     tail = (struct jbd2_journal_block_tail *)((char *)buf +
0183         j->j_blocksize - sizeof(struct jbd2_journal_block_tail));
0184     provided = tail->t_checksum;
0185     tail->t_checksum = 0;
0186     calculated = jbd2_chksum(j, j->j_csum_seed, buf, j->j_blocksize);
0187     tail->t_checksum = provided;
0188 
0189     return provided == cpu_to_be32(calculated);
0190 }
0191 
0192 /*
0193  * Count the number of in-use tags in a journal descriptor block.
0194  */
0195 
0196 static int count_tags(journal_t *journal, struct buffer_head *bh)
0197 {
0198     char *          tagp;
0199     journal_block_tag_t tag;
0200     int         nr = 0, size = journal->j_blocksize;
0201     int         tag_bytes = journal_tag_bytes(journal);
0202 
0203     if (jbd2_journal_has_csum_v2or3(journal))
0204         size -= sizeof(struct jbd2_journal_block_tail);
0205 
0206     tagp = &bh->b_data[sizeof(journal_header_t)];
0207 
0208     while ((tagp - bh->b_data + tag_bytes) <= size) {
0209         memcpy(&tag, tagp, sizeof(tag));
0210 
0211         nr++;
0212         tagp += tag_bytes;
0213         if (!(tag.t_flags & cpu_to_be16(JBD2_FLAG_SAME_UUID)))
0214             tagp += 16;
0215 
0216         if (tag.t_flags & cpu_to_be16(JBD2_FLAG_LAST_TAG))
0217             break;
0218     }
0219 
0220     return nr;
0221 }
0222 
0223 
/*
 * Make sure we wrap around the log correctly!
 *
 * Once @var reaches j_last it is folded back by the length of the log
 * (j_last - j_first).  The wrap point is always j_last, even when the
 * fast-commit feature is enabled: the fast-commit area
 * (j_fc_first..j_fc_last) is walked separately by fc_do_one_pass() and is
 * not part of the circular log, so wrapping at j_fc_last would let the
 * normal scan run into fast-commit blocks.
 *
 * @var must be an lvalue; it is evaluated more than once.
 */
#define wrap(journal, var)                      \
do {                                    \
    if ((var) >= (journal)->j_last)                 \
        (var) -= ((journal)->j_last - (journal)->j_first);  \
} while (0)
0234 
0235 static int fc_do_one_pass(journal_t *journal,
0236               struct recovery_info *info, enum passtype pass)
0237 {
0238     unsigned int expected_commit_id = info->end_transaction;
0239     unsigned long next_fc_block;
0240     struct buffer_head *bh;
0241     int err = 0;
0242 
0243     next_fc_block = journal->j_fc_first;
0244     if (!journal->j_fc_replay_callback)
0245         return 0;
0246 
0247     while (next_fc_block <= journal->j_fc_last) {
0248         jbd2_debug(3, "Fast commit replay: next block %ld\n",
0249               next_fc_block);
0250         err = jread(&bh, journal, next_fc_block);
0251         if (err) {
0252             jbd2_debug(3, "Fast commit replay: read error\n");
0253             break;
0254         }
0255 
0256         err = journal->j_fc_replay_callback(journal, bh, pass,
0257                     next_fc_block - journal->j_fc_first,
0258                     expected_commit_id);
0259         next_fc_block++;
0260         if (err < 0 || err == JBD2_FC_REPLAY_STOP)
0261             break;
0262         err = 0;
0263     }
0264 
0265     if (err)
0266         jbd2_debug(3, "Fast commit replay failed, err = %d\n", err);
0267 
0268     return err;
0269 }
0270 
0271 /**
0272  * jbd2_journal_recover - recovers a on-disk journal
0273  * @journal: the journal to recover
0274  *
0275  * The primary function for recovering the log contents when mounting a
0276  * journaled device.
0277  *
0278  * Recovery is done in three passes.  In the first pass, we look for the
0279  * end of the log.  In the second, we assemble the list of revoke
0280  * blocks.  In the third and final pass, we replay any un-revoked blocks
0281  * in the log.
0282  */
0283 int jbd2_journal_recover(journal_t *journal)
0284 {
0285     int         err, err2;
0286     journal_superblock_t *  sb;
0287 
0288     struct recovery_info    info;
0289 
0290     memset(&info, 0, sizeof(info));
0291     sb = journal->j_superblock;
0292 
0293     /*
0294      * The journal superblock's s_start field (the current log head)
0295      * is always zero if, and only if, the journal was cleanly
0296      * unmounted.
0297      */
0298 
0299     if (!sb->s_start) {
0300         jbd2_debug(1, "No recovery required, last transaction %d\n",
0301               be32_to_cpu(sb->s_sequence));
0302         journal->j_transaction_sequence = be32_to_cpu(sb->s_sequence) + 1;
0303         return 0;
0304     }
0305 
0306     err = do_one_pass(journal, &info, PASS_SCAN);
0307     if (!err)
0308         err = do_one_pass(journal, &info, PASS_REVOKE);
0309     if (!err)
0310         err = do_one_pass(journal, &info, PASS_REPLAY);
0311 
0312     jbd2_debug(1, "JBD2: recovery, exit status %d, "
0313           "recovered transactions %u to %u\n",
0314           err, info.start_transaction, info.end_transaction);
0315     jbd2_debug(1, "JBD2: Replayed %d and revoked %d/%d blocks\n",
0316           info.nr_replays, info.nr_revoke_hits, info.nr_revokes);
0317 
0318     /* Restart the log at the next transaction ID, thus invalidating
0319      * any existing commit records in the log. */
0320     journal->j_transaction_sequence = ++info.end_transaction;
0321 
0322     jbd2_journal_clear_revoke(journal);
0323     err2 = sync_blockdev(journal->j_fs_dev);
0324     if (!err)
0325         err = err2;
0326     /* Make sure all replayed data is on permanent storage */
0327     if (journal->j_flags & JBD2_BARRIER) {
0328         err2 = blkdev_issue_flush(journal->j_fs_dev);
0329         if (!err)
0330             err = err2;
0331     }
0332     return err;
0333 }
0334 
0335 /**
0336  * jbd2_journal_skip_recovery - Start journal and wipe exiting records
0337  * @journal: journal to startup
0338  *
0339  * Locate any valid recovery information from the journal and set up the
0340  * journal structures in memory to ignore it (presumably because the
0341  * caller has evidence that it is out of date).
0342  * This function doesn't appear to be exported..
0343  *
0344  * We perform one pass over the journal to allow us to tell the user how
0345  * much recovery information is being erased, and to let us initialise
0346  * the journal transaction sequence numbers to the next unused ID.
0347  */
0348 int jbd2_journal_skip_recovery(journal_t *journal)
0349 {
0350     int         err;
0351 
0352     struct recovery_info    info;
0353 
0354     memset (&info, 0, sizeof(info));
0355 
0356     err = do_one_pass(journal, &info, PASS_SCAN);
0357 
0358     if (err) {
0359         printk(KERN_ERR "JBD2: error %d scanning journal\n", err);
0360         ++journal->j_transaction_sequence;
0361     } else {
0362 #ifdef CONFIG_JBD2_DEBUG
0363         int dropped = info.end_transaction - 
0364             be32_to_cpu(journal->j_superblock->s_sequence);
0365         jbd2_debug(1,
0366               "JBD2: ignoring %d transaction%s from the journal.\n",
0367               dropped, (dropped == 1) ? "" : "s");
0368 #endif
0369         journal->j_transaction_sequence = ++info.end_transaction;
0370     }
0371 
0372     journal->j_tail = 0;
0373     return err;
0374 }
0375 
0376 static inline unsigned long long read_tag_block(journal_t *journal,
0377                         journal_block_tag_t *tag)
0378 {
0379     unsigned long long block = be32_to_cpu(tag->t_blocknr);
0380     if (jbd2_has_feature_64bit(journal))
0381         block |= (u64)be32_to_cpu(tag->t_blocknr_high) << 32;
0382     return block;
0383 }
0384 
0385 /*
0386  * calc_chksums calculates the checksums for the blocks described in the
0387  * descriptor block.
0388  */
0389 static int calc_chksums(journal_t *journal, struct buffer_head *bh,
0390             unsigned long *next_log_block, __u32 *crc32_sum)
0391 {
0392     int i, num_blks, err;
0393     unsigned long io_block;
0394     struct buffer_head *obh;
0395 
0396     num_blks = count_tags(journal, bh);
0397     /* Calculate checksum of the descriptor block. */
0398     *crc32_sum = crc32_be(*crc32_sum, (void *)bh->b_data, bh->b_size);
0399 
0400     for (i = 0; i < num_blks; i++) {
0401         io_block = (*next_log_block)++;
0402         wrap(journal, *next_log_block);
0403         err = jread(&obh, journal, io_block);
0404         if (err) {
0405             printk(KERN_ERR "JBD2: IO error %d recovering block "
0406                 "%lu in log\n", err, io_block);
0407             return 1;
0408         } else {
0409             *crc32_sum = crc32_be(*crc32_sum, (void *)obh->b_data,
0410                      obh->b_size);
0411         }
0412         put_bh(obh);
0413     }
0414     return 0;
0415 }
0416 
0417 static int jbd2_commit_block_csum_verify(journal_t *j, void *buf)
0418 {
0419     struct commit_header *h;
0420     __be32 provided;
0421     __u32 calculated;
0422 
0423     if (!jbd2_journal_has_csum_v2or3(j))
0424         return 1;
0425 
0426     h = buf;
0427     provided = h->h_chksum[0];
0428     h->h_chksum[0] = 0;
0429     calculated = jbd2_chksum(j, j->j_csum_seed, buf, j->j_blocksize);
0430     h->h_chksum[0] = provided;
0431 
0432     return provided == cpu_to_be32(calculated);
0433 }
0434 
0435 static int jbd2_block_tag_csum_verify(journal_t *j, journal_block_tag_t *tag,
0436                       journal_block_tag3_t *tag3,
0437                       void *buf, __u32 sequence)
0438 {
0439     __u32 csum32;
0440     __be32 seq;
0441 
0442     if (!jbd2_journal_has_csum_v2or3(j))
0443         return 1;
0444 
0445     seq = cpu_to_be32(sequence);
0446     csum32 = jbd2_chksum(j, j->j_csum_seed, (__u8 *)&seq, sizeof(seq));
0447     csum32 = jbd2_chksum(j, csum32, buf, j->j_blocksize);
0448 
0449     if (jbd2_has_feature_csum3(j))
0450         return tag3->t_checksum == cpu_to_be32(csum32);
0451     else
0452         return tag->t_checksum == cpu_to_be16(csum32);
0453 }
0454 
/*
 * do_one_pass - walk the log once on behalf of a single recovery pass.
 *
 * The walk starts at the superblock's s_start / s_sequence and reads one log
 * block at a time with jread(), dispatching on the block type:
 *   - descriptor block: in PASS_REPLAY each described block is copied into
 *     a buffer on j_fs_dev and marked dirty (unless revoked or failing its
 *     tag checksum); in the other passes the described blocks are skipped,
 *     optionally being folded into the transaction checksum in PASS_SCAN;
 *   - commit block: checksums are verified in PASS_SCAN, then the expected
 *     commit ID advances to the next transaction;
 *   - revoke block: handed to scan_revoke_records() in PASS_REVOKE only.
 * The walk ends at the first block without the JBD2 magic, with an
 * unexpected sequence number or an unknown block type, or, outside
 * PASS_SCAN, once info->end_transaction has been reached.
 *
 * PASS_SCAN records the first and last transaction IDs in @info; the later
 * passes report an error if they end at a different transaction.  When the
 * fast-commit feature is set, fc_do_one_pass() runs afterwards for every
 * pass except PASS_REVOKE.
 *
 * Returns 0 on success.  Hard failures return their error straight away via
 * the "failed" label; recoverable problems during replay are remembered in
 * "success" and returned once the walk has finished.
 */
0455 static int do_one_pass(journal_t *journal,
0456             struct recovery_info *info, enum passtype pass)
0457 {
0458     unsigned int        first_commit_ID, next_commit_ID;
0459     unsigned long       next_log_block;
0460     int         err, success = 0;
0461     journal_superblock_t *  sb;
0462     journal_header_t *  tmp;
0463     struct buffer_head *    bh;
0464     unsigned int        sequence;
0465     int         blocktype;
0466     int         tag_bytes = journal_tag_bytes(journal);
0467     __u32           crc32_sum = ~0; /* Transactional Checksums */
0468     int         descr_csum_size = 0;
0469     int         block_error = 0;
0470     bool            need_check_commit_time = false;
0471     __u64           last_trans_commit_time = 0, commit_time;
0472 
0473     /*
0474      * First thing is to establish what we expect to find in the log
0475      * (in terms of transaction IDs), and where (in terms of log
0476      * block offsets): query the superblock.
0477      */
0478 
0479     sb = journal->j_superblock;
0480     next_commit_ID = be32_to_cpu(sb->s_sequence);
0481     next_log_block = be32_to_cpu(sb->s_start);
0482 
0483     first_commit_ID = next_commit_ID;
0484     if (pass == PASS_SCAN)
0485         info->start_transaction = first_commit_ID;
0486 
0487     jbd2_debug(1, "Starting recovery pass %d\n", pass);
0488 
0489     /*
0490      * Now we walk through the log, transaction by transaction,
0491      * making sure that each transaction has a commit block in the
0492      * expected place.  Each complete transaction gets replayed back
0493      * into the main filesystem.
0494      */
0495 
0496     while (1) {
0497         int         flags;
0498         char *          tagp;
0499         journal_block_tag_t tag;
0500         struct buffer_head *    obh;
0501         struct buffer_head *    nbh;
0502 
0503         cond_resched();
0504 
0505         /* If we already know where to stop the log traversal,
0506          * check right now that we haven't gone past the end of
0507          * the log. */
0508 
0509         if (pass != PASS_SCAN)
0510             if (tid_geq(next_commit_ID, info->end_transaction))
0511                 break;
0512 
0513         jbd2_debug(2, "Scanning for sequence ID %u at %lu/%lu\n",
0514               next_commit_ID, next_log_block,
0515               jbd2_has_feature_fast_commit(journal) ?
0516               journal->j_fc_last : journal->j_last);
0517 
0518         /* Skip over each chunk of the transaction looking
0519          * either the next descriptor block or the final commit
0520          * record. */
0521 
0522         jbd2_debug(3, "JBD2: checking block %ld\n", next_log_block);
0523         err = jread(&bh, journal, next_log_block);
0524         if (err)
0525             goto failed;
0526 
             /* From here on next_log_block refers to the block AFTER the
              * one held in bh, so messages below that print it report the
              * following log block, not the one being examined. */
0527         next_log_block++;
0528         wrap(journal, next_log_block);
0529 
0530         /* What kind of buffer is it?
0531          *
0532          * If it is a descriptor block, check that it has the
0533          * expected sequence number.  Otherwise, we're all done
0534          * here. */
0535 
0536         tmp = (journal_header_t *)bh->b_data;
0537 
0538         if (tmp->h_magic != cpu_to_be32(JBD2_MAGIC_NUMBER)) {
0539             brelse(bh);
0540             break;
0541         }
0542 
0543         blocktype = be32_to_cpu(tmp->h_blocktype);
0544         sequence = be32_to_cpu(tmp->h_sequence);
0545         jbd2_debug(3, "Found magic %d, sequence %d\n",
0546               blocktype, sequence);
0547 
0548         if (sequence != next_commit_ID) {
0549             brelse(bh);
0550             break;
0551         }
0552 
0553         /* OK, we have a valid descriptor block which matches
0554          * all of the sequence number checks.  What are we going
0555          * to do with it?  That depends on the pass... */
0556 
0557         switch(blocktype) {
0558         case JBD2_DESCRIPTOR_BLOCK:
0559             /* Verify checksum first */
0560             if (jbd2_journal_has_csum_v2or3(journal))
0561                 descr_csum_size =
0562                     sizeof(struct jbd2_journal_block_tail);
0563             if (descr_csum_size > 0 &&
0564                 !jbd2_descriptor_block_csum_verify(journal,
0565                                    bh->b_data)) {
0566                 /*
0567                  * PASS_SCAN can see stale blocks due to lazy
0568                  * journal init. Don't error out on those yet.
0569                  */
0570                 if (pass != PASS_SCAN) {
0571                     pr_err("JBD2: Invalid checksum recovering block %lu in log\n",
0572                            next_log_block);
0573                     err = -EFSBADCRC;
0574                     brelse(bh);
0575                     goto failed;
0576                 }
0577                 need_check_commit_time = true;
0578                 jbd2_debug(1,
0579                     "invalid descriptor block found in %lu\n",
0580                     next_log_block);
0581             }
0582 
0583             /* If it is a valid descriptor block, replay it
0584              * in pass REPLAY; if journal_checksums enabled, then
0585              * calculate checksums in PASS_SCAN, otherwise,
0586              * just skip over the blocks it describes. */
0587             if (pass != PASS_REPLAY) {
0588                 if (pass == PASS_SCAN &&
0589                     jbd2_has_feature_checksum(journal) &&
0590                     !need_check_commit_time &&
0591                     !info->end_transaction) {
0592                     if (calc_chksums(journal, bh,
0593                             &next_log_block,
0594                             &crc32_sum)) {
0595                         put_bh(bh);
0596                         break;
0597                     }
0598                     put_bh(bh);
0599                     continue;
0600                 }
0601                 next_log_block += count_tags(journal, bh);
0602                 wrap(journal, next_log_block);
0603                 put_bh(bh);
0604                 continue;
0605             }
0606 
0607             /* A descriptor block: we can now write all of
0608              * the data blocks.  Yay, useful work is finally
0609              * getting done here! */
0610 
0611             tagp = &bh->b_data[sizeof(journal_header_t)];
0612             while ((tagp - bh->b_data + tag_bytes)
0613                    <= journal->j_blocksize - descr_csum_size) {
0614                 unsigned long io_block;
0615 
0616                 memcpy(&tag, tagp, sizeof(tag));
0617                 flags = be16_to_cpu(tag.t_flags);
0618 
0619                 io_block = next_log_block++;
0620                 wrap(journal, next_log_block);
0621                 err = jread(&obh, journal, io_block);
0622                 if (err) {
0623                     /* Recover what we can, but
0624                      * report failure at the end. */
0625                     success = err;
0626                     printk(KERN_ERR
0627                         "JBD2: IO error %d recovering "
0628                         "block %ld in log\n",
0629                         err, io_block);
0630                 } else {
0631                     unsigned long long blocknr;
0632 
0633                     J_ASSERT(obh != NULL);
0634                     blocknr = read_tag_block(journal,
0635                                  &tag);
0636 
0637                     /* If the block has been
0638                      * revoked, then we're all done
0639                      * here. */
0640                     if (jbd2_journal_test_revoke
0641                         (journal, blocknr,
0642                          next_commit_ID)) {
0643                         brelse(obh);
0644                         ++info->nr_revoke_hits;
0645                         goto skip_write;
0646                     }
0647 
0648                     /* Look for block corruption */
0649                     if (!jbd2_block_tag_csum_verify(
0650             journal, &tag, (journal_block_tag3_t *)tagp,
0651             obh->b_data, be32_to_cpu(tmp->h_sequence))) {
0652                         brelse(obh);
0653                         success = -EFSBADCRC;
0654                         printk(KERN_ERR "JBD2: Invalid "
0655                                "checksum recovering "
0656                                "data block %llu in "
0657                                "log\n", blocknr);
0658                         block_error = 1;
0659                         goto skip_write;
0660                     }
0661 
0662                     /* Find a buffer for the new
0663                      * data being restored */
0664                     nbh = __getblk(journal->j_fs_dev,
0665                             blocknr,
0666                             journal->j_blocksize);
0667                     if (nbh == NULL) {
0668                         printk(KERN_ERR
0669                                "JBD2: Out of memory "
0670                                "during recovery.\n");
0671                         err = -ENOMEM;
0672                         brelse(bh);
0673                         brelse(obh);
0674                         goto failed;
0675                     }
0676 
0677                     lock_buffer(nbh);
0678                     memcpy(nbh->b_data, obh->b_data,
0679                             journal->j_blocksize);
                         /* An escaped block has its first four bytes
                          * rewritten with the JBD2 magic number after
                          * the copy. */
0680                     if (flags & JBD2_FLAG_ESCAPE) {
0681                         *((__be32 *)nbh->b_data) =
0682                         cpu_to_be32(JBD2_MAGIC_NUMBER);
0683                     }
0684 
0685                     BUFFER_TRACE(nbh, "marking dirty");
0686                     set_buffer_uptodate(nbh);
0687                     mark_buffer_dirty(nbh);
0688                     BUFFER_TRACE(nbh, "marking uptodate");
0689                     ++info->nr_replays;
0690                     /* ll_rw_block(WRITE, 1, &nbh); */
0691                     unlock_buffer(nbh);
0692                     brelse(obh);
0693                     brelse(nbh);
0694                 }
0695 
                 /* Reached on every path, including read errors: flags
                  * was decoded from the tag before the jread() above, so
                  * it is always valid here. */
0696             skip_write:
0697                 tagp += tag_bytes;
0698                 if (!(flags & JBD2_FLAG_SAME_UUID))
0699                     tagp += 16;
0700 
0701                 if (flags & JBD2_FLAG_LAST_TAG)
0702                     break;
0703             }
0704 
0705             brelse(bh);
0706             continue;
0707 
0708         case JBD2_COMMIT_BLOCK:
0709             /*     How to differentiate between interrupted commit
0710              *               and journal corruption ?
0711              *
0712              * {nth transaction}
0713              *        Checksum Verification Failed
0714              *           |
0715              *       ____________________
0716              *      |            |
0717              *  async_commit             sync_commit
0718              *          |                    |
0719              *      | GO TO NEXT    "Journal Corruption"
0720              *      | TRANSACTION
0721              *      |
0722              * {(n+1)th transanction}
0723              *      |
0724              *   _______|______________
0725              *  |             |
0726              * Commit block found   Commit block not found
0727              *      |             |
0728              * "Journal Corruption"       |
0729              *       _____________|_________
0730              *          |               |
0731              *  nth trans corrupt   OR   nth trans
0732              *  and (n+1)th interrupted     interrupted
0733              *  before commit block
0734              *      could reach the disk.
0735              *  (Cannot find the difference in above
0736              *   mentioned conditions. Hence assume
0737              *   "Interrupted Commit".)
0738              */
0739             commit_time = be64_to_cpu(
0740                 ((struct commit_header *)bh->b_data)->h_commit_sec);
0741             /*
0742              * If need_check_commit_time is set, it means we are in
0743              * PASS_SCAN and csum verify failed before. If
0744              * commit_time is increasing, it's the same journal,
0745              * otherwise it is stale journal block, just end this
0746              * recovery.
0747              */
0748             if (need_check_commit_time) {
0749                 if (commit_time >= last_trans_commit_time) {
0750                     pr_err("JBD2: Invalid checksum found in transaction %u\n",
0751                            next_commit_ID);
0752                     err = -EFSBADCRC;
0753                     brelse(bh);
0754                     goto failed;
0755                 }
                 /* Also entered by goto from chksum_error below when a
                  * failing commit block carries an older commit time. */
0756             ignore_crc_mismatch:
0757                 /*
0758                  * It likely does not belong to same journal,
0759                  * just end this recovery with success.
0760                  */
0761                 jbd2_debug(1, "JBD2: Invalid checksum ignored in transaction %u, likely stale data\n",
0762                       next_commit_ID);
0763                 brelse(bh);
0764                 goto done;
0765             }
0766 
0767             /*
0768              * Found an expected commit block: if checksums
0769              * are present, verify them in PASS_SCAN; else not
0770              * much to do other than move on to the next sequence
0771              * number.
0772              */
0773             if (pass == PASS_SCAN &&
0774                 jbd2_has_feature_checksum(journal)) {
0775                 struct commit_header *cbh =
0776                     (struct commit_header *)bh->b_data;
0777                 unsigned found_chksum =
0778                     be32_to_cpu(cbh->h_chksum[0]);
0779 
                     /* Inside the loop end_transaction is only assigned
                      * at chksum_error below, so (given the zeroed info
                      * the callers in this file pass in) a non-zero value
                      * means an earlier commit block already failed. */
0780                 if (info->end_transaction) {
0781                     journal->j_failed_commit =
0782                         info->end_transaction;
0783                     brelse(bh);
0784                     break;
0785                 }
0786 
0787                 /* Neither checksum match nor unused? */
0788                 if (!((crc32_sum == found_chksum &&
0789                        cbh->h_chksum_type ==
0790                         JBD2_CRC32_CHKSUM &&
0791                        cbh->h_chksum_size ==
0792                         JBD2_CRC32_CHKSUM_SIZE) ||
0793                       (cbh->h_chksum_type == 0 &&
0794                        cbh->h_chksum_size == 0 &&
0795                        found_chksum == 0)))
0796                     goto chksum_error;
0797 
0798                 crc32_sum = ~0;
0799             }
0800             if (pass == PASS_SCAN &&
0801                 !jbd2_commit_block_csum_verify(journal,
0802                                bh->b_data)) {
0803             chksum_error:
0804                 if (commit_time < last_trans_commit_time)
0805                     goto ignore_crc_mismatch;
0806                 info->end_transaction = next_commit_ID;
0807 
0808                 if (!jbd2_has_feature_async_commit(journal)) {
0809                     journal->j_failed_commit =
0810                         next_commit_ID;
0811                     brelse(bh);
0812                     break;
0813                 }
0814             }
0815             if (pass == PASS_SCAN)
0816                 last_trans_commit_time = commit_time;
0817             brelse(bh);
0818             next_commit_ID++;
0819             continue;
0820 
0821         case JBD2_REVOKE_BLOCK:
0822             /*
0823              * Check revoke block crc in pass_scan, if csum verify
0824              * failed, check commit block time later.
0825              */
0826             if (pass == PASS_SCAN &&
0827                 !jbd2_descriptor_block_csum_verify(journal,
0828                                    bh->b_data)) {
0829                 jbd2_debug(1, "JBD2: invalid revoke block found in %lu\n",
0830                       next_log_block);
0831                 need_check_commit_time = true;
0832             }
0833             /* If we aren't in the REVOKE pass, then we can
0834              * just skip over this block. */
0835             if (pass != PASS_REVOKE) {
0836                 brelse(bh);
0837                 continue;
0838             }
0839 
0840             err = scan_revoke_records(journal, bh,
0841                           next_commit_ID, info);
0842             brelse(bh);
0843             if (err)
0844                 goto failed;
0845             continue;
0846 
0847         default:
0848             jbd2_debug(3, "Unrecognised magic %d, end of scan.\n",
0849                   blocktype);
0850             brelse(bh);
0851             goto done;
0852         }
0853     }
0854 
0855  done:
0856     /*
0857      * We broke out of the log scan loop: either we came to the
0858      * known end of the log or we found an unexpected block in the
0859      * log.  If the latter happened, then we know that the "current"
0860      * transaction marks the end of the valid log.
0861      */
0862 
0863     if (pass == PASS_SCAN) {
0864         if (!info->end_transaction)
0865             info->end_transaction = next_commit_ID;
0866     } else {
0867         /* It's really bad news if different passes end up at
0868          * different places (but possible due to IO errors). */
0869         if (info->end_transaction != next_commit_ID) {
0870             printk(KERN_ERR "JBD2: recovery pass %d ended at "
0871                 "transaction %u, expected %u\n",
0872                 pass, next_commit_ID, info->end_transaction);
0873             if (!success)
0874                 success = -EIO;
0875         }
0876     }
0877 
         /* The fast-commit area is handled separately, after the main log
          * walk; it is not visited in the revoke pass. */
0878     if (jbd2_has_feature_fast_commit(journal) &&  pass != PASS_REVOKE) {
0879         err = fc_do_one_pass(journal, info, pass);
0880         if (err)
0881             success = err;
0882     }
0883 
0884     if (block_error && success == 0)
0885         success = -EIO;
0886     return success;
0887 
         /* Hard-failure exit: every goto to this label has already released
          * the buffers it held, so only the error code is returned. */
0888  failed:
0889     return err;
0890 }
0891 
0892 /* Scan a revoke record, marking all blocks mentioned as revoked. */
0893 
0894 static int scan_revoke_records(journal_t *journal, struct buffer_head *bh,
0895                    tid_t sequence, struct recovery_info *info)
0896 {
0897     jbd2_journal_revoke_header_t *header;
0898     int offset, max;
0899     unsigned csum_size = 0;
0900     __u32 rcount;
0901     int record_len = 4;
0902 
0903     header = (jbd2_journal_revoke_header_t *) bh->b_data;
0904     offset = sizeof(jbd2_journal_revoke_header_t);
0905     rcount = be32_to_cpu(header->r_count);
0906 
0907     if (jbd2_journal_has_csum_v2or3(journal))
0908         csum_size = sizeof(struct jbd2_journal_block_tail);
0909     if (rcount > journal->j_blocksize - csum_size)
0910         return -EINVAL;
0911     max = rcount;
0912 
0913     if (jbd2_has_feature_64bit(journal))
0914         record_len = 8;
0915 
0916     while (offset + record_len <= max) {
0917         unsigned long long blocknr;
0918         int err;
0919 
0920         if (record_len == 4)
0921             blocknr = be32_to_cpu(* ((__be32 *) (bh->b_data+offset)));
0922         else
0923             blocknr = be64_to_cpu(* ((__be64 *) (bh->b_data+offset)));
0924         offset += record_len;
0925         err = jbd2_journal_set_revoke(journal, blocknr, sequence);
0926         if (err)
0927             return err;
0928         ++info->nr_revokes;
0929     }
0930     return 0;
0931 }