Back to home page

OSCL-LXR

 
 

    


0001 // SPDX-License-Identifier: GPL-2.0-only
0002 /*
0003  * Copyright (C) Sistina Software, Inc.  1997-2003 All rights reserved.
0004  * Copyright (C) 2004-2006 Red Hat, Inc.  All rights reserved.
0005  */
0006 
0007 #include <linux/module.h>
0008 #include <linux/slab.h>
0009 #include <linux/spinlock.h>
0010 #include <linux/completion.h>
0011 #include <linux/buffer_head.h>
0012 #include <linux/gfs2_ondisk.h>
0013 #include <linux/crc32.h>
0014 #include <linux/crc32c.h>
0015 #include <linux/ktime.h>
0016 
0017 #include "gfs2.h"
0018 #include "incore.h"
0019 #include "bmap.h"
0020 #include "glock.h"
0021 #include "glops.h"
0022 #include "log.h"
0023 #include "lops.h"
0024 #include "meta_io.h"
0025 #include "recovery.h"
0026 #include "super.h"
0027 #include "util.h"
0028 #include "dir.h"
0029 
/* Workqueue on which gfs2_recover_journal() queues each journal's jd_work. */
0030 struct workqueue_struct *gfs_recovery_wq;
0031 
0032 int gfs2_replay_read_block(struct gfs2_jdesc *jd, unsigned int blk,
0033                struct buffer_head **bh)
0034 {
0035     struct gfs2_inode *ip = GFS2_I(jd->jd_inode);
0036     struct gfs2_glock *gl = ip->i_gl;
0037     u64 dblock;
0038     u32 extlen;
0039     int error;
0040 
0041     extlen = 32;
0042     error = gfs2_get_extent(&ip->i_inode, blk, &dblock, &extlen);
0043     if (error)
0044         return error;
0045     if (!dblock) {
0046         gfs2_consist_inode(ip);
0047         return -EIO;
0048     }
0049 
0050     *bh = gfs2_meta_ra(gl, dblock, extlen);
0051 
0052     return error;
0053 }
0054 
0055 int gfs2_revoke_add(struct gfs2_jdesc *jd, u64 blkno, unsigned int where)
0056 {
0057     struct list_head *head = &jd->jd_revoke_list;
0058     struct gfs2_revoke_replay *rr = NULL, *iter;
0059 
0060     list_for_each_entry(iter, head, rr_list) {
0061         if (iter->rr_blkno == blkno) {
0062             rr = iter;
0063             break;
0064         }
0065     }
0066 
0067     if (rr) {
0068         rr->rr_where = where;
0069         return 0;
0070     }
0071 
0072     rr = kmalloc(sizeof(struct gfs2_revoke_replay), GFP_NOFS);
0073     if (!rr)
0074         return -ENOMEM;
0075 
0076     rr->rr_blkno = blkno;
0077     rr->rr_where = where;
0078     list_add(&rr->rr_list, head);
0079 
0080     return 1;
0081 }
0082 
0083 int gfs2_revoke_check(struct gfs2_jdesc *jd, u64 blkno, unsigned int where)
0084 {
0085     struct gfs2_revoke_replay *rr = NULL, *iter;
0086     int wrap, a, b, revoke;
0087 
0088     list_for_each_entry(iter, &jd->jd_revoke_list, rr_list) {
0089         if (iter->rr_blkno == blkno) {
0090             rr = iter;
0091             break;
0092         }
0093     }
0094 
0095     if (!rr)
0096         return 0;
0097 
0098     wrap = (rr->rr_where < jd->jd_replay_tail);
0099     a = (jd->jd_replay_tail < where);
0100     b = (where < rr->rr_where);
0101     revoke = (wrap) ? (a || b) : (a && b);
0102 
0103     return revoke;
0104 }
0105 
0106 void gfs2_revoke_clean(struct gfs2_jdesc *jd)
0107 {
0108     struct list_head *head = &jd->jd_revoke_list;
0109     struct gfs2_revoke_replay *rr;
0110 
0111     while (!list_empty(head)) {
0112         rr = list_first_entry(head, struct gfs2_revoke_replay, rr_list);
0113         list_del(&rr->rr_list);
0114         kfree(rr);
0115     }
0116 }
0117 
0118 int __get_log_header(struct gfs2_sbd *sdp, const struct gfs2_log_header *lh,
0119              unsigned int blkno, struct gfs2_log_header_host *head)
0120 {
0121     u32 hash, crc;
0122 
0123     if (lh->lh_header.mh_magic != cpu_to_be32(GFS2_MAGIC) ||
0124         lh->lh_header.mh_type != cpu_to_be32(GFS2_METATYPE_LH) ||
0125         (blkno && be32_to_cpu(lh->lh_blkno) != blkno))
0126         return 1;
0127 
0128     hash = crc32(~0, lh, LH_V1_SIZE - 4);
0129     hash = ~crc32_le_shift(hash, 4); /* assume lh_hash is zero */
0130 
0131     if (be32_to_cpu(lh->lh_hash) != hash)
0132         return 1;
0133 
0134     crc = crc32c(~0, (void *)lh + LH_V1_SIZE + 4,
0135              sdp->sd_sb.sb_bsize - LH_V1_SIZE - 4);
0136 
0137     if ((lh->lh_crc != 0 && be32_to_cpu(lh->lh_crc) != crc))
0138         return 1;
0139 
0140     head->lh_sequence = be64_to_cpu(lh->lh_sequence);
0141     head->lh_flags = be32_to_cpu(lh->lh_flags);
0142     head->lh_tail = be32_to_cpu(lh->lh_tail);
0143     head->lh_blkno = be32_to_cpu(lh->lh_blkno);
0144 
0145     head->lh_local_total = be64_to_cpu(lh->lh_local_total);
0146     head->lh_local_free = be64_to_cpu(lh->lh_local_free);
0147     head->lh_local_dinodes = be64_to_cpu(lh->lh_local_dinodes);
0148 
0149     return 0;
0150 }
0151 /**
0152  * get_log_header - read the log header for a given segment
0153  * @jd: the journal
0154  * @blk: the block to look at
0155  * @head: the log header to return
0156  *
0157  * Read the log header for a given segement in a given journal.  Do a few
0158  * sanity checks on it.
0159  *
0160  * Returns: 0 on success,
0161  *          1 if the header was invalid or incomplete,
0162  *          errno on error
0163  */
0164 
0165 static int get_log_header(struct gfs2_jdesc *jd, unsigned int blk,
0166               struct gfs2_log_header_host *head)
0167 {
0168     struct gfs2_sbd *sdp = GFS2_SB(jd->jd_inode);
0169     struct buffer_head *bh;
0170     int error;
0171 
0172     error = gfs2_replay_read_block(jd, blk, &bh);
0173     if (error)
0174         return error;
0175 
0176     error = __get_log_header(sdp, (const struct gfs2_log_header *)bh->b_data,
0177                  blk, head);
0178     brelse(bh);
0179 
0180     return error;
0181 }
0182 
0183 /**
0184  * foreach_descriptor - go through the active part of the log
0185  * @jd: the journal
0186  * @start: the first log header in the active region
0187  * @end: the last log header (don't process the contents of this entry))
0188  * @pass: iteration number (foreach_descriptor() is called in a for() loop)
0189  *
0190  * Call a given function once for every log descriptor in the active
0191  * portion of the log.
0192  *
0193  * Returns: errno
0194  */
0195 
0196 static int foreach_descriptor(struct gfs2_jdesc *jd, u32 start,
0197                   unsigned int end, int pass)
0198 {
0199     struct gfs2_sbd *sdp = GFS2_SB(jd->jd_inode);
0200     struct buffer_head *bh;
0201     struct gfs2_log_descriptor *ld;
0202     int error = 0;
0203     u32 length;
0204     __be64 *ptr;
0205     unsigned int offset = sizeof(struct gfs2_log_descriptor);
0206     offset += sizeof(__be64) - 1;
0207     offset &= ~(sizeof(__be64) - 1);
0208 
0209     while (start != end) {
0210         error = gfs2_replay_read_block(jd, start, &bh);
0211         if (error)
0212             return error;
0213         if (gfs2_meta_check(sdp, bh)) {
0214             brelse(bh);
0215             return -EIO;
0216         }
0217         ld = (struct gfs2_log_descriptor *)bh->b_data;
0218         length = be32_to_cpu(ld->ld_length);
0219 
0220         if (be32_to_cpu(ld->ld_header.mh_type) == GFS2_METATYPE_LH) {
0221             struct gfs2_log_header_host lh;
0222             error = get_log_header(jd, start, &lh);
0223             if (!error) {
0224                 gfs2_replay_incr_blk(jd, &start);
0225                 brelse(bh);
0226                 continue;
0227             }
0228             if (error == 1) {
0229                 gfs2_consist_inode(GFS2_I(jd->jd_inode));
0230                 error = -EIO;
0231             }
0232             brelse(bh);
0233             return error;
0234         } else if (gfs2_metatype_check(sdp, bh, GFS2_METATYPE_LD)) {
0235             brelse(bh);
0236             return -EIO;
0237         }
0238         ptr = (__be64 *)(bh->b_data + offset);
0239         error = lops_scan_elements(jd, start, ld, ptr, pass);
0240         if (error) {
0241             brelse(bh);
0242             return error;
0243         }
0244 
0245         while (length--)
0246             gfs2_replay_incr_blk(jd, &start);
0247 
0248         brelse(bh);
0249     }
0250 
0251     return 0;
0252 }
0253 
0254 /**
0255  * clean_journal - mark a dirty journal as being clean
0256  * @jd: the journal
0257  * @head: the head journal to start from
0258  *
0259  * Returns: errno
0260  */
0261 
0262 static void clean_journal(struct gfs2_jdesc *jd,
0263               struct gfs2_log_header_host *head)
0264 {
0265     struct gfs2_sbd *sdp = GFS2_SB(jd->jd_inode);
0266     u32 lblock = head->lh_blkno;
0267 
0268     gfs2_replay_incr_blk(jd, &lblock);
0269     gfs2_write_log_header(sdp, jd, head->lh_sequence + 1, 0, lblock,
0270                   GFS2_LOG_HEAD_UNMOUNT | GFS2_LOG_HEAD_RECOVERY,
0271                   REQ_PREFLUSH | REQ_FUA | REQ_META | REQ_SYNC);
0272     if (jd->jd_jid == sdp->sd_lockstruct.ls_jid) {
0273         sdp->sd_log_flush_head = lblock;
0274         gfs2_log_incr_head(sdp);
0275     }
0276 }
0277 
0278 
0279 static void gfs2_recovery_done(struct gfs2_sbd *sdp, unsigned int jid,
0280                                unsigned int message)
0281 {
0282     char env_jid[20];
0283     char env_status[20];
0284     char *envp[] = { env_jid, env_status, NULL };
0285     struct lm_lockstruct *ls = &sdp->sd_lockstruct;
0286 
0287         ls->ls_recover_jid_done = jid;
0288         ls->ls_recover_jid_status = message;
0289     sprintf(env_jid, "JID=%u", jid);
0290     sprintf(env_status, "RECOVERY=%s",
0291         message == LM_RD_SUCCESS ? "Done" : "Failed");
0292         kobject_uevent_env(&sdp->sd_kobj, KOBJ_CHANGE, envp);
0293 
0294     if (sdp->sd_lockstruct.ls_ops->lm_recovery_result)
0295         sdp->sd_lockstruct.ls_ops->lm_recovery_result(sdp, jid, message);
0296 }
0297 
0298 /**
0299  * update_statfs_inode - Update the master statfs inode or zero out the local
0300  *           statfs inode for a given journal.
0301  * @jd: The journal
0302  * @head: If NULL, @inode is the local statfs inode and we need to zero it out.
0303  *    Otherwise, it @head contains the statfs change info that needs to be
0304  *    synced to the master statfs inode (pointed to by @inode).
0305  * @inode: statfs inode to update.
0306  */
0307 static int update_statfs_inode(struct gfs2_jdesc *jd,
0308                    struct gfs2_log_header_host *head,
0309                    struct inode *inode)
0310 {
0311     struct gfs2_sbd *sdp = GFS2_SB(jd->jd_inode);
0312     struct gfs2_inode *ip;
0313     struct buffer_head *bh;
0314     struct gfs2_statfs_change_host sc;
0315     int error = 0;
0316 
0317     BUG_ON(!inode);
0318     ip = GFS2_I(inode);
0319 
0320     error = gfs2_meta_inode_buffer(ip, &bh);
0321     if (error)
0322         goto out;
0323 
0324     spin_lock(&sdp->sd_statfs_spin);
0325 
0326     if (head) { /* Update the master statfs inode */
0327         gfs2_statfs_change_in(&sc, bh->b_data + sizeof(struct gfs2_dinode));
0328         sc.sc_total += head->lh_local_total;
0329         sc.sc_free += head->lh_local_free;
0330         sc.sc_dinodes += head->lh_local_dinodes;
0331         gfs2_statfs_change_out(&sc, bh->b_data + sizeof(struct gfs2_dinode));
0332 
0333         fs_info(sdp, "jid=%u: Updated master statfs Total:%lld, "
0334             "Free:%lld, Dinodes:%lld after change "
0335             "[%+lld,%+lld,%+lld]\n", jd->jd_jid, sc.sc_total,
0336             sc.sc_free, sc.sc_dinodes, head->lh_local_total,
0337             head->lh_local_free, head->lh_local_dinodes);
0338     } else { /* Zero out the local statfs inode */
0339         memset(bh->b_data + sizeof(struct gfs2_dinode), 0,
0340                sizeof(struct gfs2_statfs_change));
0341         /* If it's our own journal, reset any in-memory changes too */
0342         if (jd->jd_jid == sdp->sd_lockstruct.ls_jid) {
0343             memset(&sdp->sd_statfs_local, 0,
0344                    sizeof(struct gfs2_statfs_change_host));
0345         }
0346     }
0347     spin_unlock(&sdp->sd_statfs_spin);
0348 
0349     mark_buffer_dirty(bh);
0350     brelse(bh);
0351     gfs2_inode_metasync(ip->i_gl);
0352 
0353 out:
0354     return error;
0355 }
0356 
0357 /**
0358  * recover_local_statfs - Update the master and local statfs changes for this
0359  *            journal.
0360  *
0361  * Previously, statfs updates would be read in from the local statfs inode and
0362  * synced to the master statfs inode during recovery.
0363  *
0364  * We now use the statfs updates in the journal head to update the master statfs
0365  * inode instead of reading in from the local statfs inode. To preserve backward
0366  * compatibility with kernels that can't do this, we still need to keep the
0367  * local statfs inode up to date by writing changes to it. At some point in the
0368  * future, we can do away with the local statfs inodes altogether and keep the
0369  * statfs changes solely in the journal.
0370  *
0371  * @jd: the journal
0372  * @head: the journal head
0373  *
0374  * Returns: errno
0375  */
0376 static void recover_local_statfs(struct gfs2_jdesc *jd,
0377                  struct gfs2_log_header_host *head)
0378 {
0379     int error;
0380     struct gfs2_sbd *sdp = GFS2_SB(jd->jd_inode);
0381 
0382     if (!head->lh_local_total && !head->lh_local_free
0383         && !head->lh_local_dinodes) /* No change */
0384         goto zero_local;
0385 
0386      /* First update the master statfs inode with the changes we
0387       * found in the journal. */
0388     error = update_statfs_inode(jd, head, sdp->sd_statfs_inode);
0389     if (error)
0390         goto out;
0391 
0392 zero_local:
0393     /* Zero out the local statfs inode so any changes in there
0394      * are not re-recovered. */
0395     error = update_statfs_inode(jd, NULL,
0396                     find_local_statfs_inode(sdp, jd->jd_jid));
0397 out:
0398     return;
0399 }
0400 
0401 void gfs2_recover_func(struct work_struct *work)
0402 {
0403     struct gfs2_jdesc *jd = container_of(work, struct gfs2_jdesc, jd_work);
0404     struct gfs2_inode *ip = GFS2_I(jd->jd_inode);
0405     struct gfs2_sbd *sdp = GFS2_SB(jd->jd_inode);
0406     struct gfs2_log_header_host head;
0407     struct gfs2_holder j_gh, ji_gh, thaw_gh;
0408     ktime_t t_start, t_jlck, t_jhd, t_tlck, t_rep;
0409     int ro = 0;
0410     unsigned int pass;
0411     int error = 0;
0412     int jlocked = 0;
0413 
0414     if (gfs2_withdrawn(sdp)) {
0415         fs_err(sdp, "jid=%u: Recovery not attempted due to withdraw.\n",
0416                jd->jd_jid);
0417         goto fail;
0418     }
0419     t_start = ktime_get();
0420     if (sdp->sd_args.ar_spectator)
0421         goto fail;
0422     if (jd->jd_jid != sdp->sd_lockstruct.ls_jid) {
0423         fs_info(sdp, "jid=%u: Trying to acquire journal lock...\n",
0424             jd->jd_jid);
0425         jlocked = 1;
0426         /* Acquire the journal lock so we can do recovery */
0427 
0428         error = gfs2_glock_nq_num(sdp, jd->jd_jid, &gfs2_journal_glops,
0429                       LM_ST_EXCLUSIVE,
0430                       LM_FLAG_NOEXP | LM_FLAG_TRY | GL_NOCACHE,
0431                       &j_gh);
0432         switch (error) {
0433         case 0:
0434             break;
0435 
0436         case GLR_TRYFAILED:
0437             fs_info(sdp, "jid=%u: Busy\n", jd->jd_jid);
0438             error = 0;
0439             goto fail;
0440 
0441         default:
0442             goto fail;
0443         }
0444 
0445         error = gfs2_glock_nq_init(ip->i_gl, LM_ST_SHARED,
0446                        LM_FLAG_NOEXP | GL_NOCACHE, &ji_gh);
0447         if (error)
0448             goto fail_gunlock_j;
0449     } else {
0450         fs_info(sdp, "jid=%u, already locked for use\n", jd->jd_jid);
0451     }
0452 
0453     t_jlck = ktime_get();
0454     fs_info(sdp, "jid=%u: Looking at journal...\n", jd->jd_jid);
0455 
0456     error = gfs2_jdesc_check(jd);
0457     if (error)
0458         goto fail_gunlock_ji;
0459 
0460     error = gfs2_find_jhead(jd, &head, true);
0461     if (error)
0462         goto fail_gunlock_ji;
0463     t_jhd = ktime_get();
0464     fs_info(sdp, "jid=%u: Journal head lookup took %lldms\n", jd->jd_jid,
0465         ktime_ms_delta(t_jhd, t_jlck));
0466 
0467     if (!(head.lh_flags & GFS2_LOG_HEAD_UNMOUNT)) {
0468         fs_info(sdp, "jid=%u: Acquiring the transaction lock...\n",
0469             jd->jd_jid);
0470 
0471         /* Acquire a shared hold on the freeze lock */
0472 
0473         error = gfs2_freeze_lock(sdp, &thaw_gh, LM_FLAG_PRIORITY);
0474         if (error)
0475             goto fail_gunlock_ji;
0476 
0477         if (test_bit(SDF_RORECOVERY, &sdp->sd_flags)) {
0478             ro = 1;
0479         } else if (test_bit(SDF_JOURNAL_CHECKED, &sdp->sd_flags)) {
0480             if (!test_bit(SDF_JOURNAL_LIVE, &sdp->sd_flags))
0481                 ro = 1;
0482         } else {
0483             if (sb_rdonly(sdp->sd_vfs)) {
0484                 /* check if device itself is read-only */
0485                 ro = bdev_read_only(sdp->sd_vfs->s_bdev);
0486                 if (!ro) {
0487                     fs_info(sdp, "recovery required on "
0488                         "read-only filesystem.\n");
0489                     fs_info(sdp, "write access will be "
0490                         "enabled during recovery.\n");
0491                 }
0492             }
0493         }
0494 
0495         if (ro) {
0496             fs_warn(sdp, "jid=%u: Can't replay: read-only block "
0497                 "device\n", jd->jd_jid);
0498             error = -EROFS;
0499             goto fail_gunlock_thaw;
0500         }
0501 
0502         t_tlck = ktime_get();
0503         fs_info(sdp, "jid=%u: Replaying journal...0x%x to 0x%x\n",
0504             jd->jd_jid, head.lh_tail, head.lh_blkno);
0505 
0506         /* We take the sd_log_flush_lock here primarily to prevent log
0507          * flushes and simultaneous journal replays from stomping on
0508          * each other wrt jd_log_bio. */
0509         down_read(&sdp->sd_log_flush_lock);
0510         for (pass = 0; pass < 2; pass++) {
0511             lops_before_scan(jd, &head, pass);
0512             error = foreach_descriptor(jd, head.lh_tail,
0513                            head.lh_blkno, pass);
0514             lops_after_scan(jd, error, pass);
0515             if (error) {
0516                 up_read(&sdp->sd_log_flush_lock);
0517                 goto fail_gunlock_thaw;
0518             }
0519         }
0520 
0521         recover_local_statfs(jd, &head);
0522         clean_journal(jd, &head);
0523         up_read(&sdp->sd_log_flush_lock);
0524 
0525         gfs2_freeze_unlock(&thaw_gh);
0526         t_rep = ktime_get();
0527         fs_info(sdp, "jid=%u: Journal replayed in %lldms [jlck:%lldms, "
0528             "jhead:%lldms, tlck:%lldms, replay:%lldms]\n",
0529             jd->jd_jid, ktime_ms_delta(t_rep, t_start),
0530             ktime_ms_delta(t_jlck, t_start),
0531             ktime_ms_delta(t_jhd, t_jlck),
0532             ktime_ms_delta(t_tlck, t_jhd),
0533             ktime_ms_delta(t_rep, t_tlck));
0534     }
0535 
0536     gfs2_recovery_done(sdp, jd->jd_jid, LM_RD_SUCCESS);
0537 
0538     if (jlocked) {
0539         gfs2_glock_dq_uninit(&ji_gh);
0540         gfs2_glock_dq_uninit(&j_gh);
0541     }
0542 
0543     fs_info(sdp, "jid=%u: Done\n", jd->jd_jid);
0544     goto done;
0545 
0546 fail_gunlock_thaw:
0547     gfs2_freeze_unlock(&thaw_gh);
0548 fail_gunlock_ji:
0549     if (jlocked) {
0550         gfs2_glock_dq_uninit(&ji_gh);
0551 fail_gunlock_j:
0552         gfs2_glock_dq_uninit(&j_gh);
0553     }
0554 
0555     fs_info(sdp, "jid=%u: %s\n", jd->jd_jid, (error) ? "Failed" : "Done");
0556 fail:
0557     jd->jd_recover_error = error;
0558     gfs2_recovery_done(sdp, jd->jd_jid, LM_RD_GAVEUP);
0559 done:
0560     clear_bit(JDF_RECOVERY, &jd->jd_flags);
0561     smp_mb__after_atomic();
0562     wake_up_bit(&jd->jd_flags, JDF_RECOVERY);
0563 }
0564 
0565 int gfs2_recover_journal(struct gfs2_jdesc *jd, bool wait)
0566 {
0567     int rv;
0568 
0569     if (test_and_set_bit(JDF_RECOVERY, &jd->jd_flags))
0570         return -EBUSY;
0571 
0572     /* we have JDF_RECOVERY, queue should always succeed */
0573     rv = queue_work(gfs_recovery_wq, &jd->jd_work);
0574     BUG_ON(!rv);
0575 
0576     if (wait)
0577         wait_on_bit(&jd->jd_flags, JDF_RECOVERY,
0578                 TASK_UNINTERRUPTIBLE);
0579 
0580     return wait ? jd->jd_recover_error : 0;
0581 }
0582