0001
0002
0003
0004
0005
0006
0007 #include <linux/module.h>
0008 #include <linux/slab.h>
0009 #include <linux/spinlock.h>
0010 #include <linux/completion.h>
0011 #include <linux/buffer_head.h>
0012 #include <linux/gfs2_ondisk.h>
0013 #include <linux/crc32.h>
0014 #include <linux/crc32c.h>
0015 #include <linux/ktime.h>
0016
0017 #include "gfs2.h"
0018 #include "incore.h"
0019 #include "bmap.h"
0020 #include "glock.h"
0021 #include "glops.h"
0022 #include "log.h"
0023 #include "lops.h"
0024 #include "meta_io.h"
0025 #include "recovery.h"
0026 #include "super.h"
0027 #include "util.h"
0028 #include "dir.h"
0029
/* Workqueue onto which gfs2_recover_journal() queues a journal's jd_work. */
0030 struct workqueue_struct *gfs_recovery_wq;
0031
0032 int gfs2_replay_read_block(struct gfs2_jdesc *jd, unsigned int blk,
0033 struct buffer_head **bh)
0034 {
0035 struct gfs2_inode *ip = GFS2_I(jd->jd_inode);
0036 struct gfs2_glock *gl = ip->i_gl;
0037 u64 dblock;
0038 u32 extlen;
0039 int error;
0040
0041 extlen = 32;
0042 error = gfs2_get_extent(&ip->i_inode, blk, &dblock, &extlen);
0043 if (error)
0044 return error;
0045 if (!dblock) {
0046 gfs2_consist_inode(ip);
0047 return -EIO;
0048 }
0049
0050 *bh = gfs2_meta_ra(gl, dblock, extlen);
0051
0052 return error;
0053 }
0054
0055 int gfs2_revoke_add(struct gfs2_jdesc *jd, u64 blkno, unsigned int where)
0056 {
0057 struct list_head *head = &jd->jd_revoke_list;
0058 struct gfs2_revoke_replay *rr = NULL, *iter;
0059
0060 list_for_each_entry(iter, head, rr_list) {
0061 if (iter->rr_blkno == blkno) {
0062 rr = iter;
0063 break;
0064 }
0065 }
0066
0067 if (rr) {
0068 rr->rr_where = where;
0069 return 0;
0070 }
0071
0072 rr = kmalloc(sizeof(struct gfs2_revoke_replay), GFP_NOFS);
0073 if (!rr)
0074 return -ENOMEM;
0075
0076 rr->rr_blkno = blkno;
0077 rr->rr_where = where;
0078 list_add(&rr->rr_list, head);
0079
0080 return 1;
0081 }
0082
0083 int gfs2_revoke_check(struct gfs2_jdesc *jd, u64 blkno, unsigned int where)
0084 {
0085 struct gfs2_revoke_replay *rr = NULL, *iter;
0086 int wrap, a, b, revoke;
0087
0088 list_for_each_entry(iter, &jd->jd_revoke_list, rr_list) {
0089 if (iter->rr_blkno == blkno) {
0090 rr = iter;
0091 break;
0092 }
0093 }
0094
0095 if (!rr)
0096 return 0;
0097
0098 wrap = (rr->rr_where < jd->jd_replay_tail);
0099 a = (jd->jd_replay_tail < where);
0100 b = (where < rr->rr_where);
0101 revoke = (wrap) ? (a || b) : (a && b);
0102
0103 return revoke;
0104 }
0105
0106 void gfs2_revoke_clean(struct gfs2_jdesc *jd)
0107 {
0108 struct list_head *head = &jd->jd_revoke_list;
0109 struct gfs2_revoke_replay *rr;
0110
0111 while (!list_empty(head)) {
0112 rr = list_first_entry(head, struct gfs2_revoke_replay, rr_list);
0113 list_del(&rr->rr_list);
0114 kfree(rr);
0115 }
0116 }
0117
0118 int __get_log_header(struct gfs2_sbd *sdp, const struct gfs2_log_header *lh,
0119 unsigned int blkno, struct gfs2_log_header_host *head)
0120 {
0121 u32 hash, crc;
0122
0123 if (lh->lh_header.mh_magic != cpu_to_be32(GFS2_MAGIC) ||
0124 lh->lh_header.mh_type != cpu_to_be32(GFS2_METATYPE_LH) ||
0125 (blkno && be32_to_cpu(lh->lh_blkno) != blkno))
0126 return 1;
0127
0128 hash = crc32(~0, lh, LH_V1_SIZE - 4);
0129 hash = ~crc32_le_shift(hash, 4);
0130
0131 if (be32_to_cpu(lh->lh_hash) != hash)
0132 return 1;
0133
0134 crc = crc32c(~0, (void *)lh + LH_V1_SIZE + 4,
0135 sdp->sd_sb.sb_bsize - LH_V1_SIZE - 4);
0136
0137 if ((lh->lh_crc != 0 && be32_to_cpu(lh->lh_crc) != crc))
0138 return 1;
0139
0140 head->lh_sequence = be64_to_cpu(lh->lh_sequence);
0141 head->lh_flags = be32_to_cpu(lh->lh_flags);
0142 head->lh_tail = be32_to_cpu(lh->lh_tail);
0143 head->lh_blkno = be32_to_cpu(lh->lh_blkno);
0144
0145 head->lh_local_total = be64_to_cpu(lh->lh_local_total);
0146 head->lh_local_free = be64_to_cpu(lh->lh_local_free);
0147 head->lh_local_dinodes = be64_to_cpu(lh->lh_local_dinodes);
0148
0149 return 0;
0150 }
0151
0152
0153
0154
0155
0156
0157
0158
0159
0160
0161
0162
0163
0164
0165 static int get_log_header(struct gfs2_jdesc *jd, unsigned int blk,
0166 struct gfs2_log_header_host *head)
0167 {
0168 struct gfs2_sbd *sdp = GFS2_SB(jd->jd_inode);
0169 struct buffer_head *bh;
0170 int error;
0171
0172 error = gfs2_replay_read_block(jd, blk, &bh);
0173 if (error)
0174 return error;
0175
0176 error = __get_log_header(sdp, (const struct gfs2_log_header *)bh->b_data,
0177 blk, head);
0178 brelse(bh);
0179
0180 return error;
0181 }
0182
0183
0184
0185
0186
0187
0188
0189
0190
0191
0192
0193
0194
0195
0196 static int foreach_descriptor(struct gfs2_jdesc *jd, u32 start,
0197 unsigned int end, int pass)
0198 {
0199 struct gfs2_sbd *sdp = GFS2_SB(jd->jd_inode);
0200 struct buffer_head *bh;
0201 struct gfs2_log_descriptor *ld;
0202 int error = 0;
0203 u32 length;
0204 __be64 *ptr;
0205 unsigned int offset = sizeof(struct gfs2_log_descriptor);
0206 offset += sizeof(__be64) - 1;
0207 offset &= ~(sizeof(__be64) - 1);
0208
0209 while (start != end) {
0210 error = gfs2_replay_read_block(jd, start, &bh);
0211 if (error)
0212 return error;
0213 if (gfs2_meta_check(sdp, bh)) {
0214 brelse(bh);
0215 return -EIO;
0216 }
0217 ld = (struct gfs2_log_descriptor *)bh->b_data;
0218 length = be32_to_cpu(ld->ld_length);
0219
0220 if (be32_to_cpu(ld->ld_header.mh_type) == GFS2_METATYPE_LH) {
0221 struct gfs2_log_header_host lh;
0222 error = get_log_header(jd, start, &lh);
0223 if (!error) {
0224 gfs2_replay_incr_blk(jd, &start);
0225 brelse(bh);
0226 continue;
0227 }
0228 if (error == 1) {
0229 gfs2_consist_inode(GFS2_I(jd->jd_inode));
0230 error = -EIO;
0231 }
0232 brelse(bh);
0233 return error;
0234 } else if (gfs2_metatype_check(sdp, bh, GFS2_METATYPE_LD)) {
0235 brelse(bh);
0236 return -EIO;
0237 }
0238 ptr = (__be64 *)(bh->b_data + offset);
0239 error = lops_scan_elements(jd, start, ld, ptr, pass);
0240 if (error) {
0241 brelse(bh);
0242 return error;
0243 }
0244
0245 while (length--)
0246 gfs2_replay_incr_blk(jd, &start);
0247
0248 brelse(bh);
0249 }
0250
0251 return 0;
0252 }
0253
0254
0255
0256
0257
0258
0259
0260
0261
0262 static void clean_journal(struct gfs2_jdesc *jd,
0263 struct gfs2_log_header_host *head)
0264 {
0265 struct gfs2_sbd *sdp = GFS2_SB(jd->jd_inode);
0266 u32 lblock = head->lh_blkno;
0267
0268 gfs2_replay_incr_blk(jd, &lblock);
0269 gfs2_write_log_header(sdp, jd, head->lh_sequence + 1, 0, lblock,
0270 GFS2_LOG_HEAD_UNMOUNT | GFS2_LOG_HEAD_RECOVERY,
0271 REQ_PREFLUSH | REQ_FUA | REQ_META | REQ_SYNC);
0272 if (jd->jd_jid == sdp->sd_lockstruct.ls_jid) {
0273 sdp->sd_log_flush_head = lblock;
0274 gfs2_log_incr_head(sdp);
0275 }
0276 }
0277
0278
0279 static void gfs2_recovery_done(struct gfs2_sbd *sdp, unsigned int jid,
0280 unsigned int message)
0281 {
0282 char env_jid[20];
0283 char env_status[20];
0284 char *envp[] = { env_jid, env_status, NULL };
0285 struct lm_lockstruct *ls = &sdp->sd_lockstruct;
0286
0287 ls->ls_recover_jid_done = jid;
0288 ls->ls_recover_jid_status = message;
0289 sprintf(env_jid, "JID=%u", jid);
0290 sprintf(env_status, "RECOVERY=%s",
0291 message == LM_RD_SUCCESS ? "Done" : "Failed");
0292 kobject_uevent_env(&sdp->sd_kobj, KOBJ_CHANGE, envp);
0293
0294 if (sdp->sd_lockstruct.ls_ops->lm_recovery_result)
0295 sdp->sd_lockstruct.ls_ops->lm_recovery_result(sdp, jid, message);
0296 }
0297
0298
0299
0300
0301
0302
0303
0304
0305
0306
0307 static int update_statfs_inode(struct gfs2_jdesc *jd,
0308 struct gfs2_log_header_host *head,
0309 struct inode *inode)
0310 {
0311 struct gfs2_sbd *sdp = GFS2_SB(jd->jd_inode);
0312 struct gfs2_inode *ip;
0313 struct buffer_head *bh;
0314 struct gfs2_statfs_change_host sc;
0315 int error = 0;
0316
0317 BUG_ON(!inode);
0318 ip = GFS2_I(inode);
0319
0320 error = gfs2_meta_inode_buffer(ip, &bh);
0321 if (error)
0322 goto out;
0323
0324 spin_lock(&sdp->sd_statfs_spin);
0325
0326 if (head) {
0327 gfs2_statfs_change_in(&sc, bh->b_data + sizeof(struct gfs2_dinode));
0328 sc.sc_total += head->lh_local_total;
0329 sc.sc_free += head->lh_local_free;
0330 sc.sc_dinodes += head->lh_local_dinodes;
0331 gfs2_statfs_change_out(&sc, bh->b_data + sizeof(struct gfs2_dinode));
0332
0333 fs_info(sdp, "jid=%u: Updated master statfs Total:%lld, "
0334 "Free:%lld, Dinodes:%lld after change "
0335 "[%+lld,%+lld,%+lld]\n", jd->jd_jid, sc.sc_total,
0336 sc.sc_free, sc.sc_dinodes, head->lh_local_total,
0337 head->lh_local_free, head->lh_local_dinodes);
0338 } else {
0339 memset(bh->b_data + sizeof(struct gfs2_dinode), 0,
0340 sizeof(struct gfs2_statfs_change));
0341
0342 if (jd->jd_jid == sdp->sd_lockstruct.ls_jid) {
0343 memset(&sdp->sd_statfs_local, 0,
0344 sizeof(struct gfs2_statfs_change_host));
0345 }
0346 }
0347 spin_unlock(&sdp->sd_statfs_spin);
0348
0349 mark_buffer_dirty(bh);
0350 brelse(bh);
0351 gfs2_inode_metasync(ip->i_gl);
0352
0353 out:
0354 return error;
0355 }
0356
0357
0358
0359
0360
0361
0362
0363
0364
0365
0366
0367
0368
0369
0370
0371
0372
0373
0374
0375
0376 static void recover_local_statfs(struct gfs2_jdesc *jd,
0377 struct gfs2_log_header_host *head)
0378 {
0379 int error;
0380 struct gfs2_sbd *sdp = GFS2_SB(jd->jd_inode);
0381
0382 if (!head->lh_local_total && !head->lh_local_free
0383 && !head->lh_local_dinodes)
0384 goto zero_local;
0385
0386
0387
0388 error = update_statfs_inode(jd, head, sdp->sd_statfs_inode);
0389 if (error)
0390 goto out;
0391
0392 zero_local:
0393
0394
0395 error = update_statfs_inode(jd, NULL,
0396 find_local_statfs_inode(sdp, jd->jd_jid));
0397 out:
0398 return;
0399 }
0400
0401 void gfs2_recover_func(struct work_struct *work)
0402 {
0403 struct gfs2_jdesc *jd = container_of(work, struct gfs2_jdesc, jd_work);
0404 struct gfs2_inode *ip = GFS2_I(jd->jd_inode);
0405 struct gfs2_sbd *sdp = GFS2_SB(jd->jd_inode);
0406 struct gfs2_log_header_host head;
0407 struct gfs2_holder j_gh, ji_gh, thaw_gh;
0408 ktime_t t_start, t_jlck, t_jhd, t_tlck, t_rep;
0409 int ro = 0;
0410 unsigned int pass;
0411 int error = 0;
0412 int jlocked = 0;
0413
0414 if (gfs2_withdrawn(sdp)) {
0415 fs_err(sdp, "jid=%u: Recovery not attempted due to withdraw.\n",
0416 jd->jd_jid);
0417 goto fail;
0418 }
0419 t_start = ktime_get();
0420 if (sdp->sd_args.ar_spectator)
0421 goto fail;
0422 if (jd->jd_jid != sdp->sd_lockstruct.ls_jid) {
0423 fs_info(sdp, "jid=%u: Trying to acquire journal lock...\n",
0424 jd->jd_jid);
0425 jlocked = 1;
0426
0427
0428 error = gfs2_glock_nq_num(sdp, jd->jd_jid, &gfs2_journal_glops,
0429 LM_ST_EXCLUSIVE,
0430 LM_FLAG_NOEXP | LM_FLAG_TRY | GL_NOCACHE,
0431 &j_gh);
0432 switch (error) {
0433 case 0:
0434 break;
0435
0436 case GLR_TRYFAILED:
0437 fs_info(sdp, "jid=%u: Busy\n", jd->jd_jid);
0438 error = 0;
0439 goto fail;
0440
0441 default:
0442 goto fail;
0443 }
0444
0445 error = gfs2_glock_nq_init(ip->i_gl, LM_ST_SHARED,
0446 LM_FLAG_NOEXP | GL_NOCACHE, &ji_gh);
0447 if (error)
0448 goto fail_gunlock_j;
0449 } else {
0450 fs_info(sdp, "jid=%u, already locked for use\n", jd->jd_jid);
0451 }
0452
0453 t_jlck = ktime_get();
0454 fs_info(sdp, "jid=%u: Looking at journal...\n", jd->jd_jid);
0455
0456 error = gfs2_jdesc_check(jd);
0457 if (error)
0458 goto fail_gunlock_ji;
0459
0460 error = gfs2_find_jhead(jd, &head, true);
0461 if (error)
0462 goto fail_gunlock_ji;
0463 t_jhd = ktime_get();
0464 fs_info(sdp, "jid=%u: Journal head lookup took %lldms\n", jd->jd_jid,
0465 ktime_ms_delta(t_jhd, t_jlck));
0466
0467 if (!(head.lh_flags & GFS2_LOG_HEAD_UNMOUNT)) {
0468 fs_info(sdp, "jid=%u: Acquiring the transaction lock...\n",
0469 jd->jd_jid);
0470
0471
0472
0473 error = gfs2_freeze_lock(sdp, &thaw_gh, LM_FLAG_PRIORITY);
0474 if (error)
0475 goto fail_gunlock_ji;
0476
0477 if (test_bit(SDF_RORECOVERY, &sdp->sd_flags)) {
0478 ro = 1;
0479 } else if (test_bit(SDF_JOURNAL_CHECKED, &sdp->sd_flags)) {
0480 if (!test_bit(SDF_JOURNAL_LIVE, &sdp->sd_flags))
0481 ro = 1;
0482 } else {
0483 if (sb_rdonly(sdp->sd_vfs)) {
0484
0485 ro = bdev_read_only(sdp->sd_vfs->s_bdev);
0486 if (!ro) {
0487 fs_info(sdp, "recovery required on "
0488 "read-only filesystem.\n");
0489 fs_info(sdp, "write access will be "
0490 "enabled during recovery.\n");
0491 }
0492 }
0493 }
0494
0495 if (ro) {
0496 fs_warn(sdp, "jid=%u: Can't replay: read-only block "
0497 "device\n", jd->jd_jid);
0498 error = -EROFS;
0499 goto fail_gunlock_thaw;
0500 }
0501
0502 t_tlck = ktime_get();
0503 fs_info(sdp, "jid=%u: Replaying journal...0x%x to 0x%x\n",
0504 jd->jd_jid, head.lh_tail, head.lh_blkno);
0505
0506
0507
0508
0509 down_read(&sdp->sd_log_flush_lock);
0510 for (pass = 0; pass < 2; pass++) {
0511 lops_before_scan(jd, &head, pass);
0512 error = foreach_descriptor(jd, head.lh_tail,
0513 head.lh_blkno, pass);
0514 lops_after_scan(jd, error, pass);
0515 if (error) {
0516 up_read(&sdp->sd_log_flush_lock);
0517 goto fail_gunlock_thaw;
0518 }
0519 }
0520
0521 recover_local_statfs(jd, &head);
0522 clean_journal(jd, &head);
0523 up_read(&sdp->sd_log_flush_lock);
0524
0525 gfs2_freeze_unlock(&thaw_gh);
0526 t_rep = ktime_get();
0527 fs_info(sdp, "jid=%u: Journal replayed in %lldms [jlck:%lldms, "
0528 "jhead:%lldms, tlck:%lldms, replay:%lldms]\n",
0529 jd->jd_jid, ktime_ms_delta(t_rep, t_start),
0530 ktime_ms_delta(t_jlck, t_start),
0531 ktime_ms_delta(t_jhd, t_jlck),
0532 ktime_ms_delta(t_tlck, t_jhd),
0533 ktime_ms_delta(t_rep, t_tlck));
0534 }
0535
0536 gfs2_recovery_done(sdp, jd->jd_jid, LM_RD_SUCCESS);
0537
0538 if (jlocked) {
0539 gfs2_glock_dq_uninit(&ji_gh);
0540 gfs2_glock_dq_uninit(&j_gh);
0541 }
0542
0543 fs_info(sdp, "jid=%u: Done\n", jd->jd_jid);
0544 goto done;
0545
0546 fail_gunlock_thaw:
0547 gfs2_freeze_unlock(&thaw_gh);
0548 fail_gunlock_ji:
0549 if (jlocked) {
0550 gfs2_glock_dq_uninit(&ji_gh);
0551 fail_gunlock_j:
0552 gfs2_glock_dq_uninit(&j_gh);
0553 }
0554
0555 fs_info(sdp, "jid=%u: %s\n", jd->jd_jid, (error) ? "Failed" : "Done");
0556 fail:
0557 jd->jd_recover_error = error;
0558 gfs2_recovery_done(sdp, jd->jd_jid, LM_RD_GAVEUP);
0559 done:
0560 clear_bit(JDF_RECOVERY, &jd->jd_flags);
0561 smp_mb__after_atomic();
0562 wake_up_bit(&jd->jd_flags, JDF_RECOVERY);
0563 }
0564
0565 int gfs2_recover_journal(struct gfs2_jdesc *jd, bool wait)
0566 {
0567 int rv;
0568
0569 if (test_and_set_bit(JDF_RECOVERY, &jd->jd_flags))
0570 return -EBUSY;
0571
0572
0573 rv = queue_work(gfs_recovery_wq, &jd->jd_work);
0574 BUG_ON(!rv);
0575
0576 if (wait)
0577 wait_on_bit(&jd->jd_flags, JDF_RECOVERY,
0578 TASK_UNINTERRUPTIBLE);
0579
0580 return wait ? jd->jd_recover_error : 0;
0581 }
0582