0001
0002
0003
0004
0005
0006
0007 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
0008
0009 #include <linux/fs.h>
0010 #include <linux/dlm.h>
0011 #include <linux/slab.h>
0012 #include <linux/types.h>
0013 #include <linux/delay.h>
0014 #include <linux/gfs2_ondisk.h>
0015 #include <linux/sched/signal.h>
0016
0017 #include "incore.h"
0018 #include "glock.h"
0019 #include "glops.h"
0020 #include "recovery.h"
0021 #include "util.h"
0022 #include "sys.h"
0023 #include "trace_gfs2.h"
0024
0025
0026
0027
0028
0029
0030
/**
 * gfs2_update_stats - update exponentially weighted moving averages for a
 *                     lock statistics pair
 * @s: the statistics block to update
 * @index: slot of the smoothed mean; slot @index + 1 holds the smoothed
 *         mean absolute deviation and is updated as well
 * @sample: the new sample value (here, a time in ns)
 *
 * TCP SRTT-style estimators using only shifts (no division):
 *   mean      += (sample - mean) / 8        (gain 1/8)
 *   deviation += (|sample - mean| - deviation) / 4   (gain 1/4)
 *
 * Note the deviation uses the *old* mean's delta, mirroring the classic
 * Jacobson/Karels RTT estimation scheme.
 */
static inline void gfs2_update_stats(struct gfs2_lkstats *s, unsigned index,
				     s64 sample)
{
	s64 delta = sample - s->stats[index];

	/* smoothed mean: move 1/8 of the way toward the new sample */
	s->stats[index] += (delta >> 3);
	index++;
	/* smoothed |deviation|: move 1/4 of the way toward |delta| */
	s->stats[index] += (s64)(abs(delta) - s->stats[index]) >> 2;
}
0057
0058
0059
0060
0061
0062
0063
0064
0065
0066
0067
0068
0069
0070
0071
0072
0073
0074
/**
 * gfs2_update_reply_times - update locking statistics on a DLM reply
 * @gl: the glock the reply relates to
 *
 * Computes the round trip time as now - gl_dstamp (set when the request
 * was sent) and feeds it into both the per-glock stats and the per-cpu,
 * per-glock-type stats.  Blocking requests update the SRTTB slot, others
 * the SRTT slot; gfs2_update_stats() also updates the paired variance
 * slot at index + 1.  Preemption is disabled so the this_cpu_ptr()
 * access and both updates hit the same cpu's statistics.
 */
static inline void gfs2_update_reply_times(struct gfs2_glock *gl)
{
	struct gfs2_pcpu_lkstats *lks;
	const unsigned gltype = gl->gl_name.ln_type;
	unsigned index = test_bit(GLF_BLOCKING, &gl->gl_flags) ?
			 GFS2_LKS_SRTTB : GFS2_LKS_SRTT;
	s64 rtt;

	preempt_disable();
	rtt = ktime_to_ns(ktime_sub(ktime_get_real(), gl->gl_dstamp));
	lks = this_cpu_ptr(gl->gl_name.ln_sbd->sd_lkstats);
	gfs2_update_stats(&gl->gl_stats, index, rtt);		/* local glock */
	gfs2_update_stats(&lks->lkstats[gltype], index, rtt);	/* per-cpu, per-type */
	preempt_enable();

	trace_gfs2_glock_lock_time(gl, rtt);
}
0092
0093
0094
0095
0096
0097
0098
0099
0100
0101
/**
 * gfs2_update_request_times - update inter-request time statistics
 * @gl: the glock a new request is being sent for
 *
 * Measures the interval between consecutive DLM requests on this glock
 * (now - previous gl_dstamp), resets gl_dstamp to now, and folds the
 * interval into the SIRT slots of both the per-glock and the per-cpu,
 * per-glock-type statistics.  Preemption is disabled so both updates go
 * to the same cpu's per-cpu stats.
 */
static inline void gfs2_update_request_times(struct gfs2_glock *gl)
{
	struct gfs2_pcpu_lkstats *lks;
	const unsigned gltype = gl->gl_name.ln_type;
	ktime_t dstamp;
	s64 irt;

	preempt_disable();
	dstamp = gl->gl_dstamp;
	gl->gl_dstamp = ktime_get_real();	/* timestamp of this request */
	irt = ktime_to_ns(ktime_sub(gl->gl_dstamp, dstamp));
	lks = this_cpu_ptr(gl->gl_name.ln_sbd->sd_lkstats);
	gfs2_update_stats(&gl->gl_stats, GFS2_LKS_SIRT, irt);		/* local glock */
	gfs2_update_stats(&lks->lkstats[gltype], GFS2_LKS_SIRT, irt);	/* per-cpu, per-type */
	preempt_enable();
}
0118
/*
 * gdlm_ast - DLM completion callback (AST) for a glock's lock request.
 *
 * Translates the DLM status in the lksb into a glock state / flags value
 * and completes the glock operation.  On -DLM_EUNLOCK the lock is gone
 * and the glock itself is freed here instead.
 */
static void gdlm_ast(void *arg)
{
	struct gfs2_glock *gl = arg;
	unsigned ret = gl->gl_state;

	gfs2_update_reply_times(gl);
	BUG_ON(gl->gl_lksb.sb_flags & DLM_SBF_DEMOTED);

	/* If the lock value block is no longer valid, clear our copy. */
	if ((gl->gl_lksb.sb_flags & DLM_SBF_VALNOTVALID) && gl->gl_lksb.sb_lvbptr)
		memset(gl->gl_lksb.sb_lvbptr, 0, GDLM_LVB_SIZE);

	switch (gl->gl_lksb.sb_status) {
	case -DLM_EUNLOCK: /* Unlocked, so glock can be freed */
		if (gl->gl_ops->go_free)
			gl->gl_ops->go_free(gl);
		gfs2_glock_free(gl);
		return;
	case -DLM_ECANCEL: /* Cancel while getting lock */
		ret |= LM_OUT_CANCELED;
		goto out;
	case -EAGAIN: /* Try lock fails */
	case -EDEADLK: /* Deadlock detected */
		goto out;
	case -ETIMEDOUT: /* Canceled due to timeout */
		ret |= LM_OUT_ERROR;
		goto out;
	case 0: /* Success */
		break;
	default:
		BUG();
	}

	ret = gl->gl_req;
	/*
	 * The ANY request may have been granted in an alternate mode
	 * (DLM_LKF_ALTPR/ALTCW set by make_flags); report the mode that
	 * was actually granted.
	 */
	if (gl->gl_lksb.sb_flags & DLM_SBF_ALTMODE) {
		if (gl->gl_req == LM_ST_SHARED)
			ret = LM_ST_DEFERRED;
		else if (gl->gl_req == LM_ST_DEFERRED)
			ret = LM_ST_SHARED;
		else
			BUG();
	}

	set_bit(GLF_INITIAL, &gl->gl_flags);
	gfs2_glock_complete(gl, ret);
	return;
out:
	/* The first lock request failed: forget the lock id. */
	if (!test_bit(GLF_INITIAL, &gl->gl_flags))
		gl->gl_lksb.sb_lkid = 0;
	gfs2_glock_complete(gl, ret);
}
0169
0170 static void gdlm_bast(void *arg, int mode)
0171 {
0172 struct gfs2_glock *gl = arg;
0173
0174 switch (mode) {
0175 case DLM_LOCK_EX:
0176 gfs2_glock_cb(gl, LM_ST_UNLOCKED);
0177 break;
0178 case DLM_LOCK_CW:
0179 gfs2_glock_cb(gl, LM_ST_DEFERRED);
0180 break;
0181 case DLM_LOCK_PR:
0182 gfs2_glock_cb(gl, LM_ST_SHARED);
0183 break;
0184 default:
0185 fs_err(gl->gl_name.ln_sbd, "unknown bast mode %d\n", mode);
0186 BUG();
0187 }
0188 }
0189
0190
0191
0192 static int make_mode(struct gfs2_sbd *sdp, const unsigned int lmstate)
0193 {
0194 switch (lmstate) {
0195 case LM_ST_UNLOCKED:
0196 return DLM_LOCK_NL;
0197 case LM_ST_EXCLUSIVE:
0198 return DLM_LOCK_EX;
0199 case LM_ST_DEFERRED:
0200 return DLM_LOCK_CW;
0201 case LM_ST_SHARED:
0202 return DLM_LOCK_PR;
0203 }
0204 fs_err(sdp, "unknown LM state %d\n", lmstate);
0205 BUG();
0206 return -1;
0207 }
0208
0209 static u32 make_flags(struct gfs2_glock *gl, const unsigned int gfs_flags,
0210 const int req)
0211 {
0212 u32 lkf = 0;
0213
0214 if (gl->gl_lksb.sb_lvbptr)
0215 lkf |= DLM_LKF_VALBLK;
0216
0217 if (gfs_flags & LM_FLAG_TRY)
0218 lkf |= DLM_LKF_NOQUEUE;
0219
0220 if (gfs_flags & LM_FLAG_TRY_1CB) {
0221 lkf |= DLM_LKF_NOQUEUE;
0222 lkf |= DLM_LKF_NOQUEUEBAST;
0223 }
0224
0225 if (gfs_flags & LM_FLAG_PRIORITY) {
0226 lkf |= DLM_LKF_NOORDER;
0227 lkf |= DLM_LKF_HEADQUE;
0228 }
0229
0230 if (gfs_flags & LM_FLAG_ANY) {
0231 if (req == DLM_LOCK_PR)
0232 lkf |= DLM_LKF_ALTCW;
0233 else if (req == DLM_LOCK_CW)
0234 lkf |= DLM_LKF_ALTPR;
0235 else
0236 BUG();
0237 }
0238
0239 if (gl->gl_lksb.sb_lkid != 0) {
0240 lkf |= DLM_LKF_CONVERT;
0241 if (test_bit(GLF_BLOCKING, &gl->gl_flags))
0242 lkf |= DLM_LKF_QUECVT;
0243 }
0244
0245 return lkf;
0246 }
0247
0248 static void gfs2_reverse_hex(char *c, u64 value)
0249 {
0250 *c = '0';
0251 while (value) {
0252 *c-- = hex_asc[value & 0x0f];
0253 value >>= 4;
0254 }
0255 }
0256
/*
 * gdlm_lock - submit a lock request (new lock or conversion) to the DLM
 * @gl: the glock
 * @req_state: the LM_ST_* state being requested
 * @flags: LM_FLAG_* request flags
 *
 * For a brand new lock (sb_lkid == 0) the resource name is built as a
 * fixed-width string: 8 hex chars of glock type followed by 16 hex chars
 * of the glock number, written backwards from offsets 7 and 23 into a
 * space-padded buffer.  The request is retried while the lockspace
 * reports -EBUSY (e.g. during recovery).
 */
static int gdlm_lock(struct gfs2_glock *gl, unsigned int req_state,
		     unsigned int flags)
{
	struct lm_lockstruct *ls = &gl->gl_name.ln_sbd->sd_lockstruct;
	int req;
	u32 lkf;
	char strname[GDLM_STRNAME_BYTES] = "";
	int error;

	req = make_mode(gl->gl_name.ln_sbd, req_state);
	lkf = make_flags(gl, flags, req);
	gfs2_glstats_inc(gl, GFS2_LKS_DCOUNT);
	gfs2_sbstats_inc(gl, GFS2_LKS_DCOUNT);
	if (gl->gl_lksb.sb_lkid) {
		/* conversion of an existing lock */
		gfs2_update_request_times(gl);
	} else {
		/* first request: build the resource name, start the clock */
		memset(strname, ' ', GDLM_STRNAME_BYTES - 1);
		strname[GDLM_STRNAME_BYTES - 1] = '\0';
		gfs2_reverse_hex(strname + 7, gl->gl_name.ln_type);
		gfs2_reverse_hex(strname + 23, gl->gl_name.ln_number);
		gl->gl_dstamp = ktime_get_real();
	}

	/*
	 * Submit the actual lock request.
	 */

again:
	error = dlm_lock(ls->ls_dlm, req, &gl->gl_lksb, lkf, strname,
			GDLM_STRNAME_BYTES - 1, 0, gdlm_ast, gl, gdlm_bast);
	if (error == -EBUSY) {
		msleep(20);
		goto again;
	}
	return error;
}
0292
/*
 * gdlm_put_lock - drop our final reference to a glock's DLM lock
 * @gl: the glock
 *
 * Sends an unlock to the DLM; the glock itself is then freed from
 * gdlm_ast() when -DLM_EUNLOCK arrives.  Several fast paths free the
 * glock directly without talking to the DLM: no lock was ever granted,
 * the lockspace is being unmounted, or unlocks are deliberately being
 * skipped (SDF_SKIP_DLM_UNLOCK, unless an LVB must be synced back).
 */
static void gdlm_put_lock(struct gfs2_glock *gl)
{
	struct gfs2_sbd *sdp = gl->gl_name.ln_sbd;
	struct lm_lockstruct *ls = &sdp->sd_lockstruct;
	int error;

	/* no lock id: the DLM never granted anything to release */
	if (gl->gl_lksb.sb_lkid == 0) {
		gfs2_glock_free(gl);
		return;
	}

	clear_bit(GLF_BLOCKING, &gl->gl_flags);
	gfs2_glstats_inc(gl, GFS2_LKS_DCOUNT);
	gfs2_sbstats_inc(gl, GFS2_LKS_DCOUNT);
	gfs2_update_request_times(gl);

	/* don't want to call dlm if we've unmounted the lock protocol */
	if (test_bit(DFL_UNMOUNT, &ls->ls_recover_flags)) {
		gfs2_glock_free(gl);
		return;
	}
	/* don't want to skip dlm_unlock writing the lvb when lock has one */

	if (test_bit(SDF_SKIP_DLM_UNLOCK, &sdp->sd_flags) &&
	    !gl->gl_lksb.sb_lvbptr) {
		gfs2_glock_free(gl);
		return;
	}

again:
	error = dlm_unlock(ls->ls_dlm, gl->gl_lksb.sb_lkid, DLM_LKF_VALBLK,
			   NULL, gl);
	if (error == -EBUSY) {
		msleep(20);
		goto again;
	}

	if (error) {
		fs_err(sdp, "gdlm_unlock %x,%llx err=%d\n",
		       gl->gl_name.ln_type,
		       (unsigned long long)gl->gl_name.ln_number, error);
		return;
	}
}
0337
0338 static void gdlm_cancel(struct gfs2_glock *gl)
0339 {
0340 struct lm_lockstruct *ls = &gl->gl_name.ln_sbd->sd_lockstruct;
0341 dlm_unlock(ls->ls_dlm, gl->gl_lksb.sb_lkid, DLM_LKF_CANCEL, NULL, gl);
0342 }
0343
0344
0345
0346
0347
0348
0349
0350
0351
0352
0353
0354
0355
0356
0357
0358
0359
0360
0361
0362
0363
0364
0365
0366
0367
0368
0369
0370
0371
0372
0373
0374
0375
0376
0377
0378
0379
0380
0381
0382
0383
0384
0385
0386
0387
0388
0389
0390
0391
0392
0393
0394
0395
0396
0397
0398
0399
0400
0401
0402
0403
0404
0405
0406
0407
0408
0409
0410
0411
0412
0413
0414
0415
0416
0417
0418
0419
0420
0421
0422
0423
0424
0425
0426
0427
0428
0429
0430
0431
0432
0433
0434
0435
0436
0437
0438
0439
0440
0441
0442
0443
0444
0445
0446
0447
0448
0449
0450
0451
0452
0453
0454
0455
0456
0457
0458
0459
0460
0461
0462
0463
0464
0465
0466
0467
0468
0469
0470
0471
0472
0473
0474
0475
0476
0477
0478
0479
0480
0481
0482
0483
0484
/*
 * Layout of the control lock's LVB: a little-endian u32 generation number
 * in the first bytes, and from byte 8 onwards a bitmap with one bit per
 * journal id marking journals that need recovery.
 */
#define JID_BITMAP_OFFSET 8  /* 4 byte generation number + 4 byte unused */

/* Copy the control LVB into @lvb_bits and extract the generation number. */
static void control_lvb_read(struct lm_lockstruct *ls, uint32_t *lvb_gen,
			     char *lvb_bits)
{
	__le32 gen;
	memcpy(lvb_bits, ls->ls_control_lvb, GDLM_LVB_SIZE);
	memcpy(&gen, lvb_bits, sizeof(__le32));
	*lvb_gen = le32_to_cpu(gen);
}
0495
0496 static void control_lvb_write(struct lm_lockstruct *ls, uint32_t lvb_gen,
0497 char *lvb_bits)
0498 {
0499 __le32 gen;
0500 memcpy(ls->ls_control_lvb, lvb_bits, GDLM_LVB_SIZE);
0501 gen = cpu_to_le32(lvb_gen);
0502 memcpy(ls->ls_control_lvb, &gen, sizeof(__le32));
0503 }
0504
0505 static int all_jid_bits_clear(char *lvb)
0506 {
0507 return !memchr_inv(lvb + JID_BITMAP_OFFSET, 0,
0508 GDLM_LVB_SIZE - JID_BITMAP_OFFSET);
0509 }
0510
0511 static void sync_wait_cb(void *arg)
0512 {
0513 struct lm_lockstruct *ls = arg;
0514 complete(&ls->ls_sync_wait);
0515 }
0516
/*
 * sync_unlock - synchronously unlock one of the mount-time control locks
 * @sdp: the filesystem
 * @lksb: the lksb of the lock to release
 * @name: human-readable lock name for error messages
 *
 * Issues dlm_unlock() and waits on ls_sync_wait (completed by
 * sync_wait_cb) for the result.  Returns 0 on success, the dlm_unlock()
 * error, or -1 when the final status is not -DLM_EUNLOCK.
 */
static int sync_unlock(struct gfs2_sbd *sdp, struct dlm_lksb *lksb, char *name)
{
	struct lm_lockstruct *ls = &sdp->sd_lockstruct;
	int error;

	error = dlm_unlock(ls->ls_dlm, lksb->sb_lkid, 0, lksb, ls);
	if (error) {
		fs_err(sdp, "%s lkid %x error %d\n",
		       name, lksb->sb_lkid, error);
		return error;
	}

	wait_for_completion(&ls->ls_sync_wait);

	if (lksb->sb_status != -DLM_EUNLOCK) {
		fs_err(sdp, "%s lkid %x status %d\n",
		       name, lksb->sb_lkid, lksb->sb_status);
		return -1;
	}
	return 0;
}
0538
/*
 * sync_lock - synchronously acquire/convert one of the mount-time locks
 * @sdp: the filesystem
 * @mode: requested DLM mode
 * @flags: DLM_LKF_* flags
 * @num: lock number (GFS2_CONTROL_LOCK or GFS2_MOUNTED_LOCK), formatted
 *       into the resource name as "%8x%16x" with type LM_TYPE_NONDISK
 * @lksb: lksb for this lock
 * @name: human-readable lock name for error messages
 *
 * Waits on ls_sync_wait for the AST and returns the final sb_status
 * (0 on success; -EAGAIN is expected with DLM_LKF_NOQUEUE and is not
 * logged as an error).
 */
static int sync_lock(struct gfs2_sbd *sdp, int mode, uint32_t flags,
		     unsigned int num, struct dlm_lksb *lksb, char *name)
{
	struct lm_lockstruct *ls = &sdp->sd_lockstruct;
	char strname[GDLM_STRNAME_BYTES];
	int error, status;

	memset(strname, 0, GDLM_STRNAME_BYTES);
	snprintf(strname, GDLM_STRNAME_BYTES, "%8x%16x", LM_TYPE_NONDISK, num);

	error = dlm_lock(ls->ls_dlm, mode, lksb, flags,
			 strname, GDLM_STRNAME_BYTES - 1,
			 0, sync_wait_cb, ls, NULL);
	if (error) {
		fs_err(sdp, "%s lkid %x flags %x mode %d error %d\n",
		       name, lksb->sb_lkid, flags, mode, error);
		return error;
	}

	wait_for_completion(&ls->ls_sync_wait);

	status = lksb->sb_status;

	if (status && status != -EAGAIN) {
		fs_err(sdp, "%s lkid %x flags %x mode %d status %d\n",
		       name, lksb->sb_lkid, flags, mode, status);
	}

	return status;
}
0569
0570 static int mounted_unlock(struct gfs2_sbd *sdp)
0571 {
0572 struct lm_lockstruct *ls = &sdp->sd_lockstruct;
0573 return sync_unlock(sdp, &ls->ls_mounted_lksb, "mounted_lock");
0574 }
0575
0576 static int mounted_lock(struct gfs2_sbd *sdp, int mode, uint32_t flags)
0577 {
0578 struct lm_lockstruct *ls = &sdp->sd_lockstruct;
0579 return sync_lock(sdp, mode, flags, GFS2_MOUNTED_LOCK,
0580 &ls->ls_mounted_lksb, "mounted_lock");
0581 }
0582
0583 static int control_unlock(struct gfs2_sbd *sdp)
0584 {
0585 struct lm_lockstruct *ls = &sdp->sd_lockstruct;
0586 return sync_unlock(sdp, &ls->ls_control_lksb, "control_lock");
0587 }
0588
0589 static int control_lock(struct gfs2_sbd *sdp, int mode, uint32_t flags)
0590 {
0591 struct lm_lockstruct *ls = &sdp->sd_lockstruct;
0592 return sync_lock(sdp, mode, flags, GFS2_CONTROL_LOCK,
0593 &ls->ls_control_lksb, "control_lock");
0594 }
0595
0596
0597
0598
0599
/*
 * remote_withdraw - recover the journals of withdrawn remote nodes
 * @sdp: the filesystem
 *
 * Walks the journal index and recovers every journal except our own,
 * stopping at the first failure.
 * NOTE(review): sd_jindex_list is walked without an explicit lock here;
 * presumably the caller's context guarantees the list is stable — verify.
 */
static void remote_withdraw(struct gfs2_sbd *sdp)
{
	struct gfs2_jdesc *jd;
	int ret = 0, count = 0;

	list_for_each_entry(jd, &sdp->sd_jindex_list, jd_list) {
		if (jd->jd_jid == sdp->sd_lockstruct.ls_jid)
			continue;	/* skip our own journal */
		ret = gfs2_recover_journal(jd, true);
		if (ret)
			break;
		count++;
	}

	/* Now drop the additional reference we acquired */
	fs_err(sdp, "Journals checked: %d, ret = %d.\n", count, ret);
}
0617
/*
 * gfs2_control_func - coordinate journal recovery among nodes
 *
 * Runs from sd_control_work.  Uses the control lock's LVB (generation
 * number + per-jid bitmap) to decide which journals this node should
 * recover and to publish/clear recovery state for other nodes.  The
 * ls_recover_* fields are only read/written under ls_recover_spin.
 */
static void gfs2_control_func(struct work_struct *work)
{
	struct gfs2_sbd *sdp = container_of(work, struct gfs2_sbd, sd_control_work.work);
	struct lm_lockstruct *ls = &sdp->sd_lockstruct;
	uint32_t block_gen, start_gen, lvb_gen, flags;
	int recover_set = 0;
	int write_lvb = 0;
	int recover_size;
	int i, error;

	/* First check for other nodes that may have done a withdraw. */
	if (test_bit(SDF_REMOTE_WITHDRAW, &sdp->sd_flags)) {
		remote_withdraw(sdp);
		clear_bit(SDF_REMOTE_WITHDRAW, &sdp->sd_flags);
		return;
	}

	spin_lock(&ls->ls_recover_spin);
	/*
	 * No processing while:
	 * - our own mount is not finished (DFL_MOUNT_DONE clear), or
	 * - we are the first mounter (DFL_FIRST_MOUNT set): first mounter
	 *   recovery is handled separately in control_first_done().
	 */
	if (!test_bit(DFL_MOUNT_DONE, &ls->ls_recover_flags) ||
	    test_bit(DFL_FIRST_MOUNT, &ls->ls_recover_flags)) {
		spin_unlock(&ls->ls_recover_spin);
		return;
	}
	block_gen = ls->ls_recover_block;
	start_gen = ls->ls_recover_start;
	spin_unlock(&ls->ls_recover_spin);

	/*
	 * Equal block and start generations mean dlm recovery is in
	 * progress (locks are blocked); nothing to do until
	 * recover_done() bumps ls_recover_start.
	 */
	if (block_gen == start_gen)
		return;

	/*
	 * Take the control lock EX to read and possibly update the LVB:
	 * clear jid bits for journals we recovered successfully, and set
	 * jid bits for journals whose recovery was interrupted by a new
	 * dlm recovery cycle so that another node (or we) will redo them.
	 */

	error = control_lock(sdp, DLM_LOCK_EX, DLM_LKF_CONVERT|DLM_LKF_VALBLK);
	if (error) {
		fs_err(sdp, "control lock EX error %d\n", error);
		return;
	}

	control_lvb_read(ls, &lvb_gen, ls->ls_lvb_bits);

	spin_lock(&ls->ls_recover_spin);
	/* A new recovery generation appeared while we took the lock:
	   drop back to NL and let the requeued work redo everything. */
	if (block_gen != ls->ls_recover_block ||
	    start_gen != ls->ls_recover_start) {
		fs_info(sdp, "recover generation %u block1 %u %u\n",
			start_gen, block_gen, ls->ls_recover_block);
		spin_unlock(&ls->ls_recover_spin);
		control_lock(sdp, DLM_LOCK_NL, DLM_LKF_CONVERT);
		return;
	}

	recover_size = ls->ls_recover_size;

	if (lvb_gen <= start_gen) {
		/*
		 * Clear the bits of jids we have successfully recovered.
		 * Only clear a bit when the LVB is not ahead of us
		 * (lvb_gen <= start_gen), otherwise a newer generation
		 * may have re-marked that jid for recovery.
		 */
		for (i = 0; i < recover_size; i++) {
			if (ls->ls_recover_result[i] != LM_RD_SUCCESS)
				continue;

			ls->ls_recover_result[i] = 0;

			if (!test_bit_le(i, ls->ls_lvb_bits + JID_BITMAP_OFFSET))
				continue;

			__clear_bit_le(i, ls->ls_lvb_bits + JID_BITMAP_OFFSET);
			write_lvb = 1;
		}
	}

	if (lvb_gen == start_gen) {
		/*
		 * Drop stale submissions from older generations; the
		 * current generation's submissions are still pending.
		 */
		for (i = 0; i < recover_size; i++) {
			if (!ls->ls_recover_submit[i])
				continue;
			if (ls->ls_recover_submit[i] < lvb_gen)
				ls->ls_recover_submit[i] = 0;
		}
	} else if (lvb_gen < start_gen) {
		/*
		 * The LVB lags our generation: publish the jids whose
		 * recovery was requested before start_gen so all nodes
		 * see them, and stamp the LVB with start_gen below.
		 */
		for (i = 0; i < recover_size; i++) {
			if (!ls->ls_recover_submit[i])
				continue;
			if (ls->ls_recover_submit[i] < start_gen) {
				ls->ls_recover_submit[i] = 0;
				__set_bit_le(i, ls->ls_lvb_bits + JID_BITMAP_OFFSET);
			}
		}
		/* even if there are no bits to set, we need to write the
		   latest generation to the lvb */
		write_lvb = 1;
	} else {
		/*
		 * lvb_gen > start_gen: the LVB is ahead of us; leave it
		 * alone and let the newer generation drive recovery.
		 */
	}
	spin_unlock(&ls->ls_recover_spin);

	if (write_lvb) {
		control_lvb_write(ls, start_gen, ls->ls_lvb_bits);
		flags = DLM_LKF_CONVERT | DLM_LKF_VALBLK;
	} else {
		flags = DLM_LKF_CONVERT;
	}

	error = control_lock(sdp, DLM_LOCK_NL, flags);
	if (error) {
		fs_err(sdp, "control lock NL error %d\n", error);
		return;
	}

	/*
	 * Everyone will see jid bits set in the lvb, run gfs2_recover_set(),
	 * and clear a jid bit in the lvb if the recovery is a success.
	 * Eventually all journals will be recovered, all jid bits will
	 * be cleared in the lvb, and everyone will clear BLOCK_LOCKS.
	 */

	for (i = 0; i < recover_size; i++) {
		if (test_bit_le(i, ls->ls_lvb_bits + JID_BITMAP_OFFSET)) {
			fs_info(sdp, "recover generation %u jid %d\n",
				start_gen, i);
			gfs2_recover_set(sdp, i);
			recover_set++;
		}
	}
	if (recover_set)
		return;

	/*
	 * No more jids to recover in this generation.  If no new
	 * generation has started meanwhile, recovery for this one is
	 * complete: unblock locks and thaw the glocks.
	 */
	spin_lock(&ls->ls_recover_spin);
	if (ls->ls_recover_block == block_gen &&
	    ls->ls_recover_start == start_gen) {
		clear_bit(DFL_BLOCK_LOCKS, &ls->ls_recover_flags);
		spin_unlock(&ls->ls_recover_spin);
		fs_info(sdp, "recover generation %u done\n", start_gen);
		gfs2_glock_thaw(sdp);
	} else {
		fs_info(sdp, "recover generation %u block2 %u %u\n",
			start_gen, block_gen, ls->ls_recover_block);
		spin_unlock(&ls->ls_recover_spin);
	}
}
0802
/*
 * control_mount - mount-time negotiation via the control/mounted locks
 * @sdp: the filesystem
 *
 * Determines whether we are the "first mounter" (mounted lock granted
 * EX) — in which case DFL_FIRST_MOUNT is set and other mounters are held
 * off until control_first_done() — or a later mounter (mounted lock PR),
 * in which case we wait until the control LVB shows that all pending
 * journal recovery has finished before returning.  Retries loop back to
 * restart: and can be interrupted by a signal.
 */
static int control_mount(struct gfs2_sbd *sdp)
{
	struct lm_lockstruct *ls = &sdp->sd_lockstruct;
	uint32_t start_gen, block_gen, mount_gen, lvb_gen;
	int mounted_mode;
	int retries = 0;
	int error;

	memset(&ls->ls_mounted_lksb, 0, sizeof(struct dlm_lksb));
	memset(&ls->ls_control_lksb, 0, sizeof(struct dlm_lksb));
	memset(&ls->ls_control_lvb, 0, GDLM_LVB_SIZE);
	ls->ls_control_lksb.sb_lvbptr = ls->ls_control_lvb;
	init_completion(&ls->ls_sync_wait);

	set_bit(DFL_BLOCK_LOCKS, &ls->ls_recover_flags);

	/* Both locks start in NL; we bid for stronger modes below. */
	error = control_lock(sdp, DLM_LOCK_NL, DLM_LKF_VALBLK);
	if (error) {
		fs_err(sdp, "control_mount control_lock NL error %d\n", error);
		return error;
	}

	error = mounted_lock(sdp, DLM_LOCK_NL, 0);
	if (error) {
		fs_err(sdp, "control_mount mounted_lock NL error %d\n", error);
		control_unlock(sdp);
		return error;
	}
	mounted_mode = DLM_LOCK_NL;

restart:
	if (retries++ && signal_pending(current)) {
		error = -EINTR;
		goto fail;
	}

	/*
	 * We always start with both locks in NL. control_lock is
	 * demoted to NL below so we don't need to do it here.
	 */
	if (mounted_mode != DLM_LOCK_NL) {
		error = mounted_lock(sdp, DLM_LOCK_NL, DLM_LKF_CONVERT);
		if (error)
			goto fail;
		mounted_mode = DLM_LOCK_NL;
	}

	/*
	 * Brief pause between retries so a first mounter that holds
	 * control EX has a chance to make progress before we contend
	 * for the lock again.
	 */
	msleep_interruptible(500);

	/*
	 * Acquire control EX (NOQUEUE) to get a consistent view of the
	 * LVB; -EAGAIN means another node holds it, so retry.
	 */
	error = control_lock(sdp, DLM_LOCK_EX, DLM_LKF_CONVERT|DLM_LKF_NOQUEUE|DLM_LKF_VALBLK);
	if (error == -EAGAIN) {
		goto restart;
	} else if (error) {
		fs_err(sdp, "control_mount control_lock EX error %d\n", error);
		goto fail;
	}

	/*
	 * Spectator mounts never become the first mounter (they cannot
	 * write to the fs), so they skip the mounted-lock bidding and
	 * keep mounted_mode == DLM_LOCK_NL.
	 */
	if (sdp->sd_args.ar_spectator)
		goto locks_done;

	/* mounted EX ==> nobody else is mounted: we are the first mounter */
	error = mounted_lock(sdp, DLM_LOCK_EX, DLM_LKF_CONVERT|DLM_LKF_NOQUEUE);
	if (!error) {
		mounted_mode = DLM_LOCK_EX;
		goto locks_done;
	} else if (error != -EAGAIN) {
		fs_err(sdp, "control_mount mounted_lock EX error %d\n", error);
		goto fail;
	}

	/* mounted PR ==> other nodes hold it PR too: normal mounter */
	error = mounted_lock(sdp, DLM_LOCK_PR, DLM_LKF_CONVERT|DLM_LKF_NOQUEUE);
	if (!error) {
		mounted_mode = DLM_LOCK_PR;
		goto locks_done;
	} else {
		/* not even -EAGAIN should happen; PR should always grant */
		fs_err(sdp, "control_mount mounted_lock PR error %d\n", error);
		goto fail;
	}

locks_done:
	/*
	 * We hold control EX here; read the LVB to learn the recovery
	 * generation and the pending-recovery jid bitmap.
	 */
	control_lvb_read(ls, &lvb_gen, ls->ls_lvb_bits);

	if (lvb_gen == 0xFFFFFFFF) {
		/* generation sentinel written elsewhere to disable mounts */
		fs_err(sdp, "control_mount control_lock disabled\n");
		error = -EINVAL;
		goto fail;
	}

	if (mounted_mode == DLM_LOCK_EX) {
		/* first mounter: keep control EX until control_first_done() */
		spin_lock(&ls->ls_recover_spin);
		clear_bit(DFL_BLOCK_LOCKS, &ls->ls_recover_flags);
		set_bit(DFL_MOUNT_DONE, &ls->ls_recover_flags);
		set_bit(DFL_FIRST_MOUNT, &ls->ls_recover_flags);
		spin_unlock(&ls->ls_recover_spin);
		fs_info(sdp, "first mounter control generation %u\n", lvb_gen);
		return 0;
	}

	error = control_lock(sdp, DLM_LOCK_NL, DLM_LKF_CONVERT);
	if (error)
		goto fail;

	/*
	 * Not the first mounter.  We may not mount while journals still
	 * need recovery (jid bits set in the LVB) or while the recovery
	 * state visible to us is inconsistent — in those cases loop back
	 * and wait.
	 */
	if (!all_jid_bits_clear(ls->ls_lvb_bits)) {
		/* journals need recovery, wait until all are clear */
		fs_info(sdp, "control_mount wait for journal recovery\n");
		goto restart;
	}

	spin_lock(&ls->ls_recover_spin);
	block_gen = ls->ls_recover_block;
	start_gen = ls->ls_recover_start;
	mount_gen = ls->ls_recover_mount;

	if (lvb_gen < mount_gen) {
		/* LVB is behind the generation at which we joined:
		   recovery results have not been published yet */
		if (sdp->sd_args.ar_spectator) {
			fs_info(sdp, "Recovery is required. Waiting for a "
				"non-spectator to mount.\n");
			msleep_interruptible(1000);
		} else {
			fs_info(sdp, "control_mount wait1 block %u start %u "
				"mount %u lvb %u flags %lx\n", block_gen,
				start_gen, mount_gen, lvb_gen,
				ls->ls_recover_flags);
		}
		spin_unlock(&ls->ls_recover_spin);
		goto restart;
	}

	if (lvb_gen != start_gen) {
		/* LVB generation doesn't match our latest known
		   generation: wait until they agree */
		fs_info(sdp, "control_mount wait2 block %u start %u mount %u "
			"lvb %u flags %lx\n", block_gen, start_gen, mount_gen,
			lvb_gen, ls->ls_recover_flags);
		spin_unlock(&ls->ls_recover_spin);
		goto restart;
	}

	if (block_gen == start_gen) {
		/* dlm recovery is in progress; wait for it to finish */
		fs_info(sdp, "control_mount wait3 block %u start %u mount %u "
			"lvb %u flags %lx\n", block_gen, start_gen, mount_gen,
			lvb_gen, ls->ls_recover_flags);
		spin_unlock(&ls->ls_recover_spin);
		goto restart;
	}

	clear_bit(DFL_BLOCK_LOCKS, &ls->ls_recover_flags);
	set_bit(DFL_MOUNT_DONE, &ls->ls_recover_flags);
	memset(ls->ls_recover_submit, 0, ls->ls_recover_size*sizeof(uint32_t));
	memset(ls->ls_recover_result, 0, ls->ls_recover_size*sizeof(uint32_t));
	spin_unlock(&ls->ls_recover_spin);
	return 0;

fail:
	mounted_unlock(sdp);
	control_unlock(sdp);
	return error;
}
1000
/*
 * control_first_done - first mounter finished recovering all journals
 * @sdp: the filesystem
 *
 * Clears DFL_FIRST_MOUNT, zeroes the jid bitmap, writes the current
 * generation into the control LVB, and demotes mounted to PR and
 * control to NL so other nodes can complete their mounts.  If a dlm
 * recovery cycle is in progress (start == block), waits for it to end
 * (DFL_DLM_RECOVERY cleared by gdlm_recover_done) and retries.
 */
static int control_first_done(struct gfs2_sbd *sdp)
{
	struct lm_lockstruct *ls = &sdp->sd_lockstruct;
	uint32_t start_gen, block_gen;
	int error;

restart:
	spin_lock(&ls->ls_recover_spin);
	start_gen = ls->ls_recover_start;
	block_gen = ls->ls_recover_block;

	if (test_bit(DFL_BLOCK_LOCKS, &ls->ls_recover_flags) ||
	    !test_bit(DFL_MOUNT_DONE, &ls->ls_recover_flags) ||
	    !test_bit(DFL_FIRST_MOUNT, &ls->ls_recover_flags)) {
		/* sanity: these flags should never be in this combination
		   when the first mounter reaches this point */
		fs_err(sdp, "control_first_done start %u block %u flags %lx\n",
		       start_gen, block_gen, ls->ls_recover_flags);
		spin_unlock(&ls->ls_recover_spin);
		control_unlock(sdp);
		return -1;
	}

	if (start_gen == block_gen) {
		/*
		 * Wait for the end of a dlm recovery cycle to switch from
		 * first mounter recovery.  We can ignore any recover_slot
		 * callbacks between the recover_prep and next recover_done
		 * because we are still the first mounter and any failed
		 * nodes have not fully mounted, so they don't need
		 * recovery.
		 */
		spin_unlock(&ls->ls_recover_spin);
		fs_info(sdp, "control_first_done wait gen %u\n", start_gen);

		wait_on_bit(&ls->ls_recover_flags, DFL_DLM_RECOVERY,
			    TASK_UNINTERRUPTIBLE);
		goto restart;
	}

	clear_bit(DFL_FIRST_MOUNT, &ls->ls_recover_flags);
	set_bit(DFL_FIRST_MOUNT_DONE, &ls->ls_recover_flags);
	memset(ls->ls_recover_submit, 0, ls->ls_recover_size*sizeof(uint32_t));
	memset(ls->ls_recover_result, 0, ls->ls_recover_size*sizeof(uint32_t));
	spin_unlock(&ls->ls_recover_spin);

	/* publish an empty jid bitmap with the current generation */
	memset(ls->ls_lvb_bits, 0, GDLM_LVB_SIZE);
	control_lvb_write(ls, start_gen, ls->ls_lvb_bits);

	error = mounted_lock(sdp, DLM_LOCK_PR, DLM_LKF_CONVERT);
	if (error)
		fs_err(sdp, "control_first_done mounted PR error %d\n", error);

	error = control_lock(sdp, DLM_LOCK_NL, DLM_LKF_CONVERT|DLM_LKF_VALBLK);
	if (error)
		fs_err(sdp, "control_first_done control NL error %d\n", error);

	return error;
}
1058
1059
1060
1061
1062
1063
1064
/* Grow the recovery arrays in increments of this many slots. */
#define RECOVER_SIZE_INC 16

/*
 * set_recover_size - size the per-jid recovery tracking arrays
 * @sdp: the filesystem
 * @slots: slot array from the dlm recover_done callback (may be NULL)
 * @num_slots: number of entries in @slots
 *
 * Ensures ls_recover_submit/ls_recover_result can index the highest jid
 * (dlm slot - 1) in use, growing in RECOVER_SIZE_INC steps, and lazily
 * allocates ls_lvb_bits.  Old contents are preserved across a resize;
 * the swap is done under ls_recover_spin.  Returns 0 or -ENOMEM.
 */
static int set_recover_size(struct gfs2_sbd *sdp, struct dlm_slot *slots,
			    int num_slots)
{
	struct lm_lockstruct *ls = &sdp->sd_lockstruct;
	uint32_t *submit = NULL;
	uint32_t *result = NULL;
	uint32_t old_size, new_size;
	int i, max_jid;

	if (!ls->ls_lvb_bits) {
		ls->ls_lvb_bits = kzalloc(GDLM_LVB_SIZE, GFP_NOFS);
		if (!ls->ls_lvb_bits)
			return -ENOMEM;
	}

	max_jid = 0;
	for (i = 0; i < num_slots; i++) {
		if (max_jid < slots[i].slot - 1)
			max_jid = slots[i].slot - 1;
	}

	old_size = ls->ls_recover_size;
	new_size = old_size;
	while (new_size < max_jid + 1)
		new_size += RECOVER_SIZE_INC;
	if (new_size == old_size)
		return 0;	/* current arrays are already big enough */

	submit = kcalloc(new_size, sizeof(uint32_t), GFP_NOFS);
	result = kcalloc(new_size, sizeof(uint32_t), GFP_NOFS);
	if (!submit || !result) {
		kfree(submit);
		kfree(result);
		return -ENOMEM;
	}

	spin_lock(&ls->ls_recover_spin);
	memcpy(submit, ls->ls_recover_submit, old_size * sizeof(uint32_t));
	memcpy(result, ls->ls_recover_result, old_size * sizeof(uint32_t));
	kfree(ls->ls_recover_submit);
	kfree(ls->ls_recover_result);
	ls->ls_recover_submit = submit;
	ls->ls_recover_result = result;
	ls->ls_recover_size = new_size;
	spin_unlock(&ls->ls_recover_spin);
	return 0;
}
1114
1115 static void free_recover_size(struct lm_lockstruct *ls)
1116 {
1117 kfree(ls->ls_lvb_bits);
1118 kfree(ls->ls_recover_submit);
1119 kfree(ls->ls_recover_result);
1120 ls->ls_recover_submit = NULL;
1121 ls->ls_recover_result = NULL;
1122 ls->ls_recover_size = 0;
1123 ls->ls_lvb_bits = NULL;
1124 }
1125
1126
1127
/* dlm calls before it does lock recovery */
static void gdlm_recover_prep(void *arg)
{
	struct gfs2_sbd *sdp = arg;
	struct lm_lockstruct *ls = &sdp->sd_lockstruct;

	if (gfs2_withdrawn(sdp)) {
		fs_err(sdp, "recover_prep ignored due to withdraw.\n");
		return;
	}
	spin_lock(&ls->ls_recover_spin);
	/* block == start marks "dlm recovery in progress" to the
	   control work, and DFL_DLM_RECOVERY is what control_first_done
	   waits on */
	ls->ls_recover_block = ls->ls_recover_start;
	set_bit(DFL_DLM_RECOVERY, &ls->ls_recover_flags);

	/* Don't block our own locks before mount completes or while we
	   are the first mounter. */
	if (!test_bit(DFL_MOUNT_DONE, &ls->ls_recover_flags) ||
	     test_bit(DFL_FIRST_MOUNT, &ls->ls_recover_flags)) {
		spin_unlock(&ls->ls_recover_spin);
		return;
	}
	set_bit(DFL_BLOCK_LOCKS, &ls->ls_recover_flags);
	spin_unlock(&ls->ls_recover_spin);
}
1149
1150
1151
1152
/* dlm calls after recover_prep has been completed on all lockspace members;
   identifies slot/jid of failed member */
static void gdlm_recover_slot(void *arg, struct dlm_slot *slot)
{
	struct gfs2_sbd *sdp = arg;
	struct lm_lockstruct *ls = &sdp->sd_lockstruct;
	int jid = slot->slot - 1;	/* dlm slots are 1-based, jids 0-based */

	if (gfs2_withdrawn(sdp)) {
		fs_err(sdp, "recover_slot jid %d ignored due to withdraw.\n",
		       jid);
		return;
	}
	spin_lock(&ls->ls_recover_spin);
	if (ls->ls_recover_size < jid + 1) {
		/* set_recover_size has not caught up with this jid yet */
		fs_err(sdp, "recover_slot jid %d gen %u short size %d\n",
		       jid, ls->ls_recover_block, ls->ls_recover_size);
		spin_unlock(&ls->ls_recover_spin);
		return;
	}

	if (ls->ls_recover_submit[jid]) {
		fs_info(sdp, "recover_slot jid %d gen %u prev %u\n",
			jid, ls->ls_recover_block, ls->ls_recover_submit[jid]);
	}
	/* record the generation in which this jid's recovery was requested */
	ls->ls_recover_submit[jid] = ls->ls_recover_block;
	spin_unlock(&ls->ls_recover_spin);
}
1179
1180
1181
/* dlm calls after recover_slot and after it completes lock recovery */
static void gdlm_recover_done(void *arg, struct dlm_slot *slots, int num_slots,
			      int our_slot, uint32_t generation)
{
	struct gfs2_sbd *sdp = arg;
	struct lm_lockstruct *ls = &sdp->sd_lockstruct;

	if (gfs2_withdrawn(sdp)) {
		fs_err(sdp, "recover_done ignored due to withdraw.\n");
		return;
	}

	/* ensure the ls jid arrays are large enough */
	set_recover_size(sdp, slots, num_slots);

	spin_lock(&ls->ls_recover_spin);
	ls->ls_recover_start = generation;

	/* first recover_done after mount: remember the generation and
	   derive our jid from our slot */
	if (!ls->ls_recover_mount) {
		ls->ls_recover_mount = generation;
		ls->ls_jid = our_slot - 1;
	}

	if (!test_bit(DFL_UNMOUNT, &ls->ls_recover_flags))
		queue_delayed_work(gfs2_control_wq, &sdp->sd_control_work, 0);

	clear_bit(DFL_DLM_RECOVERY, &ls->ls_recover_flags);
	smp_mb__after_atomic();		/* order clear_bit before wake_up_bit */
	wake_up_bit(&ls->ls_recover_flags, DFL_DLM_RECOVERY);
	spin_unlock(&ls->ls_recover_spin);
}
1211
1212
1213
/* gfs2_recover thread has a journal recovery result */
static void gdlm_recovery_result(struct gfs2_sbd *sdp, unsigned int jid,
				 unsigned int result)
{
	struct lm_lockstruct *ls = &sdp->sd_lockstruct;

	if (gfs2_withdrawn(sdp)) {
		fs_err(sdp, "recovery_result jid %d ignored due to withdraw.\n",
		       jid);
		return;
	}
	if (test_bit(DFL_NO_DLM_OPS, &ls->ls_recover_flags))
		return;

	/* don't care about the recovery of our own journal during mount */
	if (jid == ls->ls_jid)
		return;

	spin_lock(&ls->ls_recover_spin);
	if (test_bit(DFL_FIRST_MOUNT, &ls->ls_recover_flags)) {
		/* first mounter recovery is tracked via control_first_done */
		spin_unlock(&ls->ls_recover_spin);
		return;
	}
	if (ls->ls_recover_size < jid + 1) {
		fs_err(sdp, "recovery_result jid %d short size %d\n",
		       jid, ls->ls_recover_size);
		spin_unlock(&ls->ls_recover_spin);
		return;
	}

	fs_info(sdp, "recover jid %d result %s\n", jid,
		result == LM_RD_GAVEUP ? "busy" : "success");

	ls->ls_recover_result[jid] = result;

	/*
	 * GAVEUP means another node is recovering the journal; delay the
	 * next control-work run to give it a chance to finish before we
	 * look again.
	 */
	if (!test_bit(DFL_UNMOUNT, &ls->ls_recover_flags))
		queue_delayed_work(gfs2_control_wq, &sdp->sd_control_work,
				   result == LM_RD_GAVEUP ? HZ : 0);
	spin_unlock(&ls->ls_recover_spin);
}
1257
/* Callbacks the dlm invokes around its own lockspace recovery. */
static const struct dlm_lockspace_ops gdlm_lockspace_ops = {
	.recover_prep = gdlm_recover_prep,
	.recover_slot = gdlm_recover_slot,
	.recover_done = gdlm_recover_done,
};
1263
/*
 * gdlm_mount - join the dlm lockspace for this filesystem
 * @sdp: the filesystem
 * @table: "cluster:fsname" lock table string
 *
 * Initializes the recovery bookkeeping, creates the dlm lockspace named
 * after the fs, and (when the dlm supports lockspace ops) runs the
 * control_mount() negotiation to determine first-mounter status and our
 * journal id.  Returns 0 or a negative error.
 */
static int gdlm_mount(struct gfs2_sbd *sdp, const char *table)
{
	struct lm_lockstruct *ls = &sdp->sd_lockstruct;
	char cluster[GFS2_LOCKNAME_LEN];
	const char *fsname;
	uint32_t flags;
	int error, ops_result;

	/*
	 * initialize everything
	 */
	INIT_DELAYED_WORK(&sdp->sd_control_work, gfs2_control_func);
	spin_lock_init(&ls->ls_recover_spin);
	ls->ls_recover_flags = 0;
	ls->ls_recover_mount = 0;
	ls->ls_recover_start = 0;
	ls->ls_recover_block = 0;
	ls->ls_recover_size = 0;
	ls->ls_recover_submit = NULL;
	ls->ls_recover_result = NULL;
	ls->ls_lvb_bits = NULL;

	error = set_recover_size(sdp, NULL, 0);
	if (error)
		goto fail;

	/*
	 * split the table name into cluster and fs parts at the ':'
	 */
	fsname = strchr(table, ':');
	if (!fsname) {
		fs_info(sdp, "no fsname found\n");
		error = -EINVAL;
		goto fail_free;
	}
	memset(cluster, 0, sizeof(cluster));
	memcpy(cluster, table, strlen(table) - strlen(fsname));
	fsname++;	/* skip the ':' */

	flags = DLM_LSFL_FS | DLM_LSFL_NEWEXCL;

	/*
	 * create/join lockspace; ops_result reports whether the dlm
	 * supports our lockspace ops (older dlm versions may not)
	 */
	error = dlm_new_lockspace(fsname, cluster, flags, GDLM_LVB_SIZE,
				  &gdlm_lockspace_ops, sdp, &ops_result,
				  &ls->ls_dlm);
	if (error) {
		fs_err(sdp, "dlm_new_lockspace error %d\n", error);
		goto fail_free;
	}

	if (ops_result < 0) {
		/*
		 * dlm does not support ops callbacks: fall back to
		 * running without mount coordination (jid must then be
		 * provided by mount options).
		 */
		fs_info(sdp, "dlm lockspace ops not used\n");
		free_recover_size(ls);
		set_bit(DFL_NO_DLM_OPS, &ls->ls_recover_flags);
		return 0;
	}

	if (!test_bit(SDF_NOJOURNALID, &sdp->sd_flags)) {
		fs_err(sdp, "dlm lockspace ops disallow jid preset\n");
		error = -EINVAL;
		goto fail_release;
	}

	/*
	 * control_mount() uses control_lock to determine first mounter,
	 * and for later mounts, waits for any recoveries to be cleared.
	 */
	error = control_mount(sdp);
	if (error) {
		fs_err(sdp, "mount control error %d\n", error);
		goto fail_release;
	}

	ls->ls_first = !!test_bit(DFL_FIRST_MOUNT, &ls->ls_recover_flags);
	clear_bit(SDF_NOJOURNALID, &sdp->sd_flags);
	smp_mb__after_atomic();		/* order clear_bit before wake_up_bit */
	wake_up_bit(&sdp->sd_flags, SDF_NOJOURNALID);
	return 0;

fail_release:
	dlm_release_lockspace(ls->ls_dlm, 2);
fail_free:
	free_recover_size(ls);
fail:
	return error;
}
1360
1361 static void gdlm_first_done(struct gfs2_sbd *sdp)
1362 {
1363 struct lm_lockstruct *ls = &sdp->sd_lockstruct;
1364 int error;
1365
1366 if (test_bit(DFL_NO_DLM_OPS, &ls->ls_recover_flags))
1367 return;
1368
1369 error = control_first_done(sdp);
1370 if (error)
1371 fs_err(sdp, "mount first_done error %d\n", error);
1372 }
1373
/*
 * gdlm_unmount - leave the dlm lockspace and release recovery state
 * @sdp: the filesystem
 *
 * Sets DFL_UNMOUNT (which also stops further control work from being
 * queued), flushes any pending control work, then releases the
 * lockspace and frees the recovery arrays.
 */
static void gdlm_unmount(struct gfs2_sbd *sdp)
{
	struct lm_lockstruct *ls = &sdp->sd_lockstruct;

	if (test_bit(DFL_NO_DLM_OPS, &ls->ls_recover_flags))
		goto release;

	/* wait for gfs2_control_wq to be done with this fs */

	spin_lock(&ls->ls_recover_spin);
	set_bit(DFL_UNMOUNT, &ls->ls_recover_flags);
	spin_unlock(&ls->ls_recover_spin);
	flush_delayed_work(&sdp->sd_control_work);

	/* mounted_lock and control_lock will be purged in dlm recovery */
release:
	if (ls->ls_dlm) {
		dlm_release_lockspace(ls->ls_dlm, 2);
		ls->ls_dlm = NULL;
	}

	free_recover_size(ls);
}
1397
/* Mount option tokens accepted by the lock_dlm protocol. */
static const match_table_t dlm_tokens = {
	{ Opt_jid, "jid=%d"},
	{ Opt_id, "id=%d"},
	{ Opt_first, "first=%d"},
	{ Opt_nodir, "nodir=%d"},
	{ Opt_err, NULL },
};
1405
/* Lock-module operations exported to the GFS2 core for "lock_dlm". */
const struct lm_lockops gfs2_dlm_ops = {
	.lm_proto_name = "lock_dlm",
	.lm_mount = gdlm_mount,
	.lm_first_done = gdlm_first_done,
	.lm_recovery_result = gdlm_recovery_result,
	.lm_unmount = gdlm_unmount,
	.lm_put_lock = gdlm_put_lock,
	.lm_lock = gdlm_lock,
	.lm_cancel = gdlm_cancel,
	.lm_tokens = &dlm_tokens,
};
1417