0001
0002
0003
0004
0005
0006
0007
0008
0009
0010
0011
0012
0013
0014
0015
0016
0017
0018
0019
0020
0021
0022
0023
0024
0025
0026
0027
0028
0029
0030 #include <linux/kernel.h>
0031 #include <linux/workqueue.h>
0032 #include <linux/reboot.h>
0033
0034 #include "heartbeat.h"
0035 #include "nodemanager.h"
0036 #define MLOG_MASK_PREFIX ML_QUORUM
0037 #include "masklog.h"
0038 #include "quorum.h"
0039
0040 static struct o2quo_state {
0041 spinlock_t qs_lock;
0042 struct work_struct qs_work;
0043 int qs_pending;
0044 int qs_heartbeating;
0045 unsigned long qs_hb_bm[BITS_TO_LONGS(O2NM_MAX_NODES)];
0046 int qs_connected;
0047 unsigned long qs_conn_bm[BITS_TO_LONGS(O2NM_MAX_NODES)];
0048 int qs_holds;
0049 unsigned long qs_hold_bm[BITS_TO_LONGS(O2NM_MAX_NODES)];
0050 } o2quo_state;
0051
0052
0053
0054 static void o2quo_fence_self(void)
0055 {
0056
0057
0058 o2hb_stop_all_regions();
0059
0060 switch (o2nm_single_cluster->cl_fence_method) {
0061 case O2NM_FENCE_PANIC:
0062 panic("*** ocfs2 is very sorry to be fencing this system by "
0063 "panicing ***\n");
0064 break;
0065 default:
0066 WARN_ON(o2nm_single_cluster->cl_fence_method >=
0067 O2NM_FENCE_METHODS);
0068 fallthrough;
0069 case O2NM_FENCE_RESET:
0070 printk(KERN_ERR "*** ocfs2 is very sorry to be fencing this "
0071 "system by restarting ***\n");
0072 emergency_restart();
0073 break;
0074 }
0075 }
0076
0077
0078
0079
0080
0081
0082
0083
0084
/*
 * Disk timeout entry point (going by the name; the caller is outside
 * this file).  No quorum state is consulted: the local node is fenced
 * unconditionally.
 */
void o2quo_disk_timeout(void)
{
	o2quo_fence_self();
}
0089
0090 static void o2quo_make_decision(struct work_struct *work)
0091 {
0092 int quorum;
0093 int lowest_hb, lowest_reachable = 0, fence = 0;
0094 struct o2quo_state *qs = &o2quo_state;
0095
0096 spin_lock(&qs->qs_lock);
0097
0098 lowest_hb = find_first_bit(qs->qs_hb_bm, O2NM_MAX_NODES);
0099 if (lowest_hb != O2NM_MAX_NODES)
0100 lowest_reachable = test_bit(lowest_hb, qs->qs_conn_bm);
0101
0102 mlog(0, "heartbeating: %d, connected: %d, "
0103 "lowest: %d (%sreachable)\n", qs->qs_heartbeating,
0104 qs->qs_connected, lowest_hb, lowest_reachable ? "" : "un");
0105
0106 if (!test_bit(o2nm_this_node(), qs->qs_hb_bm) ||
0107 qs->qs_heartbeating == 1)
0108 goto out;
0109
0110 if (qs->qs_heartbeating & 1) {
0111
0112
0113 quorum = (qs->qs_heartbeating + 1)/2;
0114 if (qs->qs_connected < quorum) {
0115 mlog(ML_ERROR, "fencing this node because it is "
0116 "only connected to %u nodes and %u is needed "
0117 "to make a quorum out of %u heartbeating nodes\n",
0118 qs->qs_connected, quorum,
0119 qs->qs_heartbeating);
0120 fence = 1;
0121 }
0122 } else {
0123
0124
0125
0126
0127 quorum = qs->qs_heartbeating / 2;
0128 if (qs->qs_connected < quorum) {
0129 mlog(ML_ERROR, "fencing this node because it is "
0130 "only connected to %u nodes and %u is needed "
0131 "to make a quorum out of %u heartbeating nodes\n",
0132 qs->qs_connected, quorum,
0133 qs->qs_heartbeating);
0134 fence = 1;
0135 }
0136 else if ((qs->qs_connected == quorum) &&
0137 !lowest_reachable) {
0138 mlog(ML_ERROR, "fencing this node because it is "
0139 "connected to a half-quorum of %u out of %u "
0140 "nodes which doesn't include the lowest active "
0141 "node %u\n", quorum, qs->qs_heartbeating,
0142 lowest_hb);
0143 fence = 1;
0144 }
0145 }
0146
0147 out:
0148 if (fence) {
0149 spin_unlock(&qs->qs_lock);
0150 o2quo_fence_self();
0151 } else {
0152 mlog(ML_NOTICE, "not fencing this node, heartbeating: %d, "
0153 "connected: %d, lowest: %d (%sreachable)\n",
0154 qs->qs_heartbeating, qs->qs_connected, lowest_hb,
0155 lowest_reachable ? "" : "un");
0156 spin_unlock(&qs->qs_lock);
0157
0158 }
0159
0160 }
0161
0162 static void o2quo_set_hold(struct o2quo_state *qs, u8 node)
0163 {
0164 assert_spin_locked(&qs->qs_lock);
0165
0166 if (!test_and_set_bit(node, qs->qs_hold_bm)) {
0167 qs->qs_holds++;
0168 mlog_bug_on_msg(qs->qs_holds == O2NM_MAX_NODES,
0169 "node %u\n", node);
0170 mlog(0, "node %u, %d total\n", node, qs->qs_holds);
0171 }
0172 }
0173
0174 static void o2quo_clear_hold(struct o2quo_state *qs, u8 node)
0175 {
0176 assert_spin_locked(&qs->qs_lock);
0177
0178 if (test_and_clear_bit(node, qs->qs_hold_bm)) {
0179 mlog(0, "node %u, %d total\n", node, qs->qs_holds - 1);
0180 if (--qs->qs_holds == 0) {
0181 if (qs->qs_pending) {
0182 qs->qs_pending = 0;
0183 schedule_work(&qs->qs_work);
0184 }
0185 }
0186 mlog_bug_on_msg(qs->qs_holds < 0, "node %u, holds %d\n",
0187 node, qs->qs_holds);
0188 }
0189 }
0190
0191
0192
0193
0194
0195 void o2quo_hb_up(u8 node)
0196 {
0197 struct o2quo_state *qs = &o2quo_state;
0198
0199 spin_lock(&qs->qs_lock);
0200
0201 qs->qs_heartbeating++;
0202 mlog_bug_on_msg(qs->qs_heartbeating == O2NM_MAX_NODES,
0203 "node %u\n", node);
0204 mlog_bug_on_msg(test_bit(node, qs->qs_hb_bm), "node %u\n", node);
0205 set_bit(node, qs->qs_hb_bm);
0206
0207 mlog(0, "node %u, %d total\n", node, qs->qs_heartbeating);
0208
0209 if (!test_bit(node, qs->qs_conn_bm))
0210 o2quo_set_hold(qs, node);
0211 else
0212 o2quo_clear_hold(qs, node);
0213
0214 spin_unlock(&qs->qs_lock);
0215 }
0216
0217
0218
0219 void o2quo_hb_down(u8 node)
0220 {
0221 struct o2quo_state *qs = &o2quo_state;
0222
0223 spin_lock(&qs->qs_lock);
0224
0225 qs->qs_heartbeating--;
0226 mlog_bug_on_msg(qs->qs_heartbeating < 0,
0227 "node %u, %d heartbeating\n",
0228 node, qs->qs_heartbeating);
0229 mlog_bug_on_msg(!test_bit(node, qs->qs_hb_bm), "node %u\n", node);
0230 clear_bit(node, qs->qs_hb_bm);
0231
0232 mlog(0, "node %u, %d total\n", node, qs->qs_heartbeating);
0233
0234 o2quo_clear_hold(qs, node);
0235
0236 spin_unlock(&qs->qs_lock);
0237 }
0238
0239
0240
0241
0242
0243
0244 void o2quo_hb_still_up(u8 node)
0245 {
0246 struct o2quo_state *qs = &o2quo_state;
0247
0248 spin_lock(&qs->qs_lock);
0249
0250 mlog(0, "node %u\n", node);
0251
0252 qs->qs_pending = 1;
0253 o2quo_clear_hold(qs, node);
0254
0255 spin_unlock(&qs->qs_lock);
0256 }
0257
0258
0259
0260
0261
0262
0263 void o2quo_conn_up(u8 node)
0264 {
0265 struct o2quo_state *qs = &o2quo_state;
0266
0267 spin_lock(&qs->qs_lock);
0268
0269 qs->qs_connected++;
0270 mlog_bug_on_msg(qs->qs_connected == O2NM_MAX_NODES,
0271 "node %u\n", node);
0272 mlog_bug_on_msg(test_bit(node, qs->qs_conn_bm), "node %u\n", node);
0273 set_bit(node, qs->qs_conn_bm);
0274
0275 mlog(0, "node %u, %d total\n", node, qs->qs_connected);
0276
0277 if (!test_bit(node, qs->qs_hb_bm))
0278 o2quo_set_hold(qs, node);
0279 else
0280 o2quo_clear_hold(qs, node);
0281
0282 spin_unlock(&qs->qs_lock);
0283 }
0284
0285
0286
0287
0288
0289 void o2quo_conn_err(u8 node)
0290 {
0291 struct o2quo_state *qs = &o2quo_state;
0292
0293 spin_lock(&qs->qs_lock);
0294
0295 if (test_bit(node, qs->qs_conn_bm)) {
0296 qs->qs_connected--;
0297 mlog_bug_on_msg(qs->qs_connected < 0,
0298 "node %u, connected %d\n",
0299 node, qs->qs_connected);
0300
0301 clear_bit(node, qs->qs_conn_bm);
0302
0303 if (test_bit(node, qs->qs_hb_bm))
0304 o2quo_set_hold(qs, node);
0305 }
0306
0307 mlog(0, "node %u, %d total\n", node, qs->qs_connected);
0308
0309
0310 spin_unlock(&qs->qs_lock);
0311 }
0312
0313 void o2quo_init(void)
0314 {
0315 struct o2quo_state *qs = &o2quo_state;
0316
0317 spin_lock_init(&qs->qs_lock);
0318 INIT_WORK(&qs->qs_work, o2quo_make_decision);
0319 }
0320
0321 void o2quo_exit(void)
0322 {
0323 struct o2quo_state *qs = &o2quo_state;
0324
0325 flush_work(&qs->qs_work);
0326 }