0001
0002
0003
0004
0005
0006
0007
0008
0009
0010
0011
0012
0013
0014
0015
0016
0017
0018
0019
0020
0021
0022
0023
0024 #include "dm.h"
0025 #include "dm-path-selector.h"
0026
0027 #include <linux/blkdev.h>
0028 #include <linux/slab.h>
0029 #include <linux/module.h>
0030
0031
0032 #define DM_MSG_PREFIX "multipath historical-service-time"
0033 #define HST_MIN_IO 1
0034 #define HST_VERSION "0.1.1"
0035
0036 #define HST_FIXED_SHIFT 10
0037 #define HST_FIXED_MAX (ULLONG_MAX >> HST_FIXED_SHIFT)
0038 #define HST_FIXED_1 (1 << HST_FIXED_SHIFT)
0039 #define HST_FIXED_95 972
0040 #define HST_MAX_INFLIGHT HST_FIXED_1
0041 #define HST_BUCKET_SHIFT 24
0042 #define HST_WEIGHT_COUNT 64ULL
0043
0044 struct selector {
0045 struct list_head valid_paths;
0046 struct list_head failed_paths;
0047 int valid_count;
0048 spinlock_t lock;
0049
0050 unsigned int weights[HST_WEIGHT_COUNT];
0051 unsigned int threshold_multiplier;
0052 };
0053
0054 struct path_info {
0055 struct list_head list;
0056 struct dm_path *path;
0057 unsigned int repeat_count;
0058
0059 spinlock_t lock;
0060
0061 u64 historical_service_time;
0062
0063 u64 stale_after;
0064 u64 last_finish;
0065
0066 u64 outstanding;
0067 };
0068
0069
0070
0071
0072
0073
0074
0075
0076
0077
0078
0079
0080
0081
0082
0083
0084
0085
0086 static u64 fixed_power(u64 x, unsigned int frac_bits, unsigned int n)
0087 {
0088 unsigned long result = 1UL << frac_bits;
0089
0090 if (n) {
0091 for (;;) {
0092 if (n & 1) {
0093 result *= x;
0094 result += 1UL << (frac_bits - 1);
0095 result >>= frac_bits;
0096 }
0097 n >>= 1;
0098 if (!n)
0099 break;
0100 x *= x;
0101 x += 1UL << (frac_bits - 1);
0102 x >>= frac_bits;
0103 }
0104 }
0105
0106 return result;
0107 }
0108
0109
0110
0111
0112
0113
0114
0115
0116
0117
0118
0119
0120
0121
0122 static u64 fixed_ema(u64 last, u64 next, u64 weight)
0123 {
0124 last *= weight;
0125 last += next * (HST_FIXED_1 - weight);
0126 last += 1ULL << (HST_FIXED_SHIFT - 1);
0127 return last >> HST_FIXED_SHIFT;
0128 }
0129
0130 static struct selector *alloc_selector(void)
0131 {
0132 struct selector *s = kmalloc(sizeof(*s), GFP_KERNEL);
0133
0134 if (s) {
0135 INIT_LIST_HEAD(&s->valid_paths);
0136 INIT_LIST_HEAD(&s->failed_paths);
0137 spin_lock_init(&s->lock);
0138 s->valid_count = 0;
0139 }
0140
0141 return s;
0142 }
0143
0144
0145
0146
0147 static u64 hst_weight(struct path_selector *ps, u64 delta)
0148 {
0149 struct selector *s = ps->context;
0150 int bucket = clamp(delta >> HST_BUCKET_SHIFT, 0ULL,
0151 HST_WEIGHT_COUNT - 1);
0152
0153 return s->weights[bucket];
0154 }
0155
0156
0157
0158
0159
0160
0161
0162 static void hst_set_weights(struct path_selector *ps, unsigned int base)
0163 {
0164 struct selector *s = ps->context;
0165 int i;
0166
0167 if (base >= HST_FIXED_1)
0168 return;
0169
0170 for (i = 0; i < HST_WEIGHT_COUNT - 1; i++)
0171 s->weights[i] = fixed_power(base, HST_FIXED_SHIFT, i + 1);
0172 s->weights[HST_WEIGHT_COUNT - 1] = 0;
0173 }
0174
0175 static int hst_create(struct path_selector *ps, unsigned int argc, char **argv)
0176 {
0177 struct selector *s;
0178 unsigned int base_weight = HST_FIXED_95;
0179 unsigned int threshold_multiplier = 0;
0180 char dummy;
0181
0182
0183
0184
0185
0186
0187
0188
0189
0190
0191
0192
0193 if (argc > 2)
0194 return -EINVAL;
0195
0196 if (argc && (sscanf(argv[0], "%u%c", &base_weight, &dummy) != 1 ||
0197 base_weight >= HST_FIXED_1)) {
0198 return -EINVAL;
0199 }
0200
0201 if (argc > 1 && (sscanf(argv[1], "%u%c",
0202 &threshold_multiplier, &dummy) != 1)) {
0203 return -EINVAL;
0204 }
0205
0206 s = alloc_selector();
0207 if (!s)
0208 return -ENOMEM;
0209
0210 ps->context = s;
0211
0212 hst_set_weights(ps, base_weight);
0213 s->threshold_multiplier = threshold_multiplier;
0214 return 0;
0215 }
0216
0217 static void free_paths(struct list_head *paths)
0218 {
0219 struct path_info *pi, *next;
0220
0221 list_for_each_entry_safe(pi, next, paths, list) {
0222 list_del(&pi->list);
0223 kfree(pi);
0224 }
0225 }
0226
0227 static void hst_destroy(struct path_selector *ps)
0228 {
0229 struct selector *s = ps->context;
0230
0231 free_paths(&s->valid_paths);
0232 free_paths(&s->failed_paths);
0233 kfree(s);
0234 ps->context = NULL;
0235 }
0236
0237 static int hst_status(struct path_selector *ps, struct dm_path *path,
0238 status_type_t type, char *result, unsigned int maxlen)
0239 {
0240 unsigned int sz = 0;
0241 struct path_info *pi;
0242
0243 if (!path) {
0244 struct selector *s = ps->context;
0245
0246 DMEMIT("2 %u %u ", s->weights[0], s->threshold_multiplier);
0247 } else {
0248 pi = path->pscontext;
0249
0250 switch (type) {
0251 case STATUSTYPE_INFO:
0252 DMEMIT("%llu %llu %llu ", pi->historical_service_time,
0253 pi->outstanding, pi->stale_after);
0254 break;
0255 case STATUSTYPE_TABLE:
0256 DMEMIT("0 ");
0257 break;
0258 case STATUSTYPE_IMA:
0259 *result = '\0';
0260 break;
0261 }
0262 }
0263
0264 return sz;
0265 }
0266
0267 static int hst_add_path(struct path_selector *ps, struct dm_path *path,
0268 int argc, char **argv, char **error)
0269 {
0270 struct selector *s = ps->context;
0271 struct path_info *pi;
0272 unsigned int repeat_count = HST_MIN_IO;
0273 char dummy;
0274 unsigned long flags;
0275
0276
0277
0278
0279
0280
0281 if (argc > 1) {
0282 *error = "historical-service-time ps: incorrect number of arguments";
0283 return -EINVAL;
0284 }
0285
0286 if (argc && (sscanf(argv[0], "%u%c", &repeat_count, &dummy) != 1)) {
0287 *error = "historical-service-time ps: invalid repeat count";
0288 return -EINVAL;
0289 }
0290
0291
0292 pi = kmalloc(sizeof(*pi), GFP_KERNEL);
0293 if (!pi) {
0294 *error = "historical-service-time ps: Error allocating path context";
0295 return -ENOMEM;
0296 }
0297
0298 pi->path = path;
0299 pi->repeat_count = repeat_count;
0300
0301 pi->historical_service_time = HST_FIXED_1;
0302
0303 spin_lock_init(&pi->lock);
0304 pi->outstanding = 0;
0305
0306 pi->stale_after = 0;
0307 pi->last_finish = 0;
0308
0309 path->pscontext = pi;
0310
0311 spin_lock_irqsave(&s->lock, flags);
0312 list_add_tail(&pi->list, &s->valid_paths);
0313 s->valid_count++;
0314 spin_unlock_irqrestore(&s->lock, flags);
0315
0316 return 0;
0317 }
0318
0319 static void hst_fail_path(struct path_selector *ps, struct dm_path *path)
0320 {
0321 struct selector *s = ps->context;
0322 struct path_info *pi = path->pscontext;
0323 unsigned long flags;
0324
0325 spin_lock_irqsave(&s->lock, flags);
0326 list_move(&pi->list, &s->failed_paths);
0327 s->valid_count--;
0328 spin_unlock_irqrestore(&s->lock, flags);
0329 }
0330
0331 static int hst_reinstate_path(struct path_selector *ps, struct dm_path *path)
0332 {
0333 struct selector *s = ps->context;
0334 struct path_info *pi = path->pscontext;
0335 unsigned long flags;
0336
0337 spin_lock_irqsave(&s->lock, flags);
0338 list_move_tail(&pi->list, &s->valid_paths);
0339 s->valid_count++;
0340 spin_unlock_irqrestore(&s->lock, flags);
0341
0342 return 0;
0343 }
0344
0345 static void hst_fill_compare(struct path_info *pi, u64 *hst,
0346 u64 *out, u64 *stale)
0347 {
0348 unsigned long flags;
0349
0350 spin_lock_irqsave(&pi->lock, flags);
0351 *hst = pi->historical_service_time;
0352 *out = pi->outstanding;
0353 *stale = pi->stale_after;
0354 spin_unlock_irqrestore(&pi->lock, flags);
0355 }
0356
0357
0358
0359
0360
0361
0362
0363
0364
0365
0366
0367 static long long hst_compare(struct path_info *pi1, struct path_info *pi2,
0368 u64 time_now, struct path_selector *ps)
0369 {
0370 struct selector *s = ps->context;
0371 u64 hst1, hst2;
0372 long long out1, out2, stale1, stale2;
0373 int pi2_better, over_threshold;
0374
0375 hst_fill_compare(pi1, &hst1, &out1, &stale1);
0376 hst_fill_compare(pi2, &hst2, &out2, &stale2);
0377
0378
0379
0380
0381
0382
0383 if (hst1 > hst2)
0384 over_threshold = hst1 > (s->threshold_multiplier * hst2);
0385 else
0386 over_threshold = hst2 > (s->threshold_multiplier * hst1);
0387
0388 if (!over_threshold)
0389 return out1 - out2;
0390
0391
0392
0393
0394
0395
0396 if ((!out1 && stale1 < time_now) || (!out2 && stale2 < time_now) ||
0397 (!out1 && !out2))
0398 return (!out2 * stale1) - (!out1 * stale2);
0399
0400
0401
0402
0403 if (out1 == out2) {
0404 pi2_better = hst1 > hst2;
0405 } else {
0406
0407 if (unlikely(out1 >= HST_MAX_INFLIGHT ||
0408 out2 >= HST_MAX_INFLIGHT)) {
0409
0410
0411
0412
0413 hst1 >>= HST_FIXED_SHIFT;
0414 hst2 >>= HST_FIXED_SHIFT;
0415 }
0416 pi2_better = (1 + out1) * hst1 > (1 + out2) * hst2;
0417 }
0418
0419
0420 if (pi2_better) {
0421 if (stale2 < time_now)
0422 return out1 - out2;
0423 return 1;
0424 }
0425 if (stale1 < time_now)
0426 return out1 - out2;
0427 return -1;
0428 }
0429
0430 static struct dm_path *hst_select_path(struct path_selector *ps,
0431 size_t nr_bytes)
0432 {
0433 struct selector *s = ps->context;
0434 struct path_info *pi = NULL, *best = NULL;
0435 u64 time_now = ktime_get_ns();
0436 struct dm_path *ret = NULL;
0437 unsigned long flags;
0438
0439 spin_lock_irqsave(&s->lock, flags);
0440 if (list_empty(&s->valid_paths))
0441 goto out;
0442
0443 list_for_each_entry(pi, &s->valid_paths, list) {
0444 if (!best || (hst_compare(pi, best, time_now, ps) < 0))
0445 best = pi;
0446 }
0447
0448 if (!best)
0449 goto out;
0450
0451
0452 list_move_tail(&best->list, &s->valid_paths);
0453
0454 ret = best->path;
0455
0456 out:
0457 spin_unlock_irqrestore(&s->lock, flags);
0458 return ret;
0459 }
0460
0461 static int hst_start_io(struct path_selector *ps, struct dm_path *path,
0462 size_t nr_bytes)
0463 {
0464 struct path_info *pi = path->pscontext;
0465 unsigned long flags;
0466
0467 spin_lock_irqsave(&pi->lock, flags);
0468 pi->outstanding++;
0469 spin_unlock_irqrestore(&pi->lock, flags);
0470
0471 return 0;
0472 }
0473
0474 static u64 path_service_time(struct path_info *pi, u64 start_time)
0475 {
0476 u64 now = ktime_get_ns();
0477
0478
0479
0480
0481
0482 if (time_after64(pi->last_finish, start_time))
0483 start_time = pi->last_finish;
0484
0485 pi->last_finish = now;
0486 if (time_before64(now, start_time))
0487 return 0;
0488
0489 return now - start_time;
0490 }
0491
0492 static int hst_end_io(struct path_selector *ps, struct dm_path *path,
0493 size_t nr_bytes, u64 start_time)
0494 {
0495 struct path_info *pi = path->pscontext;
0496 struct selector *s = ps->context;
0497 unsigned long flags;
0498 u64 st;
0499
0500 spin_lock_irqsave(&pi->lock, flags);
0501
0502 st = path_service_time(pi, start_time);
0503 pi->outstanding--;
0504 pi->historical_service_time =
0505 fixed_ema(pi->historical_service_time,
0506 min(st * HST_FIXED_1, HST_FIXED_MAX),
0507 hst_weight(ps, st));
0508
0509
0510
0511
0512
0513
0514
0515 pi->stale_after = pi->last_finish +
0516 (s->valid_count * (pi->historical_service_time >> HST_FIXED_SHIFT));
0517
0518 spin_unlock_irqrestore(&pi->lock, flags);
0519
0520 return 0;
0521 }
0522
0523 static struct path_selector_type hst_ps = {
0524 .name = "historical-service-time",
0525 .module = THIS_MODULE,
0526 .features = DM_PS_USE_HR_TIMER,
0527 .table_args = 1,
0528 .info_args = 3,
0529 .create = hst_create,
0530 .destroy = hst_destroy,
0531 .status = hst_status,
0532 .add_path = hst_add_path,
0533 .fail_path = hst_fail_path,
0534 .reinstate_path = hst_reinstate_path,
0535 .select_path = hst_select_path,
0536 .start_io = hst_start_io,
0537 .end_io = hst_end_io,
0538 };
0539
0540 static int __init dm_hst_init(void)
0541 {
0542 int r = dm_register_path_selector(&hst_ps);
0543
0544 if (r < 0)
0545 DMERR("register failed %d", r);
0546
0547 DMINFO("version " HST_VERSION " loaded");
0548
0549 return r;
0550 }
0551
0552 static void __exit dm_hst_exit(void)
0553 {
0554 int r = dm_unregister_path_selector(&hst_ps);
0555
0556 if (r < 0)
0557 DMERR("unregister failed %d", r);
0558 }
0559
0560 module_init(dm_hst_init);
0561 module_exit(dm_hst_exit);
0562
0563 MODULE_DESCRIPTION(DM_NAME " measured service time oriented path selector");
0564 MODULE_AUTHOR("Khazhismel Kumykov <khazhy@google.com>");
0565 MODULE_LICENSE("GPL");