/*
 * dm-switch: a device-mapper target that maps I/O to one of several
 * underlying block devices ("paths") by looking up, in a compact and
 * dynamically updatable region table, which path owns the region that
 * the I/O falls into.
 */

#include <linux/device-mapper.h>

#include <linux/module.h>
#include <linux/init.h>
#include <linux/vmalloc.h>

#define DM_MSG_PREFIX "switch"

/*
 * One region_table_slot_t holds region_entries_per_slot region table
 * entries, each region_table_entry_bits wide.
 */
typedef unsigned long region_table_slot_t;

/*
 * One path: the underlying device and the offset of its start sector.
 */
struct switch_path {
	struct dm_dev *dmdev;
	sector_t start;
};

/*
 * Context block for a dm switch device.
 */
struct switch_ctx {
	struct dm_target *ti;

	unsigned nr_paths;		/* Number of paths in path_list. */

	unsigned region_size;		/* Region size in 512-byte sectors */
	unsigned long nr_regions;	/* Number of regions making up the device */
	signed char region_size_bits;	/* log2 of region_size or -1 */

	unsigned char region_table_entry_bits;		/* Number of bits in one region table entry */
	unsigned char region_entries_per_slot;		/* Number of entries in one region table slot */
	signed char region_entries_per_slot_bits;	/* log2 of region_entries_per_slot or -1 */

	region_table_slot_t *region_table;	/* Region table */

	/*
	 * Array of dm devices to switch between.
	 */
	struct switch_path path_list[];
};

static struct switch_ctx *alloc_switch_ctx(struct dm_target *ti, unsigned nr_paths,
					   unsigned region_size)
{
	struct switch_ctx *sctx;

	sctx = kzalloc(struct_size(sctx, path_list, nr_paths), GFP_KERNEL);
	if (!sctx)
		return NULL;

	sctx->ti = ti;
	sctx->region_size = region_size;

	ti->private = sctx;

	return sctx;
}

static int alloc_region_table(struct dm_target *ti, unsigned nr_paths)
{
	struct switch_ctx *sctx = ti->private;
	sector_t nr_regions = ti->len;
	sector_t nr_slots;

	if (!(sctx->region_size & (sctx->region_size - 1)))
		sctx->region_size_bits = __ffs(sctx->region_size);
	else
		sctx->region_size_bits = -1;

	sctx->region_table_entry_bits = 1;
	while (sctx->region_table_entry_bits < sizeof(region_table_slot_t) * 8 &&
	       (region_table_slot_t)1 << sctx->region_table_entry_bits < nr_paths)
		sctx->region_table_entry_bits++;

	sctx->region_entries_per_slot = (sizeof(region_table_slot_t) * 8) / sctx->region_table_entry_bits;
	if (!(sctx->region_entries_per_slot & (sctx->region_entries_per_slot - 1)))
		sctx->region_entries_per_slot_bits = __ffs(sctx->region_entries_per_slot);
	else
		sctx->region_entries_per_slot_bits = -1;

	if (sector_div(nr_regions, sctx->region_size))
		nr_regions++;

	if (nr_regions >= ULONG_MAX) {
		ti->error = "Region table too large";
		return -EINVAL;
	}
	sctx->nr_regions = nr_regions;

	nr_slots = nr_regions;
	if (sector_div(nr_slots, sctx->region_entries_per_slot))
		nr_slots++;

	if (nr_slots > ULONG_MAX / sizeof(region_table_slot_t)) {
		ti->error = "Region table too large";
		return -EINVAL;
	}

	sctx->region_table = vmalloc(array_size(nr_slots,
						sizeof(region_table_slot_t)));
	if (!sctx->region_table) {
		ti->error = "Cannot allocate region table";
		return -ENOMEM;
	}

	return 0;
}

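/*
 * Worked example of the table layout above (a sketch, not part of the
 * original source): on a 64-bit kernel with nr_paths == 2,
 * region_table_entry_bits ends up as 1, so one region_table_slot_t packs
 * 64 region entries and region_entries_per_slot_bits == 6.  With
 * nr_paths == 5 the entry width grows to 3 bits, giving 21 entries per
 * slot; 21 is not a power of two, so region_entries_per_slot_bits == -1
 * and the slower division path in switch_get_position() is taken.
 */
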
static void switch_get_position(struct switch_ctx *sctx, unsigned long region_nr,
				unsigned long *region_index, unsigned *bit)
{
	if (sctx->region_entries_per_slot_bits >= 0) {
		*region_index = region_nr >> sctx->region_entries_per_slot_bits;
		*bit = region_nr & (sctx->region_entries_per_slot - 1);
	} else {
		*region_index = region_nr / sctx->region_entries_per_slot;
		*bit = region_nr % sctx->region_entries_per_slot;
	}

	*bit *= sctx->region_table_entry_bits;
}

static unsigned switch_region_table_read(struct switch_ctx *sctx, unsigned long region_nr)
{
	unsigned long region_index;
	unsigned bit;

	switch_get_position(sctx, region_nr, &region_index, &bit);

	return (READ_ONCE(sctx->region_table[region_index]) >> bit) &
		((1 << sctx->region_table_entry_bits) - 1);
}

/*
 * Find which path serves a particular offset: called once per bio.
 */
static unsigned switch_get_path_nr(struct switch_ctx *sctx, sector_t offset)
{
	unsigned path_nr;
	sector_t p;

	p = offset;
	if (sctx->region_size_bits >= 0)
		p >>= sctx->region_size_bits;
	else
		sector_div(p, sctx->region_size);

	path_nr = switch_region_table_read(sctx, p);

	/* Be safe: an out-of-range table entry falls back to path 0. */
	if (unlikely(path_nr >= sctx->nr_paths))
		path_nr = 0;

	return path_nr;
}

static void switch_region_table_write(struct switch_ctx *sctx, unsigned long region_nr,
				      unsigned value)
{
	unsigned long region_index;
	unsigned bit;
	region_table_slot_t pte;

	switch_get_position(sctx, region_nr, &region_index, &bit);

	pte = sctx->region_table[region_index];
	pte &= ~((((region_table_slot_t)1 << sctx->region_table_entry_bits) - 1) << bit);
	pte |= (region_table_slot_t)value << bit;
	sctx->region_table[region_index] = pte;
}

/*
 * Fill the region table with an initial round-robin distribution of the
 * regions across all paths.
 */
static void initialise_region_table(struct switch_ctx *sctx)
{
	unsigned path_nr = 0;
	unsigned long region_nr;

	for (region_nr = 0; region_nr < sctx->nr_regions; region_nr++) {
		switch_region_table_write(sctx, region_nr, path_nr);
		if (++path_nr >= sctx->nr_paths)
			path_nr = 0;
	}
}

static int parse_path(struct dm_arg_set *as, struct dm_target *ti)
{
	struct switch_ctx *sctx = ti->private;
	unsigned long long start;
	int r;

	r = dm_get_device(ti, dm_shift_arg(as), dm_table_get_mode(ti->table),
			  &sctx->path_list[sctx->nr_paths].dmdev);
	if (r) {
		ti->error = "Device lookup failed";
		return r;
	}

	if (kstrtoull(dm_shift_arg(as), 10, &start) || start != (sector_t)start) {
		ti->error = "Invalid device starting offset";
		dm_put_device(ti, sctx->path_list[sctx->nr_paths].dmdev);
		return -EINVAL;
	}

	sctx->path_list[sctx->nr_paths].start = start;

	sctx->nr_paths++;

	return 0;
}

/*
 * Destructor: release the underlying devices and free the context.
 */
static void switch_dtr(struct dm_target *ti)
{
	struct switch_ctx *sctx = ti->private;

	while (sctx->nr_paths--)
		dm_put_device(ti, sctx->path_list[sctx->nr_paths].dmdev);

	vfree(sctx->region_table);
	kfree(sctx);
}

/*
 * Constructor arguments:
 *    <num_paths> <region_size> <num_optional_args> [<optional_args>...]
 *    [<dev_path> <offset>]+
 *
 * Optional args are to allow for future extension: currently this
 * parameter must be 0.
 */
static int switch_ctr(struct dm_target *ti, unsigned argc, char **argv)
{
	static const struct dm_arg _args[] = {
		{1, (KMALLOC_MAX_SIZE - sizeof(struct switch_ctx)) / sizeof(struct switch_path), "Invalid number of paths"},
		{1, UINT_MAX, "Invalid region size"},
		{0, 0, "Invalid number of optional args"},
	};

	struct switch_ctx *sctx;
	struct dm_arg_set as;
	unsigned nr_paths, region_size, nr_optional_args;
	int r;

	as.argc = argc;
	as.argv = argv;

	r = dm_read_arg(_args, &as, &nr_paths, &ti->error);
	if (r)
		return -EINVAL;

	r = dm_read_arg(_args + 1, &as, &region_size, &ti->error);
	if (r)
		return r;

	r = dm_read_arg_group(_args + 2, &as, &nr_optional_args, &ti->error);
	if (r)
		return r;

	if (as.argc != nr_paths * 2) {
		ti->error = "Incorrect number of path arguments";
		return -EINVAL;
	}

	sctx = alloc_switch_ctx(ti, nr_paths, region_size);
	if (!sctx) {
		ti->error = "Cannot allocate redirection context";
		return -ENOMEM;
	}

	r = dm_set_target_max_io_len(ti, region_size);
	if (r)
		goto error;

	while (as.argc) {
		r = parse_path(&as, ti);
		if (r)
			goto error;
	}

	r = alloc_region_table(ti, nr_paths);
	if (r)
		goto error;

	initialise_region_table(sctx);

	ti->num_discard_bios = 1;

	return 0;

error:
	switch_dtr(ti);

	return r;
}

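/*
 * Example of creating a switch device (a sketch, not part of the original
 * source; the device names, sizes and region size are illustrative):
 *
 *   # Two paths to the same 1 GiB LUN, 128 KiB (256-sector) regions:
 *   dmsetup create sw --table "0 2097152 switch 2 256 0 /dev/sda 0 /dev/sdb 0"
 *
 * Each path must be able to serve the whole 0..ti->len range (plus its
 * start offset), because switch_map() below only redirects the bio and
 * adds the path's start sector.
 */
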
static int switch_map(struct dm_target *ti, struct bio *bio)
{
	struct switch_ctx *sctx = ti->private;
	sector_t offset = dm_target_offset(ti, bio->bi_iter.bi_sector);
	unsigned path_nr = switch_get_path_nr(sctx, offset);

	bio_set_dev(bio, sctx->path_list[path_nr].dmdev->bdev);
	bio->bi_iter.bi_sector = sctx->path_list[path_nr].start + offset;

	return DM_MAPIO_REMAPPED;
}

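/*
 * Worked example of the remapping above (a sketch; the numbers are
 * illustrative): with region_size == 256, a bio arriving at target offset
 * 5000 falls in region 5000 / 256 == 19, so it is sent to whichever path
 * the region table currently stores for region 19, at that path's start
 * sector + 5000.
 */
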
/*
 * ASCII -> hex digit lookup table used by parse_hex() below.  '0'-'9',
 * 'A'-'F' and 'a'-'f' map to their values; every other byte maps to 255,
 * which terminates parsing.  A single table lookup keeps the per-character
 * cost low when a set_region_mappings message carries many entries.
 */
static const unsigned char hex_table[256] = {
	255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
	255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
	255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
	0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 255, 255, 255, 255, 255, 255,
	255, 10, 11, 12, 13, 14, 15, 255, 255, 255, 255, 255, 255, 255, 255, 255,
	255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
	255, 10, 11, 12, 13, 14, 15, 255, 255, 255, 255, 255, 255, 255, 255, 255,
	255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
	255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
	255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
	255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
	255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
	255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
	255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
	255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
	255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255
};

static __always_inline unsigned long parse_hex(const char **string)
{
	unsigned char d;
	unsigned long r = 0;

	while ((d = hex_table[(unsigned char)**string]) < 16) {
		r = (r << 4) | d;
		(*string)++;
	}

	return r;
}

/*
 * Process a "set_region_mappings" message.  Each argument after the message
 * name is one of the following, with all numbers in hexadecimal:
 *
 *    <region_index>:<path_nr>     set the given region to the given path
 *    :<path_nr>                   set the region after the previous one
 *    R<cycle_length>,<num_write>  repeat the previous <cycle_length>
 *                                 mappings over the next <num_write> regions
 */
static int process_set_region_mappings(struct switch_ctx *sctx,
					unsigned argc, char **argv)
{
	unsigned i;
	unsigned long region_index = 0;

	for (i = 1; i < argc; i++) {
		unsigned long path_nr;
		const char *string = argv[i];

		if ((*string & 0xdf) == 'R') {
			unsigned long cycle_length, num_write;

			string++;
			if (unlikely(*string == ',')) {
				DMWARN("invalid set_region_mappings argument: '%s'", argv[i]);
				return -EINVAL;
			}
			cycle_length = parse_hex(&string);
			if (unlikely(*string != ',')) {
				DMWARN("invalid set_region_mappings argument: '%s'", argv[i]);
				return -EINVAL;
			}
			string++;
			if (unlikely(!*string)) {
				DMWARN("invalid set_region_mappings argument: '%s'", argv[i]);
				return -EINVAL;
			}
			num_write = parse_hex(&string);
			if (unlikely(*string)) {
				DMWARN("invalid set_region_mappings argument: '%s'", argv[i]);
				return -EINVAL;
			}

			if (unlikely(!cycle_length) || unlikely(cycle_length - 1 > region_index)) {
				DMWARN("invalid set_region_mappings cycle length: %lu > %lu",
				       cycle_length - 1, region_index);
				return -EINVAL;
			}
			if (unlikely(region_index + num_write < region_index) ||
			    unlikely(region_index + num_write >= sctx->nr_regions)) {
				DMWARN("invalid set_region_mappings region number: %lu + %lu >= %lu",
				       region_index, num_write, sctx->nr_regions);
				return -EINVAL;
			}

			while (num_write--) {
				region_index++;
				path_nr = switch_region_table_read(sctx, region_index - cycle_length);
				switch_region_table_write(sctx, region_index, path_nr);
			}

			continue;
		}

		if (*string == ':')
			region_index++;
		else {
			region_index = parse_hex(&string);
			if (unlikely(*string != ':')) {
				DMWARN("invalid set_region_mappings argument: '%s'", argv[i]);
				return -EINVAL;
			}
		}

		string++;
		if (unlikely(!*string)) {
			DMWARN("invalid set_region_mappings argument: '%s'", argv[i]);
			return -EINVAL;
		}

		path_nr = parse_hex(&string);
		if (unlikely(*string)) {
			DMWARN("invalid set_region_mappings argument: '%s'", argv[i]);
			return -EINVAL;
		}
		if (unlikely(region_index >= sctx->nr_regions)) {
			DMWARN("invalid set_region_mappings region number: %lu >= %lu", region_index, sctx->nr_regions);
			return -EINVAL;
		}
		if (unlikely(path_nr >= sctx->nr_paths)) {
			DMWARN("invalid set_region_mappings device: %lu >= %u", path_nr, sctx->nr_paths);
			return -EINVAL;
		}

		switch_region_table_write(sctx, region_index, path_nr);
	}

	return 0;
}

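/*
 * Example message (a sketch, not part of the original source; the device
 * name "sw" is illustrative, and a table with at least two paths and at
 * least 0x10 regions is assumed):
 *
 *   dmsetup message sw 0 set_region_mappings 0:1 :0 :1 a:0 :1 R2,4
 *
 * sets region 0 to path 1, regions 1 and 2 to paths 0 and 1, region 0xa to
 * path 0 and region 0xb to path 1; "R2,4" then repeats the previous two
 * mappings over the next four regions, so regions 0xc-0xf become paths
 * 0, 1, 0, 1.
 */
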
/*
 * Messages are processed one-at-a-time.
 *
 * Only set_region_mappings is supported.
 */
static int switch_message(struct dm_target *ti, unsigned argc, char **argv,
			  char *result, unsigned maxlen)
{
	static DEFINE_MUTEX(message_mutex);

	struct switch_ctx *sctx = ti->private;
	int r = -EINVAL;

	mutex_lock(&message_mutex);

	if (!strcasecmp(argv[0], "set_region_mappings"))
		r = process_set_region_mappings(sctx, argc, argv);
	else
		DMWARN("Unrecognised message received.");

	mutex_unlock(&message_mutex);

	return r;
}

static void switch_status(struct dm_target *ti, status_type_t type,
			  unsigned status_flags, char *result, unsigned maxlen)
{
	struct switch_ctx *sctx = ti->private;
	unsigned sz = 0;
	int path_nr;

	switch (type) {
	case STATUSTYPE_INFO:
		result[0] = '\0';
		break;

	case STATUSTYPE_TABLE:
		DMEMIT("%u %u 0", sctx->nr_paths, sctx->region_size);
		for (path_nr = 0; path_nr < sctx->nr_paths; path_nr++)
			DMEMIT(" %s %llu", sctx->path_list[path_nr].dmdev->name,
			       (unsigned long long)sctx->path_list[path_nr].start);
		break;

	case STATUSTYPE_IMA:
		result[0] = '\0';
		break;
	}
}

/*
 * Switch ioctl:
 *
 * Pass all ioctls through to the path that serves sector 0.
 */
static int switch_prepare_ioctl(struct dm_target *ti, struct block_device **bdev)
{
	struct switch_ctx *sctx = ti->private;
	unsigned path_nr;

	path_nr = switch_get_path_nr(sctx, 0);

	*bdev = sctx->path_list[path_nr].dmdev->bdev;

	/*
	 * Only pass ioctls through if the device sizes match exactly.
	 */
	if (ti->len + sctx->path_list[path_nr].start !=
	    bdev_nr_sectors((*bdev)))
		return 1;
	return 0;
}

static int switch_iterate_devices(struct dm_target *ti,
				  iterate_devices_callout_fn fn, void *data)
{
	struct switch_ctx *sctx = ti->private;
	int path_nr;
	int r;

	for (path_nr = 0; path_nr < sctx->nr_paths; path_nr++) {
		r = fn(ti, sctx->path_list[path_nr].dmdev,
		       sctx->path_list[path_nr].start, ti->len, data);
		if (r)
			return r;
	}

	return 0;
}

static struct target_type switch_target = {
	.name = "switch",
	.version = {1, 1, 0},
	.features = DM_TARGET_NOWAIT,
	.module = THIS_MODULE,
	.ctr = switch_ctr,
	.dtr = switch_dtr,
	.map = switch_map,
	.message = switch_message,
	.status = switch_status,
	.prepare_ioctl = switch_prepare_ioctl,
	.iterate_devices = switch_iterate_devices,
};

static int __init dm_switch_init(void)
{
	int r;

	r = dm_register_target(&switch_target);
	if (r < 0)
		DMERR("dm_register_target() failed %d", r);

	return r;
}

static void __exit dm_switch_exit(void)
{
	dm_unregister_target(&switch_target);
}

module_init(dm_switch_init);
module_exit(dm_switch_exit);

MODULE_DESCRIPTION(DM_NAME " dynamic path switching target");
MODULE_AUTHOR("Kevin D. O'Kelley <Kevin_OKelley@dell.com>");
MODULE_AUTHOR("Narendran Ganapathy <Narendran_Ganapathy@dell.com>");
MODULE_AUTHOR("Jim Ramsay <Jim_Ramsay@dell.com>");
MODULE_AUTHOR("Mikulas Patocka <mpatocka@redhat.com>");
MODULE_LICENSE("GPL");