/*
 * Copyright (C) 2001 Sistina Software (UK) Limited.
 * Copyright (C) 2004-2008 Red Hat, Inc. All rights reserved.
 *
 * This file is released under the GPL.
 */

#include "dm-core.h"
#include "dm-rq.h"

#include <linux/module.h>
#include <linux/vmalloc.h>
#include <linux/blkdev.h>
#include <linux/blk-integrity.h>
#include <linux/namei.h>
#include <linux/ctype.h>
#include <linux/string.h>
#include <linux/slab.h>
#include <linux/interrupt.h>
#include <linux/mutex.h>
#include <linux/delay.h>
#include <linux/atomic.h>
#include <linux/blk-mq.h>
#include <linux/mount.h>
#include <linux/dax.h>

#define DM_MSG_PREFIX "table"

#define NODE_SIZE L1_CACHE_BYTES
#define KEYS_PER_NODE (NODE_SIZE / sizeof(sector_t))
#define CHILDREN_PER_NODE (KEYS_PER_NODE + 1)

/*
 * Similar to ceiling(log_size(n))
 */
static unsigned int int_log(unsigned int n, unsigned int base)
{
	int result = 0;

	while (n > 1) {
		n = dm_div_up(n, base);
		result++;
	}

	return result;
}

/*
 * Calculate the index of the child node of the n'th node k'th key.
 */
static inline unsigned int get_child(unsigned int n, unsigned int k)
{
	return (n * CHILDREN_PER_NODE) + k;
}

/*
 * Return the n'th node of level l from table t.
 */
static inline sector_t *get_node(struct dm_table *t,
				 unsigned int l, unsigned int n)
{
	return t->index[l] + (n * KEYS_PER_NODE);
}

/*
 * Return the highest key that you could lookup from the n'th
 * node on level l of the btree.
 */
static sector_t high(struct dm_table *t, unsigned int l, unsigned int n)
{
	for (; l < t->depth - 1; l++)
		n = get_child(n, CHILDREN_PER_NODE - 1);

	if (n >= t->counts[l])
		return (sector_t) - 1;

	return get_node(t, l, n)[KEYS_PER_NODE - 1];
}

/*
 * Fills in a level of the btree based on the highs of the level
 * below it.
 */
static int setup_btree_index(unsigned int l, struct dm_table *t)
{
	unsigned int n, k;
	sector_t *node;

	for (n = 0U; n < t->counts[l]; n++) {
		node = get_node(t, l, n);

		for (k = 0U; k < KEYS_PER_NODE; k++)
			node[k] = high(t, l + 1, get_child(n, k));
	}

	return 0;
}

/*
 * highs and targets are managed as dynamic arrays during a
 * table load.
 */
static int alloc_targets(struct dm_table *t, unsigned int num)
{
	sector_t *n_highs;
	struct dm_target *n_targets;

	/*
	 * Allocate both the target array and offset array at once.
	 */
	n_highs = kvcalloc(num, sizeof(struct dm_target) + sizeof(sector_t),
			   GFP_KERNEL);
	if (!n_highs)
		return -ENOMEM;

	n_targets = (struct dm_target *) (n_highs + num);

	memset(n_highs, -1, sizeof(*n_highs) * num);
	kvfree(t->highs);

	t->num_allocated = num;
	t->highs = n_highs;
	t->targets = n_targets;

	return 0;
}

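/*
 * Illustrative sketch of the usual table lifecycle as driven by the core
 * (the "linear" target and its parameters below are only an example and
 * are not defined in this file):
 *
 *	struct dm_table *t;
 *
 *	r = dm_table_create(&t, FMODE_READ | FMODE_WRITE, 1, md);
 *	if (!r)
 *		r = dm_table_add_target(t, "linear", 0, len, params);
 *	if (!r)
 *		r = dm_table_complete(t);
 *	...
 *	dm_table_destroy(t);
 */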
int dm_table_create(struct dm_table **result, fmode_t mode,
		    unsigned num_targets, struct mapped_device *md)
{
	struct dm_table *t = kzalloc(sizeof(*t), GFP_KERNEL);

	if (!t)
		return -ENOMEM;

	INIT_LIST_HEAD(&t->devices);

	if (!num_targets)
		num_targets = KEYS_PER_NODE;

	num_targets = dm_round_up(num_targets, KEYS_PER_NODE);

	if (!num_targets) {
		kfree(t);
		return -ENOMEM;
	}

	if (alloc_targets(t, num_targets)) {
		kfree(t);
		return -ENOMEM;
	}

	t->type = DM_TYPE_NONE;
	t->mode = mode;
	t->md = md;
	*result = t;
	return 0;
}

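/*
 * Release every device still on the table's device list.  Targets are
 * expected to drop their references via dm_put_device() in their
 * destructors, so anything left here is reported before being freed.
 */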
static void free_devices(struct list_head *devices, struct mapped_device *md)
{
	struct list_head *tmp, *next;

	list_for_each_safe(tmp, next, devices) {
		struct dm_dev_internal *dd =
		    list_entry(tmp, struct dm_dev_internal, list);
		DMWARN("%s: dm_table_destroy: dm_put_device call missing for %s",
		       dm_device_name(md), dd->dm_dev->name);
		dm_put_table_device(md, dd->dm_dev);
		kfree(dd);
	}
}

static void dm_table_destroy_crypto_profile(struct dm_table *t);

void dm_table_destroy(struct dm_table *t)
{
	if (!t)
		return;

	/* free the indexes */
	if (t->depth >= 2)
		kvfree(t->index[t->depth - 2]);

	/* free the targets */
	for (unsigned int i = 0; i < t->num_targets; i++) {
		struct dm_target *ti = dm_table_get_target(t, i);

		if (ti->type->dtr)
			ti->type->dtr(ti);

		dm_put_target_type(ti->type);
	}

	kvfree(t->highs);

	/* free the device list */
	free_devices(&t->devices, t->md);

	dm_free_md_mempools(t->mempools);

	dm_table_destroy_crypto_profile(t);

	kfree(t);
}

/*
 * See if we've already got a device in the list.
 */
static struct dm_dev_internal *find_device(struct list_head *l, dev_t dev)
{
	struct dm_dev_internal *dd;

	list_for_each_entry (dd, l, list)
		if (dd->dm_dev->bdev->bd_dev == dev)
			return dd;

	return NULL;
}

/*
 * If possible, this checks an area of a destination device is invalid.
 */
static int device_area_is_invalid(struct dm_target *ti, struct dm_dev *dev,
				  sector_t start, sector_t len, void *data)
{
	struct queue_limits *limits = data;
	struct block_device *bdev = dev->bdev;
	sector_t dev_size = bdev_nr_sectors(bdev);
	unsigned short logical_block_size_sectors =
		limits->logical_block_size >> SECTOR_SHIFT;

	if (!dev_size)
		return 0;

	if ((start >= dev_size) || (start + len > dev_size)) {
		DMWARN("%s: %pg too small for target: "
		       "start=%llu, len=%llu, dev_size=%llu",
		       dm_device_name(ti->table->md), bdev,
		       (unsigned long long)start,
		       (unsigned long long)len,
		       (unsigned long long)dev_size);
		return 1;
	}

	/*
	 * If the target is mapped to zoned block device(s), check
	 * that the zones are not partially mapped.
	 */
	if (bdev_is_zoned(bdev)) {
		unsigned int zone_sectors = bdev_zone_sectors(bdev);

		if (start & (zone_sectors - 1)) {
			DMWARN("%s: start=%llu not aligned to h/w zone size %u of %pg",
			       dm_device_name(ti->table->md),
			       (unsigned long long)start,
			       zone_sectors, bdev);
			return 1;
		}

		/*
		 * Note: The last zone of a zoned block device may be smaller
		 * than other zones. So for a target mapping the end of a
		 * zoned block device with such a zone, len would not be zone
		 * aligned. We do not allow such last smaller zone to be part
		 * of the mapping here to ensure that mappings with multiple
		 * devices do not end up with a smaller zone in the middle of
		 * the sector range.
		 */
		if (len & (zone_sectors - 1)) {
			DMWARN("%s: len=%llu not aligned to h/w zone size %u of %pg",
			       dm_device_name(ti->table->md),
			       (unsigned long long)len,
			       zone_sectors, bdev);
			return 1;
		}
	}

	if (logical_block_size_sectors <= 1)
		return 0;

	if (start & (logical_block_size_sectors - 1)) {
		DMWARN("%s: start=%llu not aligned to h/w "
		       "logical block size %u of %pg",
		       dm_device_name(ti->table->md),
		       (unsigned long long)start,
		       limits->logical_block_size, bdev);
		return 1;
	}

	if (len & (logical_block_size_sectors - 1)) {
		DMWARN("%s: len=%llu not aligned to h/w "
		       "logical block size %u of %pg",
		       dm_device_name(ti->table->md),
		       (unsigned long long)len,
		       limits->logical_block_size, bdev);
		return 1;
	}

	return 0;
}

/*
 * This upgrades the mode on an already open dm_dev, being
 * careful to leave things as they were if we fail to reopen the
 * device and not to touch the existing bdev field in case
 * it is accessed out of recursion.
 */
static int upgrade_mode(struct dm_dev_internal *dd, fmode_t new_mode,
			struct mapped_device *md)
{
	int r;
	struct dm_dev *old_dev, *new_dev;

	old_dev = dd->dm_dev;

	r = dm_get_table_device(md, dd->dm_dev->bdev->bd_dev,
				dd->dm_dev->mode | new_mode, &new_dev);
	if (r)
		return r;

	dd->dm_dev = new_dev;
	dm_put_table_device(md, old_dev);

	return 0;
}

/*
 * Convert the path to a device
 */
dev_t dm_get_dev_t(const char *path)
{
	dev_t dev;

	if (lookup_bdev(path, &dev))
		dev = name_to_dev_t(path);
	return dev;
}
EXPORT_SYMBOL_GPL(dm_get_dev_t);

/*
 * Add a device to the list, or just increment the usage count if
 * it's already present.
 */
int dm_get_device(struct dm_target *ti, const char *path, fmode_t mode,
		  struct dm_dev **result)
{
	int r;
	dev_t dev;
	unsigned int major, minor;
	char dummy;
	struct dm_dev_internal *dd;
	struct dm_table *t = ti->table;

	BUG_ON(!t);

	if (sscanf(path, "%u:%u%c", &major, &minor, &dummy) == 2) {
		/* Extract the major/minor numbers */
		dev = MKDEV(major, minor);
		if (MAJOR(dev) != major || MINOR(dev) != minor)
			return -EOVERFLOW;
	} else {
		dev = dm_get_dev_t(path);
		if (!dev)
			return -ENODEV;
	}

	dd = find_device(&t->devices, dev);
	if (!dd) {
		dd = kmalloc(sizeof(*dd), GFP_KERNEL);
		if (!dd)
			return -ENOMEM;

		if ((r = dm_get_table_device(t->md, dev, mode, &dd->dm_dev))) {
			kfree(dd);
			return r;
		}

		refcount_set(&dd->count, 1);
		list_add(&dd->list, &t->devices);
		goto out;

	} else if (dd->dm_dev->mode != (mode | dd->dm_dev->mode)) {
		r = upgrade_mode(dd, mode, t->md);
		if (r)
			return r;
	}
	refcount_inc(&dd->count);
out:
	*result = dd->dm_dev;
	return 0;
}
EXPORT_SYMBOL(dm_get_device);

static int dm_set_device_limits(struct dm_target *ti, struct dm_dev *dev,
				sector_t start, sector_t len, void *data)
{
	struct queue_limits *limits = data;
	struct block_device *bdev = dev->bdev;
	struct request_queue *q = bdev_get_queue(bdev);

	if (unlikely(!q)) {
		DMWARN("%s: Cannot set limits for nonexistent device %pg",
		       dm_device_name(ti->table->md), bdev);
		return 0;
	}

	if (blk_stack_limits(limits, &q->limits,
			get_start_sect(bdev) + start) < 0)
		DMWARN("%s: adding target device %pg caused an alignment inconsistency: "
		       "physical_block_size=%u, logical_block_size=%u, "
		       "alignment_offset=%u, start=%llu",
		       dm_device_name(ti->table->md), bdev,
		       q->limits.physical_block_size,
		       q->limits.logical_block_size,
		       q->limits.alignment_offset,
		       (unsigned long long) start << SECTOR_SHIFT);
	return 0;
}

/*
 * Decrement a device's use count and remove it if necessary.
 */
void dm_put_device(struct dm_target *ti, struct dm_dev *d)
{
	int found = 0;
	struct list_head *devices = &ti->table->devices;
	struct dm_dev_internal *dd;

	list_for_each_entry(dd, devices, list) {
		if (dd->dm_dev == d) {
			found = 1;
			break;
		}
	}
	if (!found) {
		DMWARN("%s: device %s not in table devices list",
		       dm_device_name(ti->table->md), d->name);
		return;
	}
	if (refcount_dec_and_test(&dd->count)) {
		dm_put_table_device(ti->table->md, d);
		list_del(&dd->list);
		kfree(dd);
	}
}
EXPORT_SYMBOL(dm_put_device);

/*
 * Checks to see if the target joins onto the end of the table.
 */
static int adjoin(struct dm_table *t, struct dm_target *ti)
{
	struct dm_target *prev;

	if (!t->num_targets)
		return !ti->begin;

	prev = &t->targets[t->num_targets - 1];
	return (ti->begin == (prev->begin + prev->len));
}

/*
 * Used to dynamically allocate the arg array.
 *
 * We do first allocation with GFP_NOIO because dm-mpath and dm-thin must
 * process messages even if some device is suspended. These messages have a
 * small fixed number of arguments.
 *
 * On the other hand, dm-switch needs to process bulk data using messages and
 * excessive use of GFP_NOIO could cause trouble.
 */
static char **realloc_argv(unsigned *size, char **old_argv)
{
	char **argv;
	unsigned new_size;
	gfp_t gfp;

	if (*size) {
		new_size = *size * 2;
		gfp = GFP_KERNEL;
	} else {
		new_size = 8;
		gfp = GFP_NOIO;
	}
	argv = kmalloc_array(new_size, sizeof(*argv), gfp);
	if (argv && old_argv) {
		memcpy(argv, old_argv, *size * sizeof(*argv));
		*size = new_size;
	}

	kfree(old_argv);
	return argv;
}

/*
 * Destructively splits up the argument list to pass to ctr.
 */
int dm_split_args(int *argc, char ***argvp, char *input)
{
	char *start, *end = input, *out, **argv = NULL;
	unsigned array_size = 0;

	*argc = 0;

	if (!input) {
		*argvp = NULL;
		return 0;
	}

	argv = realloc_argv(&array_size, argv);
	if (!argv)
		return -ENOMEM;

	while (1) {
		/* Skip whitespace */
		start = skip_spaces(end);

		if (!*start)
			break;	/* success, we hit the end */

		/* 'out' is used to remove any back-quotes */
		end = out = start;
		while (*end) {
			/* Everything apart from '\0' can be quoted */
			if (*end == '\\' && *(end + 1)) {
				*out++ = *(end + 1);
				end += 2;
				continue;
			}

			if (isspace(*end))
				break;	/* end of token */

			/* just copy it */
			*out++ = *end++;
		}

		/* have we already filled the array ? */
		if ((*argc + 1) > array_size) {
			argv = realloc_argv(&array_size, argv);
			if (!argv)
				return -ENOMEM;
		}

		/* we know this is whitespace */
		if (*end)
			end++;

		/* terminate the string and put it in the array */
		*out = '\0';
		argv[*argc] = start;
		(*argc)++;
	}

	*argvp = argv;
	return 0;
}

/*
 * Impose necessary and sufficient conditions on a device's table such
 * that any incoming bio which respects its logical_block_size can be
 * processed successfully.  If it falls across the boundary between
 * two or more targets, the size of each piece it gets split into must
 * be compatible with the logical_block_size of the target processing it.
 */
static int validate_hardware_logical_block_alignment(struct dm_table *t,
						     struct queue_limits *limits)
{
	/*
	 * This function uses arithmetic modulo the logical_block_size
	 * (in units of 512-byte sectors).
	 */
	unsigned short device_logical_block_size_sects =
		limits->logical_block_size >> SECTOR_SHIFT;

	/*
	 * Offset of the start of the next table entry, mod logical_block_size.
	 */
	unsigned short next_target_start = 0;

	/*
	 * Given an aligned bio that extends beyond the end of a
	 * target, how many sectors must the next target handle?
	 */
	unsigned short remaining = 0;

	struct dm_target *ti;
	struct queue_limits ti_limits;
	unsigned int i;

	/*
	 * Check each entry in the table in turn.
	 */
	for (i = 0; i < t->num_targets; i++) {
		ti = dm_table_get_target(t, i);

		blk_set_stacking_limits(&ti_limits);

		/* combine all target devices' limits */
		if (ti->type->iterate_devices)
			ti->type->iterate_devices(ti, dm_set_device_limits,
						  &ti_limits);

		/*
		 * If the remaining sectors fall entirely within this
		 * table entry are they compatible with its logical_block_size?
		 */
		if (remaining < ti->len &&
		    remaining & ((ti_limits.logical_block_size >>
				  SECTOR_SHIFT) - 1))
			break;	/* Error */

		next_target_start =
		    (unsigned short) ((next_target_start + ti->len) &
				      (device_logical_block_size_sects - 1));
		remaining = next_target_start ?
		    device_logical_block_size_sects - next_target_start : 0;
	}

	if (remaining) {
		DMWARN("%s: table line %u (start sect %llu len %llu) "
		       "not aligned to h/w logical block size %u",
		       dm_device_name(t->md), i,
		       (unsigned long long) ti->begin,
		       (unsigned long long) ti->len,
		       limits->logical_block_size);
		return -EINVAL;
	}

	return 0;
}

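/*
 * Add a new target to the end of the table: resolve the target type,
 * parse its constructor arguments with dm_split_args(), and record the
 * region [start, start + len) it covers.  Called once per table line
 * during table load.
 */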
int dm_table_add_target(struct dm_table *t, const char *type,
			sector_t start, sector_t len, char *params)
{
	int r = -EINVAL, argc;
	char **argv;
	struct dm_target *ti;

	if (t->singleton) {
		DMERR("%s: target type %s must appear alone in table",
		      dm_device_name(t->md), t->targets->type->name);
		return -EINVAL;
	}

	BUG_ON(t->num_targets >= t->num_allocated);

	ti = t->targets + t->num_targets;
	memset(ti, 0, sizeof(*ti));

	if (!len) {
		DMERR("%s: zero-length target", dm_device_name(t->md));
		return -EINVAL;
	}

	ti->type = dm_get_target_type(type);
	if (!ti->type) {
		DMERR("%s: %s: unknown target type", dm_device_name(t->md), type);
		return -EINVAL;
	}

	if (dm_target_needs_singleton(ti->type)) {
		if (t->num_targets) {
			ti->error = "singleton target type must appear alone in table";
			goto bad;
		}
		t->singleton = true;
	}

	if (dm_target_always_writeable(ti->type) && !(t->mode & FMODE_WRITE)) {
		ti->error = "target type may not be included in a read-only table";
		goto bad;
	}

	if (t->immutable_target_type) {
		if (t->immutable_target_type != ti->type) {
			ti->error = "immutable target type cannot be mixed with other target types";
			goto bad;
		}
	} else if (dm_target_is_immutable(ti->type)) {
		if (t->num_targets) {
			ti->error = "immutable target type cannot be mixed with other target types";
			goto bad;
		}
		t->immutable_target_type = ti->type;
	}

	if (dm_target_has_integrity(ti->type))
		t->integrity_added = 1;

	ti->table = t;
	ti->begin = start;
	ti->len = len;
	ti->error = "Unknown error";

	/*
	 * Does this target adjoin the previous one ?
	 */
	if (!adjoin(t, ti)) {
		ti->error = "Gap in table";
		goto bad;
	}

	r = dm_split_args(&argc, &argv, params);
	if (r) {
		ti->error = "couldn't split parameters";
		goto bad;
	}

	r = ti->type->ctr(ti, argc, argv);
	kfree(argv);
	if (r)
		goto bad;

	t->highs[t->num_targets++] = ti->begin + ti->len - 1;

	if (!ti->num_discard_bios && ti->discards_supported)
		DMWARN("%s: %s: ignoring discards_supported because num_discard_bios is zero.",
		       dm_device_name(t->md), type);

	if (ti->limit_swap_bios && !static_key_enabled(&swap_bios_enabled.key))
		static_branch_enable(&swap_bios_enabled);

	return 0;

 bad:
	DMERR("%s: %s: %s (%pe)", dm_device_name(t->md), type, ti->error, ERR_PTR(r));
	dm_put_target_type(ti->type);
	return r;
}

/*
 * Target argument parsing helpers.
 */
static int validate_next_arg(const struct dm_arg *arg,
			     struct dm_arg_set *arg_set,
			     unsigned *value, char **error, unsigned grouped)
{
	const char *arg_str = dm_shift_arg(arg_set);
	char dummy;

	if (!arg_str ||
	    (sscanf(arg_str, "%u%c", value, &dummy) != 1) ||
	    (*value < arg->min) ||
	    (*value > arg->max) ||
	    (grouped && arg_set->argc < *value)) {
		*error = arg->error;
		return -EINVAL;
	}

	return 0;
}

int dm_read_arg(const struct dm_arg *arg, struct dm_arg_set *arg_set,
		unsigned *value, char **error)
{
	return validate_next_arg(arg, arg_set, value, error, 0);
}
EXPORT_SYMBOL(dm_read_arg);

int dm_read_arg_group(const struct dm_arg *arg, struct dm_arg_set *arg_set,
		      unsigned *value, char **error)
{
	return validate_next_arg(arg, arg_set, value, error, 1);
}
EXPORT_SYMBOL(dm_read_arg_group);

const char *dm_shift_arg(struct dm_arg_set *as)
{
	char *r;

	if (as->argc) {
		as->argc--;
		r = *as->argv;
		as->argv++;
		return r;
	}

	return NULL;
}
EXPORT_SYMBOL(dm_shift_arg);

void dm_consume_args(struct dm_arg_set *as, unsigned num_args)
{
	BUG_ON(as->argc < num_args);
	as->argc -= num_args;
	as->argv += num_args;
}
EXPORT_SYMBOL(dm_consume_args);

static bool __table_type_bio_based(enum dm_queue_mode table_type)
{
	return (table_type == DM_TYPE_BIO_BASED ||
		table_type == DM_TYPE_DAX_BIO_BASED);
}

static bool __table_type_request_based(enum dm_queue_mode table_type)
{
	return table_type == DM_TYPE_REQUEST_BASED;
}

void dm_table_set_type(struct dm_table *t, enum dm_queue_mode type)
{
	t->type = type;
}
EXPORT_SYMBOL_GPL(dm_table_set_type);

/* validate the dax capability of the target device span */
static int device_not_dax_capable(struct dm_target *ti, struct dm_dev *dev,
				  sector_t start, sector_t len, void *data)
{
	if (dev->dax_dev)
		return false;

	DMDEBUG("%pg: error: dax unsupported by block device", dev->bdev);
	return true;
}

/* Check devices support synchronous DAX */
static int device_not_dax_synchronous_capable(struct dm_target *ti, struct dm_dev *dev,
					      sector_t start, sector_t len, void *data)
{
	return !dev->dax_dev || !dax_synchronous(dev->dax_dev);
}

static bool dm_table_supports_dax(struct dm_table *t,
				  iterate_devices_callout_fn iterate_fn)
{
	/* Ensure that all targets support DAX. */
	for (unsigned int i = 0; i < t->num_targets; i++) {
		struct dm_target *ti = dm_table_get_target(t, i);

		if (!ti->type->direct_access)
			return false;

		if (!ti->type->iterate_devices ||
		    ti->type->iterate_devices(ti, iterate_fn, NULL))
			return false;
	}

	return true;
}

static int device_is_rq_stackable(struct dm_target *ti, struct dm_dev *dev,
				  sector_t start, sector_t len, void *data)
{
	struct block_device *bdev = dev->bdev;
	struct request_queue *q = bdev_get_queue(bdev);

	/* request-based cannot stack on partitions! */
	if (bdev_is_partition(bdev))
		return false;

	return queue_is_mq(q);
}

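/*
 * Decide whether the table as a whole is bio-based or request-based,
 * based on the target types it contains (and, for "hybrid" targets that
 * can do either, on the type of the live mapped device).
 */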
static int dm_table_determine_type(struct dm_table *t)
{
	unsigned bio_based = 0, request_based = 0, hybrid = 0;
	struct dm_target *ti;
	struct list_head *devices = dm_table_get_devices(t);
	enum dm_queue_mode live_md_type = dm_get_md_type(t->md);

	if (t->type != DM_TYPE_NONE) {
		/* target already set the table's type */
		if (t->type == DM_TYPE_BIO_BASED) {
			/* possibly upgrade to a variant of bio-based */
			goto verify_bio_based;
		}
		BUG_ON(t->type == DM_TYPE_DAX_BIO_BASED);
		goto verify_rq_based;
	}

	for (unsigned int i = 0; i < t->num_targets; i++) {
		ti = dm_table_get_target(t, i);
		if (dm_target_hybrid(ti))
			hybrid = 1;
		else if (dm_target_request_based(ti))
			request_based = 1;
		else
			bio_based = 1;

		if (bio_based && request_based) {
			DMERR("Inconsistent table: different target types"
			      " can't be mixed up");
			return -EINVAL;
		}
	}

	if (hybrid && !bio_based && !request_based) {
		/*
		 * The targets can work either way.
		 * Determine the type from the live device.
		 * Default to bio-based if device is new.
		 */
		if (__table_type_request_based(live_md_type))
			request_based = 1;
		else
			bio_based = 1;
	}

	if (bio_based) {
verify_bio_based:
		/* We must use this table as bio-based */
		t->type = DM_TYPE_BIO_BASED;
		if (dm_table_supports_dax(t, device_not_dax_capable) ||
		    (list_empty(devices) && live_md_type == DM_TYPE_DAX_BIO_BASED)) {
			t->type = DM_TYPE_DAX_BIO_BASED;
		}
		return 0;
	}

	BUG_ON(!request_based);

	t->type = DM_TYPE_REQUEST_BASED;

verify_rq_based:
	/*
	 * Request-based dm supports only tables that have a single target now.
	 * To support multiple targets, request splitting support is needed,
	 * and that needs lots of changes in the block-layer.
	 * (e.g. request completion process for partial completion.)
	 */
	if (t->num_targets > 1) {
		DMERR("request-based DM doesn't support multiple targets");
		return -EINVAL;
	}

	if (list_empty(devices)) {
		int srcu_idx;
		struct dm_table *live_table = dm_get_live_table(t->md, &srcu_idx);

		/* inherit live table's type */
		if (live_table)
			t->type = live_table->type;
		dm_put_live_table(t->md, srcu_idx);
		return 0;
	}

	ti = dm_table_get_immutable_target(t);
	if (!ti) {
		DMERR("table load rejected: immutable target is required");
		return -EINVAL;
	} else if (ti->max_io_len) {
		DMERR("table load rejected: immutable target that splits IO is not supported");
		return -EINVAL;
	}

	/* Non-request-stackable devices can't be used for request-based dm */
	if (!ti->type->iterate_devices ||
	    !ti->type->iterate_devices(ti, device_is_rq_stackable, NULL)) {
		DMERR("table load rejected: including non-request-stackable devices");
		return -EINVAL;
	}

	return 0;
}

enum dm_queue_mode dm_table_get_type(struct dm_table *t)
{
	return t->type;
}

struct target_type *dm_table_get_immutable_target_type(struct dm_table *t)
{
	return t->immutable_target_type;
}

struct dm_target *dm_table_get_immutable_target(struct dm_table *t)
{
	/* Immutable target is implicitly a singleton */
	if (t->num_targets > 1 ||
	    !dm_target_is_immutable(t->targets[0].type))
		return NULL;

	return t->targets;
}

struct dm_target *dm_table_get_wildcard_target(struct dm_table *t)
{
	for (unsigned int i = 0; i < t->num_targets; i++) {
		struct dm_target *ti = dm_table_get_target(t, i);

		if (dm_target_is_wildcard(ti->type))
			return ti;
	}

	return NULL;
}

bool dm_table_bio_based(struct dm_table *t)
{
	return __table_type_bio_based(dm_table_get_type(t));
}

bool dm_table_request_based(struct dm_table *t)
{
	return __table_type_request_based(dm_table_get_type(t));
}

static bool dm_table_supports_poll(struct dm_table *t);

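/*
 * Set up the bio sets (and, via the per-target per_io_data_size, the
 * front padding) that dm_io/dm_target_io clones for this table will be
 * allocated from.  Request-based tables only need the clone bio set.
 */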
static int dm_table_alloc_md_mempools(struct dm_table *t, struct mapped_device *md)
{
	enum dm_queue_mode type = dm_table_get_type(t);
	unsigned int per_io_data_size = 0, front_pad, io_front_pad;
	unsigned int min_pool_size = 0, pool_size;
	struct dm_md_mempools *pools;

	if (unlikely(type == DM_TYPE_NONE)) {
		DMWARN("no table type is set, can't allocate mempools");
		return -EINVAL;
	}

	pools = kzalloc_node(sizeof(*pools), GFP_KERNEL, md->numa_node_id);
	if (!pools)
		return -ENOMEM;

	if (type == DM_TYPE_REQUEST_BASED) {
		pool_size = dm_get_reserved_rq_based_ios();
		front_pad = offsetof(struct dm_rq_clone_bio_info, clone);
		goto init_bs;
	}

	for (unsigned int i = 0; i < t->num_targets; i++) {
		struct dm_target *ti = dm_table_get_target(t, i);

		per_io_data_size = max(per_io_data_size, ti->per_io_data_size);
		min_pool_size = max(min_pool_size, ti->num_flush_bios);
	}
	pool_size = max(dm_get_reserved_bio_based_ios(), min_pool_size);
	front_pad = roundup(per_io_data_size,
			    __alignof__(struct dm_target_io)) + DM_TARGET_IO_BIO_OFFSET;

	io_front_pad = roundup(per_io_data_size,
			       __alignof__(struct dm_io)) + DM_IO_BIO_OFFSET;
	if (bioset_init(&pools->io_bs, pool_size, io_front_pad,
			dm_table_supports_poll(t) ? BIOSET_PERCPU_CACHE : 0))
		goto out_free_pools;
	if (t->integrity_supported &&
	    bioset_integrity_create(&pools->io_bs, pool_size))
		goto out_free_pools;
init_bs:
	if (bioset_init(&pools->bs, pool_size, front_pad, 0))
		goto out_free_pools;
	if (t->integrity_supported &&
	    bioset_integrity_create(&pools->bs, pool_size))
		goto out_free_pools;

	t->mempools = pools;
	return 0;

out_free_pools:
	dm_free_md_mempools(pools);
	return -ENOMEM;
}

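/*
 * Allocate and populate the interior nodes of the lookup btree.  The
 * leaf level (t->highs) already exists; each higher level stores, per
 * child, the highest sector that child covers.
 */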
static int setup_indexes(struct dm_table *t)
{
	int i;
	unsigned int total = 0;
	sector_t *indexes;

	/* allocate the space for *all* the indexes */
	for (i = t->depth - 2; i >= 0; i--) {
		t->counts[i] = dm_div_up(t->counts[i + 1], CHILDREN_PER_NODE);
		total += t->counts[i];
	}

	indexes = kvcalloc(total, NODE_SIZE, GFP_KERNEL);
	if (!indexes)
		return -ENOMEM;

	/* set up internal nodes, bottom-up */
	for (i = t->depth - 2; i >= 0; i--) {
		t->index[i] = indexes;
		indexes += (KEYS_PER_NODE * t->counts[i]);
		setup_btree_index(i, t);
	}

	return 0;
}

/*
 * Builds the btree to index the map.
 */
static int dm_table_build_index(struct dm_table *t)
{
	int r = 0;
	unsigned int leaf_nodes;

	/* first calculate the btree depth */
	leaf_nodes = dm_div_up(t->num_targets, KEYS_PER_NODE);
	t->depth = 1 + int_log(leaf_nodes, CHILDREN_PER_NODE);

	/* leaf layer has already been set up */
	t->counts[t->depth - 1] = leaf_nodes;
	t->index[t->depth - 1] = t->highs;

	if (t->depth >= 2)
		r = setup_indexes(t);

	return r;
}

static bool integrity_profile_exists(struct gendisk *disk)
{
	return !!blk_get_integrity(disk);
}

/*
 * Get a disk whose integrity profile reflects the table's profile.
 * Returns NULL if integrity support was inconsistent or unavailable.
 */
static struct gendisk *dm_table_get_integrity_disk(struct dm_table *t)
{
	struct list_head *devices = dm_table_get_devices(t);
	struct dm_dev_internal *dd = NULL;
	struct gendisk *prev_disk = NULL, *template_disk = NULL;

	for (unsigned int i = 0; i < t->num_targets; i++) {
		struct dm_target *ti = dm_table_get_target(t, i);

		if (!dm_target_passes_integrity(ti->type))
			goto no_integrity;
	}

	list_for_each_entry(dd, devices, list) {
		template_disk = dd->dm_dev->bdev->bd_disk;
		if (!integrity_profile_exists(template_disk))
			goto no_integrity;
		else if (prev_disk &&
			 blk_integrity_compare(prev_disk, template_disk) < 0)
			goto no_integrity;
		prev_disk = template_disk;
	}

	return template_disk;

no_integrity:
	if (prev_disk)
		DMWARN("%s: integrity not set: %s and %s profile mismatch",
		       dm_device_name(t->md),
		       prev_disk->disk_name,
		       template_disk->disk_name);
	return NULL;
}

/*
 * Register the mapped device for blk_integrity support if the
 * underlying devices have an integrity profile.  But all devices may
 * not have matching profiles (checking all devices isn't reliable
 * during table load because this table may use other DM device(s) which
 * must be resumed before they will have an initialized integrity
 * profile).  Consequently, stacked DM devices force a 2 stage integrity
 * profile validation: First pass during table load, final pass during
 * resume.
 */
static int dm_table_register_integrity(struct dm_table *t)
{
	struct mapped_device *md = t->md;
	struct gendisk *template_disk = NULL;

	/* If target handles integrity itself do not register it here. */
	if (t->integrity_added)
		return 0;

	template_disk = dm_table_get_integrity_disk(t);
	if (!template_disk)
		return 0;

	if (!integrity_profile_exists(dm_disk(md))) {
		t->integrity_supported = true;
		/*
		 * Register integrity profile during table load; we can do
		 * this because the final profile must match during resume.
		 */
		blk_integrity_register(dm_disk(md),
				       blk_get_integrity(template_disk));
		return 0;
	}

	/*
	 * If DM device already has an initialized integrity
	 * profile the new profile should not conflict.
	 */
	if (blk_integrity_compare(dm_disk(md), template_disk) < 0) {
		DMWARN("%s: conflict with existing integrity profile: "
		       "%s profile mismatch",
		       dm_device_name(t->md),
		       template_disk->disk_name);
		return 1;
	}

	/* Preserve existing integrity profile */
	t->integrity_supported = true;
	return 0;
}

#ifdef CONFIG_BLK_INLINE_ENCRYPTION

struct dm_crypto_profile {
	struct blk_crypto_profile profile;
	struct mapped_device *md;
};

struct dm_keyslot_evict_args {
	const struct blk_crypto_key *key;
	int err;
};

static int dm_keyslot_evict_callback(struct dm_target *ti, struct dm_dev *dev,
				     sector_t start, sector_t len, void *data)
{
	struct dm_keyslot_evict_args *args = data;
	int err;

	err = blk_crypto_evict_key(bdev_get_queue(dev->bdev), args->key);
	if (!args->err)
		args->err = err;
	/* Always try to evict the key from all devices. */
	return 0;
}

/*
 * When an inline encryption key is evicted from a device-mapper device, evict
 * it from all the underlying devices.
 */
static int dm_keyslot_evict(struct blk_crypto_profile *profile,
			    const struct blk_crypto_key *key, unsigned int slot)
{
	struct mapped_device *md =
		container_of(profile, struct dm_crypto_profile, profile)->md;
	struct dm_keyslot_evict_args args = { key };
	struct dm_table *t;
	int srcu_idx;

	t = dm_get_live_table(md, &srcu_idx);
	if (!t)
		return 0;

	for (unsigned int i = 0; i < t->num_targets; i++) {
		struct dm_target *ti = dm_table_get_target(t, i);

		if (!ti->type->iterate_devices)
			continue;
		ti->type->iterate_devices(ti, dm_keyslot_evict_callback, &args);
	}

	dm_put_live_table(md, srcu_idx);
	return args.err;
}

static int
device_intersect_crypto_capabilities(struct dm_target *ti, struct dm_dev *dev,
				     sector_t start, sector_t len, void *data)
{
	struct blk_crypto_profile *parent = data;
	struct blk_crypto_profile *child =
		bdev_get_queue(dev->bdev)->crypto_profile;

	blk_crypto_intersect_capabilities(parent, child);
	return 0;
}

void dm_destroy_crypto_profile(struct blk_crypto_profile *profile)
{
	struct dm_crypto_profile *dmcp = container_of(profile,
						      struct dm_crypto_profile,
						      profile);

	if (!profile)
		return;

	blk_crypto_profile_destroy(profile);
	kfree(dmcp);
}

static void dm_table_destroy_crypto_profile(struct dm_table *t)
{
	dm_destroy_crypto_profile(t->crypto_profile);
	t->crypto_profile = NULL;
}

/*
 * Constructs and initializes t->crypto_profile with a crypto profile that
 * represents the common set of crypto capabilities of the devices in the
 * table.  However, if the constructed crypto profile doesn't support all
 * the crypto capabilities of the existing mapped device, an error is
 * returned instead, since removing crypto capabilities on table changes
 * isn't supported.  Finally, if the constructed crypto profile is "empty"
 * (has no crypto capabilities at all), t->crypto_profile is simply set to
 * NULL.
 */
static int dm_table_construct_crypto_profile(struct dm_table *t)
{
	struct dm_crypto_profile *dmcp;
	struct blk_crypto_profile *profile;
	unsigned int i;
	bool empty_profile = true;

	dmcp = kmalloc(sizeof(*dmcp), GFP_KERNEL);
	if (!dmcp)
		return -ENOMEM;
	dmcp->md = t->md;

	profile = &dmcp->profile;
	blk_crypto_profile_init(profile, 0);
	profile->ll_ops.keyslot_evict = dm_keyslot_evict;
	profile->max_dun_bytes_supported = UINT_MAX;
	memset(profile->modes_supported, 0xFF,
	       sizeof(profile->modes_supported));

	for (i = 0; i < t->num_targets; i++) {
		struct dm_target *ti = dm_table_get_target(t, i);

		if (!dm_target_passes_crypto(ti->type)) {
			blk_crypto_intersect_capabilities(profile, NULL);
			break;
		}
		if (!ti->type->iterate_devices)
			continue;
		ti->type->iterate_devices(ti,
					  device_intersect_crypto_capabilities,
					  profile);
	}

	if (t->md->queue &&
	    !blk_crypto_has_capabilities(profile,
					 t->md->queue->crypto_profile)) {
		DMWARN("Inline encryption capabilities of new DM table were more restrictive than the old table's. This is not supported!");
		dm_destroy_crypto_profile(profile);
		return -EINVAL;
	}

	/*
	 * If the new profile doesn't actually support any crypto capabilities,
	 * we may as well represent it with a NULL profile.
	 */
	for (i = 0; i < ARRAY_SIZE(profile->modes_supported); i++) {
		if (profile->modes_supported[i]) {
			empty_profile = false;
			break;
		}
	}

	if (empty_profile) {
		dm_destroy_crypto_profile(profile);
		profile = NULL;
	}

	/*
	 * t->crypto_profile is only set temporarily while the table is being
	 * set up, and it gets set to NULL after the profile has been
	 * transferred to the request_queue.
	 */
	t->crypto_profile = profile;

	return 0;
}

static void dm_update_crypto_profile(struct request_queue *q,
				     struct dm_table *t)
{
	if (!t->crypto_profile)
		return;

	/* Make the crypto profile less restrictive. */
	if (!q->crypto_profile) {
		blk_crypto_register(t->crypto_profile, q);
	} else {
		blk_crypto_update_capabilities(q->crypto_profile,
					       t->crypto_profile);
		dm_destroy_crypto_profile(t->crypto_profile);
	}
	t->crypto_profile = NULL;
}

#else /* CONFIG_BLK_INLINE_ENCRYPTION */

static int dm_table_construct_crypto_profile(struct dm_table *t)
{
	return 0;
}

void dm_destroy_crypto_profile(struct blk_crypto_profile *profile)
{
}

static void dm_table_destroy_crypto_profile(struct dm_table *t)
{
}

static void dm_update_crypto_profile(struct request_queue *q,
				     struct dm_table *t)
{
}

#endif /* !CONFIG_BLK_INLINE_ENCRYPTION */

/*
 * Prepares the table for use by building the indices,
 * setting the type, and allocating mempools.
 */
int dm_table_complete(struct dm_table *t)
{
	int r;

	r = dm_table_determine_type(t);
	if (r) {
		DMERR("unable to determine table type");
		return r;
	}

	r = dm_table_build_index(t);
	if (r) {
		DMERR("unable to build btrees");
		return r;
	}

	r = dm_table_register_integrity(t);
	if (r) {
		DMERR("could not register integrity profile.");
		return r;
	}

	r = dm_table_construct_crypto_profile(t);
	if (r) {
		DMERR("could not construct crypto profile.");
		return r;
	}

	r = dm_table_alloc_md_mempools(t, t->md);
	if (r)
		DMERR("unable to allocate mempools");

	return r;
}

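/*
 * Event notification: the core registers a callback with
 * dm_table_event_callback(), and dm_table_event() invokes it (under
 * _event_lock) when a target reports an event on this table.
 */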
static DEFINE_MUTEX(_event_lock);
void dm_table_event_callback(struct dm_table *t,
			     void (*fn)(void *), void *context)
{
	mutex_lock(&_event_lock);
	t->event_fn = fn;
	t->event_context = context;
	mutex_unlock(&_event_lock);
}

void dm_table_event(struct dm_table *t)
{
	mutex_lock(&_event_lock);
	if (t->event_fn)
		t->event_fn(t->event_context);
	mutex_unlock(&_event_lock);
}
EXPORT_SYMBOL(dm_table_event);

inline sector_t dm_table_get_size(struct dm_table *t)
{
	return t->num_targets ? (t->highs[t->num_targets - 1] + 1) : 0;
}
EXPORT_SYMBOL(dm_table_get_size);

/*
 * Search the btree for the correct target.
 *
 * Caller should check returned pointer for NULL
 * to trap I/O beyond end of device.
 */
struct dm_target *dm_table_find_target(struct dm_table *t, sector_t sector)
{
	unsigned int l, n = 0, k = 0;
	sector_t *node;

	if (unlikely(sector >= dm_table_get_size(t)))
		return NULL;

	for (l = 0; l < t->depth; l++) {
		n = get_child(n, k);
		node = get_node(t, l, n);

		for (k = 0; k < KEYS_PER_NODE; k++)
			if (node[k] >= sector)
				break;
	}

	return &t->targets[(KEYS_PER_NODE * n) + k];
}

static int device_not_poll_capable(struct dm_target *ti, struct dm_dev *dev,
				   sector_t start, sector_t len, void *data)
{
	struct request_queue *q = bdev_get_queue(dev->bdev);

	return !test_bit(QUEUE_FLAG_POLL, &q->queue_flags);
}

/*
 * type->iterate_devices() is meant for checks that must consult every
 * underlying data device.  It calls the callout for each device and stops
 * as soon as the callout (or iterate_devices() itself) returns non-zero.
 * Note that some targets (e.g. dm-stripe) iterate several devices per call.
 *
 * Checks that need _any_ device to have an attribute should use
 * dm_table_any_dev_attr() with a @func describing the positive case
 * ("capable of X").  Checks that need _all_ devices to have an attribute
 * follow the dm_table_supports_*() pattern instead, iterating with a
 * callout that describes the counter example ("not capable of X").
 */
static bool dm_table_any_dev_attr(struct dm_table *t,
				  iterate_devices_callout_fn func, void *data)
{
	for (unsigned int i = 0; i < t->num_targets; i++) {
		struct dm_target *ti = dm_table_get_target(t, i);

		if (ti->type->iterate_devices &&
		    ti->type->iterate_devices(ti, func, data))
			return true;
	}

	return false;
}

static int count_device(struct dm_target *ti, struct dm_dev *dev,
			sector_t start, sector_t len, void *data)
{
	unsigned *num_devices = data;

	(*num_devices)++;

	return 0;
}

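/*
 * Bio polling can only be advertised if every target can iterate its
 * devices and every underlying queue has QUEUE_FLAG_POLL set.
 */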
static bool dm_table_supports_poll(struct dm_table *t)
{
	for (unsigned int i = 0; i < t->num_targets; i++) {
		struct dm_target *ti = dm_table_get_target(t, i);

		if (!ti->type->iterate_devices ||
		    ti->type->iterate_devices(ti, device_not_poll_capable, NULL))
			return false;
	}

	return true;
}

/*
 * Check whether a table has no data devices attached using each
 * target's iterate_devices method.
 * Returns false if the result is unknown because a target doesn't
 * support iterate_devices.
 */
bool dm_table_has_no_data_devices(struct dm_table *t)
{
	for (unsigned int i = 0; i < t->num_targets; i++) {
		struct dm_target *ti = dm_table_get_target(t, i);
		unsigned num_devices = 0;

		if (!ti->type->iterate_devices)
			return false;

		ti->type->iterate_devices(ti, count_device, &num_devices);
		if (num_devices)
			return false;
	}

	return true;
}

static int device_not_zoned_model(struct dm_target *ti, struct dm_dev *dev,
				  sector_t start, sector_t len, void *data)
{
	struct request_queue *q = bdev_get_queue(dev->bdev);
	enum blk_zoned_model *zoned_model = data;

	return blk_queue_zoned_model(q) != *zoned_model;
}

/*
 * Check the device zoned model based on the target feature flag. If the target
 * has the DM_TARGET_ZONED_HM feature flag set, host-managed zoned devices are
 * also accepted but all devices must have the same zoned model. If the target
 * has the DM_TARGET_MIXED_ZONED_MODEL feature set, the devices can have any
 * zoned model with all zoned devices having the same zone size.
 */
static bool dm_table_supports_zoned_model(struct dm_table *t,
					  enum blk_zoned_model zoned_model)
{
	for (unsigned int i = 0; i < t->num_targets; i++) {
		struct dm_target *ti = dm_table_get_target(t, i);

		if (dm_target_supports_zoned_hm(ti->type)) {
			if (!ti->type->iterate_devices ||
			    ti->type->iterate_devices(ti, device_not_zoned_model,
						      &zoned_model))
				return false;
		} else if (!dm_target_supports_mixed_zoned_model(ti->type)) {
			if (zoned_model == BLK_ZONED_HM)
				return false;
		}
	}

	return true;
}

static int device_not_matches_zone_sectors(struct dm_target *ti, struct dm_dev *dev,
					   sector_t start, sector_t len, void *data)
{
	unsigned int *zone_sectors = data;

	if (!bdev_is_zoned(dev->bdev))
		return 0;
	return bdev_zone_sectors(dev->bdev) != *zone_sectors;
}

/*
 * Check consistency of zoned model and zone sectors across all targets. For
 * zone sectors, if the destination device is a zoned block device, it shall
 * have the specified zone_sectors.
 */
static int validate_hardware_zoned_model(struct dm_table *t,
					 enum blk_zoned_model zoned_model,
					 unsigned int zone_sectors)
{
	if (zoned_model == BLK_ZONED_NONE)
		return 0;

	if (!dm_table_supports_zoned_model(t, zoned_model)) {
		DMERR("%s: zoned model is not consistent across all devices",
		      dm_device_name(t->md));
		return -EINVAL;
	}

	/* Check zone size validity and compatibility */
	if (!zone_sectors || !is_power_of_2(zone_sectors))
		return -EINVAL;

	if (dm_table_any_dev_attr(t, device_not_matches_zone_sectors, &zone_sectors)) {
		DMERR("%s: zone sectors is not consistent across all zoned devices",
		      dm_device_name(t->md));
		return -EINVAL;
	}

	return 0;
}

/*
 * Establish the new table's queue_limits and validate them.
 */
int dm_calculate_queue_limits(struct dm_table *t,
			      struct queue_limits *limits)
{
	struct queue_limits ti_limits;
	enum blk_zoned_model zoned_model = BLK_ZONED_NONE;
	unsigned int zone_sectors = 0;

	blk_set_stacking_limits(limits);

	for (unsigned int i = 0; i < t->num_targets; i++) {
		struct dm_target *ti = dm_table_get_target(t, i);

		blk_set_stacking_limits(&ti_limits);

		if (!ti->type->iterate_devices)
			goto combine_limits;

		/*
		 * Combine queue limits of all the devices this target uses.
		 */
		ti->type->iterate_devices(ti, dm_set_device_limits,
					  &ti_limits);

		if (zoned_model == BLK_ZONED_NONE && ti_limits.zoned != BLK_ZONED_NONE) {
			/*
			 * After stacking all limits, validate all devices
			 * in table support this zoned model and zone sectors.
			 */
			zoned_model = ti_limits.zoned;
			zone_sectors = ti_limits.chunk_sectors;
		}

		/* Set I/O hints portion of queue limits */
		if (ti->type->io_hints)
			ti->type->io_hints(ti, &ti_limits);

		/*
		 * Check each device area is consistent with the target's
		 * overall queue limits.
		 */
		if (ti->type->iterate_devices(ti, device_area_is_invalid,
					      &ti_limits))
			return -EINVAL;

combine_limits:
		/*
		 * Merge this target's queue limits into the overall limits
		 * for the table.
		 */
		if (blk_stack_limits(limits, &ti_limits, 0) < 0)
			DMWARN("%s: adding target device "
			       "(start sect %llu len %llu) "
			       "caused an alignment inconsistency",
			       dm_device_name(t->md),
			       (unsigned long long) ti->begin,
			       (unsigned long long) ti->len);
	}

	/*
	 * Verify that the zoned model and zone sectors, as determined before
	 * any .io_hints override, are the same across all devices in the
	 * table - this is especially relevant if .io_hints is emulating a
	 * zoned model on non-zoned devices.
	 * BUT...
	 */
	if (limits->zoned != BLK_ZONED_NONE) {
		/*
		 * ...IF the above limits stacking determined a zoned model,
		 * validate that all of the table's devices conform to it.
		 */
		zoned_model = limits->zoned;
		zone_sectors = limits->chunk_sectors;
	}
	if (validate_hardware_zoned_model(t, zoned_model, zone_sectors))
		return -EINVAL;

	return validate_hardware_logical_block_alignment(t, limits);
}

/*
 * Verify that all devices have an integrity profile that matches the
 * DM device's registered integrity profile.  If the profiles don't
 * match then unregister the DM device's integrity profile.
 */
static void dm_table_verify_integrity(struct dm_table *t)
{
	struct gendisk *template_disk = NULL;

	if (t->integrity_added)
		return;

	if (t->integrity_supported) {
		/*
		 * Verify that the original integrity profile
		 * matches all the devices in this table.
		 */
		template_disk = dm_table_get_integrity_disk(t);
		if (template_disk &&
		    blk_integrity_compare(dm_disk(t->md), template_disk) >= 0)
			return;
	}

	if (integrity_profile_exists(dm_disk(t->md))) {
		DMWARN("%s: unable to establish an integrity profile",
		       dm_device_name(t->md));
		blk_integrity_unregister(dm_disk(t->md));
	}
}

static int device_flush_capable(struct dm_target *ti, struct dm_dev *dev,
				sector_t start, sector_t len, void *data)
{
	unsigned long flush = (unsigned long) data;
	struct request_queue *q = bdev_get_queue(dev->bdev);

	return (q->queue_flags & flush);
}

static bool dm_table_supports_flush(struct dm_table *t, unsigned long flush)
{
	/*
	 * Require at least one underlying device to support flushes.
	 * t->devices includes internal dm devices such as mirror logs
	 * so we need to use iterate_devices here, which targets
	 * supporting flushes must provide.
	 */
	for (unsigned int i = 0; i < t->num_targets; i++) {
		struct dm_target *ti = dm_table_get_target(t, i);

		if (!ti->num_flush_bios)
			continue;

		if (ti->flush_supported)
			return true;

		if (ti->type->iterate_devices &&
		    ti->type->iterate_devices(ti, device_flush_capable, (void *) flush))
			return true;
	}

	return false;
}

static int device_dax_write_cache_enabled(struct dm_target *ti,
					  struct dm_dev *dev, sector_t start,
					  sector_t len, void *data)
{
	struct dax_device *dax_dev = dev->dax_dev;

	if (!dax_dev)
		return false;

	if (dax_write_cache_enabled(dax_dev))
		return true;
	return false;
}

static int device_is_rotational(struct dm_target *ti, struct dm_dev *dev,
				sector_t start, sector_t len, void *data)
{
	return !bdev_nonrot(dev->bdev);
}

static int device_is_not_random(struct dm_target *ti, struct dm_dev *dev,
				sector_t start, sector_t len, void *data)
{
	struct request_queue *q = bdev_get_queue(dev->bdev);

	return !blk_queue_add_random(q);
}

static int device_not_write_zeroes_capable(struct dm_target *ti, struct dm_dev *dev,
					   sector_t start, sector_t len, void *data)
{
	struct request_queue *q = bdev_get_queue(dev->bdev);

	return !q->limits.max_write_zeroes_sectors;
}

static bool dm_table_supports_write_zeroes(struct dm_table *t)
{
	for (unsigned int i = 0; i < t->num_targets; i++) {
		struct dm_target *ti = dm_table_get_target(t, i);

		if (!ti->num_write_zeroes_bios)
			return false;

		if (!ti->type->iterate_devices ||
		    ti->type->iterate_devices(ti, device_not_write_zeroes_capable, NULL))
			return false;
	}

	return true;
}

static int device_not_nowait_capable(struct dm_target *ti, struct dm_dev *dev,
				     sector_t start, sector_t len, void *data)
{
	struct request_queue *q = bdev_get_queue(dev->bdev);

	return !blk_queue_nowait(q);
}

static bool dm_table_supports_nowait(struct dm_table *t)
{
	for (unsigned int i = 0; i < t->num_targets; i++) {
		struct dm_target *ti = dm_table_get_target(t, i);

		if (!dm_target_supports_nowait(ti->type))
			return false;

		if (!ti->type->iterate_devices ||
		    ti->type->iterate_devices(ti, device_not_nowait_capable, NULL))
			return false;
	}

	return true;
}

static int device_not_discard_capable(struct dm_target *ti, struct dm_dev *dev,
				      sector_t start, sector_t len, void *data)
{
	return !bdev_max_discard_sectors(dev->bdev);
}

static bool dm_table_supports_discards(struct dm_table *t)
{
	for (unsigned int i = 0; i < t->num_targets; i++) {
		struct dm_target *ti = dm_table_get_target(t, i);

		if (!ti->num_discard_bios)
			return false;

		/*
		 * Either the target provides discard support (as implied by setting
		 * 'discards_supported') or it relies on _all_ data devices having
		 * discard support.
		 */
		if (!ti->discards_supported &&
		    (!ti->type->iterate_devices ||
		     ti->type->iterate_devices(ti, device_not_discard_capable, NULL)))
			return false;
	}

	return true;
}

static int device_not_secure_erase_capable(struct dm_target *ti,
					   struct dm_dev *dev, sector_t start,
					   sector_t len, void *data)
{
	return !bdev_max_secure_erase_sectors(dev->bdev);
}

static bool dm_table_supports_secure_erase(struct dm_table *t)
{
	for (unsigned int i = 0; i < t->num_targets; i++) {
		struct dm_target *ti = dm_table_get_target(t, i);

		if (!ti->num_secure_erase_bios)
			return false;

		if (!ti->type->iterate_devices ||
		    ti->type->iterate_devices(ti, device_not_secure_erase_capable, NULL))
			return false;
	}

	return true;
}

static int device_requires_stable_pages(struct dm_target *ti,
					struct dm_dev *dev, sector_t start,
					sector_t len, void *data)
{
	return bdev_stable_writes(dev->bdev);
}

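/*
 * Apply the validated queue_limits to the mapped device's request_queue
 * and set or clear the queue flags (NOWAIT, DAX, NONROT, STABLE_WRITES,
 * POLL, ...) according to what the table's targets and devices support.
 */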
int dm_table_set_restrictions(struct dm_table *t, struct request_queue *q,
			      struct queue_limits *limits)
{
	bool wc = false, fua = false;
	int r;

	/*
	 * Copy table's limits to the DM device's request_queue
	 */
	q->limits = *limits;

	if (dm_table_supports_nowait(t))
		blk_queue_flag_set(QUEUE_FLAG_NOWAIT, q);
	else
		blk_queue_flag_clear(QUEUE_FLAG_NOWAIT, q);

	if (!dm_table_supports_discards(t)) {
		q->limits.max_discard_sectors = 0;
		q->limits.max_hw_discard_sectors = 0;
		q->limits.discard_granularity = 0;
		q->limits.discard_alignment = 0;
		q->limits.discard_misaligned = 0;
	}

	if (!dm_table_supports_secure_erase(t))
		q->limits.max_secure_erase_sectors = 0;

	if (dm_table_supports_flush(t, (1UL << QUEUE_FLAG_WC))) {
		wc = true;
		if (dm_table_supports_flush(t, (1UL << QUEUE_FLAG_FUA)))
			fua = true;
	}
	blk_queue_write_cache(q, wc, fua);

	if (dm_table_supports_dax(t, device_not_dax_capable)) {
		blk_queue_flag_set(QUEUE_FLAG_DAX, q);
		if (dm_table_supports_dax(t, device_not_dax_synchronous_capable))
			set_dax_synchronous(t->md->dax_dev);
	} else
		blk_queue_flag_clear(QUEUE_FLAG_DAX, q);

	if (dm_table_any_dev_attr(t, device_dax_write_cache_enabled, NULL))
		dax_write_cache(t->md->dax_dev, true);

	/* Ensure that all underlying devices are non-rotational. */
	if (dm_table_any_dev_attr(t, device_is_rotational, NULL))
		blk_queue_flag_clear(QUEUE_FLAG_NONROT, q);
	else
		blk_queue_flag_set(QUEUE_FLAG_NONROT, q);

	if (!dm_table_supports_write_zeroes(t))
		q->limits.max_write_zeroes_sectors = 0;

	dm_table_verify_integrity(t);

	/*
	 * Some devices don't use blk_integrity but still want stable pages
	 * because they do their own checksumming.
	 * If any underlying device requires stable pages, a table must require
	 * them as well.  Only targets that support iterate_devices are considered:
	 * don't want error, zero, etc to require stable pages.
	 */
	if (dm_table_any_dev_attr(t, device_requires_stable_pages, NULL))
		blk_queue_flag_set(QUEUE_FLAG_STABLE_WRITES, q);
	else
		blk_queue_flag_clear(QUEUE_FLAG_STABLE_WRITES, q);

	/*
	 * Determine whether or not this queue's I/O timings contribute
	 * to the entropy pool.  Only request-based targets use this.
	 * Clear QUEUE_FLAG_ADD_RANDOM if any underlying device does not
	 * have it set.
	 */
	if (blk_queue_add_random(q) &&
	    dm_table_any_dev_attr(t, device_is_not_random, NULL))
		blk_queue_flag_clear(QUEUE_FLAG_ADD_RANDOM, q);

	/*
	 * For a zoned target, setup the zones related queue attributes
	 * and resources necessary for zone append emulation if necessary.
	 */
	if (blk_queue_is_zoned(q)) {
		r = dm_set_zones_restrictions(t, q);
		if (r)
			return r;
		if (!static_key_enabled(&zoned_enabled.key))
			static_branch_enable(&zoned_enabled);
	}

	dm_update_crypto_profile(q, t);
	disk_update_readahead(t->md->disk);

	/*
	 * Check for request-based device is left to
	 * dm_mq_init_request_queue()->blk_mq_init_allocated_queue().
	 *
	 * For bio-based device, only set QUEUE_FLAG_POLL when all
	 * underlying devices supporting polling.
	 */
	if (__table_type_bio_based(t->type)) {
		if (dm_table_supports_poll(t))
			blk_queue_flag_set(QUEUE_FLAG_POLL, q);
		else
			blk_queue_flag_clear(QUEUE_FLAG_POLL, q);
	}

	return 0;
}

struct list_head *dm_table_get_devices(struct dm_table *t)
{
	return &t->devices;
}

fmode_t dm_table_get_mode(struct dm_table *t)
{
	return t->mode;
}
EXPORT_SYMBOL(dm_table_get_mode);

enum suspend_mode {
	PRESUSPEND,
	PRESUSPEND_UNDO,
	POSTSUSPEND,
};

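/*
 * Invoke the presuspend, presuspend_undo or postsuspend hook of every
 * target in the table; callers must hold md->suspend_lock.
 */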
static void suspend_targets(struct dm_table *t, enum suspend_mode mode)
{
	lockdep_assert_held(&t->md->suspend_lock);

	for (unsigned int i = 0; i < t->num_targets; i++) {
		struct dm_target *ti = dm_table_get_target(t, i);

		switch (mode) {
		case PRESUSPEND:
			if (ti->type->presuspend)
				ti->type->presuspend(ti);
			break;
		case PRESUSPEND_UNDO:
			if (ti->type->presuspend_undo)
				ti->type->presuspend_undo(ti);
			break;
		case POSTSUSPEND:
			if (ti->type->postsuspend)
				ti->type->postsuspend(ti);
			break;
		}
	}
}

void dm_table_presuspend_targets(struct dm_table *t)
{
	if (!t)
		return;

	suspend_targets(t, PRESUSPEND);
}

void dm_table_presuspend_undo_targets(struct dm_table *t)
{
	if (!t)
		return;

	suspend_targets(t, PRESUSPEND_UNDO);
}

void dm_table_postsuspend_targets(struct dm_table *t)
{
	if (!t)
		return;

	suspend_targets(t, POSTSUSPEND);
}

int dm_table_resume_targets(struct dm_table *t)
{
	unsigned int i;
	int r = 0;

	lockdep_assert_held(&t->md->suspend_lock);

	for (i = 0; i < t->num_targets; i++) {
		struct dm_target *ti = dm_table_get_target(t, i);

		if (!ti->type->preresume)
			continue;

		r = ti->type->preresume(ti);
		if (r) {
			DMERR("%s: %s: preresume failed, error = %d",
			      dm_device_name(t->md), ti->type->name, r);
			return r;
		}
	}

	for (i = 0; i < t->num_targets; i++) {
		struct dm_target *ti = dm_table_get_target(t, i);

		if (ti->type->resume)
			ti->type->resume(ti);
	}

	return 0;
}

struct mapped_device *dm_table_get_md(struct dm_table *t)
{
	return t->md;
}
EXPORT_SYMBOL(dm_table_get_md);

const char *dm_table_device_name(struct dm_table *t)
{
	return dm_device_name(t->md);
}
EXPORT_SYMBOL_GPL(dm_table_device_name);

void dm_table_run_md_queue_async(struct dm_table *t)
{
	if (!dm_table_request_based(t))
		return;

	if (t->md->queue)
		blk_mq_run_hw_queues(t->md->queue, true);
}
EXPORT_SYMBOL(dm_table_run_md_queue_async);