/*
 * Copyright (C) 2001, 2002 Sistina Software (UK) Limited.
 * Copyright (C) 2004-2008 Red Hat, Inc. All rights reserved.
 *
 * This file is released under the GPL.
 */

0008 #include "dm-core.h"
0009 #include "dm-rq.h"
0010 #include "dm-uevent.h"
0011 #include "dm-ima.h"
0012
0013 #include <linux/init.h>
0014 #include <linux/module.h>
0015 #include <linux/mutex.h>
0016 #include <linux/sched/mm.h>
0017 #include <linux/sched/signal.h>
0018 #include <linux/blkpg.h>
0019 #include <linux/bio.h>
0020 #include <linux/mempool.h>
0021 #include <linux/dax.h>
0022 #include <linux/slab.h>
0023 #include <linux/idr.h>
0024 #include <linux/uio.h>
0025 #include <linux/hdreg.h>
0026 #include <linux/delay.h>
0027 #include <linux/wait.h>
0028 #include <linux/pr.h>
0029 #include <linux/refcount.h>
0030 #include <linux/part_stat.h>
0031 #include <linux/blk-crypto.h>
0032 #include <linux/blk-crypto-profile.h>
0033
0034 #define DM_MSG_PREFIX "core"
0035
/*
 * Cookies are numeric values sent with CHANGE and REMOVE
 * uevents while resuming, removing or renaming the device.
 */
0040 #define DM_COOKIE_ENV_VAR_NAME "DM_COOKIE"
0041 #define DM_COOKIE_LENGTH 24
0042
/*
 * For a REQ_POLLED fs bio, this flag is set if we link the mapped underlying
 * dm_io instances into one list and reuse bio->bi_private as the list head.
 * Before ending the fs bio, the original ->bi_private is restored.
 */
0048 #define REQ_DM_POLL_LIST REQ_DRV
0049
0050 static const char *_name = DM_NAME;
0051
0052 static unsigned int major = 0;
0053 static unsigned int _major = 0;
0054
0055 static DEFINE_IDR(_minor_idr);
0056
0057 static DEFINE_SPINLOCK(_minor_lock);
0058
0059 static void do_deferred_remove(struct work_struct *w);
0060
0061 static DECLARE_WORK(deferred_remove_work, do_deferred_remove);
0062
0063 static struct workqueue_struct *deferred_remove_workqueue;
0064
0065 atomic_t dm_global_event_nr = ATOMIC_INIT(0);
0066 DECLARE_WAIT_QUEUE_HEAD(dm_global_eventq);
0067
0068 void dm_issue_global_event(void)
0069 {
0070 atomic_inc(&dm_global_event_nr);
0071 wake_up(&dm_global_eventq);
0072 }
0073
0074 DEFINE_STATIC_KEY_FALSE(stats_enabled);
0075 DEFINE_STATIC_KEY_FALSE(swap_bios_enabled);
0076 DEFINE_STATIC_KEY_FALSE(zoned_enabled);
0077
/*
 * One of these is allocated (on-stack) per original bio.
 */
0081 struct clone_info {
0082 struct dm_table *map;
0083 struct bio *bio;
0084 struct dm_io *io;
0085 sector_t sector;
0086 unsigned sector_count;
0087 bool is_abnormal_io:1;
0088 bool submit_as_polled:1;
0089 };
0090
0091 static inline struct dm_target_io *clone_to_tio(struct bio *clone)
0092 {
0093 return container_of(clone, struct dm_target_io, clone);
0094 }
0095
0096 void *dm_per_bio_data(struct bio *bio, size_t data_size)
0097 {
0098 if (!dm_tio_flagged(clone_to_tio(bio), DM_TIO_INSIDE_DM_IO))
0099 return (char *)bio - DM_TARGET_IO_BIO_OFFSET - data_size;
0100 return (char *)bio - DM_IO_BIO_OFFSET - data_size;
0101 }
0102 EXPORT_SYMBOL_GPL(dm_per_bio_data);
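/*
 * Illustrative sketch (not part of this file): how a hypothetical bio-based
 * target might use dm_per_bio_data().  The names "example_pb" and
 * "start_jiffies" are invented for illustration; only the per_io_data_size /
 * dm_per_bio_data() contract is taken from the function above.
 *
 *	// in the target's .ctr: reserve per-bio space in DM's bio front_pad
 *	ti->per_io_data_size = sizeof(struct example_pb);
 *
 *	// in the target's .map or .end_io: recover that space for this bio
 *	struct example_pb *pb = dm_per_bio_data(bio, sizeof(struct example_pb));
 *	pb->start_jiffies = jiffies;
 */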
0103
0104 struct bio *dm_bio_from_per_bio_data(void *data, size_t data_size)
0105 {
0106 struct dm_io *io = (struct dm_io *)((char *)data + data_size);
0107 if (io->magic == DM_IO_MAGIC)
0108 return (struct bio *)((char *)io + DM_IO_BIO_OFFSET);
0109 BUG_ON(io->magic != DM_TIO_MAGIC);
0110 return (struct bio *)((char *)io + DM_TARGET_IO_BIO_OFFSET);
0111 }
0112 EXPORT_SYMBOL_GPL(dm_bio_from_per_bio_data);
0113
0114 unsigned dm_bio_get_target_bio_nr(const struct bio *bio)
0115 {
0116 return container_of(bio, struct dm_target_io, clone)->target_bio_nr;
0117 }
0118 EXPORT_SYMBOL_GPL(dm_bio_get_target_bio_nr);
0119
0120 #define MINOR_ALLOCED ((void *)-1)
0121
0122 #define DM_NUMA_NODE NUMA_NO_NODE
0123 static int dm_numa_node = DM_NUMA_NODE;
0124
0125 #define DEFAULT_SWAP_BIOS (8 * 1048576 / PAGE_SIZE)
0126 static int swap_bios = DEFAULT_SWAP_BIOS;
0127 static int get_swap_bios(void)
0128 {
0129 int latch = READ_ONCE(swap_bios);
0130 if (unlikely(latch <= 0))
0131 latch = DEFAULT_SWAP_BIOS;
0132 return latch;
0133 }
0134
0135 struct table_device {
0136 struct list_head list;
0137 refcount_t count;
0138 struct dm_dev dm_dev;
0139 };
0140
/*
 * Bio-based DM's mempools' reserved IOs set by the user.
 */
0144 #define RESERVED_BIO_BASED_IOS 16
0145 static unsigned reserved_bio_based_ios = RESERVED_BIO_BASED_IOS;
0146
0147 static int __dm_get_module_param_int(int *module_param, int min, int max)
0148 {
0149 int param = READ_ONCE(*module_param);
0150 int modified_param = 0;
0151 bool modified = true;
0152
0153 if (param < min)
0154 modified_param = min;
0155 else if (param > max)
0156 modified_param = max;
0157 else
0158 modified = false;
0159
0160 if (modified) {
0161 (void)cmpxchg(module_param, param, modified_param);
0162 param = modified_param;
0163 }
0164
0165 return param;
0166 }
0167
0168 unsigned __dm_get_module_param(unsigned *module_param,
0169 unsigned def, unsigned max)
0170 {
0171 unsigned param = READ_ONCE(*module_param);
0172 unsigned modified_param = 0;
0173
0174 if (!param)
0175 modified_param = def;
0176 else if (param > max)
0177 modified_param = max;
0178
0179 if (modified_param) {
0180 (void)cmpxchg(module_param, param, modified_param);
0181 param = modified_param;
0182 }
0183
0184 return param;
0185 }
0186
0187 unsigned dm_get_reserved_bio_based_ios(void)
0188 {
0189 return __dm_get_module_param(&reserved_bio_based_ios,
0190 RESERVED_BIO_BASED_IOS, DM_RESERVED_MAX_IOS);
0191 }
0192 EXPORT_SYMBOL_GPL(dm_get_reserved_bio_based_ios);
0193
0194 static unsigned dm_get_numa_node(void)
0195 {
0196 return __dm_get_module_param_int(&dm_numa_node,
0197 DM_NUMA_NODE, num_online_nodes() - 1);
0198 }
0199
0200 static int __init local_init(void)
0201 {
0202 int r;
0203
0204 r = dm_uevent_init();
0205 if (r)
0206 return r;
0207
0208 deferred_remove_workqueue = alloc_workqueue("kdmremove", WQ_UNBOUND, 1);
0209 if (!deferred_remove_workqueue) {
0210 r = -ENOMEM;
0211 goto out_uevent_exit;
0212 }
0213
0214 _major = major;
0215 r = register_blkdev(_major, _name);
0216 if (r < 0)
0217 goto out_free_workqueue;
0218
0219 if (!_major)
0220 _major = r;
0221
0222 return 0;
0223
0224 out_free_workqueue:
0225 destroy_workqueue(deferred_remove_workqueue);
0226 out_uevent_exit:
0227 dm_uevent_exit();
0228
0229 return r;
0230 }
0231
0232 static void local_exit(void)
0233 {
0234 flush_scheduled_work();
0235 destroy_workqueue(deferred_remove_workqueue);
0236
0237 unregister_blkdev(_major, _name);
0238 dm_uevent_exit();
0239
0240 _major = 0;
0241
0242 DMINFO("cleaned up");
0243 }
0244
0245 static int (*_inits[])(void) __initdata = {
0246 local_init,
0247 dm_target_init,
0248 dm_linear_init,
0249 dm_stripe_init,
0250 dm_io_init,
0251 dm_kcopyd_init,
0252 dm_interface_init,
0253 dm_statistics_init,
0254 };
0255
0256 static void (*_exits[])(void) = {
0257 local_exit,
0258 dm_target_exit,
0259 dm_linear_exit,
0260 dm_stripe_exit,
0261 dm_io_exit,
0262 dm_kcopyd_exit,
0263 dm_interface_exit,
0264 dm_statistics_exit,
0265 };
0266
0267 static int __init dm_init(void)
0268 {
0269 const int count = ARRAY_SIZE(_inits);
0270 int r, i;
0271
0272 #if (IS_ENABLED(CONFIG_IMA) && !IS_ENABLED(CONFIG_IMA_DISABLE_HTABLE))
0273 DMWARN("CONFIG_IMA_DISABLE_HTABLE is disabled."
0274 " Duplicate IMA measurements will not be recorded in the IMA log.");
0275 #endif
0276
0277 for (i = 0; i < count; i++) {
0278 r = _inits[i]();
0279 if (r)
0280 goto bad;
0281 }
0282
0283 return 0;
0284 bad:
0285 while (i--)
0286 _exits[i]();
0287
0288 return r;
0289 }
0290
0291 static void __exit dm_exit(void)
0292 {
0293 int i = ARRAY_SIZE(_exits);
0294
0295 while (i--)
0296 _exits[i]();
0297
	/*
	 * Should be empty by this point.
	 */
0301 idr_destroy(&_minor_idr);
0302 }
0303
/*
 * Block device functions
 */
0307 int dm_deleting_md(struct mapped_device *md)
0308 {
0309 return test_bit(DMF_DELETING, &md->flags);
0310 }
0311
0312 static int dm_blk_open(struct block_device *bdev, fmode_t mode)
0313 {
0314 struct mapped_device *md;
0315
0316 spin_lock(&_minor_lock);
0317
0318 md = bdev->bd_disk->private_data;
0319 if (!md)
0320 goto out;
0321
0322 if (test_bit(DMF_FREEING, &md->flags) ||
0323 dm_deleting_md(md)) {
0324 md = NULL;
0325 goto out;
0326 }
0327
0328 dm_get(md);
0329 atomic_inc(&md->open_count);
0330 out:
0331 spin_unlock(&_minor_lock);
0332
0333 return md ? 0 : -ENXIO;
0334 }
0335
0336 static void dm_blk_close(struct gendisk *disk, fmode_t mode)
0337 {
0338 struct mapped_device *md;
0339
0340 spin_lock(&_minor_lock);
0341
0342 md = disk->private_data;
0343 if (WARN_ON(!md))
0344 goto out;
0345
0346 if (atomic_dec_and_test(&md->open_count) &&
0347 (test_bit(DMF_DEFERRED_REMOVE, &md->flags)))
0348 queue_work(deferred_remove_workqueue, &deferred_remove_work);
0349
0350 dm_put(md);
0351 out:
0352 spin_unlock(&_minor_lock);
0353 }
0354
0355 int dm_open_count(struct mapped_device *md)
0356 {
0357 return atomic_read(&md->open_count);
0358 }
0359
/*
 * Guarantees nothing is using the device before it's deleted (i.e. deferred remove).
 */
0363 int dm_lock_for_deletion(struct mapped_device *md, bool mark_deferred, bool only_deferred)
0364 {
0365 int r = 0;
0366
0367 spin_lock(&_minor_lock);
0368
0369 if (dm_open_count(md)) {
0370 r = -EBUSY;
0371 if (mark_deferred)
0372 set_bit(DMF_DEFERRED_REMOVE, &md->flags);
0373 } else if (only_deferred && !test_bit(DMF_DEFERRED_REMOVE, &md->flags))
0374 r = -EEXIST;
0375 else
0376 set_bit(DMF_DELETING, &md->flags);
0377
0378 spin_unlock(&_minor_lock);
0379
0380 return r;
0381 }
0382
0383 int dm_cancel_deferred_remove(struct mapped_device *md)
0384 {
0385 int r = 0;
0386
0387 spin_lock(&_minor_lock);
0388
0389 if (test_bit(DMF_DELETING, &md->flags))
0390 r = -EBUSY;
0391 else
0392 clear_bit(DMF_DEFERRED_REMOVE, &md->flags);
0393
0394 spin_unlock(&_minor_lock);
0395
0396 return r;
0397 }
0398
0399 static void do_deferred_remove(struct work_struct *w)
0400 {
0401 dm_deferred_remove();
0402 }
0403
0404 static int dm_blk_getgeo(struct block_device *bdev, struct hd_geometry *geo)
0405 {
0406 struct mapped_device *md = bdev->bd_disk->private_data;
0407
0408 return dm_get_geometry(md, geo);
0409 }
0410
0411 static int dm_prepare_ioctl(struct mapped_device *md, int *srcu_idx,
0412 struct block_device **bdev)
0413 {
0414 struct dm_target *ti;
0415 struct dm_table *map;
0416 int r;
0417
0418 retry:
0419 r = -ENOTTY;
0420 map = dm_get_live_table(md, srcu_idx);
0421 if (!map || !dm_table_get_size(map))
0422 return r;
0423
	/* We only support devices that have a single target */
0425 if (map->num_targets != 1)
0426 return r;
0427
0428 ti = dm_table_get_target(map, 0);
0429 if (!ti->type->prepare_ioctl)
0430 return r;
0431
0432 if (dm_suspended_md(md))
0433 return -EAGAIN;
0434
0435 r = ti->type->prepare_ioctl(ti, bdev);
0436 if (r == -ENOTCONN && !fatal_signal_pending(current)) {
0437 dm_put_live_table(md, *srcu_idx);
0438 msleep(10);
0439 goto retry;
0440 }
0441
0442 return r;
0443 }
0444
0445 static void dm_unprepare_ioctl(struct mapped_device *md, int srcu_idx)
0446 {
0447 dm_put_live_table(md, srcu_idx);
0448 }
0449
0450 static int dm_blk_ioctl(struct block_device *bdev, fmode_t mode,
0451 unsigned int cmd, unsigned long arg)
0452 {
0453 struct mapped_device *md = bdev->bd_disk->private_data;
0454 int r, srcu_idx;
0455
0456 r = dm_prepare_ioctl(md, &srcu_idx, &bdev);
0457 if (r < 0)
0458 goto out;
0459
0460 if (r > 0) {
		/*
		 * Target determined this ioctl is being issued against a
		 * subset of the parent bdev; require extra privileges.
		 */
0465 if (!capable(CAP_SYS_RAWIO)) {
0466 DMDEBUG_LIMIT(
0467 "%s: sending ioctl %x to DM device without required privilege.",
0468 current->comm, cmd);
0469 r = -ENOIOCTLCMD;
0470 goto out;
0471 }
0472 }
0473
0474 if (!bdev->bd_disk->fops->ioctl)
0475 r = -ENOTTY;
0476 else
0477 r = bdev->bd_disk->fops->ioctl(bdev, mode, cmd, arg);
0478 out:
0479 dm_unprepare_ioctl(md, srcu_idx);
0480 return r;
0481 }
0482
0483 u64 dm_start_time_ns_from_clone(struct bio *bio)
0484 {
0485 return jiffies_to_nsecs(clone_to_tio(bio)->io->start_time);
0486 }
0487 EXPORT_SYMBOL_GPL(dm_start_time_ns_from_clone);
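/*
 * Illustrative sketch (assumption, not code from this file): a target's
 * .end_io hook could use dm_start_time_ns_from_clone() to measure how long
 * the clone has been in flight, consistent with the jiffies-based start
 * time recorded by alloc_io() below.
 *
 *	u64 start_ns = dm_start_time_ns_from_clone(bio);
 *	u64 now_ns = jiffies_to_nsecs(jiffies);
 *	u64 latency_ns = now_ns - start_ns;
 */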
0488
0489 static bool bio_is_flush_with_data(struct bio *bio)
0490 {
0491 return ((bio->bi_opf & REQ_PREFLUSH) && bio->bi_iter.bi_size);
0492 }
0493
0494 static void dm_io_acct(struct dm_io *io, bool end)
0495 {
0496 struct dm_stats_aux *stats_aux = &io->stats_aux;
0497 unsigned long start_time = io->start_time;
0498 struct mapped_device *md = io->md;
0499 struct bio *bio = io->orig_bio;
0500 unsigned int sectors;
0501

	/*
	 * If REQ_PREFLUSH set, don't account payload, it will be
	 * submitted (and accounted) after this flush completes.
	 */
0506 if (bio_is_flush_with_data(bio))
0507 sectors = 0;
0508 else if (likely(!(dm_io_flagged(io, DM_IO_WAS_SPLIT))))
0509 sectors = bio_sectors(bio);
0510 else
0511 sectors = io->sectors;
0512
0513 if (!end)
0514 bdev_start_io_acct(bio->bi_bdev, sectors, bio_op(bio),
0515 start_time);
0516 else
0517 bdev_end_io_acct(bio->bi_bdev, bio_op(bio), start_time);
0518
0519 if (static_branch_unlikely(&stats_enabled) &&
0520 unlikely(dm_stats_used(&md->stats))) {
0521 sector_t sector;
0522
0523 if (likely(!dm_io_flagged(io, DM_IO_WAS_SPLIT)))
0524 sector = bio->bi_iter.bi_sector;
0525 else
0526 sector = bio_end_sector(bio) - io->sector_offset;
0527
0528 dm_stats_account_io(&md->stats, bio_data_dir(bio),
0529 sector, sectors,
0530 end, start_time, stats_aux);
0531 }
0532 }
0533
0534 static void __dm_start_io_acct(struct dm_io *io)
0535 {
0536 dm_io_acct(io, false);
0537 }
0538
0539 static void dm_start_io_acct(struct dm_io *io, struct bio *clone)
0540 {
	/*
	 * Ensure IO accounting is only ever started once.
	 */
0544 if (dm_io_flagged(io, DM_IO_ACCOUNTED))
0545 return;
0546
	/* Expect no possibility for race unless DM_TIO_IS_DUPLICATE_BIO. */
0548 if (!clone || likely(dm_tio_is_normal(clone_to_tio(clone)))) {
0549 dm_io_set_flag(io, DM_IO_ACCOUNTED);
0550 } else {
0551 unsigned long flags;
0552
0553 spin_lock_irqsave(&io->lock, flags);
0554 if (dm_io_flagged(io, DM_IO_ACCOUNTED)) {
0555 spin_unlock_irqrestore(&io->lock, flags);
0556 return;
0557 }
0558 dm_io_set_flag(io, DM_IO_ACCOUNTED);
0559 spin_unlock_irqrestore(&io->lock, flags);
0560 }
0561
0562 __dm_start_io_acct(io);
0563 }
0564
0565 static void dm_end_io_acct(struct dm_io *io)
0566 {
0567 dm_io_acct(io, true);
0568 }
0569
0570 static struct dm_io *alloc_io(struct mapped_device *md, struct bio *bio)
0571 {
0572 struct dm_io *io;
0573 struct dm_target_io *tio;
0574 struct bio *clone;
0575
0576 clone = bio_alloc_clone(NULL, bio, GFP_NOIO, &md->mempools->io_bs);
0577 tio = clone_to_tio(clone);
0578 tio->flags = 0;
0579 dm_tio_set_flag(tio, DM_TIO_INSIDE_DM_IO);
0580 tio->io = NULL;
0581
0582 io = container_of(tio, struct dm_io, tio);
0583 io->magic = DM_IO_MAGIC;
0584 io->status = BLK_STS_OK;
0585
	/* one ref is for submission, the other is for completion */
0587 atomic_set(&io->io_count, 2);
0588 this_cpu_inc(*md->pending_io);
0589 io->orig_bio = bio;
0590 io->md = md;
0591 spin_lock_init(&io->lock);
0592 io->start_time = jiffies;
0593 io->flags = 0;
0594
0595 if (static_branch_unlikely(&stats_enabled))
0596 dm_stats_record_start(&md->stats, &io->stats_aux);
0597
0598 return io;
0599 }
0600
0601 static void free_io(struct dm_io *io)
0602 {
0603 bio_put(&io->tio.clone);
0604 }
0605
0606 static struct bio *alloc_tio(struct clone_info *ci, struct dm_target *ti,
0607 unsigned target_bio_nr, unsigned *len, gfp_t gfp_mask)
0608 {
0609 struct mapped_device *md = ci->io->md;
0610 struct dm_target_io *tio;
0611 struct bio *clone;
0612
0613 if (!ci->io->tio.io) {
		/* the dm_target_io embedded in ci->io is available */
0615 tio = &ci->io->tio;
		/* alloc_io() already initialized embedded clone */
0617 clone = &tio->clone;
0618 } else {
0619 clone = bio_alloc_clone(NULL, ci->bio, gfp_mask,
0620 &md->mempools->bs);
0621 if (!clone)
0622 return NULL;
0623
		/* REQ_DM_POLL_LIST shouldn't be inherited */
0625 clone->bi_opf &= ~REQ_DM_POLL_LIST;
0626
0627 tio = clone_to_tio(clone);
0628 tio->flags = 0;
0629 }
0630
0631 tio->magic = DM_TIO_MAGIC;
0632 tio->io = ci->io;
0633 tio->ti = ti;
0634 tio->target_bio_nr = target_bio_nr;
0635 tio->len_ptr = len;
0636 tio->old_sector = 0;
0637
	/* Set default bdev, but target must bio_set_dev() before issuing IO */
0639 clone->bi_bdev = md->disk->part0;
0640 if (unlikely(ti->needs_bio_set_dev))
0641 bio_set_dev(clone, md->disk->part0);
0642
0643 if (len) {
0644 clone->bi_iter.bi_size = to_bytes(*len);
0645 if (bio_integrity(clone))
0646 bio_integrity_trim(clone);
0647 }
0648
0649 return clone;
0650 }
0651
0652 static void free_tio(struct bio *clone)
0653 {
0654 if (dm_tio_flagged(clone_to_tio(clone), DM_TIO_INSIDE_DM_IO))
0655 return;
0656 bio_put(clone);
0657 }
0658
/*
 * Add the bio to the list of deferred io.
 */
0662 static void queue_io(struct mapped_device *md, struct bio *bio)
0663 {
0664 unsigned long flags;
0665
0666 spin_lock_irqsave(&md->deferred_lock, flags);
0667 bio_list_add(&md->deferred, bio);
0668 spin_unlock_irqrestore(&md->deferred_lock, flags);
0669 queue_work(md->wq, &md->work);
0670 }
0671
/*
 * Everyone (including functions in this file), should use this
 * function to access the md->map field, and make sure they call
 * dm_put_live_table() when finished.
 */
0677 struct dm_table *dm_get_live_table(struct mapped_device *md,
0678 int *srcu_idx) __acquires(md->io_barrier)
0679 {
0680 *srcu_idx = srcu_read_lock(&md->io_barrier);
0681
0682 return srcu_dereference(md->map, &md->io_barrier);
0683 }
0684
0685 void dm_put_live_table(struct mapped_device *md,
0686 int srcu_idx) __releases(md->io_barrier)
0687 {
0688 srcu_read_unlock(&md->io_barrier, srcu_idx);
0689 }
0690
0691 void dm_sync_table(struct mapped_device *md)
0692 {
0693 synchronize_srcu(&md->io_barrier);
0694 synchronize_rcu_expedited();
0695 }
0696
/*
 * A fast alternative to dm_get_live_table/dm_put_live_table.
 * The caller must not block between these two functions.
 */
0701 static struct dm_table *dm_get_live_table_fast(struct mapped_device *md) __acquires(RCU)
0702 {
0703 rcu_read_lock();
0704 return rcu_dereference(md->map);
0705 }
0706
0707 static void dm_put_live_table_fast(struct mapped_device *md) __releases(RCU)
0708 {
0709 rcu_read_unlock();
0710 }
0711
0712 static inline struct dm_table *dm_get_live_table_bio(struct mapped_device *md,
0713 int *srcu_idx, blk_opf_t bio_opf)
0714 {
0715 if (bio_opf & REQ_NOWAIT)
0716 return dm_get_live_table_fast(md);
0717 else
0718 return dm_get_live_table(md, srcu_idx);
0719 }
0720
0721 static inline void dm_put_live_table_bio(struct mapped_device *md, int srcu_idx,
0722 blk_opf_t bio_opf)
0723 {
0724 if (bio_opf & REQ_NOWAIT)
0725 dm_put_live_table_fast(md);
0726 else
0727 dm_put_live_table(md, srcu_idx);
0728 }
0729
0730 static char *_dm_claim_ptr = "I belong to device-mapper";
0731
/*
 * Open a table device so we can use it as a map destination.
 */
0735 static int open_table_device(struct table_device *td, dev_t dev,
0736 struct mapped_device *md)
0737 {
0738 struct block_device *bdev;
0739 u64 part_off;
0740 int r;
0741
0742 BUG_ON(td->dm_dev.bdev);
0743
0744 bdev = blkdev_get_by_dev(dev, td->dm_dev.mode | FMODE_EXCL, _dm_claim_ptr);
0745 if (IS_ERR(bdev))
0746 return PTR_ERR(bdev);
0747
0748 r = bd_link_disk_holder(bdev, dm_disk(md));
0749 if (r) {
0750 blkdev_put(bdev, td->dm_dev.mode | FMODE_EXCL);
0751 return r;
0752 }
0753
0754 td->dm_dev.bdev = bdev;
0755 td->dm_dev.dax_dev = fs_dax_get_by_bdev(bdev, &part_off, NULL, NULL);
0756 return 0;
0757 }
0758
/*
 * Close a table device that we've been using.
 */
0762 static void close_table_device(struct table_device *td, struct mapped_device *md)
0763 {
0764 if (!td->dm_dev.bdev)
0765 return;
0766
0767 bd_unlink_disk_holder(td->dm_dev.bdev, dm_disk(md));
0768 blkdev_put(td->dm_dev.bdev, td->dm_dev.mode | FMODE_EXCL);
0769 put_dax(td->dm_dev.dax_dev);
0770 td->dm_dev.bdev = NULL;
0771 td->dm_dev.dax_dev = NULL;
0772 }
0773
0774 static struct table_device *find_table_device(struct list_head *l, dev_t dev,
0775 fmode_t mode)
0776 {
0777 struct table_device *td;
0778
0779 list_for_each_entry(td, l, list)
0780 if (td->dm_dev.bdev->bd_dev == dev && td->dm_dev.mode == mode)
0781 return td;
0782
0783 return NULL;
0784 }
0785
0786 int dm_get_table_device(struct mapped_device *md, dev_t dev, fmode_t mode,
0787 struct dm_dev **result)
0788 {
0789 int r;
0790 struct table_device *td;
0791
0792 mutex_lock(&md->table_devices_lock);
0793 td = find_table_device(&md->table_devices, dev, mode);
0794 if (!td) {
0795 td = kmalloc_node(sizeof(*td), GFP_KERNEL, md->numa_node_id);
0796 if (!td) {
0797 mutex_unlock(&md->table_devices_lock);
0798 return -ENOMEM;
0799 }
0800
0801 td->dm_dev.mode = mode;
0802 td->dm_dev.bdev = NULL;
0803
		r = open_table_device(td, dev, md);
		if (r) {
0805 mutex_unlock(&md->table_devices_lock);
0806 kfree(td);
0807 return r;
0808 }
0809
0810 format_dev_t(td->dm_dev.name, dev);
0811
0812 refcount_set(&td->count, 1);
0813 list_add(&td->list, &md->table_devices);
0814 } else {
0815 refcount_inc(&td->count);
0816 }
0817 mutex_unlock(&md->table_devices_lock);
0818
0819 *result = &td->dm_dev;
0820 return 0;
0821 }
0822
0823 void dm_put_table_device(struct mapped_device *md, struct dm_dev *d)
0824 {
0825 struct table_device *td = container_of(d, struct table_device, dm_dev);
0826
0827 mutex_lock(&md->table_devices_lock);
0828 if (refcount_dec_and_test(&td->count)) {
0829 close_table_device(td, md);
0830 list_del(&td->list);
0831 kfree(td);
0832 }
0833 mutex_unlock(&md->table_devices_lock);
0834 }
0835
0836 static void free_table_devices(struct list_head *devices)
0837 {
0838 struct list_head *tmp, *next;
0839
0840 list_for_each_safe(tmp, next, devices) {
0841 struct table_device *td = list_entry(tmp, struct table_device, list);
0842
0843 DMWARN("dm_destroy: %s still exists with %d references",
0844 td->dm_dev.name, refcount_read(&td->count));
0845 kfree(td);
0846 }
0847 }
0848
/*
 * Get the geometry associated with a dm device.
 */
0852 int dm_get_geometry(struct mapped_device *md, struct hd_geometry *geo)
0853 {
0854 *geo = md->geometry;
0855
0856 return 0;
0857 }
0858
/*
 * Set the geometry of a device.
 */
0862 int dm_set_geometry(struct mapped_device *md, struct hd_geometry *geo)
0863 {
0864 sector_t sz = (sector_t)geo->cylinders * geo->heads * geo->sectors;
0865
0866 if (geo->start > sz) {
0867 DMWARN("Start sector is beyond the geometry limits.");
0868 return -EINVAL;
0869 }
0870
0871 md->geometry = *geo;
0872
0873 return 0;
0874 }
0875
0876 static int __noflush_suspending(struct mapped_device *md)
0877 {
0878 return test_bit(DMF_NOFLUSH_SUSPENDING, &md->flags);
0879 }
0880
0881 static void dm_requeue_add_io(struct dm_io *io, bool first_stage)
0882 {
0883 struct mapped_device *md = io->md;
0884
0885 if (first_stage) {
0886 struct dm_io *next = md->requeue_list;
0887
0888 md->requeue_list = io;
0889 io->next = next;
0890 } else {
0891 bio_list_add_head(&md->deferred, io->orig_bio);
0892 }
0893 }
0894
0895 static void dm_kick_requeue(struct mapped_device *md, bool first_stage)
0896 {
0897 if (first_stage)
0898 queue_work(md->wq, &md->requeue_work);
0899 else
0900 queue_work(md->wq, &md->work);
0901 }
0902
/*
 * Return true if the dm_io's original bio is requeued.
 * io->status is updated with error if requeue disallowed.
 */
0907 static bool dm_handle_requeue(struct dm_io *io, bool first_stage)
0908 {
0909 struct bio *bio = io->orig_bio;
0910 bool handle_requeue = (io->status == BLK_STS_DM_REQUEUE);
0911 bool handle_polled_eagain = ((io->status == BLK_STS_AGAIN) &&
0912 (bio->bi_opf & REQ_POLLED));
0913 struct mapped_device *md = io->md;
0914 bool requeued = false;
0915
0916 if (handle_requeue || handle_polled_eagain) {
0917 unsigned long flags;
0918
0919 if (bio->bi_opf & REQ_POLLED) {
			/*
			 * Upper layer won't help us poll split bio
			 * (io->orig_bio may only reflect a subset of the
			 * pre-split original) so clear REQ_POLLED.
			 */
0925 bio_clear_polled(bio);
0926 }
0927
		/*
		 * Target requested pushing back the I/O or
		 * polled (REQ_POLLED) IO hit BLK_STS_AGAIN.
		 */
0932 spin_lock_irqsave(&md->deferred_lock, flags);
0933 if ((__noflush_suspending(md) &&
0934 !WARN_ON_ONCE(dm_is_zone_write(md, bio))) ||
0935 handle_polled_eagain || first_stage) {
0936 dm_requeue_add_io(io, first_stage);
0937 requeued = true;
0938 } else {
			/*
			 * noflush suspend was interrupted or this is
			 * a write to a zoned target.
			 */
0943 io->status = BLK_STS_IOERR;
0944 }
0945 spin_unlock_irqrestore(&md->deferred_lock, flags);
0946 }
0947
0948 if (requeued)
0949 dm_kick_requeue(md, first_stage);
0950
0951 return requeued;
0952 }
0953
0954 static void __dm_io_complete(struct dm_io *io, bool first_stage)
0955 {
0956 struct bio *bio = io->orig_bio;
0957 struct mapped_device *md = io->md;
0958 blk_status_t io_error;
0959 bool requeued;
0960
0961 requeued = dm_handle_requeue(io, first_stage);
0962 if (requeued && first_stage)
0963 return;
0964
0965 io_error = io->status;
0966 if (dm_io_flagged(io, DM_IO_ACCOUNTED))
0967 dm_end_io_acct(io);
0968 else if (!io_error) {
		/*
		 * Must handle target that DM_MAPIO_SUBMITTED only to
		 * then bio_endio() rather than dm_submit_bio_remap().
		 */
0973 __dm_start_io_acct(io);
0974 dm_end_io_acct(io);
0975 }
0976 free_io(io);
0977 smp_wmb();
0978 this_cpu_dec(*md->pending_io);
0979
	/* nudge anyone waiting on suspend queue */
0981 if (unlikely(wq_has_sleeper(&md->wait)))
0982 wake_up(&md->wait);
0983
	/* Return early if the original bio was requeued */
0985 if (requeued)
0986 return;
0987
0988 if (bio_is_flush_with_data(bio)) {
		/*
		 * Preflush done for flush with data, reissue
		 * without REQ_PREFLUSH.
		 */
0993 bio->bi_opf &= ~REQ_PREFLUSH;
0994 queue_io(md, bio);
0995 } else {
		/* done with normal IO or empty flush */
0997 if (io_error)
0998 bio->bi_status = io_error;
0999 bio_endio(bio);
1000 }
1001 }
1002
1003 static void dm_wq_requeue_work(struct work_struct *work)
1004 {
1005 struct mapped_device *md = container_of(work, struct mapped_device,
1006 requeue_work);
1007 unsigned long flags;
1008 struct dm_io *io;
1009
	/* reuse deferred lock to simplify dm_handle_requeue */
1011 spin_lock_irqsave(&md->deferred_lock, flags);
1012 io = md->requeue_list;
1013 md->requeue_list = NULL;
1014 spin_unlock_irqrestore(&md->deferred_lock, flags);
1015
1016 while (io) {
1017 struct dm_io *next = io->next;
1018
1019 dm_io_rewind(io, &md->disk->bio_split);
1020
1021 io->next = NULL;
1022 __dm_io_complete(io, false);
1023 io = next;
1024 }
1025 }
1026
/*
 * Two staged requeue:
 *
 * 1) io->orig_bio points to the real original bio, and the part mapped to
 *    this io must be requeued, instead of other parts of the original bio.
 *
 * 2) io->orig_bio points to a new cloned bio which matches the requeued dm_io.
 */
1035 static void dm_io_complete(struct dm_io *io)
1036 {
1037 bool first_requeue;
1038
	/*
	 * Only dm_io that has been split needs two stage requeue, otherwise
	 * we may run into a long bio clone chain during suspend and OOM could
	 * be triggered.
	 *
	 * Also flush data dm_io won't be marked as DM_IO_WAS_SPLIT, so it
	 * isn't handled via the first stage requeue either.
	 */
1047 if (dm_io_flagged(io, DM_IO_WAS_SPLIT))
1048 first_requeue = true;
1049 else
1050 first_requeue = false;
1051
1052 __dm_io_complete(io, first_requeue);
1053 }
1054
/*
 * Decrements the number of outstanding ios that a bio has been
 * cloned into, completing the original io if necessary.
 */
1059 static inline void __dm_io_dec_pending(struct dm_io *io)
1060 {
1061 if (atomic_dec_and_test(&io->io_count))
1062 dm_io_complete(io);
1063 }
1064
1065 static void dm_io_set_error(struct dm_io *io, blk_status_t error)
1066 {
1067 unsigned long flags;
1068
	/* Push-back supersedes any I/O errors */
1070 spin_lock_irqsave(&io->lock, flags);
1071 if (!(io->status == BLK_STS_DM_REQUEUE &&
1072 __noflush_suspending(io->md))) {
1073 io->status = error;
1074 }
1075 spin_unlock_irqrestore(&io->lock, flags);
1076 }
1077
1078 static void dm_io_dec_pending(struct dm_io *io, blk_status_t error)
1079 {
1080 if (unlikely(error))
1081 dm_io_set_error(io, error);
1082
1083 __dm_io_dec_pending(io);
1084 }
1085
1086 void disable_discard(struct mapped_device *md)
1087 {
1088 struct queue_limits *limits = dm_get_queue_limits(md);
1089
	/* device doesn't really support DISCARD, disable it */
1091 limits->max_discard_sectors = 0;
1092 }
1093
1094 void disable_write_zeroes(struct mapped_device *md)
1095 {
1096 struct queue_limits *limits = dm_get_queue_limits(md);
1097
	/* device doesn't really support WRITE ZEROES, disable it */
1099 limits->max_write_zeroes_sectors = 0;
1100 }
1101
1102 static bool swap_bios_limit(struct dm_target *ti, struct bio *bio)
1103 {
1104 return unlikely((bio->bi_opf & REQ_SWAP) != 0) && unlikely(ti->limit_swap_bios);
1105 }
1106
1107 static void clone_endio(struct bio *bio)
1108 {
1109 blk_status_t error = bio->bi_status;
1110 struct dm_target_io *tio = clone_to_tio(bio);
1111 struct dm_target *ti = tio->ti;
1112 dm_endio_fn endio = ti->type->end_io;
1113 struct dm_io *io = tio->io;
1114 struct mapped_device *md = io->md;
1115
1116 if (unlikely(error == BLK_STS_TARGET)) {
1117 if (bio_op(bio) == REQ_OP_DISCARD &&
1118 !bdev_max_discard_sectors(bio->bi_bdev))
1119 disable_discard(md);
1120 else if (bio_op(bio) == REQ_OP_WRITE_ZEROES &&
1121 !bdev_write_zeroes_sectors(bio->bi_bdev))
1122 disable_write_zeroes(md);
1123 }
1124
1125 if (static_branch_unlikely(&zoned_enabled) &&
1126 unlikely(bdev_is_zoned(bio->bi_bdev)))
1127 dm_zone_endio(io, bio);
1128
1129 if (endio) {
1130 int r = endio(ti, bio, &error);
1131 switch (r) {
1132 case DM_ENDIO_REQUEUE:
1133 if (static_branch_unlikely(&zoned_enabled)) {
				/*
				 * Requeuing writes to a sequential zone of a zoned
				 * target will break the sequential write pattern:
				 * fail such IO.
				 */
1139 if (WARN_ON_ONCE(dm_is_zone_write(md, bio)))
1140 error = BLK_STS_IOERR;
1141 else
1142 error = BLK_STS_DM_REQUEUE;
1143 } else
1144 error = BLK_STS_DM_REQUEUE;
1145 fallthrough;
1146 case DM_ENDIO_DONE:
1147 break;
1148 case DM_ENDIO_INCOMPLETE:
			/* The target will handle the io */
1150 return;
1151 default:
1152 DMWARN("unimplemented target endio return value: %d", r);
1153 BUG();
1154 }
1155 }
1156
1157 if (static_branch_unlikely(&swap_bios_enabled) &&
1158 unlikely(swap_bios_limit(ti, bio)))
1159 up(&md->swap_bios_semaphore);
1160
1161 free_tio(bio);
1162 dm_io_dec_pending(io, error);
1163 }
1164
/*
 * Return maximum size of I/O possible at the supplied sector up to the current
 * target boundary.
 */
1169 static inline sector_t max_io_len_target_boundary(struct dm_target *ti,
1170 sector_t target_offset)
1171 {
1172 return ti->len - target_offset;
1173 }
1174
1175 static sector_t max_io_len(struct dm_target *ti, sector_t sector)
1176 {
1177 sector_t target_offset = dm_target_offset(ti, sector);
1178 sector_t len = max_io_len_target_boundary(ti, target_offset);
1179
	/*
	 * Does the target need to split IO even further?
	 * - varied (per target) IO splitting is a tenet of DM; this
	 *   explains why stacked chunk_sectors based splitting via
	 *   bio_split_to_limits() isn't possible here.
	 */
1186 if (!ti->max_io_len)
1187 return len;
1188 return min_t(sector_t, len,
1189 min(queue_max_sectors(ti->table->md->queue),
1190 blk_chunk_sectors_left(target_offset, ti->max_io_len)));
1191 }
1192
1193 int dm_set_target_max_io_len(struct dm_target *ti, sector_t len)
1194 {
1195 if (len > UINT_MAX) {
1196 DMERR("Specified maximum size of target IO (%llu) exceeds limit (%u)",
1197 (unsigned long long)len, UINT_MAX);
1198 ti->error = "Maximum size of target IO is too large";
1199 return -EINVAL;
1200 }
1201
1202 ti->max_io_len = (uint32_t) len;
1203
1204 return 0;
1205 }
1206 EXPORT_SYMBOL_GPL(dm_set_target_max_io_len);
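/*
 * Illustrative sketch (assumption): a striping-style target would call this
 * from its .ctr so that no single clone crosses a chunk boundary; the
 * "chunk_size" variable below is hypothetical.
 *
 *	r = dm_set_target_max_io_len(ti, chunk_size);
 *	if (r)
 *		return r;
 */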
1207
1208 static struct dm_target *dm_dax_get_live_target(struct mapped_device *md,
1209 sector_t sector, int *srcu_idx)
1210 __acquires(md->io_barrier)
1211 {
1212 struct dm_table *map;
1213 struct dm_target *ti;
1214
1215 map = dm_get_live_table(md, srcu_idx);
1216 if (!map)
1217 return NULL;
1218
1219 ti = dm_table_find_target(map, sector);
1220 if (!ti)
1221 return NULL;
1222
1223 return ti;
1224 }
1225
1226 static long dm_dax_direct_access(struct dax_device *dax_dev, pgoff_t pgoff,
1227 long nr_pages, enum dax_access_mode mode, void **kaddr,
1228 pfn_t *pfn)
1229 {
1230 struct mapped_device *md = dax_get_private(dax_dev);
1231 sector_t sector = pgoff * PAGE_SECTORS;
1232 struct dm_target *ti;
1233 long len, ret = -EIO;
1234 int srcu_idx;
1235
1236 ti = dm_dax_get_live_target(md, sector, &srcu_idx);
1237
1238 if (!ti)
1239 goto out;
1240 if (!ti->type->direct_access)
1241 goto out;
1242 len = max_io_len(ti, sector) / PAGE_SECTORS;
1243 if (len < 1)
1244 goto out;
1245 nr_pages = min(len, nr_pages);
1246 ret = ti->type->direct_access(ti, pgoff, nr_pages, mode, kaddr, pfn);
1247
1248 out:
1249 dm_put_live_table(md, srcu_idx);
1250
1251 return ret;
1252 }
1253
1254 static int dm_dax_zero_page_range(struct dax_device *dax_dev, pgoff_t pgoff,
1255 size_t nr_pages)
1256 {
1257 struct mapped_device *md = dax_get_private(dax_dev);
1258 sector_t sector = pgoff * PAGE_SECTORS;
1259 struct dm_target *ti;
1260 int ret = -EIO;
1261 int srcu_idx;
1262
1263 ti = dm_dax_get_live_target(md, sector, &srcu_idx);
1264
1265 if (!ti)
1266 goto out;
1267 if (WARN_ON(!ti->type->dax_zero_page_range)) {
		/*
		 * ->dax_zero_page_range() is a mandatory dax operation.
		 * If we are here, something is wrong.
		 */
1272 goto out;
1273 }
1274 ret = ti->type->dax_zero_page_range(ti, pgoff, nr_pages);
1275 out:
1276 dm_put_live_table(md, srcu_idx);
1277
1278 return ret;
1279 }
1280
1281 static size_t dm_dax_recovery_write(struct dax_device *dax_dev, pgoff_t pgoff,
1282 void *addr, size_t bytes, struct iov_iter *i)
1283 {
1284 struct mapped_device *md = dax_get_private(dax_dev);
1285 sector_t sector = pgoff * PAGE_SECTORS;
1286 struct dm_target *ti;
1287 int srcu_idx;
1288 long ret = 0;
1289
1290 ti = dm_dax_get_live_target(md, sector, &srcu_idx);
1291 if (!ti || !ti->type->dax_recovery_write)
1292 goto out;
1293
1294 ret = ti->type->dax_recovery_write(ti, pgoff, addr, bytes, i);
1295 out:
1296 dm_put_live_table(md, srcu_idx);
1297 return ret;
1298 }
1299
/*
 * A target may call dm_accept_partial_bio() only from its map routine.  It is
 * allowed for all bio types except REQ_PREFLUSH, zone management operations
 * (REQ_OP_ZONE_*), REQ_OP_ZONE_APPEND and bios issued by
 * __send_duplicate_bios(); the BUG_ON()s below enforce this.
 *
 * dm_accept_partial_bio() informs DM core that the target only wants to
 * process the first n_sectors of the bio: the clone is trimmed to that
 * length and the remainder of the original bio is resubmitted later by
 * dm_split_and_process_bio() as a new bio.
 *
 * This is typically used by a target that maps onto several devices or
 * regions: the map function accepts only the part of the bio that fits in
 * the first region, and DM sends the rest back in one or more further bios.
 */
1330 void dm_accept_partial_bio(struct bio *bio, unsigned n_sectors)
1331 {
1332 struct dm_target_io *tio = clone_to_tio(bio);
1333 struct dm_io *io = tio->io;
1334 unsigned bio_sectors = bio_sectors(bio);
1335
1336 BUG_ON(dm_tio_flagged(tio, DM_TIO_IS_DUPLICATE_BIO));
1337 BUG_ON(op_is_zone_mgmt(bio_op(bio)));
1338 BUG_ON(bio_op(bio) == REQ_OP_ZONE_APPEND);
1339 BUG_ON(bio_sectors > *tio->len_ptr);
1340 BUG_ON(n_sectors > bio_sectors);
1341
1342 *tio->len_ptr -= bio_sectors - n_sectors;
1343 bio->bi_iter.bi_size = n_sectors << SECTOR_SHIFT;
1344
	/*
	 * __split_and_process_bio() may have already saved the mapped part
	 * for accounting, but it is being reduced so update accordingly.
	 */
1349 dm_io_set_flag(io, DM_IO_WAS_SPLIT);
1350 io->sectors = n_sectors;
1351 io->sector_offset = bio_sectors(io->orig_bio);
1352 }
1353 EXPORT_SYMBOL_GPL(dm_accept_partial_bio);
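/*
 * Illustrative sketch (assumption): a target's .map routine that can only
 * service the first "max_sectors" of a clone could accept that much and let
 * DM core resubmit the rest; "max_sectors" and "my_dev" are hypothetical.
 *
 *	if (bio_sectors(bio) > max_sectors)
 *		dm_accept_partial_bio(bio, max_sectors);
 *	bio_set_dev(bio, my_dev->bdev);
 *	return DM_MAPIO_REMAPPED;
 */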
1354
/*
 * @clone: clone bio that DM core passed to target's .map function
 * @tgt_clone: clone of @clone bio that target needs submitted
 *
 * Targets should use this interface to submit bios they take
 * ownership of when returning DM_MAPIO_SUBMITTED.
 *
 * Target should also enable ti->accounts_remapped_io
 */
1364 void dm_submit_bio_remap(struct bio *clone, struct bio *tgt_clone)
1365 {
1366 struct dm_target_io *tio = clone_to_tio(clone);
1367 struct dm_io *io = tio->io;
1368
	/* establish bio that will get submitted */
1370 if (!tgt_clone)
1371 tgt_clone = clone;
1372
	/*
	 * Account io->orig_bio to DM dev on behalf of target
	 * that took ownership of IO with DM_MAPIO_SUBMITTED.
	 */
1377 dm_start_io_acct(io, clone);
1378
1379 trace_block_bio_remap(tgt_clone, disk_devt(io->md->disk),
1380 tio->old_sector);
1381 submit_bio_noacct(tgt_clone);
1382 }
1383 EXPORT_SYMBOL_GPL(dm_submit_bio_remap);
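/*
 * Illustrative sketch (assumption): a target that defers its clones to a
 * workqueue would set ti->accounts_remapped_io in .ctr, return
 * DM_MAPIO_SUBMITTED from .map, and later have the worker call
 *
 *	bio_set_dev(clone, my_dev->bdev);
 *	dm_submit_bio_remap(clone, NULL);
 *
 * so that accounting starts when the bio is actually issued; "my_dev" is a
 * hypothetical per-target structure.
 */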
1384
1385 static noinline void __set_swap_bios_limit(struct mapped_device *md, int latch)
1386 {
1387 mutex_lock(&md->swap_bios_lock);
1388 while (latch < md->swap_bios) {
1389 cond_resched();
1390 down(&md->swap_bios_semaphore);
1391 md->swap_bios--;
1392 }
1393 while (latch > md->swap_bios) {
1394 cond_resched();
1395 up(&md->swap_bios_semaphore);
1396 md->swap_bios++;
1397 }
1398 mutex_unlock(&md->swap_bios_lock);
1399 }
1400
1401 static void __map_bio(struct bio *clone)
1402 {
1403 struct dm_target_io *tio = clone_to_tio(clone);
1404 struct dm_target *ti = tio->ti;
1405 struct dm_io *io = tio->io;
1406 struct mapped_device *md = io->md;
1407 int r;
1408
1409 clone->bi_end_io = clone_endio;
1410
	/*
	 * Map the clone.
	 */
1414 tio->old_sector = clone->bi_iter.bi_sector;
1415
1416 if (static_branch_unlikely(&swap_bios_enabled) &&
1417 unlikely(swap_bios_limit(ti, clone))) {
1418 int latch = get_swap_bios();
1419 if (unlikely(latch != md->swap_bios))
1420 __set_swap_bios_limit(md, latch);
1421 down(&md->swap_bios_semaphore);
1422 }
1423
1424 if (static_branch_unlikely(&zoned_enabled)) {
		/*
		 * Check if the IO needs a special mapping due to zone append
		 * emulation on a zoned target. In this case, dm_zone_map_bio()
		 * calls the target map operation.
		 */
1430 if (unlikely(dm_emulate_zone_append(md)))
1431 r = dm_zone_map_bio(tio);
1432 else
1433 r = ti->type->map(ti, clone);
1434 } else
1435 r = ti->type->map(ti, clone);
1436
1437 switch (r) {
1438 case DM_MAPIO_SUBMITTED:
		/* target has assumed ownership of this io */
1440 if (!ti->accounts_remapped_io)
1441 dm_start_io_acct(io, clone);
1442 break;
1443 case DM_MAPIO_REMAPPED:
1444 dm_submit_bio_remap(clone, NULL);
1445 break;
1446 case DM_MAPIO_KILL:
1447 case DM_MAPIO_REQUEUE:
1448 if (static_branch_unlikely(&swap_bios_enabled) &&
1449 unlikely(swap_bios_limit(ti, clone)))
1450 up(&md->swap_bios_semaphore);
1451 free_tio(clone);
1452 if (r == DM_MAPIO_KILL)
1453 dm_io_dec_pending(io, BLK_STS_IOERR);
1454 else
1455 dm_io_dec_pending(io, BLK_STS_DM_REQUEUE);
1456 break;
1457 default:
1458 DMWARN("unimplemented target map return value: %d", r);
1459 BUG();
1460 }
1461 }
1462
1463 static void setup_split_accounting(struct clone_info *ci, unsigned len)
1464 {
1465 struct dm_io *io = ci->io;
1466
1467 if (ci->sector_count > len) {
		/*
		 * Split needed, save the mapped part for accounting.
		 * NOTE: dm_accept_partial_bio() will update accordingly.
		 */
1472 dm_io_set_flag(io, DM_IO_WAS_SPLIT);
1473 io->sectors = len;
1474 io->sector_offset = bio_sectors(ci->bio);
1475 }
1476 }
1477
1478 static void alloc_multiple_bios(struct bio_list *blist, struct clone_info *ci,
1479 struct dm_target *ti, unsigned num_bios)
1480 {
1481 struct bio *bio;
1482 int try;
1483
1484 for (try = 0; try < 2; try++) {
1485 int bio_nr;
1486
1487 if (try)
1488 mutex_lock(&ci->io->md->table_devices_lock);
1489 for (bio_nr = 0; bio_nr < num_bios; bio_nr++) {
1490 bio = alloc_tio(ci, ti, bio_nr, NULL,
1491 try ? GFP_NOIO : GFP_NOWAIT);
1492 if (!bio)
1493 break;
1494
1495 bio_list_add(blist, bio);
1496 }
1497 if (try)
1498 mutex_unlock(&ci->io->md->table_devices_lock);
1499 if (bio_nr == num_bios)
1500 return;
1501
1502 while ((bio = bio_list_pop(blist)))
1503 free_tio(bio);
1504 }
1505 }
1506
1507 static int __send_duplicate_bios(struct clone_info *ci, struct dm_target *ti,
1508 unsigned int num_bios, unsigned *len)
1509 {
1510 struct bio_list blist = BIO_EMPTY_LIST;
1511 struct bio *clone;
1512 unsigned int ret = 0;
1513
1514 switch (num_bios) {
1515 case 0:
1516 break;
1517 case 1:
1518 if (len)
1519 setup_split_accounting(ci, *len);
1520 clone = alloc_tio(ci, ti, 0, len, GFP_NOIO);
1521 __map_bio(clone);
1522 ret = 1;
1523 break;
1524 default:
		/* dm_accept_partial_bio() is not supported with shared tio->len_ptr */
1526 alloc_multiple_bios(&blist, ci, ti, num_bios);
1527 while ((clone = bio_list_pop(&blist))) {
1528 dm_tio_set_flag(clone_to_tio(clone), DM_TIO_IS_DUPLICATE_BIO);
1529 __map_bio(clone);
1530 ret += 1;
1531 }
1532 break;
1533 }
1534
1535 return ret;
1536 }
1537
1538 static void __send_empty_flush(struct clone_info *ci)
1539 {
1540 struct dm_table *t = ci->map;
1541 struct bio flush_bio;
1542
	/*
	 * Use an on-stack bio for this, it's safe since we don't
	 * need to reference it after submit. It's just used as
	 * the basis for the clone(s).
	 */
1548 bio_init(&flush_bio, ci->io->md->disk->part0, NULL, 0,
1549 REQ_OP_WRITE | REQ_PREFLUSH | REQ_SYNC);
1550
1551 ci->bio = &flush_bio;
1552 ci->sector_count = 0;
1553 ci->io->tio.clone.bi_iter.bi_size = 0;
1554
1555 for (unsigned int i = 0; i < t->num_targets; i++) {
1556 unsigned int bios;
1557 struct dm_target *ti = dm_table_get_target(t, i);
1558
1559 atomic_add(ti->num_flush_bios, &ci->io->io_count);
1560 bios = __send_duplicate_bios(ci, ti, ti->num_flush_bios, NULL);
1561 atomic_sub(ti->num_flush_bios - bios, &ci->io->io_count);
1562 }
1563
	/*
	 * alloc_io() takes one extra reference for submission, so the
	 * reference won't be dropped without requiring extra counting.
	 */
1568 atomic_sub(1, &ci->io->io_count);
1569
1570 bio_uninit(ci->bio);
1571 }
1572
1573 static void __send_changing_extent_only(struct clone_info *ci, struct dm_target *ti,
1574 unsigned num_bios)
1575 {
1576 unsigned len;
1577 unsigned int bios;
1578
1579 len = min_t(sector_t, ci->sector_count,
1580 max_io_len_target_boundary(ti, dm_target_offset(ti, ci->sector)));
1581
1582 atomic_add(num_bios, &ci->io->io_count);
1583 bios = __send_duplicate_bios(ci, ti, num_bios, &len);
	/*
	 * alloc_io() takes one extra reference for submission, so the
	 * reference won't be dropped without requiring extra counting.
	 */
1588 atomic_sub(num_bios - bios + 1, &ci->io->io_count);
1589
1590 ci->sector += len;
1591 ci->sector_count -= len;
1592 }
1593
1594 static bool is_abnormal_io(struct bio *bio)
1595 {
1596 enum req_op op = bio_op(bio);
1597
1598 if (op != REQ_OP_READ && op != REQ_OP_WRITE && op != REQ_OP_FLUSH) {
1599 switch (op) {
1600 case REQ_OP_DISCARD:
1601 case REQ_OP_SECURE_ERASE:
1602 case REQ_OP_WRITE_ZEROES:
1603 return true;
1604 default:
1605 break;
1606 }
1607 }
1608
1609 return false;
1610 }
1611
1612 static blk_status_t __process_abnormal_io(struct clone_info *ci,
1613 struct dm_target *ti)
1614 {
1615 unsigned num_bios = 0;
1616
1617 switch (bio_op(ci->bio)) {
1618 case REQ_OP_DISCARD:
1619 num_bios = ti->num_discard_bios;
1620 break;
1621 case REQ_OP_SECURE_ERASE:
1622 num_bios = ti->num_secure_erase_bios;
1623 break;
1624 case REQ_OP_WRITE_ZEROES:
1625 num_bios = ti->num_write_zeroes_bios;
1626 break;
1627 default:
1628 break;
1629 }
1630
	/*
	 * Even though the device advertised support for this type of
	 * request, that does not mean every target supports it, and
	 * reconfiguration might also have changed that since the
	 * check was performed.
	 */
1637 if (unlikely(!num_bios))
1638 return BLK_STS_NOTSUPP;
1639
1640 __send_changing_extent_only(ci, ti, num_bios);
1641 return BLK_STS_OK;
1642 }
1643
/*
 * For polled bios, ->bi_private is reused as the head of the list of dm_io
 * instances created for this bio; the original ->bi_private is saved in
 * dm_io->data first (see dm_queue_poll_io()) and restored in dm_poll_bio().
 *
 * The upper layer owns ->bi_private and won't touch it between submission
 * and completion, so borrowing it here is safe; REQ_DM_POLL_LIST marks bios
 * whose ->bi_private currently holds such a list.
 */
1653 static inline struct dm_io **dm_poll_list_head(struct bio *bio)
1654 {
1655 return (struct dm_io **)&bio->bi_private;
1656 }
1657
1658 static void dm_queue_poll_io(struct bio *bio, struct dm_io *io)
1659 {
1660 struct dm_io **head = dm_poll_list_head(bio);
1661
1662 if (!(bio->bi_opf & REQ_DM_POLL_LIST)) {
1663 bio->bi_opf |= REQ_DM_POLL_LIST;
		/*
		 * Save .bi_private into dm_io, so that we can reuse
		 * .bi_private as the dm_io list head.
		 */
1668 io->data = bio->bi_private;
1669
		/* tell block layer to poll for completion */
1671 bio->bi_cookie = ~BLK_QC_T_NONE;
1672
1673 io->next = NULL;
1674 } else {
		/*
		 * bio recursed due to split, reuse original poll list,
		 * and save bio->bi_private too.
		 */
1679 io->data = (*head)->data;
1680 io->next = *head;
1681 }
1682
1683 *head = io;
1684 }
1685
/*
 * Select the target covering ci->sector and map one clone of the bio to it.
 */
1689 static blk_status_t __split_and_process_bio(struct clone_info *ci)
1690 {
1691 struct bio *clone;
1692 struct dm_target *ti;
1693 unsigned len;
1694
1695 ti = dm_table_find_target(ci->map, ci->sector);
1696 if (unlikely(!ti))
1697 return BLK_STS_IOERR;
1698
1699 if (unlikely((ci->bio->bi_opf & REQ_NOWAIT) != 0) &&
1700 unlikely(!dm_target_supports_nowait(ti->type)))
1701 return BLK_STS_NOTSUPP;
1702
1703 if (unlikely(ci->is_abnormal_io))
1704 return __process_abnormal_io(ci, ti);
1705
	/*
	 * Only support bio polling for normal IO, and the target io is
	 * exactly inside the dm_io instance (verified in dm_poll_dm_io).
	 */
1710 ci->submit_as_polled = !!(ci->bio->bi_opf & REQ_POLLED);
1711
1712 len = min_t(sector_t, max_io_len(ti, ci->sector), ci->sector_count);
1713 setup_split_accounting(ci, len);
1714 clone = alloc_tio(ci, ti, 0, &len, GFP_NOIO);
1715 __map_bio(clone);
1716
1717 ci->sector += len;
1718 ci->sector_count -= len;
1719
1720 return BLK_STS_OK;
1721 }
1722
1723 static void init_clone_info(struct clone_info *ci, struct mapped_device *md,
1724 struct dm_table *map, struct bio *bio, bool is_abnormal)
1725 {
1726 ci->map = map;
1727 ci->io = alloc_io(md, bio);
1728 ci->bio = bio;
1729 ci->is_abnormal_io = is_abnormal;
1730 ci->submit_as_polled = false;
1731 ci->sector = bio->bi_iter.bi_sector;
1732 ci->sector_count = bio_sectors(bio);
1733
	/* A zone management operation should never carry a data payload */
1735 if (static_branch_unlikely(&zoned_enabled) &&
1736 WARN_ON_ONCE(op_is_zone_mgmt(bio_op(bio)) && ci->sector_count))
1737 ci->sector_count = 0;
1738 }
1739
/*
 * Entry point to split a bio into clones and submit them to the targets.
 */
1743 static void dm_split_and_process_bio(struct mapped_device *md,
1744 struct dm_table *map, struct bio *bio)
1745 {
1746 struct clone_info ci;
1747 struct dm_io *io;
1748 blk_status_t error = BLK_STS_OK;
1749 bool is_abnormal;
1750
1751 is_abnormal = is_abnormal_io(bio);
1752 if (unlikely(is_abnormal)) {
		/*
		 * Use bio_split_to_limits() for abnormal IO (e.g. discard, etc)
		 * otherwise the associated queue_limits won't be imposed.
		 */
1757 bio = bio_split_to_limits(bio);
1758 }
1759
1760 init_clone_info(&ci, md, map, bio, is_abnormal);
1761 io = ci.io;
1762
1763 if (bio->bi_opf & REQ_PREFLUSH) {
1764 __send_empty_flush(&ci);
		/* dm_io_complete submits any data associated with flush */
1766 goto out;
1767 }
1768
1769 error = __split_and_process_bio(&ci);
1770 if (error || !ci.sector_count)
1771 goto out;
	/*
	 * Remainder must be passed to submit_bio_noacct() so it gets handled
	 * *after* bios already submitted have been completely processed.
	 */
1776 bio_trim(bio, io->sectors, ci.sector_count);
1777 trace_block_split(bio, bio->bi_iter.bi_sector);
1778 bio_inc_remaining(bio);
1779 submit_bio_noacct(bio);
1780 out:
	/*
	 * Drop the extra reference count for non-POLLED bio, and hold one
	 * reference for POLLED bio, which will be released in dm_poll_bio().
	 *
	 * Add every dm_io instance into the dm_io list head which is stored
	 * in bio->bi_private, so that dm_poll_bio() can poll them all.
	 */
1788 if (error || !ci.submit_as_polled) {
		/*
		 * In case of submission failure, the extra reference for
		 * submitting io isn't consumed yet.
		 */
1793 if (error)
1794 atomic_dec(&io->io_count);
1795 dm_io_dec_pending(io, error);
1796 } else
1797 dm_queue_poll_io(bio, io);
1798 }
1799
1800 static void dm_submit_bio(struct bio *bio)
1801 {
1802 struct mapped_device *md = bio->bi_bdev->bd_disk->private_data;
1803 int srcu_idx;
1804 struct dm_table *map;
1805 blk_opf_t bio_opf = bio->bi_opf;
1806
1807 map = dm_get_live_table_bio(md, &srcu_idx, bio_opf);
1808
	/* If suspended, or map not yet available, queue this IO for later */
1810 if (unlikely(test_bit(DMF_BLOCK_IO_FOR_SUSPEND, &md->flags)) ||
1811 unlikely(!map)) {
1812 if (bio->bi_opf & REQ_NOWAIT)
1813 bio_wouldblock_error(bio);
1814 else if (bio->bi_opf & REQ_RAHEAD)
1815 bio_io_error(bio);
1816 else
1817 queue_io(md, bio);
1818 goto out;
1819 }
1820
1821 dm_split_and_process_bio(md, map, bio);
1822 out:
1823 dm_put_live_table_bio(md, srcu_idx, bio_opf);
1824 }
1825
1826 static bool dm_poll_dm_io(struct dm_io *io, struct io_comp_batch *iob,
1827 unsigned int flags)
1828 {
1829 WARN_ON_ONCE(!dm_tio_is_normal(&io->tio));
1830
	/* don't poll if the mapped io is done */
1832 if (atomic_read(&io->io_count) > 1)
1833 bio_poll(&io->tio.clone, iob, flags);
1834
	/* bio_poll holds the last reference */
1836 return atomic_read(&io->io_count) == 1;
1837 }
1838
1839 static int dm_poll_bio(struct bio *bio, struct io_comp_batch *iob,
1840 unsigned int flags)
1841 {
1842 struct dm_io **head = dm_poll_list_head(bio);
1843 struct dm_io *list = *head;
1844 struct dm_io *tmp = NULL;
1845 struct dm_io *curr, *next;
1846
	/* Only poll normal bio which was marked as REQ_DM_POLL_LIST */
1848 if (!(bio->bi_opf & REQ_DM_POLL_LIST))
1849 return 0;
1850
1851 WARN_ON_ONCE(!list);
1852
	/*
	 * Restore .bi_private before possibly completing dm_io.
	 *
	 * bio_poll() is only possible once @bio has been completely
	 * submitted via submit_bio_noacct()'s depth-first submission.
	 * So there is no dm_queue_poll_io() race associated with
	 * clearing REQ_DM_POLL_LIST here.
	 */
1861 bio->bi_opf &= ~REQ_DM_POLL_LIST;
1862 bio->bi_private = list->data;
1863
1864 for (curr = list, next = curr->next; curr; curr = next, next =
1865 curr ? curr->next : NULL) {
1866 if (dm_poll_dm_io(curr, iob, flags)) {
			/*
			 * clone_endio() has already occurred, so no
			 * error handling is needed here.
			 */
1871 __dm_io_dec_pending(curr);
1872 } else {
1873 curr->next = tmp;
1874 tmp = curr;
1875 }
1876 }
1877
	/* Not done? */
1879 if (tmp) {
1880 bio->bi_opf |= REQ_DM_POLL_LIST;
		/* Reset bio->bi_private to dm_io list head */
1882 *head = tmp;
1883 return 0;
1884 }
1885 return 1;
1886 }
1887
/*
 * An IDR is used to keep track of allocated minor numbers.
 */
1891 static void free_minor(int minor)
1892 {
1893 spin_lock(&_minor_lock);
1894 idr_remove(&_minor_idr, minor);
1895 spin_unlock(&_minor_lock);
1896 }
1897
/*
 * See if the device with a specific minor # is free.
 */
1901 static int specific_minor(int minor)
1902 {
1903 int r;
1904
1905 if (minor >= (1 << MINORBITS))
1906 return -EINVAL;
1907
1908 idr_preload(GFP_KERNEL);
1909 spin_lock(&_minor_lock);
1910
1911 r = idr_alloc(&_minor_idr, MINOR_ALLOCED, minor, minor + 1, GFP_NOWAIT);
1912
1913 spin_unlock(&_minor_lock);
1914 idr_preload_end();
1915 if (r < 0)
1916 return r == -ENOSPC ? -EBUSY : r;
1917 return 0;
1918 }
1919
1920 static int next_free_minor(int *minor)
1921 {
1922 int r;
1923
1924 idr_preload(GFP_KERNEL);
1925 spin_lock(&_minor_lock);
1926
1927 r = idr_alloc(&_minor_idr, MINOR_ALLOCED, 0, 1 << MINORBITS, GFP_NOWAIT);
1928
1929 spin_unlock(&_minor_lock);
1930 idr_preload_end();
1931 if (r < 0)
1932 return r;
1933 *minor = r;
1934 return 0;
1935 }
1936
1937 static const struct block_device_operations dm_blk_dops;
1938 static const struct block_device_operations dm_rq_blk_dops;
1939 static const struct dax_operations dm_dax_ops;
1940
1941 static void dm_wq_work(struct work_struct *work);
1942
1943 #ifdef CONFIG_BLK_INLINE_ENCRYPTION
1944 static void dm_queue_destroy_crypto_profile(struct request_queue *q)
1945 {
1946 dm_destroy_crypto_profile(q->crypto_profile);
1947 }
1948
1949 #else
1950
1951 static inline void dm_queue_destroy_crypto_profile(struct request_queue *q)
1952 {
1953 }
1954 #endif
1955
1956 static void cleanup_mapped_device(struct mapped_device *md)
1957 {
1958 if (md->wq)
1959 destroy_workqueue(md->wq);
1960 dm_free_md_mempools(md->mempools);
1961
1962 if (md->dax_dev) {
1963 dax_remove_host(md->disk);
1964 kill_dax(md->dax_dev);
1965 put_dax(md->dax_dev);
1966 md->dax_dev = NULL;
1967 }
1968
1969 dm_cleanup_zoned_dev(md);
1970 if (md->disk) {
1971 spin_lock(&_minor_lock);
1972 md->disk->private_data = NULL;
1973 spin_unlock(&_minor_lock);
1974 if (dm_get_md_type(md) != DM_TYPE_NONE) {
1975 dm_sysfs_exit(md);
1976 del_gendisk(md->disk);
1977 }
1978 dm_queue_destroy_crypto_profile(md->queue);
1979 put_disk(md->disk);
1980 }
1981
1982 if (md->pending_io) {
1983 free_percpu(md->pending_io);
1984 md->pending_io = NULL;
1985 }
1986
1987 cleanup_srcu_struct(&md->io_barrier);
1988
1989 mutex_destroy(&md->suspend_lock);
1990 mutex_destroy(&md->type_lock);
1991 mutex_destroy(&md->table_devices_lock);
1992 mutex_destroy(&md->swap_bios_lock);
1993
1994 dm_mq_cleanup_mapped_device(md);
1995 }
1996
/*
 * Allocate and initialise a blank device with a given minor.
 */
2000 static struct mapped_device *alloc_dev(int minor)
2001 {
2002 int r, numa_node_id = dm_get_numa_node();
2003 struct mapped_device *md;
2004 void *old_md;
2005
2006 md = kvzalloc_node(sizeof(*md), GFP_KERNEL, numa_node_id);
2007 if (!md) {
2008 DMWARN("unable to allocate device, out of memory.");
2009 return NULL;
2010 }
2011
2012 if (!try_module_get(THIS_MODULE))
2013 goto bad_module_get;
2014
	/* get a minor number for the dev */
2016 if (minor == DM_ANY_MINOR)
2017 r = next_free_minor(&minor);
2018 else
2019 r = specific_minor(minor);
2020 if (r < 0)
2021 goto bad_minor;
2022
2023 r = init_srcu_struct(&md->io_barrier);
2024 if (r < 0)
2025 goto bad_io_barrier;
2026
2027 md->numa_node_id = numa_node_id;
2028 md->init_tio_pdu = false;
2029 md->type = DM_TYPE_NONE;
2030 mutex_init(&md->suspend_lock);
2031 mutex_init(&md->type_lock);
2032 mutex_init(&md->table_devices_lock);
2033 spin_lock_init(&md->deferred_lock);
2034 atomic_set(&md->holders, 1);
2035 atomic_set(&md->open_count, 0);
2036 atomic_set(&md->event_nr, 0);
2037 atomic_set(&md->uevent_seq, 0);
2038 INIT_LIST_HEAD(&md->uevent_list);
2039 INIT_LIST_HEAD(&md->table_devices);
2040 spin_lock_init(&md->uevent_lock);
2041
	/*
	 * default to bio-based until DM table is loaded and md->type
	 * established. If request-based table is loaded: blk-mq will
	 * override accordingly.
	 */
2047 md->disk = blk_alloc_disk(md->numa_node_id);
2048 if (!md->disk)
2049 goto bad;
2050 md->queue = md->disk->queue;
2051
2052 init_waitqueue_head(&md->wait);
2053 INIT_WORK(&md->work, dm_wq_work);
2054 INIT_WORK(&md->requeue_work, dm_wq_requeue_work);
2055 init_waitqueue_head(&md->eventq);
2056 init_completion(&md->kobj_holder.completion);
2057
2058 md->requeue_list = NULL;
2059 md->swap_bios = get_swap_bios();
2060 sema_init(&md->swap_bios_semaphore, md->swap_bios);
2061 mutex_init(&md->swap_bios_lock);
2062
2063 md->disk->major = _major;
2064 md->disk->first_minor = minor;
2065 md->disk->minors = 1;
2066 md->disk->flags |= GENHD_FL_NO_PART;
2067 md->disk->fops = &dm_blk_dops;
2068 md->disk->queue = md->queue;
2069 md->disk->private_data = md;
2070 sprintf(md->disk->disk_name, "dm-%d", minor);
2071
2072 if (IS_ENABLED(CONFIG_FS_DAX)) {
2073 md->dax_dev = alloc_dax(md, &dm_dax_ops);
2074 if (IS_ERR(md->dax_dev)) {
2075 md->dax_dev = NULL;
2076 goto bad;
2077 }
2078 set_dax_nocache(md->dax_dev);
2079 set_dax_nomc(md->dax_dev);
2080 if (dax_add_host(md->dax_dev, md->disk))
2081 goto bad;
2082 }
2083
2084 format_dev_t(md->name, MKDEV(_major, minor));
2085
2086 md->wq = alloc_workqueue("kdmflush/%s", WQ_MEM_RECLAIM, 0, md->name);
2087 if (!md->wq)
2088 goto bad;
2089
2090 md->pending_io = alloc_percpu(unsigned long);
2091 if (!md->pending_io)
2092 goto bad;
2093
2094 dm_stats_init(&md->stats);
2095
	/* Populate the mapping, nobody knows we exist yet */
2097 spin_lock(&_minor_lock);
2098 old_md = idr_replace(&_minor_idr, md, minor);
2099 spin_unlock(&_minor_lock);
2100
2101 BUG_ON(old_md != MINOR_ALLOCED);
2102
2103 return md;
2104
2105 bad:
2106 cleanup_mapped_device(md);
2107 bad_io_barrier:
2108 free_minor(minor);
2109 bad_minor:
2110 module_put(THIS_MODULE);
2111 bad_module_get:
2112 kvfree(md);
2113 return NULL;
2114 }
2115
2116 static void unlock_fs(struct mapped_device *md);
2117
2118 static void free_dev(struct mapped_device *md)
2119 {
2120 int minor = MINOR(disk_devt(md->disk));
2121
2122 unlock_fs(md);
2123
2124 cleanup_mapped_device(md);
2125
2126 free_table_devices(&md->table_devices);
2127 dm_stats_cleanup(&md->stats);
2128 free_minor(minor);
2129
2130 module_put(THIS_MODULE);
2131 kvfree(md);
2132 }
2133
/*
 * Called on table events: deliver queued uevents and wake event waiters.
 */
2137 static void event_callback(void *context)
2138 {
2139 unsigned long flags;
2140 LIST_HEAD(uevents);
2141 struct mapped_device *md = (struct mapped_device *) context;
2142
2143 spin_lock_irqsave(&md->uevent_lock, flags);
2144 list_splice_init(&md->uevent_list, &uevents);
2145 spin_unlock_irqrestore(&md->uevent_lock, flags);
2146
2147 dm_send_uevents(&uevents, &disk_to_dev(md->disk)->kobj);
2148
2149 atomic_inc(&md->event_nr);
2150 wake_up(&md->eventq);
2151 dm_issue_global_event();
2152 }
2153
/*
 * Bind a table to the device; returns the old map, which the caller must destroy.
 */
2157 static struct dm_table *__bind(struct mapped_device *md, struct dm_table *t,
2158 struct queue_limits *limits)
2159 {
2160 struct dm_table *old_map;
2161 sector_t size;
2162 int ret;
2163
2164 lockdep_assert_held(&md->suspend_lock);
2165
2166 size = dm_table_get_size(t);
2167
	/*
	 * Wipe any geometry if the size of the table changed.
	 */
2171 if (size != dm_get_size(md))
2172 memset(&md->geometry, 0, sizeof(md->geometry));
2173
2174 if (!get_capacity(md->disk))
2175 set_capacity(md->disk, size);
2176 else
2177 set_capacity_and_notify(md->disk, size);
2178
2179 dm_table_event_callback(t, event_callback, md);
2180
2181 if (dm_table_request_based(t)) {
		/*
		 * Leverage the fact that request-based DM targets are
		 * immutable singletons - used to optimize dm_mq_queue_rq.
		 */
2186 md->immutable_target = dm_table_get_immutable_target(t);
2187
		/*
		 * There is no need to reload with request-based dm because the
		 * size of front_pad doesn't change.
		 *
		 * Note for future: If you are to reload bioset, prep-ed
		 * requests in the queue may refer to bio from the old bioset,
		 * so you must walk through the queue to unprep.
		 */
2196 if (!md->mempools) {
2197 md->mempools = t->mempools;
2198 t->mempools = NULL;
2199 }
2200 } else {
		/*
		 * The md may already have mempools that need changing.
		 * If so, reload the bioset because front_pad may have changed
		 * because a different table was loaded.
		 */
2206 dm_free_md_mempools(md->mempools);
2207 md->mempools = t->mempools;
2208 t->mempools = NULL;
2209 }
2210
2211 ret = dm_table_set_restrictions(t, md->queue, limits);
2212 if (ret) {
2213 old_map = ERR_PTR(ret);
2214 goto out;
2215 }
2216
2217 old_map = rcu_dereference_protected(md->map, lockdep_is_held(&md->suspend_lock));
2218 rcu_assign_pointer(md->map, (void *)t);
2219 md->immutable_target_type = dm_table_get_immutable_target_type(t);
2220
2221 if (old_map)
2222 dm_sync_table(md);
2223 out:
2224 return old_map;
2225 }
2226
/*
 * Returns unbound table for the caller to free.
 */
2230 static struct dm_table *__unbind(struct mapped_device *md)
2231 {
2232 struct dm_table *map = rcu_dereference_protected(md->map, 1);
2233
2234 if (!map)
2235 return NULL;
2236
2237 dm_table_event_callback(map, NULL, NULL);
2238 RCU_INIT_POINTER(md->map, NULL);
2239 dm_sync_table(md);
2240
2241 return map;
2242 }
2243
/*
 * Constructor for a new device.
 */
2247 int dm_create(int minor, struct mapped_device **result)
2248 {
2249 struct mapped_device *md;
2250
2251 md = alloc_dev(minor);
2252 if (!md)
2253 return -ENXIO;
2254
2255 dm_ima_reset_data(md);
2256
2257 *result = md;
2258 return 0;
2259 }
2260
/*
 * Functions to manage md->type.
 * All are required to hold md->type_lock.
 */
2265 void dm_lock_md_type(struct mapped_device *md)
2266 {
2267 mutex_lock(&md->type_lock);
2268 }
2269
2270 void dm_unlock_md_type(struct mapped_device *md)
2271 {
2272 mutex_unlock(&md->type_lock);
2273 }
2274
2275 void dm_set_md_type(struct mapped_device *md, enum dm_queue_mode type)
2276 {
2277 BUG_ON(!mutex_is_locked(&md->type_lock));
2278 md->type = type;
2279 }
2280
2281 enum dm_queue_mode dm_get_md_type(struct mapped_device *md)
2282 {
2283 return md->type;
2284 }
2285
2286 struct target_type *dm_get_immutable_target_type(struct mapped_device *md)
2287 {
2288 return md->immutable_target_type;
2289 }
2290
/*
 * The queue_limits are only valid as long as you have a reference
 * count on 'md'.
 */
2295 struct queue_limits *dm_get_queue_limits(struct mapped_device *md)
2296 {
2297 BUG_ON(!atomic_read(&md->holders));
2298 return &md->queue->limits;
2299 }
2300 EXPORT_SYMBOL_GPL(dm_get_queue_limits);
2301
/*
 * Setup the DM device's queue based on md's type.
 */
2305 int dm_setup_md_queue(struct mapped_device *md, struct dm_table *t)
2306 {
2307 enum dm_queue_mode type = dm_table_get_type(t);
2308 struct queue_limits limits;
2309 int r;
2310
2311 switch (type) {
2312 case DM_TYPE_REQUEST_BASED:
2313 md->disk->fops = &dm_rq_blk_dops;
2314 r = dm_mq_init_request_queue(md, t);
2315 if (r) {
2316 DMERR("Cannot initialize queue for request-based dm mapped device");
2317 return r;
2318 }
2319 break;
2320 case DM_TYPE_BIO_BASED:
2321 case DM_TYPE_DAX_BIO_BASED:
2322 break;
2323 case DM_TYPE_NONE:
2324 WARN_ON_ONCE(true);
2325 break;
2326 }
2327
2328 r = dm_calculate_queue_limits(t, &limits);
2329 if (r) {
2330 DMERR("Cannot calculate initial queue limits");
2331 return r;
2332 }
2333 r = dm_table_set_restrictions(t, md->queue, &limits);
2334 if (r)
2335 return r;
2336
2337 r = add_disk(md->disk);
2338 if (r)
2339 return r;
2340
2341 r = dm_sysfs_init(md);
2342 if (r) {
2343 del_gendisk(md->disk);
2344 return r;
2345 }
2346 md->type = type;
2347 return 0;
2348 }
2349
2350 struct mapped_device *dm_get_md(dev_t dev)
2351 {
2352 struct mapped_device *md;
2353 unsigned minor = MINOR(dev);
2354
2355 if (MAJOR(dev) != _major || minor >= (1 << MINORBITS))
2356 return NULL;
2357
2358 spin_lock(&_minor_lock);
2359
2360 md = idr_find(&_minor_idr, minor);
2361 if (!md || md == MINOR_ALLOCED || (MINOR(disk_devt(dm_disk(md))) != minor) ||
2362 test_bit(DMF_FREEING, &md->flags) || dm_deleting_md(md)) {
2363 md = NULL;
2364 goto out;
2365 }
2366 dm_get(md);
2367 out:
2368 spin_unlock(&_minor_lock);
2369
2370 return md;
2371 }
2372 EXPORT_SYMBOL_GPL(dm_get_md);
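/*
 * Illustrative sketch (assumption): in-kernel callers look up a device by
 * dev_t and must drop the reference with dm_put() when done:
 *
 *	struct mapped_device *md = dm_get_md(dev);
 *
 *	if (md) {
 *		... use md ...
 *		dm_put(md);
 *	}
 */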
2373
2374 void *dm_get_mdptr(struct mapped_device *md)
2375 {
2376 return md->interface_ptr;
2377 }
2378
2379 void dm_set_mdptr(struct mapped_device *md, void *ptr)
2380 {
2381 md->interface_ptr = ptr;
2382 }
2383
2384 void dm_get(struct mapped_device *md)
2385 {
2386 atomic_inc(&md->holders);
2387 BUG_ON(test_bit(DMF_FREEING, &md->flags));
2388 }
2389
2390 int dm_hold(struct mapped_device *md)
2391 {
2392 spin_lock(&_minor_lock);
2393 if (test_bit(DMF_FREEING, &md->flags)) {
2394 spin_unlock(&_minor_lock);
2395 return -EBUSY;
2396 }
2397 dm_get(md);
2398 spin_unlock(&_minor_lock);
2399 return 0;
2400 }
2401 EXPORT_SYMBOL_GPL(dm_hold);
2402
2403 const char *dm_device_name(struct mapped_device *md)
2404 {
2405 return md->name;
2406 }
2407 EXPORT_SYMBOL_GPL(dm_device_name);
2408
2409 static void __dm_destroy(struct mapped_device *md, bool wait)
2410 {
2411 struct dm_table *map;
2412 int srcu_idx;
2413
2414 might_sleep();
2415
2416 spin_lock(&_minor_lock);
2417 idr_replace(&_minor_idr, MINOR_ALLOCED, MINOR(disk_devt(dm_disk(md))));
2418 set_bit(DMF_FREEING, &md->flags);
2419 spin_unlock(&_minor_lock);
2420
2421 blk_mark_disk_dead(md->disk);
2422
	/*
	 * Take suspend_lock so that presuspend and postsuspend methods
	 * do not race with internal suspend.
	 */
2427 mutex_lock(&md->suspend_lock);
2428 map = dm_get_live_table(md, &srcu_idx);
2429 if (!dm_suspended_md(md)) {
2430 dm_table_presuspend_targets(map);
2431 set_bit(DMF_SUSPENDED, &md->flags);
2432 set_bit(DMF_POST_SUSPENDING, &md->flags);
2433 dm_table_postsuspend_targets(map);
2434 }
2435
2436 dm_put_live_table(md, srcu_idx);
2437 mutex_unlock(&md->suspend_lock);
2438
2439
	/*
	 * Rare, but there may be I/O requests still going to complete,
	 * for example.  Drivers should hold an md reference until they
	 * are done.  No one should increment the reference count of the
	 * mapped_device after its state becomes DMF_FREEING.
	 */
2445 if (wait)
2446 while (atomic_read(&md->holders))
2447 msleep(1);
2448 else if (atomic_read(&md->holders))
2449 DMWARN("%s: Forcibly removing mapped_device still in use! (%d users)",
2450 dm_device_name(md), atomic_read(&md->holders));
2451
2452 dm_table_destroy(__unbind(md));
2453 free_dev(md);
2454 }
2455
2456 void dm_destroy(struct mapped_device *md)
2457 {
2458 __dm_destroy(md, true);
2459 }
2460
2461 void dm_destroy_immediate(struct mapped_device *md)
2462 {
2463 __dm_destroy(md, false);
2464 }
2465
2466 void dm_put(struct mapped_device *md)
2467 {
2468 atomic_dec(&md->holders);
2469 }
2470 EXPORT_SYMBOL_GPL(dm_put);
2471
2472 static bool dm_in_flight_bios(struct mapped_device *md)
2473 {
2474 int cpu;
2475 unsigned long sum = 0;
2476
2477 for_each_possible_cpu(cpu)
2478 sum += *per_cpu_ptr(md->pending_io, cpu);
2479
2480 return sum != 0;
2481 }
2482
2483 static int dm_wait_for_bios_completion(struct mapped_device *md, unsigned int task_state)
2484 {
2485 int r = 0;
2486 DEFINE_WAIT(wait);
2487
2488 while (true) {
2489 prepare_to_wait(&md->wait, &wait, task_state);
2490
2491 if (!dm_in_flight_bios(md))
2492 break;
2493
2494 if (signal_pending_state(task_state, current)) {
2495 r = -EINTR;
2496 break;
2497 }
2498
2499 io_schedule();
2500 }
2501 finish_wait(&md->wait, &wait);
2502
2503 smp_rmb();
2504
2505 return r;
2506 }
2507
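/*
 * Wait until all I/O mapped through the live table has completed.
 * Bio-based devices sleep on md->wait via dm_wait_for_bios_completion();
 * request-based devices poll blk_mq_queue_inflight() instead.
 * Returns -EINTR if @task_state is interruptible and a signal arrives.
 */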
2508 static int dm_wait_for_completion(struct mapped_device *md, unsigned int task_state)
2509 {
2510 int r = 0;
2511
2512 if (!queue_is_mq(md->queue))
2513 return dm_wait_for_bios_completion(md, task_state);
2514
2515 while (true) {
2516 if (!blk_mq_queue_inflight(md->queue))
2517 break;
2518
2519 if (signal_pending_state(task_state, current)) {
2520 r = -EINTR;
2521 break;
2522 }
2523
2524 msleep(5);
2525 }
2526
2527 return r;
2528 }
2529
2530 /*
2531  * Process the deferred bios
2532  */
2533 static void dm_wq_work(struct work_struct *work)
2534 {
2535 struct mapped_device *md = container_of(work, struct mapped_device, work);
2536 struct bio *bio;
2537
2538 while (!test_bit(DMF_BLOCK_IO_FOR_SUSPEND, &md->flags)) {
2539 spin_lock_irq(&md->deferred_lock);
2540 bio = bio_list_pop(&md->deferred);
2541 spin_unlock_irq(&md->deferred_lock);
2542
2543 if (!bio)
2544 break;
2545
2546 submit_bio_noacct(bio);
2547 }
2548 }
2549
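/*
 * Allow I/O submission again after a suspend: clear
 * DMF_BLOCK_IO_FOR_SUSPEND and kick md->wq so that dm_wq_work()
 * resubmits the bios that were parked on md->deferred.
 */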
2550 static void dm_queue_flush(struct mapped_device *md)
2551 {
2552 clear_bit(DMF_BLOCK_IO_FOR_SUSPEND, &md->flags);
2553 smp_mb__after_atomic();
2554 queue_work(md->wq, &md->work);
2555 }
2556
2557 /*
2558  * Swap in a new table, returning the old one for the caller to destroy.
2559  */
2560 struct dm_table *dm_swap_table(struct mapped_device *md, struct dm_table *table)
2561 {
2562 struct dm_table *live_map = NULL, *map = ERR_PTR(-EINVAL);
2563 struct queue_limits limits;
2564 int r;
2565
2566 mutex_lock(&md->suspend_lock);
2567
2568 /* device must be suspended */
2569 if (!dm_suspended_md(md))
2570 goto out;
2571
2572 /*
2573  * If the new table has no data devices, retain the existing limits.
2574  * This helps multipath with queue_if_no_path if all paths disappear,
2575  * then new I/O is queued based on these limits, and then some paths
2576  * disappear again and the original limits are used for the new I/O.
2577  */
2578 if (dm_table_has_no_data_devices(table)) {
2579 live_map = dm_get_live_table_fast(md);
2580 if (live_map)
2581 limits = md->queue->limits;
2582 dm_put_live_table_fast(md);
2583 }
2584
2585 if (!live_map) {
2586 r = dm_calculate_queue_limits(table, &limits);
2587 if (r) {
2588 map = ERR_PTR(r);
2589 goto out;
2590 }
2591 }
2592
2593 map = __bind(md, table, &limits);
2594 dm_issue_global_event();
2595
2596 out:
2597 mutex_unlock(&md->suspend_lock);
2598 return map;
2599 }
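/*
 * Illustrative use (not code from this file): the table-load/resume ioctl
 * path conceptually pairs dm_swap_table() with a suspend/resume cycle,
 * roughly:
 *
 *	dm_suspend(md, DM_SUSPEND_LOCKFS_FLAG);
 *	old_map = dm_swap_table(md, new_table);	// device must be suspended
 *	if (!IS_ERR(old_map))
 *		dm_table_destroy(old_map);
 *	dm_resume(md);
 *
 * dm_swap_table() returns the previous table (or an ERR_PTR) and fails
 * with -EINVAL if the device is not suspended.
 */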
2600
2601 /*
2602  * Functions to lock and unlock any filesystem running on the
2603  * device.
2604  */
2605 static int lock_fs(struct mapped_device *md)
2606 {
2607 int r;
2608
2609 WARN_ON(test_bit(DMF_FROZEN, &md->flags));
2610
2611 r = freeze_bdev(md->disk->part0);
2612 if (!r)
2613 set_bit(DMF_FROZEN, &md->flags);
2614 return r;
2615 }
2616
2617 static void unlock_fs(struct mapped_device *md)
2618 {
2619 if (!test_bit(DMF_FROZEN, &md->flags))
2620 return;
2621 thaw_bdev(md->disk->part0);
2622 clear_bit(DMF_FROZEN, &md->flags);
2623 }
2624
2625 /*
2626  * If __dm_suspend returns 0, the device is completely quiescent now:
2627  * there is no request-processing activity and all new bios are being
2628  * added to md->deferred.
2629  *
2630  * Caller must hold md->suspend_lock.
2631  */
2632
2633
2634 static int __dm_suspend(struct mapped_device *md, struct dm_table *map,
2635 unsigned suspend_flags, unsigned int task_state,
2636 int dmf_suspended_flag)
2637 {
2638 bool do_lockfs = suspend_flags & DM_SUSPEND_LOCKFS_FLAG;
2639 bool noflush = suspend_flags & DM_SUSPEND_NOFLUSH_FLAG;
2640 int r;
2641
2642 lockdep_assert_held(&md->suspend_lock);
2643
2644 /*
2645  * DMF_NOFLUSH_SUSPENDING must be set before presuspend.
2646  * This flag is cleared before dm_suspend returns.
2647  */
2648 if (noflush)
2649 set_bit(DMF_NOFLUSH_SUSPENDING, &md->flags);
2650 else
2651 DMDEBUG("%s: suspending with flush", dm_device_name(md));
2652
2653 /*
2654  * This gets reverted if there's an error later and the targets
2655  * provide the .presuspend_undo hook.
2656  */
2657 dm_table_presuspend_targets(map);
2658
2659 /*
2660  * Flush I/O to the device.
2661  * Any I/O submitted after lock_fs() may not be flushed.
2662  * noflush takes precedence over do_lockfs.
2663  * (lock_fs() flushes I/Os and waits for them to complete.)
2664  */
2665 if (!noflush && do_lockfs) {
2666 r = lock_fs(md);
2667 if (r) {
2668 dm_table_presuspend_undo_targets(map);
2669 return r;
2670 }
2671 }
2672
2673 /*
2674  * Here we must make sure that no process is submitting bios to the
2675  * targets, i.e. nobody may be executing dm_split_and_process_bio()
2676  * from dm_submit_bio().
2677  *
2678  * Setting DMF_BLOCK_IO_FOR_SUSPEND makes dm_submit_bio() park new bios
2679  * on md->deferred instead of mapping them, and synchronize_srcu() on
2680  * md->io_barrier waits for submitters that have already entered
2681  * dm_split_and_process_bio() to leave it.  dm_wq_work() is quiesced by
2682  * the flush_workqueue(md->wq) below.
2683  */
2684 set_bit(DMF_BLOCK_IO_FOR_SUSPEND, &md->flags);
2685 if (map)
2686 synchronize_srcu(&md->io_barrier);
2687
2688 /*
2689  * Stop md->queue before flushing md->wq in case request-based
2690  * dm defers requests to md->wq from md->queue.
2691  */
2692 if (dm_request_based(md))
2693 dm_stop_queue(md->queue);
2694
2695 flush_workqueue(md->wq);
2696
2697 /*
2698  * At this point no more requests are entering target request
2699  * routines.  We call dm_wait_for_completion to wait for all
2700  * existing requests to finish.
2701  */
2702 r = dm_wait_for_completion(md, task_state);
2703 if (!r)
2704 set_bit(dmf_suspended_flag, &md->flags);
2705
2706 if (noflush)
2707 clear_bit(DMF_NOFLUSH_SUSPENDING, &md->flags);
2708 if (map)
2709 synchronize_srcu(&md->io_barrier);
2710
2711 /* were we interrupted ? */
2712 if (r < 0) {
2713 dm_queue_flush(md);
2714
2715 if (dm_request_based(md))
2716 dm_start_queue(md->queue);
2717
2718 unlock_fs(md);
2719 dm_table_presuspend_undo_targets(map);
2720 /* the deferred bios were already requeued by dm_queue_flush() above */
2721 }
2722
2723 return r;
2724 }
2725
2726 /*
2727  * We need to be able to change a mapping table under a mounted
2728  * filesystem.  For example we might want to move some data in
2729  * the background.  Before the table can be swapped with
2730  * dm_bind_table, dm_suspend must be called to flush any in
2731  * flight bios and ensure that any further io gets deferred.
2732  */
2733 /*
2734  * Suspend mechanism in request-based dm.
2735  *
2736  * 1. Flush all I/Os by lock_fs() if needed.
2737  * 2. Stop dispatching any I/O by stopping the request_queue.
2738  * 3. Wait for all in-flight I/Os to be completed or requeued.
2739  *
2740  * To abort suspend, start the request_queue.
2741  */
2742 int dm_suspend(struct mapped_device *md, unsigned suspend_flags)
2743 {
2744 struct dm_table *map = NULL;
2745 int r = 0;
2746
2747 retry:
2748 mutex_lock_nested(&md->suspend_lock, SINGLE_DEPTH_NESTING);
2749
2750 if (dm_suspended_md(md)) {
2751 r = -EINVAL;
2752 goto out_unlock;
2753 }
2754
2755 if (dm_suspended_internally_md(md)) {
2756 /* already internally suspended, wait for internal resume */
2757 mutex_unlock(&md->suspend_lock);
2758 r = wait_on_bit(&md->flags, DMF_SUSPENDED_INTERNALLY, TASK_INTERRUPTIBLE);
2759 if (r)
2760 return r;
2761 goto retry;
2762 }
2763
2764 map = rcu_dereference_protected(md->map, lockdep_is_held(&md->suspend_lock));
2765
2766 r = __dm_suspend(md, map, suspend_flags, TASK_INTERRUPTIBLE, DMF_SUSPENDED);
2767 if (r)
2768 goto out_unlock;
2769
2770 set_bit(DMF_POST_SUSPENDING, &md->flags);
2771 dm_table_postsuspend_targets(map);
2772 clear_bit(DMF_POST_SUSPENDING, &md->flags);
2773
2774 out_unlock:
2775 mutex_unlock(&md->suspend_lock);
2776 return r;
2777 }
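/*
 * Illustrative note (not from this file): callers pass a combination of
 * DM_SUSPEND_LOCKFS_FLAG (freeze the filesystem on top of the device
 * before suspending) and DM_SUSPEND_NOFLUSH_FLAG (set
 * DMF_NOFLUSH_SUSPENDING so targets may requeue I/O instead of flushing
 * it); noflush takes precedence over lockfs, as __dm_suspend() shows.
 * A successful suspend is undone with dm_resume().
 */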
2778
2779 static int __dm_resume(struct mapped_device *md, struct dm_table *map)
2780 {
2781 if (map) {
2782 int r = dm_table_resume_targets(map);
2783 if (r)
2784 return r;
2785 }
2786
2787 dm_queue_flush(md);
2788
2789 /*
2790  * Flushing deferred I/Os must be done after targets are resumed
2791  * so that mapping of targets can work correctly.
2792  * Request-based dm is queueing the deferred I/Os in its request_queue.
2793  */
2794 if (dm_request_based(md))
2795 dm_start_queue(md->queue);
2796
2797 unlock_fs(md);
2798
2799 return 0;
2800 }
2801
2802 int dm_resume(struct mapped_device *md)
2803 {
2804 int r;
2805 struct dm_table *map = NULL;
2806
2807 retry:
2808 r = -EINVAL;
2809 mutex_lock_nested(&md->suspend_lock, SINGLE_DEPTH_NESTING);
2810
2811 if (!dm_suspended_md(md))
2812 goto out;
2813
2814 if (dm_suspended_internally_md(md)) {
2815 /* already internally suspended, wait for internal resume */
2816 mutex_unlock(&md->suspend_lock);
2817 r = wait_on_bit(&md->flags, DMF_SUSPENDED_INTERNALLY, TASK_INTERRUPTIBLE);
2818 if (r)
2819 return r;
2820 goto retry;
2821 }
2822
2823 map = rcu_dereference_protected(md->map, lockdep_is_held(&md->suspend_lock));
2824 if (!map || !dm_table_get_size(map))
2825 goto out;
2826
2827 r = __dm_resume(md, map);
2828 if (r)
2829 goto out;
2830
2831 clear_bit(DMF_SUSPENDED, &md->flags);
2832 out:
2833 mutex_unlock(&md->suspend_lock);
2834
2835 return r;
2836 }
2837
2838
2839 /*
2840  * Internal suspend/resume works like userspace-driven suspend. It waits
2841  * until all bios finish and prevents issuing new bios to the target drivers.
2842  * It may be used only from the kernel.
2843  */
2844 static void __dm_internal_suspend(struct mapped_device *md, unsigned suspend_flags)
2845 {
2846 struct dm_table *map = NULL;
2847
2848 lockdep_assert_held(&md->suspend_lock);
2849
2850 if (md->internal_suspend_count++)
2851 return;
2852
2853 if (dm_suspended_md(md)) {
2854 set_bit(DMF_SUSPENDED_INTERNALLY, &md->flags);
2855 return;
2856 }
2857
2858 map = rcu_dereference_protected(md->map, lockdep_is_held(&md->suspend_lock));
2859
2860 /*
2861  * Using TASK_UNINTERRUPTIBLE because only NOFLUSH internal suspend is
2862  * supported.  Properly supporting a TASK_INTERRUPTIBLE internal suspend
2863  * would require changing .presuspend to return an error -- avoid this
2864  * for now.
2865  */
2866 (void) __dm_suspend(md, map, suspend_flags, TASK_UNINTERRUPTIBLE,
2867 DMF_SUSPENDED_INTERNALLY);
2868
2869 set_bit(DMF_POST_SUSPENDING, &md->flags);
2870 dm_table_postsuspend_targets(map);
2871 clear_bit(DMF_POST_SUSPENDING, &md->flags);
2872 }
2873
2874 static void __dm_internal_resume(struct mapped_device *md)
2875 {
2876 BUG_ON(!md->internal_suspend_count);
2877
2878 if (--md->internal_suspend_count)
2879 return;
2880
2881 if (dm_suspended_md(md))
2882 goto done;
2883
2884 /*
2885  * Pass a NULL map so that dm_table_resume_targets(), which can fail,
2886  * is skipped; existing internal-resume callers do not need it.
2887  */
2888 (void) __dm_resume(md, NULL);
2889
2890 done:
2891 clear_bit(DMF_SUSPENDED_INTERNALLY, &md->flags);
2892 smp_mb__after_atomic();
2893 wake_up_bit(&md->flags, DMF_SUSPENDED_INTERNALLY);
2894 }
2895
2896 void dm_internal_suspend_noflush(struct mapped_device *md)
2897 {
2898 mutex_lock(&md->suspend_lock);
2899 __dm_internal_suspend(md, DM_SUSPEND_NOFLUSH_FLAG);
2900 mutex_unlock(&md->suspend_lock);
2901 }
2902 EXPORT_SYMBOL_GPL(dm_internal_suspend_noflush);
2903
2904 void dm_internal_resume(struct mapped_device *md)
2905 {
2906 mutex_lock(&md->suspend_lock);
2907 __dm_internal_resume(md);
2908 mutex_unlock(&md->suspend_lock);
2909 }
2910 EXPORT_SYMBOL_GPL(dm_internal_resume);
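/*
 * Internal suspends nest: md->internal_suspend_count is bumped on every
 * __dm_internal_suspend() and the device is only resumed once the count
 * drops back to zero, so dm_internal_suspend_noflush() and
 * dm_internal_resume() must be called in balanced pairs.
 */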
2911
2912
2913 /*
2914  * Fast variants of internal suspend/resume hold md->suspend_lock,
2915  * which prevents interaction with userspace-driven suspend.
2916  */
2917 void dm_internal_suspend_fast(struct mapped_device *md)
2918 {
2919 mutex_lock(&md->suspend_lock);
2920 if (dm_suspended_md(md) || dm_suspended_internally_md(md))
2921 return;
2922
2923 set_bit(DMF_BLOCK_IO_FOR_SUSPEND, &md->flags);
2924 synchronize_srcu(&md->io_barrier);
2925 flush_workqueue(md->wq);
2926 dm_wait_for_completion(md, TASK_UNINTERRUPTIBLE);
2927 }
2928 EXPORT_SYMBOL_GPL(dm_internal_suspend_fast);
2929
2930 void dm_internal_resume_fast(struct mapped_device *md)
2931 {
2932 if (dm_suspended_md(md) || dm_suspended_internally_md(md))
2933 goto done;
2934
2935 dm_queue_flush(md);
2936
2937 done:
2938 mutex_unlock(&md->suspend_lock);
2939 }
2940 EXPORT_SYMBOL_GPL(dm_internal_resume_fast);
2941
2942 /*
2943  * Event notifications.
2944  */
2945 int dm_kobject_uevent(struct mapped_device *md, enum kobject_action action,
2946 unsigned cookie)
2947 {
2948 int r;
2949 unsigned noio_flag;
2950 char udev_cookie[DM_COOKIE_LENGTH];
2951 char *envp[] = { udev_cookie, NULL };
2952
2953 noio_flag = memalloc_noio_save();
2954
2955 if (!cookie)
2956 r = kobject_uevent(&disk_to_dev(md->disk)->kobj, action);
2957 else {
2958 snprintf(udev_cookie, DM_COOKIE_LENGTH, "%s=%u",
2959 DM_COOKIE_ENV_VAR_NAME, cookie);
2960 r = kobject_uevent_env(&disk_to_dev(md->disk)->kobj,
2961 action, envp);
2962 }
2963
2964 memalloc_noio_restore(noio_flag);
2965
2966 return r;
2967 }
2968
2969 uint32_t dm_next_uevent_seq(struct mapped_device *md)
2970 {
2971 return atomic_add_return(1, &md->uevent_seq);
2972 }
2973
2974 uint32_t dm_get_event_nr(struct mapped_device *md)
2975 {
2976 return atomic_read(&md->event_nr);
2977 }
2978
2979 int dm_wait_event(struct mapped_device *md, int event_nr)
2980 {
2981 return wait_event_interruptible(md->eventq,
2982 (event_nr != atomic_read(&md->event_nr)));
2983 }
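/*
 * Illustrative pattern (not code from this file): an event waiter samples
 * the counter first and then sleeps until it changes,
 *
 *	uint32_t ev = dm_get_event_nr(md);
 *	...
 *	if (dm_wait_event(md, ev))
 *		return -ERESTARTSYS;	// interrupted by a signal
 *
 * dm_wait_event() returns non-zero only when interrupted.
 */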
2984
2985 void dm_uevent_add(struct mapped_device *md, struct list_head *elist)
2986 {
2987 unsigned long flags;
2988
2989 spin_lock_irqsave(&md->uevent_lock, flags);
2990 list_add(elist, &md->uevent_list);
2991 spin_unlock_irqrestore(&md->uevent_lock, flags);
2992 }
2993
2994
2995 /*
2996  * The gendisk is only valid as long as you have a reference count on 'md'.
2997  */
2998 struct gendisk *dm_disk(struct mapped_device *md)
2999 {
3000 return md->disk;
3001 }
3002 EXPORT_SYMBOL_GPL(dm_disk);
3003
3004 struct kobject *dm_kobject(struct mapped_device *md)
3005 {
3006 return &md->kobj_holder.kobj;
3007 }
3008
3009 struct mapped_device *dm_get_from_kobject(struct kobject *kobj)
3010 {
3011 struct mapped_device *md;
3012
3013 md = container_of(kobj, struct mapped_device, kobj_holder.kobj);
3014
3015 spin_lock(&_minor_lock);
3016 if (test_bit(DMF_FREEING, &md->flags) || dm_deleting_md(md)) {
3017 md = NULL;
3018 goto out;
3019 }
3020 dm_get(md);
3021 out:
3022 spin_unlock(&_minor_lock);
3023
3024 return md;
3025 }
3026
3027 int dm_suspended_md(struct mapped_device *md)
3028 {
3029 return test_bit(DMF_SUSPENDED, &md->flags);
3030 }
3031
3032 static int dm_post_suspending_md(struct mapped_device *md)
3033 {
3034 return test_bit(DMF_POST_SUSPENDING, &md->flags);
3035 }
3036
3037 int dm_suspended_internally_md(struct mapped_device *md)
3038 {
3039 return test_bit(DMF_SUSPENDED_INTERNALLY, &md->flags);
3040 }
3041
3042 int dm_test_deferred_remove_flag(struct mapped_device *md)
3043 {
3044 return test_bit(DMF_DEFERRED_REMOVE, &md->flags);
3045 }
3046
3047 int dm_suspended(struct dm_target *ti)
3048 {
3049 return dm_suspended_md(ti->table->md);
3050 }
3051 EXPORT_SYMBOL_GPL(dm_suspended);
3052
3053 int dm_post_suspending(struct dm_target *ti)
3054 {
3055 return dm_post_suspending_md(ti->table->md);
3056 }
3057 EXPORT_SYMBOL_GPL(dm_post_suspending);
3058
3059 int dm_noflush_suspending(struct dm_target *ti)
3060 {
3061 return __noflush_suspending(ti->table->md);
3062 }
3063 EXPORT_SYMBOL_GPL(dm_noflush_suspending);
3064
3065 void dm_free_md_mempools(struct dm_md_mempools *pools)
3066 {
3067 if (!pools)
3068 return;
3069
3070 bioset_exit(&pools->bs);
3071 bioset_exit(&pools->io_bs);
3072
3073 kfree(pools);
3074 }
3075
3076 struct dm_pr {
3077 u64 old_key;
3078 u64 new_key;
3079 u32 flags;
3080 bool abort;
3081 bool fail_early;
3082 int ret;
3083 enum pr_type type;
3084 };
3085
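/*
 * Run a persistent-reservation callout against the single target of the
 * live table.  Returns -ENOTTY if there is no live table or the table
 * does not have exactly one target, -EAGAIN while the device is
 * suspended, and -EINVAL if the target cannot iterate its devices;
 * otherwise it returns 0 and the callout's own status is left in pr->ret.
 */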
3086 static int dm_call_pr(struct block_device *bdev, iterate_devices_callout_fn fn,
3087 struct dm_pr *pr)
3088 {
3089 struct mapped_device *md = bdev->bd_disk->private_data;
3090 struct dm_table *table;
3091 struct dm_target *ti;
3092 int ret = -ENOTTY, srcu_idx;
3093
3094 table = dm_get_live_table(md, &srcu_idx);
3095 if (!table || !dm_table_get_size(table))
3096 goto out;
3097
3098 /* We only support devices that have a single target */
3099 if (table->num_targets != 1)
3100 goto out;
3101 ti = dm_table_get_target(table, 0);
3102
3103 if (dm_suspended_md(md)) {
3104 ret = -EAGAIN;
3105 goto out;
3106 }
3107
3108 ret = -EINVAL;
3109 if (!ti->type->iterate_devices)
3110 goto out;
3111
3112 ti->type->iterate_devices(ti, fn, pr);
3113 ret = 0;
3114 out:
3115 dm_put_live_table(md, srcu_idx);
3116 return ret;
3117 }
3118
3119
3120
3121 /* For register / unregister we need to manually call out to every path. */
3122 static int __dm_pr_register(struct dm_target *ti, struct dm_dev *dev,
3123 sector_t start, sector_t len, void *data)
3124 {
3125 struct dm_pr *pr = data;
3126 const struct pr_ops *ops = dev->bdev->bd_disk->fops->pr_ops;
3127 int ret;
3128
3129 if (!ops || !ops->pr_register) {
3130 pr->ret = -EOPNOTSUPP;
3131 return -1;
3132 }
3133
3134 ret = ops->pr_register(dev->bdev, pr->old_key, pr->new_key, pr->flags);
3135 if (!ret)
3136 return 0;
3137
3138 if (!pr->ret)
3139 pr->ret = ret;
3140
3141 if (pr->fail_early)
3142 return -1;
3143
3144 return 0;
3145 }
3146
3147 static int dm_pr_register(struct block_device *bdev, u64 old_key, u64 new_key,
3148 u32 flags)
3149 {
3150 struct dm_pr pr = {
3151 .old_key = old_key,
3152 .new_key = new_key,
3153 .flags = flags,
3154 .fail_early = true,
3155 .ret = 0,
3156 };
3157 int ret;
3158
3159 ret = dm_call_pr(bdev, __dm_pr_register, &pr);
3160 if (ret) {
3161 /* Didn't even get to register a path */
3162 return ret;
3163 }
3164
3165 if (!pr.ret)
3166 return 0;
3167 ret = pr.ret;
3168
3169 if (!new_key)
3170 return ret;
3171
3172 /* unregister all paths if we failed to register any path */
3173 pr.old_key = new_key;
3174 pr.new_key = 0;
3175 pr.flags = 0;
3176 pr.fail_early = false;
3177 (void) dm_call_pr(bdev, __dm_pr_register, &pr);
3178 return ret;
3179 }
3180
3181
3182 static int __dm_pr_reserve(struct dm_target *ti, struct dm_dev *dev,
3183 sector_t start, sector_t len, void *data)
3184 {
3185 struct dm_pr *pr = data;
3186 const struct pr_ops *ops = dev->bdev->bd_disk->fops->pr_ops;
3187
3188 if (!ops || !ops->pr_reserve) {
3189 pr->ret = -EOPNOTSUPP;
3190 return -1;
3191 }
3192
3193 pr->ret = ops->pr_reserve(dev->bdev, pr->old_key, pr->type, pr->flags);
3194 if (!pr->ret)
3195 return -1;
3196
3197 return 0;
3198 }
3199
3200 static int dm_pr_reserve(struct block_device *bdev, u64 key, enum pr_type type,
3201 u32 flags)
3202 {
3203 struct dm_pr pr = {
3204 .old_key = key,
3205 .flags = flags,
3206 .type = type,
3207 .fail_early = false,
3208 .ret = 0,
3209 };
3210 int ret;
3211
3212 ret = dm_call_pr(bdev, __dm_pr_reserve, &pr);
3213 if (ret)
3214 return ret;
3215
3216 return pr.ret;
3217 }
3218
3219
3220 /*
3221  * A reservation is held on only one underlying path, but devices that
3222  * do not hold it still return success for a release, so the release is
3223  * sent down every path; iteration stops early only if a path reports
3224  * an error.
3225  */
3225 static int __dm_pr_release(struct dm_target *ti, struct dm_dev *dev,
3226 sector_t start, sector_t len, void *data)
3227 {
3228 struct dm_pr *pr = data;
3229 const struct pr_ops *ops = dev->bdev->bd_disk->fops->pr_ops;
3230
3231 if (!ops || !ops->pr_release) {
3232 pr->ret = -EOPNOTSUPP;
3233 return -1;
3234 }
3235
3236 pr->ret = ops->pr_release(dev->bdev, pr->old_key, pr->type);
3237 if (pr->ret)
3238 return -1;
3239
3240 return 0;
3241 }
3242
3243 static int dm_pr_release(struct block_device *bdev, u64 key, enum pr_type type)
3244 {
3245 struct dm_pr pr = {
3246 .old_key = key,
3247 .type = type,
3248 .fail_early = false,
3249 };
3250 int ret;
3251
3252 ret = dm_call_pr(bdev, __dm_pr_release, &pr);
3253 if (ret)
3254 return ret;
3255
3256 return pr.ret;
3257 }
3258
3259 static int __dm_pr_preempt(struct dm_target *ti, struct dm_dev *dev,
3260 sector_t start, sector_t len, void *data)
3261 {
3262 struct dm_pr *pr = data;
3263 const struct pr_ops *ops = dev->bdev->bd_disk->fops->pr_ops;
3264
3265 if (!ops || !ops->pr_preempt) {
3266 pr->ret = -EOPNOTSUPP;
3267 return -1;
3268 }
3269
3270 pr->ret = ops->pr_preempt(dev->bdev, pr->old_key, pr->new_key, pr->type,
3271 pr->abort);
3272 if (!pr->ret)
3273 return -1;
3274
3275 return 0;
3276 }
3277
3278 static int dm_pr_preempt(struct block_device *bdev, u64 old_key, u64 new_key,
3279 enum pr_type type, bool abort)
3280 {
3281 struct dm_pr pr = {
3282 .new_key = new_key,
3283 .old_key = old_key,
3284 .type = type,
3285 .fail_early = false,
3286 };
3287 int ret;
3288
3289 ret = dm_call_pr(bdev, __dm_pr_preempt, &pr);
3290 if (ret)
3291 return ret;
3292
3293 return pr.ret;
3294 }
3295
3296 static int dm_pr_clear(struct block_device *bdev, u64 key)
3297 {
3298 struct mapped_device *md = bdev->bd_disk->private_data;
3299 const struct pr_ops *ops;
3300 int r, srcu_idx;
3301
3302 r = dm_prepare_ioctl(md, &srcu_idx, &bdev);
3303 if (r < 0)
3304 goto out;
3305
3306 ops = bdev->bd_disk->fops->pr_ops;
3307 if (ops && ops->pr_clear)
3308 r = ops->pr_clear(bdev, key);
3309 else
3310 r = -EOPNOTSUPP;
3311 out:
3312 dm_unprepare_ioctl(md, srcu_idx);
3313 return r;
3314 }
3315
3316 static const struct pr_ops dm_pr_ops = {
3317 .pr_register = dm_pr_register,
3318 .pr_reserve = dm_pr_reserve,
3319 .pr_release = dm_pr_release,
3320 .pr_preempt = dm_pr_preempt,
3321 .pr_clear = dm_pr_clear,
3322 };
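/*
 * These pr_ops back the block-layer persistent reservation ioctls
 * (IOC_PR_REGISTER, IOC_PR_RESERVE, ...) issued on the dm device node;
 * each handler fans the operation out to the underlying paths via
 * dm_call_pr(), except pr_clear which uses dm_prepare_ioctl() to reach
 * a single underlying device.
 */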
3323
3324 static const struct block_device_operations dm_blk_dops = {
3325 .submit_bio = dm_submit_bio,
3326 .poll_bio = dm_poll_bio,
3327 .open = dm_blk_open,
3328 .release = dm_blk_close,
3329 .ioctl = dm_blk_ioctl,
3330 .getgeo = dm_blk_getgeo,
3331 .report_zones = dm_blk_report_zones,
3332 .pr_ops = &dm_pr_ops,
3333 .owner = THIS_MODULE
3334 };
3335
3336 static const struct block_device_operations dm_rq_blk_dops = {
3337 .open = dm_blk_open,
3338 .release = dm_blk_close,
3339 .ioctl = dm_blk_ioctl,
3340 .getgeo = dm_blk_getgeo,
3341 .pr_ops = &dm_pr_ops,
3342 .owner = THIS_MODULE
3343 };
3344
3345 static const struct dax_operations dm_dax_ops = {
3346 .direct_access = dm_dax_direct_access,
3347 .zero_page_range = dm_dax_zero_page_range,
3348 .recovery_write = dm_dax_recovery_write,
3349 };
3350
3351
3352
3353 /* module hooks */
3354 module_init(dm_init);
3355 module_exit(dm_exit);
3356
3357 module_param(major, uint, 0);
3358 MODULE_PARM_DESC(major, "The major number of the device mapper");
3359
3360 module_param(reserved_bio_based_ios, uint, S_IRUGO | S_IWUSR);
3361 MODULE_PARM_DESC(reserved_bio_based_ios, "Reserved IOs in bio-based mempools");
3362
3363 module_param(dm_numa_node, int, S_IRUGO | S_IWUSR);
3364 MODULE_PARM_DESC(dm_numa_node, "NUMA node for DM device memory allocations");
3365
3366 module_param(swap_bios, int, S_IRUGO | S_IWUSR);
3367 MODULE_PARM_DESC(swap_bios, "Maximum allowed inflight swap IOs");
3368
3369 MODULE_DESCRIPTION(DM_NAME " driver");
3370 MODULE_AUTHOR("Joe Thornber <dm-devel@redhat.com>");
3371 MODULE_LICENSE("GPL");