// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright (C) 2012 Red Hat. All rights reserved.
 *
 * This file is released under the GPL.
 */
0007 #include "dm.h"
0008 #include "dm-bio-prison-v2.h"
0009 #include "dm-bio-record.h"
0010 #include "dm-cache-metadata.h"
0011 #include "dm-io-tracker.h"
0012
0013 #include <linux/dm-io.h>
0014 #include <linux/dm-kcopyd.h>
0015 #include <linux/jiffies.h>
0016 #include <linux/init.h>
0017 #include <linux/mempool.h>
0018 #include <linux/module.h>
0019 #include <linux/rwsem.h>
0020 #include <linux/slab.h>
0021 #include <linux/vmalloc.h>
0022
0023 #define DM_MSG_PREFIX "cache"
0024
0025 DECLARE_DM_KCOPYD_THROTTLE_WITH_MODULE_PARM(cache_copy_throttle,
0026 "A percentage of time allocated for copying to and/or from cache");

/*----------------------------------------------------------------*/

/*
 * Glossary:
 *
 * oblock: index of an origin block
 * cblock: index of a cache block
 * dblock: index of a discard block
 * promotion: movement of a block from origin to cache
 * demotion: movement of a block from cache to origin
 * migration: movement of a block between the origin and cache device,
 *	      either direction
 */

/*----------------------------------------------------------------*/

0047 struct continuation {
0048 struct work_struct ws;
0049 blk_status_t input;
0050 };
0051
0052 static inline void init_continuation(struct continuation *k,
0053 void (*fn)(struct work_struct *))
0054 {
0055 INIT_WORK(&k->ws, fn);
0056 k->input = 0;
0057 }
0058
0059 static inline void queue_continuation(struct workqueue_struct *wq,
0060 struct continuation *k)
0061 {
0062 queue_work(wq, &k->ws);
0063 }
0064
/*----------------------------------------------------------------*/

/*
 * The batcher collects together pieces of work that need a particular
 * commit to unblock them.
 */
0071 struct batcher {
	/*
	 * The operation that everyone is waiting for.
	 */
0075 blk_status_t (*commit_op)(void *context);
0076 void *commit_context;
0077
	/*
	 * This is how bios should be issued once the commit op is
	 * complete (accounted_request()).
	 */
0082 void (*issue_op)(struct bio *bio, void *context);
0083 void *issue_context;
0084
	/*
	 * Queued work gets put on here after commit.
	 */
0088 struct workqueue_struct *wq;
0089
0090 spinlock_t lock;
0091 struct list_head work_items;
0092 struct bio_list bios;
0093 struct work_struct commit_work;
0094
0095 bool commit_scheduled;
0096 };
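
/*
 * In this target a batcher is used as cache->committer: its commit_op is
 * commit_op() and its issue_op is issue_op() (both defined later in this
 * file), so bios and continuations that are waiting on a metadata commit
 * get released once that commit has been attempted.
 */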
0097
0098 static void __commit(struct work_struct *_ws)
0099 {
0100 struct batcher *b = container_of(_ws, struct batcher, commit_work);
0101 blk_status_t r;
0102 struct list_head work_items;
0103 struct work_struct *ws, *tmp;
0104 struct continuation *k;
0105 struct bio *bio;
0106 struct bio_list bios;
0107
0108 INIT_LIST_HEAD(&work_items);
0109 bio_list_init(&bios);
0110
	/*
	 * We have to grab these before the commit_op to avoid a race
	 * condition.
	 */
0115 spin_lock_irq(&b->lock);
0116 list_splice_init(&b->work_items, &work_items);
0117 bio_list_merge(&bios, &b->bios);
0118 bio_list_init(&b->bios);
0119 b->commit_scheduled = false;
0120 spin_unlock_irq(&b->lock);
0121
0122 r = b->commit_op(b->commit_context);
0123
0124 list_for_each_entry_safe(ws, tmp, &work_items, entry) {
0125 k = container_of(ws, struct continuation, ws);
0126 k->input = r;
0127 INIT_LIST_HEAD(&ws->entry);
0128 queue_work(b->wq, ws);
0129 }
0130
0131 while ((bio = bio_list_pop(&bios))) {
0132 if (r) {
0133 bio->bi_status = r;
0134 bio_endio(bio);
0135 } else
0136 b->issue_op(bio, b->issue_context);
0137 }
0138 }
0139
0140 static void batcher_init(struct batcher *b,
0141 blk_status_t (*commit_op)(void *),
0142 void *commit_context,
0143 void (*issue_op)(struct bio *bio, void *),
0144 void *issue_context,
0145 struct workqueue_struct *wq)
0146 {
0147 b->commit_op = commit_op;
0148 b->commit_context = commit_context;
0149 b->issue_op = issue_op;
0150 b->issue_context = issue_context;
0151 b->wq = wq;
0152
0153 spin_lock_init(&b->lock);
0154 INIT_LIST_HEAD(&b->work_items);
0155 bio_list_init(&b->bios);
0156 INIT_WORK(&b->commit_work, __commit);
0157 b->commit_scheduled = false;
0158 }
0159
0160 static void async_commit(struct batcher *b)
0161 {
0162 queue_work(b->wq, &b->commit_work);
0163 }
0164
0165 static void continue_after_commit(struct batcher *b, struct continuation *k)
0166 {
0167 bool commit_scheduled;
0168
0169 spin_lock_irq(&b->lock);
0170 commit_scheduled = b->commit_scheduled;
0171 list_add_tail(&k->ws.entry, &b->work_items);
0172 spin_unlock_irq(&b->lock);
0173
0174 if (commit_scheduled)
0175 async_commit(b);
0176 }
0177
/*
 * Bios are errored if commit failed.
 */
0181 static void issue_after_commit(struct batcher *b, struct bio *bio)
0182 {
0183 bool commit_scheduled;
0184
0185 spin_lock_irq(&b->lock);
0186 commit_scheduled = b->commit_scheduled;
0187 bio_list_add(&b->bios, bio);
0188 spin_unlock_irq(&b->lock);
0189
0190 if (commit_scheduled)
0191 async_commit(b);
0192 }
0193
/*
 * Call this if some urgent work is waiting for the commit to complete.
 */
0197 static void schedule_commit(struct batcher *b)
0198 {
0199 bool immediate;
0200
0201 spin_lock_irq(&b->lock);
0202 immediate = !list_empty(&b->work_items) || !bio_list_empty(&b->bios);
0203 b->commit_scheduled = true;
0204 spin_unlock_irq(&b->lock);
0205
0206 if (immediate)
0207 async_commit(b);
0208 }
0209
/*
 * There are a couple of places where we let a bio run, but want to do some
 * work before calling its endio function.  We do this by temporarily
 * changing the endio fn.
 */
0215 struct dm_hook_info {
0216 bio_end_io_t *bi_end_io;
0217 };
0218
0219 static void dm_hook_bio(struct dm_hook_info *h, struct bio *bio,
0220 bio_end_io_t *bi_end_io, void *bi_private)
0221 {
0222 h->bi_end_io = bio->bi_end_io;
0223
0224 bio->bi_end_io = bi_end_io;
0225 bio->bi_private = bi_private;
0226 }
0227
0228 static void dm_unhook_bio(struct dm_hook_info *h, struct bio *bio)
0229 {
0230 bio->bi_end_io = h->bi_end_io;
0231 }
0232
/*----------------------------------------------------------------*/

0235 #define MIGRATION_POOL_SIZE 128
0236 #define COMMIT_PERIOD HZ
0237 #define MIGRATION_COUNT_WINDOW 10
0238
/*
 * The block size of the device holding cache data must be
 * between 32KB and 1GB.
 */
0243 #define DATA_DEV_BLOCK_SIZE_MIN_SECTORS (32 * 1024 >> SECTOR_SHIFT)
0244 #define DATA_DEV_BLOCK_SIZE_MAX_SECTORS (1024 * 1024 * 1024 >> SECTOR_SHIFT)
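/* i.e. a minimum of 64 sectors (32 KiB) and a maximum of 2097152 sectors (1 GiB). */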
0245
0246 enum cache_metadata_mode {
0247 CM_WRITE,
0248 CM_READ_ONLY,
0249 CM_FAIL
0250 };
0251
0252 enum cache_io_mode {
	/*
	 * Data is written to cached blocks only.  These blocks are marked
	 * dirty.  If you lose the cache device you will lose data.
	 * Potential performance increase for both reads and writes.
	 */
0258 CM_IO_WRITEBACK,

	/*
	 * Data is written to both cache and origin.  Blocks are never
	 * dirty.  Potential performance benefit for reads only.
	 */
0264 CM_IO_WRITETHROUGH,

	/*
	 * A degraded mode useful for various cache coherency situations
	 * (eg, rolling back snapshots).  Reads and writes always go to the
	 * origin.  If a write goes to a cached oblock, then the cache
	 * block is invalidated.
	 */
0272 CM_IO_PASSTHROUGH
0273 };
0274
0275 struct cache_features {
0276 enum cache_metadata_mode mode;
0277 enum cache_io_mode io_mode;
0278 unsigned metadata_version;
0279 bool discard_passdown:1;
0280 };
0281
0282 struct cache_stats {
0283 atomic_t read_hit;
0284 atomic_t read_miss;
0285 atomic_t write_hit;
0286 atomic_t write_miss;
0287 atomic_t demotion;
0288 atomic_t promotion;
0289 atomic_t writeback;
0290 atomic_t copies_avoided;
0291 atomic_t cache_cell_clash;
0292 atomic_t commit_count;
0293 atomic_t discard_count;
0294 };
0295
0296 struct cache {
0297 struct dm_target *ti;
0298 spinlock_t lock;
0299
	/*
	 * Fields for converting from sectors to blocks.
	 */
0303 int sectors_per_block_shift;
0304 sector_t sectors_per_block;
0305
0306 struct dm_cache_metadata *cmd;
0307
	/*
	 * Metadata is written to this device.
	 */
0311 struct dm_dev *metadata_dev;
0312
	/*
	 * The slower of the two data devices.  Typically a spindle.
	 */
0316 struct dm_dev *origin_dev;
0317
	/*
	 * The faster of the two data devices.  Typically an SSD.
	 */
0321 struct dm_dev *cache_dev;
0322
	/*
	 * Size of the origin device in _complete_ blocks and native sectors.
	 */
0326 dm_oblock_t origin_blocks;
0327 sector_t origin_sectors;
0328
	/*
	 * Size of the cache device in blocks.
	 */
0332 dm_cblock_t cache_size;
0333
	/*
	 * Fields used for user-requested invalidation of cache blocks.
	 */
0337 spinlock_t invalidation_lock;
0338 struct list_head invalidation_requests;
0339
0340 sector_t migration_threshold;
0341 wait_queue_head_t migration_wait;
0342 atomic_t nr_allocated_migrations;
0343
	/*
	 * The number of in-flight migrations that are performing
	 * background io, eg. promotion, demotion, writeback.
	 */
0348 atomic_t nr_io_migrations;
0349
0350 struct bio_list deferred_bios;
0351
0352 struct rw_semaphore quiesce_lock;
0353
	/*
	 * One bit per discard block, set if discarded.  discard_block_size
	 * is a power-of-two multiple of the cache block size.
	 */
0357 dm_dblock_t discard_nr_blocks;
0358 unsigned long *discard_bitset;
0359 uint32_t discard_block_size;
0360
	/*
	 * Rather than reconstructing the table line for the status we just
	 * save it and regurgitate.
	 */
0365 unsigned nr_ctr_args;
0366 const char **ctr_args;
0367
0368 struct dm_kcopyd_client *copier;
0369 struct work_struct deferred_bio_worker;
0370 struct work_struct migration_worker;
0371 struct workqueue_struct *wq;
0372 struct delayed_work waker;
0373 struct dm_bio_prison_v2 *prison;
0374
	/*
	 * cache_size entries, dirty if set.
	 */
0378 unsigned long *dirty_bitset;
0379 atomic_t nr_dirty;
0380
0381 unsigned policy_nr_args;
0382 struct dm_cache_policy *policy;
0383
	/*
	 * Cache features such as write-through.
	 */
0387 struct cache_features features;
0388
0389 struct cache_stats stats;
0390
0391 bool need_tick_bio:1;
0392 bool sized:1;
0393 bool invalidate:1;
0394 bool commit_requested:1;
0395 bool loaded_mappings:1;
0396 bool loaded_discards:1;
0397
0398 struct rw_semaphore background_work_lock;
0399
0400 struct batcher committer;
0401 struct work_struct commit_ws;
0402
0403 struct dm_io_tracker tracker;
0404
0405 mempool_t migration_pool;
0406
0407 struct bio_set bs;
0408 };
0409
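/*
 * Per-bio state stashed via dm_per_bio_data(): the shared prison cell held
 * for the bio (if any), the saved end_io hook and the number of sectors
 * accounted to the io tracker.
 */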
0410 struct per_bio_data {
0411 bool tick:1;
0412 unsigned req_nr:2;
0413 struct dm_bio_prison_cell_v2 *cell;
0414 struct dm_hook_info hook_info;
0415 sector_t len;
0416 };
0417
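/*
 * State for a single migration: either a policy directed op (promotion,
 * demotion, writeback) or an invalidation.  The embedded continuation lets
 * the work be resumed after quiescing or a metadata commit.
 */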
0418 struct dm_cache_migration {
0419 struct continuation k;
0420 struct cache *cache;
0421
0422 struct policy_work *op;
0423 struct bio *overwrite_bio;
0424 struct dm_bio_prison_cell_v2 *cell;
0425
0426 dm_cblock_t invalidate_cblock;
0427 dm_oblock_t invalidate_oblock;
0428 };
0429
0430
0431
0432 static bool writethrough_mode(struct cache *cache)
0433 {
0434 return cache->features.io_mode == CM_IO_WRITETHROUGH;
0435 }
0436
0437 static bool writeback_mode(struct cache *cache)
0438 {
0439 return cache->features.io_mode == CM_IO_WRITEBACK;
0440 }
0441
0442 static inline bool passthrough_mode(struct cache *cache)
0443 {
0444 return unlikely(cache->features.io_mode == CM_IO_PASSTHROUGH);
0445 }
0446
0447
0448
0449 static void wake_deferred_bio_worker(struct cache *cache)
0450 {
0451 queue_work(cache->wq, &cache->deferred_bio_worker);
0452 }
0453
0454 static void wake_migration_worker(struct cache *cache)
0455 {
0456 if (passthrough_mode(cache))
0457 return;
0458
0459 queue_work(cache->wq, &cache->migration_worker);
0460 }
0461
0462
0463
0464 static struct dm_bio_prison_cell_v2 *alloc_prison_cell(struct cache *cache)
0465 {
0466 return dm_bio_prison_alloc_cell_v2(cache->prison, GFP_NOIO);
0467 }
0468
0469 static void free_prison_cell(struct cache *cache, struct dm_bio_prison_cell_v2 *cell)
0470 {
0471 dm_bio_prison_free_cell_v2(cache->prison, cell);
0472 }
0473
0474 static struct dm_cache_migration *alloc_migration(struct cache *cache)
0475 {
0476 struct dm_cache_migration *mg;
0477
0478 mg = mempool_alloc(&cache->migration_pool, GFP_NOIO);
0479
0480 memset(mg, 0, sizeof(*mg));
0481
0482 mg->cache = cache;
0483 atomic_inc(&cache->nr_allocated_migrations);
0484
0485 return mg;
0486 }
0487
0488 static void free_migration(struct dm_cache_migration *mg)
0489 {
0490 struct cache *cache = mg->cache;
0491
0492 if (atomic_dec_and_test(&cache->nr_allocated_migrations))
0493 wake_up(&cache->migration_wait);
0494
0495 mempool_free(mg, &cache->migration_pool);
0496 }
0497
0498
0499
0500 static inline dm_oblock_t oblock_succ(dm_oblock_t b)
0501 {
0502 return to_oblock(from_oblock(b) + 1ull);
0503 }
0504
0505 static void build_key(dm_oblock_t begin, dm_oblock_t end, struct dm_cell_key_v2 *key)
0506 {
0507 key->virtual = 0;
0508 key->dev = 0;
0509 key->block_begin = from_oblock(begin);
0510 key->block_end = from_oblock(end);
0511 }
0512
/*----------------------------------------------------------------*/

/*
 * There are two lock levels.  Level 0 is used to prevent WRITEs; level 1
 * prevents *both* READs and WRITEs.
 */
0517 #define WRITE_LOCK_LEVEL 0
0518 #define READ_WRITE_LOCK_LEVEL 1
0519
0520 static unsigned lock_level(struct bio *bio)
0521 {
0522 return bio_data_dir(bio) == WRITE ?
0523 WRITE_LOCK_LEVEL :
0524 READ_WRITE_LOCK_LEVEL;
0525 }
0526
/*----------------------------------------------------------------
 * Per bio data
 *--------------------------------------------------------------*/
0531 static struct per_bio_data *get_per_bio_data(struct bio *bio)
0532 {
0533 struct per_bio_data *pb = dm_per_bio_data(bio, sizeof(struct per_bio_data));
0534 BUG_ON(!pb);
0535 return pb;
0536 }
0537
0538 static struct per_bio_data *init_per_bio_data(struct bio *bio)
0539 {
0540 struct per_bio_data *pb = get_per_bio_data(bio);
0541
0542 pb->tick = false;
0543 pb->req_nr = dm_bio_get_target_bio_nr(bio);
0544 pb->cell = NULL;
0545 pb->len = 0;
0546
0547 return pb;
0548 }
0549
0550
0551
0552 static void defer_bio(struct cache *cache, struct bio *bio)
0553 {
0554 spin_lock_irq(&cache->lock);
0555 bio_list_add(&cache->deferred_bios, bio);
0556 spin_unlock_irq(&cache->lock);
0557
0558 wake_deferred_bio_worker(cache);
0559 }
0560
0561 static void defer_bios(struct cache *cache, struct bio_list *bios)
0562 {
0563 spin_lock_irq(&cache->lock);
0564 bio_list_merge(&cache->deferred_bios, bios);
0565 bio_list_init(bios);
0566 spin_unlock_irq(&cache->lock);
0567
0568 wake_deferred_bio_worker(cache);
0569 }
0570
0571
0572
0573 static bool bio_detain_shared(struct cache *cache, dm_oblock_t oblock, struct bio *bio)
0574 {
0575 bool r;
0576 struct per_bio_data *pb;
0577 struct dm_cell_key_v2 key;
0578 dm_oblock_t end = to_oblock(from_oblock(oblock) + 1ULL);
0579 struct dm_bio_prison_cell_v2 *cell_prealloc, *cell;
0580
0581 cell_prealloc = alloc_prison_cell(cache);
0582
0583 build_key(oblock, end, &key);
0584 r = dm_cell_get_v2(cache->prison, &key, lock_level(bio), bio, cell_prealloc, &cell);
0585 if (!r) {
		/*
		 * Failed to get the lock.
		 */
0589 free_prison_cell(cache, cell_prealloc);
0590 return r;
0591 }
0592
0593 if (cell != cell_prealloc)
0594 free_prison_cell(cache, cell_prealloc);
0595
0596 pb = get_per_bio_data(bio);
0597 pb->cell = cell;
0598
0599 return r;
0600 }
0601
0602
0603
0604 static bool is_dirty(struct cache *cache, dm_cblock_t b)
0605 {
0606 return test_bit(from_cblock(b), cache->dirty_bitset);
0607 }
0608
0609 static void set_dirty(struct cache *cache, dm_cblock_t cblock)
0610 {
0611 if (!test_and_set_bit(from_cblock(cblock), cache->dirty_bitset)) {
0612 atomic_inc(&cache->nr_dirty);
0613 policy_set_dirty(cache->policy, cblock);
0614 }
0615 }
0616
/*
 * These variants are used by the migration code (see mg_complete()); they
 * force the dirty bitset and always inform the policy, rather than only
 * acting on a state change.
 */
0621 static void force_set_dirty(struct cache *cache, dm_cblock_t cblock)
0622 {
0623 if (!test_and_set_bit(from_cblock(cblock), cache->dirty_bitset))
0624 atomic_inc(&cache->nr_dirty);
0625 policy_set_dirty(cache->policy, cblock);
0626 }
0627
0628 static void force_clear_dirty(struct cache *cache, dm_cblock_t cblock)
0629 {
0630 if (test_and_clear_bit(from_cblock(cblock), cache->dirty_bitset)) {
0631 if (atomic_dec_return(&cache->nr_dirty) == 0)
0632 dm_table_event(cache->ti->table);
0633 }
0634
0635 policy_clear_dirty(cache->policy, cblock);
0636 }
0637
0638
0639
0640 static bool block_size_is_power_of_two(struct cache *cache)
0641 {
0642 return cache->sectors_per_block_shift >= 0;
0643 }
0644
0645 static dm_block_t block_div(dm_block_t b, uint32_t n)
0646 {
0647 do_div(b, n);
0648
0649 return b;
0650 }
0651
0652 static dm_block_t oblocks_per_dblock(struct cache *cache)
0653 {
0654 dm_block_t oblocks = cache->discard_block_size;
0655
0656 if (block_size_is_power_of_two(cache))
0657 oblocks >>= cache->sectors_per_block_shift;
0658 else
0659 oblocks = block_div(oblocks, cache->sectors_per_block);
0660
0661 return oblocks;
0662 }
0663
0664 static dm_dblock_t oblock_to_dblock(struct cache *cache, dm_oblock_t oblock)
0665 {
0666 return to_dblock(block_div(from_oblock(oblock),
0667 oblocks_per_dblock(cache)));
0668 }
0669
0670 static void set_discard(struct cache *cache, dm_dblock_t b)
0671 {
0672 BUG_ON(from_dblock(b) >= from_dblock(cache->discard_nr_blocks));
0673 atomic_inc(&cache->stats.discard_count);
0674
0675 spin_lock_irq(&cache->lock);
0676 set_bit(from_dblock(b), cache->discard_bitset);
0677 spin_unlock_irq(&cache->lock);
0678 }
0679
0680 static void clear_discard(struct cache *cache, dm_dblock_t b)
0681 {
0682 spin_lock_irq(&cache->lock);
0683 clear_bit(from_dblock(b), cache->discard_bitset);
0684 spin_unlock_irq(&cache->lock);
0685 }
0686
0687 static bool is_discarded(struct cache *cache, dm_dblock_t b)
0688 {
0689 int r;
0690 spin_lock_irq(&cache->lock);
0691 r = test_bit(from_dblock(b), cache->discard_bitset);
0692 spin_unlock_irq(&cache->lock);
0693
0694 return r;
0695 }
0696
0697 static bool is_discarded_oblock(struct cache *cache, dm_oblock_t b)
0698 {
0699 int r;
0700 spin_lock_irq(&cache->lock);
0701 r = test_bit(from_dblock(oblock_to_dblock(cache, b)),
0702 cache->discard_bitset);
0703 spin_unlock_irq(&cache->lock);
0704
0705 return r;
0706 }
0707
/*----------------------------------------------------------------
 * Remapping
 *--------------------------------------------------------------*/
0711 static void remap_to_origin(struct cache *cache, struct bio *bio)
0712 {
0713 bio_set_dev(bio, cache->origin_dev->bdev);
0714 }
0715
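/*
 * Maps the bio's sector to the equivalent offset within cache block
 * 'cblock'.  For example with 128 sectors per block (shift 7), cblock 5
 * and an offset of 3 sectors into the block: 5 * 128 + 3 = 643.
 */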
0716 static void remap_to_cache(struct cache *cache, struct bio *bio,
0717 dm_cblock_t cblock)
0718 {
0719 sector_t bi_sector = bio->bi_iter.bi_sector;
0720 sector_t block = from_cblock(cblock);
0721
0722 bio_set_dev(bio, cache->cache_dev->bdev);
0723 if (!block_size_is_power_of_two(cache))
0724 bio->bi_iter.bi_sector =
0725 (block * cache->sectors_per_block) +
0726 sector_div(bi_sector, cache->sectors_per_block);
0727 else
0728 bio->bi_iter.bi_sector =
0729 (block << cache->sectors_per_block_shift) |
0730 (bi_sector & (cache->sectors_per_block - 1));
0731 }
0732
0733 static void check_if_tick_bio_needed(struct cache *cache, struct bio *bio)
0734 {
0735 struct per_bio_data *pb;
0736
0737 spin_lock_irq(&cache->lock);
0738 if (cache->need_tick_bio && !op_is_flush(bio->bi_opf) &&
0739 bio_op(bio) != REQ_OP_DISCARD) {
0740 pb = get_per_bio_data(bio);
0741 pb->tick = true;
0742 cache->need_tick_bio = false;
0743 }
0744 spin_unlock_irq(&cache->lock);
0745 }
0746
0747 static void remap_to_origin_clear_discard(struct cache *cache, struct bio *bio,
0748 dm_oblock_t oblock)
0749 {
0750
0751 check_if_tick_bio_needed(cache, bio);
0752 remap_to_origin(cache, bio);
0753 if (bio_data_dir(bio) == WRITE)
0754 clear_discard(cache, oblock_to_dblock(cache, oblock));
0755 }
0756
0757 static void remap_to_cache_dirty(struct cache *cache, struct bio *bio,
0758 dm_oblock_t oblock, dm_cblock_t cblock)
0759 {
0760 check_if_tick_bio_needed(cache, bio);
0761 remap_to_cache(cache, bio, cblock);
0762 if (bio_data_dir(bio) == WRITE) {
0763 set_dirty(cache, cblock);
0764 clear_discard(cache, oblock_to_dblock(cache, oblock));
0765 }
0766 }
0767
0768 static dm_oblock_t get_bio_block(struct cache *cache, struct bio *bio)
0769 {
0770 sector_t block_nr = bio->bi_iter.bi_sector;
0771
0772 if (!block_size_is_power_of_two(cache))
0773 (void) sector_div(block_nr, cache->sectors_per_block);
0774 else
0775 block_nr >>= cache->sectors_per_block_shift;
0776
0777 return to_oblock(block_nr);
0778 }
0779
0780 static bool accountable_bio(struct cache *cache, struct bio *bio)
0781 {
0782 return bio_op(bio) != REQ_OP_DISCARD;
0783 }
0784
0785 static void accounted_begin(struct cache *cache, struct bio *bio)
0786 {
0787 struct per_bio_data *pb;
0788
0789 if (accountable_bio(cache, bio)) {
0790 pb = get_per_bio_data(bio);
0791 pb->len = bio_sectors(bio);
0792 dm_iot_io_begin(&cache->tracker, pb->len);
0793 }
0794 }
0795
0796 static void accounted_complete(struct cache *cache, struct bio *bio)
0797 {
0798 struct per_bio_data *pb = get_per_bio_data(bio);
0799
0800 dm_iot_io_end(&cache->tracker, pb->len);
0801 }
0802
0803 static void accounted_request(struct cache *cache, struct bio *bio)
0804 {
0805 accounted_begin(cache, bio);
0806 dm_submit_bio_remap(bio, NULL);
0807 }
0808
0809 static void issue_op(struct bio *bio, void *context)
0810 {
0811 struct cache *cache = context;
0812 accounted_request(cache, bio);
0813 }
0814
/*
 * When running in writethrough mode we need to send writes to clean blocks
 * to both the cache and origin devices.  Clone the bio, chain it to the
 * original, submit the clone to the origin and remap the original to the
 * cache.
 */
0819 static void remap_to_origin_and_cache(struct cache *cache, struct bio *bio,
0820 dm_oblock_t oblock, dm_cblock_t cblock)
0821 {
0822 struct bio *origin_bio = bio_alloc_clone(cache->origin_dev->bdev, bio,
0823 GFP_NOIO, &cache->bs);
0824
0825 BUG_ON(!origin_bio);
0826
0827 bio_chain(origin_bio, bio);
0828
0829 if (bio_data_dir(origin_bio) == WRITE)
0830 clear_discard(cache, oblock_to_dblock(cache, oblock));
0831 submit_bio(origin_bio);
0832
0833 remap_to_cache(cache, bio, cblock);
0834 }
0835
/*----------------------------------------------------------------
 * Failure modes
 *--------------------------------------------------------------*/
0839 static enum cache_metadata_mode get_cache_mode(struct cache *cache)
0840 {
0841 return cache->features.mode;
0842 }
0843
0844 static const char *cache_device_name(struct cache *cache)
0845 {
0846 return dm_table_device_name(cache->ti->table);
0847 }
0848
0849 static void notify_mode_switch(struct cache *cache, enum cache_metadata_mode mode)
0850 {
0851 const char *descs[] = {
0852 "write",
0853 "read-only",
0854 "fail"
0855 };
0856
0857 dm_table_event(cache->ti->table);
0858 DMINFO("%s: switching cache to %s mode",
0859 cache_device_name(cache), descs[(int)mode]);
0860 }
0861
0862 static void set_cache_mode(struct cache *cache, enum cache_metadata_mode new_mode)
0863 {
0864 bool needs_check;
0865 enum cache_metadata_mode old_mode = get_cache_mode(cache);
0866
0867 if (dm_cache_metadata_needs_check(cache->cmd, &needs_check)) {
0868 DMERR("%s: unable to read needs_check flag, setting failure mode.",
0869 cache_device_name(cache));
0870 new_mode = CM_FAIL;
0871 }
0872
0873 if (new_mode == CM_WRITE && needs_check) {
0874 DMERR("%s: unable to switch cache to write mode until repaired.",
0875 cache_device_name(cache));
0876 if (old_mode != new_mode)
0877 new_mode = old_mode;
0878 else
0879 new_mode = CM_READ_ONLY;
0880 }
0881
	/* Never move out of fail mode */
0883 if (old_mode == CM_FAIL)
0884 new_mode = CM_FAIL;
0885
0886 switch (new_mode) {
0887 case CM_FAIL:
0888 case CM_READ_ONLY:
0889 dm_cache_metadata_set_read_only(cache->cmd);
0890 break;
0891
0892 case CM_WRITE:
0893 dm_cache_metadata_set_read_write(cache->cmd);
0894 break;
0895 }
0896
0897 cache->features.mode = new_mode;
0898
0899 if (new_mode != old_mode)
0900 notify_mode_switch(cache, new_mode);
0901 }
0902
0903 static void abort_transaction(struct cache *cache)
0904 {
0905 const char *dev_name = cache_device_name(cache);
0906
0907 if (get_cache_mode(cache) >= CM_READ_ONLY)
0908 return;
0909
0910 if (dm_cache_metadata_set_needs_check(cache->cmd)) {
0911 DMERR("%s: failed to set 'needs_check' flag in metadata", dev_name);
0912 set_cache_mode(cache, CM_FAIL);
0913 }
0914
0915 DMERR_LIMIT("%s: aborting current metadata transaction", dev_name);
0916 if (dm_cache_metadata_abort(cache->cmd)) {
0917 DMERR("%s: failed to abort metadata transaction", dev_name);
0918 set_cache_mode(cache, CM_FAIL);
0919 }
0920 }
0921
0922 static void metadata_operation_failed(struct cache *cache, const char *op, int r)
0923 {
0924 DMERR_LIMIT("%s: metadata operation '%s' failed: error = %d",
0925 cache_device_name(cache), op, r);
0926 abort_transaction(cache);
0927 set_cache_mode(cache, CM_READ_ONLY);
0928 }
0929
0930
0931
0932 static void load_stats(struct cache *cache)
0933 {
0934 struct dm_cache_statistics stats;
0935
0936 dm_cache_metadata_get_stats(cache->cmd, &stats);
0937 atomic_set(&cache->stats.read_hit, stats.read_hits);
0938 atomic_set(&cache->stats.read_miss, stats.read_misses);
0939 atomic_set(&cache->stats.write_hit, stats.write_hits);
0940 atomic_set(&cache->stats.write_miss, stats.write_misses);
0941 }
0942
0943 static void save_stats(struct cache *cache)
0944 {
0945 struct dm_cache_statistics stats;
0946
0947 if (get_cache_mode(cache) >= CM_READ_ONLY)
0948 return;
0949
0950 stats.read_hits = atomic_read(&cache->stats.read_hit);
0951 stats.read_misses = atomic_read(&cache->stats.read_miss);
0952 stats.write_hits = atomic_read(&cache->stats.write_hit);
0953 stats.write_misses = atomic_read(&cache->stats.write_miss);
0954
0955 dm_cache_metadata_set_stats(cache->cmd, &stats);
0956 }
0957
0958 static void update_stats(struct cache_stats *stats, enum policy_operation op)
0959 {
0960 switch (op) {
0961 case POLICY_PROMOTE:
0962 atomic_inc(&stats->promotion);
0963 break;
0964
0965 case POLICY_DEMOTE:
0966 atomic_inc(&stats->demotion);
0967 break;
0968
0969 case POLICY_WRITEBACK:
0970 atomic_inc(&stats->writeback);
0971 break;
0972 }
0973 }
0974
/*----------------------------------------------------------------
 * Migration processing
 *
 * Migration covers moving data from the origin device to the cache, or
 * vice versa.
 *--------------------------------------------------------------*/
0982 static void inc_io_migrations(struct cache *cache)
0983 {
0984 atomic_inc(&cache->nr_io_migrations);
0985 }
0986
0987 static void dec_io_migrations(struct cache *cache)
0988 {
0989 atomic_dec(&cache->nr_io_migrations);
0990 }
0991
0992 static bool discard_or_flush(struct bio *bio)
0993 {
0994 return bio_op(bio) == REQ_OP_DISCARD || op_is_flush(bio->bi_opf);
0995 }
0996
0997 static void calc_discard_block_range(struct cache *cache, struct bio *bio,
0998 dm_dblock_t *b, dm_dblock_t *e)
0999 {
1000 sector_t sb = bio->bi_iter.bi_sector;
1001 sector_t se = bio_end_sector(bio);
1002
1003 *b = to_dblock(dm_sector_div_up(sb, cache->discard_block_size));
1004
1005 if (se - sb < cache->discard_block_size)
1006 *e = *b;
1007 else
1008 *e = to_dblock(block_div(se, cache->discard_block_size));
1009 }
1010
1011
1012
1013 static void prevent_background_work(struct cache *cache)
1014 {
1015 lockdep_off();
1016 down_write(&cache->background_work_lock);
1017 lockdep_on();
1018 }
1019
1020 static void allow_background_work(struct cache *cache)
1021 {
1022 lockdep_off();
1023 up_write(&cache->background_work_lock);
1024 lockdep_on();
1025 }
1026
1027 static bool background_work_begin(struct cache *cache)
1028 {
1029 bool r;
1030
1031 lockdep_off();
1032 r = down_read_trylock(&cache->background_work_lock);
1033 lockdep_on();
1034
1035 return r;
1036 }
1037
1038 static void background_work_end(struct cache *cache)
1039 {
1040 lockdep_off();
1041 up_read(&cache->background_work_lock);
1042 lockdep_on();
1043 }
1044
1045
1046
1047 static bool bio_writes_complete_block(struct cache *cache, struct bio *bio)
1048 {
1049 return (bio_data_dir(bio) == WRITE) &&
1050 (bio->bi_iter.bi_size == (cache->sectors_per_block << SECTOR_SHIFT));
1051 }
1052
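/*
 * Only in writeback mode: the bio either targets a discarded block or is a
 * write covering the complete block, so a migration can use the bio itself
 * (see overwrite()) instead of copying the old data first.
 */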
1053 static bool optimisable_bio(struct cache *cache, struct bio *bio, dm_oblock_t block)
1054 {
1055 return writeback_mode(cache) &&
1056 (is_discarded_oblock(cache, block) || bio_writes_complete_block(cache, bio));
1057 }
1058
1059 static void quiesce(struct dm_cache_migration *mg,
1060 void (*continuation)(struct work_struct *))
1061 {
1062 init_continuation(&mg->k, continuation);
1063 dm_cell_quiesce_v2(mg->cache->prison, mg->cell, &mg->k.ws);
1064 }
1065
1066 static struct dm_cache_migration *ws_to_mg(struct work_struct *ws)
1067 {
1068 struct continuation *k = container_of(ws, struct continuation, ws);
1069 return container_of(k, struct dm_cache_migration, k);
1070 }
1071
1072 static void copy_complete(int read_err, unsigned long write_err, void *context)
1073 {
1074 struct dm_cache_migration *mg = container_of(context, struct dm_cache_migration, k);
1075
1076 if (read_err || write_err)
1077 mg->k.input = BLK_STS_IOERR;
1078
1079 queue_continuation(mg->cache->wq, &mg->k);
1080 }
1081
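/*
 * Copy a whole block between origin and cache using kcopyd; 'promote'
 * selects the direction (origin -> cache when true).
 */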
1082 static void copy(struct dm_cache_migration *mg, bool promote)
1083 {
1084 struct dm_io_region o_region, c_region;
1085 struct cache *cache = mg->cache;
1086
1087 o_region.bdev = cache->origin_dev->bdev;
1088 o_region.sector = from_oblock(mg->op->oblock) * cache->sectors_per_block;
1089 o_region.count = cache->sectors_per_block;
1090
1091 c_region.bdev = cache->cache_dev->bdev;
1092 c_region.sector = from_cblock(mg->op->cblock) * cache->sectors_per_block;
1093 c_region.count = cache->sectors_per_block;
1094
1095 if (promote)
1096 dm_kcopyd_copy(cache->copier, &o_region, 1, &c_region, 0, copy_complete, &mg->k);
1097 else
1098 dm_kcopyd_copy(cache->copier, &c_region, 1, &o_region, 0, copy_complete, &mg->k);
1099 }
1100
1101 static void bio_drop_shared_lock(struct cache *cache, struct bio *bio)
1102 {
1103 struct per_bio_data *pb = get_per_bio_data(bio);
1104
1105 if (pb->cell && dm_cell_put_v2(cache->prison, pb->cell))
1106 free_prison_cell(cache, pb->cell);
1107 pb->cell = NULL;
1108 }
1109
1110 static void overwrite_endio(struct bio *bio)
1111 {
1112 struct dm_cache_migration *mg = bio->bi_private;
1113 struct cache *cache = mg->cache;
1114 struct per_bio_data *pb = get_per_bio_data(bio);
1115
1116 dm_unhook_bio(&pb->hook_info, bio);
1117
1118 if (bio->bi_status)
1119 mg->k.input = bio->bi_status;
1120
1121 queue_continuation(cache->wq, &mg->k);
1122 }
1123
1124 static void overwrite(struct dm_cache_migration *mg,
1125 void (*continuation)(struct work_struct *))
1126 {
1127 struct bio *bio = mg->overwrite_bio;
1128 struct per_bio_data *pb = get_per_bio_data(bio);
1129
1130 dm_hook_bio(&pb->hook_info, bio, overwrite_endio, mg);

	/*
	 * The overwrite bio is part of the copy operation, as such it does
	 * not set/clear discard or dirty flags.
	 */
1136 if (mg->op->op == POLICY_PROMOTE)
1137 remap_to_cache(mg->cache, bio, mg->op->cblock);
1138 else
1139 remap_to_origin(mg->cache, bio);
1140
1141 init_continuation(&mg->k, continuation);
1142 accounted_request(mg->cache, bio);
1143 }
1144
/*
 * Migration steps:
 *
 * 1) exclusive lock preventing WRITEs
 * 2) quiesce
 * 3) copy or issue overwrite bio
 * 4) upgrade to exclusive lock preventing READs and WRITEs
 * 5) quiesce
 * 6) update metadata and commit
 * 7) unlock
 */
1156 static void mg_complete(struct dm_cache_migration *mg, bool success)
1157 {
1158 struct bio_list bios;
1159 struct cache *cache = mg->cache;
1160 struct policy_work *op = mg->op;
1161 dm_cblock_t cblock = op->cblock;
1162
1163 if (success)
1164 update_stats(&cache->stats, op->op);
1165
1166 switch (op->op) {
1167 case POLICY_PROMOTE:
1168 clear_discard(cache, oblock_to_dblock(cache, op->oblock));
1169 policy_complete_background_work(cache->policy, op, success);
1170
1171 if (mg->overwrite_bio) {
1172 if (success)
1173 force_set_dirty(cache, cblock);
1174 else if (mg->k.input)
1175 mg->overwrite_bio->bi_status = mg->k.input;
1176 else
1177 mg->overwrite_bio->bi_status = BLK_STS_IOERR;
1178 bio_endio(mg->overwrite_bio);
1179 } else {
1180 if (success)
1181 force_clear_dirty(cache, cblock);
1182 dec_io_migrations(cache);
1183 }
1184 break;
1185
1186 case POLICY_DEMOTE:
		/*
		 * We clear dirty here to update the nr_dirty counter.
		 */
1190 if (success)
1191 force_clear_dirty(cache, cblock);
1192 policy_complete_background_work(cache->policy, op, success);
1193 dec_io_migrations(cache);
1194 break;
1195
1196 case POLICY_WRITEBACK:
1197 if (success)
1198 force_clear_dirty(cache, cblock);
1199 policy_complete_background_work(cache->policy, op, success);
1200 dec_io_migrations(cache);
1201 break;
1202 }
1203
1204 bio_list_init(&bios);
1205 if (mg->cell) {
1206 if (dm_cell_unlock_v2(cache->prison, mg->cell, &bios))
1207 free_prison_cell(cache, mg->cell);
1208 }
1209
1210 free_migration(mg);
1211 defer_bios(cache, &bios);
1212 wake_migration_worker(cache);
1213
1214 background_work_end(cache);
1215 }
1216
1217 static void mg_success(struct work_struct *ws)
1218 {
1219 struct dm_cache_migration *mg = ws_to_mg(ws);
1220 mg_complete(mg, mg->k.input == 0);
1221 }
1222
1223 static void mg_update_metadata(struct work_struct *ws)
1224 {
1225 int r;
1226 struct dm_cache_migration *mg = ws_to_mg(ws);
1227 struct cache *cache = mg->cache;
1228 struct policy_work *op = mg->op;
1229
1230 switch (op->op) {
1231 case POLICY_PROMOTE:
1232 r = dm_cache_insert_mapping(cache->cmd, op->cblock, op->oblock);
1233 if (r) {
1234 DMERR_LIMIT("%s: migration failed; couldn't insert mapping",
1235 cache_device_name(cache));
1236 metadata_operation_failed(cache, "dm_cache_insert_mapping", r);
1237
1238 mg_complete(mg, false);
1239 return;
1240 }
1241 mg_complete(mg, true);
1242 break;
1243
1244 case POLICY_DEMOTE:
1245 r = dm_cache_remove_mapping(cache->cmd, op->cblock);
1246 if (r) {
1247 DMERR_LIMIT("%s: migration failed; couldn't update on disk metadata",
1248 cache_device_name(cache));
1249 metadata_operation_failed(cache, "dm_cache_remove_mapping", r);
1250
1251 mg_complete(mg, false);
1252 return;
1253 }

		/*
		 * It would be nice if we only had to commit when a REQ_FLUSH
		 * comes through.  But there are scenarios we have to look
		 * out for:
		 *
		 * - block x is cached
		 * - a demotion of x occurs
		 * - the cache block is reallocated and overwritten
		 * - crash
		 *
		 * Because there was no commit, on recovery the metadata
		 * rolls back to x still being cached, but the cache block
		 * has since been overwritten, so it ends up pointing at data
		 * that was never x's.
		 *
		 * To avoid this we require a commit as part of the demotion
		 * operation.
		 */
1274 init_continuation(&mg->k, mg_success);
1275 continue_after_commit(&cache->committer, &mg->k);
1276 schedule_commit(&cache->committer);
1277 break;
1278
1279 case POLICY_WRITEBACK:
1280 mg_complete(mg, true);
1281 break;
1282 }
1283 }
1284
1285 static void mg_update_metadata_after_copy(struct work_struct *ws)
1286 {
1287 struct dm_cache_migration *mg = ws_to_mg(ws);

	/*
	 * Did the copy succeed?
	 */
1292 if (mg->k.input)
1293 mg_complete(mg, false);
1294 else
1295 mg_update_metadata(ws);
1296 }
1297
1298 static void mg_upgrade_lock(struct work_struct *ws)
1299 {
1300 int r;
1301 struct dm_cache_migration *mg = ws_to_mg(ws);
1302
	/*
	 * Did the copy succeed?
	 */
1306 if (mg->k.input)
1307 mg_complete(mg, false);
1308
1309 else {
		/*
		 * Now we want the lock to prevent both reads and writes.
		 */
1313 r = dm_cell_lock_promote_v2(mg->cache->prison, mg->cell,
1314 READ_WRITE_LOCK_LEVEL);
1315 if (r < 0)
1316 mg_complete(mg, false);
1317
1318 else if (r)
1319 quiesce(mg, mg_update_metadata);
1320
1321 else
1322 mg_update_metadata(ws);
1323 }
1324 }
1325
1326 static void mg_full_copy(struct work_struct *ws)
1327 {
1328 struct dm_cache_migration *mg = ws_to_mg(ws);
1329 struct cache *cache = mg->cache;
1330 struct policy_work *op = mg->op;
1331 bool is_policy_promote = (op->op == POLICY_PROMOTE);
1332
1333 if ((!is_policy_promote && !is_dirty(cache, op->cblock)) ||
1334 is_discarded_oblock(cache, op->oblock)) {
1335 mg_upgrade_lock(ws);
1336 return;
1337 }
1338
1339 init_continuation(&mg->k, mg_upgrade_lock);
1340 copy(mg, is_policy_promote);
1341 }
1342
1343 static void mg_copy(struct work_struct *ws)
1344 {
1345 struct dm_cache_migration *mg = ws_to_mg(ws);
1346
1347 if (mg->overwrite_bio) {
		/*
		 * No exclusive lock was held when we last checked if the bio
		 * was optimisable.  So we have to check again in case things
		 * have changed (eg, the block may no longer be discarded).
		 */
1353 if (!optimisable_bio(mg->cache, mg->overwrite_bio, mg->op->oblock)) {
			/*
			 * Fall back to a real full copy after doing some tidying up.
			 */
1357 bool rb = bio_detain_shared(mg->cache, mg->op->oblock, mg->overwrite_bio);
1358 BUG_ON(rb);
1359 mg->overwrite_bio = NULL;
1360 inc_io_migrations(mg->cache);
1361 mg_full_copy(ws);
1362 return;
1363 }
1364
		/*
		 * It's safe to do this here, even though it's new data,
		 * because all IO has been locked out of the block.
		 *
		 * mg_lock_writes() already took READ_WRITE_LOCK_LEVEL
		 * so _not_ using mg_upgrade_lock() as the continuation.
		 */
1372 overwrite(mg, mg_update_metadata_after_copy);
1373
1374 } else
1375 mg_full_copy(ws);
1376 }
1377
1378 static int mg_lock_writes(struct dm_cache_migration *mg)
1379 {
1380 int r;
1381 struct dm_cell_key_v2 key;
1382 struct cache *cache = mg->cache;
1383 struct dm_bio_prison_cell_v2 *prealloc;
1384
1385 prealloc = alloc_prison_cell(cache);
1386
	/*
	 * Prevent writes to the block, but allow reads to continue.
	 * Unless we're using an overwrite bio, in which case we lock
	 * everything.
	 */
1392 build_key(mg->op->oblock, oblock_succ(mg->op->oblock), &key);
1393 r = dm_cell_lock_v2(cache->prison, &key,
1394 mg->overwrite_bio ? READ_WRITE_LOCK_LEVEL : WRITE_LOCK_LEVEL,
1395 prealloc, &mg->cell);
1396 if (r < 0) {
1397 free_prison_cell(cache, prealloc);
1398 mg_complete(mg, false);
1399 return r;
1400 }
1401
1402 if (mg->cell != prealloc)
1403 free_prison_cell(cache, prealloc);
1404
1405 if (r == 0)
1406 mg_copy(&mg->k.ws);
1407 else
1408 quiesce(mg, mg_copy);
1409
1410 return 0;
1411 }
1412
1413 static int mg_start(struct cache *cache, struct policy_work *op, struct bio *bio)
1414 {
1415 struct dm_cache_migration *mg;
1416
1417 if (!background_work_begin(cache)) {
1418 policy_complete_background_work(cache->policy, op, false);
1419 return -EPERM;
1420 }
1421
1422 mg = alloc_migration(cache);
1423
1424 mg->op = op;
1425 mg->overwrite_bio = bio;
1426
1427 if (!bio)
1428 inc_io_migrations(cache);
1429
1430 return mg_lock_writes(mg);
1431 }
1432
/*----------------------------------------------------------------
 * invalidation processing
 *--------------------------------------------------------------*/

1437 static void invalidate_complete(struct dm_cache_migration *mg, bool success)
1438 {
1439 struct bio_list bios;
1440 struct cache *cache = mg->cache;
1441
1442 bio_list_init(&bios);
1443 if (dm_cell_unlock_v2(cache->prison, mg->cell, &bios))
1444 free_prison_cell(cache, mg->cell);
1445
1446 if (!success && mg->overwrite_bio)
1447 bio_io_error(mg->overwrite_bio);
1448
1449 free_migration(mg);
1450 defer_bios(cache, &bios);
1451
1452 background_work_end(cache);
1453 }
1454
1455 static void invalidate_completed(struct work_struct *ws)
1456 {
1457 struct dm_cache_migration *mg = ws_to_mg(ws);
1458 invalidate_complete(mg, !mg->k.input);
1459 }
1460
1461 static int invalidate_cblock(struct cache *cache, dm_cblock_t cblock)
1462 {
1463 int r = policy_invalidate_mapping(cache->policy, cblock);
1464 if (!r) {
1465 r = dm_cache_remove_mapping(cache->cmd, cblock);
1466 if (r) {
1467 DMERR_LIMIT("%s: invalidation failed; couldn't update on disk metadata",
1468 cache_device_name(cache));
1469 metadata_operation_failed(cache, "dm_cache_remove_mapping", r);
1470 }
1471
1472 } else if (r == -ENODATA) {
		/*
		 * Harmless: the block simply wasn't mapped.
		 */
1476 r = 0;
1477
1478 } else
1479 DMERR("%s: policy_invalidate_mapping failed", cache_device_name(cache));
1480
1481 return r;
1482 }
1483
1484 static void invalidate_remove(struct work_struct *ws)
1485 {
1486 int r;
1487 struct dm_cache_migration *mg = ws_to_mg(ws);
1488 struct cache *cache = mg->cache;
1489
1490 r = invalidate_cblock(cache, mg->invalidate_cblock);
1491 if (r) {
1492 invalidate_complete(mg, false);
1493 return;
1494 }
1495
1496 init_continuation(&mg->k, invalidate_completed);
1497 continue_after_commit(&cache->committer, &mg->k);
1498 remap_to_origin_clear_discard(cache, mg->overwrite_bio, mg->invalidate_oblock);
1499 mg->overwrite_bio = NULL;
1500 schedule_commit(&cache->committer);
1501 }
1502
1503 static int invalidate_lock(struct dm_cache_migration *mg)
1504 {
1505 int r;
1506 struct dm_cell_key_v2 key;
1507 struct cache *cache = mg->cache;
1508 struct dm_bio_prison_cell_v2 *prealloc;
1509
1510 prealloc = alloc_prison_cell(cache);
1511
1512 build_key(mg->invalidate_oblock, oblock_succ(mg->invalidate_oblock), &key);
1513 r = dm_cell_lock_v2(cache->prison, &key,
1514 READ_WRITE_LOCK_LEVEL, prealloc, &mg->cell);
1515 if (r < 0) {
1516 free_prison_cell(cache, prealloc);
1517 invalidate_complete(mg, false);
1518 return r;
1519 }
1520
1521 if (mg->cell != prealloc)
1522 free_prison_cell(cache, prealloc);
1523
1524 if (r)
1525 quiesce(mg, invalidate_remove);
1526
1527 else {
		/*
		 * We can't call invalidate_remove() directly here because we
		 * might still be in request context.
		 */
1532 init_continuation(&mg->k, invalidate_remove);
1533 queue_work(cache->wq, &mg->k.ws);
1534 }
1535
1536 return 0;
1537 }
1538
1539 static int invalidate_start(struct cache *cache, dm_cblock_t cblock,
1540 dm_oblock_t oblock, struct bio *bio)
1541 {
1542 struct dm_cache_migration *mg;
1543
1544 if (!background_work_begin(cache))
1545 return -EPERM;
1546
1547 mg = alloc_migration(cache);
1548
1549 mg->overwrite_bio = bio;
1550 mg->invalidate_cblock = cblock;
1551 mg->invalidate_oblock = oblock;
1552
1553 return invalidate_lock(mg);
1554 }
1555
/*----------------------------------------------------------------
 * bio processing
 *--------------------------------------------------------------*/

1560 enum busy {
1561 IDLE,
1562 BUSY
1563 };
1564
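/*
 * IDLE if there has been no origin/cache io for the last second and the
 * volume of in-flight background copies (including the prospective one)
 * stays within migration_threshold sectors.
 */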
1565 static enum busy spare_migration_bandwidth(struct cache *cache)
1566 {
1567 bool idle = dm_iot_idle_for(&cache->tracker, HZ);
1568 sector_t current_volume = (atomic_read(&cache->nr_io_migrations) + 1) *
1569 cache->sectors_per_block;
1570
1571 if (idle && current_volume <= cache->migration_threshold)
1572 return IDLE;
1573 else
1574 return BUSY;
1575 }
1576
1577 static void inc_hit_counter(struct cache *cache, struct bio *bio)
1578 {
1579 atomic_inc(bio_data_dir(bio) == READ ?
1580 &cache->stats.read_hit : &cache->stats.write_hit);
1581 }
1582
1583 static void inc_miss_counter(struct cache *cache, struct bio *bio)
1584 {
1585 atomic_inc(bio_data_dir(bio) == READ ?
1586 &cache->stats.read_miss : &cache->stats.write_miss);
1587 }
1588
1589
1590
1591 static int map_bio(struct cache *cache, struct bio *bio, dm_oblock_t block,
1592 bool *commit_needed)
1593 {
1594 int r, data_dir;
1595 bool rb, background_queued;
1596 dm_cblock_t cblock;
1597
1598 *commit_needed = false;
1599
1600 rb = bio_detain_shared(cache, block, bio);
1601 if (!rb) {
		/*
		 * An exclusive lock is held for this block, so we have to
		 * wait.  We set the commit_needed flag so the current
		 * transaction will be committed asap, allowing this lock
		 * to be dropped sooner.
		 */
1608 *commit_needed = true;
1609 return DM_MAPIO_SUBMITTED;
1610 }
1611
1612 data_dir = bio_data_dir(bio);
1613
1614 if (optimisable_bio(cache, bio, block)) {
1615 struct policy_work *op = NULL;
1616
1617 r = policy_lookup_with_work(cache->policy, block, &cblock, data_dir, true, &op);
1618 if (unlikely(r && r != -ENOENT)) {
1619 DMERR_LIMIT("%s: policy_lookup_with_work() failed with r = %d",
1620 cache_device_name(cache), r);
1621 bio_io_error(bio);
1622 return DM_MAPIO_SUBMITTED;
1623 }
1624
1625 if (r == -ENOENT && op) {
1626 bio_drop_shared_lock(cache, bio);
1627 BUG_ON(op->op != POLICY_PROMOTE);
1628 mg_start(cache, op, bio);
1629 return DM_MAPIO_SUBMITTED;
1630 }
1631 } else {
1632 r = policy_lookup(cache->policy, block, &cblock, data_dir, false, &background_queued);
1633 if (unlikely(r && r != -ENOENT)) {
1634 DMERR_LIMIT("%s: policy_lookup() failed with r = %d",
1635 cache_device_name(cache), r);
1636 bio_io_error(bio);
1637 return DM_MAPIO_SUBMITTED;
1638 }
1639
1640 if (background_queued)
1641 wake_migration_worker(cache);
1642 }
1643
1644 if (r == -ENOENT) {
1645 struct per_bio_data *pb = get_per_bio_data(bio);

		/*
		 * Miss.
		 */
1650 inc_miss_counter(cache, bio);
1651 if (pb->req_nr == 0) {
1652 accounted_begin(cache, bio);
1653 remap_to_origin_clear_discard(cache, bio, block);
1654 } else {
			/*
			 * This is a duplicate writethrough io that is no
			 * longer needed because the block has been demoted.
			 */
1659 bio_endio(bio);
1660 return DM_MAPIO_SUBMITTED;
1661 }
1662 } else {
		/*
		 * Hit.
		 */
1666 inc_hit_counter(cache, bio);
1667
		/*
		 * Passthrough always maps to the origin, invalidating any
		 * cache blocks that are written to.
		 */
1672 if (passthrough_mode(cache)) {
1673 if (bio_data_dir(bio) == WRITE) {
1674 bio_drop_shared_lock(cache, bio);
1675 atomic_inc(&cache->stats.demotion);
1676 invalidate_start(cache, cblock, block, bio);
1677 } else
1678 remap_to_origin_clear_discard(cache, bio, block);
1679 } else {
1680 if (bio_data_dir(bio) == WRITE && writethrough_mode(cache) &&
1681 !is_dirty(cache, cblock)) {
1682 remap_to_origin_and_cache(cache, bio, block, cblock);
1683 accounted_begin(cache, bio);
1684 } else
1685 remap_to_cache_dirty(cache, bio, block, cblock);
1686 }
1687 }
1688
	/*
	 * dm core turns FUA requests into a separate payload and FLUSH req.
	 */
1692 if (bio->bi_opf & REQ_FUA) {
		/*
		 * issue_after_commit will call accounted_begin a second time.
		 * So we call accounted_complete() to avoid double accounting.
		 */
1697 accounted_complete(cache, bio);
1698 issue_after_commit(&cache->committer, bio);
1699 *commit_needed = true;
1700 return DM_MAPIO_SUBMITTED;
1701 }
1702
1703 return DM_MAPIO_REMAPPED;
1704 }
1705
1706 static bool process_bio(struct cache *cache, struct bio *bio)
1707 {
1708 bool commit_needed;
1709
1710 if (map_bio(cache, bio, get_bio_block(cache, bio), &commit_needed) == DM_MAPIO_REMAPPED)
1711 dm_submit_bio_remap(bio, NULL);
1712
1713 return commit_needed;
1714 }
1715
/*
 * A non-zero return indicates read_only or fail_io mode.
 */
1719 static int commit(struct cache *cache, bool clean_shutdown)
1720 {
1721 int r;
1722
1723 if (get_cache_mode(cache) >= CM_READ_ONLY)
1724 return -EINVAL;
1725
1726 atomic_inc(&cache->stats.commit_count);
1727 r = dm_cache_commit(cache->cmd, clean_shutdown);
1728 if (r)
1729 metadata_operation_failed(cache, "dm_cache_commit", r);
1730
1731 return r;
1732 }
1733
/*
 * Used by the batcher.
 */
1737 static blk_status_t commit_op(void *context)
1738 {
1739 struct cache *cache = context;
1740
1741 if (dm_cache_changed_this_transaction(cache->cmd))
1742 return errno_to_blk_status(commit(cache, false));
1743
1744 return 0;
1745 }
1746
1747
1748
1749 static bool process_flush_bio(struct cache *cache, struct bio *bio)
1750 {
1751 struct per_bio_data *pb = get_per_bio_data(bio);
1752
1753 if (!pb->req_nr)
1754 remap_to_origin(cache, bio);
1755 else
1756 remap_to_cache(cache, bio, 0);
1757
1758 issue_after_commit(&cache->committer, bio);
1759 return true;
1760 }
1761
1762 static bool process_discard_bio(struct cache *cache, struct bio *bio)
1763 {
1764 dm_dblock_t b, e;
1765
1766
1767
1768
1769 calc_discard_block_range(cache, bio, &b, &e);
1770 while (b != e) {
1771 set_discard(cache, b);
1772 b = to_dblock(from_dblock(b) + 1);
1773 }
1774
1775 if (cache->features.discard_passdown) {
1776 remap_to_origin(cache, bio);
1777 dm_submit_bio_remap(bio, NULL);
1778 } else
1779 bio_endio(bio);
1780
1781 return false;
1782 }
1783
1784 static void process_deferred_bios(struct work_struct *ws)
1785 {
1786 struct cache *cache = container_of(ws, struct cache, deferred_bio_worker);
1787
1788 bool commit_needed = false;
1789 struct bio_list bios;
1790 struct bio *bio;
1791
1792 bio_list_init(&bios);
1793
1794 spin_lock_irq(&cache->lock);
1795 bio_list_merge(&bios, &cache->deferred_bios);
1796 bio_list_init(&cache->deferred_bios);
1797 spin_unlock_irq(&cache->lock);
1798
1799 while ((bio = bio_list_pop(&bios))) {
1800 if (bio->bi_opf & REQ_PREFLUSH)
1801 commit_needed = process_flush_bio(cache, bio) || commit_needed;
1802
1803 else if (bio_op(bio) == REQ_OP_DISCARD)
1804 commit_needed = process_discard_bio(cache, bio) || commit_needed;
1805
1806 else
1807 commit_needed = process_bio(cache, bio) || commit_needed;
1808 }
1809
1810 if (commit_needed)
1811 schedule_commit(&cache->committer);
1812 }
1813
/*----------------------------------------------------------------
 * Main worker loop
 *--------------------------------------------------------------*/

1818 static void requeue_deferred_bios(struct cache *cache)
1819 {
1820 struct bio *bio;
1821 struct bio_list bios;
1822
1823 bio_list_init(&bios);
1824 bio_list_merge(&bios, &cache->deferred_bios);
1825 bio_list_init(&cache->deferred_bios);
1826
1827 while ((bio = bio_list_pop(&bios))) {
1828 bio->bi_status = BLK_STS_DM_REQUEUE;
1829 bio_endio(bio);
1830 }
1831 }
1832
/*
 * We want to commit periodically so that not too much
 * unwritten metadata builds up.
 */
1837 static void do_waker(struct work_struct *ws)
1838 {
1839 struct cache *cache = container_of(to_delayed_work(ws), struct cache, waker);
1840
1841 policy_tick(cache->policy, true);
1842 wake_migration_worker(cache);
1843 schedule_commit(&cache->committer);
1844 queue_delayed_work(cache->wq, &cache->waker, COMMIT_PERIOD);
1845 }
1846
1847 static void check_migrations(struct work_struct *ws)
1848 {
1849 int r;
1850 struct policy_work *op;
1851 struct cache *cache = container_of(ws, struct cache, migration_worker);
1852 enum busy b;
1853
1854 for (;;) {
1855 b = spare_migration_bandwidth(cache);
1856
1857 r = policy_get_background_work(cache->policy, b == IDLE, &op);
1858 if (r == -ENODATA)
1859 break;
1860
1861 if (r) {
1862 DMERR_LIMIT("%s: policy_background_work failed",
1863 cache_device_name(cache));
1864 break;
1865 }
1866
1867 r = mg_start(cache, op, NULL);
1868 if (r)
1869 break;
1870 }
1871 }
1872
/*----------------------------------------------------------------
 * Target methods
 *--------------------------------------------------------------*/

/*
 * This function gets called on the error paths of the constructor, so we
 * have to cope with a partially initialised struct.
 */
1881 static void destroy(struct cache *cache)
1882 {
1883 unsigned i;
1884
1885 mempool_exit(&cache->migration_pool);
1886
1887 if (cache->prison)
1888 dm_bio_prison_destroy_v2(cache->prison);
1889
1890 if (cache->wq)
1891 destroy_workqueue(cache->wq);
1892
1893 if (cache->dirty_bitset)
1894 free_bitset(cache->dirty_bitset);
1895
1896 if (cache->discard_bitset)
1897 free_bitset(cache->discard_bitset);
1898
1899 if (cache->copier)
1900 dm_kcopyd_client_destroy(cache->copier);
1901
1902 if (cache->cmd)
1903 dm_cache_metadata_close(cache->cmd);
1904
1905 if (cache->metadata_dev)
1906 dm_put_device(cache->ti, cache->metadata_dev);
1907
1908 if (cache->origin_dev)
1909 dm_put_device(cache->ti, cache->origin_dev);
1910
1911 if (cache->cache_dev)
1912 dm_put_device(cache->ti, cache->cache_dev);
1913
1914 if (cache->policy)
1915 dm_cache_policy_destroy(cache->policy);
1916
1917 for (i = 0; i < cache->nr_ctr_args ; i++)
1918 kfree(cache->ctr_args[i]);
1919 kfree(cache->ctr_args);
1920
1921 bioset_exit(&cache->bs);
1922
1923 kfree(cache);
1924 }
1925
1926 static void cache_dtr(struct dm_target *ti)
1927 {
1928 struct cache *cache = ti->private;
1929
1930 destroy(cache);
1931 }
1932
1933 static sector_t get_dev_size(struct dm_dev *dev)
1934 {
1935 return bdev_nr_sectors(dev->bdev);
1936 }
1937
/*----------------------------------------------------------------*/

/*
 * Construct a cache device mapping:
 *
 * cache <metadata dev> <cache dev> <origin dev> <block size>
 *       <#feature args> [<feature arg>]*
 *       <policy> <#policy args> [<policy arg>]*
 *
 * metadata dev    : fast device holding the persistent metadata
 * cache dev       : fast device holding cached data blocks
 * origin dev      : slow device holding original data blocks
 * block size      : cache unit size in sectors
 *
 * #feature args   : number of feature arguments passed
 * feature args    : writeback (default), writethrough, passthrough,
 *                   metadata2, no_discard_passdown (see parse_features())
 *
 * policy          : the replacement policy to use
 * #policy args    : an even number of policy arguments corresponding
 *                   to key/value pairs passed to the policy
 * policy args     : key/value pairs passed to the policy,
 *                   e.g. 'sequential_threshold 1024'
 */

1969 struct cache_args {
1970 struct dm_target *ti;
1971
1972 struct dm_dev *metadata_dev;
1973
1974 struct dm_dev *cache_dev;
1975 sector_t cache_sectors;
1976
1977 struct dm_dev *origin_dev;
1978 sector_t origin_sectors;
1979
1980 uint32_t block_size;
1981
1982 const char *policy_name;
1983 int policy_argc;
1984 const char **policy_argv;
1985
1986 struct cache_features features;
1987 };
1988
1989 static void destroy_cache_args(struct cache_args *ca)
1990 {
1991 if (ca->metadata_dev)
1992 dm_put_device(ca->ti, ca->metadata_dev);
1993
1994 if (ca->cache_dev)
1995 dm_put_device(ca->ti, ca->cache_dev);
1996
1997 if (ca->origin_dev)
1998 dm_put_device(ca->ti, ca->origin_dev);
1999
2000 kfree(ca);
2001 }
2002
2003 static bool at_least_one_arg(struct dm_arg_set *as, char **error)
2004 {
2005 if (!as->argc) {
2006 *error = "Insufficient args";
2007 return false;
2008 }
2009
2010 return true;
2011 }
2012
2013 static int parse_metadata_dev(struct cache_args *ca, struct dm_arg_set *as,
2014 char **error)
2015 {
2016 int r;
2017 sector_t metadata_dev_size;
2018
2019 if (!at_least_one_arg(as, error))
2020 return -EINVAL;
2021
2022 r = dm_get_device(ca->ti, dm_shift_arg(as), FMODE_READ | FMODE_WRITE,
2023 &ca->metadata_dev);
2024 if (r) {
2025 *error = "Error opening metadata device";
2026 return r;
2027 }
2028
2029 metadata_dev_size = get_dev_size(ca->metadata_dev);
2030 if (metadata_dev_size > DM_CACHE_METADATA_MAX_SECTORS_WARNING)
		DMWARN("Metadata device %pg is larger than %u sectors: excess space will not be used.",
		       ca->metadata_dev->bdev, DM_CACHE_METADATA_MAX_SECTORS);
2033
2034 return 0;
2035 }
2036
2037 static int parse_cache_dev(struct cache_args *ca, struct dm_arg_set *as,
2038 char **error)
2039 {
2040 int r;
2041
2042 if (!at_least_one_arg(as, error))
2043 return -EINVAL;
2044
2045 r = dm_get_device(ca->ti, dm_shift_arg(as), FMODE_READ | FMODE_WRITE,
2046 &ca->cache_dev);
2047 if (r) {
2048 *error = "Error opening cache device";
2049 return r;
2050 }
2051 ca->cache_sectors = get_dev_size(ca->cache_dev);
2052
2053 return 0;
2054 }
2055
2056 static int parse_origin_dev(struct cache_args *ca, struct dm_arg_set *as,
2057 char **error)
2058 {
2059 int r;
2060
2061 if (!at_least_one_arg(as, error))
2062 return -EINVAL;
2063
2064 r = dm_get_device(ca->ti, dm_shift_arg(as), FMODE_READ | FMODE_WRITE,
2065 &ca->origin_dev);
2066 if (r) {
2067 *error = "Error opening origin device";
2068 return r;
2069 }
2070
2071 ca->origin_sectors = get_dev_size(ca->origin_dev);
2072 if (ca->ti->len > ca->origin_sectors) {
2073 *error = "Device size larger than cached device";
2074 return -EINVAL;
2075 }
2076
2077 return 0;
2078 }
2079
2080 static int parse_block_size(struct cache_args *ca, struct dm_arg_set *as,
2081 char **error)
2082 {
2083 unsigned long block_size;
2084
2085 if (!at_least_one_arg(as, error))
2086 return -EINVAL;
2087
2088 if (kstrtoul(dm_shift_arg(as), 10, &block_size) || !block_size ||
2089 block_size < DATA_DEV_BLOCK_SIZE_MIN_SECTORS ||
2090 block_size > DATA_DEV_BLOCK_SIZE_MAX_SECTORS ||
2091 block_size & (DATA_DEV_BLOCK_SIZE_MIN_SECTORS - 1)) {
2092 *error = "Invalid data block size";
2093 return -EINVAL;
2094 }
2095
2096 if (block_size > ca->cache_sectors) {
2097 *error = "Data block size is larger than the cache device";
2098 return -EINVAL;
2099 }
2100
2101 ca->block_size = block_size;
2102
2103 return 0;
2104 }
2105
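/* Defaults: writeback io mode, metadata format 1, discard passdown enabled. */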
2106 static void init_features(struct cache_features *cf)
2107 {
2108 cf->mode = CM_WRITE;
2109 cf->io_mode = CM_IO_WRITEBACK;
2110 cf->metadata_version = 1;
2111 cf->discard_passdown = true;
2112 }
2113
2114 static int parse_features(struct cache_args *ca, struct dm_arg_set *as,
2115 char **error)
2116 {
2117 static const struct dm_arg _args[] = {
2118 {0, 3, "Invalid number of cache feature arguments"},
2119 };
2120
2121 int r, mode_ctr = 0;
2122 unsigned argc;
2123 const char *arg;
2124 struct cache_features *cf = &ca->features;
2125
2126 init_features(cf);
2127
2128 r = dm_read_arg_group(_args, as, &argc, error);
2129 if (r)
2130 return -EINVAL;
2131
2132 while (argc--) {
2133 arg = dm_shift_arg(as);
2134
2135 if (!strcasecmp(arg, "writeback")) {
2136 cf->io_mode = CM_IO_WRITEBACK;
2137 mode_ctr++;
2138 }
2139
2140 else if (!strcasecmp(arg, "writethrough")) {
2141 cf->io_mode = CM_IO_WRITETHROUGH;
2142 mode_ctr++;
2143 }
2144
2145 else if (!strcasecmp(arg, "passthrough")) {
2146 cf->io_mode = CM_IO_PASSTHROUGH;
2147 mode_ctr++;
2148 }
2149
2150 else if (!strcasecmp(arg, "metadata2"))
2151 cf->metadata_version = 2;
2152
2153 else if (!strcasecmp(arg, "no_discard_passdown"))
2154 cf->discard_passdown = false;
2155
2156 else {
2157 *error = "Unrecognised cache feature requested";
2158 return -EINVAL;
2159 }
2160 }
2161
2162 if (mode_ctr > 1) {
2163 *error = "Duplicate cache io_mode features requested";
2164 return -EINVAL;
2165 }
2166
2167 return 0;
2168 }
2169
2170 static int parse_policy(struct cache_args *ca, struct dm_arg_set *as,
2171 char **error)
2172 {
2173 static const struct dm_arg _args[] = {
2174 {0, 1024, "Invalid number of policy arguments"},
2175 };
2176
2177 int r;
2178
2179 if (!at_least_one_arg(as, error))
2180 return -EINVAL;
2181
2182 ca->policy_name = dm_shift_arg(as);
2183
2184 r = dm_read_arg_group(_args, as, &ca->policy_argc, error);
2185 if (r)
2186 return -EINVAL;
2187
2188 ca->policy_argv = (const char **)as->argv;
2189 dm_consume_args(as, ca->policy_argc);
2190
2191 return 0;
2192 }
2193
2194 static int parse_cache_args(struct cache_args *ca, int argc, char **argv,
2195 char **error)
2196 {
2197 int r;
2198 struct dm_arg_set as;
2199
2200 as.argc = argc;
2201 as.argv = argv;
2202
2203 r = parse_metadata_dev(ca, &as, error);
2204 if (r)
2205 return r;
2206
2207 r = parse_cache_dev(ca, &as, error);
2208 if (r)
2209 return r;
2210
2211 r = parse_origin_dev(ca, &as, error);
2212 if (r)
2213 return r;
2214
2215 r = parse_block_size(ca, &as, error);
2216 if (r)
2217 return r;
2218
2219 r = parse_features(ca, &as, error);
2220 if (r)
2221 return r;
2222
2223 r = parse_policy(ca, &as, error);
2224 if (r)
2225 return r;
2226
2227 return 0;
2228 }
2229
2230
2231
2232 static struct kmem_cache *migration_cache;
2233
2234 #define NOT_CORE_OPTION 1
2235
2236 static int process_config_option(struct cache *cache, const char *key, const char *value)
2237 {
2238 unsigned long tmp;
2239
2240 if (!strcasecmp(key, "migration_threshold")) {
2241 if (kstrtoul(value, 10, &tmp))
2242 return -EINVAL;
2243
2244 cache->migration_threshold = tmp;
2245 return 0;
2246 }
2247
2248 return NOT_CORE_OPTION;
2249 }
2250
2251 static int set_config_value(struct cache *cache, const char *key, const char *value)
2252 {
2253 int r = process_config_option(cache, key, value);
2254
2255 if (r == NOT_CORE_OPTION)
2256 r = policy_set_config_value(cache->policy, key, value);
2257
2258 if (r)
2259 DMWARN("bad config value for %s: %s", key, value);
2260
2261 return r;
2262 }
2263
2264 static int set_config_values(struct cache *cache, int argc, const char **argv)
2265 {
2266 int r = 0;
2267
2268 if (argc & 1) {
2269 DMWARN("Odd number of policy arguments given but they should be <key> <value> pairs.");
2270 return -EINVAL;
2271 }
2272
2273 while (argc) {
2274 r = set_config_value(cache, argv[0], argv[1]);
2275 if (r)
2276 break;
2277
2278 argc -= 2;
2279 argv += 2;
2280 }
2281
2282 return r;
2283 }
2284
2285 static int create_cache_policy(struct cache *cache, struct cache_args *ca,
2286 char **error)
2287 {
2288 struct dm_cache_policy *p = dm_cache_policy_create(ca->policy_name,
2289 cache->cache_size,
2290 cache->origin_sectors,
2291 cache->sectors_per_block);
2292 if (IS_ERR(p)) {
2293 *error = "Error creating cache's policy";
2294 return PTR_ERR(p);
2295 }
2296 cache->policy = p;
2297 BUG_ON(!cache->policy);
2298
2299 return 0;
2300 }
2301
/*
 * We want the discard block size to be at least the size of the cache
 * block size and to have no more than 2^14 discard bits in the bitset.
 */
2306 #define MAX_DISCARD_BLOCKS (1 << 14)
2307
2308 static bool too_many_discard_blocks(sector_t discard_block_size,
2309 sector_t origin_size)
2310 {
2311 (void) sector_div(origin_size, discard_block_size);
2312
2313 return origin_size > MAX_DISCARD_BLOCKS;
2314 }
2315
2316 static sector_t calculate_discard_block_size(sector_t cache_block_size,
2317 sector_t origin_size)
2318 {
2319 sector_t discard_block_size = cache_block_size;
2320
2321 if (origin_size)
2322 while (too_many_discard_blocks(discard_block_size, origin_size))
2323 discard_block_size *= 2;
2324
2325 return discard_block_size;
2326 }
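
/*
 * Worked example: a 2 TiB origin (2^32 sectors) with 512 KiB cache blocks
 * (1024 sectors) would need 2^22 discard blocks at one per cache block, so
 * the discard block size is doubled eight times to 2^18 sectors (128 MiB),
 * bringing the count down to the 2^14 limit.
 */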
2327
2328 static void set_cache_size(struct cache *cache, dm_cblock_t size)
2329 {
2330 dm_block_t nr_blocks = from_cblock(size);
2331
2332 if (nr_blocks > (1 << 20) && cache->cache_size != size)
2333 DMWARN_LIMIT("You have created a cache device with a lot of individual cache blocks (%llu)\n"
2334 "All these mappings can consume a lot of kernel memory, and take some time to read/write.\n"
2335 "Please consider increasing the cache block size to reduce the overall cache block count.",
2336 (unsigned long long) nr_blocks);
2337
2338 cache->cache_size = size;
2339 }
2340
2341 #define DEFAULT_MIGRATION_THRESHOLD 2048
2342
2343 static int cache_create(struct cache_args *ca, struct cache **result)
2344 {
2345 int r = 0;
2346 char **error = &ca->ti->error;
2347 struct cache *cache;
2348 struct dm_target *ti = ca->ti;
2349 dm_block_t origin_blocks;
2350 struct dm_cache_metadata *cmd;
2351 bool may_format = ca->features.mode == CM_WRITE;
2352
2353 cache = kzalloc(sizeof(*cache), GFP_KERNEL);
2354 if (!cache)
2355 return -ENOMEM;
2356
2357 cache->ti = ca->ti;
2358 ti->private = cache;
2359 ti->accounts_remapped_io = true;
2360 ti->num_flush_bios = 2;
2361 ti->flush_supported = true;
2362
2363 ti->num_discard_bios = 1;
2364 ti->discards_supported = true;
2365
2366 ti->per_io_data_size = sizeof(struct per_bio_data);
2367
2368 cache->features = ca->features;
2369 if (writethrough_mode(cache)) {
		/* Create bioset for writethrough bios issued to origin */
2371 r = bioset_init(&cache->bs, BIO_POOL_SIZE, 0, 0);
2372 if (r)
2373 goto bad;
2374 }
2375
2376 cache->metadata_dev = ca->metadata_dev;
2377 cache->origin_dev = ca->origin_dev;
2378 cache->cache_dev = ca->cache_dev;
2379
2380 ca->metadata_dev = ca->origin_dev = ca->cache_dev = NULL;
2381
2382 origin_blocks = cache->origin_sectors = ca->origin_sectors;
2383 origin_blocks = block_div(origin_blocks, ca->block_size);
2384 cache->origin_blocks = to_oblock(origin_blocks);
2385
2386 cache->sectors_per_block = ca->block_size;
2387 if (dm_set_target_max_io_len(ti, cache->sectors_per_block)) {
2388 r = -EINVAL;
2389 goto bad;
2390 }
2391
2392 if (ca->block_size & (ca->block_size - 1)) {
2393 dm_block_t cache_size = ca->cache_sectors;
2394
2395 cache->sectors_per_block_shift = -1;
2396 cache_size = block_div(cache_size, ca->block_size);
2397 set_cache_size(cache, to_cblock(cache_size));
2398 } else {
2399 cache->sectors_per_block_shift = __ffs(ca->block_size);
2400 set_cache_size(cache, to_cblock(ca->cache_sectors >> cache->sectors_per_block_shift));
2401 }
2402
2403 r = create_cache_policy(cache, ca, error);
2404 if (r)
2405 goto bad;
2406
2407 cache->policy_nr_args = ca->policy_argc;
2408 cache->migration_threshold = DEFAULT_MIGRATION_THRESHOLD;
2409
2410 r = set_config_values(cache, ca->policy_argc, ca->policy_argv);
2411 if (r) {
2412 *error = "Error setting cache policy's config values";
2413 goto bad;
2414 }
2415
2416 cmd = dm_cache_metadata_open(cache->metadata_dev->bdev,
2417 ca->block_size, may_format,
2418 dm_cache_policy_get_hint_size(cache->policy),
2419 ca->features.metadata_version);
2420 if (IS_ERR(cmd)) {
2421 *error = "Error creating metadata object";
2422 r = PTR_ERR(cmd);
2423 goto bad;
2424 }
2425 cache->cmd = cmd;
2426 set_cache_mode(cache, CM_WRITE);
2427 if (get_cache_mode(cache) != CM_WRITE) {
2428 *error = "Unable to get write access to metadata, please check/repair metadata.";
2429 r = -EINVAL;
2430 goto bad;
2431 }
2432
2433 if (passthrough_mode(cache)) {
2434 bool all_clean;
2435
2436 r = dm_cache_metadata_all_clean(cache->cmd, &all_clean);
2437 if (r) {
2438 *error = "dm_cache_metadata_all_clean() failed";
2439 goto bad;
2440 }
2441
2442 if (!all_clean) {
2443 *error = "Cannot enter passthrough mode unless all blocks are clean";
2444 r = -EINVAL;
2445 goto bad;
2446 }
2447
2448 policy_allow_migrations(cache->policy, false);
2449 }
2450
2451 spin_lock_init(&cache->lock);
2452 bio_list_init(&cache->deferred_bios);
2453 atomic_set(&cache->nr_allocated_migrations, 0);
2454 atomic_set(&cache->nr_io_migrations, 0);
2455 init_waitqueue_head(&cache->migration_wait);
2456
2457 r = -ENOMEM;
2458 atomic_set(&cache->nr_dirty, 0);
2459 cache->dirty_bitset = alloc_bitset(from_cblock(cache->cache_size));
2460 if (!cache->dirty_bitset) {
2461 *error = "could not allocate dirty bitset";
2462 goto bad;
2463 }
2464 clear_bitset(cache->dirty_bitset, from_cblock(cache->cache_size));
2465
2466 cache->discard_block_size =
2467 calculate_discard_block_size(cache->sectors_per_block,
2468 cache->origin_sectors);
2469 cache->discard_nr_blocks = to_dblock(dm_sector_div_up(cache->origin_sectors,
2470 cache->discard_block_size));
2471 cache->discard_bitset = alloc_bitset(from_dblock(cache->discard_nr_blocks));
2472 if (!cache->discard_bitset) {
2473 *error = "could not allocate discard bitset";
2474 goto bad;
2475 }
2476 clear_bitset(cache->discard_bitset, from_dblock(cache->discard_nr_blocks));
2477
2478 cache->copier = dm_kcopyd_client_create(&dm_kcopyd_throttle);
2479 if (IS_ERR(cache->copier)) {
2480 *error = "could not create kcopyd client";
2481 r = PTR_ERR(cache->copier);
2482 goto bad;
2483 }
2484
2485 cache->wq = alloc_workqueue("dm-" DM_MSG_PREFIX, WQ_MEM_RECLAIM, 0);
2486 if (!cache->wq) {
2487 *error = "could not create workqueue for metadata object";
2488 goto bad;
2489 }
2490 INIT_WORK(&cache->deferred_bio_worker, process_deferred_bios);
2491 INIT_WORK(&cache->migration_worker, check_migrations);
2492 INIT_DELAYED_WORK(&cache->waker, do_waker);
2493
2494 cache->prison = dm_bio_prison_create_v2(cache->wq);
2495 if (!cache->prison) {
2496 *error = "could not create bio prison";
2497 goto bad;
2498 }
2499
2500 r = mempool_init_slab_pool(&cache->migration_pool, MIGRATION_POOL_SIZE,
2501 migration_cache);
2502 if (r) {
2503 *error = "Error creating cache's migration mempool";
2504 goto bad;
2505 }
2506
2507 cache->need_tick_bio = true;
2508 cache->sized = false;
2509 cache->invalidate = false;
2510 cache->commit_requested = false;
2511 cache->loaded_mappings = false;
2512 cache->loaded_discards = false;
2513
2514 load_stats(cache);
2515
2516 atomic_set(&cache->stats.demotion, 0);
2517 atomic_set(&cache->stats.promotion, 0);
2518 atomic_set(&cache->stats.copies_avoided, 0);
2519 atomic_set(&cache->stats.cache_cell_clash, 0);
2520 atomic_set(&cache->stats.commit_count, 0);
2521 atomic_set(&cache->stats.discard_count, 0);
2522
2523 spin_lock_init(&cache->invalidation_lock);
2524 INIT_LIST_HEAD(&cache->invalidation_requests);
2525
2526 batcher_init(&cache->committer, commit_op, cache,
2527 issue_op, cache, cache->wq);
2528 dm_iot_init(&cache->tracker);
2529
2530 init_rwsem(&cache->background_work_lock);
2531 prevent_background_work(cache);
2532
2533 *result = cache;
2534 return 0;
2535 bad:
2536 destroy(cache);
2537 return r;
2538 }
2539
2540 static int copy_ctr_args(struct cache *cache, int argc, const char **argv)
2541 {
2542 unsigned i;
2543 const char **copy;
2544
2545 copy = kcalloc(argc, sizeof(*copy), GFP_KERNEL);
2546 if (!copy)
2547 return -ENOMEM;
2548 for (i = 0; i < argc; i++) {
2549 copy[i] = kstrdup(argv[i], GFP_KERNEL);
2550 if (!copy[i]) {
2551 while (i--)
2552 kfree(copy[i]);
2553 kfree(copy);
2554 return -ENOMEM;
2555 }
2556 }
2557
2558 cache->nr_ctr_args = argc;
2559 cache->ctr_args = copy;
2560
2561 return 0;
2562 }
2563
2564 static int cache_ctr(struct dm_target *ti, unsigned argc, char **argv)
2565 {
2566 int r = -EINVAL;
2567 struct cache_args *ca;
2568 struct cache *cache = NULL;
2569
2570 ca = kzalloc(sizeof(*ca), GFP_KERNEL);
2571 if (!ca) {
2572 ti->error = "Error allocating memory for cache";
2573 return -ENOMEM;
2574 }
2575 ca->ti = ti;
2576
2577 r = parse_cache_args(ca, argc, argv, &ti->error);
2578 if (r)
2579 goto out;
2580
2581 r = cache_create(ca, &cache);
2582 if (r)
2583 goto out;
2584
2585 r = copy_ctr_args(cache, argc - 3, (const char **)argv + 3);
2586 if (r) {
2587 destroy(cache);
2588 goto out;
2589 }
2590
2591 ti->private = cache;
2592 out:
2593 destroy_cache_args(ca);
2594 return r;
2595 }
2596
2597
2598
2599 static int cache_map(struct dm_target *ti, struct bio *bio)
2600 {
2601 struct cache *cache = ti->private;
2602
2603 int r;
2604 bool commit_needed;
2605 dm_oblock_t block = get_bio_block(cache, bio);
2606
2607 init_per_bio_data(bio);
2608 if (unlikely(from_oblock(block) >= from_oblock(cache->origin_blocks))) {
2609
2610
2611
2612
2613
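/*
 * The block lies beyond the end of the origin device.  This can only
 * happen for a partial block at the end of the origin, which we don't
 * cache; remap straight to the origin.
 */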
2614 remap_to_origin(cache, bio);
2615 accounted_begin(cache, bio);
2616 return DM_MAPIO_REMAPPED;
2617 }
2618
2619 if (discard_or_flush(bio)) {
2620 defer_bio(cache, bio);
2621 return DM_MAPIO_SUBMITTED;
2622 }
2623
2624 r = map_bio(cache, bio, block, &commit_needed);
2625 if (commit_needed)
2626 schedule_commit(&cache->committer);
2627
2628 return r;
2629 }
2630
2631 static int cache_end_io(struct dm_target *ti, struct bio *bio, blk_status_t *error)
2632 {
2633 struct cache *cache = ti->private;
2634 unsigned long flags;
2635 struct per_bio_data *pb = get_per_bio_data(bio);
2636
2637 if (pb->tick) {
2638 policy_tick(cache->policy, false);
2639
2640 spin_lock_irqsave(&cache->lock, flags);
2641 cache->need_tick_bio = true;
2642 spin_unlock_irqrestore(&cache->lock, flags);
2643 }
2644
2645 bio_drop_shared_lock(cache, bio);
2646 accounted_complete(cache, bio);
2647
2648 return DM_ENDIO_DONE;
2649 }
2650
2651 static int write_dirty_bitset(struct cache *cache)
2652 {
2653 int r;
2654
2655 if (get_cache_mode(cache) >= CM_READ_ONLY)
2656 return -EINVAL;
2657
2658 r = dm_cache_set_dirty_bits(cache->cmd, from_cblock(cache->cache_size), cache->dirty_bitset);
2659 if (r)
2660 metadata_operation_failed(cache, "dm_cache_set_dirty_bits", r);
2661
2662 return r;
2663 }
2664
2665 static int write_discard_bitset(struct cache *cache)
2666 {
2667 unsigned int i;
int r;
2668
2669 if (get_cache_mode(cache) >= CM_READ_ONLY)
2670 return -EINVAL;
2671
2672 r = dm_cache_discard_bitset_resize(cache->cmd, cache->discard_block_size,
2673 cache->discard_nr_blocks);
2674 if (r) {
2675 DMERR("%s: could not resize on-disk discard bitset", cache_device_name(cache));
2676 metadata_operation_failed(cache, "dm_cache_discard_bitset_resize", r);
2677 return r;
2678 }
2679
2680 for (i = 0; i < from_dblock(cache->discard_nr_blocks); i++) {
2681 r = dm_cache_set_discard(cache->cmd, to_dblock(i),
2682 is_discarded(cache, to_dblock(i)));
2683 if (r) {
2684 metadata_operation_failed(cache, "dm_cache_set_discard", r);
2685 return r;
2686 }
2687 }
2688
2689 return 0;
2690 }
2691
2692 static int write_hints(struct cache *cache)
2693 {
2694 int r;
2695
2696 if (get_cache_mode(cache) >= CM_READ_ONLY)
2697 return -EINVAL;
2698
2699 r = dm_cache_write_hints(cache->cmd, cache->policy);
2700 if (r) {
2701 metadata_operation_failed(cache, "dm_cache_write_hints", r);
2702 return r;
2703 }
2704
2705 return 0;
2706 }
2707
2708
2709
2710
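/*
 * Flush the dirty bitset, discard bitset, stats and policy hints to the
 * metadata device, then commit.  Returns true only if every step
 * succeeded.
 */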
2711 static bool sync_metadata(struct cache *cache)
2712 {
2713 int r1, r2, r3, r4;
2714
2715 r1 = write_dirty_bitset(cache);
2716 if (r1)
2717 DMERR("%s: could not write dirty bitset", cache_device_name(cache));
2718
2719 r2 = write_discard_bitset(cache);
2720 if (r2)
2721 DMERR("%s: could not write discard bitset", cache_device_name(cache));
2722
2723 save_stats(cache);
2724
2725 r3 = write_hints(cache);
2726 if (r3)
2727 DMERR("%s: could not write hints", cache_device_name(cache));
2728
2729
2730
2731
2732
2733
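/*
 * Commit even if some of the writes above failed, but only report a
 * fully clean state (the second argument) when all of them succeeded.
 */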
2734 r4 = commit(cache, !r1 && !r2 && !r3);
2735 if (r4)
2736 DMERR("%s: could not write cache metadata", cache_device_name(cache));
2737
2738 return !r1 && !r2 && !r3 && !r4;
2739 }
2740
2741 static void cache_postsuspend(struct dm_target *ti)
2742 {
2743 struct cache *cache = ti->private;
2744
2745 prevent_background_work(cache);
2746 BUG_ON(atomic_read(&cache->nr_io_migrations));
2747
2748 cancel_delayed_work_sync(&cache->waker);
2749 drain_workqueue(cache->wq);
2750 WARN_ON(cache->tracker.in_flight);
2751
2752
2753
2754
2755
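/*
 * Requeue anything still sitting on the deferred list; on a flush
 * suspend the list should already be empty, so this is harmless.
 */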
2756 requeue_deferred_bios(cache);
2757
2758 if (get_cache_mode(cache) == CM_WRITE)
2759 (void) sync_metadata(cache);
2760 }
2761
2762 static int load_mapping(void *context, dm_oblock_t oblock, dm_cblock_t cblock,
2763 bool dirty, uint32_t hint, bool hint_valid)
2764 {
2765 struct cache *cache = context;
2766
2767 if (dirty) {
2768 set_bit(from_cblock(cblock), cache->dirty_bitset);
2769 atomic_inc(&cache->nr_dirty);
2770 } else
2771 clear_bit(from_cblock(cblock), cache->dirty_bitset);
2772
2773 return policy_load_mapping(cache->policy, oblock, cblock, dirty, hint, hint_valid);
2774 }
2775
2776
2777
2778
2779
2780
2781
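/*
 * The discard block size recorded in the metadata is not necessarily
 * the same as the one currently in use, so runs of discarded blocks are
 * accumulated in on-disk units and converted in set_discard_range().
 */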
2782 struct discard_load_info {
2783 struct cache *cache;
2784
2785
2786
2787
2788
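/* These are expressed in units of the on-disk discard block size. */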
2789 dm_block_t block_size;
2790 dm_block_t discard_begin, discard_end;
2791 };
2792
2793 static void discard_load_info_init(struct cache *cache,
2794 struct discard_load_info *li)
2795 {
2796 li->cache = cache;
2797 li->discard_begin = li->discard_end = 0;
2798 }
2799
2800 static void set_discard_range(struct discard_load_info *li)
2801 {
2802 sector_t b, e;
2803
2804 if (li->discard_begin == li->discard_end)
2805 return;
2806
2807
2808
2809
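/* Convert the run to sectors. */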
2810 b = li->discard_begin * li->block_size;
2811 e = li->discard_end * li->block_size;
2812
2813
2814
2815
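/*
 * Then convert back to the current discard block size, rounding the
 * start up and the end down so only whole blocks are marked.
 */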
2816 b = dm_sector_div_up(b, li->cache->discard_block_size);
2817 sector_div(e, li->cache->discard_block_size);
2818
2819
2820
2821
2822
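/* The origin may have shrunk, so clamp to the current bitset size. */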
2823 if (e > from_dblock(li->cache->discard_nr_blocks))
2824 e = from_dblock(li->cache->discard_nr_blocks);
2825
2826 for (; b < e; b++)
2827 set_discard(li->cache, to_dblock(b));
2828 }
2829
2830 static int load_discard(void *context, sector_t discard_block_size,
2831 dm_dblock_t dblock, bool discard)
2832 {
2833 struct discard_load_info *li = context;
2834
2835 li->block_size = discard_block_size;
2836
2837 if (discard) {
2838 if (from_dblock(dblock) == li->discard_end)
2839
2840
2841
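/* Extend the current run of discarded blocks. */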
2842 li->discard_end = li->discard_end + 1ULL;
2843
2844 else {
2845
2846
2847
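/* A new run; flush the previous one first. */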
2848 set_discard_range(li);
2849 li->discard_begin = from_dblock(dblock);
2850 li->discard_end = li->discard_begin + 1ULL;
2851 }
2852 } else {
2853 set_discard_range(li);
2854 li->discard_begin = li->discard_end = 0;
2855 }
2856
2857 return 0;
2858 }
2859
2860 static dm_cblock_t get_cache_dev_size(struct cache *cache)
2861 {
2862 sector_t size = get_dev_size(cache->cache_dev);
2863 (void) sector_div(size, cache->sectors_per_block);
2864 return to_cblock(size);
2865 }
2866
2867 static bool can_resize(struct cache *cache, dm_cblock_t new_size)
2868 {
2869 if (from_cblock(new_size) > from_cblock(cache->cache_size)) {
2870 if (cache->sized) {
2871 DMERR("%s: unable to extend cache due to missing cache table reload",
2872 cache_device_name(cache));
2873 return false;
2874 }
2875 }
2876
2877
2878
2879
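/* We can't drop a dirty block when shrinking the cache. */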
2880 while (from_cblock(new_size) < from_cblock(cache->cache_size)) {
2881 if (is_dirty(cache, new_size)) {
2882 DMERR("%s: unable to shrink cache; cache block %llu is dirty",
2883 cache_device_name(cache),
2884 (unsigned long long) from_cblock(new_size));
2885 return false;
2886 }
2887 new_size = to_cblock(from_cblock(new_size) + 1);
2888 }
2889
2890 return true;
2891 }
2892
2893 static int resize_cache_dev(struct cache *cache, dm_cblock_t new_size)
2894 {
2895 int r;
2896
2897 r = dm_cache_resize(cache->cmd, new_size);
2898 if (r) {
2899 DMERR("%s: could not resize cache metadata", cache_device_name(cache));
2900 metadata_operation_failed(cache, "dm_cache_resize", r);
2901 return r;
2902 }
2903
2904 set_cache_size(cache, new_size);
2905
2906 return 0;
2907 }
2908
2909 static int cache_preresume(struct dm_target *ti)
2910 {
2911 int r = 0;
2912 struct cache *cache = ti->private;
2913 dm_cblock_t csize = get_cache_dev_size(cache);
2914
2915
2916
2917
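/* Check whether the fast (cache) device has changed size. */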
2918 if (!cache->sized) {
2919 r = resize_cache_dev(cache, csize);
2920 if (r)
2921 return r;
2922
2923 cache->sized = true;
2924
2925 } else if (csize != cache->cache_size) {
2926 if (!can_resize(cache, csize))
2927 return -EINVAL;
2928
2929 r = resize_cache_dev(cache, csize);
2930 if (r)
2931 return r;
2932 }
2933
2934 if (!cache->loaded_mappings) {
2935 r = dm_cache_load_mappings(cache->cmd, cache->policy,
2936 load_mapping, cache);
2937 if (r) {
2938 DMERR("%s: could not load cache mappings", cache_device_name(cache));
2939 metadata_operation_failed(cache, "dm_cache_load_mappings", r);
2940 return r;
2941 }
2942
2943 cache->loaded_mappings = true;
2944 }
2945
2946 if (!cache->loaded_discards) {
2947 struct discard_load_info li;
2948
2949
2950
2951
2952
2953
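/*
 * The discard block size may have changed, and with it the size of the
 * discard bitset, so start from an all-clear bitset and let
 * load_discard() repopulate it.
 */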
2954 clear_bitset(cache->discard_bitset, from_dblock(cache->discard_nr_blocks));
2955
2956 discard_load_info_init(cache, &li);
2957 r = dm_cache_load_discards(cache->cmd, load_discard, &li);
2958 if (r) {
2959 DMERR("%s: could not load origin discards", cache_device_name(cache));
2960 metadata_operation_failed(cache, "dm_cache_load_discards", r);
2961 return r;
2962 }
2963 set_discard_range(&li);
2964
2965 cache->loaded_discards = true;
2966 }
2967
2968 return r;
2969 }
2970
2971 static void cache_resume(struct dm_target *ti)
2972 {
2973 struct cache *cache = ti->private;
2974
2975 cache->need_tick_bio = true;
2976 allow_background_work(cache);
2977 do_waker(&cache->waker.work);
2978 }
2979
2980 static void emit_flags(struct cache *cache, char *result,
2981 unsigned maxlen, ssize_t *sz_ptr)
2982 {
2983 ssize_t sz = *sz_ptr;
2984 struct cache_features *cf = &cache->features;
2985 unsigned count = (cf->metadata_version == 2) + !cf->discard_passdown + 1;
2986
2987 DMEMIT("%u ", count);
2988
2989 if (cf->metadata_version == 2)
2990 DMEMIT("metadata2 ");
2991
2992 if (writethrough_mode(cache))
2993 DMEMIT("writethrough ");
2994
2995 else if (passthrough_mode(cache))
2996 DMEMIT("passthrough ");
2997
2998 else if (writeback_mode(cache))
2999 DMEMIT("writeback ");
3000
3001 else {
3002 DMEMIT("unknown ");
3003 DMERR("%s: internal error: unknown io mode: %d",
3004 cache_device_name(cache), (int) cf->io_mode);
3005 }
3006
3007 if (!cf->discard_passdown)
3008 DMEMIT("no_discard_passdown ");
3009
3010 *sz_ptr = sz;
3011 }
3012
3013
3014
3015
3016
3017
3018
3019
3020
3021
3022
3023
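/*
 * Status output, in the order it is emitted below:
 *
 * <metadata block size> <#used metadata blocks>/<#total metadata blocks>
 * <cache block size> <#used cache blocks>/<#total cache blocks>
 * <#read hits> <#read misses> <#write hits> <#write misses>
 * <#demotions> <#promotions> <#dirty>
 * <#features> <features>*
 * <#core args> <core args>
 * <policy name> <#policy args> <policy args>*
 * <cache metadata mode (rw|ro)> <needs_check|->
 */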
3024 static void cache_status(struct dm_target *ti, status_type_t type,
3025 unsigned status_flags, char *result, unsigned maxlen)
3026 {
3027 int r = 0;
3028 unsigned i;
3029 ssize_t sz = 0;
3030 dm_block_t nr_free_blocks_metadata = 0;
3031 dm_block_t nr_blocks_metadata = 0;
3032 char buf[BDEVNAME_SIZE];
3033 struct cache *cache = ti->private;
3034 dm_cblock_t residency;
3035 bool needs_check;
3036
3037 switch (type) {
3038 case STATUSTYPE_INFO:
3039 if (get_cache_mode(cache) == CM_FAIL) {
3040 DMEMIT("Fail");
3041 break;
3042 }
3043
3044
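/* Commit so the counts reported below aren't out of date. */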
3045 if (!(status_flags & DM_STATUS_NOFLUSH_FLAG) && !dm_suspended(ti))
3046 (void) commit(cache, false);
3047
3048 r = dm_cache_get_free_metadata_block_count(cache->cmd, &nr_free_blocks_metadata);
3049 if (r) {
3050 DMERR("%s: dm_cache_get_free_metadata_block_count returned %d",
3051 cache_device_name(cache), r);
3052 goto err;
3053 }
3054
3055 r = dm_cache_get_metadata_dev_size(cache->cmd, &nr_blocks_metadata);
3056 if (r) {
3057 DMERR("%s: dm_cache_get_metadata_dev_size returned %d",
3058 cache_device_name(cache), r);
3059 goto err;
3060 }
3061
3062 residency = policy_residency(cache->policy);
3063
3064 DMEMIT("%u %llu/%llu %llu %llu/%llu %u %u %u %u %u %u %lu ",
3065 (unsigned)DM_CACHE_METADATA_BLOCK_SIZE,
3066 (unsigned long long)(nr_blocks_metadata - nr_free_blocks_metadata),
3067 (unsigned long long)nr_blocks_metadata,
3068 (unsigned long long)cache->sectors_per_block,
3069 (unsigned long long) from_cblock(residency),
3070 (unsigned long long) from_cblock(cache->cache_size),
3071 (unsigned) atomic_read(&cache->stats.read_hit),
3072 (unsigned) atomic_read(&cache->stats.read_miss),
3073 (unsigned) atomic_read(&cache->stats.write_hit),
3074 (unsigned) atomic_read(&cache->stats.write_miss),
3075 (unsigned) atomic_read(&cache->stats.demotion),
3076 (unsigned) atomic_read(&cache->stats.promotion),
3077 (unsigned long) atomic_read(&cache->nr_dirty));
3078
3079 emit_flags(cache, result, maxlen, &sz);
3080
3081 DMEMIT("2 migration_threshold %llu ", (unsigned long long) cache->migration_threshold);
3082
3083 DMEMIT("%s ", dm_cache_policy_get_name(cache->policy));
3084 if (sz < maxlen) {
3085 r = policy_emit_config_values(cache->policy, result, maxlen, &sz);
3086 if (r)
3087 DMERR("%s: policy_emit_config_values returned %d",
3088 cache_device_name(cache), r);
3089 }
3090
3091 if (get_cache_mode(cache) == CM_READ_ONLY)
3092 DMEMIT("ro ");
3093 else
3094 DMEMIT("rw ");
3095
3096 r = dm_cache_metadata_needs_check(cache->cmd, &needs_check);
3097
3098 if (r || needs_check)
3099 DMEMIT("needs_check ");
3100 else
3101 DMEMIT("- ");
3102
3103 break;
3104
3105 case STATUSTYPE_TABLE:
3106 format_dev_t(buf, cache->metadata_dev->bdev->bd_dev);
3107 DMEMIT("%s ", buf);
3108 format_dev_t(buf, cache->cache_dev->bdev->bd_dev);
3109 DMEMIT("%s ", buf);
3110 format_dev_t(buf, cache->origin_dev->bdev->bd_dev);
3111 DMEMIT("%s", buf);
3112
3113 for (i = 0; i < cache->nr_ctr_args - 1; i++)
3114 DMEMIT(" %s", cache->ctr_args[i]);
3115 if (cache->nr_ctr_args)
3116 DMEMIT(" %s", cache->ctr_args[cache->nr_ctr_args - 1]);
3117 break;
3118
3119 case STATUSTYPE_IMA:
3120 DMEMIT_TARGET_NAME_VERSION(ti->type);
3121 if (get_cache_mode(cache) == CM_FAIL)
3122 DMEMIT(",metadata_mode=fail");
3123 else if (get_cache_mode(cache) == CM_READ_ONLY)
3124 DMEMIT(",metadata_mode=ro");
3125 else
3126 DMEMIT(",metadata_mode=rw");
3127
3128 format_dev_t(buf, cache->metadata_dev->bdev->bd_dev);
3129 DMEMIT(",cache_metadata_device=%s", buf);
3130 format_dev_t(buf, cache->cache_dev->bdev->bd_dev);
3131 DMEMIT(",cache_device=%s", buf);
3132 format_dev_t(buf, cache->origin_dev->bdev->bd_dev);
3133 DMEMIT(",cache_origin_device=%s", buf);
3134 DMEMIT(",writethrough=%c", writethrough_mode(cache) ? 'y' : 'n');
3135 DMEMIT(",writeback=%c", writeback_mode(cache) ? 'y' : 'n');
3136 DMEMIT(",passthrough=%c", passthrough_mode(cache) ? 'y' : 'n');
3137 DMEMIT(",metadata2=%c", cache->features.metadata_version == 2 ? 'y' : 'n');
3138 DMEMIT(",no_discard_passdown=%c", cache->features.discard_passdown ? 'n' : 'y');
3139 DMEMIT(";");
3140 break;
3141 }
3142
3143 return;
3144
3145 err:
3146 DMEMIT("Error");
3147 }
3148
3149
3150
3151
3152
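/*
 * A half-open range of cache blocks: begin is included, end is not.
 */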
3153 struct cblock_range {
3154 dm_cblock_t begin;
3155 dm_cblock_t end;
3156 };
3157
3158
3159
3160
3161
3162
3163
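/*
 * Two forms are accepted: a single cblock, e.g. "3456", or a
 * begin-end pair, e.g. "123-234".
 */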
3164 static int parse_cblock_range(struct cache *cache, const char *str,
3165 struct cblock_range *result)
3166 {
3167 char dummy;
3168 uint64_t b, e;
3169 int r;
3170
3171
3172
3173
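/* Try the "<begin>-<end>" form first. */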
3174 r = sscanf(str, "%llu-%llu%c", &b, &e, &dummy);
3175 if (r < 0)
3176 return r;
3177
3178 if (r == 2) {
3179 result->begin = to_cblock(b);
3180 result->end = to_cblock(e);
3181 return 0;
3182 }
3183
3184
3185
3186
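/* That didn't match, so try a single cblock. */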
3187 r = sscanf(str, "%llu%c", &b, &dummy);
3188 if (r < 0)
3189 return r;
3190
3191 if (r == 1) {
3192 result->begin = to_cblock(b);
3193 result->end = to_cblock(from_cblock(result->begin) + 1u);
3194 return 0;
3195 }
3196
3197 DMERR("%s: invalid cblock range '%s'", cache_device_name(cache), str);
3198 return -EINVAL;
3199 }
3200
3201 static int validate_cblock_range(struct cache *cache, struct cblock_range *range)
3202 {
3203 uint64_t b = from_cblock(range->begin);
3204 uint64_t e = from_cblock(range->end);
3205 uint64_t n = from_cblock(cache->cache_size);
3206
3207 if (b >= n) {
3208 DMERR("%s: begin cblock out of range: %llu >= %llu",
3209 cache_device_name(cache), b, n);
3210 return -EINVAL;
3211 }
3212
3213 if (e > n) {
3214 DMERR("%s: end cblock out of range: %llu > %llu",
3215 cache_device_name(cache), e, n);
3216 return -EINVAL;
3217 }
3218
3219 if (b >= e) {
3220 DMERR("%s: invalid cblock range: %llu >= %llu",
3221 cache_device_name(cache), b, e);
3222 return -EINVAL;
3223 }
3224
3225 return 0;
3226 }
3227
3228 static inline dm_cblock_t cblock_succ(dm_cblock_t b)
3229 {
3230 return to_cblock(from_cblock(b) + 1);
3231 }
3232
3233 static int request_invalidation(struct cache *cache, struct cblock_range *range)
3234 {
3235 int r = 0;
3236
3237
3238
3239
3240
3241
3242
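/*
 * The caller has already checked that the cache is in passthrough mode,
 * so there is no dirty data to lose; walk the range invalidating one
 * cblock at a time and request a commit afterwards.
 */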
3243 while (range->begin != range->end) {
3244 r = invalidate_cblock(cache, range->begin);
3245 if (r)
3246 return r;
3247
3248 range->begin = cblock_succ(range->begin);
3249 }
3250
3251 cache->commit_requested = true;
3252 return r;
3253 }
3254
3255 static int process_invalidate_cblocks_message(struct cache *cache, unsigned count,
3256 const char **cblock_ranges)
3257 {
3258 int r = 0;
3259 unsigned i;
3260 struct cblock_range range;
3261
3262 if (!passthrough_mode(cache)) {
3263 DMERR("%s: cache has to be in passthrough mode for invalidation",
3264 cache_device_name(cache));
3265 return -EPERM;
3266 }
3267
3268 for (i = 0; i < count; i++) {
3269 r = parse_cblock_range(cache, cblock_ranges[i], &range);
3270 if (r)
3271 break;
3272
3273 r = validate_cblock_range(cache, &range);
3274 if (r)
3275 break;
3276
3277
3278
3279
3280 r = request_invalidation(cache, &range);
3281 if (r)
3282 break;
3283 }
3284
3285 return r;
3286 }
3287
3288
3289
3290
3291
3292
3293
3294
3295
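/*
 * Supported messages:
 *
 *   "<key> <value>"                             - set a config value,
 *                                                 e.g. "migration_threshold 2048"
 *   "invalidate_cblocks [<cblock>|<begin>-<end>]*"
 *
 * Illustrative use via dmsetup (assuming a cache device named "cache"):
 *
 *   dmsetup message cache 0 invalidate_cblocks 3456 5000-5999
 */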
3296 static int cache_message(struct dm_target *ti, unsigned argc, char **argv,
3297 char *result, unsigned maxlen)
3298 {
3299 struct cache *cache = ti->private;
3300
3301 if (!argc)
3302 return -EINVAL;
3303
3304 if (get_cache_mode(cache) >= CM_READ_ONLY) {
3305 DMERR("%s: unable to service cache target messages in READ_ONLY or FAIL mode",
3306 cache_device_name(cache));
3307 return -EOPNOTSUPP;
3308 }
3309
3310 if (!strcasecmp(argv[0], "invalidate_cblocks"))
3311 return process_invalidate_cblocks_message(cache, argc - 1, (const char **) argv + 1);
3312
3313 if (argc != 2)
3314 return -EINVAL;
3315
3316 return set_config_value(cache, argv[0], argv[1]);
3317 }
3318
3319 static int cache_iterate_devices(struct dm_target *ti,
3320 iterate_devices_callout_fn fn, void *data)
3321 {
3322 int r = 0;
3323 struct cache *cache = ti->private;
3324
3325 r = fn(ti, cache->cache_dev, 0, get_dev_size(cache->cache_dev), data);
3326 if (!r)
3327 r = fn(ti, cache->origin_dev, 0, ti->len, data);
3328
3329 return r;
3330 }
3331
3332
3333
3334
3335
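/*
 * If discard passdown was requested, check that the origin device
 * actually supports discards at least a cache block in size; otherwise
 * quietly disable passdown.
 */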
3336 static void disable_passdown_if_not_supported(struct cache *cache)
3337 {
3338 struct block_device *origin_bdev = cache->origin_dev->bdev;
3339 struct queue_limits *origin_limits = &bdev_get_queue(origin_bdev)->limits;
3340 const char *reason = NULL;
3341
3342 if (!cache->features.discard_passdown)
3343 return;
3344
3345 if (!bdev_max_discard_sectors(origin_bdev))
3346 reason = "discard unsupported";
3347
3348 else if (origin_limits->max_discard_sectors < cache->sectors_per_block)
3349 reason = "max discard sectors smaller than a block";
3350
3351 if (reason) {
3352 DMWARN("Origin device (%pg) %s: Disabling discard passdown.",
3353 origin_bdev, reason);
3354 cache->features.discard_passdown = false;
3355 }
3356 }
3357
3358 static void set_discard_limits(struct cache *cache, struct queue_limits *limits)
3359 {
3360 struct block_device *origin_bdev = cache->origin_dev->bdev;
3361 struct queue_limits *origin_limits = &bdev_get_queue(origin_bdev)->limits;
3362
3363 if (!cache->features.discard_passdown) {
3364
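/* No passdown: advertise the cache's own discard geometry. */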
3365 limits->max_discard_sectors = min_t(sector_t, cache->discard_block_size * 1024,
3366 cache->origin_sectors);
3367 limits->discard_granularity = cache->discard_block_size << SECTOR_SHIFT;
3368 return;
3369 }
3370
3371
3372
3373
3374
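/*
 * Discards are passed down to the origin only, so inherit the origin's
 * limits rather than the stacked ones.
 */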
3375 limits->max_discard_sectors = origin_limits->max_discard_sectors;
3376 limits->max_hw_discard_sectors = origin_limits->max_hw_discard_sectors;
3377 limits->discard_granularity = origin_limits->discard_granularity;
3378 limits->discard_alignment = origin_limits->discard_alignment;
3379 limits->discard_misaligned = origin_limits->discard_misaligned;
3380 }
3381
3382 static void cache_io_hints(struct dm_target *ti, struct queue_limits *limits)
3383 {
3384 struct cache *cache = ti->private;
3385 uint64_t io_opt_sectors = limits->io_opt >> SECTOR_SHIFT;
3386
3387
3388
3389
3390
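/*
 * If the stacked io_opt is already a multiple of the cache block size,
 * leave the system-determined limits alone; otherwise advertise the
 * cache block size as both io_min and io_opt.
 */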
3391 if (io_opt_sectors < cache->sectors_per_block ||
3392 do_div(io_opt_sectors, cache->sectors_per_block)) {
3393 blk_limits_io_min(limits, cache->sectors_per_block << SECTOR_SHIFT);
3394 blk_limits_io_opt(limits, cache->sectors_per_block << SECTOR_SHIFT);
3395 }
3396
3397 disable_passdown_if_not_supported(cache);
3398 set_discard_limits(cache, limits);
3399 }
3400
3401
3402
3403 static struct target_type cache_target = {
3404 .name = "cache",
3405 .version = {2, 2, 0},
3406 .module = THIS_MODULE,
3407 .ctr = cache_ctr,
3408 .dtr = cache_dtr,
3409 .map = cache_map,
3410 .end_io = cache_end_io,
3411 .postsuspend = cache_postsuspend,
3412 .preresume = cache_preresume,
3413 .resume = cache_resume,
3414 .status = cache_status,
3415 .message = cache_message,
3416 .iterate_devices = cache_iterate_devices,
3417 .io_hints = cache_io_hints,
3418 };
3419
3420 static int __init dm_cache_init(void)
3421 {
3422 int r;
3423
3424 migration_cache = KMEM_CACHE(dm_cache_migration, 0);
3425 if (!migration_cache)
3426 return -ENOMEM;
3427
3428 r = dm_register_target(&cache_target);
3429 if (r) {
3430 DMERR("cache target registration failed: %d", r);
3431 kmem_cache_destroy(migration_cache);
3432 return r;
3433 }
3434
3435 return 0;
3436 }
3437
3438 static void __exit dm_cache_exit(void)
3439 {
3440 dm_unregister_target(&cache_target);
3441 kmem_cache_destroy(migration_cache);
3442 }
3443
3444 module_init(dm_cache_init);
3445 module_exit(dm_cache_exit);
3446
3447 MODULE_DESCRIPTION(DM_NAME " cache target");
3448 MODULE_AUTHOR("Joe Thornber <ejt@redhat.com>");
3449 MODULE_LICENSE("GPL");