/*
 * Device-mapper region hash: tracks the dirty/clean/recovering state of
 * fixed-size regions on behalf of targets such as dm-mirror.
 */
#include <linux/dm-dirty-log.h>
#include <linux/dm-region-hash.h>

#include <linux/ctype.h>
#include <linux/init.h>
#include <linux/module.h>
#include <linux/slab.h>
#include <linux/vmalloc.h>

#include "dm.h"

#define DM_MSG_PREFIX "region hash"

/*
 * Region hash
 *
 * The target's address space is split into fixed-size regions.  Each
 * region is in one of the states DM_RH_CLEAN, DM_RH_DIRTY, DM_RH_NOSYNC
 * (out of sync) or DM_RH_RECOVERING.  Only regions with outstanding work
 * live in the hash table; in addition a region may sit on one of the
 * region_hash lists:
 *
 *   clean_regions: in-sync regions with no I/O pending.  They can be
 *   dropped from the hash and have their dirty-log bit cleared by
 *   dm_rh_update_states().
 *
 *   quiesced_regions: regions that have been quiesced and are ready for
 *   recovery.  dm_rh_recovery_start() hands them to the caller one at a
 *   time.
 *
 *   recovered_regions / failed_recovered_regions: regions whose recovery
 *   has finished, successfully or not.  dm_rh_update_states() updates the
 *   dirty log for them and releases any delayed bios.
 *
 * Two locks protect the structure:
 *
 *   hash_lock (rwlock): protects the hash table itself.
 *
 *   region_lock (spinlock): protects the lists above together with a
 *   region's 'state', 'list' and 'delayed_bios' fields.  It is taken from
 *   interrupt context, so other users must disable local irqs.
 */
struct dm_region_hash {
	uint32_t region_size;
	unsigned region_shift;

	/* holds persistent region state */
	struct dm_dirty_log *log;

	/* hash table */
	rwlock_t hash_lock;
	unsigned mask;
	unsigned nr_buckets;
	unsigned prime;
	unsigned shift;
	struct list_head *buckets;

	/*
	 * If there was a flush failure no regions can be marked clean.
	 */
	int flush_failure;

	unsigned max_recovery; /* Max # of regions to recover in parallel */

	spinlock_t region_lock;
	atomic_t recovery_in_flight;
	struct list_head clean_regions;
	struct list_head quiesced_regions;
	struct list_head recovered_regions;
	struct list_head failed_recovered_regions;
	struct semaphore recovery_count;

	mempool_t region_pool;

	void *context;
	sector_t target_begin;

	/* Callback function to dispatch queued writes */
	void (*dispatch_bios)(void *context, struct bio_list *bios);

	/* Callback function to wake up the caller's worker thread */
	void (*wakeup_workers)(void *context);

	/* Callback function to wake up the caller's recovery waiters */
	void (*wakeup_all_recovery_waiters)(void *context);
};

struct dm_region {
	struct dm_region_hash *rh;	/* back pointer to the owning hash */
	region_t key;
	int state;

	struct list_head hash_list;
	struct list_head list;

	atomic_t pending;
	struct bio_list delayed_bios;
};

/*
 * Conversion fns
 */
static region_t dm_rh_sector_to_region(struct dm_region_hash *rh, sector_t sector)
{
	return sector >> rh->region_shift;
}

sector_t dm_rh_region_to_sector(struct dm_region_hash *rh, region_t region)
{
	return region << rh->region_shift;
}
EXPORT_SYMBOL_GPL(dm_rh_region_to_sector);

region_t dm_rh_bio_to_region(struct dm_region_hash *rh, struct bio *bio)
{
	return dm_rh_sector_to_region(rh, bio->bi_iter.bi_sector -
				      rh->target_begin);
}
EXPORT_SYMBOL_GPL(dm_rh_bio_to_region);

void *dm_rh_region_context(struct dm_region *reg)
{
	return reg->rh->context;
}
EXPORT_SYMBOL_GPL(dm_rh_region_context);

region_t dm_rh_get_region_key(struct dm_region *reg)
{
	return reg->key;
}
EXPORT_SYMBOL_GPL(dm_rh_get_region_key);

sector_t dm_rh_get_region_size(struct dm_region_hash *rh)
{
	return rh->region_size;
}
EXPORT_SYMBOL_GPL(dm_rh_get_region_size);

/*
 * Multiplicative hash constants used by rh_hash() below to map a region
 * number to a bucket.
 */
#define RH_HASH_MULT 2654435387U
#define RH_HASH_SHIFT 12

#define MIN_REGIONS 64
struct dm_region_hash *dm_region_hash_create(
		void *context, void (*dispatch_bios)(void *context,
						     struct bio_list *bios),
		void (*wakeup_workers)(void *context),
		void (*wakeup_all_recovery_waiters)(void *context),
		sector_t target_begin, unsigned max_recovery,
		struct dm_dirty_log *log, uint32_t region_size,
		region_t nr_regions)
{
	struct dm_region_hash *rh;
	unsigned nr_buckets, max_buckets;
	size_t i;
	int ret;

	/*
	 * Calculate a suitable number of buckets for our hash
	 * table.
	 */
	max_buckets = nr_regions >> 6;
	for (nr_buckets = 128u; nr_buckets < max_buckets; nr_buckets <<= 1)
		;
	nr_buckets >>= 1;

	rh = kzalloc(sizeof(*rh), GFP_KERNEL);
	if (!rh) {
		DMERR("unable to allocate region hash memory");
		return ERR_PTR(-ENOMEM);
	}

	rh->context = context;
	rh->dispatch_bios = dispatch_bios;
	rh->wakeup_workers = wakeup_workers;
	rh->wakeup_all_recovery_waiters = wakeup_all_recovery_waiters;
	rh->target_begin = target_begin;
	rh->max_recovery = max_recovery;
	rh->log = log;
	rh->region_size = region_size;
	rh->region_shift = __ffs(region_size);
	rwlock_init(&rh->hash_lock);
	rh->mask = nr_buckets - 1;
	rh->nr_buckets = nr_buckets;

	rh->shift = RH_HASH_SHIFT;
	rh->prime = RH_HASH_MULT;

	rh->buckets = vmalloc(array_size(nr_buckets, sizeof(*rh->buckets)));
	if (!rh->buckets) {
		DMERR("unable to allocate region hash bucket memory");
		kfree(rh);
		return ERR_PTR(-ENOMEM);
	}

	for (i = 0; i < nr_buckets; i++)
		INIT_LIST_HEAD(rh->buckets + i);

	spin_lock_init(&rh->region_lock);
	sema_init(&rh->recovery_count, 0);
	atomic_set(&rh->recovery_in_flight, 0);
	INIT_LIST_HEAD(&rh->clean_regions);
	INIT_LIST_HEAD(&rh->quiesced_regions);
	INIT_LIST_HEAD(&rh->recovered_regions);
	INIT_LIST_HEAD(&rh->failed_recovered_regions);
	rh->flush_failure = 0;

	ret = mempool_init_kmalloc_pool(&rh->region_pool, MIN_REGIONS,
					sizeof(struct dm_region));
	if (ret) {
		vfree(rh->buckets);
		kfree(rh);
		rh = ERR_PTR(-ENOMEM);
	}

	return rh;
}
EXPORT_SYMBOL_GPL(dm_region_hash_create);
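
/*
 * Illustrative sketch (not part of the original code): how a target's
 * constructor might wire up a region hash.  The callback names
 * (my_dispatch_bios, my_wakeup, my_wakeup_recovery_waiters), the 'ms'
 * context and MAX_RECOVERY are hypothetical placeholders, not symbols
 * defined elsewhere in device-mapper.
 *
 *	struct dm_region_hash *rh;
 *
 *	rh = dm_region_hash_create(ms, my_dispatch_bios, my_wakeup,
 *				   my_wakeup_recovery_waiters,
 *				   ti->begin, MAX_RECOVERY,
 *				   dirty_log, region_size, nr_regions);
 *	if (IS_ERR(rh))
 *		return PTR_ERR(rh);
 *	...
 *	dm_region_hash_destroy(rh);	(also destroys the dirty log, if set)
 */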

void dm_region_hash_destroy(struct dm_region_hash *rh)
{
	unsigned h;
	struct dm_region *reg, *nreg;

	BUG_ON(!list_empty(&rh->quiesced_regions));
	for (h = 0; h < rh->nr_buckets; h++) {
		list_for_each_entry_safe(reg, nreg, rh->buckets + h,
					 hash_list) {
			BUG_ON(atomic_read(&reg->pending));
			mempool_free(reg, &rh->region_pool);
		}
	}

	if (rh->log)
		dm_dirty_log_destroy(rh->log);

	mempool_exit(&rh->region_pool);
	vfree(rh->buckets);
	kfree(rh);
}
EXPORT_SYMBOL_GPL(dm_region_hash_destroy);

struct dm_dirty_log *dm_rh_dirty_log(struct dm_region_hash *rh)
{
	return rh->log;
}
EXPORT_SYMBOL_GPL(dm_rh_dirty_log);

static unsigned rh_hash(struct dm_region_hash *rh, region_t region)
{
	return (unsigned) ((region * rh->prime) >> rh->shift) & rh->mask;
}

static struct dm_region *__rh_lookup(struct dm_region_hash *rh, region_t region)
{
	struct dm_region *reg;
	struct list_head *bucket = rh->buckets + rh_hash(rh, region);

	list_for_each_entry(reg, bucket, hash_list)
		if (reg->key == region)
			return reg;

	return NULL;
}

static void __rh_insert(struct dm_region_hash *rh, struct dm_region *reg)
{
	list_add(&reg->hash_list, rh->buckets + rh_hash(rh, reg->key));
}

static struct dm_region *__rh_alloc(struct dm_region_hash *rh, region_t region)
{
	struct dm_region *reg, *nreg;

	nreg = mempool_alloc(&rh->region_pool, GFP_ATOMIC);
	if (unlikely(!nreg))
		nreg = kmalloc(sizeof(*nreg), GFP_NOIO | __GFP_NOFAIL);

	nreg->state = rh->log->type->in_sync(rh->log, region, 1) ?
		      DM_RH_CLEAN : DM_RH_NOSYNC;
	nreg->rh = rh;
	nreg->key = region;
	INIT_LIST_HEAD(&nreg->list);
	atomic_set(&nreg->pending, 0);
	bio_list_init(&nreg->delayed_bios);

	write_lock_irq(&rh->hash_lock);
	reg = __rh_lookup(rh, region);
	if (reg)
		/* We lost the insertion race with another thread. */
		mempool_free(nreg, &rh->region_pool);
	else {
		__rh_insert(rh, nreg);
		if (nreg->state == DM_RH_CLEAN) {
			spin_lock(&rh->region_lock);
			list_add(&nreg->list, &rh->clean_regions);
			spin_unlock(&rh->region_lock);
		}

		reg = nreg;
	}
	write_unlock_irq(&rh->hash_lock);

	return reg;
}

static struct dm_region *__rh_find(struct dm_region_hash *rh, region_t region)
{
	struct dm_region *reg;

	reg = __rh_lookup(rh, region);
	if (!reg) {
		read_unlock(&rh->hash_lock);
		reg = __rh_alloc(rh, region);
		read_lock(&rh->hash_lock);
	}

	return reg;
}

int dm_rh_get_state(struct dm_region_hash *rh, region_t region, int may_block)
{
	int r;
	struct dm_region *reg;

	read_lock(&rh->hash_lock);
	reg = __rh_lookup(rh, region);
	read_unlock(&rh->hash_lock);

	if (reg)
		return reg->state;

	/*
	 * The region wasn't in the hash, so we fall back to the
	 * dirty log.
	 */
	r = rh->log->type->in_sync(rh->log, region, may_block);

	/*
	 * Any error from the dirty log (eg. -EWOULDBLOCK) gets
	 * taken as a DM_RH_NOSYNC
	 */
	return r == 1 ? DM_RH_CLEAN : DM_RH_NOSYNC;
}
EXPORT_SYMBOL_GPL(dm_rh_get_state);

static void complete_resync_work(struct dm_region *reg, int success)
{
	struct dm_region_hash *rh = reg->rh;

	rh->log->type->set_region_sync(rh->log, reg->key, success);

	/*
	 * Dispatch the bios before we call 'wake_up_all'.
	 * This is important because if we are suspending,
	 * we want to know that recovery is complete and
	 * the work queue is flushed.  If we wake_up_all
	 * before we dispatch_bios (queue bios and call wake()),
	 * then we risk suspending before the work queue
	 * has been properly flushed.
	 */
	rh->dispatch_bios(rh->context, &reg->delayed_bios);
	if (atomic_dec_and_test(&rh->recovery_in_flight))
		rh->wakeup_all_recovery_waiters(rh->context);
	up(&rh->recovery_count);
}

/*
 * dm_rh_mark_nosync
 *
 * A write to this region failed on at least one device; the bio itself
 * may still be completed successfully, but the region must not be marked
 * clean, so force its state to DM_RH_NOSYNC.
 *
 * This function is _not_ safe in interrupt context!
 */
void dm_rh_mark_nosync(struct dm_region_hash *rh, struct bio *bio)
{
	unsigned long flags;
	struct dm_dirty_log *log = rh->log;
	struct dm_region *reg;
	region_t region = dm_rh_bio_to_region(rh, bio);
	int recovering = 0;

	if (bio->bi_opf & REQ_PREFLUSH) {
		rh->flush_failure = 1;
		return;
	}

	if (bio_op(bio) == REQ_OP_DISCARD)
		return;

	/* We must inform the log that the sync count has changed. */
	log->type->set_region_sync(log, region, 0);

	read_lock(&rh->hash_lock);
	reg = __rh_find(rh, region);
	read_unlock(&rh->hash_lock);

	/* The region hash entry should exist because the write was in-flight. */
	BUG_ON(!reg);
	BUG_ON(!list_empty(&reg->list));

	spin_lock_irqsave(&rh->region_lock, flags);
	/*
	 * Possible cases:
	 *   1) DM_RH_DIRTY
	 *   2) DM_RH_NOSYNC: was dirty, other preceding writes failed
	 *   3) DM_RH_RECOVERING: flushing pending writes
	 * In any case the region should not be connected to a list.
	 */
	recovering = (reg->state == DM_RH_RECOVERING);
	reg->state = DM_RH_NOSYNC;
	BUG_ON(!list_empty(&reg->list));
	spin_unlock_irqrestore(&rh->region_lock, flags);

	if (recovering)
		complete_resync_work(reg, 0);
}
EXPORT_SYMBOL_GPL(dm_rh_mark_nosync);

void dm_rh_update_states(struct dm_region_hash *rh, int errors_handled)
{
	struct dm_region *reg, *next;

	LIST_HEAD(clean);
	LIST_HEAD(recovered);
	LIST_HEAD(failed_recovered);

	/*
	 * Quickly grab the lists and remove any regions from the hash.
	 */
	write_lock_irq(&rh->hash_lock);
	spin_lock(&rh->region_lock);
	if (!list_empty(&rh->clean_regions)) {
		list_splice_init(&rh->clean_regions, &clean);

		list_for_each_entry(reg, &clean, list)
			list_del(&reg->hash_list);
	}

	if (!list_empty(&rh->recovered_regions)) {
		list_splice_init(&rh->recovered_regions, &recovered);

		list_for_each_entry(reg, &recovered, list)
			list_del(&reg->hash_list);
	}

	if (!list_empty(&rh->failed_recovered_regions)) {
		list_splice_init(&rh->failed_recovered_regions,
				 &failed_recovered);

		list_for_each_entry(reg, &failed_recovered, list)
			list_del(&reg->hash_list);
	}

	spin_unlock(&rh->region_lock);
	write_unlock_irq(&rh->hash_lock);

	/*
	 * All the regions on the recovered and clean lists have
	 * now been pulled out of the system, so no need to do
	 * any more locking.
	 */
	list_for_each_entry_safe(reg, next, &recovered, list) {
		rh->log->type->clear_region(rh->log, reg->key);
		complete_resync_work(reg, 1);
		mempool_free(reg, &rh->region_pool);
	}

	list_for_each_entry_safe(reg, next, &failed_recovered, list) {
		complete_resync_work(reg, errors_handled ? 0 : 1);
		mempool_free(reg, &rh->region_pool);
	}

	list_for_each_entry_safe(reg, next, &clean, list) {
		rh->log->type->clear_region(rh->log, reg->key);
		mempool_free(reg, &rh->region_pool);
	}

	rh->log->type->flush(rh->log);
}
EXPORT_SYMBOL_GPL(dm_rh_update_states);

static void rh_inc(struct dm_region_hash *rh, region_t region)
{
	struct dm_region *reg;

	read_lock(&rh->hash_lock);
	reg = __rh_find(rh, region);

	spin_lock_irq(&rh->region_lock);
	atomic_inc(&reg->pending);

	if (reg->state == DM_RH_CLEAN) {
		reg->state = DM_RH_DIRTY;
		list_del_init(&reg->list);	/* take off the clean list */
		spin_unlock_irq(&rh->region_lock);

		rh->log->type->mark_region(rh->log, reg->key);
	} else
		spin_unlock_irq(&rh->region_lock);

	read_unlock(&rh->hash_lock);
}

void dm_rh_inc_pending(struct dm_region_hash *rh, struct bio_list *bios)
{
	struct bio *bio;

	for (bio = bios->head; bio; bio = bio->bi_next) {
		if (bio->bi_opf & REQ_PREFLUSH || bio_op(bio) == REQ_OP_DISCARD)
			continue;
		rh_inc(rh, dm_rh_bio_to_region(rh, bio));
	}
}
EXPORT_SYMBOL_GPL(dm_rh_inc_pending);

void dm_rh_dec(struct dm_region_hash *rh, region_t region)
{
	unsigned long flags;
	struct dm_region *reg;
	int should_wake = 0;

	read_lock(&rh->hash_lock);
	reg = __rh_lookup(rh, region);
	read_unlock(&rh->hash_lock);

	spin_lock_irqsave(&rh->region_lock, flags);
	if (atomic_dec_and_test(&reg->pending)) {
		/*
		 * There is no pending I/O for this region, so we can move it
		 * to the list appropriate for the next action.  At this point
		 * the region is not yet connected to any list.
		 *
		 * If the state is DM_RH_NOSYNC, the region is kept off the
		 * clean list; its hash entry remains in memory until the
		 * region is recovered or the map is reloaded.
		 */
		if (unlikely(rh->flush_failure)) {
			/*
			 * If a write flush failed some time ago, we
			 * don't know whether or not this write made it
			 * to the disk, so we must resync the device.
			 */
			reg->state = DM_RH_NOSYNC;
		} else if (reg->state == DM_RH_RECOVERING) {
			list_add_tail(&reg->list, &rh->quiesced_regions);
		} else if (reg->state == DM_RH_DIRTY) {
			reg->state = DM_RH_CLEAN;
			list_add(&reg->list, &rh->clean_regions);
		}
		should_wake = 1;
	}
	spin_unlock_irqrestore(&rh->region_lock, flags);

	if (should_wake)
		rh->wakeup_workers(rh->context);
}
EXPORT_SYMBOL_GPL(dm_rh_dec);
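
/*
 * Illustrative sketch (not from the original source): the usual write-path
 * pattern for the pending-I/O accounting above.  'do_writes', 'write_end',
 * 'issue_write' and 'struct my_target' are hypothetical caller-side names.
 *
 *	static void do_writes(struct my_target *t, struct bio_list *writes)
 *	{
 *		struct bio *bio;
 *
 *		dm_rh_inc_pending(t->rh, writes);	(mark the regions dirty)
 *		dm_rh_flush(t->rh);			(commit the dirty log)
 *		while ((bio = bio_list_pop(writes)))
 *			issue_write(t, bio);
 *	}
 *
 *	static void write_end(struct my_target *t, struct bio *bio)
 *	{
 *		dm_rh_dec(t->rh, dm_rh_bio_to_region(t->rh, bio));
 *	}
 */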

/*
 * Starts quiescing a region in preparation for recovery.
 */
static int __rh_recovery_prepare(struct dm_region_hash *rh)
{
	int r;
	region_t region;
	struct dm_region *reg;

	/*
	 * Ask the dirty log what's next.
	 */
	r = rh->log->type->get_resync_work(rh->log, &region);
	if (r <= 0)
		return r;

	/*
	 * Get this region, and start it quiescing by setting the
	 * recovering flag.
	 */
	read_lock(&rh->hash_lock);
	reg = __rh_find(rh, region);
	read_unlock(&rh->hash_lock);

	spin_lock_irq(&rh->region_lock);
	reg->state = DM_RH_RECOVERING;

	/* Already quiesced ? */
	if (atomic_read(&reg->pending))
		list_del_init(&reg->list);
	else
		list_move(&reg->list, &rh->quiesced_regions);

	spin_unlock_irq(&rh->region_lock);

	return 1;
}

void dm_rh_recovery_prepare(struct dm_region_hash *rh)
{
	/* Extra reference to avoid race with dm_rh_stop_recovery */
	atomic_inc(&rh->recovery_in_flight);

	while (!down_trylock(&rh->recovery_count)) {
		atomic_inc(&rh->recovery_in_flight);
		if (__rh_recovery_prepare(rh) <= 0) {
			atomic_dec(&rh->recovery_in_flight);
			up(&rh->recovery_count);
			break;
		}
	}

	/* Drop the extra reference */
	if (atomic_dec_and_test(&rh->recovery_in_flight))
		rh->wakeup_all_recovery_waiters(rh->context);
}
EXPORT_SYMBOL_GPL(dm_rh_recovery_prepare);

/*
 * Hand the next quiesced region to the caller for recovery, or return NULL.
 */
struct dm_region *dm_rh_recovery_start(struct dm_region_hash *rh)
{
	struct dm_region *reg = NULL;

	spin_lock_irq(&rh->region_lock);
	if (!list_empty(&rh->quiesced_regions)) {
		reg = list_entry(rh->quiesced_regions.next,
				 struct dm_region, list);
		list_del_init(&reg->list);	/* remove from the quiesced list */
	}
	spin_unlock_irq(&rh->region_lock);

	return reg;
}
EXPORT_SYMBOL_GPL(dm_rh_recovery_start);

void dm_rh_recovery_end(struct dm_region *reg, int success)
{
	struct dm_region_hash *rh = reg->rh;

	spin_lock_irq(&rh->region_lock);
	if (success)
		list_add(&reg->list, &reg->rh->recovered_regions);
	else
		list_add(&reg->list, &reg->rh->failed_recovered_regions);

	spin_unlock_irq(&rh->region_lock);

	rh->wakeup_workers(rh->context);
}
EXPORT_SYMBOL_GPL(dm_rh_recovery_end);
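
/*
 * Illustrative sketch (not from the original source): how the recovery
 * helpers above are typically driven from a caller's worker thread.
 * 'do_recovery', 'copy_region' and 'struct my_target' are hypothetical
 * names; in dm-mirror the copy itself is handed to dm_kcopyd and its
 * completion callback finishes with dm_rh_recovery_end().
 *
 *	static void do_recovery(struct my_target *t)
 *	{
 *		struct dm_region *reg;
 *
 *		dm_rh_recovery_prepare(t->rh);	(quiesce up to max_recovery regions)
 *
 *		while ((reg = dm_rh_recovery_start(t->rh)))
 *			copy_region(t, reg);	(async; ends in dm_rh_recovery_end(reg, ok))
 *	}
 *
 * The worker later calls dm_rh_update_states() to clear the dirty log for
 * recovered regions and release any delayed bios.
 */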

int dm_rh_recovery_in_flight(struct dm_region_hash *rh)
{
	return atomic_read(&rh->recovery_in_flight);
}
EXPORT_SYMBOL_GPL(dm_rh_recovery_in_flight);

int dm_rh_flush(struct dm_region_hash *rh)
{
	return rh->log->type->flush(rh->log);
}
EXPORT_SYMBOL_GPL(dm_rh_flush);

void dm_rh_delay(struct dm_region_hash *rh, struct bio *bio)
{
	struct dm_region *reg;

	read_lock(&rh->hash_lock);
	reg = __rh_find(rh, dm_rh_bio_to_region(rh, bio));
	bio_list_add(&reg->delayed_bios, bio);
	read_unlock(&rh->hash_lock);
}
EXPORT_SYMBOL_GPL(dm_rh_delay);

void dm_rh_stop_recovery(struct dm_region_hash *rh)
{
	int i;

	/* wait for any recovering regions */
	for (i = 0; i < rh->max_recovery; i++)
		down(&rh->recovery_count);
}
EXPORT_SYMBOL_GPL(dm_rh_stop_recovery);

void dm_rh_start_recovery(struct dm_region_hash *rh)
{
	int i;

	for (i = 0; i < rh->max_recovery; i++)
		up(&rh->recovery_count);

	rh->wakeup_workers(rh->context);
}
EXPORT_SYMBOL_GPL(dm_rh_start_recovery);

MODULE_DESCRIPTION(DM_NAME " region hash");
MODULE_AUTHOR("Joe Thornber/Heinz Mauelshagen <dm-devel@redhat.com>");
MODULE_LICENSE("GPL");