0001
0002
0003
0004
0005
0006
0007
0008 #include "dm-zoned.h"
0009
0010 #include <linux/module.h>
0011
0012 #define DM_MSG_PREFIX "zoned reclaim"
0013
0014 struct dmz_reclaim {
0015 struct dmz_metadata *metadata;
0016
0017 struct delayed_work work;
0018 struct workqueue_struct *wq;
0019
0020 struct dm_kcopyd_client *kc;
0021 struct dm_kcopyd_throttle kc_throttle;
0022 int kc_err;
0023
0024 int dev_idx;
0025
0026 unsigned long flags;
0027
0028
0029 unsigned long atime;
0030 };
0031
0032
0033
0034
/*
 * Bit numbers for the flags word of struct dmz_reclaim.
 */
enum {
	/*
	 * Set by dmz_reclaim_copy() before a kcopyd copy is issued and cleared
	 * by dmz_reclaim_kcopy_end() when that copy completes.
	 */
	DMZ_RECLAIM_KCOPY = 0,
};
0038
0039
0040
0041
/*
 * Time, in jiffies (10 seconds worth), that must have elapsed since the last
 * recorded access for dmz_target_idle() to report the target as idle. Also
 * used as the delay when the reclaim work is re-armed without reclaiming.
 */
#define DMZ_IDLE_PERIOD			(HZ * 10UL)

/*
 * Percentage of unmapped zones at or below which dmz_should_reclaim() asks
 * for reclaim even though the target is not idle. Below half of this value
 * dmz_reclaim_work() runs the copy unthrottled.
 */
#define DMZ_RECLAIM_LOW_UNMAP_ZONES	30

/*
 * Percentage of unmapped zones at or above which dmz_should_reclaim()
 * declines to reclaim while the target is not idle.
 */
#define DMZ_RECLAIM_HIGH_UNMAP_ZONES	50
0055
0056
0057
0058
0059 static int dmz_reclaim_align_wp(struct dmz_reclaim *zrc, struct dm_zone *zone,
0060 sector_t block)
0061 {
0062 struct dmz_metadata *zmd = zrc->metadata;
0063 struct dmz_dev *dev = zone->dev;
0064 sector_t wp_block = zone->wp_block;
0065 unsigned int nr_blocks;
0066 int ret;
0067
0068 if (wp_block == block)
0069 return 0;
0070
0071 if (wp_block > block)
0072 return -EIO;
0073
0074
0075
0076
0077
0078 nr_blocks = block - wp_block;
0079 ret = blkdev_issue_zeroout(dev->bdev,
0080 dmz_start_sect(zmd, zone) + dmz_blk2sect(wp_block),
0081 dmz_blk2sect(nr_blocks), GFP_NOIO, 0);
0082 if (ret) {
0083 dmz_dev_err(dev,
0084 "Align zone %u wp %llu to %llu (wp+%u) blocks failed %d",
0085 zone->id, (unsigned long long)wp_block,
0086 (unsigned long long)block, nr_blocks, ret);
0087 dmz_check_bdev(dev);
0088 return ret;
0089 }
0090
0091 zone->wp_block = block;
0092
0093 return 0;
0094 }
0095
0096
0097
0098
0099 static void dmz_reclaim_kcopy_end(int read_err, unsigned long write_err,
0100 void *context)
0101 {
0102 struct dmz_reclaim *zrc = context;
0103
0104 if (read_err || write_err)
0105 zrc->kc_err = -EIO;
0106 else
0107 zrc->kc_err = 0;
0108
0109 clear_bit_unlock(DMZ_RECLAIM_KCOPY, &zrc->flags);
0110 smp_mb__after_atomic();
0111 wake_up_bit(&zrc->flags, DMZ_RECLAIM_KCOPY);
0112 }
0113
0114
0115
0116
0117 static int dmz_reclaim_copy(struct dmz_reclaim *zrc,
0118 struct dm_zone *src_zone, struct dm_zone *dst_zone)
0119 {
0120 struct dmz_metadata *zmd = zrc->metadata;
0121 struct dm_io_region src, dst;
0122 sector_t block = 0, end_block;
0123 sector_t nr_blocks;
0124 sector_t src_zone_block;
0125 sector_t dst_zone_block;
0126 unsigned long flags = 0;
0127 int ret;
0128
0129 if (dmz_is_seq(src_zone))
0130 end_block = src_zone->wp_block;
0131 else
0132 end_block = dmz_zone_nr_blocks(zmd);
0133 src_zone_block = dmz_start_block(zmd, src_zone);
0134 dst_zone_block = dmz_start_block(zmd, dst_zone);
0135
0136 if (dmz_is_seq(dst_zone))
0137 flags |= BIT(DM_KCOPYD_WRITE_SEQ);
0138
0139 while (block < end_block) {
0140 if (src_zone->dev->flags & DMZ_BDEV_DYING)
0141 return -EIO;
0142 if (dst_zone->dev->flags & DMZ_BDEV_DYING)
0143 return -EIO;
0144
0145 if (dmz_reclaim_should_terminate(src_zone))
0146 return -EINTR;
0147
0148
0149 ret = dmz_first_valid_block(zmd, src_zone, &block);
0150 if (ret <= 0)
0151 return ret;
0152 nr_blocks = ret;
0153
0154
0155
0156
0157
0158
0159 if (dmz_is_seq(dst_zone)) {
0160 ret = dmz_reclaim_align_wp(zrc, dst_zone, block);
0161 if (ret)
0162 return ret;
0163 }
0164
0165 src.bdev = src_zone->dev->bdev;
0166 src.sector = dmz_blk2sect(src_zone_block + block);
0167 src.count = dmz_blk2sect(nr_blocks);
0168
0169 dst.bdev = dst_zone->dev->bdev;
0170 dst.sector = dmz_blk2sect(dst_zone_block + block);
0171 dst.count = src.count;
0172
0173
0174 set_bit(DMZ_RECLAIM_KCOPY, &zrc->flags);
0175 dm_kcopyd_copy(zrc->kc, &src, 1, &dst, flags,
0176 dmz_reclaim_kcopy_end, zrc);
0177
0178
0179 wait_on_bit_io(&zrc->flags, DMZ_RECLAIM_KCOPY,
0180 TASK_UNINTERRUPTIBLE);
0181 if (zrc->kc_err)
0182 return zrc->kc_err;
0183
0184 block += nr_blocks;
0185 if (dmz_is_seq(dst_zone))
0186 dst_zone->wp_block = block;
0187 }
0188
0189 return 0;
0190 }
0191
0192
0193
0194
0195
0196 static int dmz_reclaim_buf(struct dmz_reclaim *zrc, struct dm_zone *dzone)
0197 {
0198 struct dm_zone *bzone = dzone->bzone;
0199 sector_t chunk_block = dzone->wp_block;
0200 struct dmz_metadata *zmd = zrc->metadata;
0201 int ret;
0202
0203 DMDEBUG("(%s/%u): Chunk %u, move buf zone %u (weight %u) to data zone %u (weight %u)",
0204 dmz_metadata_label(zmd), zrc->dev_idx,
0205 dzone->chunk, bzone->id, dmz_weight(bzone),
0206 dzone->id, dmz_weight(dzone));
0207
0208
0209 ret = dmz_reclaim_copy(zrc, bzone, dzone);
0210 if (ret < 0)
0211 return ret;
0212
0213 dmz_lock_flush(zmd);
0214
0215
0216 ret = dmz_merge_valid_blocks(zmd, bzone, dzone, chunk_block);
0217 if (ret == 0) {
0218
0219 dmz_invalidate_blocks(zmd, bzone, 0, dmz_zone_nr_blocks(zmd));
0220 dmz_lock_map(zmd);
0221 dmz_unmap_zone(zmd, bzone);
0222 dmz_unlock_zone_reclaim(dzone);
0223 dmz_free_zone(zmd, bzone);
0224 dmz_unlock_map(zmd);
0225 }
0226
0227 dmz_unlock_flush(zmd);
0228
0229 return ret;
0230 }
0231
0232
0233
0234
0235 static int dmz_reclaim_seq_data(struct dmz_reclaim *zrc, struct dm_zone *dzone)
0236 {
0237 unsigned int chunk = dzone->chunk;
0238 struct dm_zone *bzone = dzone->bzone;
0239 struct dmz_metadata *zmd = zrc->metadata;
0240 int ret = 0;
0241
0242 DMDEBUG("(%s/%u): Chunk %u, move data zone %u (weight %u) to buf zone %u (weight %u)",
0243 dmz_metadata_label(zmd), zrc->dev_idx,
0244 chunk, dzone->id, dmz_weight(dzone),
0245 bzone->id, dmz_weight(bzone));
0246
0247
0248 ret = dmz_reclaim_copy(zrc, dzone, bzone);
0249 if (ret < 0)
0250 return ret;
0251
0252 dmz_lock_flush(zmd);
0253
0254
0255 ret = dmz_merge_valid_blocks(zmd, dzone, bzone, 0);
0256 if (ret == 0) {
0257
0258
0259
0260
0261 dmz_invalidate_blocks(zmd, dzone, 0, dmz_zone_nr_blocks(zmd));
0262 dmz_lock_map(zmd);
0263 dmz_unmap_zone(zmd, bzone);
0264 dmz_unmap_zone(zmd, dzone);
0265 dmz_unlock_zone_reclaim(dzone);
0266 dmz_free_zone(zmd, dzone);
0267 dmz_map_zone(zmd, bzone, chunk);
0268 dmz_unlock_map(zmd);
0269 }
0270
0271 dmz_unlock_flush(zmd);
0272
0273 return ret;
0274 }
0275
0276
0277
0278
0279
0280 static int dmz_reclaim_rnd_data(struct dmz_reclaim *zrc, struct dm_zone *dzone)
0281 {
0282 unsigned int chunk = dzone->chunk;
0283 struct dm_zone *szone = NULL;
0284 struct dmz_metadata *zmd = zrc->metadata;
0285 int ret;
0286 int alloc_flags = DMZ_ALLOC_SEQ;
0287
0288
0289 dmz_lock_map(zmd);
0290 again:
0291 szone = dmz_alloc_zone(zmd, zrc->dev_idx,
0292 alloc_flags | DMZ_ALLOC_RECLAIM);
0293 if (!szone && alloc_flags == DMZ_ALLOC_SEQ && dmz_nr_cache_zones(zmd)) {
0294 alloc_flags = DMZ_ALLOC_RND;
0295 goto again;
0296 }
0297 dmz_unlock_map(zmd);
0298 if (!szone)
0299 return -ENOSPC;
0300
0301 DMDEBUG("(%s/%u): Chunk %u, move %s zone %u (weight %u) to %s zone %u",
0302 dmz_metadata_label(zmd), zrc->dev_idx, chunk,
0303 dmz_is_cache(dzone) ? "cache" : "rnd",
0304 dzone->id, dmz_weight(dzone),
0305 dmz_is_rnd(szone) ? "rnd" : "seq", szone->id);
0306
0307
0308 ret = dmz_reclaim_copy(zrc, dzone, szone);
0309
0310 dmz_lock_flush(zmd);
0311
0312 if (ret == 0) {
0313
0314 ret = dmz_copy_valid_blocks(zmd, dzone, szone);
0315 }
0316 if (ret) {
0317
0318 dmz_lock_map(zmd);
0319 dmz_free_zone(zmd, szone);
0320 dmz_unlock_map(zmd);
0321 } else {
0322
0323 dmz_invalidate_blocks(zmd, dzone, 0, dmz_zone_nr_blocks(zmd));
0324 dmz_lock_map(zmd);
0325 dmz_unmap_zone(zmd, dzone);
0326 dmz_unlock_zone_reclaim(dzone);
0327 dmz_free_zone(zmd, dzone);
0328 dmz_map_zone(zmd, szone, chunk);
0329 dmz_unlock_map(zmd);
0330 }
0331
0332 dmz_unlock_flush(zmd);
0333
0334 return ret;
0335 }
0336
0337
0338
0339
0340 static void dmz_reclaim_empty(struct dmz_reclaim *zrc, struct dm_zone *dzone)
0341 {
0342 struct dmz_metadata *zmd = zrc->metadata;
0343
0344 dmz_lock_flush(zmd);
0345 dmz_lock_map(zmd);
0346 dmz_unmap_zone(zmd, dzone);
0347 dmz_unlock_zone_reclaim(dzone);
0348 dmz_free_zone(zmd, dzone);
0349 dmz_unlock_map(zmd);
0350 dmz_unlock_flush(zmd);
0351 }
0352
0353
0354
0355
0356 static inline int dmz_target_idle(struct dmz_reclaim *zrc)
0357 {
0358 return time_is_before_jiffies(zrc->atime + DMZ_IDLE_PERIOD);
0359 }
0360
0361
0362
0363
0364 static int dmz_do_reclaim(struct dmz_reclaim *zrc)
0365 {
0366 struct dmz_metadata *zmd = zrc->metadata;
0367 struct dm_zone *dzone;
0368 struct dm_zone *rzone;
0369 unsigned long start;
0370 int ret;
0371
0372
0373 dzone = dmz_get_zone_for_reclaim(zmd, zrc->dev_idx,
0374 dmz_target_idle(zrc));
0375 if (!dzone) {
0376 DMDEBUG("(%s/%u): No zone found to reclaim",
0377 dmz_metadata_label(zmd), zrc->dev_idx);
0378 return -EBUSY;
0379 }
0380 rzone = dzone;
0381
0382 start = jiffies;
0383 if (dmz_is_cache(dzone) || dmz_is_rnd(dzone)) {
0384 if (!dmz_weight(dzone)) {
0385
0386 dmz_reclaim_empty(zrc, dzone);
0387 ret = 0;
0388 } else {
0389
0390
0391
0392
0393 ret = dmz_reclaim_rnd_data(zrc, dzone);
0394 }
0395 } else {
0396 struct dm_zone *bzone = dzone->bzone;
0397 sector_t chunk_block = 0;
0398
0399 ret = dmz_first_valid_block(zmd, bzone, &chunk_block);
0400 if (ret < 0)
0401 goto out;
0402
0403 if (ret == 0 || chunk_block >= dzone->wp_block) {
0404
0405
0406
0407
0408 ret = dmz_reclaim_buf(zrc, dzone);
0409 rzone = bzone;
0410 } else {
0411
0412
0413
0414
0415
0416 ret = dmz_reclaim_seq_data(zrc, dzone);
0417 }
0418 }
0419 out:
0420 if (ret) {
0421 if (ret == -EINTR)
0422 DMDEBUG("(%s/%u): reclaim zone %u interrupted",
0423 dmz_metadata_label(zmd), zrc->dev_idx,
0424 rzone->id);
0425 else
0426 DMDEBUG("(%s/%u): Failed to reclaim zone %u, err %d",
0427 dmz_metadata_label(zmd), zrc->dev_idx,
0428 rzone->id, ret);
0429 dmz_unlock_zone_reclaim(dzone);
0430 return ret;
0431 }
0432
0433 ret = dmz_flush_metadata(zrc->metadata);
0434 if (ret) {
0435 DMDEBUG("(%s/%u): Metadata flush for zone %u failed, err %d",
0436 dmz_metadata_label(zmd), zrc->dev_idx, rzone->id, ret);
0437 return ret;
0438 }
0439
0440 DMDEBUG("(%s/%u): Reclaimed zone %u in %u ms",
0441 dmz_metadata_label(zmd), zrc->dev_idx,
0442 rzone->id, jiffies_to_msecs(jiffies - start));
0443 return 0;
0444 }
0445
0446 static unsigned int dmz_reclaim_percentage(struct dmz_reclaim *zrc)
0447 {
0448 struct dmz_metadata *zmd = zrc->metadata;
0449 unsigned int nr_cache = dmz_nr_cache_zones(zmd);
0450 unsigned int nr_unmap, nr_zones;
0451
0452 if (nr_cache) {
0453 nr_zones = nr_cache;
0454 nr_unmap = dmz_nr_unmap_cache_zones(zmd);
0455 } else {
0456 nr_zones = dmz_nr_rnd_zones(zmd, zrc->dev_idx);
0457 nr_unmap = dmz_nr_unmap_rnd_zones(zmd, zrc->dev_idx);
0458 }
0459 if (nr_unmap <= 1)
0460 return 0;
0461 return nr_unmap * 100 / nr_zones;
0462 }
0463
0464
0465
0466
0467 static bool dmz_should_reclaim(struct dmz_reclaim *zrc, unsigned int p_unmap)
0468 {
0469 unsigned int nr_reclaim;
0470
0471 nr_reclaim = dmz_nr_rnd_zones(zrc->metadata, zrc->dev_idx);
0472
0473 if (dmz_nr_cache_zones(zrc->metadata)) {
0474
0475
0476
0477
0478
0479 if (zrc->dev_idx == 0)
0480 return false;
0481 nr_reclaim += dmz_nr_cache_zones(zrc->metadata);
0482 }
0483
0484
0485 if (dmz_target_idle(zrc) && nr_reclaim)
0486 return true;
0487
0488
0489 if (p_unmap >= DMZ_RECLAIM_HIGH_UNMAP_ZONES)
0490 return false;
0491
0492
0493
0494
0495
0496 return p_unmap <= DMZ_RECLAIM_LOW_UNMAP_ZONES;
0497 }
0498
0499
0500
0501
0502 static void dmz_reclaim_work(struct work_struct *work)
0503 {
0504 struct dmz_reclaim *zrc = container_of(work, struct dmz_reclaim, work.work);
0505 struct dmz_metadata *zmd = zrc->metadata;
0506 unsigned int p_unmap;
0507 int ret;
0508
0509 if (dmz_dev_is_dying(zmd))
0510 return;
0511
0512 p_unmap = dmz_reclaim_percentage(zrc);
0513 if (!dmz_should_reclaim(zrc, p_unmap)) {
0514 mod_delayed_work(zrc->wq, &zrc->work, DMZ_IDLE_PERIOD);
0515 return;
0516 }
0517
0518
0519
0520
0521
0522
0523
0524 if (dmz_target_idle(zrc) || p_unmap < DMZ_RECLAIM_LOW_UNMAP_ZONES / 2) {
0525
0526 zrc->kc_throttle.throttle = 100;
0527 } else {
0528
0529 zrc->kc_throttle.throttle = min(75U, 100U - p_unmap / 2);
0530 }
0531
0532 DMDEBUG("(%s/%u): Reclaim (%u): %s, %u%% free zones (%u/%u cache %u/%u random)",
0533 dmz_metadata_label(zmd), zrc->dev_idx,
0534 zrc->kc_throttle.throttle,
0535 (dmz_target_idle(zrc) ? "Idle" : "Busy"),
0536 p_unmap, dmz_nr_unmap_cache_zones(zmd),
0537 dmz_nr_cache_zones(zmd),
0538 dmz_nr_unmap_rnd_zones(zmd, zrc->dev_idx),
0539 dmz_nr_rnd_zones(zmd, zrc->dev_idx));
0540
0541 ret = dmz_do_reclaim(zrc);
0542 if (ret && ret != -EINTR) {
0543 if (!dmz_check_dev(zmd))
0544 return;
0545 }
0546
0547 dmz_schedule_reclaim(zrc);
0548 }
0549
0550
0551
0552
0553 int dmz_ctr_reclaim(struct dmz_metadata *zmd,
0554 struct dmz_reclaim **reclaim, int idx)
0555 {
0556 struct dmz_reclaim *zrc;
0557 int ret;
0558
0559 zrc = kzalloc(sizeof(struct dmz_reclaim), GFP_KERNEL);
0560 if (!zrc)
0561 return -ENOMEM;
0562
0563 zrc->metadata = zmd;
0564 zrc->atime = jiffies;
0565 zrc->dev_idx = idx;
0566
0567
0568 zrc->kc = dm_kcopyd_client_create(&zrc->kc_throttle);
0569 if (IS_ERR(zrc->kc)) {
0570 ret = PTR_ERR(zrc->kc);
0571 zrc->kc = NULL;
0572 goto err;
0573 }
0574
0575
0576 INIT_DELAYED_WORK(&zrc->work, dmz_reclaim_work);
0577 zrc->wq = alloc_ordered_workqueue("dmz_rwq_%s_%d", WQ_MEM_RECLAIM,
0578 dmz_metadata_label(zmd), idx);
0579 if (!zrc->wq) {
0580 ret = -ENOMEM;
0581 goto err;
0582 }
0583
0584 *reclaim = zrc;
0585 queue_delayed_work(zrc->wq, &zrc->work, 0);
0586
0587 return 0;
0588 err:
0589 if (zrc->kc)
0590 dm_kcopyd_client_destroy(zrc->kc);
0591 kfree(zrc);
0592
0593 return ret;
0594 }
0595
0596
0597
0598
0599 void dmz_dtr_reclaim(struct dmz_reclaim *zrc)
0600 {
0601 cancel_delayed_work_sync(&zrc->work);
0602 destroy_workqueue(zrc->wq);
0603 dm_kcopyd_client_destroy(zrc->kc);
0604 kfree(zrc);
0605 }
0606
0607
0608
0609
0610 void dmz_suspend_reclaim(struct dmz_reclaim *zrc)
0611 {
0612 cancel_delayed_work_sync(&zrc->work);
0613 }
0614
0615
0616
0617
0618 void dmz_resume_reclaim(struct dmz_reclaim *zrc)
0619 {
0620 queue_delayed_work(zrc->wq, &zrc->work, DMZ_IDLE_PERIOD);
0621 }
0622
0623
0624
0625
0626 void dmz_reclaim_bio_acc(struct dmz_reclaim *zrc)
0627 {
0628 zrc->atime = jiffies;
0629 }
0630
0631
0632
0633
0634 void dmz_schedule_reclaim(struct dmz_reclaim *zrc)
0635 {
0636 unsigned int p_unmap = dmz_reclaim_percentage(zrc);
0637
0638 if (dmz_should_reclaim(zrc, p_unmap))
0639 mod_delayed_work(zrc->wq, &zrc->work, 0);
0640 }