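/*
 * dm-zoned device mapper target: exposes one or more zoned block devices,
 * optionally together with a leading regular block device, as a single
 * conventional (randomly writable) block device.
 */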
#include "dm-zoned.h"

#include <linux/module.h>

#define DM_MSG_PREFIX		"zoned"

#define DMZ_MIN_BIOS		8192

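/*
 * Per-BIO context: the target device and zone a BIO is mapped to, the
 * original BIO, and a reference count tracking outstanding clones.
 */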
struct dmz_bioctx {
	struct dmz_dev		*dev;
	struct dm_zone		*zone;
	struct bio		*bio;
	refcount_t		ref;
};

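/*
 * Chunk work descriptor: queues the BIOs targeting one chunk so that they
 * are processed in order by the chunk work function.
 */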
struct dm_chunk_work {
	struct work_struct	work;
	refcount_t		refcount;
	struct dmz_target	*target;
	unsigned int		chunk;
	struct bio_list		bio_list;
};

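/*
 * Target descriptor.
 */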
struct dmz_target {
	struct dm_dev		**ddev;
	unsigned int		nr_ddevs;

	unsigned int		flags;

	/* Zoned block device information */
	struct dmz_dev		*dev;

	/* For metadata handling */
	struct dmz_metadata	*metadata;

	/* For chunk work */
	struct radix_tree_root	chunk_rxtree;
	struct workqueue_struct *chunk_wq;
	struct mutex		chunk_lock;

	/* For cloned BIOs to zones */
	struct bio_set		bio_set;

	/* For flush */
	spinlock_t		flush_lock;
	struct bio_list		flush_list;
	struct delayed_work	flush_work;
	struct workqueue_struct *flush_wq;
};

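/*
 * Periodic metadata flush interval, in jiffies.
 */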
#define DMZ_FLUSH_PERIOD	(10 * HZ)

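/*
 * Target BIO completion: drop a reference on the BIO context and, once all
 * clones have completed, end the original BIO. A failed write to a
 * sequential zone flags the zone with a write error.
 */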
static inline void dmz_bio_endio(struct bio *bio, blk_status_t status)
{
	struct dmz_bioctx *bioctx =
		dm_per_bio_data(bio, sizeof(struct dmz_bioctx));

	if (status != BLK_STS_OK && bio->bi_status == BLK_STS_OK)
		bio->bi_status = status;
	if (bioctx->dev && bio->bi_status != BLK_STS_OK)
		bioctx->dev->flags |= DMZ_CHECK_BDEV;

	if (refcount_dec_and_test(&bioctx->ref)) {
		struct dm_zone *zone = bioctx->zone;

		if (zone) {
			if (bio->bi_status != BLK_STS_OK &&
			    bio_op(bio) == REQ_OP_WRITE &&
			    dmz_is_seq(zone))
				set_bit(DMZ_SEQ_WRITE_ERR, &zone->flags);
			dmz_deactivate_zone(zone);
		}
		bio_endio(bio);
	}
}

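/*
 * Completion callback for a clone BIO issued to a zone: release the clone
 * and complete the original BIO with the clone status.
 */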
static void dmz_clone_endio(struct bio *clone)
{
	struct dmz_bioctx *bioctx = clone->bi_private;
	blk_status_t status = clone->bi_status;

	bio_put(clone);
	dmz_bio_endio(bioctx->bio, status);
}

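/*
 * Issue a clone of a target BIO to a zone. The clone may only partially
 * process the original target BIO.
 */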
static int dmz_submit_bio(struct dmz_target *dmz, struct dm_zone *zone,
			  struct bio *bio, sector_t chunk_block,
			  unsigned int nr_blocks)
{
	struct dmz_bioctx *bioctx =
		dm_per_bio_data(bio, sizeof(struct dmz_bioctx));
	struct dmz_dev *dev = zone->dev;
	struct bio *clone;

	if (dev->flags & DMZ_BDEV_DYING)
		return -EIO;

	clone = bio_alloc_clone(dev->bdev, bio, GFP_NOIO, &dmz->bio_set);
	if (!clone)
		return -ENOMEM;

	bioctx->dev = dev;
	clone->bi_iter.bi_sector =
		dmz_start_sect(dmz->metadata, zone) + dmz_blk2sect(chunk_block);
	clone->bi_iter.bi_size = dmz_blk2sect(nr_blocks) << SECTOR_SHIFT;
	clone->bi_end_io = dmz_clone_endio;
	clone->bi_private = bioctx;

	bio_advance(bio, clone->bi_iter.bi_size);

	refcount_inc(&bioctx->ref);
	submit_bio_noacct(clone);

	if (bio_op(bio) == REQ_OP_WRITE && dmz_is_seq(zone))
		zone->wp_block += nr_blocks;

	return 0;
}

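/*
 * Zero out pages of discarded blocks accessed by a read BIO.
 */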
static void dmz_handle_read_zero(struct dmz_target *dmz, struct bio *bio,
				 sector_t chunk_block, unsigned int nr_blocks)
{
	unsigned int size = nr_blocks << DMZ_BLOCK_SHIFT;

	/* Temporarily limit the BIO size to the blocks to zero out */
	swap(bio->bi_iter.bi_size, size);
	zero_fill_bio(bio);
	swap(bio->bi_iter.bi_size, size);

	bio_advance(bio, size);
}

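/*
 * Process a read BIO: read valid blocks from the data zone or its buffer
 * zone, and zero-fill the holes (unwritten or invalid blocks).
 */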
static int dmz_handle_read(struct dmz_target *dmz, struct dm_zone *zone,
			   struct bio *bio)
{
	struct dmz_metadata *zmd = dmz->metadata;
	sector_t chunk_block = dmz_chunk_block(zmd, dmz_bio_block(bio));
	unsigned int nr_blocks = dmz_bio_blocks(bio);
	sector_t end_block = chunk_block + nr_blocks;
	struct dm_zone *rzone, *bzone;
	int ret;

	/* Reads to unmapped chunks need only zero the BIO buffer */
	if (!zone) {
		zero_fill_bio(bio);
		return 0;
	}

	DMDEBUG("(%s): READ chunk %llu -> %s zone %u, block %llu, %u blocks",
		dmz_metadata_label(zmd),
		(unsigned long long)dmz_bio_chunk(zmd, bio),
		(dmz_is_rnd(zone) ? "RND" :
		 (dmz_is_cache(zone) ? "CACHE" : "SEQ")),
		zone->id,
		(unsigned long long)chunk_block, nr_blocks);

	/* Check block validity to determine the read location */
	bzone = zone->bzone;
	while (chunk_block < end_block) {
		nr_blocks = 0;
		if (dmz_is_rnd(zone) || dmz_is_cache(zone) ||
		    chunk_block < zone->wp_block) {
			/* Test block validity in the data zone */
			ret = dmz_block_valid(zmd, zone, chunk_block);
			if (ret < 0)
				return ret;
			if (ret > 0) {
				/* Read data zone blocks */
				nr_blocks = ret;
				rzone = zone;
			}
		}

		/*
		 * No valid blocks found in the data zone.
		 * Check the buffer zone, if there is one.
		 */
		if (!nr_blocks && bzone) {
			ret = dmz_block_valid(zmd, bzone, chunk_block);
			if (ret < 0)
				return ret;
			if (ret > 0) {
				/* Read buffer zone blocks */
				nr_blocks = ret;
				rzone = bzone;
			}
		}

		if (nr_blocks) {
			/* Valid blocks found: read them */
			nr_blocks = min_t(unsigned int, nr_blocks,
					  end_block - chunk_block);
			ret = dmz_submit_bio(dmz, rzone, bio,
					     chunk_block, nr_blocks);
			if (ret)
				return ret;
			chunk_block += nr_blocks;
		} else {
			/* No valid block: zero out the current BIO block */
			dmz_handle_read_zero(dmz, bio, chunk_block, 1);
			chunk_block++;
		}
	}

	return 0;
}

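/*
 * Write blocks directly in their data zone. Once the write is issued, the
 * written blocks are marked valid in the data zone and invalidated in the
 * buffer zone, if the chunk has one.
 */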
static int dmz_handle_direct_write(struct dmz_target *dmz,
				   struct dm_zone *zone, struct bio *bio,
				   sector_t chunk_block,
				   unsigned int nr_blocks)
{
	struct dmz_metadata *zmd = dmz->metadata;
	struct dm_zone *bzone = zone->bzone;
	int ret;

	if (dmz_is_readonly(zone))
		return -EROFS;

	/* Submit write */
	ret = dmz_submit_bio(dmz, zone, bio, chunk_block, nr_blocks);
	if (ret)
		return ret;

	/*
	 * Validate the blocks in the data zone and invalidate
	 * in the buffer zone, if there is one.
	 */
	ret = dmz_validate_blocks(zmd, zone, chunk_block, nr_blocks);
	if (ret == 0 && bzone)
		ret = dmz_invalidate_blocks(zmd, bzone, chunk_block, nr_blocks);

	return ret;
}

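/*
 * Write blocks in the buffer zone of @zone. Once the write is issued, the
 * written blocks are marked valid in the buffer zone and invalidated in the
 * data zone if they fall below the zone write pointer.
 */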
static int dmz_handle_buffered_write(struct dmz_target *dmz,
				     struct dm_zone *zone, struct bio *bio,
				     sector_t chunk_block,
				     unsigned int nr_blocks)
{
	struct dmz_metadata *zmd = dmz->metadata;
	struct dm_zone *bzone;
	int ret;

	/* Get the buffer zone. One will be allocated if needed */
	bzone = dmz_get_chunk_buffer(zmd, zone);
	if (IS_ERR(bzone))
		return PTR_ERR(bzone);

	if (dmz_is_readonly(bzone))
		return -EROFS;

	/* Submit write */
	ret = dmz_submit_bio(dmz, bzone, bio, chunk_block, nr_blocks);
	if (ret)
		return ret;

	/*
	 * Validate the blocks in the buffer zone
	 * and invalidate in the data zone.
	 */
	ret = dmz_validate_blocks(zmd, bzone, chunk_block, nr_blocks);
	if (ret == 0 && chunk_block < zone->wp_block)
		ret = dmz_invalidate_blocks(zmd, zone, chunk_block, nr_blocks);

	return ret;
}

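/*
 * Process a write BIO: write directly to the data zone when possible,
 * otherwise through the chunk's buffer zone.
 */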
static int dmz_handle_write(struct dmz_target *dmz, struct dm_zone *zone,
			    struct bio *bio)
{
	struct dmz_metadata *zmd = dmz->metadata;
	sector_t chunk_block = dmz_chunk_block(zmd, dmz_bio_block(bio));
	unsigned int nr_blocks = dmz_bio_blocks(bio);

	if (!zone)
		return -ENOSPC;

	DMDEBUG("(%s): WRITE chunk %llu -> %s zone %u, block %llu, %u blocks",
		dmz_metadata_label(zmd),
		(unsigned long long)dmz_bio_chunk(zmd, bio),
		(dmz_is_rnd(zone) ? "RND" :
		 (dmz_is_cache(zone) ? "CACHE" : "SEQ")),
		zone->id,
		(unsigned long long)chunk_block, nr_blocks);

	if (dmz_is_rnd(zone) || dmz_is_cache(zone) ||
	    chunk_block == zone->wp_block) {
		/*
		 * The zone is a random or cache zone, or the write is
		 * aligned to the sequential zone write pointer:
		 * write directly to the data zone.
		 */
		return dmz_handle_direct_write(dmz, zone, bio,
					       chunk_block, nr_blocks);
	}

	/*
	 * This is an unaligned write in a sequential zone:
	 * use a buffered write.
	 */
	return dmz_handle_buffered_write(dmz, zone, bio, chunk_block, nr_blocks);
}

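/*
 * Process a discard BIO: invalidate the targeted blocks.
 */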
static int dmz_handle_discard(struct dmz_target *dmz, struct dm_zone *zone,
			      struct bio *bio)
{
	struct dmz_metadata *zmd = dmz->metadata;
	sector_t block = dmz_bio_block(bio);
	unsigned int nr_blocks = dmz_bio_blocks(bio);
	sector_t chunk_block = dmz_chunk_block(zmd, block);
	int ret = 0;

	/* For unmapped chunks there is nothing to do */
	if (!zone)
		return 0;

	if (dmz_is_readonly(zone))
		return -EROFS;

	DMDEBUG("(%s): DISCARD chunk %llu -> zone %u, block %llu, %u blocks",
		dmz_metadata_label(dmz->metadata),
		(unsigned long long)dmz_bio_chunk(zmd, bio),
		zone->id,
		(unsigned long long)chunk_block, nr_blocks);

	/*
	 * Invalidate blocks in the data zone and in its
	 * buffer zone if one is mapped.
	 */
	if (dmz_is_rnd(zone) || dmz_is_cache(zone) ||
	    chunk_block < zone->wp_block)
		ret = dmz_invalidate_blocks(zmd, zone, chunk_block, nr_blocks);
	if (ret == 0 && zone->bzone)
		ret = dmz_invalidate_blocks(zmd, zone->bzone,
					    chunk_block, nr_blocks);
	return ret;
}

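/*
 * Handle a BIO: get the chunk's zone mapping, then dispatch to the read,
 * write, or discard handler.
 */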
static void dmz_handle_bio(struct dmz_target *dmz, struct dm_chunk_work *cw,
			   struct bio *bio)
{
	struct dmz_bioctx *bioctx =
		dm_per_bio_data(bio, sizeof(struct dmz_bioctx));
	struct dmz_metadata *zmd = dmz->metadata;
	struct dm_zone *zone;
	int ret;

	dmz_lock_metadata(zmd);

	/*
	 * Get the data zone mapping for the chunk. There may be
	 * no mapping for read and discard operations.
	 */
	zone = dmz_get_chunk_mapping(zmd, dmz_bio_chunk(zmd, bio),
				     bio_op(bio));
	if (IS_ERR(zone)) {
		ret = PTR_ERR(zone);
		goto out;
	}

	/* Process the BIO */
	if (zone) {
		dmz_activate_zone(zone);
		bioctx->zone = zone;
		dmz_reclaim_bio_acc(zone->dev->reclaim);
	}

	switch (bio_op(bio)) {
	case REQ_OP_READ:
		ret = dmz_handle_read(dmz, zone, bio);
		break;
	case REQ_OP_WRITE:
		ret = dmz_handle_write(dmz, zone, bio);
		break;
	case REQ_OP_DISCARD:
	case REQ_OP_WRITE_ZEROES:
		ret = dmz_handle_discard(dmz, zone, bio);
		break;
	default:
		DMERR("(%s): Unsupported BIO operation 0x%x",
		      dmz_metadata_label(dmz->metadata), bio_op(bio));
		ret = -EIO;
	}

	/*
	 * Release the chunk mapping. This checks that the mapping is still
	 * valid, that is, that the zone used still has valid blocks.
	 */
	if (zone)
		dmz_put_chunk_mapping(zmd, zone);
out:
	dmz_bio_endio(bio, errno_to_blk_status(ret));

	dmz_unlock_metadata(zmd);
}

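/*
 * Increment the reference count of a chunk work.
 */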
static inline void dmz_get_chunk_work(struct dm_chunk_work *cw)
{
	refcount_inc(&cw->refcount);
}

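/*
 * Decrement the reference count of a chunk work and, if it drops to zero,
 * remove the work from the chunk radix tree and free it.
 */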
static void dmz_put_chunk_work(struct dm_chunk_work *cw)
{
	if (refcount_dec_and_test(&cw->refcount)) {
		WARN_ON(!bio_list_empty(&cw->bio_list));
		radix_tree_delete(&cw->target->chunk_rxtree, cw->chunk);
		kfree(cw);
	}
}

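/*
 * Chunk BIO work function: process the queued BIOs of a chunk one at a
 * time, dropping the chunk lock while each BIO is handled.
 */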
static void dmz_chunk_work(struct work_struct *work)
{
	struct dm_chunk_work *cw = container_of(work, struct dm_chunk_work, work);
	struct dmz_target *dmz = cw->target;
	struct bio *bio;

	mutex_lock(&dmz->chunk_lock);

	/* Process the chunk BIOs */
	while ((bio = bio_list_pop(&cw->bio_list))) {
		mutex_unlock(&dmz->chunk_lock);
		dmz_handle_bio(dmz, cw, bio);
		mutex_lock(&dmz->chunk_lock);
		dmz_put_chunk_work(cw);
	}

	/* Queueing the work incremented the work refcount. Drop this here */
	dmz_put_chunk_work(cw);

	mutex_unlock(&dmz->chunk_lock);
}

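/*
 * Flush work: flush dirty metadata, complete all queued flush BIOs with the
 * flush result, then rearm the periodic flush.
 */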
static void dmz_flush_work(struct work_struct *work)
{
	struct dmz_target *dmz = container_of(work, struct dmz_target, flush_work.work);
	struct bio *bio;
	int ret;

	/* Flush dirty metadata blocks */
	ret = dmz_flush_metadata(dmz->metadata);
	if (ret)
		DMDEBUG("(%s): Metadata flush failed, rc=%d",
			dmz_metadata_label(dmz->metadata), ret);

	/* Process queued flush requests */
	while (1) {
		spin_lock(&dmz->flush_lock);
		bio = bio_list_pop(&dmz->flush_list);
		spin_unlock(&dmz->flush_lock);

		if (!bio)
			break;

		dmz_bio_endio(bio, errno_to_blk_status(ret));
	}

	queue_delayed_work(dmz->flush_wq, &dmz->flush_work, DMZ_FLUSH_PERIOD);
}

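/*
 * Get a chunk work and start it to process a new BIO.
 * If the BIO chunk has no work yet, create one.
 */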
static int dmz_queue_chunk_work(struct dmz_target *dmz, struct bio *bio)
{
	unsigned int chunk = dmz_bio_chunk(dmz->metadata, bio);
	struct dm_chunk_work *cw;
	int ret = 0;

	mutex_lock(&dmz->chunk_lock);

	/* Get the BIO chunk work. If one is not active yet, create one */
	cw = radix_tree_lookup(&dmz->chunk_rxtree, chunk);
	if (cw) {
		dmz_get_chunk_work(cw);
	} else {
		/* Create a new chunk work */
		cw = kmalloc(sizeof(struct dm_chunk_work), GFP_NOIO);
		if (unlikely(!cw)) {
			ret = -ENOMEM;
			goto out;
		}

		INIT_WORK(&cw->work, dmz_chunk_work);
		refcount_set(&cw->refcount, 1);
		cw->target = dmz;
		cw->chunk = chunk;
		bio_list_init(&cw->bio_list);

		ret = radix_tree_insert(&dmz->chunk_rxtree, chunk, cw);
		if (unlikely(ret)) {
			kfree(cw);
			goto out;
		}
	}

	bio_list_add(&cw->bio_list, bio);

	if (queue_work(dmz->chunk_wq, &cw->work))
		dmz_get_chunk_work(cw);
out:
	mutex_unlock(&dmz->chunk_lock);
	return ret;
}

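/*
 * Check whether the backing block device is dying or needs to be checked.
 * Used to start failing I/O early when the device is on its way out.
 */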
bool dmz_bdev_is_dying(struct dmz_dev *dmz_dev)
{
	if (dmz_dev->flags & DMZ_BDEV_DYING)
		return true;

	if (dmz_dev->flags & DMZ_CHECK_BDEV)
		return !dmz_check_bdev(dmz_dev);

	if (blk_queue_dying(bdev_get_queue(dmz_dev->bdev))) {
		dmz_dev_warn(dmz_dev, "Backing device queue dying");
		dmz_dev->flags |= DMZ_BDEV_DYING;
	}

	return dmz_dev->flags & DMZ_BDEV_DYING;
}

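/*
 * Check the backing device availability: clear the DMZ_CHECK_BDEV flag and
 * probe the disk for a media change event, marking the device as dying if
 * the backing device went offline. Returns true if the device is usable.
 */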
bool dmz_check_bdev(struct dmz_dev *dmz_dev)
{
	struct gendisk *disk;

	dmz_dev->flags &= ~DMZ_CHECK_BDEV;

	if (dmz_bdev_is_dying(dmz_dev))
		return false;

	disk = dmz_dev->bdev->bd_disk;
	if (disk->fops->check_events &&
	    disk->fops->check_events(disk, 0) & DISK_EVENT_MEDIA_CHANGE) {
		dmz_dev_warn(dmz_dev, "Backing device offline");
		dmz_dev->flags |= DMZ_BDEV_DYING;
	}

	return !(dmz_dev->flags & DMZ_BDEV_DYING);
}

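/*
 * Process a new BIO: set up the per-BIO context, queue flush BIOs to the
 * flush work, and queue all other BIOs to their chunk work.
 */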
static int dmz_map(struct dm_target *ti, struct bio *bio)
{
	struct dmz_target *dmz = ti->private;
	struct dmz_metadata *zmd = dmz->metadata;
	struct dmz_bioctx *bioctx = dm_per_bio_data(bio, sizeof(struct dmz_bioctx));
	sector_t sector = bio->bi_iter.bi_sector;
	unsigned int nr_sectors = bio_sectors(bio);
	sector_t chunk_sector;
	int ret;

	if (dmz_dev_is_dying(zmd))
		return DM_MAPIO_KILL;

	DMDEBUG("(%s): BIO op %d sector %llu + %u => chunk %llu, block %llu, %u blocks",
		dmz_metadata_label(zmd),
		bio_op(bio), (unsigned long long)sector, nr_sectors,
		(unsigned long long)dmz_bio_chunk(zmd, bio),
		(unsigned long long)dmz_chunk_block(zmd, dmz_bio_block(bio)),
		(unsigned int)dmz_bio_blocks(bio));

	if (!nr_sectors && bio_op(bio) != REQ_OP_WRITE)
		return DM_MAPIO_REMAPPED;

	/* The BIO should be block aligned */
	if ((nr_sectors & DMZ_BLOCK_SECTORS_MASK) || (sector & DMZ_BLOCK_SECTORS_MASK))
		return DM_MAPIO_KILL;

	/* Initialize the BIO context */
	bioctx->dev = NULL;
	bioctx->zone = NULL;
	bioctx->bio = bio;
	refcount_set(&bioctx->ref, 1);

	/* An empty write (flush) BIO: queue it for the flush work */
	if (!nr_sectors && bio_op(bio) == REQ_OP_WRITE) {
		spin_lock(&dmz->flush_lock);
		bio_list_add(&dmz->flush_list, bio);
		spin_unlock(&dmz->flush_lock);
		mod_delayed_work(dmz->flush_wq, &dmz->flush_work, 0);
		return DM_MAPIO_SUBMITTED;
	}

	/* Limit the BIO to the part that fits in the targeted zone */
	chunk_sector = sector & (dmz_zone_nr_sectors(zmd) - 1);
	if (chunk_sector + nr_sectors > dmz_zone_nr_sectors(zmd))
		dm_accept_partial_bio(bio, dmz_zone_nr_sectors(zmd) - chunk_sector);

	/* Queue the BIO on its chunk work */
	ret = dmz_queue_chunk_work(dmz, bio);
	if (ret) {
		DMDEBUG("(%s): BIO op %d, can't process chunk %llu, err %i",
			dmz_metadata_label(zmd),
			bio_op(bio), (u64)dmz_bio_chunk(zmd, bio),
			ret);
		return DM_MAPIO_REQUEUE;
	}

	return DM_MAPIO_SUBMITTED;
}

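/*
 * Get a target device: open it, check its zone model against its position
 * in the device list (a regular device may only be the first of several),
 * and record its capacity.
 */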
static int dmz_get_zoned_device(struct dm_target *ti, char *path,
				int idx, int nr_devs)
{
	struct dmz_target *dmz = ti->private;
	struct dm_dev *ddev;
	struct dmz_dev *dev;
	int ret;
	struct block_device *bdev;

	/* Get the target device */
	ret = dm_get_device(ti, path, dm_table_get_mode(ti->table), &ddev);
	if (ret) {
		ti->error = "Get target device failed";
		return ret;
	}

	bdev = ddev->bdev;
	if (bdev_zoned_model(bdev) == BLK_ZONED_NONE) {
		if (nr_devs == 1) {
			ti->error = "Invalid regular device";
			goto err;
		}
		if (idx != 0) {
			ti->error = "First device must be a regular device";
			goto err;
		}
		if (dmz->ddev[0]) {
			ti->error = "Too many regular devices";
			goto err;
		}
		dev = &dmz->dev[idx];
		dev->flags = DMZ_BDEV_REGULAR;
	} else {
		if (dmz->ddev[idx]) {
			ti->error = "Too many zoned devices";
			goto err;
		}
		if (nr_devs > 1 && idx == 0) {
			ti->error = "First device must be a regular device";
			goto err;
		}
		dev = &dmz->dev[idx];
	}
	dev->bdev = bdev;
	dev->dev_idx = idx;

	dev->capacity = bdev_nr_sectors(bdev);
	if (ti->begin) {
		ti->error = "Partial mapping is not supported";
		goto err;
	}

	dmz->ddev[idx] = ddev;

	return 0;
err:
	dm_put_device(ti, ddev);
	return -EINVAL;
}

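/*
 * Cleanup zoned device information.
 */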
static void dmz_put_zoned_device(struct dm_target *ti)
{
	struct dmz_target *dmz = ti->private;
	int i;

	for (i = 0; i < dmz->nr_ddevs; i++) {
		if (dmz->ddev[i]) {
			dm_put_device(ti, dmz->ddev[i]);
			dmz->ddev[i] = NULL;
		}
	}
}

static int dmz_fixup_devices(struct dm_target *ti)
{
	struct dmz_target *dmz = ti->private;
	struct dmz_dev *reg_dev = NULL;
	sector_t zone_nr_sectors = 0;
	int i;

	/*
	 * When there is more than one device, the first one must be a
	 * regular block device and the others zoned block devices.
	 */
	if (dmz->nr_ddevs > 1) {
		reg_dev = &dmz->dev[0];
		if (!(reg_dev->flags & DMZ_BDEV_REGULAR)) {
			ti->error = "Primary disk is not a regular device";
			return -EINVAL;
		}
		for (i = 1; i < dmz->nr_ddevs; i++) {
			struct dmz_dev *zoned_dev = &dmz->dev[i];
			struct block_device *bdev = zoned_dev->bdev;

			if (zoned_dev->flags & DMZ_BDEV_REGULAR) {
				ti->error = "Secondary disk is not a zoned device";
				return -EINVAL;
			}
			if (zone_nr_sectors &&
			    zone_nr_sectors != bdev_zone_sectors(bdev)) {
				ti->error = "Zone nr sectors mismatch";
				return -EINVAL;
			}
			zone_nr_sectors = bdev_zone_sectors(bdev);
			zoned_dev->zone_nr_sectors = zone_nr_sectors;
			zoned_dev->nr_zones = bdev_nr_zones(bdev);
		}
	} else {
		struct dmz_dev *zoned_dev = &dmz->dev[0];
		struct block_device *bdev = zoned_dev->bdev;

		if (zoned_dev->flags & DMZ_BDEV_REGULAR) {
			ti->error = "Disk is not a zoned device";
			return -EINVAL;
		}
		zoned_dev->zone_nr_sectors = bdev_zone_sectors(bdev);
		zoned_dev->nr_zones = bdev_nr_zones(bdev);
	}

	if (reg_dev) {
		sector_t zone_offset;

		reg_dev->zone_nr_sectors = zone_nr_sectors;
		reg_dev->nr_zones =
			DIV_ROUND_UP_SECTOR_T(reg_dev->capacity,
					      reg_dev->zone_nr_sectors);
		reg_dev->zone_offset = 0;
		zone_offset = reg_dev->nr_zones;
		for (i = 1; i < dmz->nr_ddevs; i++) {
			dmz->dev[i].zone_offset = zone_offset;
			zone_offset += dmz->dev[i].nr_zones;
		}
	}
	return 0;
}

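/*
 * Setup target. The table arguments are one or more backing device paths,
 * with the regular device first in a multi-device setup. A typical dmsetup
 * table line would look like "<start> <len> zoned <dev> [<dev>...]"; the
 * exact invocation shown here is illustrative, not prescriptive.
 */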
static int dmz_ctr(struct dm_target *ti, unsigned int argc, char **argv)
{
	struct dmz_target *dmz;
	int ret, i;

	/* Check arguments */
	if (argc < 1) {
		ti->error = "Invalid argument count";
		return -EINVAL;
	}

	/* Allocate and initialize the target descriptor */
	dmz = kzalloc(sizeof(struct dmz_target), GFP_KERNEL);
	if (!dmz) {
		ti->error = "Unable to allocate the zoned target descriptor";
		return -ENOMEM;
	}
	dmz->dev = kcalloc(argc, sizeof(struct dmz_dev), GFP_KERNEL);
	if (!dmz->dev) {
		ti->error = "Unable to allocate the zoned device descriptors";
		kfree(dmz);
		return -ENOMEM;
	}
	dmz->ddev = kcalloc(argc, sizeof(struct dm_dev *), GFP_KERNEL);
	if (!dmz->ddev) {
		ti->error = "Unable to allocate the dm device descriptors";
		ret = -ENOMEM;
		goto err;
	}
	dmz->nr_ddevs = argc;

	ti->private = dmz;

	/* Get the target devices */
	for (i = 0; i < argc; i++) {
		ret = dmz_get_zoned_device(ti, argv[i], i, argc);
		if (ret)
			goto err_dev;
	}
	ret = dmz_fixup_devices(ti);
	if (ret)
		goto err_dev;

	/* Initialize metadata */
	ret = dmz_ctr_metadata(dmz->dev, argc, &dmz->metadata,
			       dm_table_device_name(ti->table));
	if (ret) {
		ti->error = "Metadata initialization failed";
		goto err_dev;
	}

	/* Set target capabilities */
	ti->max_io_len = dmz_zone_nr_sectors(dmz->metadata);
	ti->num_flush_bios = 1;
	ti->num_discard_bios = 1;
	ti->num_write_zeroes_bios = 1;
	ti->per_io_data_size = sizeof(struct dmz_bioctx);
	ti->flush_supported = true;
	ti->discards_supported = true;

	/* The exposed capacity is the number of chunks that can be mapped */
	ti->len = (sector_t)dmz_nr_chunks(dmz->metadata) <<
		dmz_zone_nr_sectors_shift(dmz->metadata);

	/* Zone BIO */
	ret = bioset_init(&dmz->bio_set, DMZ_MIN_BIOS, 0, 0);
	if (ret) {
		ti->error = "Create BIO set failed";
		goto err_meta;
	}

	/* Chunk BIO work */
	mutex_init(&dmz->chunk_lock);
	INIT_RADIX_TREE(&dmz->chunk_rxtree, GFP_NOIO);
	dmz->chunk_wq = alloc_workqueue("dmz_cwq_%s",
					WQ_MEM_RECLAIM | WQ_UNBOUND, 0,
					dmz_metadata_label(dmz->metadata));
	if (!dmz->chunk_wq) {
		ti->error = "Create chunk workqueue failed";
		ret = -ENOMEM;
		goto err_bio;
	}

	/* Flush BIO work */
	spin_lock_init(&dmz->flush_lock);
	bio_list_init(&dmz->flush_list);
	INIT_DELAYED_WORK(&dmz->flush_work, dmz_flush_work);
	dmz->flush_wq = alloc_ordered_workqueue("dmz_fwq_%s", WQ_MEM_RECLAIM,
						dmz_metadata_label(dmz->metadata));
	if (!dmz->flush_wq) {
		ti->error = "Create flush workqueue failed";
		ret = -ENOMEM;
		goto err_cwq;
	}
	mod_delayed_work(dmz->flush_wq, &dmz->flush_work, DMZ_FLUSH_PERIOD);

	/* Initialize reclaim */
	for (i = 0; i < dmz->nr_ddevs; i++) {
		ret = dmz_ctr_reclaim(dmz->metadata, &dmz->dev[i].reclaim, i);
		if (ret) {
			ti->error = "Zone reclaim initialization failed";
			goto err_fwq;
		}
	}

	DMINFO("(%s): Target device: %llu 512-byte logical sectors (%llu blocks)",
	       dmz_metadata_label(dmz->metadata),
	       (unsigned long long)ti->len,
	       (unsigned long long)dmz_sect2blk(ti->len));

	return 0;
err_fwq:
	destroy_workqueue(dmz->flush_wq);
err_cwq:
	destroy_workqueue(dmz->chunk_wq);
err_bio:
	mutex_destroy(&dmz->chunk_lock);
	bioset_exit(&dmz->bio_set);
err_meta:
	dmz_dtr_metadata(dmz->metadata);
err_dev:
	dmz_put_zoned_device(ti);
err:
	kfree(dmz->dev);
	kfree(dmz);

	return ret;
}

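/*
 * Cleanup target: destroy the work queues, flush and release the metadata,
 * and put the backing devices.
 */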
static void dmz_dtr(struct dm_target *ti)
{
	struct dmz_target *dmz = ti->private;
	int i;

	destroy_workqueue(dmz->chunk_wq);

	for (i = 0; i < dmz->nr_ddevs; i++)
		dmz_dtr_reclaim(dmz->dev[i].reclaim);

	cancel_delayed_work_sync(&dmz->flush_work);
	destroy_workqueue(dmz->flush_wq);

	(void) dmz_flush_metadata(dmz->metadata);

	dmz_dtr_metadata(dmz->metadata);

	bioset_exit(&dmz->bio_set);

	dmz_put_zoned_device(ti);

	mutex_destroy(&dmz->chunk_lock);

	kfree(dmz->dev);
	kfree(dmz);
}

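/*
 * Setup target request queue limits.
 */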
static void dmz_io_hints(struct dm_target *ti, struct queue_limits *limits)
{
	struct dmz_target *dmz = ti->private;
	unsigned int chunk_sectors = dmz_zone_nr_sectors(dmz->metadata);

	limits->logical_block_size = DMZ_BLOCK_SIZE;
	limits->physical_block_size = DMZ_BLOCK_SIZE;

	blk_limits_io_min(limits, DMZ_BLOCK_SIZE);
	blk_limits_io_opt(limits, DMZ_BLOCK_SIZE);

	limits->discard_alignment = 0;
	limits->discard_granularity = DMZ_BLOCK_SIZE;
	limits->max_discard_sectors = chunk_sectors;
	limits->max_hw_discard_sectors = chunk_sectors;
	limits->max_write_zeroes_sectors = chunk_sectors;

	/* FS hint to try to align to the device zone size */
	limits->chunk_sectors = chunk_sectors;
	limits->max_sectors = chunk_sectors;

	/* The target is exposed as a regular (non-zoned) block device */
	limits->zoned = BLK_ZONED_NONE;
}

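/*
 * Pass on ioctls to the first backing device.
 */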
static int dmz_prepare_ioctl(struct dm_target *ti, struct block_device **bdev)
{
	struct dmz_target *dmz = ti->private;
	struct dmz_dev *dev = &dmz->dev[0];

	if (!dmz_check_bdev(dev))
		return -EIO;

	*bdev = dev->bdev;

	return 0;
}

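/*
 * Suspend: flush pending chunk work, suspend reclaim, and stop the
 * periodic metadata flush.
 */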
static void dmz_suspend(struct dm_target *ti)
{
	struct dmz_target *dmz = ti->private;
	int i;

	flush_workqueue(dmz->chunk_wq);
	for (i = 0; i < dmz->nr_ddevs; i++)
		dmz_suspend_reclaim(dmz->dev[i].reclaim);
	cancel_delayed_work_sync(&dmz->flush_work);
}

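/*
 * Resume: restart the periodic metadata flush and resume reclaim.
 */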
static void dmz_resume(struct dm_target *ti)
{
	struct dmz_target *dmz = ti->private;
	int i;

	queue_delayed_work(dmz->flush_wq, &dmz->flush_work, DMZ_FLUSH_PERIOD);
	for (i = 0; i < dmz->nr_ddevs; i++)
		dmz_resume_reclaim(dmz->dev[i].reclaim);
}

static int dmz_iterate_devices(struct dm_target *ti,
			       iterate_devices_callout_fn fn, void *data)
{
	struct dmz_target *dmz = ti->private;
	unsigned int zone_nr_sectors = dmz_zone_nr_sectors(dmz->metadata);
	sector_t capacity;
	int i, r;

	for (i = 0; i < dmz->nr_ddevs; i++) {
		capacity = dmz->dev[i].capacity & ~(zone_nr_sectors - 1);
		r = fn(ti, dmz->ddev[i], 0, capacity, data);
		if (r)
			break;
	}
	return r;
}

static void dmz_status(struct dm_target *ti, status_type_t type,
		       unsigned int status_flags, char *result,
		       unsigned int maxlen)
{
	struct dmz_target *dmz = ti->private;
	ssize_t sz = 0;
	char buf[BDEVNAME_SIZE];
	struct dmz_dev *dev;
	int i;

	switch (type) {
	case STATUSTYPE_INFO:
		DMEMIT("%u zones %u/%u cache",
		       dmz_nr_zones(dmz->metadata),
		       dmz_nr_unmap_cache_zones(dmz->metadata),
		       dmz_nr_cache_zones(dmz->metadata));
		for (i = 0; i < dmz->nr_ddevs; i++) {
			/*
			 * For a multi-device setup the first device
			 * contains only cache zones.
			 */
			if ((i == 0) &&
			    (dmz_nr_cache_zones(dmz->metadata) > 0))
				continue;
			DMEMIT(" %u/%u random %u/%u sequential",
			       dmz_nr_unmap_rnd_zones(dmz->metadata, i),
			       dmz_nr_rnd_zones(dmz->metadata, i),
			       dmz_nr_unmap_seq_zones(dmz->metadata, i),
			       dmz_nr_seq_zones(dmz->metadata, i));
		}
		break;
	case STATUSTYPE_TABLE:
		dev = &dmz->dev[0];
		format_dev_t(buf, dev->bdev->bd_dev);
		DMEMIT("%s", buf);
		for (i = 1; i < dmz->nr_ddevs; i++) {
			dev = &dmz->dev[i];
			format_dev_t(buf, dev->bdev->bd_dev);
			DMEMIT(" %s", buf);
		}
		break;
	case STATUSTYPE_IMA:
		*result = '\0';
		break;
	}
}

static int dmz_message(struct dm_target *ti, unsigned int argc, char **argv,
		       char *result, unsigned int maxlen)
{
	struct dmz_target *dmz = ti->private;
	int r = -EINVAL;

	if (!strcasecmp(argv[0], "reclaim")) {
		int i;

		for (i = 0; i < dmz->nr_ddevs; i++)
			dmz_schedule_reclaim(dmz->dev[i].reclaim);
		r = 0;
	} else
		DMERR("unrecognized message %s", argv[0]);
	return r;
}

static struct target_type dmz_type = {
	.name		 = "zoned",
	.version	 = {2, 0, 0},
	.features	 = DM_TARGET_SINGLETON | DM_TARGET_MIXED_ZONED_MODEL,
	.module		 = THIS_MODULE,
	.ctr		 = dmz_ctr,
	.dtr		 = dmz_dtr,
	.map		 = dmz_map,
	.io_hints	 = dmz_io_hints,
	.prepare_ioctl	 = dmz_prepare_ioctl,
	.postsuspend	 = dmz_suspend,
	.resume		 = dmz_resume,
	.iterate_devices = dmz_iterate_devices,
	.status		 = dmz_status,
	.message	 = dmz_message,
};

static int __init dmz_init(void)
{
	return dm_register_target(&dmz_type);
}

static void __exit dmz_exit(void)
{
	dm_unregister_target(&dmz_type);
}

module_init(dmz_init);
module_exit(dmz_exit);

MODULE_DESCRIPTION(DM_NAME " target for zoned block devices");
MODULE_AUTHOR("Damien Le Moal <damien.lemoal@wdc.com>");
MODULE_LICENSE("GPL");