// SPDX-License-Identifier: GPL-2.0
#include <linux/vmalloc.h>
#include <linux/bitmap.h>
#include "null_blk.h"

#define CREATE_TRACE_POINTS
#include "trace.h"

#undef pr_fmt
#define pr_fmt(fmt)	"null_blk: " fmt

static inline sector_t mb_to_sects(unsigned long mb)
{
	return ((sector_t)mb * SZ_1M) >> SECTOR_SHIFT;
}

static inline unsigned int null_zone_no(struct nullb_device *dev, sector_t sect)
{
	return sect >> ilog2(dev->zone_size_sects);
}

static inline void null_lock_zone_res(struct nullb_device *dev)
{
	if (dev->need_zone_res_mgmt)
		spin_lock_irq(&dev->zone_res_lock);
}

static inline void null_unlock_zone_res(struct nullb_device *dev)
{
	if (dev->need_zone_res_mgmt)
		spin_unlock_irq(&dev->zone_res_lock);
}

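/*
 * Zone locking: with memory backing, processing a command may sleep while
 * copying data, so a mutex is used to serialize accesses to a zone. Without
 * memory backing, a spinlock is sufficient and cheaper.
 */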
static inline void null_init_zone_lock(struct nullb_device *dev,
				       struct nullb_zone *zone)
{
	if (!dev->memory_backed)
		spin_lock_init(&zone->spinlock);
	else
		mutex_init(&zone->mutex);
}

static inline void null_lock_zone(struct nullb_device *dev,
				  struct nullb_zone *zone)
{
	if (!dev->memory_backed)
		spin_lock_irq(&zone->spinlock);
	else
		mutex_lock(&zone->mutex);
}

static inline void null_unlock_zone(struct nullb_device *dev,
				    struct nullb_zone *zone)
{
	if (!dev->memory_backed)
		spin_unlock_irq(&zone->spinlock);
	else
		mutex_unlock(&zone->mutex);
}

int null_init_zoned_dev(struct nullb_device *dev, struct request_queue *q)
{
	sector_t dev_capacity_sects, zone_capacity_sects;
	struct nullb_zone *zone;
	sector_t sector = 0;
	unsigned int i;

	if (!is_power_of_2(dev->zone_size)) {
		pr_err("zone_size must be power-of-two\n");
		return -EINVAL;
	}
	if (dev->zone_size > dev->size) {
		pr_err("Zone size larger than device capacity\n");
		return -EINVAL;
	}

	if (!dev->zone_capacity)
		dev->zone_capacity = dev->zone_size;

	if (dev->zone_capacity > dev->zone_size) {
		pr_err("zone capacity (%lu MB) larger than zone size (%lu MB)\n",
		       dev->zone_capacity, dev->zone_size);
		return -EINVAL;
	}

	zone_capacity_sects = mb_to_sects(dev->zone_capacity);
	dev_capacity_sects = mb_to_sects(dev->size);
	dev->zone_size_sects = mb_to_sects(dev->zone_size);
	dev->nr_zones = round_up(dev_capacity_sects, dev->zone_size_sects)
			>> ilog2(dev->zone_size_sects);

	dev->zones = kvmalloc_array(dev->nr_zones, sizeof(struct nullb_zone),
				    GFP_KERNEL | __GFP_ZERO);
	if (!dev->zones)
		return -ENOMEM;

	spin_lock_init(&dev->zone_res_lock);

	if (dev->zone_nr_conv >= dev->nr_zones) {
		dev->zone_nr_conv = dev->nr_zones - 1;
		pr_info("changed the number of conventional zones to %u\n",
			dev->zone_nr_conv);
	}

	/* Max active zones has to be < number of seq zones in order to be enforceable */
	if (dev->zone_max_active >= dev->nr_zones - dev->zone_nr_conv) {
		dev->zone_max_active = 0;
		pr_info("zone_max_active limit disabled, limit >= zone count\n");
	}

	/* Max open zones has to be <= max active zones */
	if (dev->zone_max_active && dev->zone_max_open > dev->zone_max_active) {
		dev->zone_max_open = dev->zone_max_active;
		pr_info("changed the maximum number of open zones to %u\n",
			dev->zone_max_open);
	} else if (dev->zone_max_open >= dev->nr_zones - dev->zone_nr_conv) {
		dev->zone_max_open = 0;
		pr_info("zone_max_open limit disabled, limit >= zone count\n");
	}
	dev->need_zone_res_mgmt = dev->zone_max_active || dev->zone_max_open;
	dev->imp_close_zone_no = dev->zone_nr_conv;

	for (i = 0; i < dev->zone_nr_conv; i++) {
		zone = &dev->zones[i];

		null_init_zone_lock(dev, zone);
		zone->start = sector;
		zone->len = dev->zone_size_sects;
		zone->capacity = zone->len;
		zone->wp = zone->start + zone->len;
		zone->type = BLK_ZONE_TYPE_CONVENTIONAL;
		zone->cond = BLK_ZONE_COND_NOT_WP;

		sector += dev->zone_size_sects;
	}

	for (i = dev->zone_nr_conv; i < dev->nr_zones; i++) {
		zone = &dev->zones[i];

		null_init_zone_lock(dev, zone);
		zone->start = zone->wp = sector;
		if (zone->start + dev->zone_size_sects > dev_capacity_sects)
			zone->len = dev_capacity_sects - zone->start;
		else
			zone->len = dev->zone_size_sects;
		zone->capacity =
			min_t(sector_t, zone->len, zone_capacity_sects);
		zone->type = BLK_ZONE_TYPE_SEQWRITE_REQ;
		zone->cond = BLK_ZONE_COND_EMPTY;

		sector += dev->zone_size_sects;
	}

	return 0;
}

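/*
 * Request-based (blk-mq) devices go through full zone revalidation, which
 * sets up the disk zone information. BIO-based devices do not support zone
 * revalidation, so their zone queue limits are set directly here.
 */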
int null_register_zoned_dev(struct nullb *nullb)
{
	struct nullb_device *dev = nullb->dev;
	struct request_queue *q = nullb->q;

	disk_set_zoned(nullb->disk, BLK_ZONED_HM);
	blk_queue_flag_set(QUEUE_FLAG_ZONE_RESETALL, q);
	blk_queue_required_elevator_features(q, ELEVATOR_F_ZBD_SEQ_WRITE);

	if (queue_is_mq(q)) {
		int ret = blk_revalidate_disk_zones(nullb->disk, NULL);

		if (ret)
			return ret;
	} else {
		blk_queue_chunk_sectors(q, dev->zone_size_sects);
		nullb->disk->nr_zones = bdev_nr_zones(nullb->disk->part0);
	}

	blk_queue_max_zone_append_sectors(q, dev->zone_size_sects);
	disk_set_max_open_zones(nullb->disk, dev->zone_max_open);
	disk_set_max_active_zones(nullb->disk, dev->zone_max_active);

	return 0;
}

void null_free_zoned_dev(struct nullb_device *dev)
{
	kvfree(dev->zones);
	dev->zones = NULL;
}

int null_report_zones(struct gendisk *disk, sector_t sector,
		      unsigned int nr_zones, report_zones_cb cb, void *data)
{
	struct nullb *nullb = disk->private_data;
	struct nullb_device *dev = nullb->dev;
	unsigned int first_zone, i;
	struct nullb_zone *zone;
	struct blk_zone blkz;
	int error;

	first_zone = null_zone_no(dev, sector);
	if (first_zone >= dev->nr_zones)
		return 0;

	nr_zones = min(nr_zones, dev->nr_zones - first_zone);
	trace_nullb_report_zones(nullb, nr_zones);

	memset(&blkz, 0, sizeof(struct blk_zone));
	zone = &dev->zones[first_zone];
	for (i = 0; i < nr_zones; i++, zone++) {
		/*
		 * Stacked DM target drivers will remap the zone information
		 * by modifying the zone information passed to the report
		 * callback. So use a local copy to avoid corruption of the
		 * device zone array.
		 */
		null_lock_zone(dev, zone);
		blkz.start = zone->start;
		blkz.len = zone->len;
		blkz.wp = zone->wp;
		blkz.type = zone->type;
		blkz.cond = zone->cond;
		blkz.capacity = zone->capacity;
		null_unlock_zone(dev, zone);

		error = cb(&blkz, i, data);
		if (error)
			return error;
	}

	return nr_zones;
}

/*
 * This is called in the case of memory backing from null_process_cmd()
 * with the target zone already locked.
 */
size_t null_zone_valid_read_len(struct nullb *nullb,
				sector_t sector, unsigned int len)
{
	struct nullb_device *dev = nullb->dev;
	struct nullb_zone *zone = &dev->zones[null_zone_no(dev, sector)];
	unsigned int nr_sectors = len >> SECTOR_SHIFT;

	/* Read must be below the write pointer position */
	if (zone->type == BLK_ZONE_TYPE_CONVENTIONAL ||
	    sector + nr_sectors <= zone->wp)
		return len;

	if (sector > zone->wp)
		return 0;

	return (zone->wp - sector) << SECTOR_SHIFT;
}

static blk_status_t __null_close_zone(struct nullb_device *dev,
				      struct nullb_zone *zone)
{
	switch (zone->cond) {
	case BLK_ZONE_COND_CLOSED:
		/* close operation on closed is not an error */
		return BLK_STS_OK;
	case BLK_ZONE_COND_IMP_OPEN:
		dev->nr_zones_imp_open--;
		break;
	case BLK_ZONE_COND_EXP_OPEN:
		dev->nr_zones_exp_open--;
		break;
	case BLK_ZONE_COND_EMPTY:
	case BLK_ZONE_COND_FULL:
	default:
		return BLK_STS_IOERR;
	}

	if (zone->wp == zone->start) {
		zone->cond = BLK_ZONE_COND_EMPTY;
	} else {
		zone->cond = BLK_ZONE_COND_CLOSED;
		dev->nr_zones_closed++;
	}

	return BLK_STS_OK;
}

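/*
 * Close the first implicitly open zone found, scanning the sequential zones
 * round-robin starting from the zone following the last one closed here
 * (dev->imp_close_zone_no), so that closures are spread over all zones.
 */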
static void null_close_imp_open_zone(struct nullb_device *dev)
{
	struct nullb_zone *zone;
	unsigned int zno, i;

	zno = dev->imp_close_zone_no;
	if (zno >= dev->nr_zones)
		zno = dev->zone_nr_conv;

	for (i = dev->zone_nr_conv; i < dev->nr_zones; i++) {
		zone = &dev->zones[zno];
		zno++;
		if (zno >= dev->nr_zones)
			zno = dev->zone_nr_conv;

		if (zone->cond == BLK_ZONE_COND_IMP_OPEN) {
			__null_close_zone(dev, zone);
			dev->imp_close_zone_no = zno;
			return;
		}
	}
}

static blk_status_t null_check_active(struct nullb_device *dev)
{
	if (!dev->zone_max_active)
		return BLK_STS_OK;

	if (dev->nr_zones_exp_open + dev->nr_zones_imp_open +
	    dev->nr_zones_closed < dev->zone_max_active)
		return BLK_STS_OK;

	return BLK_STS_ZONE_ACTIVE_RESOURCE;
}

static blk_status_t null_check_open(struct nullb_device *dev)
{
	if (!dev->zone_max_open)
		return BLK_STS_OK;

	if (dev->nr_zones_exp_open + dev->nr_zones_imp_open < dev->zone_max_open)
		return BLK_STS_OK;

	if (dev->nr_zones_imp_open) {
		if (null_check_active(dev) == BLK_STS_OK) {
			null_close_imp_open_zone(dev);
			return BLK_STS_OK;
		}
	}

	return BLK_STS_ZONE_OPEN_RESOURCE;
}

/*
 * This function matches the manage open zone resources function in the ZBC
 * standard, with the addition of max active zone support (added with the
 * ZNS standard).
 *
 * The function determines if a zone can transition to implicit open or
 * explicit open while maintaining the max open zone (and max active zone)
 * limit(s). It may close an implicitly open zone to make room for the zone
 * transitioning to the open state.
 */
static blk_status_t null_check_zone_resources(struct nullb_device *dev,
					      struct nullb_zone *zone)
{
	blk_status_t ret;

	switch (zone->cond) {
	case BLK_ZONE_COND_EMPTY:
		ret = null_check_active(dev);
		if (ret != BLK_STS_OK)
			return ret;
		fallthrough;
	case BLK_ZONE_COND_CLOSED:
		return null_check_open(dev);
	default:
		/* Should never be called for other states */
		WARN_ON(1);
		return BLK_STS_IOERR;
	}
}

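/*
 * Handle a regular write or a zone append. For sequential zones, regular
 * writes must land exactly at the write pointer, appends are redirected
 * there, zone open/active accounting is updated as needed, and the write
 * pointer is advanced on success.
 */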
static blk_status_t null_zone_write(struct nullb_cmd *cmd, sector_t sector,
				    unsigned int nr_sectors, bool append)
{
	struct nullb_device *dev = cmd->nq->dev;
	unsigned int zno = null_zone_no(dev, sector);
	struct nullb_zone *zone = &dev->zones[zno];
	blk_status_t ret;

	trace_nullb_zone_op(cmd, zno, zone->cond);

	if (zone->type == BLK_ZONE_TYPE_CONVENTIONAL) {
		if (append)
			return BLK_STS_IOERR;
		return null_process_cmd(cmd, REQ_OP_WRITE, sector, nr_sectors);
	}

	null_lock_zone(dev, zone);

	if (zone->cond == BLK_ZONE_COND_FULL) {
		/* Cannot write to a full zone */
		ret = BLK_STS_IOERR;
		goto unlock;
	}

	/*
	 * Regular writes must be at the write pointer position. Zone append
	 * writes are automatically issued at the write pointer and the
	 * position returned using the request or BIO sector.
	 */
	if (append) {
		sector = zone->wp;
		if (dev->queue_mode == NULL_Q_MQ)
			cmd->rq->__sector = sector;
		else
			cmd->bio->bi_iter.bi_sector = sector;
	} else if (sector != zone->wp) {
		ret = BLK_STS_IOERR;
		goto unlock;
	}

	if (zone->wp + nr_sectors > zone->start + zone->capacity) {
		ret = BLK_STS_IOERR;
		goto unlock;
	}

	if (zone->cond == BLK_ZONE_COND_CLOSED ||
	    zone->cond == BLK_ZONE_COND_EMPTY) {
		null_lock_zone_res(dev);

		ret = null_check_zone_resources(dev, zone);
		if (ret != BLK_STS_OK) {
			null_unlock_zone_res(dev);
			goto unlock;
		}
		if (zone->cond == BLK_ZONE_COND_CLOSED) {
			dev->nr_zones_closed--;
			dev->nr_zones_imp_open++;
		} else if (zone->cond == BLK_ZONE_COND_EMPTY) {
			dev->nr_zones_imp_open++;
		}

		if (zone->cond != BLK_ZONE_COND_EXP_OPEN)
			zone->cond = BLK_ZONE_COND_IMP_OPEN;

		null_unlock_zone_res(dev);
	}

	ret = null_process_cmd(cmd, REQ_OP_WRITE, sector, nr_sectors);
	if (ret != BLK_STS_OK)
		goto unlock;

	zone->wp += nr_sectors;
	if (zone->wp == zone->start + zone->capacity) {
		null_lock_zone_res(dev);
		if (zone->cond == BLK_ZONE_COND_EXP_OPEN)
			dev->nr_zones_exp_open--;
		else if (zone->cond == BLK_ZONE_COND_IMP_OPEN)
			dev->nr_zones_imp_open--;
		zone->cond = BLK_ZONE_COND_FULL;
		null_unlock_zone_res(dev);
	}

	ret = BLK_STS_OK;

unlock:
	null_unlock_zone(dev, zone);

	return ret;
}

static blk_status_t null_open_zone(struct nullb_device *dev,
				   struct nullb_zone *zone)
{
	blk_status_t ret = BLK_STS_OK;

	if (zone->type == BLK_ZONE_TYPE_CONVENTIONAL)
		return BLK_STS_IOERR;

	null_lock_zone_res(dev);

	switch (zone->cond) {
	case BLK_ZONE_COND_EXP_OPEN:
		/* open operation on exp open is not an error */
		goto unlock;
	case BLK_ZONE_COND_EMPTY:
		ret = null_check_zone_resources(dev, zone);
		if (ret != BLK_STS_OK)
			goto unlock;
		break;
	case BLK_ZONE_COND_IMP_OPEN:
		dev->nr_zones_imp_open--;
		break;
	case BLK_ZONE_COND_CLOSED:
		ret = null_check_zone_resources(dev, zone);
		if (ret != BLK_STS_OK)
			goto unlock;
		dev->nr_zones_closed--;
		break;
	case BLK_ZONE_COND_FULL:
	default:
		ret = BLK_STS_IOERR;
		goto unlock;
	}

	zone->cond = BLK_ZONE_COND_EXP_OPEN;
	dev->nr_zones_exp_open++;

unlock:
	null_unlock_zone_res(dev);

	return ret;
}

static blk_status_t null_close_zone(struct nullb_device *dev,
				    struct nullb_zone *zone)
{
	blk_status_t ret;

	if (zone->type == BLK_ZONE_TYPE_CONVENTIONAL)
		return BLK_STS_IOERR;

	null_lock_zone_res(dev);
	ret = __null_close_zone(dev, zone);
	null_unlock_zone_res(dev);

	return ret;
}

static blk_status_t null_finish_zone(struct nullb_device *dev,
				     struct nullb_zone *zone)
{
	blk_status_t ret = BLK_STS_OK;

	if (zone->type == BLK_ZONE_TYPE_CONVENTIONAL)
		return BLK_STS_IOERR;

	null_lock_zone_res(dev);

	switch (zone->cond) {
	case BLK_ZONE_COND_FULL:
		/* finish operation on full is not an error */
		goto unlock;
	case BLK_ZONE_COND_EMPTY:
		ret = null_check_zone_resources(dev, zone);
		if (ret != BLK_STS_OK)
			goto unlock;
		break;
	case BLK_ZONE_COND_IMP_OPEN:
		dev->nr_zones_imp_open--;
		break;
	case BLK_ZONE_COND_EXP_OPEN:
		dev->nr_zones_exp_open--;
		break;
	case BLK_ZONE_COND_CLOSED:
		ret = null_check_zone_resources(dev, zone);
		if (ret != BLK_STS_OK)
			goto unlock;
		dev->nr_zones_closed--;
		break;
	default:
		ret = BLK_STS_IOERR;
		goto unlock;
	}

	zone->cond = BLK_ZONE_COND_FULL;
	zone->wp = zone->start + zone->len;

unlock:
	null_unlock_zone_res(dev);

	return ret;
}

static blk_status_t null_reset_zone(struct nullb_device *dev,
				    struct nullb_zone *zone)
{
	if (zone->type == BLK_ZONE_TYPE_CONVENTIONAL)
		return BLK_STS_IOERR;

	null_lock_zone_res(dev);

	switch (zone->cond) {
	case BLK_ZONE_COND_EMPTY:
		/* reset operation on empty is not an error */
		null_unlock_zone_res(dev);
		return BLK_STS_OK;
	case BLK_ZONE_COND_IMP_OPEN:
		dev->nr_zones_imp_open--;
		break;
	case BLK_ZONE_COND_EXP_OPEN:
		dev->nr_zones_exp_open--;
		break;
	case BLK_ZONE_COND_CLOSED:
		dev->nr_zones_closed--;
		break;
	case BLK_ZONE_COND_FULL:
		break;
	default:
		null_unlock_zone_res(dev);
		return BLK_STS_IOERR;
	}

	zone->cond = BLK_ZONE_COND_EMPTY;
	zone->wp = zone->start;

	null_unlock_zone_res(dev);

	if (dev->memory_backed)
		return null_handle_discard(dev, zone->start, zone->len);

	return BLK_STS_OK;
}

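/*
 * For REQ_OP_ZONE_RESET_ALL, only the sequential zones need resetting and
 * each zone is locked individually while it is reset. All other operations
 * target the single zone containing the command sector.
 */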
static blk_status_t null_zone_mgmt(struct nullb_cmd *cmd, enum req_op op,
				   sector_t sector)
{
	struct nullb_device *dev = cmd->nq->dev;
	unsigned int zone_no;
	struct nullb_zone *zone;
	blk_status_t ret;
	size_t i;

	if (op == REQ_OP_ZONE_RESET_ALL) {
		for (i = dev->zone_nr_conv; i < dev->nr_zones; i++) {
			zone = &dev->zones[i];
			null_lock_zone(dev, zone);
			if (zone->cond != BLK_ZONE_COND_EMPTY) {
				null_reset_zone(dev, zone);
				trace_nullb_zone_op(cmd, i, zone->cond);
			}
			null_unlock_zone(dev, zone);
		}
		return BLK_STS_OK;
	}

	zone_no = null_zone_no(dev, sector);
	zone = &dev->zones[zone_no];

	null_lock_zone(dev, zone);

	switch (op) {
	case REQ_OP_ZONE_RESET:
		ret = null_reset_zone(dev, zone);
		break;
	case REQ_OP_ZONE_OPEN:
		ret = null_open_zone(dev, zone);
		break;
	case REQ_OP_ZONE_CLOSE:
		ret = null_close_zone(dev, zone);
		break;
	case REQ_OP_ZONE_FINISH:
		ret = null_finish_zone(dev, zone);
		break;
	default:
		ret = BLK_STS_NOTSUPP;
		break;
	}

	if (ret == BLK_STS_OK)
		trace_nullb_zone_op(cmd, zone_no, zone->cond);

	null_unlock_zone(dev, zone);

	return ret;
}

blk_status_t null_process_zoned_cmd(struct nullb_cmd *cmd, enum req_op op,
				    sector_t sector, sector_t nr_sectors)
{
	struct nullb_device *dev;
	struct nullb_zone *zone;
	blk_status_t sts;

	switch (op) {
	case REQ_OP_WRITE:
		return null_zone_write(cmd, sector, nr_sectors, false);
	case REQ_OP_ZONE_APPEND:
		return null_zone_write(cmd, sector, nr_sectors, true);
	case REQ_OP_ZONE_RESET:
	case REQ_OP_ZONE_RESET_ALL:
	case REQ_OP_ZONE_OPEN:
	case REQ_OP_ZONE_CLOSE:
	case REQ_OP_ZONE_FINISH:
		return null_zone_mgmt(cmd, op, sector);
	default:
		dev = cmd->nq->dev;
		zone = &dev->zones[null_zone_no(dev, sector)];

		null_lock_zone(dev, zone);
		sts = null_process_cmd(cmd, op, sector, nr_sectors);
		null_unlock_zone(dev, zone);
		return sts;
	}
}