// SPDX-License-Identifier: GPL-2.0
/*
 * NVMe over Fabrics target: ZNS (Zoned Namespace) command implementation
 * for namespaces backed by zoned block devices.
 */
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
#include <linux/nvme.h>
#include <linux/blkdev.h>
#include "nvmet.h"

/*
 * The minimum memory page size used by the target is 2^NVMET_MPSMIN_SHIFT,
 * i.e. 4K. The Zone Append Size Limit (ZASL) is expressed as a power of two
 * in units of that minimum memory page size.
 */
#define NVMET_MPSMIN_SHIFT	12

static inline u8 nvmet_zasl(unsigned int zone_append_sects)
{
	/*
	 * Zone Append Size Limit (ZASL) is expressed as a power of 2 value
	 * with the minimum memory page size (i.e. 4K) as the unit.
	 */
	return ilog2(zone_append_sects >> (NVMET_MPSMIN_SHIFT - 9));
}

static int validate_conv_zones_cb(struct blk_zone *z,
				  unsigned int i, void *data)
{
	if (z->type == BLK_ZONE_TYPE_CONVENTIONAL)
		return -EOPNOTSUPP;
	return 0;
}

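/*
 * Check that a zoned block device can be exported as a ZNS namespace: the
 * device zone append limit must not be smaller than the ZASL already
 * advertised by the subsystem, the capacity must be a multiple of the zone
 * size, and the device must not contain any conventional zones. Returns
 * true if the namespace can be enabled.
 */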
bool nvmet_bdev_zns_enable(struct nvmet_ns *ns)
{
	u8 zasl = nvmet_zasl(bdev_max_zone_append_sectors(ns->bdev));
	struct gendisk *bd_disk = ns->bdev->bd_disk;
	int ret;

	if (ns->subsys->zasl) {
		if (ns->subsys->zasl > zasl)
			return false;
	}
	ns->subsys->zasl = zasl;

	/*
	 * Generic zoned block devices may have a smaller last zone which is
	 * not supported by ZNS. Exclude zoned drives that have such smaller
	 * last zones.
	 */
	if (get_capacity(bd_disk) & (bdev_zone_sectors(ns->bdev) - 1))
		return false;

	/*
	 * ZNS does not define a conventional zone type. Reject devices that
	 * have conventional zones, either indicated by the disk conventional
	 * zone bitmap or found by a full zone report.
	 */
	if (ns->bdev->bd_disk->conv_zones_bitmap)
		return false;

	ret = blkdev_report_zones(ns->bdev, 0, bdev_nr_zones(ns->bdev),
				  validate_conv_zones_cb, NULL);
	if (ret < 0)
		return false;

	ns->blksize_shift = blksize_bits(bdev_logical_block_size(ns->bdev));

	return true;
}

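/*
 * Build the ZNS command set specific Identify Controller data structure.
 * The reported ZASL is clamped to the transport MDTS when the transport
 * provides one.
 */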
void nvmet_execute_identify_cns_cs_ctrl(struct nvmet_req *req)
{
	u8 zasl = req->sq->ctrl->subsys->zasl;
	struct nvmet_ctrl *ctrl = req->sq->ctrl;
	struct nvme_id_ctrl_zns *id;
	u16 status;

	id = kzalloc(sizeof(*id), GFP_KERNEL);
	if (!id) {
		status = NVME_SC_INTERNAL;
		goto out;
	}

	if (ctrl->ops->get_mdts)
		id->zasl = min_t(u8, ctrl->ops->get_mdts(ctrl), zasl);
	else
		id->zasl = zasl;

	status = nvmet_copy_to_sgl(req, 0, id, sizeof(*id));

	kfree(id);
out:
	nvmet_req_complete(req, status);
}

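/*
 * Build the ZNS command set specific Identify Namespace data structure:
 * the zone size in logical blocks plus the 0's based Maximum Open Resources
 * (MOR) and Maximum Active Resources (MAR) fields, where 0xffffffff means
 * no limit.
 */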
void nvmet_execute_identify_cns_cs_ns(struct nvmet_req *req)
{
	struct nvme_id_ns_zns *id_zns;
	u64 zsze;
	u16 status;
	u32 mar, mor;

	if (le32_to_cpu(req->cmd->identify.nsid) == NVME_NSID_ALL) {
		req->error_loc = offsetof(struct nvme_identify, nsid);
		status = NVME_SC_INVALID_NS | NVME_SC_DNR;
		goto out;
	}

	id_zns = kzalloc(sizeof(*id_zns), GFP_KERNEL);
	if (!id_zns) {
		status = NVME_SC_INTERNAL;
		goto out;
	}

	status = nvmet_req_find_ns(req);
	if (status)
		goto done;

	if (!bdev_is_zoned(req->ns->bdev)) {
		req->error_loc = offsetof(struct nvme_identify, nsid);
		goto done;
	}

	if (nvmet_ns_revalidate(req->ns)) {
		mutex_lock(&req->ns->subsys->lock);
		nvmet_ns_changed(req->ns->subsys, req->ns->nsid);
		mutex_unlock(&req->ns->subsys->lock);
	}

	/* Zone size in logical blocks */
	zsze = (bdev_zone_sectors(req->ns->bdev) << 9) >>
					req->ns->blksize_shift;
	id_zns->lbafe[0].zsze = cpu_to_le64(zsze);

	/* MOR/MAR are 0's based; 0 from the block layer means no limit */
	mor = bdev_max_open_zones(req->ns->bdev);
	if (!mor)
		mor = U32_MAX;
	else
		mor--;
	id_zns->mor = cpu_to_le32(mor);

	mar = bdev_max_active_zones(req->ns->bdev);
	if (!mar)
		mar = U32_MAX;
	else
		mar--;
	id_zns->mar = cpu_to_le32(mar);

done:
	status = nvmet_copy_to_sgl(req, 0, id_zns, sizeof(*id_zns));
	kfree(id_zns);
out:
	nvmet_req_complete(req, status);
}

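/*
 * Validate the fields of a Zone Management Receive command: the starting LBA
 * must be within the namespace capacity, the host buffer must be at least
 * large enough for the report header, and the Zone Receive Action, Partial
 * Report bit and reporting options must be supported values.
 */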
static u16 nvmet_bdev_validate_zone_mgmt_recv(struct nvmet_req *req)
{
	sector_t sect = nvmet_lba_to_sect(req->ns, req->cmd->zmr.slba);
	u32 out_bufsize = (le32_to_cpu(req->cmd->zmr.numd) + 1) << 2;

	if (sect >= get_capacity(req->ns->bdev->bd_disk)) {
		req->error_loc = offsetof(struct nvme_zone_mgmt_recv_cmd, slba);
		return NVME_SC_LBA_RANGE | NVME_SC_DNR;
	}

	if (out_bufsize < sizeof(struct nvme_zone_report)) {
		req->error_loc = offsetof(struct nvme_zone_mgmt_recv_cmd, numd);
		return NVME_SC_INVALID_FIELD | NVME_SC_DNR;
	}

	if (req->cmd->zmr.zra != NVME_ZRA_ZONE_REPORT) {
		req->error_loc = offsetof(struct nvme_zone_mgmt_recv_cmd, zra);
		return NVME_SC_INVALID_FIELD | NVME_SC_DNR;
	}

	switch (req->cmd->zmr.pr) {
	case 0:
	case 1:
		break;
	default:
		req->error_loc = offsetof(struct nvme_zone_mgmt_recv_cmd, pr);
		return NVME_SC_INVALID_FIELD | NVME_SC_DNR;
	}

	switch (req->cmd->zmr.zrasf) {
	case NVME_ZRASF_ZONE_REPORT_ALL:
	case NVME_ZRASF_ZONE_STATE_EMPTY:
	case NVME_ZRASF_ZONE_STATE_IMP_OPEN:
	case NVME_ZRASF_ZONE_STATE_EXP_OPEN:
	case NVME_ZRASF_ZONE_STATE_CLOSED:
	case NVME_ZRASF_ZONE_STATE_FULL:
	case NVME_ZRASF_ZONE_STATE_READONLY:
	case NVME_ZRASF_ZONE_STATE_OFFLINE:
		break;
	default:
		req->error_loc =
			offsetof(struct nvme_zone_mgmt_recv_cmd, zrasf);
		return NVME_SC_INVALID_FIELD | NVME_SC_DNR;
	}

	return NVME_SC_SUCCESS;
}

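/*
 * Context passed to the report zones callback: @out_buf_offset tracks where
 * the next zone descriptor is copied in the host buffer, @out_nr_zones is
 * how many descriptors fit in that buffer, and @nr_zones counts the zones
 * matching the @zrasf reporting filter.
 */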
struct nvmet_report_zone_data {
	struct nvmet_req *req;
	u64 out_buf_offset;
	u64 out_nr_zones;
	u64 nr_zones;
	u8 zrasf;
};

static int nvmet_bdev_report_zone_cb(struct blk_zone *z, unsigned i, void *d)
{
	static const unsigned int nvme_zrasf_to_blk_zcond[] = {
		[NVME_ZRASF_ZONE_STATE_EMPTY]	 = BLK_ZONE_COND_EMPTY,
		[NVME_ZRASF_ZONE_STATE_IMP_OPEN] = BLK_ZONE_COND_IMP_OPEN,
		[NVME_ZRASF_ZONE_STATE_EXP_OPEN] = BLK_ZONE_COND_EXP_OPEN,
		[NVME_ZRASF_ZONE_STATE_CLOSED]	 = BLK_ZONE_COND_CLOSED,
		[NVME_ZRASF_ZONE_STATE_READONLY] = BLK_ZONE_COND_READONLY,
		[NVME_ZRASF_ZONE_STATE_FULL]	 = BLK_ZONE_COND_FULL,
		[NVME_ZRASF_ZONE_STATE_OFFLINE]	 = BLK_ZONE_COND_OFFLINE,
	};
	struct nvmet_report_zone_data *rz = d;

	if (rz->zrasf != NVME_ZRASF_ZONE_REPORT_ALL &&
	    z->cond != nvme_zrasf_to_blk_zcond[rz->zrasf])
		return 0;

	if (rz->nr_zones < rz->out_nr_zones) {
		struct nvme_zone_descriptor zdesc = { };
		u16 status;

		zdesc.zcap = nvmet_sect_to_lba(rz->req->ns, z->capacity);
		zdesc.zslba = nvmet_sect_to_lba(rz->req->ns, z->start);
		zdesc.wp = nvmet_sect_to_lba(rz->req->ns, z->wp);
		zdesc.za = z->reset ? 1 << 2 : 0;
		zdesc.zs = z->cond << 4;
		zdesc.zt = z->type;

		status = nvmet_copy_to_sgl(rz->req, rz->out_buf_offset, &zdesc,
					   sizeof(zdesc));
		if (status)
			return -EINVAL;

		rz->out_buf_offset += sizeof(zdesc);
	}

	rz->nr_zones++;

	return 0;
}

/* Number of zones from the zone containing slba to the end of the device */
static unsigned long nvmet_req_nr_zones_from_slba(struct nvmet_req *req)
{
	unsigned int sect = nvmet_lba_to_sect(req->ns, req->cmd->zmr.slba);

	return bdev_nr_zones(req->ns->bdev) -
		(sect >> ilog2(bdev_zone_sectors(req->ns->bdev)));
}

/* Number of zone descriptors that fit after the zone report header */
static unsigned long get_nr_zones_from_buf(struct nvmet_req *req, u32 bufsize)
{
	if (bufsize <= sizeof(struct nvme_zone_report))
		return 0;

	return (bufsize - sizeof(struct nvme_zone_report)) /
		sizeof(struct nvme_zone_descriptor);
}

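/*
 * Zone Management Receive (Report Zones) handler, run from workqueue
 * context: validate the command, report zones starting at the requested
 * SLBA through nvmet_bdev_report_zone_cb(), then fill in the Number of
 * Zones field of the report header.
 */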
static void nvmet_bdev_zone_zmgmt_recv_work(struct work_struct *w)
{
	struct nvmet_req *req = container_of(w, struct nvmet_req, z.zmgmt_work);
	sector_t start_sect = nvmet_lba_to_sect(req->ns, req->cmd->zmr.slba);
	unsigned long req_slba_nr_zones = nvmet_req_nr_zones_from_slba(req);
	u32 out_bufsize = (le32_to_cpu(req->cmd->zmr.numd) + 1) << 2;
	__le64 nr_zones;
	u16 status;
	int ret;
	struct nvmet_report_zone_data rz_data = {
		.out_nr_zones = get_nr_zones_from_buf(req, out_bufsize),
		/* leave room for the zone report header at the buffer start */
		.out_buf_offset = sizeof(struct nvme_zone_report),
		.zrasf = req->cmd->zmr.zrasf,
		.nr_zones = 0,
		.req = req,
	};

	status = nvmet_bdev_validate_zone_mgmt_recv(req);
	if (status)
		goto out;

	if (!req_slba_nr_zones) {
		status = NVME_SC_SUCCESS;
		goto out;
	}

	ret = blkdev_report_zones(req->ns->bdev, start_sect, req_slba_nr_zones,
				  nvmet_bdev_report_zone_cb, &rz_data);
	if (ret < 0) {
		status = NVME_SC_INTERNAL;
		goto out;
	}

	/*
	 * When the Partial Report bit is set, the Number of Zones field must
	 * indicate the number of zone descriptors actually transferred, not
	 * the total number of zones matching the reporting options.
	 */
	if (req->cmd->zmr.pr)
		rz_data.nr_zones = min(rz_data.nr_zones, rz_data.out_nr_zones);

	nr_zones = cpu_to_le64(rz_data.nr_zones);
	status = nvmet_copy_to_sgl(req, 0, &nr_zones, sizeof(nr_zones));

out:
	nvmet_req_complete(req, status);
}

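/* Defer Zone Management Receive processing to the zbd_wq workqueue */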
void nvmet_bdev_execute_zone_mgmt_recv(struct nvmet_req *req)
{
	INIT_WORK(&req->z.zmgmt_work, nvmet_bdev_zone_zmgmt_recv_work);
	queue_work(zbd_wq, &req->z.zmgmt_work);
}

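/*
 * Map an NVMe Zone Send Action to the corresponding block layer zone
 * management operation. REQ_OP_LAST is returned for unsupported actions.
 */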
static inline enum req_op zsa_req_op(u8 zsa)
{
	switch (zsa) {
	case NVME_ZONE_OPEN:
		return REQ_OP_ZONE_OPEN;
	case NVME_ZONE_CLOSE:
		return REQ_OP_ZONE_CLOSE;
	case NVME_ZONE_FINISH:
		return REQ_OP_ZONE_FINISH;
	case NVME_ZONE_RESET:
		return REQ_OP_ZONE_RESET;
	default:
		return REQ_OP_LAST;
	}
}

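/* Translate a block layer zone management error into an NVMe status code */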
static u16 blkdev_zone_mgmt_errno_to_nvme_status(int ret)
{
	switch (ret) {
	case 0:
		return NVME_SC_SUCCESS;
	case -EINVAL:
	case -EIO:
		return NVME_SC_ZONE_INVALID_TRANSITION | NVME_SC_DNR;
	default:
		return NVME_SC_INTERNAL;
	}
}

struct nvmet_zone_mgmt_send_all_data {
	unsigned long *zbitmap;
	struct nvmet_req *req;
};

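/*
 * Report zones callback used when emulating a Zone Management Send with the
 * Select All bit set: mark in the bitmap only the zones whose current
 * condition allows the requested open, close or finish transition.
 */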
static int zmgmt_send_scan_cb(struct blk_zone *z, unsigned i, void *d)
{
	struct nvmet_zone_mgmt_send_all_data *data = d;

	switch (zsa_req_op(data->req->cmd->zms.zsa)) {
	case REQ_OP_ZONE_OPEN:
		switch (z->cond) {
		case BLK_ZONE_COND_CLOSED:
			break;
		default:
			return 0;
		}
		break;
	case REQ_OP_ZONE_CLOSE:
		switch (z->cond) {
		case BLK_ZONE_COND_IMP_OPEN:
		case BLK_ZONE_COND_EXP_OPEN:
			break;
		default:
			return 0;
		}
		break;
	case REQ_OP_ZONE_FINISH:
		switch (z->cond) {
		case BLK_ZONE_COND_IMP_OPEN:
		case BLK_ZONE_COND_EXP_OPEN:
		case BLK_ZONE_COND_CLOSED:
			break;
		default:
			return 0;
		}
		break;
	default:
		return -EINVAL;
	}

	set_bit(i, data->zbitmap);

	return 0;
}

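/*
 * Open, Close and Finish with the Select All bit set have no direct block
 * layer equivalent, so emulate them: report all zones to build a bitmap of
 * the zones needing the transition, then chain and submit one zone
 * management bio per selected zone.
 */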
static u16 nvmet_bdev_zone_mgmt_emulate_all(struct nvmet_req *req)
{
	struct block_device *bdev = req->ns->bdev;
	unsigned int nr_zones = bdev_nr_zones(bdev);
	struct request_queue *q = bdev_get_queue(bdev);
	struct bio *bio = NULL;
	sector_t sector = 0;
	int ret;
	struct nvmet_zone_mgmt_send_all_data d = {
		.req = req,
	};

	d.zbitmap = kcalloc_node(BITS_TO_LONGS(nr_zones), sizeof(*(d.zbitmap)),
				 GFP_NOIO, q->node);
	if (!d.zbitmap) {
		ret = -ENOMEM;
		goto out;
	}

	/* Scan and build bitmap of the eligible zones */
	ret = blkdev_report_zones(bdev, 0, nr_zones, zmgmt_send_scan_cb, &d);
	if (ret != nr_zones) {
		if (ret > 0)
			ret = -EIO;
		goto out;
	} else {
		/* We scanned all the zones */
		ret = 0;
	}

	while (sector < bdev_nr_sectors(bdev)) {
		if (test_bit(disk_zone_no(bdev->bd_disk, sector), d.zbitmap)) {
			bio = blk_next_bio(bio, bdev, 0,
					   zsa_req_op(req->cmd->zms.zsa) | REQ_SYNC,
					   GFP_KERNEL);
			bio->bi_iter.bi_sector = sector;
			/* This may take a while, so be nice to others */
			cond_resched();
		}
		sector += bdev_zone_sectors(bdev);
	}

	if (bio) {
		ret = submit_bio_wait(bio);
		bio_put(bio);
	}

out:
	kfree(d.zbitmap);

	return blkdev_zone_mgmt_errno_to_nvme_status(ret);
}

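/*
 * Zone Management Send with the Select All bit set: Reset All maps directly
 * to a single block layer zone reset over the whole device capacity, while
 * Open, Close and Finish are emulated zone by zone.
 */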
static u16 nvmet_bdev_execute_zmgmt_send_all(struct nvmet_req *req)
{
	int ret;

	switch (zsa_req_op(req->cmd->zms.zsa)) {
	case REQ_OP_ZONE_RESET:
		ret = blkdev_zone_mgmt(req->ns->bdev, REQ_OP_ZONE_RESET, 0,
				       get_capacity(req->ns->bdev->bd_disk),
				       GFP_KERNEL);
		if (ret < 0)
			return blkdev_zone_mgmt_errno_to_nvme_status(ret);
		break;
	case REQ_OP_ZONE_OPEN:
	case REQ_OP_ZONE_CLOSE:
	case REQ_OP_ZONE_FINISH:
		return nvmet_bdev_zone_mgmt_emulate_all(req);
	default:
		/* invalid actions are rejected by the caller, but be safe */
		req->error_loc = offsetof(struct nvme_zone_mgmt_send_cmd, zsa);
		return NVME_SC_INVALID_FIELD | NVME_SC_DNR;
	}

	return NVME_SC_SUCCESS;
}

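/*
 * Zone Management Send handler, run from workqueue context: validate the
 * action and the starting LBA, handle the Select All case, and otherwise
 * apply the zone management operation to the single zone starting at SLBA.
 */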
static void nvmet_bdev_zmgmt_send_work(struct work_struct *w)
{
	struct nvmet_req *req = container_of(w, struct nvmet_req, z.zmgmt_work);
	sector_t sect = nvmet_lba_to_sect(req->ns, req->cmd->zms.slba);
	enum req_op op = zsa_req_op(req->cmd->zms.zsa);
	struct block_device *bdev = req->ns->bdev;
	sector_t zone_sectors = bdev_zone_sectors(bdev);
	u16 status = NVME_SC_SUCCESS;
	int ret;

	if (op == REQ_OP_LAST) {
		req->error_loc = offsetof(struct nvme_zone_mgmt_send_cmd, zsa);
		status = NVME_SC_ZONE_INVALID_TRANSITION | NVME_SC_DNR;
		goto out;
	}

	/* when the select all bit is set the slba field is ignored */
	if (req->cmd->zms.select_all) {
		status = nvmet_bdev_execute_zmgmt_send_all(req);
		goto out;
	}

	if (sect >= get_capacity(bdev->bd_disk)) {
		req->error_loc = offsetof(struct nvme_zone_mgmt_send_cmd, slba);
		status = NVME_SC_LBA_RANGE | NVME_SC_DNR;
		goto out;
	}

	if (sect & (zone_sectors - 1)) {
		req->error_loc = offsetof(struct nvme_zone_mgmt_send_cmd, slba);
		status = NVME_SC_INVALID_FIELD | NVME_SC_DNR;
		goto out;
	}

	ret = blkdev_zone_mgmt(bdev, op, sect, zone_sectors, GFP_KERNEL);
	if (ret < 0)
		status = blkdev_zone_mgmt_errno_to_nvme_status(ret);

out:
	nvmet_req_complete(req, status);
}

void nvmet_bdev_execute_zone_mgmt_send(struct nvmet_req *req)
{
	INIT_WORK(&req->z.zmgmt_work, nvmet_bdev_zmgmt_send_work);
	queue_work(zbd_wq, &req->z.zmgmt_work);
}

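/*
 * Zone Append completion: on success the block layer has updated the bio
 * sector to the location where the data was written, which is returned to
 * the host as the command result.
 */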
static void nvmet_bdev_zone_append_bio_done(struct bio *bio)
{
	struct nvmet_req *req = bio->bi_private;

	if (bio->bi_status == BLK_STS_OK) {
		req->cqe->result.u64 =
			nvmet_sect_to_lba(req->ns, bio->bi_iter.bi_sector);
	}

	nvmet_req_complete(req, blk_to_nvme_status(req, bio->bi_status));
	nvmet_req_bio_put(req, bio);
}

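/*
 * Zone Append handler: validate the starting LBA, build a
 * REQ_OP_ZONE_APPEND bio from the request scatterlist and submit it.
 * bio_add_zone_append_page() enforces the device zone append limits, so a
 * short add is treated as an internal error.
 */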
void nvmet_bdev_execute_zone_append(struct nvmet_req *req)
{
	sector_t sect = nvmet_lba_to_sect(req->ns, req->cmd->rw.slba);
	const blk_opf_t opf = REQ_OP_ZONE_APPEND | REQ_SYNC | REQ_IDLE;
	u16 status = NVME_SC_SUCCESS;
	unsigned int total_len = 0;
	struct scatterlist *sg;
	struct bio *bio;
	int sg_cnt;

	/* Request is completed on length mismatch in nvmet_check_transfer_len() */
	if (!nvmet_check_transfer_len(req, nvmet_rw_data_len(req)))
		return;

	if (!req->sg_cnt) {
		nvmet_req_complete(req, 0);
		return;
	}

	if (sect >= get_capacity(req->ns->bdev->bd_disk)) {
		req->error_loc = offsetof(struct nvme_rw_command, slba);
		status = NVME_SC_LBA_RANGE | NVME_SC_DNR;
		goto out;
	}

	if (sect & (bdev_zone_sectors(req->ns->bdev) - 1)) {
		req->error_loc = offsetof(struct nvme_rw_command, slba);
		status = NVME_SC_INVALID_FIELD | NVME_SC_DNR;
		goto out;
	}

	if (nvmet_use_inline_bvec(req)) {
		bio = &req->z.inline_bio;
		bio_init(bio, req->ns->bdev, req->inline_bvec,
			 ARRAY_SIZE(req->inline_bvec), opf);
	} else {
		bio = bio_alloc(req->ns->bdev, req->sg_cnt, opf, GFP_KERNEL);
	}

	bio->bi_end_io = nvmet_bdev_zone_append_bio_done;
	bio->bi_iter.bi_sector = sect;
	bio->bi_private = req;
	if (req->cmd->rw.control & cpu_to_le16(NVME_RW_FUA))
		bio->bi_opf |= REQ_FUA;

	for_each_sg(req->sg, sg, req->sg_cnt, sg_cnt) {
		struct page *p = sg_page(sg);
		unsigned int l = sg->length;
		unsigned int o = sg->offset;
		unsigned int ret;

		ret = bio_add_zone_append_page(bio, p, l, o);
		if (ret != sg->length) {
			status = NVME_SC_INTERNAL;
			goto out_put_bio;
		}
		total_len += sg->length;
	}

	if (total_len != nvmet_rw_data_len(req)) {
		status = NVME_SC_INTERNAL | NVME_SC_DNR;
		goto out_put_bio;
	}

	submit_bio(bio);
	return;

out_put_bio:
	nvmet_req_bio_put(req, bio);
out:
	nvmet_req_complete(req, status);
}

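/*
 * Parse a ZNS I/O command for a zoned block device backed namespace and set
 * the request execute handler; anything that is not a zone append or zone
 * management command falls back to the regular block device parser.
 */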
u16 nvmet_bdev_zns_parse_io_cmd(struct nvmet_req *req)
{
	struct nvme_command *cmd = req->cmd;

	switch (cmd->common.opcode) {
	case nvme_cmd_zone_append:
		req->execute = nvmet_bdev_execute_zone_append;
		return 0;
	case nvme_cmd_zone_mgmt_recv:
		req->execute = nvmet_bdev_execute_zone_mgmt_recv;
		return 0;
	case nvme_cmd_zone_mgmt_send:
		req->execute = nvmet_bdev_execute_zone_mgmt_send;
		return 0;
	default:
		return nvmet_bdev_parse_io_cmd(req);
	}
}