// SPDX-License-Identifier: GPL-2.0
/*
 * NVMe I/O command implementation.
 * Copyright (c) 2015-2016 HGST, a Western Digital Company.
 */
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
#include <linux/blkdev.h>
#include <linux/blk-integrity.h>
#include <linux/memremap.h>
#include <linux/module.h>
#include "nvmet.h"

void nvmet_bdev_set_limits(struct block_device *bdev, struct nvme_id_ns *id)
{
    const struct queue_limits *ql = &bdev_get_queue(bdev)->limits;
    /* Number of logical blocks per physical block. */
    const u32 lpp = ql->physical_block_size / ql->logical_block_size;
    /* Logical blocks per physical block, 0's based. */
    const __le16 lpp0b = to0based(lpp);
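    /*
     * Example (assumed geometry): with 4096-byte physical and 512-byte
     * logical blocks, lpp = 8, which to0based() reports as 7.
     */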

    /*
     * For NVMe 1.2 and later, bit 1 indicates that the fields NAWUN,
     * NAWUPF, and NACWU are defined for this namespace and should be
     * used by the host for this namespace instead of the AWUN, AWUPF,
     * and ACWU fields in the Identify Controller data structure. If
     * any of these fields are zero, the corresponding field from the
     * Identify Controller data structure should be used.
     */
    id->nsfeat |= 1 << 1;
    id->nawun = lpp0b;
    id->nawupf = lpp0b;
    id->nacwu = lpp0b;

    /*
     * Bit 4 indicates that the fields NPWG, NPWA, NPDG, NPDA, and
     * NOWS are defined for this namespace and should be used by
     * the host for I/O optimization.
     */
    id->nsfeat |= 1 << 4;
    /* NPWG = Namespace Preferred Write Granularity. 0's based */
    id->npwg = lpp0b;
    /* NPWA = Namespace Preferred Write Alignment. 0's based */
    id->npwa = id->npwg;
    /* NPDG = Namespace Preferred Deallocate Granularity. 0's based */
    id->npdg = to0based(ql->discard_granularity / ql->logical_block_size);
    /* NPDA = Namespace Preferred Deallocate Alignment */
    id->npda = id->npdg;
    /* NOWS = Namespace Optimal Write Size */
    id->nows = to0based(ql->io_opt / ql->logical_block_size);
}

void nvmet_bdev_ns_disable(struct nvmet_ns *ns)
{
    if (ns->bdev) {
        blkdev_put(ns->bdev, FMODE_WRITE | FMODE_READ);
        ns->bdev = NULL;
    }
}

static void nvmet_bdev_ns_enable_integrity(struct nvmet_ns *ns)
{
    struct blk_integrity *bi = bdev_get_integrity(ns->bdev);

    if (bi) {
        ns->metadata_size = bi->tuple_size;
        if (bi->profile == &t10_pi_type1_crc)
            ns->pi_type = NVME_NS_DPS_PI_TYPE1;
        else if (bi->profile == &t10_pi_type3_crc)
            ns->pi_type = NVME_NS_DPS_PI_TYPE3;
        else
            /* Unsupported metadata type */
            ns->metadata_size = 0;
    }
}

int nvmet_bdev_ns_enable(struct nvmet_ns *ns)
{
    int ret;

    /*
     * When the buffered_io namespace attribute is enabled, the user wants
     * this block device to be used as a file, so the block device can
     * take advantage of the page cache.
     */
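    /*
     * Returning -ENOTBLK here (and below when blkdev_get_by_path() fails
     * with it) lets the caller fall back to the file-backed namespace
     * implementation.
     */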
    if (ns->buffered_io)
        return -ENOTBLK;

    ns->bdev = blkdev_get_by_path(ns->device_path,
            FMODE_READ | FMODE_WRITE, NULL);
    if (IS_ERR(ns->bdev)) {
        ret = PTR_ERR(ns->bdev);
        if (ret != -ENOTBLK) {
            pr_err("failed to open block device %s: (%ld)\n",
                    ns->device_path, PTR_ERR(ns->bdev));
        }
        ns->bdev = NULL;
        return ret;
    }
    ns->size = bdev_nr_bytes(ns->bdev);
    ns->blksize_shift = blksize_bits(bdev_logical_block_size(ns->bdev));

    ns->pi_type = 0;
    ns->metadata_size = 0;
    if (IS_ENABLED(CONFIG_BLK_DEV_INTEGRITY_T10))
        nvmet_bdev_ns_enable_integrity(ns);

    if (bdev_is_zoned(ns->bdev)) {
        if (!nvmet_bdev_zns_enable(ns)) {
            nvmet_bdev_ns_disable(ns);
            return -EINVAL;
        }
        ns->csi = NVME_CSI_ZNS;
    }

    return 0;
}

void nvmet_bdev_ns_revalidate(struct nvmet_ns *ns)
{
    ns->size = bdev_nr_bytes(ns->bdev);
}

u16 blk_to_nvme_status(struct nvmet_req *req, blk_status_t blk_sts)
{
    u16 status = NVME_SC_SUCCESS;

    if (likely(blk_sts == BLK_STS_OK))
        return status;
    /*
     * Right now there is an M : 1 mapping from block layer errors to
     * NVMe status codes (see nvme_error_status()). For consistency,
     * when we reverse map we use the most appropriate NVMe status code
     * from the group of NVMe status codes used in nvme_error_status().
     */
    switch (blk_sts) {
    case BLK_STS_NOSPC:
        status = NVME_SC_CAP_EXCEEDED | NVME_SC_DNR;
        req->error_loc = offsetof(struct nvme_rw_command, length);
        break;
    case BLK_STS_TARGET:
        status = NVME_SC_LBA_RANGE | NVME_SC_DNR;
        req->error_loc = offsetof(struct nvme_rw_command, slba);
        break;
    case BLK_STS_NOTSUPP:
        req->error_loc = offsetof(struct nvme_common_command, opcode);
        switch (req->cmd->common.opcode) {
        case nvme_cmd_dsm:
        case nvme_cmd_write_zeroes:
            status = NVME_SC_ONCS_NOT_SUPPORTED | NVME_SC_DNR;
            break;
        default:
            status = NVME_SC_INVALID_OPCODE | NVME_SC_DNR;
        }
        break;
    case BLK_STS_MEDIUM:
        status = NVME_SC_ACCESS_DENIED;
        req->error_loc = offsetof(struct nvme_rw_command, nsid);
        break;
    case BLK_STS_IOERR:
    default:
        status = NVME_SC_INTERNAL | NVME_SC_DNR;
        req->error_loc = offsetof(struct nvme_common_command, opcode);
    }

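    /*
     * Record the starting LBA for the Error Information log entry when
     * the failed command carries one.
     */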
    switch (req->cmd->common.opcode) {
    case nvme_cmd_read:
    case nvme_cmd_write:
        req->error_slba = le64_to_cpu(req->cmd->rw.slba);
        break;
    case nvme_cmd_write_zeroes:
        req->error_slba =
            le64_to_cpu(req->cmd->write_zeroes.slba);
        break;
    default:
        req->error_slba = 0;
    }
    return status;
}

static void nvmet_bio_done(struct bio *bio)
{
    struct nvmet_req *req = bio->bi_private;

    nvmet_req_complete(req, blk_to_nvme_status(req, bio->bi_status));
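    /* nvmet_req_bio_put() frees the bio unless it is the request's inline bio */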
    nvmet_req_bio_put(req, bio);
}

#ifdef CONFIG_BLK_DEV_INTEGRITY
static int nvmet_bdev_alloc_bip(struct nvmet_req *req, struct bio *bio,
                struct sg_mapping_iter *miter)
{
    struct blk_integrity *bi;
    struct bio_integrity_payload *bip;
    int rc;
    size_t resid, len;

    bi = bdev_get_integrity(req->ns->bdev);
    if (unlikely(!bi)) {
        pr_err("Unable to locate bio_integrity\n");
        return -ENODEV;
    }

    bip = bio_integrity_alloc(bio, GFP_NOIO,
                    bio_max_segs(req->metadata_sg_cnt));
    if (IS_ERR(bip)) {
        pr_err("Unable to allocate bio_integrity_payload\n");
        return PTR_ERR(bip);
    }

    bip->bip_iter.bi_size = bio_integrity_bytes(bi, bio_sectors(bio));
    /* virtual start sector must be in integrity interval units */
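    /*
     * e.g. with an assumed 4096-byte protection interval, interval_exp
     * is 12 and the 512-byte sector number is shifted right by 3.
     */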
    bip_set_seed(bip, bio->bi_iter.bi_sector >>
             (bi->interval_exp - SECTOR_SHIFT));

    resid = bip->bip_iter.bi_size;
    while (resid > 0 && sg_miter_next(miter)) {
        len = min_t(size_t, miter->length, resid);
        rc = bio_integrity_add_page(bio, miter->page, len,
                        offset_in_page(miter->addr));
        if (unlikely(rc != len)) {
            pr_err("bio_integrity_add_page() failed; %d\n", rc);
            sg_miter_stop(miter);
            return -ENOMEM;
        }

        resid -= len;
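        /*
         * If only part of this segment fit, rewind the iterator so the
         * next sg_miter_next() resumes within the same segment.
         */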
        if (len < miter->length)
            miter->consumed -= miter->length - len;
    }
    sg_miter_stop(miter);

    return 0;
}
#else
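/*
 * Without block-layer integrity support metadata is never set up for a
 * request, so this stub should not be reached.
 */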
static int nvmet_bdev_alloc_bip(struct nvmet_req *req, struct bio *bio,
                struct sg_mapping_iter *miter)
{
    return -EINVAL;
}
#endif /* CONFIG_BLK_DEV_INTEGRITY */

static void nvmet_bdev_execute_rw(struct nvmet_req *req)
{
    unsigned int sg_cnt = req->sg_cnt;
    struct bio *bio;
    struct scatterlist *sg;
    struct blk_plug plug;
    sector_t sector;
    blk_opf_t opf;
    int i, rc;
    struct sg_mapping_iter prot_miter;
    unsigned int iter_flags;
    unsigned int total_len = nvmet_rw_data_len(req) + req->metadata_len;

    if (!nvmet_check_transfer_len(req, total_len))
        return;

    if (!req->sg_cnt) {
        nvmet_req_complete(req, 0);
        return;
    }

    if (req->cmd->rw.opcode == nvme_cmd_write) {
        opf = REQ_OP_WRITE | REQ_SYNC | REQ_IDLE;
        if (req->cmd->rw.control & cpu_to_le16(NVME_RW_FUA))
            opf |= REQ_FUA;
        iter_flags = SG_MITER_TO_SG;
    } else {
        opf = REQ_OP_READ;
        iter_flags = SG_MITER_FROM_SG;
    }

    if (is_pci_p2pdma_page(sg_page(req->sg)))
        opf |= REQ_NOMERGE;

    sector = nvmet_lba_to_sect(req->ns, req->cmd->rw.slba);

    if (nvmet_use_inline_bvec(req)) {
        bio = &req->b.inline_bio;
        bio_init(bio, req->ns->bdev, req->inline_bvec,
             ARRAY_SIZE(req->inline_bvec), opf);
    } else {
        bio = bio_alloc(req->ns->bdev, bio_max_segs(sg_cnt), opf,
                GFP_KERNEL);
    }
    bio->bi_iter.bi_sector = sector;
    bio->bi_private = req;
    bio->bi_end_io = nvmet_bio_done;

    blk_start_plug(&plug);
    if (req->metadata_len)
        sg_miter_start(&prot_miter, req->metadata_sg,
                   req->metadata_sg_cnt, iter_flags);

    for_each_sg(req->sg, sg, req->sg_cnt, i) {
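        /*
         * bio_add_page() not accepting the whole segment means the bio
         * is full: allocate a new bio, chain it to the current one and
         * submit the full bio.
         */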
        while (bio_add_page(bio, sg_page(sg), sg->length, sg->offset)
                != sg->length) {
            struct bio *prev = bio;

            if (req->metadata_len) {
                rc = nvmet_bdev_alloc_bip(req, bio,
                              &prot_miter);
                if (unlikely(rc)) {
                    bio_io_error(bio);
                    return;
                }
            }

            bio = bio_alloc(req->ns->bdev, bio_max_segs(sg_cnt),
                    opf, GFP_KERNEL);
            bio->bi_iter.bi_sector = sector;

            bio_chain(bio, prev);
            submit_bio(prev);
        }

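        /* sg->length is in bytes; >> 9 converts it to 512-byte sectors */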
        sector += sg->length >> 9;
        sg_cnt--;
    }

    if (req->metadata_len) {
        rc = nvmet_bdev_alloc_bip(req, bio, &prot_miter);
        if (unlikely(rc)) {
            bio_io_error(bio);
            return;
        }
    }

    submit_bio(bio);
    blk_finish_plug(&plug);
}

static void nvmet_bdev_execute_flush(struct nvmet_req *req)
{
    struct bio *bio = &req->b.inline_bio;

    if (!nvmet_check_transfer_len(req, 0))
        return;

    bio_init(bio, req->ns->bdev, req->inline_bvec,
         ARRAY_SIZE(req->inline_bvec), REQ_OP_WRITE | REQ_PREFLUSH);
    bio->bi_private = req;
    bio->bi_end_io = nvmet_bio_done;

    submit_bio(bio);
}

u16 nvmet_bdev_flush(struct nvmet_req *req)
{
    if (blkdev_issue_flush(req->ns->bdev))
        return NVME_SC_INTERNAL | NVME_SC_DNR;
    return 0;
}

static u16 nvmet_bdev_discard_range(struct nvmet_req *req,
        struct nvme_dsm_range *range, struct bio **bio)
{
    struct nvmet_ns *ns = req->ns;
    int ret;

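    /*
     * Deallocate is advisory, so a backing device that does not support
     * discard (-EOPNOTSUPP) is not reported as an error to the host.
     */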
    ret = __blkdev_issue_discard(ns->bdev,
            nvmet_lba_to_sect(ns, range->slba),
            le32_to_cpu(range->nlb) << (ns->blksize_shift - 9),
            GFP_KERNEL, bio);
    if (ret && ret != -EOPNOTSUPP) {
        req->error_slba = le64_to_cpu(range->slba);
        return errno_to_nvme_status(req, ret);
    }
    return NVME_SC_SUCCESS;
}

static void nvmet_bdev_execute_discard(struct nvmet_req *req)
{
    struct nvme_dsm_range range;
    struct bio *bio = NULL;
    int i;
    u16 status;

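    /* dsm.nr is a 0's based count of ranges, hence the <= in the loop */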
    for (i = 0; i <= le32_to_cpu(req->cmd->dsm.nr); i++) {
        status = nvmet_copy_from_sgl(req, i * sizeof(range), &range,
                sizeof(range));
        if (status)
            break;

        status = nvmet_bdev_discard_range(req, &range, &bio);
        if (status)
            break;
    }

    if (bio) {
        bio->bi_private = req;
        bio->bi_end_io = nvmet_bio_done;
        if (status)
            bio_io_error(bio);
        else
            submit_bio(bio);
    } else {
        nvmet_req_complete(req, status);
    }
}

static void nvmet_bdev_execute_dsm(struct nvmet_req *req)
{
    if (!nvmet_check_data_len_lte(req, nvmet_dsm_len(req)))
        return;

    switch (le32_to_cpu(req->cmd->dsm.attributes)) {
    case NVME_DSMGMT_AD:
        nvmet_bdev_execute_discard(req);
        return;
    case NVME_DSMGMT_IDR:
    case NVME_DSMGMT_IDW:
    default:
        /* Not supported yet */
        nvmet_req_complete(req, 0);
        return;
    }
}

static void nvmet_bdev_execute_write_zeroes(struct nvmet_req *req)
{
    struct nvme_write_zeroes_cmd *write_zeroes = &req->cmd->write_zeroes;
    struct bio *bio = NULL;
    sector_t sector;
    sector_t nr_sector;
    int ret;

    if (!nvmet_check_transfer_len(req, 0))
        return;

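    /* write_zeroes->length is a 0's based count of logical blocks */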
    sector = nvmet_lba_to_sect(req->ns, write_zeroes->slba);
    nr_sector = (((sector_t)le16_to_cpu(write_zeroes->length) + 1) <<
        (req->ns->blksize_shift - 9));

    ret = __blkdev_issue_zeroout(req->ns->bdev, sector, nr_sector,
            GFP_KERNEL, &bio, 0);
    if (bio) {
        bio->bi_private = req;
        bio->bi_end_io = nvmet_bio_done;
        submit_bio(bio);
    } else {
        nvmet_req_complete(req, errno_to_nvme_status(req, ret));
    }
}

u16 nvmet_bdev_parse_io_cmd(struct nvmet_req *req)
{
    switch (req->cmd->common.opcode) {
    case nvme_cmd_read:
    case nvme_cmd_write:
        req->execute = nvmet_bdev_execute_rw;
        if (req->sq->ctrl->pi_support && nvmet_ns_has_pi(req->ns))
            req->metadata_len = nvmet_rw_metadata_len(req);
        return 0;
    case nvme_cmd_flush:
        req->execute = nvmet_bdev_execute_flush;
        return 0;
    case nvme_cmd_dsm:
        req->execute = nvmet_bdev_execute_dsm;
        return 0;
    case nvme_cmd_write_zeroes:
        req->execute = nvmet_bdev_execute_write_zeroes;
        return 0;
    default:
        return nvmet_report_invalid_opcode(req);
    }
}