// SPDX-License-Identifier: GPL-2.0
/*
 * NVMe Over Fabrics Target File I/O commands implementation.
 * Copyright (c) 2017-2018 Western Digital Corporation or its
 * affiliates.
 */
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
#include <linux/uio.h>
#include <linux/falloc.h>
#include <linux/file.h>
#include <linux/fs.h>
#include "nvmet.h"

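/*
 * NVMET_MAX_MPOOL_BVEC sizes each slab object as an array of 16 bio_vecs;
 * it is also the batch size used when a request falls back to the mempool
 * and has to be submitted synchronously in chunks (see
 * nvmet_file_execute_io()). NVMET_MIN_MPOOL_OBJ is the number of objects
 * the per-namespace mempool keeps in reserve.
 */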
#define NVMET_MAX_MPOOL_BVEC        16
#define NVMET_MIN_MPOOL_OBJ     16

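/* Refresh the cached namespace size from the backing file's inode. */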
void nvmet_file_ns_revalidate(struct nvmet_ns *ns)
{
    ns->size = i_size_read(ns->file->f_mapping->host);
}

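/*
 * Tear down a file-backed namespace: drain any buffered-I/O work still
 * queued for it, then release the bvec mempool, its backing slab cache,
 * and finally the file reference.
 */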
void nvmet_file_ns_disable(struct nvmet_ns *ns)
{
    if (ns->file) {
        if (ns->buffered_io)
            flush_workqueue(buffered_io_wq);
        mempool_destroy(ns->bvec_pool);
        ns->bvec_pool = NULL;
        kmem_cache_destroy(ns->bvec_cache);
        ns->bvec_cache = NULL;
        fput(ns->file);
        ns->file = NULL;
    }
}

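/*
 * Open the backing file (O_DIRECT unless buffered I/O was requested),
 * cache its size, and set up the slab cache plus mempool that back
 * bvec allocations under memory pressure.
 */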
int nvmet_file_ns_enable(struct nvmet_ns *ns)
{
    int flags = O_RDWR | O_LARGEFILE;
    int ret = 0;

    if (!ns->buffered_io)
        flags |= O_DIRECT;

    ns->file = filp_open(ns->device_path, flags, 0);
    if (IS_ERR(ns->file)) {
        ret = PTR_ERR(ns->file);
        pr_err("failed to open file %s: (%d)\n",
            ns->device_path, ret);
        ns->file = NULL;
        return ret;
    }

    nvmet_file_ns_revalidate(ns);

    /*
     * i_blkbits can be greater than the universally accepted upper bound,
     * so make sure we export a sane namespace lba_shift.
     */
    ns->blksize_shift = min_t(u8,
            file_inode(ns->file)->i_blkbits, 12);

    ns->bvec_cache = kmem_cache_create("nvmet-bvec",
            NVMET_MAX_MPOOL_BVEC * sizeof(struct bio_vec),
            0, SLAB_HWCACHE_ALIGN, NULL);
    if (!ns->bvec_cache) {
        ret = -ENOMEM;
        goto err;
    }

    ns->bvec_pool = mempool_create(NVMET_MIN_MPOOL_OBJ, mempool_alloc_slab,
            mempool_free_slab, ns->bvec_cache);

    if (!ns->bvec_pool) {
        ret = -ENOMEM;
        goto err;
    }

    return ret;
err:
    ns->size = 0;
    ns->blksize_shift = 0;
    nvmet_file_ns_disable(ns);
    return ret;
}

static void nvmet_file_init_bvec(struct bio_vec *bv, struct scatterlist *sg)
{
    bv->bv_page = sg_page(sg);
    bv->bv_offset = sg->offset;
    bv->bv_len = sg->length;
}

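/*
 * Build an iov_iter over the prepared bvecs and call the backing file's
 * ->read_iter()/->write_iter() directly; FUA writes are flagged IOCB_DSYNC.
 */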
static ssize_t nvmet_file_submit_bvec(struct nvmet_req *req, loff_t pos,
        unsigned long nr_segs, size_t count, int ki_flags)
{
    struct kiocb *iocb = &req->f.iocb;
    ssize_t (*call_iter)(struct kiocb *iocb, struct iov_iter *iter);
    struct iov_iter iter;
    int rw;

    if (req->cmd->rw.opcode == nvme_cmd_write) {
        if (req->cmd->rw.control & cpu_to_le16(NVME_RW_FUA))
            ki_flags |= IOCB_DSYNC;
        call_iter = req->ns->file->f_op->write_iter;
        rw = WRITE;
    } else {
        call_iter = req->ns->file->f_op->read_iter;
        rw = READ;
    }

    iov_iter_bvec(&iter, rw, req->f.bvec, nr_segs, count);

    iocb->ki_pos = pos;
    iocb->ki_filp = req->ns->file;
    iocb->ki_flags = ki_flags | iocb->ki_filp->f_iocb_flags;

    return call_iter(iocb, &iter);
}

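/*
 * Completion handler for both the synchronous and the asynchronous
 * (->ki_complete) paths: free the bvec array the same way it was
 * allocated and translate a short transfer into an NVMe status code.
 */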
static void nvmet_file_io_done(struct kiocb *iocb, long ret)
{
    struct nvmet_req *req = container_of(iocb, struct nvmet_req, f.iocb);
    u16 status = NVME_SC_SUCCESS;

    if (req->f.bvec != req->inline_bvec) {
        if (likely(req->f.mpool_alloc == false))
            kfree(req->f.bvec);
        else
            mempool_free(req->f.bvec, req->ns->bvec_pool);
    }

    if (unlikely(ret != req->transfer_len))
        status = errno_to_nvme_status(req, ret);
    nvmet_req_complete(req, status);
}

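/*
 * Returns true when the request has been completed or queued; false tells
 * the caller to retry without IOCB_NOWAIT, i.e. via the buffered-I/O
 * workqueue.
 */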
static bool nvmet_file_execute_io(struct nvmet_req *req, int ki_flags)
{
    ssize_t nr_bvec = req->sg_cnt;
    unsigned long bv_cnt = 0;
    bool is_sync = false;
    size_t len = 0, total_len = 0;
    ssize_t ret = 0;
    loff_t pos;
    int i;
    struct scatterlist *sg;

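    /*
     * A bvec array taken from the mempool only holds NVMET_MAX_MPOOL_BVEC
     * entries, so larger transfers are submitted synchronously in
     * NVMET_MAX_MPOOL_BVEC-sized batches that reuse the array.
     */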
    if (req->f.mpool_alloc && nr_bvec > NVMET_MAX_MPOOL_BVEC)
        is_sync = true;

    pos = le64_to_cpu(req->cmd->rw.slba) << req->ns->blksize_shift;
    if (unlikely(pos + req->transfer_len > req->ns->size)) {
        nvmet_req_complete(req, errno_to_nvme_status(req, -ENOSPC));
        return true;
    }

    memset(&req->f.iocb, 0, sizeof(struct kiocb));
    for_each_sg(req->sg, sg, req->sg_cnt, i) {
        nvmet_file_init_bvec(&req->f.bvec[bv_cnt], sg);
        len += req->f.bvec[bv_cnt].bv_len;
        total_len += req->f.bvec[bv_cnt].bv_len;
        bv_cnt++;

        WARN_ON_ONCE((nr_bvec - 1) < 0);

        if (unlikely(is_sync) &&
            (nr_bvec - 1 == 0 || bv_cnt == NVMET_MAX_MPOOL_BVEC)) {
            ret = nvmet_file_submit_bvec(req, pos, bv_cnt, len, 0);
            if (ret < 0)
                goto complete;

            pos += len;
            bv_cnt = 0;
            len = 0;
        }
        nr_bvec--;
    }

    if (WARN_ON_ONCE(total_len != req->transfer_len)) {
        ret = -EIO;
        goto complete;
    }

    if (unlikely(is_sync)) {
        ret = total_len;
        goto complete;
    }

    /*
     * A NULL ki_complete asks for synchronous execution, which we want
     * for the IOCB_NOWAIT case.
     */
    if (!(ki_flags & IOCB_NOWAIT))
        req->f.iocb.ki_complete = nvmet_file_io_done;

    ret = nvmet_file_submit_bvec(req, pos, bv_cnt, total_len, ki_flags);

    switch (ret) {
    case -EIOCBQUEUED:
        return true;
    case -EAGAIN:
        if (WARN_ON_ONCE(!(ki_flags & IOCB_NOWAIT)))
            goto complete;
        return false;
    case -EOPNOTSUPP:
        /*
         * For file systems returning error -EOPNOTSUPP, handle
         * IOCB_NOWAIT error case separately and retry without
         * IOCB_NOWAIT.
         */
        if ((ki_flags & IOCB_NOWAIT))
            return false;
        break;
    }

complete:
    nvmet_file_io_done(&req->f.iocb, ret);
    return true;
}

static void nvmet_file_buffered_io_work(struct work_struct *w)
{
    struct nvmet_req *req = container_of(w, struct nvmet_req, f.work);

    nvmet_file_execute_io(req, 0);
}

static void nvmet_file_submit_buffered_io(struct nvmet_req *req)
{
    INIT_WORK(&req->f.work, nvmet_file_buffered_io_work);
    queue_work(buffered_io_wq, &req->f.work);
}

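/*
 * Read/Write: small transfers use the inline bvec array, larger ones are
 * kmalloc'ed, and allocation failure falls back to the mempool. Buffered
 * namespaces first try an IOCB_NOWAIT submission in the caller's context
 * and only defer to the buffered-I/O workqueue if that cannot complete.
 */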
static void nvmet_file_execute_rw(struct nvmet_req *req)
{
    ssize_t nr_bvec = req->sg_cnt;

    if (!nvmet_check_transfer_len(req, nvmet_rw_data_len(req)))
        return;

    if (!req->sg_cnt || !nr_bvec) {
        nvmet_req_complete(req, 0);
        return;
    }

    if (nr_bvec > NVMET_MAX_INLINE_BIOVEC)
        req->f.bvec = kmalloc_array(nr_bvec, sizeof(struct bio_vec),
                GFP_KERNEL);
    else
        req->f.bvec = req->inline_bvec;

    if (unlikely(!req->f.bvec)) {
        /* fallback under memory pressure */
        req->f.bvec = mempool_alloc(req->ns->bvec_pool, GFP_KERNEL);
        req->f.mpool_alloc = true;
    } else
        req->f.mpool_alloc = false;

    if (req->ns->buffered_io) {
        if (likely(!req->f.mpool_alloc) &&
            (req->ns->file->f_mode & FMODE_NOWAIT) &&
            nvmet_file_execute_io(req, IOCB_NOWAIT))
            return;
        nvmet_file_submit_buffered_io(req);
    } else
        nvmet_file_execute_io(req, 0);
}

u16 nvmet_file_flush(struct nvmet_req *req)
{
    return errno_to_nvme_status(req, vfs_fsync(req->ns->file, 1));
}

static void nvmet_file_flush_work(struct work_struct *w)
{
    struct nvmet_req *req = container_of(w, struct nvmet_req, f.work);

    nvmet_req_complete(req, nvmet_file_flush(req));
}

static void nvmet_file_execute_flush(struct nvmet_req *req)
{
    if (!nvmet_check_transfer_len(req, 0))
        return;
    INIT_WORK(&req->f.work, nvmet_file_flush_work);
    queue_work(nvmet_wq, &req->f.work);
}

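/*
 * Deallocate (discard): punch each DSM range out of the backing file with
 * vfs_fallocate(PUNCH_HOLE | KEEP_SIZE); -EOPNOTSUPP from the filesystem
 * is silently tolerated.
 */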
static void nvmet_file_execute_discard(struct nvmet_req *req)
{
    int mode = FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE;
    struct nvme_dsm_range range;
    loff_t offset, len;
    u16 status = 0;
    int ret;
    int i;

    for (i = 0; i <= le32_to_cpu(req->cmd->dsm.nr); i++) {
        status = nvmet_copy_from_sgl(req, i * sizeof(range), &range,
                    sizeof(range));
        if (status)
            break;

        offset = le64_to_cpu(range.slba) << req->ns->blksize_shift;
        len = le32_to_cpu(range.nlb);
        len <<= req->ns->blksize_shift;
        if (offset + len > req->ns->size) {
            req->error_slba = le64_to_cpu(range.slba);
            status = errno_to_nvme_status(req, -ENOSPC);
            break;
        }

        ret = vfs_fallocate(req->ns->file, mode, offset, len);
        if (ret && ret != -EOPNOTSUPP) {
            req->error_slba = le64_to_cpu(range.slba);
            status = errno_to_nvme_status(req, ret);
            break;
        }
    }

    nvmet_req_complete(req, status);
}

static void nvmet_file_dsm_work(struct work_struct *w)
{
    struct nvmet_req *req = container_of(w, struct nvmet_req, f.work);

    switch (le32_to_cpu(req->cmd->dsm.attributes)) {
    case NVME_DSMGMT_AD:
        nvmet_file_execute_discard(req);
        return;
    case NVME_DSMGMT_IDR:
    case NVME_DSMGMT_IDW:
    default:
        /* Not supported yet */
        nvmet_req_complete(req, 0);
        return;
    }
}

static void nvmet_file_execute_dsm(struct nvmet_req *req)
{
    if (!nvmet_check_data_len_lte(req, nvmet_dsm_len(req)))
        return;
    INIT_WORK(&req->f.work, nvmet_file_dsm_work);
    queue_work(nvmet_wq, &req->f.work);
}

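/*
 * Write Zeroes is implemented with FALLOC_FL_ZERO_RANGE; the NVMe length
 * field is 0's based, hence the "+ 1" when computing the byte count.
 */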
static void nvmet_file_write_zeroes_work(struct work_struct *w)
{
    struct nvmet_req *req = container_of(w, struct nvmet_req, f.work);
    struct nvme_write_zeroes_cmd *write_zeroes = &req->cmd->write_zeroes;
    int mode = FALLOC_FL_ZERO_RANGE | FALLOC_FL_KEEP_SIZE;
    loff_t offset;
    loff_t len;
    int ret;

    offset = le64_to_cpu(write_zeroes->slba) << req->ns->blksize_shift;
    len = (((sector_t)le16_to_cpu(write_zeroes->length) + 1) <<
            req->ns->blksize_shift);

    if (unlikely(offset + len > req->ns->size)) {
        nvmet_req_complete(req, errno_to_nvme_status(req, -ENOSPC));
        return;
    }

    ret = vfs_fallocate(req->ns->file, mode, offset, len);
    nvmet_req_complete(req, ret < 0 ? errno_to_nvme_status(req, ret) : 0);
}

static void nvmet_file_execute_write_zeroes(struct nvmet_req *req)
{
    if (!nvmet_check_transfer_len(req, 0))
        return;
    INIT_WORK(&req->f.work, nvmet_file_write_zeroes_work);
    queue_work(nvmet_wq, &req->f.work);
}

u16 nvmet_file_parse_io_cmd(struct nvmet_req *req)
{
    switch (req->cmd->common.opcode) {
    case nvme_cmd_read:
    case nvme_cmd_write:
        req->execute = nvmet_file_execute_rw;
        return 0;
    case nvme_cmd_flush:
        req->execute = nvmet_file_execute_flush;
        return 0;
    case nvme_cmd_dsm:
        req->execute = nvmet_file_execute_dsm;
        return 0;
    case nvme_cmd_write_zeroes:
        req->execute = nvmet_file_execute_write_zeroes;
        return 0;
    default:
        return nvmet_report_invalid_opcode(req);
    }
}