#include <linux/spinlock.h>
#include <linux/slab.h>
#include <linux/blkdev.h>
#include <linux/hdreg.h>
#include <linux/module.h>
#include <linux/mutex.h>
#include <linux/interrupt.h>
#include <linux/virtio.h>
#include <linux/virtio_blk.h>
#include <linux/scatterlist.h>
#include <linux/string_helpers.h>
#include <linux/idr.h>
#include <linux/blk-mq.h>
#include <linux/blk-mq-virtio.h>
#include <linux/numa.h>
#include <uapi/linux/virtio_ring.h>

#define PART_BITS 4
#define VQ_NAME_LEN 16
#define MAX_DISCARD_SEGMENTS 256u

/* The maximum number of sg elements that fit into a virtqueue */
#define VIRTIO_BLK_MAX_SG_ELEMS 32768

#ifdef CONFIG_ARCH_NO_SG_CHAIN
#define VIRTIO_BLK_INLINE_SG_CNT 0
#else
#define VIRTIO_BLK_INLINE_SG_CNT 2
#endif

static unsigned int num_request_queues;
module_param(num_request_queues, uint, 0644);
MODULE_PARM_DESC(num_request_queues,
		 "Limit the number of request queues to use for blk device. "
		 "0 for no limit. "
		 "Values > nr_cpu_ids truncated to nr_cpu_ids.");

static unsigned int poll_queues;
module_param(poll_queues, uint, 0644);
MODULE_PARM_DESC(poll_queues, "The number of dedicated virtqueues for polling I/O");

static int major;
static DEFINE_IDA(vd_index_ida);

static struct workqueue_struct *virtblk_wq;

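/*
 * Per-virtqueue state: the virtqueue itself, a lock serializing submission
 * and completion on that ring, and the name used when requesting the vq.
 */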
struct virtio_blk_vq {
	struct virtqueue *vq;
	spinlock_t lock;
	char name[VQ_NAME_LEN];
} ____cacheline_aligned_in_smp;

struct virtio_blk {
	/*
	 * Protects vdev after virtblk_remove() may have cleared it; paths
	 * that can still run at that point (e.g. the getgeo ioctl) must
	 * hold this mutex and check vdev for NULL.
	 */
	struct mutex vdev_mutex;
	struct virtio_device *vdev;

	/* The disk structure used by the block layer. */
	struct gendisk *disk;

	/* Block layer tags. */
	struct blk_mq_tag_set tag_set;

	/* Process context for config space updates. */
	struct work_struct config_work;

	/* IDA index, used to track minor number allocations. */
	int index;

	/* Number of virtqueues and how they are split per hctx type. */
	int num_vqs;
	int io_queues[HCTX_MAX_TYPES];
	struct virtio_blk_vq *vqs;
};

struct virtblk_req {
	struct virtio_blk_outhdr out_hdr;
	u8 status;
	struct sg_table sg_table;
	struct scatterlist sg[];
};

static inline blk_status_t virtblk_result(struct virtblk_req *vbr)
{
	switch (vbr->status) {
	case VIRTIO_BLK_S_OK:
		return BLK_STS_OK;
	case VIRTIO_BLK_S_UNSUPP:
		return BLK_STS_NOTSUPP;
	default:
		return BLK_STS_IOERR;
	}
}

static inline struct virtio_blk_vq *get_virtio_blk_vq(struct blk_mq_hw_ctx *hctx)
{
	struct virtio_blk *vblk = hctx->queue->queuedata;
	struct virtio_blk_vq *vq = &vblk->vqs[hctx->queue_num];

	return vq;
}

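/*
 * Each request is placed on the ring as up to three scatterlist groups: the
 * device-readable out_hdr, an optional data payload (device-readable for
 * writes, device-writable for reads), and the device-writable status byte
 * that the host fills in on completion.
 */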
static int virtblk_add_req(struct virtqueue *vq, struct virtblk_req *vbr)
{
	struct scatterlist hdr, status, *sgs[3];
	unsigned int num_out = 0, num_in = 0;

	sg_init_one(&hdr, &vbr->out_hdr, sizeof(vbr->out_hdr));
	sgs[num_out++] = &hdr;

	if (vbr->sg_table.nents) {
		if (vbr->out_hdr.type & cpu_to_virtio32(vq->vdev, VIRTIO_BLK_T_OUT))
			sgs[num_out++] = vbr->sg_table.sgl;
		else
			sgs[num_out + num_in++] = vbr->sg_table.sgl;
	}

	sg_init_one(&status, &vbr->status, sizeof(vbr->status));
	sgs[num_out + num_in++] = &status;

	return virtqueue_add_sgs(vq, sgs, num_out, num_in, vbr, GFP_ATOMIC);
}

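/*
 * Build the payload for a DISCARD or WRITE ZEROES request: one
 * virtio_blk_discard_write_zeroes range per bio (or a single range when the
 * queue supports only one discard segment), attached to the request as a
 * special payload.
 */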
static int virtblk_setup_discard_write_zeroes(struct request *req, bool unmap)
{
	unsigned short segments = blk_rq_nr_discard_segments(req);
	unsigned short n = 0;
	struct virtio_blk_discard_write_zeroes *range;
	struct bio *bio;
	u32 flags = 0;

	if (unmap)
		flags |= VIRTIO_BLK_WRITE_ZEROES_FLAG_UNMAP;

	range = kmalloc_array(segments, sizeof(*range), GFP_ATOMIC);
	if (!range)
		return -ENOMEM;

	/*
	 * A single max discard segment means multi-range discard is not
	 * supported, so describe the whole request with one range built
	 * from the request itself instead of walking its bios.
	 */
	if (queue_max_discard_segments(req->q) == 1) {
		range[0].flags = cpu_to_le32(flags);
		range[0].num_sectors = cpu_to_le32(blk_rq_sectors(req));
		range[0].sector = cpu_to_le64(blk_rq_pos(req));
		n = 1;
	} else {
		__rq_for_each_bio(bio, req) {
			u64 sector = bio->bi_iter.bi_sector;
			u32 num_sectors = bio->bi_iter.bi_size >> SECTOR_SHIFT;

			range[n].flags = cpu_to_le32(flags);
			range[n].num_sectors = cpu_to_le32(num_sectors);
			range[n].sector = cpu_to_le64(sector);
			n++;
		}
	}

	WARN_ON_ONCE(n != segments);

	req->special_vec.bv_page = virt_to_page(range);
	req->special_vec.bv_offset = offset_in_page(range);
	req->special_vec.bv_len = sizeof(*range) * segments;
	req->rq_flags |= RQF_SPECIAL_PAYLOAD;

	return 0;
}

static void virtblk_unmap_data(struct request *req, struct virtblk_req *vbr)
{
	if (blk_rq_nr_phys_segments(req))
		sg_free_table_chained(&vbr->sg_table,
				      VIRTIO_BLK_INLINE_SG_CNT);
}

static int virtblk_map_data(struct blk_mq_hw_ctx *hctx, struct request *req,
			    struct virtblk_req *vbr)
{
	int err;

	if (!blk_rq_nr_phys_segments(req))
		return 0;

	vbr->sg_table.sgl = vbr->sg;
	err = sg_alloc_table_chained(&vbr->sg_table,
				     blk_rq_nr_phys_segments(req),
				     vbr->sg_table.sgl,
				     VIRTIO_BLK_INLINE_SG_CNT);
	if (unlikely(err))
		return -ENOMEM;

	return blk_rq_map_sg(hctx->queue, req, vbr->sg_table.sgl);
}

static void virtblk_cleanup_cmd(struct request *req)
{
	if (req->rq_flags & RQF_SPECIAL_PAYLOAD)
		kfree(bvec_virt(&req->special_vec));
}

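/*
 * Translate the block layer operation into a VIRTIO_BLK_T_* command and fill
 * in the common out_hdr fields (type, sector, I/O priority).
 */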
static blk_status_t virtblk_setup_cmd(struct virtio_device *vdev,
				      struct request *req,
				      struct virtblk_req *vbr)
{
	bool unmap = false;
	u32 type;

	vbr->out_hdr.sector = 0;

	switch (req_op(req)) {
	case REQ_OP_READ:
		type = VIRTIO_BLK_T_IN;
		vbr->out_hdr.sector = cpu_to_virtio64(vdev,
						      blk_rq_pos(req));
		break;
	case REQ_OP_WRITE:
		type = VIRTIO_BLK_T_OUT;
		vbr->out_hdr.sector = cpu_to_virtio64(vdev,
						      blk_rq_pos(req));
		break;
	case REQ_OP_FLUSH:
		type = VIRTIO_BLK_T_FLUSH;
		break;
	case REQ_OP_DISCARD:
		type = VIRTIO_BLK_T_DISCARD;
		break;
	case REQ_OP_WRITE_ZEROES:
		type = VIRTIO_BLK_T_WRITE_ZEROES;
		unmap = !(req->cmd_flags & REQ_NOUNMAP);
		break;
	case REQ_OP_DRV_IN:
		type = VIRTIO_BLK_T_GET_ID;
		break;
	default:
		WARN_ON_ONCE(1);
		return BLK_STS_IOERR;
	}

	vbr->out_hdr.type = cpu_to_virtio32(vdev, type);
	vbr->out_hdr.ioprio = cpu_to_virtio32(vdev, req_get_ioprio(req));

	if (type == VIRTIO_BLK_T_DISCARD || type == VIRTIO_BLK_T_WRITE_ZEROES) {
		if (virtblk_setup_discard_write_zeroes(req, unmap))
			return BLK_STS_RESOURCE;
	}

	return 0;
}

static inline void virtblk_request_done(struct request *req)
{
	struct virtblk_req *vbr = blk_mq_rq_to_pdu(req);

	virtblk_unmap_data(req, vbr);
	virtblk_cleanup_cmd(req);
	blk_mq_end_request(req, virtblk_result(vbr));
}

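/*
 * Virtqueue callback: under the per-vq lock, reap completed requests and
 * restart any hardware queues that were stopped while the ring was full.
 */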
static void virtblk_done(struct virtqueue *vq)
{
	struct virtio_blk *vblk = vq->vdev->priv;
	bool req_done = false;
	int qid = vq->index;
	struct virtblk_req *vbr;
	unsigned long flags;
	unsigned int len;

	spin_lock_irqsave(&vblk->vqs[qid].lock, flags);
	do {
		virtqueue_disable_cb(vq);
		while ((vbr = virtqueue_get_buf(vblk->vqs[qid].vq, &len)) != NULL) {
			struct request *req = blk_mq_rq_from_pdu(vbr);

			if (likely(!blk_should_fake_timeout(req->q)))
				blk_mq_complete_request(req);
			req_done = true;
		}
		if (unlikely(virtqueue_is_broken(vq)))
			break;
	} while (!virtqueue_enable_cb(vq));

	/* In case the queue is stopped waiting for more buffers. */
	if (req_done)
		blk_mq_start_stopped_hw_queues(vblk->disk->queue, true);
	spin_unlock_irqrestore(&vblk->vqs[qid].lock, flags);
}

static void virtio_commit_rqs(struct blk_mq_hw_ctx *hctx)
{
	struct virtio_blk *vblk = hctx->queue->queuedata;
	struct virtio_blk_vq *vq = &vblk->vqs[hctx->queue_num];
	bool kick;

	spin_lock_irq(&vq->lock);
	kick = virtqueue_kick_prepare(vq->vq);
	spin_unlock_irq(&vq->lock);

	if (kick)
		virtqueue_notify(vq->vq);
}

static blk_status_t virtblk_prep_rq(struct blk_mq_hw_ctx *hctx,
				    struct virtio_blk *vblk,
				    struct request *req,
				    struct virtblk_req *vbr)
{
	blk_status_t status;
	int nents;

	status = virtblk_setup_cmd(vblk->vdev, req, vbr);
	if (unlikely(status))
		return status;

	/*
	 * Check the mapping result in a signed local: sg_table.nents is
	 * unsigned, so comparing it directly against 0 could never catch a
	 * mapping failure.
	 */
	nents = virtblk_map_data(hctx, req, vbr);
	if (unlikely(nents < 0)) {
		virtblk_cleanup_cmd(req);
		return BLK_STS_RESOURCE;
	}
	vbr->sg_table.nents = nents;

	blk_mq_start_request(req);

	return BLK_STS_OK;
}

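/*
 * Regular ->queue_rq path: prepare the command, add it to the virtqueue for
 * this hardware context and, when the ring is full, stop the queue until
 * completions free up descriptors.
 */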
static blk_status_t virtio_queue_rq(struct blk_mq_hw_ctx *hctx,
				    const struct blk_mq_queue_data *bd)
{
	struct virtio_blk *vblk = hctx->queue->queuedata;
	struct request *req = bd->rq;
	struct virtblk_req *vbr = blk_mq_rq_to_pdu(req);
	unsigned long flags;
	int qid = hctx->queue_num;
	bool notify = false;
	blk_status_t status;
	int err;

	status = virtblk_prep_rq(hctx, vblk, req, vbr);
	if (unlikely(status))
		return status;

	spin_lock_irqsave(&vblk->vqs[qid].lock, flags);
	err = virtblk_add_req(vblk->vqs[qid].vq, vbr);
	if (err) {
		virtqueue_kick(vblk->vqs[qid].vq);
		/*
		 * Only stop the queue when the ring is out of descriptors
		 * (-ENOSPC); an -ENOMEM is a transient allocation failure and
		 * is simply retried by the block layer.
		 */
		if (err == -ENOSPC)
			blk_mq_stop_hw_queue(hctx);
		spin_unlock_irqrestore(&vblk->vqs[qid].lock, flags);
		virtblk_unmap_data(req, vbr);
		virtblk_cleanup_cmd(req);
		switch (err) {
		case -ENOSPC:
			return BLK_STS_DEV_RESOURCE;
		case -ENOMEM:
			return BLK_STS_RESOURCE;
		default:
			return BLK_STS_IOERR;
		}
	}

	if (bd->last && virtqueue_kick_prepare(vblk->vqs[qid].vq))
		notify = true;
	spin_unlock_irqrestore(&vblk->vqs[qid].lock, flags);

	if (notify)
		virtqueue_notify(vblk->vqs[qid].vq);
	return BLK_STS_OK;
}

static bool virtblk_prep_rq_batch(struct request *req)
{
	struct virtio_blk *vblk = req->mq_hctx->queue->queuedata;
	struct virtblk_req *vbr = blk_mq_rq_to_pdu(req);

	req->mq_hctx->tags->rqs[req->tag] = req;

	return virtblk_prep_rq(req->mq_hctx, vblk, req, vbr) == BLK_STS_OK;
}

static bool virtblk_add_req_batch(struct virtio_blk_vq *vq,
				  struct request **rqlist)
{
	unsigned long flags;
	int err;
	bool kick;

	spin_lock_irqsave(&vq->lock, flags);

	while (!rq_list_empty(*rqlist)) {
		struct request *req = rq_list_pop(rqlist);
		struct virtblk_req *vbr = blk_mq_rq_to_pdu(req);

		err = virtblk_add_req(vq->vq, vbr);
		if (err) {
			virtblk_unmap_data(req, vbr);
			virtblk_cleanup_cmd(req);
			blk_mq_requeue_request(req, true);
		}
	}

	kick = virtqueue_kick_prepare(vq->vq);
	spin_unlock_irqrestore(&vq->lock, flags);

	return kick;
}

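/*
 * Batched ->queue_rqs path: walk the plugged request list, prepare each
 * request, and flush contiguous runs that target the same hardware context
 * into their virtqueue with a single kick. Requests that fail preparation
 * are collected on a private list and handed back for later resubmission.
 */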
static void virtio_queue_rqs(struct request **rqlist)
{
	struct request *req, *next, *prev = NULL;
	struct request *requeue_list = NULL;

	rq_list_for_each_safe(rqlist, req, next) {
		struct virtio_blk_vq *vq = get_virtio_blk_vq(req->mq_hctx);
		bool kick;

		if (!virtblk_prep_rq_batch(req)) {
			rq_list_move(rqlist, &requeue_list, req, prev);
			req = prev;
			if (!req)
				continue;
		}

		if (!next || req->mq_hctx != next->mq_hctx) {
			req->rq_next = NULL;
			kick = virtblk_add_req_batch(vq, rqlist);
			if (kick)
				virtqueue_notify(vq->vq);

			*rqlist = next;
			prev = NULL;
		} else
			prev = req;
	}

	*rqlist = requeue_list;
}

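/*
 * Read the device serial by issuing a VIRTIO_BLK_T_GET_ID command through a
 * driver-private (REQ_OP_DRV_IN) request.
 */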
static int virtblk_get_id(struct gendisk *disk, char *id_str)
{
	struct virtio_blk *vblk = disk->private_data;
	struct request_queue *q = vblk->disk->queue;
	struct request *req;
	int err;

	req = blk_mq_alloc_request(q, REQ_OP_DRV_IN, 0);
	if (IS_ERR(req))
		return PTR_ERR(req);

	err = blk_rq_map_kern(q, req, id_str, VIRTIO_BLK_ID_BYTES, GFP_KERNEL);
	if (err)
		goto out;

	blk_execute_rq(req, false);
	err = blk_status_to_errno(virtblk_result(blk_mq_rq_to_pdu(req)));
out:
	blk_mq_free_request(req);
	return err;
}

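/*
 * The geometry ioctl can run after virtblk_remove() has started, so it takes
 * vdev_mutex and checks that vblk->vdev is still set before touching the
 * device's config space.
 */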
static int virtblk_getgeo(struct block_device *bd, struct hd_geometry *geo)
{
	struct virtio_blk *vblk = bd->bd_disk->private_data;
	int ret = 0;

	mutex_lock(&vblk->vdev_mutex);

	if (!vblk->vdev) {
		ret = -ENXIO;
		goto out;
	}

	/* See if the host provides geometry in the config space. */
	if (virtio_has_feature(vblk->vdev, VIRTIO_BLK_F_GEOMETRY)) {
		virtio_cread(vblk->vdev, struct virtio_blk_config,
			     geometry.cylinders, &geo->cylinders);
		virtio_cread(vblk->vdev, struct virtio_blk_config,
			     geometry.heads, &geo->heads);
		virtio_cread(vblk->vdev, struct virtio_blk_config,
			     geometry.sectors, &geo->sectors);
	} else {
		/* Otherwise fake a geometry derived from the capacity. */
		geo->heads = 1 << 6;
		geo->sectors = 1 << 5;
		geo->cylinders = get_capacity(bd->bd_disk) >> 11;
	}
out:
	mutex_unlock(&vblk->vdev_mutex);
	return ret;
}

static void virtblk_free_disk(struct gendisk *disk)
{
	struct virtio_blk *vblk = disk->private_data;

	ida_simple_remove(&vd_index_ida, vblk->index);
	mutex_destroy(&vblk->vdev_mutex);
	kfree(vblk);
}

static const struct block_device_operations virtblk_fops = {
	.owner		= THIS_MODULE,
	.getgeo		= virtblk_getgeo,
	.free_disk	= virtblk_free_disk,
};

static int index_to_minor(int index)
{
	return index << PART_BITS;
}

static int minor_to_index(int minor)
{
	return minor >> PART_BITS;
}

static ssize_t serial_show(struct device *dev,
			   struct device_attribute *attr, char *buf)
{
	struct gendisk *disk = dev_to_disk(dev);
	int err;

	/* sysfs gives us a PAGE_SIZE buffer, so the id must fit into it. */
	BUILD_BUG_ON(PAGE_SIZE < VIRTIO_BLK_ID_BYTES);

	buf[VIRTIO_BLK_ID_BYTES] = '\0';
	err = virtblk_get_id(disk, buf);
	if (!err)
		return strlen(buf);

	if (err == -EIO) /* Unsupported? Make it empty. */
		return 0;

	return err;
}

static DEVICE_ATTR_RO(serial);

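/*
 * Read the current capacity from config space, log it in human-readable
 * form and propagate it to the block layer.
 */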
static void virtblk_update_capacity(struct virtio_blk *vblk, bool resize)
{
	struct virtio_device *vdev = vblk->vdev;
	struct request_queue *q = vblk->disk->queue;
	char cap_str_2[10], cap_str_10[10];
	unsigned long long nblocks;
	u64 capacity;

	/* The host must always specify the capacity. */
	virtio_cread(vdev, struct virtio_blk_config, capacity, &capacity);

	nblocks = DIV_ROUND_UP_ULL(capacity, queue_logical_block_size(q) >> 9);

	string_get_size(nblocks, queue_logical_block_size(q),
			STRING_UNITS_2, cap_str_2, sizeof(cap_str_2));
	string_get_size(nblocks, queue_logical_block_size(q),
			STRING_UNITS_10, cap_str_10, sizeof(cap_str_10));

	dev_notice(&vdev->dev,
		   "[%s] %s%llu %d-byte logical blocks (%s/%s)\n",
		   vblk->disk->disk_name,
		   resize ? "new size: " : "",
		   nblocks,
		   queue_logical_block_size(q),
		   cap_str_10,
		   cap_str_2);

	set_capacity_and_notify(vblk->disk, capacity);
}

static void virtblk_config_changed_work(struct work_struct *work)
{
	struct virtio_blk *vblk =
		container_of(work, struct virtio_blk, config_work);

	virtblk_update_capacity(vblk, true);
}

static void virtblk_config_changed(struct virtio_device *vdev)
{
	struct virtio_blk *vblk = vdev->priv;

	queue_work(virtblk_wq, &vblk->config_work);
}

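/*
 * Discover and set up the virtqueues. The device may expose several request
 * queues (VIRTIO_BLK_F_MQ); the module parameters can cap that number and
 * carve some of them out as dedicated poll queues, which get no interrupt
 * callback and are serviced only through ->poll().
 */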
static int init_vq(struct virtio_blk *vblk)
{
	int err;
	int i;
	vq_callback_t **callbacks;
	const char **names;
	struct virtqueue **vqs;
	unsigned short num_vqs;
	unsigned int num_poll_vqs;
	struct virtio_device *vdev = vblk->vdev;
	struct irq_affinity desc = { 0, };

	err = virtio_cread_feature(vdev, VIRTIO_BLK_F_MQ,
				   struct virtio_blk_config, num_queues,
				   &num_vqs);
	if (err)
		num_vqs = 1;

	if (!err && !num_vqs) {
		dev_err(&vdev->dev, "MQ advertised but zero queues reported\n");
		return -EINVAL;
	}

	num_vqs = min_t(unsigned int,
			min_not_zero(num_request_queues, nr_cpu_ids),
			num_vqs);

	num_poll_vqs = min_t(unsigned int, poll_queues, num_vqs - 1);

	vblk->io_queues[HCTX_TYPE_DEFAULT] = num_vqs - num_poll_vqs;
	vblk->io_queues[HCTX_TYPE_READ] = 0;
	vblk->io_queues[HCTX_TYPE_POLL] = num_poll_vqs;

	dev_info(&vdev->dev, "%d/%d/%d default/read/poll queues\n",
		 vblk->io_queues[HCTX_TYPE_DEFAULT],
		 vblk->io_queues[HCTX_TYPE_READ],
		 vblk->io_queues[HCTX_TYPE_POLL]);

	vblk->vqs = kmalloc_array(num_vqs, sizeof(*vblk->vqs), GFP_KERNEL);
	if (!vblk->vqs)
		return -ENOMEM;

	names = kmalloc_array(num_vqs, sizeof(*names), GFP_KERNEL);
	callbacks = kmalloc_array(num_vqs, sizeof(*callbacks), GFP_KERNEL);
	vqs = kmalloc_array(num_vqs, sizeof(*vqs), GFP_KERNEL);
	if (!names || !callbacks || !vqs) {
		err = -ENOMEM;
		goto out;
	}

	for (i = 0; i < num_vqs - num_poll_vqs; i++) {
		callbacks[i] = virtblk_done;
		snprintf(vblk->vqs[i].name, VQ_NAME_LEN, "req.%d", i);
		names[i] = vblk->vqs[i].name;
	}

	for (; i < num_vqs; i++) {
		callbacks[i] = NULL;
		snprintf(vblk->vqs[i].name, VQ_NAME_LEN, "req_poll.%d", i);
		names[i] = vblk->vqs[i].name;
	}

	/* Discover the virtqueues provided by the device. */
	err = virtio_find_vqs(vdev, num_vqs, vqs, callbacks, names, &desc);
	if (err)
		goto out;

	for (i = 0; i < num_vqs; i++) {
		spin_lock_init(&vblk->vqs[i].lock);
		vblk->vqs[i].vq = vqs[i];
	}
	vblk->num_vqs = num_vqs;

out:
	kfree(vqs);
	kfree(callbacks);
	kfree(names);
	if (err)
		kfree(vblk->vqs);
	return err;
}

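/*
 * Format the disk name for an index: vda..vdz for 0..25, then vdaa, vdab,
 * and so on, using the classic base-26 device naming scheme.
 */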
static int virtblk_name_format(char *prefix, int index, char *buf, int buflen)
{
	const int base = 'z' - 'a' + 1;
	char *begin = buf + strlen(prefix);
	char *end = buf + buflen;
	char *p;
	int unit;

	p = end - 1;
	*p = '\0';
	unit = base;
	do {
		if (p == begin)
			return -EINVAL;
		*--p = 'a' + (index % unit);
		index = (index / unit) - 1;
	} while (index >= 0);

	memmove(begin, p, end - p);
	memcpy(buf, prefix, strlen(prefix));

	return 0;
}

static int virtblk_get_cache_mode(struct virtio_device *vdev)
{
	u8 writeback;
	int err;

	err = virtio_cread_feature(vdev, VIRTIO_BLK_F_CONFIG_WCE,
				   struct virtio_blk_config, wce,
				   &writeback);

	/*
	 * If WCE is not configurable, fall back to the FLUSH feature to
	 * decide whether a writeback cache is in use.
	 */
	if (err)
		writeback = virtio_has_feature(vdev, VIRTIO_BLK_F_FLUSH);

	return writeback;
}

static void virtblk_update_cache_mode(struct virtio_device *vdev)
{
	u8 writeback = virtblk_get_cache_mode(vdev);
	struct virtio_blk *vblk = vdev->priv;

	blk_queue_write_cache(vblk->disk->queue, writeback, false);
}

static const char *const virtblk_cache_types[] = {
	"write through", "write back"
};

static ssize_t
cache_type_store(struct device *dev, struct device_attribute *attr,
		 const char *buf, size_t count)
{
	struct gendisk *disk = dev_to_disk(dev);
	struct virtio_blk *vblk = disk->private_data;
	struct virtio_device *vdev = vblk->vdev;
	int i;

	BUG_ON(!virtio_has_feature(vblk->vdev, VIRTIO_BLK_F_CONFIG_WCE));
	i = sysfs_match_string(virtblk_cache_types, buf);
	if (i < 0)
		return i;

	virtio_cwrite8(vdev, offsetof(struct virtio_blk_config, wce), i);
	virtblk_update_cache_mode(vdev);
	return count;
}

static ssize_t
cache_type_show(struct device *dev, struct device_attribute *attr, char *buf)
{
	struct gendisk *disk = dev_to_disk(dev);
	struct virtio_blk *vblk = disk->private_data;
	u8 writeback = virtblk_get_cache_mode(vblk->vdev);

	BUG_ON(writeback >= ARRAY_SIZE(virtblk_cache_types));
	return sysfs_emit(buf, "%s\n", virtblk_cache_types[writeback]);
}

static DEVICE_ATTR_RW(cache_type);

static struct attribute *virtblk_attrs[] = {
	&dev_attr_serial.attr,
	&dev_attr_cache_type.attr,
	NULL,
};

static umode_t virtblk_attrs_are_visible(struct kobject *kobj,
					 struct attribute *a, int n)
{
	struct device *dev = kobj_to_dev(kobj);
	struct gendisk *disk = dev_to_disk(dev);
	struct virtio_blk *vblk = disk->private_data;
	struct virtio_device *vdev = vblk->vdev;

	if (a == &dev_attr_cache_type.attr &&
	    !virtio_has_feature(vdev, VIRTIO_BLK_F_CONFIG_WCE))
		return S_IRUGO;

	return a->mode;
}

static const struct attribute_group virtblk_attr_group = {
	.attrs = virtblk_attrs,
	.is_visible = virtblk_attrs_are_visible,
};

static const struct attribute_group *virtblk_attr_groups[] = {
	&virtblk_attr_group,
	NULL,
};

static int virtblk_map_queues(struct blk_mq_tag_set *set)
{
	struct virtio_blk *vblk = set->driver_data;
	int i, qoff;

	for (i = 0, qoff = 0; i < set->nr_maps; i++) {
		struct blk_mq_queue_map *map = &set->map[i];

		map->nr_queues = vblk->io_queues[i];
		map->queue_offset = qoff;
		qoff += map->nr_queues;

		if (map->nr_queues == 0)
			continue;

		/*
		 * Regular queues have interrupts and hence an affinity-based
		 * vector-to-CPU mapping; poll queues do not, so fall back to
		 * the generic CPU spreading for them.
		 */
		if (i == HCTX_TYPE_POLL)
			blk_mq_map_queues(&set->map[i]);
		else
			blk_mq_virtio_map_queues(&set->map[i], vblk->vdev, 0);
	}

	return 0;
}

static void virtblk_complete_batch(struct io_comp_batch *iob)
{
	struct request *req;

	rq_list_for_each(&iob->req_list, req) {
		virtblk_unmap_data(req, blk_mq_rq_to_pdu(req));
		virtblk_cleanup_cmd(req);
	}
	blk_mq_end_request_batch(iob);
}

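/*
 * ->poll() handler for the dedicated poll virtqueues: reap completions
 * without interrupts, batching them where the block layer allows it.
 */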
static int virtblk_poll(struct blk_mq_hw_ctx *hctx, struct io_comp_batch *iob)
{
	struct virtio_blk *vblk = hctx->queue->queuedata;
	struct virtio_blk_vq *vq = get_virtio_blk_vq(hctx);
	struct virtblk_req *vbr;
	unsigned long flags;
	unsigned int len;
	int found = 0;

	spin_lock_irqsave(&vq->lock, flags);

	while ((vbr = virtqueue_get_buf(vq->vq, &len)) != NULL) {
		struct request *req = blk_mq_rq_from_pdu(vbr);

		found++;
		if (!blk_mq_add_to_batch(req, iob, vbr->status,
					 virtblk_complete_batch))
			blk_mq_complete_request(req);
	}

	if (found)
		blk_mq_start_stopped_hw_queues(vblk->disk->queue, true);

	spin_unlock_irqrestore(&vq->lock, flags);

	return found;
}

static const struct blk_mq_ops virtio_mq_ops = {
	.queue_rq	= virtio_queue_rq,
	.queue_rqs	= virtio_queue_rqs,
	.commit_rqs	= virtio_commit_rqs,
	.complete	= virtblk_request_done,
	.map_queues	= virtblk_map_queues,
	.poll		= virtblk_poll,
};

static unsigned int virtblk_queue_depth;
module_param_named(queue_depth, virtblk_queue_depth, uint, 0444);

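/*
 * Probe: allocate the per-device state, set up the virtqueues and the blk-mq
 * tag set, create the gendisk, apply the limits advertised in the config
 * space (segments, block size, topology, discard/write-zeroes) and register
 * the disk.
 */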
static int virtblk_probe(struct virtio_device *vdev)
{
	struct virtio_blk *vblk;
	struct request_queue *q;
	int err, index;

	u32 v, blk_size, max_size, sg_elems, opt_io_size;
	u16 min_io_size;
	u8 physical_block_exp, alignment_offset;
	unsigned int queue_depth;

	if (!vdev->config->get) {
		dev_err(&vdev->dev, "%s failure: config access disabled\n",
			__func__);
		return -EINVAL;
	}

	err = ida_simple_get(&vd_index_ida, 0, minor_to_index(1 << MINORBITS),
			     GFP_KERNEL);
	if (err < 0)
		goto out;
	index = err;

	/* How many data segments can the device take per request? */
	err = virtio_cread_feature(vdev, VIRTIO_BLK_F_SEG_MAX,
				   struct virtio_blk_config, seg_max,
				   &sg_elems);

	/* We need at least one SG element, whatever the device says. */
	if (err || !sg_elems)
		sg_elems = 1;

	/* Prevent integer overflows and honor the maximum vq size. */
	sg_elems = min_t(u32, sg_elems, VIRTIO_BLK_MAX_SG_ELEMS - 2);

	vdev->priv = vblk = kmalloc(sizeof(*vblk), GFP_KERNEL);
	if (!vblk) {
		err = -ENOMEM;
		goto out_free_index;
	}

	mutex_init(&vblk->vdev_mutex);

	vblk->vdev = vdev;

	INIT_WORK(&vblk->config_work, virtblk_config_changed_work);

	err = init_vq(vblk);
	if (err)
		goto out_free_vblk;

	/* Default queue sizing is to fill the ring. */
	if (!virtblk_queue_depth) {
		queue_depth = vblk->vqs[0].vq->num_free;
		/* Without indirect descriptors each request needs several ring entries. */
		if (!virtio_has_feature(vdev, VIRTIO_RING_F_INDIRECT_DESC))
			queue_depth /= 2;
	} else {
		queue_depth = virtblk_queue_depth;
	}

	memset(&vblk->tag_set, 0, sizeof(vblk->tag_set));
	vblk->tag_set.ops = &virtio_mq_ops;
	vblk->tag_set.queue_depth = queue_depth;
	vblk->tag_set.numa_node = NUMA_NO_NODE;
	vblk->tag_set.flags = BLK_MQ_F_SHOULD_MERGE;
	vblk->tag_set.cmd_size =
		sizeof(struct virtblk_req) +
		sizeof(struct scatterlist) * VIRTIO_BLK_INLINE_SG_CNT;
	vblk->tag_set.driver_data = vblk;
	vblk->tag_set.nr_hw_queues = vblk->num_vqs;
	vblk->tag_set.nr_maps = 1;
	if (vblk->io_queues[HCTX_TYPE_POLL])
		vblk->tag_set.nr_maps = 3;

	err = blk_mq_alloc_tag_set(&vblk->tag_set);
	if (err)
		goto out_free_vq;

	vblk->disk = blk_mq_alloc_disk(&vblk->tag_set, vblk);
	if (IS_ERR(vblk->disk)) {
		err = PTR_ERR(vblk->disk);
		goto out_free_tags;
	}
	q = vblk->disk->queue;

	virtblk_name_format("vd", index, vblk->disk->disk_name, DISK_NAME_LEN);

	vblk->disk->major = major;
	vblk->disk->first_minor = index_to_minor(index);
	vblk->disk->minors = 1 << PART_BITS;
	vblk->disk->private_data = vblk;
	vblk->disk->fops = &virtblk_fops;
	vblk->index = index;

	/* Configure the write cache from the mode the device reports. */
	virtblk_update_cache_mode(vdev);

	/* If the disk is read-only in the host, the guest should obey. */
	if (virtio_has_feature(vdev, VIRTIO_BLK_F_RO))
		set_disk_ro(vblk->disk, 1);

	/* We can handle whatever the host told us to handle. */
	blk_queue_max_segments(q, sg_elems);

	/* No real sector limit. */
	blk_queue_max_hw_sectors(q, -1U);

	max_size = virtio_max_dma_size(vdev);

	/*
	 * The host can optionally specify a maximum segment size; otherwise
	 * it is bounded by the DMA mapping limit.
	 */
	err = virtio_cread_feature(vdev, VIRTIO_BLK_F_SIZE_MAX,
				   struct virtio_blk_config, size_max, &v);
	if (!err)
		max_size = min(max_size, v);

	blk_queue_max_segment_size(q, max_size);

	/* The host can optionally specify the block size of the device. */
	err = virtio_cread_feature(vdev, VIRTIO_BLK_F_BLK_SIZE,
				   struct virtio_blk_config, blk_size,
				   &blk_size);
	if (!err) {
		err = blk_validate_block_size(blk_size);
		if (err) {
			dev_err(&vdev->dev,
				"virtio_blk: invalid block size: 0x%x\n",
				blk_size);
			goto out_cleanup_disk;
		}

		blk_queue_logical_block_size(q, blk_size);
	} else
		blk_size = queue_logical_block_size(q);

	/* Use topology information if available. */
	err = virtio_cread_feature(vdev, VIRTIO_BLK_F_TOPOLOGY,
				   struct virtio_blk_config, physical_block_exp,
				   &physical_block_exp);
	if (!err && physical_block_exp)
		blk_queue_physical_block_size(q,
				blk_size * (1 << physical_block_exp));

	err = virtio_cread_feature(vdev, VIRTIO_BLK_F_TOPOLOGY,
				   struct virtio_blk_config, alignment_offset,
				   &alignment_offset);
	if (!err && alignment_offset)
		blk_queue_alignment_offset(q, blk_size * alignment_offset);

	err = virtio_cread_feature(vdev, VIRTIO_BLK_F_TOPOLOGY,
				   struct virtio_blk_config, min_io_size,
				   &min_io_size);
	if (!err && min_io_size)
		blk_queue_io_min(q, blk_size * min_io_size);

	err = virtio_cread_feature(vdev, VIRTIO_BLK_F_TOPOLOGY,
				   struct virtio_blk_config, opt_io_size,
				   &opt_io_size);
	if (!err && opt_io_size)
		blk_queue_io_opt(q, blk_size * opt_io_size);

	if (virtio_has_feature(vdev, VIRTIO_BLK_F_DISCARD)) {
		virtio_cread(vdev, struct virtio_blk_config,
			     discard_sector_alignment, &v);
		if (v)
			q->limits.discard_granularity = v << SECTOR_SHIFT;
		else
			q->limits.discard_granularity = blk_size;

		virtio_cread(vdev, struct virtio_blk_config,
			     max_discard_sectors, &v);
		blk_queue_max_discard_sectors(q, v ? v : UINT_MAX);

		virtio_cread(vdev, struct virtio_blk_config, max_discard_seg,
			     &v);

		/*
		 * A max_discard_seg of 0 is out of spec; tolerate it by
		 * falling back to the general data segment limit.
		 */
		if (!v)
			v = sg_elems;
		blk_queue_max_discard_segments(q,
					       min(v, MAX_DISCARD_SEGMENTS));
	}

	if (virtio_has_feature(vdev, VIRTIO_BLK_F_WRITE_ZEROES)) {
		virtio_cread(vdev, struct virtio_blk_config,
			     max_write_zeroes_sectors, &v);
		blk_queue_max_write_zeroes_sectors(q, v ? v : UINT_MAX);
	}

	virtblk_update_capacity(vblk, false);
	virtio_device_ready(vdev);

	err = device_add_disk(&vdev->dev, vblk->disk, virtblk_attr_groups);
	if (err)
		goto out_cleanup_disk;

	return 0;

out_cleanup_disk:
	put_disk(vblk->disk);
out_free_tags:
	blk_mq_free_tag_set(&vblk->tag_set);
out_free_vq:
	vdev->config->del_vqs(vdev);
	kfree(vblk->vqs);
out_free_vblk:
	kfree(vblk);
out_free_index:
	ida_simple_remove(&vd_index_ida, index);
out:
	return err;
}

static void virtblk_remove(struct virtio_device *vdev)
{
	struct virtio_blk *vblk = vdev->priv;

	/* Make sure no work handler is accessing the device. */
	flush_work(&vblk->config_work);

	del_gendisk(vblk->disk);
	blk_mq_free_tag_set(&vblk->tag_set);

	mutex_lock(&vblk->vdev_mutex);

	/* Stop all the virtqueues. */
	virtio_reset_device(vdev);

	/* Virtqueues are stopped, nothing can use vblk->vdev anymore. */
	vblk->vdev = NULL;

	vdev->config->del_vqs(vdev);
	kfree(vblk->vqs);

	mutex_unlock(&vblk->vdev_mutex);

	put_disk(vblk->disk);
}

#ifdef CONFIG_PM_SLEEP
static int virtblk_freeze(struct virtio_device *vdev)
{
	struct virtio_blk *vblk = vdev->priv;

	/* Ensure we don't receive any more interrupts. */
	virtio_reset_device(vdev);

	/* Make sure no work handler is accessing the device. */
	flush_work(&vblk->config_work);

	blk_mq_quiesce_queue(vblk->disk->queue);

	vdev->config->del_vqs(vdev);
	kfree(vblk->vqs);

	return 0;
}

static int virtblk_restore(struct virtio_device *vdev)
{
	struct virtio_blk *vblk = vdev->priv;
	int ret;

	ret = init_vq(vdev->priv);
	if (ret)
		return ret;

	virtio_device_ready(vdev);

	blk_mq_unquiesce_queue(vblk->disk->queue);
	return 0;
}
#endif

static const struct virtio_device_id id_table[] = {
	{ VIRTIO_ID_BLOCK, VIRTIO_DEV_ANY_ID },
	{ 0 },
};

static unsigned int features_legacy[] = {
	VIRTIO_BLK_F_SEG_MAX, VIRTIO_BLK_F_SIZE_MAX, VIRTIO_BLK_F_GEOMETRY,
	VIRTIO_BLK_F_RO, VIRTIO_BLK_F_BLK_SIZE,
	VIRTIO_BLK_F_FLUSH, VIRTIO_BLK_F_TOPOLOGY, VIRTIO_BLK_F_CONFIG_WCE,
	VIRTIO_BLK_F_MQ, VIRTIO_BLK_F_DISCARD, VIRTIO_BLK_F_WRITE_ZEROES,
};

static unsigned int features[] = {
	VIRTIO_BLK_F_SEG_MAX, VIRTIO_BLK_F_SIZE_MAX, VIRTIO_BLK_F_GEOMETRY,
	VIRTIO_BLK_F_RO, VIRTIO_BLK_F_BLK_SIZE,
	VIRTIO_BLK_F_FLUSH, VIRTIO_BLK_F_TOPOLOGY, VIRTIO_BLK_F_CONFIG_WCE,
	VIRTIO_BLK_F_MQ, VIRTIO_BLK_F_DISCARD, VIRTIO_BLK_F_WRITE_ZEROES,
};

static struct virtio_driver virtio_blk = {
	.feature_table			= features,
	.feature_table_size		= ARRAY_SIZE(features),
	.feature_table_legacy		= features_legacy,
	.feature_table_size_legacy	= ARRAY_SIZE(features_legacy),
	.driver.name			= KBUILD_MODNAME,
	.driver.owner			= THIS_MODULE,
	.id_table			= id_table,
	.probe				= virtblk_probe,
	.remove				= virtblk_remove,
	.config_changed			= virtblk_config_changed,
#ifdef CONFIG_PM_SLEEP
	.freeze				= virtblk_freeze,
	.restore			= virtblk_restore,
#endif
};

static int __init virtio_blk_init(void)
{
	int error;

	virtblk_wq = alloc_workqueue("virtio-blk", 0, 0);
	if (!virtblk_wq)
		return -ENOMEM;

	major = register_blkdev(0, "virtblk");
	if (major < 0) {
		error = major;
		goto out_destroy_workqueue;
	}

	error = register_virtio_driver(&virtio_blk);
	if (error)
		goto out_unregister_blkdev;
	return 0;

out_unregister_blkdev:
	unregister_blkdev(major, "virtblk");
out_destroy_workqueue:
	destroy_workqueue(virtblk_wq);
	return error;
}

static void __exit virtio_blk_fini(void)
{
	unregister_virtio_driver(&virtio_blk);
	unregister_blkdev(major, "virtblk");
	destroy_workqueue(virtblk_wq);
}
module_init(virtio_blk_init);
module_exit(virtio_blk_fini);

MODULE_DEVICE_TABLE(virtio, id_table);
MODULE_DESCRIPTION("Virtio block driver");
MODULE_LICENSE("GPL");