0001
0002
0003
0004
0005
0006
0007
0008
0009
0010
0011 #include <linux/init.h>
0012 #include <linux/module.h>
0013 #include <linux/cdev.h>
0014 #include <linux/device.h>
0015 #include <linux/eventfd.h>
0016 #include <linux/slab.h>
0017 #include <linux/wait.h>
0018 #include <linux/dma-map-ops.h>
0019 #include <linux/poll.h>
0020 #include <linux/file.h>
0021 #include <linux/uio.h>
0022 #include <linux/vdpa.h>
0023 #include <linux/nospec.h>
0024 #include <linux/vmalloc.h>
0025 #include <linux/sched/mm.h>
0026 #include <uapi/linux/vduse.h>
0027 #include <uapi/linux/vdpa.h>
0028 #include <uapi/linux/virtio_config.h>
0029 #include <uapi/linux/virtio_ids.h>
0030 #include <uapi/linux/virtio_blk.h>
0031 #include <linux/mod_devicetable.h>
0032
0033 #include "iova_domain.h"
0034
/* Driver identification strings (presumably fed to MODULE_* macros outside this chunk). */
0035 #define DRV_AUTHOR "Yongji Xie <xieyongji@bytedance.com>"
0036 #define DRV_DESC "vDPA Device in Userspace"
0037 #define DRV_LICENSE "GPL v2"
0038
/* Bound derived from the width of a minor number; its use is outside this chunk. */
0039 #define VDUSE_DEV_MAX (1U << MINORBITS)
/* 64 MiB */
0040 #define VDUSE_BOUNCE_SIZE (64 * 1024 * 1024)
/* 128 MiB */
0041 #define VDUSE_IOVA_SIZE (128 * 1024 * 1024)
/*
 * NOTE(review): presumably seconds, since dev->msg_timeout is multiplied by
 * HZ in vduse_dev_msg_sync() -- confirm where this default is assigned.
 */
0042 #define VDUSE_MSG_DEFAULT_TIMEOUT 30
0043
/* Per-virtqueue state kept by the kernel side of a VDUSE device. */
0044 struct vduse_virtqueue {
0045 u16 index; /* queue index, sent to userspace in VDUSE_GET_VQ_STATE requests */
0046 u16 num_max; /* set from the VDUSE_VQ_SETUP ioctl (config.max_size) */
0047 u32 num; /* set by vduse_vdpa_set_vq_num(), cleared on reset */
0048 u64 desc_addr; /* the three ring addresses are set by vduse_vdpa_set_vq_address() */
0049 u64 driver_addr;
0050 u64 device_addr;
0051 struct vdpa_vq_state state; /* last state written by vduse_vdpa_set_vq_state() */
0052 bool ready;
0053 bool kicked; /* a kick arrived while no kickfd was attached; replayed in vduse_kickfd_setup() */
0054 spinlock_t kick_lock; /* taken around accesses to kicked and kickfd */
0055 spinlock_t irq_lock; /* taken around accesses to cb */
0056 struct eventfd_ctx *kickfd; /* eventfd signalled on kick, NULL when none is attached */
0057 struct vdpa_callback cb; /* invoked by vduse_vq_irq_inject() */
0058 struct work_struct inject; /* work item handled by vduse_vq_irq_inject() */
0059 struct work_struct kick; /* work item handled by vduse_vq_kick_work() */
0060 };
0061
0062 struct vduse_dev;
0063
/*
 * Wrapper that embeds the vdpa_device and points back at the owning
 * vduse_dev; vdpa_to_vduse() recovers it with container_of().
 */
0064 struct vduse_vdpa {
0065 struct vdpa_device vdpa;
0066 struct vduse_dev *dev;
0067 };
0068
/* Record of a registered userspace buffer, filled in by vduse_dev_reg_umem(). */
0069 struct vduse_umem {
0070 unsigned long iova; /* IOVA the buffer was registered at */
0071 unsigned long npages; /* number of pinned pages in @pages */
0072 struct page **pages; /* vmalloc'ed array of the pinned pages */
0073 struct mm_struct *mm; /* mm whose pinned_vm was charged; held via mmgrab() */
0074 };
0075
/* State of one VDUSE device, shared by the vDPA callbacks and the char device. */
0076 struct vduse_dev {
0077 struct vduse_vdpa *vdev; /* cleared by vduse_vdpa_free() */
0078 struct device *dev;
0079 struct vduse_virtqueue *vqs; /* array indexed 0..vq_num-1 */
0080 struct vduse_iova_domain *domain; /* IOVA domain used by the DMA ops and IOTLB ioctls */
0081 char *name;
0082 struct mutex lock;
0083 spinlock_t msg_lock; /* taken around send_list, recv_list and msg_unique accesses */
0084 u64 msg_unique; /* source of the next request_id */
0085 u32 msg_timeout; /* multiplied by HZ when waiting; 0 means wait without timeout */
0086 wait_queue_head_t waitq; /* readers sleep here until send_list is non-empty */
0087 struct list_head send_list; /* requests not yet read by userspace */
0088 struct list_head recv_list; /* requests read by userspace, awaiting a response */
0089 struct vdpa_callback config_cb; /* invoked by vduse_dev_irq_inject() */
0090 struct work_struct inject; /* work item handled by vduse_dev_irq_inject() */
0091 spinlock_t irq_lock; /* taken around accesses to config_cb */
0092 struct rw_semaphore rwsem; /* write-held during reset, read-held while queueing irq work */
0093 int minor;
0094 bool broken; /* set by vduse_dev_broken(); messaging then fails with -EIO */
0095 bool connected; /* cleared in vduse_dev_release() */
0096 u64 api_version;
0097 u64 device_features;
0098 u64 driver_features; /* written by the vDPA driver, zeroed on reset */
0099 u32 device_id;
0100 u32 vendor_id;
0101 u32 generation; /* incremented on every reset */
0102 u32 config_size; /* size in bytes of the buffer at @config */
0103 void *config; /* config space contents */
0104 u8 status; /* virtio status byte */
0105 u32 vq_num; /* number of entries in @vqs */
0106 u32 vq_align;
0107 struct vduse_umem *umem; /* registered userspace buffer, or NULL */
0108 struct mutex mem_lock; /* taken around accesses to umem */
0109 };
0110
/* One in-flight request/response exchange with the userspace daemon. */
0111 struct vduse_dev_msg {
0112 struct vduse_dev_request req; /* copied to userspace by vduse_dev_read_iter() */
0113 struct vduse_dev_response resp; /* filled from userspace by vduse_dev_write_iter() */
0114 struct list_head list; /* link on dev->send_list or dev->recv_list */
0115 wait_queue_head_t waitq; /* the sender sleeps here until @completed is set */
0116 bool completed;
0117 };
0118
/* NOTE(review): not used within this chunk; presumably per-open state of the control device -- confirm. */
0119 struct vduse_control {
0120 u64 api_version;
0121 };
0122
/* vduse_lock is held around vduse_idr lookups (see vduse_dev_get_from_minor()). */
0123 static DEFINE_MUTEX(vduse_lock);
/* Maps a minor number to its struct vduse_dev. */
0124 static DEFINE_IDR(vduse_idr);
0125
/* Char-device bookkeeping; initialisation is outside this chunk. */
0126 static dev_t vduse_major;
0127 static struct class *vduse_class;
0128 static struct cdev vduse_ctrl_cdev;
0129 static struct cdev vduse_cdev;
/* Workqueue on which interrupt-injection work is queued (vduse_dev_queue_irq_work()). */
0130 static struct workqueue_struct *vduse_irq_wq;
0131
/* NOTE(review): presumably the virtio device IDs accepted at device creation; the check is outside this chunk. */
0132 static u32 allowed_device_id[] = {
0133 VIRTIO_ID_BLOCK,
0134 };
0135
0136 static inline struct vduse_dev *vdpa_to_vduse(struct vdpa_device *vdpa)
0137 {
0138 struct vduse_vdpa *vdev = container_of(vdpa, struct vduse_vdpa, vdpa);
0139
0140 return vdev->dev;
0141 }
0142
/* Resolve a generic struct device to its vduse_dev via the vdpa_device. */
static inline struct vduse_dev *dev_to_vduse(struct device *dev)
{
	return vdpa_to_vduse(dev_to_vdpa(dev));
}
0149
0150 static struct vduse_dev_msg *vduse_find_msg(struct list_head *head,
0151 uint32_t request_id)
0152 {
0153 struct vduse_dev_msg *msg;
0154
0155 list_for_each_entry(msg, head, list) {
0156 if (msg->req.request_id == request_id) {
0157 list_del(&msg->list);
0158 return msg;
0159 }
0160 }
0161
0162 return NULL;
0163 }
0164
0165 static struct vduse_dev_msg *vduse_dequeue_msg(struct list_head *head)
0166 {
0167 struct vduse_dev_msg *msg = NULL;
0168
0169 if (!list_empty(head)) {
0170 msg = list_first_entry(head, struct vduse_dev_msg, list);
0171 list_del(&msg->list);
0172 }
0173
0174 return msg;
0175 }
0176
0177 static void vduse_enqueue_msg(struct list_head *head,
0178 struct vduse_dev_msg *msg)
0179 {
0180 list_add_tail(&msg->list, head);
0181 }
0182
0183 static void vduse_dev_broken(struct vduse_dev *dev)
0184 {
0185 struct vduse_dev_msg *msg, *tmp;
0186
0187 if (unlikely(dev->broken))
0188 return;
0189
0190 list_splice_init(&dev->recv_list, &dev->send_list);
0191 list_for_each_entry_safe(msg, tmp, &dev->send_list, list) {
0192 list_del(&msg->list);
0193 msg->completed = 1;
0194 msg->resp.result = VDUSE_REQ_RESULT_FAILED;
0195 wake_up(&msg->waitq);
0196 }
0197 dev->broken = true;
0198 wake_up(&dev->waitq);
0199 }
0200
/*
 * Queue @msg for the userspace daemon and sleep until it is answered.
 *
 * Returns 0 when the response result is VDUSE_REQ_RESULT_OK and -EIO in
 * every other case: device already broken, wait timed out or was killed,
 * or userspace reported a failure.
 */
0201 static int vduse_dev_msg_sync(struct vduse_dev *dev,
0202 struct vduse_dev_msg *msg)
0203 {
0204 int ret;
0205
/* Unlocked fast path; the flag is re-checked under msg_lock below. */
0206 if (unlikely(dev->broken))
0207 return -EIO;
0208
0209 init_waitqueue_head(&msg->waitq);
0210 spin_lock(&dev->msg_lock);
0211 if (unlikely(dev->broken)) {
0212 spin_unlock(&dev->msg_lock);
0213 return -EIO;
0214 }
/* Request ids come from a per-device counter. */
0215 msg->req.request_id = dev->msg_unique++;
0216 vduse_enqueue_msg(&dev->send_list, msg);
/* Wake a reader sleeping in vduse_dev_read_iter(). */
0217 wake_up(&dev->waitq);
0218 spin_unlock(&dev->msg_lock);
/* msg_timeout is scaled by HZ; zero selects an untimed (still killable) wait. */
0219 if (dev->msg_timeout)
0220 ret = wait_event_killable_timeout(msg->waitq, msg->completed,
0221 (long)dev->msg_timeout * HZ);
0222 else
0223 ret = wait_event_killable(msg->waitq, msg->completed);
0224
0225 spin_lock(&dev->msg_lock);
/*
 * Not completed: the message is still linked on one of the device lists.
 * Unlink it before returning, since callers in this file keep it on their
 * stack.
 */
0226 if (!msg->completed) {
0227 list_del(&msg->list);
0228 msg->resp.result = VDUSE_REQ_RESULT_FAILED;
0229
/* ret == 0 with the message still incomplete: the timed wait expired. */
0230 if (!ret)
0231 vduse_dev_broken(dev);
0232 }
0233 ret = (msg->resp.result == VDUSE_REQ_RESULT_OK) ? 0 : -EIO;
0234 spin_unlock(&dev->msg_lock);
0235
0236 return ret;
0237 }
0238
0239 static int vduse_dev_get_vq_state_packed(struct vduse_dev *dev,
0240 struct vduse_virtqueue *vq,
0241 struct vdpa_vq_state_packed *packed)
0242 {
0243 struct vduse_dev_msg msg = { 0 };
0244 int ret;
0245
0246 msg.req.type = VDUSE_GET_VQ_STATE;
0247 msg.req.vq_state.index = vq->index;
0248
0249 ret = vduse_dev_msg_sync(dev, &msg);
0250 if (ret)
0251 return ret;
0252
0253 packed->last_avail_counter =
0254 msg.resp.vq_state.packed.last_avail_counter & 0x0001;
0255 packed->last_avail_idx =
0256 msg.resp.vq_state.packed.last_avail_idx & 0x7FFF;
0257 packed->last_used_counter =
0258 msg.resp.vq_state.packed.last_used_counter & 0x0001;
0259 packed->last_used_idx =
0260 msg.resp.vq_state.packed.last_used_idx & 0x7FFF;
0261
0262 return 0;
0263 }
0264
0265 static int vduse_dev_get_vq_state_split(struct vduse_dev *dev,
0266 struct vduse_virtqueue *vq,
0267 struct vdpa_vq_state_split *split)
0268 {
0269 struct vduse_dev_msg msg = { 0 };
0270 int ret;
0271
0272 msg.req.type = VDUSE_GET_VQ_STATE;
0273 msg.req.vq_state.index = vq->index;
0274
0275 ret = vduse_dev_msg_sync(dev, &msg);
0276 if (ret)
0277 return ret;
0278
0279 split->avail_index = msg.resp.vq_state.split.avail_index;
0280
0281 return 0;
0282 }
0283
0284 static int vduse_dev_set_status(struct vduse_dev *dev, u8 status)
0285 {
0286 struct vduse_dev_msg msg = { 0 };
0287
0288 msg.req.type = VDUSE_SET_STATUS;
0289 msg.req.s.status = status;
0290
0291 return vduse_dev_msg_sync(dev, &msg);
0292 }
0293
0294 static int vduse_dev_update_iotlb(struct vduse_dev *dev,
0295 u64 start, u64 last)
0296 {
0297 struct vduse_dev_msg msg = { 0 };
0298
0299 if (last < start)
0300 return -EINVAL;
0301
0302 msg.req.type = VDUSE_UPDATE_IOTLB;
0303 msg.req.iova.start = start;
0304 msg.req.iova.last = last;
0305
0306 return vduse_dev_msg_sync(dev, &msg);
0307 }
0308
/*
 * read() handler of the device file: hand the oldest pending request to
 * userspace.
 *
 * Returns sizeof(struct vduse_dev_request) on success, -EINVAL if the
 * destination is smaller than one request, -EAGAIN if nothing is pending on
 * an O_NONBLOCK file, -EFAULT if the copy is short, or the error from an
 * interrupted wait.
 */
0309 static ssize_t vduse_dev_read_iter(struct kiocb *iocb, struct iov_iter *to)
0310 {
0311 struct file *file = iocb->ki_filp;
0312 struct vduse_dev *dev = file->private_data;
0313 struct vduse_dev_msg *msg;
0314 int size = sizeof(struct vduse_dev_request);
0315 ssize_t ret;
0316
0317 if (iov_iter_count(to) < size)
0318 return -EINVAL;
0319
0320 spin_lock(&dev->msg_lock);
0321 while (1) {
0322 msg = vduse_dequeue_msg(&dev->send_list);
0323 if (msg)
0324 break;
0325
0326 ret = -EAGAIN;
0327 if (file->f_flags & O_NONBLOCK)
0328 goto unlock;
0329
/* The spinlock is dropped before sleeping and retaken once woken. */
0330 spin_unlock(&dev->msg_lock);
0331 ret = wait_event_interruptible_exclusive(dev->waitq,
0332 !list_empty(&dev->send_list));
0333 if (ret)
0334 return ret;
0335
0336 spin_lock(&dev->msg_lock);
0337 }
/* The copy runs with msg_lock released; msg is on neither list meanwhile. */
0338 spin_unlock(&dev->msg_lock);
0339 ret = copy_to_iter(&msg->req, size, to);
0340 spin_lock(&dev->msg_lock);
0341 if (ret != size) {
0342 ret = -EFAULT;
/* Copy failed: put the request back (at the tail) so it can be read again. */
0343 vduse_enqueue_msg(&dev->send_list, msg);
0344 goto unlock;
0345 }
/* Delivered: park it on recv_list until the response is written back. */
0346 vduse_enqueue_msg(&dev->recv_list, msg);
0347 unlock:
0348 spin_unlock(&dev->msg_lock);
0349
0350 return ret;
0351 }
0352
/*
 * Return true when the first @size bytes at @ptr are all zero.
 * A @size of zero or less reads nothing and yields true.
 */
static bool is_mem_zero(const char *ptr, int size)
{
	const char *cursor = ptr;
	int remaining;

	for (remaining = size; remaining > 0; remaining--) {
		if (*cursor++ != 0)
			return false;
	}

	return true;
}
0363
0364 static ssize_t vduse_dev_write_iter(struct kiocb *iocb, struct iov_iter *from)
0365 {
0366 struct file *file = iocb->ki_filp;
0367 struct vduse_dev *dev = file->private_data;
0368 struct vduse_dev_response resp;
0369 struct vduse_dev_msg *msg;
0370 size_t ret;
0371
0372 ret = copy_from_iter(&resp, sizeof(resp), from);
0373 if (ret != sizeof(resp))
0374 return -EINVAL;
0375
0376 if (!is_mem_zero((const char *)resp.reserved, sizeof(resp.reserved)))
0377 return -EINVAL;
0378
0379 spin_lock(&dev->msg_lock);
0380 msg = vduse_find_msg(&dev->recv_list, resp.request_id);
0381 if (!msg) {
0382 ret = -ENOENT;
0383 goto unlock;
0384 }
0385
0386 memcpy(&msg->resp, &resp, sizeof(resp));
0387 msg->completed = 1;
0388 wake_up(&msg->waitq);
0389 unlock:
0390 spin_unlock(&dev->msg_lock);
0391
0392 return ret;
0393 }
0394
0395 static __poll_t vduse_dev_poll(struct file *file, poll_table *wait)
0396 {
0397 struct vduse_dev *dev = file->private_data;
0398 __poll_t mask = 0;
0399
0400 poll_wait(file, &dev->waitq, wait);
0401
0402 spin_lock(&dev->msg_lock);
0403
0404 if (unlikely(dev->broken))
0405 mask |= EPOLLERR;
0406 if (!list_empty(&dev->send_list))
0407 mask |= EPOLLIN | EPOLLRDNORM;
0408 if (!list_empty(&dev->recv_list))
0409 mask |= EPOLLOUT | EPOLLWRNORM;
0410
0411 spin_unlock(&dev->msg_lock);
0412
0413 return mask;
0414 }
0415
/*
 * Return @dev and all of its virtqueues to their initial state.
 *
 * Clears status and driver features, bumps the config generation, drops
 * every callback and kick eventfd, and waits for pending inject/kick work
 * to finish.
 */
0416 static void vduse_dev_reset(struct vduse_dev *dev)
0417 {
0418 int i;
0419 struct vduse_iova_domain *domain = dev->domain;
0420
0421
/* Only the bounce map is reset here, and only if one exists. */
0422 if (domain->bounce_map)
0423 vduse_domain_reset_bounce_map(domain);
0424
/* Excludes vduse_dev_queue_irq_work(), which reads status under the read side. */
0425 down_write(&dev->rwsem);
0426
0427 dev->status = 0;
0428 dev->driver_features = 0;
0429 dev->generation++;
0430 spin_lock(&dev->irq_lock);
0431 dev->config_cb.callback = NULL;
0432 dev->config_cb.private = NULL;
0433 spin_unlock(&dev->irq_lock);
/* Wait for config-interrupt work that may already be queued or running. */
0434 flush_work(&dev->inject);
0435
0436 for (i = 0; i < dev->vq_num; i++) {
0437 struct vduse_virtqueue *vq = &dev->vqs[i];
0438
0439 vq->ready = false;
0440 vq->desc_addr = 0;
0441 vq->driver_addr = 0;
0442 vq->device_addr = 0;
0443 vq->num = 0;
0444 memset(&vq->state, 0, sizeof(vq->state));
0445
/* Drop the kick eventfd reference and forget any recorded kick. */
0446 spin_lock(&vq->kick_lock);
0447 vq->kicked = false;
0448 if (vq->kickfd)
0449 eventfd_ctx_put(vq->kickfd);
0450 vq->kickfd = NULL;
0451 spin_unlock(&vq->kick_lock);
0452
0453 spin_lock(&vq->irq_lock);
0454 vq->cb.callback = NULL;
0455 vq->cb.private = NULL;
0456 spin_unlock(&vq->irq_lock);
/* Wait for per-queue work items that may already be queued or running. */
0457 flush_work(&vq->inject);
0458 flush_work(&vq->kick);
0459 }
0460
0461 up_write(&dev->rwsem);
0462 }
0463
0464 static int vduse_vdpa_set_vq_address(struct vdpa_device *vdpa, u16 idx,
0465 u64 desc_area, u64 driver_area,
0466 u64 device_area)
0467 {
0468 struct vduse_dev *dev = vdpa_to_vduse(vdpa);
0469 struct vduse_virtqueue *vq = &dev->vqs[idx];
0470
0471 vq->desc_addr = desc_area;
0472 vq->driver_addr = driver_area;
0473 vq->device_addr = device_area;
0474
0475 return 0;
0476 }
0477
0478 static void vduse_vq_kick(struct vduse_virtqueue *vq)
0479 {
0480 spin_lock(&vq->kick_lock);
0481 if (!vq->ready)
0482 goto unlock;
0483
0484 if (vq->kickfd)
0485 eventfd_signal(vq->kickfd, 1);
0486 else
0487 vq->kicked = true;
0488 unlock:
0489 spin_unlock(&vq->kick_lock);
0490 }
0491
0492 static void vduse_vq_kick_work(struct work_struct *work)
0493 {
0494 struct vduse_virtqueue *vq = container_of(work,
0495 struct vduse_virtqueue, kick);
0496
0497 vduse_vq_kick(vq);
0498 }
0499
0500 static void vduse_vdpa_kick_vq(struct vdpa_device *vdpa, u16 idx)
0501 {
0502 struct vduse_dev *dev = vdpa_to_vduse(vdpa);
0503 struct vduse_virtqueue *vq = &dev->vqs[idx];
0504
0505 if (!eventfd_signal_allowed()) {
0506 schedule_work(&vq->kick);
0507 return;
0508 }
0509 vduse_vq_kick(vq);
0510 }
0511
0512 static void vduse_vdpa_set_vq_cb(struct vdpa_device *vdpa, u16 idx,
0513 struct vdpa_callback *cb)
0514 {
0515 struct vduse_dev *dev = vdpa_to_vduse(vdpa);
0516 struct vduse_virtqueue *vq = &dev->vqs[idx];
0517
0518 spin_lock(&vq->irq_lock);
0519 vq->cb.callback = cb->callback;
0520 vq->cb.private = cb->private;
0521 spin_unlock(&vq->irq_lock);
0522 }
0523
0524 static void vduse_vdpa_set_vq_num(struct vdpa_device *vdpa, u16 idx, u32 num)
0525 {
0526 struct vduse_dev *dev = vdpa_to_vduse(vdpa);
0527 struct vduse_virtqueue *vq = &dev->vqs[idx];
0528
0529 vq->num = num;
0530 }
0531
0532 static void vduse_vdpa_set_vq_ready(struct vdpa_device *vdpa,
0533 u16 idx, bool ready)
0534 {
0535 struct vduse_dev *dev = vdpa_to_vduse(vdpa);
0536 struct vduse_virtqueue *vq = &dev->vqs[idx];
0537
0538 vq->ready = ready;
0539 }
0540
0541 static bool vduse_vdpa_get_vq_ready(struct vdpa_device *vdpa, u16 idx)
0542 {
0543 struct vduse_dev *dev = vdpa_to_vduse(vdpa);
0544 struct vduse_virtqueue *vq = &dev->vqs[idx];
0545
0546 return vq->ready;
0547 }
0548
0549 static int vduse_vdpa_set_vq_state(struct vdpa_device *vdpa, u16 idx,
0550 const struct vdpa_vq_state *state)
0551 {
0552 struct vduse_dev *dev = vdpa_to_vduse(vdpa);
0553 struct vduse_virtqueue *vq = &dev->vqs[idx];
0554
0555 if (dev->driver_features & BIT_ULL(VIRTIO_F_RING_PACKED)) {
0556 vq->state.packed.last_avail_counter =
0557 state->packed.last_avail_counter;
0558 vq->state.packed.last_avail_idx = state->packed.last_avail_idx;
0559 vq->state.packed.last_used_counter =
0560 state->packed.last_used_counter;
0561 vq->state.packed.last_used_idx = state->packed.last_used_idx;
0562 } else
0563 vq->state.split.avail_index = state->split.avail_index;
0564
0565 return 0;
0566 }
0567
0568 static int vduse_vdpa_get_vq_state(struct vdpa_device *vdpa, u16 idx,
0569 struct vdpa_vq_state *state)
0570 {
0571 struct vduse_dev *dev = vdpa_to_vduse(vdpa);
0572 struct vduse_virtqueue *vq = &dev->vqs[idx];
0573
0574 if (dev->driver_features & BIT_ULL(VIRTIO_F_RING_PACKED))
0575 return vduse_dev_get_vq_state_packed(dev, vq, &state->packed);
0576
0577 return vduse_dev_get_vq_state_split(dev, vq, &state->split);
0578 }
0579
0580 static u32 vduse_vdpa_get_vq_align(struct vdpa_device *vdpa)
0581 {
0582 struct vduse_dev *dev = vdpa_to_vduse(vdpa);
0583
0584 return dev->vq_align;
0585 }
0586
0587 static u64 vduse_vdpa_get_device_features(struct vdpa_device *vdpa)
0588 {
0589 struct vduse_dev *dev = vdpa_to_vduse(vdpa);
0590
0591 return dev->device_features;
0592 }
0593
0594 static int vduse_vdpa_set_driver_features(struct vdpa_device *vdpa, u64 features)
0595 {
0596 struct vduse_dev *dev = vdpa_to_vduse(vdpa);
0597
0598 dev->driver_features = features;
0599 return 0;
0600 }
0601
0602 static u64 vduse_vdpa_get_driver_features(struct vdpa_device *vdpa)
0603 {
0604 struct vduse_dev *dev = vdpa_to_vduse(vdpa);
0605
0606 return dev->driver_features;
0607 }
0608
0609 static void vduse_vdpa_set_config_cb(struct vdpa_device *vdpa,
0610 struct vdpa_callback *cb)
0611 {
0612 struct vduse_dev *dev = vdpa_to_vduse(vdpa);
0613
0614 spin_lock(&dev->irq_lock);
0615 dev->config_cb.callback = cb->callback;
0616 dev->config_cb.private = cb->private;
0617 spin_unlock(&dev->irq_lock);
0618 }
0619
0620 static u16 vduse_vdpa_get_vq_num_max(struct vdpa_device *vdpa)
0621 {
0622 struct vduse_dev *dev = vdpa_to_vduse(vdpa);
0623 u16 num_max = 0;
0624 int i;
0625
0626 for (i = 0; i < dev->vq_num; i++)
0627 if (num_max < dev->vqs[i].num_max)
0628 num_max = dev->vqs[i].num_max;
0629
0630 return num_max;
0631 }
0632
0633 static u32 vduse_vdpa_get_device_id(struct vdpa_device *vdpa)
0634 {
0635 struct vduse_dev *dev = vdpa_to_vduse(vdpa);
0636
0637 return dev->device_id;
0638 }
0639
0640 static u32 vduse_vdpa_get_vendor_id(struct vdpa_device *vdpa)
0641 {
0642 struct vduse_dev *dev = vdpa_to_vduse(vdpa);
0643
0644 return dev->vendor_id;
0645 }
0646
0647 static u8 vduse_vdpa_get_status(struct vdpa_device *vdpa)
0648 {
0649 struct vduse_dev *dev = vdpa_to_vduse(vdpa);
0650
0651 return dev->status;
0652 }
0653
0654 static void vduse_vdpa_set_status(struct vdpa_device *vdpa, u8 status)
0655 {
0656 struct vduse_dev *dev = vdpa_to_vduse(vdpa);
0657
0658 if (vduse_dev_set_status(dev, status))
0659 return;
0660
0661 dev->status = status;
0662 }
0663
0664 static size_t vduse_vdpa_get_config_size(struct vdpa_device *vdpa)
0665 {
0666 struct vduse_dev *dev = vdpa_to_vduse(vdpa);
0667
0668 return dev->config_size;
0669 }
0670
0671 static void vduse_vdpa_get_config(struct vdpa_device *vdpa, unsigned int offset,
0672 void *buf, unsigned int len)
0673 {
0674 struct vduse_dev *dev = vdpa_to_vduse(vdpa);
0675
0676
0677 memset(buf, 0, len);
0678
0679 if (offset > dev->config_size)
0680 return;
0681
0682 if (len > dev->config_size - offset)
0683 len = dev->config_size - offset;
0684
0685 memcpy(buf, dev->config + offset, len);
0686 }
0687
/* Config-space write hook: the body is empty, so writes through this op are ignored. */
0688 static void vduse_vdpa_set_config(struct vdpa_device *vdpa, unsigned int offset,
0689 const void *buf, unsigned int len)
0690 {
0691
0692 }
0693
/*
 * Reset the device: tell userspace the status is now 0, then reset the
 * kernel-side state.  The local reset is performed even if the message
 * fails; the message's result is what gets returned.
 */
static int vduse_vdpa_reset(struct vdpa_device *vdpa)
{
	struct vduse_dev *dev = vdpa_to_vduse(vdpa);
	int ret;

	ret = vduse_dev_set_status(dev, 0);
	vduse_dev_reset(dev);

	return ret;
}
0703
0704 static u32 vduse_vdpa_get_generation(struct vdpa_device *vdpa)
0705 {
0706 struct vduse_dev *dev = vdpa_to_vduse(vdpa);
0707
0708 return dev->generation;
0709 }
0710
0711 static int vduse_vdpa_set_map(struct vdpa_device *vdpa,
0712 unsigned int asid,
0713 struct vhost_iotlb *iotlb)
0714 {
0715 struct vduse_dev *dev = vdpa_to_vduse(vdpa);
0716 int ret;
0717
0718 ret = vduse_domain_set_map(dev->domain, iotlb);
0719 if (ret)
0720 return ret;
0721
0722 ret = vduse_dev_update_iotlb(dev, 0ULL, ULLONG_MAX);
0723 if (ret) {
0724 vduse_domain_clear_map(dev->domain, iotlb);
0725 return ret;
0726 }
0727
0728 return 0;
0729 }
0730
0731 static void vduse_vdpa_free(struct vdpa_device *vdpa)
0732 {
0733 struct vduse_dev *dev = vdpa_to_vduse(vdpa);
0734
0735 dev->vdev = NULL;
0736 }
0737
/* vDPA config operations table wiring each op to its vduse_vdpa_* handler above. */
0738 static const struct vdpa_config_ops vduse_vdpa_config_ops = {
0739 .set_vq_address = vduse_vdpa_set_vq_address,
0740 .kick_vq = vduse_vdpa_kick_vq,
0741 .set_vq_cb = vduse_vdpa_set_vq_cb,
0742 .set_vq_num = vduse_vdpa_set_vq_num,
0743 .set_vq_ready = vduse_vdpa_set_vq_ready,
0744 .get_vq_ready = vduse_vdpa_get_vq_ready,
0745 .set_vq_state = vduse_vdpa_set_vq_state,
0746 .get_vq_state = vduse_vdpa_get_vq_state,
0747 .get_vq_align = vduse_vdpa_get_vq_align,
0748 .get_device_features = vduse_vdpa_get_device_features,
0749 .set_driver_features = vduse_vdpa_set_driver_features,
0750 .get_driver_features = vduse_vdpa_get_driver_features,
0751 .set_config_cb = vduse_vdpa_set_config_cb,
0752 .get_vq_num_max = vduse_vdpa_get_vq_num_max,
0753 .get_device_id = vduse_vdpa_get_device_id,
0754 .get_vendor_id = vduse_vdpa_get_vendor_id,
0755 .get_status = vduse_vdpa_get_status,
0756 .set_status = vduse_vdpa_set_status,
0757 .get_config_size = vduse_vdpa_get_config_size,
0758 .get_config = vduse_vdpa_get_config,
/* set_config is an empty function: config writes are ignored. */
0759 .set_config = vduse_vdpa_set_config,
0760 .get_generation = vduse_vdpa_get_generation,
0761 .reset = vduse_vdpa_reset,
0762 .set_map = vduse_vdpa_set_map,
0763 .free = vduse_vdpa_free,
0764 };
0765
0766 static dma_addr_t vduse_dev_map_page(struct device *dev, struct page *page,
0767 unsigned long offset, size_t size,
0768 enum dma_data_direction dir,
0769 unsigned long attrs)
0770 {
0771 struct vduse_dev *vdev = dev_to_vduse(dev);
0772 struct vduse_iova_domain *domain = vdev->domain;
0773
0774 return vduse_domain_map_page(domain, page, offset, size, dir, attrs);
0775 }
0776
0777 static void vduse_dev_unmap_page(struct device *dev, dma_addr_t dma_addr,
0778 size_t size, enum dma_data_direction dir,
0779 unsigned long attrs)
0780 {
0781 struct vduse_dev *vdev = dev_to_vduse(dev);
0782 struct vduse_iova_domain *domain = vdev->domain;
0783
0784 return vduse_domain_unmap_page(domain, dma_addr, size, dir, attrs);
0785 }
0786
0787 static void *vduse_dev_alloc_coherent(struct device *dev, size_t size,
0788 dma_addr_t *dma_addr, gfp_t flag,
0789 unsigned long attrs)
0790 {
0791 struct vduse_dev *vdev = dev_to_vduse(dev);
0792 struct vduse_iova_domain *domain = vdev->domain;
0793 unsigned long iova;
0794 void *addr;
0795
0796 *dma_addr = DMA_MAPPING_ERROR;
0797 addr = vduse_domain_alloc_coherent(domain, size,
0798 (dma_addr_t *)&iova, flag, attrs);
0799 if (!addr)
0800 return NULL;
0801
0802 *dma_addr = (dma_addr_t)iova;
0803
0804 return addr;
0805 }
0806
0807 static void vduse_dev_free_coherent(struct device *dev, size_t size,
0808 void *vaddr, dma_addr_t dma_addr,
0809 unsigned long attrs)
0810 {
0811 struct vduse_dev *vdev = dev_to_vduse(dev);
0812 struct vduse_iova_domain *domain = vdev->domain;
0813
0814 vduse_domain_free_coherent(domain, size, vaddr, dma_addr, attrs);
0815 }
0816
0817 static size_t vduse_dev_max_mapping_size(struct device *dev)
0818 {
0819 struct vduse_dev *vdev = dev_to_vduse(dev);
0820 struct vduse_iova_domain *domain = vdev->domain;
0821
0822 return domain->bounce_size;
0823 }
0824
/* DMA operations table; every handler forwards to the device's vduse_iova_domain. */
0825 static const struct dma_map_ops vduse_dev_dma_ops = {
0826 .map_page = vduse_dev_map_page,
0827 .unmap_page = vduse_dev_unmap_page,
0828 .alloc = vduse_dev_alloc_coherent,
0829 .free = vduse_dev_free_coherent,
0830 .max_mapping_size = vduse_dev_max_mapping_size,
0831 };
0832
0833 static unsigned int perm_to_file_flags(u8 perm)
0834 {
0835 unsigned int flags = 0;
0836
0837 switch (perm) {
0838 case VDUSE_ACCESS_WO:
0839 flags |= O_WRONLY;
0840 break;
0841 case VDUSE_ACCESS_RO:
0842 flags |= O_RDONLY;
0843 break;
0844 case VDUSE_ACCESS_RW:
0845 flags |= O_RDWR;
0846 break;
0847 default:
0848 WARN(1, "invalidate vhost IOTLB permission\n");
0849 break;
0850 }
0851
0852 return flags;
0853 }
0854
0855 static int vduse_kickfd_setup(struct vduse_dev *dev,
0856 struct vduse_vq_eventfd *eventfd)
0857 {
0858 struct eventfd_ctx *ctx = NULL;
0859 struct vduse_virtqueue *vq;
0860 u32 index;
0861
0862 if (eventfd->index >= dev->vq_num)
0863 return -EINVAL;
0864
0865 index = array_index_nospec(eventfd->index, dev->vq_num);
0866 vq = &dev->vqs[index];
0867 if (eventfd->fd >= 0) {
0868 ctx = eventfd_ctx_fdget(eventfd->fd);
0869 if (IS_ERR(ctx))
0870 return PTR_ERR(ctx);
0871 } else if (eventfd->fd != VDUSE_EVENTFD_DEASSIGN)
0872 return 0;
0873
0874 spin_lock(&vq->kick_lock);
0875 if (vq->kickfd)
0876 eventfd_ctx_put(vq->kickfd);
0877 vq->kickfd = ctx;
0878 if (vq->ready && vq->kicked && vq->kickfd) {
0879 eventfd_signal(vq->kickfd, 1);
0880 vq->kicked = false;
0881 }
0882 spin_unlock(&vq->kick_lock);
0883
0884 return 0;
0885 }
0886
0887 static bool vduse_dev_is_ready(struct vduse_dev *dev)
0888 {
0889 int i;
0890
0891 for (i = 0; i < dev->vq_num; i++)
0892 if (!dev->vqs[i].num_max)
0893 return false;
0894
0895 return true;
0896 }
0897
0898 static void vduse_dev_irq_inject(struct work_struct *work)
0899 {
0900 struct vduse_dev *dev = container_of(work, struct vduse_dev, inject);
0901
0902 spin_lock_irq(&dev->irq_lock);
0903 if (dev->config_cb.callback)
0904 dev->config_cb.callback(dev->config_cb.private);
0905 spin_unlock_irq(&dev->irq_lock);
0906 }
0907
0908 static void vduse_vq_irq_inject(struct work_struct *work)
0909 {
0910 struct vduse_virtqueue *vq = container_of(work,
0911 struct vduse_virtqueue, inject);
0912
0913 spin_lock_irq(&vq->irq_lock);
0914 if (vq->ready && vq->cb.callback)
0915 vq->cb.callback(vq->cb.private);
0916 spin_unlock_irq(&vq->irq_lock);
0917 }
0918
0919 static int vduse_dev_queue_irq_work(struct vduse_dev *dev,
0920 struct work_struct *irq_work)
0921 {
0922 int ret = -EINVAL;
0923
0924 down_read(&dev->rwsem);
0925 if (!(dev->status & VIRTIO_CONFIG_S_DRIVER_OK))
0926 goto unlock;
0927
0928 ret = 0;
0929 queue_work(vduse_irq_wq, irq_work);
0930 unlock:
0931 up_read(&dev->rwsem);
0932
0933 return ret;
0934 }
0935
0936 static int vduse_dev_dereg_umem(struct vduse_dev *dev,
0937 u64 iova, u64 size)
0938 {
0939 int ret;
0940
0941 mutex_lock(&dev->mem_lock);
0942 ret = -ENOENT;
0943 if (!dev->umem)
0944 goto unlock;
0945
0946 ret = -EINVAL;
0947 if (dev->umem->iova != iova || size != dev->domain->bounce_size)
0948 goto unlock;
0949
0950 vduse_domain_remove_user_bounce_pages(dev->domain);
0951 unpin_user_pages_dirty_lock(dev->umem->pages,
0952 dev->umem->npages, true);
0953 atomic64_sub(dev->umem->npages, &dev->umem->mm->pinned_vm);
0954 mmdrop(dev->umem->mm);
0955 vfree(dev->umem->pages);
0956 kfree(dev->umem);
0957 dev->umem = NULL;
0958 ret = 0;
0959 unlock:
0960 mutex_unlock(&dev->mem_lock);
0961 return ret;
0962 }
0963
0964 static int vduse_dev_reg_umem(struct vduse_dev *dev,
0965 u64 iova, u64 uaddr, u64 size)
0966 {
0967 struct page **page_list = NULL;
0968 struct vduse_umem *umem = NULL;
0969 long pinned = 0;
0970 unsigned long npages, lock_limit;
0971 int ret;
0972
0973 if (!dev->domain->bounce_map ||
0974 size != dev->domain->bounce_size ||
0975 iova != 0 || uaddr & ~PAGE_MASK)
0976 return -EINVAL;
0977
0978 mutex_lock(&dev->mem_lock);
0979 ret = -EEXIST;
0980 if (dev->umem)
0981 goto unlock;
0982
0983 ret = -ENOMEM;
0984 npages = size >> PAGE_SHIFT;
0985 page_list = __vmalloc(array_size(npages, sizeof(struct page *)),
0986 GFP_KERNEL_ACCOUNT);
0987 umem = kzalloc(sizeof(*umem), GFP_KERNEL);
0988 if (!page_list || !umem)
0989 goto unlock;
0990
0991 mmap_read_lock(current->mm);
0992
0993 lock_limit = PFN_DOWN(rlimit(RLIMIT_MEMLOCK));
0994 if (npages + atomic64_read(¤t->mm->pinned_vm) > lock_limit)
0995 goto out;
0996
0997 pinned = pin_user_pages(uaddr, npages, FOLL_LONGTERM | FOLL_WRITE,
0998 page_list, NULL);
0999 if (pinned != npages) {
1000 ret = pinned < 0 ? pinned : -ENOMEM;
1001 goto out;
1002 }
1003
1004 ret = vduse_domain_add_user_bounce_pages(dev->domain,
1005 page_list, pinned);
1006 if (ret)
1007 goto out;
1008
1009 atomic64_add(npages, ¤t->mm->pinned_vm);
1010
1011 umem->pages = page_list;
1012 umem->npages = pinned;
1013 umem->iova = iova;
1014 umem->mm = current->mm;
1015 mmgrab(current->mm);
1016
1017 dev->umem = umem;
1018 out:
1019 if (ret && pinned > 0)
1020 unpin_user_pages(page_list, pinned);
1021
1022 mmap_read_unlock(current->mm);
1023 unlock:
1024 if (ret) {
1025 vfree(page_list);
1026 kfree(umem);
1027 }
1028 mutex_unlock(&dev->mem_lock);
1029 return ret;
1030 }
1031
/*
 * ioctl handler of the per-device file.
 *
 * Every command is refused with -EPERM once the device is marked broken.
 * Unknown commands return -ENOIOCTLCMD.  Each case sets a pessimistic
 * error code before a step and breaks out on failure.
 */
0032 static long vduse_dev_ioctl(struct file *file, unsigned int cmd,
0033 unsigned long arg)
0034 {
0035 struct vduse_dev *dev = file->private_data;
0036 void __user *argp = (void __user *)arg;
0037 int ret;
0038
0039 if (unlikely(dev->broken))
0040 return -EPERM;
0041
0042 switch (cmd) {
/*
 * Look up the first IOTLB mapping overlapping [start, last], write its
 * details back to userspace, and return a new fd for the backing file.
 */
0043 case VDUSE_IOTLB_GET_FD: {
0044 struct vduse_iotlb_entry entry;
0045 struct vhost_iotlb_map *map;
0046 struct vdpa_map_file *map_file;
0047 struct vduse_iova_domain *domain = dev->domain;
0048 struct file *f = NULL;
0049
0050 ret = -EFAULT;
0051 if (copy_from_user(&entry, argp, sizeof(entry)))
0052 break;
0053
0054 ret = -EINVAL;
0055 if (entry.start > entry.last)
0056 break;
0057
0058 spin_lock(&domain->iotlb_lock);
0059 map = vhost_iotlb_itree_first(domain->iotlb,
0060 entry.start, entry.last);
0061 if (map) {
0062 map_file = (struct vdpa_map_file *)map->opaque;
/* Take a file reference while the lock still protects the mapping. */
0063 f = get_file(map_file->file);
0064 entry.offset = map_file->offset;
0065 entry.start = map->start;
0066 entry.last = map->last;
0067 entry.perm = map->perm;
0068 }
0069 spin_unlock(&domain->iotlb_lock);
0070 ret = -EINVAL;
0071 if (!f)
0072 break;
0073
0074 ret = -EFAULT;
0075 if (copy_to_user(argp, &entry, sizeof(entry))) {
0076 fput(f);
0077 break;
0078 }
/* ret becomes the value returned by receive_fd(); our own reference is then dropped. */
0079 ret = receive_fd(f, perm_to_file_flags(entry.perm));
0080 fput(f);
0081 break;
0082 }
/* Reports dev->driver_features, i.e. the bits written by the driver. */
0083 case VDUSE_DEV_GET_FEATURES:
0084
0085
0086
0087
0088 ret = put_user(dev->driver_features, (u64 __user *)argp);
0089 break;
/* Overwrite part of the config space with data supplied by userspace. */
0090 case VDUSE_DEV_SET_CONFIG: {
0091 struct vduse_config_data config;
/* Only the fixed header preceding `buffer` is copied into `config`. */
0092 unsigned long size = offsetof(struct vduse_config_data,
0093 buffer);
0094
0095 ret = -EFAULT;
0096 if (copy_from_user(&config, argp, size))
0097 break;
0098
/* Written so that offset + length cannot overflow past config_size. */
0099 ret = -EINVAL;
0100 if (config.offset > dev->config_size ||
0101 config.length == 0 ||
0102 config.length > dev->config_size - config.offset)
0103 break;
0104
0105 ret = -EFAULT;
0106 if (copy_from_user(dev->config + config.offset, argp + size,
0107 config.length))
0108 break;
0109
0110 ret = 0;
0111 break;
0112 }
0113 case VDUSE_DEV_INJECT_CONFIG_IRQ:
0114 ret = vduse_dev_queue_irq_work(dev, &dev->inject);
0115 break;
/* Set num_max of one virtqueue from config.max_size. */
0116 case VDUSE_VQ_SETUP: {
0117 struct vduse_vq_config config;
0118 u32 index;
0119
0120 ret = -EFAULT;
0121 if (copy_from_user(&config, argp, sizeof(config)))
0122 break;
0123
0124 ret = -EINVAL;
0125 if (config.index >= dev->vq_num)
0126 break;
0127
0128 if (!is_mem_zero((const char *)config.reserved,
0129 sizeof(config.reserved)))
0130 break;
0131
0132 index = array_index_nospec(config.index, dev->vq_num);
0133 dev->vqs[index].num_max = config.max_size;
0134 ret = 0;
0135 break;
0136 }
/* Copy the kernel-side view of one virtqueue back to userspace. */
0137 case VDUSE_VQ_GET_INFO: {
0138 struct vduse_vq_info vq_info;
0139 struct vduse_virtqueue *vq;
0140 u32 index;
0141
0142 ret = -EFAULT;
0143 if (copy_from_user(&vq_info, argp, sizeof(vq_info)))
0144 break;
0145
0146 ret = -EINVAL;
0147 if (vq_info.index >= dev->vq_num)
0148 break;
0149
0150 index = array_index_nospec(vq_info.index, dev->vq_num);
0151 vq = &dev->vqs[index];
0152 vq_info.desc_addr = vq->desc_addr;
0153 vq_info.driver_addr = vq->driver_addr;
0154 vq_info.device_addr = vq->device_addr;
0155 vq_info.num = vq->num;
0156
0157 if (dev->driver_features & BIT_ULL(VIRTIO_F_RING_PACKED)) {
0158 vq_info.packed.last_avail_counter =
0159 vq->state.packed.last_avail_counter;
0160 vq_info.packed.last_avail_idx =
0161 vq->state.packed.last_avail_idx;
0162 vq_info.packed.last_used_counter =
0163 vq->state.packed.last_used_counter;
0164 vq_info.packed.last_used_idx =
0165 vq->state.packed.last_used_idx;
0166 } else
0167 vq_info.split.avail_index =
0168 vq->state.split.avail_index;
0169
0170 vq_info.ready = vq->ready;
0171
0172 ret = -EFAULT;
0173 if (copy_to_user(argp, &vq_info, sizeof(vq_info)))
0174 break;
0175
0176 ret = 0;
0177 break;
0178 }
0179 case VDUSE_VQ_SETUP_KICKFD: {
0180 struct vduse_vq_eventfd eventfd;
0181
0182 ret = -EFAULT;
0183 if (copy_from_user(&eventfd, argp, sizeof(eventfd)))
0184 break;
0185
0186 ret = vduse_kickfd_setup(dev, &eventfd);
0187 break;
0188 }
/* Queue interrupt-injection work for the virtqueue whose index is read from argp. */
0189 case VDUSE_VQ_INJECT_IRQ: {
0190 u32 index;
0191
0192 ret = -EFAULT;
0193 if (get_user(index, (u32 __user *)argp))
0194 break;
0195
0196 ret = -EINVAL;
0197 if (index >= dev->vq_num)
0198 break;
0199
0200 index = array_index_nospec(index, dev->vq_num);
0201 ret = vduse_dev_queue_irq_work(dev, &dev->vqs[index].inject);
0202 break;
0203 }
0204 case VDUSE_IOTLB_REG_UMEM: {
0205 struct vduse_iova_umem umem;
0206
0207 ret = -EFAULT;
0208 if (copy_from_user(&umem, argp, sizeof(umem)))
0209 break;
0210
0211 ret = -EINVAL;
0212 if (!is_mem_zero((const char *)umem.reserved,
0213 sizeof(umem.reserved)))
0214 break;
0215
0216 ret = vduse_dev_reg_umem(dev, umem.iova,
0217 umem.uaddr, umem.size);
0218 break;
0219 }
0220 case VDUSE_IOTLB_DEREG_UMEM: {
0221 struct vduse_iova_umem umem;
0222
0223 ret = -EFAULT;
0224 if (copy_from_user(&umem, argp, sizeof(umem)))
0225 break;
0226
0227 ret = -EINVAL;
0228 if (!is_mem_zero((const char *)umem.reserved,
0229 sizeof(umem.reserved)))
0230 break;
0231
0232 ret = vduse_dev_dereg_umem(dev, umem.iova,
0233 umem.size);
0234 break;
0235 }
/* Report the first IOTLB mapping overlapping [start, last] and its capabilities. */
0236 case VDUSE_IOTLB_GET_INFO: {
0237 struct vduse_iova_info info;
0238 struct vhost_iotlb_map *map;
0239 struct vduse_iova_domain *domain = dev->domain;
0240
0241 ret = -EFAULT;
0242 if (copy_from_user(&info, argp, sizeof(info)))
0243 break;
0244
0245 ret = -EINVAL;
0246 if (info.start > info.last)
0247 break;
0248
0249 if (!is_mem_zero((const char *)info.reserved,
0250 sizeof(info.reserved)))
0251 break;
0252
0253 spin_lock(&domain->iotlb_lock);
0254 map = vhost_iotlb_itree_first(domain->iotlb,
0255 info.start, info.last);
0256 if (map) {
0257 info.start = map->start;
0258 info.last = map->last;
0259 info.capability = 0;
/* UMEM capability is flagged only for a mapping spanning exactly [0, bounce_size - 1]. */
0260 if (domain->bounce_map && map->start == 0 &&
0261 map->last == domain->bounce_size - 1)
0262 info.capability |= VDUSE_IOVA_CAP_UMEM;
0263 }
0264 spin_unlock(&domain->iotlb_lock);
/* No overlapping mapping: ret is still -EINVAL here. */
0265 if (!map)
0266 break;
0267
0268 ret = -EFAULT;
0269 if (copy_to_user(argp, &info, sizeof(info)))
0270 break;
0271
0272 ret = 0;
0273 break;
0274 }
0275 default:
0276 ret = -ENOIOCTLCMD;
0277 break;
0278 }
0279
0280 return ret;
0281 }
1282
1283 static int vduse_dev_release(struct inode *inode, struct file *file)
1284 {
1285 struct vduse_dev *dev = file->private_data;
1286
1287 vduse_dev_dereg_umem(dev, 0, dev->domain->bounce_size);
1288 spin_lock(&dev->msg_lock);
1289
1290 list_splice_init(&dev->recv_list, &dev->send_list);
1291 spin_unlock(&dev->msg_lock);
1292 dev->connected = false;
1293
1294 return 0;
1295 }
1296
1297 static struct vduse_dev *vduse_dev_get_from_minor(int minor)
1298 {
1299 struct vduse_dev *dev;
1300
1301 mutex_lock(&vduse_lock);
1302 dev = idr_find(&vduse_idr, minor);
1303 mutex_unlock(&vduse_lock);
1304
1305 return dev;
1306 }
1307
1308 static int vduse_dev_open(struct inode *inode, struct file *file)
1309 {
1310 int ret;
1311 struct vduse_dev *dev = vduse_dev_get_from_minor(iminor(inode));
1312
1313 if (!dev)
1314 return -ENODEV;
1315
1316 ret = -EBUSY;
1317 mutex_lock(&dev->lock);
1318 if (dev->connected)
1319 goto unlock;
1320
1321 ret = 0;
1322 dev->connected = true;
1323 file->private_data = dev;
1324 unlock:
1325 mutex_unlock(&dev->lock);
1326
1327 return ret;
1328 }
1329
1330 static const struct file_operations vduse_dev_fops = {
1331 .owner = THIS_MODULE,
1332 .open = vduse_dev_open,
1333 .release = vduse_dev_release,
1334 .read_iter = vduse_dev_read_iter,
1335 .write_iter = vduse_dev_write_iter,
1336 .poll = vduse_dev_poll,
1337 .unlocked_ioctl = vduse_dev_ioctl,
1338 .compat_ioctl = compat_ptr_ioctl,
1339 .llseek = noop_llseek,
1340 };
1341
1342 static struct vduse_dev *vduse_dev_create(void)
1343 {
1344 struct vduse_dev *dev = kzalloc(sizeof(*dev), GFP_KERNEL);
1345
1346 if (!dev)
1347 return NULL;
1348
1349 mutex_init(&dev->lock);
1350 mutex_init(&dev->mem_lock);
1351 spin_lock_init(&dev->msg_lock);
1352 INIT_LIST_HEAD(&dev->send_list);
1353 INIT_LIST_HEAD(&dev->recv_list);
1354 spin_lock_init(&dev->irq_lock);
1355 init_rwsem(&dev->rwsem);
1356
1357 INIT_WORK(&dev->inject, vduse_dev_irq_inject);
1358 init_waitqueue_head(&dev->waitq);
1359
1360 return dev;
1361 }
1362
/*
 * Free the struct vduse_dev itself. Members such as the name, virtqueue
 * array, config buffer and IOVA domain are not touched here; callers release
 * them separately.
 */
static void vduse_dev_destroy(struct vduse_dev *dev)
{
	kfree(dev);
}
1367
1368 static struct vduse_dev *vduse_find_dev(const char *name)
1369 {
1370 struct vduse_dev *dev;
1371 int id;
1372
1373 idr_for_each_entry(&vduse_idr, dev, id)
1374 if (!strcmp(dev->name, name))
1375 return dev;
1376
1377 return NULL;
1378 }
1379
1380 static int vduse_destroy_dev(char *name)
1381 {
1382 struct vduse_dev *dev = vduse_find_dev(name);
1383
1384 if (!dev)
1385 return -EINVAL;
1386
1387 mutex_lock(&dev->lock);
1388 if (dev->vdev || dev->connected) {
1389 mutex_unlock(&dev->lock);
1390 return -EBUSY;
1391 }
1392 dev->connected = true;
1393 mutex_unlock(&dev->lock);
1394
1395 vduse_dev_reset(dev);
1396 device_destroy(vduse_class, MKDEV(MAJOR(vduse_major), dev->minor));
1397 idr_remove(&vduse_idr, dev->minor);
1398 kvfree(dev->config);
1399 kfree(dev->vqs);
1400 vduse_domain_destroy(dev->domain);
1401 kfree(dev->name);
1402 vduse_dev_destroy(dev);
1403 module_put(THIS_MODULE);
1404
1405 return 0;
1406 }
1407
1408 static bool device_is_allowed(u32 device_id)
1409 {
1410 int i;
1411
1412 for (i = 0; i < ARRAY_SIZE(allowed_device_id); i++)
1413 if (allowed_device_id[i] == device_id)
1414 return true;
1415
1416 return false;
1417 }
1418
1419 static bool features_is_valid(u64 features)
1420 {
1421 if (!(features & (1ULL << VIRTIO_F_ACCESS_PLATFORM)))
1422 return false;
1423
1424
1425 if (features & (1ULL << VIRTIO_BLK_F_CONFIG_WCE))
1426 return false;
1427
1428 return true;
1429 }
1430
1431 static bool vduse_validate_config(struct vduse_dev_config *config)
1432 {
1433 if (!is_mem_zero((const char *)config->reserved,
1434 sizeof(config->reserved)))
1435 return false;
1436
1437 if (config->vq_align > PAGE_SIZE)
1438 return false;
1439
1440 if (config->config_size > PAGE_SIZE)
1441 return false;
1442
1443 if (!device_is_allowed(config->device_id))
1444 return false;
1445
1446 if (!features_is_valid(config->features))
1447 return false;
1448
1449 return true;
1450 }
1451
1452 static ssize_t msg_timeout_show(struct device *device,
1453 struct device_attribute *attr, char *buf)
1454 {
1455 struct vduse_dev *dev = dev_get_drvdata(device);
1456
1457 return sysfs_emit(buf, "%u\n", dev->msg_timeout);
1458 }
1459
1460 static ssize_t msg_timeout_store(struct device *device,
1461 struct device_attribute *attr,
1462 const char *buf, size_t count)
1463 {
1464 struct vduse_dev *dev = dev_get_drvdata(device);
1465 int ret;
1466
1467 ret = kstrtouint(buf, 10, &dev->msg_timeout);
1468 if (ret < 0)
1469 return ret;
1470
1471 return count;
1472 }
1473
1474 static DEVICE_ATTR_RW(msg_timeout);
1475
1476 static struct attribute *vduse_dev_attrs[] = {
1477 &dev_attr_msg_timeout.attr,
1478 NULL
1479 };
1480
1481 ATTRIBUTE_GROUPS(vduse_dev);
1482
1483 static int vduse_create_dev(struct vduse_dev_config *config,
1484 void *config_buf, u64 api_version)
1485 {
1486 int i, ret;
1487 struct vduse_dev *dev;
1488
1489 ret = -EEXIST;
1490 if (vduse_find_dev(config->name))
1491 goto err;
1492
1493 ret = -ENOMEM;
1494 dev = vduse_dev_create();
1495 if (!dev)
1496 goto err;
1497
1498 dev->api_version = api_version;
1499 dev->device_features = config->features;
1500 dev->device_id = config->device_id;
1501 dev->vendor_id = config->vendor_id;
1502 dev->name = kstrdup(config->name, GFP_KERNEL);
1503 if (!dev->name)
1504 goto err_str;
1505
1506 dev->domain = vduse_domain_create(VDUSE_IOVA_SIZE - 1,
1507 VDUSE_BOUNCE_SIZE);
1508 if (!dev->domain)
1509 goto err_domain;
1510
1511 dev->config = config_buf;
1512 dev->config_size = config->config_size;
1513 dev->vq_align = config->vq_align;
1514 dev->vq_num = config->vq_num;
1515 dev->vqs = kcalloc(dev->vq_num, sizeof(*dev->vqs), GFP_KERNEL);
1516 if (!dev->vqs)
1517 goto err_vqs;
1518
1519 for (i = 0; i < dev->vq_num; i++) {
1520 dev->vqs[i].index = i;
1521 INIT_WORK(&dev->vqs[i].inject, vduse_vq_irq_inject);
1522 INIT_WORK(&dev->vqs[i].kick, vduse_vq_kick_work);
1523 spin_lock_init(&dev->vqs[i].kick_lock);
1524 spin_lock_init(&dev->vqs[i].irq_lock);
1525 }
1526
1527 ret = idr_alloc(&vduse_idr, dev, 1, VDUSE_DEV_MAX, GFP_KERNEL);
1528 if (ret < 0)
1529 goto err_idr;
1530
1531 dev->minor = ret;
1532 dev->msg_timeout = VDUSE_MSG_DEFAULT_TIMEOUT;
1533 dev->dev = device_create_with_groups(vduse_class, NULL,
1534 MKDEV(MAJOR(vduse_major), dev->minor),
1535 dev, vduse_dev_groups, "%s", config->name);
1536 if (IS_ERR(dev->dev)) {
1537 ret = PTR_ERR(dev->dev);
1538 goto err_dev;
1539 }
1540 __module_get(THIS_MODULE);
1541
1542 return 0;
1543 err_dev:
1544 idr_remove(&vduse_idr, dev->minor);
1545 err_idr:
1546 kfree(dev->vqs);
1547 err_vqs:
1548 vduse_domain_destroy(dev->domain);
1549 err_domain:
1550 kfree(dev->name);
1551 err_str:
1552 vduse_dev_destroy(dev);
1553 err:
1554 return ret;
1555 }
1556
1557 static long vduse_ioctl(struct file *file, unsigned int cmd,
1558 unsigned long arg)
1559 {
1560 int ret;
1561 void __user *argp = (void __user *)arg;
1562 struct vduse_control *control = file->private_data;
1563
1564 mutex_lock(&vduse_lock);
1565 switch (cmd) {
1566 case VDUSE_GET_API_VERSION:
1567 ret = put_user(control->api_version, (u64 __user *)argp);
1568 break;
1569 case VDUSE_SET_API_VERSION: {
1570 u64 api_version;
1571
1572 ret = -EFAULT;
1573 if (get_user(api_version, (u64 __user *)argp))
1574 break;
1575
1576 ret = -EINVAL;
1577 if (api_version > VDUSE_API_VERSION)
1578 break;
1579
1580 ret = 0;
1581 control->api_version = api_version;
1582 break;
1583 }
1584 case VDUSE_CREATE_DEV: {
1585 struct vduse_dev_config config;
1586 unsigned long size = offsetof(struct vduse_dev_config, config);
1587 void *buf;
1588
1589 ret = -EFAULT;
1590 if (copy_from_user(&config, argp, size))
1591 break;
1592
1593 ret = -EINVAL;
1594 if (vduse_validate_config(&config) == false)
1595 break;
1596
1597 buf = vmemdup_user(argp + size, config.config_size);
1598 if (IS_ERR(buf)) {
1599 ret = PTR_ERR(buf);
1600 break;
1601 }
1602 config.name[VDUSE_NAME_MAX - 1] = '\0';
1603 ret = vduse_create_dev(&config, buf, control->api_version);
1604 if (ret)
1605 kvfree(buf);
1606 break;
1607 }
1608 case VDUSE_DESTROY_DEV: {
1609 char name[VDUSE_NAME_MAX];
1610
1611 ret = -EFAULT;
1612 if (copy_from_user(name, argp, VDUSE_NAME_MAX))
1613 break;
1614
1615 name[VDUSE_NAME_MAX - 1] = '\0';
1616 ret = vduse_destroy_dev(name);
1617 break;
1618 }
1619 default:
1620 ret = -EINVAL;
1621 break;
1622 }
1623 mutex_unlock(&vduse_lock);
1624
1625 return ret;
1626 }
1627
1628 static int vduse_release(struct inode *inode, struct file *file)
1629 {
1630 struct vduse_control *control = file->private_data;
1631
1632 kfree(control);
1633 return 0;
1634 }
1635
1636 static int vduse_open(struct inode *inode, struct file *file)
1637 {
1638 struct vduse_control *control;
1639
1640 control = kmalloc(sizeof(struct vduse_control), GFP_KERNEL);
1641 if (!control)
1642 return -ENOMEM;
1643
1644 control->api_version = VDUSE_API_VERSION;
1645 file->private_data = control;
1646
1647 return 0;
1648 }
1649
1650 static const struct file_operations vduse_ctrl_fops = {
1651 .owner = THIS_MODULE,
1652 .open = vduse_open,
1653 .release = vduse_release,
1654 .unlocked_ioctl = vduse_ioctl,
1655 .compat_ioctl = compat_ptr_ioctl,
1656 .llseek = noop_llseek,
1657 };
1658
1659 static char *vduse_devnode(struct device *dev, umode_t *mode)
1660 {
1661 return kasprintf(GFP_KERNEL, "vduse/%s", dev_name(dev));
1662 }
1663
1664 struct vduse_mgmt_dev {
1665 struct vdpa_mgmt_dev mgmt_dev;
1666 struct device dev;
1667 };
1668
1669 static struct vduse_mgmt_dev *vduse_mgmt;
1670
1671 static int vduse_dev_init_vdpa(struct vduse_dev *dev, const char *name)
1672 {
1673 struct vduse_vdpa *vdev;
1674 int ret;
1675
1676 if (dev->vdev)
1677 return -EEXIST;
1678
1679 vdev = vdpa_alloc_device(struct vduse_vdpa, vdpa, dev->dev,
1680 &vduse_vdpa_config_ops, 1, 1, name, true);
1681 if (IS_ERR(vdev))
1682 return PTR_ERR(vdev);
1683
1684 dev->vdev = vdev;
1685 vdev->dev = dev;
1686 vdev->vdpa.dev.dma_mask = &vdev->vdpa.dev.coherent_dma_mask;
1687 ret = dma_set_mask_and_coherent(&vdev->vdpa.dev, DMA_BIT_MASK(64));
1688 if (ret) {
1689 put_device(&vdev->vdpa.dev);
1690 return ret;
1691 }
1692 set_dma_ops(&vdev->vdpa.dev, &vduse_dev_dma_ops);
1693 vdev->vdpa.dma_dev = &vdev->vdpa.dev;
1694 vdev->vdpa.mdev = &vduse_mgmt->mgmt_dev;
1695
1696 return 0;
1697 }
1698
1699 static int vdpa_dev_add(struct vdpa_mgmt_dev *mdev, const char *name,
1700 const struct vdpa_dev_set_config *config)
1701 {
1702 struct vduse_dev *dev;
1703 int ret;
1704
1705 mutex_lock(&vduse_lock);
1706 dev = vduse_find_dev(name);
1707 if (!dev || !vduse_dev_is_ready(dev)) {
1708 mutex_unlock(&vduse_lock);
1709 return -EINVAL;
1710 }
1711 ret = vduse_dev_init_vdpa(dev, name);
1712 mutex_unlock(&vduse_lock);
1713 if (ret)
1714 return ret;
1715
1716 ret = _vdpa_register_device(&dev->vdev->vdpa, dev->vq_num);
1717 if (ret) {
1718 put_device(&dev->vdev->vdpa.dev);
1719 return ret;
1720 }
1721
1722 return 0;
1723 }
1724
/* Management-device "del" callback: unregister @dev from the vDPA core. */
static void vdpa_dev_del(struct vdpa_mgmt_dev *mdev, struct vdpa_device *dev)
{
	_vdpa_unregister_device(dev);
}
1729
1730 static const struct vdpa_mgmtdev_ops vdpa_dev_mgmtdev_ops = {
1731 .dev_add = vdpa_dev_add,
1732 .dev_del = vdpa_dev_del,
1733 };
1734
1735 static struct virtio_device_id id_table[] = {
1736 { VIRTIO_ID_BLOCK, VIRTIO_DEV_ANY_ID },
1737 { 0 },
1738 };
1739
1740 static void vduse_mgmtdev_release(struct device *dev)
1741 {
1742 struct vduse_mgmt_dev *mgmt_dev;
1743
1744 mgmt_dev = container_of(dev, struct vduse_mgmt_dev, dev);
1745 kfree(mgmt_dev);
1746 }
1747
1748 static int vduse_mgmtdev_init(void)
1749 {
1750 int ret;
1751
1752 vduse_mgmt = kzalloc(sizeof(*vduse_mgmt), GFP_KERNEL);
1753 if (!vduse_mgmt)
1754 return -ENOMEM;
1755
1756 ret = dev_set_name(&vduse_mgmt->dev, "vduse");
1757 if (ret) {
1758 kfree(vduse_mgmt);
1759 return ret;
1760 }
1761
1762 vduse_mgmt->dev.release = vduse_mgmtdev_release;
1763
1764 ret = device_register(&vduse_mgmt->dev);
1765 if (ret)
1766 goto dev_reg_err;
1767
1768 vduse_mgmt->mgmt_dev.id_table = id_table;
1769 vduse_mgmt->mgmt_dev.ops = &vdpa_dev_mgmtdev_ops;
1770 vduse_mgmt->mgmt_dev.device = &vduse_mgmt->dev;
1771 ret = vdpa_mgmtdev_register(&vduse_mgmt->mgmt_dev);
1772 if (ret)
1773 device_unregister(&vduse_mgmt->dev);
1774
1775 return ret;
1776
1777 dev_reg_err:
1778 put_device(&vduse_mgmt->dev);
1779 return ret;
1780 }
1781
1782 static void vduse_mgmtdev_exit(void)
1783 {
1784 vdpa_mgmtdev_unregister(&vduse_mgmt->mgmt_dev);
1785 device_unregister(&vduse_mgmt->dev);
1786 }
1787
1788 static int vduse_init(void)
1789 {
1790 int ret;
1791 struct device *dev;
1792
1793 vduse_class = class_create(THIS_MODULE, "vduse");
1794 if (IS_ERR(vduse_class))
1795 return PTR_ERR(vduse_class);
1796
1797 vduse_class->devnode = vduse_devnode;
1798
1799 ret = alloc_chrdev_region(&vduse_major, 0, VDUSE_DEV_MAX, "vduse");
1800 if (ret)
1801 goto err_chardev_region;
1802
1803
1804 cdev_init(&vduse_ctrl_cdev, &vduse_ctrl_fops);
1805 vduse_ctrl_cdev.owner = THIS_MODULE;
1806 ret = cdev_add(&vduse_ctrl_cdev, vduse_major, 1);
1807 if (ret)
1808 goto err_ctrl_cdev;
1809
1810 dev = device_create(vduse_class, NULL, vduse_major, NULL, "control");
1811 if (IS_ERR(dev)) {
1812 ret = PTR_ERR(dev);
1813 goto err_device;
1814 }
1815
1816
1817 cdev_init(&vduse_cdev, &vduse_dev_fops);
1818 vduse_cdev.owner = THIS_MODULE;
1819 ret = cdev_add(&vduse_cdev, MKDEV(MAJOR(vduse_major), 1),
1820 VDUSE_DEV_MAX - 1);
1821 if (ret)
1822 goto err_cdev;
1823
1824 vduse_irq_wq = alloc_workqueue("vduse-irq",
1825 WQ_HIGHPRI | WQ_SYSFS | WQ_UNBOUND, 0);
1826 if (!vduse_irq_wq) {
1827 ret = -ENOMEM;
1828 goto err_wq;
1829 }
1830
1831 ret = vduse_domain_init();
1832 if (ret)
1833 goto err_domain;
1834
1835 ret = vduse_mgmtdev_init();
1836 if (ret)
1837 goto err_mgmtdev;
1838
1839 return 0;
1840 err_mgmtdev:
1841 vduse_domain_exit();
1842 err_domain:
1843 destroy_workqueue(vduse_irq_wq);
1844 err_wq:
1845 cdev_del(&vduse_cdev);
1846 err_cdev:
1847 device_destroy(vduse_class, vduse_major);
1848 err_device:
1849 cdev_del(&vduse_ctrl_cdev);
1850 err_ctrl_cdev:
1851 unregister_chrdev_region(vduse_major, VDUSE_DEV_MAX);
1852 err_chardev_region:
1853 class_destroy(vduse_class);
1854 return ret;
1855 }
1856 module_init(vduse_init);
1857
1858 static void vduse_exit(void)
1859 {
1860 vduse_mgmtdev_exit();
1861 vduse_domain_exit();
1862 destroy_workqueue(vduse_irq_wq);
1863 cdev_del(&vduse_cdev);
1864 device_destroy(vduse_class, vduse_major);
1865 cdev_del(&vduse_ctrl_cdev);
1866 unregister_chrdev_region(vduse_major, VDUSE_DEV_MAX);
1867 class_destroy(vduse_class);
1868 }
1869 module_exit(vduse_exit);
1870
/* Module metadata; the strings are the DRV_* macros defined at the top. */
MODULE_DESCRIPTION(DRV_DESC);
MODULE_AUTHOR(DRV_AUTHOR);
MODULE_LICENSE(DRV_LICENSE);