Back to home page

OSCL-LXR

 
 

    


0001 // SPDX-License-Identifier: GPL-2.0-only
0002 /*
0003  * VDUSE: vDPA Device in Userspace
0004  *
0005  * Copyright (C) 2020-2021 Bytedance Inc. and/or its affiliates. All rights reserved.
0006  *
0007  * Author: Xie Yongji <xieyongji@bytedance.com>
0008  *
0009  */
0010 
0011 #include <linux/init.h>
0012 #include <linux/module.h>
0013 #include <linux/cdev.h>
0014 #include <linux/device.h>
0015 #include <linux/eventfd.h>
0016 #include <linux/slab.h>
0017 #include <linux/wait.h>
0018 #include <linux/dma-map-ops.h>
0019 #include <linux/poll.h>
0020 #include <linux/file.h>
0021 #include <linux/uio.h>
0022 #include <linux/vdpa.h>
0023 #include <linux/nospec.h>
0024 #include <linux/vmalloc.h>
0025 #include <linux/sched/mm.h>
0026 #include <uapi/linux/vduse.h>
0027 #include <uapi/linux/vdpa.h>
0028 #include <uapi/linux/virtio_config.h>
0029 #include <uapi/linux/virtio_ids.h>
0030 #include <uapi/linux/virtio_blk.h>
0031 #include <linux/mod_devicetable.h>
0032 
0033 #include "iova_domain.h"
0034 
0035 #define DRV_AUTHOR   "Yongji Xie <xieyongji@bytedance.com>"
0036 #define DRV_DESC     "vDPA Device in Userspace"
0037 #define DRV_LICENSE  "GPL v2"
0038 
0039 #define VDUSE_DEV_MAX (1U << MINORBITS)
0040 #define VDUSE_BOUNCE_SIZE (64 * 1024 * 1024)
0041 #define VDUSE_IOVA_SIZE (128 * 1024 * 1024)
0042 #define VDUSE_MSG_DEFAULT_TIMEOUT 30
0043 
0044 struct vduse_virtqueue {
0045     u16 index;
0046     u16 num_max;
0047     u32 num;
0048     u64 desc_addr;
0049     u64 driver_addr;
0050     u64 device_addr;
0051     struct vdpa_vq_state state;
0052     bool ready;
0053     bool kicked;
0054     spinlock_t kick_lock;
0055     spinlock_t irq_lock;
0056     struct eventfd_ctx *kickfd;
0057     struct vdpa_callback cb;
0058     struct work_struct inject;
0059     struct work_struct kick;
0060 };
0061 
0062 struct vduse_dev;
0063 
0064 struct vduse_vdpa {
0065     struct vdpa_device vdpa;
0066     struct vduse_dev *dev;
0067 };
0068 
0069 struct vduse_umem {
0070     unsigned long iova;
0071     unsigned long npages;
0072     struct page **pages;
0073     struct mm_struct *mm;
0074 };
0075 
0076 struct vduse_dev {
0077     struct vduse_vdpa *vdev;
0078     struct device *dev;
0079     struct vduse_virtqueue *vqs;
0080     struct vduse_iova_domain *domain;
0081     char *name;
0082     struct mutex lock;
0083     spinlock_t msg_lock;
0084     u64 msg_unique;
0085     u32 msg_timeout;
0086     wait_queue_head_t waitq;
0087     struct list_head send_list;
0088     struct list_head recv_list;
0089     struct vdpa_callback config_cb;
0090     struct work_struct inject;
0091     spinlock_t irq_lock;
0092     struct rw_semaphore rwsem;
0093     int minor;
0094     bool broken;
0095     bool connected;
0096     u64 api_version;
0097     u64 device_features;
0098     u64 driver_features;
0099     u32 device_id;
0100     u32 vendor_id;
0101     u32 generation;
0102     u32 config_size;
0103     void *config;
0104     u8 status;
0105     u32 vq_num;
0106     u32 vq_align;
0107     struct vduse_umem *umem;
0108     struct mutex mem_lock;
0109 };
0110 
0111 struct vduse_dev_msg {
0112     struct vduse_dev_request req;
0113     struct vduse_dev_response resp;
0114     struct list_head list;
0115     wait_queue_head_t waitq;
0116     bool completed;
0117 };
0118 
0119 struct vduse_control {
0120     u64 api_version;
0121 };
0122 
0123 static DEFINE_MUTEX(vduse_lock);
0124 static DEFINE_IDR(vduse_idr);
0125 
0126 static dev_t vduse_major;
0127 static struct class *vduse_class;
0128 static struct cdev vduse_ctrl_cdev;
0129 static struct cdev vduse_cdev;
0130 static struct workqueue_struct *vduse_irq_wq;
0131 
0132 static u32 allowed_device_id[] = {
0133     VIRTIO_ID_BLOCK,
0134 };
0135 
0136 static inline struct vduse_dev *vdpa_to_vduse(struct vdpa_device *vdpa)
0137 {
0138     struct vduse_vdpa *vdev = container_of(vdpa, struct vduse_vdpa, vdpa);
0139 
0140     return vdev->dev;
0141 }
0142 
0143 static inline struct vduse_dev *dev_to_vduse(struct device *dev)
0144 {
0145     struct vdpa_device *vdpa = dev_to_vdpa(dev);
0146 
0147     return vdpa_to_vduse(vdpa);
0148 }
0149 
0150 static struct vduse_dev_msg *vduse_find_msg(struct list_head *head,
0151                         uint32_t request_id)
0152 {
0153     struct vduse_dev_msg *msg;
0154 
0155     list_for_each_entry(msg, head, list) {
0156         if (msg->req.request_id == request_id) {
0157             list_del(&msg->list);
0158             return msg;
0159         }
0160     }
0161 
0162     return NULL;
0163 }
0164 
0165 static struct vduse_dev_msg *vduse_dequeue_msg(struct list_head *head)
0166 {
0167     struct vduse_dev_msg *msg = NULL;
0168 
0169     if (!list_empty(head)) {
0170         msg = list_first_entry(head, struct vduse_dev_msg, list);
0171         list_del(&msg->list);
0172     }
0173 
0174     return msg;
0175 }
0176 
0177 static void vduse_enqueue_msg(struct list_head *head,
0178                   struct vduse_dev_msg *msg)
0179 {
0180     list_add_tail(&msg->list, head);
0181 }
0182 
0183 static void vduse_dev_broken(struct vduse_dev *dev)
0184 {
0185     struct vduse_dev_msg *msg, *tmp;
0186 
0187     if (unlikely(dev->broken))
0188         return;
0189 
0190     list_splice_init(&dev->recv_list, &dev->send_list);
0191     list_for_each_entry_safe(msg, tmp, &dev->send_list, list) {
0192         list_del(&msg->list);
0193         msg->completed = 1;
0194         msg->resp.result = VDUSE_REQ_RESULT_FAILED;
0195         wake_up(&msg->waitq);
0196     }
0197     dev->broken = true;
0198     wake_up(&dev->waitq);
0199 }
0200 
0201 static int vduse_dev_msg_sync(struct vduse_dev *dev,
0202                   struct vduse_dev_msg *msg)
0203 {
0204     int ret;
0205 
0206     if (unlikely(dev->broken))
0207         return -EIO;
0208 
0209     init_waitqueue_head(&msg->waitq);
0210     spin_lock(&dev->msg_lock);
0211     if (unlikely(dev->broken)) {
0212         spin_unlock(&dev->msg_lock);
0213         return -EIO;
0214     }
0215     msg->req.request_id = dev->msg_unique++;
0216     vduse_enqueue_msg(&dev->send_list, msg);
0217     wake_up(&dev->waitq);
0218     spin_unlock(&dev->msg_lock);
0219     if (dev->msg_timeout)
0220         ret = wait_event_killable_timeout(msg->waitq, msg->completed,
0221                           (long)dev->msg_timeout * HZ);
0222     else
0223         ret = wait_event_killable(msg->waitq, msg->completed);
0224 
0225     spin_lock(&dev->msg_lock);
0226     if (!msg->completed) {
0227         list_del(&msg->list);
0228         msg->resp.result = VDUSE_REQ_RESULT_FAILED;
0229         /* Mark the device as malfunction when there is a timeout */
0230         if (!ret)
0231             vduse_dev_broken(dev);
0232     }
0233     ret = (msg->resp.result == VDUSE_REQ_RESULT_OK) ? 0 : -EIO;
0234     spin_unlock(&dev->msg_lock);
0235 
0236     return ret;
0237 }
0238 
0239 static int vduse_dev_get_vq_state_packed(struct vduse_dev *dev,
0240                      struct vduse_virtqueue *vq,
0241                      struct vdpa_vq_state_packed *packed)
0242 {
0243     struct vduse_dev_msg msg = { 0 };
0244     int ret;
0245 
0246     msg.req.type = VDUSE_GET_VQ_STATE;
0247     msg.req.vq_state.index = vq->index;
0248 
0249     ret = vduse_dev_msg_sync(dev, &msg);
0250     if (ret)
0251         return ret;
0252 
0253     packed->last_avail_counter =
0254             msg.resp.vq_state.packed.last_avail_counter & 0x0001;
0255     packed->last_avail_idx =
0256             msg.resp.vq_state.packed.last_avail_idx & 0x7FFF;
0257     packed->last_used_counter =
0258             msg.resp.vq_state.packed.last_used_counter & 0x0001;
0259     packed->last_used_idx =
0260             msg.resp.vq_state.packed.last_used_idx & 0x7FFF;
0261 
0262     return 0;
0263 }
0264 
0265 static int vduse_dev_get_vq_state_split(struct vduse_dev *dev,
0266                     struct vduse_virtqueue *vq,
0267                     struct vdpa_vq_state_split *split)
0268 {
0269     struct vduse_dev_msg msg = { 0 };
0270     int ret;
0271 
0272     msg.req.type = VDUSE_GET_VQ_STATE;
0273     msg.req.vq_state.index = vq->index;
0274 
0275     ret = vduse_dev_msg_sync(dev, &msg);
0276     if (ret)
0277         return ret;
0278 
0279     split->avail_index = msg.resp.vq_state.split.avail_index;
0280 
0281     return 0;
0282 }
0283 
0284 static int vduse_dev_set_status(struct vduse_dev *dev, u8 status)
0285 {
0286     struct vduse_dev_msg msg = { 0 };
0287 
0288     msg.req.type = VDUSE_SET_STATUS;
0289     msg.req.s.status = status;
0290 
0291     return vduse_dev_msg_sync(dev, &msg);
0292 }
0293 
0294 static int vduse_dev_update_iotlb(struct vduse_dev *dev,
0295                   u64 start, u64 last)
0296 {
0297     struct vduse_dev_msg msg = { 0 };
0298 
0299     if (last < start)
0300         return -EINVAL;
0301 
0302     msg.req.type = VDUSE_UPDATE_IOTLB;
0303     msg.req.iova.start = start;
0304     msg.req.iova.last = last;
0305 
0306     return vduse_dev_msg_sync(dev, &msg);
0307 }
0308 
0309 static ssize_t vduse_dev_read_iter(struct kiocb *iocb, struct iov_iter *to)
0310 {
0311     struct file *file = iocb->ki_filp;
0312     struct vduse_dev *dev = file->private_data;
0313     struct vduse_dev_msg *msg;
0314     int size = sizeof(struct vduse_dev_request);
0315     ssize_t ret;
0316 
0317     if (iov_iter_count(to) < size)
0318         return -EINVAL;
0319 
0320     spin_lock(&dev->msg_lock);
0321     while (1) {
0322         msg = vduse_dequeue_msg(&dev->send_list);
0323         if (msg)
0324             break;
0325 
0326         ret = -EAGAIN;
0327         if (file->f_flags & O_NONBLOCK)
0328             goto unlock;
0329 
0330         spin_unlock(&dev->msg_lock);
0331         ret = wait_event_interruptible_exclusive(dev->waitq,
0332                     !list_empty(&dev->send_list));
0333         if (ret)
0334             return ret;
0335 
0336         spin_lock(&dev->msg_lock);
0337     }
0338     spin_unlock(&dev->msg_lock);
0339     ret = copy_to_iter(&msg->req, size, to);
0340     spin_lock(&dev->msg_lock);
0341     if (ret != size) {
0342         ret = -EFAULT;
0343         vduse_enqueue_msg(&dev->send_list, msg);
0344         goto unlock;
0345     }
0346     vduse_enqueue_msg(&dev->recv_list, msg);
0347 unlock:
0348     spin_unlock(&dev->msg_lock);
0349 
0350     return ret;
0351 }
0352 
0353 static bool is_mem_zero(const char *ptr, int size)
0354 {
0355     int i;
0356 
0357     for (i = 0; i < size; i++) {
0358         if (ptr[i])
0359             return false;
0360     }
0361     return true;
0362 }
0363 
0364 static ssize_t vduse_dev_write_iter(struct kiocb *iocb, struct iov_iter *from)
0365 {
0366     struct file *file = iocb->ki_filp;
0367     struct vduse_dev *dev = file->private_data;
0368     struct vduse_dev_response resp;
0369     struct vduse_dev_msg *msg;
0370     size_t ret;
0371 
0372     ret = copy_from_iter(&resp, sizeof(resp), from);
0373     if (ret != sizeof(resp))
0374         return -EINVAL;
0375 
0376     if (!is_mem_zero((const char *)resp.reserved, sizeof(resp.reserved)))
0377         return -EINVAL;
0378 
0379     spin_lock(&dev->msg_lock);
0380     msg = vduse_find_msg(&dev->recv_list, resp.request_id);
0381     if (!msg) {
0382         ret = -ENOENT;
0383         goto unlock;
0384     }
0385 
0386     memcpy(&msg->resp, &resp, sizeof(resp));
0387     msg->completed = 1;
0388     wake_up(&msg->waitq);
0389 unlock:
0390     spin_unlock(&dev->msg_lock);
0391 
0392     return ret;
0393 }
0394 
0395 static __poll_t vduse_dev_poll(struct file *file, poll_table *wait)
0396 {
0397     struct vduse_dev *dev = file->private_data;
0398     __poll_t mask = 0;
0399 
0400     poll_wait(file, &dev->waitq, wait);
0401 
0402     spin_lock(&dev->msg_lock);
0403 
0404     if (unlikely(dev->broken))
0405         mask |= EPOLLERR;
0406     if (!list_empty(&dev->send_list))
0407         mask |= EPOLLIN | EPOLLRDNORM;
0408     if (!list_empty(&dev->recv_list))
0409         mask |= EPOLLOUT | EPOLLWRNORM;
0410 
0411     spin_unlock(&dev->msg_lock);
0412 
0413     return mask;
0414 }
0415 
0416 static void vduse_dev_reset(struct vduse_dev *dev)
0417 {
0418     int i;
0419     struct vduse_iova_domain *domain = dev->domain;
0420 
0421     /* The coherent mappings are handled in vduse_dev_free_coherent() */
0422     if (domain->bounce_map)
0423         vduse_domain_reset_bounce_map(domain);
0424 
0425     down_write(&dev->rwsem);
0426 
0427     dev->status = 0;
0428     dev->driver_features = 0;
0429     dev->generation++;
0430     spin_lock(&dev->irq_lock);
0431     dev->config_cb.callback = NULL;
0432     dev->config_cb.private = NULL;
0433     spin_unlock(&dev->irq_lock);
0434     flush_work(&dev->inject);
0435 
0436     for (i = 0; i < dev->vq_num; i++) {
0437         struct vduse_virtqueue *vq = &dev->vqs[i];
0438 
0439         vq->ready = false;
0440         vq->desc_addr = 0;
0441         vq->driver_addr = 0;
0442         vq->device_addr = 0;
0443         vq->num = 0;
0444         memset(&vq->state, 0, sizeof(vq->state));
0445 
0446         spin_lock(&vq->kick_lock);
0447         vq->kicked = false;
0448         if (vq->kickfd)
0449             eventfd_ctx_put(vq->kickfd);
0450         vq->kickfd = NULL;
0451         spin_unlock(&vq->kick_lock);
0452 
0453         spin_lock(&vq->irq_lock);
0454         vq->cb.callback = NULL;
0455         vq->cb.private = NULL;
0456         spin_unlock(&vq->irq_lock);
0457         flush_work(&vq->inject);
0458         flush_work(&vq->kick);
0459     }
0460 
0461     up_write(&dev->rwsem);
0462 }
0463 
0464 static int vduse_vdpa_set_vq_address(struct vdpa_device *vdpa, u16 idx,
0465                 u64 desc_area, u64 driver_area,
0466                 u64 device_area)
0467 {
0468     struct vduse_dev *dev = vdpa_to_vduse(vdpa);
0469     struct vduse_virtqueue *vq = &dev->vqs[idx];
0470 
0471     vq->desc_addr = desc_area;
0472     vq->driver_addr = driver_area;
0473     vq->device_addr = device_area;
0474 
0475     return 0;
0476 }
0477 
0478 static void vduse_vq_kick(struct vduse_virtqueue *vq)
0479 {
0480     spin_lock(&vq->kick_lock);
0481     if (!vq->ready)
0482         goto unlock;
0483 
0484     if (vq->kickfd)
0485         eventfd_signal(vq->kickfd, 1);
0486     else
0487         vq->kicked = true;
0488 unlock:
0489     spin_unlock(&vq->kick_lock);
0490 }
0491 
0492 static void vduse_vq_kick_work(struct work_struct *work)
0493 {
0494     struct vduse_virtqueue *vq = container_of(work,
0495                     struct vduse_virtqueue, kick);
0496 
0497     vduse_vq_kick(vq);
0498 }
0499 
0500 static void vduse_vdpa_kick_vq(struct vdpa_device *vdpa, u16 idx)
0501 {
0502     struct vduse_dev *dev = vdpa_to_vduse(vdpa);
0503     struct vduse_virtqueue *vq = &dev->vqs[idx];
0504 
0505     if (!eventfd_signal_allowed()) {
0506         schedule_work(&vq->kick);
0507         return;
0508     }
0509     vduse_vq_kick(vq);
0510 }
0511 
0512 static void vduse_vdpa_set_vq_cb(struct vdpa_device *vdpa, u16 idx,
0513                   struct vdpa_callback *cb)
0514 {
0515     struct vduse_dev *dev = vdpa_to_vduse(vdpa);
0516     struct vduse_virtqueue *vq = &dev->vqs[idx];
0517 
0518     spin_lock(&vq->irq_lock);
0519     vq->cb.callback = cb->callback;
0520     vq->cb.private = cb->private;
0521     spin_unlock(&vq->irq_lock);
0522 }
0523 
0524 static void vduse_vdpa_set_vq_num(struct vdpa_device *vdpa, u16 idx, u32 num)
0525 {
0526     struct vduse_dev *dev = vdpa_to_vduse(vdpa);
0527     struct vduse_virtqueue *vq = &dev->vqs[idx];
0528 
0529     vq->num = num;
0530 }
0531 
0532 static void vduse_vdpa_set_vq_ready(struct vdpa_device *vdpa,
0533                     u16 idx, bool ready)
0534 {
0535     struct vduse_dev *dev = vdpa_to_vduse(vdpa);
0536     struct vduse_virtqueue *vq = &dev->vqs[idx];
0537 
0538     vq->ready = ready;
0539 }
0540 
0541 static bool vduse_vdpa_get_vq_ready(struct vdpa_device *vdpa, u16 idx)
0542 {
0543     struct vduse_dev *dev = vdpa_to_vduse(vdpa);
0544     struct vduse_virtqueue *vq = &dev->vqs[idx];
0545 
0546     return vq->ready;
0547 }
0548 
0549 static int vduse_vdpa_set_vq_state(struct vdpa_device *vdpa, u16 idx,
0550                 const struct vdpa_vq_state *state)
0551 {
0552     struct vduse_dev *dev = vdpa_to_vduse(vdpa);
0553     struct vduse_virtqueue *vq = &dev->vqs[idx];
0554 
0555     if (dev->driver_features & BIT_ULL(VIRTIO_F_RING_PACKED)) {
0556         vq->state.packed.last_avail_counter =
0557                 state->packed.last_avail_counter;
0558         vq->state.packed.last_avail_idx = state->packed.last_avail_idx;
0559         vq->state.packed.last_used_counter =
0560                 state->packed.last_used_counter;
0561         vq->state.packed.last_used_idx = state->packed.last_used_idx;
0562     } else
0563         vq->state.split.avail_index = state->split.avail_index;
0564 
0565     return 0;
0566 }
0567 
0568 static int vduse_vdpa_get_vq_state(struct vdpa_device *vdpa, u16 idx,
0569                 struct vdpa_vq_state *state)
0570 {
0571     struct vduse_dev *dev = vdpa_to_vduse(vdpa);
0572     struct vduse_virtqueue *vq = &dev->vqs[idx];
0573 
0574     if (dev->driver_features & BIT_ULL(VIRTIO_F_RING_PACKED))
0575         return vduse_dev_get_vq_state_packed(dev, vq, &state->packed);
0576 
0577     return vduse_dev_get_vq_state_split(dev, vq, &state->split);
0578 }
0579 
0580 static u32 vduse_vdpa_get_vq_align(struct vdpa_device *vdpa)
0581 {
0582     struct vduse_dev *dev = vdpa_to_vduse(vdpa);
0583 
0584     return dev->vq_align;
0585 }
0586 
0587 static u64 vduse_vdpa_get_device_features(struct vdpa_device *vdpa)
0588 {
0589     struct vduse_dev *dev = vdpa_to_vduse(vdpa);
0590 
0591     return dev->device_features;
0592 }
0593 
0594 static int vduse_vdpa_set_driver_features(struct vdpa_device *vdpa, u64 features)
0595 {
0596     struct vduse_dev *dev = vdpa_to_vduse(vdpa);
0597 
0598     dev->driver_features = features;
0599     return 0;
0600 }
0601 
0602 static u64 vduse_vdpa_get_driver_features(struct vdpa_device *vdpa)
0603 {
0604     struct vduse_dev *dev = vdpa_to_vduse(vdpa);
0605 
0606     return dev->driver_features;
0607 }
0608 
0609 static void vduse_vdpa_set_config_cb(struct vdpa_device *vdpa,
0610                   struct vdpa_callback *cb)
0611 {
0612     struct vduse_dev *dev = vdpa_to_vduse(vdpa);
0613 
0614     spin_lock(&dev->irq_lock);
0615     dev->config_cb.callback = cb->callback;
0616     dev->config_cb.private = cb->private;
0617     spin_unlock(&dev->irq_lock);
0618 }
0619 
0620 static u16 vduse_vdpa_get_vq_num_max(struct vdpa_device *vdpa)
0621 {
0622     struct vduse_dev *dev = vdpa_to_vduse(vdpa);
0623     u16 num_max = 0;
0624     int i;
0625 
0626     for (i = 0; i < dev->vq_num; i++)
0627         if (num_max < dev->vqs[i].num_max)
0628             num_max = dev->vqs[i].num_max;
0629 
0630     return num_max;
0631 }
0632 
0633 static u32 vduse_vdpa_get_device_id(struct vdpa_device *vdpa)
0634 {
0635     struct vduse_dev *dev = vdpa_to_vduse(vdpa);
0636 
0637     return dev->device_id;
0638 }
0639 
0640 static u32 vduse_vdpa_get_vendor_id(struct vdpa_device *vdpa)
0641 {
0642     struct vduse_dev *dev = vdpa_to_vduse(vdpa);
0643 
0644     return dev->vendor_id;
0645 }
0646 
0647 static u8 vduse_vdpa_get_status(struct vdpa_device *vdpa)
0648 {
0649     struct vduse_dev *dev = vdpa_to_vduse(vdpa);
0650 
0651     return dev->status;
0652 }
0653 
0654 static void vduse_vdpa_set_status(struct vdpa_device *vdpa, u8 status)
0655 {
0656     struct vduse_dev *dev = vdpa_to_vduse(vdpa);
0657 
0658     if (vduse_dev_set_status(dev, status))
0659         return;
0660 
0661     dev->status = status;
0662 }
0663 
0664 static size_t vduse_vdpa_get_config_size(struct vdpa_device *vdpa)
0665 {
0666     struct vduse_dev *dev = vdpa_to_vduse(vdpa);
0667 
0668     return dev->config_size;
0669 }
0670 
0671 static void vduse_vdpa_get_config(struct vdpa_device *vdpa, unsigned int offset,
0672                   void *buf, unsigned int len)
0673 {
0674     struct vduse_dev *dev = vdpa_to_vduse(vdpa);
0675 
0676     /* Initialize the buffer in case of partial copy. */
0677     memset(buf, 0, len);
0678 
0679     if (offset > dev->config_size)
0680         return;
0681 
0682     if (len > dev->config_size - offset)
0683         len = dev->config_size - offset;
0684 
0685     memcpy(buf, dev->config + offset, len);
0686 }
0687 
0688 static void vduse_vdpa_set_config(struct vdpa_device *vdpa, unsigned int offset,
0689             const void *buf, unsigned int len)
0690 {
0691     /* Now we only support read-only configuration space */
0692 }
0693 
0694 static int vduse_vdpa_reset(struct vdpa_device *vdpa)
0695 {
0696     struct vduse_dev *dev = vdpa_to_vduse(vdpa);
0697     int ret = vduse_dev_set_status(dev, 0);
0698 
0699     vduse_dev_reset(dev);
0700 
0701     return ret;
0702 }
0703 
0704 static u32 vduse_vdpa_get_generation(struct vdpa_device *vdpa)
0705 {
0706     struct vduse_dev *dev = vdpa_to_vduse(vdpa);
0707 
0708     return dev->generation;
0709 }
0710 
0711 static int vduse_vdpa_set_map(struct vdpa_device *vdpa,
0712                 unsigned int asid,
0713                 struct vhost_iotlb *iotlb)
0714 {
0715     struct vduse_dev *dev = vdpa_to_vduse(vdpa);
0716     int ret;
0717 
0718     ret = vduse_domain_set_map(dev->domain, iotlb);
0719     if (ret)
0720         return ret;
0721 
0722     ret = vduse_dev_update_iotlb(dev, 0ULL, ULLONG_MAX);
0723     if (ret) {
0724         vduse_domain_clear_map(dev->domain, iotlb);
0725         return ret;
0726     }
0727 
0728     return 0;
0729 }
0730 
0731 static void vduse_vdpa_free(struct vdpa_device *vdpa)
0732 {
0733     struct vduse_dev *dev = vdpa_to_vduse(vdpa);
0734 
0735     dev->vdev = NULL;
0736 }
0737 
0738 static const struct vdpa_config_ops vduse_vdpa_config_ops = {
0739     .set_vq_address     = vduse_vdpa_set_vq_address,
0740     .kick_vq        = vduse_vdpa_kick_vq,
0741     .set_vq_cb      = vduse_vdpa_set_vq_cb,
0742     .set_vq_num             = vduse_vdpa_set_vq_num,
0743     .set_vq_ready       = vduse_vdpa_set_vq_ready,
0744     .get_vq_ready       = vduse_vdpa_get_vq_ready,
0745     .set_vq_state       = vduse_vdpa_set_vq_state,
0746     .get_vq_state       = vduse_vdpa_get_vq_state,
0747     .get_vq_align       = vduse_vdpa_get_vq_align,
0748     .get_device_features    = vduse_vdpa_get_device_features,
0749     .set_driver_features    = vduse_vdpa_set_driver_features,
0750     .get_driver_features    = vduse_vdpa_get_driver_features,
0751     .set_config_cb      = vduse_vdpa_set_config_cb,
0752     .get_vq_num_max     = vduse_vdpa_get_vq_num_max,
0753     .get_device_id      = vduse_vdpa_get_device_id,
0754     .get_vendor_id      = vduse_vdpa_get_vendor_id,
0755     .get_status     = vduse_vdpa_get_status,
0756     .set_status     = vduse_vdpa_set_status,
0757     .get_config_size    = vduse_vdpa_get_config_size,
0758     .get_config     = vduse_vdpa_get_config,
0759     .set_config     = vduse_vdpa_set_config,
0760     .get_generation     = vduse_vdpa_get_generation,
0761     .reset          = vduse_vdpa_reset,
0762     .set_map        = vduse_vdpa_set_map,
0763     .free           = vduse_vdpa_free,
0764 };
0765 
0766 static dma_addr_t vduse_dev_map_page(struct device *dev, struct page *page,
0767                      unsigned long offset, size_t size,
0768                      enum dma_data_direction dir,
0769                      unsigned long attrs)
0770 {
0771     struct vduse_dev *vdev = dev_to_vduse(dev);
0772     struct vduse_iova_domain *domain = vdev->domain;
0773 
0774     return vduse_domain_map_page(domain, page, offset, size, dir, attrs);
0775 }
0776 
0777 static void vduse_dev_unmap_page(struct device *dev, dma_addr_t dma_addr,
0778                 size_t size, enum dma_data_direction dir,
0779                 unsigned long attrs)
0780 {
0781     struct vduse_dev *vdev = dev_to_vduse(dev);
0782     struct vduse_iova_domain *domain = vdev->domain;
0783 
0784     return vduse_domain_unmap_page(domain, dma_addr, size, dir, attrs);
0785 }
0786 
0787 static void *vduse_dev_alloc_coherent(struct device *dev, size_t size,
0788                     dma_addr_t *dma_addr, gfp_t flag,
0789                     unsigned long attrs)
0790 {
0791     struct vduse_dev *vdev = dev_to_vduse(dev);
0792     struct vduse_iova_domain *domain = vdev->domain;
0793     unsigned long iova;
0794     void *addr;
0795 
0796     *dma_addr = DMA_MAPPING_ERROR;
0797     addr = vduse_domain_alloc_coherent(domain, size,
0798                 (dma_addr_t *)&iova, flag, attrs);
0799     if (!addr)
0800         return NULL;
0801 
0802     *dma_addr = (dma_addr_t)iova;
0803 
0804     return addr;
0805 }
0806 
0807 static void vduse_dev_free_coherent(struct device *dev, size_t size,
0808                     void *vaddr, dma_addr_t dma_addr,
0809                     unsigned long attrs)
0810 {
0811     struct vduse_dev *vdev = dev_to_vduse(dev);
0812     struct vduse_iova_domain *domain = vdev->domain;
0813 
0814     vduse_domain_free_coherent(domain, size, vaddr, dma_addr, attrs);
0815 }
0816 
0817 static size_t vduse_dev_max_mapping_size(struct device *dev)
0818 {
0819     struct vduse_dev *vdev = dev_to_vduse(dev);
0820     struct vduse_iova_domain *domain = vdev->domain;
0821 
0822     return domain->bounce_size;
0823 }
0824 
0825 static const struct dma_map_ops vduse_dev_dma_ops = {
0826     .map_page = vduse_dev_map_page,
0827     .unmap_page = vduse_dev_unmap_page,
0828     .alloc = vduse_dev_alloc_coherent,
0829     .free = vduse_dev_free_coherent,
0830     .max_mapping_size = vduse_dev_max_mapping_size,
0831 };
0832 
0833 static unsigned int perm_to_file_flags(u8 perm)
0834 {
0835     unsigned int flags = 0;
0836 
0837     switch (perm) {
0838     case VDUSE_ACCESS_WO:
0839         flags |= O_WRONLY;
0840         break;
0841     case VDUSE_ACCESS_RO:
0842         flags |= O_RDONLY;
0843         break;
0844     case VDUSE_ACCESS_RW:
0845         flags |= O_RDWR;
0846         break;
0847     default:
0848         WARN(1, "invalidate vhost IOTLB permission\n");
0849         break;
0850     }
0851 
0852     return flags;
0853 }
0854 
0855 static int vduse_kickfd_setup(struct vduse_dev *dev,
0856             struct vduse_vq_eventfd *eventfd)
0857 {
0858     struct eventfd_ctx *ctx = NULL;
0859     struct vduse_virtqueue *vq;
0860     u32 index;
0861 
0862     if (eventfd->index >= dev->vq_num)
0863         return -EINVAL;
0864 
0865     index = array_index_nospec(eventfd->index, dev->vq_num);
0866     vq = &dev->vqs[index];
0867     if (eventfd->fd >= 0) {
0868         ctx = eventfd_ctx_fdget(eventfd->fd);
0869         if (IS_ERR(ctx))
0870             return PTR_ERR(ctx);
0871     } else if (eventfd->fd != VDUSE_EVENTFD_DEASSIGN)
0872         return 0;
0873 
0874     spin_lock(&vq->kick_lock);
0875     if (vq->kickfd)
0876         eventfd_ctx_put(vq->kickfd);
0877     vq->kickfd = ctx;
0878     if (vq->ready && vq->kicked && vq->kickfd) {
0879         eventfd_signal(vq->kickfd, 1);
0880         vq->kicked = false;
0881     }
0882     spin_unlock(&vq->kick_lock);
0883 
0884     return 0;
0885 }
0886 
0887 static bool vduse_dev_is_ready(struct vduse_dev *dev)
0888 {
0889     int i;
0890 
0891     for (i = 0; i < dev->vq_num; i++)
0892         if (!dev->vqs[i].num_max)
0893             return false;
0894 
0895     return true;
0896 }
0897 
0898 static void vduse_dev_irq_inject(struct work_struct *work)
0899 {
0900     struct vduse_dev *dev = container_of(work, struct vduse_dev, inject);
0901 
0902     spin_lock_irq(&dev->irq_lock);
0903     if (dev->config_cb.callback)
0904         dev->config_cb.callback(dev->config_cb.private);
0905     spin_unlock_irq(&dev->irq_lock);
0906 }
0907 
0908 static void vduse_vq_irq_inject(struct work_struct *work)
0909 {
0910     struct vduse_virtqueue *vq = container_of(work,
0911                     struct vduse_virtqueue, inject);
0912 
0913     spin_lock_irq(&vq->irq_lock);
0914     if (vq->ready && vq->cb.callback)
0915         vq->cb.callback(vq->cb.private);
0916     spin_unlock_irq(&vq->irq_lock);
0917 }
0918 
0919 static int vduse_dev_queue_irq_work(struct vduse_dev *dev,
0920                     struct work_struct *irq_work)
0921 {
0922     int ret = -EINVAL;
0923 
0924     down_read(&dev->rwsem);
0925     if (!(dev->status & VIRTIO_CONFIG_S_DRIVER_OK))
0926         goto unlock;
0927 
0928     ret = 0;
0929     queue_work(vduse_irq_wq, irq_work);
0930 unlock:
0931     up_read(&dev->rwsem);
0932 
0933     return ret;
0934 }
0935 
0936 static int vduse_dev_dereg_umem(struct vduse_dev *dev,
0937                 u64 iova, u64 size)
0938 {
0939     int ret;
0940 
0941     mutex_lock(&dev->mem_lock);
0942     ret = -ENOENT;
0943     if (!dev->umem)
0944         goto unlock;
0945 
0946     ret = -EINVAL;
0947     if (dev->umem->iova != iova || size != dev->domain->bounce_size)
0948         goto unlock;
0949 
0950     vduse_domain_remove_user_bounce_pages(dev->domain);
0951     unpin_user_pages_dirty_lock(dev->umem->pages,
0952                     dev->umem->npages, true);
0953     atomic64_sub(dev->umem->npages, &dev->umem->mm->pinned_vm);
0954     mmdrop(dev->umem->mm);
0955     vfree(dev->umem->pages);
0956     kfree(dev->umem);
0957     dev->umem = NULL;
0958     ret = 0;
0959 unlock:
0960     mutex_unlock(&dev->mem_lock);
0961     return ret;
0962 }
0963 
0964 static int vduse_dev_reg_umem(struct vduse_dev *dev,
0965                   u64 iova, u64 uaddr, u64 size)
0966 {
0967     struct page **page_list = NULL;
0968     struct vduse_umem *umem = NULL;
0969     long pinned = 0;
0970     unsigned long npages, lock_limit;
0971     int ret;
0972 
0973     if (!dev->domain->bounce_map ||
0974         size != dev->domain->bounce_size ||
0975         iova != 0 || uaddr & ~PAGE_MASK)
0976         return -EINVAL;
0977 
0978     mutex_lock(&dev->mem_lock);
0979     ret = -EEXIST;
0980     if (dev->umem)
0981         goto unlock;
0982 
0983     ret = -ENOMEM;
0984     npages = size >> PAGE_SHIFT;
0985     page_list = __vmalloc(array_size(npages, sizeof(struct page *)),
0986                   GFP_KERNEL_ACCOUNT);
0987     umem = kzalloc(sizeof(*umem), GFP_KERNEL);
0988     if (!page_list || !umem)
0989         goto unlock;
0990 
0991     mmap_read_lock(current->mm);
0992 
0993     lock_limit = PFN_DOWN(rlimit(RLIMIT_MEMLOCK));
0994     if (npages + atomic64_read(&current->mm->pinned_vm) > lock_limit)
0995         goto out;
0996 
0997     pinned = pin_user_pages(uaddr, npages, FOLL_LONGTERM | FOLL_WRITE,
0998                 page_list, NULL);
0999     if (pinned != npages) {
1000         ret = pinned < 0 ? pinned : -ENOMEM;
1001         goto out;
1002     }
1003 
1004     ret = vduse_domain_add_user_bounce_pages(dev->domain,
1005                          page_list, pinned);
1006     if (ret)
1007         goto out;
1008 
1009     atomic64_add(npages, &current->mm->pinned_vm);
1010 
1011     umem->pages = page_list;
1012     umem->npages = pinned;
1013     umem->iova = iova;
1014     umem->mm = current->mm;
1015     mmgrab(current->mm);
1016 
1017     dev->umem = umem;
1018 out:
1019     if (ret && pinned > 0)
1020         unpin_user_pages(page_list, pinned);
1021 
1022     mmap_read_unlock(current->mm);
1023 unlock:
1024     if (ret) {
1025         vfree(page_list);
1026         kfree(umem);
1027     }
1028     mutex_unlock(&dev->mem_lock);
1029     return ret;
1030 }
1031 
1032 static long vduse_dev_ioctl(struct file *file, unsigned int cmd,
1033                 unsigned long arg)
1034 {
1035     struct vduse_dev *dev = file->private_data;
1036     void __user *argp = (void __user *)arg;
1037     int ret;
1038 
1039     if (unlikely(dev->broken))
1040         return -EPERM;
1041 
1042     switch (cmd) {
1043     case VDUSE_IOTLB_GET_FD: {
1044         struct vduse_iotlb_entry entry;
1045         struct vhost_iotlb_map *map;
1046         struct vdpa_map_file *map_file;
1047         struct vduse_iova_domain *domain = dev->domain;
1048         struct file *f = NULL;
1049 
1050         ret = -EFAULT;
1051         if (copy_from_user(&entry, argp, sizeof(entry)))
1052             break;
1053 
1054         ret = -EINVAL;
1055         if (entry.start > entry.last)
1056             break;
1057 
1058         spin_lock(&domain->iotlb_lock);
1059         map = vhost_iotlb_itree_first(domain->iotlb,
1060                           entry.start, entry.last);
1061         if (map) {
1062             map_file = (struct vdpa_map_file *)map->opaque;
1063             f = get_file(map_file->file);
1064             entry.offset = map_file->offset;
1065             entry.start = map->start;
1066             entry.last = map->last;
1067             entry.perm = map->perm;
1068         }
1069         spin_unlock(&domain->iotlb_lock);
1070         ret = -EINVAL;
1071         if (!f)
1072             break;
1073 
1074         ret = -EFAULT;
1075         if (copy_to_user(argp, &entry, sizeof(entry))) {
1076             fput(f);
1077             break;
1078         }
1079         ret = receive_fd(f, perm_to_file_flags(entry.perm));
1080         fput(f);
1081         break;
1082     }
1083     case VDUSE_DEV_GET_FEATURES:
1084         /*
1085          * Just mirror what driver wrote here.
1086          * The driver is expected to check FEATURE_OK later.
1087          */
1088         ret = put_user(dev->driver_features, (u64 __user *)argp);
1089         break;
1090     case VDUSE_DEV_SET_CONFIG: {
1091         struct vduse_config_data config;
1092         unsigned long size = offsetof(struct vduse_config_data,
1093                           buffer);
1094 
1095         ret = -EFAULT;
1096         if (copy_from_user(&config, argp, size))
1097             break;
1098 
1099         ret = -EINVAL;
1100         if (config.offset > dev->config_size ||
1101             config.length == 0 ||
1102             config.length > dev->config_size - config.offset)
1103             break;
1104 
1105         ret = -EFAULT;
1106         if (copy_from_user(dev->config + config.offset, argp + size,
1107                    config.length))
1108             break;
1109 
1110         ret = 0;
1111         break;
1112     }
1113     case VDUSE_DEV_INJECT_CONFIG_IRQ:
1114         ret = vduse_dev_queue_irq_work(dev, &dev->inject);
1115         break;
1116     case VDUSE_VQ_SETUP: {
1117         struct vduse_vq_config config;
1118         u32 index;
1119 
1120         ret = -EFAULT;
1121         if (copy_from_user(&config, argp, sizeof(config)))
1122             break;
1123 
1124         ret = -EINVAL;
1125         if (config.index >= dev->vq_num)
1126             break;
1127 
1128         if (!is_mem_zero((const char *)config.reserved,
1129                  sizeof(config.reserved)))
1130             break;
1131 
1132         index = array_index_nospec(config.index, dev->vq_num);
1133         dev->vqs[index].num_max = config.max_size;
1134         ret = 0;
1135         break;
1136     }
1137     case VDUSE_VQ_GET_INFO: {
1138         struct vduse_vq_info vq_info;
1139         struct vduse_virtqueue *vq;
1140         u32 index;
1141 
1142         ret = -EFAULT;
1143         if (copy_from_user(&vq_info, argp, sizeof(vq_info)))
1144             break;
1145 
1146         ret = -EINVAL;
1147         if (vq_info.index >= dev->vq_num)
1148             break;
1149 
1150         index = array_index_nospec(vq_info.index, dev->vq_num);
1151         vq = &dev->vqs[index];
1152         vq_info.desc_addr = vq->desc_addr;
1153         vq_info.driver_addr = vq->driver_addr;
1154         vq_info.device_addr = vq->device_addr;
1155         vq_info.num = vq->num;
1156 
1157         if (dev->driver_features & BIT_ULL(VIRTIO_F_RING_PACKED)) {
1158             vq_info.packed.last_avail_counter =
1159                 vq->state.packed.last_avail_counter;
1160             vq_info.packed.last_avail_idx =
1161                 vq->state.packed.last_avail_idx;
1162             vq_info.packed.last_used_counter =
1163                 vq->state.packed.last_used_counter;
1164             vq_info.packed.last_used_idx =
1165                 vq->state.packed.last_used_idx;
1166         } else
1167             vq_info.split.avail_index =
1168                 vq->state.split.avail_index;
1169 
1170         vq_info.ready = vq->ready;
1171 
1172         ret = -EFAULT;
1173         if (copy_to_user(argp, &vq_info, sizeof(vq_info)))
1174             break;
1175 
1176         ret = 0;
1177         break;
1178     }
1179     case VDUSE_VQ_SETUP_KICKFD: {
1180         struct vduse_vq_eventfd eventfd;
1181 
1182         ret = -EFAULT;
1183         if (copy_from_user(&eventfd, argp, sizeof(eventfd)))
1184             break;
1185 
1186         ret = vduse_kickfd_setup(dev, &eventfd);
1187         break;
1188     }
1189     case VDUSE_VQ_INJECT_IRQ: {
1190         u32 index;
1191 
1192         ret = -EFAULT;
1193         if (get_user(index, (u32 __user *)argp))
1194             break;
1195 
1196         ret = -EINVAL;
1197         if (index >= dev->vq_num)
1198             break;
1199 
1200         index = array_index_nospec(index, dev->vq_num);
1201         ret = vduse_dev_queue_irq_work(dev, &dev->vqs[index].inject);
1202         break;
1203     }
1204     case VDUSE_IOTLB_REG_UMEM: {
1205         struct vduse_iova_umem umem;
1206 
1207         ret = -EFAULT;
1208         if (copy_from_user(&umem, argp, sizeof(umem)))
1209             break;
1210 
1211         ret = -EINVAL;
1212         if (!is_mem_zero((const char *)umem.reserved,
1213                  sizeof(umem.reserved)))
1214             break;
1215 
1216         ret = vduse_dev_reg_umem(dev, umem.iova,
1217                      umem.uaddr, umem.size);
1218         break;
1219     }
1220     case VDUSE_IOTLB_DEREG_UMEM: {
1221         struct vduse_iova_umem umem;
1222 
1223         ret = -EFAULT;
1224         if (copy_from_user(&umem, argp, sizeof(umem)))
1225             break;
1226 
1227         ret = -EINVAL;
1228         if (!is_mem_zero((const char *)umem.reserved,
1229                  sizeof(umem.reserved)))
1230             break;
1231 
1232         ret = vduse_dev_dereg_umem(dev, umem.iova,
1233                        umem.size);
1234         break;
1235     }
1236     case VDUSE_IOTLB_GET_INFO: {
1237         struct vduse_iova_info info;
1238         struct vhost_iotlb_map *map;
1239         struct vduse_iova_domain *domain = dev->domain;
1240 
1241         ret = -EFAULT;
1242         if (copy_from_user(&info, argp, sizeof(info)))
1243             break;
1244 
1245         ret = -EINVAL;
1246         if (info.start > info.last)
1247             break;
1248 
1249         if (!is_mem_zero((const char *)info.reserved,
1250                  sizeof(info.reserved)))
1251             break;
1252 
1253         spin_lock(&domain->iotlb_lock);
1254         map = vhost_iotlb_itree_first(domain->iotlb,
1255                           info.start, info.last);
1256         if (map) {
1257             info.start = map->start;
1258             info.last = map->last;
1259             info.capability = 0;
1260             if (domain->bounce_map && map->start == 0 &&
1261                 map->last == domain->bounce_size - 1)
1262                 info.capability |= VDUSE_IOVA_CAP_UMEM;
1263         }
1264         spin_unlock(&domain->iotlb_lock);
1265         if (!map)
1266             break;
1267 
1268         ret = -EFAULT;
1269         if (copy_to_user(argp, &info, sizeof(info)))
1270             break;
1271 
1272         ret = 0;
1273         break;
1274     }
1275     default:
1276         ret = -ENOIOCTLCMD;
1277         break;
1278     }
1279 
1280     return ret;
1281 }
1282 
1283 static int vduse_dev_release(struct inode *inode, struct file *file)
1284 {
1285     struct vduse_dev *dev = file->private_data;
1286 
1287     vduse_dev_dereg_umem(dev, 0, dev->domain->bounce_size);
1288     spin_lock(&dev->msg_lock);
1289     /* Make sure the inflight messages can processed after reconncection */
1290     list_splice_init(&dev->recv_list, &dev->send_list);
1291     spin_unlock(&dev->msg_lock);
1292     dev->connected = false;
1293 
1294     return 0;
1295 }
1296 
1297 static struct vduse_dev *vduse_dev_get_from_minor(int minor)
1298 {
1299     struct vduse_dev *dev;
1300 
1301     mutex_lock(&vduse_lock);
1302     dev = idr_find(&vduse_idr, minor);
1303     mutex_unlock(&vduse_lock);
1304 
1305     return dev;
1306 }
1307 
1308 static int vduse_dev_open(struct inode *inode, struct file *file)
1309 {
1310     int ret;
1311     struct vduse_dev *dev = vduse_dev_get_from_minor(iminor(inode));
1312 
1313     if (!dev)
1314         return -ENODEV;
1315 
1316     ret = -EBUSY;
1317     mutex_lock(&dev->lock);
1318     if (dev->connected)
1319         goto unlock;
1320 
1321     ret = 0;
1322     dev->connected = true;
1323     file->private_data = dev;
1324 unlock:
1325     mutex_unlock(&dev->lock);
1326 
1327     return ret;
1328 }
1329 
1330 static const struct file_operations vduse_dev_fops = {
1331     .owner      = THIS_MODULE,
1332     .open       = vduse_dev_open,
1333     .release    = vduse_dev_release,
1334     .read_iter  = vduse_dev_read_iter,
1335     .write_iter = vduse_dev_write_iter,
1336     .poll       = vduse_dev_poll,
1337     .unlocked_ioctl = vduse_dev_ioctl,
1338     .compat_ioctl   = compat_ptr_ioctl,
1339     .llseek     = noop_llseek,
1340 };
1341 
1342 static struct vduse_dev *vduse_dev_create(void)
1343 {
1344     struct vduse_dev *dev = kzalloc(sizeof(*dev), GFP_KERNEL);
1345 
1346     if (!dev)
1347         return NULL;
1348 
1349     mutex_init(&dev->lock);
1350     mutex_init(&dev->mem_lock);
1351     spin_lock_init(&dev->msg_lock);
1352     INIT_LIST_HEAD(&dev->send_list);
1353     INIT_LIST_HEAD(&dev->recv_list);
1354     spin_lock_init(&dev->irq_lock);
1355     init_rwsem(&dev->rwsem);
1356 
1357     INIT_WORK(&dev->inject, vduse_dev_irq_inject);
1358     init_waitqueue_head(&dev->waitq);
1359 
1360     return dev;
1361 }
1362 
1363 static void vduse_dev_destroy(struct vduse_dev *dev)
1364 {
1365     kfree(dev);
1366 }
1367 
1368 static struct vduse_dev *vduse_find_dev(const char *name)
1369 {
1370     struct vduse_dev *dev;
1371     int id;
1372 
1373     idr_for_each_entry(&vduse_idr, dev, id)
1374         if (!strcmp(dev->name, name))
1375             return dev;
1376 
1377     return NULL;
1378 }
1379 
1380 static int vduse_destroy_dev(char *name)
1381 {
1382     struct vduse_dev *dev = vduse_find_dev(name);
1383 
1384     if (!dev)
1385         return -EINVAL;
1386 
1387     mutex_lock(&dev->lock);
1388     if (dev->vdev || dev->connected) {
1389         mutex_unlock(&dev->lock);
1390         return -EBUSY;
1391     }
1392     dev->connected = true;
1393     mutex_unlock(&dev->lock);
1394 
1395     vduse_dev_reset(dev);
1396     device_destroy(vduse_class, MKDEV(MAJOR(vduse_major), dev->minor));
1397     idr_remove(&vduse_idr, dev->minor);
1398     kvfree(dev->config);
1399     kfree(dev->vqs);
1400     vduse_domain_destroy(dev->domain);
1401     kfree(dev->name);
1402     vduse_dev_destroy(dev);
1403     module_put(THIS_MODULE);
1404 
1405     return 0;
1406 }
1407 
1408 static bool device_is_allowed(u32 device_id)
1409 {
1410     int i;
1411 
1412     for (i = 0; i < ARRAY_SIZE(allowed_device_id); i++)
1413         if (allowed_device_id[i] == device_id)
1414             return true;
1415 
1416     return false;
1417 }
1418 
1419 static bool features_is_valid(u64 features)
1420 {
1421     if (!(features & (1ULL << VIRTIO_F_ACCESS_PLATFORM)))
1422         return false;
1423 
1424     /* Now we only support read-only configuration space */
1425     if (features & (1ULL << VIRTIO_BLK_F_CONFIG_WCE))
1426         return false;
1427 
1428     return true;
1429 }
1430 
1431 static bool vduse_validate_config(struct vduse_dev_config *config)
1432 {
1433     if (!is_mem_zero((const char *)config->reserved,
1434              sizeof(config->reserved)))
1435         return false;
1436 
1437     if (config->vq_align > PAGE_SIZE)
1438         return false;
1439 
1440     if (config->config_size > PAGE_SIZE)
1441         return false;
1442 
1443     if (!device_is_allowed(config->device_id))
1444         return false;
1445 
1446     if (!features_is_valid(config->features))
1447         return false;
1448 
1449     return true;
1450 }
1451 
1452 static ssize_t msg_timeout_show(struct device *device,
1453                 struct device_attribute *attr, char *buf)
1454 {
1455     struct vduse_dev *dev = dev_get_drvdata(device);
1456 
1457     return sysfs_emit(buf, "%u\n", dev->msg_timeout);
1458 }
1459 
1460 static ssize_t msg_timeout_store(struct device *device,
1461                  struct device_attribute *attr,
1462                  const char *buf, size_t count)
1463 {
1464     struct vduse_dev *dev = dev_get_drvdata(device);
1465     int ret;
1466 
1467     ret = kstrtouint(buf, 10, &dev->msg_timeout);
1468     if (ret < 0)
1469         return ret;
1470 
1471     return count;
1472 }
1473 
1474 static DEVICE_ATTR_RW(msg_timeout);
1475 
1476 static struct attribute *vduse_dev_attrs[] = {
1477     &dev_attr_msg_timeout.attr,
1478     NULL
1479 };
1480 
1481 ATTRIBUTE_GROUPS(vduse_dev);
1482 
1483 static int vduse_create_dev(struct vduse_dev_config *config,
1484                 void *config_buf, u64 api_version)
1485 {
1486     int i, ret;
1487     struct vduse_dev *dev;
1488 
1489     ret = -EEXIST;
1490     if (vduse_find_dev(config->name))
1491         goto err;
1492 
1493     ret = -ENOMEM;
1494     dev = vduse_dev_create();
1495     if (!dev)
1496         goto err;
1497 
1498     dev->api_version = api_version;
1499     dev->device_features = config->features;
1500     dev->device_id = config->device_id;
1501     dev->vendor_id = config->vendor_id;
1502     dev->name = kstrdup(config->name, GFP_KERNEL);
1503     if (!dev->name)
1504         goto err_str;
1505 
1506     dev->domain = vduse_domain_create(VDUSE_IOVA_SIZE - 1,
1507                       VDUSE_BOUNCE_SIZE);
1508     if (!dev->domain)
1509         goto err_domain;
1510 
1511     dev->config = config_buf;
1512     dev->config_size = config->config_size;
1513     dev->vq_align = config->vq_align;
1514     dev->vq_num = config->vq_num;
1515     dev->vqs = kcalloc(dev->vq_num, sizeof(*dev->vqs), GFP_KERNEL);
1516     if (!dev->vqs)
1517         goto err_vqs;
1518 
1519     for (i = 0; i < dev->vq_num; i++) {
1520         dev->vqs[i].index = i;
1521         INIT_WORK(&dev->vqs[i].inject, vduse_vq_irq_inject);
1522         INIT_WORK(&dev->vqs[i].kick, vduse_vq_kick_work);
1523         spin_lock_init(&dev->vqs[i].kick_lock);
1524         spin_lock_init(&dev->vqs[i].irq_lock);
1525     }
1526 
1527     ret = idr_alloc(&vduse_idr, dev, 1, VDUSE_DEV_MAX, GFP_KERNEL);
1528     if (ret < 0)
1529         goto err_idr;
1530 
1531     dev->minor = ret;
1532     dev->msg_timeout = VDUSE_MSG_DEFAULT_TIMEOUT;
1533     dev->dev = device_create_with_groups(vduse_class, NULL,
1534                 MKDEV(MAJOR(vduse_major), dev->minor),
1535                 dev, vduse_dev_groups, "%s", config->name);
1536     if (IS_ERR(dev->dev)) {
1537         ret = PTR_ERR(dev->dev);
1538         goto err_dev;
1539     }
1540     __module_get(THIS_MODULE);
1541 
1542     return 0;
1543 err_dev:
1544     idr_remove(&vduse_idr, dev->minor);
1545 err_idr:
1546     kfree(dev->vqs);
1547 err_vqs:
1548     vduse_domain_destroy(dev->domain);
1549 err_domain:
1550     kfree(dev->name);
1551 err_str:
1552     vduse_dev_destroy(dev);
1553 err:
1554     return ret;
1555 }
1556 
1557 static long vduse_ioctl(struct file *file, unsigned int cmd,
1558             unsigned long arg)
1559 {
1560     int ret;
1561     void __user *argp = (void __user *)arg;
1562     struct vduse_control *control = file->private_data;
1563 
1564     mutex_lock(&vduse_lock);
1565     switch (cmd) {
1566     case VDUSE_GET_API_VERSION:
1567         ret = put_user(control->api_version, (u64 __user *)argp);
1568         break;
1569     case VDUSE_SET_API_VERSION: {
1570         u64 api_version;
1571 
1572         ret = -EFAULT;
1573         if (get_user(api_version, (u64 __user *)argp))
1574             break;
1575 
1576         ret = -EINVAL;
1577         if (api_version > VDUSE_API_VERSION)
1578             break;
1579 
1580         ret = 0;
1581         control->api_version = api_version;
1582         break;
1583     }
1584     case VDUSE_CREATE_DEV: {
1585         struct vduse_dev_config config;
1586         unsigned long size = offsetof(struct vduse_dev_config, config);
1587         void *buf;
1588 
1589         ret = -EFAULT;
1590         if (copy_from_user(&config, argp, size))
1591             break;
1592 
1593         ret = -EINVAL;
1594         if (vduse_validate_config(&config) == false)
1595             break;
1596 
1597         buf = vmemdup_user(argp + size, config.config_size);
1598         if (IS_ERR(buf)) {
1599             ret = PTR_ERR(buf);
1600             break;
1601         }
1602         config.name[VDUSE_NAME_MAX - 1] = '\0';
1603         ret = vduse_create_dev(&config, buf, control->api_version);
1604         if (ret)
1605             kvfree(buf);
1606         break;
1607     }
1608     case VDUSE_DESTROY_DEV: {
1609         char name[VDUSE_NAME_MAX];
1610 
1611         ret = -EFAULT;
1612         if (copy_from_user(name, argp, VDUSE_NAME_MAX))
1613             break;
1614 
1615         name[VDUSE_NAME_MAX - 1] = '\0';
1616         ret = vduse_destroy_dev(name);
1617         break;
1618     }
1619     default:
1620         ret = -EINVAL;
1621         break;
1622     }
1623     mutex_unlock(&vduse_lock);
1624 
1625     return ret;
1626 }
1627 
1628 static int vduse_release(struct inode *inode, struct file *file)
1629 {
1630     struct vduse_control *control = file->private_data;
1631 
1632     kfree(control);
1633     return 0;
1634 }
1635 
1636 static int vduse_open(struct inode *inode, struct file *file)
1637 {
1638     struct vduse_control *control;
1639 
1640     control = kmalloc(sizeof(struct vduse_control), GFP_KERNEL);
1641     if (!control)
1642         return -ENOMEM;
1643 
1644     control->api_version = VDUSE_API_VERSION;
1645     file->private_data = control;
1646 
1647     return 0;
1648 }
1649 
1650 static const struct file_operations vduse_ctrl_fops = {
1651     .owner      = THIS_MODULE,
1652     .open       = vduse_open,
1653     .release    = vduse_release,
1654     .unlocked_ioctl = vduse_ioctl,
1655     .compat_ioctl   = compat_ptr_ioctl,
1656     .llseek     = noop_llseek,
1657 };
1658 
1659 static char *vduse_devnode(struct device *dev, umode_t *mode)
1660 {
1661     return kasprintf(GFP_KERNEL, "vduse/%s", dev_name(dev));
1662 }
1663 
1664 struct vduse_mgmt_dev {
1665     struct vdpa_mgmt_dev mgmt_dev;
1666     struct device dev;
1667 };
1668 
1669 static struct vduse_mgmt_dev *vduse_mgmt;
1670 
1671 static int vduse_dev_init_vdpa(struct vduse_dev *dev, const char *name)
1672 {
1673     struct vduse_vdpa *vdev;
1674     int ret;
1675 
1676     if (dev->vdev)
1677         return -EEXIST;
1678 
1679     vdev = vdpa_alloc_device(struct vduse_vdpa, vdpa, dev->dev,
1680                  &vduse_vdpa_config_ops, 1, 1, name, true);
1681     if (IS_ERR(vdev))
1682         return PTR_ERR(vdev);
1683 
1684     dev->vdev = vdev;
1685     vdev->dev = dev;
1686     vdev->vdpa.dev.dma_mask = &vdev->vdpa.dev.coherent_dma_mask;
1687     ret = dma_set_mask_and_coherent(&vdev->vdpa.dev, DMA_BIT_MASK(64));
1688     if (ret) {
1689         put_device(&vdev->vdpa.dev);
1690         return ret;
1691     }
1692     set_dma_ops(&vdev->vdpa.dev, &vduse_dev_dma_ops);
1693     vdev->vdpa.dma_dev = &vdev->vdpa.dev;
1694     vdev->vdpa.mdev = &vduse_mgmt->mgmt_dev;
1695 
1696     return 0;
1697 }
1698 
1699 static int vdpa_dev_add(struct vdpa_mgmt_dev *mdev, const char *name,
1700             const struct vdpa_dev_set_config *config)
1701 {
1702     struct vduse_dev *dev;
1703     int ret;
1704 
1705     mutex_lock(&vduse_lock);
1706     dev = vduse_find_dev(name);
1707     if (!dev || !vduse_dev_is_ready(dev)) {
1708         mutex_unlock(&vduse_lock);
1709         return -EINVAL;
1710     }
1711     ret = vduse_dev_init_vdpa(dev, name);
1712     mutex_unlock(&vduse_lock);
1713     if (ret)
1714         return ret;
1715 
1716     ret = _vdpa_register_device(&dev->vdev->vdpa, dev->vq_num);
1717     if (ret) {
1718         put_device(&dev->vdev->vdpa.dev);
1719         return ret;
1720     }
1721 
1722     return 0;
1723 }
1724 
1725 static void vdpa_dev_del(struct vdpa_mgmt_dev *mdev, struct vdpa_device *dev)
1726 {
1727     _vdpa_unregister_device(dev);
1728 }
1729 
1730 static const struct vdpa_mgmtdev_ops vdpa_dev_mgmtdev_ops = {
1731     .dev_add = vdpa_dev_add,
1732     .dev_del = vdpa_dev_del,
1733 };
1734 
1735 static struct virtio_device_id id_table[] = {
1736     { VIRTIO_ID_BLOCK, VIRTIO_DEV_ANY_ID },
1737     { 0 },
1738 };
1739 
1740 static void vduse_mgmtdev_release(struct device *dev)
1741 {
1742     struct vduse_mgmt_dev *mgmt_dev;
1743 
1744     mgmt_dev = container_of(dev, struct vduse_mgmt_dev, dev);
1745     kfree(mgmt_dev);
1746 }
1747 
1748 static int vduse_mgmtdev_init(void)
1749 {
1750     int ret;
1751 
1752     vduse_mgmt = kzalloc(sizeof(*vduse_mgmt), GFP_KERNEL);
1753     if (!vduse_mgmt)
1754         return -ENOMEM;
1755 
1756     ret = dev_set_name(&vduse_mgmt->dev, "vduse");
1757     if (ret) {
1758         kfree(vduse_mgmt);
1759         return ret;
1760     }
1761 
1762     vduse_mgmt->dev.release = vduse_mgmtdev_release;
1763 
1764     ret = device_register(&vduse_mgmt->dev);
1765     if (ret)
1766         goto dev_reg_err;
1767 
1768     vduse_mgmt->mgmt_dev.id_table = id_table;
1769     vduse_mgmt->mgmt_dev.ops = &vdpa_dev_mgmtdev_ops;
1770     vduse_mgmt->mgmt_dev.device = &vduse_mgmt->dev;
1771     ret = vdpa_mgmtdev_register(&vduse_mgmt->mgmt_dev);
1772     if (ret)
1773         device_unregister(&vduse_mgmt->dev);
1774 
1775     return ret;
1776 
1777 dev_reg_err:
1778     put_device(&vduse_mgmt->dev);
1779     return ret;
1780 }
1781 
1782 static void vduse_mgmtdev_exit(void)
1783 {
1784     vdpa_mgmtdev_unregister(&vduse_mgmt->mgmt_dev);
1785     device_unregister(&vduse_mgmt->dev);
1786 }
1787 
1788 static int vduse_init(void)
1789 {
1790     int ret;
1791     struct device *dev;
1792 
1793     vduse_class = class_create(THIS_MODULE, "vduse");
1794     if (IS_ERR(vduse_class))
1795         return PTR_ERR(vduse_class);
1796 
1797     vduse_class->devnode = vduse_devnode;
1798 
1799     ret = alloc_chrdev_region(&vduse_major, 0, VDUSE_DEV_MAX, "vduse");
1800     if (ret)
1801         goto err_chardev_region;
1802 
1803     /* /dev/vduse/control */
1804     cdev_init(&vduse_ctrl_cdev, &vduse_ctrl_fops);
1805     vduse_ctrl_cdev.owner = THIS_MODULE;
1806     ret = cdev_add(&vduse_ctrl_cdev, vduse_major, 1);
1807     if (ret)
1808         goto err_ctrl_cdev;
1809 
1810     dev = device_create(vduse_class, NULL, vduse_major, NULL, "control");
1811     if (IS_ERR(dev)) {
1812         ret = PTR_ERR(dev);
1813         goto err_device;
1814     }
1815 
1816     /* /dev/vduse/$DEVICE */
1817     cdev_init(&vduse_cdev, &vduse_dev_fops);
1818     vduse_cdev.owner = THIS_MODULE;
1819     ret = cdev_add(&vduse_cdev, MKDEV(MAJOR(vduse_major), 1),
1820                VDUSE_DEV_MAX - 1);
1821     if (ret)
1822         goto err_cdev;
1823 
1824     vduse_irq_wq = alloc_workqueue("vduse-irq",
1825                 WQ_HIGHPRI | WQ_SYSFS | WQ_UNBOUND, 0);
1826     if (!vduse_irq_wq) {
1827         ret = -ENOMEM;
1828         goto err_wq;
1829     }
1830 
1831     ret = vduse_domain_init();
1832     if (ret)
1833         goto err_domain;
1834 
1835     ret = vduse_mgmtdev_init();
1836     if (ret)
1837         goto err_mgmtdev;
1838 
1839     return 0;
1840 err_mgmtdev:
1841     vduse_domain_exit();
1842 err_domain:
1843     destroy_workqueue(vduse_irq_wq);
1844 err_wq:
1845     cdev_del(&vduse_cdev);
1846 err_cdev:
1847     device_destroy(vduse_class, vduse_major);
1848 err_device:
1849     cdev_del(&vduse_ctrl_cdev);
1850 err_ctrl_cdev:
1851     unregister_chrdev_region(vduse_major, VDUSE_DEV_MAX);
1852 err_chardev_region:
1853     class_destroy(vduse_class);
1854     return ret;
1855 }
1856 module_init(vduse_init);
1857 
1858 static void vduse_exit(void)
1859 {
1860     vduse_mgmtdev_exit();
1861     vduse_domain_exit();
1862     destroy_workqueue(vduse_irq_wq);
1863     cdev_del(&vduse_cdev);
1864     device_destroy(vduse_class, vduse_major);
1865     cdev_del(&vduse_ctrl_cdev);
1866     unregister_chrdev_region(vduse_major, VDUSE_DEV_MAX);
1867     class_destroy(vduse_class);
1868 }
1869 module_exit(vduse_exit);
1870 
1871 MODULE_LICENSE(DRV_LICENSE);
1872 MODULE_AUTHOR(DRV_AUTHOR);
1873 MODULE_DESCRIPTION(DRV_DESC);