0001 // SPDX-License-Identifier: GPL-2.0
0002 /*
0003  * Copyright (C) 2018-2020 Intel Corporation.
0004  * Copyright (C) 2020 Red Hat, Inc.
0005  *
0006  * Author: Tiwei Bie <tiwei.bie@intel.com>
0007  *         Jason Wang <jasowang@redhat.com>
0008  *
0009  * Thanks to Michael S. Tsirkin for the valuable comments and
0010  * suggestions, and thanks to Cunming Liang and Zhihong Wang for
0011  * all their support.
0012  */
0013 
0014 #include <linux/kernel.h>
0015 #include <linux/module.h>
0016 #include <linux/cdev.h>
0017 #include <linux/device.h>
0018 #include <linux/mm.h>
0019 #include <linux/slab.h>
0020 #include <linux/iommu.h>
0021 #include <linux/uuid.h>
0022 #include <linux/vdpa.h>
0023 #include <linux/nospec.h>
0024 #include <linux/vhost.h>
0025 
0026 #include "vhost.h"
0027 
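     /*
      * Backend features advertised to userspace through
      * VHOST_GET_BACKEND_FEATURES: v2 IOTLB messages, batched IOTLB
      * updates and per-message address space IDs (ASIDs).
      * VHOST_BACKEND_F_SUSPEND is reported separately, only when the
      * parent vDPA device implements the suspend() op.
      */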
0028 enum {
0029     VHOST_VDPA_BACKEND_FEATURES =
0030     (1ULL << VHOST_BACKEND_F_IOTLB_MSG_V2) |
0031     (1ULL << VHOST_BACKEND_F_IOTLB_BATCH) |
0032     (1ULL << VHOST_BACKEND_F_IOTLB_ASID),
0033 };
0034 
0035 #define VHOST_VDPA_DEV_MAX (1U << MINORBITS)
0036 
0037 #define VHOST_VDPA_IOTLB_BUCKETS 16
0038 
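     /*
      * Each address space (AS) exposed by the device is identified by an
      * ASID and carries its own vhost IOTLB. Instances are kept in the
      * small hash table v->as[], indexed by asid % VHOST_VDPA_IOTLB_BUCKETS.
      */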
0039 struct vhost_vdpa_as {
0040     struct hlist_node hash_link;
0041     struct vhost_iotlb iotlb;
0042     u32 id;
0043 };
0044 
0045 struct vhost_vdpa {
0046     struct vhost_dev vdev;
0047     struct iommu_domain *domain;
0048     struct vhost_virtqueue *vqs;
0049     struct completion completion;
0050     struct vdpa_device *vdpa;
0051     struct hlist_head as[VHOST_VDPA_IOTLB_BUCKETS];
0052     struct device dev;
0053     struct cdev cdev;
0054     atomic_t opened;
0055     u32 nvqs;
0056     int virtio_id;
0057     int minor;
0058     struct eventfd_ctx *config_ctx;
0059     int in_batch;
0060     struct vdpa_iova_range range;
0061     u32 batch_asid;
0062 };
0063 
0064 static DEFINE_IDA(vhost_vdpa_ida);
0065 
0066 static dev_t vhost_vdpa_major;
0067 
0068 static inline u32 iotlb_to_asid(struct vhost_iotlb *iotlb)
0069 {
0070     struct vhost_vdpa_as *as = container_of(iotlb, struct
0071                         vhost_vdpa_as, iotlb);
0072     return as->id;
0073 }
0074 
0075 static struct vhost_vdpa_as *asid_to_as(struct vhost_vdpa *v, u32 asid)
0076 {
0077     struct hlist_head *head = &v->as[asid % VHOST_VDPA_IOTLB_BUCKETS];
0078     struct vhost_vdpa_as *as;
0079 
0080     hlist_for_each_entry(as, head, hash_link)
0081         if (as->id == asid)
0082             return as;
0083 
0084     return NULL;
0085 }
0086 
0087 static struct vhost_iotlb *asid_to_iotlb(struct vhost_vdpa *v, u32 asid)
0088 {
0089     struct vhost_vdpa_as *as = asid_to_as(v, asid);
0090 
0091     if (!as)
0092         return NULL;
0093 
0094     return &as->iotlb;
0095 }
0096 
0097 static struct vhost_vdpa_as *vhost_vdpa_alloc_as(struct vhost_vdpa *v, u32 asid)
0098 {
0099     struct hlist_head *head = &v->as[asid % VHOST_VDPA_IOTLB_BUCKETS];
0100     struct vhost_vdpa_as *as;
0101 
0102     if (asid_to_as(v, asid))
0103         return NULL;
0104 
0105     if (asid >= v->vdpa->nas)
0106         return NULL;
0107 
0108     as = kmalloc(sizeof(*as), GFP_KERNEL);
0109     if (!as)
0110         return NULL;
0111 
0112     vhost_iotlb_init(&as->iotlb, 0, 0);
0113     as->id = asid;
0114     hlist_add_head(&as->hash_link, head);
0115 
0116     return as;
0117 }
0118 
0119 static struct vhost_vdpa_as *vhost_vdpa_find_alloc_as(struct vhost_vdpa *v,
0120                               u32 asid)
0121 {
0122     struct vhost_vdpa_as *as = asid_to_as(v, asid);
0123 
0124     if (as)
0125         return as;
0126 
0127     return vhost_vdpa_alloc_as(v, asid);
0128 }
0129 
0130 static int vhost_vdpa_remove_as(struct vhost_vdpa *v, u32 asid)
0131 {
0132     struct vhost_vdpa_as *as = asid_to_as(v, asid);
0133 
0134     if (!as)
0135         return -EINVAL;
0136 
0137     hlist_del(&as->hash_link);
0138     vhost_iotlb_reset(&as->iotlb);
0139     kfree(as);
0140 
0141     return 0;
0142 }
0143 
0144 static void handle_vq_kick(struct vhost_work *work)
0145 {
0146     struct vhost_virtqueue *vq = container_of(work, struct vhost_virtqueue,
0147                           poll.work);
0148     struct vhost_vdpa *v = container_of(vq->dev, struct vhost_vdpa, vdev);
0149     const struct vdpa_config_ops *ops = v->vdpa->config;
0150 
0151     ops->kick_vq(v->vdpa, vq - v->vqs);
0152 }
0153 
0154 static irqreturn_t vhost_vdpa_virtqueue_cb(void *private)
0155 {
0156     struct vhost_virtqueue *vq = private;
0157     struct eventfd_ctx *call_ctx = vq->call_ctx.ctx;
0158 
0159     if (call_ctx)
0160         eventfd_signal(call_ctx, 1);
0161 
0162     return IRQ_HANDLED;
0163 }
0164 
0165 static irqreturn_t vhost_vdpa_config_cb(void *private)
0166 {
0167     struct vhost_vdpa *v = private;
0168     struct eventfd_ctx *config_ctx = v->config_ctx;
0169 
0170     if (config_ctx)
0171         eventfd_signal(config_ctx, 1);
0172 
0173     return IRQ_HANDLED;
0174 }
0175 
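     /*
      * If the parent device exposes a per-virtqueue interrupt, register the
      * vq's call eventfd as an irq bypass producer so the interrupt can be
      * delivered straight to its consumer (e.g. a KVM irqfd) instead of
      * going through the eventfd signalling path.
      */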
0176 static void vhost_vdpa_setup_vq_irq(struct vhost_vdpa *v, u16 qid)
0177 {
0178     struct vhost_virtqueue *vq = &v->vqs[qid];
0179     const struct vdpa_config_ops *ops = v->vdpa->config;
0180     struct vdpa_device *vdpa = v->vdpa;
0181     int ret, irq;
0182 
0183     if (!ops->get_vq_irq)
0184         return;
0185 
0186     irq = ops->get_vq_irq(vdpa, qid);
0187     if (irq < 0)
0188         return;
0189 
0190     irq_bypass_unregister_producer(&vq->call_ctx.producer);
0191     if (!vq->call_ctx.ctx)
0192         return;
0193 
0194     vq->call_ctx.producer.token = vq->call_ctx.ctx;
0195     vq->call_ctx.producer.irq = irq;
0196     ret = irq_bypass_register_producer(&vq->call_ctx.producer);
0197     if (unlikely(ret))
0198         dev_info(&v->dev, "vq %u, irq bypass producer (token %p) registration failed, ret = %d\n",
0199              qid, vq->call_ctx.producer.token, ret);
0200 }
0201 
0202 static void vhost_vdpa_unsetup_vq_irq(struct vhost_vdpa *v, u16 qid)
0203 {
0204     struct vhost_virtqueue *vq = &v->vqs[qid];
0205 
0206     irq_bypass_unregister_producer(&vq->call_ctx.producer);
0207 }
0208 
0209 static int vhost_vdpa_reset(struct vhost_vdpa *v)
0210 {
0211     struct vdpa_device *vdpa = v->vdpa;
0212 
0213     v->in_batch = 0;
0214 
0215     return vdpa_reset(vdpa);
0216 }
0217 
0218 static long vhost_vdpa_get_device_id(struct vhost_vdpa *v, u8 __user *argp)
0219 {
0220     struct vdpa_device *vdpa = v->vdpa;
0221     const struct vdpa_config_ops *ops = vdpa->config;
0222     u32 device_id;
0223 
0224     device_id = ops->get_device_id(vdpa);
0225 
0226     if (copy_to_user(argp, &device_id, sizeof(device_id)))
0227         return -EFAULT;
0228 
0229     return 0;
0230 }
0231 
0232 static long vhost_vdpa_get_status(struct vhost_vdpa *v, u8 __user *statusp)
0233 {
0234     struct vdpa_device *vdpa = v->vdpa;
0235     const struct vdpa_config_ops *ops = vdpa->config;
0236     u8 status;
0237 
0238     status = ops->get_status(vdpa);
0239 
0240     if (copy_to_user(statusp, &status, sizeof(status)))
0241         return -EFAULT;
0242 
0243     return 0;
0244 }
0245 
0246 static long vhost_vdpa_set_status(struct vhost_vdpa *v, u8 __user *statusp)
0247 {
0248     struct vdpa_device *vdpa = v->vdpa;
0249     const struct vdpa_config_ops *ops = vdpa->config;
0250     u8 status, status_old;
0251     u32 nvqs = v->nvqs;
0252     int ret;
0253     u16 i;
0254 
0255     if (copy_from_user(&status, statusp, sizeof(status)))
0256         return -EFAULT;
0257 
0258     status_old = ops->get_status(vdpa);
0259 
0260     /*
0261      * Userspace shouldn't remove status bits unless it resets the
0262      * whole status to 0.
0263      */
0264     if (status != 0 && (status_old & ~status) != 0)
0265         return -EINVAL;
0266 
0267     if ((status_old & VIRTIO_CONFIG_S_DRIVER_OK) && !(status & VIRTIO_CONFIG_S_DRIVER_OK))
0268         for (i = 0; i < nvqs; i++)
0269             vhost_vdpa_unsetup_vq_irq(v, i);
0270 
0271     if (status == 0) {
0272         ret = vdpa_reset(vdpa);
0273         if (ret)
0274             return ret;
0275     } else
0276         vdpa_set_status(vdpa, status);
0277 
0278     if ((status & VIRTIO_CONFIG_S_DRIVER_OK) && !(status_old & VIRTIO_CONFIG_S_DRIVER_OK))
0279         for (i = 0; i < nvqs; i++)
0280             vhost_vdpa_setup_vq_irq(v, i);
0281 
0282     return 0;
0283 }
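     /*
      * Illustrative (hypothetical) bring-up sequence from userspace; the
      * snippet assumes <linux/vhost.h>, <linux/virtio_config.h> and an open
      * /dev/vhost-vdpa-N descriptor named vdpa_fd, and only ever adds
      * status bits, as required by the check above:
      *
      *   __u8 s = VIRTIO_CONFIG_S_ACKNOWLEDGE | VIRTIO_CONFIG_S_DRIVER;
      *   ioctl(vdpa_fd, VHOST_VDPA_SET_STATUS, &s);
      *   // ... negotiate features ...
      *   s |= VIRTIO_CONFIG_S_FEATURES_OK;
      *   ioctl(vdpa_fd, VHOST_VDPA_SET_STATUS, &s);
      *   // ... set up vrings and the IOTLB ...
      *   s |= VIRTIO_CONFIG_S_DRIVER_OK;
      *   ioctl(vdpa_fd, VHOST_VDPA_SET_STATUS, &s);
      */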
0284 
0285 static int vhost_vdpa_config_validate(struct vhost_vdpa *v,
0286                       struct vhost_vdpa_config *c)
0287 {
0288     struct vdpa_device *vdpa = v->vdpa;
0289     size_t size = vdpa->config->get_config_size(vdpa);
0290 
0291     if (c->len == 0 || c->off > size)
0292         return -EINVAL;
0293 
0294     if (c->len > size - c->off)
0295         return -E2BIG;
0296 
0297     return 0;
0298 }
0299 
0300 static long vhost_vdpa_get_config(struct vhost_vdpa *v,
0301                   struct vhost_vdpa_config __user *c)
0302 {
0303     struct vdpa_device *vdpa = v->vdpa;
0304     struct vhost_vdpa_config config;
0305     unsigned long size = offsetof(struct vhost_vdpa_config, buf);
0306     u8 *buf;
0307 
0308     if (copy_from_user(&config, c, size))
0309         return -EFAULT;
0310     if (vhost_vdpa_config_validate(v, &config))
0311         return -EINVAL;
0312     buf = kvzalloc(config.len, GFP_KERNEL);
0313     if (!buf)
0314         return -ENOMEM;
0315 
0316     vdpa_get_config(vdpa, config.off, buf, config.len);
0317 
0318     if (copy_to_user(c->buf, buf, config.len)) {
0319         kvfree(buf);
0320         return -EFAULT;
0321     }
0322 
0323     kvfree(buf);
0324     return 0;
0325 }
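     /*
      * Illustrative (hypothetical) userspace read of the first six config
      * bytes (e.g. a virtio-net MAC address); vdpa_fd is assumed to be an
      * open /dev/vhost-vdpa-N descriptor:
      *
      *   struct vhost_vdpa_config *cfg = calloc(1, sizeof(*cfg) + 6);
      *
      *   cfg->off = 0;
      *   cfg->len = 6;
      *   if (ioctl(vdpa_fd, VHOST_VDPA_GET_CONFIG, cfg) < 0)
      *       perror("VHOST_VDPA_GET_CONFIG");
      *   free(cfg);
      */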
0326 
0327 static long vhost_vdpa_set_config(struct vhost_vdpa *v,
0328                   struct vhost_vdpa_config __user *c)
0329 {
0330     struct vdpa_device *vdpa = v->vdpa;
0331     struct vhost_vdpa_config config;
0332     unsigned long size = offsetof(struct vhost_vdpa_config, buf);
0333     u8 *buf;
0334 
0335     if (copy_from_user(&config, c, size))
0336         return -EFAULT;
0337     if (vhost_vdpa_config_validate(v, &config))
0338         return -EINVAL;
0339 
0340     buf = vmemdup_user(c->buf, config.len);
0341     if (IS_ERR(buf))
0342         return PTR_ERR(buf);
0343 
0344     vdpa_set_config(vdpa, config.off, buf, config.len);
0345 
0346     kvfree(buf);
0347     return 0;
0348 }
0349 
0350 static bool vhost_vdpa_can_suspend(const struct vhost_vdpa *v)
0351 {
0352     struct vdpa_device *vdpa = v->vdpa;
0353     const struct vdpa_config_ops *ops = vdpa->config;
0354 
0355     return ops->suspend;
0356 }
0357 
0358 static long vhost_vdpa_get_features(struct vhost_vdpa *v, u64 __user *featurep)
0359 {
0360     struct vdpa_device *vdpa = v->vdpa;
0361     const struct vdpa_config_ops *ops = vdpa->config;
0362     u64 features;
0363 
0364     features = ops->get_device_features(vdpa);
0365 
0366     if (copy_to_user(featurep, &features, sizeof(features)))
0367         return -EFAULT;
0368 
0369     return 0;
0370 }
0371 
0372 static long vhost_vdpa_set_features(struct vhost_vdpa *v, u64 __user *featurep)
0373 {
0374     struct vdpa_device *vdpa = v->vdpa;
0375     const struct vdpa_config_ops *ops = vdpa->config;
0376     u64 features;
0377 
0378     /*
0379      * It's not allowed to change the features after they have
0380      * been negotiated.
0381      */
0382     if (ops->get_status(vdpa) & VIRTIO_CONFIG_S_FEATURES_OK)
0383         return -EBUSY;
0384 
0385     if (copy_from_user(&features, featurep, sizeof(features)))
0386         return -EFAULT;
0387 
0388     if (vdpa_set_features(vdpa, features))
0389         return -EINVAL;
0390 
0391     return 0;
0392 }
0393 
0394 static long vhost_vdpa_get_vring_num(struct vhost_vdpa *v, u16 __user *argp)
0395 {
0396     struct vdpa_device *vdpa = v->vdpa;
0397     const struct vdpa_config_ops *ops = vdpa->config;
0398     u16 num;
0399 
0400     num = ops->get_vq_num_max(vdpa);
0401 
0402     if (copy_to_user(argp, &num, sizeof(num)))
0403         return -EFAULT;
0404 
0405     return 0;
0406 }
0407 
0408 static void vhost_vdpa_config_put(struct vhost_vdpa *v)
0409 {
0410     if (v->config_ctx) {
0411         eventfd_ctx_put(v->config_ctx);
0412         v->config_ctx = NULL;
0413     }
0414 }
0415 
0416 static long vhost_vdpa_set_config_call(struct vhost_vdpa *v, u32 __user *argp)
0417 {
0418     struct vdpa_callback cb;
0419     int fd;
0420     struct eventfd_ctx *ctx;
0421 
0422     cb.callback = vhost_vdpa_config_cb;
0423     cb.private = v;
0424     if (copy_from_user(&fd, argp, sizeof(fd)))
0425         return  -EFAULT;
0426 
0427     ctx = fd == VHOST_FILE_UNBIND ? NULL : eventfd_ctx_fdget(fd);
0428     swap(ctx, v->config_ctx);
0429 
0430     if (!IS_ERR_OR_NULL(ctx))
0431         eventfd_ctx_put(ctx);
0432 
0433     if (IS_ERR(v->config_ctx)) {
0434         long ret = PTR_ERR(v->config_ctx);
0435 
0436         v->config_ctx = NULL;
0437         return ret;
0438     }
0439 
0440     v->vdpa->config->set_config_cb(v->vdpa, &cb);
0441 
0442     return 0;
0443 }
0444 
0445 static long vhost_vdpa_get_iova_range(struct vhost_vdpa *v, u32 __user *argp)
0446 {
0447     struct vhost_vdpa_iova_range range = {
0448         .first = v->range.first,
0449         .last = v->range.last,
0450     };
0451 
0452     if (copy_to_user(argp, &range, sizeof(range)))
0453         return -EFAULT;
0454     return 0;
0455 }
0456 
0457 static long vhost_vdpa_get_config_size(struct vhost_vdpa *v, u32 __user *argp)
0458 {
0459     struct vdpa_device *vdpa = v->vdpa;
0460     const struct vdpa_config_ops *ops = vdpa->config;
0461     u32 size;
0462 
0463     size = ops->get_config_size(vdpa);
0464 
0465     if (copy_to_user(argp, &size, sizeof(size)))
0466         return -EFAULT;
0467 
0468     return 0;
0469 }
0470 
0471 static long vhost_vdpa_get_vqs_count(struct vhost_vdpa *v, u32 __user *argp)
0472 {
0473     struct vdpa_device *vdpa = v->vdpa;
0474 
0475     if (copy_to_user(argp, &vdpa->nvqs, sizeof(vdpa->nvqs)))
0476         return -EFAULT;
0477 
0478     return 0;
0479 }
0480 
0481 /* After a successful return of this ioctl the device must not process any
0482  * more virtqueue descriptors. The device can still answer reads or writes of
0483  * config fields as if it were not suspended. In particular, writing a value
0484  * of 1 to "queue_enable" will not make the device start processing buffers.
0485  */
0486 static long vhost_vdpa_suspend(struct vhost_vdpa *v)
0487 {
0488     struct vdpa_device *vdpa = v->vdpa;
0489     const struct vdpa_config_ops *ops = vdpa->config;
0490 
0491     if (!ops->suspend)
0492         return -EOPNOTSUPP;
0493 
0494     return ops->suspend(vdpa);
0495 }
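     /*
      * Illustrative (hypothetical) use from userspace, gated on the
      * VHOST_BACKEND_F_SUSPEND bit returned by VHOST_GET_BACKEND_FEATURES:
      *
      *   if (backend_features & (1ULL << VHOST_BACKEND_F_SUSPEND))
      *       ioctl(vdpa_fd, VHOST_VDPA_SUSPEND);
      */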
0496 
0497 static long vhost_vdpa_vring_ioctl(struct vhost_vdpa *v, unsigned int cmd,
0498                    void __user *argp)
0499 {
0500     struct vdpa_device *vdpa = v->vdpa;
0501     const struct vdpa_config_ops *ops = vdpa->config;
0502     struct vdpa_vq_state vq_state;
0503     struct vdpa_callback cb;
0504     struct vhost_virtqueue *vq;
0505     struct vhost_vring_state s;
0506     u32 idx;
0507     long r;
0508 
0509     r = get_user(idx, (u32 __user *)argp);
0510     if (r < 0)
0511         return r;
0512 
0513     if (idx >= v->nvqs)
0514         return -ENOBUFS;
0515 
0516     idx = array_index_nospec(idx, v->nvqs);
0517     vq = &v->vqs[idx];
0518 
0519     switch (cmd) {
0520     case VHOST_VDPA_SET_VRING_ENABLE:
0521         if (copy_from_user(&s, argp, sizeof(s)))
0522             return -EFAULT;
0523         ops->set_vq_ready(vdpa, idx, s.num);
0524         return 0;
0525     case VHOST_VDPA_GET_VRING_GROUP:
0526         if (!ops->get_vq_group)
0527             return -EOPNOTSUPP;
0528         s.index = idx;
0529         s.num = ops->get_vq_group(vdpa, idx);
0530         if (s.num >= vdpa->ngroups)
0531             return -EIO;
0532         else if (copy_to_user(argp, &s, sizeof(s)))
0533             return -EFAULT;
0534         return 0;
0535     case VHOST_VDPA_SET_GROUP_ASID:
0536         if (copy_from_user(&s, argp, sizeof(s)))
0537             return -EFAULT;
0538         if (s.num >= vdpa->nas)
0539             return -EINVAL;
0540         if (!ops->set_group_asid)
0541             return -EOPNOTSUPP;
0542         return ops->set_group_asid(vdpa, idx, s.num);
0543     case VHOST_GET_VRING_BASE:
0544         r = ops->get_vq_state(v->vdpa, idx, &vq_state);
0545         if (r)
0546             return r;
0547 
0548         vq->last_avail_idx = vq_state.split.avail_index;
0549         break;
0550     }
0551 
0552     r = vhost_vring_ioctl(&v->vdev, cmd, argp);
0553     if (r)
0554         return r;
0555 
0556     switch (cmd) {
0557     case VHOST_SET_VRING_ADDR:
0558         if (ops->set_vq_address(vdpa, idx,
0559                     (u64)(uintptr_t)vq->desc,
0560                     (u64)(uintptr_t)vq->avail,
0561                     (u64)(uintptr_t)vq->used))
0562             r = -EINVAL;
0563         break;
0564 
0565     case VHOST_SET_VRING_BASE:
0566         vq_state.split.avail_index = vq->last_avail_idx;
0567         if (ops->set_vq_state(vdpa, idx, &vq_state))
0568             r = -EINVAL;
0569         break;
0570 
0571     case VHOST_SET_VRING_CALL:
0572         if (vq->call_ctx.ctx) {
0573             cb.callback = vhost_vdpa_virtqueue_cb;
0574             cb.private = vq;
0575         } else {
0576             cb.callback = NULL;
0577             cb.private = NULL;
0578         }
0579         ops->set_vq_cb(vdpa, idx, &cb);
0580         vhost_vdpa_setup_vq_irq(v, idx);
0581         break;
0582 
0583     case VHOST_SET_VRING_NUM:
0584         ops->set_vq_num(vdpa, idx, vq->num);
0585         break;
0586     }
0587 
0588     return r;
0589 }
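     /*
      * Illustrative (hypothetical) per-virtqueue setup order as a userspace
      * driver would issue it before setting DRIVER_OK; kick_efd/call_efd are
      * eventfds, qid is the queue index, and addr carries the IOVAs of the
      * descriptor, available and used rings:
      *
      *   struct vhost_vring_state num  = { .index = qid, .num = 256 };
      *   struct vhost_vring_addr  addr = { .index = qid };  // + ring IOVAs
      *   struct vhost_vring_state base = { .index = qid, .num = 0 };
      *   struct vhost_vring_file  kick = { .index = qid, .fd = kick_efd };
      *   struct vhost_vring_file  call = { .index = qid, .fd = call_efd };
      *   struct vhost_vring_state en   = { .index = qid, .num = 1 };
      *
      *   ioctl(vdpa_fd, VHOST_SET_VRING_NUM, &num);
      *   ioctl(vdpa_fd, VHOST_SET_VRING_ADDR, &addr);
      *   ioctl(vdpa_fd, VHOST_SET_VRING_BASE, &base);
      *   ioctl(vdpa_fd, VHOST_SET_VRING_KICK, &kick);
      *   ioctl(vdpa_fd, VHOST_SET_VRING_CALL, &call);
      *   ioctl(vdpa_fd, VHOST_VDPA_SET_VRING_ENABLE, &en);
      */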
0590 
0591 static long vhost_vdpa_unlocked_ioctl(struct file *filep,
0592                       unsigned int cmd, unsigned long arg)
0593 {
0594     struct vhost_vdpa *v = filep->private_data;
0595     struct vhost_dev *d = &v->vdev;
0596     void __user *argp = (void __user *)arg;
0597     u64 __user *featurep = argp;
0598     u64 features;
0599     long r = 0;
0600 
0601     if (cmd == VHOST_SET_BACKEND_FEATURES) {
0602         if (copy_from_user(&features, featurep, sizeof(features)))
0603             return -EFAULT;
0604         if (features & ~(VHOST_VDPA_BACKEND_FEATURES |
0605                  BIT_ULL(VHOST_BACKEND_F_SUSPEND)))
0606             return -EOPNOTSUPP;
0607         if ((features & BIT_ULL(VHOST_BACKEND_F_SUSPEND)) &&
0608              !vhost_vdpa_can_suspend(v))
0609             return -EOPNOTSUPP;
0610         vhost_set_backend_features(&v->vdev, features);
0611         return 0;
0612     }
0613 
0614     mutex_lock(&d->mutex);
0615 
0616     switch (cmd) {
0617     case VHOST_VDPA_GET_DEVICE_ID:
0618         r = vhost_vdpa_get_device_id(v, argp);
0619         break;
0620     case VHOST_VDPA_GET_STATUS:
0621         r = vhost_vdpa_get_status(v, argp);
0622         break;
0623     case VHOST_VDPA_SET_STATUS:
0624         r = vhost_vdpa_set_status(v, argp);
0625         break;
0626     case VHOST_VDPA_GET_CONFIG:
0627         r = vhost_vdpa_get_config(v, argp);
0628         break;
0629     case VHOST_VDPA_SET_CONFIG:
0630         r = vhost_vdpa_set_config(v, argp);
0631         break;
0632     case VHOST_GET_FEATURES:
0633         r = vhost_vdpa_get_features(v, argp);
0634         break;
0635     case VHOST_SET_FEATURES:
0636         r = vhost_vdpa_set_features(v, argp);
0637         break;
0638     case VHOST_VDPA_GET_VRING_NUM:
0639         r = vhost_vdpa_get_vring_num(v, argp);
0640         break;
0641     case VHOST_VDPA_GET_GROUP_NUM:
0642         if (copy_to_user(argp, &v->vdpa->ngroups,
0643                  sizeof(v->vdpa->ngroups)))
0644             r = -EFAULT;
0645         break;
0646     case VHOST_VDPA_GET_AS_NUM:
0647         if (copy_to_user(argp, &v->vdpa->nas, sizeof(v->vdpa->nas)))
0648             r = -EFAULT;
0649         break;
0650     case VHOST_SET_LOG_BASE:
0651     case VHOST_SET_LOG_FD:
0652         r = -ENOIOCTLCMD;
0653         break;
0654     case VHOST_VDPA_SET_CONFIG_CALL:
0655         r = vhost_vdpa_set_config_call(v, argp);
0656         break;
0657     case VHOST_GET_BACKEND_FEATURES:
0658         features = VHOST_VDPA_BACKEND_FEATURES;
0659         if (vhost_vdpa_can_suspend(v))
0660             features |= BIT_ULL(VHOST_BACKEND_F_SUSPEND);
0661         if (copy_to_user(featurep, &features, sizeof(features)))
0662             r = -EFAULT;
0663         break;
0664     case VHOST_VDPA_GET_IOVA_RANGE:
0665         r = vhost_vdpa_get_iova_range(v, argp);
0666         break;
0667     case VHOST_VDPA_GET_CONFIG_SIZE:
0668         r = vhost_vdpa_get_config_size(v, argp);
0669         break;
0670     case VHOST_VDPA_GET_VQS_COUNT:
0671         r = vhost_vdpa_get_vqs_count(v, argp);
0672         break;
0673     case VHOST_VDPA_SUSPEND:
0674         r = vhost_vdpa_suspend(v);
0675         break;
0676     default:
0677         r = vhost_dev_ioctl(&v->vdev, cmd, argp);
0678         if (r == -ENOIOCTLCMD)
0679             r = vhost_vdpa_vring_ioctl(v, cmd, argp);
0680         break;
0681     }
0682 
0683     mutex_unlock(&d->mutex);
0684     return r;
0685 }
0686 
0687 static void vhost_vdpa_pa_unmap(struct vhost_vdpa *v,
0688                 struct vhost_iotlb *iotlb,
0689                 u64 start, u64 last)
0690 {
0691     struct vhost_dev *dev = &v->vdev;
0692     struct vhost_iotlb_map *map;
0693     struct page *page;
0694     unsigned long pfn, pinned;
0695 
0696     while ((map = vhost_iotlb_itree_first(iotlb, start, last)) != NULL) {
0697         pinned = PFN_DOWN(map->size);
0698         for (pfn = PFN_DOWN(map->addr);
0699              pinned > 0; pfn++, pinned--) {
0700             page = pfn_to_page(pfn);
0701             if (map->perm & VHOST_ACCESS_WO)
0702                 set_page_dirty_lock(page);
0703             unpin_user_page(page);
0704         }
0705         atomic64_sub(PFN_DOWN(map->size), &dev->mm->pinned_vm);
0706         vhost_iotlb_map_free(iotlb, map);
0707     }
0708 }
0709 
0710 static void vhost_vdpa_va_unmap(struct vhost_vdpa *v,
0711                 struct vhost_iotlb *iotlb,
0712                 u64 start, u64 last)
0713 {
0714     struct vhost_iotlb_map *map;
0715     struct vdpa_map_file *map_file;
0716 
0717     while ((map = vhost_iotlb_itree_first(iotlb, start, last)) != NULL) {
0718         map_file = (struct vdpa_map_file *)map->opaque;
0719         fput(map_file->file);
0720         kfree(map_file);
0721         vhost_iotlb_map_free(iotlb, map);
0722     }
0723 }
0724 
0725 static void vhost_vdpa_iotlb_unmap(struct vhost_vdpa *v,
0726                    struct vhost_iotlb *iotlb,
0727                    u64 start, u64 last)
0728 {
0729     struct vdpa_device *vdpa = v->vdpa;
0730 
0731     if (vdpa->use_va)
0732         return vhost_vdpa_va_unmap(v, iotlb, start, last);
0733 
0734     return vhost_vdpa_pa_unmap(v, iotlb, start, last);
0735 }
0736 
0737 static int perm_to_iommu_flags(u32 perm)
0738 {
0739     int flags = 0;
0740 
0741     switch (perm) {
0742     case VHOST_ACCESS_WO:
0743         flags |= IOMMU_WRITE;
0744         break;
0745     case VHOST_ACCESS_RO:
0746         flags |= IOMMU_READ;
0747         break;
0748     case VHOST_ACCESS_RW:
0749         flags |= (IOMMU_WRITE | IOMMU_READ);
0750         break;
0751     default:
0752         WARN(1, "invalid vhost IOTLB permission\n");
0753         break;
0754     }
0755 
0756     return flags | IOMMU_CACHE;
0757 }
0758 
0759 static int vhost_vdpa_map(struct vhost_vdpa *v, struct vhost_iotlb *iotlb,
0760               u64 iova, u64 size, u64 pa, u32 perm, void *opaque)
0761 {
0762     struct vhost_dev *dev = &v->vdev;
0763     struct vdpa_device *vdpa = v->vdpa;
0764     const struct vdpa_config_ops *ops = vdpa->config;
0765     u32 asid = iotlb_to_asid(iotlb);
0766     int r = 0;
0767 
0768     r = vhost_iotlb_add_range_ctx(iotlb, iova, iova + size - 1,
0769                       pa, perm, opaque);
0770     if (r)
0771         return r;
0772 
0773     if (ops->dma_map) {
0774         r = ops->dma_map(vdpa, asid, iova, size, pa, perm, opaque);
0775     } else if (ops->set_map) {
0776         if (!v->in_batch)
0777             r = ops->set_map(vdpa, asid, iotlb);
0778     } else {
0779         r = iommu_map(v->domain, iova, pa, size,
0780                   perm_to_iommu_flags(perm));
0781     }
0782     if (r) {
0783         vhost_iotlb_del_range(iotlb, iova, iova + size - 1);
0784         return r;
0785     }
0786 
0787     if (!vdpa->use_va)
0788         atomic64_add(PFN_DOWN(size), &dev->mm->pinned_vm);
0789 
0790     return 0;
0791 }
0792 
0793 static void vhost_vdpa_unmap(struct vhost_vdpa *v,
0794                  struct vhost_iotlb *iotlb,
0795                  u64 iova, u64 size)
0796 {
0797     struct vdpa_device *vdpa = v->vdpa;
0798     const struct vdpa_config_ops *ops = vdpa->config;
0799     u32 asid = iotlb_to_asid(iotlb);
0800 
0801     vhost_vdpa_iotlb_unmap(v, iotlb, iova, iova + size - 1);
0802 
0803     if (ops->dma_map) {
0804         ops->dma_unmap(vdpa, asid, iova, size);
0805     } else if (ops->set_map) {
0806         if (!v->in_batch)
0807             ops->set_map(vdpa, asid, iotlb);
0808     } else {
0809         iommu_unmap(v->domain, iova, size);
0810     }
0811 
0812     /* If we are in the middle of batch processing, delay the free
0813      * of AS until BATCH_END.
0814      */
0815     if (!v->in_batch && !iotlb->nmaps)
0816         vhost_vdpa_remove_as(v, asid);
0817 }
0818 
0819 static int vhost_vdpa_va_map(struct vhost_vdpa *v,
0820                  struct vhost_iotlb *iotlb,
0821                  u64 iova, u64 size, u64 uaddr, u32 perm)
0822 {
0823     struct vhost_dev *dev = &v->vdev;
0824     u64 offset, map_size, map_iova = iova;
0825     struct vdpa_map_file *map_file;
0826     struct vm_area_struct *vma;
0827     int ret = 0;
0828 
0829     mmap_read_lock(dev->mm);
0830 
0831     while (size) {
0832         vma = find_vma(dev->mm, uaddr);
0833         if (!vma) {
0834             ret = -EINVAL;
0835             break;
0836         }
0837         map_size = min(size, vma->vm_end - uaddr);
0838         if (!(vma->vm_file && (vma->vm_flags & VM_SHARED) &&
0839             !(vma->vm_flags & (VM_IO | VM_PFNMAP))))
0840             goto next;
0841 
0842         map_file = kzalloc(sizeof(*map_file), GFP_KERNEL);
0843         if (!map_file) {
0844             ret = -ENOMEM;
0845             break;
0846         }
0847         offset = (vma->vm_pgoff << PAGE_SHIFT) + uaddr - vma->vm_start;
0848         map_file->offset = offset;
0849         map_file->file = get_file(vma->vm_file);
0850         ret = vhost_vdpa_map(v, iotlb, map_iova, map_size, uaddr,
0851                      perm, map_file);
0852         if (ret) {
0853             fput(map_file->file);
0854             kfree(map_file);
0855             break;
0856         }
0857 next:
0858         size -= map_size;
0859         uaddr += map_size;
0860         map_iova += map_size;
0861     }
0862     if (ret)
0863         vhost_vdpa_unmap(v, iotlb, iova, map_iova - iova);
0864 
0865     mmap_read_unlock(dev->mm);
0866 
0867     return ret;
0868 }
0869 
0870 static int vhost_vdpa_pa_map(struct vhost_vdpa *v,
0871                  struct vhost_iotlb *iotlb,
0872                  u64 iova, u64 size, u64 uaddr, u32 perm)
0873 {
0874     struct vhost_dev *dev = &v->vdev;
0875     struct page **page_list;
0876     unsigned long list_size = PAGE_SIZE / sizeof(struct page *);
0877     unsigned int gup_flags = FOLL_LONGTERM;
0878     unsigned long npages, cur_base, map_pfn, last_pfn = 0;
0879     unsigned long lock_limit, sz2pin, nchunks, i;
0880     u64 start = iova;
0881     long pinned;
0882     int ret = 0;
0883 
0884     /* Limit the use of memory for bookkeeping */
0885     page_list = (struct page **) __get_free_page(GFP_KERNEL);
0886     if (!page_list)
0887         return -ENOMEM;
0888 
0889     if (perm & VHOST_ACCESS_WO)
0890         gup_flags |= FOLL_WRITE;
0891 
0892     npages = PFN_UP(size + (iova & ~PAGE_MASK));
0893     if (!npages) {
0894         ret = -EINVAL;
0895         goto free;
0896     }
0897 
0898     mmap_read_lock(dev->mm);
0899 
0900     lock_limit = PFN_DOWN(rlimit(RLIMIT_MEMLOCK));
0901     if (npages + atomic64_read(&dev->mm->pinned_vm) > lock_limit) {
0902         ret = -ENOMEM;
0903         goto unlock;
0904     }
0905 
0906     cur_base = uaddr & PAGE_MASK;
0907     iova &= PAGE_MASK;
0908     nchunks = 0;
0909 
0910     while (npages) {
0911         sz2pin = min_t(unsigned long, npages, list_size);
0912         pinned = pin_user_pages(cur_base, sz2pin,
0913                     gup_flags, page_list, NULL);
0914         if (sz2pin != pinned) {
0915             if (pinned < 0) {
0916                 ret = pinned;
0917             } else {
0918                 unpin_user_pages(page_list, pinned);
0919                 ret = -ENOMEM;
0920             }
0921             goto out;
0922         }
0923         nchunks++;
0924 
0925         if (!last_pfn)
0926             map_pfn = page_to_pfn(page_list[0]);
0927 
0928         for (i = 0; i < pinned; i++) {
0929             unsigned long this_pfn = page_to_pfn(page_list[i]);
0930             u64 csize;
0931 
0932             if (last_pfn && (this_pfn != last_pfn + 1)) {
0933                 /* Map the contiguous chunk of pinned pages */
0934                 csize = PFN_PHYS(last_pfn - map_pfn + 1);
0935                 ret = vhost_vdpa_map(v, iotlb, iova, csize,
0936                              PFN_PHYS(map_pfn),
0937                              perm, NULL);
0938                 if (ret) {
0939                     /*
0940                      * Unpin the pages that are left unmapped
0941                      * from this point on in the current
0942                      * page_list. The remaining outstanding
0943                      * ones which may stride across several
0944                      * chunks will be covered in the common
0945                      * error path subsequently.
0946                      */
0947                     unpin_user_pages(&page_list[i],
0948                              pinned - i);
0949                     goto out;
0950                 }
0951 
0952                 map_pfn = this_pfn;
0953                 iova += csize;
0954                 nchunks = 0;
0955             }
0956 
0957             last_pfn = this_pfn;
0958         }
0959 
0960         cur_base += PFN_PHYS(pinned);
0961         npages -= pinned;
0962     }
0963 
0964     /* Map the remaining chunk */
0965     ret = vhost_vdpa_map(v, iotlb, iova, PFN_PHYS(last_pfn - map_pfn + 1),
0966                  PFN_PHYS(map_pfn), perm, NULL);
0967 out:
0968     if (ret) {
0969         if (nchunks) {
0970             unsigned long pfn;
0971 
0972             /*
0973              * Unpin the outstanding pages which were meant to be
0974              * mapped but were not, due to a vdpa_map() or
0975              * pin_user_pages() failure.
0976              *
0977              * Mapped pages are accounted in vdpa_map(), hence
0978              * the corresponding unpinning will be handled by
0979              * vdpa_unmap().
0980              */
0981             WARN_ON(!last_pfn);
0982             for (pfn = map_pfn; pfn <= last_pfn; pfn++)
0983                 unpin_user_page(pfn_to_page(pfn));
0984         }
0985         vhost_vdpa_unmap(v, iotlb, start, size);
0986     }
0987 unlock:
0988     mmap_read_unlock(dev->mm);
0989 free:
0990     free_page((unsigned long)page_list);
0991     return ret;
0992 
0993 }
0994 
0995 static int vhost_vdpa_process_iotlb_update(struct vhost_vdpa *v,
0996                        struct vhost_iotlb *iotlb,
0997                        struct vhost_iotlb_msg *msg)
0998 {
0999     struct vdpa_device *vdpa = v->vdpa;
1000 
1001     if (msg->iova < v->range.first || !msg->size ||
1002         msg->iova > U64_MAX - msg->size + 1 ||
1003         msg->iova + msg->size - 1 > v->range.last)
1004         return -EINVAL;
1005 
1006     if (vhost_iotlb_itree_first(iotlb, msg->iova,
1007                     msg->iova + msg->size - 1))
1008         return -EEXIST;
1009 
1010     if (vdpa->use_va)
1011         return vhost_vdpa_va_map(v, iotlb, msg->iova, msg->size,
1012                      msg->uaddr, msg->perm);
1013 
1014     return vhost_vdpa_pa_map(v, iotlb, msg->iova, msg->size, msg->uaddr,
1015                  msg->perm);
1016 }
1017 
1018 static int vhost_vdpa_process_iotlb_msg(struct vhost_dev *dev, u32 asid,
1019                     struct vhost_iotlb_msg *msg)
1020 {
1021     struct vhost_vdpa *v = container_of(dev, struct vhost_vdpa, vdev);
1022     struct vdpa_device *vdpa = v->vdpa;
1023     const struct vdpa_config_ops *ops = vdpa->config;
1024     struct vhost_iotlb *iotlb = NULL;
1025     struct vhost_vdpa_as *as = NULL;
1026     int r = 0;
1027 
1028     mutex_lock(&dev->mutex);
1029 
1030     r = vhost_dev_check_owner(dev);
1031     if (r)
1032         goto unlock;
1033 
1034     if (msg->type == VHOST_IOTLB_UPDATE ||
1035         msg->type == VHOST_IOTLB_BATCH_BEGIN) {
1036         as = vhost_vdpa_find_alloc_as(v, asid);
1037         if (!as) {
1038             dev_err(&v->dev, "can't find and alloc asid %d\n",
1039                 asid);
1040             r = -EINVAL;
1041             goto unlock;
1042         }
1043         iotlb = &as->iotlb;
1044     } else
1045         iotlb = asid_to_iotlb(v, asid);
1046 
1047     if ((v->in_batch && v->batch_asid != asid) || !iotlb) {
1048         if (v->in_batch && v->batch_asid != asid) {
1049             dev_info(&v->dev, "batch id %d asid %d\n",
1050                  v->batch_asid, asid);
1051         }
1052         if (!iotlb)
1053             dev_err(&v->dev, "no iotlb for asid %d\n", asid);
1054         r = -EINVAL;
1055         goto unlock;
1056     }
1057 
1058     switch (msg->type) {
1059     case VHOST_IOTLB_UPDATE:
1060         r = vhost_vdpa_process_iotlb_update(v, iotlb, msg);
1061         break;
1062     case VHOST_IOTLB_INVALIDATE:
1063         vhost_vdpa_unmap(v, iotlb, msg->iova, msg->size);
1064         break;
1065     case VHOST_IOTLB_BATCH_BEGIN:
1066         v->batch_asid = asid;
1067         v->in_batch = true;
1068         break;
1069     case VHOST_IOTLB_BATCH_END:
1070         if (v->in_batch && ops->set_map)
1071             ops->set_map(vdpa, asid, iotlb);
1072         v->in_batch = false;
1073         if (!iotlb->nmaps)
1074             vhost_vdpa_remove_as(v, asid);
1075         break;
1076     default:
1077         r = -EINVAL;
1078         break;
1079     }
1080 unlock:
1081     mutex_unlock(&dev->mutex);
1082 
1083     return r;
1084 }
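     /*
      * Illustrative (hypothetical) IOTLB update as userspace would write it
      * to the char device (see vhost_vdpa_chr_write_iter() below), assuming
      * VHOST_BACKEND_F_IOTLB_MSG_V2 has been negotiated:
      *
      *   struct vhost_msg_v2 msg = {
      *       .type  = VHOST_IOTLB_MSG_V2,
      *       .asid  = 0,
      *       .iotlb = {
      *           .iova  = iova,
      *           .size  = size,
      *           .uaddr = (__u64)(uintptr_t)buf,
      *           .perm  = VHOST_ACCESS_RW,
      *           .type  = VHOST_IOTLB_UPDATE,
      *       },
      *   };
      *
      *   write(vdpa_fd, &msg, sizeof(msg));
      */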
1085 
1086 static ssize_t vhost_vdpa_chr_write_iter(struct kiocb *iocb,
1087                      struct iov_iter *from)
1088 {
1089     struct file *file = iocb->ki_filp;
1090     struct vhost_vdpa *v = file->private_data;
1091     struct vhost_dev *dev = &v->vdev;
1092 
1093     return vhost_chr_write_iter(dev, from);
1094 }
1095 
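     /*
      * vhost_vdpa supports three DMA setup modes: the parent device may
      * translate DMA on its own (dma_map()/dma_unmap()), it may accept a
      * whole IOTLB at once (set_map()), or, failing both,
      * vhost_vdpa_alloc_domain() below attaches the device to an IOMMU
      * domain that is then programmed through the platform IOMMU (see
      * vhost_vdpa_map()).
      */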
1096 static int vhost_vdpa_alloc_domain(struct vhost_vdpa *v)
1097 {
1098     struct vdpa_device *vdpa = v->vdpa;
1099     const struct vdpa_config_ops *ops = vdpa->config;
1100     struct device *dma_dev = vdpa_get_dma_dev(vdpa);
1101     struct bus_type *bus;
1102     int ret;
1103 
1104     /* The device wants to do DMA by itself */
1105     if (ops->set_map || ops->dma_map)
1106         return 0;
1107 
1108     bus = dma_dev->bus;
1109     if (!bus)
1110         return -EFAULT;
1111 
1112     if (!device_iommu_capable(dma_dev, IOMMU_CAP_CACHE_COHERENCY))
1113         return -ENOTSUPP;
1114 
1115     v->domain = iommu_domain_alloc(bus);
1116     if (!v->domain)
1117         return -EIO;
1118 
1119     ret = iommu_attach_device(v->domain, dma_dev);
1120     if (ret)
1121         goto err_attach;
1122 
1123     return 0;
1124 
1125 err_attach:
1126     iommu_domain_free(v->domain);
1127     return ret;
1128 }
1129 
1130 static void vhost_vdpa_free_domain(struct vhost_vdpa *v)
1131 {
1132     struct vdpa_device *vdpa = v->vdpa;
1133     struct device *dma_dev = vdpa_get_dma_dev(vdpa);
1134 
1135     if (v->domain) {
1136         iommu_detach_device(v->domain, dma_dev);
1137         iommu_domain_free(v->domain);
1138     }
1139 
1140     v->domain = NULL;
1141 }
1142 
1143 static void vhost_vdpa_set_iova_range(struct vhost_vdpa *v)
1144 {
1145     struct vdpa_iova_range *range = &v->range;
1146     struct vdpa_device *vdpa = v->vdpa;
1147     const struct vdpa_config_ops *ops = vdpa->config;
1148 
1149     if (ops->get_iova_range) {
1150         *range = ops->get_iova_range(vdpa);
1151     } else if (v->domain && v->domain->geometry.force_aperture) {
1152         range->first = v->domain->geometry.aperture_start;
1153         range->last = v->domain->geometry.aperture_end;
1154     } else {
1155         range->first = 0;
1156         range->last = ULLONG_MAX;
1157     }
1158 }
1159 
1160 static void vhost_vdpa_cleanup(struct vhost_vdpa *v)
1161 {
1162     struct vhost_vdpa_as *as;
1163     u32 asid;
1164 
1165     vhost_dev_cleanup(&v->vdev);
1166     kfree(v->vdev.vqs);
1167 
1168     for (asid = 0; asid < v->vdpa->nas; asid++) {
1169         as = asid_to_as(v, asid);
1170         if (as)
1171             vhost_vdpa_remove_as(v, asid);
1172     }
1173 }
1174 
1175 static int vhost_vdpa_open(struct inode *inode, struct file *filep)
1176 {
1177     struct vhost_vdpa *v;
1178     struct vhost_dev *dev;
1179     struct vhost_virtqueue **vqs;
1180     int r, opened;
1181     u32 i, nvqs;
1182 
1183     v = container_of(inode->i_cdev, struct vhost_vdpa, cdev);
1184 
1185     opened = atomic_cmpxchg(&v->opened, 0, 1);
1186     if (opened)
1187         return -EBUSY;
1188 
1189     nvqs = v->nvqs;
1190     r = vhost_vdpa_reset(v);
1191     if (r)
1192         goto err;
1193 
1194     vqs = kmalloc_array(nvqs, sizeof(*vqs), GFP_KERNEL);
1195     if (!vqs) {
1196         r = -ENOMEM;
1197         goto err;
1198     }
1199 
1200     dev = &v->vdev;
1201     for (i = 0; i < nvqs; i++) {
1202         vqs[i] = &v->vqs[i];
1203         vqs[i]->handle_kick = handle_vq_kick;
1204     }
1205     vhost_dev_init(dev, vqs, nvqs, 0, 0, 0, false,
1206                vhost_vdpa_process_iotlb_msg);
1207 
1208     r = vhost_vdpa_alloc_domain(v);
1209     if (r)
1210         goto err_alloc_domain;
1211 
1212     vhost_vdpa_set_iova_range(v);
1213 
1214     filep->private_data = v;
1215 
1216     return 0;
1217 
1218 err_alloc_domain:
1219     vhost_vdpa_cleanup(v);
1220 err:
1221     atomic_dec(&v->opened);
1222     return r;
1223 }
1224 
1225 static void vhost_vdpa_clean_irq(struct vhost_vdpa *v)
1226 {
1227     u32 i;
1228 
1229     for (i = 0; i < v->nvqs; i++)
1230         vhost_vdpa_unsetup_vq_irq(v, i);
1231 }
1232 
1233 static int vhost_vdpa_release(struct inode *inode, struct file *filep)
1234 {
1235     struct vhost_vdpa *v = filep->private_data;
1236     struct vhost_dev *d = &v->vdev;
1237 
1238     mutex_lock(&d->mutex);
1239     filep->private_data = NULL;
1240     vhost_vdpa_clean_irq(v);
1241     vhost_vdpa_reset(v);
1242     vhost_dev_stop(&v->vdev);
1243     vhost_vdpa_free_domain(v);
1244     vhost_vdpa_config_put(v);
1245     vhost_vdpa_cleanup(v);
1246     mutex_unlock(&d->mutex);
1247 
1248     atomic_dec(&v->opened);
1249     complete(&v->completion);
1250 
1251     return 0;
1252 }
1253 
1254 #ifdef CONFIG_MMU
1255 static vm_fault_t vhost_vdpa_fault(struct vm_fault *vmf)
1256 {
1257     struct vhost_vdpa *v = vmf->vma->vm_file->private_data;
1258     struct vdpa_device *vdpa = v->vdpa;
1259     const struct vdpa_config_ops *ops = vdpa->config;
1260     struct vdpa_notification_area notify;
1261     struct vm_area_struct *vma = vmf->vma;
1262     u16 index = vma->vm_pgoff;
1263 
1264     notify = ops->get_vq_notification(vdpa, index);
1265 
1266     vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
1267     if (remap_pfn_range(vma, vmf->address & PAGE_MASK,
1268                 PFN_DOWN(notify.addr), PAGE_SIZE,
1269                 vma->vm_page_prot))
1270         return VM_FAULT_SIGBUS;
1271 
1272     return VM_FAULT_NOPAGE;
1273 }
1274 
1275 static const struct vm_operations_struct vhost_vdpa_vm_ops = {
1276     .fault = vhost_vdpa_fault,
1277 };
1278 
1279 static int vhost_vdpa_mmap(struct file *file, struct vm_area_struct *vma)
1280 {
1281     struct vhost_vdpa *v = vma->vm_file->private_data;
1282     struct vdpa_device *vdpa = v->vdpa;
1283     const struct vdpa_config_ops *ops = vdpa->config;
1284     struct vdpa_notification_area notify;
1285     unsigned long index = vma->vm_pgoff;
1286 
1287     if (vma->vm_end - vma->vm_start != PAGE_SIZE)
1288         return -EINVAL;
1289     if ((vma->vm_flags & VM_SHARED) == 0)
1290         return -EINVAL;
1291     if (vma->vm_flags & VM_READ)
1292         return -EINVAL;
1293     if (index > 65535)
1294         return -EINVAL;
1295     if (!ops->get_vq_notification)
1296         return -ENOTSUPP;
1297 
1298     /* To be safe and easily modelled by userspace, we only
1299      * support doorbells that sit on a page boundary and do not
1300      * share the page with other registers.
1301      */
1302     notify = ops->get_vq_notification(vdpa, index);
1303     if (notify.addr & (PAGE_SIZE - 1))
1304         return -EINVAL;
1305     if (vma->vm_end - vma->vm_start != notify.size)
1306         return -ENOTSUPP;
1307 
1308     vma->vm_flags |= VM_IO | VM_PFNMAP | VM_DONTEXPAND | VM_DONTDUMP;
1309     vma->vm_ops = &vhost_vdpa_vm_ops;
1310     return 0;
1311 }
1312 #endif /* CONFIG_MMU */
1313 
1314 static const struct file_operations vhost_vdpa_fops = {
1315     .owner      = THIS_MODULE,
1316     .open       = vhost_vdpa_open,
1317     .release    = vhost_vdpa_release,
1318     .write_iter = vhost_vdpa_chr_write_iter,
1319     .unlocked_ioctl = vhost_vdpa_unlocked_ioctl,
1320 #ifdef CONFIG_MMU
1321     .mmap       = vhost_vdpa_mmap,
1322 #endif /* CONFIG_MMU */
1323     .compat_ioctl   = compat_ptr_ioctl,
1324 };
1325 
1326 static void vhost_vdpa_release_dev(struct device *device)
1327 {
1328     struct vhost_vdpa *v =
1329            container_of(device, struct vhost_vdpa, dev);
1330 
1331     ida_simple_remove(&vhost_vdpa_ida, v->minor);
1332     kfree(v->vqs);
1333     kfree(v);
1334 }
1335 
1336 static int vhost_vdpa_probe(struct vdpa_device *vdpa)
1337 {
1338     const struct vdpa_config_ops *ops = vdpa->config;
1339     struct vhost_vdpa *v;
1340     int minor;
1341     int i, r;
1342 
1343     /* A device that relies on the platform IOMMU cannot support
1344      * more than one group or address space.
1345      */
1346     if (!ops->set_map && !ops->dma_map &&
1347         (vdpa->ngroups > 1 || vdpa->nas > 1))
1348         return -EOPNOTSUPP;
1349 
1350     v = kzalloc(sizeof(*v), GFP_KERNEL | __GFP_RETRY_MAYFAIL);
1351     if (!v)
1352         return -ENOMEM;
1353 
1354     minor = ida_simple_get(&vhost_vdpa_ida, 0,
1355                    VHOST_VDPA_DEV_MAX, GFP_KERNEL);
1356     if (minor < 0) {
1357         kfree(v);
1358         return minor;
1359     }
1360 
1361     atomic_set(&v->opened, 0);
1362     v->minor = minor;
1363     v->vdpa = vdpa;
1364     v->nvqs = vdpa->nvqs;
1365     v->virtio_id = ops->get_device_id(vdpa);
1366 
1367     device_initialize(&v->dev);
1368     v->dev.release = vhost_vdpa_release_dev;
1369     v->dev.parent = &vdpa->dev;
1370     v->dev.devt = MKDEV(MAJOR(vhost_vdpa_major), minor);
1371     v->vqs = kmalloc_array(v->nvqs, sizeof(struct vhost_virtqueue),
1372                    GFP_KERNEL);
1373     if (!v->vqs) {
1374         r = -ENOMEM;
1375         goto err;
1376     }
1377 
1378     r = dev_set_name(&v->dev, "vhost-vdpa-%u", minor);
1379     if (r)
1380         goto err;
1381 
1382     cdev_init(&v->cdev, &vhost_vdpa_fops);
1383     v->cdev.owner = THIS_MODULE;
1384 
1385     r = cdev_device_add(&v->cdev, &v->dev);
1386     if (r)
1387         goto err;
1388 
1389     init_completion(&v->completion);
1390     vdpa_set_drvdata(vdpa, v);
1391 
1392     for (i = 0; i < VHOST_VDPA_IOTLB_BUCKETS; i++)
1393         INIT_HLIST_HEAD(&v->as[i]);
1394 
1395     return 0;
1396 
1397 err:
1398     put_device(&v->dev);
1399     ida_simple_remove(&vhost_vdpa_ida, v->minor);
1400     return r;
1401 }
1402 
1403 static void vhost_vdpa_remove(struct vdpa_device *vdpa)
1404 {
1405     struct vhost_vdpa *v = vdpa_get_drvdata(vdpa);
1406     int opened;
1407 
1408     cdev_device_del(&v->cdev, &v->dev);
1409 
1410     do {
1411         opened = atomic_cmpxchg(&v->opened, 0, 1);
1412         if (!opened)
1413             break;
1414         wait_for_completion(&v->completion);
1415     } while (1);
1416 
1417     put_device(&v->dev);
1418 }
1419 
1420 static struct vdpa_driver vhost_vdpa_driver = {
1421     .driver = {
1422         .name   = "vhost_vdpa",
1423     },
1424     .probe  = vhost_vdpa_probe,
1425     .remove = vhost_vdpa_remove,
1426 };
1427 
1428 static int __init vhost_vdpa_init(void)
1429 {
1430     int r;
1431 
1432     r = alloc_chrdev_region(&vhost_vdpa_major, 0, VHOST_VDPA_DEV_MAX,
1433                 "vhost-vdpa");
1434     if (r)
1435         goto err_alloc_chrdev;
1436 
1437     r = vdpa_register_driver(&vhost_vdpa_driver);
1438     if (r)
1439         goto err_vdpa_register_driver;
1440 
1441     return 0;
1442 
1443 err_vdpa_register_driver:
1444     unregister_chrdev_region(vhost_vdpa_major, VHOST_VDPA_DEV_MAX);
1445 err_alloc_chrdev:
1446     return r;
1447 }
1448 module_init(vhost_vdpa_init);
1449 
1450 static void __exit vhost_vdpa_exit(void)
1451 {
1452     vdpa_unregister_driver(&vhost_vdpa_driver);
1453     unregister_chrdev_region(vhost_vdpa_major, VHOST_VDPA_DEV_MAX);
1454 }
1455 module_exit(vhost_vdpa_exit);
1456 
1457 MODULE_VERSION("0.0.1");
1458 MODULE_LICENSE("GPL v2");
1459 MODULE_AUTHOR("Intel Corporation");
1460 MODULE_DESCRIPTION("vDPA-based vhost backend for virtio");