0001 // SPDX-License-Identifier: GPL-2.0-only
0002 /*
0003  * Helpers for the host side of a virtio ring.
0004  *
0005  * Since these may be in userspace, we use (inline) accessors.
0006  */
0007 #include <linux/compiler.h>
0008 #include <linux/module.h>
0009 #include <linux/vringh.h>
0010 #include <linux/virtio_ring.h>
0011 #include <linux/kernel.h>
0012 #include <linux/ratelimit.h>
0013 #include <linux/uaccess.h>
0014 #include <linux/slab.h>
0015 #include <linux/export.h>
0016 #if IS_REACHABLE(CONFIG_VHOST_IOTLB)
0017 #include <linux/bvec.h>
0018 #include <linux/highmem.h>
0019 #include <linux/vhost_iotlb.h>
0020 #endif
0021 #include <uapi/linux/virtio_config.h>
0022 
0023 static __printf(1,2) __cold void vringh_bad(const char *fmt, ...)
0024 {
0025     static DEFINE_RATELIMIT_STATE(vringh_rs,
0026                       DEFAULT_RATELIMIT_INTERVAL,
0027                       DEFAULT_RATELIMIT_BURST);
0028     if (__ratelimit(&vringh_rs)) {
0029         va_list ap;
0030         va_start(ap, fmt);
0031         printk(KERN_NOTICE "vringh:");
0032         vprintk(fmt, ap);
0033         va_end(ap);
0034     }
0035 }
0036 
0037 /* Returns vring->num if empty, -ve on error. */
0038 static inline int __vringh_get_head(const struct vringh *vrh,
0039                     int (*getu16)(const struct vringh *vrh,
0040                           u16 *val, const __virtio16 *p),
0041                     u16 *last_avail_idx)
0042 {
0043     u16 avail_idx, i, head;
0044     int err;
0045 
0046     err = getu16(vrh, &avail_idx, &vrh->vring.avail->idx);
0047     if (err) {
0048         vringh_bad("Failed to access avail idx at %p",
0049                &vrh->vring.avail->idx);
0050         return err;
0051     }
0052 
0053     if (*last_avail_idx == avail_idx)
0054         return vrh->vring.num;
0055 
0056     /* Only get avail ring entries after they have been exposed by guest. */
0057     virtio_rmb(vrh->weak_barriers);
0058 
0059     i = *last_avail_idx & (vrh->vring.num - 1);
0060 
0061     err = getu16(vrh, &head, &vrh->vring.avail->ring[i]);
0062     if (err) {
0063         vringh_bad("Failed to read head: idx %d address %p",
0064                *last_avail_idx, &vrh->vring.avail->ring[i]);
0065         return err;
0066     }
0067 
0068     if (head >= vrh->vring.num) {
0069         vringh_bad("Guest says index %u > %u is available",
0070                head, vrh->vring.num);
0071         return -EINVAL;
0072     }
0073 
0074     (*last_avail_idx)++;
0075     return head;
0076 }
0077 
0078 /**
0079  * vringh_kiov_advance - skip bytes from vring_kiov
0080  * @iov: an iov passed to vringh_getdesc_*() (updated as we consume)
0081  * @len: the maximum length to advance
0082  */
0083 void vringh_kiov_advance(struct vringh_kiov *iov, size_t len)
0084 {
0085     while (len && iov->i < iov->used) {
0086         size_t partlen = min(iov->iov[iov->i].iov_len, len);
0087 
0088         iov->consumed += partlen;
0089         iov->iov[iov->i].iov_len -= partlen;
0090         iov->iov[iov->i].iov_base += partlen;
0091 
0092         if (!iov->iov[iov->i].iov_len) {
0093             /* Fix up old iov element then increment. */
0094             iov->iov[iov->i].iov_len = iov->consumed;
0095             iov->iov[iov->i].iov_base -= iov->consumed;
0096 
0097             iov->consumed = 0;
0098             iov->i++;
0099         }
0100 
0101         len -= partlen;
0102     }
0103 }
0104 EXPORT_SYMBOL(vringh_kiov_advance);
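
A minimal usage sketch, assuming a kernel-side caller whose vringh_kiov was filled by vringh_getdesc_kern() and an assumed 12-byte header that should be skipped before the payload is copied out; only <linux/vringh.h> is needed.

static ssize_t pull_payload(struct vringh_kiov *riov, void *buf, size_t len)
{
	/* Skip the (assumed) 12-byte header, then copy what follows. */
	vringh_kiov_advance(riov, 12);
	return vringh_iov_pull_kern(riov, buf, len);
}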
0105 
0106 /* Copy some bytes to/from the iovec.  Returns num copied. */
0107 static inline ssize_t vringh_iov_xfer(struct vringh *vrh,
0108                       struct vringh_kiov *iov,
0109                       void *ptr, size_t len,
0110                       int (*xfer)(const struct vringh *vrh,
0111                           void *addr, void *ptr,
0112                           size_t len))
0113 {
0114     int err, done = 0;
0115 
0116     while (len && iov->i < iov->used) {
0117         size_t partlen;
0118 
0119         partlen = min(iov->iov[iov->i].iov_len, len);
0120         err = xfer(vrh, iov->iov[iov->i].iov_base, ptr, partlen);
0121         if (err)
0122             return err;
0123         done += partlen;
0124         len -= partlen;
0125         ptr += partlen;
0126 
0127         vringh_kiov_advance(iov, partlen);
0128     }
0129     return done;
0130 }
0131 
0132 /* May reduce *len if range is shorter. */
0133 static inline bool range_check(struct vringh *vrh, u64 addr, size_t *len,
0134                    struct vringh_range *range,
0135                    bool (*getrange)(struct vringh *,
0136                         u64, struct vringh_range *))
0137 {
0138     if (addr < range->start || addr > range->end_incl) {
0139         if (!getrange(vrh, addr, range))
0140             return false;
0141     }
0142     BUG_ON(addr < range->start || addr > range->end_incl);
0143 
0144     /* To end of memory? */
0145     if (unlikely(addr + *len == 0)) {
0146         if (range->end_incl == -1ULL)
0147             return true;
0148         goto truncate;
0149     }
0150 
0151     /* Otherwise, don't wrap. */
0152     if (addr + *len < addr) {
0153         vringh_bad("Wrapping descriptor %zu@0x%llx",
0154                *len, (unsigned long long)addr);
0155         return false;
0156     }
0157 
0158     if (unlikely(addr + *len - 1 > range->end_incl))
0159         goto truncate;
0160     return true;
0161 
0162 truncate:
0163     *len = range->end_incl + 1 - addr;
0164     return true;
0165 }
0166 
0167 static inline bool no_range_check(struct vringh *vrh, u64 addr, size_t *len,
0168                   struct vringh_range *range,
0169                   bool (*getrange)(struct vringh *,
0170                            u64, struct vringh_range *))
0171 {
0172     return true;
0173 }
0174 
0175 /* No reason for this code to be inline. */
0176 static int move_to_indirect(const struct vringh *vrh,
0177                 int *up_next, u16 *i, void *addr,
0178                 const struct vring_desc *desc,
0179                 struct vring_desc **descs, int *desc_max)
0180 {
0181     u32 len;
0182 
0183     /* Indirect tables can't have indirect. */
0184     if (*up_next != -1) {
0185         vringh_bad("Multilevel indirect %u->%u", *up_next, *i);
0186         return -EINVAL;
0187     }
0188 
0189     len = vringh32_to_cpu(vrh, desc->len);
0190     if (unlikely(len % sizeof(struct vring_desc))) {
0191         vringh_bad("Strange indirect len %u", len);
0192         return -EINVAL;
0193     }
0194 
0195     /* We will check this when we follow it! */
0196     if (desc->flags & cpu_to_vringh16(vrh, VRING_DESC_F_NEXT))
0197         *up_next = vringh16_to_cpu(vrh, desc->next);
0198     else
0199         *up_next = -2;
0200     *descs = addr;
0201     *desc_max = len / sizeof(struct vring_desc);
0202 
0203     /* Now, start at the first indirect. */
0204     *i = 0;
0205     return 0;
0206 }
0207 
0208 static int resize_iovec(struct vringh_kiov *iov, gfp_t gfp)
0209 {
0210     struct kvec *new;
0211     unsigned int flag, new_num = (iov->max_num & ~VRINGH_IOV_ALLOCATED) * 2;
0212 
0213     if (new_num < 8)
0214         new_num = 8;
0215 
0216     flag = (iov->max_num & VRINGH_IOV_ALLOCATED);
0217     if (flag)
0218         new = krealloc_array(iov->iov, new_num,
0219                      sizeof(struct iovec), gfp);
0220     else {
0221         new = kmalloc_array(new_num, sizeof(struct iovec), gfp);
0222         if (new) {
0223             memcpy(new, iov->iov,
0224                    iov->max_num * sizeof(struct iovec));
0225             flag = VRINGH_IOV_ALLOCATED;
0226         }
0227     }
0228     if (!new)
0229         return -ENOMEM;
0230     iov->iov = new;
0231     iov->max_num = (new_num | flag);
0232     return 0;
0233 }
0234 
0235 static u16 __cold return_from_indirect(const struct vringh *vrh, int *up_next,
0236                        struct vring_desc **descs, int *desc_max)
0237 {
0238     u16 i = *up_next;
0239 
0240     *up_next = -1;
0241     *descs = vrh->vring.desc;
0242     *desc_max = vrh->vring.num;
0243     return i;
0244 }
0245 
0246 static int slow_copy(struct vringh *vrh, void *dst, const void *src,
0247              bool (*rcheck)(struct vringh *vrh, u64 addr, size_t *len,
0248                     struct vringh_range *range,
0249                     bool (*getrange)(struct vringh *vrh,
0250                              u64,
0251                              struct vringh_range *)),
0252              bool (*getrange)(struct vringh *vrh,
0253                       u64 addr,
0254                       struct vringh_range *r),
0255              struct vringh_range *range,
0256              int (*copy)(const struct vringh *vrh,
0257                  void *dst, const void *src, size_t len))
0258 {
0259     size_t part, len = sizeof(struct vring_desc);
0260 
0261     do {
0262         u64 addr;
0263         int err;
0264 
0265         part = len;
0266         addr = (u64)(unsigned long)src - range->offset;
0267 
0268         if (!rcheck(vrh, addr, &part, range, getrange))
0269             return -EINVAL;
0270 
0271         err = copy(vrh, dst, src, part);
0272         if (err)
0273             return err;
0274 
0275         dst += part;
0276         src += part;
0277         len -= part;
0278     } while (len);
0279     return 0;
0280 }
0281 
0282 static inline int
0283 __vringh_iov(struct vringh *vrh, u16 i,
0284          struct vringh_kiov *riov,
0285          struct vringh_kiov *wiov,
0286          bool (*rcheck)(struct vringh *vrh, u64 addr, size_t *len,
0287                 struct vringh_range *range,
0288                 bool (*getrange)(struct vringh *, u64,
0289                          struct vringh_range *)),
0290          bool (*getrange)(struct vringh *, u64, struct vringh_range *),
0291          gfp_t gfp,
0292          int (*copy)(const struct vringh *vrh,
0293              void *dst, const void *src, size_t len))
0294 {
0295     int err, count = 0, indirect_count = 0, up_next, desc_max;
0296     struct vring_desc desc, *descs;
0297     struct vringh_range range = { -1ULL, 0 }, slowrange;
0298     bool slow = false;
0299 
0300     /* We start traversing vring's descriptor table. */
0301     descs = vrh->vring.desc;
0302     desc_max = vrh->vring.num;
0303     up_next = -1;
0304 
0305     /* You must want something! */
0306     if (WARN_ON(!riov && !wiov))
0307         return -EINVAL;
0308 
0309     if (riov)
0310         riov->i = riov->used = riov->consumed = 0;
0311     if (wiov)
0312         wiov->i = wiov->used = wiov->consumed = 0;
0313 
0314     for (;;) {
0315         void *addr;
0316         struct vringh_kiov *iov;
0317         size_t len;
0318 
0319         if (unlikely(slow))
0320             err = slow_copy(vrh, &desc, &descs[i], rcheck, getrange,
0321                     &slowrange, copy);
0322         else
0323             err = copy(vrh, &desc, &descs[i], sizeof(desc));
0324         if (unlikely(err))
0325             goto fail;
0326 
0327         if (unlikely(desc.flags &
0328                  cpu_to_vringh16(vrh, VRING_DESC_F_INDIRECT))) {
0329             u64 a = vringh64_to_cpu(vrh, desc.addr);
0330 
0331             /* Make sure it's OK, and get offset. */
0332             len = vringh32_to_cpu(vrh, desc.len);
0333             if (!rcheck(vrh, a, &len, &range, getrange)) {
0334                 err = -EINVAL;
0335                 goto fail;
0336             }
0337 
0338             if (unlikely(len != vringh32_to_cpu(vrh, desc.len))) {
0339                 slow = true;
0340                 /* We need to save this range to use offset */
0341                 slowrange = range;
0342             }
0343 
0344             addr = (void *)(long)(a + range.offset);
0345             err = move_to_indirect(vrh, &up_next, &i, addr, &desc,
0346                            &descs, &desc_max);
0347             if (err)
0348                 goto fail;
0349             continue;
0350         }
0351 
0352         if (up_next == -1)
0353             count++;
0354         else
0355             indirect_count++;
0356 
0357         if (count > vrh->vring.num || indirect_count > desc_max) {
0358             vringh_bad("Descriptor loop in %p", descs);
0359             err = -ELOOP;
0360             goto fail;
0361         }
0362 
0363         if (desc.flags & cpu_to_vringh16(vrh, VRING_DESC_F_WRITE))
0364             iov = wiov;
0365         else {
0366             iov = riov;
0367             if (unlikely(wiov && wiov->used)) {
0368                 vringh_bad("Readable desc %p after writable",
0369                        &descs[i]);
0370                 err = -EINVAL;
0371                 goto fail;
0372             }
0373         }
0374 
0375         if (!iov) {
0376             vringh_bad("Unexpected %s desc",
0377                    !wiov ? "writable" : "readable");
0378             err = -EPROTO;
0379             goto fail;
0380         }
0381 
0382     again:
0383         /* Make sure it's OK, and get offset. */
0384         len = vringh32_to_cpu(vrh, desc.len);
0385         if (!rcheck(vrh, vringh64_to_cpu(vrh, desc.addr), &len, &range,
0386                 getrange)) {
0387             err = -EINVAL;
0388             goto fail;
0389         }
0390         addr = (void *)(unsigned long)(vringh64_to_cpu(vrh, desc.addr) +
0391                            range.offset);
0392 
0393         if (unlikely(iov->used == (iov->max_num & ~VRINGH_IOV_ALLOCATED))) {
0394             err = resize_iovec(iov, gfp);
0395             if (err)
0396                 goto fail;
0397         }
0398 
0399         iov->iov[iov->used].iov_base = addr;
0400         iov->iov[iov->used].iov_len = len;
0401         iov->used++;
0402 
0403         if (unlikely(len != vringh32_to_cpu(vrh, desc.len))) {
0404             desc.len = cpu_to_vringh32(vrh,
0405                    vringh32_to_cpu(vrh, desc.len) - len);
0406             desc.addr = cpu_to_vringh64(vrh,
0407                     vringh64_to_cpu(vrh, desc.addr) + len);
0408             goto again;
0409         }
0410 
0411         if (desc.flags & cpu_to_vringh16(vrh, VRING_DESC_F_NEXT)) {
0412             i = vringh16_to_cpu(vrh, desc.next);
0413         } else {
0414             /* Just in case we need to finish traversing above. */
0415             if (unlikely(up_next > 0)) {
0416                 i = return_from_indirect(vrh, &up_next,
0417                              &descs, &desc_max);
0418                 slow = false;
0419                 indirect_count = 0;
0420             } else
0421                 break;
0422         }
0423 
0424         if (i >= desc_max) {
0425             vringh_bad("Chained index %u > %u", i, desc_max);
0426             err = -EINVAL;
0427             goto fail;
0428         }
0429     }
0430 
0431     return 0;
0432 
0433 fail:
0434     return err;
0435 }
0436 
0437 static inline int __vringh_complete(struct vringh *vrh,
0438                     const struct vring_used_elem *used,
0439                     unsigned int num_used,
0440                     int (*putu16)(const struct vringh *vrh,
0441                           __virtio16 *p, u16 val),
0442                     int (*putused)(const struct vringh *vrh,
0443                            struct vring_used_elem *dst,
0444                            const struct vring_used_elem
0445                            *src, unsigned num))
0446 {
0447     struct vring_used *used_ring;
0448     int err;
0449     u16 used_idx, off;
0450 
0451     used_ring = vrh->vring.used;
0452     used_idx = vrh->last_used_idx + vrh->completed;
0453 
0454     off = used_idx % vrh->vring.num;
0455 
0456     /* Compiler knows num_used == 1 sometimes, hence extra check */
0457     if (num_used > 1 && unlikely(off + num_used >= vrh->vring.num)) {
0458         u16 part = vrh->vring.num - off;
0459         err = putused(vrh, &used_ring->ring[off], used, part);
0460         if (!err)
0461             err = putused(vrh, &used_ring->ring[0], used + part,
0462                       num_used - part);
0463     } else
0464         err = putused(vrh, &used_ring->ring[off], used, num_used);
0465 
0466     if (err) {
0467         vringh_bad("Failed to write %u used entries %u at %p",
0468                num_used, off, &used_ring->ring[off]);
0469         return err;
0470     }
0471 
0472     /* Make sure buffer is written before we update index. */
0473     virtio_wmb(vrh->weak_barriers);
0474 
0475     err = putu16(vrh, &vrh->vring.used->idx, used_idx + num_used);
0476     if (err) {
0477         vringh_bad("Failed to update used index at %p",
0478                &vrh->vring.used->idx);
0479         return err;
0480     }
0481 
0482     vrh->completed += num_used;
0483     return 0;
0484 }
0485 
0486 
0487 static inline int __vringh_need_notify(struct vringh *vrh,
0488                        int (*getu16)(const struct vringh *vrh,
0489                              u16 *val,
0490                              const __virtio16 *p))
0491 {
0492     bool notify;
0493     u16 used_event;
0494     int err;
0495 
0496     /* Flush out used index update. This is paired with the
0497      * barrier that the Guest executes when enabling
0498      * interrupts. */
0499     virtio_mb(vrh->weak_barriers);
0500 
0501     /* Old-style, without event indices. */
0502     if (!vrh->event_indices) {
0503         u16 flags;
0504         err = getu16(vrh, &flags, &vrh->vring.avail->flags);
0505         if (err) {
0506             vringh_bad("Failed to get flags at %p",
0507                    &vrh->vring.avail->flags);
0508             return err;
0509         }
0510         return (!(flags & VRING_AVAIL_F_NO_INTERRUPT));
0511     }
0512 
0513     /* Modern: we know when other side wants to know. */
0514     err = getu16(vrh, &used_event, &vring_used_event(&vrh->vring));
0515     if (err) {
0516         vringh_bad("Failed to get used event idx at %p",
0517                &vring_used_event(&vrh->vring));
0518         return err;
0519     }
0520 
0521     /* Just in case we added so many that we wrap. */
0522     if (unlikely(vrh->completed > 0xffff))
0523         notify = true;
0524     else
0525         notify = vring_need_event(used_event,
0526                       vrh->last_used_idx + vrh->completed,
0527                       vrh->last_used_idx);
0528 
0529     vrh->last_used_idx += vrh->completed;
0530     vrh->completed = 0;
0531     return notify;
0532 }
0533 
0534 static inline bool __vringh_notify_enable(struct vringh *vrh,
0535                       int (*getu16)(const struct vringh *vrh,
0536                             u16 *val, const __virtio16 *p),
0537                       int (*putu16)(const struct vringh *vrh,
0538                             __virtio16 *p, u16 val))
0539 {
0540     u16 avail;
0541 
0542     if (!vrh->event_indices) {
0543         /* Old-school; update flags. */
0544         if (putu16(vrh, &vrh->vring.used->flags, 0) != 0) {
0545             vringh_bad("Clearing used flags %p",
0546                    &vrh->vring.used->flags);
0547             return true;
0548         }
0549     } else {
0550         if (putu16(vrh, &vring_avail_event(&vrh->vring),
0551                vrh->last_avail_idx) != 0) {
0552             vringh_bad("Updating avail event index %p",
0553                    &vring_avail_event(&vrh->vring));
0554             return true;
0555         }
0556     }
0557 
0558     /* They could have slipped one in as we were doing that: make
0559      * sure it's written, then check again. */
0560     virtio_mb(vrh->weak_barriers);
0561 
0562     if (getu16(vrh, &avail, &vrh->vring.avail->idx) != 0) {
0563         vringh_bad("Failed to check avail idx at %p",
0564                &vrh->vring.avail->idx);
0565         return true;
0566     }
0567 
0568     /* This is unlikely, so we just leave notifications enabled
0569      * (if we're using event_indices, we'll only get one
0570      * notification anyway). */
0571     return avail == vrh->last_avail_idx;
0572 }
0573 
0574 static inline void __vringh_notify_disable(struct vringh *vrh,
0575                        int (*putu16)(const struct vringh *vrh,
0576                              __virtio16 *p, u16 val))
0577 {
0578     if (!vrh->event_indices) {
0579         /* Old-school; update flags. */
0580         if (putu16(vrh, &vrh->vring.used->flags,
0581                VRING_USED_F_NO_NOTIFY)) {
0582             vringh_bad("Setting used flags %p",
0583                    &vrh->vring.used->flags);
0584         }
0585     }
0586 }
0587 
0588 /* Userspace access helpers: in this case, addresses are really userspace. */
0589 static inline int getu16_user(const struct vringh *vrh, u16 *val, const __virtio16 *p)
0590 {
0591     __virtio16 v = 0;
0592     int rc = get_user(v, (__force __virtio16 __user *)p);
0593     *val = vringh16_to_cpu(vrh, v);
0594     return rc;
0595 }
0596 
0597 static inline int putu16_user(const struct vringh *vrh, __virtio16 *p, u16 val)
0598 {
0599     __virtio16 v = cpu_to_vringh16(vrh, val);
0600     return put_user(v, (__force __virtio16 __user *)p);
0601 }
0602 
0603 static inline int copydesc_user(const struct vringh *vrh,
0604                 void *dst, const void *src, size_t len)
0605 {
0606     return copy_from_user(dst, (__force void __user *)src, len) ?
0607         -EFAULT : 0;
0608 }
0609 
0610 static inline int putused_user(const struct vringh *vrh,
0611                    struct vring_used_elem *dst,
0612                    const struct vring_used_elem *src,
0613                    unsigned int num)
0614 {
0615     return copy_to_user((__force void __user *)dst, src,
0616                 sizeof(*dst) * num) ? -EFAULT : 0;
0617 }
0618 
0619 static inline int xfer_from_user(const struct vringh *vrh, void *src,
0620                  void *dst, size_t len)
0621 {
0622     return copy_from_user(dst, (__force void __user *)src, len) ?
0623         -EFAULT : 0;
0624 }
0625 
0626 static inline int xfer_to_user(const struct vringh *vrh,
0627                    void *dst, void *src, size_t len)
0628 {
0629     return copy_to_user((__force void __user *)dst, src, len) ?
0630         -EFAULT : 0;
0631 }
0632 
0633 /**
0634  * vringh_init_user - initialize a vringh for a userspace vring.
0635  * @vrh: the vringh to initialize.
0636  * @features: the feature bits for this ring.
0637  * @num: the number of elements.
0638  * @weak_barriers: true if we only need memory barriers, not I/O.
0639  * @desc: the userspace descriptor pointer.
0640  * @avail: the userspace avail pointer.
0641  * @used: the userspace used pointer.
0642  *
0643  * Returns an error if num is invalid: you should check pointers
0644  * yourself!
0645  */
0646 int vringh_init_user(struct vringh *vrh, u64 features,
0647              unsigned int num, bool weak_barriers,
0648              vring_desc_t __user *desc,
0649              vring_avail_t __user *avail,
0650              vring_used_t __user *used)
0651 {
0652     /* Sane power of 2 please! */
0653     if (!num || num > 0xffff || (num & (num - 1))) {
0654         vringh_bad("Bad ring size %u", num);
0655         return -EINVAL;
0656     }
0657 
0658     vrh->little_endian = (features & (1ULL << VIRTIO_F_VERSION_1));
0659     vrh->event_indices = (features & (1 << VIRTIO_RING_F_EVENT_IDX));
0660     vrh->weak_barriers = weak_barriers;
0661     vrh->completed = 0;
0662     vrh->last_avail_idx = 0;
0663     vrh->last_used_idx = 0;
0664     vrh->vring.num = num;
0665     /* vring expects kernel addresses, but only used via accessors. */
0666     vrh->vring.desc = (__force struct vring_desc *)desc;
0667     vrh->vring.avail = (__force struct vring_avail *)avail;
0668     vrh->vring.used = (__force struct vring_used *)used;
0669     return 0;
0670 }
0671 EXPORT_SYMBOL(vringh_init_user);
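
A hedged setup sketch, assuming the userspace addresses of the three vring areas have already been obtained and validated by the caller (for example via an ioctl); the ring size of 256 and the variable names are illustrative.

static int setup_host_ring(struct vringh *vrh, u64 features,
			   void __user *desc, void __user *avail,
			   void __user *used)
{
	/* num must be a power of 2 and at most 0xffff, as checked above. */
	return vringh_init_user(vrh, features, 256, true,
				(vring_desc_t __user *)desc,
				(vring_avail_t __user *)avail,
				(vring_used_t __user *)used);
}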
0672 
0673 /**
0674  * vringh_getdesc_user - get next available descriptor from userspace ring.
0675  * @vrh: the userspace vring.
0676  * @riov: where to put the readable descriptors (or NULL)
0677  * @wiov: where to put the writable descriptors (or NULL)
0678  * @getrange: function to call to check ranges.
0679  * @head: head index we received, for passing to vringh_complete_user().
0680  *
0681  * Returns 0 if there was no descriptor, 1 if there was, or -errno.
0682  *
0683  * Note that on error return, you can tell the difference between an
0684  * invalid ring and a single invalid descriptor: in the former case,
0685  * *head will be vrh->vring.num.  You may be able to ignore an invalid
0686  * descriptor, but there's not much you can do with an invalid ring.
0687  *
0688  * Note that you can reuse riov and wiov with subsequent calls. Content is
0689  * overwritten and memory reallocated if more space is needed.
0690  * When you no longer need riov and wiov, clean them up by calling
0691  * vringh_iov_cleanup() to release the memory, even on error!
0692  */
0693 int vringh_getdesc_user(struct vringh *vrh,
0694             struct vringh_iov *riov,
0695             struct vringh_iov *wiov,
0696             bool (*getrange)(struct vringh *vrh,
0697                      u64 addr, struct vringh_range *r),
0698             u16 *head)
0699 {
0700     int err;
0701 
0702     *head = vrh->vring.num;
0703     err = __vringh_get_head(vrh, getu16_user, &vrh->last_avail_idx);
0704     if (err < 0)
0705         return err;
0706 
0707     /* Empty... */
0708     if (err == vrh->vring.num)
0709         return 0;
0710 
0711     /* We need the layouts to be identical for this to work */
0712     BUILD_BUG_ON(sizeof(struct vringh_kiov) != sizeof(struct vringh_iov));
0713     BUILD_BUG_ON(offsetof(struct vringh_kiov, iov) !=
0714              offsetof(struct vringh_iov, iov));
0715     BUILD_BUG_ON(offsetof(struct vringh_kiov, i) !=
0716              offsetof(struct vringh_iov, i));
0717     BUILD_BUG_ON(offsetof(struct vringh_kiov, used) !=
0718              offsetof(struct vringh_iov, used));
0719     BUILD_BUG_ON(offsetof(struct vringh_kiov, max_num) !=
0720              offsetof(struct vringh_iov, max_num));
0721     BUILD_BUG_ON(sizeof(struct iovec) != sizeof(struct kvec));
0722     BUILD_BUG_ON(offsetof(struct iovec, iov_base) !=
0723              offsetof(struct kvec, iov_base));
0724     BUILD_BUG_ON(offsetof(struct iovec, iov_len) !=
0725              offsetof(struct kvec, iov_len));
0726     BUILD_BUG_ON(sizeof(((struct iovec *)NULL)->iov_base)
0727              != sizeof(((struct kvec *)NULL)->iov_base));
0728     BUILD_BUG_ON(sizeof(((struct iovec *)NULL)->iov_len)
0729              != sizeof(((struct kvec *)NULL)->iov_len));
0730 
0731     *head = err;
0732     err = __vringh_iov(vrh, *head, (struct vringh_kiov *)riov,
0733                (struct vringh_kiov *)wiov,
0734                range_check, getrange, GFP_KERNEL, copydesc_user);
0735     if (err)
0736         return err;
0737 
0738     return 1;
0739 }
0740 EXPORT_SYMBOL(vringh_getdesc_user);
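
A hedged sketch of servicing a single buffer from a userspace ring, showing how the 0 / 1 / -errno return is typically handled. my_getrange() stands in for a driver-specific range-check callback, the "kick the guest" step is an assumption, and vringh_iov_init()/vringh_iov_cleanup() come from <linux/vringh.h>.

static int service_one_buffer(struct vringh *vrh,
			      bool (*my_getrange)(struct vringh *, u64,
						  struct vringh_range *))
{
	struct vringh_iov riov, wiov;
	u16 head;
	int ret;

	vringh_iov_init(&riov, NULL, 0);
	vringh_iov_init(&wiov, NULL, 0);

	ret = vringh_getdesc_user(vrh, &riov, &wiov, my_getrange, &head);
	if (ret <= 0)			/* 0: ring empty, <0: error */
		goto out;

	/* ... pull from riov and/or push into wiov here ... */

	ret = vringh_complete_user(vrh, head, 0 /* bytes written */);
	if (!ret && vringh_need_notify_user(vrh) > 0)
		;			/* assumption: kick the guest here */
out:
	vringh_iov_cleanup(&riov);
	vringh_iov_cleanup(&wiov);
	return ret;
}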
0741 
0742 /**
0743  * vringh_iov_pull_user - copy bytes from vring_iov.
0744  * @riov: the riov as passed to vringh_getdesc_user() (updated as we consume)
0745  * @dst: the place to copy.
0746  * @len: the maximum length to copy.
0747  *
0748  * Returns the bytes copied <= len or a negative errno.
0749  */
0750 ssize_t vringh_iov_pull_user(struct vringh_iov *riov, void *dst, size_t len)
0751 {
0752     return vringh_iov_xfer(NULL, (struct vringh_kiov *)riov,
0753                    dst, len, xfer_from_user);
0754 }
0755 EXPORT_SYMBOL(vringh_iov_pull_user);
0756 
0757 /**
0758  * vringh_iov_push_user - copy bytes into vring_iov.
0759  * @wiov: the wiov as passed to vringh_getdesc_user() (updated as we consume)
0760  * @src: the place to copy from.
0761  * @len: the maximum length to copy.
0762  *
0763  * Returns the bytes copied <= len or a negative errno.
0764  */
0765 ssize_t vringh_iov_push_user(struct vringh_iov *wiov,
0766                  const void *src, size_t len)
0767 {
0768     return vringh_iov_xfer(NULL, (struct vringh_kiov *)wiov,
0769                    (void *)src, len, xfer_to_user);
0770 }
0771 EXPORT_SYMBOL(vringh_iov_push_user);
0772 
0773 /**
0774  * vringh_abandon_user - we've decided not to handle the descriptor(s).
0775  * @vrh: the vring.
0776  * @num: the number of descriptors to put back (i.e. the number of
0777  *   vringh_getdesc_user() calls to undo).
0778  *
0779  * The next vringh_getdesc_user() will return the old descriptor(s) again.
0780  */
0781 void vringh_abandon_user(struct vringh *vrh, unsigned int num)
0782 {
0783     /* We only update vring_avail_event(vr) when we want to be notified,
0784      * so we haven't changed that yet. */
0785     vrh->last_avail_idx -= num;
0786 }
0787 EXPORT_SYMBOL(vringh_abandon_user);
0788 
0789 /**
0790  * vringh_complete_user - we've finished with descriptor, publish it.
0791  * @vrh: the vring.
0792  * @head: the head as filled in by vringh_getdesc_user.
0793  * @len: the length of data we have written.
0794  *
0795  * You should check vringh_need_notify_user() after one or more calls
0796  * to this function.
0797  */
0798 int vringh_complete_user(struct vringh *vrh, u16 head, u32 len)
0799 {
0800     struct vring_used_elem used;
0801 
0802     used.id = cpu_to_vringh32(vrh, head);
0803     used.len = cpu_to_vringh32(vrh, len);
0804     return __vringh_complete(vrh, &used, 1, putu16_user, putused_user);
0805 }
0806 EXPORT_SYMBOL(vringh_complete_user);
0807 
0808 /**
0809  * vringh_complete_multi_user - we've finished with many descriptors.
0810  * @vrh: the vring.
0811  * @used: the head, length pairs.
0812  * @num_used: the number of used elements.
0813  *
0814  * You should check vringh_need_notify_user() after one or more calls
0815  * to this function.
0816  */
0817 int vringh_complete_multi_user(struct vringh *vrh,
0818                    const struct vring_used_elem used[],
0819                    unsigned num_used)
0820 {
0821     return __vringh_complete(vrh, used, num_used,
0822                  putu16_user, putused_user);
0823 }
0824 EXPORT_SYMBOL(vringh_complete_multi_user);
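
A hedged sketch of batching completions, assuming the caller has two finished buffers whose heads and written lengths it wants to publish with a single used-index update; the helper name is illustrative.

static int publish_two(struct vringh *vrh, u16 head0, u32 len0,
		       u16 head1, u32 len1)
{
	struct vring_used_elem batch[2] = {
		{ .id = cpu_to_vringh32(vrh, head0),
		  .len = cpu_to_vringh32(vrh, len0) },
		{ .id = cpu_to_vringh32(vrh, head1),
		  .len = cpu_to_vringh32(vrh, len1) },
	};

	return vringh_complete_multi_user(vrh, batch, 2);
}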
0825 
0826 /**
0827  * vringh_notify_enable_user - we want to know if something changes.
0828  * @vrh: the vring.
0829  *
0830  * This always enables notifications, but returns false if there are
0831  * now more buffers available in the vring.
0832  */
0833 bool vringh_notify_enable_user(struct vringh *vrh)
0834 {
0835     return __vringh_notify_enable(vrh, getu16_user, putu16_user);
0836 }
0837 EXPORT_SYMBOL(vringh_notify_enable_user);
0838 
0839 /**
0840  * vringh_notify_disable_user - don't tell us if something changes.
0841  * @vrh: the vring.
0842  *
0843  * This is our normal running state: we disable and then only enable when
0844  * we're going to sleep.
0845  */
0846 void vringh_notify_disable_user(struct vringh *vrh)
0847 {
0848     __vringh_notify_disable(vrh, putu16_user);
0849 }
0850 EXPORT_SYMBOL(vringh_notify_disable_user);
0851 
0852 /**
0853  * vringh_need_notify_user - must we tell the other side about used buffers?
0854  * @vrh: the vring we've called vringh_complete_user() on.
0855  *
0856  * Returns -errno or 0 if we don't need to tell the other side, 1 if we do.
0857  */
0858 int vringh_need_notify_user(struct vringh *vrh)
0859 {
0860     return __vringh_need_notify(vrh, getu16_user);
0861 }
0862 EXPORT_SYMBOL(vringh_need_notify_user);
0863 
0864 /* Kernelspace access helpers. */
0865 static inline int getu16_kern(const struct vringh *vrh,
0866                   u16 *val, const __virtio16 *p)
0867 {
0868     *val = vringh16_to_cpu(vrh, READ_ONCE(*p));
0869     return 0;
0870 }
0871 
0872 static inline int putu16_kern(const struct vringh *vrh, __virtio16 *p, u16 val)
0873 {
0874     WRITE_ONCE(*p, cpu_to_vringh16(vrh, val));
0875     return 0;
0876 }
0877 
0878 static inline int copydesc_kern(const struct vringh *vrh,
0879                 void *dst, const void *src, size_t len)
0880 {
0881     memcpy(dst, src, len);
0882     return 0;
0883 }
0884 
0885 static inline int putused_kern(const struct vringh *vrh,
0886                    struct vring_used_elem *dst,
0887                    const struct vring_used_elem *src,
0888                    unsigned int num)
0889 {
0890     memcpy(dst, src, num * sizeof(*dst));
0891     return 0;
0892 }
0893 
0894 static inline int xfer_kern(const struct vringh *vrh, void *src,
0895                 void *dst, size_t len)
0896 {
0897     memcpy(dst, src, len);
0898     return 0;
0899 }
0900 
0901 static inline int kern_xfer(const struct vringh *vrh, void *dst,
0902                 void *src, size_t len)
0903 {
0904     memcpy(dst, src, len);
0905     return 0;
0906 }
0907 
0908 /**
0909  * vringh_init_kern - initialize a vringh for a kernelspace vring.
0910  * @vrh: the vringh to initialize.
0911  * @features: the feature bits for this ring.
0912  * @num: the number of elements.
0913  * @weak_barriers: true if we only need memory barriers, not I/O.
0914  * @desc: the kernelspace descriptor pointer.
0915  * @avail: the kernelspace avail pointer.
0916  * @used: the kernelspace used pointer.
0917  *
0918  * Returns an error if num is invalid.
0919  */
0920 int vringh_init_kern(struct vringh *vrh, u64 features,
0921              unsigned int num, bool weak_barriers,
0922              struct vring_desc *desc,
0923              struct vring_avail *avail,
0924              struct vring_used *used)
0925 {
0926     /* Sane power of 2 please! */
0927     if (!num || num > 0xffff || (num & (num - 1))) {
0928         vringh_bad("Bad ring size %u", num);
0929         return -EINVAL;
0930     }
0931 
0932     vrh->little_endian = (features & (1ULL << VIRTIO_F_VERSION_1));
0933     vrh->event_indices = (features & (1 << VIRTIO_RING_F_EVENT_IDX));
0934     vrh->weak_barriers = weak_barriers;
0935     vrh->completed = 0;
0936     vrh->last_avail_idx = 0;
0937     vrh->last_used_idx = 0;
0938     vrh->vring.num = num;
0939     vrh->vring.desc = desc;
0940     vrh->vring.avail = avail;
0941     vrh->vring.used = used;
0942     return 0;
0943 }
0944 EXPORT_SYMBOL(vringh_init_kern);
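
A hedged initialization sketch for an in-kernel ring, assuming the caller owns a single contiguous buffer holding the descriptor, avail and used areas and lays it out with vring_init() from <uapi/linux/virtio_ring.h>; the page-sized alignment is illustrative.

static int host_kern_ring_setup(struct vringh *vrh, u64 features,
				void *ring_mem, unsigned int num)
{
	struct vring vring;

	vring_init(&vring, num, ring_mem, PAGE_SIZE);

	return vringh_init_kern(vrh, features, num, true,
				vring.desc, vring.avail, vring.used);
}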
0945 
0946 /**
0947  * vringh_getdesc_kern - get next available descriptor from kernelspace ring.
0948  * @vrh: the kernelspace vring.
0949  * @riov: where to put the readable descriptors (or NULL)
0950  * @wiov: where to put the writable descriptors (or NULL)
0951  * @head: head index we received, for passing to vringh_complete_kern().
0952  * @gfp: flags for allocating larger riov/wiov.
0953  *
0954  * Returns 0 if there was no descriptor, 1 if there was, or -errno.
0955  *
0956  * Note that on error return, you can tell the difference between an
0957  * invalid ring and a single invalid descriptor: in the former case,
0958  * *head will be vrh->vring.num.  You may be able to ignore an invalid
0959  * descriptor, but there's not much you can do with an invalid ring.
0960  *
0961  * Note that you can reuse riov and wiov with subsequent calls. Content is
0962  * overwritten and memory reallocated if more space is needed.
0963  * When you no longer need riov and wiov, clean them up by calling
0964  * vringh_kiov_cleanup() to release the memory, even on error!
0965  */
0966 int vringh_getdesc_kern(struct vringh *vrh,
0967             struct vringh_kiov *riov,
0968             struct vringh_kiov *wiov,
0969             u16 *head,
0970             gfp_t gfp)
0971 {
0972     int err;
0973 
0974     err = __vringh_get_head(vrh, getu16_kern, &vrh->last_avail_idx);
0975     if (err < 0)
0976         return err;
0977 
0978     /* Empty... */
0979     if (err == vrh->vring.num)
0980         return 0;
0981 
0982     *head = err;
0983     err = __vringh_iov(vrh, *head, riov, wiov, no_range_check, NULL,
0984                gfp, copydesc_kern);
0985     if (err)
0986         return err;
0987 
0988     return 1;
0989 }
0990 EXPORT_SYMBOL(vringh_getdesc_kern);
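
A hedged end-to-end sketch for a kernel-side, device-readable queue: fetch one descriptor chain, copy up to 64 bytes out of it, and publish the completion. The buffer size is illustrative and vringh_kiov_init()/vringh_kiov_cleanup() come from <linux/vringh.h>.

static int consume_one_kern(struct vringh *vrh)
{
	struct vringh_kiov riov;
	u8 buf[64];
	ssize_t n;
	u16 head;
	int ret;

	vringh_kiov_init(&riov, NULL, 0);

	ret = vringh_getdesc_kern(vrh, &riov, NULL, &head, GFP_KERNEL);
	if (ret <= 0)			/* 0: ring empty, <0: error */
		goto out;

	n = vringh_iov_pull_kern(&riov, buf, sizeof(buf));
	if (n < 0) {
		ret = n;
		goto out;
	}

	ret = vringh_complete_kern(vrh, head, 0);
	if (!ret && vringh_need_notify_kern(vrh) > 0)
		;			/* assumption: notify the other side */
out:
	vringh_kiov_cleanup(&riov);
	return ret;
}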
0991 
0992 /**
0993  * vringh_iov_pull_kern - copy bytes from vring_iov.
0994  * @riov: the riov as passed to vringh_getdesc_kern() (updated as we consume)
0995  * @dst: the place to copy.
0996  * @len: the maximum length to copy.
0997  *
0998  * Returns the bytes copied <= len or a negative errno.
0999  */
1000 ssize_t vringh_iov_pull_kern(struct vringh_kiov *riov, void *dst, size_t len)
1001 {
1002     return vringh_iov_xfer(NULL, riov, dst, len, xfer_kern);
1003 }
1004 EXPORT_SYMBOL(vringh_iov_pull_kern);
1005 
1006 /**
1007  * vringh_iov_push_kern - copy bytes into vring_iov.
1008  * @wiov: the wiov as passed to vringh_getdesc_kern() (updated as we consume)
1009  * @src: the place to copy from.
1010  * @len: the maximum length to copy.
1011  *
1012  * Returns the bytes copied <= len or a negative errno.
1013  */
1014 ssize_t vringh_iov_push_kern(struct vringh_kiov *wiov,
1015                  const void *src, size_t len)
1016 {
1017     return vringh_iov_xfer(NULL, wiov, (void *)src, len, kern_xfer);
1018 }
1019 EXPORT_SYMBOL(vringh_iov_push_kern);
1020 
1021 /**
1022  * vringh_abandon_kern - we've decided not to handle the descriptor(s).
1023  * @vrh: the vring.
1024  * @num: the number of descriptors to put back (i.e. the number of
1025  *   vringh_getdesc_kern() calls to undo).
1026  *
1027  * The next vringh_getdesc_kern() will return the old descriptor(s) again.
1028  */
1029 void vringh_abandon_kern(struct vringh *vrh, unsigned int num)
1030 {
1031     /* We only update vring_avail_event(vr) when we want to be notified,
1032      * so we haven't changed that yet. */
1033     vrh->last_avail_idx -= num;
1034 }
1035 EXPORT_SYMBOL(vringh_abandon_kern);
1036 
1037 /**
1038  * vringh_complete_kern - we've finished with descriptor, publish it.
1039  * @vrh: the vring.
1040  * @head: the head as filled in by vringh_getdesc_kern.
1041  * @len: the length of data we have written.
1042  *
1043  * You should check vringh_need_notify_kern() after one or more calls
1044  * to this function.
1045  */
1046 int vringh_complete_kern(struct vringh *vrh, u16 head, u32 len)
1047 {
1048     struct vring_used_elem used;
1049 
1050     used.id = cpu_to_vringh32(vrh, head);
1051     used.len = cpu_to_vringh32(vrh, len);
1052 
1053     return __vringh_complete(vrh, &used, 1, putu16_kern, putused_kern);
1054 }
1055 EXPORT_SYMBOL(vringh_complete_kern);
1056 
1057 /**
1058  * vringh_notify_enable_kern - we want to know if something changes.
1059  * @vrh: the vring.
1060  *
1061  * This always enables notifications, but returns false if there are
1062  * now more buffers available in the vring.
1063  */
1064 bool vringh_notify_enable_kern(struct vringh *vrh)
1065 {
1066     return __vringh_notify_enable(vrh, getu16_kern, putu16_kern);
1067 }
1068 EXPORT_SYMBOL(vringh_notify_enable_kern);
1069 
1070 /**
1071  * vringh_notify_disable_kern - don't tell us if something changes.
1072  * @vrh: the vring.
1073  *
1074  * This is our normal running state: we disable and then only enable when
1075  * we're going to sleep.
1076  */
1077 void vringh_notify_disable_kern(struct vringh *vrh)
1078 {
1079     __vringh_notify_disable(vrh, putu16_kern);
1080 }
1081 EXPORT_SYMBOL(vringh_notify_disable_kern);
1082 
1083 /**
1084  * vringh_need_notify_kern - must we tell the other side about used buffers?
1085  * @vrh: the vring we've called vringh_complete_kern() on.
1086  *
1087  * Returns -errno or 0 if we don't need to tell the other side, 1 if we do.
1088  */
1089 int vringh_need_notify_kern(struct vringh *vrh)
1090 {
1091     return __vringh_need_notify(vrh, getu16_kern);
1092 }
1093 EXPORT_SYMBOL(vringh_need_notify_kern);
1094 
1095 #if IS_REACHABLE(CONFIG_VHOST_IOTLB)
1096 
1097 static int iotlb_translate(const struct vringh *vrh,
1098                u64 addr, u64 len, u64 *translated,
1099                struct bio_vec iov[],
1100                int iov_size, u32 perm)
1101 {
1102     struct vhost_iotlb_map *map;
1103     struct vhost_iotlb *iotlb = vrh->iotlb;
1104     int ret = 0;
1105     u64 s = 0;
1106 
1107     spin_lock(vrh->iotlb_lock);
1108 
1109     while (len > s) {
1110         u64 size, pa, pfn;
1111 
1112         if (unlikely(ret >= iov_size)) {
1113             ret = -ENOBUFS;
1114             break;
1115         }
1116 
1117         map = vhost_iotlb_itree_first(iotlb, addr,
1118                           addr + len - 1);
1119         if (!map || map->start > addr) {
1120             ret = -EINVAL;
1121             break;
1122         } else if (!(map->perm & perm)) {
1123             ret = -EPERM;
1124             break;
1125         }
1126 
1127         size = map->size - addr + map->start;
1128         pa = map->addr + addr - map->start;
1129         pfn = pa >> PAGE_SHIFT;
1130         iov[ret].bv_page = pfn_to_page(pfn);
1131         iov[ret].bv_len = min(len - s, size);
1132         iov[ret].bv_offset = pa & (PAGE_SIZE - 1);
1133         s += size;
1134         addr += size;
1135         ++ret;
1136     }
1137 
1138     spin_unlock(vrh->iotlb_lock);
1139 
1140     if (translated)
1141         *translated = min(len, s);
1142 
1143     return ret;
1144 }
1145 
1146 static inline int copy_from_iotlb(const struct vringh *vrh, void *dst,
1147                   void *src, size_t len)
1148 {
1149     u64 total_translated = 0;
1150 
1151     while (total_translated < len) {
1152         struct bio_vec iov[16];
1153         struct iov_iter iter;
1154         u64 translated;
1155         int ret;
1156 
1157         ret = iotlb_translate(vrh, (u64)(uintptr_t)src,
1158                       len - total_translated, &translated,
1159                       iov, ARRAY_SIZE(iov), VHOST_MAP_RO);
1160         if (ret == -ENOBUFS)
1161             ret = ARRAY_SIZE(iov);
1162         else if (ret < 0)
1163             return ret;
1164 
1165         iov_iter_bvec(&iter, READ, iov, ret, translated);
1166 
1167         ret = copy_from_iter(dst, translated, &iter);
1168         if (ret < 0)
1169             return ret;
1170 
1171         src += translated;
1172         dst += translated;
1173         total_translated += translated;
1174     }
1175 
1176     return total_translated;
1177 }
1178 
1179 static inline int copy_to_iotlb(const struct vringh *vrh, void *dst,
1180                 void *src, size_t len)
1181 {
1182     u64 total_translated = 0;
1183 
1184     while (total_translated < len) {
1185         struct bio_vec iov[16];
1186         struct iov_iter iter;
1187         u64 translated;
1188         int ret;
1189 
1190         ret = iotlb_translate(vrh, (u64)(uintptr_t)dst,
1191                       len - total_translated, &translated,
1192                       iov, ARRAY_SIZE(iov), VHOST_MAP_WO);
1193         if (ret == -ENOBUFS)
1194             ret = ARRAY_SIZE(iov);
1195         else if (ret < 0)
1196             return ret;
1197 
1198         iov_iter_bvec(&iter, WRITE, iov, ret, translated);
1199 
1200         ret = copy_to_iter(src, translated, &iter);
1201         if (ret < 0)
1202             return ret;
1203 
1204         src += translated;
1205         dst += translated;
1206         total_translated += translated;
1207     }
1208 
1209     return total_translated;
1210 }
1211 
1212 static inline int getu16_iotlb(const struct vringh *vrh,
1213                    u16 *val, const __virtio16 *p)
1214 {
1215     struct bio_vec iov;
1216     void *kaddr, *from;
1217     int ret;
1218 
1219     /* Atomic read is needed for getu16 */
1220     ret = iotlb_translate(vrh, (u64)(uintptr_t)p, sizeof(*p), NULL,
1221                   &iov, 1, VHOST_MAP_RO);
1222     if (ret < 0)
1223         return ret;
1224 
1225     kaddr = kmap_atomic(iov.bv_page);
1226     from = kaddr + iov.bv_offset;
1227     *val = vringh16_to_cpu(vrh, READ_ONCE(*(__virtio16 *)from));
1228     kunmap_atomic(kaddr);
1229 
1230     return 0;
1231 }
1232 
1233 static inline int putu16_iotlb(const struct vringh *vrh,
1234                    __virtio16 *p, u16 val)
1235 {
1236     struct bio_vec iov;
1237     void *kaddr, *to;
1238     int ret;
1239 
1240     /* Atomic write is needed for putu16 */
1241     ret = iotlb_translate(vrh, (u64)(uintptr_t)p, sizeof(*p), NULL,
1242                   &iov, 1, VHOST_MAP_WO);
1243     if (ret < 0)
1244         return ret;
1245 
1246     kaddr = kmap_atomic(iov.bv_page);
1247     to = kaddr + iov.bv_offset;
1248     WRITE_ONCE(*(__virtio16 *)to, cpu_to_vringh16(vrh, val));
1249     kunmap_atomic(kaddr);
1250 
1251     return 0;
1252 }
1253 
1254 static inline int copydesc_iotlb(const struct vringh *vrh,
1255                  void *dst, const void *src, size_t len)
1256 {
1257     int ret;
1258 
1259     ret = copy_from_iotlb(vrh, dst, (void *)src, len);
1260     if (ret != len)
1261         return -EFAULT;
1262 
1263     return 0;
1264 }
1265 
1266 static inline int xfer_from_iotlb(const struct vringh *vrh, void *src,
1267                   void *dst, size_t len)
1268 {
1269     int ret;
1270 
1271     ret = copy_from_iotlb(vrh, dst, src, len);
1272     if (ret != len)
1273         return -EFAULT;
1274 
1275     return 0;
1276 }
1277 
1278 static inline int xfer_to_iotlb(const struct vringh *vrh,
1279                    void *dst, void *src, size_t len)
1280 {
1281     int ret;
1282 
1283     ret = copy_to_iotlb(vrh, dst, src, len);
1284     if (ret != len)
1285         return -EFAULT;
1286 
1287     return 0;
1288 }
1289 
1290 static inline int putused_iotlb(const struct vringh *vrh,
1291                 struct vring_used_elem *dst,
1292                 const struct vring_used_elem *src,
1293                 unsigned int num)
1294 {
1295     int size = num * sizeof(*dst);
1296     int ret;
1297 
1298     ret = copy_to_iotlb(vrh, dst, (void *)src, num * sizeof(*dst));
1299     if (ret != size)
1300         return -EFAULT;
1301 
1302     return 0;
1303 }
1304 
1305 /**
1306  * vringh_init_iotlb - initialize a vringh for a ring with IOTLB.
1307  * @vrh: the vringh to initialize.
1308  * @features: the feature bits for this ring.
1309  * @num: the number of elements.
1310  * @weak_barriers: true if we only need memory barriers, not I/O.
1311  * @desc: the descriptor pointer.
1312  * @avail: the avail pointer.
1313  * @used: the used pointer.
1314  *
1315  * Returns an error if num is invalid.
1316  */
1317 int vringh_init_iotlb(struct vringh *vrh, u64 features,
1318               unsigned int num, bool weak_barriers,
1319               struct vring_desc *desc,
1320               struct vring_avail *avail,
1321               struct vring_used *used)
1322 {
1323     return vringh_init_kern(vrh, features, num, weak_barriers,
1324                 desc, avail, used);
1325 }
1326 EXPORT_SYMBOL(vringh_init_iotlb);
1327 
1328 /**
1329  * vringh_set_iotlb - set the IOTLB a vringh uses to translate ring addresses.
1330  * @vrh: the vring
1331  * @iotlb: iotlb associated with this vring
1332  * @iotlb_lock: spinlock to synchronize the iotlb accesses
1333  */
1334 void vringh_set_iotlb(struct vringh *vrh, struct vhost_iotlb *iotlb,
1335               spinlock_t *iotlb_lock)
1336 {
1337     vrh->iotlb = iotlb;
1338     vrh->iotlb_lock = iotlb_lock;
1339 }
1340 EXPORT_SYMBOL(vringh_set_iotlb);
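
A hedged sketch of wiring a vringh to a vhost IOTLB, as a vDPA-style host side might do; the iotlb and its spinlock are owned by the caller, and the ring addresses are interpreted through that IOTLB on every access.

static int host_iotlb_ring_setup(struct vringh *vrh, u64 features,
				 unsigned int num,
				 struct vring_desc *desc,
				 struct vring_avail *avail,
				 struct vring_used *used,
				 struct vhost_iotlb *iotlb,
				 spinlock_t *iotlb_lock)
{
	int err;

	err = vringh_init_iotlb(vrh, features, num, true, desc, avail, used);
	if (err)
		return err;

	/* From here on, every ring access goes through the IOTLB. */
	vringh_set_iotlb(vrh, iotlb, iotlb_lock);
	return 0;
}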
1341 
1342 /**
1343  * vringh_getdesc_iotlb - get next available descriptor from ring with
1344  * IOTLB.
1345  * @vrh: the kernelspace vring.
1346  * @riov: where to put the readable descriptors (or NULL)
1347  * @wiov: where to put the writable descriptors (or NULL)
1348  * @head: head index we received, for passing to vringh_complete_iotlb().
1349  * @gfp: flags for allocating larger riov/wiov.
1350  *
1351  * Returns 0 if there was no descriptor, 1 if there was, or -errno.
1352  *
1353  * Note that on error return, you can tell the difference between an
1354  * invalid ring and a single invalid descriptor: in the former case,
1355  * *head will be vrh->vring.num.  You may be able to ignore an invalid
1356  * descriptor, but there's not much you can do with an invalid ring.
1357  *
1358  * Note that you can reuse riov and wiov with subsequent calls. Content is
1359  * overwritten and memory reallocated if more space is needed.
1360  * When you no longer need riov and wiov, clean them up by calling
1361  * vringh_kiov_cleanup() to release the memory, even on error!
1362  */
1363 int vringh_getdesc_iotlb(struct vringh *vrh,
1364              struct vringh_kiov *riov,
1365              struct vringh_kiov *wiov,
1366              u16 *head,
1367              gfp_t gfp)
1368 {
1369     int err;
1370 
1371     err = __vringh_get_head(vrh, getu16_iotlb, &vrh->last_avail_idx);
1372     if (err < 0)
1373         return err;
1374 
1375     /* Empty... */
1376     if (err == vrh->vring.num)
1377         return 0;
1378 
1379     *head = err;
1380     err = __vringh_iov(vrh, *head, riov, wiov, no_range_check, NULL,
1381                gfp, copydesc_iotlb);
1382     if (err)
1383         return err;
1384 
1385     return 1;
1386 }
1387 EXPORT_SYMBOL(vringh_getdesc_iotlb);
1388 
1389 /**
1390  * vringh_iov_pull_iotlb - copy bytes from vring_iov.
1391  * @vrh: the vring.
1392  * @riov: the riov as passed to vringh_getdesc_iotlb() (updated as we consume)
1393  * @dst: the place to copy.
1394  * @len: the maximum length to copy.
1395  *
1396  * Returns the bytes copied <= len or a negative errno.
1397  */
1398 ssize_t vringh_iov_pull_iotlb(struct vringh *vrh,
1399                   struct vringh_kiov *riov,
1400                   void *dst, size_t len)
1401 {
1402     return vringh_iov_xfer(vrh, riov, dst, len, xfer_from_iotlb);
1403 }
1404 EXPORT_SYMBOL(vringh_iov_pull_iotlb);
1405 
1406 /**
1407  * vringh_iov_push_iotlb - copy bytes into vring_iov.
1408  * @vrh: the vring.
1409  * @wiov: the wiov as passed to vringh_getdesc_iotlb() (updated as we consume)
1410  * @src: the place to copy from.
1411  * @len: the maximum length to copy.
1412  *
1413  * Returns the bytes copied <= len or a negative errno.
1414  */
1415 ssize_t vringh_iov_push_iotlb(struct vringh *vrh,
1416                   struct vringh_kiov *wiov,
1417                   const void *src, size_t len)
1418 {
1419     return vringh_iov_xfer(vrh, wiov, (void *)src, len, xfer_to_iotlb);
1420 }
1421 EXPORT_SYMBOL(vringh_iov_push_iotlb);
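
A hedged fragment highlighting that, unlike the _user/_kern copy helpers, the IOTLB variants take the vringh itself so they can translate addresses; the request buffer and echo behaviour are illustrative.

static int echo_one_iotlb(struct vringh *vrh, struct vringh_kiov *riov,
			  struct vringh_kiov *wiov)
{
	u8 req[64];
	ssize_t in, out;

	in = vringh_iov_pull_iotlb(vrh, riov, req, sizeof(req));
	if (in < 0)
		return in;

	out = vringh_iov_push_iotlb(vrh, wiov, req, in);
	return out < 0 ? out : 0;
}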
1422 
1423 /**
1424  * vringh_abandon_iotlb - we've decided not to handle the descriptor(s).
1425  * @vrh: the vring.
1426  * @num: the number of descriptors to put back (i.e. the number of
1427  *   vringh_getdesc_iotlb() calls to undo).
1428  *
1429  * The next vringh_getdesc_iotlb() will return the old descriptor(s) again.
1430  */
1431 void vringh_abandon_iotlb(struct vringh *vrh, unsigned int num)
1432 {
1433     /* We only update vring_avail_event(vr) when we want to be notified,
1434      * so we haven't changed that yet.
1435      */
1436     vrh->last_avail_idx -= num;
1437 }
1438 EXPORT_SYMBOL(vringh_abandon_iotlb);
1439 
1440 /**
1441  * vringh_complete_iotlb - we've finished with descriptor, publish it.
1442  * @vrh: the vring.
1443  * @head: the head as filled in by vringh_getdesc_iotlb.
1444  * @len: the length of data we have written.
1445  *
1446  * You should check vringh_need_notify_iotlb() after one or more calls
1447  * to this function.
1448  */
1449 int vringh_complete_iotlb(struct vringh *vrh, u16 head, u32 len)
1450 {
1451     struct vring_used_elem used;
1452 
1453     used.id = cpu_to_vringh32(vrh, head);
1454     used.len = cpu_to_vringh32(vrh, len);
1455 
1456     return __vringh_complete(vrh, &used, 1, putu16_iotlb, putused_iotlb);
1457 }
1458 EXPORT_SYMBOL(vringh_complete_iotlb);
1459 
1460 /**
1461  * vringh_notify_enable_iotlb - we want to know if something changes.
1462  * @vrh: the vring.
1463  *
1464  * This always enables notifications, but returns false if there are
1465  * now more buffers available in the vring.
1466  */
1467 bool vringh_notify_enable_iotlb(struct vringh *vrh)
1468 {
1469     return __vringh_notify_enable(vrh, getu16_iotlb, putu16_iotlb);
1470 }
1471 EXPORT_SYMBOL(vringh_notify_enable_iotlb);
1472 
1473 /**
1474  * vringh_notify_disable_iotlb - don't tell us if something changes.
1475  * @vrh: the vring.
1476  *
1477  * This is our normal running state: we disable and then only enable when
1478  * we're going to sleep.
1479  */
1480 void vringh_notify_disable_iotlb(struct vringh *vrh)
1481 {
1482     __vringh_notify_disable(vrh, putu16_iotlb);
1483 }
1484 EXPORT_SYMBOL(vringh_notify_disable_iotlb);
1485 
1486 /**
1487  * vringh_need_notify_iotlb - must we tell the other side about used buffers?
1488  * @vrh: the vring we've called vringh_complete_iotlb() on.
1489  *
1490  * Returns -errno or 0 if we don't need to tell the other side, 1 if we do.
1491  */
1492 int vringh_need_notify_iotlb(struct vringh *vrh)
1493 {
1494     return __vringh_need_notify(vrh, getu16_iotlb);
1495 }
1496 EXPORT_SYMBOL(vringh_need_notify_iotlb);
1497 
1498 #endif
1499 
1500 MODULE_LICENSE("GPL");