0001 // SPDX-License-Identifier: GPL-2.0-or-later
0002 /* Virtio ring implementation.
0003  *
0004  *  Copyright 2007 Rusty Russell IBM Corporation
0005  */
0006 #include <linux/virtio.h>
0007 #include <linux/virtio_ring.h>
0008 #include <linux/virtio_config.h>
0009 #include <linux/device.h>
0010 #include <linux/slab.h>
0011 #include <linux/module.h>
0012 #include <linux/hrtimer.h>
0013 #include <linux/dma-mapping.h>
0014 #include <linux/spinlock.h>
0015 #include <xen/xen.h>
0016 
0017 #ifdef DEBUG
0018 /* For development, we want to crash whenever the ring is screwed. */
0019 #define BAD_RING(_vq, fmt, args...)             \
0020     do {                            \
0021         dev_err(&(_vq)->vq.vdev->dev,           \
0022             "%s:"fmt, (_vq)->vq.name, ##args);  \
0023         BUG();                      \
0024     } while (0)
0025 /* Caller is supposed to guarantee no reentry. */
0026 #define START_USE(_vq)                      \
0027     do {                            \
0028         if ((_vq)->in_use)              \
0029             panic("%s:in_use = %i\n",       \
0030                   (_vq)->vq.name, (_vq)->in_use);   \
0031         (_vq)->in_use = __LINE__;           \
0032     } while (0)
0033 #define END_USE(_vq) \
0034     do { BUG_ON(!(_vq)->in_use); (_vq)->in_use = 0; } while(0)
0035 #define LAST_ADD_TIME_UPDATE(_vq)               \
0036     do {                            \
0037         ktime_t now = ktime_get();          \
0038                                 \
0039         /* No kick or get, with .1 second between?  Warn. */ \
0040         if ((_vq)->last_add_time_valid)         \
0041             WARN_ON(ktime_to_ms(ktime_sub(now,  \
0042                 (_vq)->last_add_time)) > 100);  \
0043         (_vq)->last_add_time = now;         \
0044         (_vq)->last_add_time_valid = true;      \
0045     } while (0)
0046 #define LAST_ADD_TIME_CHECK(_vq)                \
0047     do {                            \
0048         if ((_vq)->last_add_time_valid) {       \
0049             WARN_ON(ktime_to_ms(ktime_sub(ktime_get(), \
0050                       (_vq)->last_add_time)) > 100); \
0051         }                       \
0052     } while (0)
0053 #define LAST_ADD_TIME_INVALID(_vq)              \
0054     ((_vq)->last_add_time_valid = false)
0055 #else
0056 #define BAD_RING(_vq, fmt, args...)             \
0057     do {                            \
0058         dev_err(&_vq->vq.vdev->dev,         \
0059             "%s:"fmt, (_vq)->vq.name, ##args);  \
0060         (_vq)->broken = true;               \
0061     } while (0)
0062 #define START_USE(vq)
0063 #define END_USE(vq)
0064 #define LAST_ADD_TIME_UPDATE(vq)
0065 #define LAST_ADD_TIME_CHECK(vq)
0066 #define LAST_ADD_TIME_INVALID(vq)
0067 #endif
0068 
0069 struct vring_desc_state_split {
0070     void *data;         /* Data for callback. */
0071     struct vring_desc *indir_desc;  /* Indirect descriptor, if any. */
0072 };
0073 
0074 struct vring_desc_state_packed {
0075     void *data;         /* Data for callback. */
0076     struct vring_packed_desc *indir_desc; /* Indirect descriptor, if any. */
0077     u16 num;            /* Descriptor list length. */
0078     u16 last;           /* The last desc state in a list. */
0079 };
0080 
0081 struct vring_desc_extra {
0082     dma_addr_t addr;        /* Descriptor DMA addr. */
0083     u32 len;            /* Descriptor length. */
0084     u16 flags;          /* Descriptor flags. */
0085     u16 next;           /* The next desc state in a list. */
0086 };
0087 
0088 struct vring_virtqueue_split {
0089     /* Actual memory layout for this queue. */
0090     struct vring vring;
0091 
0092     /* Last written value to avail->flags */
0093     u16 avail_flags_shadow;
0094 
0095     /*
0096      * Last written value to avail->idx in
0097      * guest byte order.
0098      */
0099     u16 avail_idx_shadow;
0100 
0101     /* Per-descriptor state. */
0102     struct vring_desc_state_split *desc_state;
0103     struct vring_desc_extra *desc_extra;
0104 
0105     /* DMA address and size information */
0106     dma_addr_t queue_dma_addr;
0107     size_t queue_size_in_bytes;
0108 
0109     /*
0110      * The parameters for creating vrings are reserved for creating new
0111      * vring.
0112      */
0113     u32 vring_align;
0114     bool may_reduce_num;
0115 };
0116 
0117 struct vring_virtqueue_packed {
0118     /* Actual memory layout for this queue. */
0119     struct {
0120         unsigned int num;
0121         struct vring_packed_desc *desc;
0122         struct vring_packed_desc_event *driver;
0123         struct vring_packed_desc_event *device;
0124     } vring;
0125 
0126     /* Driver ring wrap counter. */
0127     bool avail_wrap_counter;
0128 
0129     /* Avail used flags. */
0130     u16 avail_used_flags;
0131 
0132     /* Index of the next avail descriptor. */
0133     u16 next_avail_idx;
0134 
0135     /*
0136      * Last written value to driver->flags in
0137      * guest byte order.
0138      */
0139     u16 event_flags_shadow;
0140 
0141     /* Per-descriptor state. */
0142     struct vring_desc_state_packed *desc_state;
0143     struct vring_desc_extra *desc_extra;
0144 
0145     /* DMA address and size information */
0146     dma_addr_t ring_dma_addr;
0147     dma_addr_t driver_event_dma_addr;
0148     dma_addr_t device_event_dma_addr;
0149     size_t ring_size_in_bytes;
0150     size_t event_size_in_bytes;
0151 };
0152 
0153 struct vring_virtqueue {
0154     struct virtqueue vq;
0155 
0156     /* Is this a packed ring? */
0157     bool packed_ring;
0158 
0159     /* Is DMA API used? */
0160     bool use_dma_api;
0161 
0162     /* Can we use weak barriers? */
0163     bool weak_barriers;
0164 
0165     /* Other side has made a mess, don't try any more. */
0166     bool broken;
0167 
0168     /* Host supports indirect buffers */
0169     bool indirect;
0170 
0171     /* Host publishes avail event idx */
0172     bool event;
0173 
0174     /* Head of free buffer list. */
0175     unsigned int free_head;
0176     /* Number we've added since last sync. */
0177     unsigned int num_added;
0178 
0179     /* Last used index we've seen.
0180      * For the split ring, it just contains the last used index.
0181      * For the packed ring:
0182      * bits below VRING_PACKED_EVENT_F_WRAP_CTR contain the last used index;
0183      * bit VRING_PACKED_EVENT_F_WRAP_CTR holds the used wrap counter.
0184      */
0185     u16 last_used_idx;
0186 
0187     /* Hint for event idx: already triggered no need to disable. */
0188     bool event_triggered;
0189 
0190     union {
0191         /* Available for split ring */
0192         struct vring_virtqueue_split split;
0193 
0194         /* Available for packed ring */
0195         struct vring_virtqueue_packed packed;
0196     };
0197 
0198     /* How to notify other side. FIXME: commonalize hcalls! */
0199     bool (*notify)(struct virtqueue *vq);
0200 
0201     /* DMA, allocation, and size information */
0202     bool we_own_ring;
0203 
0204 #ifdef DEBUG
0205     /* They're supposed to lock for us. */
0206     unsigned int in_use;
0207 
0208     /* Figure out if their kicks are too delayed. */
0209     bool last_add_time_valid;
0210     ktime_t last_add_time;
0211 #endif
0212 };
0213 
0214 static struct virtqueue *__vring_new_virtqueue(unsigned int index,
0215                            struct vring_virtqueue_split *vring_split,
0216                            struct virtio_device *vdev,
0217                            bool weak_barriers,
0218                            bool context,
0219                            bool (*notify)(struct virtqueue *),
0220                            void (*callback)(struct virtqueue *),
0221                            const char *name);
0222 static struct vring_desc_extra *vring_alloc_desc_extra(unsigned int num);
0223 static void vring_free(struct virtqueue *_vq);
0224 
0225 /*
0226  * Helpers.
0227  */
0228 
0229 #define to_vvq(_vq) container_of(_vq, struct vring_virtqueue, vq)
0230 
0231 static inline bool virtqueue_use_indirect(struct vring_virtqueue *vq,
0232                       unsigned int total_sg)
0233 {
0234     /*
0235      * If the host supports indirect descriptor tables, and we have multiple
0236      * buffers, then go indirect. FIXME: tune this threshold
0237      */
0238     return (vq->indirect && total_sg > 1 && vq->vq.num_free);
0239 }
0240 
0241 /*
0242  * Modern virtio devices have feature bits to specify whether they need a
0243  * quirk and bypass the IOMMU. If not there, just use the DMA API.
0244  *
0245  * If there, the interaction between virtio and DMA API is messy.
0246  *
0247  * On most systems with virtio, physical addresses match bus addresses,
0248  * and it doesn't particularly matter whether we use the DMA API.
0249  *
0250  * On some systems, including Xen and any system with a physical device
0251  * that speaks virtio behind a physical IOMMU, we must use the DMA API
0252  * for virtio DMA to work at all.
0253  *
0254  * On other systems, including SPARC and PPC64, virtio-pci devices are
0255  * enumerated as though they are behind an IOMMU, but the virtio host
0256  * ignores the IOMMU, so we must either pretend that the IOMMU isn't
0257  * there or somehow map everything as the identity.
0258  *
0259  * For the time being, we preserve historic behavior and bypass the DMA
0260  * API.
0261  *
0262  * TODO: install a per-device DMA ops structure that does the right thing
0263  * taking into account all the above quirks, and use the DMA API
0264  * unconditionally on data path.
0265  */
0266 
0267 static bool vring_use_dma_api(struct virtio_device *vdev)
0268 {
0269     if (!virtio_has_dma_quirk(vdev))
0270         return true;
0271 
0272     /* Otherwise, we are left to guess. */
0273     /*
0274      * In theory, it's possible to have a buggy QEMU-supplied
0275      * emulated Q35 IOMMU and Xen enabled at the same time.  On
0276      * such a configuration, virtio has never worked and will
0277      * not work without an even larger kludge.  Instead, enable
0278      * the DMA API if we're a Xen guest, which at least allows
0279      * all of the sensible Xen configurations to work correctly.
0280      */
0281     if (xen_domain())
0282         return true;
0283 
0284     return false;
0285 }
0286 
0287 size_t virtio_max_dma_size(struct virtio_device *vdev)
0288 {
0289     size_t max_segment_size = SIZE_MAX;
0290 
0291     if (vring_use_dma_api(vdev))
0292         max_segment_size = dma_max_mapping_size(vdev->dev.parent);
0293 
0294     return max_segment_size;
0295 }
0296 EXPORT_SYMBOL_GPL(virtio_max_dma_size);
0297 
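/*
 * Illustrative sketch, not part of the original file: a driver can use
 * virtio_max_dma_size() to bound the largest single buffer it hands to the
 * ring, e.g. when setting block-layer queue limits (virtio-blk does something
 * along these lines). blk_queue_max_segment_size() and the <linux/blkdev.h>
 * include are assumptions about the caller, not about this file.
 */
static inline void example_cap_segment_size(struct virtio_device *vdev,
                                            struct request_queue *q)
{
    u32 max_seg = min_t(u64, virtio_max_dma_size(vdev), U32_MAX);

    blk_queue_max_segment_size(q, max_seg);
}
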
0298 static void *vring_alloc_queue(struct virtio_device *vdev, size_t size,
0299                   dma_addr_t *dma_handle, gfp_t flag)
0300 {
0301     if (vring_use_dma_api(vdev)) {
0302         return dma_alloc_coherent(vdev->dev.parent, size,
0303                       dma_handle, flag);
0304     } else {
0305         void *queue = alloc_pages_exact(PAGE_ALIGN(size), flag);
0306 
0307         if (queue) {
0308             phys_addr_t phys_addr = virt_to_phys(queue);
0309             *dma_handle = (dma_addr_t)phys_addr;
0310 
0311             /*
0312              * Sanity check: make sure we dind't truncate
0313              * the address.  The only arches I can find that
0314              * have 64-bit phys_addr_t but 32-bit dma_addr_t
0315              * are certain non-highmem MIPS and x86
0316              * configurations, but these configurations
0317              * should never allocate physical pages above 32
0318              * bits, so this is fine.  Just in case, throw a
0319              * warning and abort if we end up with an
0320              * unrepresentable address.
0321              */
0322             if (WARN_ON_ONCE(*dma_handle != phys_addr)) {
0323                 free_pages_exact(queue, PAGE_ALIGN(size));
0324                 return NULL;
0325             }
0326         }
0327         return queue;
0328     }
0329 }
0330 
0331 static void vring_free_queue(struct virtio_device *vdev, size_t size,
0332                  void *queue, dma_addr_t dma_handle)
0333 {
0334     if (vring_use_dma_api(vdev))
0335         dma_free_coherent(vdev->dev.parent, size, queue, dma_handle);
0336     else
0337         free_pages_exact(queue, PAGE_ALIGN(size));
0338 }
0339 
0340 /*
0341  * The DMA ops on various arches are rather gnarly right now, and
0342  * making all of the arch DMA ops work on the vring device itself
0343  * is a mess.  For now, we use the parent device for DMA ops.
0344  */
0345 static inline struct device *vring_dma_dev(const struct vring_virtqueue *vq)
0346 {
0347     return vq->vq.vdev->dev.parent;
0348 }
0349 
0350 /* Map one sg entry. */
0351 static dma_addr_t vring_map_one_sg(const struct vring_virtqueue *vq,
0352                    struct scatterlist *sg,
0353                    enum dma_data_direction direction)
0354 {
0355     if (!vq->use_dma_api)
0356         return (dma_addr_t)sg_phys(sg);
0357 
0358     /*
0359      * We can't use dma_map_sg, because we don't use scatterlists in
0360      * the way it expects (we don't guarantee that the scatterlist
0361      * will exist for the lifetime of the mapping).
0362      */
0363     return dma_map_page(vring_dma_dev(vq),
0364                 sg_page(sg), sg->offset, sg->length,
0365                 direction);
0366 }
0367 
0368 static dma_addr_t vring_map_single(const struct vring_virtqueue *vq,
0369                    void *cpu_addr, size_t size,
0370                    enum dma_data_direction direction)
0371 {
0372     if (!vq->use_dma_api)
0373         return (dma_addr_t)virt_to_phys(cpu_addr);
0374 
0375     return dma_map_single(vring_dma_dev(vq),
0376                   cpu_addr, size, direction);
0377 }
0378 
0379 static int vring_mapping_error(const struct vring_virtqueue *vq,
0380                    dma_addr_t addr)
0381 {
0382     if (!vq->use_dma_api)
0383         return 0;
0384 
0385     return dma_mapping_error(vring_dma_dev(vq), addr);
0386 }
0387 
0388 static void virtqueue_init(struct vring_virtqueue *vq, u32 num)
0389 {
0390     vq->vq.num_free = num;
0391 
0392     if (vq->packed_ring)
0393         vq->last_used_idx = 0 | (1 << VRING_PACKED_EVENT_F_WRAP_CTR);
0394     else
0395         vq->last_used_idx = 0;
0396 
0397     vq->event_triggered = false;
0398     vq->num_added = 0;
0399 
0400 #ifdef DEBUG
0401     vq->in_use = false;
0402     vq->last_add_time_valid = false;
0403 #endif
0404 }
0405 
0406 
0407 /*
0408  * Split ring specific functions - *_split().
0409  */
0410 
0411 static void vring_unmap_one_split_indirect(const struct vring_virtqueue *vq,
0412                        struct vring_desc *desc)
0413 {
0414     u16 flags;
0415 
0416     if (!vq->use_dma_api)
0417         return;
0418 
0419     flags = virtio16_to_cpu(vq->vq.vdev, desc->flags);
0420 
0421     dma_unmap_page(vring_dma_dev(vq),
0422                virtio64_to_cpu(vq->vq.vdev, desc->addr),
0423                virtio32_to_cpu(vq->vq.vdev, desc->len),
0424                (flags & VRING_DESC_F_WRITE) ?
0425                DMA_FROM_DEVICE : DMA_TO_DEVICE);
0426 }
0427 
0428 static unsigned int vring_unmap_one_split(const struct vring_virtqueue *vq,
0429                       unsigned int i)
0430 {
0431     struct vring_desc_extra *extra = vq->split.desc_extra;
0432     u16 flags;
0433 
0434     if (!vq->use_dma_api)
0435         goto out;
0436 
0437     flags = extra[i].flags;
0438 
0439     if (flags & VRING_DESC_F_INDIRECT) {
0440         dma_unmap_single(vring_dma_dev(vq),
0441                  extra[i].addr,
0442                  extra[i].len,
0443                  (flags & VRING_DESC_F_WRITE) ?
0444                  DMA_FROM_DEVICE : DMA_TO_DEVICE);
0445     } else {
0446         dma_unmap_page(vring_dma_dev(vq),
0447                    extra[i].addr,
0448                    extra[i].len,
0449                    (flags & VRING_DESC_F_WRITE) ?
0450                    DMA_FROM_DEVICE : DMA_TO_DEVICE);
0451     }
0452 
0453 out:
0454     return extra[i].next;
0455 }
0456 
0457 static struct vring_desc *alloc_indirect_split(struct virtqueue *_vq,
0458                            unsigned int total_sg,
0459                            gfp_t gfp)
0460 {
0461     struct vring_desc *desc;
0462     unsigned int i;
0463 
0464     /*
0465      * We require lowmem mappings for the descriptors because
0466      * otherwise virt_to_phys will give us bogus addresses in the
0467      * virtqueue.
0468      */
0469     gfp &= ~__GFP_HIGHMEM;
0470 
0471     desc = kmalloc_array(total_sg, sizeof(struct vring_desc), gfp);
0472     if (!desc)
0473         return NULL;
0474 
0475     for (i = 0; i < total_sg; i++)
0476         desc[i].next = cpu_to_virtio16(_vq->vdev, i + 1);
0477     return desc;
0478 }
0479 
0480 static inline unsigned int virtqueue_add_desc_split(struct virtqueue *vq,
0481                             struct vring_desc *desc,
0482                             unsigned int i,
0483                             dma_addr_t addr,
0484                             unsigned int len,
0485                             u16 flags,
0486                             bool indirect)
0487 {
0488     struct vring_virtqueue *vring = to_vvq(vq);
0489     struct vring_desc_extra *extra = vring->split.desc_extra;
0490     u16 next;
0491 
0492     desc[i].flags = cpu_to_virtio16(vq->vdev, flags);
0493     desc[i].addr = cpu_to_virtio64(vq->vdev, addr);
0494     desc[i].len = cpu_to_virtio32(vq->vdev, len);
0495 
0496     if (!indirect) {
0497         next = extra[i].next;
0498         desc[i].next = cpu_to_virtio16(vq->vdev, next);
0499 
0500         extra[i].addr = addr;
0501         extra[i].len = len;
0502         extra[i].flags = flags;
0503     } else
0504         next = virtio16_to_cpu(vq->vdev, desc[i].next);
0505 
0506     return next;
0507 }
0508 
0509 static inline int virtqueue_add_split(struct virtqueue *_vq,
0510                       struct scatterlist *sgs[],
0511                       unsigned int total_sg,
0512                       unsigned int out_sgs,
0513                       unsigned int in_sgs,
0514                       void *data,
0515                       void *ctx,
0516                       gfp_t gfp)
0517 {
0518     struct vring_virtqueue *vq = to_vvq(_vq);
0519     struct scatterlist *sg;
0520     struct vring_desc *desc;
0521     unsigned int i, n, avail, descs_used, prev, err_idx;
0522     int head;
0523     bool indirect;
0524 
0525     START_USE(vq);
0526 
0527     BUG_ON(data == NULL);
0528     BUG_ON(ctx && vq->indirect);
0529 
0530     if (unlikely(vq->broken)) {
0531         END_USE(vq);
0532         return -EIO;
0533     }
0534 
0535     LAST_ADD_TIME_UPDATE(vq);
0536 
0537     BUG_ON(total_sg == 0);
0538 
0539     head = vq->free_head;
0540 
0541     if (virtqueue_use_indirect(vq, total_sg))
0542         desc = alloc_indirect_split(_vq, total_sg, gfp);
0543     else {
0544         desc = NULL;
0545         WARN_ON_ONCE(total_sg > vq->split.vring.num && !vq->indirect);
0546     }
0547 
0548     if (desc) {
0549         /* Use a single buffer which doesn't continue */
0550         indirect = true;
0551         /* Set up rest to use this indirect table. */
0552         i = 0;
0553         descs_used = 1;
0554     } else {
0555         indirect = false;
0556         desc = vq->split.vring.desc;
0557         i = head;
0558         descs_used = total_sg;
0559     }
0560 
0561     if (unlikely(vq->vq.num_free < descs_used)) {
0562         pr_debug("Can't add buf len %i - avail = %i\n",
0563              descs_used, vq->vq.num_free);
0564         /* FIXME: for historical reasons, we force a notify here if
0565          * there are outgoing parts to the buffer.  Presumably the
0566          * host should service the ring ASAP. */
0567         if (out_sgs)
0568             vq->notify(&vq->vq);
0569         if (indirect)
0570             kfree(desc);
0571         END_USE(vq);
0572         return -ENOSPC;
0573     }
0574 
0575     for (n = 0; n < out_sgs; n++) {
0576         for (sg = sgs[n]; sg; sg = sg_next(sg)) {
0577             dma_addr_t addr = vring_map_one_sg(vq, sg, DMA_TO_DEVICE);
0578             if (vring_mapping_error(vq, addr))
0579                 goto unmap_release;
0580 
0581             prev = i;
0582             /* Note that we trust indirect descriptor
0583              * table since it uses stream DMA mapping.
0584              */
0585             i = virtqueue_add_desc_split(_vq, desc, i, addr, sg->length,
0586                              VRING_DESC_F_NEXT,
0587                              indirect);
0588         }
0589     }
0590     for (; n < (out_sgs + in_sgs); n++) {
0591         for (sg = sgs[n]; sg; sg = sg_next(sg)) {
0592             dma_addr_t addr = vring_map_one_sg(vq, sg, DMA_FROM_DEVICE);
0593             if (vring_mapping_error(vq, addr))
0594                 goto unmap_release;
0595 
0596             prev = i;
0597             /* Note that we trust indirect descriptor
0598              * table since it uses stream DMA mapping.
0599              */
0600             i = virtqueue_add_desc_split(_vq, desc, i, addr,
0601                              sg->length,
0602                              VRING_DESC_F_NEXT |
0603                              VRING_DESC_F_WRITE,
0604                              indirect);
0605         }
0606     }
0607     /* Last one doesn't continue. */
0608     desc[prev].flags &= cpu_to_virtio16(_vq->vdev, ~VRING_DESC_F_NEXT);
0609     if (!indirect && vq->use_dma_api)
0610         vq->split.desc_extra[prev & (vq->split.vring.num - 1)].flags &=
0611             ~VRING_DESC_F_NEXT;
0612 
0613     if (indirect) {
0614         /* Now that the indirect table is filled in, map it. */
0615         dma_addr_t addr = vring_map_single(
0616             vq, desc, total_sg * sizeof(struct vring_desc),
0617             DMA_TO_DEVICE);
0618         if (vring_mapping_error(vq, addr))
0619             goto unmap_release;
0620 
0621         virtqueue_add_desc_split(_vq, vq->split.vring.desc,
0622                      head, addr,
0623                      total_sg * sizeof(struct vring_desc),
0624                      VRING_DESC_F_INDIRECT,
0625                      false);
0626     }
0627 
0628     /* We're using some buffers from the free list. */
0629     vq->vq.num_free -= descs_used;
0630 
0631     /* Update free pointer */
0632     if (indirect)
0633         vq->free_head = vq->split.desc_extra[head].next;
0634     else
0635         vq->free_head = i;
0636 
0637     /* Store token and indirect buffer state. */
0638     vq->split.desc_state[head].data = data;
0639     if (indirect)
0640         vq->split.desc_state[head].indir_desc = desc;
0641     else
0642         vq->split.desc_state[head].indir_desc = ctx;
0643 
0644     /* Put entry in available array (but don't update avail->idx until they
0645      * do sync). */
0646     avail = vq->split.avail_idx_shadow & (vq->split.vring.num - 1);
0647     vq->split.vring.avail->ring[avail] = cpu_to_virtio16(_vq->vdev, head);
0648 
0649     /* Descriptors and available array need to be set before we expose the
0650      * new available array entries. */
0651     virtio_wmb(vq->weak_barriers);
0652     vq->split.avail_idx_shadow++;
0653     vq->split.vring.avail->idx = cpu_to_virtio16(_vq->vdev,
0654                         vq->split.avail_idx_shadow);
0655     vq->num_added++;
0656 
0657     pr_debug("Added buffer head %i to %p\n", head, vq);
0658     END_USE(vq);
0659 
0660     /* This is very unlikely, but theoretically possible.  Kick
0661      * just in case. */
0662     if (unlikely(vq->num_added == (1 << 16) - 1))
0663         virtqueue_kick(_vq);
0664 
0665     return 0;
0666 
0667 unmap_release:
0668     err_idx = i;
0669 
0670     if (indirect)
0671         i = 0;
0672     else
0673         i = head;
0674 
0675     for (n = 0; n < total_sg; n++) {
0676         if (i == err_idx)
0677             break;
0678         if (indirect) {
0679             vring_unmap_one_split_indirect(vq, &desc[i]);
0680             i = virtio16_to_cpu(_vq->vdev, desc[i].next);
0681         } else
0682             i = vring_unmap_one_split(vq, i);
0683     }
0684 
0685     if (indirect)
0686         kfree(desc);
0687 
0688     END_USE(vq);
0689     return -ENOMEM;
0690 }
0691 
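/*
 * Illustrative sketch of the driver-side view (assumed code, not part of this
 * file): virtqueue_add_split() is normally reached through virtqueue_add_sgs(),
 * with one scatterlist per buffer and the out/in split telling the ring which
 * direction each one goes. The header/status pairing mirrors the common
 * virtio-blk/virtio-scsi pattern.
 */
static int example_add_request(struct virtqueue *vq, void *hdr, size_t hdr_len,
                               u8 *status, void *token)
{
    struct scatterlist hdr_sg, status_sg, *sgs[2];

    sg_init_one(&hdr_sg, hdr, hdr_len);               /* driver -> device */
    sg_init_one(&status_sg, status, sizeof(*status)); /* device -> driver */
    sgs[0] = &hdr_sg;
    sgs[1] = &status_sg;

    /* 1 out_sg, 1 in_sg; 'token' comes back later from virtqueue_get_buf(). */
    return virtqueue_add_sgs(vq, sgs, 1, 1, token, GFP_ATOMIC);
}
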
0692 static bool virtqueue_kick_prepare_split(struct virtqueue *_vq)
0693 {
0694     struct vring_virtqueue *vq = to_vvq(_vq);
0695     u16 new, old;
0696     bool needs_kick;
0697 
0698     START_USE(vq);
0699     /* We need to expose available array entries before checking avail
0700      * event. */
0701     virtio_mb(vq->weak_barriers);
0702 
0703     old = vq->split.avail_idx_shadow - vq->num_added;
0704     new = vq->split.avail_idx_shadow;
0705     vq->num_added = 0;
0706 
0707     LAST_ADD_TIME_CHECK(vq);
0708     LAST_ADD_TIME_INVALID(vq);
0709 
0710     if (vq->event) {
0711         needs_kick = vring_need_event(virtio16_to_cpu(_vq->vdev,
0712                     vring_avail_event(&vq->split.vring)),
0713                           new, old);
0714     } else {
0715         needs_kick = !(vq->split.vring.used->flags &
0716                     cpu_to_virtio16(_vq->vdev,
0717                         VRING_USED_F_NO_NOTIFY));
0718     }
0719     END_USE(vq);
0720     return needs_kick;
0721 }
0722 
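/*
 * Illustrative sketch (assumed caller, not part of this file): the
 * prepare/notify split exists so a driver can drop its own lock before the
 * possibly expensive notification to the host. virtqueue_kick_prepare() ends
 * up in virtqueue_kick_prepare_split() for split rings.
 */
static void example_kick_outside_lock(struct virtqueue *vq, spinlock_t *lock)
{
    bool kick;

    spin_lock(lock);
    kick = virtqueue_kick_prepare(vq);
    spin_unlock(lock);

    if (kick)
        virtqueue_notify(vq);
}
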
0723 static void detach_buf_split(struct vring_virtqueue *vq, unsigned int head,
0724                  void **ctx)
0725 {
0726     unsigned int i, j;
0727     __virtio16 nextflag = cpu_to_virtio16(vq->vq.vdev, VRING_DESC_F_NEXT);
0728 
0729     /* Clear data ptr. */
0730     vq->split.desc_state[head].data = NULL;
0731 
0732     /* Put back on free list: unmap first-level descriptors and find end */
0733     i = head;
0734 
0735     while (vq->split.vring.desc[i].flags & nextflag) {
0736         vring_unmap_one_split(vq, i);
0737         i = vq->split.desc_extra[i].next;
0738         vq->vq.num_free++;
0739     }
0740 
0741     vring_unmap_one_split(vq, i);
0742     vq->split.desc_extra[i].next = vq->free_head;
0743     vq->free_head = head;
0744 
0745     /* Plus final descriptor */
0746     vq->vq.num_free++;
0747 
0748     if (vq->indirect) {
0749         struct vring_desc *indir_desc =
0750                 vq->split.desc_state[head].indir_desc;
0751         u32 len;
0752 
0753         /* Free the indirect table, if any, now that it's unmapped. */
0754         if (!indir_desc)
0755             return;
0756 
0757         len = vq->split.desc_extra[head].len;
0758 
0759         BUG_ON(!(vq->split.desc_extra[head].flags &
0760                 VRING_DESC_F_INDIRECT));
0761         BUG_ON(len == 0 || len % sizeof(struct vring_desc));
0762 
0763         for (j = 0; j < len / sizeof(struct vring_desc); j++)
0764             vring_unmap_one_split_indirect(vq, &indir_desc[j]);
0765 
0766         kfree(indir_desc);
0767         vq->split.desc_state[head].indir_desc = NULL;
0768     } else if (ctx) {
0769         *ctx = vq->split.desc_state[head].indir_desc;
0770     }
0771 }
0772 
0773 static inline bool more_used_split(const struct vring_virtqueue *vq)
0774 {
0775     return vq->last_used_idx != virtio16_to_cpu(vq->vq.vdev,
0776             vq->split.vring.used->idx);
0777 }
0778 
0779 static void *virtqueue_get_buf_ctx_split(struct virtqueue *_vq,
0780                      unsigned int *len,
0781                      void **ctx)
0782 {
0783     struct vring_virtqueue *vq = to_vvq(_vq);
0784     void *ret;
0785     unsigned int i;
0786     u16 last_used;
0787 
0788     START_USE(vq);
0789 
0790     if (unlikely(vq->broken)) {
0791         END_USE(vq);
0792         return NULL;
0793     }
0794 
0795     if (!more_used_split(vq)) {
0796         pr_debug("No more buffers in queue\n");
0797         END_USE(vq);
0798         return NULL;
0799     }
0800 
0801     /* Only get used array entries after they have been exposed by host. */
0802     virtio_rmb(vq->weak_barriers);
0803 
0804     last_used = (vq->last_used_idx & (vq->split.vring.num - 1));
0805     i = virtio32_to_cpu(_vq->vdev,
0806             vq->split.vring.used->ring[last_used].id);
0807     *len = virtio32_to_cpu(_vq->vdev,
0808             vq->split.vring.used->ring[last_used].len);
0809 
0810     if (unlikely(i >= vq->split.vring.num)) {
0811         BAD_RING(vq, "id %u out of range\n", i);
0812         return NULL;
0813     }
0814     if (unlikely(!vq->split.desc_state[i].data)) {
0815         BAD_RING(vq, "id %u is not a head!\n", i);
0816         return NULL;
0817     }
0818 
0819     /* detach_buf_split clears data, so grab it now. */
0820     ret = vq->split.desc_state[i].data;
0821     detach_buf_split(vq, i, ctx);
0822     vq->last_used_idx++;
0823     /* If we expect an interrupt for the next entry, tell host
0824      * by writing event index and flush out the write before
0825      * the read in the next get_buf call. */
0826     if (!(vq->split.avail_flags_shadow & VRING_AVAIL_F_NO_INTERRUPT))
0827         virtio_store_mb(vq->weak_barriers,
0828                 &vring_used_event(&vq->split.vring),
0829                 cpu_to_virtio16(_vq->vdev, vq->last_used_idx));
0830 
0831     LAST_ADD_TIME_INVALID(vq);
0832 
0833     END_USE(vq);
0834     return ret;
0835 }
0836 
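/*
 * Illustrative sketch (assumed driver callback, not part of this file): used
 * buffers are typically drained with virtqueue_get_buf(), which lands in
 * virtqueue_get_buf_ctx_split() for split rings. complete_request() is a
 * hypothetical helper standing in for the driver's completion logic.
 */
static void example_drain_used(struct virtqueue *vq)
{
    unsigned int len;
    void *token;

    while ((token = virtqueue_get_buf(vq, &len)) != NULL)
        complete_request(token, len);   /* hypothetical helper */
}
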
0837 static void virtqueue_disable_cb_split(struct virtqueue *_vq)
0838 {
0839     struct vring_virtqueue *vq = to_vvq(_vq);
0840 
0841     if (!(vq->split.avail_flags_shadow & VRING_AVAIL_F_NO_INTERRUPT)) {
0842         vq->split.avail_flags_shadow |= VRING_AVAIL_F_NO_INTERRUPT;
0843         if (vq->event)
0844             /* TODO: this is a hack. Figure out a cleaner value to write. */
0845             vring_used_event(&vq->split.vring) = 0x0;
0846         else
0847             vq->split.vring.avail->flags =
0848                 cpu_to_virtio16(_vq->vdev,
0849                         vq->split.avail_flags_shadow);
0850     }
0851 }
0852 
0853 static unsigned int virtqueue_enable_cb_prepare_split(struct virtqueue *_vq)
0854 {
0855     struct vring_virtqueue *vq = to_vvq(_vq);
0856     u16 last_used_idx;
0857 
0858     START_USE(vq);
0859 
0860     /* We optimistically turn back on interrupts, then check if there was
0861      * more to do. */
0862     /* Depending on the VIRTIO_RING_F_EVENT_IDX feature, we need to
0863      * either clear the flags bit or point the event index at the next
0864      * entry. Always do both to keep code simple. */
0865     if (vq->split.avail_flags_shadow & VRING_AVAIL_F_NO_INTERRUPT) {
0866         vq->split.avail_flags_shadow &= ~VRING_AVAIL_F_NO_INTERRUPT;
0867         if (!vq->event)
0868             vq->split.vring.avail->flags =
0869                 cpu_to_virtio16(_vq->vdev,
0870                         vq->split.avail_flags_shadow);
0871     }
0872     vring_used_event(&vq->split.vring) = cpu_to_virtio16(_vq->vdev,
0873             last_used_idx = vq->last_used_idx);
0874     END_USE(vq);
0875     return last_used_idx;
0876 }
0877 
0878 static bool virtqueue_poll_split(struct virtqueue *_vq, unsigned int last_used_idx)
0879 {
0880     struct vring_virtqueue *vq = to_vvq(_vq);
0881 
0882     return (u16)last_used_idx != virtio16_to_cpu(_vq->vdev,
0883             vq->split.vring.used->idx);
0884 }
0885 
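/*
 * Illustrative sketch (assumed busy-polling caller, not part of this file):
 * the prepare/poll pair lets a driver re-arm callbacks and then cheaply
 * re-check for new used entries without waiting for an interrupt.
 * virtqueue_poll() reaches virtqueue_poll_split() here.
 */
static bool example_reenable_and_poll(struct virtqueue *vq)
{
    unsigned int opaque = virtqueue_enable_cb_prepare(vq);

    /* True means new used entries arrived after re-enabling. */
    return virtqueue_poll(vq, opaque);
}
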
0886 static bool virtqueue_enable_cb_delayed_split(struct virtqueue *_vq)
0887 {
0888     struct vring_virtqueue *vq = to_vvq(_vq);
0889     u16 bufs;
0890 
0891     START_USE(vq);
0892 
0893     /* We optimistically turn back on interrupts, then check if there was
0894      * more to do. */
0895     /* Depending on the VIRTIO_RING_F_EVENT_IDX feature, we need to
0896      * either clear the flags bit or point the event index at the next
0897      * entry. Always update the event index to keep code simple. */
0898     if (vq->split.avail_flags_shadow & VRING_AVAIL_F_NO_INTERRUPT) {
0899         vq->split.avail_flags_shadow &= ~VRING_AVAIL_F_NO_INTERRUPT;
0900         if (!vq->event)
0901             vq->split.vring.avail->flags =
0902                 cpu_to_virtio16(_vq->vdev,
0903                         vq->split.avail_flags_shadow);
0904     }
0905     /* TODO: tune this threshold */
0906     bufs = (u16)(vq->split.avail_idx_shadow - vq->last_used_idx) * 3 / 4;
0907 
0908     virtio_store_mb(vq->weak_barriers,
0909             &vring_used_event(&vq->split.vring),
0910             cpu_to_virtio16(_vq->vdev, vq->last_used_idx + bufs));
0911 
0912     if (unlikely((u16)(virtio16_to_cpu(_vq->vdev, vq->split.vring.used->idx)
0913                     - vq->last_used_idx) > bufs)) {
0914         END_USE(vq);
0915         return false;
0916     }
0917 
0918     END_USE(vq);
0919     return true;
0920 }
0921 
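/*
 * Illustrative sketch (assumed NAPI-style poll loop, not part of this file):
 * interrupts stay off while draining, then are re-armed with
 * virtqueue_enable_cb_delayed(), which maps to the *_split() variant above and
 * only fires again once roughly 3/4 of the outstanding buffers are consumed.
 * consume() and schedule_more_polling() are hypothetical helpers.
 */
static void example_poll_then_rearm(struct virtqueue *vq)
{
    unsigned int len;
    void *token;

    virtqueue_disable_cb(vq);
    while ((token = virtqueue_get_buf(vq, &len)) != NULL)
        consume(token, len);            /* hypothetical helper */

    if (!virtqueue_enable_cb_delayed(vq))
        schedule_more_polling(vq);      /* hypothetical helper */
}
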
0922 static void *virtqueue_detach_unused_buf_split(struct virtqueue *_vq)
0923 {
0924     struct vring_virtqueue *vq = to_vvq(_vq);
0925     unsigned int i;
0926     void *buf;
0927 
0928     START_USE(vq);
0929 
0930     for (i = 0; i < vq->split.vring.num; i++) {
0931         if (!vq->split.desc_state[i].data)
0932             continue;
0933         /* detach_buf_split clears data, so grab it now. */
0934         buf = vq->split.desc_state[i].data;
0935         detach_buf_split(vq, i, NULL);
0936         vq->split.avail_idx_shadow--;
0937         vq->split.vring.avail->idx = cpu_to_virtio16(_vq->vdev,
0938                 vq->split.avail_idx_shadow);
0939         END_USE(vq);
0940         return buf;
0941     }
0942     /* That should have freed everything. */
0943     BUG_ON(vq->vq.num_free != vq->split.vring.num);
0944 
0945     END_USE(vq);
0946     return NULL;
0947 }
0948 
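/*
 * Illustrative sketch (assumed teardown path, not part of this file): once the
 * device has been reset, buffers it never consumed are reclaimed with
 * virtqueue_detach_unused_buf(), which reaches the *_split() variant above.
 * free_token() is a hypothetical helper that owns the buffer's memory.
 */
static void example_reclaim_unused(struct virtqueue *vq)
{
    void *token;

    while ((token = virtqueue_detach_unused_buf(vq)) != NULL)
        free_token(token);  /* hypothetical helper */
}
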
0949 static void virtqueue_vring_init_split(struct vring_virtqueue_split *vring_split,
0950                        struct vring_virtqueue *vq)
0951 {
0952     struct virtio_device *vdev;
0953 
0954     vdev = vq->vq.vdev;
0955 
0956     vring_split->avail_flags_shadow = 0;
0957     vring_split->avail_idx_shadow = 0;
0958 
0959     /* No callback?  Tell other side not to bother us. */
0960     if (!vq->vq.callback) {
0961         vring_split->avail_flags_shadow |= VRING_AVAIL_F_NO_INTERRUPT;
0962         if (!vq->event)
0963             vring_split->vring.avail->flags = cpu_to_virtio16(vdev,
0964                     vring_split->avail_flags_shadow);
0965     }
0966 }
0967 
0968 static void virtqueue_reinit_split(struct vring_virtqueue *vq)
0969 {
0970     int num;
0971 
0972     num = vq->split.vring.num;
0973 
0974     vq->split.vring.avail->flags = 0;
0975     vq->split.vring.avail->idx = 0;
0976 
0977     /* reset avail event */
0978     vq->split.vring.avail->ring[num] = 0;
0979 
0980     vq->split.vring.used->flags = 0;
0981     vq->split.vring.used->idx = 0;
0982 
0983     /* reset used event */
0984     *(__virtio16 *)&(vq->split.vring.used->ring[num]) = 0;
0985 
0986     virtqueue_init(vq, num);
0987 
0988     virtqueue_vring_init_split(&vq->split, vq);
0989 }
0990 
0991 static void virtqueue_vring_attach_split(struct vring_virtqueue *vq,
0992                      struct vring_virtqueue_split *vring_split)
0993 {
0994     vq->split = *vring_split;
0995 
0996     /* Put everything in free lists. */
0997     vq->free_head = 0;
0998 }
0999 
1000 static int vring_alloc_state_extra_split(struct vring_virtqueue_split *vring_split)
1001 {
1002     struct vring_desc_state_split *state;
1003     struct vring_desc_extra *extra;
1004     u32 num = vring_split->vring.num;
1005 
1006     state = kmalloc_array(num, sizeof(struct vring_desc_state_split), GFP_KERNEL);
1007     if (!state)
1008         goto err_state;
1009 
1010     extra = vring_alloc_desc_extra(num);
1011     if (!extra)
1012         goto err_extra;
1013 
1014     memset(state, 0, num * sizeof(struct vring_desc_state_split));
1015 
1016     vring_split->desc_state = state;
1017     vring_split->desc_extra = extra;
1018     return 0;
1019 
1020 err_extra:
1021     kfree(state);
1022 err_state:
1023     return -ENOMEM;
1024 }
1025 
1026 static void vring_free_split(struct vring_virtqueue_split *vring_split,
1027                  struct virtio_device *vdev)
1028 {
1029     vring_free_queue(vdev, vring_split->queue_size_in_bytes,
1030              vring_split->vring.desc,
1031              vring_split->queue_dma_addr);
1032 
1033     kfree(vring_split->desc_state);
1034     kfree(vring_split->desc_extra);
1035 }
1036 
1037 static int vring_alloc_queue_split(struct vring_virtqueue_split *vring_split,
1038                    struct virtio_device *vdev,
1039                    u32 num,
1040                    unsigned int vring_align,
1041                    bool may_reduce_num)
1042 {
1043     void *queue = NULL;
1044     dma_addr_t dma_addr;
1045 
1046     /* We assume num is a power of 2. */
1047     if (num & (num - 1)) {
1048         dev_warn(&vdev->dev, "Bad virtqueue length %u\n", num);
1049         return -EINVAL;
1050     }
1051 
1052     /* TODO: allocate each queue chunk individually */
1053     for (; num && vring_size(num, vring_align) > PAGE_SIZE; num /= 2) {
1054         queue = vring_alloc_queue(vdev, vring_size(num, vring_align),
1055                       &dma_addr,
1056                       GFP_KERNEL | __GFP_NOWARN | __GFP_ZERO);
1057         if (queue)
1058             break;
1059         if (!may_reduce_num)
1060             return -ENOMEM;
1061     }
1062 
1063     if (!num)
1064         return -ENOMEM;
1065 
1066     if (!queue) {
1067         /* Try to get a single page. You are my only hope! */
1068         queue = vring_alloc_queue(vdev, vring_size(num, vring_align),
1069                       &dma_addr, GFP_KERNEL|__GFP_ZERO);
1070     }
1071     if (!queue)
1072         return -ENOMEM;
1073 
1074     vring_init(&vring_split->vring, num, queue, vring_align);
1075 
1076     vring_split->queue_dma_addr = dma_addr;
1077     vring_split->queue_size_in_bytes = vring_size(num, vring_align);
1078 
1079     vring_split->vring_align = vring_align;
1080     vring_split->may_reduce_num = may_reduce_num;
1081 
1082     return 0;
1083 }
1084 
1085 static struct virtqueue *vring_create_virtqueue_split(
1086     unsigned int index,
1087     unsigned int num,
1088     unsigned int vring_align,
1089     struct virtio_device *vdev,
1090     bool weak_barriers,
1091     bool may_reduce_num,
1092     bool context,
1093     bool (*notify)(struct virtqueue *),
1094     void (*callback)(struct virtqueue *),
1095     const char *name)
1096 {
1097     struct vring_virtqueue_split vring_split = {};
1098     struct virtqueue *vq;
1099     int err;
1100 
1101     err = vring_alloc_queue_split(&vring_split, vdev, num, vring_align,
1102                       may_reduce_num);
1103     if (err)
1104         return NULL;
1105 
1106     vq = __vring_new_virtqueue(index, &vring_split, vdev, weak_barriers,
1107                    context, notify, callback, name);
1108     if (!vq) {
1109         vring_free_split(&vring_split, vdev);
1110         return NULL;
1111     }
1112 
1113     to_vvq(vq)->we_own_ring = true;
1114 
1115     return vq;
1116 }
1117 
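/*
 * Illustrative sketch (assumed transport code, not part of this file):
 * transports reach vring_create_virtqueue_split() through the public
 * vring_create_virtqueue() entry point. The alignment and flag values below
 * are typical rather than mandated; example_notify() is a hypothetical
 * notify hook.
 */
static struct virtqueue *example_setup_vq(struct virtio_device *vdev,
                                          unsigned int index, unsigned int num,
                                          void (*cb)(struct virtqueue *),
                                          const char *name)
{
    return vring_create_virtqueue(index, num, SMP_CACHE_BYTES, vdev,
                                  true,  /* weak_barriers */
                                  true,  /* may_reduce_num */
                                  false, /* no per-buffer context */
                                  example_notify, cb, name);
}
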
1118 static int virtqueue_resize_split(struct virtqueue *_vq, u32 num)
1119 {
1120     struct vring_virtqueue_split vring_split = {};
1121     struct vring_virtqueue *vq = to_vvq(_vq);
1122     struct virtio_device *vdev = _vq->vdev;
1123     int err;
1124 
1125     err = vring_alloc_queue_split(&vring_split, vdev, num,
1126                       vq->split.vring_align,
1127                       vq->split.may_reduce_num);
1128     if (err)
1129         goto err;
1130 
1131     err = vring_alloc_state_extra_split(&vring_split);
1132     if (err)
1133         goto err_state_extra;
1134 
1135     vring_free(&vq->vq);
1136 
1137     virtqueue_vring_init_split(&vring_split, vq);
1138 
1139     virtqueue_init(vq, vring_split.vring.num);
1140     virtqueue_vring_attach_split(vq, &vring_split);
1141 
1142     return 0;
1143 
1144 err_state_extra:
1145     vring_free_split(&vring_split, vdev);
1146 err:
1147     virtqueue_reinit_split(vq);
1148     return -ENOMEM;
1149 }
1150 
1151 
1152 /*
1153  * Packed ring specific functions - *_packed().
1154  */
1155 static inline bool packed_used_wrap_counter(u16 last_used_idx)
1156 {
1157     return !!(last_used_idx & (1 << VRING_PACKED_EVENT_F_WRAP_CTR));
1158 }
1159 
1160 static inline u16 packed_last_used(u16 last_used_idx)
1161 {
1162     return last_used_idx & ~(-(1 << VRING_PACKED_EVENT_F_WRAP_CTR));
1163 }
1164 
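/*
 * Worked example (illustrative): VRING_PACKED_EVENT_F_WRAP_CTR is 15, so
 * last_used_idx keeps the wrap counter in bit 15 and the index in bits 0..14.
 * A value of 0x8005 therefore decodes as:
 *   packed_used_wrap_counter(0x8005) == true  (wrap counter set)
 *   packed_last_used(0x8005)         == 5     (last used index)
 */
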
1165 static void vring_unmap_extra_packed(const struct vring_virtqueue *vq,
1166                      struct vring_desc_extra *extra)
1167 {
1168     u16 flags;
1169 
1170     if (!vq->use_dma_api)
1171         return;
1172 
1173     flags = extra->flags;
1174 
1175     if (flags & VRING_DESC_F_INDIRECT) {
1176         dma_unmap_single(vring_dma_dev(vq),
1177                  extra->addr, extra->len,
1178                  (flags & VRING_DESC_F_WRITE) ?
1179                  DMA_FROM_DEVICE : DMA_TO_DEVICE);
1180     } else {
1181         dma_unmap_page(vring_dma_dev(vq),
1182                    extra->addr, extra->len,
1183                    (flags & VRING_DESC_F_WRITE) ?
1184                    DMA_FROM_DEVICE : DMA_TO_DEVICE);
1185     }
1186 }
1187 
1188 static void vring_unmap_desc_packed(const struct vring_virtqueue *vq,
1189                    struct vring_packed_desc *desc)
1190 {
1191     u16 flags;
1192 
1193     if (!vq->use_dma_api)
1194         return;
1195 
1196     flags = le16_to_cpu(desc->flags);
1197 
1198     dma_unmap_page(vring_dma_dev(vq),
1199                le64_to_cpu(desc->addr),
1200                le32_to_cpu(desc->len),
1201                (flags & VRING_DESC_F_WRITE) ?
1202                DMA_FROM_DEVICE : DMA_TO_DEVICE);
1203 }
1204 
1205 static struct vring_packed_desc *alloc_indirect_packed(unsigned int total_sg,
1206                                gfp_t gfp)
1207 {
1208     struct vring_packed_desc *desc;
1209 
1210     /*
1211      * We require lowmem mappings for the descriptors because
1212      * otherwise virt_to_phys will give us bogus addresses in the
1213      * virtqueue.
1214      */
1215     gfp &= ~__GFP_HIGHMEM;
1216 
1217     desc = kmalloc_array(total_sg, sizeof(struct vring_packed_desc), gfp);
1218 
1219     return desc;
1220 }
1221 
1222 static int virtqueue_add_indirect_packed(struct vring_virtqueue *vq,
1223                      struct scatterlist *sgs[],
1224                      unsigned int total_sg,
1225                      unsigned int out_sgs,
1226                      unsigned int in_sgs,
1227                      void *data,
1228                      gfp_t gfp)
1229 {
1230     struct vring_packed_desc *desc;
1231     struct scatterlist *sg;
1232     unsigned int i, n, err_idx;
1233     u16 head, id;
1234     dma_addr_t addr;
1235 
1236     head = vq->packed.next_avail_idx;
1237     desc = alloc_indirect_packed(total_sg, gfp);
1238     if (!desc)
1239         return -ENOMEM;
1240 
1241     if (unlikely(vq->vq.num_free < 1)) {
1242         pr_debug("Can't add buf len 1 - avail = 0\n");
1243         kfree(desc);
1244         END_USE(vq);
1245         return -ENOSPC;
1246     }
1247 
1248     i = 0;
1249     id = vq->free_head;
1250     BUG_ON(id == vq->packed.vring.num);
1251 
1252     for (n = 0; n < out_sgs + in_sgs; n++) {
1253         for (sg = sgs[n]; sg; sg = sg_next(sg)) {
1254             addr = vring_map_one_sg(vq, sg, n < out_sgs ?
1255                     DMA_TO_DEVICE : DMA_FROM_DEVICE);
1256             if (vring_mapping_error(vq, addr))
1257                 goto unmap_release;
1258 
1259             desc[i].flags = cpu_to_le16(n < out_sgs ?
1260                         0 : VRING_DESC_F_WRITE);
1261             desc[i].addr = cpu_to_le64(addr);
1262             desc[i].len = cpu_to_le32(sg->length);
1263             i++;
1264         }
1265     }
1266 
1267     /* Now that the indirect table is filled in, map it. */
1268     addr = vring_map_single(vq, desc,
1269             total_sg * sizeof(struct vring_packed_desc),
1270             DMA_TO_DEVICE);
1271     if (vring_mapping_error(vq, addr))
1272         goto unmap_release;
1273 
1274     vq->packed.vring.desc[head].addr = cpu_to_le64(addr);
1275     vq->packed.vring.desc[head].len = cpu_to_le32(total_sg *
1276                 sizeof(struct vring_packed_desc));
1277     vq->packed.vring.desc[head].id = cpu_to_le16(id);
1278 
1279     if (vq->use_dma_api) {
1280         vq->packed.desc_extra[id].addr = addr;
1281         vq->packed.desc_extra[id].len = total_sg *
1282                 sizeof(struct vring_packed_desc);
1283         vq->packed.desc_extra[id].flags = VRING_DESC_F_INDIRECT |
1284                           vq->packed.avail_used_flags;
1285     }
1286 
1287     /*
1288      * A driver MUST NOT make the first descriptor in the list
1289      * available before all subsequent descriptors comprising
1290      * the list are made available.
1291      */
1292     virtio_wmb(vq->weak_barriers);
1293     vq->packed.vring.desc[head].flags = cpu_to_le16(VRING_DESC_F_INDIRECT |
1294                         vq->packed.avail_used_flags);
1295 
1296     /* We're using some buffers from the free list. */
1297     vq->vq.num_free -= 1;
1298 
1299     /* Update free pointer */
1300     n = head + 1;
1301     if (n >= vq->packed.vring.num) {
1302         n = 0;
1303         vq->packed.avail_wrap_counter ^= 1;
1304         vq->packed.avail_used_flags ^=
1305                 1 << VRING_PACKED_DESC_F_AVAIL |
1306                 1 << VRING_PACKED_DESC_F_USED;
1307     }
1308     vq->packed.next_avail_idx = n;
1309     vq->free_head = vq->packed.desc_extra[id].next;
1310 
1311     /* Store token and indirect buffer state. */
1312     vq->packed.desc_state[id].num = 1;
1313     vq->packed.desc_state[id].data = data;
1314     vq->packed.desc_state[id].indir_desc = desc;
1315     vq->packed.desc_state[id].last = id;
1316 
1317     vq->num_added += 1;
1318 
1319     pr_debug("Added buffer head %i to %p\n", head, vq);
1320     END_USE(vq);
1321 
1322     return 0;
1323 
1324 unmap_release:
1325     err_idx = i;
1326 
1327     for (i = 0; i < err_idx; i++)
1328         vring_unmap_desc_packed(vq, &desc[i]);
1329 
1330     kfree(desc);
1331 
1332     END_USE(vq);
1333     return -ENOMEM;
1334 }
1335 
1336 static inline int virtqueue_add_packed(struct virtqueue *_vq,
1337                        struct scatterlist *sgs[],
1338                        unsigned int total_sg,
1339                        unsigned int out_sgs,
1340                        unsigned int in_sgs,
1341                        void *data,
1342                        void *ctx,
1343                        gfp_t gfp)
1344 {
1345     struct vring_virtqueue *vq = to_vvq(_vq);
1346     struct vring_packed_desc *desc;
1347     struct scatterlist *sg;
1348     unsigned int i, n, c, descs_used, err_idx;
1349     __le16 head_flags, flags;
1350     u16 head, id, prev, curr, avail_used_flags;
1351     int err;
1352 
1353     START_USE(vq);
1354 
1355     BUG_ON(data == NULL);
1356     BUG_ON(ctx && vq->indirect);
1357 
1358     if (unlikely(vq->broken)) {
1359         END_USE(vq);
1360         return -EIO;
1361     }
1362 
1363     LAST_ADD_TIME_UPDATE(vq);
1364 
1365     BUG_ON(total_sg == 0);
1366 
1367     if (virtqueue_use_indirect(vq, total_sg)) {
1368         err = virtqueue_add_indirect_packed(vq, sgs, total_sg, out_sgs,
1369                             in_sgs, data, gfp);
1370         if (err != -ENOMEM) {
1371             END_USE(vq);
1372             return err;
1373         }
1374 
1375         /* fall back on direct */
1376     }
1377 
1378     head = vq->packed.next_avail_idx;
1379     avail_used_flags = vq->packed.avail_used_flags;
1380 
1381     WARN_ON_ONCE(total_sg > vq->packed.vring.num && !vq->indirect);
1382 
1383     desc = vq->packed.vring.desc;
1384     i = head;
1385     descs_used = total_sg;
1386 
1387     if (unlikely(vq->vq.num_free < descs_used)) {
1388         pr_debug("Can't add buf len %i - avail = %i\n",
1389              descs_used, vq->vq.num_free);
1390         END_USE(vq);
1391         return -ENOSPC;
1392     }
1393 
1394     id = vq->free_head;
1395     BUG_ON(id == vq->packed.vring.num);
1396 
1397     curr = id;
1398     c = 0;
1399     for (n = 0; n < out_sgs + in_sgs; n++) {
1400         for (sg = sgs[n]; sg; sg = sg_next(sg)) {
1401             dma_addr_t addr = vring_map_one_sg(vq, sg, n < out_sgs ?
1402                     DMA_TO_DEVICE : DMA_FROM_DEVICE);
1403             if (vring_mapping_error(vq, addr))
1404                 goto unmap_release;
1405 
1406             flags = cpu_to_le16(vq->packed.avail_used_flags |
1407                     (++c == total_sg ? 0 : VRING_DESC_F_NEXT) |
1408                     (n < out_sgs ? 0 : VRING_DESC_F_WRITE));
1409             if (i == head)
1410                 head_flags = flags;
1411             else
1412                 desc[i].flags = flags;
1413 
1414             desc[i].addr = cpu_to_le64(addr);
1415             desc[i].len = cpu_to_le32(sg->length);
1416             desc[i].id = cpu_to_le16(id);
1417 
1418             if (unlikely(vq->use_dma_api)) {
1419                 vq->packed.desc_extra[curr].addr = addr;
1420                 vq->packed.desc_extra[curr].len = sg->length;
1421                 vq->packed.desc_extra[curr].flags =
1422                     le16_to_cpu(flags);
1423             }
1424             prev = curr;
1425             curr = vq->packed.desc_extra[curr].next;
1426 
1427             if ((unlikely(++i >= vq->packed.vring.num))) {
1428                 i = 0;
1429                 vq->packed.avail_used_flags ^=
1430                     1 << VRING_PACKED_DESC_F_AVAIL |
1431                     1 << VRING_PACKED_DESC_F_USED;
1432             }
1433         }
1434     }
1435 
1436     if (i < head)
1437         vq->packed.avail_wrap_counter ^= 1;
1438 
1439     /* We're using some buffers from the free list. */
1440     vq->vq.num_free -= descs_used;
1441 
1442     /* Update free pointer */
1443     vq->packed.next_avail_idx = i;
1444     vq->free_head = curr;
1445 
1446     /* Store token. */
1447     vq->packed.desc_state[id].num = descs_used;
1448     vq->packed.desc_state[id].data = data;
1449     vq->packed.desc_state[id].indir_desc = ctx;
1450     vq->packed.desc_state[id].last = prev;
1451 
1452     /*
1453      * A driver MUST NOT make the first descriptor in the list
1454      * available before all subsequent descriptors comprising
1455      * the list are made available.
1456      */
1457     virtio_wmb(vq->weak_barriers);
1458     vq->packed.vring.desc[head].flags = head_flags;
1459     vq->num_added += descs_used;
1460 
1461     pr_debug("Added buffer head %i to %p\n", head, vq);
1462     END_USE(vq);
1463 
1464     return 0;
1465 
1466 unmap_release:
1467     err_idx = i;
1468     i = head;
1469     curr = vq->free_head;
1470 
1471     vq->packed.avail_used_flags = avail_used_flags;
1472 
1473     for (n = 0; n < total_sg; n++) {
1474         if (i == err_idx)
1475             break;
1476         vring_unmap_extra_packed(vq, &vq->packed.desc_extra[curr]);
1477         curr = vq->packed.desc_extra[curr].next;
1478         i++;
1479         if (i >= vq->packed.vring.num)
1480             i = 0;
1481     }
1482 
1483     END_USE(vq);
1484     return -EIO;
1485 }
1486 
1487 static bool virtqueue_kick_prepare_packed(struct virtqueue *_vq)
1488 {
1489     struct vring_virtqueue *vq = to_vvq(_vq);
1490     u16 new, old, off_wrap, flags, wrap_counter, event_idx;
1491     bool needs_kick;
1492     union {
1493         struct {
1494             __le16 off_wrap;
1495             __le16 flags;
1496         };
1497         u32 u32;
1498     } snapshot;
1499 
1500     START_USE(vq);
1501 
1502     /*
1503      * We need to expose the new flags value before checking notification
1504      * suppressions.
1505      */
1506     virtio_mb(vq->weak_barriers);
1507 
1508     old = vq->packed.next_avail_idx - vq->num_added;
1509     new = vq->packed.next_avail_idx;
1510     vq->num_added = 0;
1511 
1512     snapshot.u32 = *(u32 *)vq->packed.vring.device;
1513     flags = le16_to_cpu(snapshot.flags);
1514 
1515     LAST_ADD_TIME_CHECK(vq);
1516     LAST_ADD_TIME_INVALID(vq);
1517 
1518     if (flags != VRING_PACKED_EVENT_FLAG_DESC) {
1519         needs_kick = (flags != VRING_PACKED_EVENT_FLAG_DISABLE);
1520         goto out;
1521     }
1522 
1523     off_wrap = le16_to_cpu(snapshot.off_wrap);
1524 
1525     wrap_counter = off_wrap >> VRING_PACKED_EVENT_F_WRAP_CTR;
1526     event_idx = off_wrap & ~(1 << VRING_PACKED_EVENT_F_WRAP_CTR);
1527     if (wrap_counter != vq->packed.avail_wrap_counter)
1528         event_idx -= vq->packed.vring.num;
1529 
1530     needs_kick = vring_need_event(event_idx, new, old);
1531 out:
1532     END_USE(vq);
1533     return needs_kick;
1534 }
1535 
1536 static void detach_buf_packed(struct vring_virtqueue *vq,
1537                   unsigned int id, void **ctx)
1538 {
1539     struct vring_desc_state_packed *state = NULL;
1540     struct vring_packed_desc *desc;
1541     unsigned int i, curr;
1542 
1543     state = &vq->packed.desc_state[id];
1544 
1545     /* Clear data ptr. */
1546     state->data = NULL;
1547 
1548     vq->packed.desc_extra[state->last].next = vq->free_head;
1549     vq->free_head = id;
1550     vq->vq.num_free += state->num;
1551 
1552     if (unlikely(vq->use_dma_api)) {
1553         curr = id;
1554         for (i = 0; i < state->num; i++) {
1555             vring_unmap_extra_packed(vq,
1556                          &vq->packed.desc_extra[curr]);
1557             curr = vq->packed.desc_extra[curr].next;
1558         }
1559     }
1560 
1561     if (vq->indirect) {
1562         u32 len;
1563 
1564         /* Free the indirect table, if any, now that it's unmapped. */
1565         desc = state->indir_desc;
1566         if (!desc)
1567             return;
1568 
1569         if (vq->use_dma_api) {
1570             len = vq->packed.desc_extra[id].len;
1571             for (i = 0; i < len / sizeof(struct vring_packed_desc);
1572                     i++)
1573                 vring_unmap_desc_packed(vq, &desc[i]);
1574         }
1575         kfree(desc);
1576         state->indir_desc = NULL;
1577     } else if (ctx) {
1578         *ctx = state->indir_desc;
1579     }
1580 }
1581 
1582 static inline bool is_used_desc_packed(const struct vring_virtqueue *vq,
1583                        u16 idx, bool used_wrap_counter)
1584 {
1585     bool avail, used;
1586     u16 flags;
1587 
1588     flags = le16_to_cpu(vq->packed.vring.desc[idx].flags);
1589     avail = !!(flags & (1 << VRING_PACKED_DESC_F_AVAIL));
1590     used = !!(flags & (1 << VRING_PACKED_DESC_F_USED));
1591 
1592     return avail == used && used == used_wrap_counter;
1593 }
1594 
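/*
 * Worked example (illustrative): a descriptor counts as used when its AVAIL
 * and USED bits are equal and both match the driver's used wrap counter. On
 * the first pass the counter is 1, so the device marks a used descriptor with
 * AVAIL=1, USED=1; after the used index wraps, the driver looks for
 * AVAIL=0, USED=0 instead.
 */
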
1595 static inline bool more_used_packed(const struct vring_virtqueue *vq)
1596 {
1597     u16 last_used;
1598     u16 last_used_idx;
1599     bool used_wrap_counter;
1600 
1601     last_used_idx = READ_ONCE(vq->last_used_idx);
1602     last_used = packed_last_used(last_used_idx);
1603     used_wrap_counter = packed_used_wrap_counter(last_used_idx);
1604     return is_used_desc_packed(vq, last_used, used_wrap_counter);
1605 }
1606 
1607 static void *virtqueue_get_buf_ctx_packed(struct virtqueue *_vq,
1608                       unsigned int *len,
1609                       void **ctx)
1610 {
1611     struct vring_virtqueue *vq = to_vvq(_vq);
1612     u16 last_used, id, last_used_idx;
1613     bool used_wrap_counter;
1614     void *ret;
1615 
1616     START_USE(vq);
1617 
1618     if (unlikely(vq->broken)) {
1619         END_USE(vq);
1620         return NULL;
1621     }
1622 
1623     if (!more_used_packed(vq)) {
1624         pr_debug("No more buffers in queue\n");
1625         END_USE(vq);
1626         return NULL;
1627     }
1628 
1629     /* Only get used elements after they have been exposed by host. */
1630     virtio_rmb(vq->weak_barriers);
1631 
1632     last_used_idx = READ_ONCE(vq->last_used_idx);
1633     used_wrap_counter = packed_used_wrap_counter(last_used_idx);
1634     last_used = packed_last_used(last_used_idx);
1635     id = le16_to_cpu(vq->packed.vring.desc[last_used].id);
1636     *len = le32_to_cpu(vq->packed.vring.desc[last_used].len);
1637 
1638     if (unlikely(id >= vq->packed.vring.num)) {
1639         BAD_RING(vq, "id %u out of range\n", id);
1640         return NULL;
1641     }
1642     if (unlikely(!vq->packed.desc_state[id].data)) {
1643         BAD_RING(vq, "id %u is not a head!\n", id);
1644         return NULL;
1645     }
1646 
1647     /* detach_buf_packed clears data, so grab it now. */
1648     ret = vq->packed.desc_state[id].data;
1649     detach_buf_packed(vq, id, ctx);
1650 
1651     last_used += vq->packed.desc_state[id].num;
1652     if (unlikely(last_used >= vq->packed.vring.num)) {
1653         last_used -= vq->packed.vring.num;
1654         used_wrap_counter ^= 1;
1655     }
1656 
1657     last_used = (last_used | (used_wrap_counter << VRING_PACKED_EVENT_F_WRAP_CTR));
1658     WRITE_ONCE(vq->last_used_idx, last_used);
1659 
1660     /*
1661      * If we expect an interrupt for the next entry, tell host
1662      * by writing event index and flush out the write before
1663      * the read in the next get_buf call.
1664      */
1665     if (vq->packed.event_flags_shadow == VRING_PACKED_EVENT_FLAG_DESC)
1666         virtio_store_mb(vq->weak_barriers,
1667                 &vq->packed.vring.driver->off_wrap,
1668                 cpu_to_le16(vq->last_used_idx));
1669 
1670     LAST_ADD_TIME_INVALID(vq);
1671 
1672     END_USE(vq);
1673     return ret;
1674 }
1675 
1676 static void virtqueue_disable_cb_packed(struct virtqueue *_vq)
1677 {
1678     struct vring_virtqueue *vq = to_vvq(_vq);
1679 
1680     if (vq->packed.event_flags_shadow != VRING_PACKED_EVENT_FLAG_DISABLE) {
1681         vq->packed.event_flags_shadow = VRING_PACKED_EVENT_FLAG_DISABLE;
1682         vq->packed.vring.driver->flags =
1683             cpu_to_le16(vq->packed.event_flags_shadow);
1684     }
1685 }
1686 
1687 static unsigned int virtqueue_enable_cb_prepare_packed(struct virtqueue *_vq)
1688 {
1689     struct vring_virtqueue *vq = to_vvq(_vq);
1690 
1691     START_USE(vq);
1692 
1693     /*
1694      * We optimistically turn back on interrupts, then check if there was
1695      * more to do.
1696      */
1697 
1698     if (vq->event) {
1699         vq->packed.vring.driver->off_wrap =
1700             cpu_to_le16(vq->last_used_idx);
1701         /*
1702          * We need to update event offset and event wrap
1703          * counter first before updating event flags.
1704          */
1705         virtio_wmb(vq->weak_barriers);
1706     }
1707 
1708     if (vq->packed.event_flags_shadow == VRING_PACKED_EVENT_FLAG_DISABLE) {
1709         vq->packed.event_flags_shadow = vq->event ?
1710                 VRING_PACKED_EVENT_FLAG_DESC :
1711                 VRING_PACKED_EVENT_FLAG_ENABLE;
1712         vq->packed.vring.driver->flags =
1713                 cpu_to_le16(vq->packed.event_flags_shadow);
1714     }
1715 
1716     END_USE(vq);
1717     return vq->last_used_idx;
1718 }
1719 
1720 static bool virtqueue_poll_packed(struct virtqueue *_vq, u16 off_wrap)
1721 {
1722     struct vring_virtqueue *vq = to_vvq(_vq);
1723     bool wrap_counter;
1724     u16 used_idx;
1725 
1726     wrap_counter = off_wrap >> VRING_PACKED_EVENT_F_WRAP_CTR;
1727     used_idx = off_wrap & ~(1 << VRING_PACKED_EVENT_F_WRAP_CTR);
1728 
1729     return is_used_desc_packed(vq, used_idx, wrap_counter);
1730 }
1731 
1732 static bool virtqueue_enable_cb_delayed_packed(struct virtqueue *_vq)
1733 {
1734     struct vring_virtqueue *vq = to_vvq(_vq);
1735     u16 used_idx, wrap_counter, last_used_idx;
1736     u16 bufs;
1737 
1738     START_USE(vq);
1739 
1740     /*
1741      * We optimistically turn back on interrupts, then check if there was
1742      * more to do.
1743      */
1744 
1745     if (vq->event) {
1746         /* TODO: tune this threshold */
1747         bufs = (vq->packed.vring.num - vq->vq.num_free) * 3 / 4;
1748         last_used_idx = READ_ONCE(vq->last_used_idx);
1749         wrap_counter = packed_used_wrap_counter(last_used_idx);
1750 
1751         used_idx = packed_last_used(last_used_idx) + bufs;
1752         if (used_idx >= vq->packed.vring.num) {
1753             used_idx -= vq->packed.vring.num;
1754             wrap_counter ^= 1;
1755         }
1756 
1757         vq->packed.vring.driver->off_wrap = cpu_to_le16(used_idx |
1758             (wrap_counter << VRING_PACKED_EVENT_F_WRAP_CTR));
1759 
1760         /*
1761          * We need to update event offset and event wrap
1762          * counter first before updating event flags.
1763          */
1764         virtio_wmb(vq->weak_barriers);
1765     }
1766 
1767     if (vq->packed.event_flags_shadow == VRING_PACKED_EVENT_FLAG_DISABLE) {
1768         vq->packed.event_flags_shadow = vq->event ?
1769                 VRING_PACKED_EVENT_FLAG_DESC :
1770                 VRING_PACKED_EVENT_FLAG_ENABLE;
1771         vq->packed.vring.driver->flags =
1772                 cpu_to_le16(vq->packed.event_flags_shadow);
1773     }
1774 
1775     /*
1776      * We need to update event suppression structure first
1777      * before re-checking for more used buffers.
1778      */
1779     virtio_mb(vq->weak_barriers);
1780 
1781     last_used_idx = READ_ONCE(vq->last_used_idx);
1782     wrap_counter = packed_used_wrap_counter(last_used_idx);
1783     used_idx = packed_last_used(last_used_idx);
1784     if (is_used_desc_packed(vq, used_idx, wrap_counter)) {
1785         END_USE(vq);
1786         return false;
1787     }
1788 
1789     END_USE(vq);
1790     return true;
1791 }
1792 
1793 static void *virtqueue_detach_unused_buf_packed(struct virtqueue *_vq)
1794 {
1795     struct vring_virtqueue *vq = to_vvq(_vq);
1796     unsigned int i;
1797     void *buf;
1798 
1799     START_USE(vq);
1800 
1801     for (i = 0; i < vq->packed.vring.num; i++) {
1802         if (!vq->packed.desc_state[i].data)
1803             continue;
1804         /* detach_buf clears data, so grab it now. */
1805         buf = vq->packed.desc_state[i].data;
1806         detach_buf_packed(vq, i, NULL);
1807         END_USE(vq);
1808         return buf;
1809     }
1810     /* That should have freed everything. */
1811     BUG_ON(vq->vq.num_free != vq->packed.vring.num);
1812 
1813     END_USE(vq);
1814     return NULL;
1815 }
1816 
1817 static struct vring_desc_extra *vring_alloc_desc_extra(unsigned int num)
1818 {
1819     struct vring_desc_extra *desc_extra;
1820     unsigned int i;
1821 
1822     desc_extra = kmalloc_array(num, sizeof(struct vring_desc_extra),
1823                    GFP_KERNEL);
1824     if (!desc_extra)
1825         return NULL;
1826 
1827     memset(desc_extra, 0, num * sizeof(struct vring_desc_extra));
1828 
1829     for (i = 0; i < num - 1; i++)
1830         desc_extra[i].next = i + 1;
1831 
1832     return desc_extra;
1833 }
1834 
1835 static void vring_free_packed(struct vring_virtqueue_packed *vring_packed,
1836                   struct virtio_device *vdev)
1837 {
1838     if (vring_packed->vring.desc)
1839         vring_free_queue(vdev, vring_packed->ring_size_in_bytes,
1840                  vring_packed->vring.desc,
1841                  vring_packed->ring_dma_addr);
1842 
1843     if (vring_packed->vring.driver)
1844         vring_free_queue(vdev, vring_packed->event_size_in_bytes,
1845                  vring_packed->vring.driver,
1846                  vring_packed->driver_event_dma_addr);
1847 
1848     if (vring_packed->vring.device)
1849         vring_free_queue(vdev, vring_packed->event_size_in_bytes,
1850                  vring_packed->vring.device,
1851                  vring_packed->device_event_dma_addr);
1852 
1853     kfree(vring_packed->desc_state);
1854     kfree(vring_packed->desc_extra);
1855 }
1856 
1857 static int vring_alloc_queue_packed(struct vring_virtqueue_packed *vring_packed,
1858                     struct virtio_device *vdev,
1859                     u32 num)
1860 {
1861     struct vring_packed_desc *ring;
1862     struct vring_packed_desc_event *driver, *device;
1863     dma_addr_t ring_dma_addr, driver_event_dma_addr, device_event_dma_addr;
1864     size_t ring_size_in_bytes, event_size_in_bytes;
1865 
1866     ring_size_in_bytes = num * sizeof(struct vring_packed_desc);
1867 
1868     ring = vring_alloc_queue(vdev, ring_size_in_bytes,
1869                  &ring_dma_addr,
1870                  GFP_KERNEL|__GFP_NOWARN|__GFP_ZERO);
1871     if (!ring)
1872         goto err;
1873 
1874     vring_packed->vring.desc         = ring;
1875     vring_packed->ring_dma_addr      = ring_dma_addr;
1876     vring_packed->ring_size_in_bytes = ring_size_in_bytes;
1877 
1878     event_size_in_bytes = sizeof(struct vring_packed_desc_event);
1879 
1880     driver = vring_alloc_queue(vdev, event_size_in_bytes,
1881                    &driver_event_dma_addr,
1882                    GFP_KERNEL|__GFP_NOWARN|__GFP_ZERO);
1883     if (!driver)
1884         goto err;
1885 
1886     vring_packed->vring.driver          = driver;
1887     vring_packed->event_size_in_bytes   = event_size_in_bytes;
1888     vring_packed->driver_event_dma_addr = driver_event_dma_addr;
1889 
1890     device = vring_alloc_queue(vdev, event_size_in_bytes,
1891                    &device_event_dma_addr,
1892                    GFP_KERNEL|__GFP_NOWARN|__GFP_ZERO);
1893     if (!device)
1894         goto err;
1895 
1896     vring_packed->vring.device          = device;
1897     vring_packed->device_event_dma_addr = device_event_dma_addr;
1898 
1899     vring_packed->vring.num = num;
1900 
1901     return 0;
1902 
1903 err:
1904     vring_free_packed(vring_packed, vdev);
1905     return -ENOMEM;
1906 }
1907 
1908 static int vring_alloc_state_extra_packed(struct vring_virtqueue_packed *vring_packed)
1909 {
1910     struct vring_desc_state_packed *state;
1911     struct vring_desc_extra *extra;
1912     u32 num = vring_packed->vring.num;
1913 
1914     state = kmalloc_array(num, sizeof(struct vring_desc_state_packed), GFP_KERNEL);
1915     if (!state)
1916         goto err_desc_state;
1917 
1918     memset(state, 0, num * sizeof(struct vring_desc_state_packed));
1919 
1920     extra = vring_alloc_desc_extra(num);
1921     if (!extra)
1922         goto err_desc_extra;
1923 
1924     vring_packed->desc_state = state;
1925     vring_packed->desc_extra = extra;
1926 
1927     return 0;
1928 
1929 err_desc_extra:
1930     kfree(state);
1931 err_desc_state:
1932     return -ENOMEM;
1933 }
1934 
1935 static void virtqueue_vring_init_packed(struct vring_virtqueue_packed *vring_packed,
1936                     bool callback)
1937 {
1938     vring_packed->next_avail_idx = 0;
1939     vring_packed->avail_wrap_counter = 1;
1940     vring_packed->event_flags_shadow = 0;
1941     vring_packed->avail_used_flags = 1 << VRING_PACKED_DESC_F_AVAIL;
1942 
1943     /* No callback?  Tell other side not to bother us. */
1944     if (!callback) {
1945         vring_packed->event_flags_shadow = VRING_PACKED_EVENT_FLAG_DISABLE;
1946         vring_packed->vring.driver->flags =
1947             cpu_to_le16(vring_packed->event_flags_shadow);
1948     }
1949 }
1950 
1951 static void virtqueue_vring_attach_packed(struct vring_virtqueue *vq,
1952                       struct vring_virtqueue_packed *vring_packed)
1953 {
1954     vq->packed = *vring_packed;
1955 
1956     /* Put everything in free lists. */
1957     vq->free_head = 0;
1958 }
1959 
1960 static void virtqueue_reinit_packed(struct vring_virtqueue *vq)
1961 {
1962     memset(vq->packed.vring.device, 0, vq->packed.event_size_in_bytes);
1963     memset(vq->packed.vring.driver, 0, vq->packed.event_size_in_bytes);
1964 
1965     /* We need to reset the desc.flags. For more, see is_used_desc_packed(). */
1966     memset(vq->packed.vring.desc, 0, vq->packed.ring_size_in_bytes);
1967 
1968     virtqueue_init(vq, vq->packed.vring.num);
1969     virtqueue_vring_init_packed(&vq->packed, !!vq->vq.callback);
1970 }
1971 
1972 static struct virtqueue *vring_create_virtqueue_packed(
1973     unsigned int index,
1974     unsigned int num,
1975     unsigned int vring_align,
1976     struct virtio_device *vdev,
1977     bool weak_barriers,
1978     bool may_reduce_num,
1979     bool context,
1980     bool (*notify)(struct virtqueue *),
1981     void (*callback)(struct virtqueue *),
1982     const char *name)
1983 {
1984     struct vring_virtqueue_packed vring_packed = {};
1985     struct vring_virtqueue *vq;
1986     int err;
1987 
1988     if (vring_alloc_queue_packed(&vring_packed, vdev, num))
1989         goto err_ring;
1990 
1991     vq = kmalloc(sizeof(*vq), GFP_KERNEL);
1992     if (!vq)
1993         goto err_vq;
1994 
1995     vq->vq.callback = callback;
1996     vq->vq.vdev = vdev;
1997     vq->vq.name = name;
1998     vq->vq.index = index;
1999     vq->vq.reset = false;
2000     vq->we_own_ring = true;
2001     vq->notify = notify;
2002     vq->weak_barriers = weak_barriers;
2003 #ifdef CONFIG_VIRTIO_HARDEN_NOTIFICATION
2004     vq->broken = true;
2005 #else
2006     vq->broken = false;
2007 #endif
2008     vq->packed_ring = true;
2009     vq->use_dma_api = vring_use_dma_api(vdev);
2010 
2011     vq->indirect = virtio_has_feature(vdev, VIRTIO_RING_F_INDIRECT_DESC) &&
2012         !context;
2013     vq->event = virtio_has_feature(vdev, VIRTIO_RING_F_EVENT_IDX);
2014 
2015     if (virtio_has_feature(vdev, VIRTIO_F_ORDER_PLATFORM))
2016         vq->weak_barriers = false;
2017 
2018     err = vring_alloc_state_extra_packed(&vring_packed);
2019     if (err)
2020         goto err_state_extra;
2021 
2022     virtqueue_vring_init_packed(&vring_packed, !!callback);
2023 
2024     virtqueue_init(vq, num);
2025     virtqueue_vring_attach_packed(vq, &vring_packed);
2026 
2027     spin_lock(&vdev->vqs_list_lock);
2028     list_add_tail(&vq->vq.list, &vdev->vqs);
2029     spin_unlock(&vdev->vqs_list_lock);
2030     return &vq->vq;
2031 
2032 err_state_extra:
2033     kfree(vq);
2034 err_vq:
2035     vring_free_packed(&vring_packed, vdev);
2036 err_ring:
2037     return NULL;
2038 }
2039 
2040 static int virtqueue_resize_packed(struct virtqueue *_vq, u32 num)
2041 {
2042     struct vring_virtqueue_packed vring_packed = {};
2043     struct vring_virtqueue *vq = to_vvq(_vq);
2044     struct virtio_device *vdev = _vq->vdev;
2045     int err;
2046 
2047     if (vring_alloc_queue_packed(&vring_packed, vdev, num))
2048         goto err_ring;
2049 
2050     err = vring_alloc_state_extra_packed(&vring_packed);
2051     if (err)
2052         goto err_state_extra;
2053 
2054     vring_free(&vq->vq);
2055 
2056     virtqueue_vring_init_packed(&vring_packed, !!vq->vq.callback);
2057 
2058     virtqueue_init(vq, vring_packed.vring.num);
2059     virtqueue_vring_attach_packed(vq, &vring_packed);
2060 
2061     return 0;
2062 
2063 err_state_extra:
2064     vring_free_packed(&vring_packed, vdev);
2065 err_ring:
2066     virtqueue_reinit_packed(vq);
2067     return -ENOMEM;
2068 }
2069 
2070 
2071 /*
2072  * Generic functions and exported symbols.
2073  */
2074 
2075 static inline int virtqueue_add(struct virtqueue *_vq,
2076                 struct scatterlist *sgs[],
2077                 unsigned int total_sg,
2078                 unsigned int out_sgs,
2079                 unsigned int in_sgs,
2080                 void *data,
2081                 void *ctx,
2082                 gfp_t gfp)
2083 {
2084     struct vring_virtqueue *vq = to_vvq(_vq);
2085 
2086     return vq->packed_ring ? virtqueue_add_packed(_vq, sgs, total_sg,
2087                     out_sgs, in_sgs, data, ctx, gfp) :
2088                  virtqueue_add_split(_vq, sgs, total_sg,
2089                     out_sgs, in_sgs, data, ctx, gfp);
2090 }
2091 
2092 /**
2093  * virtqueue_add_sgs - expose buffers to other end
2094  * @_vq: the struct virtqueue we're talking about.
2095  * @sgs: array of terminated scatterlists.
2096  * @out_sgs: the number of scatterlists readable by other side
2097  * @in_sgs: the number of scatterlists which are writable (after readable ones)
2098  * @data: the token identifying the buffer.
2099  * @gfp: how to do memory allocations (if necessary).
2100  *
2101  * Caller must ensure we don't call this with other virtqueue operations
2102  * at the same time (except where noted).
2103  *
2104  * Returns zero or a negative error (i.e. -ENOSPC, -ENOMEM, -EIO).
2105  */
2106 int virtqueue_add_sgs(struct virtqueue *_vq,
2107               struct scatterlist *sgs[],
2108               unsigned int out_sgs,
2109               unsigned int in_sgs,
2110               void *data,
2111               gfp_t gfp)
2112 {
2113     unsigned int i, total_sg = 0;
2114 
2115     /* Count them first. */
2116     for (i = 0; i < out_sgs + in_sgs; i++) {
2117         struct scatterlist *sg;
2118 
2119         for (sg = sgs[i]; sg; sg = sg_next(sg))
2120             total_sg++;
2121     }
2122     return virtqueue_add(_vq, sgs, total_sg, out_sgs, in_sgs,
2123                  data, NULL, gfp);
2124 }
2125 EXPORT_SYMBOL_GPL(virtqueue_add_sgs);
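/*
 * A minimal usage sketch for virtqueue_add_sgs(): one device-readable
 * header followed by one device-writable status field.  The request
 * structure and surrounding error handling are hypothetical driver code,
 * not part of this file.
 *
 *	struct scatterlist hdr, status, *sgs[2];
 *	int err;
 *
 *	sg_init_one(&hdr, &req->hdr, sizeof(req->hdr));
 *	sgs[0] = &hdr;
 *	sg_init_one(&status, &req->status, sizeof(req->status));
 *	sgs[1] = &status;
 *
 *	err = virtqueue_add_sgs(vq, sgs, 1, 1, req, GFP_ATOMIC);
 *	if (!err)
 *		virtqueue_kick(vq);
 */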
2126 
2127 /**
2128  * virtqueue_add_outbuf - expose output buffers to other end
2129  * @vq: the struct virtqueue we're talking about.
2130  * @sg: scatterlist (must be well-formed and terminated!)
2131  * @num: the number of entries in @sg readable by other side
2132  * @data: the token identifying the buffer.
2133  * @gfp: how to do memory allocations (if necessary).
2134  *
2135  * Caller must ensure we don't call this with other virtqueue operations
2136  * at the same time (except where noted).
2137  *
2138  * Returns zero or a negative error (i.e. -ENOSPC, -ENOMEM, -EIO).
2139  */
2140 int virtqueue_add_outbuf(struct virtqueue *vq,
2141              struct scatterlist *sg, unsigned int num,
2142              void *data,
2143              gfp_t gfp)
2144 {
2145     return virtqueue_add(vq, &sg, num, 1, 0, data, NULL, gfp);
2146 }
2147 EXPORT_SYMBOL_GPL(virtqueue_add_outbuf);
2148 
2149 /**
2150  * virtqueue_add_inbuf - expose input buffers to other end
2151  * @vq: the struct virtqueue we're talking about.
2152  * @sg: scatterlist (must be well-formed and terminated!)
2153  * @num: the number of entries in @sg writable by other side
2154  * @data: the token identifying the buffer.
2155  * @gfp: how to do memory allocations (if necessary).
2156  *
2157  * Caller must ensure we don't call this with other virtqueue operations
2158  * at the same time (except where noted).
2159  *
2160  * Returns zero or a negative error (i.e. -ENOSPC, -ENOMEM, -EIO).
2161  */
2162 int virtqueue_add_inbuf(struct virtqueue *vq,
2163             struct scatterlist *sg, unsigned int num,
2164             void *data,
2165             gfp_t gfp)
2166 {
2167     return virtqueue_add(vq, &sg, num, 0, 1, data, NULL, gfp);
2168 }
2169 EXPORT_SYMBOL_GPL(virtqueue_add_inbuf);
2170 
2171 /**
2172  * virtqueue_add_inbuf_ctx - expose input buffers to other end
2173  * @vq: the struct virtqueue we're talking about.
2174  * @sg: scatterlist (must be well-formed and terminated!)
2175  * @num: the number of entries in @sg writable by other side
2176  * @data: the token identifying the buffer.
2177  * @ctx: extra context for the token
2178  * @gfp: how to do memory allocations (if necessary).
2179  *
2180  * Caller must ensure we don't call this with other virtqueue operations
2181  * at the same time (except where noted).
2182  *
2183  * Returns zero or a negative error (i.e. -ENOSPC, -ENOMEM, -EIO).
2184  */
2185 int virtqueue_add_inbuf_ctx(struct virtqueue *vq,
2186             struct scatterlist *sg, unsigned int num,
2187             void *data,
2188             void *ctx,
2189             gfp_t gfp)
2190 {
2191     return virtqueue_add(vq, &sg, num, 0, 1, data, ctx, gfp);
2192 }
2193 EXPORT_SYMBOL_GPL(virtqueue_add_inbuf_ctx);
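/*
 * For the common single-scatterlist case the inbuf/outbuf helpers above
 * are simpler to use; -ENOSPC is returned when the ring is full.  A hedged
 * sketch with illustrative buffer names (not from this file):
 *
 *	struct scatterlist sg;
 *	int err;
 *
 *	sg_init_one(&sg, buf, buf_len);
 *	err = virtqueue_add_inbuf(vq, &sg, 1, buf, GFP_KERNEL);
 *	if (!err)
 *		virtqueue_kick(vq);
 */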
2194 
2195 /**
2196  * virtqueue_kick_prepare - first half of split virtqueue_kick call.
2197  * @_vq: the struct virtqueue
2198  *
2199  * Instead of virtqueue_kick(), you can do:
2200  *  if (virtqueue_kick_prepare(vq))
2201  *      virtqueue_notify(vq);
2202  *
2203  * This is sometimes useful because virtqueue_kick_prepare() needs
2204  * to be serialized, but the actual virtqueue_notify() call does not.
2205  */
2206 bool virtqueue_kick_prepare(struct virtqueue *_vq)
2207 {
2208     struct vring_virtqueue *vq = to_vvq(_vq);
2209 
2210     return vq->packed_ring ? virtqueue_kick_prepare_packed(_vq) :
2211                  virtqueue_kick_prepare_split(_vq);
2212 }
2213 EXPORT_SYMBOL_GPL(virtqueue_kick_prepare);
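/*
 * The prepare/notify split is typically used to drop a lock before the
 * (possibly slow) notification.  A hedged sketch, assuming the caller
 * serializes virtqueue operations with a hypothetical spinlock:
 *
 *	bool kick;
 *
 *	spin_lock_irqsave(&lock, flags);
 *	err = virtqueue_add_outbuf(vq, &sg, 1, req, GFP_ATOMIC);
 *	kick = virtqueue_kick_prepare(vq);
 *	spin_unlock_irqrestore(&lock, flags);
 *
 *	if (kick)
 *		virtqueue_notify(vq);
 */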
2214 
2215 /**
2216  * virtqueue_notify - second half of split virtqueue_kick call.
2217  * @_vq: the struct virtqueue
2218  *
2219  * This does not need to be serialized.
2220  *
2221  * Returns false if host notify failed or queue is broken, otherwise true.
2222  */
2223 bool virtqueue_notify(struct virtqueue *_vq)
2224 {
2225     struct vring_virtqueue *vq = to_vvq(_vq);
2226 
2227     if (unlikely(vq->broken))
2228         return false;
2229 
2230     /* Prod other side to tell it about changes. */
2231     if (!vq->notify(_vq)) {
2232         vq->broken = true;
2233         return false;
2234     }
2235     return true;
2236 }
2237 EXPORT_SYMBOL_GPL(virtqueue_notify);
2238 
2239 /**
2240  * virtqueue_kick - update after add_buf
2241  * @vq: the struct virtqueue
2242  *
2243  * After one or more virtqueue_add_* calls, invoke this to kick
2244  * the other side.
2245  *
2246  * Caller must ensure we don't call this with other virtqueue
2247  * operations at the same time (except where noted).
2248  *
2249  * Returns false if kick failed, otherwise true.
2250  */
2251 bool virtqueue_kick(struct virtqueue *vq)
2252 {
2253     if (virtqueue_kick_prepare(vq))
2254         return virtqueue_notify(vq);
2255     return true;
2256 }
2257 EXPORT_SYMBOL_GPL(virtqueue_kick);
2258 
2259 /**
2260  * virtqueue_get_buf_ctx - get the next used buffer
2261  * @_vq: the struct virtqueue we're talking about.
2262  * @len: the length written into the buffer
2263  * @ctx: extra context for the token
2264  *
2265  * If the device wrote data into the buffer, @len will be set to the
2266  * amount written.  This means you don't need to clear the buffer
2267  * beforehand to ensure there's no data leakage in the case of short
2268  * writes.
2269  *
2270  * Caller must ensure we don't call this with other virtqueue
2271  * operations at the same time (except where noted).
2272  *
2273  * Returns NULL if there are no used buffers, or the "data" token
2274  * handed to virtqueue_add_*().
2275  */
2276 void *virtqueue_get_buf_ctx(struct virtqueue *_vq, unsigned int *len,
2277                 void **ctx)
2278 {
2279     struct vring_virtqueue *vq = to_vvq(_vq);
2280 
2281     return vq->packed_ring ? virtqueue_get_buf_ctx_packed(_vq, len, ctx) :
2282                  virtqueue_get_buf_ctx_split(_vq, len, ctx);
2283 }
2284 EXPORT_SYMBOL_GPL(virtqueue_get_buf_ctx);
2285 
2286 void *virtqueue_get_buf(struct virtqueue *_vq, unsigned int *len)
2287 {
2288     return virtqueue_get_buf_ctx(_vq, len, NULL);
2289 }
2290 EXPORT_SYMBOL_GPL(virtqueue_get_buf);
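/*
 * A typical completion loop, as a hedged sketch: complete_request() is a
 * hypothetical driver helper, and the tokens returned here are whatever
 * was passed as @data to virtqueue_add_*().
 *
 *	unsigned int len;
 *	void *req;
 *
 *	while ((req = virtqueue_get_buf(vq, &len)) != NULL)
 *		complete_request(req, len);
 */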
2291 /**
2292  * virtqueue_disable_cb - disable callbacks
2293  * @_vq: the struct virtqueue we're talking about.
2294  *
2295  * Note that this is not necessarily synchronous, hence unreliable and only
2296  * useful as an optimization.
2297  *
2298  * Unlike other operations, this need not be serialized.
2299  */
2300 void virtqueue_disable_cb(struct virtqueue *_vq)
2301 {
2302     struct vring_virtqueue *vq = to_vvq(_vq);
2303 
2304     /* If device triggered an event already it won't trigger one again:
2305      * no need to disable.
2306      */
2307     if (vq->event_triggered)
2308         return;
2309 
2310     if (vq->packed_ring)
2311         virtqueue_disable_cb_packed(_vq);
2312     else
2313         virtqueue_disable_cb_split(_vq);
2314 }
2315 EXPORT_SYMBOL_GPL(virtqueue_disable_cb);
2316 
2317 /**
2318  * virtqueue_enable_cb_prepare - restart callbacks after disable_cb
2319  * @_vq: the struct virtqueue we're talking about.
2320  *
2321  * This re-enables callbacks; it returns the current queue state
2322  * in an opaque unsigned value. This value should later be tested by
2323  * virtqueue_poll() to detect a possible race between the driver checking for
2324  * more work and enabling callbacks.
2325  *
2326  * Caller must ensure we don't call this with other virtqueue
2327  * operations at the same time (except where noted).
2328  */
2329 unsigned int virtqueue_enable_cb_prepare(struct virtqueue *_vq)
2330 {
2331     struct vring_virtqueue *vq = to_vvq(_vq);
2332 
2333     if (vq->event_triggered)
2334         vq->event_triggered = false;
2335 
2336     return vq->packed_ring ? virtqueue_enable_cb_prepare_packed(_vq) :
2337                  virtqueue_enable_cb_prepare_split(_vq);
2338 }
2339 EXPORT_SYMBOL_GPL(virtqueue_enable_cb_prepare);
2340 
2341 /**
2342  * virtqueue_poll - query pending used buffers
2343  * @_vq: the struct virtqueue we're talking about.
2344  * @last_used_idx: virtqueue state (from call to virtqueue_enable_cb_prepare).
2345  *
2346  * Returns "true" if there are pending used buffers in the queue.
2347  *
2348  * This does not need to be serialized.
2349  */
2350 bool virtqueue_poll(struct virtqueue *_vq, unsigned int last_used_idx)
2351 {
2352     struct vring_virtqueue *vq = to_vvq(_vq);
2353 
2354     if (unlikely(vq->broken))
2355         return false;
2356 
2357     virtio_mb(vq->weak_barriers);
2358     return vq->packed_ring ? virtqueue_poll_packed(_vq, last_used_idx) :
2359                  virtqueue_poll_split(_vq, last_used_idx);
2360 }
2361 EXPORT_SYMBOL_GPL(virtqueue_poll);
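/*
 * virtqueue_enable_cb_prepare() and virtqueue_poll() are meant to be used
 * together when the re-check has to happen outside the serialized section,
 * e.g. in NAPI-style polling.  A hedged sketch, where
 * reschedule_processing() is a hypothetical stand-in:
 *
 *	unsigned int opaque;
 *
 *	opaque = virtqueue_enable_cb_prepare(vq);
 *	... finish up, possibly dropping locks ...
 *	if (virtqueue_poll(vq, opaque))
 *		reschedule_processing(vq);
 */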
2362 
2363 /**
2364  * virtqueue_enable_cb - restart callbacks after disable_cb.
2365  * @_vq: the struct virtqueue we're talking about.
2366  *
2367  * This re-enables callbacks; it returns "false" if there are pending
2368  * buffers in the queue, to detect a possible race between the driver
2369  * checking for more work, and enabling callbacks.
2370  *
2371  * Caller must ensure we don't call this with other virtqueue
2372  * operations at the same time (except where noted).
2373  */
2374 bool virtqueue_enable_cb(struct virtqueue *_vq)
2375 {
2376     unsigned int last_used_idx = virtqueue_enable_cb_prepare(_vq);
2377 
2378     return !virtqueue_poll(_vq, last_used_idx);
2379 }
2380 EXPORT_SYMBOL_GPL(virtqueue_enable_cb);
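/*
 * The simpler pattern pairs virtqueue_disable_cb() with
 * virtqueue_enable_cb(): keep processing until callbacks can be re-enabled
 * without racing with a newly used buffer.  Hedged sketch,
 * process_buffers() being a hypothetical stand-in:
 *
 *	virtqueue_disable_cb(vq);
 *	do {
 *		process_buffers(vq);
 *	} while (!virtqueue_enable_cb(vq));
 */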
2381 
2382 /**
2383  * virtqueue_enable_cb_delayed - restart callbacks after disable_cb.
2384  * @_vq: the struct virtqueue we're talking about.
2385  *
2386  * This re-enables callbacks but hints to the other side to delay
2387  * interrupts until most of the available buffers have been processed;
2388  * it returns "false" if there are many pending buffers in the queue,
2389  * to detect a possible race between the driver checking for more work,
2390  * and enabling callbacks.
2391  *
2392  * Caller must ensure we don't call this with other virtqueue
2393  * operations at the same time (except where noted).
2394  */
2395 bool virtqueue_enable_cb_delayed(struct virtqueue *_vq)
2396 {
2397     struct vring_virtqueue *vq = to_vvq(_vq);
2398 
2399     if (vq->event_triggered)
2400         vq->event_triggered = false;
2401 
2402     return vq->packed_ring ? virtqueue_enable_cb_delayed_packed(_vq) :
2403                  virtqueue_enable_cb_delayed_split(_vq);
2404 }
2405 EXPORT_SYMBOL_GPL(virtqueue_enable_cb_delayed);
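/*
 * Hedged transmit-side sketch for the delayed variant: reclaim completed
 * buffers, ask the device to hold off interrupts until most of the ring
 * has been used, and re-check once to close the race (free_old_xmit() is
 * a hypothetical helper):
 *
 *	free_old_xmit(vq);
 *	if (!virtqueue_enable_cb_delayed(vq))
 *		free_old_xmit(vq);
 */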
2406 
2407 /**
2408  * virtqueue_detach_unused_buf - detach first unused buffer
2409  * @_vq: the struct virtqueue we're talking about.
2410  *
2411  * Returns NULL or the "data" token handed to virtqueue_add_*().
2412  * This is not valid on an active queue; it is useful during device
2413  * shutdown or while a queue is being reset.
2414  */
2415 void *virtqueue_detach_unused_buf(struct virtqueue *_vq)
2416 {
2417     struct vring_virtqueue *vq = to_vvq(_vq);
2418 
2419     return vq->packed_ring ? virtqueue_detach_unused_buf_packed(_vq) :
2420                  virtqueue_detach_unused_buf_split(_vq);
2421 }
2422 EXPORT_SYMBOL_GPL(virtqueue_detach_unused_buf);
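/*
 * Hedged teardown sketch: once the device has been reset, reclaim buffers
 * that were added but never used by the device (free_buf() is a
 * hypothetical helper matching whatever the driver queued):
 *
 *	void *buf;
 *
 *	while ((buf = virtqueue_detach_unused_buf(vq)) != NULL)
 *		free_buf(buf);
 */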
2423 
2424 static inline bool more_used(const struct vring_virtqueue *vq)
2425 {
2426     return vq->packed_ring ? more_used_packed(vq) : more_used_split(vq);
2427 }
2428 
2429 /**
2430  * vring_interrupt - notify a virtqueue on an interrupt
2431  * @irq: the IRQ number (ignored)
2432  * @_vq: the struct virtqueue to notify
2433  *
2434  * Calls the callback function of @_vq to process the virtqueue
2435  * notification.
2436  */
2437 irqreturn_t vring_interrupt(int irq, void *_vq)
2438 {
2439     struct vring_virtqueue *vq = to_vvq(_vq);
2440 
2441     if (!more_used(vq)) {
2442         pr_debug("virtqueue interrupt with no work for %p\n", vq);
2443         return IRQ_NONE;
2444     }
2445 
2446     if (unlikely(vq->broken)) {
2447 #ifdef CONFIG_VIRTIO_HARDEN_NOTIFICATION
2448         dev_warn_once(&vq->vq.vdev->dev,
2449                   "virtio vring IRQ raised before DRIVER_OK");
2450         return IRQ_NONE;
2451 #else
2452         return IRQ_HANDLED;
2453 #endif
2454     }
2455 
2456     /* Just a hint for performance: so it's ok that this can be racy! */
2457     if (vq->event)
2458         vq->event_triggered = true;
2459 
2460     pr_debug("virtqueue callback for %p (%p)\n", vq, vq->vq.callback);
2461     if (vq->vq.callback)
2462         vq->vq.callback(&vq->vq);
2463 
2464     return IRQ_HANDLED;
2465 }
2466 EXPORT_SYMBOL_GPL(vring_interrupt);
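/*
 * Transports register this as the (possibly shared) interrupt handler for
 * a virtqueue, roughly along these lines (the irq number and name are
 * transport-specific):
 *
 *	err = request_irq(irq, vring_interrupt, IRQF_SHARED,
 *			  dev_name(&vdev->dev), vq);
 */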
2467 
2468 /* Only available for split ring */
2469 static struct virtqueue *__vring_new_virtqueue(unsigned int index,
2470                            struct vring_virtqueue_split *vring_split,
2471                            struct virtio_device *vdev,
2472                            bool weak_barriers,
2473                            bool context,
2474                            bool (*notify)(struct virtqueue *),
2475                            void (*callback)(struct virtqueue *),
2476                            const char *name)
2477 {
2478     struct vring_virtqueue *vq;
2479     int err;
2480 
2481     if (virtio_has_feature(vdev, VIRTIO_F_RING_PACKED))
2482         return NULL;
2483 
2484     vq = kmalloc(sizeof(*vq), GFP_KERNEL);
2485     if (!vq)
2486         return NULL;
2487 
2488     vq->packed_ring = false;
2489     vq->vq.callback = callback;
2490     vq->vq.vdev = vdev;
2491     vq->vq.name = name;
2492     vq->vq.index = index;
2493     vq->vq.reset = false;
2494     vq->we_own_ring = false;
2495     vq->notify = notify;
2496     vq->weak_barriers = weak_barriers;
2497 #ifdef CONFIG_VIRTIO_HARDEN_NOTIFICATION
2498     vq->broken = true;
2499 #else
2500     vq->broken = false;
2501 #endif
2502     vq->use_dma_api = vring_use_dma_api(vdev);
2503 
2504     vq->indirect = virtio_has_feature(vdev, VIRTIO_RING_F_INDIRECT_DESC) &&
2505         !context;
2506     vq->event = virtio_has_feature(vdev, VIRTIO_RING_F_EVENT_IDX);
2507 
2508     if (virtio_has_feature(vdev, VIRTIO_F_ORDER_PLATFORM))
2509         vq->weak_barriers = false;
2510 
2511     err = vring_alloc_state_extra_split(vring_split);
2512     if (err) {
2513         kfree(vq);
2514         return NULL;
2515     }
2516 
2517     virtqueue_vring_init_split(vring_split, vq);
2518 
2519     virtqueue_init(vq, vring_split->vring.num);
2520     virtqueue_vring_attach_split(vq, vring_split);
2521 
2522     spin_lock(&vdev->vqs_list_lock);
2523     list_add_tail(&vq->vq.list, &vdev->vqs);
2524     spin_unlock(&vdev->vqs_list_lock);
2525     return &vq->vq;
2526 }
2527 
2528 struct virtqueue *vring_create_virtqueue(
2529     unsigned int index,
2530     unsigned int num,
2531     unsigned int vring_align,
2532     struct virtio_device *vdev,
2533     bool weak_barriers,
2534     bool may_reduce_num,
2535     bool context,
2536     bool (*notify)(struct virtqueue *),
2537     void (*callback)(struct virtqueue *),
2538     const char *name)
2539 {
2540 
2541     if (virtio_has_feature(vdev, VIRTIO_F_RING_PACKED))
2542         return vring_create_virtqueue_packed(index, num, vring_align,
2543                 vdev, weak_barriers, may_reduce_num,
2544                 context, notify, callback, name);
2545 
2546     return vring_create_virtqueue_split(index, num, vring_align,
2547             vdev, weak_barriers, may_reduce_num,
2548             context, notify, callback, name);
2549 }
2550 EXPORT_SYMBOL_GPL(vring_create_virtqueue);
2551 
2552 /**
2553  * virtqueue_resize - resize the vring of vq
2554  * @_vq: the struct virtqueue we're talking about.
2555  * @num: new ring num
2556  * @recycle: callback to recycle buffers that are no longer used
2557  *
2558  * When it is really necessary to create a new vring, the current vq is put
2559  * into the reset state and the supplied callback is called to recycle any
2560  * buffers that are no longer in use. The old vring is released only after
2561  * the new vring has been created successfully.
2562  *
2563  * Caller must ensure we don't call this with other virtqueue operations
2564  * at the same time (except where noted).
2565  *
2566  * Returns zero or a negative error.
2567  * 0: success.
2568  * -ENOMEM: Failed to allocate a new ring; the original ring size is kept and
2569  *  the vq can still be used normally.
2570  * -EBUSY: Failed to sync with the device; the vq may not work properly.
2571  * -ENOENT: The transport or device does not support resizing.
2572  * -E2BIG/-EINVAL: @num is out of range or otherwise invalid.
2573  * -EPERM: Operation not permitted
2574  *
2575  */
2576 int virtqueue_resize(struct virtqueue *_vq, u32 num,
2577              void (*recycle)(struct virtqueue *vq, void *buf))
2578 {
2579     struct vring_virtqueue *vq = to_vvq(_vq);
2580     struct virtio_device *vdev = vq->vq.vdev;
2581     void *buf;
2582     int err;
2583 
2584     if (!vq->we_own_ring)
2585         return -EPERM;
2586 
2587     if (num > vq->vq.num_max)
2588         return -E2BIG;
2589 
2590     if (!num)
2591         return -EINVAL;
2592 
2593     if ((vq->packed_ring ? vq->packed.vring.num : vq->split.vring.num) == num)
2594         return 0;
2595 
2596     if (!vdev->config->disable_vq_and_reset)
2597         return -ENOENT;
2598 
2599     if (!vdev->config->enable_vq_after_reset)
2600         return -ENOENT;
2601 
2602     err = vdev->config->disable_vq_and_reset(_vq);
2603     if (err)
2604         return err;
2605 
2606     while ((buf = virtqueue_detach_unused_buf(_vq)) != NULL)
2607         recycle(_vq, buf);
2608 
2609     if (vq->packed_ring)
2610         err = virtqueue_resize_packed(_vq, num);
2611     else
2612         err = virtqueue_resize_split(_vq, num);
2613 
2614     if (vdev->config->enable_vq_after_reset(_vq))
2615         return -EBUSY;
2616 
2617     return err;
2618 }
2619 EXPORT_SYMBOL_GPL(virtqueue_resize);
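/*
 * Hedged usage sketch for virtqueue_resize(): drop_buf() is a hypothetical
 * driver helper that frees whatever token the driver originally queued.
 *
 *	static void drop_buf(struct virtqueue *vq, void *buf)
 *	{
 *		kfree(buf);
 *	}
 *
 *	...
 *	err = virtqueue_resize(vq, new_num, drop_buf);
 */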
2620 
2621 /* Only available for split ring */
2622 struct virtqueue *vring_new_virtqueue(unsigned int index,
2623                       unsigned int num,
2624                       unsigned int vring_align,
2625                       struct virtio_device *vdev,
2626                       bool weak_barriers,
2627                       bool context,
2628                       void *pages,
2629                       bool (*notify)(struct virtqueue *vq),
2630                       void (*callback)(struct virtqueue *vq),
2631                       const char *name)
2632 {
2633     struct vring_virtqueue_split vring_split = {};
2634 
2635     if (virtio_has_feature(vdev, VIRTIO_F_RING_PACKED))
2636         return NULL;
2637 
2638     vring_init(&vring_split.vring, num, pages, vring_align);
2639     return __vring_new_virtqueue(index, &vring_split, vdev, weak_barriers,
2640                      context, notify, callback, name);
2641 }
2642 EXPORT_SYMBOL_GPL(vring_new_virtqueue);
2643 
2644 static void vring_free(struct virtqueue *_vq)
2645 {
2646     struct vring_virtqueue *vq = to_vvq(_vq);
2647 
2648     if (vq->we_own_ring) {
2649         if (vq->packed_ring) {
2650             vring_free_queue(vq->vq.vdev,
2651                      vq->packed.ring_size_in_bytes,
2652                      vq->packed.vring.desc,
2653                      vq->packed.ring_dma_addr);
2654 
2655             vring_free_queue(vq->vq.vdev,
2656                      vq->packed.event_size_in_bytes,
2657                      vq->packed.vring.driver,
2658                      vq->packed.driver_event_dma_addr);
2659 
2660             vring_free_queue(vq->vq.vdev,
2661                      vq->packed.event_size_in_bytes,
2662                      vq->packed.vring.device,
2663                      vq->packed.device_event_dma_addr);
2664 
2665             kfree(vq->packed.desc_state);
2666             kfree(vq->packed.desc_extra);
2667         } else {
2668             vring_free_queue(vq->vq.vdev,
2669                      vq->split.queue_size_in_bytes,
2670                      vq->split.vring.desc,
2671                      vq->split.queue_dma_addr);
2672         }
2673     }
2674     if (!vq->packed_ring) {
2675         kfree(vq->split.desc_state);
2676         kfree(vq->split.desc_extra);
2677     }
2678 }
2679 
2680 void vring_del_virtqueue(struct virtqueue *_vq)
2681 {
2682     struct vring_virtqueue *vq = to_vvq(_vq);
2683 
2684     spin_lock(&vq->vq.vdev->vqs_list_lock);
2685     list_del(&_vq->list);
2686     spin_unlock(&vq->vq.vdev->vqs_list_lock);
2687 
2688     vring_free(_vq);
2689 
2690     kfree(vq);
2691 }
2692 EXPORT_SYMBOL_GPL(vring_del_virtqueue);
2693 
2694 /* Manipulates transport-specific feature bits. */
2695 void vring_transport_features(struct virtio_device *vdev)
2696 {
2697     unsigned int i;
2698 
2699     for (i = VIRTIO_TRANSPORT_F_START; i < VIRTIO_TRANSPORT_F_END; i++) {
2700         switch (i) {
2701         case VIRTIO_RING_F_INDIRECT_DESC:
2702             break;
2703         case VIRTIO_RING_F_EVENT_IDX:
2704             break;
2705         case VIRTIO_F_VERSION_1:
2706             break;
2707         case VIRTIO_F_ACCESS_PLATFORM:
2708             break;
2709         case VIRTIO_F_RING_PACKED:
2710             break;
2711         case VIRTIO_F_ORDER_PLATFORM:
2712             break;
2713         default:
2714             /* We don't understand this bit. */
2715             __virtio_clear_bit(vdev, i);
2716         }
2717     }
2718 }
2719 EXPORT_SYMBOL_GPL(vring_transport_features);
2720 
2721 /**
2722  * virtqueue_get_vring_size - return the size of the virtqueue's vring
2723  * @_vq: the struct virtqueue containing the vring of interest.
2724  *
2725  * Returns the size of the vring.  This is mainly used for boasting to
2726  * userspace.  Unlike other operations, this need not be serialized.
2727  */
2728 unsigned int virtqueue_get_vring_size(struct virtqueue *_vq)
2729 {
2730 
2731     struct vring_virtqueue *vq = to_vvq(_vq);
2732 
2733     return vq->packed_ring ? vq->packed.vring.num : vq->split.vring.num;
2734 }
2735 EXPORT_SYMBOL_GPL(virtqueue_get_vring_size);
2736 
2737 /*
2738  * This function should only be called by the core, not directly by the driver.
2739  */
2740 void __virtqueue_break(struct virtqueue *_vq)
2741 {
2742     struct vring_virtqueue *vq = to_vvq(_vq);
2743 
2744     /* Pairs with READ_ONCE() in virtqueue_is_broken(). */
2745     WRITE_ONCE(vq->broken, true);
2746 }
2747 EXPORT_SYMBOL_GPL(__virtqueue_break);
2748 
2749 /*
2750  * This function should only be called by the core, not directly by the driver.
2751  */
2752 void __virtqueue_unbreak(struct virtqueue *_vq)
2753 {
2754     struct vring_virtqueue *vq = to_vvq(_vq);
2755 
2756     /* Pairs with READ_ONCE() in virtqueue_is_broken(). */
2757     WRITE_ONCE(vq->broken, false);
2758 }
2759 EXPORT_SYMBOL_GPL(__virtqueue_unbreak);
2760 
2761 bool virtqueue_is_broken(struct virtqueue *_vq)
2762 {
2763     struct vring_virtqueue *vq = to_vvq(_vq);
2764 
2765     return READ_ONCE(vq->broken);
2766 }
2767 EXPORT_SYMBOL_GPL(virtqueue_is_broken);
2768 
2769 /*
2770  * This should prevent the device from being used, allowing drivers to
2771  * recover.  You may need to grab appropriate locks to flush.
2772  */
2773 void virtio_break_device(struct virtio_device *dev)
2774 {
2775     struct virtqueue *_vq;
2776 
2777     spin_lock(&dev->vqs_list_lock);
2778     list_for_each_entry(_vq, &dev->vqs, list) {
2779         struct vring_virtqueue *vq = to_vvq(_vq);
2780 
2781         /* Pairs with READ_ONCE() in virtqueue_is_broken(). */
2782         WRITE_ONCE(vq->broken, true);
2783     }
2784     spin_unlock(&dev->vqs_list_lock);
2785 }
2786 EXPORT_SYMBOL_GPL(virtio_break_device);
2787 
2788 /*
2789  * This should allow the device to be used by the driver. You may
2790  * need to grab appropriate locks to flush the write to
2791  * vq->broken. This should only be used in specific cases, e.g.
2792  * probing and restoring. This function should only be called by the
2793  * core, not directly by the driver.
2794  */
2795 void __virtio_unbreak_device(struct virtio_device *dev)
2796 {
2797     struct virtqueue *_vq;
2798 
2799     spin_lock(&dev->vqs_list_lock);
2800     list_for_each_entry(_vq, &dev->vqs, list) {
2801         struct vring_virtqueue *vq = to_vvq(_vq);
2802 
2803         /* Pairs with READ_ONCE() in virtqueue_is_broken(). */
2804         WRITE_ONCE(vq->broken, false);
2805     }
2806     spin_unlock(&dev->vqs_list_lock);
2807 }
2808 EXPORT_SYMBOL_GPL(__virtio_unbreak_device);
2809 
2810 dma_addr_t virtqueue_get_desc_addr(struct virtqueue *_vq)
2811 {
2812     struct vring_virtqueue *vq = to_vvq(_vq);
2813 
2814     BUG_ON(!vq->we_own_ring);
2815 
2816     if (vq->packed_ring)
2817         return vq->packed.ring_dma_addr;
2818 
2819     return vq->split.queue_dma_addr;
2820 }
2821 EXPORT_SYMBOL_GPL(virtqueue_get_desc_addr);
2822 
2823 dma_addr_t virtqueue_get_avail_addr(struct virtqueue *_vq)
2824 {
2825     struct vring_virtqueue *vq = to_vvq(_vq);
2826 
2827     BUG_ON(!vq->we_own_ring);
2828 
2829     if (vq->packed_ring)
2830         return vq->packed.driver_event_dma_addr;
2831 
2832     return vq->split.queue_dma_addr +
2833         ((char *)vq->split.vring.avail - (char *)vq->split.vring.desc);
2834 }
2835 EXPORT_SYMBOL_GPL(virtqueue_get_avail_addr);
2836 
2837 dma_addr_t virtqueue_get_used_addr(struct virtqueue *_vq)
2838 {
2839     struct vring_virtqueue *vq = to_vvq(_vq);
2840 
2841     BUG_ON(!vq->we_own_ring);
2842 
2843     if (vq->packed_ring)
2844         return vq->packed.device_event_dma_addr;
2845 
2846     return vq->split.queue_dma_addr +
2847         ((char *)vq->split.vring.used - (char *)vq->split.vring.desc);
2848 }
2849 EXPORT_SYMBOL_GPL(virtqueue_get_used_addr);
2850 
2851 /* Only available for split ring */
2852 const struct vring *virtqueue_get_vring(struct virtqueue *vq)
2853 {
2854     return &to_vvq(vq)->split.vring;
2855 }
2856 EXPORT_SYMBOL_GPL(virtqueue_get_vring);
2857 
2858 MODULE_LICENSE("GPL");