// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright (C) 2016 Red Hat, Inc.
 * Author: Michael S. Tsirkin <mst@redhat.com>
 *
 * Simple descriptor-based ring. virtio 0.9 compatible event index is used for
 * signalling, unconditionally.
 */
#define _GNU_SOURCE
#include "main.h"
#include <stdlib.h>
#include <stdio.h>
#include <string.h>

/* Next - Where next entry will be written.
 * Prev - "Next" value when event triggered previously.
 * Event - Peer requested event after writing this entry.
 */
static inline bool need_event(unsigned short event,
                              unsigned short next,
                              unsigned short prev)
{
    return (unsigned short)(next - event - 1) < (unsigned short)(next - prev);
}
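
/* Worked example (illustrative values only): suppose we last notified the
 * peer when next was 10 (prev == 10), we have since advanced to next == 15,
 * and the peer asked to be notified after writing entry 12 (event == 12).
 * Then next - event - 1 == 2 and next - prev == 5, so 2 < 5 and we notify.
 * If instead event == 20 (an entry we have not written yet), next - event - 1
 * wraps to 65530 as an unsigned short, the comparison fails and no
 * notification is sent.  Doing the arithmetic in unsigned short keeps the
 * test correct across 16-bit index wrap-around.
 */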

/* Design:
 * Guest adds descriptors with unique index values and DESC_HW in flags.
 * Host overwrites used descriptors with correct len, index, and DESC_HW clear.
 * Flags are always set last.
 */
#define DESC_HW 0x1

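/* Ownership and ordering (as labelled by the Barrier A-D comments below):
 * a descriptor with DESC_HW set is owned by the host, one with DESC_HW clear
 * is owned by the guest.  Barrier A: smp_release() in add_inbuf() before
 * setting DESC_HW pairs with smp_acquire() in use_buf() after observing
 * DESC_HW, so the host reads addr/len/index only after they were fully
 * written.  Barrier B: smp_release() in use_buf() before clearing DESC_HW
 * pairs with smp_acquire() in get_buf() after observing DESC_HW clear, so
 * the guest reads the completed len only after the host wrote it.  Barriers
 * C and D are full barriers between writing an event index and re-checking
 * the ring (enable_kick()/enable_call()) and, on the other side, between
 * writing the ring and reading the event index (kick_available()/
 * call_used()); together they prevent lost notifications.
 */
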
struct desc {
    unsigned short flags;
    unsigned short index;
    unsigned len;
    unsigned long long addr;
};

/* how much padding is needed to avoid false cache sharing */
#define HOST_GUEST_PADDING 0x80
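
/* 0x80 is 128 bytes: kick_index and call_index below end up 128 bytes apart,
 * and the guest/host state structures are each padded past 128 bytes, so on
 * CPUs with cache lines of up to 128 bytes the hot fields never share a line.
 */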

/* Mostly read */
struct event {
    unsigned short kick_index;
    unsigned char reserved0[HOST_GUEST_PADDING - 2];
    unsigned short call_index;
    unsigned char reserved1[HOST_GUEST_PADDING - 2];
};

struct data {
    void *buf; /* descriptor is writeable, we can't get buf from there */
    void *data;
} *data;

struct desc *ring;
struct event *event;

struct guest {
    unsigned avail_idx;
    unsigned last_used_idx;
    unsigned num_free;
    unsigned kicked_avail_idx;
    unsigned char reserved[HOST_GUEST_PADDING - 12];
} guest;

struct host {
    /* we do not need to track last avail index
     * unless we have more than one in flight.
     */
    unsigned used_idx;
    unsigned called_used_idx;
    unsigned char reserved[HOST_GUEST_PADDING - 4];
} host;

/* implemented by ring */
void alloc_ring(void)
{
    int ret;
    int i;

    ret = posix_memalign((void **)&ring, 0x1000, ring_size * sizeof *ring);
    if (ret) {
        perror("Unable to allocate ring buffer.\n");
        exit(3);
    }
    event = calloc(1, sizeof(*event));
    if (!event) {
        perror("Unable to allocate event buffer.\n");
        exit(3);
    }
    guest.avail_idx = 0;
    guest.kicked_avail_idx = -1;
    guest.last_used_idx = 0;
    host.used_idx = 0;
    host.called_used_idx = -1;
    for (i = 0; i < ring_size; ++i) {
        struct desc desc = {
            .index = i,
        };
        ring[i] = desc;
    }
    guest.num_free = ring_size;
    data = calloc(ring_size, sizeof(*data));
    if (!data) {
        perror("Unable to allocate data buffer.\n");
        exit(3);
    }
}
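
/* Note on the initial state set up above: kicked_avail_idx and
 * called_used_idx start at -1 (0xffff once truncated to unsigned short by
 * need_event()), so the first need_event() check after any progress succeeds
 * and the very first kick_available()/call_used() is never suppressed.
 */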

/* guest side */
int add_inbuf(unsigned len, void *buf, void *datap)
{
    unsigned head, index;

    if (!guest.num_free)
        return -1;

    guest.num_free--;
    head = (ring_size - 1) & (guest.avail_idx++);

    /* Start with a write. On MESI architectures this helps
     * avoid a shared state with a consumer that is polling this descriptor.
     */
    ring[head].addr = (unsigned long)(void *)buf;
    ring[head].len = len;
    /* read below might bypass write above. That is OK because it's just an
     * optimization. If this happens, we will get the cache line in a
     * shared state which is unfortunate, but probably not worth it to
     * add an explicit full barrier to avoid this.
     */
    barrier();
    index = ring[head].index;
    data[index].buf = buf;
    data[index].data = datap;
    /* Barrier A (for pairing) */
    smp_release();
    ring[head].flags = DESC_HW;

    return 0;
}

void *get_buf(unsigned *lenp, void **bufp)
{
    unsigned head = (ring_size - 1) & guest.last_used_idx;
    unsigned index;
    void *datap;

    if (ring[head].flags & DESC_HW)
        return NULL;
    /* Barrier B (for pairing) */
    smp_acquire();
    *lenp = ring[head].len;
    index = ring[head].index & (ring_size - 1);
    datap = data[index].data;
    *bufp = data[index].buf;
    data[index].buf = NULL;
    data[index].data = NULL;
    guest.num_free++;
    guest.last_used_idx++;
    return datap;
}

bool used_empty()
{
    unsigned head = (ring_size - 1) & guest.last_used_idx;

    return (ring[head].flags & DESC_HW);
}

void disable_call()
{
    /* Doing nothing to disable calls might cause
     * extra interrupts, but reduces the number of cache misses.
     */
}

bool enable_call()
{
    event->call_index = guest.last_used_idx;
    /* Flush call index write */
    /* Barrier D (for pairing) */
    smp_mb();
    return used_empty();
}

void kick_available(void)
{
    bool need;

    /* Flush in previous flags write */
    /* Barrier C (for pairing) */
    smp_mb();
    need = need_event(event->kick_index,
                      guest.avail_idx,
                      guest.kicked_avail_idx);

    guest.kicked_avail_idx = guest.avail_idx;
    if (need)
        kick();
}
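
/* Illustrative sketch only: the real benchmark loops live in main.c, and
 * struct pkt, next_packet(), consume() and wait_for_call() below are
 * hypothetical helpers, not part of this file.  A guest-side
 * producer/consumer built on the functions above might look roughly like:
 *
 *    // produce: post as many buffers as fit, then notify the host
 *    while (guest.num_free) {
 *        struct pkt *p = next_packet();          // hypothetical
 *        if (!p || add_inbuf(p->len, p->buf, p) < 0)
 *            break;
 *    }
 *    kick_available();
 *
 *    // consume: reap completions, re-arming the call event when empty
 *    for (;;) {
 *        unsigned len;
 *        void *buf;
 *        void *p = get_buf(&len, &buf);
 *
 *        if (!p) {
 *            if (enable_call())        // true: still empty, safe to sleep
 *                wait_for_call();      // hypothetical
 *            disable_call();
 *            continue;
 *        }
 *        consume(p, buf, len);         // hypothetical
 *    }
 */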

/* host side */
void disable_kick()
{
    /* Doing nothing to disable kicks might cause
     * extra interrupts, but reduces the number of cache misses.
     */
}

bool enable_kick()
{
    event->kick_index = host.used_idx;
    /* Barrier C (for pairing) */
    smp_mb();
    return avail_empty();
}

bool avail_empty()
{
    unsigned head = (ring_size - 1) & host.used_idx;

    return !(ring[head].flags & DESC_HW);
}

bool use_buf(unsigned *lenp, void **bufp)
{
    unsigned head = (ring_size - 1) & host.used_idx;

    if (!(ring[head].flags & DESC_HW))
        return false;

    /* make sure length read below is not speculated */
    /* Barrier A (for pairing) */
    smp_acquire();

    /* simple in-order completion: we don't need
     * to touch index at all. This also means we
     * can just modify the descriptor in-place.
     */
    ring[head].len--;
    /* Make sure len is valid before flags.
     * Note: an alternative is to write len and flags in one access -
     * possible on 64 bit architectures - but wmb is free on Intel anyway,
     * so I have no way to test whether it's a gain.
     */
    /* Barrier B (for pairing) */
    smp_release();
    ring[head].flags = 0;
    host.used_idx++;
    return true;
}
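
/* Note: this implementation never writes through lenp/bufp; the parameters
 * exist only to match the common ringtest interface (presumably declared in
 * main.h).  The len-- above likewise just touches the field where a real
 * device would write back the used length.
 */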

void call_used(void)
{
    bool need;

    /* Flush in previous flags write */
    /* Barrier D (for pairing) */
    smp_mb();

    need = need_event(event->call_index,
                      host.used_idx,
                      host.called_used_idx);

    host.called_used_idx = host.used_idx;

    if (need)
        call();
}
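
/* Illustrative sketch only: the real host loop lives in main.c, and
 * wait_for_kick() below is a hypothetical blocking wait, not part of this
 * file.  A host-side consumer built on the functions above might look
 * roughly like:
 *
 *    for (;;) {
 *        unsigned len;
 *        void *buf;
 *
 *        while (!use_buf(&len, &buf)) {
 *            if (enable_kick())        // true: still no work, safe to sleep
 *                wait_for_kick();      // hypothetical
 *            disable_kick();
 *        }
 *        call_used();    // notify the guest if it asked for it
 *    }
 */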