// SPDX-License-Identifier: GPL-2.0
/* Simple test of virtio code, entirely in userspace. */
#define _GNU_SOURCE
#include <sched.h>
#include <err.h>
#include <linux/kernel.h>
#include <linux/err.h>
#include <linux/virtio.h>
#include <linux/vringh.h>
#include <linux/virtio_ring.h>
#include <linux/virtio_config.h>
#include <linux/uaccess.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <sys/mman.h>
#include <sys/wait.h>
#include <fcntl.h>
/* libc headers used directly below; the tools/virtio stub headers pull
 * most of these in indirectly, but spell them out so the file stands
 * alone. */
#include <assert.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
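/*
 * Hooks consumed by the userspace shims in tools/virtio: "user" accesses
 * are only valid inside [__user_addr_min, __user_addr_max), __kmalloc_fake
 * makes the fake kmalloc() hand back a chosen address, and kfree() of
 * anything inside [__kfree_ignore_start, __kfree_ignore_end) is ignored.
 */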
#define USER_MEM (1024*1024)
void *__user_addr_min, *__user_addr_max;
void *__kmalloc_fake, *__kfree_ignore_start, *__kfree_ignore_end;
static u64 user_addr_offset;

#define RINGSIZE 256
#define ALIGN 4096

static bool never_notify_host(struct virtqueue *vq)
{
    abort();
}

static void never_callback_guest(struct virtqueue *vq)
{
    abort();
}

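/*
 * getrange callbacks let vringh validate and translate ring addresses: a
 * lookup succeeds only inside the mapped region, and r->offset is added to
 * a ring address to reach the host-side mapping.  getrange_iov hands back
 * the whole region in one range; getrange_slow below returns single-byte
 * ranges to force vringh to split every access.
 */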
static bool getrange_iov(struct vringh *vrh, u64 addr, struct vringh_range *r)
{
    if (addr < (u64)(unsigned long)__user_addr_min - user_addr_offset)
        return false;
    if (addr >= (u64)(unsigned long)__user_addr_max - user_addr_offset)
        return false;

    r->start = (u64)(unsigned long)__user_addr_min - user_addr_offset;
    r->end_incl = (u64)(unsigned long)__user_addr_max - 1 - user_addr_offset;
    r->offset = user_addr_offset;
    return true;
}

/* We return single byte ranges. */
static bool getrange_slow(struct vringh *vrh, u64 addr, struct vringh_range *r)
{
    if (addr < (u64)(unsigned long)__user_addr_min - user_addr_offset)
        return false;
    if (addr >= (u64)(unsigned long)__user_addr_max - user_addr_offset)
        return false;

    r->start = addr;
    r->end_incl = r->start;
    r->offset = user_addr_offset;
    return true;
}

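/*
 * Guest-side device for the parallel test: a kick to the host is a single
 * byte written down a pipe, counted in ->notifies.
 */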
struct guest_virtio_device {
    struct virtio_device vdev;
    int to_host_fd;
    unsigned long notifies;
};

static bool parallel_notify_host(struct virtqueue *vq)
{
    int rc;
    struct guest_virtio_device *gvdev;

    gvdev = container_of(vq->vdev, struct guest_virtio_device, vdev);
    rc = write(gvdev->to_host_fd, "", 1);
    if (rc < 0)
        return false;
    gvdev->notifies++;
    return true;
}

static bool no_notify_host(struct virtqueue *vq)
{
    return true;
}

#define NUM_XFERS (10000000)

/* We aim for two "distant" cpus: probe 0..4095 with sched_setaffinity()
 * and keep the lowest and highest CPUs this process may run on. */
static void find_cpus(unsigned int *first, unsigned int *last)
{
    unsigned int i;

    *first = -1U;
    *last = 0;
    for (i = 0; i < 4096; i++) {
        cpu_set_t set;
        CPU_ZERO(&set);
        CPU_SET(i, &set);
        if (sched_setaffinity(getpid(), sizeof(set), &set) == 0) {
            if (i < *first)
                *first = i;
            if (i > *last)
                *last = i;
        }
    }
}

/* Opencoded version for fast mode: returns 1 and fills in *head when an
 * available entry is consumed, 0 if the ring is empty, negative on a
 * failed access. */
static inline int vringh_get_head(struct vringh *vrh, u16 *head)
{
    u16 avail_idx, i;
    int err;

    err = get_user(avail_idx, &vrh->vring.avail->idx);
    if (err)
        return err;

    if (vrh->last_avail_idx == avail_idx)
        return 0;

    /* Only get avail ring entries after they have been exposed by guest. */
    virtio_rmb(vrh->weak_barriers);

    i = vrh->last_avail_idx & (vrh->vring.num - 1);

    err = get_user(*head, &vrh->vring.avail->ring[i]);
    if (err)
        return err;

    vrh->last_avail_idx++;
    return 1;
}

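/*
 * Fork a host (parent, vringh side) and a guest (child, virtqueue side)
 * that share a file-backed ring mapped at two different addresses, pin
 * them to distant CPUs, and shuttle NUM_XFERS buffers using pipes as the
 * notification channel.
 */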
static int parallel_test(u64 features,
             bool (*getrange)(struct vringh *vrh,
                      u64 addr, struct vringh_range *r),
             bool fast_vringh)
{
    void *host_map, *guest_map;
    int fd, mapsize, to_guest[2], to_host[2];
    unsigned long xfers = 0, notifies = 0, receives = 0;
    unsigned int first_cpu, last_cpu;
    cpu_set_t cpu_set;
    char buf[128];

    /* Create real file to mmap. */
    fd = open("/tmp/vringh_test-file", O_RDWR|O_CREAT|O_TRUNC, 0600);
    if (fd < 0)
        err(1, "Opening /tmp/vringh_test-file");

    /* Extra room at the end for some data, and indirects */
    mapsize = vring_size(RINGSIZE, ALIGN)
        + RINGSIZE * 2 * sizeof(int)
        + RINGSIZE * 6 * sizeof(struct vring_desc);
    mapsize = (mapsize + getpagesize() - 1) & ~(getpagesize() - 1);
    ftruncate(fd, mapsize);

    /* Parent and child use separate addresses, to check our mapping logic! */
    host_map = mmap(NULL, mapsize, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0);
    guest_map = mmap(NULL, mapsize, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0);

    pipe(to_guest);
    pipe(to_host);

    CPU_ZERO(&cpu_set);
    find_cpus(&first_cpu, &last_cpu);
    printf("Using CPUS %u and %u\n", first_cpu, last_cpu);
    fflush(stdout);

    if (fork() != 0) {
        struct vringh vrh;
        int status, err, rlen = 0;
        char rbuf[5];

        /* We are the host: never access guest addresses! */
        munmap(guest_map, mapsize);

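        /* The ring holds guest-side addresses; record the guest-to-host
         * delta so the getrange callbacks can translate them into this
         * mapping. */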
        __user_addr_min = host_map;
        __user_addr_max = __user_addr_min + mapsize;
        user_addr_offset = host_map - guest_map;
        assert(user_addr_offset);

        close(to_guest[0]);
        close(to_host[1]);

        vring_init(&vrh.vring, RINGSIZE, host_map, ALIGN);
        vringh_init_user(&vrh, features, RINGSIZE, true,
                 vrh.vring.desc, vrh.vring.avail, vrh.vring.used);
        CPU_SET(first_cpu, &cpu_set);
        if (sched_setaffinity(getpid(), sizeof(cpu_set), &cpu_set))
            errx(1, "Could not set affinity to cpu %u", first_cpu);

        while (xfers < NUM_XFERS) {
            struct iovec host_riov[2], host_wiov[2];
            struct vringh_iov riov, wiov;
            u16 head, written;

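            /* Fast mode: spin until a head is available (kicking the
             * guest whenever it asks for a notification) and complete it
             * with a zero-length write, never touching the payload. */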
            if (fast_vringh) {
                for (;;) {
                    err = vringh_get_head(&vrh, &head);
                    if (err != 0)
                        break;
                    err = vringh_need_notify_user(&vrh);
                    if (err < 0)
                        errx(1, "vringh_need_notify_user: %i",
                             err);
                    if (err) {
                        write(to_guest[1], "", 1);
                        notifies++;
                    }
                }
                if (err != 1)
                    errx(1, "vringh_get_head");
                written = 0;
                goto complete;
            } else {
                vringh_iov_init(&riov,
                        host_riov,
                        ARRAY_SIZE(host_riov));
                vringh_iov_init(&wiov,
                        host_wiov,
                        ARRAY_SIZE(host_wiov));

                err = vringh_getdesc_user(&vrh, &riov, &wiov,
                              getrange, &head);
            }
            if (err == 0) {
                err = vringh_need_notify_user(&vrh);
                if (err < 0)
                    errx(1, "vringh_need_notify_user: %i",
                         err);
                if (err) {
                    write(to_guest[1], "", 1);
                    notifies++;
                }

                if (!vringh_notify_enable_user(&vrh))
                    continue;

                /* Swallow all notifies at once. */
                if (read(to_host[0], buf, sizeof(buf)) < 1)
                    break;

                vringh_notify_disable_user(&vrh);
                receives++;
                continue;
            }
            if (err != 1)
                errx(1, "vringh_getdesc_user: %i", err);

            /* We simply copy bytes. */
            if (riov.used) {
                rlen = vringh_iov_pull_user(&riov, rbuf,
                                sizeof(rbuf));
                if (rlen != 4)
                    errx(1, "vringh_iov_pull_user: %i",
                         rlen);
                assert(riov.i == riov.used);
                written = 0;
            } else {
                err = vringh_iov_push_user(&wiov, rbuf, rlen);
                if (err != rlen)
                    errx(1, "vringh_iov_push_user: %i",
                         err);
                assert(wiov.i == wiov.used);
                written = err;
            }
        complete:
            xfers++;

            err = vringh_complete_user(&vrh, head, written);
            if (err != 0)
                errx(1, "vringh_complete_user: %i", err);
        }

        err = vringh_need_notify_user(&vrh);
        if (err < 0)
            errx(1, "vringh_need_notify_user: %i", err);
        if (err) {
            write(to_guest[1], "", 1);
            notifies++;
        }
        wait(&status);
        if (!WIFEXITED(status))
            errx(1, "Child died with signal %i?", WTERMSIG(status));
        if (WEXITSTATUS(status) != 0)
            errx(1, "Child exited %i?", WEXITSTATUS(status));
        printf("Host: notified %lu, pinged %lu\n", notifies, receives);
        return 0;
    } else {
        struct guest_virtio_device gvdev;
        struct virtqueue *vq;
        unsigned int *data;
        struct vring_desc *indirects;
        unsigned int finished = 0;

        /* We pass sg[]s pointing into here, but we need RINGSIZE+1 */
        data = guest_map + vring_size(RINGSIZE, ALIGN);
        indirects = (void *)data + (RINGSIZE + 1) * 2 * sizeof(int);

        /* We are the guest. */
        munmap(host_map, mapsize);

        close(to_guest[1]);
        close(to_host[0]);

        gvdev.vdev.features = features;
        INIT_LIST_HEAD(&gvdev.vdev.vqs);
        gvdev.to_host_fd = to_host[1];
        gvdev.notifies = 0;

        CPU_SET(last_cpu, &cpu_set);
        if (sched_setaffinity(getpid(), sizeof(cpu_set), &cpu_set))
            err(1, "Could not set affinity to cpu %u", last_cpu);

        vq = vring_new_virtqueue(0, RINGSIZE, ALIGN, &gvdev.vdev, true,
                     false, guest_map,
                     fast_vringh ? no_notify_host
                     : parallel_notify_host,
                     never_callback_guest, "guest vq");

        /* Don't kfree indirects. */
        __kfree_ignore_start = indirects;
        __kfree_ignore_end = indirects + RINGSIZE * 6;

        while (xfers < NUM_XFERS) {
            struct scatterlist sg[4];
            unsigned int num_sg, len;
            int *dbuf, err;
            bool output = !(xfers % 2);

            /* Consume bufs. */
            while ((dbuf = virtqueue_get_buf(vq, &len)) != NULL) {
                if (len == 4)
                    assert(*dbuf == finished - 1);
                else if (!fast_vringh)
                    assert(*dbuf == finished);
                finished++;
            }

            /* Produce a buffer. */
            dbuf = data + (xfers % (RINGSIZE + 1));

            if (output)
                *dbuf = xfers;
            else
                *dbuf = -1;

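            /* Vary the sg layout of the 4-byte buffer to exercise 1-, 2-,
             * 3- and 4-element descriptor chains. */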
            switch ((xfers / sizeof(*dbuf)) % 4) {
            case 0:
                /* Nasty three-element sg list. */
                sg_init_table(sg, num_sg = 3);
                sg_set_buf(&sg[0], (void *)dbuf, 1);
                sg_set_buf(&sg[1], (void *)dbuf + 1, 2);
                sg_set_buf(&sg[2], (void *)dbuf + 3, 1);
                break;
            case 1:
                sg_init_table(sg, num_sg = 2);
                sg_set_buf(&sg[0], (void *)dbuf, 1);
                sg_set_buf(&sg[1], (void *)dbuf + 1, 3);
                break;
            case 2:
                sg_init_table(sg, num_sg = 1);
                sg_set_buf(&sg[0], (void *)dbuf, 4);
                break;
            case 3:
                sg_init_table(sg, num_sg = 4);
                sg_set_buf(&sg[0], (void *)dbuf, 1);
                sg_set_buf(&sg[1], (void *)dbuf + 1, 1);
                sg_set_buf(&sg[2], (void *)dbuf + 2, 1);
                sg_set_buf(&sg[3], (void *)dbuf + 3, 1);
                break;
            }

            /* May allocate an indirect, so force it to allocate
             * user addr */
            __kmalloc_fake = indirects + (xfers % RINGSIZE) * 4;
            if (output)
                err = virtqueue_add_outbuf(vq, sg, num_sg, dbuf,
                               GFP_KERNEL);
            else
                err = virtqueue_add_inbuf(vq, sg, num_sg,
                              dbuf, GFP_KERNEL);

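            /* Ring full: re-arm the guest-side callback and block on the
             * host's kick before retrying. */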
            if (err == -ENOSPC) {
                if (!virtqueue_enable_cb_delayed(vq))
                    continue;
                /* Swallow all notifies at once. */
                if (read(to_guest[0], buf, sizeof(buf)) < 1)
                    break;

                receives++;
                virtqueue_disable_cb(vq);
                continue;
            }

            if (err)
                errx(1, "virtqueue_add_in/outbuf: %i", err);

            xfers++;
            virtqueue_kick(vq);
        }

        /* Any extra? */
        while (finished != xfers) {
            int *dbuf;
            unsigned int len;

            /* Consume bufs. */
            dbuf = virtqueue_get_buf(vq, &len);
            if (dbuf) {
                if (len == 4)
                    assert(*dbuf == finished - 1);
                else
                    assert(len == 0);
                finished++;
                continue;
            }

            if (!virtqueue_enable_cb_delayed(vq))
                continue;
            if (read(to_guest[0], buf, sizeof(buf)) < 1)
                break;

            receives++;
            virtqueue_disable_cb(vq);
        }

        printf("Guest: notified %lu, pinged %lu\n",
               gvdev.notifies, receives);
        vring_del_virtqueue(vq);
        return 0;
    }
}

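/*
 * Single-process test: guest virtqueue and host vringh drive the same ring
 * at __user_addr_min.  Walks a simple two-descriptor transfer, a huge
 * indirect transfer, batched completion via vringh_complete_multi_user(),
 * ring wrap-around, and finally a hand-built indirect descriptor layout.
 */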
int main(int argc, char *argv[])
{
    struct virtio_device vdev;
    struct virtqueue *vq;
    struct vringh vrh;
    struct scatterlist guest_sg[RINGSIZE], *sgs[2];
    struct iovec host_riov[2], host_wiov[2];
    struct vringh_iov riov, wiov;
    struct vring_used_elem used[RINGSIZE];
    char buf[28];
    u16 head;
    int err;
    unsigned i;
    void *ret;
    bool (*getrange)(struct vringh *vrh, u64 addr, struct vringh_range *r);
    bool fast_vringh = false, parallel = false;

    getrange = getrange_iov;
    vdev.features = 0;
    INIT_LIST_HEAD(&vdev.vqs);

    while (argv[1]) {
        if (strcmp(argv[1], "--indirect") == 0)
            __virtio_set_bit(&vdev, VIRTIO_RING_F_INDIRECT_DESC);
        else if (strcmp(argv[1], "--eventidx") == 0)
            __virtio_set_bit(&vdev, VIRTIO_RING_F_EVENT_IDX);
        else if (strcmp(argv[1], "--virtio-1") == 0)
            __virtio_set_bit(&vdev, VIRTIO_F_VERSION_1);
        else if (strcmp(argv[1], "--slow-range") == 0)
            getrange = getrange_slow;
        else if (strcmp(argv[1], "--fast-vringh") == 0)
            fast_vringh = true;
        else if (strcmp(argv[1], "--parallel") == 0)
            parallel = true;
        else
            errx(1, "Unknown arg %s", argv[1]);
        argv++;
    }

    if (parallel)
        return parallel_test(vdev.features, getrange, fast_vringh);

    if (posix_memalign(&__user_addr_min, PAGE_SIZE, USER_MEM) != 0)
        abort();
    __user_addr_max = __user_addr_min + USER_MEM;
    memset(__user_addr_min, 0, vring_size(RINGSIZE, ALIGN));

    /* Set up guest side. */
    vq = vring_new_virtqueue(0, RINGSIZE, ALIGN, &vdev, true, false,
                 __user_addr_min,
                 never_notify_host, never_callback_guest,
                 "guest vq");

    /* Set up host side. */
    vring_init(&vrh.vring, RINGSIZE, __user_addr_min, ALIGN);
    vringh_init_user(&vrh, vdev.features, RINGSIZE, true,
             vrh.vring.desc, vrh.vring.avail, vrh.vring.used);

    /* No descriptor to get yet... */
    err = vringh_getdesc_user(&vrh, &riov, &wiov, getrange, &head);
    if (err != 0)
        errx(1, "vringh_getdesc_user: %i", err);

    /* Guest puts in a descriptor. */
    memcpy(__user_addr_max - 1, "a", 1);
    sg_init_table(guest_sg, 1);
    sg_set_buf(&guest_sg[0], __user_addr_max - 1, 1);
    sg_init_table(guest_sg + 1, 1);
    sg_set_buf(&guest_sg[1], __user_addr_max - 3, 2);
    sgs[0] = &guest_sg[0];
    sgs[1] = &guest_sg[1];

    /* May allocate an indirect, so force it to allocate user addr */
    __kmalloc_fake = __user_addr_min + vring_size(RINGSIZE, ALIGN);
    err = virtqueue_add_sgs(vq, sgs, 1, 1, &err, GFP_KERNEL);
    if (err)
        errx(1, "virtqueue_add_sgs: %i", err);
    __kmalloc_fake = NULL;

    /* Host retrieves it. */
    vringh_iov_init(&riov, host_riov, ARRAY_SIZE(host_riov));
    vringh_iov_init(&wiov, host_wiov, ARRAY_SIZE(host_wiov));

    err = vringh_getdesc_user(&vrh, &riov, &wiov, getrange, &head);
    if (err != 1)
        errx(1, "vringh_getdesc_user: %i", err);

    assert(riov.used == 1);
    assert(riov.iov[0].iov_base == __user_addr_max - 1);
    assert(riov.iov[0].iov_len == 1);
    if (getrange != getrange_slow) {
        assert(wiov.used == 1);
        assert(wiov.iov[0].iov_base == __user_addr_max - 3);
        assert(wiov.iov[0].iov_len == 2);
    } else {
        assert(wiov.used == 2);
        assert(wiov.iov[0].iov_base == __user_addr_max - 3);
        assert(wiov.iov[0].iov_len == 1);
        assert(wiov.iov[1].iov_base == __user_addr_max - 2);
        assert(wiov.iov[1].iov_len == 1);
    }

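    /* Pulls and pushes are bounded by the descriptors: one readable byte
     * and two writable bytes were queued, so a 5-byte pull returns 1, a
     * 5-byte push returns 2, and a second attempt at either returns 0. */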
    err = vringh_iov_pull_user(&riov, buf, 5);
    if (err != 1)
        errx(1, "vringh_iov_pull_user: %i", err);
    assert(buf[0] == 'a');
    assert(riov.i == 1);
    assert(vringh_iov_pull_user(&riov, buf, 5) == 0);

    memcpy(buf, "bcdef", 5);
    err = vringh_iov_push_user(&wiov, buf, 5);
    if (err != 2)
        errx(1, "vringh_iov_push_user: %i", err);
    assert(memcmp(__user_addr_max - 3, "bc", 2) == 0);
    assert(wiov.i == wiov.used);
    assert(vringh_iov_push_user(&wiov, buf, 5) == 0);

    /* Host is done. */
    err = vringh_complete_user(&vrh, head, err);
    if (err != 0)
        errx(1, "vringh_complete_user: %i", err);

    /* Guest should see used token now. */
    __kfree_ignore_start = __user_addr_min + vring_size(RINGSIZE, ALIGN);
    __kfree_ignore_end = __kfree_ignore_start + 1;
    ret = virtqueue_get_buf(vq, &i);
    if (ret != &err)
        errx(1, "virtqueue_get_buf: %p", ret);
    assert(i == 2);

    /* Guest puts in a huge descriptor. */
    sg_init_table(guest_sg, RINGSIZE);
    for (i = 0; i < RINGSIZE; i++) {
        sg_set_buf(&guest_sg[i],
               __user_addr_max - USER_MEM/4, USER_MEM/4);
    }

    /* Fill contents with recognisable garbage. */
    for (i = 0; i < USER_MEM/4; i++)
        ((char *)__user_addr_max - USER_MEM/4)[i] = i;

    /* This will allocate an indirect, so force it to allocate user addr */
    __kmalloc_fake = __user_addr_min + vring_size(RINGSIZE, ALIGN);
    err = virtqueue_add_outbuf(vq, guest_sg, RINGSIZE, &err, GFP_KERNEL);
    if (err)
        errx(1, "virtqueue_add_outbuf (large): %i", err);
    __kmalloc_fake = NULL;

    /* Host picks it up (allocates new iov). */
    vringh_iov_init(&riov, host_riov, ARRAY_SIZE(host_riov));
    vringh_iov_init(&wiov, host_wiov, ARRAY_SIZE(host_wiov));

    err = vringh_getdesc_user(&vrh, &riov, &wiov, getrange, &head);
    if (err != 1)
        errx(1, "vringh_getdesc_user: %i", err);

    assert(riov.max_num & VRINGH_IOV_ALLOCATED);
    assert(riov.iov != host_riov);
    if (getrange != getrange_slow)
        assert(riov.used == RINGSIZE);
    else
        assert(riov.used == RINGSIZE * USER_MEM/4);

    assert(!(wiov.max_num & VRINGH_IOV_ALLOCATED));
    assert(wiov.used == 0);

    /* Pull data back out (in odd chunks), should be as expected. */
    for (i = 0; i < RINGSIZE * USER_MEM/4; i += 3) {
        err = vringh_iov_pull_user(&riov, buf, 3);
        if (err != 3 && i + err != RINGSIZE * USER_MEM/4)
            errx(1, "vringh_iov_pull_user large: %i", err);
        assert(buf[0] == (char)i);
        assert(err < 2 || buf[1] == (char)(i + 1));
        assert(err < 3 || buf[2] == (char)(i + 2));
    }
    assert(riov.i == riov.used);
    vringh_iov_cleanup(&riov);
    vringh_iov_cleanup(&wiov);

    /* Complete using multi interface, just because we can. */
    used[0].id = head;
    used[0].len = 0;
    err = vringh_complete_multi_user(&vrh, used, 1);
    if (err)
        errx(1, "vringh_complete_multi_user(1): %i", err);

    /* Free up those descriptors. */
    ret = virtqueue_get_buf(vq, &i);
    if (ret != &err)
        errx(1, "virtqueue_get_buf: %p", ret);

    /* Add lots of descriptors. */
    sg_init_table(guest_sg, 1);
    sg_set_buf(&guest_sg[0], __user_addr_max - 1, 1);
    for (i = 0; i < RINGSIZE; i++) {
        err = virtqueue_add_outbuf(vq, guest_sg, 1, &err, GFP_KERNEL);
        if (err)
            errx(1, "virtqueue_add_outbuf (multiple): %i", err);
    }

    /* Now get many, and consume them all at once. */
    vringh_iov_init(&riov, host_riov, ARRAY_SIZE(host_riov));
    vringh_iov_init(&wiov, host_wiov, ARRAY_SIZE(host_wiov));

    for (i = 0; i < RINGSIZE; i++) {
        err = vringh_getdesc_user(&vrh, &riov, &wiov, getrange, &head);
        if (err != 1)
            errx(1, "vringh_getdesc_user: %i", err);
        used[i].id = head;
        used[i].len = 0;
    }
    /* Make sure it wraps around ring, to test! */
    assert(vrh.vring.used->idx % RINGSIZE != 0);
    err = vringh_complete_multi_user(&vrh, used, RINGSIZE);
    if (err)
        errx(1, "vringh_complete_multi_user: %i", err);

    /* Free those buffers. */
    for (i = 0; i < RINGSIZE; i++) {
        unsigned len;
        assert(virtqueue_get_buf(vq, &len) != NULL);
    }

    /* Test weird (but legal!) indirect. */
    if (__virtio_test_bit(&vdev, VIRTIO_RING_F_INDIRECT_DESC)) {
        char *data = __user_addr_max - USER_MEM/4;
        struct vring_desc *d = __user_addr_max - USER_MEM/2;
        struct vring vring;

        /* Force creation of direct, which we modify. */
        __virtio_clear_bit(&vdev, VIRTIO_RING_F_INDIRECT_DESC);
        vq = vring_new_virtqueue(0, RINGSIZE, ALIGN, &vdev, true,
                     false, __user_addr_min,
                     never_notify_host,
                     never_callback_guest,
                     "guest vq");

        sg_init_table(guest_sg, 4);
        sg_set_buf(&guest_sg[0], d, sizeof(*d)*2);
        sg_set_buf(&guest_sg[1], d + 2, sizeof(*d)*1);
        sg_set_buf(&guest_sg[2], data + 6, 4);
        sg_set_buf(&guest_sg[3], d + 3, sizeof(*d)*3);

        err = virtqueue_add_outbuf(vq, guest_sg, 4, &err, GFP_KERNEL);
        if (err)
            errx(1, "virtqueue_add_outbuf (indirect): %i", err);

        vring_init(&vring, RINGSIZE, __user_addr_min, ALIGN);

        /* They're used in order, but double-check... */
        assert(vring.desc[0].addr == (unsigned long)d);
        assert(vring.desc[1].addr == (unsigned long)(d+2));
        assert(vring.desc[2].addr == (unsigned long)data + 6);
        assert(vring.desc[3].addr == (unsigned long)(d+3));
        vring.desc[0].flags |= VRING_DESC_F_INDIRECT;
        vring.desc[1].flags |= VRING_DESC_F_INDIRECT;
        vring.desc[3].flags |= VRING_DESC_F_INDIRECT;

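        /* Descriptors 0, 1 and 3 of the direct chain now point at tables
         * built in d[], so the readable payload becomes:
         *   first indirect:  d[0..1] -> data+0,  1 + 2 bytes
         *   second indirect: d[2]    -> data+3,  3 bytes
         *   direct desc 2:           -> data+6,  4 bytes
         *   third indirect:  d[3..5] -> data+10, 5 + 6 + 7 bytes
         * i.e. 28 contiguous bytes of data[], pulled back linearly below. */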
        /* First indirect */
        d[0].addr = (unsigned long)data;
        d[0].len = 1;
        d[0].flags = VRING_DESC_F_NEXT;
        d[0].next = 1;
        d[1].addr = (unsigned long)data + 1;
        d[1].len = 2;
        d[1].flags = 0;

        /* Second indirect */
        d[2].addr = (unsigned long)data + 3;
        d[2].len = 3;
        d[2].flags = 0;

        /* Third indirect */
        d[3].addr = (unsigned long)data + 10;
        d[3].len = 5;
        d[3].flags = VRING_DESC_F_NEXT;
        d[3].next = 1;
        d[4].addr = (unsigned long)data + 15;
        d[4].len = 6;
        d[4].flags = VRING_DESC_F_NEXT;
        d[4].next = 2;
        d[5].addr = (unsigned long)data + 21;
        d[5].len = 7;
        d[5].flags = 0;

        /* Host picks it up (allocates new iov). */
        vringh_iov_init(&riov, host_riov, ARRAY_SIZE(host_riov));
        vringh_iov_init(&wiov, host_wiov, ARRAY_SIZE(host_wiov));

        err = vringh_getdesc_user(&vrh, &riov, &wiov, getrange, &head);
        if (err != 1)
            errx(1, "vringh_getdesc_user: %i", err);

        if (head != 0)
            errx(1, "vringh_getdesc_user: head %i not 0", head);

        assert(riov.max_num & VRINGH_IOV_ALLOCATED);
        if (getrange != getrange_slow)
            assert(riov.used == 7);
        else
            assert(riov.used == 28);
        err = vringh_iov_pull_user(&riov, buf, 29);
        assert(err == 28);

        /* Data should be linear. */
        for (i = 0; i < err; i++)
            assert(buf[i] == i);
        vringh_iov_cleanup(&riov);
    }

    /* Don't leak memory... */
    vring_del_virtqueue(vq);
    free(__user_addr_min);

    return 0;
}