Back to home page

OSCL-LXR

 
 

    


0001 /* Evaluate MSG_ZEROCOPY
0002  *
0003  * Send traffic between two processes over one of the supported
0004  * protocols and modes:
0005  *
0006  * PF_INET/PF_INET6
0007  * - SOCK_STREAM
0008  * - SOCK_DGRAM
0009  * - SOCK_DGRAM with UDP_CORK
0010  * - SOCK_RAW
0011  * - SOCK_RAW with IP_HDRINCL
0012  *
0013  * PF_PACKET
0014  * - SOCK_DGRAM
0015  * - SOCK_RAW
0016  *
0017  * PF_RDS
0018  * - SOCK_SEQPACKET
0019  *
0020  * Start this program on two connected hosts, one in send mode and
0021  * the other with option '-r' to put it in receiver mode.
0022  *
0023  * If zerocopy mode ('-z') is enabled, the sender will verify that
0024  * the kernel queues completions on the error queue for all zerocopy
0025  * transfers.
0026  */
0027 
0028 #define _GNU_SOURCE
0029 
0030 #include <arpa/inet.h>
0031 #include <error.h>
0032 #include <errno.h>
0033 #include <limits.h>
0034 #include <linux/errqueue.h>
0035 #include <linux/if_packet.h>
0036 #include <linux/ipv6.h>
0037 #include <linux/socket.h>
0038 #include <linux/sockios.h>
0039 #include <net/ethernet.h>
0040 #include <net/if.h>
0041 #include <netinet/ip.h>
0042 #include <netinet/ip6.h>
0043 #include <netinet/tcp.h>
0044 #include <netinet/udp.h>
0045 #include <poll.h>
0046 #include <sched.h>
0047 #include <stdbool.h>
0048 #include <stdio.h>
0049 #include <stdint.h>
0050 #include <stdlib.h>
0051 #include <string.h>
0052 #include <sys/ioctl.h>
0053 #include <sys/socket.h>
0054 #include <sys/stat.h>
0055 #include <sys/time.h>
0056 #include <sys/types.h>
0057 #include <sys/wait.h>
0058 #include <unistd.h>
0059 #include <linux/rds.h>
0060 
0061 #ifndef SO_EE_ORIGIN_ZEROCOPY
0062 #define SO_EE_ORIGIN_ZEROCOPY       5
0063 #endif
0064 
0065 #ifndef SO_ZEROCOPY
0066 #define SO_ZEROCOPY 60
0067 #endif
0068 
0069 #ifndef SO_EE_CODE_ZEROCOPY_COPIED
0070 #define SO_EE_CODE_ZEROCOPY_COPIED  1
0071 #endif
0072 
0073 #ifndef MSG_ZEROCOPY
0074 #define MSG_ZEROCOPY    0x4000000
0075 #endif
0076 
/* Peer addresses filled in by setup_sockaddr(), and their length */
static struct sockaddr_storage cfg_dst_addr;
static struct sockaddr_storage cfg_src_addr;
static socklen_t cfg_alen;

/* Command line switches: -m, -r and -z */
static bool cfg_cork_mixed;
static bool cfg_rx;
static bool cfg_zerocopy;

/* Command line values and their defaults */
static int cfg_cork;                    /* -c: send calls per corked datagram */
static int cfg_cpu = -1;                /* -C: default: pin to last cpu */
static int cfg_family = PF_UNSPEC;      /* -4 / -6 */
static int cfg_ifindex = 1;             /* -i: interface for PF_PACKET */
static int cfg_payload_len;             /* -s */
static int cfg_port = 8000;             /* -p */
static int cfg_runtime_ms = 4200;       /* -t */
static int cfg_verbose;                 /* -v, may be repeated */
static int cfg_waittime_ms = 500;       /* poll timeout */

/* Data sent by the transmitter and compared against by the receiver */
static char payload[IP_MAXPACKET];

/* Running totals reported when the test ends */
static long packets;
static long bytes;
static long completions;
static long expected_completions;

/* -1 until the first completion notification has been seen */
static int zerocopied = -1;

/* First id the next completion notification is expected to start at */
static uint32_t next_completion;
0098 
/* Return the current gettimeofday() time expressed in milliseconds. */
static unsigned long gettimeofday_ms(void)
{
    struct timeval now;

    gettimeofday(&now, NULL);

    /* seconds scaled up to ms plus microseconds scaled down to ms */
    return now.tv_sec * 1000 + now.tv_usec / 1000;
}
0106 
/*
 * Compute the one's complement checksum over num_words 16-bit words
 * starting at start. Returns the complemented, folded sum.
 */
static uint16_t get_ip_csum(const uint16_t *start, int num_words)
{
    const uint16_t *word = start;
    unsigned long acc = 0;
    int left;

    for (left = num_words; left > 0; left--)
        acc += *word++;

    /* fold any carry above bit 15 back into the low 16 bits */
    while (acc > 0xFFFF)
        acc = (acc >> 16) + (acc & 0xFFFF);

    return ~acc;
}
0120 
0121 static int do_setcpu(int cpu)
0122 {
0123     cpu_set_t mask;
0124 
0125     CPU_ZERO(&mask);
0126     CPU_SET(cpu, &mask);
0127     if (sched_setaffinity(0, sizeof(mask), &mask))
0128         fprintf(stderr, "cpu: unable to pin, may increase variance.\n");
0129     else if (cfg_verbose)
0130         fprintf(stderr, "cpu: %u\n", cpu);
0131 
0132     return 0;
0133 }
0134 
/* Set an integer socket option; exit with an error message on failure. */
static void do_setsockopt(int fd, int level, int optname, int val)
{
    int rc = setsockopt(fd, level, optname, &val, sizeof(val));

    if (rc == 0)
        return;

    error(1, errno, "setsockopt %d.%d: %d", level, optname, val);
}
0140 
0141 static int do_poll(int fd, int events)
0142 {
0143     struct pollfd pfd;
0144     int ret;
0145 
0146     pfd.events = events;
0147     pfd.revents = 0;
0148     pfd.fd = fd;
0149 
0150     ret = poll(&pfd, 1, cfg_waittime_ms);
0151     if (ret == -1)
0152         error(1, errno, "poll");
0153 
0154     return ret && (pfd.revents & events);
0155 }
0156 
/*
 * Accept one connection on listening socket fd, then close the listening
 * socket. Returns the connected socket; exits on any failure.
 */
static int do_accept(int fd)
{
    int conn_fd;

    conn_fd = accept(fd, NULL, NULL);
    if (conn_fd == -1)
        error(1, errno, "accept");

    /* only a single peer is served, the listener is no longer needed */
    if (close(fd) != 0)
        error(1, errno, "close listen sock");

    return conn_fd;
}
0169 
/*
 * Write an RDS zerocopy cookie control message into msg->msg_control.
 *
 * @msg:    message whose control buffer must already point at storage of at
 *          least CMSG_SPACE(sizeof(cookie)) bytes
 * @cookie: value stored as the payload of the control message
 *
 * Exits if no control buffer is attached.
 */
static void add_zcopy_cookie(struct msghdr *msg, uint32_t cookie)
{
    struct cmsghdr *cm;

    /* a caller bug, not a failed system call: do not report errno */
    if (!msg->msg_control)
        error(1, 0, "NULL cookie");

    cm = (void *)msg->msg_control;
    cm->cmsg_len = CMSG_LEN(sizeof(cookie));
    cm->cmsg_level = SOL_RDS;
    cm->cmsg_type = RDS_CMSG_ZCOPY_COOKIE;
    memcpy(CMSG_DATA(cm), &cookie, sizeof(cookie));
}
0182 
0183 static bool do_sendmsg(int fd, struct msghdr *msg, bool do_zerocopy, int domain)
0184 {
0185     int ret, len, i, flags;
0186     static uint32_t cookie;
0187     char ckbuf[CMSG_SPACE(sizeof(cookie))];
0188 
0189     len = 0;
0190     for (i = 0; i < msg->msg_iovlen; i++)
0191         len += msg->msg_iov[i].iov_len;
0192 
0193     flags = MSG_DONTWAIT;
0194     if (do_zerocopy) {
0195         flags |= MSG_ZEROCOPY;
0196         if (domain == PF_RDS) {
0197             memset(&msg->msg_control, 0, sizeof(msg->msg_control));
0198             msg->msg_controllen = CMSG_SPACE(sizeof(cookie));
0199             msg->msg_control = (struct cmsghdr *)ckbuf;
0200             add_zcopy_cookie(msg, ++cookie);
0201         }
0202     }
0203 
0204     ret = sendmsg(fd, msg, flags);
0205     if (ret == -1 && errno == EAGAIN)
0206         return false;
0207     if (ret == -1)
0208         error(1, errno, "send");
0209     if (cfg_verbose && ret != len)
0210         fprintf(stderr, "send: ret=%u != %u\n", ret, len);
0211 
0212     if (len) {
0213         packets++;
0214         bytes += ret;
0215         if (do_zerocopy && ret)
0216             expected_completions++;
0217     }
0218     if (do_zerocopy && domain == PF_RDS) {
0219         msg->msg_control = NULL;
0220         msg->msg_controllen = 0;
0221     }
0222 
0223     return true;
0224 }
0225 
0226 static void do_sendmsg_corked(int fd, struct msghdr *msg)
0227 {
0228     bool do_zerocopy = cfg_zerocopy;
0229     int i, payload_len, extra_len;
0230 
0231     /* split up the packet. for non-multiple, make first buffer longer */
0232     payload_len = cfg_payload_len / cfg_cork;
0233     extra_len = cfg_payload_len - (cfg_cork * payload_len);
0234 
0235     do_setsockopt(fd, IPPROTO_UDP, UDP_CORK, 1);
0236 
0237     for (i = 0; i < cfg_cork; i++) {
0238 
0239         /* in mixed-frags mode, alternate zerocopy and copy frags
0240          * start with non-zerocopy, to ensure attach later works
0241          */
0242         if (cfg_cork_mixed)
0243             do_zerocopy = (i & 1);
0244 
0245         msg->msg_iov[0].iov_len = payload_len + extra_len;
0246         extra_len = 0;
0247 
0248         do_sendmsg(fd, msg, do_zerocopy,
0249                (cfg_dst_addr.ss_family == AF_INET ?
0250                 PF_INET : PF_INET6));
0251     }
0252 
0253     do_setsockopt(fd, IPPROTO_UDP, UDP_CORK, 0);
0254 }
0255 
0256 static int setup_iph(struct iphdr *iph, uint16_t payload_len)
0257 {
0258     struct sockaddr_in *daddr = (void *) &cfg_dst_addr;
0259     struct sockaddr_in *saddr = (void *) &cfg_src_addr;
0260 
0261     memset(iph, 0, sizeof(*iph));
0262 
0263     iph->version    = 4;
0264     iph->tos    = 0;
0265     iph->ihl    = 5;
0266     iph->ttl    = 2;
0267     iph->saddr  = saddr->sin_addr.s_addr;
0268     iph->daddr  = daddr->sin_addr.s_addr;
0269     iph->protocol   = IPPROTO_EGP;
0270     iph->tot_len    = htons(sizeof(*iph) + payload_len);
0271     iph->check  = get_ip_csum((void *) iph, iph->ihl << 1);
0272 
0273     return sizeof(*iph);
0274 }
0275 
0276 static int setup_ip6h(struct ipv6hdr *ip6h, uint16_t payload_len)
0277 {
0278     struct sockaddr_in6 *daddr = (void *) &cfg_dst_addr;
0279     struct sockaddr_in6 *saddr = (void *) &cfg_src_addr;
0280 
0281     memset(ip6h, 0, sizeof(*ip6h));
0282 
0283     ip6h->version       = 6;
0284     ip6h->payload_len   = htons(payload_len);
0285     ip6h->nexthdr       = IPPROTO_EGP;
0286     ip6h->hop_limit     = 2;
0287     ip6h->saddr     = saddr->sin6_addr;
0288     ip6h->daddr     = daddr->sin6_addr;
0289 
0290     return sizeof(*ip6h);
0291 }
0292 
0293 
0294 static void setup_sockaddr(int domain, const char *str_addr,
0295                struct sockaddr_storage *sockaddr)
0296 {
0297     struct sockaddr_in6 *addr6 = (void *) sockaddr;
0298     struct sockaddr_in *addr4 = (void *) sockaddr;
0299 
0300     switch (domain) {
0301     case PF_INET:
0302         memset(addr4, 0, sizeof(*addr4));
0303         addr4->sin_family = AF_INET;
0304         addr4->sin_port = htons(cfg_port);
0305         if (str_addr &&
0306             inet_pton(AF_INET, str_addr, &(addr4->sin_addr)) != 1)
0307             error(1, 0, "ipv4 parse error: %s", str_addr);
0308         break;
0309     case PF_INET6:
0310         memset(addr6, 0, sizeof(*addr6));
0311         addr6->sin6_family = AF_INET6;
0312         addr6->sin6_port = htons(cfg_port);
0313         if (str_addr &&
0314             inet_pton(AF_INET6, str_addr, &(addr6->sin6_addr)) != 1)
0315             error(1, 0, "ipv6 parse error: %s", str_addr);
0316         break;
0317     default:
0318         error(1, 0, "illegal domain");
0319     }
0320 }
0321 
0322 static int do_setup_tx(int domain, int type, int protocol)
0323 {
0324     int fd;
0325 
0326     fd = socket(domain, type, protocol);
0327     if (fd == -1)
0328         error(1, errno, "socket t");
0329 
0330     do_setsockopt(fd, SOL_SOCKET, SO_SNDBUF, 1 << 21);
0331     if (cfg_zerocopy)
0332         do_setsockopt(fd, SOL_SOCKET, SO_ZEROCOPY, 1);
0333 
0334     if (domain != PF_PACKET && domain != PF_RDS)
0335         if (connect(fd, (void *) &cfg_dst_addr, cfg_alen))
0336             error(1, errno, "connect");
0337 
0338     if (domain == PF_RDS) {
0339         if (bind(fd, (void *) &cfg_src_addr, cfg_alen))
0340             error(1, errno, "bind");
0341     }
0342 
0343     return fd;
0344 }
0345 
0346 static uint32_t do_process_zerocopy_cookies(struct rds_zcopy_cookies *ck)
0347 {
0348     int i;
0349 
0350     if (ck->num > RDS_MAX_ZCOOKIES)
0351         error(1, 0, "Returned %d cookies, max expected %d\n",
0352               ck->num, RDS_MAX_ZCOOKIES);
0353     for (i = 0; i < ck->num; i++)
0354         if (cfg_verbose >= 2)
0355             fprintf(stderr, "%d\n", ck->cookies[i]);
0356     return ck->num;
0357 }
0358 
0359 static bool do_recvmsg_completion(int fd)
0360 {
0361     char cmsgbuf[CMSG_SPACE(sizeof(struct rds_zcopy_cookies))];
0362     struct rds_zcopy_cookies *ck;
0363     struct cmsghdr *cmsg;
0364     struct msghdr msg;
0365     bool ret = false;
0366 
0367     memset(&msg, 0, sizeof(msg));
0368     msg.msg_control = cmsgbuf;
0369     msg.msg_controllen = sizeof(cmsgbuf);
0370 
0371     if (recvmsg(fd, &msg, MSG_DONTWAIT))
0372         return ret;
0373 
0374     if (msg.msg_flags & MSG_CTRUNC)
0375         error(1, errno, "recvmsg notification: truncated");
0376 
0377     for (cmsg = CMSG_FIRSTHDR(&msg); cmsg; cmsg = CMSG_NXTHDR(&msg, cmsg)) {
0378         if (cmsg->cmsg_level == SOL_RDS &&
0379             cmsg->cmsg_type == RDS_CMSG_ZCOPY_COMPLETION) {
0380 
0381             ck = (struct rds_zcopy_cookies *)CMSG_DATA(cmsg);
0382             completions += do_process_zerocopy_cookies(ck);
0383             ret = true;
0384             break;
0385         }
0386         error(0, 0, "ignoring cmsg at level %d type %d\n",
0387                 cmsg->cmsg_level, cmsg->cmsg_type);
0388     }
0389     return ret;
0390 }
0391 
0392 static bool do_recv_completion(int fd, int domain)
0393 {
0394     struct sock_extended_err *serr;
0395     struct msghdr msg = {};
0396     struct cmsghdr *cm;
0397     uint32_t hi, lo, range;
0398     int ret, zerocopy;
0399     char control[100];
0400 
0401     if (domain == PF_RDS)
0402         return do_recvmsg_completion(fd);
0403 
0404     msg.msg_control = control;
0405     msg.msg_controllen = sizeof(control);
0406 
0407     ret = recvmsg(fd, &msg, MSG_ERRQUEUE);
0408     if (ret == -1 && errno == EAGAIN)
0409         return false;
0410     if (ret == -1)
0411         error(1, errno, "recvmsg notification");
0412     if (msg.msg_flags & MSG_CTRUNC)
0413         error(1, errno, "recvmsg notification: truncated");
0414 
0415     cm = CMSG_FIRSTHDR(&msg);
0416     if (!cm)
0417         error(1, 0, "cmsg: no cmsg");
0418     if (!((cm->cmsg_level == SOL_IP && cm->cmsg_type == IP_RECVERR) ||
0419           (cm->cmsg_level == SOL_IPV6 && cm->cmsg_type == IPV6_RECVERR) ||
0420           (cm->cmsg_level == SOL_PACKET && cm->cmsg_type == PACKET_TX_TIMESTAMP)))
0421         error(1, 0, "serr: wrong type: %d.%d",
0422               cm->cmsg_level, cm->cmsg_type);
0423 
0424     serr = (void *) CMSG_DATA(cm);
0425 
0426     if (serr->ee_origin != SO_EE_ORIGIN_ZEROCOPY)
0427         error(1, 0, "serr: wrong origin: %u", serr->ee_origin);
0428     if (serr->ee_errno != 0)
0429         error(1, 0, "serr: wrong error code: %u", serr->ee_errno);
0430 
0431     hi = serr->ee_data;
0432     lo = serr->ee_info;
0433     range = hi - lo + 1;
0434 
0435     /* Detect notification gaps. These should not happen often, if at all.
0436      * Gaps can occur due to drops, reordering and retransmissions.
0437      */
0438     if (lo != next_completion)
0439         fprintf(stderr, "gap: %u..%u does not append to %u\n",
0440             lo, hi, next_completion);
0441     next_completion = hi + 1;
0442 
0443     zerocopy = !(serr->ee_code & SO_EE_CODE_ZEROCOPY_COPIED);
0444     if (zerocopied == -1)
0445         zerocopied = zerocopy;
0446     else if (zerocopied != zerocopy) {
0447         fprintf(stderr, "serr: inconsistent\n");
0448         zerocopied = zerocopy;
0449     }
0450 
0451     if (cfg_verbose >= 2)
0452         fprintf(stderr, "completed: %u (h=%u l=%u)\n",
0453             range, hi, lo);
0454 
0455     completions += range;
0456     return true;
0457 }
0458 
/* Drain the errqueue: keep reading until no notification is pending */
static void do_recv_completions(int fd, int domain)
{
    for (;;) {
        if (!do_recv_completion(fd, domain))
            break;
    }
}
0464 
0465 /* Wait for all remaining completions on the errqueue */
0466 static void do_recv_remaining_completions(int fd, int domain)
0467 {
0468     int64_t tstop = gettimeofday_ms() + cfg_waittime_ms;
0469 
0470     while (completions < expected_completions &&
0471            gettimeofday_ms() < tstop) {
0472         if (do_poll(fd, domain == PF_RDS ? POLLIN : POLLERR))
0473             do_recv_completions(fd, domain);
0474     }
0475 
0476     if (completions < expected_completions)
0477         fprintf(stderr, "missing notifications: %lu < %lu\n",
0478             completions, expected_completions);
0479 }
0480 
0481 static void do_tx(int domain, int type, int protocol)
0482 {
0483     struct iovec iov[3] = { {0} };
0484     struct sockaddr_ll laddr;
0485     struct msghdr msg = {0};
0486     struct ethhdr eth;
0487     union {
0488         struct ipv6hdr ip6h;
0489         struct iphdr iph;
0490     } nh;
0491     uint64_t tstop;
0492     int fd;
0493 
0494     fd = do_setup_tx(domain, type, protocol);
0495 
0496     if (domain == PF_PACKET) {
0497         uint16_t proto = cfg_family == PF_INET ? ETH_P_IP : ETH_P_IPV6;
0498 
0499         /* sock_raw passes ll header as data */
0500         if (type == SOCK_RAW) {
0501             memset(eth.h_dest, 0x06, ETH_ALEN);
0502             memset(eth.h_source, 0x02, ETH_ALEN);
0503             eth.h_proto = htons(proto);
0504             iov[0].iov_base = &eth;
0505             iov[0].iov_len = sizeof(eth);
0506             msg.msg_iovlen++;
0507         }
0508 
0509         /* both sock_raw and sock_dgram expect name */
0510         memset(&laddr, 0, sizeof(laddr));
0511         laddr.sll_family    = AF_PACKET;
0512         laddr.sll_ifindex   = cfg_ifindex;
0513         laddr.sll_protocol  = htons(proto);
0514         laddr.sll_halen     = ETH_ALEN;
0515 
0516         memset(laddr.sll_addr, 0x06, ETH_ALEN);
0517 
0518         msg.msg_name        = &laddr;
0519         msg.msg_namelen     = sizeof(laddr);
0520     }
0521 
0522     /* packet and raw sockets with hdrincl must pass network header */
0523     if (domain == PF_PACKET || protocol == IPPROTO_RAW) {
0524         if (cfg_family == PF_INET)
0525             iov[1].iov_len = setup_iph(&nh.iph, cfg_payload_len);
0526         else
0527             iov[1].iov_len = setup_ip6h(&nh.ip6h, cfg_payload_len);
0528 
0529         iov[1].iov_base = (void *) &nh;
0530         msg.msg_iovlen++;
0531     }
0532 
0533     if (domain == PF_RDS) {
0534         msg.msg_name = &cfg_dst_addr;
0535         msg.msg_namelen =  (cfg_dst_addr.ss_family == AF_INET ?
0536                     sizeof(struct sockaddr_in) :
0537                     sizeof(struct sockaddr_in6));
0538     }
0539 
0540     iov[2].iov_base = payload;
0541     iov[2].iov_len = cfg_payload_len;
0542     msg.msg_iovlen++;
0543     msg.msg_iov = &iov[3 - msg.msg_iovlen];
0544 
0545     tstop = gettimeofday_ms() + cfg_runtime_ms;
0546     do {
0547         if (cfg_cork)
0548             do_sendmsg_corked(fd, &msg);
0549         else
0550             do_sendmsg(fd, &msg, cfg_zerocopy, domain);
0551 
0552         while (!do_poll(fd, POLLOUT)) {
0553             if (cfg_zerocopy)
0554                 do_recv_completions(fd, domain);
0555         }
0556 
0557     } while (gettimeofday_ms() < tstop);
0558 
0559     if (cfg_zerocopy)
0560         do_recv_remaining_completions(fd, domain);
0561 
0562     if (close(fd))
0563         error(1, errno, "close");
0564 
0565     fprintf(stderr, "tx=%lu (%lu MB) txc=%lu zc=%c\n",
0566         packets, bytes >> 20, completions,
0567         zerocopied == 1 ? 'y' : 'n');
0568 }
0569 
0570 static int do_setup_rx(int domain, int type, int protocol)
0571 {
0572     int fd;
0573 
0574     /* If tx over PF_PACKET, rx over PF_INET(6)/SOCK_RAW,
0575      * to recv the only copy of the packet, not a clone
0576      */
0577     if (domain == PF_PACKET)
0578         error(1, 0, "Use PF_INET/SOCK_RAW to read");
0579 
0580     if (type == SOCK_RAW && protocol == IPPROTO_RAW)
0581         error(1, 0, "IPPROTO_RAW: not supported on Rx");
0582 
0583     fd = socket(domain, type, protocol);
0584     if (fd == -1)
0585         error(1, errno, "socket r");
0586 
0587     do_setsockopt(fd, SOL_SOCKET, SO_RCVBUF, 1 << 21);
0588     do_setsockopt(fd, SOL_SOCKET, SO_RCVLOWAT, 1 << 16);
0589     do_setsockopt(fd, SOL_SOCKET, SO_REUSEPORT, 1);
0590 
0591     if (bind(fd, (void *) &cfg_dst_addr, cfg_alen))
0592         error(1, errno, "bind");
0593 
0594     if (type == SOCK_STREAM) {
0595         if (listen(fd, 1))
0596             error(1, errno, "listen");
0597         fd = do_accept(fd);
0598     }
0599 
0600     return fd;
0601 }
0602 
0603 /* Flush all outstanding bytes for the tcp receive queue */
0604 static void do_flush_tcp(int fd)
0605 {
0606     int ret;
0607 
0608     /* MSG_TRUNC flushes up to len bytes */
0609     ret = recv(fd, NULL, 1 << 21, MSG_TRUNC | MSG_DONTWAIT);
0610     if (ret == -1 && errno == EAGAIN)
0611         return;
0612     if (ret == -1)
0613         error(1, errno, "flush");
0614     if (!ret)
0615         return;
0616 
0617     packets++;
0618     bytes += ret;
0619 }
0620 
0621 /* Flush all outstanding datagrams. Verify first few bytes of each. */
0622 static void do_flush_datagram(int fd, int type)
0623 {
0624     int ret, off = 0;
0625     char buf[64];
0626 
0627     /* MSG_TRUNC will return full datagram length */
0628     ret = recv(fd, buf, sizeof(buf), MSG_DONTWAIT | MSG_TRUNC);
0629     if (ret == -1 && errno == EAGAIN)
0630         return;
0631 
0632     /* raw ipv4 return with header, raw ipv6 without */
0633     if (cfg_family == PF_INET && type == SOCK_RAW) {
0634         off += sizeof(struct iphdr);
0635         ret -= sizeof(struct iphdr);
0636     }
0637 
0638     if (ret == -1)
0639         error(1, errno, "recv");
0640     if (ret != cfg_payload_len)
0641         error(1, 0, "recv: ret=%u != %u", ret, cfg_payload_len);
0642     if (ret > sizeof(buf) - off)
0643         ret = sizeof(buf) - off;
0644     if (memcmp(buf + off, payload, ret))
0645         error(1, 0, "recv: data mismatch");
0646 
0647     packets++;
0648     bytes += cfg_payload_len;
0649 }
0650 
0651 static void do_rx(int domain, int type, int protocol)
0652 {
0653     const int cfg_receiver_wait_ms = 400;
0654     uint64_t tstop;
0655     int fd;
0656 
0657     fd = do_setup_rx(domain, type, protocol);
0658 
0659     tstop = gettimeofday_ms() + cfg_runtime_ms + cfg_receiver_wait_ms;
0660     do {
0661         if (type == SOCK_STREAM)
0662             do_flush_tcp(fd);
0663         else
0664             do_flush_datagram(fd, type);
0665 
0666         do_poll(fd, POLLIN);
0667 
0668     } while (gettimeofday_ms() < tstop);
0669 
0670     if (close(fd))
0671         error(1, errno, "close");
0672 
0673     fprintf(stderr, "rx=%lu (%lu MB)\n", packets, bytes >> 20);
0674 }
0675 
0676 static void do_test(int domain, int type, int protocol)
0677 {
0678     int i;
0679 
0680     if (cfg_cork && (domain == PF_PACKET || type != SOCK_DGRAM))
0681         error(1, 0, "can only cork udp sockets");
0682 
0683     do_setcpu(cfg_cpu);
0684 
0685     for (i = 0; i < IP_MAXPACKET; i++)
0686         payload[i] = 'a' + (i % 26);
0687 
0688     if (cfg_rx)
0689         do_rx(domain, type, protocol);
0690     else
0691         do_tx(domain, type, protocol);
0692 }
0693 
0694 static void usage(const char *filepath)
0695 {
0696     error(1, 0, "Usage: %s [options] <test>", filepath);
0697 }
0698 
0699 static void parse_opts(int argc, char **argv)
0700 {
0701     const int max_payload_len = sizeof(payload) -
0702                     sizeof(struct ipv6hdr) -
0703                     sizeof(struct tcphdr) -
0704                     40 /* max tcp options */;
0705     int c;
0706     char *daddr = NULL, *saddr = NULL;
0707     char *cfg_test;
0708 
0709     cfg_payload_len = max_payload_len;
0710 
0711     while ((c = getopt(argc, argv, "46c:C:D:i:mp:rs:S:t:vz")) != -1) {
0712         switch (c) {
0713         case '4':
0714             if (cfg_family != PF_UNSPEC)
0715                 error(1, 0, "Pass one of -4 or -6");
0716             cfg_family = PF_INET;
0717             cfg_alen = sizeof(struct sockaddr_in);
0718             break;
0719         case '6':
0720             if (cfg_family != PF_UNSPEC)
0721                 error(1, 0, "Pass one of -4 or -6");
0722             cfg_family = PF_INET6;
0723             cfg_alen = sizeof(struct sockaddr_in6);
0724             break;
0725         case 'c':
0726             cfg_cork = strtol(optarg, NULL, 0);
0727             break;
0728         case 'C':
0729             cfg_cpu = strtol(optarg, NULL, 0);
0730             break;
0731         case 'D':
0732             daddr = optarg;
0733             break;
0734         case 'i':
0735             cfg_ifindex = if_nametoindex(optarg);
0736             if (cfg_ifindex == 0)
0737                 error(1, errno, "invalid iface: %s", optarg);
0738             break;
0739         case 'm':
0740             cfg_cork_mixed = true;
0741             break;
0742         case 'p':
0743             cfg_port = strtoul(optarg, NULL, 0);
0744             break;
0745         case 'r':
0746             cfg_rx = true;
0747             break;
0748         case 's':
0749             cfg_payload_len = strtoul(optarg, NULL, 0);
0750             break;
0751         case 'S':
0752             saddr = optarg;
0753             break;
0754         case 't':
0755             cfg_runtime_ms = 200 + strtoul(optarg, NULL, 10) * 1000;
0756             break;
0757         case 'v':
0758             cfg_verbose++;
0759             break;
0760         case 'z':
0761             cfg_zerocopy = true;
0762             break;
0763         }
0764     }
0765 
0766     cfg_test = argv[argc - 1];
0767     if (strcmp(cfg_test, "rds") == 0) {
0768         if (!daddr)
0769             error(1, 0, "-D <server addr> required for PF_RDS\n");
0770         if (!cfg_rx && !saddr)
0771             error(1, 0, "-S <client addr> required for PF_RDS\n");
0772     }
0773     setup_sockaddr(cfg_family, daddr, &cfg_dst_addr);
0774     setup_sockaddr(cfg_family, saddr, &cfg_src_addr);
0775 
0776     if (cfg_payload_len > max_payload_len)
0777         error(1, 0, "-s: payload exceeds max (%d)", max_payload_len);
0778     if (cfg_cork_mixed && (!cfg_zerocopy || !cfg_cork))
0779         error(1, 0, "-m: cork_mixed requires corking and zerocopy");
0780 
0781     if (optind != argc - 1)
0782         usage(argv[0]);
0783 }
0784 
0785 int main(int argc, char **argv)
0786 {
0787     const char *cfg_test;
0788 
0789     parse_opts(argc, argv);
0790 
0791     cfg_test = argv[argc - 1];
0792 
0793     if (!strcmp(cfg_test, "packet"))
0794         do_test(PF_PACKET, SOCK_RAW, 0);
0795     else if (!strcmp(cfg_test, "packet_dgram"))
0796         do_test(PF_PACKET, SOCK_DGRAM, 0);
0797     else if (!strcmp(cfg_test, "raw"))
0798         do_test(cfg_family, SOCK_RAW, IPPROTO_EGP);
0799     else if (!strcmp(cfg_test, "raw_hdrincl"))
0800         do_test(cfg_family, SOCK_RAW, IPPROTO_RAW);
0801     else if (!strcmp(cfg_test, "tcp"))
0802         do_test(cfg_family, SOCK_STREAM, 0);
0803     else if (!strcmp(cfg_test, "udp"))
0804         do_test(cfg_family, SOCK_DGRAM, 0);
0805     else if (!strcmp(cfg_test, "rds"))
0806         do_test(PF_RDS, SOCK_SEQPACKET, 0);
0807     else
0808         error(1, 0, "unknown cfg_test %s", cfg_test);
0809 
0810     return 0;
0811 }