Back to home page

OSCL-LXR

 
 

    


0001 #!/bin/bash
0002 # SPDX-License-Identifier: GPL-2.0
0003 #
0004 # Copyright (c) 2019 Facebook
0005 #
0006 # This program is free software; you can redistribute it and/or
0007 # modify it under the terms of version 2 of the GNU General Public
0008 # License as published by the Free Software Foundation.
0009 
#######################################
# Print the help text for this script and terminate the script.
# Globals:   name (read) - script name shown in the synopsis line
# Outputs:   usage text on stdout
# Returns:   does not return; calls exit with the status of the last
#            command executed (the final echo)
#######################################
Usage() {
  cat <<EOF
Script for testing HBM (Host Bandwidth Manager) framework.
It creates a cgroup to use for testing and loads a BPF program to limit
egress or ingress bandwidth. It then uses iperf3 or netperf to create
loads. The output is the goodput in Mbps (unless -D was used).

USAGE: $name [out] [-b=<prog>|--bpf=<prog>] [-c=<cc>|--cc=<cc>]
             [-D] [-d=<delay>|--delay=<delay>] [--debug] [-E] [--edt]
             [-f=<#flows>|--flows=<#flows>] [-h] [-i=<id>|--id=<id>]
             [-l] [-N] [--no_cn] [-p=<port>|--port=<port>] [-P]
             [-q=<qdisc>] [-r=<rate>|--rate=<rate>] [-R]
             [-s=<server>|--server=<server>] [-S|--stats]
             [-t=<time>|--time=<time>] [-w] [cubic|dctcp]
  Where:
    out               egress (default)
    -b or --bpf       BPF program filename to load and attach.
                      Default is hbm_out_kern.o for egress.
    -c or --cc        TCP congestion control (cubic or dctcp)
    --debug           print BPF trace buffer
    -d or --delay     add a delay in ms using netem
    -D                In addition to the goodput in Mbps, it also outputs
                      other detailed information. This information is
                      test dependent (i.e. iperf3 or netperf).
    -E                enable ECN (not required for dctcp)
    --edt             use fq's Earliest Departure Time (requires fq)
    -f or --flows     number of concurrent flows (default=1)
    -i or --id        cgroup id (an integer, default is 1)
    -N                use netperf instead of iperf3
    --no_cn           Do not return CN notifications
    -l                do not limit flows using loopback
    -h                Help
    -p or --port      iperf3 port (default is 5201)
    -P                use an iperf3 instance for each flow
    -q                use the specified qdisc
    -r or --rate      rate in Mbps (default is 1Gbps)
    -R                Use TCP_RR for netperf. 1st flow has req
                      size of 10KB, rest of 1MB. Reply in all
                      cases is 1 byte.
                      More detailed output for each flow can be found
                      in the files netperf.<cg>.<flow>, where <cg> is the
                      cgroup id as specified with the -i flag, and <flow>
                      is the flow id starting at 1 and increasing by 1 for
                      each flow (as specified by -f).
    -s or --server    hostname of netperf server. Used to create netperf
                      test traffic between two hosts (default is within host)
                      netserver must be running on the host.
    -S or --stats     whether to update hbm stats (default is yes).
    -t or --time      duration of iperf3 in seconds (default=5)
    -w                Work conserving flag. cgroup can increase its
                      bandwidth beyond the rate limit specified
                      while there is available bandwidth. Current
                      implementation assumes there is only one NIC
                      (eth0), but can be extended to support multiple
                      NICs.
    cubic or dctcp    specify which TCP CC to use
EOF
  echo " "
  exit
}
0067 
0068 #set -x
0069 
# Default settings. processArgs overrides most of these from the command line.
debug_flag=0      # set to 1 by --debug
args="$*"         # all command-line arguments joined into one string;
                  # processArgs splits it again on whitespace
name="$0"         # script name, shown by Usage
netem=0           # netem delay in ms (-d); 0 means no delay is added
cc=x              # TCP congestion control; "x" means leave it unchanged
dir="-o"          # direction flag passed to hbm
dir_name="out"    # direction suffix of the stats file hbm.<id>.<dir_name>
dur=5             # test duration in seconds (-t)
flows=1           # number of concurrent flows (-f)
id=1              # cgroup id (-i)
prog=""           # BPF program file passed to hbm (-b); empty passes none
port=5201         # first iperf3 port (-p)
rate=1000         # rate limit in Mbps (-r); later reused for measured goodput
multi_iperf=0     # 1 = one iperf3 instance per flow (-P)
flow_cnt=1        # loop counter over flows
use_netperf=0     # 1 = generate load with netperf instead of iperf3 (-N)
rr=0              # 1 = netperf TCP_RR test (-R)
ecn=0             # 1 = enable net.ipv4.tcp_ecn for the test (-E)
details=0         # 1 = print detailed output (-D)
server=""         # remote netperf server (-s); empty means this host
qdisc=""          # qdisc to install on eth0 (-q, or "fq" with --edt)
flags=""          # extra option string accumulated for hbm
do_stats=0        # 1 = -S given: pass -s to hbm and print its stats file

BPFFS=/sys/fs/bpf # mount point of the BPF filesystem
#######################################
# Make sure the BPF filesystem is mounted at $BPFFS, mounting it if it is not.
# Globals:   BPFFS (read) - mount point to check
# Outputs:   a one-line status message on stdout
# Returns:   0 if already mounted, otherwise the exit status of mount
#######################################
config_bpffs() {
  # Match the mount point as a whole space-delimited field of the mount
  # table so that a longer path such as "${BPFFS}_old" is not mistaken for it.
  if mount | grep -qF -- " ${BPFFS} " ; then
    echo "bpffs already mounted"
  else
    echo "bpffs not mounted. Mounting..."
    mount -t bpf none "$BPFFS"
  fi
}
0103 
#######################################
# Start ./hbm in the background and report its PID.
# Globals:   dir, id, rate, dur, flags, dbg, prog (read)
# Outputs:   the PID of the background hbm process on stdout;
#            hbm.out in the current directory receives the command line
#            followed by everything hbm writes to stdout/stderr
#######################################
start_hbm() {
  rm -f hbm.out
  echo "./hbm $dir -n $id -r $rate -t $dur $flags $dbg $prog" > hbm.out
  echo " " >> hbm.out
  # $flags holds a whitespace-separated option list and must be word-split,
  # so it stays unquoted. $dbg is not assigned in the visible script; it
  # expands to nothing unless inherited from the environment. $prog is
  # passed as one argument and omitted entirely when empty.
  # shellcheck disable=SC2086
  ./hbm "$dir" -n "$id" -r "$rate" -t "$dur" $flags $dbg ${prog:+"$prog"} \
    >> hbm.out 2>&1 &
  echo $!
}
0111 
#######################################
# Parse the command-line options into the script's global settings.
# Globals:   args (read) - used only when the function is called without
#                          arguments; split on whitespace
#            dir, dir_name, prog, cc, flags, debug_flag, netem, details,
#            ecn, qdisc, flows, id, use_netperf, port, multi_iperf, rate,
#            rr, server, do_stats, dur (written)
# Arguments: optional - the options to parse. When given they are used
#            as-is, so a value may contain whitespace.
# Outputs:   "Unknown arg:<arg>" plus the usage text for an unrecognized
#            argument (Usage then terminates the script)
#######################################
processArgs () {
  local i restore_glob=0

  if [ $# -eq 0 ] ; then
    # Legacy call style: take the options from the flattened $args string.
    # Globbing is switched off while splitting so an argument such as
    # "-b=*" is not replaced by matching file names.
    case $- in
    *f*)
      ;;
    *)
      restore_glob=1
      set -f
      ;;
    esac
    # shellcheck disable=SC2086
    set -- $args
    if [ $restore_glob -eq 1 ] ; then
      set +f
    fi
  fi

  for i in "$@" ; do
    case $i in
    # Support for upcoming ingress rate limiting
    #in)
    #  dir="-i"
    #  dir_name="in"
    #  ;;
    out)
      dir="-o"
      dir_name="out"
      ;;
    -b=*|--bpf=*)
      prog="${i#*=}"
      ;;
    -c=*|--cc=*)
      cc="${i#*=}"
      ;;
    --no_cn)
      flags="$flags --no_cn"
      ;;
    --debug)
      flags="$flags -d"
      debug_flag=1
      ;;
    -d=*|--delay=*)
      netem="${i#*=}"
      ;;
    -D)
      details=1
      ;;
    -E)
      ecn=1
      ;;
    --edt)
      flags="$flags --edt"
      qdisc="fq"
      ;;
    -f=*|--flows=*)
      flows="${i#*=}"
      ;;
    -h|--help)
      # Documented in the usage text; print help without the
      # "Unknown arg" complaint.
      Usage
      ;;
    -i=*|--id=*)
      id="${i#*=}"
      ;;
    -l)
      flags="$flags -l"
      ;;
    -N)
      use_netperf=1
      ;;
    -p=*|--port=*)
      port="${i#*=}"
      ;;
    -P)
      multi_iperf=1
      ;;
    -q=*)
      qdisc="${i#*=}"
      ;;
    -r=*|--rate=*)
      rate="${i#*=}"
      ;;
    -R)
      rr=1
      ;;
    -s=*|--server=*)
      server="${i#*=}"
      ;;
    -S|--stats)
      flags="$flags -s"
      do_stats=1
      ;;
    -t=*|--time=*)
      dur="${i#*=}"
      ;;
    -w)
      flags="$flags -w"
      ;;
    cubic)
      cc=cubic
      ;;
    dctcp)
      cc=dctcp
      ;;
    *)
      echo "Unknown arg:$i"
      Usage
      ;;
    esac
  done
}
0203 
# ---------------------------------------------------------------------------
# Test setup: parse options, start hbm, join the test cgroup, apply the
# requested sysctl/qdisc settings and start the background ping.
# ---------------------------------------------------------------------------

# processArgs takes the options from the global $args string.
processArgs
config_bpffs

# With --debug, remove any hbm_out.log left in the current directory.
if [ "$debug_flag" -eq 1 ] ; then
  rm -f hbm_out.log
fi

# Start hbm in the background and pause 100 ms before continuing
# (presumably so hbm has set up the cgroup directory written to below;
# confirm against hbm itself). "sleep 0.1" replaces the deprecated,
# non-portable "usleep 100000"; where usleep is missing the pause would be
# skipped with only a "command not found" message.
hbm_pid=$(start_hbm)
sleep 0.1

host=$(hostname)
cg_base_dir=/sys/fs/cgroup/unified
cg_dir="$cg_base_dir/cgroup-test-work-dir/hbm$id"

# Add this shell's PID to the test cgroup.
echo $$ >> "$cg_dir/cgroup.procs"

ulimit -l unlimited

# Remove output files left over from a previous run.
rm -f ss.out
rm -f hbm.[0-9]*."$dir_name"

if [ "$ecn" -ne 0 ] ; then
  sysctl -w -q -n net.ipv4.tcp_ecn=1
fi

# For iperf3 runs the congestion control is switched system-wide; remember
# the current value in cur_cc so it can be restored after the test.
if [ "$use_netperf" -eq 0 ] ; then
  cur_cc=$(sysctl -n net.ipv4.tcp_congestion_control)
  if [ "$cc" != "x" ] ; then
    sysctl -w -q -n net.ipv4.tcp_congestion_control="$cc"
  fi
fi

# A netem delay (-d) goes on loopback and takes precedence over -q, which
# otherwise installs the requested qdisc on eth0.
if [ "$netem" -ne 0 ] ; then
  if [ -n "$qdisc" ] ; then
    echo "WARNING: Ignoring -q options because -d option used"
  fi
  tc qdisc del dev lo root > /dev/null 2>&1
  tc qdisc add dev lo root netem delay "${netem}ms" > /dev/null 2>&1
elif [ -n "$qdisc" ] ; then
  tc qdisc del dev eth0 root > /dev/null 2>&1
  tc qdisc add dev eth0 root "$qdisc" > /dev/null 2>&1
fi

n=0
# One ping every 0.2 s, so dur * 5 pings span the test duration.
m=$(( dur * 5 ))
hn="::1"
# Ping the remote netperf server when one was given, else loopback.
if [ "$use_netperf" -ne 0 ] && [ -n "$server" ] ; then
  hn=$server
fi

( ping6 -i 0.2 -c "$m" "$hn" > ping.out 2>&1 ) &
0256 
# ---------------------------------------------------------------------------
# Traffic generation and reporting.
#
# One of three modes runs, selected by use_netperf and multi_iperf:
#   * netperf: one netperf instance per flow (TCP_RR or TCP_STREAM)
#   * single iperf3 instance carrying all flows (-P $flows)
#   * one iperf3 client/server pair per flow, on consecutive ports
# Each mode leaves the aggregate goodput in $rate and prints either just
# that number or, with -D, a detailed report.
# ---------------------------------------------------------------------------

# Print the "Details for HBM" header and, when -S was given and the stats
# file exists, the contents of hbm.<id>.<dir_name>.
print_hbm_details() {
  echo ""
  echo "Details for HBM in cgroup $id"
  if [ "$do_stats" -eq 1 ] && [ -e "hbm.$id.$dir_name" ] ; then
    cat "hbm.$id.$dir_name"
  fi
}

# Print the average ping delay (taken from the min/avg/... summary line of
# ping.out) followed by the aggregate goodput in $rate.
print_goodput_summary() {
  local delay
  delay=$(grep "avg" ping.out | grep -o "= [0-9.]*/[0-9.]*" | grep -o "[0-9.]*$")
  echo "PING AVG DELAY:$delay"
  echo "AGGREGATE_GOODPUT:$rate"
}

if [ "$use_netperf" -ne 0 ] ; then
  # Remember whether a netserver was already running; the end of the script
  # only kills netserver when this is empty.
  begNetserverPid=$(pgrep netserver)
  if [ -z "$begNetserverPid" ] && [ -z "$server" ] ; then
    ( ./netserver > /dev/null 2>&1 ) &
    sleep 0.1
  fi

  np_server=${server:-$host}

  # Optional netperf congestion-control option, kept as an array so it
  # expands to zero or two arguments.
  np_cc=()
  if [ "$cc" != "x" ] ; then
    np_cc=(-K "$cc,$cc")
  fi

  replySize=1
  for (( flow_cnt = 1; flow_cnt <= flows; flow_cnt++ )) ; do
    if [ "$rr" -ne 0 ] ; then
      # Request size is 10K for the first flow and 1M for the rest.
      reqSize=1M
      if [ "$flow_cnt" -eq 1 ] ; then
        reqSize=10K
      fi
      if [ "$dir" = "-i" ] ; then
        replySize=$reqSize
        reqSize=1
      fi
      ( ./netperf -H "$np_server" -l "$dur" -f m -j -t TCP_RR -- \
          -r "$reqSize,$replySize" "${np_cc[@]}" \
          -k P50_LATENCY,P90_LATENCY,LOCAL_TRANSPORT_RETRANS,REMOTE_TRANSPORT_RETRANS,LOCAL_SEND_THROUGHPUT,LOCAL_RECV_THROUGHPUT,REQUEST_SIZE,RESPONSE_SIZE \
          > "netperf.$id.$flow_cnt" ) &
    elif [ "$dir" = "-i" ] ; then
      ( ./netperf -H "$np_server" -l "$dur" -f m -j -t TCP_RR -- \
          -r 1,10M "${np_cc[@]}" \
          -k P50_LATENCY,P90_LATENCY,LOCAL_TRANSPORT_RETRANS,LOCAL_SEND_THROUGHPUT,REMOTE_TRANSPORT_RETRANS,REMOTE_SEND_THROUGHPUT,REQUEST_SIZE,RESPONSE_SIZE \
          > "netperf.$id.$flow_cnt" ) &
    else
      ( ./netperf -H "$np_server" -l "$dur" -f m -j -t TCP_STREAM -- \
          "${np_cc[@]}" \
          -k P50_LATENCY,P90_LATENCY,LOCAL_TRANSPORT_RETRANS,LOCAL_SEND_THROUGHPUT,REQUEST_SIZE,RESPONSE_SIZE \
          > "netperf.$id.$flow_cnt" ) &
    fi
  done

  # sleep for duration of test (plus some buffer)
  n=$(( dur + 2 ))
  sleep "$n"

  # force graceful termination of netperf
  pids=$(pgrep netperf)
  for p in $pids ; do
    kill -SIGALRM "$p"
  done

  rate=0
  if [ "$details" -ne 0 ] ; then
    print_hbm_details
  fi
  for (( flow_cnt = 1; flow_cnt <= flows; flow_cnt++ )) ; do
    # Integer part of the per-flow throughput reported by netperf.
    if [ "$dir" = "-i" ] ; then
      r=$(grep -o "REMOTE_SEND_THROUGHPUT=[0-9]*" "netperf.$id.$flow_cnt" | grep -o "[0-9]*")
    else
      r=$(grep -o "LOCAL_SEND_THROUGHPUT=[0-9]*" "netperf.$id.$flow_cnt" | grep -o "[0-9]*")
    fi
    echo "rate for flow $flow_cnt: $r"
    rate=$(( rate + r ))
    if [ "$details" -ne 0 ] ; then
      echo "-----"
      echo "Details for cgroup $id, flow $flow_cnt"
      cat "netperf.$id.$flow_cnt"
    fi
  done
  if [ "$details" -ne 0 ] ; then
    echo ""
    print_goodput_summary
  else
    echo "$rate"
  fi
elif [ "$multi_iperf" -eq 0 ] ; then
  # One-shot iperf3 server (-1), then a single client carrying all flows.
  ( iperf3 -s -p "$port" -1 > /dev/null 2>&1 ) &
  sleep 0.1
  iperf3 -c "$host" -p "$port" -i 0 -P "$flows" -f m -t "$dur" > "iperf.$id"
  rates=$(grep receiver "iperf.$id" | grep -o "[0-9.]* Mbits" | grep -o "^[0-9]*")
  # $rates is left unquoted on purpose: the receiver rates are joined onto
  # one line so that the pattern anchored at "$" selects the last one.
  # shellcheck disable=SC2086
  rate=$(echo $rates | grep -o "[0-9]*$")

  if [ "$details" -ne 0 ] ; then
    print_hbm_details
    print_goodput_summary
  else
    echo "$rate"
  fi
else
  # One iperf3 server/client pair per flow, each pair on its own port.
  for (( flow_cnt = 1; flow_cnt <= flows; flow_cnt++ )) ; do
    ( iperf3 -s -p "$port" -1 > /dev/null 2>&1 ) &
    ( iperf3 -c "$host" -p "$port" -i 0 -P 1 -f m -t "$dur" \
        | grep receiver | grep -o "[0-9.]* Mbits" | grep -o "^[0-9]*" \
        | grep -o "[0-9]*$" > "iperf3.$id.$flow_cnt" ) &
    port=$(( port + 1 ))
  done
  n=$(( dur + 1 ))
  sleep "$n"

  rate=0
  if [ "$details" -ne 0 ] ; then
    print_hbm_details
  fi

  for (( flow_cnt = 1; flow_cnt <= flows; flow_cnt++ )) ; do
    r=$(cat "iperf3.$id.$flow_cnt")
    if [ "$details" -ne 0 ] ; then
      echo "Rate for cgroup $id, flow $flow_cnt LOCAL_SEND_THROUGHPUT=$r"
    fi
    rate=$(( rate + r ))
  done
  if [ "$details" -ne 0 ] ; then
    print_goodput_summary
  else
    echo "$rate"
  fi
fi
0403 
# ---------------------------------------------------------------------------
# Cleanup: undo the sysctl/qdisc changes, stop hbm and (if this script
# started it) netserver, and remove pinned BPF objects.
# ---------------------------------------------------------------------------

# Restore the congestion control saved in cur_cc before the test.
if [ "$use_netperf" -eq 0 ] ; then
  sysctl -w -q -n net.ipv4.tcp_congestion_control="$cur_cc"
fi
if [ "$ecn" -ne 0 ] ; then
  # NOTE(review): this writes 0 rather than the value tcp_ecn had before the
  # test; the earlier value is not saved anywhere in the script.
  sysctl -w -q -n net.ipv4.tcp_ecn=0
fi

# Remove only the qdisc this run installed: with -d the -q option is
# ignored during setup, so eth0's root qdisc must be left alone then.
if [ "$netem" -ne 0 ] ; then
  tc qdisc del dev lo root > /dev/null 2>&1
elif [ -n "$qdisc" ] ; then
  tc qdisc del dev eth0 root > /dev/null 2>&1
fi
sleep 2

# Kill the hbm instance this script started if it is still running. The
# PID is checked directly, so other hbm processes running at the same time
# do not prevent the kill.
if [ -n "$hbm_pid" ] &&
   [ "$(ps -p "$hbm_pid" -o comm= 2> /dev/null)" = "hbm" ] ; then
  kill "$hbm_pid"
fi

sleep 1

# Detach any pinned BPF programs that may have lingered
if [ -n "$BPFFS" ] ; then
  rm -rf "$BPFFS"/hbm*
fi

# Stop netserver only for a local netperf run where none was running
# before the test (begNetserverPid empty).
if [ "$use_netperf" -ne 0 ] && [ -z "$server" ] &&
   [ -z "$begNetserverPid" ] ; then
  netserverPid=$(pgrep netserver)
  if [ -n "$netserverPid" ] ; then
    # Unquoted on purpose: pgrep may return several PIDs.
    # shellcheck disable=SC2086
    kill $netserverPid
  fi
fi
exit