0001
0002
0003
0004
0005
0006
0007
0008
0009
# Print the help/usage text and exit.  $name is set to "$0" by the
# top-level script before this can be reached.
# Fixes vs. previous revision: closed the unbalanced brackets in the
# --server and -t/--time synopsis lines, removed the stray space in
# "<id >", corrected "-c or -cc" to "-c or --cc", and corrected the
# -r default text "default 1s 1Gbps" to "default is 1Gbps".
Usage() {
  echo "Script for testing HBM (Host Bandwidth Manager) framework."
  echo "It creates a cgroup to use for testing and load a BPF program to limit"
  echo "egress or ingress bandwidth. It then uses iperf3 or netperf to create"
  echo "loads. The output is the goodput in Mbps (unless -D was used)."
  echo ""
  echo "USAGE: $name [out] [-b=<prog>|--bpf=<prog>] [-c=<cc>|--cc=<cc>]"
  echo "       [-D] [-d=<delay>|--delay=<delay>] [--debug] [-E] [--edt]"
  echo "       [-f=<#flows>|--flows=<#flows>] [-h] [-i=<id>|--id=<id>]"
  echo "       [-l] [-N] [--no_cn] [-p=<port>|--port=<port>] [-P]"
  echo "       [-q=<qdisc>] [-R] [-s=<server>|--server=<server>]"
  echo "       [-S|--stats] [-t=<time>|--time=<time>] [-w] [cubic|dctcp]"
  echo " Where:"
  echo "  out               egress (default)"
  echo "  -b or --bpf       BPF program filename to load and attach."
  echo "                    Default is hbm_out_kern.o for egress."
  echo "  -c or --cc        TCP congestion control (cubic or dctcp)"
  echo "  --debug           print BPF trace buffer"
  echo "  -d or --delay     add a delay in ms using netem"
  echo "  -D                In addition to the goodput in Mbps, it also outputs"
  echo "                    other detailed information. This information is"
  echo "                    test dependent (i.e. iperf3 or netperf)."
  echo "  -E                enable ECN (not required for dctcp)"
  echo "  --edt             use fq's Earliest Departure Time (requires fq)"
  echo "  -f or --flows     number of concurrent flows (default=1)"
  echo "  -i or --id        cgroup id (an integer, default is 1)"
  echo "  -N                use netperf instead of iperf3"
  echo "  --no_cn           Do not return CN notifications"
  echo "  -l                do not limit flows using loopback"
  echo "  -h                Help"
  echo "  -p or --port      iperf3 port (default is 5201)"
  echo "  -P                use an iperf3 instance for each flow"
  echo "  -q                use the specified qdisc"
  echo "  -r or --rate      rate in Mbps (default is 1Gbps)"
  echo "  -R                Use TCP_RR for netperf. 1st flow has req"
  echo "                    size of 10KB, rest of 1MB. Reply in all"
  echo "                    cases is 1 byte."
  echo "                    More detailed output for each flow can be found"
  echo "                    in the files netperf.<cg>.<flow>, where <cg> is the"
  echo "                    cgroup id as specified with the -i flag, and <flow>"
  echo "                    is the flow id starting at 1 and increasing by 1 for"
  echo "                    flow (as specified by -f)."
  echo "  -s or --server    hostname of netperf server. Used to create netperf"
  echo "                    test traffic between to hosts (default is within host)"
  echo "                    netserver must be running on the host."
  echo "  -S or --stats     whether to update hbm stats (default is yes)."
  echo "  -t or --time      duration of iperf3 in seconds (default=5)"
  echo "  -w                Work conserving flag. cgroup can increase its"
  echo "                    bandwidth beyond the rate limit specified"
  echo "                    while there is available bandwidth. Current"
  echo "                    implementation assumes there is only one NIC"
  echo "                    (eth0), but can be extended to support multiple"
  echo "                    NICs."
  echo "  cubic or dctcp    specify which TCP CC to use"
  echo " "
  exit
}
0067
0068
0069
0070 debug_flag=0
0071 args="$@"
0072 name="$0"
0073 netem=0
0074 cc=x
0075 dir="-o"
0076 dir_name="out"
0077 dur=5
0078 flows=1
0079 id=1
0080 prog=""
0081 port=5201
0082 rate=1000
0083 multi_iperf=0
0084 flow_cnt=1
0085 use_netperf=0
0086 rr=0
0087 ecn=0
0088 details=0
0089 server=""
0090 qdisc=""
0091 flags=""
0092 do_stats=0
0093
0094 BPFFS=/sys/fs/bpf
# Ensure the BPF filesystem is mounted at $BPFFS, mounting it if needed.
# Uses grep -q (quiet) instead of redirecting to /dev/null, and quotes
# $BPFFS so an empty/odd value cannot word-split or glob.
function config_bpffs () {
	if mount | grep -q "$BPFFS" ; then
		echo "bpffs already mounted"
	else
		echo "bpffs not mounted. Mounting..."
		mount -t bpf none "$BPFFS"
	fi
}
0103
# Start ./hbm in the background with the options accumulated from the
# command line.  The exact command line and all of its output are logged
# to hbm.out; the background PID is written to stdout so the caller can
# capture it (hbm_pid=$(start_hbm)).
function start_hbm () {
	rm -f hbm.out
	echo "./hbm $dir -n $id -r $rate -t $dur $flags $dbg $prog" > hbm.out
	echo " " >> hbm.out
	# NOTE(review): $dbg is never assigned anywhere in this script, so it
	# expands to nothing -- confirm whether it was meant to carry a debug
	# flag (--debug currently appends -d to $flags instead).
	./hbm $dir -n $id -r $rate -t $dur $flags $dbg $prog >> hbm.out 2>&1 &
	echo $!
}
0111
# Parse the command line saved in the global $args (set from "$@" at the
# top of the script).  Options use the -x=value / --long=value form; the
# value is extracted with ${i#*=} (strip everything through the first '=').
# Each arm only records state in globals; nothing is executed here.
# Any unrecognized argument (including -h) prints Usage and exits.
processArgs () {
	for i in $args ; do
		case $i in
		out)            # test egress (this is also the default)
			dir="-o"
			dir_name="out"
			;;
		-b=*|--bpf=*)   # BPF program filename for ./hbm
			prog="${i#*=}"
			;;
		-c=*|--cc=*)    # TCP congestion control to use
			cc="${i#*=}"
			;;
		--no_cn)        # tell ./hbm not to return CN notifications
			flags="$flags --no_cn"
			;;
		--debug)        # pass -d to ./hbm and dump the trace buffer later
			flags="$flags -d"
			debug_flag=1
			;;
		-d=*|--delay=*) # netem delay in ms
			netem="${i#*=}"
			;;
		-D)             # detailed output
			details=1
			;;
		-E)             # enable ECN
			ecn=1
			;;
		--edt)          # Earliest Departure Time mode; forces the fq qdisc
			flags="$flags --edt"
			qdisc="fq"
			;;
		-f=*|--flows=*) # number of concurrent flows
			flows="${i#*=}"
			;;
		-i=*|--id=*)    # cgroup id
			id="${i#*=}"
			;;
		-l)             # do not limit flows using loopback
			flags="$flags -l"
			;;
		-N)             # use netperf instead of iperf3
			use_netperf=1
			;;
		-p=*|--port=*)  # iperf3 base port
			port="${i#*=}"
			;;
		-P)             # one iperf3 instance per flow
			multi_iperf=1
			;;
		-q=*)           # qdisc to install on eth0
			qdisc="${i#*=}"
			;;
		-r=*|--rate=*)  # rate limit in Mbps
			rate="${i#*=}"
			;;
		-R)             # use netperf TCP_RR
			rr=1
			;;
		-s=*|--server=*) # remote netperf server hostname
			server="${i#*=}"
			;;
		-S|--stats)     # ask ./hbm to update stats, and print them
			flags="$flags -s"
			do_stats=1
			;;
		-t=*|--time=*)  # test duration in seconds
			dur="${i#*=}"
			;;
		-w)             # work-conserving mode
			flags="$flags -w"
			;;
		cubic)          # shorthand for -c=cubic
			cc=cubic
			;;
		dctcp)          # shorthand for -c=dctcp
			cc=dctcp
			;;
		*)              # anything else (including -h): show usage and exit
			echo "Unknown arg:$i"
			Usage
			;;
		esac
	done
}
0203
# ---- Test setup: parse args, start hbm, join cgroup, configure net. ----
processArgs
config_bpffs

if [ $debug_flag -eq 1 ] ; then
	# Start with a fresh BPF trace log when --debug is used.
	rm -f hbm_out.log
fi

# Launch the HBM rate limiter in the background and capture its PID.
hbm_pid=$(start_hbm)
usleep 100000

host=`hostname`
# cgroup-v2 hierarchy; hbm$id is the test cgroup.
# NOTE(review): the cgroup directory is assumed to already exist
# (presumably created by ./hbm) -- confirm against hbm's source.
cg_base_dir=/sys/fs/cgroup/unified
cg_dir="$cg_base_dir/cgroup-test-work-dir/hbm$id"

# Move this shell (and thus all child test processes) into the cgroup.
echo $$ >> $cg_dir/cgroup.procs

ulimit -l unlimited

# Remove stale output from previous runs.
rm -f ss.out
rm -f hbm.[0-9]*.$dir_name
if [ $ecn -ne 0 ] ; then
	sysctl -w -q -n net.ipv4.tcp_ecn=1
fi

# For iperf3 runs, optionally switch the system-wide congestion control,
# saving the current one so the cleanup section can restore it.
if [ $use_netperf -eq 0 ] ; then
	cur_cc=`sysctl -n net.ipv4.tcp_congestion_control`
	if [ "$cc" != "x" ] ; then
		sysctl -w -q -n net.ipv4.tcp_congestion_control=$cc
	fi
fi

# -d installs a netem delay on loopback; otherwise an explicit -q qdisc
# is installed on eth0.  The two are mutually exclusive (-d wins).
if [ "$netem" -ne "0" ] ; then
	if [ "$qdisc" != "" ] ; then
		echo "WARNING: Ignoring -q options because -d option used"
	fi
	tc qdisc del dev lo root > /dev/null 2>&1
	tc qdisc add dev lo root netem delay $netem\ms > /dev/null 2>&1
elif [ "$qdisc" != "" ] ; then
	tc qdisc del dev eth0 root > /dev/null 2>&1
	tc qdisc add dev eth0 root $qdisc > /dev/null 2>&1
fi

n=0
# Number of pings for the background latency probe: 5/s for the duration.
m=$[$dur * 5]
# Ping target: loopback, unless netperf is aimed at a remote server.
hn="::1"
if [ $use_netperf -ne 0 ] ; then
	if [ "$server" != "" ] ; then
		hn=$server
	fi
fi
0254
# Background latency probe for the whole test; parsed from ping.out later.
( ping6 -i 0.2 -c $m $hn > ping.out 2>&1 ) &

# ---- Generate load and report goodput: netperf, single iperf3, or one
# ---- iperf3 per flow, depending on -N / -P.
if [ $use_netperf -ne 0 ] ; then
	# Remember any pre-existing netserver so cleanup will not kill it.
	begNetserverPid=`ps ax | grep netserver | grep --invert-match "grep" | \
		awk '{ print $1 }'`
	if [ "$begNetserverPid" == "" ] ; then
		if [ "$server" == "" ] ; then
			# Local test: start our own netserver instance.
			( ./netserver > /dev/null 2>&1) &
			usleep 100000
		fi
	fi
	flow_cnt=1
	if [ "$server" == "" ] ; then
		np_server=$host
	else
		np_server=$server
	fi
	# Pass the congestion control to netperf only when explicitly chosen.
	if [ "$cc" == "x" ] ; then
		np_cc=""
	else
		np_cc="-K $cc,$cc"
	fi
	replySize=1
	# Launch one background netperf per flow.
	while [ $flow_cnt -le $flows ] ; do
		if [ $rr -ne 0 ] ; then
			# TCP_RR: first flow uses 10KB requests, the rest 1MB;
			# replies are 1 byte.  For ingress the sizes are swapped.
			reqSize=1M
			if [ $flow_cnt -eq 1 ] ; then
				reqSize=10K
			fi
			if [ "$dir" == "-i" ] ; then
				replySize=$reqSize
				reqSize=1
			fi
			# NOTE(review): "P50_lATENCY" (lower-case 'l') looks like a
			# typo for P50_LATENCY here and in the TCP_STREAM case below
			# -- confirm against netperf's omni output selectors.
			( ./netperf -H $np_server -l $dur -f m -j -t TCP_RR -- -r $reqSize,$replySize $np_cc -k P50_lATENCY,P90_LATENCY,LOCAL_TRANSPORT_RETRANS,REMOTE_TRANSPORT_RETRANS,LOCAL_SEND_THROUGHPUT,LOCAL_RECV_THROUGHPUT,REQUEST_SIZE,RESPONSE_SIZE > netperf.$id.$flow_cnt ) &
		else
			if [ "$dir" == "-i" ] ; then
				# Ingress stream: tiny requests, 10MB replies.
				( ./netperf -H $np_server -l $dur -f m -j -t TCP_RR -- -r 1,10M $np_cc -k P50_LATENCY,P90_LATENCY,LOCAL_TRANSPORT_RETRANS,LOCAL_SEND_THROUGHPUT,REMOTE_TRANSPORT_RETRANS,REMOTE_SEND_THROUGHPUT,REQUEST_SIZE,RESPONSE_SIZE > netperf.$id.$flow_cnt ) &
			else
				( ./netperf -H $np_server -l $dur -f m -j -t TCP_STREAM -- $np_cc -k P50_lATENCY,P90_LATENCY,LOCAL_TRANSPORT_RETRANS,LOCAL_SEND_THROUGHPUT,REQUEST_SIZE,RESPONSE_SIZE > netperf.$id.$flow_cnt ) &
			fi
		fi
		flow_cnt=$[flow_cnt+1]
	done

	# Let the flows run for the full duration plus slack.
	n=$[dur+2]
	sleep $n

	# SIGALRM makes netperf emit its final statistics and exit.
	pids=`pgrep netperf`
	for p in $pids ; do
		kill -SIGALRM $p
	done

	flow_cnt=1
	rate=0
	if [ $details -ne 0 ] ; then
		echo ""
		echo "Details for HBM in cgroup $id"
		if [ $do_stats -eq 1 ] ; then
			if [ -e hbm.$id.$dir_name ] ; then
				cat hbm.$id.$dir_name
			fi
		fi
	fi
	# Sum per-flow goodput scraped from the netperf output files.
	# For ingress the remote side is the sender, hence REMOTE_*.
	while [ $flow_cnt -le $flows ] ; do
		if [ "$dir" == "-i" ] ; then
			r=`cat netperf.$id.$flow_cnt | grep -o "REMOTE_SEND_THROUGHPUT=[0-9]*" | grep -o "[0-9]*"`
		else
			r=`cat netperf.$id.$flow_cnt | grep -o "LOCAL_SEND_THROUGHPUT=[0-9]*" | grep -o "[0-9]*"`
		fi
		echo "rate for flow $flow_cnt: $r"
		rate=$[rate+r]
		if [ $details -ne 0 ] ; then
			echo "-----"
			echo "Details for cgroup $id, flow $flow_cnt"
			cat netperf.$id.$flow_cnt
		fi
		flow_cnt=$[flow_cnt+1]
	done
	if [ $details -ne 0 ] ; then
		echo ""
		# avg RTT is the 2nd field of ping's "min/avg/max" summary.
		delay=`grep "avg" ping.out | grep -o "= [0-9.]*/[0-9.]*" | grep -o "[0-9.]*$"`
		echo "PING AVG DELAY:$delay"
		echo "AGGREGATE_GOODPUT:$rate"
	else
		echo $rate
	fi
elif [ $multi_iperf -eq 0 ] ; then
	# Single iperf3 server/client pair carrying all flows (-P $flows).
	(iperf3 -s -p $port -1 > /dev/null 2>&1) &
	usleep 100000
	iperf3 -c $host -p $port -i 0 -P $flows -f m -t $dur > iperf.$id
	# $rates holds the Mbps of every "receiver" line; the last value is
	# the aggregate (SUM) figure, extracted into $rate.
	rates=`grep receiver iperf.$id | grep -o "[0-9.]* Mbits" | grep -o "^[0-9]*"`
	rate=`echo $rates | grep -o "[0-9]*$"`

	if [ $details -ne 0 ] ; then
		echo ""
		echo "Details for HBM in cgroup $id"
		if [ $do_stats -eq 1 ] ; then
			if [ -e hbm.$id.$dir_name ] ; then
				cat hbm.$id.$dir_name
			fi
		fi
		delay=`grep "avg" ping.out | grep -o "= [0-9.]*/[0-9.]*" | grep -o "[0-9.]*$"`
		echo "PING AVG DELAY:$delay"
		echo "AGGREGATE_GOODPUT:$rate"
	else
		echo $rate
	fi
else
	# One iperf3 server/client pair per flow, on consecutive ports.
	# Each client pipeline writes its Mbps goodput to iperf3.<id>.<flow>.
	flow_cnt=1
	while [ $flow_cnt -le $flows ] ; do
		(iperf3 -s -p $port -1 > /dev/null 2>&1) &
		( iperf3 -c $host -p $port -i 0 -P 1 -f m -t $dur | grep receiver | grep -o "[0-9.]* Mbits" | grep -o "^[0-9]*" | grep -o "[0-9]*$" > iperf3.$id.$flow_cnt ) &
		port=$[port+1]
		flow_cnt=$[flow_cnt+1]
	done
	n=$[dur+1]
	sleep $n
	flow_cnt=1
	rate=0
	if [ $details -ne 0 ] ; then
		echo ""
		echo "Details for HBM in cgroup $id"
		if [ $do_stats -eq 1 ] ; then
			if [ -e hbm.$id.$dir_name ] ; then
				cat hbm.$id.$dir_name
			fi
		fi
	fi

	# Sum the per-flow results written by the background pipelines above.
	while [ $flow_cnt -le $flows ] ; do
		r=`cat iperf3.$id.$flow_cnt`

		if [ $details -ne 0 ] ; then
			echo "Rate for cgroup $id, flow $flow_cnt LOCAL_SEND_THROUGHPUT=$r"
		fi
		rate=$[rate+r]
		flow_cnt=$[flow_cnt+1]
	done
	if [ $details -ne 0 ] ; then
		delay=`grep "avg" ping.out | grep -o "= [0-9.]*/[0-9.]*" | grep -o "[0-9.]*$"`
		echo "PING AVG DELAY:$delay"
		echo "AGGREGATE_GOODPUT:$rate"
	else
		echo $rate
	fi
fi
0403
# ---- Cleanup: undo sysctl/qdisc changes and kill helper processes. ----
if [ $use_netperf -eq 0 ] ; then
	# Restore the congestion control saved before the test (iperf3 path).
	sysctl -w -q -n net.ipv4.tcp_congestion_control=$cur_cc
fi
if [ $ecn -ne 0 ] ; then
	sysctl -w -q -n net.ipv4.tcp_ecn=0
fi
if [ "$netem" -ne "0" ] ; then
	tc qdisc del dev lo root > /dev/null 2>&1
fi
if [ "$qdisc" != "" ] ; then
	tc qdisc del dev eth0 root > /dev/null 2>&1
fi
sleep 2

# Kill our hbm process only if it is still running; it normally exits on
# its own after -t seconds, so the PID lookup usually comes back empty.
hbmPid=`ps ax | grep "hbm " | grep --invert-match "grep" | awk '{ print $1 }'`
if [ "$hbmPid" == "$hbm_pid" ] ; then
	kill $hbm_pid
fi

sleep 1

# Remove BPF programs/maps pinned under the BPF filesystem by ./hbm.
rm -rf $BPFFS/hbm*

# Kill netserver only if this script started it (local test, and no
# netserver was already running when we began).
if [ $use_netperf -ne 0 ] ; then
	if [ "$server" == "" ] ; then
		if [ "$begNetserverPid" == "" ] ; then
			netserverPid=`ps ax | grep netserver | grep --invert-match "grep" | awk '{ print $1 }'`
			if [ "$netserverPid" != "" ] ; then
				kill $netserverPid
			fi
		fi
	fi
fi
exit