#!/bin/bash
# SPDX-License-Identifier: GPL-2.0
#
# A test for switch behavior under MC overload. An issue in Spectrum chips
# causes throughput of UC traffic to drop severely when a switch is under heavy
# MC load. This issue can be overcome by putting the switch into MC-aware mode.
# This test verifies that UC performance stays intact even as the switch is
# under MC flood, and therefore that the MC-aware mode is enabled and correctly
# configured.
#
# Because mlxsw throttles the CPU port, the traffic can't actually reach
# userspace at full speed. That makes it impossible to use iperf3 to simply
# measure the throughput, because many packets (that reach $h3) don't get to
# the kernel at all even in UDP mode (the situation is even worse in TCP mode,
# where one can't hope to see more than a couple of Mbps).
#
# So instead we send traffic with mausezahn and use RX ethtool counters at $h3.
# Multicast traffic is untagged, unicast traffic is tagged with PCP 1. Therefore
# each gets a different priority and we can use per-prio ethtool counters to
# measure the throughput. In order to avoid prioritizing unicast traffic, prio
# qdisc is installed on $swp3 and maps all priorities to the same band #7 (and
# thus TC 0).
#
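# The per-prio counters can also be sampled by hand; e.g. something along
# these lines shows the octet counter that the tagged UC stream lands in
# (a sketch; ethtool_stats_get below reads the same counters):
#
#     ethtool -S $h3 | grep rx_octets_prio_1
#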
# Mausezahn can't actually saturate the links unless it's using large frames.
# Thus we set MTU to 10K on all involved interfaces. Both the unicast and the
# multicast streams then use 8K frames.
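# (start_traffic below comes from the sourced helpers and is assumed to boil
# down to roughly "$MZ <if> -p 8000 -A <sip> -B <dip> -c 0 -a own -b <dmac>
# -t udp -q &", hence the 8K UDP frames once the 10K MTU is in place.)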
#
# +---------------------------+            +----------------------------------+
# | H1                        |            |                               H2 |
# |                           |            |  unicast --> + $h2.111           |
# |                 multicast |            |  traffic     | 192.0.2.129/28    |
# |                 traffic   |            |              | e-qos-map 0:1     |
# |           $h1 + <-----    |            |              |                   |
# | 192.0.2.65/28 |           |            |              + $h2               |
# +---------------|-----------+            +--------------|-------------------+
#                 |                                       |
# +---------------|---------------------------------------|-------------------+
# |         $swp1 +                                       + $swp2             |
# |        >1Gbps |                                       | >1Gbps            |
# | +-------------|------+                     +----------|----------------+  |
# | |     $swp1.1 +      |                     |          + $swp2.111      |  |
# | |                BR1 |             SW      | BR111                     |  |
# | |     $swp3.1 +      |                     |          + $swp3.111      |  |
# | +-------------|------+                     +----------|----------------+  |
# |               \_______________________________________/                   |
# |                                    |                                      |
# |                                    + $swp3                                |
# |                                    | 1Gbps bottleneck                     |
# |                                    | prio qdisc: {0..7} -> 7              |
# +------------------------------------|--------------------------------------+
#                                      |
#                                   +--|-----------------+
#                                   |  + $h3          H3 |
#                                   |  | 192.0.2.66/28   |
#                                   |  |                 |
#                                   |  + $h3.111         |
#                                   |    192.0.2.130/28  |
#                                   +--------------------+

ALL_TESTS="
        ping_ipv4
        test_mc_aware
        test_uc_aware
"

lib_dir=$(dirname $0)/../../../net/forwarding

NUM_NETIFS=6
source $lib_dir/lib.sh
source $lib_dir/devlink_lib.sh
source qos_lib.sh

h1_create()
{
        simple_if_init $h1 192.0.2.65/28
        mtu_set $h1 10000
}

h1_destroy()
{
        mtu_restore $h1
        simple_if_fini $h1 192.0.2.65/28
}

h2_create()
{
        simple_if_init $h2
        mtu_set $h2 10000

        vlan_create $h2 111 v$h2 192.0.2.129/28
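        # skb priority 0 maps to PCP 1 on egress, so the UC stream is counted
        # in the prio-1 ethtool counters, separately from the untagged MC.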
        ip link set dev $h2.111 type vlan egress-qos-map 0:1
}

h2_destroy()
{
        vlan_destroy $h2 111

        mtu_restore $h2
        simple_if_fini $h2
}

h3_create()
{
        simple_if_init $h3 192.0.2.66/28
        mtu_set $h3 10000

        vlan_create $h3 111 v$h3 192.0.2.130/28
}

h3_destroy()
{
        vlan_destroy $h3 111

        mtu_restore $h3
        simple_if_fini $h3 192.0.2.66/28
}

switch_create()
{
        ip link set dev $swp1 up
        mtu_set $swp1 10000

        ip link set dev $swp2 up
        mtu_set $swp2 10000

        ip link set dev $swp3 up
        mtu_set $swp3 10000

        vlan_create $swp2 111
        vlan_create $swp3 111

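        # Force a 1Gbps bottleneck at $swp3 and map all priorities to a single
        # band, so that the UC stream gets no scheduling preference over MC.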
        ethtool -s $swp3 speed 1000 autoneg off
        tc qdisc replace dev $swp3 root handle 3: \
           prio bands 8 priomap 7 7 7 7 7 7 7 7

        ip link add name br1 type bridge vlan_filtering 0
        ip link set dev br1 up
        ip link set dev $swp1 master br1
        ip link set dev $swp3 master br1

        ip link add name br111 type bridge vlan_filtering 0
        ip link set dev br111 up
        ip link set dev $swp2.111 master br111
        ip link set dev $swp3.111 master br111

        # Make sure that ingress quotas are smaller than egress ones, so that
        # there is room for both streams of traffic to be admitted to the
        # shared buffer.
        devlink_port_pool_th_save $swp1 0
        devlink_port_pool_th_set $swp1 0 5
        devlink_tc_bind_pool_th_save $swp1 0 ingress
        devlink_tc_bind_pool_th_set $swp1 0 ingress 0 5

        devlink_port_pool_th_save $swp2 0
        devlink_port_pool_th_set $swp2 0 5
        devlink_tc_bind_pool_th_save $swp2 1 ingress
        devlink_tc_bind_pool_th_set $swp2 1 ingress 0 5

        devlink_port_pool_th_save $swp3 4
        devlink_port_pool_th_set $swp3 4 12
}

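# The thresholds configured above can be inspected with devlink, e.g.
# (a sketch; substitute the devlink port that corresponds to $swp1):
#
#     devlink sb port pool show pci/0000:01:00.0/1 pool 0
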
switch_destroy()
{
        devlink_port_pool_th_restore $swp3 4

        devlink_tc_bind_pool_th_restore $swp2 1 ingress
        devlink_port_pool_th_restore $swp2 0

        devlink_tc_bind_pool_th_restore $swp1 0 ingress
        devlink_port_pool_th_restore $swp1 0

        ip link del dev br111
        ip link del dev br1

        tc qdisc del dev $swp3 root handle 3:
        ethtool -s $swp3 autoneg on

        vlan_destroy $swp3 111
        vlan_destroy $swp2 111

        mtu_restore $swp3
        ip link set dev $swp3 down

        mtu_restore $swp2
        ip link set dev $swp2 down

        mtu_restore $swp1
        ip link set dev $swp1 down
}

setup_prepare()
{
        h1=${NETIFS[p1]}
        swp1=${NETIFS[p2]}

        swp2=${NETIFS[p3]}
        h2=${NETIFS[p4]}

        swp3=${NETIFS[p5]}
        h3=${NETIFS[p6]}

        h3mac=$(mac_get $h3)

        vrf_prepare

        h1_create
        h2_create
        h3_create
        switch_create
}

cleanup()
{
        pre_cleanup

        switch_destroy
        h3_destroy
        h2_destroy
        h1_destroy

        vrf_cleanup
}

ping_ipv4()
{
        ping_test $h2 192.0.2.130
}

test_mc_aware()
{
        RET=0

        local -a uc_rate
        start_traffic $h2.111 192.0.2.129 192.0.2.130 $h3mac
        uc_rate=($(measure_rate $swp2 $h3 rx_octets_prio_1 "UC-only"))
        check_err $? "Could not get high enough UC-only ingress rate"
        stop_traffic
        local ucth1=${uc_rate[1]}

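        # Start the untagged broadcast flood from $h1; its octets show up in
        # the prio-0 counters sampled below.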
        start_traffic $h1 192.0.2.65 bc bc

        local d0=$(date +%s)
        local t0=$(ethtool_stats_get $h3 rx_octets_prio_0)
        local u0=$(ethtool_stats_get $swp1 rx_octets_prio_0)

        local -a uc_rate_2
        start_traffic $h2.111 192.0.2.129 192.0.2.130 $h3mac
        uc_rate_2=($(measure_rate $swp2 $h3 rx_octets_prio_1 "UC+MC"))
        check_err $? "Could not get high enough UC+MC ingress rate"
        stop_traffic
        local ucth2=${uc_rate_2[1]}

        local d1=$(date +%s)
        local t1=$(ethtool_stats_get $h3 rx_octets_prio_0)
        local u1=$(ethtool_stats_get $swp1 rx_octets_prio_0)

        local deg=$(bc <<< "
                        scale=2
                        ret = 100 * ($ucth1 - $ucth2) / $ucth1
                        if (ret > 0) { ret } else { 0 }
                    ")

        # A minimum shaper of 200Mbps on the MC TCs should cause about 20%
        # degradation on a 1Gbps link.
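        # E.g. ucth1 = 960 Mbps UC-only vs. ucth2 = 770 Mbps under MC flood
        # gives deg = 100 * (960 - 770) / 960 = 19.79, inside the 15..25
        # window checked here (illustrative numbers only).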
        check_err $(bc <<< "$deg < 15") "Minimum shaper not in effect"
        check_err $(bc <<< "$deg > 25") "MC traffic degrades UC performance too much"

        local interval=$((d1 - d0))
        local mc_ir=$(rate $u0 $u1 $interval)
        local mc_er=$(rate $t0 $t1 $interval)

        stop_traffic

        log_test "UC performance under MC overload"

        echo "UC-only throughput  $(humanize $ucth1)"
        echo "UC+MC throughput    $(humanize $ucth2)"
        echo "Degradation         $deg %"
        echo
        echo "Full report:"
        echo "  UC only:"
        echo "    ingress UC throughput $(humanize ${uc_rate[0]})"
        echo "    egress UC throughput  $(humanize ${uc_rate[1]})"
        echo "  UC+MC:"
        echo "    ingress UC throughput $(humanize ${uc_rate_2[0]})"
        echo "    egress UC throughput  $(humanize ${uc_rate_2[1]})"
        echo "    ingress MC throughput $(humanize $mc_ir)"
        echo "    egress MC throughput  $(humanize $mc_er)"
        echo
}

test_uc_aware()
{
        RET=0

        start_traffic $h2.111 192.0.2.129 192.0.2.130 $h3mac

        local d0=$(date +%s)
        local t0=$(ethtool_stats_get $h3 rx_octets_prio_1)
        local u0=$(ethtool_stats_get $swp2 rx_octets_prio_1)
        sleep 1

        local attempts=50
        local passes=0
        local i

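        # Broadcast-ARP $h3 from $h1 while the UC stream saturates the
        # bottleneck. With MC-aware scheduling every request should still get
        # an answer.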
        for ((i = 0; i < attempts; ++i)); do
                if $ARPING -c 1 -I $h1 -b 192.0.2.66 -q -w 1; then
                        ((passes++))
                fi

                sleep 0.1
        done

        local d1=$(date +%s)
        local t1=$(ethtool_stats_get $h3 rx_octets_prio_1)
        local u1=$(ethtool_stats_get $swp2 rx_octets_prio_1)

        local interval=$((d1 - d0))
        local uc_ir=$(rate $u0 $u1 $interval)
        local uc_er=$(rate $t0 $t1 $interval)

        ((attempts == passes))
        check_err $?

        stop_traffic

        log_test "MC performance under UC overload"
        echo "    ingress UC throughput $(humanize ${uc_ir})"
        echo "    egress UC throughput  $(humanize ${uc_er})"
        echo "    sent $attempts BC ARPs, got $passes responses"
}

trap cleanup EXIT

setup_prepare
setup_wait

tests_run

exit $EXIT_STATUS