Back to home page

OSCL-LXR

 
 

    


0001 #!/bin/sh
0002 # SPDX-License-Identifier: GPL-2.0
0003 #
0004 # Check that route PMTU values match expectations, and that initial device MTU
0005 # values are assigned correctly
0006 #
0007 # Tests currently implemented:
0008 #
0009 # - pmtu_ipv4
0010 #       Set up two namespaces, A and B, with two paths between them over routers
0011 #       R1 and R2 (also implemented with namespaces), with different MTUs:
0012 #
0013 #         segment a_r1    segment b_r1          a_r1: 2000
0014 #       .--------------R1--------------.        b_r1: 1400
0015 #       A                               B       a_r2: 2000
0016 #       '--------------R2--------------'        b_r2: 1500
0017 #         segment a_r2    segment b_r2
0018 #
0019 #       Check that PMTU exceptions with the correct PMTU are created. Then
0020 #       decrease and increase the MTU of the local link for one of the paths,
0021 #       A to R1, checking that route exception PMTU changes accordingly over
0022 #       this path. Also check that locked exceptions are created when an ICMP
0023 #       message advertising a PMTU smaller than net.ipv4.route.min_pmtu is
0024 #       received
0025 #
0026 # - pmtu_ipv6
0027 #       Same as pmtu_ipv4, except for locked PMTU tests, using IPv6
0028 #
0029 # - pmtu_ipv4_dscp_icmp_exception
0030 #       Set up the same network topology as pmtu_ipv4, but use non-default
0031 #       routing table in A. A fib-rule is used to jump to this routing table
0032 #       based on DSCP. Send ICMPv4 packets with the expected DSCP value and
0033 #       verify that ECN doesn't interfere with the creation of PMTU exceptions.
0034 #
0035 # - pmtu_ipv4_dscp_udp_exception
0036 #       Same as pmtu_ipv4_dscp_icmp_exception, but use UDP instead of ICMP.
0037 #
0038 # - pmtu_ipv4_vxlan4_exception
0039 #       Set up the same network topology as pmtu_ipv4, create a VXLAN tunnel
0040 #       over IPv4 between A and B, routed via R1. On the link between R1 and B,
0041 #       set a MTU lower than the VXLAN MTU and the MTU on the link between A and
0042 #       R1. Send IPv4 packets, exceeding the MTU between R1 and B, over VXLAN
0043 #       from A to B and check that the PMTU exception is created with the right
0044 #       value on A
0045 #
0046 # - pmtu_ipv6_vxlan4_exception
0047 #       Same as pmtu_ipv4_vxlan4_exception, but send IPv6 packets from A to B
0048 #
0049 # - pmtu_ipv4_vxlan6_exception
0050 #       Same as pmtu_ipv4_vxlan4_exception, but use IPv6 transport from A to B
0051 #
0052 # - pmtu_ipv6_vxlan6_exception
0053 #       Same as pmtu_ipv4_vxlan6_exception, but send IPv6 packets from A to B
0054 #
0055 # - pmtu_ipv4_geneve4_exception
0056 #       Same as pmtu_ipv4_vxlan4_exception, but using a GENEVE tunnel instead of
0057 #       VXLAN
0058 #
0059 # - pmtu_ipv6_geneve4_exception
0060 #       Same as pmtu_ipv6_vxlan4_exception, but using a GENEVE tunnel instead of
0061 #       VXLAN
0062 #
0063 # - pmtu_ipv4_geneve6_exception
0064 #       Same as pmtu_ipv4_vxlan6_exception, but using a GENEVE tunnel instead of
0065 #       VXLAN
0066 #
0067 # - pmtu_ipv6_geneve6_exception
0068 #       Same as pmtu_ipv6_vxlan6_exception, but using a GENEVE tunnel instead of
0069 #       VXLAN
0070 #
0071 # - pmtu_ipv{4,6}_br_vxlan{4,6}_exception
0072 #       Set up three namespaces, A, B, and C, with routing between A and B over
0073 #       R1. R2 is unused in these tests. A has a veth connection to C, and is
0074 #       connected to B via a VXLAN endpoint, which is directly bridged to C.
0075 #       MTU on the B-R1 link is lower than other MTUs.
0076 #
0077 #       Check that both C and A are able to communicate with B over the VXLAN
0078 #       tunnel, and that PMTU exceptions with the correct values are created.
0079 #
0080 #                         segment a_r1    segment b_r1            b_r1: 4000
0081 #                       .--------------R1--------------.    everything
0082 #          C---veth     A                               B         else: 5000
0083 #               ' bridge                                |
0084 #                   '---- - - - - - VXLAN - - - - - - - '
0085 #
0086 # - pmtu_ipv{4,6}_br_geneve{4,6}_exception
0087 #       Same as pmtu_ipv{4,6}_br_vxlan{4,6}_exception, with a GENEVE tunnel
0088 #       instead.
0089 #
0090 # - pmtu_ipv{4,6}_ovs_vxlan{4,6}_exception
0091 #       Set up two namespaces, B, and C, with routing between the init namespace
0092 #       and B over R1. A and R2 are unused in these tests. The init namespace
0093 #       has a veth connection to C, and is connected to B via a VXLAN endpoint,
0094 #       which is handled by Open vSwitch and bridged to C. MTU on the B-R1 link
0095 #       is lower than other MTUs.
0096 #
0097 #       Check that C is able to communicate with B over the VXLAN tunnel, and
0098 #       that PMTU exceptions with the correct values are created.
0099 #
0100 #                         segment a_r1    segment b_r1            b_r1: 4000
0101 #                       .--------------R1--------------.    everything
0102 #          C---veth    init                             B         else: 5000
0103 #               '- ovs                                  |
0104 #                   '---- - - - - - VXLAN - - - - - - - '
0105 #
0106 # - pmtu_ipv{4,6}_ovs_geneve{4,6}_exception
0107 #       Same as pmtu_ipv{4,6}_ovs_vxlan{4,6}_exception, with a GENEVE tunnel
0108 #       instead.
0109 #
0110 # - pmtu_ipv{4,6}_fou{4,6}_exception
0111 #       Same as pmtu_ipv4_vxlan4, but using a direct IPv4/IPv6 encapsulation
0112 #       (FoU) over IPv4/IPv6, instead of VXLAN
0113 #
0114 # - pmtu_ipv{4,6}_gue{4,6}_exception
0115 #       Same as pmtu_ipv4_vxlan4, but using a generic UDP IPv4/IPv6
0116 #       encapsulation (GUE) over IPv4/IPv6, instead of VXLAN
0117 #
0118 # - pmtu_ipv{4,6}_ipv{4,6}_exception
0119 #       Same as pmtu_ipv4_vxlan4, but using an IPv4/IPv6 tunnel over IPv4/IPv6,
0120 #       instead of VXLAN
0121 #
0122 # - pmtu_vti4_exception
0123 #       Set up vti tunnel on top of veth, with xfrm states and policies, in two
0124 #       namespaces with matching endpoints. Check that route exception is not
0125 #       created if link layer MTU is not exceeded, then exceed it and check that
0126 #       exception is created with the expected PMTU. The approach described
0127 #       below for IPv6 doesn't apply here, because, on IPv4, administrative MTU
0128 #       changes alone won't affect PMTU
0129 #
0130 # - pmtu_vti4_udp_exception
0131 #       Same as pmtu_vti4_exception, but using ESP-in-UDP
0132 #
0133 # - pmtu_vti4_udp_routed_exception
0134 #       Set up vti tunnel on top of veth connected through routing namespace and
0135 #       add xfrm states and policies with ESP-in-UDP encapsulation. Check that
0136 #       route exception is not created if link layer MTU is not exceeded, then
0137 #       lower MTU on second part of routed environment and check that exception
0138 #       is created with the expected PMTU.
0139 #
0140 # - pmtu_vti6_exception
0141 #       Set up vti6 tunnel on top of veth, with xfrm states and policies, in two
0142 #       namespaces with matching endpoints. Check that route exception is
0143 #       created by exceeding link layer MTU with ping to other endpoint. Then
0144 #       decrease and increase MTU of tunnel, checking that route exception PMTU
0145 #       changes accordingly
0146 #
0147 # - pmtu_vti6_udp_exception
0148 #       Same as pmtu_vti6_exception, but using ESP-in-UDP
0149 #
0150 # - pmtu_vti6_udp_routed_exception
0151 #       Same as pmtu_vti6_udp_exception, but with routing between vti
0152 #       endpoints
0153 #
0154 # - pmtu_vti4_default_mtu
0155 #       Set up vti4 tunnel on top of veth, in two namespaces with matching
0156 #       endpoints. Check that MTU assigned to vti interface is the MTU of the
0157 #       lower layer (veth) minus additional lower layer headers (zero, for veth)
0158 #       minus IPv4 header length
0159 #
0160 # - pmtu_vti6_default_mtu
0161 #       Same as above, for IPv6
0162 #
0163 # - pmtu_vti4_link_add_mtu
0164 #       Set up vti4 interface passing MTU value at link creation, check MTU is
0165 #       configured, and that link is not created with invalid MTU values
0166 #
0167 # - pmtu_vti6_link_add_mtu
0168 #       Same as above, for IPv6
0169 #
0170 # - pmtu_vti6_link_change_mtu
0171 #       Set up two dummy interfaces with different MTUs, create a vti6 tunnel
0172 #       and check that configured MTU is used on link creation and changes, and
0173 #       that MTU is properly calculated instead when MTU is not configured from
0174 #       userspace
0175 #
0176 # - cleanup_ipv4_exception
0177 #       Similar to pmtu_ipv4_vxlan4_exception, but explicitly generate PMTU
0178 #       exceptions on multiple CPUs and check that the veth device tear-down
0179 #       happens in a timely manner
0180 #
0181 # - cleanup_ipv6_exception
0182 #       Same as above, but use IPv6 transport from A to B
0183 #
0184 # - list_flush_ipv4_exception
0185 #       Using the same topology as in pmtu_ipv4, create exceptions, and check
0186 #       they are shown when listing exception caches, gone after flushing them
0187 #
0188 # - list_flush_ipv6_exception
0189 #       Using the same topology as in pmtu_ipv6, create exceptions, and check
0190 #       they are shown when listing exception caches, gone after flushing them
0191 #
0192 # - pmtu_ipv4_route_change
0193 #       Use the same topology as in pmtu_ipv4, but issue a route replacement
0194 #       command and delete the corresponding device afterward. This tests for
0195 #       proper cleanup of the PMTU exceptions by the route replacement path.
0196 #       Device unregistration should complete successfully
0197 #
0198 # - pmtu_ipv6_route_change
0199 #       Same as above but with IPv6
0200 
# Kselftest framework requirement - SKIP code is 4.
ksft_skip=4

# Default settings.
# NOTE(review): presumably overridden by option parsing outside this section.
PAUSE_ON_FAIL=no
VERBOSE=0
TRACING=0

# Some systems don't have a ping6 binary anymore: fall back to plain ping.
# 'command -v' is used for the lookup because, unlike 'which', it is specified
# by POSIX and built into the shell, so it can't be missing.
ping6=$(command -v ping6 2>/dev/null) || ping6=$(command -v ping)

# Test list. Each entry has three columns: function name, human-readable
# description, and a 0/1 flag ("re-run with nh").
# NOTE(review): the flag presumably selects tests that are run a second time
# with nexthop objects (see USE_NH) -- confirm against the test runner.
#               Name                          Description                  re-run with nh
tests="
        pmtu_ipv4_exception             ipv4: PMTU exceptions                   1
        pmtu_ipv6_exception             ipv6: PMTU exceptions                   1
        pmtu_ipv4_dscp_icmp_exception   ICMPv4 with DSCP and ECN: PMTU exceptions       1
        pmtu_ipv4_dscp_udp_exception    UDPv4 with DSCP and ECN: PMTU exceptions        1
        pmtu_ipv4_vxlan4_exception      IPv4 over vxlan4: PMTU exceptions       1
        pmtu_ipv6_vxlan4_exception      IPv6 over vxlan4: PMTU exceptions       1
        pmtu_ipv4_vxlan6_exception      IPv4 over vxlan6: PMTU exceptions       1
        pmtu_ipv6_vxlan6_exception      IPv6 over vxlan6: PMTU exceptions       1
        pmtu_ipv4_geneve4_exception     IPv4 over geneve4: PMTU exceptions      1
        pmtu_ipv6_geneve4_exception     IPv6 over geneve4: PMTU exceptions      1
        pmtu_ipv4_geneve6_exception     IPv4 over geneve6: PMTU exceptions      1
        pmtu_ipv6_geneve6_exception     IPv6 over geneve6: PMTU exceptions      1
        pmtu_ipv4_br_vxlan4_exception   IPv4, bridged vxlan4: PMTU exceptions   1
        pmtu_ipv6_br_vxlan4_exception   IPv6, bridged vxlan4: PMTU exceptions   1
        pmtu_ipv4_br_vxlan6_exception   IPv4, bridged vxlan6: PMTU exceptions   1
        pmtu_ipv6_br_vxlan6_exception   IPv6, bridged vxlan6: PMTU exceptions   1
        pmtu_ipv4_br_geneve4_exception  IPv4, bridged geneve4: PMTU exceptions  1
        pmtu_ipv6_br_geneve4_exception  IPv6, bridged geneve4: PMTU exceptions  1
        pmtu_ipv4_br_geneve6_exception  IPv4, bridged geneve6: PMTU exceptions  1
        pmtu_ipv6_br_geneve6_exception  IPv6, bridged geneve6: PMTU exceptions  1
        pmtu_ipv4_ovs_vxlan4_exception  IPv4, OVS vxlan4: PMTU exceptions       1
        pmtu_ipv6_ovs_vxlan4_exception  IPv6, OVS vxlan4: PMTU exceptions       1
        pmtu_ipv4_ovs_vxlan6_exception  IPv4, OVS vxlan6: PMTU exceptions       1
        pmtu_ipv6_ovs_vxlan6_exception  IPv6, OVS vxlan6: PMTU exceptions       1
        pmtu_ipv4_ovs_geneve4_exception IPv4, OVS geneve4: PMTU exceptions      1
        pmtu_ipv6_ovs_geneve4_exception IPv6, OVS geneve4: PMTU exceptions      1
        pmtu_ipv4_ovs_geneve6_exception IPv4, OVS geneve6: PMTU exceptions      1
        pmtu_ipv6_ovs_geneve6_exception IPv6, OVS geneve6: PMTU exceptions      1
        pmtu_ipv4_fou4_exception        IPv4 over fou4: PMTU exceptions         1
        pmtu_ipv6_fou4_exception        IPv6 over fou4: PMTU exceptions         1
        pmtu_ipv4_fou6_exception        IPv4 over fou6: PMTU exceptions         1
        pmtu_ipv6_fou6_exception        IPv6 over fou6: PMTU exceptions         1
        pmtu_ipv4_gue4_exception        IPv4 over gue4: PMTU exceptions         1
        pmtu_ipv6_gue4_exception        IPv6 over gue4: PMTU exceptions         1
        pmtu_ipv4_gue6_exception        IPv4 over gue6: PMTU exceptions         1
        pmtu_ipv6_gue6_exception        IPv6 over gue6: PMTU exceptions         1
        pmtu_ipv4_ipv4_exception        IPv4 over IPv4: PMTU exceptions         1
        pmtu_ipv6_ipv4_exception        IPv6 over IPv4: PMTU exceptions         1
        pmtu_ipv4_ipv6_exception        IPv4 over IPv6: PMTU exceptions         1
        pmtu_ipv6_ipv6_exception        IPv6 over IPv6: PMTU exceptions         1
        pmtu_vti6_exception             vti6: PMTU exceptions                   0
        pmtu_vti4_exception             vti4: PMTU exceptions                   0
        pmtu_vti6_udp_exception         vti6: PMTU exceptions (ESP-in-UDP)      0
        pmtu_vti4_udp_exception         vti4: PMTU exceptions (ESP-in-UDP)      0
        pmtu_vti6_udp_routed_exception  vti6: PMTU exceptions, routed (ESP-in-UDP)      0
        pmtu_vti4_udp_routed_exception  vti4: PMTU exceptions, routed (ESP-in-UDP)      0
        pmtu_vti4_default_mtu           vti4: default MTU assignment            0
        pmtu_vti6_default_mtu           vti6: default MTU assignment            0
        pmtu_vti4_link_add_mtu          vti4: MTU setting on link creation      0
        pmtu_vti6_link_add_mtu          vti6: MTU setting on link creation      0
        pmtu_vti6_link_change_mtu       vti6: MTU changes on link changes       0
        cleanup_ipv4_exception          ipv4: cleanup of cached exceptions      1
        cleanup_ipv6_exception          ipv6: cleanup of cached exceptions      1
        list_flush_ipv4_exception       ipv4: list and flush cached exceptions  1
        list_flush_ipv6_exception       ipv6: list and flush cached exceptions  1
        pmtu_ipv4_route_change          ipv4: PMTU exception w/route replace    1
        pmtu_ipv6_route_change          ipv6: PMTU exception w/route replace    1"
0270 
# Namespace names, each one followed by the command prefix used to run
# something inside that namespace
NS_A=ns-A
ns_a="ip netns exec ${NS_A}"
NS_B=ns-B
ns_b="ip netns exec ${NS_B}"
NS_C=ns-C
ns_c="ip netns exec ${NS_C}"
NS_R1=ns-R1
ns_r1="ip netns exec ${NS_R1}"
NS_R2=ns-R2
ns_r2="ip netns exec ${NS_R2}"

# Addressing scheme for the tests that use routers. There are four network
# segments, numbered 1 to 4 (SEGMENT), sharing a common prefix (PREFIX4 or
# PREFIX6). ID is 1 on hosts (A and B) and 2 on routers (R1 and R2), giving:
# - IPv4: PREFIX4.SEGMENT.ID (/24)
# - IPv6: PREFIX6:SEGMENT::ID (/64)
prefix4=10.0
prefix6=fc00
a_r1=1
a_r2=2
b_r1=3
b_r2=4
#       ns      peer    segment
routing_addrs="
        A       R1      ${a_r1}
        A       R2      ${a_r2}
        B       R1      ${b_r1}
        B       R2      ${b_r2}
"
# A reaches B via R1 by default; only B's address on the b_r2 segment is
# reached via R2. B reaches A via R1.
#       ns      destination             gateway
routes="
        A       default                 ${prefix4}.${a_r1}.2
        A       ${prefix4}.${b_r2}.1    ${prefix4}.${a_r2}.2
        B       default                 ${prefix4}.${b_r1}.2

        A       default                 ${prefix6}:${a_r1}::2
        A       ${prefix6}:${b_r2}::1   ${prefix6}:${a_r2}::2
        B       default                 ${prefix6}:${b_r1}::2
"
USE_NH=no
#       ns      family  nh id      destination          gateway
nexthops="
        A       4       41      ${prefix4}.${a_r1}.2    veth_A-R1
        A       4       42      ${prefix4}.${a_r2}.2    veth_A-R2
        B       4       41      ${prefix4}.${b_r1}.2    veth_B-R1

        A       6       61      ${prefix6}:${a_r1}::2   veth_A-R1
        A       6       62      ${prefix6}:${a_r2}::2   veth_A-R2
        B       6       61      ${prefix6}:${b_r1}::2   veth_B-R1
"

# The nh id column refers to the ids defined in the nexthops table above
#       ns    family    prefix                  nh id
routes_nh="
        A       4       default                 41
        A       4       ${prefix4}.${b_r2}.1    42
        B       4       default                 41

        A       6       default                 61
        A       6       ${prefix6}:${b_r2}::1   62
        B       6       default                 61
"

policy_mark=0x04
# Routing table that the setup_routing_* functions add routes to
rt_table=main

# Addresses for the directly connected veth pair (and for C)
veth4_a_addr=192.168.1.1
veth4_b_addr=192.168.1.2
veth4_c_addr=192.168.2.10
veth4_mask=24
veth6_a_addr=fd00:1::a
veth6_b_addr=fd00:1::b
veth6_c_addr=fd00:2::c
veth6_mask=64

# Addresses assigned to tunnel (or bridge) interfaces
tunnel4_a_addr=192.168.2.1
tunnel4_b_addr=192.168.2.2
tunnel4_mask=24
tunnel6_a_addr=fd00:2::a
tunnel6_b_addr=fd00:2::b
tunnel6_mask=64

dummy6_0_prefix=fc00:1000::
dummy6_1_prefix=fc00:1001::
dummy6_mask=64

# Buffer of pending error messages, and lists of background process ids
err_buf=""
tcpdump_pids=""
nettest_pids=""
socat_pids=""
0364 
# err - Queue an error message instead of printing it right away
# $1: message text; a newline is appended to it
#
# The message is added at the end of the global err_buf.
err() {
        err_buf="${err_buf}${1}"'
'
}
0369 
# err_flush - Print all queued error messages, then empty the queue
#
# Writes the content of the global err_buf to standard output, exactly as
# stored, and resets err_buf.
err_flush() {
        # printf '%s' rather than 'echo -n': the latter isn't portable across
        # /bin/sh implementations, and may interpret backslash sequences or
        # option-like text found in the buffer
        printf '%s' "${err_buf}"
        err_buf=
}
0374 
# run_cmd - Run a command in the foreground, capturing its output
# $*: command and arguments. They are joined into a single string which is
#     split again on whitespace before execution, so a single argument can't
#     contain blanks
#
# Sets the globals cmd (the command string), out (standard output and standard
# error of the command) and rc (its exit status). With VERBOSE set to 1, the
# command and its output, if any, are printed.
#
# Returns the exit status of the command.
run_cmd() {
        cmd="$*"

        if [ "$VERBOSE" = "1" ]; then
                # The command is passed as an argument, not as part of the
                # format string: any '%' or '\' in it is printed as it is
                printf "    COMMAND: %s\n" "$cmd"
        fi

        out="$($cmd 2>&1)"
        rc=$?
        if [ "$VERBOSE" = "1" ] && [ -n "$out" ]; then
                echo "    $out"
                echo
        fi

        return $rc
}
0391 
# run_cmd_bg - Start a command in the background
# $*: command and arguments, joined and then split again on whitespace
#
# The command's standard error is redirected to standard output. With VERBOSE
# set to 1, the command is printed first. The background job is the last one
# started here, so callers can fetch its process id from $!.
run_cmd_bg() {
        cmd="$*"

        [ "$VERBOSE" != "1" ] || printf "    COMMAND: %s &\n" "${cmd}"

        $cmd 2>&1 &
}
0401 
# nsname - Print the name of the namespace with the given identifier
# $1: identifier (A, B, C, R1 or R2), that is, the suffix of a NS_* variable
nsname() {
        # POSIX sh has no indirect expansion, hence eval. The value is
        # expanded within double quotes, so that it's printed verbatim,
        # without word splitting or pathname expansion.
        # $1 ends up in evaluated code: pass trusted identifiers only.
        eval "printf '%s\n' \"\${NS_${1}}\""
}
0406 
# setup_fou_or_gue - Set up a FoU or GUE tunnel between namespaces A and B
# $1: outer (transport) IP version: 4 for IPv4, anything else selects IPv6
# $2: inner (tunneled) IP version: 4 for IPv4, anything else selects IPv6
# $3: encapsulation, passed as 'encap' to ip-link and used as prefix of the
#     interface names (<encap>_a and <encap>_b)
#
# Tunnel endpoints are the addresses of A and B on the a_r1 and b_r1 segments.
# Port 5555 is the receive port in A, 5556 the one in B.
#
# Returns $ksft_skip if the fou/fou6 module can't be loaded or if the receive
# port or tunnel link can't be created in A. Otherwise, the exit status is the
# one of the last command run.
setup_fou_or_gue() {
        outer="${1}"
        inner="${2}"
        encap="${3}"

        # Only ip6tnl links need an explicit mode. Reset it, so that links
        # over IPv4 don't pick up a value left behind by a previous call or
        # by another function using the same global.
        mode=""

        if [ "${outer}" = "4" ]; then
                modprobe fou || return $ksft_skip
                a_addr="${prefix4}.${a_r1}.1"
                b_addr="${prefix4}.${b_r1}.1"
                if [ "${inner}" = "4" ]; then
                        type="ipip"
                        ipproto="4"
                else
                        type="sit"
                        ipproto="41"
                fi
        else
                modprobe fou6 || return $ksft_skip
                a_addr="${prefix6}:${a_r1}::1"
                b_addr="${prefix6}:${b_r1}::1"
                if [ "${inner}" = "4" ]; then
                        type="ip6tnl"
                        mode="mode ipip6"
                        ipproto="4 -6"
                else
                        type="ip6tnl"
                        mode="mode ip6ip6"
                        ipproto="41 -6"
                fi
        fi

        # Failures on these two commands are taken as lack of support
        run_cmd ${ns_a} ip fou add port 5555 ipproto ${ipproto} || return $ksft_skip
        run_cmd ${ns_a} ip link add ${encap}_a type ${type} ${mode} local ${a_addr} remote ${b_addr} encap ${encap} encap-sport auto encap-dport 5556 || return $ksft_skip

        run_cmd ${ns_b} ip fou add port 5556 ipproto ${ipproto}
        run_cmd ${ns_b} ip link add ${encap}_b type ${type} ${mode} local ${b_addr} remote ${a_addr} encap ${encap} encap-sport auto encap-dport 5555

        if [ "${inner}" = "4" ]; then
                run_cmd ${ns_a} ip addr add ${tunnel4_a_addr}/${tunnel4_mask} dev ${encap}_a
                run_cmd ${ns_b} ip addr add ${tunnel4_b_addr}/${tunnel4_mask} dev ${encap}_b
        else
                run_cmd ${ns_a} ip addr add ${tunnel6_a_addr}/${tunnel6_mask} dev ${encap}_a
                run_cmd ${ns_b} ip addr add ${tunnel6_b_addr}/${tunnel6_mask} dev ${encap}_b
        fi

        run_cmd ${ns_a} ip link set ${encap}_a up
        run_cmd ${ns_b} ip link set ${encap}_b up
}
0455 
# FoU tunnel: IPv4 transport, IPv4 inside
setup_fou44() {
        setup_fou_or_gue "4" "4" "fou"
}
0459 
# FoU tunnel: IPv4 transport, IPv6 inside
setup_fou46() {
        setup_fou_or_gue "4" "6" "fou"
}
0463 
# FoU tunnel: IPv6 transport, IPv4 inside
setup_fou64() {
        setup_fou_or_gue "6" "4" "fou"
}
0467 
# FoU tunnel: IPv6 transport, IPv6 inside
setup_fou66() {
        setup_fou_or_gue "6" "6" "fou"
}
0471 
# GUE tunnel: IPv4 transport, IPv4 inside
setup_gue44() {
        setup_fou_or_gue "4" "4" "gue"
}
0475 
# GUE tunnel: IPv4 transport, IPv6 inside
setup_gue46() {
        setup_fou_or_gue "4" "6" "gue"
}
0479 
# GUE tunnel: IPv6 transport, IPv4 inside
setup_gue64() {
        setup_fou_or_gue "6" "4" "gue"
}
0483 
# GUE tunnel: IPv6 transport, IPv6 inside
setup_gue66() {
        setup_fou_or_gue "6" "6" "gue"
}
0487 
# setup_ipvX_over_ipvY - Set up a plain IP-in-IP tunnel between A and B
# $1: inner (tunneled) IP version, 4 or 6
# $2: outer (transport) IP version, 4 or 6
#
# Creates ip_a in A and ip_b in B between the addresses of A and B on the
# a_r1 and b_r1 segments, brings them up and assigns the tunnel addresses
# matching the inner IP version.
#
# Returns $ksft_skip if the link can't be created in A. Otherwise, the exit
# status is the one of the last command run.
setup_ipvX_over_ipvY() {
        inner=${1}
        outer=${2}

        # For each transport, start from the settings for IPv6 payload and
        # adjust them if the payload is IPv4
        if [ "${outer}" -eq 4 ]; then
                a_addr="${prefix4}.${a_r1}.1"
                b_addr="${prefix4}.${b_r1}.1"
                type="sit"
                mode="ip6ip"
                if [ "${inner}" -eq 4 ]; then
                        type="ipip"
                        mode="ipip"
                fi
        else
                a_addr="${prefix6}:${a_r1}::1"
                b_addr="${prefix6}:${b_r1}::1"
                type="ip6tnl"
                mode="ip6ip6"
                if [ "${inner}" -eq 4 ]; then
                        mode="ipip6"
                fi
        fi

        # A failure on the first link is taken as lack of support
        if ! run_cmd ${ns_a} ip link add ip_a type ${type} local ${a_addr} remote ${b_addr} mode ${mode}; then
                return $ksft_skip
        fi
        run_cmd ${ns_b} ip link add ip_b type ${type} local ${b_addr} remote ${a_addr} mode ${mode}

        run_cmd ${ns_a} ip link set ip_a up
        run_cmd ${ns_b} ip link set ip_b up

        case "${inner}" in
        4)
                run_cmd ${ns_a} ip addr add ${tunnel4_a_addr}/${tunnel4_mask} dev ip_a
                run_cmd ${ns_b} ip addr add ${tunnel4_b_addr}/${tunnel4_mask} dev ip_b
                ;;
        *)
                run_cmd ${ns_a} ip addr add ${tunnel6_a_addr}/${tunnel6_mask} dev ip_a
                run_cmd ${ns_b} ip addr add ${tunnel6_b_addr}/${tunnel6_mask} dev ip_b
                ;;
        esac
}
0527 
# IP tunnel: IPv4 inside, IPv4 transport
setup_ip4ip4() {
        setup_ipvX_over_ipvY "4" "4"
}
0531 
# IP tunnel: IPv6 inside, IPv4 transport
setup_ip6ip4() {
        setup_ipvX_over_ipvY "6" "4"
}
0535 
# IP tunnel: IPv4 inside, IPv6 transport
setup_ip4ip6() {
        setup_ipvX_over_ipvY "4" "6"
}
0539 
# IP tunnel: IPv6 inside, IPv6 transport
setup_ip6ip6() {
        setup_ipvX_over_ipvY "6" "6"
}
0543 
# setup_namespaces - Create the five namespaces: A, B, C, R1 and R2
#
# Returns 1 as soon as a namespace can't be created. Otherwise, the exit
# status is the one of the last sysctl command.
setup_namespaces() {
        for n in ${NS_A} ${NS_B} ${NS_C} ${NS_R1} ${NS_R2}; do
                if ! ip netns add "${n}"; then
                        return 1
                fi

                # With DAD disabled, configured IPv6 addresses can be used
                # right away, without waiting
                ip netns exec "${n}" sysctl -q net/ipv6/conf/default/accept_dad=0
        done
}
0553 
# setup_veth - Connect namespaces A and B directly with a veth pair
#
# veth_a stays in A, veth_b is moved to B. Both get their IPv4 and IPv6
# addresses and are brought up.
#
# Returns 1 if the veth pair can't be created. Otherwise, the exit status is
# the one of the last command run.
setup_veth() {
        if ! run_cmd ${ns_a} ip link add veth_a type veth peer name veth_b; then
                return 1
        fi
        run_cmd ${ns_a} ip link set veth_b netns "${NS_B}"

        # IPv4 addresses
        run_cmd ${ns_a} ip addr add "${veth4_a_addr}/${veth4_mask}" dev veth_a
        run_cmd ${ns_b} ip addr add "${veth4_b_addr}/${veth4_mask}" dev veth_b

        # IPv6 addresses
        run_cmd ${ns_a} ip addr add "${veth6_a_addr}/${veth6_mask}" dev veth_a
        run_cmd ${ns_b} ip addr add "${veth6_b_addr}/${veth6_mask}" dev veth_b

        run_cmd ${ns_a} ip link set veth_a up
        run_cmd ${ns_b} ip link set veth_b up
}
0567 
# setup_vti - Create a pair of vti or vti6 interfaces in A and B
# $1: IP version: 6 selects link type vti6, anything else vti
# $2: underlying (local) address in A, remote address for B
# $3: underlying (local) address in B, remote address for A
# $4: address for the vti interface in A
# $5: address for the vti interface in B
# $6: prefix length for both vti addresses
#
# Interfaces are named vti<version>_a and vti<version>_b, both with key 10.
#
# Returns 1 if the interface can't be created in A. Otherwise, the exit
# status is the one of the last command run.
setup_vti() {
        proto=${1}
        veth_a_addr="${2}"
        veth_b_addr="${3}"
        vti_a_addr="${4}"
        vti_b_addr="${5}"
        vti_mask=${6}

        if [ "${proto}" -eq 6 ]; then
                vti_type="vti6"
        else
                vti_type="vti"
        fi

        if ! run_cmd ${ns_a} ip link add vti${proto}_a type ${vti_type} local ${veth_a_addr} remote ${veth_b_addr} key 10; then
                return 1
        fi
        run_cmd ${ns_b} ip link add vti${proto}_b type ${vti_type} local ${veth_b_addr} remote ${veth_a_addr} key 10

        run_cmd ${ns_a} ip addr add ${vti_a_addr}/${vti_mask} dev vti${proto}_a
        run_cmd ${ns_b} ip addr add ${vti_b_addr}/${vti_mask} dev vti${proto}_b

        run_cmd ${ns_a} ip link set vti${proto}_a up
        run_cmd ${ns_b} ip link set vti${proto}_b up
}
0587 
# vti interfaces on top of the IPv4 addresses of the veth pair
setup_vti4() {
        setup_vti 4 "${veth4_a_addr}" "${veth4_b_addr}" \
                "${tunnel4_a_addr}" "${tunnel4_b_addr}" "${tunnel4_mask}"
}
0591 
# vti6 interfaces on top of the IPv6 addresses of the veth pair
setup_vti6() {
        setup_vti 6 "${veth6_a_addr}" "${veth6_b_addr}" \
                "${tunnel6_a_addr}" "${tunnel6_b_addr}" "${tunnel6_mask}"
}
0595 
# vti interfaces between the IPv4 addresses of A and B on the a_r1 and b_r1
# segments
setup_vti4routed() {
        setup_vti 4 "${prefix4}.${a_r1}.1" "${prefix4}.${b_r1}.1" \
                "${tunnel4_a_addr}" "${tunnel4_b_addr}" "${tunnel4_mask}"
}
0599 
# vti6 interfaces between the IPv6 addresses of A and B on the a_r1 and b_r1
# segments
setup_vti6routed() {
        setup_vti 6 "${prefix6}:${a_r1}::1" "${prefix6}:${b_r1}::1" \
                "${tunnel6_a_addr}" "${tunnel6_b_addr}" "${tunnel6_mask}"
}
0603 
# setup_vxlan_or_geneve - Set up a VXLAN or GENEVE tunnel between A and B
# $1: link type, vxlan or geneve, also used as prefix of interface names
# $2: tunnel endpoint address in A
# $3: tunnel endpoint address in B
# $4: additional options for 'ip link add', possibly empty
# $5: optional, name of a bridge in A: if given, the tunnel interface in A is
#     enslaved to it and the tunnel addresses for A are set on the bridge
#
# Returns 1 if the link can't be created in A. Otherwise, the exit status is
# the one of the last command run.
setup_vxlan_or_geneve() {
        type="${1}"
        a_addr="${2}"
        b_addr="${3}"
        opts="${4}"
        br_if_a="${5}"

        # Only vxlan links get explicit TTL, port and local address
        opts_a=""
        opts_b=""
        if [ "${type}" = "vxlan" ]; then
                opts="${opts} ttl 64 dstport 4789"
                opts_a="local ${a_addr}"
                opts_b="local ${b_addr}"
        fi

        if ! run_cmd ${ns_a} ip link add ${type}_a type ${type} id 1 ${opts_a} remote ${b_addr} ${opts}; then
                return 1
        fi
        run_cmd ${ns_b} ip link add ${type}_b type ${type} id 1 ${opts_b} remote ${a_addr} ${opts}

        if [ -z "${br_if_a}" ]; then
                run_cmd ${ns_a} ip addr add ${tunnel4_a_addr}/${tunnel4_mask} dev ${type}_a
                run_cmd ${ns_a} ip addr add ${tunnel6_a_addr}/${tunnel6_mask} dev ${type}_a
        else
                run_cmd ${ns_a} ip addr add ${tunnel4_a_addr}/${tunnel4_mask} dev ${br_if_a}
                run_cmd ${ns_a} ip addr add ${tunnel6_a_addr}/${tunnel6_mask} dev ${br_if_a}
                run_cmd ${ns_a} ip link set ${type}_a master ${br_if_a}
        fi

        run_cmd ${ns_b} ip addr add ${tunnel4_b_addr}/${tunnel4_mask} dev ${type}_b
        run_cmd ${ns_b} ip addr add ${tunnel6_b_addr}/${tunnel6_mask} dev ${type}_b

        run_cmd ${ns_a} ip link set ${type}_a up
        run_cmd ${ns_b} ip link set ${type}_b up
}
0638 
# GENEVE tunnel over IPv4, with "df set"
setup_geneve4() {
        setup_vxlan_or_geneve "geneve" "${prefix4}.${a_r1}.1" "${prefix4}.${b_r1}.1" "df set"
}
0642 
# VXLAN tunnel over IPv4, with "df set"
setup_vxlan4() {
        setup_vxlan_or_geneve "vxlan" "${prefix4}.${a_r1}.1" "${prefix4}.${b_r1}.1" "df set"
}
0646 
# GENEVE tunnel over IPv6, no additional link options
setup_geneve6() {
        setup_vxlan_or_geneve "geneve" "${prefix6}:${a_r1}::1" "${prefix6}:${b_r1}::1" ""
}
0650 
# VXLAN tunnel over IPv6, no additional link options
setup_vxlan6() {
        setup_vxlan_or_geneve "vxlan" "${prefix6}:${a_r1}::1" "${prefix6}:${b_r1}::1" ""
}
0654 
# GENEVE tunnel over IPv4, with "df set", enslaved to br0 in A
setup_bridged_geneve4() {
        setup_vxlan_or_geneve "geneve" "${prefix4}.${a_r1}.1" "${prefix4}.${b_r1}.1" "df set" "br0"
}
0658 
# VXLAN tunnel over IPv4, with "df set", enslaved to br0 in A
setup_bridged_vxlan4() {
        setup_vxlan_or_geneve "vxlan" "${prefix4}.${a_r1}.1" "${prefix4}.${b_r1}.1" "df set" "br0"
}
0662 
# GENEVE tunnel over IPv6, no additional link options, enslaved to br0 in A
setup_bridged_geneve6() {
        setup_vxlan_or_geneve "geneve" "${prefix6}:${a_r1}::1" "${prefix6}:${b_r1}::1" "" "br0"
}
0666 
# VXLAN tunnel over IPv6, no additional link options, enslaved to br0 in A
setup_bridged_vxlan6() {
        setup_vxlan_or_geneve "vxlan" "${prefix6}:${a_r1}::1" "${prefix6}:${b_r1}::1" "" "br0"
}
0670 
# setup_xfrm - Install xfrm states and policies for an ESP tunnel in A and B
# $1: IP version, passed to ip as -4 or -6
# $2: tunnel endpoint address in A
# $3: tunnel endpoint address in B
# $4: optional, additional arguments for 'xfrm state add' (e.g. encap ...)
#
# In both namespaces: one state per direction (SPI 0x1000 from A to B, 0x1001
# from B to A), plus one 'out' and one 'in' policy with mark 10.
#
# Returns 1 if the first state can't be added in A. Otherwise, the exit
# status is the one of the last command run.
setup_xfrm() {
        proto=${1}
        veth_a_addr="${2}"
        veth_b_addr="${3}"
        encap=${4}

        # Namespace A
        run_cmd ${ns_a} ip -${proto} xfrm state add \
                src ${veth_a_addr} dst ${veth_b_addr} spi 0x1000 \
                proto esp aead 'rfc4106(gcm(aes))' 0x0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f 128 \
                mode tunnel ${encap} || return 1
        run_cmd ${ns_a} ip -${proto} xfrm state add \
                src ${veth_b_addr} dst ${veth_a_addr} spi 0x1001 \
                proto esp aead 'rfc4106(gcm(aes))' 0x0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f 128 \
                mode tunnel ${encap}
        run_cmd ${ns_a} ip -${proto} xfrm policy add dir out mark 10 \
                tmpl src ${veth_a_addr} dst ${veth_b_addr} proto esp mode tunnel
        run_cmd ${ns_a} ip -${proto} xfrm policy add dir in mark 10 \
                tmpl src ${veth_b_addr} dst ${veth_a_addr} proto esp mode tunnel

        # Namespace B: same states, policies with directions swapped
        run_cmd ${ns_b} ip -${proto} xfrm state add \
                src ${veth_a_addr} dst ${veth_b_addr} spi 0x1000 \
                proto esp aead 'rfc4106(gcm(aes))' 0x0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f 128 \
                mode tunnel ${encap}
        run_cmd ${ns_b} ip -${proto} xfrm state add \
                src ${veth_b_addr} dst ${veth_a_addr} spi 0x1001 \
                proto esp aead 'rfc4106(gcm(aes))' 0x0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f 128 \
                mode tunnel ${encap}
        run_cmd ${ns_b} ip -${proto} xfrm policy add dir out mark 10 \
                tmpl src ${veth_b_addr} dst ${veth_a_addr} proto esp mode tunnel
        run_cmd ${ns_b} ip -${proto} xfrm policy add dir in mark 10 \
                tmpl src ${veth_a_addr} dst ${veth_b_addr} proto esp mode tunnel
}
0687 
# setup_nettest_xfrm - Start nettest in the background in namespaces A and B
# $1: IP version: 6 adds the -6 option, anything else uses no option
# $2: port, passed to nettest as -p
#
# The process ids of both instances are appended to the global nettest_pids.
#
# Returns 1, after printing a message, if nettest isn't available.
setup_nettest_xfrm() {
        # 'command -v' is the POSIX way to look a command up: 'which' isn't
        # necessarily installed, and it might report failures on standard
        # error
        if ! command -v nettest > /dev/null 2>&1; then
                echo "'nettest' command not found; skipping tests"
                return 1
        fi

        if [ "${1}" -eq 6 ]; then
                proto="-6"
        else
                proto=""
        fi
        port=${2}

        run_cmd_bg "${ns_a}" nettest "${proto}" -q -D -s -x -p "${port}" -t 5
        nettest_pids="${nettest_pids} $!"

        run_cmd_bg "${ns_b}" nettest "${proto}" -q -D -s -x -p "${port}" -t 5
        nettest_pids="${nettest_pids} $!"
}
0704 
# xfrm states and policies between the IPv4 addresses of the veth pair
setup_xfrm4() {
        setup_xfrm 4 "${veth4_a_addr}" "${veth4_b_addr}"
}
0708 
# xfrm states and policies between the IPv6 addresses of the veth pair
setup_xfrm6() {
        setup_xfrm 6 "${veth6_a_addr}" "${veth6_b_addr}"
}
0712 
# setup_xfrm4udp - xfrm setup with ESP-in-UDP between the IPv4 addresses of
# the veth pair, plus nettest instances on port 4500
#
# Returns the exit status of setup_xfrm if it fails, without starting nettest:
# a failed xfrm setup must not be hidden by a successful nettest start.
# Otherwise, returns the exit status of setup_nettest_xfrm.
setup_xfrm4udp() {
        setup_xfrm 4 ${veth4_a_addr} ${veth4_b_addr} "encap espinudp 4500 4500 0.0.0.0" || return $?
        setup_nettest_xfrm 4 4500
}
0717 
# setup_xfrm6udp - xfrm setup with ESP-in-UDP between the IPv6 addresses of
# the veth pair, plus nettest instances on port 4500
#
# Returns the exit status of setup_xfrm if it fails, without starting nettest:
# a failed xfrm setup must not be hidden by a successful nettest start.
# Otherwise, returns the exit status of setup_nettest_xfrm.
setup_xfrm6udp() {
        setup_xfrm 6 ${veth6_a_addr} ${veth6_b_addr} "encap espinudp 4500 4500 0.0.0.0" || return $?
        setup_nettest_xfrm 6 4500
}
0722 
# setup_xfrm4udprouted - xfrm setup with ESP-in-UDP between the IPv4 addresses
# of A and B on the a_r1 and b_r1 segments, plus nettest instances on port 4500
#
# Returns the exit status of setup_xfrm if it fails, without starting nettest:
# a failed xfrm setup must not be hidden by a successful nettest start.
# Otherwise, returns the exit status of setup_nettest_xfrm.
setup_xfrm4udprouted() {
        setup_xfrm 4 ${prefix4}.${a_r1}.1 ${prefix4}.${b_r1}.1 "encap espinudp 4500 4500 0.0.0.0" || return $?
        setup_nettest_xfrm 4 4500
}
0727 
0728 setup_xfrm6udprouted() {
0729         setup_xfrm 6 ${prefix6}:${a_r1}::1 ${prefix6}:${b_r1}::1 "encap espinudp 4500 4500 0.0.0.0"
0730         setup_nettest_xfrm 6 4500
0731 }
0732 
# Install the routes listed in ${routes} as plain gateway routes.
#
# ${routes} is a whitespace-separated list of <ns> <addr> <gw> triplets. Each
# triplet adds a route to <addr> via <gw>, in table ${rt_table}, in the
# namespace whose name nsname returns for <ns>.
setup_routing_old() {
	# The triplet parser keeps its state in globals, an empty variable
	# meaning "field not read yet". Start from a known-empty state, so that
	# values left behind by other code can't be mistaken for the first
	# triplet.
	ns=""; addr=""; gw=""

	for i in ${routes}; do
		[ "${ns}" = "" ]	&& ns="${i}"		&& continue
		[ "${addr}" = "" ]	&& addr="${i}"		&& continue
		[ "${gw}" = "" ]	&& gw="${i}"

		ns_name="$(nsname ${ns})"

		ip -n "${ns_name}" route add "${addr}" table "${rt_table}" via "${gw}"

		ns=""; addr=""; gw=""
	done
}
0746 
# Install routes by means of nexthop objects.
#
# ${nexthops} is a whitespace-separated list of <ns> <family> <nhid> <gw> <dev>
# tuples, each creating one nexthop object. ${routes_nh} is a list of
# <ns> <family> <addr> <nhid> tuples, each adding a route to <addr>, in table
# ${rt_table}, that refers to nexthop <nhid>. Namespace names are resolved
# with nsname.
setup_routing_new() {
	# Both parsers keep their state in globals, an empty variable meaning
	# "field not read yet". Other helpers in this file assign variables with
	# the same names (mtu() sets dev, for instance): clear the state
	# explicitly instead of trusting it to be empty.
	ns=""; fam=""; nhid=""; gw=""; dev=""

	for i in ${nexthops}; do
		[ "${ns}" = "" ]	&& ns="${i}"		&& continue
		[ "${fam}" = "" ]	&& fam="${i}"		&& continue
		[ "${nhid}" = "" ]	&& nhid="${i}"		&& continue
		[ "${gw}" = "" ]	&& gw="${i}"		&& continue
		[ "${dev}" = "" ]	&& dev="${i}"

		ns_name="$(nsname ${ns})"

		ip -n ${ns_name} -${fam} nexthop add id ${nhid} via ${gw} dev ${dev}

		ns=""; fam=""; nhid=""; gw=""; dev=""

	done

	# An incomplete trailing tuple above must not leak into this list
	ns=""; fam=""; addr=""; nhid=""

	for i in ${routes_nh}; do
		[ "${ns}" = "" ]	&& ns="${i}"		&& continue
		[ "${fam}" = "" ]	&& fam="${i}"		&& continue
		[ "${addr}" = "" ]	&& addr="${i}"		&& continue
		[ "${nhid}" = "" ]	&& nhid="${i}"

		ns_name="$(nsname ${ns})"

		ip -n "${ns_name}" -"${fam}" route add "${addr}" table "${rt_table}" nhid "${nhid}"

		ns=""; fam=""; addr=""; nhid=""
	done
}
0776 
# Enable forwarding on the routers, create and address the veth links listed
# in ${routing_addrs}, then install routes.
#
# ${routing_addrs} is a whitespace-separated list of <ns> <peer> <segment>
# triplets. For each one, a veth pair veth_<ns>-<peer> / veth_<peer>-<ns> is
# created between the two namespaces; the <ns> end gets the .1 / ::1 address
# of the segment, the <peer> end gets .2 / ::2.
#
# Routes are installed by setup_routing_new if USE_NH is "yes", and by
# setup_routing_old otherwise.
#
# Returns 1 if a veth pair can't be created, 0 otherwise.
setup_routing() {
	for i in ${NS_R1} ${NS_R2}; do
		ip netns exec ${i} sysctl -q net/ipv4/ip_forward=1
		ip netns exec ${i} sysctl -q net/ipv6/conf/all/forwarding=1
	done

	# The triplet parser keeps its state in globals, an empty variable
	# meaning "field not read yet": start from a known-empty state, so that
	# leftover values can't be mistaken for the first triplet
	ns=""; peer=""; segment=""

	for i in ${routing_addrs}; do
		[ "${ns}" = "" ]	&& ns="${i}"		&& continue
		[ "${peer}" = "" ]	&& peer="${i}"		&& continue
		[ "${segment}" = "" ]	&& segment="${i}"

		ns_name="$(nsname ${ns})"
		peer_name="$(nsname ${peer})"
		# Note: 'if=' is a plain assignment to a variable named "if"
		if="veth_${ns}-${peer}"
		ifpeer="veth_${peer}-${ns}"

		# Create veth links
		ip link add ${if} up netns ${ns_name} type veth peer name ${ifpeer} netns ${peer_name} || return 1
		ip -n ${peer_name} link set dev ${ifpeer} up

		# Add addresses
		ip -n ${ns_name}   addr add ${prefix4}.${segment}.1/24  dev ${if}
		ip -n ${ns_name}   addr add ${prefix6}:${segment}::1/64 dev ${if}

		ip -n ${peer_name} addr add ${prefix4}.${segment}.2/24  dev ${ifpeer}
		ip -n ${peer_name} addr add ${prefix6}:${segment}::2/64 dev ${ifpeer}

		ns=""; peer=""; segment=""
	done

	if [ "$USE_NH" = "yes" ]; then
		setup_routing_new
	else
		setup_routing_old
	fi

	return 0
}
0815 
# Routing setup for the DSCP tests: the regular setup_routing, plus an IPv4
# rule in A sending packets whose dsfield is ${policy_mark} to table
# ${rt_table}, plus tc actions on both veth interfaces of A that set the
# Don't Fragment bit on UDP packets.
setup_policy_routing() {
	setup_routing

	ip -netns "${NS_A}" -4 rule add dsfield "${policy_mark}" table "${rt_table}"

	# socat doesn't seem to offer a way to set the IPv4 Don't Fragment bit:
	# let tc do that instead.
	tc -netns "${NS_A}" qdisc replace dev veth_A-R1 root prio
	tc -netns "${NS_A}" qdisc replace dev veth_A-R2 root prio

	tc -netns "${NS_A}" filter add dev veth_A-R1 protocol ipv4 \
		flower ip_proto udp \
		action pedit ex munge ip df set 0x40 pipe csum ip and udp
	tc -netns "${NS_A}" filter add dev veth_A-R2 protocol ipv4 \
		flower ip_proto udp \
		action pedit ex munge ip df set 0x40 pipe csum ip and udp
}
0833 
# Create bridge br0 in A, and a veth pair between C and A whose A end
# (veth_A-C) is enslaved to br0. The C end (veth_C-A) gets the veth addresses
# of C.
#
# Returns $ksft_skip if the bridge can't be created; otherwise, the status of
# the last command.
setup_bridge() {
	run_cmd ${ns_a} ip link add br0 type bridge || return $ksft_skip
	run_cmd ${ns_a} ip link set br0 up

	run_cmd ${ns_c} ip link add veth_C-A type veth peer name veth_A-C
	# Name the namespace through ${NS_A}, like the rest of the file does,
	# instead of hard-coding its current value
	run_cmd ${ns_c} ip link set veth_A-C netns ${NS_A}

	run_cmd ${ns_a} ip link set veth_A-C up
	run_cmd ${ns_c} ip link set veth_C-A up
	run_cmd ${ns_c} ip addr add ${veth4_c_addr}/${veth4_mask} dev veth_C-A
	run_cmd ${ns_c} ip addr add ${veth6_c_addr}/${veth6_mask} dev veth_C-A
	run_cmd ${ns_a} ip link set veth_A-C master br0
}
0847 
# Add a tunnel port to Open vSwitch bridge ovs_br0 and create the matching
# tunnel interface in B.
#
# $1: tunnel type, "vxlan" or "geneve"
# $2: outer address on the A side (used as remote by B)
# $3: outer address of B (used as remote by the OVS port)
#
# Returns 1 if the OVS port or the interface in B can't be created.
setup_ovs_vxlan_or_geneve() {
	type="${1}"
	a_addr="${2}"
	b_addr="${3}"

	# Extra link options are only passed for VXLAN
	case "${type}" in
	vxlan)
		opts="${opts} ttl 64 dstport 4789"
		opts_b="local ${b_addr}"
		;;
	esac

	# Side A: port ${type}_a on the OVS bridge
	run_cmd ovs-vsctl add-port ovs_br0 ${type}_a \
		-- set interface ${type}_a type=${type} \
		options:remote_ip=${b_addr} \
		options:key=1 \
		options:csum=true || return 1

	# Side B: regular tunnel interface with the tunnel addresses of B
	run_cmd ${ns_b} ip link add ${type}_b type ${type} id 1 \
		${opts_b} remote ${a_addr} ${opts} || return 1
	run_cmd ${ns_b} ip addr add ${tunnel4_b_addr}/${tunnel4_mask} dev ${type}_b
	run_cmd ${ns_b} ip addr add ${tunnel6_b_addr}/${tunnel6_mask} dev ${type}_b
	run_cmd ${ns_b} ip link set ${type}_b up
}
0869 
# OVS tunnel setups: tunnel type and outer IP version are given by the name.
# Endpoints are the .1 (IPv4) or ::1 (IPv6) addresses on segments a_r1 and
# b_r1.

setup_ovs_geneve4() {
	setup_ovs_vxlan_or_geneve geneve \
		${prefix4}.${a_r1}.1 ${prefix4}.${b_r1}.1
}

setup_ovs_vxlan4() {
	setup_ovs_vxlan_or_geneve vxlan \
		${prefix4}.${a_r1}.1 ${prefix4}.${b_r1}.1
}

setup_ovs_geneve6() {
	setup_ovs_vxlan_or_geneve geneve \
		${prefix6}:${a_r1}::1 ${prefix6}:${b_r1}::1
}

setup_ovs_vxlan6() {
	setup_ovs_vxlan_or_geneve vxlan \
		${prefix6}:${a_r1}::1 ${prefix6}:${b_r1}::1
}
0885 
# Create Open vSwitch bridge ovs_br0 outside the test namespaces, attach C to
# it with a veth pair, and move veth_A-R1 out of A so that it takes the role
# of the A side of the path via R1.
#
# Returns $ksft_skip if the bridge can't be created; otherwise, the status of
# the last command.
setup_ovs_bridge() {
	run_cmd ovs-vsctl add-br ovs_br0 || return $ksft_skip
	run_cmd ip link set ovs_br0 up

	# veth pair between C and the bridge: veth_A-C is moved out of C
	# ("netns 1") and becomes a bridge port, veth_C-A stays in C and gets
	# the veth addresses of C
	run_cmd ${ns_c} ip link add veth_C-A type veth peer name veth_A-C
	run_cmd ${ns_c} ip link set veth_A-C netns 1

	run_cmd ip link set veth_A-C up
	run_cmd ${ns_c} ip link set veth_C-A up
	run_cmd ${ns_c} ip addr add ${veth4_c_addr}/${veth4_mask} dev veth_C-A
	run_cmd ${ns_c} ip addr add ${veth6_c_addr}/${veth6_mask} dev veth_C-A
	run_cmd ovs-vsctl add-port ovs_br0 veth_A-C

	# Move veth_A-R1 to init, then give it back its addresses and add
	# routes to B via R1
	run_cmd ${ns_a} ip link set veth_A-R1 netns 1
	run_cmd ip addr add ${prefix4}.${a_r1}.1/${veth4_mask} dev veth_A-R1
	run_cmd ip addr add ${prefix6}:${a_r1}::1/${veth6_mask} dev veth_A-R1
	run_cmd ip link set veth_A-R1 up
	run_cmd ip route add ${prefix4}.${b_r1}.1 via ${prefix4}.${a_r1}.2
	run_cmd ip route add ${prefix6}:${b_r1}::1 via ${prefix6}:${a_r1}::2
}
0907 
# Run setup_<arg> for every argument, in order.
#
# Returns $ksft_skip, after printing a notice, unless run as root, and 1 as
# soon as one of the setup functions fails.
setup() {
	if [ "$(id -u)" -ne 0 ]; then
		echo "  need to run as root" && return $ksft_skip
	fi

	for arg do
		if ! eval setup_${arg}; then
			echo "  ${arg} not supported"
			return 1
		fi
	done
}
0915 
# Start packet captures, if tracing is enabled (TRACING not 0).
#
# Arguments are <ns_cmd> <interface> pairs: <ns_cmd> is the command prefix
# used to run tcpdump (for instance the value of ${ns_a}), and it may be
# empty, meaning that tcpdump is run without prefix. Each capture is written
# to ${name}_<interface>.pcap, and the PID of each tcpdump is appended to
# tcpdump_pids, which cleanup() walks to kill them.
trace() {
	[ $TRACING -eq 0 ] && return

	# Consume arguments strictly in pairs. An empty string can't double as
	# "prefix not read yet" marker: callers legitimately pass "" as prefix,
	# and that would shift all the following pairs by one argument.
	while [ $# -ge 2 ]; do
		ns_cmd="${1}"
		${ns_cmd} tcpdump --immediate-mode -s 0 -i "${2}" -w "${name}_${2}.pcap" 2> /dev/null &
		tcpdump_pids="${tcpdump_pids} $!"
		shift 2
	done
	ns_cmd=

	# Leave tcpdump some time to start before traffic is generated
	sleep 1
}
0927 
# Undo what setup functions and tests did: kill the tcpdump, nettest and
# socat processes recorded in the *_pids lists, delete the test namespaces,
# then delete the links and Open vSwitch objects that were created outside
# of them. Errors from deleting things that don't exist are silenced.
cleanup() {
	# Same order as three separate loops: tcpdump, nettest, socat
	for pid in ${tcpdump_pids} ${nettest_pids} ${socat_pids}; do
		kill ${pid}
	done
	tcpdump_pids=
	nettest_pids=
	socat_pids=

	for n in ${NS_A} ${NS_B} ${NS_C} ${NS_R1} ${NS_R2}; do
		ip netns del ${n} 2> /dev/null
	done

	# Leftovers outside the test namespaces
	ip link del veth_A-C 2> /dev/null
	ip link del veth_A-R1 2> /dev/null
	ovs-vsctl --if-exists del-port vxlan_a 2> /dev/null
	ovs-vsctl --if-exists del-br ovs_br0 2> /dev/null
}
0953 
# Set the MTU of a device.
#
# $1: command prefix used to run ip (may be empty), word-split on use
# $2: device name
# $3: MTU value
mtu() {
	ns_cmd="${1}"; dev="${2}"; mtu="${3}"

	${ns_cmd} ip link set dev ${dev} mtu ${mtu}
}
0961 
# Extract the MTU from the output of "ip link show" or "ip route get".
#
# $1: text to parse, split into whitespace-separated words
#
# Prints the word following the first "mtu" word, or "lock <value>" if that
# word is "lock". Prints nothing if there's no such word.
mtu_parse() {
	input="${1}"

	# State: 0 - looking for "mtu", 1 - "mtu" seen, 2 - "mtu lock" seen
	next=0
	for i in ${input}; do
		case "${next}" in
		1)
			if [ "${i}" = "lock" ]; then
				next=2
				continue
			fi
			echo "${i}" && return
			;;
		2)
			echo "lock ${i}" && return
			;;
		esac
		[ "${i}" = "mtu" ] && next=1
	done
}
0973 
# Print "ip link show" output for one device.
#
# $1: command prefix used to run ip (may be empty), word-split on use
# $2: device name
link_get() {
	ns_cmd="${1}"; name="${2}"

	${ns_cmd} ip link show dev "${name}"
}
0980 
# Print the MTU of a device, as parsed by mtu_parse from link_get output.
#
# $1: command prefix used to run ip (may be empty)
# $2: device name
link_get_mtu() {
	ns_cmd="${1}"
	name="${2}"

	# Keep the link description in the positional parameters
	set -- "$(link_get "${ns_cmd}" ${name})"
	mtu_parse "${1}"
}
0987 
# Print "ip route get" output for a destination.
#
# $1: command prefix used to run ip (may be empty), word-split on use
# $2: destination address
# $3: dsfield value for the lookup, 0 if omitted or empty
route_get_dst_exception() {
	ns_cmd="${1}"
	dst="${2}"
	dsfield="${3:-0}"

	${ns_cmd} ip route get "${dst}" dsfield "${dsfield}"
}
0999 
# Print the PMTU found in the route towards a destination, as parsed by
# mtu_parse from route_get_dst_exception output.
#
# $1: command prefix used to run ip (may be empty)
# $2: destination address
# $3: dsfield value for the lookup (optional)
route_get_dst_pmtu_from_exception() {
	ns_cmd="${1}"
	dst="${2}"
	dsfield="${3}"

	# Keep the route description in the positional parameters
	set -- "$(route_get_dst_exception "${ns_cmd}" "${dst}" "${dsfield}")"
	mtu_parse "${1}"
}
1007 
# Compare a PMTU value against the expected one.
#
# $1: expected value: a number, "lock <number>", "any" (any non-empty value
#     matches), or empty (no exception expected)
# $2: value found
# $3: description of the event that led to the value, used in error messages
#
# Returns 0 on match. Otherwise, reports the mismatch with err, returns 1.
check_pmtu_value() {
	expected="${1}"
	value="${2}"
	event="${3}"

	if [ "${expected}" = "any" ] && [ -n "${value}" ]; then
		return 0
	fi
	if [ "${value}" = "${expected}" ]; then
		return 0
	fi

	# Mismatch: pick the most specific message
	if [ -z "${value}" ]; then
		err "  PMTU exception wasn't created after ${event}" && return 1
	fi
	if [ -z "${expected}" ]; then
		err "  PMTU exception shouldn't exist after ${event}" && return 1
	fi

	err "  found PMTU exception with incorrect MTU ${value}, expected ${expected}, after ${event}"
	return 1
}
1020 
# PMTU exceptions over the two routed paths between A and B.
#
# $1: IP family to test, 4 or 6
#
# Checks that exceptions with the expected PMTU are created on both paths,
# and that the exception for the path via R1 follows local MTU changes while
# the one for the path via R2 is left alone. For IPv4 only, checks locked
# exceptions too.
#
# Returns $ksft_skip if setup fails, 1 on the first failed check.
test_pmtu_ipvX() {
	family=${1}

	setup namespaces routing || return $ksft_skip
	trace "${ns_a}"  veth_A-R1 "${ns_r1}" veth_R1-A \
	      "${ns_r1}" veth_R1-B "${ns_b}"  veth_B-R1 \
	      "${ns_a}"  veth_A-R2 "${ns_r2}" veth_R2-A \
	      "${ns_r2}" veth_R2-B "${ns_b}"  veth_B-R2

	# dst1 is reached via R1, dst2 via R2
	if [ ${family} -eq 4 ]; then
		ping=ping
		dst1="${prefix4}.${b_r1}.1"
		dst2="${prefix4}.${b_r2}.1"
	else
		ping=${ping6}
		dst1="${prefix6}:${b_r1}::1"
		dst2="${prefix6}:${b_r2}::1"
	fi

	# Initial MTUs. Path via R1: 2000 on the A side, 1400 on the B side
	mtu "${ns_a}"  veth_A-R1 2000
	mtu "${ns_r1}" veth_R1-A 2000
	mtu "${ns_r1}" veth_R1-B 1400
	mtu "${ns_b}"  veth_B-R1 1400

	# Path via R2: 2000 on the A side, 1500 on the B side
	mtu "${ns_a}"  veth_A-R2 2000
	mtu "${ns_r2}" veth_R2-A 2000
	mtu "${ns_r2}" veth_R2-B 1500
	mtu "${ns_b}"  veth_B-R2 1500

	# Packets bigger than the MTU on the B side create the exceptions
	run_cmd ${ns_a} ${ping} -q -M want -i 0.1 -w 1 -s 1800 ${dst1}
	run_cmd ${ns_a} ${ping} -q -M want -i 0.1 -w 1 -s 1800 ${dst2}

	# Exceptions must carry the MTU of the B side of each path
	pmtu_1="$(route_get_dst_pmtu_from_exception "${ns_a}" ${dst1})"
	check_pmtu_value "1400" "${pmtu_1}" "exceeding MTU" || return 1
	pmtu_2="$(route_get_dst_pmtu_from_exception "${ns_a}" ${dst2})"
	check_pmtu_value "1500" "${pmtu_2}" "exceeding MTU" || return 1

	# Local MTU on the path via R1 goes below the PMTU: the exception must
	# follow...
	mtu "${ns_a}"  veth_A-R1 1300
	mtu "${ns_r1}" veth_R1-A 1300
	pmtu_1="$(route_get_dst_pmtu_from_exception "${ns_a}" ${dst1})"
	check_pmtu_value "1300" "${pmtu_1}" "decreasing local MTU" || return 1
	# ...and the exception for the other path must not change
	pmtu_2="$(route_get_dst_pmtu_from_exception "${ns_a}" ${dst2})"
	check_pmtu_value "1500" "${pmtu_2}" "changing local MTU on a link not on this path" || return 1

	# Same, with the local MTU being increased
	mtu "${ns_a}"  veth_A-R1 1700
	mtu "${ns_r1}" veth_R1-A 1700
	pmtu_1="$(route_get_dst_pmtu_from_exception "${ns_a}" ${dst1})"
	check_pmtu_value "1700" "${pmtu_1}" "increasing local MTU" || return 1
	pmtu_2="$(route_get_dst_pmtu_from_exception "${ns_a}" ${dst2})"
	check_pmtu_value "1500" "${pmtu_2}" "changing local MTU on a link not on this path" || return 1

	# PMTU locking tests are for IPv4 only
	if [ $family -eq 6 ]; then
		return 0
	fi

	# Remote MTU on the path via R2 goes down to 400: the new exception is
	# expected to be locked, with PMTU 552
	mtu "${ns_r2}" veth_R2-B 400
	mtu "${ns_b}"  veth_B-R2 400
	run_cmd ${ns_a} ${ping} -q -M want -i 0.1 -w 1 -s 1400 ${dst2}
	pmtu_2="$(route_get_dst_pmtu_from_exception "${ns_a}" ${dst2})"
	check_pmtu_value "lock 552" "${pmtu_2}" "exceeding MTU, with MTU < min_pmtu" || return 1

	# Local MTU goes below that PMTU
	mtu "${ns_a}"  veth_A-R2 500
	mtu "${ns_r2}" veth_R2-A 500
	pmtu_2="$(route_get_dst_pmtu_from_exception "${ns_a}" ${dst2})"
	check_pmtu_value "500" "${pmtu_2}" "decreasing local MTU" || return 1

	# Local MTU is increased again
	mtu "${ns_a}"  veth_A-R2 1500
	mtu "${ns_r2}" veth_R2-A 1500
	pmtu_2="$(route_get_dst_pmtu_from_exception "${ns_a}" ${dst2})"
	check_pmtu_value "1500" "${pmtu_2}" "increasing local MTU" || return 1

	# More traffic: a locked exception is expected once more
	run_cmd ${ns_a} ${ping} -q -M want -i 0.1 -w 1 -s 1400 ${dst2}
	pmtu_2="$(route_get_dst_pmtu_from_exception "${ns_a}" ${dst2})"
	check_pmtu_value "lock 552" "${pmtu_2}" "exceeding MTU, with MTU < min_pmtu" || return 1
}
1106 
# Entry points for test_pmtu_ipvX, one per IP family

test_pmtu_ipv4_exception() {
	# IPv4, including the locked PMTU checks
	test_pmtu_ipvX 4
}

test_pmtu_ipv6_exception() {
	# IPv6
	test_pmtu_ipvX 6
}
1114 
# PMTU exceptions for ICMPv4 traffic routed by DSCP.
#
# Routes are installed in table 100 and selected by a rule matching
# ${policy_mark}. One ping is sent with no ECN bits set and one with ECN bits
# set to 2, then the exceptions are looked up with ${policy_mark} as dsfield.
#
# Returns $ksft_skip if setup fails, 1 on the first failed check.
test_pmtu_ipv4_dscp_icmp_exception() {
	rt_table=100

	setup namespaces policy_routing || return $ksft_skip
	trace "${ns_a}"  veth_A-R1 "${ns_r1}" veth_R1-A \
	      "${ns_r1}" veth_R1-B "${ns_b}"  veth_B-R1 \
	      "${ns_a}"  veth_A-R2 "${ns_r2}" veth_R2-A \
	      "${ns_r2}" veth_R2-B "${ns_b}"  veth_B-R2

	# Initial MTUs. Path via R1: 2000 on the A side, 1400 on the B side
	mtu "${ns_a}"  veth_A-R1 2000
	mtu "${ns_r1}" veth_R1-A 2000
	mtu "${ns_r1}" veth_R1-B 1400
	mtu "${ns_b}"  veth_B-R1 1400

	# Path via R2: 2000 on the A side, 1500 on the B side
	mtu "${ns_a}"  veth_A-R2 2000
	mtu "${ns_r2}" veth_R2-A 2000
	mtu "${ns_r2}" veth_R2-B 1500
	mtu "${ns_b}"  veth_B-R2 1500

	# Payload size filling the MTU of veth_A-R1
	len=$((2000 - 20 - 8))

	dst1="${prefix4}.${b_r1}.1"
	dst2="${prefix4}.${b_r2}.1"

	# First exception: DSCP only, no ECN bit set (Not-ECT)
	dsfield=${policy_mark}
	run_cmd "${ns_a}" ping -q -M want -Q "${dsfield}" -c 1 -w 1 -s "${len}" "${dst1}"

	# Second exception: same DSCP, ECN=2 (ECT(0))
	dsfield=$(printf "%#x" $((policy_mark + 0x02)))
	run_cmd "${ns_a}" ping -q -M want -Q "${dsfield}" -c 1 -w 1 -s "${len}" "${dst2}"

	# Both exceptions must exist, with the MTU of the B side of each path
	pmtu_1="$(route_get_dst_pmtu_from_exception "${ns_a}" "${dst1}" "${policy_mark}")"
	check_pmtu_value "1400" "${pmtu_1}" "exceeding MTU" || return 1

	pmtu_2="$(route_get_dst_pmtu_from_exception "${ns_a}" "${dst2}" "${policy_mark}")"
	check_pmtu_value "1500" "${pmtu_2}" "exceeding MTU" || return 1
}
1154 
# PMTU exceptions for UDP traffic routed by DSCP.
#
# Routes are installed in table 100 and selected by a rule matching
# ${policy_mark}. socat sends one datagram with no ECN bits set and one with
# ECN bits set to 2, then the exceptions are looked up with ${policy_mark} as
# dsfield.
#
# Returns $ksft_skip if socat is missing or setup fails, 1 on the first
# failed check.
test_pmtu_ipv4_dscp_udp_exception() {
	rt_table=100

	# 'command -v' is a POSIX builtin: unlike the external 'which', it can't
	# be missing itself and make socat look like it's not installed
	if ! command -v socat > /dev/null 2>&1; then
		echo "'socat' command not found; skipping tests"
		return $ksft_skip
	fi

	setup namespaces policy_routing || return $ksft_skip
	trace "${ns_a}"  veth_A-R1    "${ns_r1}" veth_R1-A \
	      "${ns_r1}" veth_R1-B    "${ns_b}"  veth_B-R1 \
	      "${ns_a}"  veth_A-R2    "${ns_r2}" veth_R2-A \
	      "${ns_r2}" veth_R2-B    "${ns_b}"  veth_B-R2

	# Set up initial MTU values
	mtu "${ns_a}"  veth_A-R1 2000
	mtu "${ns_r1}" veth_R1-A 2000
	mtu "${ns_r1}" veth_R1-B 1400
	mtu "${ns_b}"  veth_B-R1 1400

	mtu "${ns_a}"  veth_A-R2 2000
	mtu "${ns_r2}" veth_R2-A 2000
	mtu "${ns_r2}" veth_R2-B 1500
	mtu "${ns_b}"  veth_B-R2 1500

	len=$((2000 - 20 - 8)) # Fills MTU of veth_A-R1

	dst1="${prefix4}.${b_r1}.1"
	dst2="${prefix4}.${b_r2}.1"

	# Create route exceptions: the listener in B discards what it receives,
	# and its PID is recorded for cleanup()
	run_cmd_bg "${ns_b}" socat UDP-LISTEN:50000 OPEN:/dev/null,wronly=1
	socat_pids="${socat_pids} $!"

	dsfield=${policy_mark} # No ECN bit set (Not-ECT)
	run_cmd "${ns_a}" socat OPEN:/dev/zero,rdonly=1,readbytes="${len}" \
		UDP:"${dst1}":50000,tos="${dsfield}"

	dsfield=$(printf "%#x" $((policy_mark + 0x02))) # ECN=2 (ECT(0))
	run_cmd "${ns_a}" socat OPEN:/dev/zero,rdonly=1,readbytes="${len}" \
		UDP:"${dst2}":50000,tos="${dsfield}"

	# Check that exceptions have been created with the correct PMTU
	pmtu_1="$(route_get_dst_pmtu_from_exception "${ns_a}" "${dst1}" "${policy_mark}")"
	check_pmtu_value "1400" "${pmtu_1}" "exceeding MTU" || return 1
	pmtu_2="$(route_get_dst_pmtu_from_exception "${ns_a}" "${dst2}" "${policy_mark}")"
	check_pmtu_value "1500" "${pmtu_2}" "exceeding MTU" || return 1
}
1203 
# PMTU exception on a VXLAN or GENEVE tunnel between A and B, routed via R1.
#
# $1: tunnel type, "vxlan" or "geneve"
# $2: inner IP family, 4 or 6
# $3: outer IP family, 4 or 6
#
# Returns $ksft_skip if setup fails; otherwise, the result of the PMTU check.
test_pmtu_ipvX_over_vxlanY_or_geneveY_exception() {
	type=${1}
	family=${2}
	outer_family=${3}
	ll_mtu=4000

	# Expected PMTU: link layer MTU minus the encapsulation overhead
	if [ ${outer_family} -eq 4 ]; then
		setup namespaces routing ${type}4 || return $ksft_skip
		#                      IPv4   UDP   VXLAN/GENEVE   Ethernet
		exp_mtu=$((${ll_mtu} - 20   - 8   - 8            - 14))
	else
		setup namespaces routing ${type}6 || return $ksft_skip
		#                      IPv6   UDP   VXLAN/GENEVE   Ethernet
		exp_mtu=$((${ll_mtu} - 40   - 8   - 8            - 14))
	fi

	trace "${ns_a}" ${type}_a "${ns_b}"  ${type}_b \
	      "${ns_a}" veth_A-R1 "${ns_r1}" veth_R1-A \
	      "${ns_b}" veth_B-R1 "${ns_r1}" veth_R1-B

	if [ ${family} -eq 4 ]; then
		ping=ping
		dst=${tunnel4_b_addr}
	else
		ping=${ping6}
		dst=${tunnel6_b_addr}
	fi

	# The link between R1 and B is the bottleneck: links on the A side and
	# the tunnel interfaces get a bigger MTU
	mtu "${ns_a}"  veth_A-R1 $((${ll_mtu} + 1000))
	mtu "${ns_r1}" veth_R1-A $((${ll_mtu} + 1000))
	mtu "${ns_b}"  veth_B-R1 ${ll_mtu}
	mtu "${ns_r1}" veth_R1-B ${ll_mtu}

	mtu "${ns_a}" ${type}_a $((${ll_mtu} + 1000))
	mtu "${ns_b}" ${type}_b $((${ll_mtu} + 1000))

	# Exceed the link layer MTU to have the exception created
	run_cmd ${ns_a} ${ping} -q -M want -i 0.1 -w 1 -s $((${ll_mtu} + 500)) ${dst}

	# The exception must exist, with the expected PMTU
	pmtu="$(route_get_dst_pmtu_from_exception "${ns_a}" ${dst})"
	check_pmtu_value ${exp_mtu} "${pmtu}" "exceeding link layer MTU on ${type} interface"
}
1246 
# VXLAN and GENEVE tests. Naming: test_pmtu_ipv<inner>_<type><outer>_exception

# Outer IPv4
test_pmtu_ipv4_vxlan4_exception() {
	test_pmtu_ipvX_over_vxlanY_or_geneveY_exception vxlan 4 4
}

test_pmtu_ipv6_vxlan4_exception() {
	test_pmtu_ipvX_over_vxlanY_or_geneveY_exception vxlan 6 4
}

test_pmtu_ipv4_geneve4_exception() {
	test_pmtu_ipvX_over_vxlanY_or_geneveY_exception geneve 4 4
}

test_pmtu_ipv6_geneve4_exception() {
	test_pmtu_ipvX_over_vxlanY_or_geneveY_exception geneve 6 4
}

# Outer IPv6
test_pmtu_ipv4_vxlan6_exception() {
	test_pmtu_ipvX_over_vxlanY_or_geneveY_exception vxlan 4 6
}

test_pmtu_ipv6_vxlan6_exception() {
	test_pmtu_ipvX_over_vxlanY_or_geneveY_exception vxlan 6 6
}

test_pmtu_ipv4_geneve6_exception() {
	test_pmtu_ipvX_over_vxlanY_or_geneveY_exception geneve 4 6
}

test_pmtu_ipv6_geneve6_exception() {
	test_pmtu_ipvX_over_vxlanY_or_geneveY_exception geneve 6 6
}
1278 
# PMTU exceptions on a VXLAN or GENEVE tunnel between A and B, routed via R1,
# with the tunnel traffic coming both from C, bridged by A, and from A itself.
#
# $1: tunnel type, "vxlan" or "geneve"
# $2: inner IP family, 4 or 6
# $3: outer IP family, 4 or 6
#
# Returns $ksft_skip if setup fails, 1 if a ping or a PMTU check fails.
test_pmtu_ipvX_over_bridged_vxlanY_or_geneveY_exception() {
	type=${1}
	family=${2}
	outer_family=${3}
	ll_mtu=4000

	if [ ${outer_family} -eq 4 ]; then
		setup namespaces routing bridge bridged_${type}4 || return $ksft_skip
		#                      IPv4 header   UDP header   VXLAN/GENEVE header   Ethernet header
		exp_mtu=$((${ll_mtu} - 20          - 8          - 8                   - 14))
	else
		setup namespaces routing bridge bridged_${type}6 || return $ksft_skip
		#                      IPv6 header   UDP header   VXLAN/GENEVE header   Ethernet header
		exp_mtu=$((${ll_mtu} - 40          - 8          - 8                   - 14))
	fi

	# The bridge port in A is veth_A-C, as created by setup_bridge
	trace "${ns_a}" ${type}_a    "${ns_b}"  ${type}_b \
	      "${ns_a}" veth_A-R1    "${ns_r1}" veth_R1-A \
	      "${ns_b}" veth_B-R1    "${ns_r1}" veth_R1-B \
	      "${ns_a}" br0          "${ns_a}"  veth_A-C  \
	      "${ns_c}" veth_C-A

	if [ ${family} -eq 4 ]; then
		ping=ping
		dst=${tunnel4_b_addr}
	else
		ping=${ping6}
		dst=${tunnel6_b_addr}
	fi

	# Create route exception by exceeding link layer MTU
	mtu "${ns_a}"  veth_A-R1 $((${ll_mtu} + 1000))
	mtu "${ns_a}"  br0       $((${ll_mtu} + 1000))
	mtu "${ns_a}"  veth_A-C  $((${ll_mtu} + 1000))
	mtu "${ns_c}"  veth_C-A  $((${ll_mtu} + 1000))
	mtu "${ns_r1}" veth_R1-A $((${ll_mtu} + 1000))
	mtu "${ns_b}"  veth_B-R1 ${ll_mtu}
	mtu "${ns_r1}" veth_R1-B ${ll_mtu}

	mtu "${ns_a}" ${type}_a $((${ll_mtu} + 1000))
	mtu "${ns_b}" ${type}_b $((${ll_mtu} + 1000))

	run_cmd ${ns_c} ${ping} -q -M want -i 0.1 -c 10 -s $((${ll_mtu} + 500)) ${dst} || return 1
	run_cmd ${ns_a} ${ping} -q -M want -i 0.1 -w 1  -s $((${ll_mtu} + 500)) ${dst} || return 1

	# Check that exceptions were created. A failure of the first check must
	# fail the test even if the second check passes.
	pmtu="$(route_get_dst_pmtu_from_exception "${ns_c}" ${dst})"
	check_pmtu_value ${exp_mtu} "${pmtu}" "exceeding link layer MTU on bridged ${type} interface" || return 1
	pmtu="$(route_get_dst_pmtu_from_exception "${ns_a}" ${dst})"
	check_pmtu_value ${exp_mtu} "${pmtu}" "exceeding link layer MTU on locally bridged ${type} interface"
}
1330 
# Bridged VXLAN and GENEVE tests. Naming:
# test_pmtu_ipv<inner>_br_<type><outer>_exception

# Outer IPv4
test_pmtu_ipv4_br_vxlan4_exception() {
	test_pmtu_ipvX_over_bridged_vxlanY_or_geneveY_exception vxlan 4 4
}

test_pmtu_ipv6_br_vxlan4_exception() {
	test_pmtu_ipvX_over_bridged_vxlanY_or_geneveY_exception vxlan 6 4
}

test_pmtu_ipv4_br_geneve4_exception() {
	test_pmtu_ipvX_over_bridged_vxlanY_or_geneveY_exception geneve 4 4
}

test_pmtu_ipv6_br_geneve4_exception() {
	test_pmtu_ipvX_over_bridged_vxlanY_or_geneveY_exception geneve 6 4
}

# Outer IPv6
test_pmtu_ipv4_br_vxlan6_exception() {
	test_pmtu_ipvX_over_bridged_vxlanY_or_geneveY_exception vxlan 4 6
}

test_pmtu_ipv6_br_vxlan6_exception() {
	test_pmtu_ipvX_over_bridged_vxlanY_or_geneveY_exception vxlan 6 6
}

test_pmtu_ipv4_br_geneve6_exception() {
	test_pmtu_ipvX_over_bridged_vxlanY_or_geneveY_exception geneve 4 6
}

test_pmtu_ipv6_br_geneve6_exception() {
	test_pmtu_ipvX_over_bridged_vxlanY_or_geneveY_exception geneve 6 6
}
1362 
# PMTU exception on a VXLAN or GENEVE tunnel towards B, routed via R1, with
# the A side of the tunnel being a port of an Open vSwitch bridge outside the
# test namespaces, and traffic coming from C.
#
# $1: tunnel type, "vxlan" or "geneve"
# $2: inner IP family, 4 or 6
# $3: outer IP family, 4 or 6
#
# Returns $ksft_skip if setup fails, 1 if the ping fails; otherwise, the
# result of the PMTU check.
test_pmtu_ipvX_over_ovs_vxlanY_or_geneveY_exception() {
	type=${1}
	family=${2}
	outer_family=${3}
	ll_mtu=4000

	if [ ${outer_family} -eq 4 ]; then
		setup namespaces routing ovs_bridge ovs_${type}4 || return $ksft_skip
		#                      IPv4 header   UDP header   VXLAN/GENEVE header   Ethernet header
		exp_mtu=$((${ll_mtu} - 20          - 8          - 8                   - 14))
	else
		setup namespaces routing ovs_bridge ovs_${type}6 || return $ksft_skip
		#                      IPv6 header   UDP header   VXLAN/GENEVE header   Ethernet header
		exp_mtu=$((${ll_mtu} - 40          - 8          - 8                   - 14))
	fi

	# Name of the tunnel device on the A side, for tracing and MTU setting
	if [ "${type}" = "vxlan" ]; then
		tun_a="vxlan_sys_4789"
	elif [ "${type}" = "geneve" ]; then
		tun_a="genev_sys_6081"
	fi

	# An empty prefix means "outside the test namespaces". The bridge port
	# towards C is veth_A-C, as created by setup_ovs_bridge.
	trace ""        "${tun_a}"  "${ns_b}"  ${type}_b \
	      ""        veth_A-R1   "${ns_r1}" veth_R1-A \
	      "${ns_b}" veth_B-R1   "${ns_r1}" veth_R1-B \
	      ""        ovs_br0     ""         veth_A-C  \
	      "${ns_c}" veth_C-A

	if [ ${family} -eq 4 ]; then
		ping=ping
		dst=${tunnel4_b_addr}
	else
		ping=${ping6}
		dst=${tunnel6_b_addr}
	fi

	# Create route exception by exceeding link layer MTU
	mtu ""         veth_A-R1 $((${ll_mtu} + 1000))
	mtu ""         ovs_br0   $((${ll_mtu} + 1000))
	mtu ""         veth_A-C  $((${ll_mtu} + 1000))
	mtu "${ns_c}"  veth_C-A  $((${ll_mtu} + 1000))
	mtu "${ns_r1}" veth_R1-A $((${ll_mtu} + 1000))
	mtu "${ns_b}"  veth_B-R1 ${ll_mtu}
	mtu "${ns_r1}" veth_R1-B ${ll_mtu}

	mtu ""        ${tun_a}  $((${ll_mtu} + 1000))
	mtu "${ns_b}" ${type}_b $((${ll_mtu} + 1000))

	run_cmd ${ns_c} ${ping} -q -M want -i 0.1 -c 20 -s $((${ll_mtu} + 500)) ${dst} || return 1

	# Check that exceptions were created
	pmtu="$(route_get_dst_pmtu_from_exception "${ns_c}" ${dst})"
	check_pmtu_value ${exp_mtu} "${pmtu}" "exceeding link layer MTU on Open vSwitch ${type} interface"
}
1417 
# Open vSwitch VXLAN and GENEVE tests. Naming:
# test_pmtu_ipv<inner>_ovs_<type><outer>_exception

# Outer IPv4
test_pmtu_ipv4_ovs_vxlan4_exception() {
	test_pmtu_ipvX_over_ovs_vxlanY_or_geneveY_exception vxlan 4 4
}

test_pmtu_ipv6_ovs_vxlan4_exception() {
	test_pmtu_ipvX_over_ovs_vxlanY_or_geneveY_exception vxlan 6 4
}

test_pmtu_ipv4_ovs_geneve4_exception() {
	test_pmtu_ipvX_over_ovs_vxlanY_or_geneveY_exception geneve 4 4
}

test_pmtu_ipv6_ovs_geneve4_exception() {
	test_pmtu_ipvX_over_ovs_vxlanY_or_geneveY_exception geneve 6 4
}

# Outer IPv6
test_pmtu_ipv4_ovs_vxlan6_exception() {
	test_pmtu_ipvX_over_ovs_vxlanY_or_geneveY_exception vxlan 4 6
}

test_pmtu_ipv6_ovs_vxlan6_exception() {
	test_pmtu_ipvX_over_ovs_vxlanY_or_geneveY_exception vxlan 6 6
}

test_pmtu_ipv4_ovs_geneve6_exception() {
	test_pmtu_ipvX_over_ovs_vxlanY_or_geneveY_exception geneve 4 6
}

test_pmtu_ipv6_ovs_geneve6_exception() {
	test_pmtu_ipvX_over_ovs_vxlanY_or_geneveY_exception geneve 6 6
}
1449 
# PMTU exception on a FoU or GUE tunnel between A and B, routed via R1.
#
# $1: inner IP family, 4 or 6
# $2: outer IP family, 4 or 6
# $3: encapsulation, "fou" or "gue"
#
# Returns $ksft_skip if setup fails; otherwise, the result of the PMTU check.
test_pmtu_ipvX_over_fouY_or_gueY() {
	inner_family=${1}
	outer_family=${2}
	encap=${3}
	ll_mtu=4000

	setup namespaces routing ${encap}${outer_family}${inner_family} || return $ksft_skip
	trace "${ns_a}" ${encap}_a "${ns_b}"  ${encap}_b \
	      "${ns_a}" veth_A-R1  "${ns_r1}" veth_R1-A \
	      "${ns_b}" veth_B-R1  "${ns_r1}" veth_R1-B

	if [ ${inner_family} -eq 4 ]; then
		ping=ping
		dst=${tunnel4_b_addr}
	else
		ping=${ping6}
		dst=${tunnel6_b_addr}
	fi

	# GUE costs four bytes more than FoU
	case "${encap}" in
	gue)	encap_overhead=4 ;;
	*)	encap_overhead=0 ;;
	esac

	# Expected PMTU: link layer MTU minus the encapsulation overhead
	if [ ${outer_family} -eq 4 ]; then
		#                      IPv4   UDP
		exp_mtu=$((${ll_mtu} - 20   - 8   - ${encap_overhead}))
	else
		#                      IPv6   Option 4   UDP
		exp_mtu=$((${ll_mtu} - 40   - 8        - 8   - ${encap_overhead}))
	fi

	# The link between R1 and B is the bottleneck: links on the A side and
	# the tunnel interfaces get a bigger MTU
	mtu "${ns_a}"  veth_A-R1 $((${ll_mtu} + 1000))
	mtu "${ns_r1}" veth_R1-A $((${ll_mtu} + 1000))
	mtu "${ns_b}"  veth_B-R1 ${ll_mtu}
	mtu "${ns_r1}" veth_R1-B ${ll_mtu}

	mtu "${ns_a}" ${encap}_a $((${ll_mtu} + 1000))
	mtu "${ns_b}" ${encap}_b $((${ll_mtu} + 1000))

	# Exceed the link layer MTU to have the exception created
	run_cmd ${ns_a} ${ping} -q -M want -i 0.1 -w 1 -s $((${ll_mtu} + 500)) ${dst}

	# The exception must exist, with the expected PMTU
	pmtu="$(route_get_dst_pmtu_from_exception "${ns_a}" ${dst})"
	check_pmtu_value ${exp_mtu} "${pmtu}" "exceeding link layer MTU on ${encap} interface"
}
1497 
# pmtu_ipv4_fou4_exception: run the shared ipvX-over-fouY/gueY PMTU exception
# test with X=4, Y=4 and FoU encapsulation.
test_pmtu_ipv4_fou4_exception() {
        test_pmtu_ipvX_over_fouY_or_gueY "4" "4" "fou"
}
1501 
# pmtu_ipv6_fou4_exception: run the shared ipvX-over-fouY/gueY PMTU exception
# test with X=6, Y=4 and FoU encapsulation.
test_pmtu_ipv6_fou4_exception() {
        test_pmtu_ipvX_over_fouY_or_gueY "6" "4" "fou"
}
1505 
# pmtu_ipv4_fou6_exception: run the shared ipvX-over-fouY/gueY PMTU exception
# test with X=4, Y=6 and FoU encapsulation.
test_pmtu_ipv4_fou6_exception() {
        test_pmtu_ipvX_over_fouY_or_gueY "4" "6" "fou"
}
1509 
# pmtu_ipv6_fou6_exception: run the shared ipvX-over-fouY/gueY PMTU exception
# test with X=6, Y=6 and FoU encapsulation.
test_pmtu_ipv6_fou6_exception() {
        test_pmtu_ipvX_over_fouY_or_gueY "6" "6" "fou"
}
1513 
# pmtu_ipv4_gue4_exception: run the shared ipvX-over-fouY/gueY PMTU exception
# test with X=4, Y=4 and GUE encapsulation.
test_pmtu_ipv4_gue4_exception() {
        test_pmtu_ipvX_over_fouY_or_gueY "4" "4" "gue"
}
1517 
# pmtu_ipv6_gue4_exception: run the shared ipvX-over-fouY/gueY PMTU exception
# test with X=6, Y=4 and GUE encapsulation.
test_pmtu_ipv6_gue4_exception() {
        test_pmtu_ipvX_over_fouY_or_gueY "6" "4" "gue"
}
1521 
# pmtu_ipv4_gue6_exception: run the shared ipvX-over-fouY/gueY PMTU exception
# test with X=4, Y=6 and GUE encapsulation.
test_pmtu_ipv4_gue6_exception() {
        test_pmtu_ipvX_over_fouY_or_gueY "4" "6" "gue"
}
1525 
# pmtu_ipv6_gue6_exception: run the shared ipvX-over-fouY/gueY PMTU exception
# test with X=6, Y=6 and GUE encapsulation.
test_pmtu_ipv6_gue6_exception() {
        test_pmtu_ipvX_over_fouY_or_gueY "6" "6" "gue"
}
1529 
# Check that a route exception with the expected PMTU is created for traffic
# sent through the ip_a/ip_b tunnel devices when it exceeds the link layer MTU
# of the segment between R1 and B.
#
# Arguments: $1 - inner IP version (4 or 6)
#            $2 - outer IP version (4 or 6)
# Returns:   $ksft_skip if setup fails, the status of mtu if a tunnel MTU
#            can't be set, otherwise the status of check_pmtu_value
test_pmtu_ipvX_over_ipvY_exception() {
        inner="${1}"
        outer="${2}"
        ll_mtu=4000

        setup namespaces routing "ip${inner}ip${outer}" || return $ksft_skip

        trace "${ns_a}" ip_a      "${ns_b}"  ip_b      \
              "${ns_a}" veth_A-R1 "${ns_r1}" veth_R1-A \
              "${ns_b}" veth_B-R1 "${ns_r1}" veth_R1-B

        # Inner family selects the ping flavour and the tunnel address on B
        if [ "${inner}" -eq 4 ]; then
                ping=ping
                dst="${tunnel4_b_addr}"
        else
                ping=${ping6}
                dst="${tunnel6_b_addr}"
        fi

        # Outer family selects the encapsulation overhead taken off the
        # link layer MTU
        if [ "${outer}" -eq 4 ]; then
                # IPv4 header (20)
                exp_mtu=$((ll_mtu - 20))
        else
                # IPv6 header (40), Option 4 (8)
                exp_mtu=$((ll_mtu - 40 - 8))
        fi

        # Create route exception by exceeding link layer MTU: the A-R1
        # segment is larger than the R1-B one
        mtu "${ns_a}"  veth_A-R1 $((ll_mtu + 1000))
        mtu "${ns_r1}" veth_R1-A $((ll_mtu + 1000))
        mtu "${ns_b}"  veth_B-R1 "${ll_mtu}"
        mtu "${ns_r1}" veth_R1-B "${ll_mtu}"

        # Give up with mtu's status if the tunnel devices can't be resized
        mtu "${ns_a}" ip_a $((ll_mtu + 1000)) || return
        mtu "${ns_b}" ip_b $((ll_mtu + 1000)) || return
        run_cmd ${ns_a} ${ping} -q -M want -i 0.1 -w 1 -s $((ll_mtu + 500)) ${dst}

        # Check that exception was created
        pmtu="$(route_get_dst_pmtu_from_exception "${ns_a}" ${dst})"
        check_pmtu_value "${exp_mtu}" "${pmtu}" "exceeding link layer MTU on ip${inner}ip${outer} interface"
}
1571 
# pmtu_ipv4_ipv4_exception: IP tunnel PMTU exception test, inner IPv4 over
# outer IPv4.
test_pmtu_ipv4_ipv4_exception() {
        test_pmtu_ipvX_over_ipvY_exception "4" "4"
}
1575 
# pmtu_ipv6_ipv4_exception: IP tunnel PMTU exception test, inner IPv6 over
# outer IPv4.
test_pmtu_ipv6_ipv4_exception() {
        test_pmtu_ipvX_over_ipvY_exception "6" "4"
}
1579 
# pmtu_ipv4_ipv6_exception: IP tunnel PMTU exception test, inner IPv4 over
# outer IPv6.
test_pmtu_ipv4_ipv6_exception() {
        test_pmtu_ipvX_over_ipvY_exception "4" "6"
}
1583 
# pmtu_ipv6_ipv6_exception: IP tunnel PMTU exception test, inner IPv6 over
# outer IPv6.
test_pmtu_ipv6_ipv6_exception() {
        test_pmtu_ipvX_over_ipvY_exception "6" "6"
}
1587 
# Over a vti4 tunnel on a veth pair, check that no route exception appears
# for a packet that just fits, and that one with the expected PMTU appears
# once the payload grows by one byte.
#
# Returns: $ksft_skip if setup fails, 1 if an exception shows up too early,
#          otherwise the status of the final check_pmtu_value
test_pmtu_vti4_exception() {
        setup namespaces veth vti4 xfrm4 || return $ksft_skip
        trace "${ns_a}" veth_a "${ns_b}" veth_b \
              "${ns_a}" vti4_a "${ns_b}" vti4_b

        veth_mtu=1500
        vti_mtu=$((veth_mtu - 20))

        # Room left in the tunnel MTU after SPI (4), SN (4), IV (8), ICV (16),
        # pad length (1) and next header (1)
        esp_payload_rfc4106=$((vti_mtu - 4 - 4 - 8 - 16 - 1 - 1))
        # 28: presumably the IPv4 (20) and ICMP (8) headers of the ping itself
        ping_payload=$((esp_payload_rfc4106 - 28))

        mtu "${ns_a}" veth_a "${veth_mtu}"
        mtu "${ns_b}" veth_b "${veth_mtu}"
        mtu "${ns_a}" vti4_a "${vti_mtu}"
        mtu "${ns_b}" vti4_b "${vti_mtu}"

        # Send DF packet without exceeding link layer MTU, check that no
        # exception is created
        run_cmd ${ns_a} ping -q -M want -i 0.1 -w 1 -s ${ping_payload} ${tunnel4_b_addr}
        pmtu="$(route_get_dst_pmtu_from_exception "${ns_a}" ${tunnel4_b_addr})"
        if ! check_pmtu_value "" "${pmtu}" "sending packet smaller than PMTU (IP payload length ${esp_payload_rfc4106})"; then
                return 1
        fi

        # Now exceed link layer MTU by one byte, check that exception is created
        # with the right PMTU value
        run_cmd ${ns_a} ping -q -M want -i 0.1 -w 1 -s $((ping_payload + 1)) ${tunnel4_b_addr}
        pmtu="$(route_get_dst_pmtu_from_exception "${ns_a}" ${tunnel4_b_addr})"
        check_pmtu_value "${esp_payload_rfc4106}" "${pmtu}" "exceeding PMTU (IP payload length $((esp_payload_rfc4106 + 1)))"
}
1617 
# Over a vti6 tunnel on a veth pair, check that a route exception is created
# when the link layer MTU is exceeded, and that its PMTU follows later
# changes of the tunnel MTU.
#
# Returns: $ksft_skip if setup fails, 1 if no exception is created or if
#          the PMTU does not track the tunnel MTU, 0 otherwise
test_pmtu_vti6_exception() {
        setup namespaces veth vti6 xfrm6 || return $ksft_skip
        trace "${ns_a}" veth_a "${ns_b}" veth_b \
              "${ns_a}" vti6_a "${ns_b}" vti6_b
        fail=0

        # Create route exception by exceeding link layer MTU
        mtu "${ns_a}" veth_a 4000
        mtu "${ns_b}" veth_b 4000
        mtu "${ns_a}" vti6_a 5000
        mtu "${ns_b}" vti6_b 5000
        run_cmd ${ns_a} ${ping6} -q -i 0.1 -w 1 -s 60000 ${tunnel6_b_addr}

        # Check that exception was created; without it the remaining checks
        # can't be performed
        pmtu="$(route_get_dst_pmtu_from_exception "${ns_a}" ${tunnel6_b_addr})"
        if ! check_pmtu_value any "${pmtu}" "creating tunnel exceeding link layer MTU"; then
                return 1
        fi

        # Decrease tunnel MTU, check for PMTU decrease in route exception
        mtu "${ns_a}" vti6_a 3000
        pmtu="$(route_get_dst_pmtu_from_exception "${ns_a}" ${tunnel6_b_addr})"
        check_pmtu_value "3000" "${pmtu}" "decreasing tunnel MTU" || fail=1

        # Increase tunnel MTU, check for PMTU increase in route exception
        mtu "${ns_a}" vti6_a 9000
        pmtu="$(route_get_dst_pmtu_from_exception "${ns_a}" ${tunnel6_b_addr})"
        check_pmtu_value "9000" "${pmtu}" "increasing tunnel MTU" || fail=1

        return ${fail}
}
1647 
# Same check as the plain vti4 exception test, but set up with xfrm4udp, so
# an extra UDP header is accounted for in the expected payload size.
#
# Returns: $ksft_skip if setup fails, 1 if an exception shows up too early,
#          otherwise the status of the final check_pmtu_value
test_pmtu_vti4_udp_exception() {
        setup namespaces veth vti4 xfrm4udp || return $ksft_skip
        trace "${ns_a}" veth_a "${ns_b}" veth_b \
              "${ns_a}" vti4_a "${ns_b}" vti4_b

        veth_mtu=1500
        vti_mtu=$((veth_mtu - 20))

        # Room left in the tunnel MTU after UDP (8), SPI (4), SN (4), IV (8),
        # ICV (16), pad length (1) and next header (1)
        esp_payload_rfc4106=$((vti_mtu - 8 - 4 - 4 - 8 - 16 - 1 - 1))
        # 28: presumably the IPv4 (20) and ICMP (8) headers of the ping itself
        ping_payload=$((esp_payload_rfc4106 - 28))

        mtu "${ns_a}" veth_a "${veth_mtu}"
        mtu "${ns_b}" veth_b "${veth_mtu}"
        mtu "${ns_a}" vti4_a "${vti_mtu}"
        mtu "${ns_b}" vti4_b "${vti_mtu}"

        # Send DF packet without exceeding link layer MTU, check that no
        # exception is created
        run_cmd ${ns_a} ping -q -M want -i 0.1 -w 1 -s ${ping_payload} ${tunnel4_b_addr}
        pmtu="$(route_get_dst_pmtu_from_exception "${ns_a}" ${tunnel4_b_addr})"
        if ! check_pmtu_value "" "${pmtu}" "sending packet smaller than PMTU (IP payload length ${esp_payload_rfc4106})"; then
                return 1
        fi

        # Now exceed link layer MTU by one byte, check that exception is created
        # with the right PMTU value
        run_cmd ${ns_a} ping -q -M want -i 0.1 -w 1 -s $((ping_payload + 1)) ${tunnel4_b_addr}
        pmtu="$(route_get_dst_pmtu_from_exception "${ns_a}" ${tunnel4_b_addr})"
        check_pmtu_value "${esp_payload_rfc4106}" "${pmtu}" "exceeding PMTU (IP payload length $((esp_payload_rfc4106 + 1)))"
}
1677 
# Same check as the plain vti6 exception test, but set up with xfrm6udp:
# a route exception must be created when the link layer MTU is exceeded, and
# its PMTU must follow later changes of the tunnel MTU.
#
# Returns: $ksft_skip if setup fails, 1 if no exception is created or if
#          the PMTU does not track the tunnel MTU, 0 otherwise
test_pmtu_vti6_udp_exception() {
        setup namespaces veth vti6 xfrm6udp || return $ksft_skip
        trace "${ns_a}" veth_a "${ns_b}" veth_b \
              "${ns_a}" vti6_a "${ns_b}" vti6_b
        fail=0

        # Create route exception by exceeding link layer MTU
        mtu "${ns_a}" veth_a 4000
        mtu "${ns_b}" veth_b 4000
        mtu "${ns_a}" vti6_a 5000
        mtu "${ns_b}" vti6_b 5000
        run_cmd ${ns_a} ${ping6} -q -i 0.1 -w 1 -s 60000 ${tunnel6_b_addr}

        # Check that exception was created; without it the remaining checks
        # can't be performed
        pmtu="$(route_get_dst_pmtu_from_exception "${ns_a}" ${tunnel6_b_addr})"
        if ! check_pmtu_value any "${pmtu}" "creating tunnel exceeding link layer MTU"; then
                return 1
        fi

        # Decrease tunnel MTU, check for PMTU decrease in route exception
        mtu "${ns_a}" vti6_a 3000
        pmtu="$(route_get_dst_pmtu_from_exception "${ns_a}" ${tunnel6_b_addr})"
        check_pmtu_value "3000" "${pmtu}" "decreasing tunnel MTU" || fail=1

        # Increase tunnel MTU, check for PMTU increase in route exception
        mtu "${ns_a}" vti6_a 9000
        pmtu="$(route_get_dst_pmtu_from_exception "${ns_a}" ${tunnel6_b_addr})"
        check_pmtu_value "9000" "${pmtu}" "increasing tunnel MTU" || fail=1

        return ${fail}
}
1707 
# Routed variant of the vti4-over-UDP test: the tunnel between A and B goes
# through R1. A packet that fits must not create an exception; after the MTU
# of R1's link towards B is lowered by 8 bytes, the same packet must create
# one with a PMTU 8 bytes smaller.
#
# Returns: $ksft_skip if setup fails, 1 if an exception shows up too early,
#          otherwise the status of the final check_pmtu_value
test_pmtu_vti4_udp_routed_exception() {
        setup namespaces routing vti4routed xfrm4udprouted || return $ksft_skip
        trace "${ns_a}" veth_A-R1 "${ns_b}" veth_B-R1 \
              "${ns_a}" vti4_a    "${ns_b}" vti4_b

        veth_mtu=1500
        vti_mtu=$((veth_mtu - 20))

        # Room left in the tunnel MTU after UDP (8), SPI (4), SN (4), IV (8),
        # ICV (16), pad length (1) and next header (1)
        esp_payload_rfc4106=$((vti_mtu - 8 - 4 - 4 - 8 - 16 - 1 - 1))
        # 28: presumably the IPv4 (20) and ICMP (8) headers of the ping itself
        ping_payload=$((esp_payload_rfc4106 - 28))

        mtu "${ns_a}"  veth_A-R1 "${veth_mtu}"
        mtu "${ns_r1}" veth_R1-A "${veth_mtu}"
        mtu "${ns_b}"  veth_B-R1 "${veth_mtu}"
        mtu "${ns_r1}" veth_R1-B "${veth_mtu}"

        mtu "${ns_a}" vti4_a "${vti_mtu}"
        mtu "${ns_b}" vti4_b "${vti_mtu}"

        # Send DF packet without exceeding link layer MTU, check that no
        # exception is created
        run_cmd ${ns_a} ping -q -M want -i 0.1 -w 1 -s ${ping_payload} ${tunnel4_b_addr}
        pmtu="$(route_get_dst_pmtu_from_exception "${ns_a}" ${tunnel4_b_addr})"
        if ! check_pmtu_value "" "${pmtu}" "sending packet smaller than PMTU (IP payload length ${esp_payload_rfc4106})"; then
                return 1
        fi

        # Now decrease link layer MTU by 8 bytes on R1, check that exception is created
        # with the right PMTU value
        mtu "${ns_r1}" veth_R1-B $((veth_mtu - 8))
        run_cmd ${ns_a} ping -q -M want -i 0.1 -w 1 -s ${ping_payload} ${tunnel4_b_addr}
        pmtu="$(route_get_dst_pmtu_from_exception "${ns_a}" ${tunnel4_b_addr})"
        check_pmtu_value "$((esp_payload_rfc4106 - 8))" "${pmtu}" "exceeding PMTU (IP payload length ${esp_payload_rfc4106})"
}
1741 
# Routed variant of the vti6-over-UDP test: the tunnel between A and B goes
# through R1. A packet that fits must not create an exception; after the MTU
# of R1's link towards B is lowered by 8 bytes, the same packet must create
# one with a PMTU 8 bytes smaller.
#
# Returns: $ksft_skip if setup fails, 1 if an exception shows up too early,
#          otherwise the status of the final check_pmtu_value
test_pmtu_vti6_udp_routed_exception() {
        setup namespaces routing vti6routed xfrm6udprouted || return $ksft_skip
        trace "${ns_a}" veth_A-R1 "${ns_b}" veth_B-R1 \
              "${ns_a}" vti6_a    "${ns_b}" vti6_b

        veth_mtu=1500
        vti_mtu=$((veth_mtu - 40))

        # Room left in the tunnel MTU after UDP (8), SPI (4), SN (4), IV (8),
        # ICV (16), pad length (1) and next header (1)
        esp_payload_rfc4106=$((vti_mtu - 8 - 4 - 4 - 8 - 16 - 1 - 1))
        # 48: presumably the IPv6 (40) and ICMPv6 (8) headers of the ping itself
        ping_payload=$((esp_payload_rfc4106 - 48))

        mtu "${ns_a}"  veth_A-R1 "${veth_mtu}"
        mtu "${ns_r1}" veth_R1-A "${veth_mtu}"
        mtu "${ns_b}"  veth_B-R1 "${veth_mtu}"
        mtu "${ns_r1}" veth_R1-B "${veth_mtu}"

        # The tunnel device MTUs are not set explicitly here; the calls were
        # left disabled:
        # mtu "${ns_a}" vti6_a ${vti_mtu}
        # mtu "${ns_b}" vti6_b ${vti_mtu}

        run_cmd ${ns_a} ${ping6} -q -M want -i 0.1 -w 1 -s ${ping_payload} ${tunnel6_b_addr}

        # Check that exception was not created
        pmtu="$(route_get_dst_pmtu_from_exception "${ns_a}" ${tunnel6_b_addr})"
        if ! check_pmtu_value "" "${pmtu}" "sending packet smaller than PMTU (IP payload length ${esp_payload_rfc4106})"; then
                return 1
        fi

        # Now decrease link layer MTU by 8 bytes on R1, check that exception is created
        # with the right PMTU value
        mtu "${ns_r1}" veth_R1-B $((veth_mtu - 8))
        run_cmd ${ns_a} ${ping6} -q -M want -i 0.1 -w 1 -s ${ping_payload} ${tunnel6_b_addr}
        pmtu="$(route_get_dst_pmtu_from_exception "${ns_a}" ${tunnel6_b_addr})"
        check_pmtu_value "$((esp_payload_rfc4106 - 8))" "${pmtu}" "exceeding PMTU (IP payload length ${esp_payload_rfc4106})"
}
1776 
# Check that a vti4 device created on top of a veth gets, by default, the
# veth MTU minus the IPv4 header length (20).
#
# Returns: $ksft_skip if setup fails, 1 on mismatch, 0 otherwise
test_pmtu_vti4_default_mtu() {
        setup namespaces veth vti4 || return $ksft_skip

        veth_mtu="$(link_get_mtu "${ns_a}" veth_a)"
        vti4_mtu="$(link_get_mtu "${ns_a}" vti4_a)"

        # Nothing more to do if the difference is exactly one IPv4 header
        [ $((veth_mtu - vti4_mtu)) -eq 20 ] && return 0

        err "  vti MTU ${vti4_mtu} is not veth MTU ${veth_mtu} minus IPv4 header length"
        return 1
}
1788 
# Check that a vti6 device created on top of a veth gets, by default, the
# veth MTU minus the IPv6 header length (40).
#
# Returns: $ksft_skip if setup fails, 1 on mismatch, 0 otherwise
test_pmtu_vti6_default_mtu() {
        setup namespaces veth vti6 || return $ksft_skip

        veth_mtu="$(link_get_mtu "${ns_a}" veth_a)"
        vti6_mtu="$(link_get_mtu "${ns_a}" vti6_a)"

        # Nothing more to do if the difference is exactly one IPv6 header
        [ $((veth_mtu - vti6_mtu)) -eq 40 ] && return 0

        err "  vti MTU ${vti6_mtu} is not veth MTU ${veth_mtu} minus IPv6 header length"
        return 1
}
1800 
# Check MTU handling when a vti4 device is created with an explicit MTU:
# out-of-range values must either be rejected or adjusted into the valid
# range, and valid values must be applied as given.
#
# Returns: $ksft_skip if setup fails or vti devices can't be created,
#          1 if any check fails, 0 otherwise
test_pmtu_vti4_link_add_mtu() {
        setup namespaces || return $ksft_skip

        # Probe for vti support by creating a throwaway device
        if ! run_cmd ${ns_a} ip link add vti4_a type vti local ${veth4_a_addr} remote ${veth4_b_addr} key 10; then
                err "  vti not supported" && return $ksft_skip
        fi
        run_cmd ${ns_a} ip link del vti4_a

        fail=0

        min=68
        max=$((65535 - 20))
        # Check invalid values first
        for v in $((min - 1)) $((max + 1)); do
                # This can fail, or MTU can be adjusted to a proper value
                run_cmd ${ns_a} ip link add vti4_a mtu ${v} type vti local ${veth4_a_addr} remote ${veth4_b_addr} key 10 || continue

                mtu="$(link_get_mtu "${ns_a}" vti4_a)"
                if [ ${mtu} -lt ${min} -o ${mtu} -gt ${max} ]; then
                        err "  vti tunnel created with invalid MTU ${mtu}"
                        fail=1
                fi
                run_cmd ${ns_a} ip link del vti4_a
        done

        # Now check valid values: the device must report exactly what was
        # requested
        for v in ${min} 1300 ${max}; do
                run_cmd ${ns_a} ip link add vti4_a mtu ${v} type vti local ${veth4_a_addr} remote ${veth4_b_addr} key 10
                mtu="$(link_get_mtu "${ns_a}" vti4_a)"
                run_cmd ${ns_a} ip link del vti4_a
                [ "${mtu}" = "${v}" ] && continue

                err "  vti MTU ${mtu} doesn't match configured value ${v}"
                fail=1
        done

        return ${fail}
}
1838 
# Check MTU handling when a vti6 device is created with an explicit MTU:
# out-of-range values must either be rejected or adjusted into the valid
# range, and valid values must be applied as given.
#
# Returns: $ksft_skip if setup fails or vti6 devices can't be created,
#          1 if any check fails, 0 otherwise
test_pmtu_vti6_link_add_mtu() {
        setup namespaces || return $ksft_skip

        # Probe for vti6 support by creating a throwaway device
        run_cmd ${ns_a} ip link add vti6_a type vti6 local ${veth6_a_addr} remote ${veth6_b_addr} key 10
        [ $? -ne 0 ] && err "  vti6 not supported" && return $ksft_skip
        run_cmd ${ns_a} ip link del vti6_a

        fail=0

        min=68                  # vti6 can carry IPv4 packets too
        max=$((65535 - 40))
        # Check invalid values first
        for v in $((min - 1)) $((max + 1)); do
                # This can fail, or MTU can be adjusted to a proper value
                run_cmd ${ns_a} ip link add vti6_a mtu ${v} type vti6 local ${veth6_a_addr} remote ${veth6_b_addr} key 10 || continue

                mtu="$(link_get_mtu "${ns_a}" vti6_a)"
                # Report the MTU the device actually ended up with: that is
                # the value being validated, not the one requested
                if [ "${mtu}" -lt "${min}" ] || [ "${mtu}" -gt "${max}" ]; then
                        err "  vti6 tunnel created with invalid MTU ${mtu}"
                        fail=1
                fi
                run_cmd ${ns_a} ip link del vti6_a
        done

        # Now check valid values, including both ends of the valid range and
        # 1280
        for v in ${min} 1280 1300 ${max}; do
                run_cmd ${ns_a} ip link add vti6_a mtu ${v} type vti6 local ${veth6_a_addr} remote ${veth6_b_addr} key 10
                mtu="$(link_get_mtu "${ns_a}" vti6_a)"
                run_cmd ${ns_a} ip link del vti6_a
                if [ "${mtu}" != "${v}" ]; then
                        err "  vti6 MTU ${mtu} doesn't match configured value ${v}"
                        fail=1
                fi
        done

        return ${fail}
}
1876 
# Check how the MTU of a vti6 device behaves when its endpoints are changed
# between two dummy devices with different MTUs (1500 and 3000): an explicit
# MTU must be kept, and without one the MTU must follow the new device.
#
# Returns: $ksft_skip if setup fails or dummy devices can't be created,
#          1 if any check fails, 0 otherwise
test_pmtu_vti6_link_change_mtu() {
        setup namespaces || return $ksft_skip

        if ! run_cmd ${ns_a} ip link add dummy0 mtu 1500 type dummy; then
                err "  dummy not supported" && return $ksft_skip
        fi
        run_cmd ${ns_a} ip link add dummy1 mtu 3000 type dummy
        run_cmd ${ns_a} ip link set dummy0 up
        run_cmd ${ns_a} ip link set dummy1 up

        run_cmd ${ns_a} ip addr add ${dummy6_0_prefix}1/${dummy6_mask} dev dummy0
        run_cmd ${ns_a} ip addr add ${dummy6_1_prefix}1/${dummy6_mask} dev dummy1

        fail=0

        # Create vti6 interface bound to device, passing MTU, check it
        run_cmd ${ns_a} ip link add vti6_a mtu 1300 type vti6 remote ${dummy6_0_prefix}2 local ${dummy6_0_prefix}1
        mtu="$(link_get_mtu "${ns_a}" vti6_a)"
        [ ${mtu} -ne 1300 ] && {
                err "  vti6 MTU ${mtu} doesn't match configured value 1300"
                fail=1
        }

        # Move to another device with different MTU, without passing MTU, check
        # MTU is adjusted
        run_cmd ${ns_a} ip link set vti6_a type vti6 remote ${dummy6_1_prefix}2 local ${dummy6_1_prefix}1
        mtu="$(link_get_mtu "${ns_a}" vti6_a)"
        [ ${mtu} -ne $((3000 - 40)) ] && {
                err "  vti MTU ${mtu} is not dummy MTU 3000 minus IPv6 header length"
                fail=1
        }

        # Move it back, passing MTU, check MTU is not overridden
        run_cmd ${ns_a} ip link set vti6_a mtu 1280 type vti6 remote ${dummy6_0_prefix}2 local ${dummy6_0_prefix}1
        mtu="$(link_get_mtu "${ns_a}" vti6_a)"
        [ ${mtu} -ne 1280 ] && {
                err "  vti6 MTU ${mtu} doesn't match configured value 1280"
                fail=1
        }

        return ${fail}
}
1918 
# Check that a command needed by a test is available.
#
# Uses the POSIX "command -v" built-in rather than the external "which"
# utility, which might itself be missing. Note that "command -v" also
# succeeds for shell built-ins and functions of that name.
#
# Arguments: $1 - name of the command to look for
# Returns:   0 if the command is available, 1 (after logging through err)
#            otherwise
check_command() {
        cmd=${1}

        if ! command -v "${cmd}" > /dev/null 2>&1; then
                err "  missing required command: '${cmd}'"
                return 1
        fi
        return 0
}
1928 
# Fill the route exception cache from up to two CPUs over a VXLAN tunnel,
# then delete the veth device in A and check that the deletion completes
# within a second. A deletion still pending after that is reported as a
# likely leaked PMTU dst.
#
# Arguments: $1 - outer IP version of the VXLAN tunnel (4 or 6)
# Returns:   $ksft_skip if taskset is missing or setup fails, 1 if the
#            device can't be deleted in time, 0 otherwise
test_cleanup_vxlanX_exception() {
        outer="${1}"
        encap="vxlan"
        ll_mtu=4000

        check_command taskset || return $ksft_skip
        # Second space-separated field of the first two "processor" lines
        cpu_list=$(grep -m 2 processor /proc/cpuinfo | cut -d ' ' -f 2)

        setup namespaces routing ${encap}${outer} || return $ksft_skip
        trace "${ns_a}" ${encap}_a "${ns_b}"  ${encap}_b \
              "${ns_a}" veth_A-R1  "${ns_r1}" veth_R1-A  \
              "${ns_b}" veth_B-R1  "${ns_r1}" veth_R1-B

        # Create route exception by exceeding link layer MTU
        mtu "${ns_a}"  veth_A-R1 $((ll_mtu + 1000))
        mtu "${ns_r1}" veth_R1-A $((ll_mtu + 1000))
        mtu "${ns_b}"  veth_B-R1 ${ll_mtu}
        mtu "${ns_r1}" veth_R1-B ${ll_mtu}

        mtu "${ns_a}" ${encap}_a $((ll_mtu + 1000))
        mtu "${ns_b}" ${encap}_b $((ll_mtu + 1000))

        # Fill exception cache for multiple CPUs (2)
        # we can always use inner IPv4 for that
        for cpu in ${cpu_list}; do
                run_cmd taskset --cpu-list ${cpu} ${ns_a} ping -q -M want -i 0.1 -w 1 -s $((ll_mtu + 500)) ${tunnel4_b_addr}
        done

        # Delete the device in the background and give it one second
        ${ns_a} ip link del dev veth_A-R1 &
        iplink_pid=$!
        sleep 1

        # The command line (NUL separators stripped) is only still readable
        # as the "ip link del" invocation if that process hasn't finished
        [ "$(cat /proc/${iplink_pid}/cmdline 2>/dev/null | tr -d '\0')" = "iplinkdeldevveth_A-R1" ] || return 0

        err "  can't delete veth device in a timely manner, PMTU dst likely leaked"
        return 1
}
1965 
# cleanup_ipv6_exception: device cleanup test over a VXLAN tunnel with IPv6
# as outer protocol.
test_cleanup_ipv6_exception() {
        test_cleanup_vxlanX_exception "6"
}
1969 
# cleanup_ipv4_exception: device cleanup test over a VXLAN tunnel with IPv4
# as outer protocol.
test_cleanup_ipv4_exception() {
        test_cleanup_vxlanX_exception "4"
}
1973 
# Run a single test in a subshell, print its result line and fold the result
# into the overall script status.
#
# Arguments: $1 - test name; the function test_$1 is executed
#            $2 - description printed in the result line
# Globals:   exitcode, all_skipped (updated), VERBOSE, PAUSE_ON_FAIL,
#            ksft_skip (read)
# Returns:   the status of the test: 0 on success, 1 on failure, $ksft_skip
#            if skipped
#
# exitcode ends up as 1 if any test failed, as $ksft_skip if all tests were
# skipped, and as 0 otherwise.
run_test() {
        (
        tname="$1"
        tdesc="$2"

        unset IFS

        # Since cleanup() relies on variables modified by this subshell, it
        # has to run in this context.
        trap cleanup EXIT

        if [ "$VERBOSE" = "1" ]; then
                printf "\n##########################################################################\n\n"
        fi

        eval test_${tname}
        ret=$?

        if [ $ret -eq 0 ]; then
                printf "TEST: %-60s  [ OK ]\n" "${tdesc}"
        elif [ $ret -eq 1 ]; then
                printf "TEST: %-60s  [FAIL]\n" "${tdesc}"
                if [ "${PAUSE_ON_FAIL}" = "yes" ]; then
                        echo
                        echo "Pausing. Hit enter to continue"
                        read a
                fi
                err_flush
                exit 1
        elif [ $ret -eq $ksft_skip ]; then
                printf "TEST: %-60s  [SKIP]\n" "${tdesc}"
                err_flush
        fi

        return $ret
        )
        ret=$?
        case $ret in
                0)
                        all_skipped=false
                        # Only a "skipped" status may be cleared by a passing
                        # test. This must be a numeric comparison: a single
                        # word such as "$exitcode=$ksft_skip" is a non-empty
                        # string, hence always true, and would make any
                        # passing test hide earlier failures.
                        [ "$exitcode" -eq "$ksft_skip" ] && exitcode=0
                ;;
                $ksft_skip)
                        [ $all_skipped = true ] && exitcode=$ksft_skip
                ;;
                *)
                        all_skipped=false
                        exitcode=1
                ;;
        esac

        return $ret
}
2027 
# Run a test once more with USE_NH set to "yes" for the duration of the run,
# tagging the description with " - nexthop objects".
#
# Arguments: $1 - test name, as for run_test
#            $2 - test description
run_test_nh() {
        tname="$1"
        tdesc="$2"

        USE_NH=yes
        run_test "${tname}" "${tdesc} - nexthop objects"
        # Back to the default for whatever runs next
        USE_NH=no
}
2036 
# Create 101 cached IPv4 route exceptions (100 towards addresses on B reached
# via R1, one towards B via R2), then check that they can all be listed and
# that flushing the cache removes them.
#
# Returns: $ksft_skip if setup fails, 1 if listing or flushing doesn't give
#          the expected result, 0 otherwise
test_list_flush_ipv4_exception() {
        setup namespaces routing || return $ksft_skip
        trace "${ns_a}"  veth_A-R1    "${ns_r1}" veth_R1-A \
              "${ns_r1}" veth_R1-B    "${ns_b}"  veth_B-R1 \
              "${ns_a}"  veth_A-R2    "${ns_r2}" veth_R2-A \
              "${ns_r2}" veth_R2-B    "${ns_b}"  veth_B-R2

        dst_prefix1="${prefix4}.${b_r1}."
        dst2="${prefix4}.${b_r2}.1"

        # Set up initial MTU values
        mtu "${ns_a}"  veth_A-R1 2000
        mtu "${ns_r1}" veth_R1-A 2000
        mtu "${ns_r1}" veth_R1-B 1500
        mtu "${ns_b}"  veth_B-R1 1500

        mtu "${ns_a}"  veth_A-R2 2000
        mtu "${ns_r2}" veth_R2-A 2000
        mtu "${ns_r2}" veth_R2-B 1500
        mtu "${ns_b}"  veth_B-R2 1500

        fail=0

        # Add 100 addresses for veth endpoint on B reached by default A route
        for i in $(seq 100 199); do
                run_cmd ${ns_b} ip addr add "${dst_prefix1}${i}" dev veth_B-R1
        done

        # Create 100 cached route exceptions for path via R1, one via R2. Note
        # that with IPv4 we need to actually cause a route lookup that matches
        # the exception caused by ICMP, in order to actually have a cached
        # route, so we need to ping each destination twice
        for i in $(seq 100 199); do
                run_cmd ${ns_a} ping -q -M want -i 0.1 -c 2 -s 1800 "${dst_prefix1}${i}"
        done
        run_cmd ${ns_a} ping -q -M want -i 0.1 -c 2 -s 1800 "${dst2}"

        if [ "$(${ns_a} ip -oneline route list cache | wc -l)" -ne 101 ]; then
                err "  can't list cached exceptions"
                fail=1
        fi

        run_cmd ${ns_a} ip route flush cache
        # After the flush, query destinations that did have an exception: the
        # first address added on the path via R1, and the one via R2
        pmtu1="$(route_get_dst_pmtu_from_exception "${ns_a}" "${dst_prefix1}100")"
        pmtu2="$(route_get_dst_pmtu_from_exception "${ns_a}" "${dst2}")"
        if [ -n "${pmtu1}" ] || [ -n "${pmtu2}" ] || \
           [ -n "$(${ns_a} ip route list cache)" ]; then
                err "  can't flush cached exceptions"
                fail=1
        fi

        return ${fail}
}
2090 
# Create 101 cached IPv6 route exceptions (100 towards addresses on B reached
# via R1, one towards B via R2), then check that they can all be listed and
# that flushing the cache removes them.
#
# Returns: $ksft_skip if setup fails, 1 if listing or flushing doesn't give
#          the expected result, 0 otherwise
test_list_flush_ipv6_exception() {
        setup namespaces routing || return $ksft_skip
        trace "${ns_a}"  veth_A-R1    "${ns_r1}" veth_R1-A \
              "${ns_r1}" veth_R1-B    "${ns_b}"  veth_B-R1 \
              "${ns_a}"  veth_A-R2    "${ns_r2}" veth_R2-A \
              "${ns_r2}" veth_R2-B    "${ns_b}"  veth_B-R2

        dst_prefix1="${prefix6}:${b_r1}::"
        dst2="${prefix6}:${b_r2}::1"

        # Set up initial MTU values
        mtu "${ns_a}"  veth_A-R1 2000
        mtu "${ns_r1}" veth_R1-A 2000
        mtu "${ns_r1}" veth_R1-B 1500
        mtu "${ns_b}"  veth_B-R1 1500

        mtu "${ns_a}"  veth_A-R2 2000
        mtu "${ns_r2}" veth_R2-A 2000
        mtu "${ns_r2}" veth_R2-B 1500
        mtu "${ns_b}"  veth_B-R2 1500

        fail=0

        # Add 100 addresses for veth endpoint on B reached by default A route
        for i in $(seq 100 199); do
                run_cmd ${ns_b} ip addr add "${dst_prefix1}${i}" dev veth_B-R1
        done

        # Create 100 cached route exceptions for path via R1, one via R2. Use
        # ${ping6}, like the other IPv6 tests in this file, instead of
        # relying on plain ping accepting IPv6 destinations
        for i in $(seq 100 199); do
                run_cmd ${ns_a} ${ping6} -q -M want -i 0.1 -w 1 -s 1800 "${dst_prefix1}${i}"
        done
        run_cmd ${ns_a} ${ping6} -q -M want -i 0.1 -w 1 -s 1800 "${dst2}"
        if [ "$(${ns_a} ip -oneline -6 route list cache | wc -l)" -ne 101 ]; then
                err "  can't list cached exceptions"
                fail=1
        fi

        run_cmd ${ns_a} ip -6 route flush cache
        pmtu1="$(route_get_dst_pmtu_from_exception "${ns_a}" "${dst_prefix1}100")"
        pmtu2="$(route_get_dst_pmtu_from_exception "${ns_a}" ${dst2})"
        if [ -n "${pmtu1}" ] || [ -n "${pmtu2}" ] || \
           [ -n "$(${ns_a} ip -6 route list cache)" ]; then
                err "  can't flush cached exceptions"
                fail=1
        fi

        return ${fail}
}
2140 
# Create PMTU route exceptions towards B over both routers, then change the
# default route in A and delete A's device towards R1 while the exceptions
# exist.
#
# Arguments: $1 - IP version (4 or 6)
# Returns:   $ksft_skip if setup fails, 1 if the exceptions don't carry the
#            expected PMTU, otherwise the status of the final device deletion
test_pmtu_ipvX_route_change() {
        family=${1}

        # Report a setup failure as a skip, like every other test here; any
        # other non-zero value would be counted as an unreported failure by
        # run_test
        setup namespaces routing || return $ksft_skip
        trace "${ns_a}"  veth_A-R1    "${ns_r1}" veth_R1-A \
              "${ns_r1}" veth_R1-B    "${ns_b}"  veth_B-R1 \
              "${ns_a}"  veth_A-R2    "${ns_r2}" veth_R2-A \
              "${ns_r2}" veth_R2-B    "${ns_b}"  veth_B-R2

        if [ ${family} -eq 4 ]; then
                ping=ping
                dst1="${prefix4}.${b_r1}.1"
                dst2="${prefix4}.${b_r2}.1"
                gw="${prefix4}.${a_r1}.2"
        else
                ping=${ping6}
                dst1="${prefix6}:${b_r1}::1"
                dst2="${prefix6}:${b_r2}::1"
                gw="${prefix6}:${a_r1}::2"
        fi

        # Set up initial MTU values
        mtu "${ns_a}"  veth_A-R1 2000
        mtu "${ns_r1}" veth_R1-A 2000
        mtu "${ns_r1}" veth_R1-B 1400
        mtu "${ns_b}"  veth_B-R1 1400

        mtu "${ns_a}"  veth_A-R2 2000
        mtu "${ns_r2}" veth_R2-A 2000
        mtu "${ns_r2}" veth_R2-B 1500
        mtu "${ns_b}"  veth_B-R2 1500

        # Create route exceptions
        run_cmd ${ns_a} ${ping} -q -M want -i 0.1 -w 1 -s 1800 ${dst1}
        run_cmd ${ns_a} ${ping} -q -M want -i 0.1 -w 1 -s 1800 ${dst2}

        # Check that exceptions have been created with the correct PMTU
        pmtu_1="$(route_get_dst_pmtu_from_exception "${ns_a}" ${dst1})"
        check_pmtu_value "1400" "${pmtu_1}" "exceeding MTU" || return 1
        pmtu_2="$(route_get_dst_pmtu_from_exception "${ns_a}" ${dst2})"
        check_pmtu_value "1500" "${pmtu_2}" "exceeding MTU" || return 1

        # Replace the route from A to R1
        run_cmd ${ns_a} ip route change default via ${gw}

        # Delete the device in A
        run_cmd ${ns_a} ip link del "veth_A-R1"
}
2189 
# pmtu_ipv4_route_change: route change test with IPv4 exceptions.
test_pmtu_ipv4_route_change() {
        test_pmtu_ipvX_route_change "4"
}
2193 
# pmtu_ipv6_route_change: route change test with IPv6 exceptions.
test_pmtu_ipv6_route_change() {
        test_pmtu_ipvX_route_change "6"
}
2197 
# Print a help text listing the supported options and the available tests,
# then exit with status 1.
#
# The options listed are the ones handled by the getopts loop of this script
# (-p, -t, -v).
usage() {
        echo
        echo "$0 [OPTIONS] [TEST]..."
        echo "If no TEST argument is given, all tests will be run."
        echo
        echo "Options"
        echo "  -p: pause on failure until enter is pressed"
        echo "  -t: capture traffic to TEST_INTERFACE.pcap (needs tcpdump)"
        echo "  -v: verbose output"
        echo
        echo "Available tests${tests}"
        exit 1
}
2209 
2210 ################################################################################
2211 #
# Overall script status, updated by run_test: exitcode is the exit status of
# the script, all_skipped stays true until a test passes or fails
exitcode=0
desc=0
all_skipped=true

# Options: -p pauses on failure, -v enables verbose output, -t enables
# tracing if tcpdump is available. Anything else prints the usage and exits.
while getopts :ptv o
do
        case $o in
        p) PAUSE_ON_FAIL=yes;;
        v) VERBOSE=1;;
        t) if command -v tcpdump > /dev/null 2>&1; then
                TRACING=1
           else
                echo "=== tcpdump not available, tracing disabled"
           fi
           ;;
        *) usage;;
        esac
done
shift $(($OPTIND-1))

# Split words on tab and newline only, so that ${tests} can be walked field
# by field below. The separator is built with printf instead of a literal
# tab, which is easily lost; the trailing "_" protects the newline from being
# stripped by the command substitution.
# NOTE(review): this assumes the fields of ${tests} are tab-separated and
# that descriptions may contain spaces -- confirm against its definition.
IFS="$(printf '\t\n_')"
IFS="${IFS%_}"

for arg do
        # Check first that all requested tests are available before running any
        command -v > /dev/null "test_${arg}" || { echo "=== Test ${arg} not found"; usage; }
done

trap cleanup EXIT

# start clean
cleanup

# Tests are repeated with nexthop objects only if "ip nexthop" works here
HAVE_NH=no
ip nexthop ls >/dev/null 2>&1
[ $? -eq 0 ] && HAVE_NH=yes

# ${tests} is consumed three fields at a time: test name, description, and a
# third field which, if nexthop objects are supported, tells whether the test
# is run again through run_test_nh (value "1")
name=""
desc=""
rerun_nh=0
for t in ${tests}; do
        [ "${name}" = "" ]      && name="${t}"  && continue
        [ "${desc}" = "" ]      && desc="${t}"  && continue

        if [ "${HAVE_NH}" = "yes" ]; then
                rerun_nh="${t}"
        fi

        # Without test names on the command line everything is run, otherwise
        # only the tests that were named. Arguments starting with "--" are
        # not test names and are ignored here.
        run_this=1
        for arg do
                [ "${arg}" != "${arg#--*}" ] && continue
                [ "${arg}" = "${name}" ] && run_this=1 && break
                run_this=0
        done
        if [ $run_this -eq 1 ]; then
                run_test "${name}" "${desc}"
                # if test was skipped no need to retry with nexthop objects
                [ $? -eq $ksft_skip ] && rerun_nh=0

                if [ "${rerun_nh}" = "1" ]; then
                        run_test_nh "${name}" "${desc}"
                fi
        fi
        name=""
        desc=""
        rerun_nh=0
done

exit ${exitcode}