Home | History | Annotate | Download | only in net
      1 #!/bin/sh
      2 # SPDX-License-Identifier: GPL-2.0
      3 #
      4 # Check that route PMTU values match expectations, and that initial device MTU
      5 # values are assigned correctly
      6 #
      7 # Tests currently implemented:
      8 #
# - pmtu_ipv4_exception
     10 #	Set up two namespaces, A and B, with two paths between them over routers
     11 #	R1 and R2 (also implemented with namespaces), with different MTUs:
     12 #
#	  segment a_r1    segment b_r1		a_r1: 2000
#	.--------------R1--------------.	b_r1: 1400
#	A                               B	a_r2: 2000
#	'--------------R2--------------'	b_r2: 1500
     17 #	  segment a_r2    segment b_r2
     18 #
     19 #	Check that PMTU exceptions with the correct PMTU are created. Then
     20 #	decrease and increase the MTU of the local link for one of the paths,
     21 #	A to R1, checking that route exception PMTU changes accordingly over
     22 #	this path. Also check that locked exceptions are created when an ICMP
     23 #	message advertising a PMTU smaller than net.ipv4.route.min_pmtu is
     24 #	received
     25 #
# - pmtu_ipv6_exception
#	Same as pmtu_ipv4_exception, except for locked PMTU tests, using IPv6
     28 #
     29 # - pmtu_vti4_exception
     30 #	Set up vti tunnel on top of veth, with xfrm states and policies, in two
     31 #	namespaces with matching endpoints. Check that route exception is not
     32 #	created if link layer MTU is not exceeded, then exceed it and check that
     33 #	exception is created with the expected PMTU. The approach described
     34 #	below for IPv6 doesn't apply here, because, on IPv4, administrative MTU
     35 #	changes alone won't affect PMTU
     36 #
     37 # - pmtu_vti6_exception
     38 #	Set up vti6 tunnel on top of veth, with xfrm states and policies, in two
     39 #	namespaces with matching endpoints. Check that route exception is
     40 #	created by exceeding link layer MTU with ping to other endpoint. Then
     41 #	decrease and increase MTU of tunnel, checking that route exception PMTU
     42 #	changes accordingly
     43 #
     44 # - pmtu_vti4_default_mtu
     45 #	Set up vti4 tunnel on top of veth, in two namespaces with matching
     46 #	endpoints. Check that MTU assigned to vti interface is the MTU of the
     47 #	lower layer (veth) minus additional lower layer headers (zero, for veth)
     48 #	minus IPv4 header length
     49 #
     50 # - pmtu_vti6_default_mtu
     51 #	Same as above, for IPv6
     52 #
     53 # - pmtu_vti4_link_add_mtu
     54 #	Set up vti4 interface passing MTU value at link creation, check MTU is
     55 #	configured, and that link is not created with invalid MTU values
     56 #
     57 # - pmtu_vti6_link_add_mtu
     58 #	Same as above, for IPv6
     59 #
     60 # - pmtu_vti6_link_change_mtu
     61 #	Set up two dummy interfaces with different MTUs, create a vti6 tunnel
     62 #	and check that configured MTU is used on link creation and changes, and
     63 #	that MTU is properly calculated instead when MTU is not configured from
     64 #	userspace
     65 
     66 # Kselftest framework requirement - SKIP code is 4.
     67 ksft_skip=4
     68 
     69 # Some systems don't have a ping6 binary anymore
     70 which ping6 > /dev/null 2>&1 && ping6=$(which ping6) || ping6=$(which ping)
     71 
     72 tests="
     73 	pmtu_ipv4_exception		ipv4: PMTU exceptions
     74 	pmtu_ipv6_exception		ipv6: PMTU exceptions
     75 	pmtu_vti6_exception		vti6: PMTU exceptions
     76 	pmtu_vti4_exception		vti4: PMTU exceptions
     77 	pmtu_vti4_default_mtu		vti4: default MTU assignment
     78 	pmtu_vti6_default_mtu		vti6: default MTU assignment
     79 	pmtu_vti4_link_add_mtu		vti4: MTU setting on link creation
     80 	pmtu_vti6_link_add_mtu		vti6: MTU setting on link creation
     81 	pmtu_vti6_link_change_mtu	vti6: MTU changes on link changes"
     82 
     83 NS_A="ns-$(mktemp -u XXXXXX)"
     84 NS_B="ns-$(mktemp -u XXXXXX)"
     85 NS_R1="ns-$(mktemp -u XXXXXX)"
     86 NS_R2="ns-$(mktemp -u XXXXXX)"
     87 ns_a="ip netns exec ${NS_A}"
     88 ns_b="ip netns exec ${NS_B}"
     89 ns_r1="ip netns exec ${NS_R1}"
     90 ns_r2="ip netns exec ${NS_R2}"
     91 
     92 # Addressing and routing for tests with routers: four network segments, with
     93 # index SEGMENT between 1 and 4, a common prefix (PREFIX4 or PREFIX6) and an
     94 # identifier ID, which is 1 for hosts (A and B), 2 for routers (R1 and R2).
     95 # Addresses are:
     96 # - IPv4: PREFIX4.SEGMENT.ID (/24)
     97 # - IPv6: PREFIX6:SEGMENT::ID (/64)
     98 prefix4="192.168"
     99 prefix6="fd00"
    100 a_r1=1
    101 a_r2=2
    102 b_r1=3
    103 b_r2=4
    104 #	ns	peer	segment
    105 routing_addrs="
    106 	A	R1	${a_r1}
    107 	A	R2	${a_r2}
    108 	B	R1	${b_r1}
    109 	B	R2	${b_r2}
    110 "
    111 # Traffic from A to B goes through R1 by default, and through R2, if destined to
    112 # B's address on the b_r2 segment.
    113 # Traffic from B to A goes through R1.
    114 #	ns	destination		gateway
    115 routes="
    116 	A	default			${prefix4}.${a_r1}.2
    117 	A	${prefix4}.${b_r2}.1	${prefix4}.${a_r2}.2
    118 	B	default			${prefix4}.${b_r1}.2
    119 
    120 	A	default			${prefix6}:${a_r1}::2
    121 	A	${prefix6}:${b_r2}::1	${prefix6}:${a_r2}::2
    122 	B	default			${prefix6}:${b_r1}::2
    123 "
    124 
    125 veth4_a_addr="192.168.1.1"
    126 veth4_b_addr="192.168.1.2"
    127 veth4_mask="24"
    128 veth6_a_addr="fd00:1::a"
    129 veth6_b_addr="fd00:1::b"
    130 veth6_mask="64"
    131 
    132 vti4_a_addr="192.168.2.1"
    133 vti4_b_addr="192.168.2.2"
    134 vti4_mask="24"
    135 vti6_a_addr="fd00:2::a"
    136 vti6_b_addr="fd00:2::b"
    137 vti6_mask="64"
    138 
    139 dummy6_0_addr="fc00:1000::0"
    140 dummy6_1_addr="fc00:1001::0"
    141 dummy6_mask="64"
    142 
    143 cleanup_done=1
    144 err_buf=
    145 tcpdump_pids=
    146 
    147 err() {
    148 	err_buf="${err_buf}${1}
    149 "
    150 }
    151 
    152 err_flush() {
    153 	echo -n "${err_buf}"
    154 	err_buf=
    155 }
    156 
    157 # Find the auto-generated name for this namespace
    158 nsname() {
    159 	eval echo \$NS_$1
    160 }
    161 
    162 setup_namespaces() {
    163 	for n in ${NS_A} ${NS_B} ${NS_R1} ${NS_R2}; do
    164 		ip netns add ${n} || return 1
    165 	done
    166 }
    167 
    168 setup_veth() {
    169 	${ns_a} ip link add veth_a type veth peer name veth_b || return 1
    170 	${ns_a} ip link set veth_b netns ${NS_B}
    171 
    172 	${ns_a} ip addr add ${veth4_a_addr}/${veth4_mask} dev veth_a
    173 	${ns_b} ip addr add ${veth4_b_addr}/${veth4_mask} dev veth_b
    174 
    175 	${ns_a} ip addr add ${veth6_a_addr}/${veth6_mask} dev veth_a
    176 	${ns_b} ip addr add ${veth6_b_addr}/${veth6_mask} dev veth_b
    177 
    178 	${ns_a} ip link set veth_a up
    179 	${ns_b} ip link set veth_b up
    180 }
    181 
    182 setup_vti() {
    183 	proto=${1}
    184 	veth_a_addr="${2}"
    185 	veth_b_addr="${3}"
    186 	vti_a_addr="${4}"
    187 	vti_b_addr="${5}"
    188 	vti_mask=${6}
    189 
    190 	[ ${proto} -eq 6 ] && vti_type="vti6" || vti_type="vti"
    191 
    192 	${ns_a} ip link add vti${proto}_a type ${vti_type} local ${veth_a_addr} remote ${veth_b_addr} key 10 || return 1
    193 	${ns_b} ip link add vti${proto}_b type ${vti_type} local ${veth_b_addr} remote ${veth_a_addr} key 10
    194 
    195 	${ns_a} ip addr add ${vti_a_addr}/${vti_mask} dev vti${proto}_a
    196 	${ns_b} ip addr add ${vti_b_addr}/${vti_mask} dev vti${proto}_b
    197 
    198 	${ns_a} ip link set vti${proto}_a up
    199 	${ns_b} ip link set vti${proto}_b up
    200 
    201 	sleep 1
    202 }
    203 
    204 setup_vti4() {
    205 	setup_vti 4 ${veth4_a_addr} ${veth4_b_addr} ${vti4_a_addr} ${vti4_b_addr} ${vti4_mask}
    206 }
    207 
    208 setup_vti6() {
    209 	setup_vti 6 ${veth6_a_addr} ${veth6_b_addr} ${vti6_a_addr} ${vti6_b_addr} ${vti6_mask}
    210 }
    211 
    212 setup_xfrm() {
    213 	proto=${1}
    214 	veth_a_addr="${2}"
    215 	veth_b_addr="${3}"
    216 
    217 	${ns_a} ip -${proto} xfrm state add src ${veth_a_addr} dst ${veth_b_addr} spi 0x1000 proto esp aead "rfc4106(gcm(aes))" 0x0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f 128 mode tunnel || return 1
    218 	${ns_a} ip -${proto} xfrm state add src ${veth_b_addr} dst ${veth_a_addr} spi 0x1001 proto esp aead "rfc4106(gcm(aes))" 0x0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f 128 mode tunnel
    219 	${ns_a} ip -${proto} xfrm policy add dir out mark 10 tmpl src ${veth_a_addr} dst ${veth_b_addr} proto esp mode tunnel
    220 	${ns_a} ip -${proto} xfrm policy add dir in mark 10 tmpl src ${veth_b_addr} dst ${veth_a_addr} proto esp mode tunnel
    221 
    222 	${ns_b} ip -${proto} xfrm state add src ${veth_a_addr} dst ${veth_b_addr} spi 0x1000 proto esp aead "rfc4106(gcm(aes))" 0x0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f 128 mode tunnel
    223 	${ns_b} ip -${proto} xfrm state add src ${veth_b_addr} dst ${veth_a_addr} spi 0x1001 proto esp aead "rfc4106(gcm(aes))" 0x0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f 128 mode tunnel
    224 	${ns_b} ip -${proto} xfrm policy add dir out mark 10 tmpl src ${veth_b_addr} dst ${veth_a_addr} proto esp mode tunnel
    225 	${ns_b} ip -${proto} xfrm policy add dir in mark 10 tmpl src ${veth_a_addr} dst ${veth_b_addr} proto esp mode tunnel
    226 }
    227 
    228 setup_xfrm4() {
    229 	setup_xfrm 4 ${veth4_a_addr} ${veth4_b_addr}
    230 }
    231 
    232 setup_xfrm6() {
    233 	setup_xfrm 6 ${veth6_a_addr} ${veth6_b_addr}
    234 }
    235 
    236 setup_routing() {
    237 	for i in ${NS_R1} ${NS_R2}; do
    238 		ip netns exec ${i} sysctl -q net/ipv4/ip_forward=1
    239 		ip netns exec ${i} sysctl -q net/ipv6/conf/all/forwarding=1
    240 	done
    241 
    242 	for i in ${routing_addrs}; do
    243 		[ "${ns}" = "" ]	&& ns="${i}"		&& continue
    244 		[ "${peer}" = "" ]	&& peer="${i}"		&& continue
    245 		[ "${segment}" = "" ]	&& segment="${i}"
    246 
    247 		ns_name="$(nsname ${ns})"
    248 		peer_name="$(nsname ${peer})"
    249 		if="veth_${ns}-${peer}"
    250 		ifpeer="veth_${peer}-${ns}"
    251 
    252 		# Create veth links
    253 		ip link add ${if} up netns ${ns_name} type veth peer name ${ifpeer} netns ${peer_name} || return 1
    254 		ip -n ${peer_name} link set dev ${ifpeer} up
    255 
    256 		# Add addresses
    257 		ip -n ${ns_name}   addr add ${prefix4}.${segment}.1/24  dev ${if}
    258 		ip -n ${ns_name}   addr add ${prefix6}:${segment}::1/64 dev ${if}
    259 
    260 		ip -n ${peer_name} addr add ${prefix4}.${segment}.2/24  dev ${ifpeer}
    261 		ip -n ${peer_name} addr add ${prefix6}:${segment}::2/64 dev ${ifpeer}
    262 
    263 		ns=""; peer=""; segment=""
    264 	done
    265 
    266 	for i in ${routes}; do
    267 		[ "${ns}" = "" ]	&& ns="${i}"		&& continue
    268 		[ "${addr}" = "" ]	&& addr="${i}"		&& continue
    269 		[ "${gw}" = "" ]	&& gw="${i}"
    270 
    271 		ns_name="$(nsname ${ns})"
    272 
    273 		ip -n ${ns_name} route add ${addr} via ${gw}
    274 
    275 		ns=""; addr=""; gw=""
    276 	done
    277 }
    278 
    279 setup() {
    280 	[ "$(id -u)" -ne 0 ] && echo "  need to run as root" && return $ksft_skip
    281 
    282 	cleanup_done=0
    283 	for arg do
    284 		eval setup_${arg} || { echo "  ${arg} not supported"; return 1; }
    285 	done
    286 }
    287 
    288 trace() {
    289 	[ $tracing -eq 0 ] && return
    290 
    291 	for arg do
    292 		[ "${ns_cmd}" = "" ] && ns_cmd="${arg}" && continue
    293 		${ns_cmd} tcpdump -s 0 -i "${arg}" -w "${name}_${arg}.pcap" 2> /dev/null &
    294 		tcpdump_pids="${tcpdump_pids} $!"
    295 		ns_cmd=
    296 	done
    297 	sleep 1
    298 }
    299 
    300 cleanup() {
    301 	for pid in ${tcpdump_pids}; do
    302 		kill ${pid}
    303 	done
    304 	tcpdump_pids=
    305 
    306 	[ ${cleanup_done} -eq 1 ] && return
    307 	for n in ${NS_A} ${NS_B} ${NS_R1} ${NS_R2}; do
    308 		ip netns del ${n} 2> /dev/null
    309 	done
    310 	cleanup_done=1
    311 }
    312 
    313 mtu() {
    314 	ns_cmd="${1}"
    315 	dev="${2}"
    316 	mtu="${3}"
    317 
    318 	${ns_cmd} ip link set dev ${dev} mtu ${mtu}
    319 }
    320 
    321 mtu_parse() {
    322 	input="${1}"
    323 
    324 	next=0
    325 	for i in ${input}; do
    326 		[ ${next} -eq 1 -a "${i}" = "lock" ] && next=2 && continue
    327 		[ ${next} -eq 1 ] && echo "${i}" && return
    328 		[ ${next} -eq 2 ] && echo "lock ${i}" && return
    329 		[ "${i}" = "mtu" ] && next=1
    330 	done
    331 }
    332 
    333 link_get() {
    334 	ns_cmd="${1}"
    335 	name="${2}"
    336 
    337 	${ns_cmd} ip link show dev "${name}"
    338 }
    339 
    340 link_get_mtu() {
    341 	ns_cmd="${1}"
    342 	name="${2}"
    343 
    344 	mtu_parse "$(link_get "${ns_cmd}" ${name})"
    345 }
    346 
    347 route_get_dst_exception() {
    348 	ns_cmd="${1}"
    349 	dst="${2}"
    350 
    351 	${ns_cmd} ip route get "${dst}"
    352 }
    353 
    354 route_get_dst_pmtu_from_exception() {
    355 	ns_cmd="${1}"
    356 	dst="${2}"
    357 
    358 	mtu_parse "$(route_get_dst_exception "${ns_cmd}" ${dst})"
    359 }
    360 
    361 check_pmtu_value() {
    362 	expected="${1}"
    363 	value="${2}"
    364 	event="${3}"
    365 
    366 	[ "${expected}" = "any" ] && [ -n "${value}" ] && return 0
    367 	[ "${value}" = "${expected}" ] && return 0
    368 	[ -z "${value}" ] &&    err "  PMTU exception wasn't created after ${event}" && return 1
    369 	[ -z "${expected}" ] && err "  PMTU exception shouldn't exist after ${event}" && return 1
    370 	err "  found PMTU exception with incorrect MTU ${value}, expected ${expected}, after ${event}"
    371 	return 1
    372 }
    373 
# Test PMTU exceptions over the two routed paths between A and B.
# $1:	IP version, 4 or 6: selects ping command and destination addresses.
#	PMTU locking checks at the end are skipped for 6.
# Returns 0 on success, 1 on failure (details are recorded with err()), 2 if
# setup fails, which is reported as a skipped test by the main loop.
test_pmtu_ipvX() {
	family=${1}

	setup namespaces routing || return 2
	trace "${ns_a}"  veth_A-R1    "${ns_r1}" veth_R1-A \
	      "${ns_r1}" veth_R1-B    "${ns_b}"  veth_B-R1 \
	      "${ns_a}"  veth_A-R2    "${ns_r2}" veth_R2-A \
	      "${ns_r2}" veth_R2-B    "${ns_b}"  veth_B-R2

	# dst1 is B's address on the segment towards R1, reached via R1; dst2 is
	# B's address on the segment towards R2, reached via R2
	if [ ${family} -eq 4 ]; then
		ping=ping
		dst1="${prefix4}.${b_r1}.1"
		dst2="${prefix4}.${b_r2}.1"
	else
		ping=${ping6}
		dst1="${prefix6}:${b_r1}::1"
		dst2="${prefix6}:${b_r2}::1"
	fi

	# Set up initial MTU values
	mtu "${ns_a}"  veth_A-R1 2000
	mtu "${ns_r1}" veth_R1-A 2000
	mtu "${ns_r1}" veth_R1-B 1400
	mtu "${ns_b}"  veth_B-R1 1400

	mtu "${ns_a}"  veth_A-R2 2000
	mtu "${ns_r2}" veth_R2-A 2000
	mtu "${ns_r2}" veth_R2-B 1500
	mtu "${ns_b}"  veth_B-R2 1500

	# Create route exceptions
	${ns_a} ${ping} -q -M want -i 0.1 -w 2 -s 1800 ${dst1} > /dev/null
	${ns_a} ${ping} -q -M want -i 0.1 -w 2 -s 1800 ${dst2} > /dev/null

	# Check that exceptions have been created with the correct PMTU
	pmtu_1="$(route_get_dst_pmtu_from_exception "${ns_a}" ${dst1})"
	check_pmtu_value "1400" "${pmtu_1}" "exceeding MTU" || return 1
	pmtu_2="$(route_get_dst_pmtu_from_exception "${ns_a}" ${dst2})"
	check_pmtu_value "1500" "${pmtu_2}" "exceeding MTU" || return 1

	# Decrease local MTU below PMTU, check for PMTU decrease in route exception
	mtu "${ns_a}"  veth_A-R1 1300
	mtu "${ns_r1}" veth_R1-A 1300
	pmtu_1="$(route_get_dst_pmtu_from_exception "${ns_a}" ${dst1})"
	check_pmtu_value "1300" "${pmtu_1}" "decreasing local MTU" || return 1
	# Second exception shouldn't be modified
	pmtu_2="$(route_get_dst_pmtu_from_exception "${ns_a}" ${dst2})"
	check_pmtu_value "1500" "${pmtu_2}" "changing local MTU on a link not on this path" || return 1

	# Increase MTU, check for PMTU increase in route exception
	mtu "${ns_a}"  veth_A-R1 1700
	mtu "${ns_r1}" veth_R1-A 1700
	pmtu_1="$(route_get_dst_pmtu_from_exception "${ns_a}" ${dst1})"
	check_pmtu_value "1700" "${pmtu_1}" "increasing local MTU" || return 1
	# Second exception shouldn't be modified
	pmtu_2="$(route_get_dst_pmtu_from_exception "${ns_a}" ${dst2})"
	check_pmtu_value "1500" "${pmtu_2}" "changing local MTU on a link not on this path" || return 1

	# Skip PMTU locking tests for IPv6
	[ $family -eq 6 ] && return 0

	# Decrease remote MTU on path via R2, get new exception
	mtu "${ns_r2}" veth_R2-B 400
	mtu "${ns_b}"  veth_B-R2 400
	${ns_a} ${ping} -q -M want -i 0.1 -w 2 -s 1400 ${dst2} > /dev/null
	pmtu_2="$(route_get_dst_pmtu_from_exception "${ns_a}" ${dst2})"
	# "lock 552" is what mtu_parse() prints for a locked exception with MTU 552
	check_pmtu_value "lock 552" "${pmtu_2}" "exceeding MTU, with MTU < min_pmtu" || return 1

	# Decrease local MTU below PMTU
	mtu "${ns_a}"  veth_A-R2 500
	mtu "${ns_r2}" veth_R2-A 500
	pmtu_2="$(route_get_dst_pmtu_from_exception "${ns_a}" ${dst2})"
	check_pmtu_value "500" "${pmtu_2}" "decreasing local MTU" || return 1

	# Increase local MTU
	mtu "${ns_a}"  veth_A-R2 1500
	mtu "${ns_r2}" veth_R2-A 1500
	pmtu_2="$(route_get_dst_pmtu_from_exception "${ns_a}" ${dst2})"
	check_pmtu_value "1500" "${pmtu_2}" "increasing local MTU" || return 1

	# Get new exception
	${ns_a} ${ping} -q -M want -i 0.1 -w 2 -s 1400 ${dst2} > /dev/null
	pmtu_2="$(route_get_dst_pmtu_from_exception "${ns_a}" ${dst2})"
	check_pmtu_value "lock 552" "${pmtu_2}" "exceeding MTU, with MTU < min_pmtu" || return 1
}
    459 
    460 test_pmtu_ipv4_exception() {
    461 	test_pmtu_ipvX 4
    462 }
    463 
    464 test_pmtu_ipv6_exception() {
    465 	test_pmtu_ipvX 6
    466 }
    467 
# vti4: PMTU exceptions. Check that no exception is created as long as the link
# layer MTU is not exceeded, and that one is created, with the expected PMTU,
# once it's exceeded by one byte.
# Returns 0 on success, 1 on failure (details are recorded with err()), 2 if
# setup fails, which is reported as a skipped test by the main loop.
test_pmtu_vti4_exception() {
	setup namespaces veth vti4 xfrm4 || return 2
	trace "${ns_a}" veth_a    "${ns_b}" veth_b \
	      "${ns_a}" vti4_a    "${ns_b}" vti4_b

	veth_mtu=1500
	# 20: IPv4 header length, as checked by test_pmtu_vti4_default_mtu()
	vti_mtu=$((veth_mtu - 20))

	#                                SPI   SN   IV  ICV   pad length   next header
	esp_payload_rfc4106=$((vti_mtu - 4   - 4  - 8 - 16  - 1          - 1))
	# NOTE(review): 28 is presumably the length of the inner IPv4 header (20)
	# plus the ICMP header (8) - confirm
	ping_payload=$((esp_payload_rfc4106 - 28))

	mtu "${ns_a}" veth_a ${veth_mtu}
	mtu "${ns_b}" veth_b ${veth_mtu}
	mtu "${ns_a}" vti4_a ${vti_mtu}
	mtu "${ns_b}" vti4_b ${vti_mtu}

	# Send DF packet without exceeding link layer MTU, check that no
	# exception is created
	${ns_a} ping -q -M want -i 0.1 -w 2 -s ${ping_payload} ${vti4_b_addr} > /dev/null
	pmtu="$(route_get_dst_pmtu_from_exception "${ns_a}" ${vti4_b_addr})"
	check_pmtu_value "" "${pmtu}" "sending packet smaller than PMTU (IP payload length ${esp_payload_rfc4106})" || return 1

	# Now exceed link layer MTU by one byte, check that exception is created
	# with the right PMTU value
	${ns_a} ping -q -M want -i 0.1 -w 2 -s $((ping_payload + 1)) ${vti4_b_addr} > /dev/null
	pmtu="$(route_get_dst_pmtu_from_exception "${ns_a}" ${vti4_b_addr})"
	# The status of this last check is the status of the test
	check_pmtu_value "${esp_payload_rfc4106}" "${pmtu}" "exceeding PMTU (IP payload length $((esp_payload_rfc4106 + 1)))"
}
    497 
# vti6: PMTU exceptions. Check that an exception is created by exceeding the
# link layer MTU, and that its PMTU follows changes of the tunnel MTU.
# Returns 0 on success, 1 on failure (details are recorded with err()), 2 if
# setup fails, which is reported as a skipped test by the main loop.
test_pmtu_vti6_exception() {
	setup namespaces veth vti6 xfrm6 || return 2
	trace "${ns_a}" veth_a    "${ns_b}" veth_b \
	      "${ns_a}" vti6_a    "${ns_b}" vti6_b
	fail=0

	# Create route exception by exceeding link layer MTU
	mtu "${ns_a}" veth_a 4000
	mtu "${ns_b}" veth_b 4000
	mtu "${ns_a}" vti6_a 5000
	mtu "${ns_b}" vti6_b 5000
	${ns_a} ${ping6} -q -i 0.1 -w 2 -s 60000 ${vti6_b_addr} > /dev/null

	# Check that exception was created
	pmtu="$(route_get_dst_pmtu_from_exception "${ns_a}" ${vti6_b_addr})"
	# Without an exception, checks below make no sense: stop here
	check_pmtu_value any "${pmtu}" "creating tunnel exceeding link layer MTU" || return 1

	# Decrease tunnel MTU, check for PMTU decrease in route exception
	mtu "${ns_a}" vti6_a 3000
	pmtu="$(route_get_dst_pmtu_from_exception "${ns_a}" ${vti6_b_addr})"
	# Unlike the check above, a failure here is recorded, and the test goes on
	check_pmtu_value "3000" "${pmtu}" "decreasing tunnel MTU" || fail=1

	# Increase tunnel MTU, check for PMTU increase in route exception
	mtu "${ns_a}" vti6_a 9000
	pmtu="$(route_get_dst_pmtu_from_exception "${ns_a}" ${vti6_b_addr})"
	check_pmtu_value "9000" "${pmtu}" "increasing tunnel MTU" || fail=1

	return ${fail}
}
    527 
    528 test_pmtu_vti4_default_mtu() {
    529 	setup namespaces veth vti4 || return 2
    530 
    531 	# Check that MTU of vti device is MTU of veth minus IPv4 header length
    532 	veth_mtu="$(link_get_mtu "${ns_a}" veth_a)"
    533 	vti4_mtu="$(link_get_mtu "${ns_a}" vti4_a)"
    534 	if [ $((veth_mtu - vti4_mtu)) -ne 20 ]; then
    535 		err "  vti MTU ${vti4_mtu} is not veth MTU ${veth_mtu} minus IPv4 header length"
    536 		return 1
    537 	fi
    538 }
    539 
    540 test_pmtu_vti6_default_mtu() {
    541 	setup namespaces veth vti6 || return 2
    542 
    543 	# Check that MTU of vti device is MTU of veth minus IPv6 header length
    544 	veth_mtu="$(link_get_mtu "${ns_a}" veth_a)"
    545 	vti6_mtu="$(link_get_mtu "${ns_a}" vti6_a)"
    546 	if [ $((veth_mtu - vti6_mtu)) -ne 40 ]; then
    547 		err "  vti MTU ${vti6_mtu} is not veth MTU ${veth_mtu} minus IPv6 header length"
    548 		return 1
    549 	fi
    550 }
    551 
    552 test_pmtu_vti4_link_add_mtu() {
    553 	setup namespaces || return 2
    554 
    555 	${ns_a} ip link add vti4_a type vti local ${veth4_a_addr} remote ${veth4_b_addr} key 10
    556 	[ $? -ne 0 ] && err "  vti not supported" && return 2
    557 	${ns_a} ip link del vti4_a
    558 
    559 	fail=0
    560 
    561 	min=68
    562 	max=$((65535 - 20))
    563 	# Check invalid values first
    564 	for v in $((min - 1)) $((max + 1)); do
    565 		${ns_a} ip link add vti4_a mtu ${v} type vti local ${veth4_a_addr} remote ${veth4_b_addr} key 10 2>/dev/null
    566 		# This can fail, or MTU can be adjusted to a proper value
    567 		[ $? -ne 0 ] && continue
    568 		mtu="$(link_get_mtu "${ns_a}" vti4_a)"
    569 		if [ ${mtu} -lt ${min} -o ${mtu} -gt ${max} ]; then
    570 			err "  vti tunnel created with invalid MTU ${mtu}"
    571 			fail=1
    572 		fi
    573 		${ns_a} ip link del vti4_a
    574 	done
    575 
    576 	# Now check valid values
    577 	for v in ${min} 1300 ${max}; do
    578 		${ns_a} ip link add vti4_a mtu ${v} type vti local ${veth4_a_addr} remote ${veth4_b_addr} key 10
    579 		mtu="$(link_get_mtu "${ns_a}" vti4_a)"
    580 		${ns_a} ip link del vti4_a
    581 		if [ "${mtu}" != "${v}" ]; then
    582 			err "  vti MTU ${mtu} doesn't match configured value ${v}"
    583 			fail=1
    584 		fi
    585 	done
    586 
    587 	return ${fail}
    588 }
    589 
    590 test_pmtu_vti6_link_add_mtu() {
    591 	setup namespaces || return 2
    592 
    593 	${ns_a} ip link add vti6_a type vti6 local ${veth6_a_addr} remote ${veth6_b_addr} key 10
    594 	[ $? -ne 0 ] && err "  vti6 not supported" && return 2
    595 	${ns_a} ip link del vti6_a
    596 
    597 	fail=0
    598 
    599 	min=68			# vti6 can carry IPv4 packets too
    600 	max=$((65535 - 40))
    601 	# Check invalid values first
    602 	for v in $((min - 1)) $((max + 1)); do
    603 		${ns_a} ip link add vti6_a mtu ${v} type vti6 local ${veth6_a_addr} remote ${veth6_b_addr} key 10 2>/dev/null
    604 		# This can fail, or MTU can be adjusted to a proper value
    605 		[ $? -ne 0 ] && continue
    606 		mtu="$(link_get_mtu "${ns_a}" vti6_a)"
    607 		if [ ${mtu} -lt ${min} -o ${mtu} -gt ${max} ]; then
    608 			err "  vti6 tunnel created with invalid MTU ${v}"
    609 			fail=1
    610 		fi
    611 		${ns_a} ip link del vti6_a
    612 	done
    613 
    614 	# Now check valid values
    615 	for v in 68 1280 1300 $((65535 - 40)); do
    616 		${ns_a} ip link add vti6_a mtu ${v} type vti6 local ${veth6_a_addr} remote ${veth6_b_addr} key 10
    617 		mtu="$(link_get_mtu "${ns_a}" vti6_a)"
    618 		${ns_a} ip link del vti6_a
    619 		if [ "${mtu}" != "${v}" ]; then
    620 			err "  vti6 MTU ${mtu} doesn't match configured value ${v}"
    621 			fail=1
    622 		fi
    623 	done
    624 
    625 	return ${fail}
    626 }
    627 
    628 test_pmtu_vti6_link_change_mtu() {
    629 	setup namespaces || return 2
    630 
    631 	${ns_a} ip link add dummy0 mtu 1500 type dummy
    632 	[ $? -ne 0 ] && err "  dummy not supported" && return 2
    633 	${ns_a} ip link add dummy1 mtu 3000 type dummy
    634 	${ns_a} ip link set dummy0 up
    635 	${ns_a} ip link set dummy1 up
    636 
    637 	${ns_a} ip addr add ${dummy6_0_addr}/${dummy6_mask} dev dummy0
    638 	${ns_a} ip addr add ${dummy6_1_addr}/${dummy6_mask} dev dummy1
    639 
    640 	fail=0
    641 
    642 	# Create vti6 interface bound to device, passing MTU, check it
    643 	${ns_a} ip link add vti6_a mtu 1300 type vti6 remote ${dummy6_0_addr} local ${dummy6_0_addr}
    644 	mtu="$(link_get_mtu "${ns_a}" vti6_a)"
    645 	if [ ${mtu} -ne 1300 ]; then
    646 		err "  vti6 MTU ${mtu} doesn't match configured value 1300"
    647 		fail=1
    648 	fi
    649 
    650 	# Move to another device with different MTU, without passing MTU, check
    651 	# MTU is adjusted
    652 	${ns_a} ip link set vti6_a type vti6 remote ${dummy6_1_addr} local ${dummy6_1_addr}
    653 	mtu="$(link_get_mtu "${ns_a}" vti6_a)"
    654 	if [ ${mtu} -ne $((3000 - 40)) ]; then
    655 		err "  vti MTU ${mtu} is not dummy MTU 3000 minus IPv6 header length"
    656 		fail=1
    657 	fi
    658 
    659 	# Move it back, passing MTU, check MTU is not overridden
    660 	${ns_a} ip link set vti6_a mtu 1280 type vti6 remote ${dummy6_0_addr} local ${dummy6_0_addr}
    661 	mtu="$(link_get_mtu "${ns_a}" vti6_a)"
    662 	if [ ${mtu} -ne 1280 ]; then
    663 		err "  vti6 MTU ${mtu} doesn't match configured value 1280"
    664 		fail=1
    665 	fi
    666 
    667 	return ${fail}
    668 }
    669 
    670 usage() {
    671 	echo
    672 	echo "$0 [OPTIONS] [TEST]..."
    673 	echo "If no TEST argument is given, all tests will be run."
    674 	echo
    675 	echo "Options"
    676 	echo "  --trace: capture traffic to TEST_INTERFACE.pcap"
    677 	echo
    678 	echo "Available tests${tests}"
    679 	exit 1
    680 }
    681 
    682 exitcode=0
    683 desc=0
    684 IFS="	
    685 "
    686 
    687 tracing=0
    688 for arg do
    689 	if [ "${arg}" != "${arg#--*}" ]; then
    690 		opt="${arg#--}"
    691 		if [ "${opt}" = "trace" ]; then
    692 			if which tcpdump > /dev/null 2>&1; then
    693 				tracing=1
    694 			else
    695 				echo "=== tcpdump not available, tracing disabled"
    696 			fi
    697 		else
    698 			usage
    699 		fi
    700 	else
    701 		# Check first that all requested tests are available before
    702 		# running any
    703 		command -v > /dev/null "test_${arg}" || { echo "=== Test ${arg} not found"; usage; }
    704 	fi
    705 done
    706 
    707 trap cleanup EXIT
    708 
    709 for t in ${tests}; do
    710 	[ $desc -eq 0 ] && name="${t}" && desc=1 && continue || desc=0
    711 
    712 	run_this=1
    713 	for arg do
    714 		[ "${arg}" != "${arg#--*}" ] && continue
    715 		[ "${arg}" = "${name}" ] && run_this=1 && break
    716 		run_this=0
    717 	done
    718 	[ $run_this -eq 0 ] && continue
    719 
    720 	(
    721 		unset IFS
    722 		eval test_${name}
    723 		ret=$?
    724 		cleanup
    725 
    726 		if [ $ret -eq 0 ]; then
    727 			printf "TEST: %-60s  [ OK ]\n" "${t}"
    728 		elif [ $ret -eq 1 ]; then
    729 			printf "TEST: %-60s  [FAIL]\n" "${t}"
    730 			err_flush
    731 			exit 1
    732 		elif [ $ret -eq 2 ]; then
    733 			printf "TEST: %-60s  [SKIP]\n" "${t}"
    734 			err_flush
    735 		fi
    736 	)
    737 	[ $? -ne 0 ] && exitcode=1
    738 done
    739 
    740 exit ${exitcode}
    741