diff options
Diffstat (limited to 'tools/testing/selftests/net')
39 files changed, 6296 insertions, 64 deletions
diff --git a/tools/testing/selftests/net/.gitignore b/tools/testing/selftests/net/.gitignore index c612d6e..128e548 100644 --- a/tools/testing/selftests/net/.gitignore +++ b/tools/testing/selftests/net/.gitignore @@ -1,9 +1,14 @@ msg_zerocopy socket psock_fanout +psock_snd psock_tpacket reuseport_bpf reuseport_bpf_cpu reuseport_bpf_numa reuseport_dualstack reuseaddr_conflict +tcp_mmap +udpgso +udpgso_bench_rx +udpgso_bench_tx diff --git a/tools/testing/selftests/net/Makefile b/tools/testing/selftests/net/Makefile index 3ff81a4..663e11e 100644 --- a/tools/testing/selftests/net/Makefile +++ b/tools/testing/selftests/net/Makefile @@ -5,13 +5,18 @@ CFLAGS = -Wall -Wl,--no-as-needed -O2 -g CFLAGS += -I../../../../usr/include/ TEST_PROGS := run_netsocktests run_afpackettests test_bpf.sh netdevice.sh rtnetlink.sh -TEST_PROGS += fib_tests.sh fib-onlink-tests.sh pmtu.sh +TEST_PROGS += fib_tests.sh fib-onlink-tests.sh pmtu.sh udpgso.sh +TEST_PROGS += udpgso_bench.sh fib_rule_tests.sh msg_zerocopy.sh psock_snd.sh TEST_PROGS_EXTENDED := in_netns.sh TEST_GEN_FILES = socket TEST_GEN_FILES += psock_fanout psock_tpacket msg_zerocopy +TEST_GEN_FILES += tcp_mmap tcp_inq psock_snd +TEST_GEN_FILES += udpgso udpgso_bench_tx udpgso_bench_rx TEST_GEN_PROGS = reuseport_bpf reuseport_bpf_cpu reuseport_bpf_numa TEST_GEN_PROGS += reuseport_dualstack reuseaddr_conflict include ../lib.mk $(OUTPUT)/reuseport_bpf_numa: LDFLAGS += -lnuma +$(OUTPUT)/tcp_mmap: LDFLAGS += -lpthread +$(OUTPUT)/tcp_inq: LDFLAGS += -lpthread diff --git a/tools/testing/selftests/net/fib_rule_tests.sh b/tools/testing/selftests/net/fib_rule_tests.sh new file mode 100755 index 0000000..d4cfb6a --- /dev/null +++ b/tools/testing/selftests/net/fib_rule_tests.sh @@ -0,0 +1,248 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +# This test is for checking IPv4 and IPv6 FIB rules API + +ret=0 + +PAUSE_ON_FAIL=${PAUSE_ON_FAIL:=no} +IP="ip -netns testns" + +RTABLE=100 +GW_IP4=192.51.100.2 +SRC_IP=192.51.100.3 +GW_IP6=2001:db8:1::2 +SRC_IP6=2001:db8:1::3 + +DEV_ADDR=192.51.100.1 +DEV=dummy0 + +log_test() +{ + local rc=$1 + local expected=$2 + local msg="$3" + + if [ ${rc} -eq ${expected} ]; then + nsuccess=$((nsuccess+1)) + printf "\n TEST: %-50s [ OK ]\n" "${msg}" + else + nfail=$((nfail+1)) + printf "\n TEST: %-50s [FAIL]\n" "${msg}" + if [ "${PAUSE_ON_FAIL}" = "yes" ]; then + echo + echo "hit enter to continue, 'q' to quit" + read a + [ "$a" = "q" ] && exit 1 + fi + fi +} + +log_section() +{ + echo + echo "######################################################################" + echo "TEST SECTION: $*" + echo "######################################################################" +} + +setup() +{ + set -e + ip netns add testns + $IP link set dev lo up + + $IP link add dummy0 type dummy + $IP link set dev dummy0 up + $IP address add 198.51.100.1/24 dev dummy0 + $IP -6 address add 2001:db8:1::1/64 dev dummy0 + + set +e +} + +cleanup() +{ + $IP link del dev dummy0 &> /dev/null + ip netns del testns +} + +fib_check_iproute_support() +{ + ip rule help 2>&1 | grep -q $1 + if [ $? -ne 0 ]; then + echo "SKIP: iproute2 iprule too old, missing $1 match" + return 1 + fi + + ip route get help 2>&1 | grep -q $2 + if [ $? -ne 0 ]; then + echo "SKIP: iproute2 get route too old, missing $2 match" + return 1 + fi + + return 0 +} + +fib_rule6_del() +{ + $IP -6 rule del $1 + log_test $? 0 "rule6 del $1" +} + +fib_rule6_del_by_pref() +{ + pref=$($IP -6 rule show | grep "$1 lookup $TABLE" | cut -d ":" -f 1) + $IP -6 rule del pref $pref +} + +fib_rule6_test_match_n_redirect() +{ + local match="$1" + local getmatch="$2" + + $IP -6 rule add $match table $RTABLE + $IP -6 route get $GW_IP6 $getmatch | grep -q "table $RTABLE" + log_test $? 0 "rule6 check: $1" + + fib_rule6_del_by_pref "$match" + log_test $? 0 "rule6 del by pref: $match" +} + +fib_rule6_test() +{ + # setup the fib rule redirect route + $IP -6 route add table $RTABLE default via $GW_IP6 dev $DEV onlink + + match="oif $DEV" + fib_rule6_test_match_n_redirect "$match" "$match" "oif redirect to table" + + match="from $SRC_IP6 iif $DEV" + fib_rule6_test_match_n_redirect "$match" "$match" "iif redirect to table" + + match="tos 0x10" + fib_rule6_test_match_n_redirect "$match" "$match" "tos redirect to table" + + match="fwmark 0x64" + getmatch="mark 0x64" + fib_rule6_test_match_n_redirect "$match" "$getmatch" "fwmark redirect to table" + + fib_check_iproute_support "uidrange" "uid" + if [ $? -eq 0 ]; then + match="uidrange 100-100" + getmatch="uid 100" + fib_rule6_test_match_n_redirect "$match" "$getmatch" "uid redirect to table" + fi + + fib_check_iproute_support "sport" "sport" + if [ $? -eq 0 ]; then + match="sport 666 dport 777" + fib_rule6_test_match_n_redirect "$match" "$match" "sport and dport redirect to table" + fi + + fib_check_iproute_support "ipproto" "ipproto" + if [ $? -eq 0 ]; then + match="ipproto tcp" + fib_rule6_test_match_n_redirect "$match" "$match" "ipproto match" + fi + + fib_check_iproute_support "ipproto" "ipproto" + if [ $? -eq 0 ]; then + match="ipproto icmp" + fib_rule6_test_match_n_redirect "$match" "$match" "ipproto icmp match" + fi +} + +fib_rule4_del() +{ + $IP rule del $1 + log_test $? 0 "del $1" +} + +fib_rule4_del_by_pref() +{ + pref=$($IP rule show | grep "$1 lookup $TABLE" | cut -d ":" -f 1) + $IP rule del pref $pref +} + +fib_rule4_test_match_n_redirect() +{ + local match="$1" + local getmatch="$2" + + $IP rule add $match table $RTABLE + $IP route get $GW_IP4 $getmatch | grep -q "table $RTABLE" + log_test $? 0 "rule4 check: $1" + + fib_rule4_del_by_pref "$match" + log_test $? 0 "rule4 del by pref: $match" +} + +fib_rule4_test() +{ + # setup the fib rule redirect route + $IP route add table $RTABLE default via $GW_IP4 dev $DEV onlink + + match="oif $DEV" + fib_rule4_test_match_n_redirect "$match" "$match" "oif redirect to table" + + match="from $SRC_IP iif $DEV" + fib_rule4_test_match_n_redirect "$match" "$match" "iif redirect to table" + + match="tos 0x10" + fib_rule4_test_match_n_redirect "$match" "$match" "tos redirect to table" + + match="fwmark 0x64" + getmatch="mark 0x64" + fib_rule4_test_match_n_redirect "$match" "$getmatch" "fwmark redirect to table" + + fib_check_iproute_support "uidrange" "uid" + if [ $? -eq 0 ]; then + match="uidrange 100-100" + getmatch="uid 100" + fib_rule4_test_match_n_redirect "$match" "$getmatch" "uid redirect to table" + fi + + fib_check_iproute_support "sport" "sport" + if [ $? -eq 0 ]; then + match="sport 666 dport 777" + fib_rule4_test_match_n_redirect "$match" "$match" "sport and dport redirect to table" + fi + + fib_check_iproute_support "ipproto" "ipproto" + if [ $? -eq 0 ]; then + match="ipproto tcp" + fib_rule4_test_match_n_redirect "$match" "$match" "ipproto tcp match" + fi + + fib_check_iproute_support "ipproto" "ipproto" + if [ $? -eq 0 ]; then + match="ipproto icmp" + fib_rule4_test_match_n_redirect "$match" "$match" "ipproto icmp match" + fi +} + +run_fibrule_tests() +{ + log_section "IPv4 fib rule" + fib_rule4_test + log_section "IPv6 fib rule" + fib_rule6_test +} + +if [ "$(id -u)" -ne 0 ];then + echo "SKIP: Need root privileges" + exit 0 +fi + +if [ ! -x "$(command -v ip)" ]; then + echo "SKIP: Could not run test without ip tool" + exit 0 +fi + +# start clean +cleanup &> /dev/null +setup +run_fibrule_tests +cleanup + +exit $ret diff --git a/tools/testing/selftests/net/fib_tests.sh b/tools/testing/selftests/net/fib_tests.sh index 5baac82..78245d6 100755..100644 --- a/tools/testing/selftests/net/fib_tests.sh +++ b/tools/testing/selftests/net/fib_tests.sh @@ -8,8 +8,11 @@ ret=0 # Kselftest framework requirement - SKIP code is 4. ksft_skip=4 -VERBOSE=${VERBOSE:=0} -PAUSE_ON_FAIL=${PAUSE_ON_FAIL:=no} +# all tests in this script. Can be overridden with -t option +TESTS="unregister down carrier nexthop ipv6_rt ipv4_rt ipv6_addr_metric ipv4_addr_metric" +VERBOSE=0 +PAUSE_ON_FAIL=no +PAUSE=no IP="ip -netns testns" log_test() @@ -20,8 +23,10 @@ log_test() if [ ${rc} -eq ${expected} ]; then printf " TEST: %-60s [ OK ]\n" "${msg}" + nsuccess=$((nsuccess+1)) else ret=1 + nfail=$((nfail+1)) printf " TEST: %-60s [FAIL]\n" "${msg}" if [ "${PAUSE_ON_FAIL}" = "yes" ]; then echo @@ -30,6 +35,13 @@ log_test() [ "$a" = "q" ] && exit 1 fi fi + + if [ "${PAUSE}" = "yes" ]; then + echo + echo "hit enter to continue, 'q' to quit" + read a + [ "$a" = "q" ] && exit 1 + fi } setup() @@ -565,20 +577,825 @@ fib_nexthop_test() } ################################################################################ -# +# Tests on route add and replace + +run_cmd() +{ + local cmd="$1" + local out + local stderr="2>/dev/null" + + if [ "$VERBOSE" = "1" ]; then + printf " COMMAND: $cmd\n" + stderr= + fi + + out=$(eval $cmd $stderr) + rc=$? + if [ "$VERBOSE" = "1" -a -n "$out" ]; then + echo " $out" + fi + + [ "$VERBOSE" = "1" ] && echo + + return $rc +} + +# add route for a prefix, flushing any existing routes first +# expected to be the first step of a test +add_route6() +{ + local pfx="$1" + local nh="$2" + local out + + if [ "$VERBOSE" = "1" ]; then + echo + echo " ##################################################" + echo + fi + + run_cmd "$IP -6 ro flush ${pfx}" + [ $? -ne 0 ] && exit 1 + + out=$($IP -6 ro ls match ${pfx}) + if [ -n "$out" ]; then + echo "Failed to flush routes for prefix used for tests." + exit 1 + fi + + run_cmd "$IP -6 ro add ${pfx} ${nh}" + if [ $? -ne 0 ]; then + echo "Failed to add initial route for test." + exit 1 + fi +} + +# add initial route - used in replace route tests +add_initial_route6() +{ + add_route6 "2001:db8:104::/64" "$1" +} + +check_route6() +{ + local pfx="2001:db8:104::/64" + local expected="$1" + local out + local rc=0 + + out=$($IP -6 ro ls match ${pfx} | sed -e 's/ pref medium//') + [ "${out}" = "${expected}" ] && return 0 + + if [ -z "${out}" ]; then + if [ "$VERBOSE" = "1" ]; then + printf "\nNo route entry found\n" + printf "Expected:\n" + printf " ${expected}\n" + fi + return 1 + fi + + # tricky way to convert output to 1-line without ip's + # messy '\'; this drops all extra white space + out=$(echo ${out}) + if [ "${out}" != "${expected}" ]; then + rc=1 + if [ "${VERBOSE}" = "1" ]; then + printf " Unexpected route entry. Have:\n" + printf " ${out}\n" + printf " Expected:\n" + printf " ${expected}\n\n" + fi + fi + + return $rc +} + +route_cleanup() +{ + $IP li del red 2>/dev/null + $IP li del dummy1 2>/dev/null + $IP li del veth1 2>/dev/null + $IP li del veth3 2>/dev/null + + cleanup &> /dev/null +} + +route_setup() +{ + route_cleanup + setup + + [ "${VERBOSE}" = "1" ] && set -x + set -e + + $IP li add red up type vrf table 101 + $IP li add veth1 type veth peer name veth2 + $IP li add veth3 type veth peer name veth4 + + $IP li set veth1 up + $IP li set veth3 up + $IP li set veth2 vrf red up + $IP li set veth4 vrf red up + $IP li add dummy1 type dummy + $IP li set dummy1 vrf red up + + $IP -6 addr add 2001:db8:101::1/64 dev veth1 + $IP -6 addr add 2001:db8:101::2/64 dev veth2 + $IP -6 addr add 2001:db8:103::1/64 dev veth3 + $IP -6 addr add 2001:db8:103::2/64 dev veth4 + $IP -6 addr add 2001:db8:104::1/64 dev dummy1 + + $IP addr add 172.16.101.1/24 dev veth1 + $IP addr add 172.16.101.2/24 dev veth2 + $IP addr add 172.16.103.1/24 dev veth3 + $IP addr add 172.16.103.2/24 dev veth4 + $IP addr add 172.16.104.1/24 dev dummy1 + + set +ex +} + +# assumption is that basic add of a single path route works +# otherwise just adding an address on an interface is broken +ipv6_rt_add() +{ + local rc + + echo + echo "IPv6 route add / append tests" + + # route add same prefix - fails with EEXISTS b/c ip adds NLM_F_EXCL + add_route6 "2001:db8:104::/64" "via 2001:db8:101::2" + run_cmd "$IP -6 ro add 2001:db8:104::/64 via 2001:db8:103::2" + log_test $? 2 "Attempt to add duplicate route - gw" + + # route add same prefix - fails with EEXISTS b/c ip adds NLM_F_EXCL + add_route6 "2001:db8:104::/64" "via 2001:db8:101::2" + run_cmd "$IP -6 ro add 2001:db8:104::/64 dev veth3" + log_test $? 2 "Attempt to add duplicate route - dev only" + + # route add same prefix - fails with EEXISTS b/c ip adds NLM_F_EXCL + add_route6 "2001:db8:104::/64" "via 2001:db8:101::2" + run_cmd "$IP -6 ro add unreachable 2001:db8:104::/64" + log_test $? 2 "Attempt to add duplicate route - reject route" + + # iproute2 prepend only sets NLM_F_CREATE + # - adds a new route; does NOT convert existing route to ECMP + add_route6 "2001:db8:104::/64" "via 2001:db8:101::2" + run_cmd "$IP -6 ro prepend 2001:db8:104::/64 via 2001:db8:103::2" + check_route6 "2001:db8:104::/64 via 2001:db8:101::2 dev veth1 metric 1024 2001:db8:104::/64 via 2001:db8:103::2 dev veth3 metric 1024" + log_test $? 0 "Add new route for existing prefix (w/o NLM_F_EXCL)" + + # route append with same prefix adds a new route + # - iproute2 sets NLM_F_CREATE | NLM_F_APPEND + add_route6 "2001:db8:104::/64" "via 2001:db8:101::2" + run_cmd "$IP -6 ro append 2001:db8:104::/64 via 2001:db8:103::2" + check_route6 "2001:db8:104::/64 metric 1024 nexthop via 2001:db8:101::2 dev veth1 weight 1 nexthop via 2001:db8:103::2 dev veth3 weight 1" + log_test $? 0 "Append nexthop to existing route - gw" + + add_route6 "2001:db8:104::/64" "via 2001:db8:101::2" + run_cmd "$IP -6 ro append 2001:db8:104::/64 dev veth3" + check_route6 "2001:db8:104::/64 metric 1024 nexthop via 2001:db8:101::2 dev veth1 weight 1 nexthop dev veth3 weight 1" + log_test $? 0 "Append nexthop to existing route - dev only" + + # multipath route can not have a nexthop that is a reject route + add_route6 "2001:db8:104::/64" "via 2001:db8:101::2" + run_cmd "$IP -6 ro append unreachable 2001:db8:104::/64" + log_test $? 2 "Append nexthop to existing route - reject route" + + # reject route can not be converted to multipath route + run_cmd "$IP -6 ro flush 2001:db8:104::/64" + run_cmd "$IP -6 ro add unreachable 2001:db8:104::/64" + run_cmd "$IP -6 ro append 2001:db8:104::/64 via 2001:db8:103::2" + log_test $? 2 "Append nexthop to existing reject route - gw" + + run_cmd "$IP -6 ro flush 2001:db8:104::/64" + run_cmd "$IP -6 ro add unreachable 2001:db8:104::/64" + run_cmd "$IP -6 ro append 2001:db8:104::/64 dev veth3" + log_test $? 2 "Append nexthop to existing reject route - dev only" + + # insert mpath directly + add_route6 "2001:db8:104::/64" "nexthop via 2001:db8:101::2 nexthop via 2001:db8:103::2" + check_route6 "2001:db8:104::/64 metric 1024 nexthop via 2001:db8:101::2 dev veth1 weight 1 nexthop via 2001:db8:103::2 dev veth3 weight 1" + log_test $? 0 "Add multipath route" + + add_route6 "2001:db8:104::/64" "nexthop via 2001:db8:101::2 nexthop via 2001:db8:103::2" + run_cmd "$IP -6 ro add 2001:db8:104::/64 nexthop via 2001:db8:101::2 nexthop via 2001:db8:103::2" + log_test $? 2 "Attempt to add duplicate multipath route" + + # insert of a second route without append but different metric + add_route6 "2001:db8:104::/64" "via 2001:db8:101::2" + run_cmd "$IP -6 ro add 2001:db8:104::/64 via 2001:db8:103::2 metric 512" + rc=$? + if [ $rc -eq 0 ]; then + run_cmd "$IP -6 ro add 2001:db8:104::/64 via 2001:db8:103::3 metric 256" + rc=$? + fi + log_test $rc 0 "Route add with different metrics" + + run_cmd "$IP -6 ro del 2001:db8:104::/64 metric 512" + rc=$? + if [ $rc -eq 0 ]; then + check_route6 "2001:db8:104::/64 via 2001:db8:103::3 dev veth3 metric 256 2001:db8:104::/64 via 2001:db8:101::2 dev veth1 metric 1024" + rc=$? + fi + log_test $rc 0 "Route delete with metric" +} -fib_test() +ipv6_rt_replace_single() { - if [ -n "$TEST" ]; then - eval $TEST + # single path with single path + # + add_initial_route6 "via 2001:db8:101::2" + run_cmd "$IP -6 ro replace 2001:db8:104::/64 via 2001:db8:103::2" + check_route6 "2001:db8:104::/64 via 2001:db8:103::2 dev veth3 metric 1024" + log_test $? 0 "Single path with single path" + + # single path with multipath + # + add_initial_route6 "nexthop via 2001:db8:101::2" + run_cmd "$IP -6 ro replace 2001:db8:104::/64 nexthop via 2001:db8:101::3 nexthop via 2001:db8:103::2" + check_route6 "2001:db8:104::/64 metric 1024 nexthop via 2001:db8:101::3 dev veth1 weight 1 nexthop via 2001:db8:103::2 dev veth3 weight 1" + log_test $? 0 "Single path with multipath" + + # single path with reject + # + add_initial_route6 "nexthop via 2001:db8:101::2" + run_cmd "$IP -6 ro replace unreachable 2001:db8:104::/64" + check_route6 "unreachable 2001:db8:104::/64 dev lo metric 1024" + log_test $? 0 "Single path with reject route" + + # single path with single path using MULTIPATH attribute + # + add_initial_route6 "via 2001:db8:101::2" + run_cmd "$IP -6 ro replace 2001:db8:104::/64 nexthop via 2001:db8:103::2" + check_route6 "2001:db8:104::/64 via 2001:db8:103::2 dev veth3 metric 1024" + log_test $? 0 "Single path with single path via multipath attribute" + + # route replace fails - invalid nexthop + add_initial_route6 "via 2001:db8:101::2" + run_cmd "$IP -6 ro replace 2001:db8:104::/64 via 2001:db8:104::2" + if [ $? -eq 0 ]; then + # previous command is expected to fail so if it returns 0 + # that means the test failed. + log_test 0 1 "Invalid nexthop" else - fib_unreg_test - fib_down_test - fib_carrier_test - fib_nexthop_test + check_route6 "2001:db8:104::/64 via 2001:db8:101::2 dev veth1 metric 1024" + log_test $? 0 "Invalid nexthop" fi + + # replace non-existent route + # - note use of change versus replace since ip adds NLM_F_CREATE + # for replace + add_initial_route6 "via 2001:db8:101::2" + run_cmd "$IP -6 ro change 2001:db8:105::/64 via 2001:db8:101::2" + log_test $? 2 "Single path - replace of non-existent route" +} + +ipv6_rt_replace_mpath() +{ + # multipath with multipath + add_initial_route6 "nexthop via 2001:db8:101::2 nexthop via 2001:db8:103::2" + run_cmd "$IP -6 ro replace 2001:db8:104::/64 nexthop via 2001:db8:101::3 nexthop via 2001:db8:103::3" + check_route6 "2001:db8:104::/64 metric 1024 nexthop via 2001:db8:101::3 dev veth1 weight 1 nexthop via 2001:db8:103::3 dev veth3 weight 1" + log_test $? 0 "Multipath with multipath" + + # multipath with single + add_initial_route6 "nexthop via 2001:db8:101::2 nexthop via 2001:db8:103::2" + run_cmd "$IP -6 ro replace 2001:db8:104::/64 via 2001:db8:101::3" + check_route6 "2001:db8:104::/64 via 2001:db8:101::3 dev veth1 metric 1024" + log_test $? 0 "Multipath with single path" + + # multipath with single + add_initial_route6 "nexthop via 2001:db8:101::2 nexthop via 2001:db8:103::2" + run_cmd "$IP -6 ro replace 2001:db8:104::/64 nexthop via 2001:db8:101::3" + check_route6 "2001:db8:104::/64 via 2001:db8:101::3 dev veth1 metric 1024" + log_test $? 0 "Multipath with single path via multipath attribute" + + # multipath with reject + add_initial_route6 "nexthop via 2001:db8:101::2 nexthop via 2001:db8:103::2" + run_cmd "$IP -6 ro replace unreachable 2001:db8:104::/64" + check_route6 "unreachable 2001:db8:104::/64 dev lo metric 1024" + log_test $? 0 "Multipath with reject route" + + # route replace fails - invalid nexthop 1 + add_initial_route6 "nexthop via 2001:db8:101::2 nexthop via 2001:db8:103::2" + run_cmd "$IP -6 ro replace 2001:db8:104::/64 nexthop via 2001:db8:111::3 nexthop via 2001:db8:103::3" + check_route6 "2001:db8:104::/64 metric 1024 nexthop via 2001:db8:101::2 dev veth1 weight 1 nexthop via 2001:db8:103::2 dev veth3 weight 1" + log_test $? 0 "Multipath - invalid first nexthop" + + # route replace fails - invalid nexthop 2 + add_initial_route6 "nexthop via 2001:db8:101::2 nexthop via 2001:db8:103::2" + run_cmd "$IP -6 ro replace 2001:db8:104::/64 nexthop via 2001:db8:101::3 nexthop via 2001:db8:113::3" + check_route6 "2001:db8:104::/64 metric 1024 nexthop via 2001:db8:101::2 dev veth1 weight 1 nexthop via 2001:db8:103::2 dev veth3 weight 1" + log_test $? 0 "Multipath - invalid second nexthop" + + # multipath non-existent route + add_initial_route6 "nexthop via 2001:db8:101::2 nexthop via 2001:db8:103::2" + run_cmd "$IP -6 ro change 2001:db8:105::/64 nexthop via 2001:db8:101::3 nexthop via 2001:db8:103::3" + log_test $? 2 "Multipath - replace of non-existent route" +} + +ipv6_rt_replace() +{ + echo + echo "IPv6 route replace tests" + + ipv6_rt_replace_single + ipv6_rt_replace_mpath +} + +ipv6_route_test() +{ + route_setup + + ipv6_rt_add + ipv6_rt_replace + + route_cleanup } +ip_addr_metric_check() +{ + ip addr help 2>&1 | grep -q metric + if [ $? -ne 0 ]; then + echo "iproute2 command does not support metric for addresses. Skipping test" + return 1 + fi + + return 0 +} + +ipv6_addr_metric_test() +{ + local rc + + echo + echo "IPv6 prefix route tests" + + ip_addr_metric_check || return 1 + + setup + + set -e + $IP li add dummy1 type dummy + $IP li add dummy2 type dummy + $IP li set dummy1 up + $IP li set dummy2 up + + # default entry is metric 256 + run_cmd "$IP -6 addr add dev dummy1 2001:db8:104::1/64" + run_cmd "$IP -6 addr add dev dummy2 2001:db8:104::2/64" + set +e + + check_route6 "2001:db8:104::/64 dev dummy1 proto kernel metric 256 2001:db8:104::/64 dev dummy2 proto kernel metric 256" + log_test $? 0 "Default metric" + + set -e + run_cmd "$IP -6 addr flush dev dummy1" + run_cmd "$IP -6 addr add dev dummy1 2001:db8:104::1/64 metric 257" + set +e + + check_route6 "2001:db8:104::/64 dev dummy2 proto kernel metric 256 2001:db8:104::/64 dev dummy1 proto kernel metric 257" + log_test $? 0 "User specified metric on first device" + + set -e + run_cmd "$IP -6 addr flush dev dummy2" + run_cmd "$IP -6 addr add dev dummy2 2001:db8:104::2/64 metric 258" + set +e + + check_route6 "2001:db8:104::/64 dev dummy1 proto kernel metric 257 2001:db8:104::/64 dev dummy2 proto kernel metric 258" + log_test $? 0 "User specified metric on second device" + + run_cmd "$IP -6 addr del dev dummy1 2001:db8:104::1/64 metric 257" + rc=$? + if [ $rc -eq 0 ]; then + check_route6 "2001:db8:104::/64 dev dummy2 proto kernel metric 258" + rc=$? + fi + log_test $rc 0 "Delete of address on first device" + + run_cmd "$IP -6 addr change dev dummy2 2001:db8:104::2/64 metric 259" + rc=$? + if [ $rc -eq 0 ]; then + check_route6 "2001:db8:104::/64 dev dummy2 proto kernel metric 259" + rc=$? + fi + log_test $rc 0 "Modify metric of address" + + # verify prefix route removed on down + run_cmd "ip netns exec testns sysctl -qw net.ipv6.conf.all.keep_addr_on_down=1" + run_cmd "$IP li set dev dummy2 down" + rc=$? + if [ $rc -eq 0 ]; then + check_route6 "" + rc=$? + fi + log_test $rc 0 "Prefix route removed on link down" + + # verify prefix route re-inserted with assigned metric + run_cmd "$IP li set dev dummy2 up" + rc=$? + if [ $rc -eq 0 ]; then + check_route6 "2001:db8:104::/64 dev dummy2 proto kernel metric 259" + rc=$? + fi + log_test $rc 0 "Prefix route with metric on link up" + + $IP li del dummy1 + $IP li del dummy2 + cleanup +} + +# add route for a prefix, flushing any existing routes first +# expected to be the first step of a test +add_route() +{ + local pfx="$1" + local nh="$2" + local out + + if [ "$VERBOSE" = "1" ]; then + echo + echo " ##################################################" + echo + fi + + run_cmd "$IP ro flush ${pfx}" + [ $? -ne 0 ] && exit 1 + + out=$($IP ro ls match ${pfx}) + if [ -n "$out" ]; then + echo "Failed to flush routes for prefix used for tests." + exit 1 + fi + + run_cmd "$IP ro add ${pfx} ${nh}" + if [ $? -ne 0 ]; then + echo "Failed to add initial route for test." + exit 1 + fi +} + +# add initial route - used in replace route tests +add_initial_route() +{ + add_route "172.16.104.0/24" "$1" +} + +check_route() +{ + local pfx="172.16.104.0/24" + local expected="$1" + local out + local rc=0 + + out=$($IP ro ls match ${pfx}) + [ "${out}" = "${expected}" ] && return 0 + + if [ -z "${out}" ]; then + if [ "$VERBOSE" = "1" ]; then + printf "\nNo route entry found\n" + printf "Expected:\n" + printf " ${expected}\n" + fi + return 1 + fi + + # tricky way to convert output to 1-line without ip's + # messy '\'; this drops all extra white space + out=$(echo ${out}) + if [ "${out}" != "${expected}" ]; then + rc=1 + if [ "${VERBOSE}" = "1" ]; then + printf " Unexpected route entry. Have:\n" + printf " ${out}\n" + printf " Expected:\n" + printf " ${expected}\n\n" + fi + fi + + return $rc +} + +# assumption is that basic add of a single path route works +# otherwise just adding an address on an interface is broken +ipv4_rt_add() +{ + local rc + + echo + echo "IPv4 route add / append tests" + + # route add same prefix - fails with EEXISTS b/c ip adds NLM_F_EXCL + add_route "172.16.104.0/24" "via 172.16.101.2" + run_cmd "$IP ro add 172.16.104.0/24 via 172.16.103.2" + log_test $? 2 "Attempt to add duplicate route - gw" + + # route add same prefix - fails with EEXISTS b/c ip adds NLM_F_EXCL + add_route "172.16.104.0/24" "via 172.16.101.2" + run_cmd "$IP ro add 172.16.104.0/24 dev veth3" + log_test $? 2 "Attempt to add duplicate route - dev only" + + # route add same prefix - fails with EEXISTS b/c ip adds NLM_F_EXCL + add_route "172.16.104.0/24" "via 172.16.101.2" + run_cmd "$IP ro add unreachable 172.16.104.0/24" + log_test $? 2 "Attempt to add duplicate route - reject route" + + # iproute2 prepend only sets NLM_F_CREATE + # - adds a new route; does NOT convert existing route to ECMP + add_route "172.16.104.0/24" "via 172.16.101.2" + run_cmd "$IP ro prepend 172.16.104.0/24 via 172.16.103.2" + check_route "172.16.104.0/24 via 172.16.103.2 dev veth3 172.16.104.0/24 via 172.16.101.2 dev veth1" + log_test $? 0 "Add new nexthop for existing prefix" + + # route append with same prefix adds a new route + # - iproute2 sets NLM_F_CREATE | NLM_F_APPEND + add_route "172.16.104.0/24" "via 172.16.101.2" + run_cmd "$IP ro append 172.16.104.0/24 via 172.16.103.2" + check_route "172.16.104.0/24 via 172.16.101.2 dev veth1 172.16.104.0/24 via 172.16.103.2 dev veth3" + log_test $? 0 "Append nexthop to existing route - gw" + + add_route "172.16.104.0/24" "via 172.16.101.2" + run_cmd "$IP ro append 172.16.104.0/24 dev veth3" + check_route "172.16.104.0/24 via 172.16.101.2 dev veth1 172.16.104.0/24 dev veth3 scope link" + log_test $? 0 "Append nexthop to existing route - dev only" + + add_route "172.16.104.0/24" "via 172.16.101.2" + run_cmd "$IP ro append unreachable 172.16.104.0/24" + check_route "172.16.104.0/24 via 172.16.101.2 dev veth1 unreachable 172.16.104.0/24" + log_test $? 0 "Append nexthop to existing route - reject route" + + run_cmd "$IP ro flush 172.16.104.0/24" + run_cmd "$IP ro add unreachable 172.16.104.0/24" + run_cmd "$IP ro append 172.16.104.0/24 via 172.16.103.2" + check_route "unreachable 172.16.104.0/24 172.16.104.0/24 via 172.16.103.2 dev veth3" + log_test $? 0 "Append nexthop to existing reject route - gw" + + run_cmd "$IP ro flush 172.16.104.0/24" + run_cmd "$IP ro add unreachable 172.16.104.0/24" + run_cmd "$IP ro append 172.16.104.0/24 dev veth3" + check_route "unreachable 172.16.104.0/24 172.16.104.0/24 dev veth3 scope link" + log_test $? 0 "Append nexthop to existing reject route - dev only" + + # insert mpath directly + add_route "172.16.104.0/24" "nexthop via 172.16.101.2 nexthop via 172.16.103.2" + check_route "172.16.104.0/24 nexthop via 172.16.101.2 dev veth1 weight 1 nexthop via 172.16.103.2 dev veth3 weight 1" + log_test $? 0 "add multipath route" + + add_route "172.16.104.0/24" "nexthop via 172.16.101.2 nexthop via 172.16.103.2" + run_cmd "$IP ro add 172.16.104.0/24 nexthop via 172.16.101.2 nexthop via 172.16.103.2" + log_test $? 2 "Attempt to add duplicate multipath route" + + # insert of a second route without append but different metric + add_route "172.16.104.0/24" "via 172.16.101.2" + run_cmd "$IP ro add 172.16.104.0/24 via 172.16.103.2 metric 512" + rc=$? + if [ $rc -eq 0 ]; then + run_cmd "$IP ro add 172.16.104.0/24 via 172.16.103.3 metric 256" + rc=$? + fi + log_test $rc 0 "Route add with different metrics" + + run_cmd "$IP ro del 172.16.104.0/24 metric 512" + rc=$? + if [ $rc -eq 0 ]; then + check_route "172.16.104.0/24 via 172.16.101.2 dev veth1 172.16.104.0/24 via 172.16.103.3 dev veth3 metric 256" + rc=$? + fi + log_test $rc 0 "Route delete with metric" +} + +ipv4_rt_replace_single() +{ + # single path with single path + # + add_initial_route "via 172.16.101.2" + run_cmd "$IP ro replace 172.16.104.0/24 via 172.16.103.2" + check_route "172.16.104.0/24 via 172.16.103.2 dev veth3" + log_test $? 0 "Single path with single path" + + # single path with multipath + # + add_initial_route "nexthop via 172.16.101.2" + run_cmd "$IP ro replace 172.16.104.0/24 nexthop via 172.16.101.3 nexthop via 172.16.103.2" + check_route "172.16.104.0/24 nexthop via 172.16.101.3 dev veth1 weight 1 nexthop via 172.16.103.2 dev veth3 weight 1" + log_test $? 0 "Single path with multipath" + + # single path with reject + # + add_initial_route "nexthop via 172.16.101.2" + run_cmd "$IP ro replace unreachable 172.16.104.0/24" + check_route "unreachable 172.16.104.0/24" + log_test $? 0 "Single path with reject route" + + # single path with single path using MULTIPATH attribute + # + add_initial_route "via 172.16.101.2" + run_cmd "$IP ro replace 172.16.104.0/24 nexthop via 172.16.103.2" + check_route "172.16.104.0/24 via 172.16.103.2 dev veth3" + log_test $? 0 "Single path with single path via multipath attribute" + + # route replace fails - invalid nexthop + add_initial_route "via 172.16.101.2" + run_cmd "$IP ro replace 172.16.104.0/24 via 2001:db8:104::2" + if [ $? -eq 0 ]; then + # previous command is expected to fail so if it returns 0 + # that means the test failed. + log_test 0 1 "Invalid nexthop" + else + check_route "172.16.104.0/24 via 172.16.101.2 dev veth1" + log_test $? 0 "Invalid nexthop" + fi + + # replace non-existent route + # - note use of change versus replace since ip adds NLM_F_CREATE + # for replace + add_initial_route "via 172.16.101.2" + run_cmd "$IP ro change 172.16.105.0/24 via 172.16.101.2" + log_test $? 2 "Single path - replace of non-existent route" +} + +ipv4_rt_replace_mpath() +{ + # multipath with multipath + add_initial_route "nexthop via 172.16.101.2 nexthop via 172.16.103.2" + run_cmd "$IP ro replace 172.16.104.0/24 nexthop via 172.16.101.3 nexthop via 172.16.103.3" + check_route "172.16.104.0/24 nexthop via 172.16.101.3 dev veth1 weight 1 nexthop via 172.16.103.3 dev veth3 weight 1" + log_test $? 0 "Multipath with multipath" + + # multipath with single + add_initial_route "nexthop via 172.16.101.2 nexthop via 172.16.103.2" + run_cmd "$IP ro replace 172.16.104.0/24 via 172.16.101.3" + check_route "172.16.104.0/24 via 172.16.101.3 dev veth1" + log_test $? 0 "Multipath with single path" + + # multipath with single + add_initial_route "nexthop via 172.16.101.2 nexthop via 172.16.103.2" + run_cmd "$IP ro replace 172.16.104.0/24 nexthop via 172.16.101.3" + check_route "172.16.104.0/24 via 172.16.101.3 dev veth1" + log_test $? 0 "Multipath with single path via multipath attribute" + + # multipath with reject + add_initial_route "nexthop via 172.16.101.2 nexthop via 172.16.103.2" + run_cmd "$IP ro replace unreachable 172.16.104.0/24" + check_route "unreachable 172.16.104.0/24" + log_test $? 0 "Multipath with reject route" + + # route replace fails - invalid nexthop 1 + add_initial_route "nexthop via 172.16.101.2 nexthop via 172.16.103.2" + run_cmd "$IP ro replace 172.16.104.0/24 nexthop via 172.16.111.3 nexthop via 172.16.103.3" + check_route "172.16.104.0/24 nexthop via 172.16.101.2 dev veth1 weight 1 nexthop via 172.16.103.2 dev veth3 weight 1" + log_test $? 0 "Multipath - invalid first nexthop" + + # route replace fails - invalid nexthop 2 + add_initial_route "nexthop via 172.16.101.2 nexthop via 172.16.103.2" + run_cmd "$IP ro replace 172.16.104.0/24 nexthop via 172.16.101.3 nexthop via 172.16.113.3" + check_route "172.16.104.0/24 nexthop via 172.16.101.2 dev veth1 weight 1 nexthop via 172.16.103.2 dev veth3 weight 1" + log_test $? 0 "Multipath - invalid second nexthop" + + # multipath non-existent route + add_initial_route "nexthop via 172.16.101.2 nexthop via 172.16.103.2" + run_cmd "$IP ro change 172.16.105.0/24 nexthop via 172.16.101.3 nexthop via 172.16.103.3" + log_test $? 2 "Multipath - replace of non-existent route" +} + +ipv4_rt_replace() +{ + echo + echo "IPv4 route replace tests" + + ipv4_rt_replace_single + ipv4_rt_replace_mpath +} + +ipv4_route_test() +{ + route_setup + + ipv4_rt_add + ipv4_rt_replace + + route_cleanup +} + +ipv4_addr_metric_test() +{ + local rc + + echo + echo "IPv4 prefix route tests" + + ip_addr_metric_check || return 1 + + setup + + set -e + $IP li add dummy1 type dummy + $IP li add dummy2 type dummy + $IP li set dummy1 up + $IP li set dummy2 up + + # default entry is metric 256 + run_cmd "$IP addr add dev dummy1 172.16.104.1/24" + run_cmd "$IP addr add dev dummy2 172.16.104.2/24" + set +e + + check_route "172.16.104.0/24 dev dummy1 proto kernel scope link src 172.16.104.1 172.16.104.0/24 dev dummy2 proto kernel scope link src 172.16.104.2" + log_test $? 0 "Default metric" + + set -e + run_cmd "$IP addr flush dev dummy1" + run_cmd "$IP addr add dev dummy1 172.16.104.1/24 metric 257" + set +e + + check_route "172.16.104.0/24 dev dummy2 proto kernel scope link src 172.16.104.2 172.16.104.0/24 dev dummy1 proto kernel scope link src 172.16.104.1 metric 257" + log_test $? 0 "User specified metric on first device" + + set -e + run_cmd "$IP addr flush dev dummy2" + run_cmd "$IP addr add dev dummy2 172.16.104.2/24 metric 258" + set +e + + check_route "172.16.104.0/24 dev dummy1 proto kernel scope link src 172.16.104.1 metric 257 172.16.104.0/24 dev dummy2 proto kernel scope link src 172.16.104.2 metric 258" + log_test $? 0 "User specified metric on second device" + + run_cmd "$IP addr del dev dummy1 172.16.104.1/24 metric 257" + rc=$? + if [ $rc -eq 0 ]; then + check_route "172.16.104.0/24 dev dummy2 proto kernel scope link src 172.16.104.2 metric 258" + rc=$? + fi + log_test $rc 0 "Delete of address on first device" + + run_cmd "$IP addr change dev dummy2 172.16.104.2/24 metric 259" + rc=$? + if [ $rc -eq 0 ]; then + check_route "172.16.104.0/24 dev dummy2 proto kernel scope link src 172.16.104.2 metric 259" + rc=$? + fi + log_test $rc 0 "Modify metric of address" + + # verify prefix route removed on down + run_cmd "$IP li set dev dummy2 down" + rc=$? + if [ $rc -eq 0 ]; then + check_route "" + rc=$? + fi + log_test $rc 0 "Prefix route removed on link down" + + # verify prefix route re-inserted with assigned metric + run_cmd "$IP li set dev dummy2 up" + rc=$? + if [ $rc -eq 0 ]; then + check_route "172.16.104.0/24 dev dummy2 proto kernel scope link src 172.16.104.2 metric 259" + rc=$? + fi + log_test $rc 0 "Prefix route with metric on link up" + + $IP li del dummy1 + $IP li del dummy2 + cleanup +} + +################################################################################ +# usage + +usage() +{ + cat <<EOF +usage: ${0##*/} OPTS + + -t <test> Test(s) to run (default: all) + (options: $TESTS) + -p Pause on fail + -P Pause after each test before cleanup + -v verbose mode (show commands and output) +EOF +} + +################################################################################ +# main + +while getopts :t:pPhv o +do + case $o in + t) TESTS=$OPTARG;; + p) PAUSE_ON_FAIL=yes;; + P) PAUSE=yes;; + v) VERBOSE=$(($VERBOSE + 1));; + h) usage; exit 0;; + *) usage; exit 1;; + esac +done + +PEER_CMD="ip netns exec ${PEER_NS}" + +# make sure we don't pause twice +[ "${PAUSE}" = "yes" ] && PAUSE_ON_FAIL=no + if [ "$(id -u)" -ne 0 ];then echo "SKIP: Need root privileges" exit $ksft_skip; @@ -598,6 +1415,25 @@ fi # start clean cleanup &> /dev/null -fib_test +for t in $TESTS +do + case $t in + fib_unreg_test|unregister) fib_unreg_test;; + fib_down_test|down) fib_down_test;; + fib_carrier_test|carrier) fib_carrier_test;; + fib_nexthop_test|nexthop) fib_nexthop_test;; + ipv6_route_test|ipv6_rt) ipv6_route_test;; + ipv4_route_test|ipv4_rt) ipv4_route_test;; + ipv6_addr_metric) ipv6_addr_metric_test;; + ipv4_addr_metric) ipv4_addr_metric_test;; + + help) echo "Test names: $TESTS"; exit 0;; + esac +done + +if [ "$TESTS" != "none" ]; then + printf "\nTests passed: %3d\n" ${nsuccess} + printf "Tests failed: %3d\n" ${nfail} +fi exit $ret diff --git a/tools/testing/selftests/net/forwarding/bridge_vlan_aware.sh b/tools/testing/selftests/net/forwarding/bridge_vlan_aware.sh index 75d9224..d8313d0 100755 --- a/tools/testing/selftests/net/forwarding/bridge_vlan_aware.sh +++ b/tools/testing/selftests/net/forwarding/bridge_vlan_aware.sh @@ -1,6 +1,7 @@ #!/bin/bash # SPDX-License-Identifier: GPL-2.0 +ALL_TESTS="ping_ipv4 ping_ipv6 learning flooding" NUM_NETIFS=4 CHECK_TC="yes" source lib.sh @@ -75,14 +76,31 @@ cleanup() vrf_cleanup } +ping_ipv4() +{ + ping_test $h1 192.0.2.2 +} + +ping_ipv6() +{ + ping6_test $h1 2001:db8:1::2 +} + +learning() +{ + learning_test "br0" $swp1 $h1 $h2 +} + +flooding() +{ + flood_test $swp2 $h1 $h2 +} + trap cleanup EXIT setup_prepare setup_wait -ping_test $h1 192.0.2.2 -ping6_test $h1 2001:db8:1::2 -learning_test "br0" $swp1 $h1 $h2 -flood_test $swp2 $h1 $h2 +tests_run exit $EXIT_STATUS diff --git a/tools/testing/selftests/net/forwarding/bridge_vlan_unaware.sh b/tools/testing/selftests/net/forwarding/bridge_vlan_unaware.sh index 1cddf06..c15c6c8 100755 --- a/tools/testing/selftests/net/forwarding/bridge_vlan_unaware.sh +++ b/tools/testing/selftests/net/forwarding/bridge_vlan_unaware.sh @@ -1,6 +1,7 @@ #!/bin/bash # SPDX-License-Identifier: GPL-2.0 +ALL_TESTS="ping_ipv4 ping_ipv6 learning flooding" NUM_NETIFS=4 source lib.sh @@ -73,14 +74,31 @@ cleanup() vrf_cleanup } +ping_ipv4() +{ + ping_test $h1 192.0.2.2 +} + +ping_ipv6() +{ + ping6_test $h1 2001:db8:1::2 +} + +learning() +{ + learning_test "br0" $swp1 $h1 $h2 +} + +flooding() +{ + flood_test $swp2 $h1 $h2 +} + trap cleanup EXIT setup_prepare setup_wait -ping_test $h1 192.0.2.2 -ping6_test $h1 2001:db8:1::2 -learning_test "br0" $swp1 $h1 $h2 -flood_test $swp2 $h1 $h2 +tests_run exit $EXIT_STATUS diff --git a/tools/testing/selftests/net/forwarding/lib.sh b/tools/testing/selftests/net/forwarding/lib.sh index 1ac6c62..7b18a53 100644 --- a/tools/testing/selftests/net/forwarding/lib.sh +++ b/tools/testing/selftests/net/forwarding/lib.sh @@ -321,6 +321,50 @@ simple_if_fini() vrf_destroy $vrf_name } +tunnel_create() +{ + local name=$1; shift + local type=$1; shift + local local=$1; shift + local remote=$1; shift + + ip link add name $name type $type \ + local $local remote $remote "$@" + ip link set dev $name up +} + +tunnel_destroy() +{ + local name=$1; shift + + ip link del dev $name +} + +vlan_create() +{ + local if_name=$1; shift + local vid=$1; shift + local vrf=$1; shift + local ips=("${@}") + local name=$if_name.$vid + + ip link add name $name link $if_name type vlan id $vid + if [ "$vrf" != "" ]; then + ip link set dev $name master $vrf + fi + ip link set dev $name up + __addr_add_del $name add "${ips[@]}" +} + +vlan_destroy() +{ + local if_name=$1; shift + local vid=$1; shift + local name=$if_name.$vid + + ip link del dev $name +} + master_name_get() { local if_name=$1 @@ -335,6 +379,15 @@ link_stats_tx_packets_get() ip -j -s link show dev $if_name | jq '.[]["stats64"]["tx"]["packets"]' } +tc_rule_stats_get() +{ + local dev=$1; shift + local pref=$1; shift + + tc -j -s filter show dev $dev ingress pref $pref | + jq '.[1].options.actions[].stats.packets' +} + mac_get() { local if_name=$1 @@ -353,19 +406,33 @@ bridge_ageing_time_get() echo $((ageing_time / 100)) } -forwarding_enable() +declare -A SYSCTL_ORIG +sysctl_set() +{ + local key=$1; shift + local value=$1; shift + + SYSCTL_ORIG[$key]=$(sysctl -n $key) + sysctl -qw $key=$value +} + +sysctl_restore() { - ipv4_fwd=$(sysctl -n net.ipv4.conf.all.forwarding) - ipv6_fwd=$(sysctl -n net.ipv6.conf.all.forwarding) + local key=$1; shift - sysctl -q -w net.ipv4.conf.all.forwarding=1 - sysctl -q -w net.ipv6.conf.all.forwarding=1 + sysctl -qw $key=${SYSCTL_ORIG["$key"]} +} + +forwarding_enable() +{ + sysctl_set net.ipv4.conf.all.forwarding 1 + sysctl_set net.ipv6.conf.all.forwarding 1 } forwarding_restore() { - sysctl -q -w net.ipv6.conf.all.forwarding=$ipv6_fwd - sysctl -q -w net.ipv4.conf.all.forwarding=$ipv4_fwd + sysctl_restore net.ipv6.conf.all.forwarding + sysctl_restore net.ipv4.conf.all.forwarding } tc_offload_check() @@ -381,6 +448,115 @@ tc_offload_check() return 0 } +trap_install() +{ + local dev=$1; shift + local direction=$1; shift + + # For slow-path testing, we need to install a trap to get to + # slow path the packets that would otherwise be switched in HW. + tc filter add dev $dev $direction pref 1 flower skip_sw action trap +} + +trap_uninstall() +{ + local dev=$1; shift + local direction=$1; shift + + tc filter del dev $dev $direction pref 1 flower skip_sw +} + +slow_path_trap_install() +{ + if [ "${tcflags/skip_hw}" != "$tcflags" ]; then + trap_install "$@" + fi +} + +slow_path_trap_uninstall() +{ + if [ "${tcflags/skip_hw}" != "$tcflags" ]; then + trap_uninstall "$@" + fi +} + +__icmp_capture_add_del() +{ + local add_del=$1; shift + local pref=$1; shift + local vsuf=$1; shift + local tundev=$1; shift + local filter=$1; shift + + tc filter $add_del dev "$tundev" ingress \ + proto ip$vsuf pref $pref \ + flower ip_proto icmp$vsuf $filter \ + action pass +} + +icmp_capture_install() +{ + __icmp_capture_add_del add 100 "" "$@" +} + +icmp_capture_uninstall() +{ + __icmp_capture_add_del del 100 "" "$@" +} + +icmp6_capture_install() +{ + __icmp_capture_add_del add 100 v6 "$@" +} + +icmp6_capture_uninstall() +{ + __icmp_capture_add_del del 100 v6 "$@" +} + +__vlan_capture_add_del() +{ + local add_del=$1; shift + local pref=$1; shift + local dev=$1; shift + local filter=$1; shift + + tc filter $add_del dev "$dev" ingress \ + proto 802.1q pref $pref \ + flower $filter \ + action pass +} + +vlan_capture_install() +{ + __vlan_capture_add_del add 100 "$@" +} + +vlan_capture_uninstall() +{ + __vlan_capture_add_del del 100 "$@" +} + +matchall_sink_create() +{ + local dev=$1; shift + + tc qdisc add dev $dev clsact + tc filter add dev $dev ingress \ + pref 10000 \ + matchall \ + action drop +} + +tests_run() +{ + local current_test + + for current_test in ${TESTS:-$ALL_TESTS}; do + $current_test + done +} + ############################################################################## # Tests diff --git a/tools/testing/selftests/net/forwarding/mirror_gre.sh b/tools/testing/selftests/net/forwarding/mirror_gre.sh new file mode 100755 index 0000000..e6fd7a1 --- /dev/null +++ b/tools/testing/selftests/net/forwarding/mirror_gre.sh @@ -0,0 +1,159 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +# This test uses standard topology for testing gretap. See +# mirror_gre_topo_lib.sh for more details. +# +# Test for "tc action mirred egress mirror" when the device to mirror to is a +# gretap or ip6gretap netdevice. Expect that the packets come out encapsulated, +# and another gretap / ip6gretap netdevice is then capable of decapsulating the +# traffic. Test that the payload is what is expected (ICMP ping request or +# reply, depending on test). + +ALL_TESTS=" + test_gretap + test_ip6gretap + test_gretap_mac + test_ip6gretap_mac + test_two_spans +" + +NUM_NETIFS=6 +source lib.sh +source mirror_lib.sh +source mirror_gre_lib.sh +source mirror_gre_topo_lib.sh + +setup_prepare() +{ + h1=${NETIFS[p1]} + swp1=${NETIFS[p2]} + + swp2=${NETIFS[p3]} + h2=${NETIFS[p4]} + + swp3=${NETIFS[p5]} + h3=${NETIFS[p6]} + + vrf_prepare + mirror_gre_topo_create + + ip address add dev $swp3 192.0.2.129/28 + ip address add dev $h3 192.0.2.130/28 + + ip address add dev $swp3 2001:db8:2::1/64 + ip address add dev $h3 2001:db8:2::2/64 +} + +cleanup() +{ + pre_cleanup + + ip address del dev $h3 2001:db8:2::2/64 + ip address del dev $swp3 2001:db8:2::1/64 + + ip address del dev $h3 192.0.2.130/28 + ip address del dev $swp3 192.0.2.129/28 + + mirror_gre_topo_destroy + vrf_cleanup +} + +test_span_gre_mac() +{ + local tundev=$1; shift + local direction=$1; shift + local prot=$1; shift + local what=$1; shift + + local swp3mac=$(mac_get $swp3) + local h3mac=$(mac_get $h3) + + RET=0 + + mirror_install $swp1 $direction $tundev "matchall $tcflags" + tc filter add dev $h3 ingress pref 77 prot $prot \ + flower ip_proto 0x2f src_mac $swp3mac dst_mac $h3mac \ + action pass + + mirror_test v$h1 192.0.2.1 192.0.2.2 $h3 77 10 + + tc filter del dev $h3 ingress pref 77 + mirror_uninstall $swp1 $direction + + log_test "$direction $what: envelope MAC ($tcflags)" +} + +test_two_spans() +{ + RET=0 + + mirror_install $swp1 ingress gt4 "matchall $tcflags" + mirror_install $swp1 egress gt6 "matchall $tcflags" + quick_test_span_gre_dir gt4 ingress + quick_test_span_gre_dir gt6 egress + + mirror_uninstall $swp1 ingress + fail_test_span_gre_dir gt4 ingress + quick_test_span_gre_dir gt6 egress + + mirror_install $swp1 ingress gt4 "matchall $tcflags" + mirror_uninstall $swp1 egress + quick_test_span_gre_dir gt4 ingress + fail_test_span_gre_dir gt6 egress + + mirror_uninstall $swp1 ingress + log_test "two simultaneously configured mirrors ($tcflags)" +} + +test_gretap() +{ + full_test_span_gre_dir gt4 ingress 8 0 "mirror to gretap" + full_test_span_gre_dir gt4 egress 0 8 "mirror to gretap" +} + +test_ip6gretap() +{ + full_test_span_gre_dir gt6 ingress 8 0 "mirror to ip6gretap" + full_test_span_gre_dir gt6 egress 0 8 "mirror to ip6gretap" +} + +test_gretap_mac() +{ + test_span_gre_mac gt4 ingress ip "mirror to gretap" + test_span_gre_mac gt4 egress ip "mirror to gretap" +} + +test_ip6gretap_mac() +{ + test_span_gre_mac gt6 ingress ipv6 "mirror to ip6gretap" + test_span_gre_mac gt6 egress ipv6 "mirror to ip6gretap" +} + +test_all() +{ + slow_path_trap_install $swp1 ingress + slow_path_trap_install $swp1 egress + + tests_run + + slow_path_trap_uninstall $swp1 egress + slow_path_trap_uninstall $swp1 ingress +} + +trap cleanup EXIT + +setup_prepare +setup_wait + +tcflags="skip_hw" +test_all + +if ! tc_offload_check; then + echo "WARN: Could not test offloaded functionality" +else + tcflags="skip_sw" + test_all +fi + +exit $EXIT_STATUS diff --git a/tools/testing/selftests/net/forwarding/mirror_gre_bound.sh b/tools/testing/selftests/net/forwarding/mirror_gre_bound.sh new file mode 100755 index 0000000..360ca13 --- /dev/null +++ b/tools/testing/selftests/net/forwarding/mirror_gre_bound.sh @@ -0,0 +1,226 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +# +---------------------+ +---------------------+ +# | H1 | | H2 | +# | + $h1 | | $h2 + | +# | | 192.0.2.1/28 | | 192.0.2.2/28 | | +# +-----|---------------+ +---------------|-----+ +# | | +# +-----|-------------------------------------------------------------|-----+ +# | SW o--> mirror | | +# | +---|-------------------------------------------------------------|---+ | +# | | + $swp1 BR $swp2 + | | +# | +---------------------------------------------------------------------+ | +# | | +# | +---------------------------------------------------------------------+ | +# | | OL + gt6 (ip6gretap) + gt4 (gretap) | | +# | | : loc=2001:db8:2::1 : loc=192.0.2.129 | | +# | | : rem=2001:db8:2::2 : rem=192.0.2.130 | | +# | | : ttl=100 : ttl=100 | | +# | | : tos=inherit : tos=inherit | | +# | +-------------------------:--|-------------------:--|-----------------+ | +# | : | : | | +# | +-------------------------:--|-------------------:--|-----------------+ | +# | | UL : |,---------------------' | | +# | | + $swp3 : || : | | +# | | | 192.0.2.129/28 : vv : | | +# | | | 2001:db8:2::1/64 : + ul (dummy) : | | +# | +---|---------------------:----------------------:--------------------+ | +# +-----|---------------------:----------------------:----------------------+ +# | : : +# +-----|---------------------:----------------------:----------------------+ +# | H3 + $h3 + h3-gt6 (ip6gretap) + h3-gt4 (gretap) | +# | 192.0.2.130/28 loc=2001:db8:2::2 loc=192.0.2.130 | +# | 2001:db8:2::2/64 rem=2001:db8:2::1 rem=192.0.2.129 | +# | ttl=100 ttl=100 | +# | tos=inherit tos=inherit | +# | | +# +-------------------------------------------------------------------------+ +# +# This tests mirroring to gretap and ip6gretap configured in an overlay / +# underlay manner, i.e. with a bound dummy device that marks underlay VRF where +# the encapsulated packed should be routed. + +ALL_TESTS=" + test_gretap + test_ip6gretap +" + +NUM_NETIFS=6 +source lib.sh +source mirror_lib.sh +source mirror_gre_lib.sh + +h1_create() +{ + simple_if_init $h1 192.0.2.1/28 +} + +h1_destroy() +{ + simple_if_fini $h1 192.0.2.1/28 +} + +h2_create() +{ + simple_if_init $h2 192.0.2.2/28 +} + +h2_destroy() +{ + simple_if_fini $h2 192.0.2.2/28 +} + +h3_create() +{ + simple_if_init $h3 192.0.2.130/28 2001:db8:2::2/64 + + tunnel_create h3-gt4 gretap 192.0.2.130 192.0.2.129 + ip link set h3-gt4 vrf v$h3 + matchall_sink_create h3-gt4 + + tunnel_create h3-gt6 ip6gretap 2001:db8:2::2 2001:db8:2::1 + ip link set h3-gt6 vrf v$h3 + matchall_sink_create h3-gt6 +} + +h3_destroy() +{ + tunnel_destroy h3-gt6 + tunnel_destroy h3-gt4 + + simple_if_fini $h3 192.0.2.130/28 2001:db8:2::2/64 +} + +switch_create() +{ + # Bridge between H1 and H2. + + ip link add name br1 type bridge vlan_filtering 1 + ip link set dev br1 up + + ip link set dev $swp1 master br1 + ip link set dev $swp1 up + + ip link set dev $swp2 master br1 + ip link set dev $swp2 up + + tc qdisc add dev $swp1 clsact + + # Underlay. + + simple_if_init $swp3 192.0.2.129/28 2001:db8:2::1/64 + + ip link add name ul type dummy + ip link set dev ul master v$swp3 + ip link set dev ul up + + # Overlay. + + vrf_create vrf-ol + ip link set dev vrf-ol up + + tunnel_create gt4 gretap 192.0.2.129 192.0.2.130 \ + ttl 100 tos inherit dev ul + ip link set dev gt4 master vrf-ol + ip link set dev gt4 up + + tunnel_create gt6 ip6gretap 2001:db8:2::1 2001:db8:2::2 \ + ttl 100 tos inherit dev ul allow-localremote + ip link set dev gt6 master vrf-ol + ip link set dev gt6 up +} + +switch_destroy() +{ + vrf_destroy vrf-ol + + tunnel_destroy gt6 + tunnel_destroy gt4 + + simple_if_fini $swp3 192.0.2.129/28 2001:db8:2::1/64 + + ip link del dev ul + + tc qdisc del dev $swp1 clsact + + ip link set dev $swp1 down + ip link set dev $swp2 down + ip link del dev br1 +} + +setup_prepare() +{ + h1=${NETIFS[p1]} + swp1=${NETIFS[p2]} + + swp2=${NETIFS[p3]} + h2=${NETIFS[p4]} + + swp3=${NETIFS[p5]} + h3=${NETIFS[p6]} + + vrf_prepare + + h1_create + h2_create + h3_create + + switch_create +} + +cleanup() +{ + pre_cleanup + + switch_destroy + + h3_destroy + h2_destroy + h1_destroy + + vrf_cleanup +} + +test_gretap() +{ + full_test_span_gre_dir gt4 ingress 8 0 "mirror to gretap w/ UL" + full_test_span_gre_dir gt4 egress 0 8 "mirror to gretap w/ UL" +} + +test_ip6gretap() +{ + full_test_span_gre_dir gt6 ingress 8 0 "mirror to ip6gretap w/ UL" + full_test_span_gre_dir gt6 egress 0 8 "mirror to ip6gretap w/ UL" +} + +test_all() +{ + RET=0 + + slow_path_trap_install $swp1 ingress + slow_path_trap_install $swp1 egress + + tests_run + + slow_path_trap_uninstall $swp1 egress + slow_path_trap_uninstall $swp1 ingress +} + +trap cleanup EXIT + +setup_prepare +setup_wait + +tcflags="skip_hw" +test_all + +if ! tc_offload_check; then + echo "WARN: Could not test offloaded functionality" +else + tcflags="skip_sw" + test_all +fi + +exit $EXIT_STATUS diff --git a/tools/testing/selftests/net/forwarding/mirror_gre_bridge_1d_vlan.sh b/tools/testing/selftests/net/forwarding/mirror_gre_bridge_1d_vlan.sh new file mode 100755 index 0000000..3bb4c2b --- /dev/null +++ b/tools/testing/selftests/net/forwarding/mirror_gre_bridge_1d_vlan.sh @@ -0,0 +1,121 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +# This test uses standard topology for testing gretap. See +# mirror_gre_topo_lib.sh for more details. +# +# Test for "tc action mirred egress mirror" when the underlay route points at a +# bridge device without vlan filtering (802.1d). The device attached to that +# bridge is a VLAN. + +ALL_TESTS=" + test_gretap + test_ip6gretap + test_gretap_stp + test_ip6gretap_stp +" + +NUM_NETIFS=6 +source lib.sh +source mirror_lib.sh +source mirror_gre_lib.sh +source mirror_gre_topo_lib.sh + +setup_prepare() +{ + h1=${NETIFS[p1]} + swp1=${NETIFS[p2]} + + swp2=${NETIFS[p3]} + h2=${NETIFS[p4]} + + swp3=${NETIFS[p5]} + h3=${NETIFS[p6]} + + vrf_prepare + mirror_gre_topo_create + + ip link add name br2 type bridge vlan_filtering 0 + ip link set dev br2 up + + vlan_create $swp3 555 + + ip link set dev $swp3.555 master br2 + ip route add 192.0.2.130/32 dev br2 + ip -6 route add 2001:db8:2::2/128 dev br2 + + ip address add dev br2 192.0.2.129/32 + ip address add dev br2 2001:db8:2::1/128 + + vlan_create $h3 555 v$h3 192.0.2.130/28 2001:db8:2::2/64 +} + +cleanup() +{ + pre_cleanup + + vlan_destroy $h3 555 + ip link del dev br2 + vlan_destroy $swp3 555 + + mirror_gre_topo_destroy + vrf_cleanup +} + +test_vlan_match() +{ + local tundev=$1; shift + local vlan_match=$1; shift + local what=$1; shift + + full_test_span_gre_dir_vlan $tundev ingress "$vlan_match" 8 0 "$what" + full_test_span_gre_dir_vlan $tundev egress "$vlan_match" 0 8 "$what" +} + +test_gretap() +{ + test_vlan_match gt4 'vlan_id 555 vlan_ethtype ip' "mirror to gretap" +} + +test_ip6gretap() +{ + test_vlan_match gt6 'vlan_id 555 vlan_ethtype ipv6' "mirror to ip6gretap" +} + +test_gretap_stp() +{ + full_test_span_gre_stp gt4 $swp3.555 "mirror to gretap" +} + +test_ip6gretap_stp() +{ + full_test_span_gre_stp gt6 $swp3.555 "mirror to ip6gretap" +} + +test_all() +{ + slow_path_trap_install $swp1 ingress + slow_path_trap_install $swp1 egress + + tests_run + + slow_path_trap_uninstall $swp1 egress + slow_path_trap_uninstall $swp1 ingress +} + +trap cleanup EXIT + +setup_prepare +setup_wait + +tcflags="skip_hw" +test_all + +if ! tc_offload_check; then + echo "WARN: Could not test offloaded functionality" +else + tcflags="skip_sw" + test_all +fi + +exit $EXIT_STATUS diff --git a/tools/testing/selftests/net/forwarding/mirror_gre_changes.sh b/tools/testing/selftests/net/forwarding/mirror_gre_changes.sh new file mode 100755 index 0000000..aa29d46 --- /dev/null +++ b/tools/testing/selftests/net/forwarding/mirror_gre_changes.sh @@ -0,0 +1,278 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +# This test uses standard topology for testing gretap. See +# mirror_gre_topo_lib.sh for more details. +# +# Test how mirrors to gretap and ip6gretap react to changes to relevant +# configuration. + +ALL_TESTS=" + test_ttl + test_tun_up + test_egress_up + test_remote_ip + test_tun_del + test_route_del +" + +NUM_NETIFS=6 +source lib.sh +source mirror_lib.sh +source mirror_gre_lib.sh +source mirror_gre_topo_lib.sh + +setup_prepare() +{ + h1=${NETIFS[p1]} + swp1=${NETIFS[p2]} + + swp2=${NETIFS[p3]} + h2=${NETIFS[p4]} + + swp3=${NETIFS[p5]} + h3=${NETIFS[p6]} + + vrf_prepare + mirror_gre_topo_create + + # This test downs $swp3, which deletes the configured IPv6 address + # unless this sysctl is set. + sysctl_set net.ipv6.conf.$swp3.keep_addr_on_down 1 + + ip address add dev $swp3 192.0.2.129/28 + ip address add dev $h3 192.0.2.130/28 + + ip address add dev $swp3 2001:db8:2::1/64 + ip address add dev $h3 2001:db8:2::2/64 +} + +cleanup() +{ + pre_cleanup + + ip address del dev $h3 2001:db8:2::2/64 + ip address del dev $swp3 2001:db8:2::1/64 + + ip address del dev $h3 192.0.2.130/28 + ip address del dev $swp3 192.0.2.129/28 + + sysctl_restore net.ipv6.conf.$swp3.keep_addr_on_down + + mirror_gre_topo_destroy + vrf_cleanup +} + +test_span_gre_ttl() +{ + local tundev=$1; shift + local type=$1; shift + local prot=$1; shift + local what=$1; shift + + RET=0 + + mirror_install $swp1 ingress $tundev "matchall $tcflags" + tc filter add dev $h3 ingress pref 77 prot $prot \ + flower ip_ttl 50 action pass + + mirror_test v$h1 192.0.2.1 192.0.2.2 $h3 77 0 + + ip link set dev $tundev type $type ttl 50 + mirror_test v$h1 192.0.2.1 192.0.2.2 $h3 77 10 + + ip link set dev $tundev type $type ttl 100 + tc filter del dev $h3 ingress pref 77 + mirror_uninstall $swp1 ingress + + log_test "$what: TTL change ($tcflags)" +} + +test_span_gre_tun_up() +{ + local tundev=$1; shift + local what=$1; shift + + RET=0 + + ip link set dev $tundev down + mirror_install $swp1 ingress $tundev "matchall $tcflags" + fail_test_span_gre_dir $tundev ingress + + ip link set dev $tundev up + + quick_test_span_gre_dir $tundev ingress + mirror_uninstall $swp1 ingress + + log_test "$what: tunnel down/up ($tcflags)" +} + +test_span_gre_egress_up() +{ + local tundev=$1; shift + local remote_ip=$1; shift + local what=$1; shift + + RET=0 + + ip link set dev $swp3 down + mirror_install $swp1 ingress $tundev "matchall $tcflags" + fail_test_span_gre_dir $tundev ingress + + # After setting the device up, wait for neighbor to get resolved so that + # we can expect mirroring to work. + ip link set dev $swp3 up + while true; do + ip neigh sh dev $swp3 $remote_ip nud reachable | + grep -q ^ + if [[ $? -ne 0 ]]; then + sleep 1 + else + break + fi + done + + quick_test_span_gre_dir $tundev ingress + mirror_uninstall $swp1 ingress + + log_test "$what: egress down/up ($tcflags)" +} + +test_span_gre_remote_ip() +{ + local tundev=$1; shift + local type=$1; shift + local correct_ip=$1; shift + local wrong_ip=$1; shift + local what=$1; shift + + RET=0 + + ip link set dev $tundev type $type remote $wrong_ip + mirror_install $swp1 ingress $tundev "matchall $tcflags" + fail_test_span_gre_dir $tundev ingress + + ip link set dev $tundev type $type remote $correct_ip + quick_test_span_gre_dir $tundev ingress + mirror_uninstall $swp1 ingress + + log_test "$what: remote address change ($tcflags)" +} + +test_span_gre_tun_del() +{ + local tundev=$1; shift + local type=$1; shift + local flags=$1; shift + local local_ip=$1; shift + local remote_ip=$1; shift + local what=$1; shift + + RET=0 + + mirror_install $swp1 ingress $tundev "matchall $tcflags" + quick_test_span_gre_dir $tundev ingress + ip link del dev $tundev + fail_test_span_gre_dir $tundev ingress + + tunnel_create $tundev $type $local_ip $remote_ip \ + ttl 100 tos inherit $flags + + # Recreating the tunnel doesn't reestablish mirroring, so reinstall it + # and verify it works for the follow-up tests. + mirror_uninstall $swp1 ingress + mirror_install $swp1 ingress $tundev "matchall $tcflags" + quick_test_span_gre_dir $tundev ingress + mirror_uninstall $swp1 ingress + + log_test "$what: tunnel deleted ($tcflags)" +} + +test_span_gre_route_del() +{ + local tundev=$1; shift + local edev=$1; shift + local route=$1; shift + local what=$1; shift + + RET=0 + + mirror_install $swp1 ingress $tundev "matchall $tcflags" + quick_test_span_gre_dir $tundev ingress + + ip route del $route dev $edev + fail_test_span_gre_dir $tundev ingress + + ip route add $route dev $edev + quick_test_span_gre_dir $tundev ingress + + mirror_uninstall $swp1 ingress + + log_test "$what: underlay route removal ($tcflags)" +} + +test_ttl() +{ + test_span_gre_ttl gt4 gretap ip "mirror to gretap" + test_span_gre_ttl gt6 ip6gretap ipv6 "mirror to ip6gretap" +} + +test_tun_up() +{ + test_span_gre_tun_up gt4 "mirror to gretap" + test_span_gre_tun_up gt6 "mirror to ip6gretap" +} + +test_egress_up() +{ + test_span_gre_egress_up gt4 192.0.2.130 "mirror to gretap" + test_span_gre_egress_up gt6 2001:db8:2::2 "mirror to ip6gretap" +} + +test_remote_ip() +{ + test_span_gre_remote_ip gt4 gretap 192.0.2.130 192.0.2.132 "mirror to gretap" + test_span_gre_remote_ip gt6 ip6gretap 2001:db8:2::2 2001:db8:2::4 "mirror to ip6gretap" +} + +test_tun_del() +{ + test_span_gre_tun_del gt4 gretap "" \ + 192.0.2.129 192.0.2.130 "mirror to gretap" + test_span_gre_tun_del gt6 ip6gretap allow-localremote \ + 2001:db8:2::1 2001:db8:2::2 "mirror to ip6gretap" +} + +test_route_del() +{ + test_span_gre_route_del gt4 $swp3 192.0.2.128/28 "mirror to gretap" + test_span_gre_route_del gt6 $swp3 2001:db8:2::/64 "mirror to ip6gretap" +} + +test_all() +{ + slow_path_trap_install $swp1 ingress + slow_path_trap_install $swp1 egress + + tests_run + + slow_path_trap_uninstall $swp1 egress + slow_path_trap_uninstall $swp1 ingress +} + +trap cleanup EXIT + +setup_prepare +setup_wait + +tcflags="skip_hw" +test_all + +if ! tc_offload_check; then + echo "WARN: Could not test offloaded functionality" +else + tcflags="skip_sw" + test_all +fi + +exit $EXIT_STATUS diff --git a/tools/testing/selftests/net/forwarding/mirror_gre_flower.sh b/tools/testing/selftests/net/forwarding/mirror_gre_flower.sh new file mode 100755 index 0000000..12914f4 --- /dev/null +++ b/tools/testing/selftests/net/forwarding/mirror_gre_flower.sh @@ -0,0 +1,137 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +# This test uses standard topology for testing gretap. See +# mirror_gre_topo_lib.sh for more details. +# +# This tests flower-triggered mirroring to gretap and ip6gretap netdevices. The +# interfaces on H1 and H2 have two addresses each. Flower match on one of the +# addresses is configured with mirror action. It is expected that when pinging +# this address, mirroring takes place, whereas when pinging the other one, +# there's no mirroring. + +ALL_TESTS=" + test_gretap + test_ip6gretap +" + +NUM_NETIFS=6 +source lib.sh +source mirror_lib.sh +source mirror_gre_lib.sh +source mirror_gre_topo_lib.sh + +setup_prepare() +{ + h1=${NETIFS[p1]} + swp1=${NETIFS[p2]} + + swp2=${NETIFS[p3]} + h2=${NETIFS[p4]} + + swp3=${NETIFS[p5]} + h3=${NETIFS[p6]} + + vrf_prepare + mirror_gre_topo_create + + ip address add dev $swp3 192.0.2.129/28 + ip address add dev $h3 192.0.2.130/28 + + ip address add dev $swp3 2001:db8:2::1/64 + ip address add dev $h3 2001:db8:2::2/64 + + ip address add dev $h1 192.0.2.3/28 + ip address add dev $h2 192.0.2.4/28 +} + +cleanup() +{ + pre_cleanup + + ip address del dev $h2 192.0.2.4/28 + ip address del dev $h1 192.0.2.3/28 + + ip address del dev $h3 2001:db8:2::2/64 + ip address del dev $swp3 2001:db8:2::1/64 + + ip address del dev $h3 192.0.2.130/28 + ip address del dev $swp3 192.0.2.129/28 + + mirror_gre_topo_destroy + vrf_cleanup +} + +test_span_gre_dir_acl() +{ + test_span_gre_dir_ips "$@" 192.0.2.3 192.0.2.4 +} + +fail_test_span_gre_dir_acl() +{ + fail_test_span_gre_dir_ips "$@" 192.0.2.3 192.0.2.4 +} + +full_test_span_gre_dir_acl() +{ + local tundev=$1; shift + local direction=$1; shift + local forward_type=$1; shift + local backward_type=$1; shift + local match_dip=$1; shift + local what=$1; shift + + mirror_install $swp1 $direction $tundev \ + "protocol ip flower $tcflags dst_ip $match_dip" + fail_test_span_gre_dir $tundev $direction + test_span_gre_dir_acl "$tundev" "$direction" \ + "$forward_type" "$backward_type" + mirror_uninstall $swp1 $direction + + # Test lack of mirroring after ACL mirror is uninstalled. + fail_test_span_gre_dir_acl "$tundev" "$direction" + + log_test "$direction $what ($tcflags)" +} + +test_gretap() +{ + full_test_span_gre_dir_acl gt4 ingress 8 0 192.0.2.4 "ACL mirror to gretap" + full_test_span_gre_dir_acl gt4 egress 0 8 192.0.2.3 "ACL mirror to gretap" +} + +test_ip6gretap() +{ + full_test_span_gre_dir_acl gt6 ingress 8 0 192.0.2.4 "ACL mirror to ip6gretap" + full_test_span_gre_dir_acl gt6 egress 0 8 192.0.2.3 "ACL mirror to ip6gretap" +} + +test_all() +{ + RET=0 + + slow_path_trap_install $swp1 ingress + slow_path_trap_install $swp1 egress + + tests_run + + slow_path_trap_uninstall $swp1 egress + slow_path_trap_uninstall $swp1 ingress +} + +trap cleanup EXIT + +setup_prepare +setup_wait + +tcflags="skip_hw" +test_all + +if ! tc_offload_check; then + echo "WARN: Could not test offloaded functionality" +else + tcflags="skip_sw" + test_all +fi + +exit $EXIT_STATUS diff --git a/tools/testing/selftests/net/forwarding/mirror_gre_lib.sh b/tools/testing/selftests/net/forwarding/mirror_gre_lib.sh new file mode 100644 index 0000000..619b469 --- /dev/null +++ b/tools/testing/selftests/net/forwarding/mirror_gre_lib.sh @@ -0,0 +1,130 @@ +# SPDX-License-Identifier: GPL-2.0 + +source mirror_lib.sh + +quick_test_span_gre_dir_ips() +{ + local tundev=$1; shift + + do_test_span_dir_ips 10 h3-$tundev "$@" +} + +fail_test_span_gre_dir_ips() +{ + local tundev=$1; shift + + do_test_span_dir_ips 0 h3-$tundev "$@" +} + +test_span_gre_dir_ips() +{ + local tundev=$1; shift + + test_span_dir_ips h3-$tundev "$@" +} + +full_test_span_gre_dir_ips() +{ + local tundev=$1; shift + local direction=$1; shift + local forward_type=$1; shift + local backward_type=$1; shift + local what=$1; shift + local ip1=$1; shift + local ip2=$1; shift + + RET=0 + + mirror_install $swp1 $direction $tundev "matchall $tcflags" + test_span_dir_ips "h3-$tundev" "$direction" "$forward_type" \ + "$backward_type" "$ip1" "$ip2" + mirror_uninstall $swp1 $direction + + log_test "$direction $what ($tcflags)" +} + +full_test_span_gre_dir_vlan_ips() +{ + local tundev=$1; shift + local direction=$1; shift + local vlan_match=$1; shift + local forward_type=$1; shift + local backward_type=$1; shift + local what=$1; shift + local ip1=$1; shift + local ip2=$1; shift + + RET=0 + + mirror_install $swp1 $direction $tundev "matchall $tcflags" + + test_span_dir_ips "h3-$tundev" "$direction" "$forward_type" \ + "$backward_type" "$ip1" "$ip2" + + tc filter add dev $h3 ingress pref 77 prot 802.1q \ + flower $vlan_match ip_proto 0x2f \ + action pass + mirror_test v$h1 $ip1 $ip2 $h3 77 10 + tc filter del dev $h3 ingress pref 77 + + mirror_uninstall $swp1 $direction + + log_test "$direction $what ($tcflags)" +} + +quick_test_span_gre_dir() +{ + quick_test_span_gre_dir_ips "$@" 192.0.2.1 192.0.2.2 +} + +fail_test_span_gre_dir() +{ + fail_test_span_gre_dir_ips "$@" 192.0.2.1 192.0.2.2 +} + +test_span_gre_dir() +{ + test_span_gre_dir_ips "$@" 192.0.2.1 192.0.2.2 +} + +full_test_span_gre_dir() +{ + full_test_span_gre_dir_ips "$@" 192.0.2.1 192.0.2.2 +} + +full_test_span_gre_dir_vlan() +{ + full_test_span_gre_dir_vlan_ips "$@" 192.0.2.1 192.0.2.2 +} + +full_test_span_gre_stp_ips() +{ + local tundev=$1; shift + local nbpdev=$1; shift + local what=$1; shift + local ip1=$1; shift + local ip2=$1; shift + local h3mac=$(mac_get $h3) + + RET=0 + + mirror_install $swp1 ingress $tundev "matchall $tcflags" + quick_test_span_gre_dir_ips $tundev ingress $ip1 $ip2 + + bridge link set dev $nbpdev state disabled + sleep 1 + fail_test_span_gre_dir_ips $tundev ingress $ip1 $ip2 + + bridge link set dev $nbpdev state forwarding + sleep 1 + quick_test_span_gre_dir_ips $tundev ingress $ip1 $ip2 + + mirror_uninstall $swp1 ingress + + log_test "$what: STP state ($tcflags)" +} + +full_test_span_gre_stp() +{ + full_test_span_gre_stp_ips "$@" 192.0.2.1 192.0.2.2 +} diff --git a/tools/testing/selftests/net/forwarding/mirror_gre_neigh.sh b/tools/testing/selftests/net/forwarding/mirror_gre_neigh.sh new file mode 100755 index 0000000..fc0508e --- /dev/null +++ b/tools/testing/selftests/net/forwarding/mirror_gre_neigh.sh @@ -0,0 +1,115 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +# This test uses standard topology for testing gretap. See +# mirror_gre_topo_lib.sh for more details. +# +# Test for mirroring to gretap and ip6gretap, such that the neighbor entry for +# the tunnel remote address has invalid address at the time that the mirroring +# is set up. Later on, the neighbor is deleted and it is expected to be +# reinitialized using the usual ARP process, and the mirroring offload updated. + +ALL_TESTS=" + test_gretap + test_ip6gretap +" + +NUM_NETIFS=6 +source lib.sh +source mirror_lib.sh +source mirror_gre_lib.sh +source mirror_gre_topo_lib.sh + +setup_prepare() +{ + h1=${NETIFS[p1]} + swp1=${NETIFS[p2]} + + swp2=${NETIFS[p3]} + h2=${NETIFS[p4]} + + swp3=${NETIFS[p5]} + h3=${NETIFS[p6]} + + vrf_prepare + mirror_gre_topo_create + + ip address add dev $swp3 192.0.2.129/28 + ip address add dev $h3 192.0.2.130/28 + + ip address add dev $swp3 2001:db8:2::1/64 + ip address add dev $h3 2001:db8:2::2/64 +} + +cleanup() +{ + pre_cleanup + + ip address del dev $h3 2001:db8:2::2/64 + ip address del dev $swp3 2001:db8:2::1/64 + + ip address del dev $h3 192.0.2.130/28 + ip address del dev $swp3 192.0.2.129/28 + + mirror_gre_topo_destroy + vrf_cleanup +} + +test_span_gre_neigh() +{ + local addr=$1; shift + local tundev=$1; shift + local direction=$1; shift + local what=$1; shift + + RET=0 + + ip neigh replace dev $swp3 $addr lladdr 00:11:22:33:44:55 + mirror_install $swp1 $direction $tundev "matchall $tcflags" + fail_test_span_gre_dir $tundev ingress + ip neigh del dev $swp3 $addr + quick_test_span_gre_dir $tundev ingress + mirror_uninstall $swp1 $direction + + log_test "$direction $what: neighbor change ($tcflags)" +} + +test_gretap() +{ + test_span_gre_neigh 192.0.2.130 gt4 ingress "mirror to gretap" + test_span_gre_neigh 192.0.2.130 gt4 egress "mirror to gretap" +} + +test_ip6gretap() +{ + test_span_gre_neigh 2001:db8:2::2 gt6 ingress "mirror to ip6gretap" + test_span_gre_neigh 2001:db8:2::2 gt6 egress "mirror to ip6gretap" +} + +test_all() +{ + slow_path_trap_install $swp1 ingress + slow_path_trap_install $swp1 egress + + tests_run + + slow_path_trap_uninstall $swp1 egress + slow_path_trap_uninstall $swp1 ingress +} + +trap cleanup EXIT + +setup_prepare +setup_wait + +tcflags="skip_hw" +test_all + +if ! tc_offload_check; then + echo "WARN: Could not test offloaded functionality" +else + tcflags="skip_sw" + test_all +fi + +exit $EXIT_STATUS diff --git a/tools/testing/selftests/net/forwarding/mirror_gre_nh.sh b/tools/testing/selftests/net/forwarding/mirror_gre_nh.sh new file mode 100755 index 0000000..8fa681e --- /dev/null +++ b/tools/testing/selftests/net/forwarding/mirror_gre_nh.sh @@ -0,0 +1,127 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +# This test uses standard topology for testing gretap. See +# mirror_gre_topo_lib.sh for more details. +# +# Test that gretap and ip6gretap mirroring works when the other tunnel endpoint +# is reachable through a next-hop route (as opposed to directly-attached route). + +ALL_TESTS=" + test_gretap + test_ip6gretap +" + +NUM_NETIFS=6 +source lib.sh +source mirror_lib.sh +source mirror_gre_lib.sh +source mirror_gre_topo_lib.sh + +setup_prepare() +{ + h1=${NETIFS[p1]} + swp1=${NETIFS[p2]} + + swp2=${NETIFS[p3]} + h2=${NETIFS[p4]} + + swp3=${NETIFS[p5]} + h3=${NETIFS[p6]} + + sysctl_set net.ipv4.conf.all.rp_filter 0 + sysctl_set net.ipv4.conf.$h3.rp_filter 0 + + vrf_prepare + mirror_gre_topo_create + + ip address add dev $swp3 192.0.2.161/28 + ip address add dev $h3 192.0.2.162/28 + ip address add dev gt4 192.0.2.129/32 + ip address add dev h3-gt4 192.0.2.130/32 + + # IPv6 route can't be added after address. Such routes are rejected due + # to the gateway address having been configured on the local system. It + # works the other way around though. + ip address add dev $swp3 2001:db8:4::1/64 + ip -6 route add 2001:db8:2::2/128 via 2001:db8:4::2 + ip address add dev $h3 2001:db8:4::2/64 + ip address add dev gt6 2001:db8:2::1 + ip address add dev h3-gt6 2001:db8:2::2 +} + +cleanup() +{ + pre_cleanup + + ip -6 route del 2001:db8:2::2/128 via 2001:db8:4::2 + ip address del dev $h3 2001:db8:4::2/64 + ip address del dev $swp3 2001:db8:4::1/64 + + ip address del dev $h3 192.0.2.162/28 + ip address del dev $swp3 192.0.2.161/28 + + mirror_gre_topo_destroy + vrf_cleanup + + sysctl_restore net.ipv4.conf.$h3.rp_filter + sysctl_restore net.ipv4.conf.all.rp_filter +} + +test_gretap() +{ + RET=0 + mirror_install $swp1 ingress gt4 "matchall $tcflags" + + # For IPv4, test that there's no mirroring without the route directing + # the traffic to tunnel remote address. Then add it and test that + # mirroring starts. For IPv6 we can't test this due to the limitation + # that routes for locally-specified IPv6 addresses can't be added. + fail_test_span_gre_dir gt4 ingress + + ip route add 192.0.2.130/32 via 192.0.2.162 + quick_test_span_gre_dir gt4 ingress + ip route del 192.0.2.130/32 via 192.0.2.162 + + mirror_uninstall $swp1 ingress + log_test "mirror to gre with next-hop remote ($tcflags)" +} + +test_ip6gretap() +{ + RET=0 + + mirror_install $swp1 ingress gt6 "matchall $tcflags" + quick_test_span_gre_dir gt6 ingress + mirror_uninstall $swp1 ingress + + log_test "mirror to ip6gre with next-hop remote ($tcflags)" +} + +test_all() +{ + slow_path_trap_install $swp1 ingress + slow_path_trap_install $swp1 egress + + tests_run + + slow_path_trap_uninstall $swp1 egress + slow_path_trap_uninstall $swp1 ingress +} + +trap cleanup EXIT + +setup_prepare +setup_wait + +tcflags="skip_hw" +test_all + +if ! tc_offload_check; then + echo "WARN: Could not test offloaded functionality" +else + tcflags="skip_sw" + test_all +fi + +exit $EXIT_STATUS diff --git a/tools/testing/selftests/net/forwarding/mirror_gre_topo_lib.sh b/tools/testing/selftests/net/forwarding/mirror_gre_topo_lib.sh new file mode 100644 index 0000000..2534195 --- /dev/null +++ b/tools/testing/selftests/net/forwarding/mirror_gre_topo_lib.sh @@ -0,0 +1,94 @@ +# SPDX-License-Identifier: GPL-2.0 + +# This is the standard topology for testing mirroring to gretap and ip6gretap +# netdevices. The tests that use it tweak it in one way or another--importantly, +# $swp3 and $h3 need to have addresses set up. +# +# +---------------------+ +---------------------+ +# | H1 | | H2 | +# | + $h1 | | $h2 + | +# | | 192.0.2.1/28 | | 192.0.2.2/28 | | +# +-----|---------------+ +---------------|-----+ +# | | +# +-----|-------------------------------------------------------------|-----+ +# | SW o--> mirror | | +# | +---|-------------------------------------------------------------|---+ | +# | | + $swp1 BR $swp2 + | | +# | +---------------------------------------------------------------------+ | +# | | +# | + $swp3 + gt6 (ip6gretap) + gt4 (gretap) | +# | | : loc=2001:db8:2::1 : loc=192.0.2.129 | +# | | : rem=2001:db8:2::2 : rem=192.0.2.130 | +# | | : ttl=100 : ttl=100 | +# | | : tos=inherit : tos=inherit | +# | | : : | +# +-----|---------------------:----------------------:----------------------+ +# | : : +# +-----|---------------------:----------------------:----------------------+ +# | H3 + $h3 + h3-gt6 (ip6gretap) + h3-gt4 (gretap) | +# | loc=2001:db8:2::2 loc=192.0.2.130 | +# | rem=2001:db8:2::1 rem=192.0.2.129 | +# | ttl=100 ttl=100 | +# | tos=inherit tos=inherit | +# | | +# +-------------------------------------------------------------------------+ + +source mirror_topo_lib.sh + +mirror_gre_topo_h3_create() +{ + mirror_topo_h3_create + + tunnel_create h3-gt4 gretap 192.0.2.130 192.0.2.129 + ip link set h3-gt4 vrf v$h3 + matchall_sink_create h3-gt4 + + tunnel_create h3-gt6 ip6gretap 2001:db8:2::2 2001:db8:2::1 + ip link set h3-gt6 vrf v$h3 + matchall_sink_create h3-gt6 +} + +mirror_gre_topo_h3_destroy() +{ + tunnel_destroy h3-gt6 + tunnel_destroy h3-gt4 + + mirror_topo_h3_destroy +} + +mirror_gre_topo_switch_create() +{ + mirror_topo_switch_create + + tunnel_create gt4 gretap 192.0.2.129 192.0.2.130 \ + ttl 100 tos inherit + + tunnel_create gt6 ip6gretap 2001:db8:2::1 2001:db8:2::2 \ + ttl 100 tos inherit allow-localremote +} + +mirror_gre_topo_switch_destroy() +{ + tunnel_destroy gt6 + tunnel_destroy gt4 + + mirror_topo_switch_destroy +} + +mirror_gre_topo_create() +{ + mirror_topo_h1_create + mirror_topo_h2_create + mirror_gre_topo_h3_create + + mirror_gre_topo_switch_create +} + +mirror_gre_topo_destroy() +{ + mirror_gre_topo_switch_destroy + + mirror_gre_topo_h3_destroy + mirror_topo_h2_destroy + mirror_topo_h1_destroy +} diff --git a/tools/testing/selftests/net/forwarding/mirror_gre_vlan.sh b/tools/testing/selftests/net/forwarding/mirror_gre_vlan.sh new file mode 100755 index 0000000..88cecdb --- /dev/null +++ b/tools/testing/selftests/net/forwarding/mirror_gre_vlan.sh @@ -0,0 +1,92 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +# This test uses standard topology for testing gretap. See +# mirror_gre_topo_lib.sh for more details. +# +# Test for "tc action mirred egress mirror" that mirrors to a gretap netdevice +# whose underlay route points at a vlan device. + +ALL_TESTS=" + test_gretap +" + +NUM_NETIFS=6 +source lib.sh +source mirror_lib.sh +source mirror_gre_lib.sh +source mirror_gre_topo_lib.sh + +setup_prepare() +{ + h1=${NETIFS[p1]} + swp1=${NETIFS[p2]} + + swp2=${NETIFS[p3]} + h2=${NETIFS[p4]} + + swp3=${NETIFS[p5]} + h3=${NETIFS[p6]} + + vrf_prepare + mirror_gre_topo_create + + ip link add name $swp3.555 link $swp3 type vlan id 555 + ip address add dev $swp3.555 192.0.2.129/32 + ip address add dev $swp3.555 2001:db8:2::1/128 + ip link set dev $swp3.555 up + + ip route add 192.0.2.130/32 dev $swp3.555 + ip -6 route add 2001:db8:2::2/128 dev $swp3.555 + + ip link add name $h3.555 link $h3 type vlan id 555 + ip link set dev $h3.555 master v$h3 + ip address add dev $h3.555 192.0.2.130/28 + ip address add dev $h3.555 2001:db8:2::2/64 + ip link set dev $h3.555 up +} + +cleanup() +{ + pre_cleanup + + ip link del dev $h3.555 + ip link del dev $swp3.555 + + mirror_gre_topo_destroy + vrf_cleanup +} + +test_gretap() +{ + full_test_span_gre_dir gt4 ingress 8 0 "mirror to gretap" + full_test_span_gre_dir gt4 egress 0 8 "mirror to gretap" +} + +test_all() +{ + slow_path_trap_install $swp1 ingress + slow_path_trap_install $swp1 egress + + tests_run + + slow_path_trap_uninstall $swp1 egress + slow_path_trap_uninstall $swp1 ingress +} + +trap cleanup EXIT + +setup_prepare +setup_wait + +tcflags="skip_hw" +test_all + +if ! tc_offload_check; then + echo "WARN: Could not test offloaded functionality" +else + tcflags="skip_sw" + test_all +fi + +exit $EXIT_STATUS diff --git a/tools/testing/selftests/net/forwarding/mirror_gre_vlan_bridge_1q.sh b/tools/testing/selftests/net/forwarding/mirror_gre_vlan_bridge_1q.sh new file mode 100755 index 0000000..5dbc7a0 --- /dev/null +++ b/tools/testing/selftests/net/forwarding/mirror_gre_vlan_bridge_1q.sh @@ -0,0 +1,270 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +# This test uses standard topology for testing gretap. See +# mirror_gre_topo_lib.sh for more details. +# +# Test for "tc action mirred egress mirror" when the underlay route points at a +# vlan device on top of a bridge device with vlan filtering (802.1q). + +ALL_TESTS=" + test_gretap + test_ip6gretap + test_gretap_forbidden_cpu + test_ip6gretap_forbidden_cpu + test_gretap_forbidden_egress + test_ip6gretap_forbidden_egress + test_gretap_untagged_egress + test_ip6gretap_untagged_egress + test_gretap_fdb_roaming + test_ip6gretap_fdb_roaming + test_gretap_stp + test_ip6gretap_stp +" + +NUM_NETIFS=6 +source lib.sh +source mirror_lib.sh +source mirror_gre_lib.sh +source mirror_gre_topo_lib.sh + +setup_prepare() +{ + h1=${NETIFS[p1]} + swp1=${NETIFS[p2]} + + swp2=${NETIFS[p3]} + h2=${NETIFS[p4]} + + swp3=${NETIFS[p5]} + h3=${NETIFS[p6]} + + vrf_prepare + mirror_gre_topo_create + + vlan_create br1 555 "" 192.0.2.129/32 2001:db8:2::1/128 + bridge vlan add dev br1 vid 555 self + ip route rep 192.0.2.130/32 dev br1.555 + ip -6 route rep 2001:db8:2::2/128 dev br1.555 + + vlan_create $h3 555 v$h3 192.0.2.130/28 2001:db8:2::2/64 + + ip link set dev $swp3 master br1 + bridge vlan add dev $swp3 vid 555 + bridge vlan add dev $swp2 vid 555 +} + +cleanup() +{ + pre_cleanup + + ip link set dev $swp2 nomaster + ip link set dev $swp3 nomaster + vlan_destroy $h3 555 + vlan_destroy br1 555 + + mirror_gre_topo_destroy + vrf_cleanup +} + +test_vlan_match() +{ + local tundev=$1; shift + local vlan_match=$1; shift + local what=$1; shift + + full_test_span_gre_dir_vlan $tundev ingress "$vlan_match" 8 0 "$what" + full_test_span_gre_dir_vlan $tundev egress "$vlan_match" 0 8 "$what" +} + +test_gretap() +{ + test_vlan_match gt4 'vlan_id 555 vlan_ethtype ip' "mirror to gretap" +} + +test_ip6gretap() +{ + test_vlan_match gt6 'vlan_id 555 vlan_ethtype ipv6' "mirror to ip6gretap" +} + +test_span_gre_forbidden_cpu() +{ + local tundev=$1; shift + local what=$1; shift + + RET=0 + + # Run the pass-test first, to prime neighbor table. + mirror_install $swp1 ingress $tundev "matchall $tcflags" + quick_test_span_gre_dir $tundev ingress + + # Now forbid the VLAN at the bridge and see it fail. + bridge vlan del dev br1 vid 555 self + sleep 1 + fail_test_span_gre_dir $tundev ingress + + bridge vlan add dev br1 vid 555 self + sleep 1 + quick_test_span_gre_dir $tundev ingress + + mirror_uninstall $swp1 ingress + + log_test "$what: vlan forbidden at a bridge ($tcflags)" +} + +test_gretap_forbidden_cpu() +{ + test_span_gre_forbidden_cpu gt4 "mirror to gretap" +} + +test_ip6gretap_forbidden_cpu() +{ + test_span_gre_forbidden_cpu gt6 "mirror to ip6gretap" +} + +test_span_gre_forbidden_egress() +{ + local tundev=$1; shift + local what=$1; shift + + RET=0 + + mirror_install $swp1 ingress $tundev "matchall $tcflags" + quick_test_span_gre_dir $tundev ingress + + bridge vlan del dev $swp3 vid 555 + sleep 1 + fail_test_span_gre_dir $tundev ingress + + bridge vlan add dev $swp3 vid 555 + # Re-prime FDB + arping -I br1.555 192.0.2.130 -fqc 1 + sleep 1 + quick_test_span_gre_dir $tundev ingress + + mirror_uninstall $swp1 ingress + + log_test "$what: vlan forbidden at a bridge egress ($tcflags)" +} + +test_gretap_forbidden_egress() +{ + test_span_gre_forbidden_egress gt4 "mirror to gretap" +} + +test_ip6gretap_forbidden_egress() +{ + test_span_gre_forbidden_egress gt6 "mirror to ip6gretap" +} + +test_span_gre_untagged_egress() +{ + local tundev=$1; shift + local what=$1; shift + + RET=0 + + mirror_install $swp1 ingress $tundev "matchall $tcflags" + + quick_test_span_gre_dir $tundev ingress + quick_test_span_vlan_dir $h3 555 ingress + + bridge vlan add dev $swp3 vid 555 pvid untagged + sleep 1 + quick_test_span_gre_dir $tundev ingress + fail_test_span_vlan_dir $h3 555 ingress + + bridge vlan add dev $swp3 vid 555 + sleep 1 + quick_test_span_gre_dir $tundev ingress + quick_test_span_vlan_dir $h3 555 ingress + + mirror_uninstall $swp1 ingress + + log_test "$what: vlan untagged at a bridge egress ($tcflags)" +} + +test_gretap_untagged_egress() +{ + test_span_gre_untagged_egress gt4 "mirror to gretap" +} + +test_ip6gretap_untagged_egress() +{ + test_span_gre_untagged_egress gt6 "mirror to ip6gretap" +} + +test_span_gre_fdb_roaming() +{ + local tundev=$1; shift + local what=$1; shift + local h3mac=$(mac_get $h3) + + RET=0 + + mirror_install $swp1 ingress $tundev "matchall $tcflags" + quick_test_span_gre_dir $tundev ingress + + bridge fdb del dev $swp3 $h3mac vlan 555 master + bridge fdb add dev $swp2 $h3mac vlan 555 master + sleep 1 + fail_test_span_gre_dir $tundev ingress + + bridge fdb del dev $swp2 $h3mac vlan 555 master + # Re-prime FDB + arping -I br1.555 192.0.2.130 -fqc 1 + sleep 1 + quick_test_span_gre_dir $tundev ingress + + mirror_uninstall $swp1 ingress + + log_test "$what: MAC roaming ($tcflags)" +} + +test_gretap_fdb_roaming() +{ + test_span_gre_fdb_roaming gt4 "mirror to gretap" +} + +test_ip6gretap_fdb_roaming() +{ + test_span_gre_fdb_roaming gt6 "mirror to ip6gretap" +} + +test_gretap_stp() +{ + full_test_span_gre_stp gt4 $swp3 "mirror to gretap" +} + +test_ip6gretap_stp() +{ + full_test_span_gre_stp gt6 $swp3 "mirror to ip6gretap" +} + +test_all() +{ + slow_path_trap_install $swp1 ingress + slow_path_trap_install $swp1 egress + + tests_run + + slow_path_trap_uninstall $swp1 egress + slow_path_trap_uninstall $swp1 ingress +} + +trap cleanup EXIT + +setup_prepare +setup_wait + +tcflags="skip_hw" +test_all + +if ! tc_offload_check; then + echo "WARN: Could not test offloaded functionality" +else + tcflags="skip_sw" + test_all +fi + +exit $EXIT_STATUS diff --git a/tools/testing/selftests/net/forwarding/mirror_lib.sh b/tools/testing/selftests/net/forwarding/mirror_lib.sh new file mode 100644 index 0000000..d36dc26 --- /dev/null +++ b/tools/testing/selftests/net/forwarding/mirror_lib.sh @@ -0,0 +1,132 @@ +# SPDX-License-Identifier: GPL-2.0 + +mirror_install() +{ + local from_dev=$1; shift + local direction=$1; shift + local to_dev=$1; shift + local filter=$1; shift + + tc filter add dev $from_dev $direction \ + pref 1000 $filter \ + action mirred egress mirror dev $to_dev +} + +mirror_uninstall() +{ + local from_dev=$1; shift + local direction=$1; shift + + tc filter del dev $swp1 $direction pref 1000 +} + +mirror_test() +{ + local vrf_name=$1; shift + local sip=$1; shift + local dip=$1; shift + local dev=$1; shift + local pref=$1; shift + local expect=$1; shift + + local t0=$(tc_rule_stats_get $dev $pref) + ip vrf exec $vrf_name \ + ${PING} ${sip:+-I $sip} $dip -c 10 -i 0.1 -w 2 &> /dev/null + local t1=$(tc_rule_stats_get $dev $pref) + local delta=$((t1 - t0)) + # Tolerate a couple stray extra packets. + ((expect <= delta && delta <= expect + 2)) + check_err $? "Expected to capture $expect packets, got $delta." +} + +do_test_span_dir_ips() +{ + local expect=$1; shift + local dev=$1; shift + local direction=$1; shift + local ip1=$1; shift + local ip2=$1; shift + + icmp_capture_install $dev + mirror_test v$h1 $ip1 $ip2 $dev 100 $expect + mirror_test v$h2 $ip2 $ip1 $dev 100 $expect + icmp_capture_uninstall $dev +} + +quick_test_span_dir_ips() +{ + do_test_span_dir_ips 10 "$@" +} + +fail_test_span_dir_ips() +{ + do_test_span_dir_ips 0 "$@" +} + +test_span_dir_ips() +{ + local dev=$1; shift + local direction=$1; shift + local forward_type=$1; shift + local backward_type=$1; shift + local ip1=$1; shift + local ip2=$1; shift + + quick_test_span_dir_ips "$dev" "$direction" "$ip1" "$ip2" + + icmp_capture_install $dev "type $forward_type" + mirror_test v$h1 $ip1 $ip2 $dev 100 10 + icmp_capture_uninstall $dev + + icmp_capture_install $dev "type $backward_type" + mirror_test v$h2 $ip2 $ip1 $dev 100 10 + icmp_capture_uninstall $dev +} + +fail_test_span_dir() +{ + fail_test_span_dir_ips "$@" 192.0.2.1 192.0.2.2 +} + +test_span_dir() +{ + test_span_dir_ips "$@" 192.0.2.1 192.0.2.2 +} + +do_test_span_vlan_dir_ips() +{ + local expect=$1; shift + local dev=$1; shift + local vid=$1; shift + local direction=$1; shift + local ip1=$1; shift + local ip2=$1; shift + + # Install the capture as skip_hw to avoid double-counting of packets. + # The traffic is meant for local box anyway, so will be trapped to + # kernel. + vlan_capture_install $dev "skip_hw vlan_id $vid" + mirror_test v$h1 $ip1 $ip2 $dev 100 $expect + mirror_test v$h2 $ip2 $ip1 $dev 100 $expect + vlan_capture_uninstall $dev +} + +quick_test_span_vlan_dir_ips() +{ + do_test_span_vlan_dir_ips 10 "$@" +} + +fail_test_span_vlan_dir_ips() +{ + do_test_span_vlan_dir_ips 0 "$@" +} + +quick_test_span_vlan_dir() +{ + quick_test_span_vlan_dir_ips "$@" 192.0.2.1 192.0.2.2 +} + +fail_test_span_vlan_dir() +{ + fail_test_span_vlan_dir_ips "$@" 192.0.2.1 192.0.2.2 +} diff --git a/tools/testing/selftests/net/forwarding/mirror_topo_lib.sh b/tools/testing/selftests/net/forwarding/mirror_topo_lib.sh new file mode 100644 index 0000000..04979e5 --- /dev/null +++ b/tools/testing/selftests/net/forwarding/mirror_topo_lib.sh @@ -0,0 +1,101 @@ +# SPDX-License-Identifier: GPL-2.0 + +# This is the standard topology for testing mirroring. The tests that use it +# tweak it in one way or another--typically add more devices to the topology. +# +# +---------------------+ +---------------------+ +# | H1 | | H2 | +# | + $h1 | | $h2 + | +# | | 192.0.2.1/28 | | 192.0.2.2/28 | | +# +-----|---------------+ +---------------|-----+ +# | | +# +-----|-------------------------------------------------------------|-----+ +# | SW o--> mirror | | +# | +---|-------------------------------------------------------------|---+ | +# | | + $swp1 BR $swp2 + | | +# | +---------------------------------------------------------------------+ | +# | | +# | + $swp3 | +# +-----|-------------------------------------------------------------------+ +# | +# +-----|-------------------------------------------------------------------+ +# | H3 + $h3 | +# | | +# +-------------------------------------------------------------------------+ + +mirror_topo_h1_create() +{ + simple_if_init $h1 192.0.2.1/28 +} + +mirror_topo_h1_destroy() +{ + simple_if_fini $h1 192.0.2.1/28 +} + +mirror_topo_h2_create() +{ + simple_if_init $h2 192.0.2.2/28 +} + +mirror_topo_h2_destroy() +{ + simple_if_fini $h2 192.0.2.2/28 +} + +mirror_topo_h3_create() +{ + simple_if_init $h3 + tc qdisc add dev $h3 clsact +} + +mirror_topo_h3_destroy() +{ + tc qdisc del dev $h3 clsact + simple_if_fini $h3 +} + +mirror_topo_switch_create() +{ + ip link set dev $swp3 up + + ip link add name br1 type bridge vlan_filtering 1 + ip link set dev br1 up + + ip link set dev $swp1 master br1 + ip link set dev $swp1 up + + ip link set dev $swp2 master br1 + ip link set dev $swp2 up + + tc qdisc add dev $swp1 clsact +} + +mirror_topo_switch_destroy() +{ + tc qdisc del dev $swp1 clsact + + ip link set dev $swp1 down + ip link set dev $swp2 down + ip link del dev br1 + + ip link set dev $swp3 down +} + +mirror_topo_create() +{ + mirror_topo_h1_create + mirror_topo_h2_create + mirror_topo_h3_create + + mirror_topo_switch_create +} + +mirror_topo_destroy() +{ + mirror_topo_switch_destroy + + mirror_topo_h3_destroy + mirror_topo_h2_destroy + mirror_topo_h1_destroy +} diff --git a/tools/testing/selftests/net/forwarding/mirror_vlan.sh b/tools/testing/selftests/net/forwarding/mirror_vlan.sh new file mode 100755 index 0000000..9ab2ce7 --- /dev/null +++ b/tools/testing/selftests/net/forwarding/mirror_vlan.sh @@ -0,0 +1,131 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +# This test uses standard topology for testing mirroring. See mirror_topo_lib.sh +# for more details. +# +# Test for "tc action mirred egress mirror" that mirrors to a vlan device. + +ALL_TESTS=" + test_vlan + test_tagged_vlan +" + +NUM_NETIFS=6 +source lib.sh +source mirror_lib.sh +source mirror_topo_lib.sh + +setup_prepare() +{ + h1=${NETIFS[p1]} + swp1=${NETIFS[p2]} + + swp2=${NETIFS[p3]} + h2=${NETIFS[p4]} + + swp3=${NETIFS[p5]} + h3=${NETIFS[p6]} + + vrf_prepare + mirror_topo_create + + vlan_create $swp3 555 + + vlan_create $h3 555 v$h3 + matchall_sink_create $h3.555 + + vlan_create $h1 111 v$h1 192.0.2.17/28 + bridge vlan add dev $swp1 vid 111 + + vlan_create $h2 111 v$h2 192.0.2.18/28 + bridge vlan add dev $swp2 vid 111 +} + +cleanup() +{ + pre_cleanup + + vlan_destroy $h2 111 + vlan_destroy $h1 111 + vlan_destroy $h3 555 + vlan_destroy $swp3 555 + + mirror_topo_destroy + vrf_cleanup +} + +test_vlan_dir() +{ + local direction=$1; shift + local forward_type=$1; shift + local backward_type=$1; shift + + RET=0 + + mirror_install $swp1 $direction $swp3.555 "matchall $tcflags" + test_span_dir "$h3.555" "$direction" "$forward_type" "$backward_type" + mirror_uninstall $swp1 $direction + + log_test "$direction mirror to vlan ($tcflags)" +} + +test_vlan() +{ + test_vlan_dir ingress 8 0 + test_vlan_dir egress 0 8 +} + +test_tagged_vlan_dir() +{ + local direction=$1; shift + local forward_type=$1; shift + local backward_type=$1; shift + + RET=0 + + mirror_install $swp1 $direction $swp3.555 "matchall $tcflags" + do_test_span_vlan_dir_ips 10 "$h3.555" 111 "$direction" \ + 192.0.2.17 192.0.2.18 + do_test_span_vlan_dir_ips 0 "$h3.555" 555 "$direction" \ + 192.0.2.17 192.0.2.18 + mirror_uninstall $swp1 $direction + + log_test "$direction mirror tagged to vlan ($tcflags)" +} + +test_tagged_vlan() +{ + test_tagged_vlan_dir ingress 8 0 + test_tagged_vlan_dir egress 0 8 +} + +test_all() +{ + slow_path_trap_install $swp1 ingress + slow_path_trap_install $swp1 egress + trap_install $h3 ingress + + tests_run + + trap_uninstall $h3 ingress + slow_path_trap_uninstall $swp1 egress + slow_path_trap_uninstall $swp1 ingress +} + +trap cleanup EXIT + +setup_prepare +setup_wait + +tcflags="skip_hw" +test_all + +if ! tc_offload_check; then + echo "WARN: Could not test offloaded functionality" +else + tcflags="skip_sw" + test_all +fi + +exit $EXIT_STATUS diff --git a/tools/testing/selftests/net/forwarding/router.sh b/tools/testing/selftests/net/forwarding/router.sh index cc6a14a..a75cb51 100755 --- a/tools/testing/selftests/net/forwarding/router.sh +++ b/tools/testing/selftests/net/forwarding/router.sh @@ -1,6 +1,7 @@ #!/bin/bash # SPDX-License-Identifier: GPL-2.0 +ALL_TESTS="ping_ipv4 ping_ipv6" NUM_NETIFS=4 source lib.sh @@ -114,12 +115,21 @@ cleanup() vrf_cleanup } +ping_ipv4() +{ + ping_test $h1 198.51.100.2 +} + +ping_ipv6() +{ + ping6_test $h1 2001:db8:2::2 +} + trap cleanup EXIT setup_prepare setup_wait -ping_test $h1 198.51.100.2 -ping6_test $h1 2001:db8:2::2 +tests_run exit $EXIT_STATUS diff --git a/tools/testing/selftests/net/forwarding/router_multipath.sh b/tools/testing/selftests/net/forwarding/router_multipath.sh index 3bc3510..8b6d0fb 100755 --- a/tools/testing/selftests/net/forwarding/router_multipath.sh +++ b/tools/testing/selftests/net/forwarding/router_multipath.sh @@ -1,6 +1,7 @@ #!/bin/bash # SPDX-License-Identifier: GPL-2.0 +ALL_TESTS="ping_ipv4 ping_ipv6 multipath_test" NUM_NETIFS=8 source lib.sh @@ -191,7 +192,7 @@ multipath_eval() diff=$(echo $weights_ratio - $packets_ratio | bc -l) diff=${diff#-} - test "$(echo "$diff / $weights_ratio > 0.1" | bc -l)" -eq 0 + test "$(echo "$diff / $weights_ratio > 0.15" | bc -l)" -eq 0 check_err $? "Too large discrepancy between expected and measured ratios" log_test "$desc" log_info "Expected ratio $weights_ratio Measured ratio $packets_ratio" @@ -204,13 +205,11 @@ multipath4_test() local weight_rp13=$3 local t0_rp12 t0_rp13 t1_rp12 t1_rp13 local packets_rp12 packets_rp13 - local hash_policy # Transmit multiple flows from h1 to h2 and make sure they are # distributed between both multipath links (rp12 and rp13) # according to the configured weights. - hash_policy=$(sysctl -n net.ipv4.fib_multipath_hash_policy) - sysctl -q -w net.ipv4.fib_multipath_hash_policy=1 + sysctl_set net.ipv4.fib_multipath_hash_policy 1 ip route replace 198.51.100.0/24 vrf vrf-r1 \ nexthop via 169.254.2.22 dev $rp12 weight $weight_rp12 \ nexthop via 169.254.3.23 dev $rp13 weight $weight_rp13 @@ -232,7 +231,7 @@ multipath4_test() ip route replace 198.51.100.0/24 vrf vrf-r1 \ nexthop via 169.254.2.22 dev $rp12 \ nexthop via 169.254.3.23 dev $rp13 - sysctl -q -w net.ipv4.fib_multipath_hash_policy=$hash_policy + sysctl_restore net.ipv4.fib_multipath_hash_policy } multipath6_l4_test() @@ -242,13 +241,11 @@ multipath6_l4_test() local weight_rp13=$3 local t0_rp12 t0_rp13 t1_rp12 t1_rp13 local packets_rp12 packets_rp13 - local hash_policy # Transmit multiple flows from h1 to h2 and make sure they are # distributed between both multipath links (rp12 and rp13) # according to the configured weights. - hash_policy=$(sysctl -n net.ipv6.fib_multipath_hash_policy) - sysctl -q -w net.ipv6.fib_multipath_hash_policy=1 + sysctl_set net.ipv6.fib_multipath_hash_policy 1 ip route replace 2001:db8:2::/64 vrf vrf-r1 \ nexthop via fe80:2::22 dev $rp12 weight $weight_rp12 \ @@ -271,7 +268,7 @@ multipath6_l4_test() nexthop via fe80:2::22 dev $rp12 \ nexthop via fe80:3::23 dev $rp13 - sysctl -q -w net.ipv6.fib_multipath_hash_policy=$hash_policy + sysctl_restore net.ipv6.fib_multipath_hash_policy } multipath6_test() @@ -364,13 +361,21 @@ cleanup() vrf_cleanup } +ping_ipv4() +{ + ping_test $h1 198.51.100.2 +} + +ping_ipv6() +{ + ping6_test $h1 2001:db8:2::2 +} + trap cleanup EXIT setup_prepare setup_wait -ping_test $h1 198.51.100.2 -ping6_test $h1 2001:db8:2::2 -multipath_test +tests_run exit $EXIT_STATUS diff --git a/tools/testing/selftests/net/forwarding/tc_actions.sh b/tools/testing/selftests/net/forwarding/tc_actions.sh index 3a6385e..813d02d 100755 --- a/tools/testing/selftests/net/forwarding/tc_actions.sh +++ b/tools/testing/selftests/net/forwarding/tc_actions.sh @@ -1,6 +1,8 @@ #!/bin/bash # SPDX-License-Identifier: GPL-2.0 +ALL_TESTS="gact_drop_and_ok_test mirred_egress_redirect_test \ + mirred_egress_mirror_test gact_trap_test" NUM_NETIFS=4 source tc_common.sh source lib.sh @@ -111,6 +113,10 @@ gact_trap_test() { RET=0 + if [[ "$tcflags" != "skip_sw" ]]; then + return 0; + fi + tc filter add dev $swp1 ingress protocol ip pref 1 handle 101 flower \ skip_hw dst_ip 192.0.2.2 action drop tc filter add dev $swp1 ingress protocol ip pref 3 handle 103 flower \ @@ -179,24 +185,29 @@ cleanup() ip link set $swp1 address $swp1origmac } +mirred_egress_redirect_test() +{ + mirred_egress_test "redirect" +} + +mirred_egress_mirror_test() +{ + mirred_egress_test "mirror" +} + trap cleanup EXIT setup_prepare setup_wait -gact_drop_and_ok_test -mirred_egress_test "redirect" -mirred_egress_test "mirror" +tests_run tc_offload_check if [[ $? -ne 0 ]]; then log_info "Could not test offloaded functionality" else tcflags="skip_sw" - gact_drop_and_ok_test - mirred_egress_test "redirect" - mirred_egress_test "mirror" - gact_trap_test + tests_run fi exit $EXIT_STATUS diff --git a/tools/testing/selftests/net/forwarding/tc_chains.sh b/tools/testing/selftests/net/forwarding/tc_chains.sh index 2fd1522..d2c783e 100755 --- a/tools/testing/selftests/net/forwarding/tc_chains.sh +++ b/tools/testing/selftests/net/forwarding/tc_chains.sh @@ -1,6 +1,7 @@ #!/bin/bash # SPDX-License-Identifier: GPL-2.0 +ALL_TESTS="unreachable_chain_test gact_goto_chain_test" NUM_NETIFS=2 source tc_common.sh source lib.sh @@ -107,16 +108,14 @@ trap cleanup EXIT setup_prepare setup_wait -unreachable_chain_test -gact_goto_chain_test +tests_run tc_offload_check if [[ $? -ne 0 ]]; then log_info "Could not test offloaded functionality" else tcflags="skip_sw" - unreachable_chain_test - gact_goto_chain_test + tests_run fi exit $EXIT_STATUS diff --git a/tools/testing/selftests/net/forwarding/tc_flower.sh b/tools/testing/selftests/net/forwarding/tc_flower.sh index 032b882..20d1077 100755 --- a/tools/testing/selftests/net/forwarding/tc_flower.sh +++ b/tools/testing/selftests/net/forwarding/tc_flower.sh @@ -1,6 +1,8 @@ #!/bin/bash # SPDX-License-Identifier: GPL-2.0 +ALL_TESTS="match_dst_mac_test match_src_mac_test match_dst_ip_test \ + match_src_ip_test match_ip_flags_test" NUM_NETIFS=2 source tc_common.sh source lib.sh @@ -149,6 +151,74 @@ match_src_ip_test() log_test "src_ip match ($tcflags)" } +match_ip_flags_test() +{ + RET=0 + + tc filter add dev $h2 ingress protocol ip pref 1 handle 101 flower \ + $tcflags ip_flags frag action continue + tc filter add dev $h2 ingress protocol ip pref 2 handle 102 flower \ + $tcflags ip_flags firstfrag action continue + tc filter add dev $h2 ingress protocol ip pref 3 handle 103 flower \ + $tcflags ip_flags nofirstfrag action continue + tc filter add dev $h2 ingress protocol ip pref 4 handle 104 flower \ + $tcflags ip_flags nofrag action drop + + $MZ $h1 -c 1 -p 1000 -a $h1mac -b $h2mac -A 192.0.2.1 -B 192.0.2.2 \ + -t ip "frag=0" -q + + tc_check_packets "dev $h2 ingress" 101 1 + check_fail $? "Matched on wrong frag filter (nofrag)" + + tc_check_packets "dev $h2 ingress" 102 1 + check_fail $? "Matched on wrong firstfrag filter (nofrag)" + + tc_check_packets "dev $h2 ingress" 103 1 + check_err $? "Did not match on nofirstfrag filter (nofrag) " + + tc_check_packets "dev $h2 ingress" 104 1 + check_err $? "Did not match on nofrag filter (nofrag)" + + $MZ $h1 -c 1 -p 1000 -a $h1mac -b $h2mac -A 192.0.2.1 -B 192.0.2.2 \ + -t ip "frag=0,mf" -q + + tc_check_packets "dev $h2 ingress" 101 1 + check_err $? "Did not match on frag filter (1stfrag)" + + tc_check_packets "dev $h2 ingress" 102 1 + check_err $? "Did not match fistfrag filter (1stfrag)" + + tc_check_packets "dev $h2 ingress" 103 1 + check_err $? "Matched on wrong nofirstfrag filter (1stfrag)" + + tc_check_packets "dev $h2 ingress" 104 1 + check_err $? "Match on wrong nofrag filter (1stfrag)" + + $MZ $h1 -c 1 -p 1000 -a $h1mac -b $h2mac -A 192.0.2.1 -B 192.0.2.2 \ + -t ip "frag=256,mf" -q + $MZ $h1 -c 1 -p 1000 -a $h1mac -b $h2mac -A 192.0.2.1 -B 192.0.2.2 \ + -t ip "frag=256" -q + + tc_check_packets "dev $h2 ingress" 101 3 + check_err $? "Did not match on frag filter (no1stfrag)" + + tc_check_packets "dev $h2 ingress" 102 1 + check_err $? "Matched on wrong firstfrag filter (no1stfrag)" + + tc_check_packets "dev $h2 ingress" 103 3 + check_err $? "Did not match on nofirstfrag filter (no1stfrag)" + + tc_check_packets "dev $h2 ingress" 104 1 + check_err $? "Matched on nofrag filter (no1stfrag)" + + tc filter del dev $h2 ingress protocol ip pref 1 handle 101 flower + tc filter del dev $h2 ingress protocol ip pref 2 handle 102 flower + tc filter del dev $h2 ingress protocol ip pref 3 handle 103 flower + tc filter del dev $h2 ingress protocol ip pref 4 handle 104 flower + + log_test "ip_flags match ($tcflags)" +} + setup_prepare() { h1=${NETIFS[p1]} @@ -177,20 +247,14 @@ trap cleanup EXIT setup_prepare setup_wait -match_dst_mac_test -match_src_mac_test -match_dst_ip_test -match_src_ip_test +tests_run tc_offload_check if [[ $? -ne 0 ]]; then log_info "Could not test offloaded functionality" else tcflags="skip_sw" - match_dst_mac_test - match_src_mac_test - match_dst_ip_test - match_src_ip_test + tests_run fi exit $EXIT_STATUS diff --git a/tools/testing/selftests/net/forwarding/tc_shblocks.sh b/tools/testing/selftests/net/forwarding/tc_shblocks.sh index 077b980..b5b9172 100755 --- a/tools/testing/selftests/net/forwarding/tc_shblocks.sh +++ b/tools/testing/selftests/net/forwarding/tc_shblocks.sh @@ -1,6 +1,7 @@ #!/bin/bash # SPDX-License-Identifier: GPL-2.0 +ALL_TESTS="shared_block_test" NUM_NETIFS=4 source tc_common.sh source lib.sh @@ -109,14 +110,14 @@ trap cleanup EXIT setup_prepare setup_wait -shared_block_test +tests_run tc_offload_check if [[ $? -ne 0 ]]; then log_info "Could not test offloaded functionality" else tcflags="skip_sw" - shared_block_test + tests_run fi exit $EXIT_STATUS diff --git a/tools/testing/selftests/net/msg_zerocopy.sh b/tools/testing/selftests/net/msg_zerocopy.sh index d571d21..c43c6de 100755 --- a/tools/testing/selftests/net/msg_zerocopy.sh +++ b/tools/testing/selftests/net/msg_zerocopy.sh @@ -21,6 +21,14 @@ readonly DADDR6='fd::2' readonly path_sysctl_mem="net.core.optmem_max" +# No arguments: automated test +if [[ "$#" -eq "0" ]]; then + $0 4 tcp -t 1 + $0 6 tcp -t 1 + echo "OK. All tests passed" + exit 0 +fi + # Argument parsing if [[ "$#" -lt "2" ]]; then echo "Usage: $0 [4|6] [tcp|udp|raw|raw_hdrincl|packet|packet_dgram] <args>" diff --git a/tools/testing/selftests/net/pmtu.sh b/tools/testing/selftests/net/pmtu.sh index 7514f93..f8cc38a 100755 --- a/tools/testing/selftests/net/pmtu.sh +++ b/tools/testing/selftests/net/pmtu.sh @@ -371,7 +371,7 @@ test_pmtu_vti6_link_add_mtu() { fail=0 - min=1280 + min=68 # vti6 can carry IPv4 packets too max=$((65535 - 40)) # Check invalid values first for v in $((min - 1)) $((max + 1)); do @@ -387,7 +387,7 @@ test_pmtu_vti6_link_add_mtu() { done # Now check valid values - for v in 1280 1300 $((65535 - 40)); do + for v in 68 1280 1300 $((65535 - 40)); do ${ns_a} ip link add vti6_a mtu ${v} type vti6 local ${veth6_a_addr} remote ${veth6_b_addr} key 10 mtu="$(link_get_mtu "${ns_a}" vti6_a)" ${ns_a} ip link del vti6_a diff --git a/tools/testing/selftests/net/psock_snd.c b/tools/testing/selftests/net/psock_snd.c new file mode 100644 index 0000000..7d15e10 --- /dev/null +++ b/tools/testing/selftests/net/psock_snd.c @@ -0,0 +1,397 @@ +// SPDX-License-Identifier: GPL-2.0 + +#define _GNU_SOURCE + +#include <arpa/inet.h> +#include <errno.h> +#include <error.h> +#include <fcntl.h> +#include <limits.h> +#include <linux/filter.h> +#include <linux/bpf.h> +#include <linux/if_packet.h> +#include <linux/if_vlan.h> +#include <linux/virtio_net.h> +#include <net/if.h> +#include <net/ethernet.h> +#include <netinet/ip.h> +#include <netinet/udp.h> +#include <poll.h> +#include <sched.h> +#include <stdbool.h> +#include <stdint.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <sys/mman.h> +#include <sys/socket.h> +#include <sys/stat.h> +#include <sys/types.h> +#include <unistd.h> + +#include "psock_lib.h" + +static bool cfg_use_bind; +static bool cfg_use_csum_off; +static bool cfg_use_csum_off_bad; +static bool cfg_use_dgram; +static bool cfg_use_gso; +static bool cfg_use_qdisc_bypass; +static bool cfg_use_vlan; +static bool cfg_use_vnet; + +static char *cfg_ifname = "lo"; +static int cfg_mtu = 1500; +static int cfg_payload_len = DATA_LEN; +static int cfg_truncate_len = INT_MAX; +static uint16_t cfg_port = 8000; + +/* test sending up to max mtu + 1 */ +#define TEST_SZ (sizeof(struct virtio_net_hdr) + ETH_HLEN + ETH_MAX_MTU + 1) + +static char tbuf[TEST_SZ], rbuf[TEST_SZ]; + +static unsigned long add_csum_hword(const uint16_t *start, int num_u16) +{ + unsigned long sum = 0; + int i; + + for (i = 0; i < num_u16; i++) + sum += start[i]; + + return sum; +} + +static uint16_t build_ip_csum(const uint16_t *start, int num_u16, + unsigned long sum) +{ + sum += add_csum_hword(start, num_u16); + + while (sum >> 16) + sum = (sum & 0xffff) + (sum >> 16); + + return ~sum; +} + +static int build_vnet_header(void *header) +{ + struct virtio_net_hdr *vh = header; + + vh->hdr_len = ETH_HLEN + sizeof(struct iphdr) + sizeof(struct udphdr); + + if (cfg_use_csum_off) { + vh->flags |= VIRTIO_NET_HDR_F_NEEDS_CSUM; + vh->csum_start = ETH_HLEN + sizeof(struct iphdr); + vh->csum_offset = __builtin_offsetof(struct udphdr, check); + + /* position check field exactly one byte beyond end of packet */ + if (cfg_use_csum_off_bad) + vh->csum_start += sizeof(struct udphdr) + cfg_payload_len - + vh->csum_offset - 1; + } + + if (cfg_use_gso) { + vh->gso_type = VIRTIO_NET_HDR_GSO_UDP; + vh->gso_size = cfg_mtu - sizeof(struct iphdr); + } + + return sizeof(*vh); +} + +static int build_eth_header(void *header) +{ + struct ethhdr *eth = header; + + if (cfg_use_vlan) { + uint16_t *tag = header + ETH_HLEN; + + eth->h_proto = htons(ETH_P_8021Q); + tag[1] = htons(ETH_P_IP); + return ETH_HLEN + 4; + } + + eth->h_proto = htons(ETH_P_IP); + return ETH_HLEN; +} + +static int build_ipv4_header(void *header, int payload_len) +{ + struct iphdr *iph = header; + + iph->ihl = 5; + iph->version = 4; + iph->ttl = 8; + iph->tot_len = htons(sizeof(*iph) + sizeof(struct udphdr) + payload_len); + iph->id = htons(1337); + iph->protocol = IPPROTO_UDP; + iph->saddr = htonl((172 << 24) | (17 << 16) | 2); + iph->daddr = htonl((172 << 24) | (17 << 16) | 1); + iph->check = build_ip_csum((void *) iph, iph->ihl << 1, 0); + + return iph->ihl << 2; +} + +static int build_udp_header(void *header, int payload_len) +{ + const int alen = sizeof(uint32_t); + struct udphdr *udph = header; + int len = sizeof(*udph) + payload_len; + + udph->source = htons(9); + udph->dest = htons(cfg_port); + udph->len = htons(len); + + if (cfg_use_csum_off) + udph->check = build_ip_csum(header - (2 * alen), alen, + htons(IPPROTO_UDP) + udph->len); + else + udph->check = 0; + + return sizeof(*udph); +} + +static int build_packet(int payload_len) +{ + int off = 0; + + off += build_vnet_header(tbuf); + off += build_eth_header(tbuf + off); + off += build_ipv4_header(tbuf + off, payload_len); + off += build_udp_header(tbuf + off, payload_len); + + if (off + payload_len > sizeof(tbuf)) + error(1, 0, "payload length exceeds max"); + + memset(tbuf + off, DATA_CHAR, payload_len); + + return off + payload_len; +} + +static void do_bind(int fd) +{ + struct sockaddr_ll laddr = {0}; + + laddr.sll_family = AF_PACKET; + laddr.sll_protocol = htons(ETH_P_IP); + laddr.sll_ifindex = if_nametoindex(cfg_ifname); + if (!laddr.sll_ifindex) + error(1, errno, "if_nametoindex"); + + if (bind(fd, (void *)&laddr, sizeof(laddr))) + error(1, errno, "bind"); +} + +static void do_send(int fd, char *buf, int len) +{ + int ret; + + if (!cfg_use_vnet) { + buf += sizeof(struct virtio_net_hdr); + len -= sizeof(struct virtio_net_hdr); + } + if (cfg_use_dgram) { + buf += ETH_HLEN; + len -= ETH_HLEN; + } + + if (cfg_use_bind) { + ret = write(fd, buf, len); + } else { + struct sockaddr_ll laddr = {0}; + + laddr.sll_protocol = htons(ETH_P_IP); + laddr.sll_ifindex = if_nametoindex(cfg_ifname); + if (!laddr.sll_ifindex) + error(1, errno, "if_nametoindex"); + + ret = sendto(fd, buf, len, 0, (void *)&laddr, sizeof(laddr)); + } + + if (ret == -1) + error(1, errno, "write"); + if (ret != len) + error(1, 0, "write: %u %u", ret, len); + + fprintf(stderr, "tx: %u\n", ret); +} + +static int do_tx(void) +{ + const int one = 1; + int fd, len; + + fd = socket(PF_PACKET, cfg_use_dgram ? SOCK_DGRAM : SOCK_RAW, 0); + if (fd == -1) + error(1, errno, "socket t"); + + if (cfg_use_bind) + do_bind(fd); + + if (cfg_use_qdisc_bypass && + setsockopt(fd, SOL_PACKET, PACKET_QDISC_BYPASS, &one, sizeof(one))) + error(1, errno, "setsockopt qdisc bypass"); + + if (cfg_use_vnet && + setsockopt(fd, SOL_PACKET, PACKET_VNET_HDR, &one, sizeof(one))) + error(1, errno, "setsockopt vnet"); + + len = build_packet(cfg_payload_len); + + if (cfg_truncate_len < len) + len = cfg_truncate_len; + + do_send(fd, tbuf, len); + + if (close(fd)) + error(1, errno, "close t"); + + return len; +} + +static int setup_rx(void) +{ + struct timeval tv = { .tv_usec = 100 * 1000 }; + struct sockaddr_in raddr = {0}; + int fd; + + fd = socket(PF_INET, SOCK_DGRAM, 0); + if (fd == -1) + error(1, errno, "socket r"); + + if (setsockopt(fd, SOL_SOCKET, SO_RCVTIMEO, &tv, sizeof(tv))) + error(1, errno, "setsockopt rcv timeout"); + + raddr.sin_family = AF_INET; + raddr.sin_port = htons(cfg_port); + raddr.sin_addr.s_addr = htonl(INADDR_ANY); + + if (bind(fd, (void *)&raddr, sizeof(raddr))) + error(1, errno, "bind r"); + + return fd; +} + +static void do_rx(int fd, int expected_len, char *expected) +{ + int ret; + + ret = recv(fd, rbuf, sizeof(rbuf), 0); + if (ret == -1) + error(1, errno, "recv"); + if (ret != expected_len) + error(1, 0, "recv: %u != %u", ret, expected_len); + + if (memcmp(rbuf, expected, ret)) + error(1, 0, "recv: data mismatch"); + + fprintf(stderr, "rx: %u\n", ret); +} + +static int setup_sniffer(void) +{ + struct timeval tv = { .tv_usec = 100 * 1000 }; + int fd; + + fd = socket(PF_PACKET, SOCK_RAW, 0); + if (fd == -1) + error(1, errno, "socket p"); + + if (setsockopt(fd, SOL_SOCKET, SO_RCVTIMEO, &tv, sizeof(tv))) + error(1, errno, "setsockopt rcv timeout"); + + pair_udp_setfilter(fd); + do_bind(fd); + + return fd; +} + +static void parse_opts(int argc, char **argv) +{ + int c; + + while ((c = getopt(argc, argv, "bcCdgl:qt:vV")) != -1) { + switch (c) { + case 'b': + cfg_use_bind = true; + break; + case 'c': + cfg_use_csum_off = true; + break; + case 'C': + cfg_use_csum_off_bad = true; + break; + case 'd': + cfg_use_dgram = true; + break; + case 'g': + cfg_use_gso = true; + break; + case 'l': + cfg_payload_len = strtoul(optarg, NULL, 0); + break; + case 'q': + cfg_use_qdisc_bypass = true; + break; + case 't': + cfg_truncate_len = strtoul(optarg, NULL, 0); + break; + case 'v': + cfg_use_vnet = true; + break; + case 'V': + cfg_use_vlan = true; + break; + default: + error(1, 0, "%s: parse error", argv[0]); + } + } + + if (cfg_use_vlan && cfg_use_dgram) + error(1, 0, "option vlan (-V) conflicts with dgram (-d)"); + + if (cfg_use_csum_off && !cfg_use_vnet) + error(1, 0, "option csum offload (-c) requires vnet (-v)"); + + if (cfg_use_csum_off_bad && !cfg_use_csum_off) + error(1, 0, "option csum bad (-C) requires csum offload (-c)"); + + if (cfg_use_gso && !cfg_use_csum_off) + error(1, 0, "option gso (-g) requires csum offload (-c)"); +} + +static void run_test(void) +{ + int fdr, fds, total_len; + + fdr = setup_rx(); + fds = setup_sniffer(); + + total_len = do_tx(); + + /* BPF filter accepts only this length, vlan changes MAC */ + if (cfg_payload_len == DATA_LEN && !cfg_use_vlan) + do_rx(fds, total_len - sizeof(struct virtio_net_hdr), + tbuf + sizeof(struct virtio_net_hdr)); + + do_rx(fdr, cfg_payload_len, tbuf + total_len - cfg_payload_len); + + if (close(fds)) + error(1, errno, "close s"); + if (close(fdr)) + error(1, errno, "close r"); +} + +int main(int argc, char **argv) +{ + parse_opts(argc, argv); + + if (system("ip link set dev lo mtu 1500")) + error(1, errno, "ip link set mtu"); + if (system("ip addr add dev lo 172.17.0.1/24")) + error(1, errno, "ip addr add"); + + run_test(); + + fprintf(stderr, "OK\n\n"); + return 0; +} diff --git a/tools/testing/selftests/net/psock_snd.sh b/tools/testing/selftests/net/psock_snd.sh new file mode 100755 index 0000000..6331d91 --- /dev/null +++ b/tools/testing/selftests/net/psock_snd.sh @@ -0,0 +1,98 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 +# +# Run a series of packet socket send regression tests + +set -e + +readonly mtu=1500 +readonly iphlen=20 +readonly udphlen=8 + +readonly vnet_hlen=10 +readonly eth_hlen=14 + +readonly mss="$((${mtu} - ${iphlen} - ${udphlen}))" +readonly mss_exceeds="$((${mss} + 1))" + +readonly max_mtu=65535 +readonly max_mss="$((${max_mtu} - ${iphlen} - ${udphlen}))" +readonly max_mss_exceeds="$((${max_mss} + 1))" + +# functional checks (not a full cross-product) + +echo "dgram" +./in_netns.sh ./psock_snd -d + +echo "dgram bind" +./in_netns.sh ./psock_snd -d -b + +echo "raw" +./in_netns.sh ./psock_snd + +echo "raw bind" +./in_netns.sh ./psock_snd -b + +echo "raw qdisc bypass" +./in_netns.sh ./psock_snd -q + +echo "raw vlan" +./in_netns.sh ./psock_snd -V + +echo "raw vnet hdr" +./in_netns.sh ./psock_snd -v + +echo "raw csum_off" +./in_netns.sh ./psock_snd -v -c + +echo "raw csum_off with bad offset (fails)" +(! ./in_netns.sh ./psock_snd -v -c -C) + + +# bounds check: send {max, max + 1, min, min - 1} lengths + +echo "raw min size" +./in_netns.sh ./psock_snd -l 0 + +echo "raw mtu size" +./in_netns.sh ./psock_snd -l "${mss}" + +echo "raw mtu size + 1 (fails)" +(! ./in_netns.sh ./psock_snd -l "${mss_exceeds}") + +# fails due to ARPHRD_ETHER check in packet_extra_vlan_len_allowed +# +# echo "raw vlan mtu size" +# ./in_netns.sh ./psock_snd -V -l "${mss}" + +echo "raw vlan mtu size + 1 (fails)" +(! ./in_netns.sh ./psock_snd -V -l "${mss_exceeds}") + +echo "dgram mtu size" +./in_netns.sh ./psock_snd -d -l "${mss}" + +echo "dgram mtu size + 1 (fails)" +(! ./in_netns.sh ./psock_snd -d -l "${mss_exceeds}") + +echo "raw truncate hlen (fails: does not arrive)" +(! ./in_netns.sh ./psock_snd -t "$((${vnet_hlen} + ${eth_hlen}))") + +echo "raw truncate hlen - 1 (fails: EINVAL)" +(! ./in_netns.sh ./psock_snd -t "$((${vnet_hlen} + ${eth_hlen} - 1))") + + +# gso checks: implies -l, because with gso len must exceed gso_size + +echo "raw gso min size" +./in_netns.sh ./psock_snd -v -c -g -l "${mss_exceeds}" + +echo "raw gso min size - 1 (fails)" +(! ./in_netns.sh ./psock_snd -v -c -g -l "${mss}") + +echo "raw gso max size" +./in_netns.sh ./psock_snd -v -c -g -l "${max_mss}" + +echo "raw gso max size + 1 (fails)" +(! ./in_netns.sh ./psock_snd -v -c -g -l "${max_mss_exceeds}") + +echo "OK. All tests passed" diff --git a/tools/testing/selftests/net/rtnetlink.sh b/tools/testing/selftests/net/rtnetlink.sh index fb37678..0d7a44f 100755 --- a/tools/testing/selftests/net/rtnetlink.sh +++ b/tools/testing/selftests/net/rtnetlink.sh @@ -505,6 +505,108 @@ kci_test_macsec() echo "PASS: macsec" } +#------------------------------------------------------------------- +# Example commands +# ip x s add proto esp src 14.0.0.52 dst 14.0.0.70 \ +# spi 0x07 mode transport reqid 0x07 replay-window 32 \ +# aead 'rfc4106(gcm(aes))' 1234567890123456dcba 128 \ +# sel src 14.0.0.52/24 dst 14.0.0.70/24 +# ip x p add dir out src 14.0.0.52/24 dst 14.0.0.70/24 \ +# tmpl proto esp src 14.0.0.52 dst 14.0.0.70 \ +# spi 0x07 mode transport reqid 0x07 +# +# Subcommands not tested +# ip x s update +# ip x s allocspi +# ip x s deleteall +# ip x p update +# ip x p deleteall +# ip x p set +#------------------------------------------------------------------- +kci_test_ipsec() +{ + srcip="14.0.0.52" + dstip="14.0.0.70" + algo="aead rfc4106(gcm(aes)) 0x3132333435363738393031323334353664636261 128" + + # flush to be sure there's nothing configured + ip x s flush ; ip x p flush + check_err $? + + # start the monitor in the background + tmpfile=`mktemp ipsectestXXX` + ip x m > $tmpfile & + mpid=$! + sleep 0.2 + + ipsecid="proto esp src $srcip dst $dstip spi 0x07" + ip x s add $ipsecid \ + mode transport reqid 0x07 replay-window 32 \ + $algo sel src $srcip/24 dst $dstip/24 + check_err $? + + lines=`ip x s list | grep $srcip | grep $dstip | wc -l` + test $lines -eq 2 + check_err $? + + ip x s count | grep -q "SAD count 1" + check_err $? + + lines=`ip x s get $ipsecid | grep $srcip | grep $dstip | wc -l` + test $lines -eq 2 + check_err $? + + ip x s delete $ipsecid + check_err $? + + lines=`ip x s list | wc -l` + test $lines -eq 0 + check_err $? + + ipsecsel="dir out src $srcip/24 dst $dstip/24" + ip x p add $ipsecsel \ + tmpl proto esp src $srcip dst $dstip \ + spi 0x07 mode transport reqid 0x07 + check_err $? + + lines=`ip x p list | grep $srcip | grep $dstip | wc -l` + test $lines -eq 2 + check_err $? + + ip x p count | grep -q "SPD IN 0 OUT 1 FWD 0" + check_err $? + + lines=`ip x p get $ipsecsel | grep $srcip | grep $dstip | wc -l` + test $lines -eq 2 + check_err $? + + ip x p delete $ipsecsel + check_err $? + + lines=`ip x p list | wc -l` + test $lines -eq 0 + check_err $? + + # check the monitor results + kill $mpid + lines=`wc -l $tmpfile | cut "-d " -f1` + test $lines -eq 20 + check_err $? + rm -rf $tmpfile + + # clean up any leftovers + ip x s flush + check_err $? + ip x p flush + check_err $? + + if [ $ret -ne 0 ]; then + echo "FAIL: ipsec" + return 1 + fi + echo "PASS: ipsec" +} + kci_test_gretap() { testns="testns" @@ -758,6 +860,7 @@ kci_test_rtnl() kci_test_vrf kci_test_encap kci_test_macsec + kci_test_ipsec kci_del_dummy } diff --git a/tools/testing/selftests/net/tcp_inq.c b/tools/testing/selftests/net/tcp_inq.c new file mode 100644 index 0000000..d044b29 --- /dev/null +++ b/tools/testing/selftests/net/tcp_inq.c @@ -0,0 +1,189 @@ +/* + * Copyright 2018 Google Inc. + * Author: Soheil Hassas Yeganeh (soheil@google.com) + * + * Simple example on how to use TCP_INQ and TCP_CM_INQ. + * + * License (GPLv2): + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. * See the GNU General Public License for + * more details. + */ +#define _GNU_SOURCE + +#include <error.h> +#include <netinet/in.h> +#include <netinet/tcp.h> +#include <pthread.h> +#include <stdio.h> +#include <errno.h> +#include <stdlib.h> +#include <string.h> +#include <sys/socket.h> +#include <unistd.h> + +#ifndef TCP_INQ +#define TCP_INQ 36 +#endif + +#ifndef TCP_CM_INQ +#define TCP_CM_INQ TCP_INQ +#endif + +#define BUF_SIZE 8192 +#define CMSG_SIZE 32 + +static int family = AF_INET6; +static socklen_t addr_len = sizeof(struct sockaddr_in6); +static int port = 4974; + +static void setup_loopback_addr(int family, struct sockaddr_storage *sockaddr) +{ + struct sockaddr_in6 *addr6 = (void *) sockaddr; + struct sockaddr_in *addr4 = (void *) sockaddr; + + switch (family) { + case PF_INET: + memset(addr4, 0, sizeof(*addr4)); + addr4->sin_family = AF_INET; + addr4->sin_addr.s_addr = htonl(INADDR_LOOPBACK); + addr4->sin_port = htons(port); + break; + case PF_INET6: + memset(addr6, 0, sizeof(*addr6)); + addr6->sin6_family = AF_INET6; + addr6->sin6_addr = in6addr_loopback; + addr6->sin6_port = htons(port); + break; + default: + error(1, 0, "illegal family"); + } +} + +void *start_server(void *arg) +{ + int server_fd = (int)(unsigned long)arg; + struct sockaddr_in addr; + socklen_t addrlen = sizeof(addr); + char *buf; + int fd; + int r; + + buf = malloc(BUF_SIZE); + + for (;;) { + fd = accept(server_fd, (struct sockaddr *)&addr, &addrlen); + if (fd == -1) { + perror("accept"); + break; + } + do { + r = send(fd, buf, BUF_SIZE, 0); + } while (r < 0 && errno == EINTR); + if (r < 0) + perror("send"); + if (r != BUF_SIZE) + fprintf(stderr, "can only send %d bytes\n", r); + /* TCP_INQ can overestimate in-queue by one byte if we send + * the FIN packet. Sleep for 1 second, so that the client + * likely invoked recvmsg(). + */ + sleep(1); + close(fd); + } + + free(buf); + close(server_fd); + pthread_exit(0); +} + +int main(int argc, char *argv[]) +{ + struct sockaddr_storage listen_addr, addr; + int c, one = 1, inq = -1; + pthread_t server_thread; + char cmsgbuf[CMSG_SIZE]; + struct iovec iov[1]; + struct cmsghdr *cm; + struct msghdr msg; + int server_fd, fd; + char *buf; + + while ((c = getopt(argc, argv, "46p:")) != -1) { + switch (c) { + case '4': + family = PF_INET; + addr_len = sizeof(struct sockaddr_in); + break; + case '6': + family = PF_INET6; + addr_len = sizeof(struct sockaddr_in6); + break; + case 'p': + port = atoi(optarg); + break; + } + } + + server_fd = socket(family, SOCK_STREAM, 0); + if (server_fd < 0) + error(1, errno, "server socket"); + setup_loopback_addr(family, &listen_addr); + if (setsockopt(server_fd, SOL_SOCKET, SO_REUSEADDR, + &one, sizeof(one)) != 0) + error(1, errno, "setsockopt(SO_REUSEADDR)"); + if (bind(server_fd, (const struct sockaddr *)&listen_addr, + addr_len) == -1) + error(1, errno, "bind"); + if (listen(server_fd, 128) == -1) + error(1, errno, "listen"); + if (pthread_create(&server_thread, NULL, start_server, + (void *)(unsigned long)server_fd) != 0) + error(1, errno, "pthread_create"); + + fd = socket(family, SOCK_STREAM, 0); + if (fd < 0) + error(1, errno, "client socket"); + setup_loopback_addr(family, &addr); + if (connect(fd, (const struct sockaddr *)&addr, addr_len) == -1) + error(1, errno, "connect"); + if (setsockopt(fd, SOL_TCP, TCP_INQ, &one, sizeof(one)) != 0) + error(1, errno, "setsockopt(TCP_INQ)"); + + msg.msg_name = NULL; + msg.msg_namelen = 0; + msg.msg_iov = iov; + msg.msg_iovlen = 1; + msg.msg_control = cmsgbuf; + msg.msg_controllen = sizeof(cmsgbuf); + msg.msg_flags = 0; + + buf = malloc(BUF_SIZE); + iov[0].iov_base = buf; + iov[0].iov_len = BUF_SIZE / 2; + + if (recvmsg(fd, &msg, 0) != iov[0].iov_len) + error(1, errno, "recvmsg"); + if (msg.msg_flags & MSG_CTRUNC) + error(1, 0, "control message is truncated"); + + for (cm = CMSG_FIRSTHDR(&msg); cm; cm = CMSG_NXTHDR(&msg, cm)) + if (cm->cmsg_level == SOL_TCP && cm->cmsg_type == TCP_CM_INQ) + inq = *((int *) CMSG_DATA(cm)); + + if (inq != BUF_SIZE - iov[0].iov_len) { + fprintf(stderr, "unexpected inq: %d\n", inq); + exit(1); + } + + printf("PASSED\n"); + free(buf); + close(fd); + return 0; +} diff --git a/tools/testing/selftests/net/tcp_mmap.c b/tools/testing/selftests/net/tcp_mmap.c new file mode 100644 index 0000000..77f7627 --- /dev/null +++ b/tools/testing/selftests/net/tcp_mmap.c @@ -0,0 +1,447 @@ +/* + * Copyright 2018 Google Inc. + * Author: Eric Dumazet (edumazet@google.com) + * + * Reference program demonstrating tcp mmap() usage, + * and SO_RCVLOWAT hints for receiver. + * + * Note : NIC with header split is needed to use mmap() on TCP : + * Each incoming frame must be a multiple of PAGE_SIZE bytes of TCP payload. + * + * How to use on loopback interface : + * + * ifconfig lo mtu 61512 # 15*4096 + 40 (ipv6 header) + 32 (TCP with TS option header) + * tcp_mmap -s -z & + * tcp_mmap -H ::1 -z + * + * Or leave default lo mtu, but use -M option to set TCP_MAXSEG option to (4096 + 12) + * (4096 : page size on x86, 12: TCP TS option length) + * tcp_mmap -s -z -M $((4096+12)) & + * tcp_mmap -H ::1 -z -M $((4096+12)) + * + * Note: -z option on sender uses MSG_ZEROCOPY, which forces a copy when packets go through loopback interface. + * We might use sendfile() instead, but really this test program is about mmap(), for receivers ;) + * + * $ ./tcp_mmap -s & # Without mmap() + * $ for i in {1..4}; do ./tcp_mmap -H ::1 -z ; done + * received 32768 MB (0 % mmap'ed) in 14.1157 s, 19.4732 Gbit + * cpu usage user:0.057 sys:7.815, 240.234 usec per MB, 65531 c-switches + * received 32768 MB (0 % mmap'ed) in 14.6833 s, 18.7204 Gbit + * cpu usage user:0.043 sys:8.103, 248.596 usec per MB, 65524 c-switches + * received 32768 MB (0 % mmap'ed) in 11.143 s, 24.6682 Gbit + * cpu usage user:0.044 sys:6.576, 202.026 usec per MB, 65519 c-switches + * received 32768 MB (0 % mmap'ed) in 14.9056 s, 18.4413 Gbit + * cpu usage user:0.036 sys:8.193, 251.129 usec per MB, 65530 c-switches + * $ kill %1 # kill tcp_mmap server + * + * $ ./tcp_mmap -s -z & # With mmap() + * $ for i in {1..4}; do ./tcp_mmap -H ::1 -z ; done + * received 32768 MB (99.9939 % mmap'ed) in 6.73792 s, 40.7956 Gbit + * cpu usage user:0.045 sys:2.827, 87.6465 usec per MB, 65532 c-switches + * received 32768 MB (99.9939 % mmap'ed) in 7.26732 s, 37.8238 Gbit + * cpu usage user:0.037 sys:3.087, 95.3369 usec per MB, 65532 c-switches + * received 32768 MB (99.9939 % mmap'ed) in 7.61661 s, 36.0893 Gbit + * cpu usage user:0.046 sys:3.559, 110.016 usec per MB, 65529 c-switches + * received 32768 MB (99.9939 % mmap'ed) in 7.43764 s, 36.9577 Gbit + * cpu usage user:0.035 sys:3.467, 106.873 usec per MB, 65530 c-switches + * + * License (GPLv2): + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. * See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. + */ +#define _GNU_SOURCE +#include <pthread.h> +#include <sys/types.h> +#include <fcntl.h> +#include <error.h> +#include <sys/socket.h> +#include <sys/mman.h> +#include <sys/resource.h> +#include <unistd.h> +#include <string.h> +#include <stdlib.h> +#include <stdio.h> +#include <errno.h> +#include <time.h> +#include <sys/time.h> +#include <netinet/in.h> +#include <arpa/inet.h> +#include <poll.h> +#include <linux/tcp.h> +#include <assert.h> + +#ifndef MSG_ZEROCOPY +#define MSG_ZEROCOPY 0x4000000 +#endif + +#define FILE_SZ (1UL << 35) +static int cfg_family = AF_INET6; +static socklen_t cfg_alen = sizeof(struct sockaddr_in6); +static int cfg_port = 8787; + +static int rcvbuf; /* Default: autotuning. Can be set with -r <integer> option */ +static int sndbuf; /* Default: autotuning. Can be set with -w <integer> option */ +static int zflg; /* zero copy option. (MSG_ZEROCOPY for sender, mmap() for receiver */ +static int xflg; /* hash received data (simple xor) (-h option) */ +static int keepflag; /* -k option: receiver shall keep all received file in memory (no munmap() calls) */ + +static int chunk_size = 512*1024; + +unsigned long htotal; + +static inline void prefetch(const void *x) +{ +#if defined(__x86_64__) + asm volatile("prefetcht0 %P0" : : "m" (*(const char *)x)); +#endif +} + +void hash_zone(void *zone, unsigned int length) +{ + unsigned long temp = htotal; + + while (length >= 8*sizeof(long)) { + prefetch(zone + 384); + temp ^= *(unsigned long *)zone; + temp ^= *(unsigned long *)(zone + sizeof(long)); + temp ^= *(unsigned long *)(zone + 2*sizeof(long)); + temp ^= *(unsigned long *)(zone + 3*sizeof(long)); + temp ^= *(unsigned long *)(zone + 4*sizeof(long)); + temp ^= *(unsigned long *)(zone + 5*sizeof(long)); + temp ^= *(unsigned long *)(zone + 6*sizeof(long)); + temp ^= *(unsigned long *)(zone + 7*sizeof(long)); + zone += 8*sizeof(long); + length -= 8*sizeof(long); + } + while (length >= 1) { + temp ^= *(unsigned char *)zone; + zone += 1; + length--; + } + htotal = temp; +} + +void *child_thread(void *arg) +{ + unsigned long total_mmap = 0, total = 0; + struct tcp_zerocopy_receive zc; + unsigned long delta_usec; + int flags = MAP_SHARED; + struct timeval t0, t1; + char *buffer = NULL; + void *addr = NULL; + double throughput; + struct rusage ru; + int lu, fd; + + fd = (int)(unsigned long)arg; + + gettimeofday(&t0, NULL); + + fcntl(fd, F_SETFL, O_NDELAY); + buffer = malloc(chunk_size); + if (!buffer) { + perror("malloc"); + goto error; + } + if (zflg) { + addr = mmap(NULL, chunk_size, PROT_READ, flags, fd, 0); + if (addr == (void *)-1) + zflg = 0; + } + while (1) { + struct pollfd pfd = { .fd = fd, .events = POLLIN, }; + int sub; + + poll(&pfd, 1, 10000); + if (zflg) { + socklen_t zc_len = sizeof(zc); + int res; + + zc.address = (__u64)addr; + zc.length = chunk_size; + zc.recv_skip_hint = 0; + res = getsockopt(fd, IPPROTO_TCP, TCP_ZEROCOPY_RECEIVE, + &zc, &zc_len); + if (res == -1) + break; + + if (zc.length) { + assert(zc.length <= chunk_size); + total_mmap += zc.length; + if (xflg) + hash_zone(addr, zc.length); + total += zc.length; + } + if (zc.recv_skip_hint) { + assert(zc.recv_skip_hint <= chunk_size); + lu = read(fd, buffer, zc.recv_skip_hint); + if (lu > 0) { + if (xflg) + hash_zone(buffer, lu); + total += lu; + } + } + continue; + } + sub = 0; + while (sub < chunk_size) { + lu = read(fd, buffer + sub, chunk_size - sub); + if (lu == 0) + goto end; + if (lu < 0) + break; + if (xflg) + hash_zone(buffer + sub, lu); + total += lu; + sub += lu; + } + } +end: + gettimeofday(&t1, NULL); + delta_usec = (t1.tv_sec - t0.tv_sec) * 1000000 + t1.tv_usec - t0.tv_usec; + + throughput = 0; + if (delta_usec) + throughput = total * 8.0 / (double)delta_usec / 1000.0; + getrusage(RUSAGE_THREAD, &ru); + if (total > 1024*1024) { + unsigned long total_usec; + unsigned long mb = total >> 20; + total_usec = 1000000*ru.ru_utime.tv_sec + ru.ru_utime.tv_usec + + 1000000*ru.ru_stime.tv_sec + ru.ru_stime.tv_usec; + printf("received %lg MB (%lg %% mmap'ed) in %lg s, %lg Gbit\n" + " cpu usage user:%lg sys:%lg, %lg usec per MB, %lu c-switches\n", + total / (1024.0 * 1024.0), + 100.0*total_mmap/total, + (double)delta_usec / 1000000.0, + throughput, + (double)ru.ru_utime.tv_sec + (double)ru.ru_utime.tv_usec / 1000000.0, + (double)ru.ru_stime.tv_sec + (double)ru.ru_stime.tv_usec / 1000000.0, + (double)total_usec/mb, + ru.ru_nvcsw); + } +error: + free(buffer); + close(fd); + if (zflg) + munmap(addr, chunk_size); + pthread_exit(0); +} + +static void apply_rcvsnd_buf(int fd) +{ + if (rcvbuf && setsockopt(fd, SOL_SOCKET, + SO_RCVBUF, &rcvbuf, sizeof(rcvbuf)) == -1) { + perror("setsockopt SO_RCVBUF"); + } + + if (sndbuf && setsockopt(fd, SOL_SOCKET, + SO_SNDBUF, &sndbuf, sizeof(sndbuf)) == -1) { + perror("setsockopt SO_SNDBUF"); + } +} + + +static void setup_sockaddr(int domain, const char *str_addr, + struct sockaddr_storage *sockaddr) +{ + struct sockaddr_in6 *addr6 = (void *) sockaddr; + struct sockaddr_in *addr4 = (void *) sockaddr; + + switch (domain) { + case PF_INET: + memset(addr4, 0, sizeof(*addr4)); + addr4->sin_family = AF_INET; + addr4->sin_port = htons(cfg_port); + if (str_addr && + inet_pton(AF_INET, str_addr, &(addr4->sin_addr)) != 1) + error(1, 0, "ipv4 parse error: %s", str_addr); + break; + case PF_INET6: + memset(addr6, 0, sizeof(*addr6)); + addr6->sin6_family = AF_INET6; + addr6->sin6_port = htons(cfg_port); + if (str_addr && + inet_pton(AF_INET6, str_addr, &(addr6->sin6_addr)) != 1) + error(1, 0, "ipv6 parse error: %s", str_addr); + break; + default: + error(1, 0, "illegal domain"); + } +} + +static void do_accept(int fdlisten) +{ + if (setsockopt(fdlisten, SOL_SOCKET, SO_RCVLOWAT, + &chunk_size, sizeof(chunk_size)) == -1) { + perror("setsockopt SO_RCVLOWAT"); + } + + apply_rcvsnd_buf(fdlisten); + + while (1) { + struct sockaddr_in addr; + socklen_t addrlen = sizeof(addr); + pthread_t th; + int fd, res; + + fd = accept(fdlisten, (struct sockaddr *)&addr, &addrlen); + if (fd == -1) { + perror("accept"); + continue; + } + res = pthread_create(&th, NULL, child_thread, + (void *)(unsigned long)fd); + if (res) { + errno = res; + perror("pthread_create"); + close(fd); + } + } +} + +int main(int argc, char *argv[]) +{ + struct sockaddr_storage listenaddr, addr; + unsigned int max_pacing_rate = 0; + unsigned long total = 0; + char *host = NULL; + int fd, c, on = 1; + char *buffer; + int sflg = 0; + int mss = 0; + + while ((c = getopt(argc, argv, "46p:svr:w:H:zxkP:M:")) != -1) { + switch (c) { + case '4': + cfg_family = PF_INET; + cfg_alen = sizeof(struct sockaddr_in); + break; + case '6': + cfg_family = PF_INET6; + cfg_alen = sizeof(struct sockaddr_in6); + break; + case 'p': + cfg_port = atoi(optarg); + break; + case 'H': + host = optarg; + break; + case 's': /* server : listen for incoming connections */ + sflg++; + break; + case 'r': + rcvbuf = atoi(optarg); + break; + case 'w': + sndbuf = atoi(optarg); + break; + case 'z': + zflg = 1; + break; + case 'M': + mss = atoi(optarg); + break; + case 'x': + xflg = 1; + break; + case 'k': + keepflag = 1; + break; + case 'P': + max_pacing_rate = atoi(optarg) ; + break; + default: + exit(1); + } + } + if (sflg) { + int fdlisten = socket(cfg_family, SOCK_STREAM, 0); + + if (fdlisten == -1) { + perror("socket"); + exit(1); + } + apply_rcvsnd_buf(fdlisten); + setsockopt(fdlisten, SOL_SOCKET, SO_REUSEADDR, &on, sizeof(on)); + + setup_sockaddr(cfg_family, host, &listenaddr); + + if (mss && + setsockopt(fdlisten, IPPROTO_TCP, TCP_MAXSEG, + &mss, sizeof(mss)) == -1) { + perror("setsockopt TCP_MAXSEG"); + exit(1); + } + if (bind(fdlisten, (const struct sockaddr *)&listenaddr, cfg_alen) == -1) { + perror("bind"); + exit(1); + } + if (listen(fdlisten, 128) == -1) { + perror("listen"); + exit(1); + } + do_accept(fdlisten); + } + buffer = mmap(NULL, chunk_size, PROT_READ | PROT_WRITE, + MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); + if (buffer == (char *)-1) { + perror("mmap"); + exit(1); + } + + fd = socket(AF_INET6, SOCK_STREAM, 0); + if (fd == -1) { + perror("socket"); + exit(1); + } + apply_rcvsnd_buf(fd); + + setup_sockaddr(cfg_family, host, &addr); + + if (mss && + setsockopt(fd, IPPROTO_TCP, TCP_MAXSEG, &mss, sizeof(mss)) == -1) { + perror("setsockopt TCP_MAXSEG"); + exit(1); + } + if (connect(fd, (const struct sockaddr *)&addr, cfg_alen) == -1) { + perror("connect"); + exit(1); + } + if (max_pacing_rate && + setsockopt(fd, SOL_SOCKET, SO_MAX_PACING_RATE, + &max_pacing_rate, sizeof(max_pacing_rate)) == -1) + perror("setsockopt SO_MAX_PACING_RATE"); + + if (zflg && setsockopt(fd, SOL_SOCKET, SO_ZEROCOPY, + &on, sizeof(on)) == -1) { + perror("setsockopt SO_ZEROCOPY, (-z option disabled)"); + zflg = 0; + } + while (total < FILE_SZ) { + long wr = FILE_SZ - total; + + if (wr > chunk_size) + wr = chunk_size; + /* Note : we just want to fill the pipe with 0 bytes */ + wr = send(fd, buffer, wr, zflg ? MSG_ZEROCOPY : 0); + if (wr <= 0) + break; + total += wr; + } + close(fd); + munmap(buffer, chunk_size); + return 0; +} diff --git a/tools/testing/selftests/net/udpgso.c b/tools/testing/selftests/net/udpgso.c new file mode 100644 index 0000000..e279051 --- /dev/null +++ b/tools/testing/selftests/net/udpgso.c @@ -0,0 +1,693 @@ +// SPDX-License-Identifier: GPL-2.0 + +#define _GNU_SOURCE + +#include <stddef.h> +#include <arpa/inet.h> +#include <error.h> +#include <errno.h> +#include <net/if.h> +#include <linux/in.h> +#include <linux/netlink.h> +#include <linux/rtnetlink.h> +#include <netinet/if_ether.h> +#include <netinet/ip.h> +#include <netinet/ip6.h> +#include <netinet/udp.h> +#include <stdbool.h> +#include <stdlib.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <sys/ioctl.h> +#include <sys/socket.h> +#include <sys/stat.h> +#include <sys/time.h> +#include <sys/types.h> +#include <unistd.h> + +#ifndef ETH_MAX_MTU +#define ETH_MAX_MTU 0xFFFFU +#endif + +#ifndef UDP_SEGMENT +#define UDP_SEGMENT 103 +#endif + +#ifndef UDP_MAX_SEGMENTS +#define UDP_MAX_SEGMENTS (1 << 6UL) +#endif + +#define CONST_MTU_TEST 1500 + +#define CONST_HDRLEN_V4 (sizeof(struct iphdr) + sizeof(struct udphdr)) +#define CONST_HDRLEN_V6 (sizeof(struct ip6_hdr) + sizeof(struct udphdr)) + +#define CONST_MSS_V4 (CONST_MTU_TEST - CONST_HDRLEN_V4) +#define CONST_MSS_V6 (CONST_MTU_TEST - CONST_HDRLEN_V6) + +#define CONST_MAX_SEGS_V4 (ETH_MAX_MTU / CONST_MSS_V4) +#define CONST_MAX_SEGS_V6 (ETH_MAX_MTU / CONST_MSS_V6) + +static bool cfg_do_ipv4; +static bool cfg_do_ipv6; +static bool cfg_do_connected; +static bool cfg_do_connectionless; +static bool cfg_do_msgmore; +static bool cfg_do_setsockopt; +static int cfg_specific_test_id = -1; + +static const char cfg_ifname[] = "lo"; +static unsigned short cfg_port = 9000; + +static char buf[ETH_MAX_MTU]; + +struct testcase { + int tlen; /* send() buffer size, may exceed mss */ + bool tfail; /* send() call is expected to fail */ + int gso_len; /* mss after applying gso */ + int r_num_mss; /* recv(): number of calls of full mss */ + int r_len_last; /* recv(): size of last non-mss dgram, if any */ +}; + +const struct in6_addr addr6 = IN6ADDR_LOOPBACK_INIT; +const struct in_addr addr4 = { .s_addr = __constant_htonl(INADDR_LOOPBACK + 2) }; + +struct testcase testcases_v4[] = { + { + /* no GSO: send a single byte */ + .tlen = 1, + .r_len_last = 1, + }, + { + /* no GSO: send a single MSS */ + .tlen = CONST_MSS_V4, + .r_num_mss = 1, + }, + { + /* no GSO: send a single MSS + 1B: fail */ + .tlen = CONST_MSS_V4 + 1, + .tfail = true, + }, + { + /* send a single MSS: will fail with GSO, because the segment + * logic in udp4_ufo_fragment demands a gso skb to be > MTU + */ + .tlen = CONST_MSS_V4, + .gso_len = CONST_MSS_V4, + .tfail = true, + .r_num_mss = 1, + }, + { + /* send a single MSS + 1B */ + .tlen = CONST_MSS_V4 + 1, + .gso_len = CONST_MSS_V4, + .r_num_mss = 1, + .r_len_last = 1, + }, + { + /* send exactly 2 MSS */ + .tlen = CONST_MSS_V4 * 2, + .gso_len = CONST_MSS_V4, + .r_num_mss = 2, + }, + { + /* send 2 MSS + 1B */ + .tlen = (CONST_MSS_V4 * 2) + 1, + .gso_len = CONST_MSS_V4, + .r_num_mss = 2, + .r_len_last = 1, + }, + { + /* send MAX segs */ + .tlen = (ETH_MAX_MTU / CONST_MSS_V4) * CONST_MSS_V4, + .gso_len = CONST_MSS_V4, + .r_num_mss = (ETH_MAX_MTU / CONST_MSS_V4), + }, + + { + /* send MAX bytes */ + .tlen = ETH_MAX_MTU - CONST_HDRLEN_V4, + .gso_len = CONST_MSS_V4, + .r_num_mss = CONST_MAX_SEGS_V4, + .r_len_last = ETH_MAX_MTU - CONST_HDRLEN_V4 - + (CONST_MAX_SEGS_V4 * CONST_MSS_V4), + }, + { + /* send MAX + 1: fail */ + .tlen = ETH_MAX_MTU - CONST_HDRLEN_V4 + 1, + .gso_len = CONST_MSS_V4, + .tfail = true, + }, + { + /* send a single 1B MSS: will fail, see single MSS above */ + .tlen = 1, + .gso_len = 1, + .tfail = true, + .r_num_mss = 1, + }, + { + /* send 2 1B segments */ + .tlen = 2, + .gso_len = 1, + .r_num_mss = 2, + }, + { + /* send 2B + 2B + 1B segments */ + .tlen = 5, + .gso_len = 2, + .r_num_mss = 2, + .r_len_last = 1, + }, + { + /* send max number of min sized segments */ + .tlen = UDP_MAX_SEGMENTS - CONST_HDRLEN_V4, + .gso_len = 1, + .r_num_mss = UDP_MAX_SEGMENTS - CONST_HDRLEN_V4, + }, + { + /* send max number + 1 of min sized segments: fail */ + .tlen = UDP_MAX_SEGMENTS - CONST_HDRLEN_V4 + 1, + .gso_len = 1, + .tfail = true, + }, + { + /* EOL */ + } +}; + +#ifndef IP6_MAX_MTU +#define IP6_MAX_MTU (ETH_MAX_MTU + sizeof(struct ip6_hdr)) +#endif + +struct testcase testcases_v6[] = { + { + /* no GSO: send a single byte */ + .tlen = 1, + .r_len_last = 1, + }, + { + /* no GSO: send a single MSS */ + .tlen = CONST_MSS_V6, + .r_num_mss = 1, + }, + { + /* no GSO: send a single MSS + 1B: fail */ + .tlen = CONST_MSS_V6 + 1, + .tfail = true, + }, + { + /* send a single MSS: will fail with GSO, because the segment + * logic in udp4_ufo_fragment demands a gso skb to be > MTU + */ + .tlen = CONST_MSS_V6, + .gso_len = CONST_MSS_V6, + .tfail = true, + .r_num_mss = 1, + }, + { + /* send a single MSS + 1B */ + .tlen = CONST_MSS_V6 + 1, + .gso_len = CONST_MSS_V6, + .r_num_mss = 1, + .r_len_last = 1, + }, + { + /* send exactly 2 MSS */ + .tlen = CONST_MSS_V6 * 2, + .gso_len = CONST_MSS_V6, + .r_num_mss = 2, + }, + { + /* send 2 MSS + 1B */ + .tlen = (CONST_MSS_V6 * 2) + 1, + .gso_len = CONST_MSS_V6, + .r_num_mss = 2, + .r_len_last = 1, + }, + { + /* send MAX segs */ + .tlen = (IP6_MAX_MTU / CONST_MSS_V6) * CONST_MSS_V6, + .gso_len = CONST_MSS_V6, + .r_num_mss = (IP6_MAX_MTU / CONST_MSS_V6), + }, + + { + /* send MAX bytes */ + .tlen = IP6_MAX_MTU - CONST_HDRLEN_V6, + .gso_len = CONST_MSS_V6, + .r_num_mss = CONST_MAX_SEGS_V6, + .r_len_last = IP6_MAX_MTU - CONST_HDRLEN_V6 - + (CONST_MAX_SEGS_V6 * CONST_MSS_V6), + }, + { + /* send MAX + 1: fail */ + .tlen = IP6_MAX_MTU - CONST_HDRLEN_V6 + 1, + .gso_len = CONST_MSS_V6, + .tfail = true, + }, + { + /* send a single 1B MSS: will fail, see single MSS above */ + .tlen = 1, + .gso_len = 1, + .tfail = true, + .r_num_mss = 1, + }, + { + /* send 2 1B segments */ + .tlen = 2, + .gso_len = 1, + .r_num_mss = 2, + }, + { + /* send 2B + 2B + 1B segments */ + .tlen = 5, + .gso_len = 2, + .r_num_mss = 2, + .r_len_last = 1, + }, + { + /* send max number of min sized segments */ + .tlen = UDP_MAX_SEGMENTS - CONST_HDRLEN_V6, + .gso_len = 1, + .r_num_mss = UDP_MAX_SEGMENTS - CONST_HDRLEN_V6, + }, + { + /* send max number + 1 of min sized segments: fail */ + .tlen = UDP_MAX_SEGMENTS - CONST_HDRLEN_V6 + 1, + .gso_len = 1, + .tfail = true, + }, + { + /* EOL */ + } +}; + +static unsigned int get_device_mtu(int fd, const char *ifname) +{ + struct ifreq ifr; + + memset(&ifr, 0, sizeof(ifr)); + + strcpy(ifr.ifr_name, ifname); + + if (ioctl(fd, SIOCGIFMTU, &ifr)) + error(1, errno, "ioctl get mtu"); + + return ifr.ifr_mtu; +} + +static void __set_device_mtu(int fd, const char *ifname, unsigned int mtu) +{ + struct ifreq ifr; + + memset(&ifr, 0, sizeof(ifr)); + + ifr.ifr_mtu = mtu; + strcpy(ifr.ifr_name, ifname); + + if (ioctl(fd, SIOCSIFMTU, &ifr)) + error(1, errno, "ioctl set mtu"); +} + +static void set_device_mtu(int fd, int mtu) +{ + int val; + + val = get_device_mtu(fd, cfg_ifname); + fprintf(stderr, "device mtu (orig): %u\n", val); + + __set_device_mtu(fd, cfg_ifname, mtu); + val = get_device_mtu(fd, cfg_ifname); + if (val != mtu) + error(1, 0, "unable to set device mtu to %u\n", val); + + fprintf(stderr, "device mtu (test): %u\n", val); +} + +static void set_pmtu_discover(int fd, bool is_ipv4) +{ + int level, name, val; + + if (is_ipv4) { + level = SOL_IP; + name = IP_MTU_DISCOVER; + val = IP_PMTUDISC_DO; + } else { + level = SOL_IPV6; + name = IPV6_MTU_DISCOVER; + val = IPV6_PMTUDISC_DO; + } + + if (setsockopt(fd, level, name, &val, sizeof(val))) + error(1, errno, "setsockopt path mtu"); +} + +static unsigned int get_path_mtu(int fd, bool is_ipv4) +{ + socklen_t vallen; + unsigned int mtu; + int ret; + + vallen = sizeof(mtu); + if (is_ipv4) + ret = getsockopt(fd, SOL_IP, IP_MTU, &mtu, &vallen); + else + ret = getsockopt(fd, SOL_IPV6, IPV6_MTU, &mtu, &vallen); + + if (ret) + error(1, errno, "getsockopt mtu"); + + + fprintf(stderr, "path mtu (read): %u\n", mtu); + return mtu; +} + +/* very wordy version of system("ip route add dev lo mtu 1500 127.0.0.3/32") */ +static void set_route_mtu(int mtu, bool is_ipv4) +{ + struct sockaddr_nl nladdr = { .nl_family = AF_NETLINK }; + struct nlmsghdr *nh; + struct rtattr *rta; + struct rtmsg *rt; + char data[NLMSG_ALIGN(sizeof(*nh)) + + NLMSG_ALIGN(sizeof(*rt)) + + NLMSG_ALIGN(RTA_LENGTH(sizeof(addr6))) + + NLMSG_ALIGN(RTA_LENGTH(sizeof(int))) + + NLMSG_ALIGN(RTA_LENGTH(0) + RTA_LENGTH(sizeof(int)))]; + int fd, ret, alen, off = 0; + + alen = is_ipv4 ? sizeof(addr4) : sizeof(addr6); + + fd = socket(AF_NETLINK, SOCK_RAW, NETLINK_ROUTE); + if (fd == -1) + error(1, errno, "socket netlink"); + + memset(data, 0, sizeof(data)); + + nh = (void *)data; + nh->nlmsg_type = RTM_NEWROUTE; + nh->nlmsg_flags = NLM_F_REQUEST | NLM_F_CREATE; + off += NLMSG_ALIGN(sizeof(*nh)); + + rt = (void *)(data + off); + rt->rtm_family = is_ipv4 ? AF_INET : AF_INET6; + rt->rtm_table = RT_TABLE_MAIN; + rt->rtm_dst_len = alen << 3; + rt->rtm_protocol = RTPROT_BOOT; + rt->rtm_scope = RT_SCOPE_UNIVERSE; + rt->rtm_type = RTN_UNICAST; + off += NLMSG_ALIGN(sizeof(*rt)); + + rta = (void *)(data + off); + rta->rta_type = RTA_DST; + rta->rta_len = RTA_LENGTH(alen); + if (is_ipv4) + memcpy(RTA_DATA(rta), &addr4, alen); + else + memcpy(RTA_DATA(rta), &addr6, alen); + off += NLMSG_ALIGN(rta->rta_len); + + rta = (void *)(data + off); + rta->rta_type = RTA_OIF; + rta->rta_len = RTA_LENGTH(sizeof(int)); + *((int *)(RTA_DATA(rta))) = 1; //if_nametoindex("lo"); + off += NLMSG_ALIGN(rta->rta_len); + + /* MTU is a subtype in a metrics type */ + rta = (void *)(data + off); + rta->rta_type = RTA_METRICS; + rta->rta_len = RTA_LENGTH(0) + RTA_LENGTH(sizeof(int)); + off += NLMSG_ALIGN(rta->rta_len); + + /* now fill MTU subtype. Note that it fits within above rta_len */ + rta = (void *)(((char *) rta) + RTA_LENGTH(0)); + rta->rta_type = RTAX_MTU; + rta->rta_len = RTA_LENGTH(sizeof(int)); + *((int *)(RTA_DATA(rta))) = mtu; + + nh->nlmsg_len = off; + + ret = sendto(fd, data, off, 0, (void *)&nladdr, sizeof(nladdr)); + if (ret != off) + error(1, errno, "send netlink: %uB != %uB\n", ret, off); + + if (close(fd)) + error(1, errno, "close netlink"); + + fprintf(stderr, "route mtu (test): %u\n", mtu); +} + +static bool __send_one(int fd, struct msghdr *msg, int flags) +{ + int ret; + + ret = sendmsg(fd, msg, flags); + if (ret == -1 && + (errno == EMSGSIZE || errno == ENOMEM || errno == EINVAL)) + return false; + if (ret == -1) + error(1, errno, "sendmsg"); + if (ret != msg->msg_iov->iov_len) + error(1, 0, "sendto: %d != %lu", ret, msg->msg_iov->iov_len); + if (msg->msg_flags) + error(1, 0, "sendmsg: return flags 0x%x\n", msg->msg_flags); + + return true; +} + +static bool send_one(int fd, int len, int gso_len, + struct sockaddr *addr, socklen_t alen) +{ + char control[CMSG_SPACE(sizeof(uint16_t))] = {0}; + struct msghdr msg = {0}; + struct iovec iov = {0}; + struct cmsghdr *cm; + + iov.iov_base = buf; + iov.iov_len = len; + + msg.msg_iov = &iov; + msg.msg_iovlen = 1; + + msg.msg_name = addr; + msg.msg_namelen = alen; + + if (gso_len && !cfg_do_setsockopt) { + msg.msg_control = control; + msg.msg_controllen = sizeof(control); + + cm = CMSG_FIRSTHDR(&msg); + cm->cmsg_level = SOL_UDP; + cm->cmsg_type = UDP_SEGMENT; + cm->cmsg_len = CMSG_LEN(sizeof(uint16_t)); + *((uint16_t *) CMSG_DATA(cm)) = gso_len; + } + + /* If MSG_MORE, send 1 byte followed by remainder */ + if (cfg_do_msgmore && len > 1) { + iov.iov_len = 1; + if (!__send_one(fd, &msg, MSG_MORE)) + error(1, 0, "send 1B failed"); + + iov.iov_base++; + iov.iov_len = len - 1; + } + + return __send_one(fd, &msg, 0); +} + +static int recv_one(int fd, int flags) +{ + int ret; + + ret = recv(fd, buf, sizeof(buf), flags); + if (ret == -1 && errno == EAGAIN && (flags & MSG_DONTWAIT)) + return 0; + if (ret == -1) + error(1, errno, "recv"); + + return ret; +} + +static void run_one(struct testcase *test, int fdt, int fdr, + struct sockaddr *addr, socklen_t alen) +{ + int i, ret, val, mss; + bool sent; + + fprintf(stderr, "ipv%d tx:%d gso:%d %s\n", + addr->sa_family == AF_INET ? 4 : 6, + test->tlen, test->gso_len, + test->tfail ? "(fail)" : ""); + + val = test->gso_len; + if (cfg_do_setsockopt) { + if (setsockopt(fdt, SOL_UDP, UDP_SEGMENT, &val, sizeof(val))) + error(1, errno, "setsockopt udp segment"); + } + + sent = send_one(fdt, test->tlen, test->gso_len, addr, alen); + if (sent && test->tfail) + error(1, 0, "send succeeded while expecting failure"); + if (!sent && !test->tfail) + error(1, 0, "send failed while expecting success"); + if (!sent) + return; + + if (test->gso_len) + mss = test->gso_len; + else + mss = addr->sa_family == AF_INET ? CONST_MSS_V4 : CONST_MSS_V6; + + + /* Recv all full MSS datagrams */ + for (i = 0; i < test->r_num_mss; i++) { + ret = recv_one(fdr, 0); + if (ret != mss) + error(1, 0, "recv.%d: %d != %d", i, ret, mss); + } + + /* Recv the non-full last datagram, if tlen was not a multiple of mss */ + if (test->r_len_last) { + ret = recv_one(fdr, 0); + if (ret != test->r_len_last) + error(1, 0, "recv.%d: %d != %d (last)", + i, ret, test->r_len_last); + } + + /* Verify received all data */ + ret = recv_one(fdr, MSG_DONTWAIT); + if (ret) + error(1, 0, "recv: unexpected datagram"); +} + +static void run_all(int fdt, int fdr, struct sockaddr *addr, socklen_t alen) +{ + struct testcase *tests, *test; + + tests = addr->sa_family == AF_INET ? testcases_v4 : testcases_v6; + + for (test = tests; test->tlen; test++) { + /* if a specific test is given, then skip all others */ + if (cfg_specific_test_id == -1 || + cfg_specific_test_id == test - tests) + run_one(test, fdt, fdr, addr, alen); + } +} + +static void run_test(struct sockaddr *addr, socklen_t alen) +{ + struct timeval tv = { .tv_usec = 100 * 1000 }; + int fdr, fdt, val; + + fdr = socket(addr->sa_family, SOCK_DGRAM, 0); + if (fdr == -1) + error(1, errno, "socket r"); + + if (bind(fdr, addr, alen)) + error(1, errno, "bind"); + + /* Have tests fail quickly instead of hang */ + if (setsockopt(fdr, SOL_SOCKET, SO_RCVTIMEO, &tv, sizeof(tv))) + error(1, errno, "setsockopt rcv timeout"); + + fdt = socket(addr->sa_family, SOCK_DGRAM, 0); + if (fdt == -1) + error(1, errno, "socket t"); + + /* Do not fragment these datagrams: only succeed if GSO works */ + set_pmtu_discover(fdt, addr->sa_family == AF_INET); + + if (cfg_do_connectionless) { + set_device_mtu(fdt, CONST_MTU_TEST); + run_all(fdt, fdr, addr, alen); + } + + if (cfg_do_connected) { + set_device_mtu(fdt, CONST_MTU_TEST + 100); + set_route_mtu(CONST_MTU_TEST, addr->sa_family == AF_INET); + + if (connect(fdt, addr, alen)) + error(1, errno, "connect"); + + val = get_path_mtu(fdt, addr->sa_family == AF_INET); + if (val != CONST_MTU_TEST) + error(1, 0, "bad path mtu %u\n", val); + + run_all(fdt, fdr, addr, 0 /* use connected addr */); + } + + if (close(fdt)) + error(1, errno, "close t"); + if (close(fdr)) + error(1, errno, "close r"); +} + +static void run_test_v4(void) +{ + struct sockaddr_in addr = {0}; + + addr.sin_family = AF_INET; + addr.sin_port = htons(cfg_port); + addr.sin_addr = addr4; + + run_test((void *)&addr, sizeof(addr)); +} + +static void run_test_v6(void) +{ + struct sockaddr_in6 addr = {0}; + + addr.sin6_family = AF_INET6; + addr.sin6_port = htons(cfg_port); + addr.sin6_addr = addr6; + + run_test((void *)&addr, sizeof(addr)); +} + +static void parse_opts(int argc, char **argv) +{ + int c; + + while ((c = getopt(argc, argv, "46cCmst:")) != -1) { + switch (c) { + case '4': + cfg_do_ipv4 = true; + break; + case '6': + cfg_do_ipv6 = true; + break; + case 'c': + cfg_do_connected = true; + break; + case 'C': + cfg_do_connectionless = true; + break; + case 'm': + cfg_do_msgmore = true; + break; + case 's': + cfg_do_setsockopt = true; + break; + case 't': + cfg_specific_test_id = strtoul(optarg, NULL, 0); + break; + default: + error(1, 0, "%s: parse error", argv[0]); + } + } +} + +int main(int argc, char **argv) +{ + parse_opts(argc, argv); + + if (cfg_do_ipv4) + run_test_v4(); + if (cfg_do_ipv6) + run_test_v6(); + + fprintf(stderr, "OK\n"); + return 0; +} diff --git a/tools/testing/selftests/net/udpgso.sh b/tools/testing/selftests/net/udpgso.sh new file mode 100755 index 0000000..fec24f5 --- /dev/null +++ b/tools/testing/selftests/net/udpgso.sh @@ -0,0 +1,29 @@ +#!/bin/sh +# SPDX-License-Identifier: GPL-2.0 +# +# Run a series of udpgso regression tests + +echo "ipv4 cmsg" +./in_netns.sh ./udpgso -4 -C + +echo "ipv4 setsockopt" +./in_netns.sh ./udpgso -4 -C -s + +echo "ipv6 cmsg" +./in_netns.sh ./udpgso -6 -C + +echo "ipv6 setsockopt" +./in_netns.sh ./udpgso -6 -C -s + +echo "ipv4 connected" +./in_netns.sh ./udpgso -4 -c + +# blocked on 2nd loopback address +# echo "ipv6 connected" +# ./in_netns.sh ./udpgso -6 -c + +echo "ipv4 msg_more" +./in_netns.sh ./udpgso -4 -C -m + +echo "ipv6 msg_more" +./in_netns.sh ./udpgso -6 -C -m diff --git a/tools/testing/selftests/net/udpgso_bench.sh b/tools/testing/selftests/net/udpgso_bench.sh new file mode 100755 index 0000000..792fa4d --- /dev/null +++ b/tools/testing/selftests/net/udpgso_bench.sh @@ -0,0 +1,74 @@ +#!/bin/sh +# SPDX-License-Identifier: GPL-2.0 +# +# Run a series of udpgso benchmarks + +wake_children() { + local -r jobs="$(jobs -p)" + + if [[ "${jobs}" != "" ]]; then + kill -1 ${jobs} 2>/dev/null + fi +} +trap wake_children EXIT + +run_one() { + local -r args=$@ + + ./udpgso_bench_rx & + ./udpgso_bench_rx -t & + + ./udpgso_bench_tx ${args} +} + +run_in_netns() { + local -r args=$@ + + ./in_netns.sh $0 __subprocess ${args} +} + +run_udp() { + local -r args=$@ + + echo "udp" + run_in_netns ${args} + + echo "udp gso" + run_in_netns ${args} -S + + echo "udp gso zerocopy" + run_in_netns ${args} -S -z +} + +run_tcp() { + local -r args=$@ + + echo "tcp" + run_in_netns ${args} -t + + echo "tcp zerocopy" + run_in_netns ${args} -t -z +} + +run_all() { + local -r core_args="-l 4" + local -r ipv4_args="${core_args} -4 -D 127.0.0.1" + local -r ipv6_args="${core_args} -6 -D ::1" + + echo "ipv4" + run_tcp "${ipv4_args}" + run_udp "${ipv4_args}" + + echo "ipv6" + run_tcp "${ipv4_args}" + run_udp "${ipv6_args}" +} + +if [[ $# -eq 0 ]]; then + run_all +elif [[ $1 == "__subprocess" ]]; then + shift + run_one $@ +else + run_in_netns $@ +fi diff --git a/tools/testing/selftests/net/udpgso_bench_rx.c b/tools/testing/selftests/net/udpgso_bench_rx.c new file mode 100644 index 0000000..727cf67 --- /dev/null +++ b/tools/testing/selftests/net/udpgso_bench_rx.c @@ -0,0 +1,265 @@ +// SPDX-License-Identifier: GPL-2.0 + +#define _GNU_SOURCE + +#include <arpa/inet.h> +#include <error.h> +#include <errno.h> +#include <limits.h> +#include <linux/errqueue.h> +#include <linux/if_packet.h> +#include <linux/socket.h> +#include <linux/sockios.h> +#include <net/ethernet.h> +#include <net/if.h> +#include <netinet/ip.h> +#include <netinet/ip6.h> +#include <netinet/tcp.h> +#include <netinet/udp.h> +#include <poll.h> +#include <sched.h> +#include <stdbool.h> +#include <stdio.h> +#include <stdint.h> +#include <stdlib.h> +#include <string.h> +#include <sys/ioctl.h> +#include <sys/socket.h> +#include <sys/stat.h> +#include <sys/time.h> +#include <sys/types.h> +#include <sys/wait.h> +#include <unistd.h> + +static int cfg_port = 8000; +static bool cfg_tcp; +static bool cfg_verify; + +static bool interrupted; +static unsigned long packets, bytes; + +static void sigint_handler(int signum) +{ + if (signum == SIGINT) + interrupted = true; +} + +static unsigned long gettimeofday_ms(void) +{ + struct timeval tv; + + gettimeofday(&tv, NULL); + return (tv.tv_sec * 1000) + (tv.tv_usec / 1000); +} + +static void do_poll(int fd) +{ + struct pollfd pfd; + int ret; + + pfd.events = POLLIN; + pfd.revents = 0; + pfd.fd = fd; + + do { + ret = poll(&pfd, 1, 10); + if (ret == -1) + error(1, errno, "poll"); + if (ret == 0) + continue; + if (pfd.revents != POLLIN) + error(1, errno, "poll: 0x%x expected 0x%x\n", + pfd.revents, POLLIN); + } while (!ret && !interrupted); +} + +static int do_socket(bool do_tcp) +{ + struct sockaddr_in6 addr = {0}; + int fd, val; + + fd = socket(PF_INET6, cfg_tcp ? SOCK_STREAM : SOCK_DGRAM, 0); + if (fd == -1) + error(1, errno, "socket"); + + val = 1 << 21; + if (setsockopt(fd, SOL_SOCKET, SO_RCVBUF, &val, sizeof(val))) + error(1, errno, "setsockopt rcvbuf"); + val = 1; + if (setsockopt(fd, SOL_SOCKET, SO_REUSEPORT, &val, sizeof(val))) + error(1, errno, "setsockopt reuseport"); + + addr.sin6_family = PF_INET6; + addr.sin6_port = htons(cfg_port); + addr.sin6_addr = in6addr_any; + if (bind(fd, (void *) &addr, sizeof(addr))) + error(1, errno, "bind"); + + if (do_tcp) { + int accept_fd = fd; + + if (listen(accept_fd, 1)) + error(1, errno, "listen"); + + do_poll(accept_fd); + + fd = accept(accept_fd, NULL, NULL); + if (fd == -1) + error(1, errno, "accept"); + if (close(accept_fd)) + error(1, errno, "close accept fd"); + } + + return fd; +} + +/* Flush all outstanding bytes for the tcp receive queue */ +static void do_flush_tcp(int fd) +{ + int ret; + + while (true) { + /* MSG_TRUNC flushes up to len bytes */ + ret = recv(fd, NULL, 1 << 21, MSG_TRUNC | MSG_DONTWAIT); + if (ret == -1 && errno == EAGAIN) + return; + if (ret == -1) + error(1, errno, "flush"); + if (ret == 0) { + /* client detached */ + exit(0); + } + + packets++; + bytes += ret; + } + +} + +static char sanitized_char(char val) +{ + return (val >= 'a' && val <= 'z') ? val : '.'; +} + +static void do_verify_udp(const char *data, int len) +{ + char cur = data[0]; + int i; + + /* verify contents */ + if (cur < 'a' || cur > 'z') + error(1, 0, "data initial byte out of range"); + + for (i = 1; i < len; i++) { + if (cur == 'z') + cur = 'a'; + else + cur++; + + if (data[i] != cur) + error(1, 0, "data[%d]: len %d, %c(%hhu) != %c(%hhu)\n", + i, len, + sanitized_char(data[i]), data[i], + sanitized_char(cur), cur); + } +} + +/* Flush all outstanding datagrams. Verify first few bytes of each. */ +static void do_flush_udp(int fd) +{ + static char rbuf[ETH_DATA_LEN]; + int ret, len, budget = 256; + + len = cfg_verify ? sizeof(rbuf) : 0; + while (budget--) { + /* MSG_TRUNC will make return value full datagram length */ + ret = recv(fd, rbuf, len, MSG_TRUNC | MSG_DONTWAIT); + if (ret == -1 && errno == EAGAIN) + return; + if (ret == -1) + error(1, errno, "recv"); + if (len) { + if (ret == 0) + error(1, errno, "recv: 0 byte datagram\n"); + + do_verify_udp(rbuf, ret); + } + + packets++; + bytes += ret; + } +} + +static void usage(const char *filepath) +{ + error(1, 0, "Usage: %s [-tv] [-p port]", filepath); +} + +static void parse_opts(int argc, char **argv) +{ + int c; + + while ((c = getopt(argc, argv, "ptv")) != -1) { + switch (c) { + case 'p': + cfg_port = htons(strtoul(optarg, NULL, 0)); + break; + case 't': + cfg_tcp = true; + break; + case 'v': + cfg_verify = true; + break; + } + } + + if (optind != argc) + usage(argv[0]); + + if (cfg_tcp && cfg_verify) + error(1, 0, "TODO: implement verify mode for tcp"); +} + +static void do_recv(void) +{ + unsigned long tnow, treport; + int fd; + + fd = do_socket(cfg_tcp); + + treport = gettimeofday_ms() + 1000; + do { + do_poll(fd); + + if (cfg_tcp) + do_flush_tcp(fd); + else + do_flush_udp(fd); + + tnow = gettimeofday_ms(); + if (tnow > treport) { + if (packets) + fprintf(stderr, + "%s rx: %6lu MB/s %8lu calls/s\n", + cfg_tcp ? "tcp" : "udp", + bytes >> 20, packets); + bytes = packets = 0; + treport = tnow + 1000; + } + + } while (!interrupted); + + if (close(fd)) + error(1, errno, "close"); +} + +int main(int argc, char **argv) +{ + parse_opts(argc, argv); + + signal(SIGINT, sigint_handler); + + do_recv(); + + return 0; +} diff --git a/tools/testing/selftests/net/udpgso_bench_tx.c b/tools/testing/selftests/net/udpgso_bench_tx.c new file mode 100644 index 0000000..e821564 --- /dev/null +++ b/tools/testing/selftests/net/udpgso_bench_tx.c @@ -0,0 +1,420 @@ +// SPDX-License-Identifier: GPL-2.0 + +#define _GNU_SOURCE + +#include <arpa/inet.h> +#include <errno.h> +#include <error.h> +#include <netinet/if_ether.h> +#include <netinet/in.h> +#include <netinet/ip.h> +#include <netinet/ip6.h> +#include <netinet/udp.h> +#include <poll.h> +#include <sched.h> +#include <signal.h> +#include <stdbool.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <sys/socket.h> +#include <sys/time.h> +#include <sys/types.h> +#include <unistd.h> + +#ifndef ETH_MAX_MTU +#define ETH_MAX_MTU 0xFFFFU +#endif + +#ifndef UDP_SEGMENT +#define UDP_SEGMENT 103 +#endif + +#ifndef SO_ZEROCOPY +#define SO_ZEROCOPY 60 +#endif + +#ifndef MSG_ZEROCOPY +#define MSG_ZEROCOPY 0x4000000 +#endif + +#define NUM_PKT 100 + +static bool cfg_cache_trash; +static int cfg_cpu = -1; +static int cfg_connected = true; +static int cfg_family = PF_UNSPEC; +static uint16_t cfg_mss; +static int cfg_payload_len = (1472 * 42); +static int cfg_port = 8000; +static int cfg_runtime_ms = -1; +static bool cfg_segment; +static bool cfg_sendmmsg; +static bool cfg_tcp; +static bool cfg_zerocopy; + +static socklen_t cfg_alen; +static struct sockaddr_storage cfg_dst_addr; + +static bool interrupted; +static char buf[NUM_PKT][ETH_MAX_MTU]; + +static void sigint_handler(int signum) +{ + if (signum == SIGINT) + interrupted = true; +} + +static unsigned long gettimeofday_ms(void) +{ + struct timeval tv; + + gettimeofday(&tv, NULL); + return (tv.tv_sec * 1000) + (tv.tv_usec / 1000); +} + +static int set_cpu(int cpu) +{ + cpu_set_t mask; + + CPU_ZERO(&mask); + CPU_SET(cpu, &mask); + if (sched_setaffinity(0, sizeof(mask), &mask)) + error(1, 0, "setaffinity %d", cpu); + + return 0; +} + +static void setup_sockaddr(int domain, const char *str_addr, void *sockaddr) +{ + struct sockaddr_in6 *addr6 = (void *) sockaddr; + struct sockaddr_in *addr4 = (void *) sockaddr; + + switch (domain) { + case PF_INET: + addr4->sin_family = AF_INET; + addr4->sin_port = htons(cfg_port); + if (inet_pton(AF_INET, str_addr, &(addr4->sin_addr)) != 1) + error(1, 0, "ipv4 parse error: %s", str_addr); + break; + case PF_INET6: + addr6->sin6_family = AF_INET6; + addr6->sin6_port = htons(cfg_port); + if (inet_pton(AF_INET6, str_addr, &(addr6->sin6_addr)) != 1) + error(1, 0, "ipv6 parse error: %s", str_addr); + break; + default: + error(1, 0, "illegal domain"); + } +} + +static void flush_zerocopy(int fd) +{ + struct msghdr msg = {0}; /* flush */ + int ret; + + while (1) { + ret = recvmsg(fd, &msg, MSG_ERRQUEUE); + if (ret == -1 && errno == EAGAIN) + break; + if (ret == -1) + error(1, errno, "errqueue"); + if (msg.msg_flags != (MSG_ERRQUEUE | MSG_CTRUNC)) + error(1, 0, "errqueue: flags 0x%x\n", msg.msg_flags); + msg.msg_flags = 0; + } +} + +static int send_tcp(int fd, char *data) +{ + int ret, done = 0, count = 0; + + while (done < cfg_payload_len) { + ret = send(fd, data + done, cfg_payload_len - done, + cfg_zerocopy ? MSG_ZEROCOPY : 0); + if (ret == -1) + error(1, errno, "write"); + + done += ret; + count++; + } + + return count; +} + +static int send_udp(int fd, char *data) +{ + int ret, total_len, len, count = 0; + + total_len = cfg_payload_len; + + while (total_len) { + len = total_len < cfg_mss ? total_len : cfg_mss; + + ret = sendto(fd, data, len, cfg_zerocopy ? MSG_ZEROCOPY : 0, + cfg_connected ? NULL : (void *)&cfg_dst_addr, + cfg_connected ? 0 : cfg_alen); + if (ret == -1) + error(1, errno, "write"); + if (ret != len) + error(1, errno, "write: %uB != %uB\n", ret, len); + + total_len -= len; + count++; + } + + return count; +} + +static int send_udp_sendmmsg(int fd, char *data) +{ + const int max_nr_msg = ETH_MAX_MTU / ETH_DATA_LEN; + struct mmsghdr mmsgs[max_nr_msg]; + struct iovec iov[max_nr_msg]; + unsigned int off = 0, left; + int i = 0, ret; + + memset(mmsgs, 0, sizeof(mmsgs)); + + left = cfg_payload_len; + while (left) { + if (i == max_nr_msg) + error(1, 0, "sendmmsg: exceeds max_nr_msg"); + + iov[i].iov_base = data + off; + iov[i].iov_len = cfg_mss < left ? cfg_mss : left; + + mmsgs[i].msg_hdr.msg_iov = iov + i; + mmsgs[i].msg_hdr.msg_iovlen = 1; + + off += iov[i].iov_len; + left -= iov[i].iov_len; + i++; + } + + ret = sendmmsg(fd, mmsgs, i, cfg_zerocopy ? MSG_ZEROCOPY : 0); + if (ret == -1) + error(1, errno, "sendmmsg"); + + return ret; +} + +static void send_udp_segment_cmsg(struct cmsghdr *cm) +{ + uint16_t *valp; + + cm->cmsg_level = SOL_UDP; + cm->cmsg_type = UDP_SEGMENT; + cm->cmsg_len = CMSG_LEN(sizeof(cfg_mss)); + valp = (void *)CMSG_DATA(cm); + *valp = cfg_mss; +} + +static int send_udp_segment(int fd, char *data) +{ + char control[CMSG_SPACE(sizeof(cfg_mss))] = {0}; + struct msghdr msg = {0}; + struct iovec iov = {0}; + int ret; + + iov.iov_base = data; + iov.iov_len = cfg_payload_len; + + msg.msg_iov = &iov; + msg.msg_iovlen = 1; + + msg.msg_control = control; + msg.msg_controllen = sizeof(control); + send_udp_segment_cmsg(CMSG_FIRSTHDR(&msg)); + + msg.msg_name = (void *)&cfg_dst_addr; + msg.msg_namelen = cfg_alen; + + ret = sendmsg(fd, &msg, cfg_zerocopy ? MSG_ZEROCOPY : 0); + if (ret == -1) + error(1, errno, "sendmsg"); + if (ret != iov.iov_len) + error(1, 0, "sendmsg: %u != %lu\n", ret, iov.iov_len); + + return 1; +} + +static void usage(const char *filepath) +{ + error(1, 0, "Usage: %s [-46cmStuz] [-C cpu] [-D dst ip] [-l secs] [-p port] [-s sendsize]", + filepath); +} + +static void parse_opts(int argc, char **argv) +{ + int max_len, hdrlen; + int c; + + while ((c = getopt(argc, argv, "46cC:D:l:mp:s:Stuz")) != -1) { + switch (c) { + case '4': + if (cfg_family != PF_UNSPEC) + error(1, 0, "Pass one of -4 or -6"); + cfg_family = PF_INET; + cfg_alen = sizeof(struct sockaddr_in); + break; + case '6': + if (cfg_family != PF_UNSPEC) + error(1, 0, "Pass one of -4 or -6"); + cfg_family = PF_INET6; + cfg_alen = sizeof(struct sockaddr_in6); + break; + case 'c': + cfg_cache_trash = true; + break; + case 'C': + cfg_cpu = strtol(optarg, NULL, 0); + break; + case 'D': + setup_sockaddr(cfg_family, optarg, &cfg_dst_addr); + break; + case 'l': + cfg_runtime_ms = strtoul(optarg, NULL, 10) * 1000; + break; + case 'm': + cfg_sendmmsg = true; + break; + case 'p': + cfg_port = strtoul(optarg, NULL, 0); + break; + case 's': + cfg_payload_len = strtoul(optarg, NULL, 0); + break; + case 'S': + cfg_segment = true; + break; + case 't': + cfg_tcp = true; + break; + case 'u': + cfg_connected = false; + break; + case 'z': + cfg_zerocopy = true; + break; + } + } + + if (optind != argc) + usage(argv[0]); + + if (cfg_family == PF_UNSPEC) + error(1, 0, "must pass one of -4 or -6"); + if (cfg_tcp && !cfg_connected) + error(1, 0, "connectionless tcp makes no sense"); + if (cfg_segment && cfg_sendmmsg) + error(1, 0, "cannot combine segment offload and sendmmsg"); + + if (cfg_family == PF_INET) + hdrlen = sizeof(struct iphdr) + sizeof(struct udphdr); + else + hdrlen = sizeof(struct ip6_hdr) + sizeof(struct udphdr); + + cfg_mss = ETH_DATA_LEN - hdrlen; + max_len = ETH_MAX_MTU - hdrlen; + + if (cfg_payload_len > max_len) + error(1, 0, "payload length %u exceeds max %u", + cfg_payload_len, max_len); +} + +static void set_pmtu_discover(int fd, bool is_ipv4) +{ + int level, name, val; + + if (is_ipv4) { + level = SOL_IP; + name = IP_MTU_DISCOVER; + val = IP_PMTUDISC_DO; + } else { + level = SOL_IPV6; + name = IPV6_MTU_DISCOVER; + val = IPV6_PMTUDISC_DO; + } + + if (setsockopt(fd, level, name, &val, sizeof(val))) + error(1, errno, "setsockopt path mtu"); +} + +int main(int argc, char **argv) +{ + unsigned long num_msgs, num_sends; + unsigned long tnow, treport, tstop; + int fd, i, val; + + parse_opts(argc, argv); + + if (cfg_cpu > 0) + set_cpu(cfg_cpu); + + for (i = 0; i < sizeof(buf[0]); i++) + buf[0][i] = 'a' + (i % 26); + for (i = 1; i < NUM_PKT; i++) + memcpy(buf[i], buf[0], sizeof(buf[0])); + + signal(SIGINT, sigint_handler); + + fd = socket(cfg_family, cfg_tcp ? SOCK_STREAM : SOCK_DGRAM, 0); + if (fd == -1) + error(1, errno, "socket"); + + if (cfg_zerocopy) { + val = 1; + if (setsockopt(fd, SOL_SOCKET, SO_ZEROCOPY, &val, sizeof(val))) + error(1, errno, "setsockopt zerocopy"); + } + + if (cfg_connected && + connect(fd, (void *)&cfg_dst_addr, cfg_alen)) + error(1, errno, "connect"); + + if (cfg_segment) + set_pmtu_discover(fd, cfg_family == PF_INET); + + num_msgs = num_sends = 0; + tnow = gettimeofday_ms(); + tstop = tnow + cfg_runtime_ms; + treport = tnow + 1000; + + i = 0; + do { + if (cfg_tcp) + num_sends += send_tcp(fd, buf[i]); + else if (cfg_segment) + num_sends += send_udp_segment(fd, buf[i]); + else if (cfg_sendmmsg) + num_sends += send_udp_sendmmsg(fd, buf[i]); + else + num_sends += send_udp(fd, buf[i]); + num_msgs++; + + if (cfg_zerocopy && ((num_msgs & 0xF) == 0)) + flush_zerocopy(fd); + + tnow = gettimeofday_ms(); + if (tnow > treport) { + fprintf(stderr, + "%s tx: %6lu MB/s %8lu calls/s %6lu msg/s\n", + cfg_tcp ? "tcp" : "udp", + (num_msgs * cfg_payload_len) >> 20, + num_sends, num_msgs); + num_msgs = num_sends = 0; + treport = tnow + 1000; + } + + /* cold cache when writing buffer */ + if (cfg_cache_trash) + i = ++i < NUM_PKT ? i : 0; + + } while (!interrupted && (cfg_runtime_ms == -1 || tnow < tstop)); + + if (close(fd)) + error(1, errno, "close"); + + return 0; +} |