summaryrefslogtreecommitdiffstats
path: root/tools/testing/selftests/net
diff options
context:
space:
mode:
Diffstat (limited to 'tools/testing/selftests/net')
-rw-r--r--tools/testing/selftests/net/.gitignore5
-rw-r--r--tools/testing/selftests/net/Makefile7
-rwxr-xr-xtools/testing/selftests/net/fib_rule_tests.sh248
-rw-r--r--[-rwxr-xr-x]tools/testing/selftests/net/fib_tests.sh858
-rwxr-xr-xtools/testing/selftests/net/forwarding/bridge_vlan_aware.sh26
-rwxr-xr-xtools/testing/selftests/net/forwarding/bridge_vlan_unaware.sh26
-rw-r--r--tools/testing/selftests/net/forwarding/lib.sh190
-rwxr-xr-xtools/testing/selftests/net/forwarding/mirror_gre.sh159
-rwxr-xr-xtools/testing/selftests/net/forwarding/mirror_gre_bound.sh226
-rwxr-xr-xtools/testing/selftests/net/forwarding/mirror_gre_bridge_1d_vlan.sh121
-rwxr-xr-xtools/testing/selftests/net/forwarding/mirror_gre_changes.sh278
-rwxr-xr-xtools/testing/selftests/net/forwarding/mirror_gre_flower.sh137
-rw-r--r--tools/testing/selftests/net/forwarding/mirror_gre_lib.sh130
-rwxr-xr-xtools/testing/selftests/net/forwarding/mirror_gre_neigh.sh115
-rwxr-xr-xtools/testing/selftests/net/forwarding/mirror_gre_nh.sh127
-rw-r--r--tools/testing/selftests/net/forwarding/mirror_gre_topo_lib.sh94
-rwxr-xr-xtools/testing/selftests/net/forwarding/mirror_gre_vlan.sh92
-rwxr-xr-xtools/testing/selftests/net/forwarding/mirror_gre_vlan_bridge_1q.sh270
-rw-r--r--tools/testing/selftests/net/forwarding/mirror_lib.sh132
-rw-r--r--tools/testing/selftests/net/forwarding/mirror_topo_lib.sh101
-rwxr-xr-xtools/testing/selftests/net/forwarding/mirror_vlan.sh131
-rwxr-xr-xtools/testing/selftests/net/forwarding/router.sh14
-rwxr-xr-xtools/testing/selftests/net/forwarding/router_multipath.sh29
-rwxr-xr-xtools/testing/selftests/net/forwarding/tc_actions.sh25
-rwxr-xr-xtools/testing/selftests/net/forwarding/tc_chains.sh7
-rwxr-xr-xtools/testing/selftests/net/forwarding/tc_flower.sh80
-rwxr-xr-xtools/testing/selftests/net/forwarding/tc_shblocks.sh5
-rwxr-xr-xtools/testing/selftests/net/msg_zerocopy.sh8
-rwxr-xr-xtools/testing/selftests/net/pmtu.sh4
-rw-r--r--tools/testing/selftests/net/psock_snd.c397
-rwxr-xr-xtools/testing/selftests/net/psock_snd.sh98
-rwxr-xr-xtools/testing/selftests/net/rtnetlink.sh103
-rw-r--r--tools/testing/selftests/net/tcp_inq.c189
-rw-r--r--tools/testing/selftests/net/tcp_mmap.c447
-rw-r--r--tools/testing/selftests/net/udpgso.c693
-rwxr-xr-xtools/testing/selftests/net/udpgso.sh29
-rwxr-xr-xtools/testing/selftests/net/udpgso_bench.sh74
-rw-r--r--tools/testing/selftests/net/udpgso_bench_rx.c265
-rw-r--r--tools/testing/selftests/net/udpgso_bench_tx.c420
39 files changed, 6296 insertions, 64 deletions
diff --git a/tools/testing/selftests/net/.gitignore b/tools/testing/selftests/net/.gitignore
index c612d6e..128e548 100644
--- a/tools/testing/selftests/net/.gitignore
+++ b/tools/testing/selftests/net/.gitignore
@@ -1,9 +1,14 @@
msg_zerocopy
socket
psock_fanout
+psock_snd
psock_tpacket
reuseport_bpf
reuseport_bpf_cpu
reuseport_bpf_numa
reuseport_dualstack
reuseaddr_conflict
+tcp_mmap
+udpgso
+udpgso_bench_rx
+udpgso_bench_tx
diff --git a/tools/testing/selftests/net/Makefile b/tools/testing/selftests/net/Makefile
index 3ff81a4..663e11e 100644
--- a/tools/testing/selftests/net/Makefile
+++ b/tools/testing/selftests/net/Makefile
@@ -5,13 +5,18 @@ CFLAGS = -Wall -Wl,--no-as-needed -O2 -g
CFLAGS += -I../../../../usr/include/
TEST_PROGS := run_netsocktests run_afpackettests test_bpf.sh netdevice.sh rtnetlink.sh
-TEST_PROGS += fib_tests.sh fib-onlink-tests.sh pmtu.sh
+TEST_PROGS += fib_tests.sh fib-onlink-tests.sh pmtu.sh udpgso.sh
+TEST_PROGS += udpgso_bench.sh fib_rule_tests.sh msg_zerocopy.sh psock_snd.sh
TEST_PROGS_EXTENDED := in_netns.sh
TEST_GEN_FILES = socket
TEST_GEN_FILES += psock_fanout psock_tpacket msg_zerocopy
+TEST_GEN_FILES += tcp_mmap tcp_inq psock_snd
+TEST_GEN_FILES += udpgso udpgso_bench_tx udpgso_bench_rx
TEST_GEN_PROGS = reuseport_bpf reuseport_bpf_cpu reuseport_bpf_numa
TEST_GEN_PROGS += reuseport_dualstack reuseaddr_conflict
include ../lib.mk
$(OUTPUT)/reuseport_bpf_numa: LDFLAGS += -lnuma
+$(OUTPUT)/tcp_mmap: LDFLAGS += -lpthread
+$(OUTPUT)/tcp_inq: LDFLAGS += -lpthread
diff --git a/tools/testing/selftests/net/fib_rule_tests.sh b/tools/testing/selftests/net/fib_rule_tests.sh
new file mode 100755
index 0000000..d4cfb6a
--- /dev/null
+++ b/tools/testing/selftests/net/fib_rule_tests.sh
@@ -0,0 +1,248 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+# This test is for checking IPv4 and IPv6 FIB rules API
+
+ret=0
+
+PAUSE_ON_FAIL=${PAUSE_ON_FAIL:=no}
+IP="ip -netns testns"
+
+RTABLE=100
+GW_IP4=192.51.100.2
+SRC_IP=192.51.100.3
+GW_IP6=2001:db8:1::2
+SRC_IP6=2001:db8:1::3
+
+DEV_ADDR=192.51.100.1
+DEV=dummy0
+
+log_test()
+{
+ local rc=$1
+ local expected=$2
+ local msg="$3"
+
+ if [ ${rc} -eq ${expected} ]; then
+ nsuccess=$((nsuccess+1))
+ printf "\n TEST: %-50s [ OK ]\n" "${msg}"
+ else
+ nfail=$((nfail+1))
+ printf "\n TEST: %-50s [FAIL]\n" "${msg}"
+ if [ "${PAUSE_ON_FAIL}" = "yes" ]; then
+ echo
+ echo "hit enter to continue, 'q' to quit"
+ read a
+ [ "$a" = "q" ] && exit 1
+ fi
+ fi
+}
+
+log_section()
+{
+ echo
+ echo "######################################################################"
+ echo "TEST SECTION: $*"
+ echo "######################################################################"
+}
+
+setup()
+{
+ set -e
+ ip netns add testns
+ $IP link set dev lo up
+
+ $IP link add dummy0 type dummy
+ $IP link set dev dummy0 up
+ $IP address add 198.51.100.1/24 dev dummy0
+ $IP -6 address add 2001:db8:1::1/64 dev dummy0
+
+ set +e
+}
+
+cleanup()
+{
+ $IP link del dev dummy0 &> /dev/null
+ ip netns del testns
+}
+
+fib_check_iproute_support()
+{
+ ip rule help 2>&1 | grep -q $1
+ if [ $? -ne 0 ]; then
+ echo "SKIP: iproute2 iprule too old, missing $1 match"
+ return 1
+ fi
+
+ ip route get help 2>&1 | grep -q $2
+ if [ $? -ne 0 ]; then
+ echo "SKIP: iproute2 get route too old, missing $2 match"
+ return 1
+ fi
+
+ return 0
+}
+
+fib_rule6_del()
+{
+ $IP -6 rule del $1
+ log_test $? 0 "rule6 del $1"
+}
+
+fib_rule6_del_by_pref()
+{
+ pref=$($IP -6 rule show | grep "$1 lookup $TABLE" | cut -d ":" -f 1)
+ $IP -6 rule del pref $pref
+}
+
+fib_rule6_test_match_n_redirect()
+{
+ local match="$1"
+ local getmatch="$2"
+
+ $IP -6 rule add $match table $RTABLE
+ $IP -6 route get $GW_IP6 $getmatch | grep -q "table $RTABLE"
+ log_test $? 0 "rule6 check: $1"
+
+ fib_rule6_del_by_pref "$match"
+ log_test $? 0 "rule6 del by pref: $match"
+}
+
+fib_rule6_test()
+{
+ # setup the fib rule redirect route
+ $IP -6 route add table $RTABLE default via $GW_IP6 dev $DEV onlink
+
+ match="oif $DEV"
+ fib_rule6_test_match_n_redirect "$match" "$match" "oif redirect to table"
+
+ match="from $SRC_IP6 iif $DEV"
+ fib_rule6_test_match_n_redirect "$match" "$match" "iif redirect to table"
+
+ match="tos 0x10"
+ fib_rule6_test_match_n_redirect "$match" "$match" "tos redirect to table"
+
+ match="fwmark 0x64"
+ getmatch="mark 0x64"
+ fib_rule6_test_match_n_redirect "$match" "$getmatch" "fwmark redirect to table"
+
+ fib_check_iproute_support "uidrange" "uid"
+ if [ $? -eq 0 ]; then
+ match="uidrange 100-100"
+ getmatch="uid 100"
+ fib_rule6_test_match_n_redirect "$match" "$getmatch" "uid redirect to table"
+ fi
+
+ fib_check_iproute_support "sport" "sport"
+ if [ $? -eq 0 ]; then
+ match="sport 666 dport 777"
+ fib_rule6_test_match_n_redirect "$match" "$match" "sport and dport redirect to table"
+ fi
+
+ fib_check_iproute_support "ipproto" "ipproto"
+ if [ $? -eq 0 ]; then
+ match="ipproto tcp"
+ fib_rule6_test_match_n_redirect "$match" "$match" "ipproto match"
+ fi
+
+ fib_check_iproute_support "ipproto" "ipproto"
+ if [ $? -eq 0 ]; then
+ match="ipproto icmp"
+ fib_rule6_test_match_n_redirect "$match" "$match" "ipproto icmp match"
+ fi
+}
+
+fib_rule4_del()
+{
+ $IP rule del $1
+ log_test $? 0 "del $1"
+}
+
+fib_rule4_del_by_pref()
+{
+ pref=$($IP rule show | grep "$1 lookup $TABLE" | cut -d ":" -f 1)
+ $IP rule del pref $pref
+}
+
+fib_rule4_test_match_n_redirect()
+{
+ local match="$1"
+ local getmatch="$2"
+
+ $IP rule add $match table $RTABLE
+ $IP route get $GW_IP4 $getmatch | grep -q "table $RTABLE"
+ log_test $? 0 "rule4 check: $1"
+
+ fib_rule4_del_by_pref "$match"
+ log_test $? 0 "rule4 del by pref: $match"
+}
+
+fib_rule4_test()
+{
+ # setup the fib rule redirect route
+ $IP route add table $RTABLE default via $GW_IP4 dev $DEV onlink
+
+ match="oif $DEV"
+ fib_rule4_test_match_n_redirect "$match" "$match" "oif redirect to table"
+
+ match="from $SRC_IP iif $DEV"
+ fib_rule4_test_match_n_redirect "$match" "$match" "iif redirect to table"
+
+ match="tos 0x10"
+ fib_rule4_test_match_n_redirect "$match" "$match" "tos redirect to table"
+
+ match="fwmark 0x64"
+ getmatch="mark 0x64"
+ fib_rule4_test_match_n_redirect "$match" "$getmatch" "fwmark redirect to table"
+
+ fib_check_iproute_support "uidrange" "uid"
+ if [ $? -eq 0 ]; then
+ match="uidrange 100-100"
+ getmatch="uid 100"
+ fib_rule4_test_match_n_redirect "$match" "$getmatch" "uid redirect to table"
+ fi
+
+ fib_check_iproute_support "sport" "sport"
+ if [ $? -eq 0 ]; then
+ match="sport 666 dport 777"
+ fib_rule4_test_match_n_redirect "$match" "$match" "sport and dport redirect to table"
+ fi
+
+ fib_check_iproute_support "ipproto" "ipproto"
+ if [ $? -eq 0 ]; then
+ match="ipproto tcp"
+ fib_rule4_test_match_n_redirect "$match" "$match" "ipproto tcp match"
+ fi
+
+ fib_check_iproute_support "ipproto" "ipproto"
+ if [ $? -eq 0 ]; then
+ match="ipproto icmp"
+ fib_rule4_test_match_n_redirect "$match" "$match" "ipproto icmp match"
+ fi
+}
+
+run_fibrule_tests()
+{
+ log_section "IPv4 fib rule"
+ fib_rule4_test
+ log_section "IPv6 fib rule"
+ fib_rule6_test
+}
+
+if [ "$(id -u)" -ne 0 ];then
+ echo "SKIP: Need root privileges"
+ exit 0
+fi
+
+if [ ! -x "$(command -v ip)" ]; then
+ echo "SKIP: Could not run test without ip tool"
+ exit 0
+fi
+
+# start clean
+cleanup &> /dev/null
+setup
+run_fibrule_tests
+cleanup
+
+exit $ret
diff --git a/tools/testing/selftests/net/fib_tests.sh b/tools/testing/selftests/net/fib_tests.sh
index 5baac82..78245d6 100755..100644
--- a/tools/testing/selftests/net/fib_tests.sh
+++ b/tools/testing/selftests/net/fib_tests.sh
@@ -8,8 +8,11 @@ ret=0
# Kselftest framework requirement - SKIP code is 4.
ksft_skip=4
-VERBOSE=${VERBOSE:=0}
-PAUSE_ON_FAIL=${PAUSE_ON_FAIL:=no}
+# all tests in this script. Can be overridden with -t option
+TESTS="unregister down carrier nexthop ipv6_rt ipv4_rt ipv6_addr_metric ipv4_addr_metric"
+VERBOSE=0
+PAUSE_ON_FAIL=no
+PAUSE=no
IP="ip -netns testns"
log_test()
@@ -20,8 +23,10 @@ log_test()
if [ ${rc} -eq ${expected} ]; then
printf " TEST: %-60s [ OK ]\n" "${msg}"
+ nsuccess=$((nsuccess+1))
else
ret=1
+ nfail=$((nfail+1))
printf " TEST: %-60s [FAIL]\n" "${msg}"
if [ "${PAUSE_ON_FAIL}" = "yes" ]; then
echo
@@ -30,6 +35,13 @@ log_test()
[ "$a" = "q" ] && exit 1
fi
fi
+
+ if [ "${PAUSE}" = "yes" ]; then
+ echo
+ echo "hit enter to continue, 'q' to quit"
+ read a
+ [ "$a" = "q" ] && exit 1
+ fi
}
setup()
@@ -565,20 +577,825 @@ fib_nexthop_test()
}
################################################################################
-#
+# Tests on route add and replace
+
+run_cmd()
+{
+ local cmd="$1"
+ local out
+ local stderr="2>/dev/null"
+
+ if [ "$VERBOSE" = "1" ]; then
+ printf " COMMAND: $cmd\n"
+ stderr=
+ fi
+
+ out=$(eval $cmd $stderr)
+ rc=$?
+ if [ "$VERBOSE" = "1" -a -n "$out" ]; then
+ echo " $out"
+ fi
+
+ [ "$VERBOSE" = "1" ] && echo
+
+ return $rc
+}
+
+# add route for a prefix, flushing any existing routes first
+# expected to be the first step of a test
+add_route6()
+{
+ local pfx="$1"
+ local nh="$2"
+ local out
+
+ if [ "$VERBOSE" = "1" ]; then
+ echo
+ echo " ##################################################"
+ echo
+ fi
+
+ run_cmd "$IP -6 ro flush ${pfx}"
+ [ $? -ne 0 ] && exit 1
+
+ out=$($IP -6 ro ls match ${pfx})
+ if [ -n "$out" ]; then
+ echo "Failed to flush routes for prefix used for tests."
+ exit 1
+ fi
+
+ run_cmd "$IP -6 ro add ${pfx} ${nh}"
+ if [ $? -ne 0 ]; then
+ echo "Failed to add initial route for test."
+ exit 1
+ fi
+}
+
+# add initial route - used in replace route tests
+add_initial_route6()
+{
+ add_route6 "2001:db8:104::/64" "$1"
+}
+
+check_route6()
+{
+ local pfx="2001:db8:104::/64"
+ local expected="$1"
+ local out
+ local rc=0
+
+ out=$($IP -6 ro ls match ${pfx} | sed -e 's/ pref medium//')
+ [ "${out}" = "${expected}" ] && return 0
+
+ if [ -z "${out}" ]; then
+ if [ "$VERBOSE" = "1" ]; then
+ printf "\nNo route entry found\n"
+ printf "Expected:\n"
+ printf " ${expected}\n"
+ fi
+ return 1
+ fi
+
+ # tricky way to convert output to 1-line without ip's
+ # messy '\'; this drops all extra white space
+ out=$(echo ${out})
+ if [ "${out}" != "${expected}" ]; then
+ rc=1
+ if [ "${VERBOSE}" = "1" ]; then
+ printf " Unexpected route entry. Have:\n"
+ printf " ${out}\n"
+ printf " Expected:\n"
+ printf " ${expected}\n\n"
+ fi
+ fi
+
+ return $rc
+}
+
+route_cleanup()
+{
+ $IP li del red 2>/dev/null
+ $IP li del dummy1 2>/dev/null
+ $IP li del veth1 2>/dev/null
+ $IP li del veth3 2>/dev/null
+
+ cleanup &> /dev/null
+}
+
+route_setup()
+{
+ route_cleanup
+ setup
+
+ [ "${VERBOSE}" = "1" ] && set -x
+ set -e
+
+ $IP li add red up type vrf table 101
+ $IP li add veth1 type veth peer name veth2
+ $IP li add veth3 type veth peer name veth4
+
+ $IP li set veth1 up
+ $IP li set veth3 up
+ $IP li set veth2 vrf red up
+ $IP li set veth4 vrf red up
+ $IP li add dummy1 type dummy
+ $IP li set dummy1 vrf red up
+
+ $IP -6 addr add 2001:db8:101::1/64 dev veth1
+ $IP -6 addr add 2001:db8:101::2/64 dev veth2
+ $IP -6 addr add 2001:db8:103::1/64 dev veth3
+ $IP -6 addr add 2001:db8:103::2/64 dev veth4
+ $IP -6 addr add 2001:db8:104::1/64 dev dummy1
+
+ $IP addr add 172.16.101.1/24 dev veth1
+ $IP addr add 172.16.101.2/24 dev veth2
+ $IP addr add 172.16.103.1/24 dev veth3
+ $IP addr add 172.16.103.2/24 dev veth4
+ $IP addr add 172.16.104.1/24 dev dummy1
+
+ set +ex
+}
+
+# assumption is that basic add of a single path route works
+# otherwise just adding an address on an interface is broken
+ipv6_rt_add()
+{
+ local rc
+
+ echo
+ echo "IPv6 route add / append tests"
+
+ # route add same prefix - fails with EEXISTS b/c ip adds NLM_F_EXCL
+ add_route6 "2001:db8:104::/64" "via 2001:db8:101::2"
+ run_cmd "$IP -6 ro add 2001:db8:104::/64 via 2001:db8:103::2"
+ log_test $? 2 "Attempt to add duplicate route - gw"
+
+ # route add same prefix - fails with EEXISTS b/c ip adds NLM_F_EXCL
+ add_route6 "2001:db8:104::/64" "via 2001:db8:101::2"
+ run_cmd "$IP -6 ro add 2001:db8:104::/64 dev veth3"
+ log_test $? 2 "Attempt to add duplicate route - dev only"
+
+ # route add same prefix - fails with EEXISTS b/c ip adds NLM_F_EXCL
+ add_route6 "2001:db8:104::/64" "via 2001:db8:101::2"
+ run_cmd "$IP -6 ro add unreachable 2001:db8:104::/64"
+ log_test $? 2 "Attempt to add duplicate route - reject route"
+
+ # iproute2 prepend only sets NLM_F_CREATE
+ # - adds a new route; does NOT convert existing route to ECMP
+ add_route6 "2001:db8:104::/64" "via 2001:db8:101::2"
+ run_cmd "$IP -6 ro prepend 2001:db8:104::/64 via 2001:db8:103::2"
+ check_route6 "2001:db8:104::/64 via 2001:db8:101::2 dev veth1 metric 1024 2001:db8:104::/64 via 2001:db8:103::2 dev veth3 metric 1024"
+ log_test $? 0 "Add new route for existing prefix (w/o NLM_F_EXCL)"
+
+ # route append with same prefix adds a new route
+ # - iproute2 sets NLM_F_CREATE | NLM_F_APPEND
+ add_route6 "2001:db8:104::/64" "via 2001:db8:101::2"
+ run_cmd "$IP -6 ro append 2001:db8:104::/64 via 2001:db8:103::2"
+ check_route6 "2001:db8:104::/64 metric 1024 nexthop via 2001:db8:101::2 dev veth1 weight 1 nexthop via 2001:db8:103::2 dev veth3 weight 1"
+ log_test $? 0 "Append nexthop to existing route - gw"
+
+ add_route6 "2001:db8:104::/64" "via 2001:db8:101::2"
+ run_cmd "$IP -6 ro append 2001:db8:104::/64 dev veth3"
+ check_route6 "2001:db8:104::/64 metric 1024 nexthop via 2001:db8:101::2 dev veth1 weight 1 nexthop dev veth3 weight 1"
+ log_test $? 0 "Append nexthop to existing route - dev only"
+
+ # multipath route can not have a nexthop that is a reject route
+ add_route6 "2001:db8:104::/64" "via 2001:db8:101::2"
+ run_cmd "$IP -6 ro append unreachable 2001:db8:104::/64"
+ log_test $? 2 "Append nexthop to existing route - reject route"
+
+ # reject route can not be converted to multipath route
+ run_cmd "$IP -6 ro flush 2001:db8:104::/64"
+ run_cmd "$IP -6 ro add unreachable 2001:db8:104::/64"
+ run_cmd "$IP -6 ro append 2001:db8:104::/64 via 2001:db8:103::2"
+ log_test $? 2 "Append nexthop to existing reject route - gw"
+
+ run_cmd "$IP -6 ro flush 2001:db8:104::/64"
+ run_cmd "$IP -6 ro add unreachable 2001:db8:104::/64"
+ run_cmd "$IP -6 ro append 2001:db8:104::/64 dev veth3"
+ log_test $? 2 "Append nexthop to existing reject route - dev only"
+
+ # insert mpath directly
+ add_route6 "2001:db8:104::/64" "nexthop via 2001:db8:101::2 nexthop via 2001:db8:103::2"
+ check_route6 "2001:db8:104::/64 metric 1024 nexthop via 2001:db8:101::2 dev veth1 weight 1 nexthop via 2001:db8:103::2 dev veth3 weight 1"
+ log_test $? 0 "Add multipath route"
+
+ add_route6 "2001:db8:104::/64" "nexthop via 2001:db8:101::2 nexthop via 2001:db8:103::2"
+ run_cmd "$IP -6 ro add 2001:db8:104::/64 nexthop via 2001:db8:101::2 nexthop via 2001:db8:103::2"
+ log_test $? 2 "Attempt to add duplicate multipath route"
+
+ # insert of a second route without append but different metric
+ add_route6 "2001:db8:104::/64" "via 2001:db8:101::2"
+ run_cmd "$IP -6 ro add 2001:db8:104::/64 via 2001:db8:103::2 metric 512"
+ rc=$?
+ if [ $rc -eq 0 ]; then
+ run_cmd "$IP -6 ro add 2001:db8:104::/64 via 2001:db8:103::3 metric 256"
+ rc=$?
+ fi
+ log_test $rc 0 "Route add with different metrics"
+
+ run_cmd "$IP -6 ro del 2001:db8:104::/64 metric 512"
+ rc=$?
+ if [ $rc -eq 0 ]; then
+ check_route6 "2001:db8:104::/64 via 2001:db8:103::3 dev veth3 metric 256 2001:db8:104::/64 via 2001:db8:101::2 dev veth1 metric 1024"
+ rc=$?
+ fi
+ log_test $rc 0 "Route delete with metric"
+}
-fib_test()
+ipv6_rt_replace_single()
{
- if [ -n "$TEST" ]; then
- eval $TEST
+ # single path with single path
+ #
+ add_initial_route6 "via 2001:db8:101::2"
+ run_cmd "$IP -6 ro replace 2001:db8:104::/64 via 2001:db8:103::2"
+ check_route6 "2001:db8:104::/64 via 2001:db8:103::2 dev veth3 metric 1024"
+ log_test $? 0 "Single path with single path"
+
+ # single path with multipath
+ #
+ add_initial_route6 "nexthop via 2001:db8:101::2"
+ run_cmd "$IP -6 ro replace 2001:db8:104::/64 nexthop via 2001:db8:101::3 nexthop via 2001:db8:103::2"
+ check_route6 "2001:db8:104::/64 metric 1024 nexthop via 2001:db8:101::3 dev veth1 weight 1 nexthop via 2001:db8:103::2 dev veth3 weight 1"
+ log_test $? 0 "Single path with multipath"
+
+ # single path with reject
+ #
+ add_initial_route6 "nexthop via 2001:db8:101::2"
+ run_cmd "$IP -6 ro replace unreachable 2001:db8:104::/64"
+ check_route6 "unreachable 2001:db8:104::/64 dev lo metric 1024"
+ log_test $? 0 "Single path with reject route"
+
+ # single path with single path using MULTIPATH attribute
+ #
+ add_initial_route6 "via 2001:db8:101::2"
+ run_cmd "$IP -6 ro replace 2001:db8:104::/64 nexthop via 2001:db8:103::2"
+ check_route6 "2001:db8:104::/64 via 2001:db8:103::2 dev veth3 metric 1024"
+ log_test $? 0 "Single path with single path via multipath attribute"
+
+ # route replace fails - invalid nexthop
+ add_initial_route6 "via 2001:db8:101::2"
+ run_cmd "$IP -6 ro replace 2001:db8:104::/64 via 2001:db8:104::2"
+ if [ $? -eq 0 ]; then
+ # previous command is expected to fail so if it returns 0
+ # that means the test failed.
+ log_test 0 1 "Invalid nexthop"
else
- fib_unreg_test
- fib_down_test
- fib_carrier_test
- fib_nexthop_test
+ check_route6 "2001:db8:104::/64 via 2001:db8:101::2 dev veth1 metric 1024"
+ log_test $? 0 "Invalid nexthop"
fi
+
+ # replace non-existent route
+ # - note use of change versus replace since ip adds NLM_F_CREATE
+ # for replace
+ add_initial_route6 "via 2001:db8:101::2"
+ run_cmd "$IP -6 ro change 2001:db8:105::/64 via 2001:db8:101::2"
+ log_test $? 2 "Single path - replace of non-existent route"
+}
+
+ipv6_rt_replace_mpath()
+{
+ # multipath with multipath
+ add_initial_route6 "nexthop via 2001:db8:101::2 nexthop via 2001:db8:103::2"
+ run_cmd "$IP -6 ro replace 2001:db8:104::/64 nexthop via 2001:db8:101::3 nexthop via 2001:db8:103::3"
+ check_route6 "2001:db8:104::/64 metric 1024 nexthop via 2001:db8:101::3 dev veth1 weight 1 nexthop via 2001:db8:103::3 dev veth3 weight 1"
+ log_test $? 0 "Multipath with multipath"
+
+ # multipath with single
+ add_initial_route6 "nexthop via 2001:db8:101::2 nexthop via 2001:db8:103::2"
+ run_cmd "$IP -6 ro replace 2001:db8:104::/64 via 2001:db8:101::3"
+ check_route6 "2001:db8:104::/64 via 2001:db8:101::3 dev veth1 metric 1024"
+ log_test $? 0 "Multipath with single path"
+
+ # multipath with single
+ add_initial_route6 "nexthop via 2001:db8:101::2 nexthop via 2001:db8:103::2"
+ run_cmd "$IP -6 ro replace 2001:db8:104::/64 nexthop via 2001:db8:101::3"
+ check_route6 "2001:db8:104::/64 via 2001:db8:101::3 dev veth1 metric 1024"
+ log_test $? 0 "Multipath with single path via multipath attribute"
+
+ # multipath with reject
+ add_initial_route6 "nexthop via 2001:db8:101::2 nexthop via 2001:db8:103::2"
+ run_cmd "$IP -6 ro replace unreachable 2001:db8:104::/64"
+ check_route6 "unreachable 2001:db8:104::/64 dev lo metric 1024"
+ log_test $? 0 "Multipath with reject route"
+
+ # route replace fails - invalid nexthop 1
+ add_initial_route6 "nexthop via 2001:db8:101::2 nexthop via 2001:db8:103::2"
+ run_cmd "$IP -6 ro replace 2001:db8:104::/64 nexthop via 2001:db8:111::3 nexthop via 2001:db8:103::3"
+ check_route6 "2001:db8:104::/64 metric 1024 nexthop via 2001:db8:101::2 dev veth1 weight 1 nexthop via 2001:db8:103::2 dev veth3 weight 1"
+ log_test $? 0 "Multipath - invalid first nexthop"
+
+ # route replace fails - invalid nexthop 2
+ add_initial_route6 "nexthop via 2001:db8:101::2 nexthop via 2001:db8:103::2"
+ run_cmd "$IP -6 ro replace 2001:db8:104::/64 nexthop via 2001:db8:101::3 nexthop via 2001:db8:113::3"
+ check_route6 "2001:db8:104::/64 metric 1024 nexthop via 2001:db8:101::2 dev veth1 weight 1 nexthop via 2001:db8:103::2 dev veth3 weight 1"
+ log_test $? 0 "Multipath - invalid second nexthop"
+
+ # multipath non-existent route
+ add_initial_route6 "nexthop via 2001:db8:101::2 nexthop via 2001:db8:103::2"
+ run_cmd "$IP -6 ro change 2001:db8:105::/64 nexthop via 2001:db8:101::3 nexthop via 2001:db8:103::3"
+ log_test $? 2 "Multipath - replace of non-existent route"
+}
+
+ipv6_rt_replace()
+{
+ echo
+ echo "IPv6 route replace tests"
+
+ ipv6_rt_replace_single
+ ipv6_rt_replace_mpath
+}
+
+ipv6_route_test()
+{
+ route_setup
+
+ ipv6_rt_add
+ ipv6_rt_replace
+
+ route_cleanup
}
+ip_addr_metric_check()
+{
+ ip addr help 2>&1 | grep -q metric
+ if [ $? -ne 0 ]; then
+ echo "iproute2 command does not support metric for addresses. Skipping test"
+ return 1
+ fi
+
+ return 0
+}
+
+ipv6_addr_metric_test()
+{
+ local rc
+
+ echo
+ echo "IPv6 prefix route tests"
+
+ ip_addr_metric_check || return 1
+
+ setup
+
+ set -e
+ $IP li add dummy1 type dummy
+ $IP li add dummy2 type dummy
+ $IP li set dummy1 up
+ $IP li set dummy2 up
+
+ # default entry is metric 256
+ run_cmd "$IP -6 addr add dev dummy1 2001:db8:104::1/64"
+ run_cmd "$IP -6 addr add dev dummy2 2001:db8:104::2/64"
+ set +e
+
+ check_route6 "2001:db8:104::/64 dev dummy1 proto kernel metric 256 2001:db8:104::/64 dev dummy2 proto kernel metric 256"
+ log_test $? 0 "Default metric"
+
+ set -e
+ run_cmd "$IP -6 addr flush dev dummy1"
+ run_cmd "$IP -6 addr add dev dummy1 2001:db8:104::1/64 metric 257"
+ set +e
+
+ check_route6 "2001:db8:104::/64 dev dummy2 proto kernel metric 256 2001:db8:104::/64 dev dummy1 proto kernel metric 257"
+ log_test $? 0 "User specified metric on first device"
+
+ set -e
+ run_cmd "$IP -6 addr flush dev dummy2"
+ run_cmd "$IP -6 addr add dev dummy2 2001:db8:104::2/64 metric 258"
+ set +e
+
+ check_route6 "2001:db8:104::/64 dev dummy1 proto kernel metric 257 2001:db8:104::/64 dev dummy2 proto kernel metric 258"
+ log_test $? 0 "User specified metric on second device"
+
+ run_cmd "$IP -6 addr del dev dummy1 2001:db8:104::1/64 metric 257"
+ rc=$?
+ if [ $rc -eq 0 ]; then
+ check_route6 "2001:db8:104::/64 dev dummy2 proto kernel metric 258"
+ rc=$?
+ fi
+ log_test $rc 0 "Delete of address on first device"
+
+ run_cmd "$IP -6 addr change dev dummy2 2001:db8:104::2/64 metric 259"
+ rc=$?
+ if [ $rc -eq 0 ]; then
+ check_route6 "2001:db8:104::/64 dev dummy2 proto kernel metric 259"
+ rc=$?
+ fi
+ log_test $rc 0 "Modify metric of address"
+
+ # verify prefix route removed on down
+ run_cmd "ip netns exec testns sysctl -qw net.ipv6.conf.all.keep_addr_on_down=1"
+ run_cmd "$IP li set dev dummy2 down"
+ rc=$?
+ if [ $rc -eq 0 ]; then
+ check_route6 ""
+ rc=$?
+ fi
+ log_test $rc 0 "Prefix route removed on link down"
+
+ # verify prefix route re-inserted with assigned metric
+ run_cmd "$IP li set dev dummy2 up"
+ rc=$?
+ if [ $rc -eq 0 ]; then
+ check_route6 "2001:db8:104::/64 dev dummy2 proto kernel metric 259"
+ rc=$?
+ fi
+ log_test $rc 0 "Prefix route with metric on link up"
+
+ $IP li del dummy1
+ $IP li del dummy2
+ cleanup
+}
+
+# add route for a prefix, flushing any existing routes first
+# expected to be the first step of a test
+add_route()
+{
+ local pfx="$1"
+ local nh="$2"
+ local out
+
+ if [ "$VERBOSE" = "1" ]; then
+ echo
+ echo " ##################################################"
+ echo
+ fi
+
+ run_cmd "$IP ro flush ${pfx}"
+ [ $? -ne 0 ] && exit 1
+
+ out=$($IP ro ls match ${pfx})
+ if [ -n "$out" ]; then
+ echo "Failed to flush routes for prefix used for tests."
+ exit 1
+ fi
+
+ run_cmd "$IP ro add ${pfx} ${nh}"
+ if [ $? -ne 0 ]; then
+ echo "Failed to add initial route for test."
+ exit 1
+ fi
+}
+
+# add initial route - used in replace route tests
+add_initial_route()
+{
+ add_route "172.16.104.0/24" "$1"
+}
+
+check_route()
+{
+ local pfx="172.16.104.0/24"
+ local expected="$1"
+ local out
+ local rc=0
+
+ out=$($IP ro ls match ${pfx})
+ [ "${out}" = "${expected}" ] && return 0
+
+ if [ -z "${out}" ]; then
+ if [ "$VERBOSE" = "1" ]; then
+ printf "\nNo route entry found\n"
+ printf "Expected:\n"
+ printf " ${expected}\n"
+ fi
+ return 1
+ fi
+
+ # tricky way to convert output to 1-line without ip's
+ # messy '\'; this drops all extra white space
+ out=$(echo ${out})
+ if [ "${out}" != "${expected}" ]; then
+ rc=1
+ if [ "${VERBOSE}" = "1" ]; then
+ printf " Unexpected route entry. Have:\n"
+ printf " ${out}\n"
+ printf " Expected:\n"
+ printf " ${expected}\n\n"
+ fi
+ fi
+
+ return $rc
+}
+
+# assumption is that basic add of a single path route works
+# otherwise just adding an address on an interface is broken
+ipv4_rt_add()
+{
+ local rc
+
+ echo
+ echo "IPv4 route add / append tests"
+
+ # route add same prefix - fails with EEXISTS b/c ip adds NLM_F_EXCL
+ add_route "172.16.104.0/24" "via 172.16.101.2"
+ run_cmd "$IP ro add 172.16.104.0/24 via 172.16.103.2"
+ log_test $? 2 "Attempt to add duplicate route - gw"
+
+ # route add same prefix - fails with EEXISTS b/c ip adds NLM_F_EXCL
+ add_route "172.16.104.0/24" "via 172.16.101.2"
+ run_cmd "$IP ro add 172.16.104.0/24 dev veth3"
+ log_test $? 2 "Attempt to add duplicate route - dev only"
+
+ # route add same prefix - fails with EEXISTS b/c ip adds NLM_F_EXCL
+ add_route "172.16.104.0/24" "via 172.16.101.2"
+ run_cmd "$IP ro add unreachable 172.16.104.0/24"
+ log_test $? 2 "Attempt to add duplicate route - reject route"
+
+ # iproute2 prepend only sets NLM_F_CREATE
+ # - adds a new route; does NOT convert existing route to ECMP
+ add_route "172.16.104.0/24" "via 172.16.101.2"
+ run_cmd "$IP ro prepend 172.16.104.0/24 via 172.16.103.2"
+ check_route "172.16.104.0/24 via 172.16.103.2 dev veth3 172.16.104.0/24 via 172.16.101.2 dev veth1"
+ log_test $? 0 "Add new nexthop for existing prefix"
+
+ # route append with same prefix adds a new route
+ # - iproute2 sets NLM_F_CREATE | NLM_F_APPEND
+ add_route "172.16.104.0/24" "via 172.16.101.2"
+ run_cmd "$IP ro append 172.16.104.0/24 via 172.16.103.2"
+ check_route "172.16.104.0/24 via 172.16.101.2 dev veth1 172.16.104.0/24 via 172.16.103.2 dev veth3"
+ log_test $? 0 "Append nexthop to existing route - gw"
+
+ add_route "172.16.104.0/24" "via 172.16.101.2"
+ run_cmd "$IP ro append 172.16.104.0/24 dev veth3"
+ check_route "172.16.104.0/24 via 172.16.101.2 dev veth1 172.16.104.0/24 dev veth3 scope link"
+ log_test $? 0 "Append nexthop to existing route - dev only"
+
+ add_route "172.16.104.0/24" "via 172.16.101.2"
+ run_cmd "$IP ro append unreachable 172.16.104.0/24"
+ check_route "172.16.104.0/24 via 172.16.101.2 dev veth1 unreachable 172.16.104.0/24"
+ log_test $? 0 "Append nexthop to existing route - reject route"
+
+ run_cmd "$IP ro flush 172.16.104.0/24"
+ run_cmd "$IP ro add unreachable 172.16.104.0/24"
+ run_cmd "$IP ro append 172.16.104.0/24 via 172.16.103.2"
+ check_route "unreachable 172.16.104.0/24 172.16.104.0/24 via 172.16.103.2 dev veth3"
+ log_test $? 0 "Append nexthop to existing reject route - gw"
+
+ run_cmd "$IP ro flush 172.16.104.0/24"
+ run_cmd "$IP ro add unreachable 172.16.104.0/24"
+ run_cmd "$IP ro append 172.16.104.0/24 dev veth3"
+ check_route "unreachable 172.16.104.0/24 172.16.104.0/24 dev veth3 scope link"
+ log_test $? 0 "Append nexthop to existing reject route - dev only"
+
+ # insert mpath directly
+ add_route "172.16.104.0/24" "nexthop via 172.16.101.2 nexthop via 172.16.103.2"
+ check_route "172.16.104.0/24 nexthop via 172.16.101.2 dev veth1 weight 1 nexthop via 172.16.103.2 dev veth3 weight 1"
+ log_test $? 0 "add multipath route"
+
+ add_route "172.16.104.0/24" "nexthop via 172.16.101.2 nexthop via 172.16.103.2"
+ run_cmd "$IP ro add 172.16.104.0/24 nexthop via 172.16.101.2 nexthop via 172.16.103.2"
+ log_test $? 2 "Attempt to add duplicate multipath route"
+
+ # insert of a second route without append but different metric
+ add_route "172.16.104.0/24" "via 172.16.101.2"
+ run_cmd "$IP ro add 172.16.104.0/24 via 172.16.103.2 metric 512"
+ rc=$?
+ if [ $rc -eq 0 ]; then
+ run_cmd "$IP ro add 172.16.104.0/24 via 172.16.103.3 metric 256"
+ rc=$?
+ fi
+ log_test $rc 0 "Route add with different metrics"
+
+ run_cmd "$IP ro del 172.16.104.0/24 metric 512"
+ rc=$?
+ if [ $rc -eq 0 ]; then
+ check_route "172.16.104.0/24 via 172.16.101.2 dev veth1 172.16.104.0/24 via 172.16.103.3 dev veth3 metric 256"
+ rc=$?
+ fi
+ log_test $rc 0 "Route delete with metric"
+}
+
+ipv4_rt_replace_single()
+{
+ # single path with single path
+ #
+ add_initial_route "via 172.16.101.2"
+ run_cmd "$IP ro replace 172.16.104.0/24 via 172.16.103.2"
+ check_route "172.16.104.0/24 via 172.16.103.2 dev veth3"
+ log_test $? 0 "Single path with single path"
+
+ # single path with multipath
+ #
+ add_initial_route "nexthop via 172.16.101.2"
+ run_cmd "$IP ro replace 172.16.104.0/24 nexthop via 172.16.101.3 nexthop via 172.16.103.2"
+ check_route "172.16.104.0/24 nexthop via 172.16.101.3 dev veth1 weight 1 nexthop via 172.16.103.2 dev veth3 weight 1"
+ log_test $? 0 "Single path with multipath"
+
+ # single path with reject
+ #
+ add_initial_route "nexthop via 172.16.101.2"
+ run_cmd "$IP ro replace unreachable 172.16.104.0/24"
+ check_route "unreachable 172.16.104.0/24"
+ log_test $? 0 "Single path with reject route"
+
+ # single path with single path using MULTIPATH attribute
+ #
+ add_initial_route "via 172.16.101.2"
+ run_cmd "$IP ro replace 172.16.104.0/24 nexthop via 172.16.103.2"
+ check_route "172.16.104.0/24 via 172.16.103.2 dev veth3"
+ log_test $? 0 "Single path with single path via multipath attribute"
+
+ # route replace fails - invalid nexthop
+ add_initial_route "via 172.16.101.2"
+ run_cmd "$IP ro replace 172.16.104.0/24 via 2001:db8:104::2"
+ if [ $? -eq 0 ]; then
+ # previous command is expected to fail so if it returns 0
+ # that means the test failed.
+ log_test 0 1 "Invalid nexthop"
+ else
+ check_route "172.16.104.0/24 via 172.16.101.2 dev veth1"
+ log_test $? 0 "Invalid nexthop"
+ fi
+
+ # replace non-existent route
+ # - note use of change versus replace since ip adds NLM_F_CREATE
+ # for replace
+ add_initial_route "via 172.16.101.2"
+ run_cmd "$IP ro change 172.16.105.0/24 via 172.16.101.2"
+ log_test $? 2 "Single path - replace of non-existent route"
+}
+
+ipv4_rt_replace_mpath()
+{
+ # multipath with multipath
+ add_initial_route "nexthop via 172.16.101.2 nexthop via 172.16.103.2"
+ run_cmd "$IP ro replace 172.16.104.0/24 nexthop via 172.16.101.3 nexthop via 172.16.103.3"
+ check_route "172.16.104.0/24 nexthop via 172.16.101.3 dev veth1 weight 1 nexthop via 172.16.103.3 dev veth3 weight 1"
+ log_test $? 0 "Multipath with multipath"
+
+ # multipath with single
+ add_initial_route "nexthop via 172.16.101.2 nexthop via 172.16.103.2"
+ run_cmd "$IP ro replace 172.16.104.0/24 via 172.16.101.3"
+ check_route "172.16.104.0/24 via 172.16.101.3 dev veth1"
+ log_test $? 0 "Multipath with single path"
+
+ # multipath with single
+ add_initial_route "nexthop via 172.16.101.2 nexthop via 172.16.103.2"
+ run_cmd "$IP ro replace 172.16.104.0/24 nexthop via 172.16.101.3"
+ check_route "172.16.104.0/24 via 172.16.101.3 dev veth1"
+ log_test $? 0 "Multipath with single path via multipath attribute"
+
+ # multipath with reject
+ add_initial_route "nexthop via 172.16.101.2 nexthop via 172.16.103.2"
+ run_cmd "$IP ro replace unreachable 172.16.104.0/24"
+ check_route "unreachable 172.16.104.0/24"
+ log_test $? 0 "Multipath with reject route"
+
+ # route replace fails - invalid nexthop 1
+ add_initial_route "nexthop via 172.16.101.2 nexthop via 172.16.103.2"
+ run_cmd "$IP ro replace 172.16.104.0/24 nexthop via 172.16.111.3 nexthop via 172.16.103.3"
+ check_route "172.16.104.0/24 nexthop via 172.16.101.2 dev veth1 weight 1 nexthop via 172.16.103.2 dev veth3 weight 1"
+ log_test $? 0 "Multipath - invalid first nexthop"
+
+ # route replace fails - invalid nexthop 2
+ add_initial_route "nexthop via 172.16.101.2 nexthop via 172.16.103.2"
+ run_cmd "$IP ro replace 172.16.104.0/24 nexthop via 172.16.101.3 nexthop via 172.16.113.3"
+ check_route "172.16.104.0/24 nexthop via 172.16.101.2 dev veth1 weight 1 nexthop via 172.16.103.2 dev veth3 weight 1"
+ log_test $? 0 "Multipath - invalid second nexthop"
+
+ # multipath non-existent route
+ add_initial_route "nexthop via 172.16.101.2 nexthop via 172.16.103.2"
+ run_cmd "$IP ro change 172.16.105.0/24 nexthop via 172.16.101.3 nexthop via 172.16.103.3"
+ log_test $? 2 "Multipath - replace of non-existent route"
+}
+
+ipv4_rt_replace()
+{
+ echo
+ echo "IPv4 route replace tests"
+
+ ipv4_rt_replace_single
+ ipv4_rt_replace_mpath
+}
+
+ipv4_route_test()
+{
+ route_setup
+
+ ipv4_rt_add
+ ipv4_rt_replace
+
+ route_cleanup
+}
+
+ipv4_addr_metric_test()
+{
+ local rc
+
+ echo
+ echo "IPv4 prefix route tests"
+
+ ip_addr_metric_check || return 1
+
+ setup
+
+ set -e
+ $IP li add dummy1 type dummy
+ $IP li add dummy2 type dummy
+ $IP li set dummy1 up
+ $IP li set dummy2 up
+
+ # default entry is metric 256
+ run_cmd "$IP addr add dev dummy1 172.16.104.1/24"
+ run_cmd "$IP addr add dev dummy2 172.16.104.2/24"
+ set +e
+
+ check_route "172.16.104.0/24 dev dummy1 proto kernel scope link src 172.16.104.1 172.16.104.0/24 dev dummy2 proto kernel scope link src 172.16.104.2"
+ log_test $? 0 "Default metric"
+
+ set -e
+ run_cmd "$IP addr flush dev dummy1"
+ run_cmd "$IP addr add dev dummy1 172.16.104.1/24 metric 257"
+ set +e
+
+ check_route "172.16.104.0/24 dev dummy2 proto kernel scope link src 172.16.104.2 172.16.104.0/24 dev dummy1 proto kernel scope link src 172.16.104.1 metric 257"
+ log_test $? 0 "User specified metric on first device"
+
+ set -e
+ run_cmd "$IP addr flush dev dummy2"
+ run_cmd "$IP addr add dev dummy2 172.16.104.2/24 metric 258"
+ set +e
+
+ check_route "172.16.104.0/24 dev dummy1 proto kernel scope link src 172.16.104.1 metric 257 172.16.104.0/24 dev dummy2 proto kernel scope link src 172.16.104.2 metric 258"
+ log_test $? 0 "User specified metric on second device"
+
+ run_cmd "$IP addr del dev dummy1 172.16.104.1/24 metric 257"
+ rc=$?
+ if [ $rc -eq 0 ]; then
+ check_route "172.16.104.0/24 dev dummy2 proto kernel scope link src 172.16.104.2 metric 258"
+ rc=$?
+ fi
+ log_test $rc 0 "Delete of address on first device"
+
+ run_cmd "$IP addr change dev dummy2 172.16.104.2/24 metric 259"
+ rc=$?
+ if [ $rc -eq 0 ]; then
+ check_route "172.16.104.0/24 dev dummy2 proto kernel scope link src 172.16.104.2 metric 259"
+ rc=$?
+ fi
+ log_test $rc 0 "Modify metric of address"
+
+ # verify prefix route removed on down
+ run_cmd "$IP li set dev dummy2 down"
+ rc=$?
+ if [ $rc -eq 0 ]; then
+ check_route ""
+ rc=$?
+ fi
+ log_test $rc 0 "Prefix route removed on link down"
+
+ # verify prefix route re-inserted with assigned metric
+ run_cmd "$IP li set dev dummy2 up"
+ rc=$?
+ if [ $rc -eq 0 ]; then
+ check_route "172.16.104.0/24 dev dummy2 proto kernel scope link src 172.16.104.2 metric 259"
+ rc=$?
+ fi
+ log_test $rc 0 "Prefix route with metric on link up"
+
+ $IP li del dummy1
+ $IP li del dummy2
+ cleanup
+}
+
+################################################################################
+# usage
+
+usage()
+{
+ cat <<EOF
+usage: ${0##*/} OPTS
+
+ -t <test> Test(s) to run (default: all)
+ (options: $TESTS)
+ -p Pause on fail
+ -P Pause after each test before cleanup
+ -v verbose mode (show commands and output)
+EOF
+}
+
+################################################################################
+# main
+
+while getopts :t:pPhv o
+do
+ case $o in
+ t) TESTS=$OPTARG;;
+ p) PAUSE_ON_FAIL=yes;;
+ P) PAUSE=yes;;
+ v) VERBOSE=$(($VERBOSE + 1));;
+ h) usage; exit 0;;
+ *) usage; exit 1;;
+ esac
+done
+
+PEER_CMD="ip netns exec ${PEER_NS}"
+
+# make sure we don't pause twice
+[ "${PAUSE}" = "yes" ] && PAUSE_ON_FAIL=no
+
if [ "$(id -u)" -ne 0 ];then
echo "SKIP: Need root privileges"
exit $ksft_skip;
@@ -598,6 +1415,25 @@ fi
# start clean
cleanup &> /dev/null
-fib_test
+for t in $TESTS
+do
+ case $t in
+ fib_unreg_test|unregister) fib_unreg_test;;
+ fib_down_test|down) fib_down_test;;
+ fib_carrier_test|carrier) fib_carrier_test;;
+ fib_nexthop_test|nexthop) fib_nexthop_test;;
+ ipv6_route_test|ipv6_rt) ipv6_route_test;;
+ ipv4_route_test|ipv4_rt) ipv4_route_test;;
+ ipv6_addr_metric) ipv6_addr_metric_test;;
+ ipv4_addr_metric) ipv4_addr_metric_test;;
+
+ help) echo "Test names: $TESTS"; exit 0;;
+ esac
+done
+
+if [ "$TESTS" != "none" ]; then
+ printf "\nTests passed: %3d\n" ${nsuccess}
+ printf "Tests failed: %3d\n" ${nfail}
+fi
exit $ret
diff --git a/tools/testing/selftests/net/forwarding/bridge_vlan_aware.sh b/tools/testing/selftests/net/forwarding/bridge_vlan_aware.sh
index 75d9224..d8313d0 100755
--- a/tools/testing/selftests/net/forwarding/bridge_vlan_aware.sh
+++ b/tools/testing/selftests/net/forwarding/bridge_vlan_aware.sh
@@ -1,6 +1,7 @@
#!/bin/bash
# SPDX-License-Identifier: GPL-2.0
+ALL_TESTS="ping_ipv4 ping_ipv6 learning flooding"
NUM_NETIFS=4
CHECK_TC="yes"
source lib.sh
@@ -75,14 +76,31 @@ cleanup()
vrf_cleanup
}
+ping_ipv4()
+{
+ ping_test $h1 192.0.2.2
+}
+
+ping_ipv6()
+{
+ ping6_test $h1 2001:db8:1::2
+}
+
+learning()
+{
+ learning_test "br0" $swp1 $h1 $h2
+}
+
+flooding()
+{
+ flood_test $swp2 $h1 $h2
+}
+
trap cleanup EXIT
setup_prepare
setup_wait
-ping_test $h1 192.0.2.2
-ping6_test $h1 2001:db8:1::2
-learning_test "br0" $swp1 $h1 $h2
-flood_test $swp2 $h1 $h2
+tests_run
exit $EXIT_STATUS
diff --git a/tools/testing/selftests/net/forwarding/bridge_vlan_unaware.sh b/tools/testing/selftests/net/forwarding/bridge_vlan_unaware.sh
index 1cddf06..c15c6c8 100755
--- a/tools/testing/selftests/net/forwarding/bridge_vlan_unaware.sh
+++ b/tools/testing/selftests/net/forwarding/bridge_vlan_unaware.sh
@@ -1,6 +1,7 @@
#!/bin/bash
# SPDX-License-Identifier: GPL-2.0
+ALL_TESTS="ping_ipv4 ping_ipv6 learning flooding"
NUM_NETIFS=4
source lib.sh
@@ -73,14 +74,31 @@ cleanup()
vrf_cleanup
}
+ping_ipv4()
+{
+ ping_test $h1 192.0.2.2
+}
+
+ping_ipv6()
+{
+ ping6_test $h1 2001:db8:1::2
+}
+
+learning()
+{
+ learning_test "br0" $swp1 $h1 $h2
+}
+
+flooding()
+{
+ flood_test $swp2 $h1 $h2
+}
+
trap cleanup EXIT
setup_prepare
setup_wait
-ping_test $h1 192.0.2.2
-ping6_test $h1 2001:db8:1::2
-learning_test "br0" $swp1 $h1 $h2
-flood_test $swp2 $h1 $h2
+tests_run
exit $EXIT_STATUS
diff --git a/tools/testing/selftests/net/forwarding/lib.sh b/tools/testing/selftests/net/forwarding/lib.sh
index 1ac6c62..7b18a53 100644
--- a/tools/testing/selftests/net/forwarding/lib.sh
+++ b/tools/testing/selftests/net/forwarding/lib.sh
@@ -321,6 +321,50 @@ simple_if_fini()
vrf_destroy $vrf_name
}
+tunnel_create()
+{
+ local name=$1; shift
+ local type=$1; shift
+ local local=$1; shift
+ local remote=$1; shift
+
+ ip link add name $name type $type \
+ local $local remote $remote "$@"
+ ip link set dev $name up
+}
+
+tunnel_destroy()
+{
+ local name=$1; shift
+
+ ip link del dev $name
+}
+
+vlan_create()
+{
+ local if_name=$1; shift
+ local vid=$1; shift
+ local vrf=$1; shift
+ local ips=("${@}")
+ local name=$if_name.$vid
+
+ ip link add name $name link $if_name type vlan id $vid
+ if [ "$vrf" != "" ]; then
+ ip link set dev $name master $vrf
+ fi
+ ip link set dev $name up
+ __addr_add_del $name add "${ips[@]}"
+}
+
+vlan_destroy()
+{
+ local if_name=$1; shift
+ local vid=$1; shift
+ local name=$if_name.$vid
+
+ ip link del dev $name
+}
+
master_name_get()
{
local if_name=$1
@@ -335,6 +379,15 @@ link_stats_tx_packets_get()
ip -j -s link show dev $if_name | jq '.[]["stats64"]["tx"]["packets"]'
}
+tc_rule_stats_get()
+{
+ local dev=$1; shift
+ local pref=$1; shift
+
+ tc -j -s filter show dev $dev ingress pref $pref |
+ jq '.[1].options.actions[].stats.packets'
+}
+
mac_get()
{
local if_name=$1
@@ -353,19 +406,33 @@ bridge_ageing_time_get()
echo $((ageing_time / 100))
}
-forwarding_enable()
+declare -A SYSCTL_ORIG
+sysctl_set()
+{
+ local key=$1; shift
+ local value=$1; shift
+
+ SYSCTL_ORIG[$key]=$(sysctl -n $key)
+ sysctl -qw $key=$value
+}
+
+sysctl_restore()
{
- ipv4_fwd=$(sysctl -n net.ipv4.conf.all.forwarding)
- ipv6_fwd=$(sysctl -n net.ipv6.conf.all.forwarding)
+ local key=$1; shift
- sysctl -q -w net.ipv4.conf.all.forwarding=1
- sysctl -q -w net.ipv6.conf.all.forwarding=1
+ sysctl -qw $key=${SYSCTL_ORIG["$key"]}
+}
+
+forwarding_enable()
+{
+ sysctl_set net.ipv4.conf.all.forwarding 1
+ sysctl_set net.ipv6.conf.all.forwarding 1
}
forwarding_restore()
{
- sysctl -q -w net.ipv6.conf.all.forwarding=$ipv6_fwd
- sysctl -q -w net.ipv4.conf.all.forwarding=$ipv4_fwd
+ sysctl_restore net.ipv6.conf.all.forwarding
+ sysctl_restore net.ipv4.conf.all.forwarding
}
tc_offload_check()
@@ -381,6 +448,115 @@ tc_offload_check()
return 0
}
+trap_install()
+{
+ local dev=$1; shift
+ local direction=$1; shift
+
+ # For slow-path testing, we need to install a trap to get to
+ # slow path the packets that would otherwise be switched in HW.
+ tc filter add dev $dev $direction pref 1 flower skip_sw action trap
+}
+
+trap_uninstall()
+{
+ local dev=$1; shift
+ local direction=$1; shift
+
+ tc filter del dev $dev $direction pref 1 flower skip_sw
+}
+
+slow_path_trap_install()
+{
+ if [ "${tcflags/skip_hw}" != "$tcflags" ]; then
+ trap_install "$@"
+ fi
+}
+
+slow_path_trap_uninstall()
+{
+ if [ "${tcflags/skip_hw}" != "$tcflags" ]; then
+ trap_uninstall "$@"
+ fi
+}
+
+__icmp_capture_add_del()
+{
+ local add_del=$1; shift
+ local pref=$1; shift
+ local vsuf=$1; shift
+ local tundev=$1; shift
+ local filter=$1; shift
+
+ tc filter $add_del dev "$tundev" ingress \
+ proto ip$vsuf pref $pref \
+ flower ip_proto icmp$vsuf $filter \
+ action pass
+}
+
+icmp_capture_install()
+{
+ __icmp_capture_add_del add 100 "" "$@"
+}
+
+icmp_capture_uninstall()
+{
+ __icmp_capture_add_del del 100 "" "$@"
+}
+
+icmp6_capture_install()
+{
+ __icmp_capture_add_del add 100 v6 "$@"
+}
+
+icmp6_capture_uninstall()
+{
+ __icmp_capture_add_del del 100 v6 "$@"
+}
+
+__vlan_capture_add_del()
+{
+ local add_del=$1; shift
+ local pref=$1; shift
+ local dev=$1; shift
+ local filter=$1; shift
+
+ tc filter $add_del dev "$dev" ingress \
+ proto 802.1q pref $pref \
+ flower $filter \
+ action pass
+}
+
+vlan_capture_install()
+{
+ __vlan_capture_add_del add 100 "$@"
+}
+
+vlan_capture_uninstall()
+{
+ __vlan_capture_add_del del 100 "$@"
+}
+
+matchall_sink_create()
+{
+ local dev=$1; shift
+
+ tc qdisc add dev $dev clsact
+ tc filter add dev $dev ingress \
+ pref 10000 \
+ matchall \
+ action drop
+}
+
+tests_run()
+{
+ local current_test
+
+ for current_test in ${TESTS:-$ALL_TESTS}; do
+ $current_test
+ done
+}
+
##############################################################################
# Tests
diff --git a/tools/testing/selftests/net/forwarding/mirror_gre.sh b/tools/testing/selftests/net/forwarding/mirror_gre.sh
new file mode 100755
index 0000000..e6fd7a1
--- /dev/null
+++ b/tools/testing/selftests/net/forwarding/mirror_gre.sh
@@ -0,0 +1,159 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+# This test uses standard topology for testing gretap. See
+# mirror_gre_topo_lib.sh for more details.
+#
+# Test for "tc action mirred egress mirror" when the device to mirror to is a
+# gretap or ip6gretap netdevice. Expect that the packets come out encapsulated,
+# and another gretap / ip6gretap netdevice is then capable of decapsulating the
+# traffic. Test that the payload is what is expected (ICMP ping request or
+# reply, depending on test).
+
+ALL_TESTS="
+ test_gretap
+ test_ip6gretap
+ test_gretap_mac
+ test_ip6gretap_mac
+ test_two_spans
+"
+
+NUM_NETIFS=6
+source lib.sh
+source mirror_lib.sh
+source mirror_gre_lib.sh
+source mirror_gre_topo_lib.sh
+
+setup_prepare()
+{
+ h1=${NETIFS[p1]}
+ swp1=${NETIFS[p2]}
+
+ swp2=${NETIFS[p3]}
+ h2=${NETIFS[p4]}
+
+ swp3=${NETIFS[p5]}
+ h3=${NETIFS[p6]}
+
+ vrf_prepare
+ mirror_gre_topo_create
+
+ ip address add dev $swp3 192.0.2.129/28
+ ip address add dev $h3 192.0.2.130/28
+
+ ip address add dev $swp3 2001:db8:2::1/64
+ ip address add dev $h3 2001:db8:2::2/64
+}
+
+cleanup()
+{
+ pre_cleanup
+
+ ip address del dev $h3 2001:db8:2::2/64
+ ip address del dev $swp3 2001:db8:2::1/64
+
+ ip address del dev $h3 192.0.2.130/28
+ ip address del dev $swp3 192.0.2.129/28
+
+ mirror_gre_topo_destroy
+ vrf_cleanup
+}
+
+test_span_gre_mac()
+{
+ local tundev=$1; shift
+ local direction=$1; shift
+ local prot=$1; shift
+ local what=$1; shift
+
+ local swp3mac=$(mac_get $swp3)
+ local h3mac=$(mac_get $h3)
+
+ RET=0
+
+ mirror_install $swp1 $direction $tundev "matchall $tcflags"
+ tc filter add dev $h3 ingress pref 77 prot $prot \
+ flower ip_proto 0x2f src_mac $swp3mac dst_mac $h3mac \
+ action pass
+
+ mirror_test v$h1 192.0.2.1 192.0.2.2 $h3 77 10
+
+ tc filter del dev $h3 ingress pref 77
+ mirror_uninstall $swp1 $direction
+
+ log_test "$direction $what: envelope MAC ($tcflags)"
+}
+
+test_two_spans()
+{
+ RET=0
+
+ mirror_install $swp1 ingress gt4 "matchall $tcflags"
+ mirror_install $swp1 egress gt6 "matchall $tcflags"
+ quick_test_span_gre_dir gt4 ingress
+ quick_test_span_gre_dir gt6 egress
+
+ mirror_uninstall $swp1 ingress
+ fail_test_span_gre_dir gt4 ingress
+ quick_test_span_gre_dir gt6 egress
+
+ mirror_install $swp1 ingress gt4 "matchall $tcflags"
+ mirror_uninstall $swp1 egress
+ quick_test_span_gre_dir gt4 ingress
+ fail_test_span_gre_dir gt6 egress
+
+ mirror_uninstall $swp1 ingress
+ log_test "two simultaneously configured mirrors ($tcflags)"
+}
+
+test_gretap()
+{
+ full_test_span_gre_dir gt4 ingress 8 0 "mirror to gretap"
+ full_test_span_gre_dir gt4 egress 0 8 "mirror to gretap"
+}
+
+test_ip6gretap()
+{
+ full_test_span_gre_dir gt6 ingress 8 0 "mirror to ip6gretap"
+ full_test_span_gre_dir gt6 egress 0 8 "mirror to ip6gretap"
+}
+
+test_gretap_mac()
+{
+ test_span_gre_mac gt4 ingress ip "mirror to gretap"
+ test_span_gre_mac gt4 egress ip "mirror to gretap"
+}
+
+test_ip6gretap_mac()
+{
+ test_span_gre_mac gt6 ingress ipv6 "mirror to ip6gretap"
+ test_span_gre_mac gt6 egress ipv6 "mirror to ip6gretap"
+}
+
+test_all()
+{
+ slow_path_trap_install $swp1 ingress
+ slow_path_trap_install $swp1 egress
+
+ tests_run
+
+ slow_path_trap_uninstall $swp1 egress
+ slow_path_trap_uninstall $swp1 ingress
+}
+
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+
+tcflags="skip_hw"
+test_all
+
+if ! tc_offload_check; then
+ echo "WARN: Could not test offloaded functionality"
+else
+ tcflags="skip_sw"
+ test_all
+fi
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/net/forwarding/mirror_gre_bound.sh b/tools/testing/selftests/net/forwarding/mirror_gre_bound.sh
new file mode 100755
index 0000000..360ca13
--- /dev/null
+++ b/tools/testing/selftests/net/forwarding/mirror_gre_bound.sh
@@ -0,0 +1,226 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+# +---------------------+ +---------------------+
+# | H1 | | H2 |
+# | + $h1 | | $h2 + |
+# | | 192.0.2.1/28 | | 192.0.2.2/28 | |
+# +-----|---------------+ +---------------|-----+
+# | |
+# +-----|-------------------------------------------------------------|-----+
+# | SW o--> mirror | |
+# | +---|-------------------------------------------------------------|---+ |
+# | | + $swp1 BR $swp2 + | |
+# | +---------------------------------------------------------------------+ |
+# | |
+# | +---------------------------------------------------------------------+ |
+# | | OL + gt6 (ip6gretap) + gt4 (gretap) | |
+# | | : loc=2001:db8:2::1 : loc=192.0.2.129 | |
+# | | : rem=2001:db8:2::2 : rem=192.0.2.130 | |
+# | | : ttl=100 : ttl=100 | |
+# | | : tos=inherit : tos=inherit | |
+# | +-------------------------:--|-------------------:--|-----------------+ |
+# | : | : | |
+# | +-------------------------:--|-------------------:--|-----------------+ |
+# | | UL : |,---------------------' | |
+# | | + $swp3 : || : | |
+# | | | 192.0.2.129/28 : vv : | |
+# | | | 2001:db8:2::1/64 : + ul (dummy) : | |
+# | +---|---------------------:----------------------:--------------------+ |
+# +-----|---------------------:----------------------:----------------------+
+# | : :
+# +-----|---------------------:----------------------:----------------------+
+# | H3 + $h3 + h3-gt6 (ip6gretap) + h3-gt4 (gretap) |
+# | 192.0.2.130/28 loc=2001:db8:2::2 loc=192.0.2.130 |
+# | 2001:db8:2::2/64 rem=2001:db8:2::1 rem=192.0.2.129 |
+# | ttl=100 ttl=100 |
+# | tos=inherit tos=inherit |
+# | |
+# +-------------------------------------------------------------------------+
+#
+# This tests mirroring to gretap and ip6gretap configured in an overlay /
+# underlay manner, i.e. with a bound dummy device that marks underlay VRF where
+# the encapsulated packed should be routed.
+
+ALL_TESTS="
+ test_gretap
+ test_ip6gretap
+"
+
+NUM_NETIFS=6
+source lib.sh
+source mirror_lib.sh
+source mirror_gre_lib.sh
+
+h1_create()
+{
+ simple_if_init $h1 192.0.2.1/28
+}
+
+h1_destroy()
+{
+ simple_if_fini $h1 192.0.2.1/28
+}
+
+h2_create()
+{
+ simple_if_init $h2 192.0.2.2/28
+}
+
+h2_destroy()
+{
+ simple_if_fini $h2 192.0.2.2/28
+}
+
+h3_create()
+{
+ simple_if_init $h3 192.0.2.130/28 2001:db8:2::2/64
+
+ tunnel_create h3-gt4 gretap 192.0.2.130 192.0.2.129
+ ip link set h3-gt4 vrf v$h3
+ matchall_sink_create h3-gt4
+
+ tunnel_create h3-gt6 ip6gretap 2001:db8:2::2 2001:db8:2::1
+ ip link set h3-gt6 vrf v$h3
+ matchall_sink_create h3-gt6
+}
+
+h3_destroy()
+{
+ tunnel_destroy h3-gt6
+ tunnel_destroy h3-gt4
+
+ simple_if_fini $h3 192.0.2.130/28 2001:db8:2::2/64
+}
+
+switch_create()
+{
+ # Bridge between H1 and H2.
+
+ ip link add name br1 type bridge vlan_filtering 1
+ ip link set dev br1 up
+
+ ip link set dev $swp1 master br1
+ ip link set dev $swp1 up
+
+ ip link set dev $swp2 master br1
+ ip link set dev $swp2 up
+
+ tc qdisc add dev $swp1 clsact
+
+ # Underlay.
+
+ simple_if_init $swp3 192.0.2.129/28 2001:db8:2::1/64
+
+ ip link add name ul type dummy
+ ip link set dev ul master v$swp3
+ ip link set dev ul up
+
+ # Overlay.
+
+ vrf_create vrf-ol
+ ip link set dev vrf-ol up
+
+ tunnel_create gt4 gretap 192.0.2.129 192.0.2.130 \
+ ttl 100 tos inherit dev ul
+ ip link set dev gt4 master vrf-ol
+ ip link set dev gt4 up
+
+ tunnel_create gt6 ip6gretap 2001:db8:2::1 2001:db8:2::2 \
+ ttl 100 tos inherit dev ul allow-localremote
+ ip link set dev gt6 master vrf-ol
+ ip link set dev gt6 up
+}
+
+switch_destroy()
+{
+ vrf_destroy vrf-ol
+
+ tunnel_destroy gt6
+ tunnel_destroy gt4
+
+ simple_if_fini $swp3 192.0.2.129/28 2001:db8:2::1/64
+
+ ip link del dev ul
+
+ tc qdisc del dev $swp1 clsact
+
+ ip link set dev $swp1 down
+ ip link set dev $swp2 down
+ ip link del dev br1
+}
+
+setup_prepare()
+{
+ h1=${NETIFS[p1]}
+ swp1=${NETIFS[p2]}
+
+ swp2=${NETIFS[p3]}
+ h2=${NETIFS[p4]}
+
+ swp3=${NETIFS[p5]}
+ h3=${NETIFS[p6]}
+
+ vrf_prepare
+
+ h1_create
+ h2_create
+ h3_create
+
+ switch_create
+}
+
+cleanup()
+{
+ pre_cleanup
+
+ switch_destroy
+
+ h3_destroy
+ h2_destroy
+ h1_destroy
+
+ vrf_cleanup
+}
+
+test_gretap()
+{
+ full_test_span_gre_dir gt4 ingress 8 0 "mirror to gretap w/ UL"
+ full_test_span_gre_dir gt4 egress 0 8 "mirror to gretap w/ UL"
+}
+
+test_ip6gretap()
+{
+ full_test_span_gre_dir gt6 ingress 8 0 "mirror to ip6gretap w/ UL"
+ full_test_span_gre_dir gt6 egress 0 8 "mirror to ip6gretap w/ UL"
+}
+
+test_all()
+{
+ RET=0
+
+ slow_path_trap_install $swp1 ingress
+ slow_path_trap_install $swp1 egress
+
+ tests_run
+
+ slow_path_trap_uninstall $swp1 egress
+ slow_path_trap_uninstall $swp1 ingress
+}
+
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+
+tcflags="skip_hw"
+test_all
+
+if ! tc_offload_check; then
+ echo "WARN: Could not test offloaded functionality"
+else
+ tcflags="skip_sw"
+ test_all
+fi
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/net/forwarding/mirror_gre_bridge_1d_vlan.sh b/tools/testing/selftests/net/forwarding/mirror_gre_bridge_1d_vlan.sh
new file mode 100755
index 0000000..3bb4c2b
--- /dev/null
+++ b/tools/testing/selftests/net/forwarding/mirror_gre_bridge_1d_vlan.sh
@@ -0,0 +1,121 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+# This test uses standard topology for testing gretap. See
+# mirror_gre_topo_lib.sh for more details.
+#
+# Test for "tc action mirred egress mirror" when the underlay route points at a
+# bridge device without vlan filtering (802.1d). The device attached to that
+# bridge is a VLAN.
+
+ALL_TESTS="
+ test_gretap
+ test_ip6gretap
+ test_gretap_stp
+ test_ip6gretap_stp
+"
+
+NUM_NETIFS=6
+source lib.sh
+source mirror_lib.sh
+source mirror_gre_lib.sh
+source mirror_gre_topo_lib.sh
+
+setup_prepare()
+{
+ h1=${NETIFS[p1]}
+ swp1=${NETIFS[p2]}
+
+ swp2=${NETIFS[p3]}
+ h2=${NETIFS[p4]}
+
+ swp3=${NETIFS[p5]}
+ h3=${NETIFS[p6]}
+
+ vrf_prepare
+ mirror_gre_topo_create
+
+ ip link add name br2 type bridge vlan_filtering 0
+ ip link set dev br2 up
+
+ vlan_create $swp3 555
+
+ ip link set dev $swp3.555 master br2
+ ip route add 192.0.2.130/32 dev br2
+ ip -6 route add 2001:db8:2::2/128 dev br2
+
+ ip address add dev br2 192.0.2.129/32
+ ip address add dev br2 2001:db8:2::1/128
+
+ vlan_create $h3 555 v$h3 192.0.2.130/28 2001:db8:2::2/64
+}
+
+cleanup()
+{
+ pre_cleanup
+
+ vlan_destroy $h3 555
+ ip link del dev br2
+ vlan_destroy $swp3 555
+
+ mirror_gre_topo_destroy
+ vrf_cleanup
+}
+
+test_vlan_match()
+{
+ local tundev=$1; shift
+ local vlan_match=$1; shift
+ local what=$1; shift
+
+ full_test_span_gre_dir_vlan $tundev ingress "$vlan_match" 8 0 "$what"
+ full_test_span_gre_dir_vlan $tundev egress "$vlan_match" 0 8 "$what"
+}
+
+test_gretap()
+{
+ test_vlan_match gt4 'vlan_id 555 vlan_ethtype ip' "mirror to gretap"
+}
+
+test_ip6gretap()
+{
+ test_vlan_match gt6 'vlan_id 555 vlan_ethtype ipv6' "mirror to ip6gretap"
+}
+
+test_gretap_stp()
+{
+ full_test_span_gre_stp gt4 $swp3.555 "mirror to gretap"
+}
+
+test_ip6gretap_stp()
+{
+ full_test_span_gre_stp gt6 $swp3.555 "mirror to ip6gretap"
+}
+
+test_all()
+{
+ slow_path_trap_install $swp1 ingress
+ slow_path_trap_install $swp1 egress
+
+ tests_run
+
+ slow_path_trap_uninstall $swp1 egress
+ slow_path_trap_uninstall $swp1 ingress
+}
+
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+
+tcflags="skip_hw"
+test_all
+
+if ! tc_offload_check; then
+ echo "WARN: Could not test offloaded functionality"
+else
+ tcflags="skip_sw"
+ test_all
+fi
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/net/forwarding/mirror_gre_changes.sh b/tools/testing/selftests/net/forwarding/mirror_gre_changes.sh
new file mode 100755
index 0000000..aa29d46
--- /dev/null
+++ b/tools/testing/selftests/net/forwarding/mirror_gre_changes.sh
@@ -0,0 +1,278 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+# This test uses standard topology for testing gretap. See
+# mirror_gre_topo_lib.sh for more details.
+#
+# Test how mirrors to gretap and ip6gretap react to changes to relevant
+# configuration.
+
+ALL_TESTS="
+ test_ttl
+ test_tun_up
+ test_egress_up
+ test_remote_ip
+ test_tun_del
+ test_route_del
+"
+
+NUM_NETIFS=6
+source lib.sh
+source mirror_lib.sh
+source mirror_gre_lib.sh
+source mirror_gre_topo_lib.sh
+
+setup_prepare()
+{
+ h1=${NETIFS[p1]}
+ swp1=${NETIFS[p2]}
+
+ swp2=${NETIFS[p3]}
+ h2=${NETIFS[p4]}
+
+ swp3=${NETIFS[p5]}
+ h3=${NETIFS[p6]}
+
+ vrf_prepare
+ mirror_gre_topo_create
+
+ # This test downs $swp3, which deletes the configured IPv6 address
+ # unless this sysctl is set.
+ sysctl_set net.ipv6.conf.$swp3.keep_addr_on_down 1
+
+ ip address add dev $swp3 192.0.2.129/28
+ ip address add dev $h3 192.0.2.130/28
+
+ ip address add dev $swp3 2001:db8:2::1/64
+ ip address add dev $h3 2001:db8:2::2/64
+}
+
+cleanup()
+{
+ pre_cleanup
+
+ ip address del dev $h3 2001:db8:2::2/64
+ ip address del dev $swp3 2001:db8:2::1/64
+
+ ip address del dev $h3 192.0.2.130/28
+ ip address del dev $swp3 192.0.2.129/28
+
+ sysctl_restore net.ipv6.conf.$swp3.keep_addr_on_down
+
+ mirror_gre_topo_destroy
+ vrf_cleanup
+}
+
+test_span_gre_ttl()
+{
+ local tundev=$1; shift
+ local type=$1; shift
+ local prot=$1; shift
+ local what=$1; shift
+
+ RET=0
+
+ mirror_install $swp1 ingress $tundev "matchall $tcflags"
+ tc filter add dev $h3 ingress pref 77 prot $prot \
+ flower ip_ttl 50 action pass
+
+ mirror_test v$h1 192.0.2.1 192.0.2.2 $h3 77 0
+
+ ip link set dev $tundev type $type ttl 50
+ mirror_test v$h1 192.0.2.1 192.0.2.2 $h3 77 10
+
+ ip link set dev $tundev type $type ttl 100
+ tc filter del dev $h3 ingress pref 77
+ mirror_uninstall $swp1 ingress
+
+ log_test "$what: TTL change ($tcflags)"
+}
+
+test_span_gre_tun_up()
+{
+ local tundev=$1; shift
+ local what=$1; shift
+
+ RET=0
+
+ ip link set dev $tundev down
+ mirror_install $swp1 ingress $tundev "matchall $tcflags"
+ fail_test_span_gre_dir $tundev ingress
+
+ ip link set dev $tundev up
+
+ quick_test_span_gre_dir $tundev ingress
+ mirror_uninstall $swp1 ingress
+
+ log_test "$what: tunnel down/up ($tcflags)"
+}
+
+test_span_gre_egress_up()
+{
+ local tundev=$1; shift
+ local remote_ip=$1; shift
+ local what=$1; shift
+
+ RET=0
+
+ ip link set dev $swp3 down
+ mirror_install $swp1 ingress $tundev "matchall $tcflags"
+ fail_test_span_gre_dir $tundev ingress
+
+ # After setting the device up, wait for neighbor to get resolved so that
+ # we can expect mirroring to work.
+ ip link set dev $swp3 up
+ while true; do
+ ip neigh sh dev $swp3 $remote_ip nud reachable |
+ grep -q ^
+ if [[ $? -ne 0 ]]; then
+ sleep 1
+ else
+ break
+ fi
+ done
+
+ quick_test_span_gre_dir $tundev ingress
+ mirror_uninstall $swp1 ingress
+
+ log_test "$what: egress down/up ($tcflags)"
+}
+
+test_span_gre_remote_ip()
+{
+ local tundev=$1; shift
+ local type=$1; shift
+ local correct_ip=$1; shift
+ local wrong_ip=$1; shift
+ local what=$1; shift
+
+ RET=0
+
+ ip link set dev $tundev type $type remote $wrong_ip
+ mirror_install $swp1 ingress $tundev "matchall $tcflags"
+ fail_test_span_gre_dir $tundev ingress
+
+ ip link set dev $tundev type $type remote $correct_ip
+ quick_test_span_gre_dir $tundev ingress
+ mirror_uninstall $swp1 ingress
+
+ log_test "$what: remote address change ($tcflags)"
+}
+
+test_span_gre_tun_del()
+{
+ local tundev=$1; shift
+ local type=$1; shift
+ local flags=$1; shift
+ local local_ip=$1; shift
+ local remote_ip=$1; shift
+ local what=$1; shift
+
+ RET=0
+
+ mirror_install $swp1 ingress $tundev "matchall $tcflags"
+ quick_test_span_gre_dir $tundev ingress
+ ip link del dev $tundev
+ fail_test_span_gre_dir $tundev ingress
+
+ tunnel_create $tundev $type $local_ip $remote_ip \
+ ttl 100 tos inherit $flags
+
+ # Recreating the tunnel doesn't reestablish mirroring, so reinstall it
+ # and verify it works for the follow-up tests.
+ mirror_uninstall $swp1 ingress
+ mirror_install $swp1 ingress $tundev "matchall $tcflags"
+ quick_test_span_gre_dir $tundev ingress
+ mirror_uninstall $swp1 ingress
+
+ log_test "$what: tunnel deleted ($tcflags)"
+}
+
+test_span_gre_route_del()
+{
+ local tundev=$1; shift
+ local edev=$1; shift
+ local route=$1; shift
+ local what=$1; shift
+
+ RET=0
+
+ mirror_install $swp1 ingress $tundev "matchall $tcflags"
+ quick_test_span_gre_dir $tundev ingress
+
+ ip route del $route dev $edev
+ fail_test_span_gre_dir $tundev ingress
+
+ ip route add $route dev $edev
+ quick_test_span_gre_dir $tundev ingress
+
+ mirror_uninstall $swp1 ingress
+
+ log_test "$what: underlay route removal ($tcflags)"
+}
+
+test_ttl()
+{
+ test_span_gre_ttl gt4 gretap ip "mirror to gretap"
+ test_span_gre_ttl gt6 ip6gretap ipv6 "mirror to ip6gretap"
+}
+
+test_tun_up()
+{
+ test_span_gre_tun_up gt4 "mirror to gretap"
+ test_span_gre_tun_up gt6 "mirror to ip6gretap"
+}
+
+test_egress_up()
+{
+ test_span_gre_egress_up gt4 192.0.2.130 "mirror to gretap"
+ test_span_gre_egress_up gt6 2001:db8:2::2 "mirror to ip6gretap"
+}
+
+test_remote_ip()
+{
+ test_span_gre_remote_ip gt4 gretap 192.0.2.130 192.0.2.132 "mirror to gretap"
+ test_span_gre_remote_ip gt6 ip6gretap 2001:db8:2::2 2001:db8:2::4 "mirror to ip6gretap"
+}
+
+test_tun_del()
+{
+ test_span_gre_tun_del gt4 gretap "" \
+ 192.0.2.129 192.0.2.130 "mirror to gretap"
+ test_span_gre_tun_del gt6 ip6gretap allow-localremote \
+ 2001:db8:2::1 2001:db8:2::2 "mirror to ip6gretap"
+}
+
+test_route_del()
+{
+ test_span_gre_route_del gt4 $swp3 192.0.2.128/28 "mirror to gretap"
+ test_span_gre_route_del gt6 $swp3 2001:db8:2::/64 "mirror to ip6gretap"
+}
+
+test_all()
+{
+ slow_path_trap_install $swp1 ingress
+ slow_path_trap_install $swp1 egress
+
+ tests_run
+
+ slow_path_trap_uninstall $swp1 egress
+ slow_path_trap_uninstall $swp1 ingress
+}
+
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+
+tcflags="skip_hw"
+test_all
+
+if ! tc_offload_check; then
+ echo "WARN: Could not test offloaded functionality"
+else
+ tcflags="skip_sw"
+ test_all
+fi
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/net/forwarding/mirror_gre_flower.sh b/tools/testing/selftests/net/forwarding/mirror_gre_flower.sh
new file mode 100755
index 0000000..12914f4
--- /dev/null
+++ b/tools/testing/selftests/net/forwarding/mirror_gre_flower.sh
@@ -0,0 +1,137 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+# This test uses standard topology for testing gretap. See
+# mirror_gre_topo_lib.sh for more details.
+#
+# This tests flower-triggered mirroring to gretap and ip6gretap netdevices. The
+# interfaces on H1 and H2 have two addresses each. Flower match on one of the
+# addresses is configured with mirror action. It is expected that when pinging
+# this address, mirroring takes place, whereas when pinging the other one,
+# there's no mirroring.
+
+ALL_TESTS="
+ test_gretap
+ test_ip6gretap
+"
+
+NUM_NETIFS=6
+source lib.sh
+source mirror_lib.sh
+source mirror_gre_lib.sh
+source mirror_gre_topo_lib.sh
+
+setup_prepare()
+{
+ h1=${NETIFS[p1]}
+ swp1=${NETIFS[p2]}
+
+ swp2=${NETIFS[p3]}
+ h2=${NETIFS[p4]}
+
+ swp3=${NETIFS[p5]}
+ h3=${NETIFS[p6]}
+
+ vrf_prepare
+ mirror_gre_topo_create
+
+ ip address add dev $swp3 192.0.2.129/28
+ ip address add dev $h3 192.0.2.130/28
+
+ ip address add dev $swp3 2001:db8:2::1/64
+ ip address add dev $h3 2001:db8:2::2/64
+
+ ip address add dev $h1 192.0.2.3/28
+ ip address add dev $h2 192.0.2.4/28
+}
+
+cleanup()
+{
+ pre_cleanup
+
+ ip address del dev $h2 192.0.2.4/28
+ ip address del dev $h1 192.0.2.3/28
+
+ ip address del dev $h3 2001:db8:2::2/64
+ ip address del dev $swp3 2001:db8:2::1/64
+
+ ip address del dev $h3 192.0.2.130/28
+ ip address del dev $swp3 192.0.2.129/28
+
+ mirror_gre_topo_destroy
+ vrf_cleanup
+}
+
+test_span_gre_dir_acl()
+{
+ test_span_gre_dir_ips "$@" 192.0.2.3 192.0.2.4
+}
+
+fail_test_span_gre_dir_acl()
+{
+ fail_test_span_gre_dir_ips "$@" 192.0.2.3 192.0.2.4
+}
+
+full_test_span_gre_dir_acl()
+{
+ local tundev=$1; shift
+ local direction=$1; shift
+ local forward_type=$1; shift
+ local backward_type=$1; shift
+ local match_dip=$1; shift
+ local what=$1; shift
+
+ mirror_install $swp1 $direction $tundev \
+ "protocol ip flower $tcflags dst_ip $match_dip"
+ fail_test_span_gre_dir $tundev $direction
+ test_span_gre_dir_acl "$tundev" "$direction" \
+ "$forward_type" "$backward_type"
+ mirror_uninstall $swp1 $direction
+
+ # Test lack of mirroring after ACL mirror is uninstalled.
+ fail_test_span_gre_dir_acl "$tundev" "$direction"
+
+ log_test "$direction $what ($tcflags)"
+}
+
+test_gretap()
+{
+ full_test_span_gre_dir_acl gt4 ingress 8 0 192.0.2.4 "ACL mirror to gretap"
+ full_test_span_gre_dir_acl gt4 egress 0 8 192.0.2.3 "ACL mirror to gretap"
+}
+
+test_ip6gretap()
+{
+ full_test_span_gre_dir_acl gt6 ingress 8 0 192.0.2.4 "ACL mirror to ip6gretap"
+ full_test_span_gre_dir_acl gt6 egress 0 8 192.0.2.3 "ACL mirror to ip6gretap"
+}
+
+test_all()
+{
+ RET=0
+
+ slow_path_trap_install $swp1 ingress
+ slow_path_trap_install $swp1 egress
+
+ tests_run
+
+ slow_path_trap_uninstall $swp1 egress
+ slow_path_trap_uninstall $swp1 ingress
+}
+
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+
+tcflags="skip_hw"
+test_all
+
+if ! tc_offload_check; then
+ echo "WARN: Could not test offloaded functionality"
+else
+ tcflags="skip_sw"
+ test_all
+fi
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/net/forwarding/mirror_gre_lib.sh b/tools/testing/selftests/net/forwarding/mirror_gre_lib.sh
new file mode 100644
index 0000000..619b469
--- /dev/null
+++ b/tools/testing/selftests/net/forwarding/mirror_gre_lib.sh
@@ -0,0 +1,130 @@
+# SPDX-License-Identifier: GPL-2.0
+
+source mirror_lib.sh
+
+quick_test_span_gre_dir_ips()
+{
+ local tundev=$1; shift
+
+ do_test_span_dir_ips 10 h3-$tundev "$@"
+}
+
+fail_test_span_gre_dir_ips()
+{
+ local tundev=$1; shift
+
+ do_test_span_dir_ips 0 h3-$tundev "$@"
+}
+
+test_span_gre_dir_ips()
+{
+ local tundev=$1; shift
+
+ test_span_dir_ips h3-$tundev "$@"
+}
+
+full_test_span_gre_dir_ips()
+{
+ local tundev=$1; shift
+ local direction=$1; shift
+ local forward_type=$1; shift
+ local backward_type=$1; shift
+ local what=$1; shift
+ local ip1=$1; shift
+ local ip2=$1; shift
+
+ RET=0
+
+ mirror_install $swp1 $direction $tundev "matchall $tcflags"
+ test_span_dir_ips "h3-$tundev" "$direction" "$forward_type" \
+ "$backward_type" "$ip1" "$ip2"
+ mirror_uninstall $swp1 $direction
+
+ log_test "$direction $what ($tcflags)"
+}
+
+full_test_span_gre_dir_vlan_ips()
+{
+ local tundev=$1; shift
+ local direction=$1; shift
+ local vlan_match=$1; shift
+ local forward_type=$1; shift
+ local backward_type=$1; shift
+ local what=$1; shift
+ local ip1=$1; shift
+ local ip2=$1; shift
+
+ RET=0
+
+ mirror_install $swp1 $direction $tundev "matchall $tcflags"
+
+ test_span_dir_ips "h3-$tundev" "$direction" "$forward_type" \
+ "$backward_type" "$ip1" "$ip2"
+
+ tc filter add dev $h3 ingress pref 77 prot 802.1q \
+ flower $vlan_match ip_proto 0x2f \
+ action pass
+ mirror_test v$h1 $ip1 $ip2 $h3 77 10
+ tc filter del dev $h3 ingress pref 77
+
+ mirror_uninstall $swp1 $direction
+
+ log_test "$direction $what ($tcflags)"
+}
+
+quick_test_span_gre_dir()
+{
+ quick_test_span_gre_dir_ips "$@" 192.0.2.1 192.0.2.2
+}
+
+fail_test_span_gre_dir()
+{
+ fail_test_span_gre_dir_ips "$@" 192.0.2.1 192.0.2.2
+}
+
+test_span_gre_dir()
+{
+ test_span_gre_dir_ips "$@" 192.0.2.1 192.0.2.2
+}
+
+full_test_span_gre_dir()
+{
+ full_test_span_gre_dir_ips "$@" 192.0.2.1 192.0.2.2
+}
+
+full_test_span_gre_dir_vlan()
+{
+ full_test_span_gre_dir_vlan_ips "$@" 192.0.2.1 192.0.2.2
+}
+
+full_test_span_gre_stp_ips()
+{
+ local tundev=$1; shift
+ local nbpdev=$1; shift
+ local what=$1; shift
+ local ip1=$1; shift
+ local ip2=$1; shift
+ local h3mac=$(mac_get $h3)
+
+ RET=0
+
+ mirror_install $swp1 ingress $tundev "matchall $tcflags"
+ quick_test_span_gre_dir_ips $tundev ingress $ip1 $ip2
+
+ bridge link set dev $nbpdev state disabled
+ sleep 1
+ fail_test_span_gre_dir_ips $tundev ingress $ip1 $ip2
+
+ bridge link set dev $nbpdev state forwarding
+ sleep 1
+ quick_test_span_gre_dir_ips $tundev ingress $ip1 $ip2
+
+ mirror_uninstall $swp1 ingress
+
+ log_test "$what: STP state ($tcflags)"
+}
+
+full_test_span_gre_stp()
+{
+ full_test_span_gre_stp_ips "$@" 192.0.2.1 192.0.2.2
+}
diff --git a/tools/testing/selftests/net/forwarding/mirror_gre_neigh.sh b/tools/testing/selftests/net/forwarding/mirror_gre_neigh.sh
new file mode 100755
index 0000000..fc0508e
--- /dev/null
+++ b/tools/testing/selftests/net/forwarding/mirror_gre_neigh.sh
@@ -0,0 +1,115 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+# This test uses standard topology for testing gretap. See
+# mirror_gre_topo_lib.sh for more details.
+#
+# Test for mirroring to gretap and ip6gretap, such that the neighbor entry for
+# the tunnel remote address has invalid address at the time that the mirroring
+# is set up. Later on, the neighbor is deleted and it is expected to be
+# reinitialized using the usual ARP process, and the mirroring offload updated.
+
+ALL_TESTS="
+ test_gretap
+ test_ip6gretap
+"
+
+NUM_NETIFS=6
+source lib.sh
+source mirror_lib.sh
+source mirror_gre_lib.sh
+source mirror_gre_topo_lib.sh
+
+setup_prepare()
+{
+ h1=${NETIFS[p1]}
+ swp1=${NETIFS[p2]}
+
+ swp2=${NETIFS[p3]}
+ h2=${NETIFS[p4]}
+
+ swp3=${NETIFS[p5]}
+ h3=${NETIFS[p6]}
+
+ vrf_prepare
+ mirror_gre_topo_create
+
+ ip address add dev $swp3 192.0.2.129/28
+ ip address add dev $h3 192.0.2.130/28
+
+ ip address add dev $swp3 2001:db8:2::1/64
+ ip address add dev $h3 2001:db8:2::2/64
+}
+
+cleanup()
+{
+ pre_cleanup
+
+ ip address del dev $h3 2001:db8:2::2/64
+ ip address del dev $swp3 2001:db8:2::1/64
+
+ ip address del dev $h3 192.0.2.130/28
+ ip address del dev $swp3 192.0.2.129/28
+
+ mirror_gre_topo_destroy
+ vrf_cleanup
+}
+
+test_span_gre_neigh()
+{
+ local addr=$1; shift
+ local tundev=$1; shift
+ local direction=$1; shift
+ local what=$1; shift
+
+ RET=0
+
+ ip neigh replace dev $swp3 $addr lladdr 00:11:22:33:44:55
+ mirror_install $swp1 $direction $tundev "matchall $tcflags"
+ fail_test_span_gre_dir $tundev ingress
+ ip neigh del dev $swp3 $addr
+ quick_test_span_gre_dir $tundev ingress
+ mirror_uninstall $swp1 $direction
+
+ log_test "$direction $what: neighbor change ($tcflags)"
+}
+
+test_gretap()
+{
+ test_span_gre_neigh 192.0.2.130 gt4 ingress "mirror to gretap"
+ test_span_gre_neigh 192.0.2.130 gt4 egress "mirror to gretap"
+}
+
+test_ip6gretap()
+{
+ test_span_gre_neigh 2001:db8:2::2 gt6 ingress "mirror to ip6gretap"
+ test_span_gre_neigh 2001:db8:2::2 gt6 egress "mirror to ip6gretap"
+}
+
+test_all()
+{
+ slow_path_trap_install $swp1 ingress
+ slow_path_trap_install $swp1 egress
+
+ tests_run
+
+ slow_path_trap_uninstall $swp1 egress
+ slow_path_trap_uninstall $swp1 ingress
+}
+
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+
+tcflags="skip_hw"
+test_all
+
+if ! tc_offload_check; then
+ echo "WARN: Could not test offloaded functionality"
+else
+ tcflags="skip_sw"
+ test_all
+fi
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/net/forwarding/mirror_gre_nh.sh b/tools/testing/selftests/net/forwarding/mirror_gre_nh.sh
new file mode 100755
index 0000000..8fa681e
--- /dev/null
+++ b/tools/testing/selftests/net/forwarding/mirror_gre_nh.sh
@@ -0,0 +1,127 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+# This test uses standard topology for testing gretap. See
+# mirror_gre_topo_lib.sh for more details.
+#
+# Test that gretap and ip6gretap mirroring works when the other tunnel endpoint
+# is reachable through a next-hop route (as opposed to directly-attached route).
+
+ALL_TESTS="
+ test_gretap
+ test_ip6gretap
+"
+
+NUM_NETIFS=6
+source lib.sh
+source mirror_lib.sh
+source mirror_gre_lib.sh
+source mirror_gre_topo_lib.sh
+
+setup_prepare()
+{
+ h1=${NETIFS[p1]}
+ swp1=${NETIFS[p2]}
+
+ swp2=${NETIFS[p3]}
+ h2=${NETIFS[p4]}
+
+ swp3=${NETIFS[p5]}
+ h3=${NETIFS[p6]}
+
+ sysctl_set net.ipv4.conf.all.rp_filter 0
+ sysctl_set net.ipv4.conf.$h3.rp_filter 0
+
+ vrf_prepare
+ mirror_gre_topo_create
+
+ ip address add dev $swp3 192.0.2.161/28
+ ip address add dev $h3 192.0.2.162/28
+ ip address add dev gt4 192.0.2.129/32
+ ip address add dev h3-gt4 192.0.2.130/32
+
+ # IPv6 route can't be added after address. Such routes are rejected due
+ # to the gateway address having been configured on the local system. It
+ # works the other way around though.
+ ip address add dev $swp3 2001:db8:4::1/64
+ ip -6 route add 2001:db8:2::2/128 via 2001:db8:4::2
+ ip address add dev $h3 2001:db8:4::2/64
+ ip address add dev gt6 2001:db8:2::1
+ ip address add dev h3-gt6 2001:db8:2::2
+}
+
+cleanup()
+{
+ pre_cleanup
+
+ ip -6 route del 2001:db8:2::2/128 via 2001:db8:4::2
+ ip address del dev $h3 2001:db8:4::2/64
+ ip address del dev $swp3 2001:db8:4::1/64
+
+ ip address del dev $h3 192.0.2.162/28
+ ip address del dev $swp3 192.0.2.161/28
+
+ mirror_gre_topo_destroy
+ vrf_cleanup
+
+ sysctl_restore net.ipv4.conf.$h3.rp_filter
+ sysctl_restore net.ipv4.conf.all.rp_filter
+}
+
+test_gretap()
+{
+ RET=0
+ mirror_install $swp1 ingress gt4 "matchall $tcflags"
+
+ # For IPv4, test that there's no mirroring without the route directing
+ # the traffic to tunnel remote address. Then add it and test that
+ # mirroring starts. For IPv6 we can't test this due to the limitation
+ # that routes for locally-specified IPv6 addresses can't be added.
+ fail_test_span_gre_dir gt4 ingress
+
+ ip route add 192.0.2.130/32 via 192.0.2.162
+ quick_test_span_gre_dir gt4 ingress
+ ip route del 192.0.2.130/32 via 192.0.2.162
+
+ mirror_uninstall $swp1 ingress
+ log_test "mirror to gre with next-hop remote ($tcflags)"
+}
+
+test_ip6gretap()
+{
+ RET=0
+
+ mirror_install $swp1 ingress gt6 "matchall $tcflags"
+ quick_test_span_gre_dir gt6 ingress
+ mirror_uninstall $swp1 ingress
+
+ log_test "mirror to ip6gre with next-hop remote ($tcflags)"
+}
+
+test_all()
+{
+ slow_path_trap_install $swp1 ingress
+ slow_path_trap_install $swp1 egress
+
+ tests_run
+
+ slow_path_trap_uninstall $swp1 egress
+ slow_path_trap_uninstall $swp1 ingress
+}
+
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+
+tcflags="skip_hw"
+test_all
+
+if ! tc_offload_check; then
+ echo "WARN: Could not test offloaded functionality"
+else
+ tcflags="skip_sw"
+ test_all
+fi
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/net/forwarding/mirror_gre_topo_lib.sh b/tools/testing/selftests/net/forwarding/mirror_gre_topo_lib.sh
new file mode 100644
index 0000000..2534195
--- /dev/null
+++ b/tools/testing/selftests/net/forwarding/mirror_gre_topo_lib.sh
@@ -0,0 +1,94 @@
+# SPDX-License-Identifier: GPL-2.0
+
+# This is the standard topology for testing mirroring to gretap and ip6gretap
+# netdevices. The tests that use it tweak it in one way or another--importantly,
+# $swp3 and $h3 need to have addresses set up.
+#
+# +---------------------+ +---------------------+
+# | H1 | | H2 |
+# | + $h1 | | $h2 + |
+# | | 192.0.2.1/28 | | 192.0.2.2/28 | |
+# +-----|---------------+ +---------------|-----+
+# | |
+# +-----|-------------------------------------------------------------|-----+
+# | SW o--> mirror | |
+# | +---|-------------------------------------------------------------|---+ |
+# | | + $swp1 BR $swp2 + | |
+# | +---------------------------------------------------------------------+ |
+# | |
+# | + $swp3 + gt6 (ip6gretap) + gt4 (gretap) |
+# | | : loc=2001:db8:2::1 : loc=192.0.2.129 |
+# | | : rem=2001:db8:2::2 : rem=192.0.2.130 |
+# | | : ttl=100 : ttl=100 |
+# | | : tos=inherit : tos=inherit |
+# | | : : |
+# +-----|---------------------:----------------------:----------------------+
+# | : :
+# +-----|---------------------:----------------------:----------------------+
+# | H3 + $h3 + h3-gt6 (ip6gretap) + h3-gt4 (gretap) |
+# | loc=2001:db8:2::2 loc=192.0.2.130 |
+# | rem=2001:db8:2::1 rem=192.0.2.129 |
+# | ttl=100 ttl=100 |
+# | tos=inherit tos=inherit |
+# | |
+# +-------------------------------------------------------------------------+
+
+source mirror_topo_lib.sh
+
+mirror_gre_topo_h3_create()
+{
+ mirror_topo_h3_create
+
+ tunnel_create h3-gt4 gretap 192.0.2.130 192.0.2.129
+ ip link set h3-gt4 vrf v$h3
+ matchall_sink_create h3-gt4
+
+ tunnel_create h3-gt6 ip6gretap 2001:db8:2::2 2001:db8:2::1
+ ip link set h3-gt6 vrf v$h3
+ matchall_sink_create h3-gt6
+}
+
+mirror_gre_topo_h3_destroy()
+{
+ tunnel_destroy h3-gt6
+ tunnel_destroy h3-gt4
+
+ mirror_topo_h3_destroy
+}
+
+mirror_gre_topo_switch_create()
+{
+ mirror_topo_switch_create
+
+ tunnel_create gt4 gretap 192.0.2.129 192.0.2.130 \
+ ttl 100 tos inherit
+
+ tunnel_create gt6 ip6gretap 2001:db8:2::1 2001:db8:2::2 \
+ ttl 100 tos inherit allow-localremote
+}
+
+mirror_gre_topo_switch_destroy()
+{
+ tunnel_destroy gt6
+ tunnel_destroy gt4
+
+ mirror_topo_switch_destroy
+}
+
+mirror_gre_topo_create()
+{
+ mirror_topo_h1_create
+ mirror_topo_h2_create
+ mirror_gre_topo_h3_create
+
+ mirror_gre_topo_switch_create
+}
+
+mirror_gre_topo_destroy()
+{
+ mirror_gre_topo_switch_destroy
+
+ mirror_gre_topo_h3_destroy
+ mirror_topo_h2_destroy
+ mirror_topo_h1_destroy
+}
diff --git a/tools/testing/selftests/net/forwarding/mirror_gre_vlan.sh b/tools/testing/selftests/net/forwarding/mirror_gre_vlan.sh
new file mode 100755
index 0000000..88cecdb
--- /dev/null
+++ b/tools/testing/selftests/net/forwarding/mirror_gre_vlan.sh
@@ -0,0 +1,92 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+# This test uses standard topology for testing gretap. See
+# mirror_gre_topo_lib.sh for more details.
+#
+# Test for "tc action mirred egress mirror" that mirrors to a gretap netdevice
+# whose underlay route points at a vlan device.
+
+ALL_TESTS="
+ test_gretap
+"
+
+NUM_NETIFS=6
+source lib.sh
+source mirror_lib.sh
+source mirror_gre_lib.sh
+source mirror_gre_topo_lib.sh
+
+setup_prepare()
+{
+ h1=${NETIFS[p1]}
+ swp1=${NETIFS[p2]}
+
+ swp2=${NETIFS[p3]}
+ h2=${NETIFS[p4]}
+
+ swp3=${NETIFS[p5]}
+ h3=${NETIFS[p6]}
+
+ vrf_prepare
+ mirror_gre_topo_create
+
+ ip link add name $swp3.555 link $swp3 type vlan id 555
+ ip address add dev $swp3.555 192.0.2.129/32
+ ip address add dev $swp3.555 2001:db8:2::1/128
+ ip link set dev $swp3.555 up
+
+ ip route add 192.0.2.130/32 dev $swp3.555
+ ip -6 route add 2001:db8:2::2/128 dev $swp3.555
+
+ ip link add name $h3.555 link $h3 type vlan id 555
+ ip link set dev $h3.555 master v$h3
+ ip address add dev $h3.555 192.0.2.130/28
+ ip address add dev $h3.555 2001:db8:2::2/64
+ ip link set dev $h3.555 up
+}
+
+cleanup()
+{
+ pre_cleanup
+
+ ip link del dev $h3.555
+ ip link del dev $swp3.555
+
+ mirror_gre_topo_destroy
+ vrf_cleanup
+}
+
+test_gretap()
+{
+ full_test_span_gre_dir gt4 ingress 8 0 "mirror to gretap"
+ full_test_span_gre_dir gt4 egress 0 8 "mirror to gretap"
+}
+
+test_all()
+{
+ slow_path_trap_install $swp1 ingress
+ slow_path_trap_install $swp1 egress
+
+ tests_run
+
+ slow_path_trap_uninstall $swp1 egress
+ slow_path_trap_uninstall $swp1 ingress
+}
+
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+
+tcflags="skip_hw"
+test_all
+
+if ! tc_offload_check; then
+ echo "WARN: Could not test offloaded functionality"
+else
+ tcflags="skip_sw"
+ test_all
+fi
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/net/forwarding/mirror_gre_vlan_bridge_1q.sh b/tools/testing/selftests/net/forwarding/mirror_gre_vlan_bridge_1q.sh
new file mode 100755
index 0000000..5dbc7a0
--- /dev/null
+++ b/tools/testing/selftests/net/forwarding/mirror_gre_vlan_bridge_1q.sh
@@ -0,0 +1,270 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+# This test uses standard topology for testing gretap. See
+# mirror_gre_topo_lib.sh for more details.
+#
+# Test for "tc action mirred egress mirror" when the underlay route points at a
+# vlan device on top of a bridge device with vlan filtering (802.1q).
+
+ALL_TESTS="
+ test_gretap
+ test_ip6gretap
+ test_gretap_forbidden_cpu
+ test_ip6gretap_forbidden_cpu
+ test_gretap_forbidden_egress
+ test_ip6gretap_forbidden_egress
+ test_gretap_untagged_egress
+ test_ip6gretap_untagged_egress
+ test_gretap_fdb_roaming
+ test_ip6gretap_fdb_roaming
+ test_gretap_stp
+ test_ip6gretap_stp
+"
+
+NUM_NETIFS=6
+source lib.sh
+source mirror_lib.sh
+source mirror_gre_lib.sh
+source mirror_gre_topo_lib.sh
+
+setup_prepare()
+{
+ h1=${NETIFS[p1]}
+ swp1=${NETIFS[p2]}
+
+ swp2=${NETIFS[p3]}
+ h2=${NETIFS[p4]}
+
+ swp3=${NETIFS[p5]}
+ h3=${NETIFS[p6]}
+
+ vrf_prepare
+ mirror_gre_topo_create
+
+ vlan_create br1 555 "" 192.0.2.129/32 2001:db8:2::1/128
+ bridge vlan add dev br1 vid 555 self
+ ip route rep 192.0.2.130/32 dev br1.555
+ ip -6 route rep 2001:db8:2::2/128 dev br1.555
+
+ vlan_create $h3 555 v$h3 192.0.2.130/28 2001:db8:2::2/64
+
+ ip link set dev $swp3 master br1
+ bridge vlan add dev $swp3 vid 555
+ bridge vlan add dev $swp2 vid 555
+}
+
+cleanup()
+{
+ pre_cleanup
+
+ ip link set dev $swp2 nomaster
+ ip link set dev $swp3 nomaster
+ vlan_destroy $h3 555
+ vlan_destroy br1 555
+
+ mirror_gre_topo_destroy
+ vrf_cleanup
+}
+
+test_vlan_match()
+{
+ local tundev=$1; shift
+ local vlan_match=$1; shift
+ local what=$1; shift
+
+ full_test_span_gre_dir_vlan $tundev ingress "$vlan_match" 8 0 "$what"
+ full_test_span_gre_dir_vlan $tundev egress "$vlan_match" 0 8 "$what"
+}
+
+test_gretap()
+{
+ test_vlan_match gt4 'vlan_id 555 vlan_ethtype ip' "mirror to gretap"
+}
+
+test_ip6gretap()
+{
+ test_vlan_match gt6 'vlan_id 555 vlan_ethtype ipv6' "mirror to ip6gretap"
+}
+
+test_span_gre_forbidden_cpu()
+{
+ local tundev=$1; shift
+ local what=$1; shift
+
+ RET=0
+
+ # Run the pass-test first, to prime neighbor table.
+ mirror_install $swp1 ingress $tundev "matchall $tcflags"
+ quick_test_span_gre_dir $tundev ingress
+
+ # Now forbid the VLAN at the bridge and see it fail.
+ bridge vlan del dev br1 vid 555 self
+ sleep 1
+ fail_test_span_gre_dir $tundev ingress
+
+ bridge vlan add dev br1 vid 555 self
+ sleep 1
+ quick_test_span_gre_dir $tundev ingress
+
+ mirror_uninstall $swp1 ingress
+
+ log_test "$what: vlan forbidden at a bridge ($tcflags)"
+}
+
+test_gretap_forbidden_cpu()
+{
+ test_span_gre_forbidden_cpu gt4 "mirror to gretap"
+}
+
+test_ip6gretap_forbidden_cpu()
+{
+ test_span_gre_forbidden_cpu gt6 "mirror to ip6gretap"
+}
+
+test_span_gre_forbidden_egress()
+{
+ local tundev=$1; shift
+ local what=$1; shift
+
+ RET=0
+
+ mirror_install $swp1 ingress $tundev "matchall $tcflags"
+ quick_test_span_gre_dir $tundev ingress
+
+ bridge vlan del dev $swp3 vid 555
+ sleep 1
+ fail_test_span_gre_dir $tundev ingress
+
+ bridge vlan add dev $swp3 vid 555
+ # Re-prime FDB
+ arping -I br1.555 192.0.2.130 -fqc 1
+ sleep 1
+ quick_test_span_gre_dir $tundev ingress
+
+ mirror_uninstall $swp1 ingress
+
+ log_test "$what: vlan forbidden at a bridge egress ($tcflags)"
+}
+
+test_gretap_forbidden_egress()
+{
+ test_span_gre_forbidden_egress gt4 "mirror to gretap"
+}
+
+test_ip6gretap_forbidden_egress()
+{
+ test_span_gre_forbidden_egress gt6 "mirror to ip6gretap"
+}
+
+test_span_gre_untagged_egress()
+{
+ local tundev=$1; shift
+ local what=$1; shift
+
+ RET=0
+
+ mirror_install $swp1 ingress $tundev "matchall $tcflags"
+
+ quick_test_span_gre_dir $tundev ingress
+ quick_test_span_vlan_dir $h3 555 ingress
+
+ bridge vlan add dev $swp3 vid 555 pvid untagged
+ sleep 1
+ quick_test_span_gre_dir $tundev ingress
+ fail_test_span_vlan_dir $h3 555 ingress
+
+ bridge vlan add dev $swp3 vid 555
+ sleep 1
+ quick_test_span_gre_dir $tundev ingress
+ quick_test_span_vlan_dir $h3 555 ingress
+
+ mirror_uninstall $swp1 ingress
+
+ log_test "$what: vlan untagged at a bridge egress ($tcflags)"
+}
+
+test_gretap_untagged_egress()
+{
+ test_span_gre_untagged_egress gt4 "mirror to gretap"
+}
+
+test_ip6gretap_untagged_egress()
+{
+ test_span_gre_untagged_egress gt6 "mirror to ip6gretap"
+}
+
+test_span_gre_fdb_roaming()
+{
+ local tundev=$1; shift
+ local what=$1; shift
+ local h3mac=$(mac_get $h3)
+
+ RET=0
+
+ mirror_install $swp1 ingress $tundev "matchall $tcflags"
+ quick_test_span_gre_dir $tundev ingress
+
+ bridge fdb del dev $swp3 $h3mac vlan 555 master
+ bridge fdb add dev $swp2 $h3mac vlan 555 master
+ sleep 1
+ fail_test_span_gre_dir $tundev ingress
+
+ bridge fdb del dev $swp2 $h3mac vlan 555 master
+ # Re-prime FDB
+ arping -I br1.555 192.0.2.130 -fqc 1
+ sleep 1
+ quick_test_span_gre_dir $tundev ingress
+
+ mirror_uninstall $swp1 ingress
+
+ log_test "$what: MAC roaming ($tcflags)"
+}
+
+test_gretap_fdb_roaming()
+{
+ test_span_gre_fdb_roaming gt4 "mirror to gretap"
+}
+
+test_ip6gretap_fdb_roaming()
+{
+ test_span_gre_fdb_roaming gt6 "mirror to ip6gretap"
+}
+
+test_gretap_stp()
+{
+ full_test_span_gre_stp gt4 $swp3 "mirror to gretap"
+}
+
+test_ip6gretap_stp()
+{
+ full_test_span_gre_stp gt6 $swp3 "mirror to ip6gretap"
+}
+
+test_all()
+{
+ slow_path_trap_install $swp1 ingress
+ slow_path_trap_install $swp1 egress
+
+ tests_run
+
+ slow_path_trap_uninstall $swp1 egress
+ slow_path_trap_uninstall $swp1 ingress
+}
+
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+
+tcflags="skip_hw"
+test_all
+
+if ! tc_offload_check; then
+ echo "WARN: Could not test offloaded functionality"
+else
+ tcflags="skip_sw"
+ test_all
+fi
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/net/forwarding/mirror_lib.sh b/tools/testing/selftests/net/forwarding/mirror_lib.sh
new file mode 100644
index 0000000..d36dc26
--- /dev/null
+++ b/tools/testing/selftests/net/forwarding/mirror_lib.sh
@@ -0,0 +1,132 @@
+# SPDX-License-Identifier: GPL-2.0
+
+mirror_install()
+{
+ local from_dev=$1; shift
+ local direction=$1; shift
+ local to_dev=$1; shift
+ local filter=$1; shift
+
+ tc filter add dev $from_dev $direction \
+ pref 1000 $filter \
+ action mirred egress mirror dev $to_dev
+}
+
+mirror_uninstall()
+{
+ local from_dev=$1; shift
+ local direction=$1; shift
+
+ tc filter del dev $swp1 $direction pref 1000
+}
+
+mirror_test()
+{
+ local vrf_name=$1; shift
+ local sip=$1; shift
+ local dip=$1; shift
+ local dev=$1; shift
+ local pref=$1; shift
+ local expect=$1; shift
+
+ local t0=$(tc_rule_stats_get $dev $pref)
+ ip vrf exec $vrf_name \
+ ${PING} ${sip:+-I $sip} $dip -c 10 -i 0.1 -w 2 &> /dev/null
+ local t1=$(tc_rule_stats_get $dev $pref)
+ local delta=$((t1 - t0))
+ # Tolerate a couple stray extra packets.
+ ((expect <= delta && delta <= expect + 2))
+ check_err $? "Expected to capture $expect packets, got $delta."
+}
+
+do_test_span_dir_ips()
+{
+ local expect=$1; shift
+ local dev=$1; shift
+ local direction=$1; shift
+ local ip1=$1; shift
+ local ip2=$1; shift
+
+ icmp_capture_install $dev
+ mirror_test v$h1 $ip1 $ip2 $dev 100 $expect
+ mirror_test v$h2 $ip2 $ip1 $dev 100 $expect
+ icmp_capture_uninstall $dev
+}
+
+quick_test_span_dir_ips()
+{
+ do_test_span_dir_ips 10 "$@"
+}
+
+fail_test_span_dir_ips()
+{
+ do_test_span_dir_ips 0 "$@"
+}
+
+test_span_dir_ips()
+{
+ local dev=$1; shift
+ local direction=$1; shift
+ local forward_type=$1; shift
+ local backward_type=$1; shift
+ local ip1=$1; shift
+ local ip2=$1; shift
+
+ quick_test_span_dir_ips "$dev" "$direction" "$ip1" "$ip2"
+
+ icmp_capture_install $dev "type $forward_type"
+ mirror_test v$h1 $ip1 $ip2 $dev 100 10
+ icmp_capture_uninstall $dev
+
+ icmp_capture_install $dev "type $backward_type"
+ mirror_test v$h2 $ip2 $ip1 $dev 100 10
+ icmp_capture_uninstall $dev
+}
+
+fail_test_span_dir()
+{
+ fail_test_span_dir_ips "$@" 192.0.2.1 192.0.2.2
+}
+
+test_span_dir()
+{
+ test_span_dir_ips "$@" 192.0.2.1 192.0.2.2
+}
+
+do_test_span_vlan_dir_ips()
+{
+ local expect=$1; shift
+ local dev=$1; shift
+ local vid=$1; shift
+ local direction=$1; shift
+ local ip1=$1; shift
+ local ip2=$1; shift
+
+ # Install the capture as skip_hw to avoid double-counting of packets.
+ # The traffic is meant for local box anyway, so will be trapped to
+ # kernel.
+ vlan_capture_install $dev "skip_hw vlan_id $vid"
+ mirror_test v$h1 $ip1 $ip2 $dev 100 $expect
+ mirror_test v$h2 $ip2 $ip1 $dev 100 $expect
+ vlan_capture_uninstall $dev
+}
+
+quick_test_span_vlan_dir_ips()
+{
+ do_test_span_vlan_dir_ips 10 "$@"
+}
+
+fail_test_span_vlan_dir_ips()
+{
+ do_test_span_vlan_dir_ips 0 "$@"
+}
+
+quick_test_span_vlan_dir()
+{
+ quick_test_span_vlan_dir_ips "$@" 192.0.2.1 192.0.2.2
+}
+
+fail_test_span_vlan_dir()
+{
+ fail_test_span_vlan_dir_ips "$@" 192.0.2.1 192.0.2.2
+}
diff --git a/tools/testing/selftests/net/forwarding/mirror_topo_lib.sh b/tools/testing/selftests/net/forwarding/mirror_topo_lib.sh
new file mode 100644
index 0000000..04979e5
--- /dev/null
+++ b/tools/testing/selftests/net/forwarding/mirror_topo_lib.sh
@@ -0,0 +1,101 @@
+# SPDX-License-Identifier: GPL-2.0
+
+# This is the standard topology for testing mirroring. The tests that use it
+# tweak it in one way or another--typically add more devices to the topology.
+#
+# +---------------------+ +---------------------+
+# | H1 | | H2 |
+# | + $h1 | | $h2 + |
+# | | 192.0.2.1/28 | | 192.0.2.2/28 | |
+# +-----|---------------+ +---------------|-----+
+# | |
+# +-----|-------------------------------------------------------------|-----+
+# | SW o--> mirror | |
+# | +---|-------------------------------------------------------------|---+ |
+# | | + $swp1 BR $swp2 + | |
+# | +---------------------------------------------------------------------+ |
+# | |
+# | + $swp3 |
+# +-----|-------------------------------------------------------------------+
+# |
+# +-----|-------------------------------------------------------------------+
+# | H3 + $h3 |
+# | |
+# +-------------------------------------------------------------------------+
+
+mirror_topo_h1_create()
+{
+ simple_if_init $h1 192.0.2.1/28
+}
+
+mirror_topo_h1_destroy()
+{
+ simple_if_fini $h1 192.0.2.1/28
+}
+
+mirror_topo_h2_create()
+{
+ simple_if_init $h2 192.0.2.2/28
+}
+
+mirror_topo_h2_destroy()
+{
+ simple_if_fini $h2 192.0.2.2/28
+}
+
+mirror_topo_h3_create()
+{
+ simple_if_init $h3
+ tc qdisc add dev $h3 clsact
+}
+
+mirror_topo_h3_destroy()
+{
+ tc qdisc del dev $h3 clsact
+ simple_if_fini $h3
+}
+
+mirror_topo_switch_create()
+{
+ ip link set dev $swp3 up
+
+ ip link add name br1 type bridge vlan_filtering 1
+ ip link set dev br1 up
+
+ ip link set dev $swp1 master br1
+ ip link set dev $swp1 up
+
+ ip link set dev $swp2 master br1
+ ip link set dev $swp2 up
+
+ tc qdisc add dev $swp1 clsact
+}
+
+mirror_topo_switch_destroy()
+{
+ tc qdisc del dev $swp1 clsact
+
+ ip link set dev $swp1 down
+ ip link set dev $swp2 down
+ ip link del dev br1
+
+ ip link set dev $swp3 down
+}
+
+mirror_topo_create()
+{
+ mirror_topo_h1_create
+ mirror_topo_h2_create
+ mirror_topo_h3_create
+
+ mirror_topo_switch_create
+}
+
+mirror_topo_destroy()
+{
+ mirror_topo_switch_destroy
+
+ mirror_topo_h3_destroy
+ mirror_topo_h2_destroy
+ mirror_topo_h1_destroy
+}
diff --git a/tools/testing/selftests/net/forwarding/mirror_vlan.sh b/tools/testing/selftests/net/forwarding/mirror_vlan.sh
new file mode 100755
index 0000000..9ab2ce7
--- /dev/null
+++ b/tools/testing/selftests/net/forwarding/mirror_vlan.sh
@@ -0,0 +1,131 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+# This test uses standard topology for testing mirroring. See mirror_topo_lib.sh
+# for more details.
+#
+# Test for "tc action mirred egress mirror" that mirrors to a vlan device.
+
+ALL_TESTS="
+ test_vlan
+ test_tagged_vlan
+"
+
+NUM_NETIFS=6
+source lib.sh
+source mirror_lib.sh
+source mirror_topo_lib.sh
+
+setup_prepare()
+{
+ h1=${NETIFS[p1]}
+ swp1=${NETIFS[p2]}
+
+ swp2=${NETIFS[p3]}
+ h2=${NETIFS[p4]}
+
+ swp3=${NETIFS[p5]}
+ h3=${NETIFS[p6]}
+
+ vrf_prepare
+ mirror_topo_create
+
+ vlan_create $swp3 555
+
+ vlan_create $h3 555 v$h3
+ matchall_sink_create $h3.555
+
+ vlan_create $h1 111 v$h1 192.0.2.17/28
+ bridge vlan add dev $swp1 vid 111
+
+ vlan_create $h2 111 v$h2 192.0.2.18/28
+ bridge vlan add dev $swp2 vid 111
+}
+
+cleanup()
+{
+ pre_cleanup
+
+ vlan_destroy $h2 111
+ vlan_destroy $h1 111
+ vlan_destroy $h3 555
+ vlan_destroy $swp3 555
+
+ mirror_topo_destroy
+ vrf_cleanup
+}
+
+test_vlan_dir()
+{
+ local direction=$1; shift
+ local forward_type=$1; shift
+ local backward_type=$1; shift
+
+ RET=0
+
+ mirror_install $swp1 $direction $swp3.555 "matchall $tcflags"
+ test_span_dir "$h3.555" "$direction" "$forward_type" "$backward_type"
+ mirror_uninstall $swp1 $direction
+
+ log_test "$direction mirror to vlan ($tcflags)"
+}
+
+test_vlan()
+{
+ test_vlan_dir ingress 8 0
+ test_vlan_dir egress 0 8
+}
+
+test_tagged_vlan_dir()
+{
+ local direction=$1; shift
+ local forward_type=$1; shift
+ local backward_type=$1; shift
+
+ RET=0
+
+ mirror_install $swp1 $direction $swp3.555 "matchall $tcflags"
+ do_test_span_vlan_dir_ips 10 "$h3.555" 111 "$direction" \
+ 192.0.2.17 192.0.2.18
+ do_test_span_vlan_dir_ips 0 "$h3.555" 555 "$direction" \
+ 192.0.2.17 192.0.2.18
+ mirror_uninstall $swp1 $direction
+
+ log_test "$direction mirror tagged to vlan ($tcflags)"
+}
+
+test_tagged_vlan()
+{
+ test_tagged_vlan_dir ingress 8 0
+ test_tagged_vlan_dir egress 0 8
+}
+
+test_all()
+{
+ slow_path_trap_install $swp1 ingress
+ slow_path_trap_install $swp1 egress
+ trap_install $h3 ingress
+
+ tests_run
+
+ trap_uninstall $h3 ingress
+ slow_path_trap_uninstall $swp1 egress
+ slow_path_trap_uninstall $swp1 ingress
+}
+
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+
+tcflags="skip_hw"
+test_all
+
+if ! tc_offload_check; then
+ echo "WARN: Could not test offloaded functionality"
+else
+ tcflags="skip_sw"
+ test_all
+fi
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/net/forwarding/router.sh b/tools/testing/selftests/net/forwarding/router.sh
index cc6a14a..a75cb51 100755
--- a/tools/testing/selftests/net/forwarding/router.sh
+++ b/tools/testing/selftests/net/forwarding/router.sh
@@ -1,6 +1,7 @@
#!/bin/bash
# SPDX-License-Identifier: GPL-2.0
+ALL_TESTS="ping_ipv4 ping_ipv6"
NUM_NETIFS=4
source lib.sh
@@ -114,12 +115,21 @@ cleanup()
vrf_cleanup
}
+ping_ipv4()
+{
+ ping_test $h1 198.51.100.2
+}
+
+ping_ipv6()
+{
+ ping6_test $h1 2001:db8:2::2
+}
+
trap cleanup EXIT
setup_prepare
setup_wait
-ping_test $h1 198.51.100.2
-ping6_test $h1 2001:db8:2::2
+tests_run
exit $EXIT_STATUS
diff --git a/tools/testing/selftests/net/forwarding/router_multipath.sh b/tools/testing/selftests/net/forwarding/router_multipath.sh
index 3bc3510..8b6d0fb 100755
--- a/tools/testing/selftests/net/forwarding/router_multipath.sh
+++ b/tools/testing/selftests/net/forwarding/router_multipath.sh
@@ -1,6 +1,7 @@
#!/bin/bash
# SPDX-License-Identifier: GPL-2.0
+ALL_TESTS="ping_ipv4 ping_ipv6 multipath_test"
NUM_NETIFS=8
source lib.sh
@@ -191,7 +192,7 @@ multipath_eval()
diff=$(echo $weights_ratio - $packets_ratio | bc -l)
diff=${diff#-}
- test "$(echo "$diff / $weights_ratio > 0.1" | bc -l)" -eq 0
+ test "$(echo "$diff / $weights_ratio > 0.15" | bc -l)" -eq 0
check_err $? "Too large discrepancy between expected and measured ratios"
log_test "$desc"
log_info "Expected ratio $weights_ratio Measured ratio $packets_ratio"
@@ -204,13 +205,11 @@ multipath4_test()
local weight_rp13=$3
local t0_rp12 t0_rp13 t1_rp12 t1_rp13
local packets_rp12 packets_rp13
- local hash_policy
# Transmit multiple flows from h1 to h2 and make sure they are
# distributed between both multipath links (rp12 and rp13)
# according to the configured weights.
- hash_policy=$(sysctl -n net.ipv4.fib_multipath_hash_policy)
- sysctl -q -w net.ipv4.fib_multipath_hash_policy=1
+ sysctl_set net.ipv4.fib_multipath_hash_policy 1
ip route replace 198.51.100.0/24 vrf vrf-r1 \
nexthop via 169.254.2.22 dev $rp12 weight $weight_rp12 \
nexthop via 169.254.3.23 dev $rp13 weight $weight_rp13
@@ -232,7 +231,7 @@ multipath4_test()
ip route replace 198.51.100.0/24 vrf vrf-r1 \
nexthop via 169.254.2.22 dev $rp12 \
nexthop via 169.254.3.23 dev $rp13
- sysctl -q -w net.ipv4.fib_multipath_hash_policy=$hash_policy
+ sysctl_restore net.ipv4.fib_multipath_hash_policy
}
multipath6_l4_test()
@@ -242,13 +241,11 @@ multipath6_l4_test()
local weight_rp13=$3
local t0_rp12 t0_rp13 t1_rp12 t1_rp13
local packets_rp12 packets_rp13
- local hash_policy
# Transmit multiple flows from h1 to h2 and make sure they are
# distributed between both multipath links (rp12 and rp13)
# according to the configured weights.
- hash_policy=$(sysctl -n net.ipv6.fib_multipath_hash_policy)
- sysctl -q -w net.ipv6.fib_multipath_hash_policy=1
+ sysctl_set net.ipv6.fib_multipath_hash_policy 1
ip route replace 2001:db8:2::/64 vrf vrf-r1 \
nexthop via fe80:2::22 dev $rp12 weight $weight_rp12 \
@@ -271,7 +268,7 @@ multipath6_l4_test()
nexthop via fe80:2::22 dev $rp12 \
nexthop via fe80:3::23 dev $rp13
- sysctl -q -w net.ipv6.fib_multipath_hash_policy=$hash_policy
+ sysctl_restore net.ipv6.fib_multipath_hash_policy
}
multipath6_test()
@@ -364,13 +361,21 @@ cleanup()
vrf_cleanup
}
+ping_ipv4()
+{
+ ping_test $h1 198.51.100.2
+}
+
+ping_ipv6()
+{
+ ping6_test $h1 2001:db8:2::2
+}
+
trap cleanup EXIT
setup_prepare
setup_wait
-ping_test $h1 198.51.100.2
-ping6_test $h1 2001:db8:2::2
-multipath_test
+tests_run
exit $EXIT_STATUS
diff --git a/tools/testing/selftests/net/forwarding/tc_actions.sh b/tools/testing/selftests/net/forwarding/tc_actions.sh
index 3a6385e..813d02d 100755
--- a/tools/testing/selftests/net/forwarding/tc_actions.sh
+++ b/tools/testing/selftests/net/forwarding/tc_actions.sh
@@ -1,6 +1,8 @@
#!/bin/bash
# SPDX-License-Identifier: GPL-2.0
+ALL_TESTS="gact_drop_and_ok_test mirred_egress_redirect_test \
+ mirred_egress_mirror_test gact_trap_test"
NUM_NETIFS=4
source tc_common.sh
source lib.sh
@@ -111,6 +113,10 @@ gact_trap_test()
{
RET=0
+ if [[ "$tcflags" != "skip_sw" ]]; then
+ return 0;
+ fi
+
tc filter add dev $swp1 ingress protocol ip pref 1 handle 101 flower \
skip_hw dst_ip 192.0.2.2 action drop
tc filter add dev $swp1 ingress protocol ip pref 3 handle 103 flower \
@@ -179,24 +185,29 @@ cleanup()
ip link set $swp1 address $swp1origmac
}
+mirred_egress_redirect_test()
+{
+ mirred_egress_test "redirect"
+}
+
+mirred_egress_mirror_test()
+{
+ mirred_egress_test "mirror"
+}
+
trap cleanup EXIT
setup_prepare
setup_wait
-gact_drop_and_ok_test
-mirred_egress_test "redirect"
-mirred_egress_test "mirror"
+tests_run
tc_offload_check
if [[ $? -ne 0 ]]; then
log_info "Could not test offloaded functionality"
else
tcflags="skip_sw"
- gact_drop_and_ok_test
- mirred_egress_test "redirect"
- mirred_egress_test "mirror"
- gact_trap_test
+ tests_run
fi
exit $EXIT_STATUS
diff --git a/tools/testing/selftests/net/forwarding/tc_chains.sh b/tools/testing/selftests/net/forwarding/tc_chains.sh
index 2fd1522..d2c783e 100755
--- a/tools/testing/selftests/net/forwarding/tc_chains.sh
+++ b/tools/testing/selftests/net/forwarding/tc_chains.sh
@@ -1,6 +1,7 @@
#!/bin/bash
# SPDX-License-Identifier: GPL-2.0
+ALL_TESTS="unreachable_chain_test gact_goto_chain_test"
NUM_NETIFS=2
source tc_common.sh
source lib.sh
@@ -107,16 +108,14 @@ trap cleanup EXIT
setup_prepare
setup_wait
-unreachable_chain_test
-gact_goto_chain_test
+tests_run
tc_offload_check
if [[ $? -ne 0 ]]; then
log_info "Could not test offloaded functionality"
else
tcflags="skip_sw"
- unreachable_chain_test
- gact_goto_chain_test
+ tests_run
fi
exit $EXIT_STATUS
diff --git a/tools/testing/selftests/net/forwarding/tc_flower.sh b/tools/testing/selftests/net/forwarding/tc_flower.sh
index 032b882..20d1077 100755
--- a/tools/testing/selftests/net/forwarding/tc_flower.sh
+++ b/tools/testing/selftests/net/forwarding/tc_flower.sh
@@ -1,6 +1,8 @@
#!/bin/bash
# SPDX-License-Identifier: GPL-2.0
+ALL_TESTS="match_dst_mac_test match_src_mac_test match_dst_ip_test \
+ match_src_ip_test match_ip_flags_test"
NUM_NETIFS=2
source tc_common.sh
source lib.sh
@@ -149,6 +151,74 @@ match_src_ip_test()
log_test "src_ip match ($tcflags)"
}
+match_ip_flags_test()
+{
+ RET=0
+
+ tc filter add dev $h2 ingress protocol ip pref 1 handle 101 flower \
+ $tcflags ip_flags frag action continue
+ tc filter add dev $h2 ingress protocol ip pref 2 handle 102 flower \
+ $tcflags ip_flags firstfrag action continue
+ tc filter add dev $h2 ingress protocol ip pref 3 handle 103 flower \
+ $tcflags ip_flags nofirstfrag action continue
+ tc filter add dev $h2 ingress protocol ip pref 4 handle 104 flower \
+ $tcflags ip_flags nofrag action drop
+
+ $MZ $h1 -c 1 -p 1000 -a $h1mac -b $h2mac -A 192.0.2.1 -B 192.0.2.2 \
+ -t ip "frag=0" -q
+
+ tc_check_packets "dev $h2 ingress" 101 1
+ check_fail $? "Matched on wrong frag filter (nofrag)"
+
+ tc_check_packets "dev $h2 ingress" 102 1
+ check_fail $? "Matched on wrong firstfrag filter (nofrag)"
+
+ tc_check_packets "dev $h2 ingress" 103 1
+ check_err $? "Did not match on nofirstfrag filter (nofrag) "
+
+ tc_check_packets "dev $h2 ingress" 104 1
+ check_err $? "Did not match on nofrag filter (nofrag)"
+
+ $MZ $h1 -c 1 -p 1000 -a $h1mac -b $h2mac -A 192.0.2.1 -B 192.0.2.2 \
+ -t ip "frag=0,mf" -q
+
+ tc_check_packets "dev $h2 ingress" 101 1
+ check_err $? "Did not match on frag filter (1stfrag)"
+
+ tc_check_packets "dev $h2 ingress" 102 1
+ check_err $? "Did not match fistfrag filter (1stfrag)"
+
+ tc_check_packets "dev $h2 ingress" 103 1
+ check_err $? "Matched on wrong nofirstfrag filter (1stfrag)"
+
+ tc_check_packets "dev $h2 ingress" 104 1
+ check_err $? "Match on wrong nofrag filter (1stfrag)"
+
+ $MZ $h1 -c 1 -p 1000 -a $h1mac -b $h2mac -A 192.0.2.1 -B 192.0.2.2 \
+ -t ip "frag=256,mf" -q
+ $MZ $h1 -c 1 -p 1000 -a $h1mac -b $h2mac -A 192.0.2.1 -B 192.0.2.2 \
+ -t ip "frag=256" -q
+
+ tc_check_packets "dev $h2 ingress" 101 3
+ check_err $? "Did not match on frag filter (no1stfrag)"
+
+ tc_check_packets "dev $h2 ingress" 102 1
+ check_err $? "Matched on wrong firstfrag filter (no1stfrag)"
+
+ tc_check_packets "dev $h2 ingress" 103 3
+ check_err $? "Did not match on nofirstfrag filter (no1stfrag)"
+
+ tc_check_packets "dev $h2 ingress" 104 1
+ check_err $? "Matched on nofrag filter (no1stfrag)"
+
+ tc filter del dev $h2 ingress protocol ip pref 1 handle 101 flower
+ tc filter del dev $h2 ingress protocol ip pref 2 handle 102 flower
+ tc filter del dev $h2 ingress protocol ip pref 3 handle 103 flower
+ tc filter del dev $h2 ingress protocol ip pref 4 handle 104 flower
+
+ log_test "ip_flags match ($tcflags)"
+}
+
setup_prepare()
{
h1=${NETIFS[p1]}
@@ -177,20 +247,14 @@ trap cleanup EXIT
setup_prepare
setup_wait
-match_dst_mac_test
-match_src_mac_test
-match_dst_ip_test
-match_src_ip_test
+tests_run
tc_offload_check
if [[ $? -ne 0 ]]; then
log_info "Could not test offloaded functionality"
else
tcflags="skip_sw"
- match_dst_mac_test
- match_src_mac_test
- match_dst_ip_test
- match_src_ip_test
+ tests_run
fi
exit $EXIT_STATUS
diff --git a/tools/testing/selftests/net/forwarding/tc_shblocks.sh b/tools/testing/selftests/net/forwarding/tc_shblocks.sh
index 077b980..b5b9172 100755
--- a/tools/testing/selftests/net/forwarding/tc_shblocks.sh
+++ b/tools/testing/selftests/net/forwarding/tc_shblocks.sh
@@ -1,6 +1,7 @@
#!/bin/bash
# SPDX-License-Identifier: GPL-2.0
+ALL_TESTS="shared_block_test"
NUM_NETIFS=4
source tc_common.sh
source lib.sh
@@ -109,14 +110,14 @@ trap cleanup EXIT
setup_prepare
setup_wait
-shared_block_test
+tests_run
tc_offload_check
if [[ $? -ne 0 ]]; then
log_info "Could not test offloaded functionality"
else
tcflags="skip_sw"
- shared_block_test
+ tests_run
fi
exit $EXIT_STATUS
diff --git a/tools/testing/selftests/net/msg_zerocopy.sh b/tools/testing/selftests/net/msg_zerocopy.sh
index d571d21..c43c6de 100755
--- a/tools/testing/selftests/net/msg_zerocopy.sh
+++ b/tools/testing/selftests/net/msg_zerocopy.sh
@@ -21,6 +21,14 @@ readonly DADDR6='fd::2'
readonly path_sysctl_mem="net.core.optmem_max"
+# No arguments: automated test
+if [[ "$#" -eq "0" ]]; then
+ $0 4 tcp -t 1
+ $0 6 tcp -t 1
+ echo "OK. All tests passed"
+ exit 0
+fi
+
# Argument parsing
if [[ "$#" -lt "2" ]]; then
echo "Usage: $0 [4|6] [tcp|udp|raw|raw_hdrincl|packet|packet_dgram] <args>"
diff --git a/tools/testing/selftests/net/pmtu.sh b/tools/testing/selftests/net/pmtu.sh
index 7514f93..f8cc38a 100755
--- a/tools/testing/selftests/net/pmtu.sh
+++ b/tools/testing/selftests/net/pmtu.sh
@@ -371,7 +371,7 @@ test_pmtu_vti6_link_add_mtu() {
fail=0
- min=1280
+ min=68 # vti6 can carry IPv4 packets too
max=$((65535 - 40))
# Check invalid values first
for v in $((min - 1)) $((max + 1)); do
@@ -387,7 +387,7 @@ test_pmtu_vti6_link_add_mtu() {
done
# Now check valid values
- for v in 1280 1300 $((65535 - 40)); do
+ for v in 68 1280 1300 $((65535 - 40)); do
${ns_a} ip link add vti6_a mtu ${v} type vti6 local ${veth6_a_addr} remote ${veth6_b_addr} key 10
mtu="$(link_get_mtu "${ns_a}" vti6_a)"
${ns_a} ip link del vti6_a
diff --git a/tools/testing/selftests/net/psock_snd.c b/tools/testing/selftests/net/psock_snd.c
new file mode 100644
index 0000000..7d15e10
--- /dev/null
+++ b/tools/testing/selftests/net/psock_snd.c
@@ -0,0 +1,397 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#define _GNU_SOURCE
+
+#include <arpa/inet.h>
+#include <errno.h>
+#include <error.h>
+#include <fcntl.h>
+#include <limits.h>
+#include <linux/filter.h>
+#include <linux/bpf.h>
+#include <linux/if_packet.h>
+#include <linux/if_vlan.h>
+#include <linux/virtio_net.h>
+#include <net/if.h>
+#include <net/ethernet.h>
+#include <netinet/ip.h>
+#include <netinet/udp.h>
+#include <poll.h>
+#include <sched.h>
+#include <stdbool.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/mman.h>
+#include <sys/socket.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+#include <unistd.h>
+
+#include "psock_lib.h"
+
+static bool cfg_use_bind;
+static bool cfg_use_csum_off;
+static bool cfg_use_csum_off_bad;
+static bool cfg_use_dgram;
+static bool cfg_use_gso;
+static bool cfg_use_qdisc_bypass;
+static bool cfg_use_vlan;
+static bool cfg_use_vnet;
+
+static char *cfg_ifname = "lo";
+static int cfg_mtu = 1500;
+static int cfg_payload_len = DATA_LEN;
+static int cfg_truncate_len = INT_MAX;
+static uint16_t cfg_port = 8000;
+
+/* test sending up to max mtu + 1 */
+#define TEST_SZ (sizeof(struct virtio_net_hdr) + ETH_HLEN + ETH_MAX_MTU + 1)
+
+static char tbuf[TEST_SZ], rbuf[TEST_SZ];
+
+static unsigned long add_csum_hword(const uint16_t *start, int num_u16)
+{
+ unsigned long sum = 0;
+ int i;
+
+ for (i = 0; i < num_u16; i++)
+ sum += start[i];
+
+ return sum;
+}
+
+static uint16_t build_ip_csum(const uint16_t *start, int num_u16,
+ unsigned long sum)
+{
+ sum += add_csum_hword(start, num_u16);
+
+ while (sum >> 16)
+ sum = (sum & 0xffff) + (sum >> 16);
+
+ return ~sum;
+}
+
+static int build_vnet_header(void *header)
+{
+ struct virtio_net_hdr *vh = header;
+
+ vh->hdr_len = ETH_HLEN + sizeof(struct iphdr) + sizeof(struct udphdr);
+
+ if (cfg_use_csum_off) {
+ vh->flags |= VIRTIO_NET_HDR_F_NEEDS_CSUM;
+ vh->csum_start = ETH_HLEN + sizeof(struct iphdr);
+ vh->csum_offset = __builtin_offsetof(struct udphdr, check);
+
+ /* position check field exactly one byte beyond end of packet */
+ if (cfg_use_csum_off_bad)
+ vh->csum_start += sizeof(struct udphdr) + cfg_payload_len -
+ vh->csum_offset - 1;
+ }
+
+ if (cfg_use_gso) {
+ vh->gso_type = VIRTIO_NET_HDR_GSO_UDP;
+ vh->gso_size = cfg_mtu - sizeof(struct iphdr);
+ }
+
+ return sizeof(*vh);
+}
+
+static int build_eth_header(void *header)
+{
+ struct ethhdr *eth = header;
+
+ if (cfg_use_vlan) {
+ uint16_t *tag = header + ETH_HLEN;
+
+ eth->h_proto = htons(ETH_P_8021Q);
+ tag[1] = htons(ETH_P_IP);
+ return ETH_HLEN + 4;
+ }
+
+ eth->h_proto = htons(ETH_P_IP);
+ return ETH_HLEN;
+}
+
+static int build_ipv4_header(void *header, int payload_len)
+{
+ struct iphdr *iph = header;
+
+ iph->ihl = 5;
+ iph->version = 4;
+ iph->ttl = 8;
+ iph->tot_len = htons(sizeof(*iph) + sizeof(struct udphdr) + payload_len);
+ iph->id = htons(1337);
+ iph->protocol = IPPROTO_UDP;
+ iph->saddr = htonl((172 << 24) | (17 << 16) | 2);
+ iph->daddr = htonl((172 << 24) | (17 << 16) | 1);
+ iph->check = build_ip_csum((void *) iph, iph->ihl << 1, 0);
+
+ return iph->ihl << 2;
+}
+
+static int build_udp_header(void *header, int payload_len)
+{
+ const int alen = sizeof(uint32_t);
+ struct udphdr *udph = header;
+ int len = sizeof(*udph) + payload_len;
+
+ udph->source = htons(9);
+ udph->dest = htons(cfg_port);
+ udph->len = htons(len);
+
+ if (cfg_use_csum_off)
+ udph->check = build_ip_csum(header - (2 * alen), alen,
+ htons(IPPROTO_UDP) + udph->len);
+ else
+ udph->check = 0;
+
+ return sizeof(*udph);
+}
+
+static int build_packet(int payload_len)
+{
+ int off = 0;
+
+ off += build_vnet_header(tbuf);
+ off += build_eth_header(tbuf + off);
+ off += build_ipv4_header(tbuf + off, payload_len);
+ off += build_udp_header(tbuf + off, payload_len);
+
+ if (off + payload_len > sizeof(tbuf))
+ error(1, 0, "payload length exceeds max");
+
+ memset(tbuf + off, DATA_CHAR, payload_len);
+
+ return off + payload_len;
+}
+
+static void do_bind(int fd)
+{
+ struct sockaddr_ll laddr = {0};
+
+ laddr.sll_family = AF_PACKET;
+ laddr.sll_protocol = htons(ETH_P_IP);
+ laddr.sll_ifindex = if_nametoindex(cfg_ifname);
+ if (!laddr.sll_ifindex)
+ error(1, errno, "if_nametoindex");
+
+ if (bind(fd, (void *)&laddr, sizeof(laddr)))
+ error(1, errno, "bind");
+}
+
+static void do_send(int fd, char *buf, int len)
+{
+ int ret;
+
+ if (!cfg_use_vnet) {
+ buf += sizeof(struct virtio_net_hdr);
+ len -= sizeof(struct virtio_net_hdr);
+ }
+ if (cfg_use_dgram) {
+ buf += ETH_HLEN;
+ len -= ETH_HLEN;
+ }
+
+ if (cfg_use_bind) {
+ ret = write(fd, buf, len);
+ } else {
+ struct sockaddr_ll laddr = {0};
+
+ laddr.sll_protocol = htons(ETH_P_IP);
+ laddr.sll_ifindex = if_nametoindex(cfg_ifname);
+ if (!laddr.sll_ifindex)
+ error(1, errno, "if_nametoindex");
+
+ ret = sendto(fd, buf, len, 0, (void *)&laddr, sizeof(laddr));
+ }
+
+ if (ret == -1)
+ error(1, errno, "write");
+ if (ret != len)
+ error(1, 0, "write: %u %u", ret, len);
+
+ fprintf(stderr, "tx: %u\n", ret);
+}
+
+static int do_tx(void)
+{
+ const int one = 1;
+ int fd, len;
+
+ fd = socket(PF_PACKET, cfg_use_dgram ? SOCK_DGRAM : SOCK_RAW, 0);
+ if (fd == -1)
+ error(1, errno, "socket t");
+
+ if (cfg_use_bind)
+ do_bind(fd);
+
+ if (cfg_use_qdisc_bypass &&
+ setsockopt(fd, SOL_PACKET, PACKET_QDISC_BYPASS, &one, sizeof(one)))
+ error(1, errno, "setsockopt qdisc bypass");
+
+ if (cfg_use_vnet &&
+ setsockopt(fd, SOL_PACKET, PACKET_VNET_HDR, &one, sizeof(one)))
+ error(1, errno, "setsockopt vnet");
+
+ len = build_packet(cfg_payload_len);
+
+ if (cfg_truncate_len < len)
+ len = cfg_truncate_len;
+
+ do_send(fd, tbuf, len);
+
+ if (close(fd))
+ error(1, errno, "close t");
+
+ return len;
+}
+
+static int setup_rx(void)
+{
+ struct timeval tv = { .tv_usec = 100 * 1000 };
+ struct sockaddr_in raddr = {0};
+ int fd;
+
+ fd = socket(PF_INET, SOCK_DGRAM, 0);
+ if (fd == -1)
+ error(1, errno, "socket r");
+
+ if (setsockopt(fd, SOL_SOCKET, SO_RCVTIMEO, &tv, sizeof(tv)))
+ error(1, errno, "setsockopt rcv timeout");
+
+ raddr.sin_family = AF_INET;
+ raddr.sin_port = htons(cfg_port);
+ raddr.sin_addr.s_addr = htonl(INADDR_ANY);
+
+ if (bind(fd, (void *)&raddr, sizeof(raddr)))
+ error(1, errno, "bind r");
+
+ return fd;
+}
+
+static void do_rx(int fd, int expected_len, char *expected)
+{
+ int ret;
+
+ ret = recv(fd, rbuf, sizeof(rbuf), 0);
+ if (ret == -1)
+ error(1, errno, "recv");
+ if (ret != expected_len)
+ error(1, 0, "recv: %u != %u", ret, expected_len);
+
+ if (memcmp(rbuf, expected, ret))
+ error(1, 0, "recv: data mismatch");
+
+ fprintf(stderr, "rx: %u\n", ret);
+}
+
+static int setup_sniffer(void)
+{
+ struct timeval tv = { .tv_usec = 100 * 1000 };
+ int fd;
+
+ fd = socket(PF_PACKET, SOCK_RAW, 0);
+ if (fd == -1)
+ error(1, errno, "socket p");
+
+ if (setsockopt(fd, SOL_SOCKET, SO_RCVTIMEO, &tv, sizeof(tv)))
+ error(1, errno, "setsockopt rcv timeout");
+
+ pair_udp_setfilter(fd);
+ do_bind(fd);
+
+ return fd;
+}
+
+static void parse_opts(int argc, char **argv)
+{
+ int c;
+
+ while ((c = getopt(argc, argv, "bcCdgl:qt:vV")) != -1) {
+ switch (c) {
+ case 'b':
+ cfg_use_bind = true;
+ break;
+ case 'c':
+ cfg_use_csum_off = true;
+ break;
+ case 'C':
+ cfg_use_csum_off_bad = true;
+ break;
+ case 'd':
+ cfg_use_dgram = true;
+ break;
+ case 'g':
+ cfg_use_gso = true;
+ break;
+ case 'l':
+ cfg_payload_len = strtoul(optarg, NULL, 0);
+ break;
+ case 'q':
+ cfg_use_qdisc_bypass = true;
+ break;
+ case 't':
+ cfg_truncate_len = strtoul(optarg, NULL, 0);
+ break;
+ case 'v':
+ cfg_use_vnet = true;
+ break;
+ case 'V':
+ cfg_use_vlan = true;
+ break;
+ default:
+ error(1, 0, "%s: parse error", argv[0]);
+ }
+ }
+
+ if (cfg_use_vlan && cfg_use_dgram)
+ error(1, 0, "option vlan (-V) conflicts with dgram (-d)");
+
+ if (cfg_use_csum_off && !cfg_use_vnet)
+ error(1, 0, "option csum offload (-c) requires vnet (-v)");
+
+ if (cfg_use_csum_off_bad && !cfg_use_csum_off)
+ error(1, 0, "option csum bad (-C) requires csum offload (-c)");
+
+ if (cfg_use_gso && !cfg_use_csum_off)
+ error(1, 0, "option gso (-g) requires csum offload (-c)");
+}
+
+static void run_test(void)
+{
+ int fdr, fds, total_len;
+
+ fdr = setup_rx();
+ fds = setup_sniffer();
+
+ total_len = do_tx();
+
+ /* BPF filter accepts only this length, vlan changes MAC */
+ if (cfg_payload_len == DATA_LEN && !cfg_use_vlan)
+ do_rx(fds, total_len - sizeof(struct virtio_net_hdr),
+ tbuf + sizeof(struct virtio_net_hdr));
+
+ do_rx(fdr, cfg_payload_len, tbuf + total_len - cfg_payload_len);
+
+ if (close(fds))
+ error(1, errno, "close s");
+ if (close(fdr))
+ error(1, errno, "close r");
+}
+
+int main(int argc, char **argv)
+{
+ parse_opts(argc, argv);
+
+ if (system("ip link set dev lo mtu 1500"))
+ error(1, errno, "ip link set mtu");
+ if (system("ip addr add dev lo 172.17.0.1/24"))
+ error(1, errno, "ip addr add");
+
+ run_test();
+
+ fprintf(stderr, "OK\n\n");
+ return 0;
+}
diff --git a/tools/testing/selftests/net/psock_snd.sh b/tools/testing/selftests/net/psock_snd.sh
new file mode 100755
index 0000000..6331d91
--- /dev/null
+++ b/tools/testing/selftests/net/psock_snd.sh
@@ -0,0 +1,98 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+#
+# Run a series of packet socket send regression tests
+
+set -e
+
+readonly mtu=1500
+readonly iphlen=20
+readonly udphlen=8
+
+readonly vnet_hlen=10
+readonly eth_hlen=14
+
+readonly mss="$((${mtu} - ${iphlen} - ${udphlen}))"
+readonly mss_exceeds="$((${mss} + 1))"
+
+readonly max_mtu=65535
+readonly max_mss="$((${max_mtu} - ${iphlen} - ${udphlen}))"
+readonly max_mss_exceeds="$((${max_mss} + 1))"
+
+# functional checks (not a full cross-product)
+
+echo "dgram"
+./in_netns.sh ./psock_snd -d
+
+echo "dgram bind"
+./in_netns.sh ./psock_snd -d -b
+
+echo "raw"
+./in_netns.sh ./psock_snd
+
+echo "raw bind"
+./in_netns.sh ./psock_snd -b
+
+echo "raw qdisc bypass"
+./in_netns.sh ./psock_snd -q
+
+echo "raw vlan"
+./in_netns.sh ./psock_snd -V
+
+echo "raw vnet hdr"
+./in_netns.sh ./psock_snd -v
+
+echo "raw csum_off"
+./in_netns.sh ./psock_snd -v -c
+
+echo "raw csum_off with bad offset (fails)"
+(! ./in_netns.sh ./psock_snd -v -c -C)
+
+
+# bounds check: send {max, max + 1, min, min - 1} lengths
+
+echo "raw min size"
+./in_netns.sh ./psock_snd -l 0
+
+echo "raw mtu size"
+./in_netns.sh ./psock_snd -l "${mss}"
+
+echo "raw mtu size + 1 (fails)"
+(! ./in_netns.sh ./psock_snd -l "${mss_exceeds}")
+
+# fails due to ARPHRD_ETHER check in packet_extra_vlan_len_allowed
+#
+# echo "raw vlan mtu size"
+# ./in_netns.sh ./psock_snd -V -l "${mss}"
+
+echo "raw vlan mtu size + 1 (fails)"
+(! ./in_netns.sh ./psock_snd -V -l "${mss_exceeds}")
+
+echo "dgram mtu size"
+./in_netns.sh ./psock_snd -d -l "${mss}"
+
+echo "dgram mtu size + 1 (fails)"
+(! ./in_netns.sh ./psock_snd -d -l "${mss_exceeds}")
+
+echo "raw truncate hlen (fails: does not arrive)"
+(! ./in_netns.sh ./psock_snd -t "$((${vnet_hlen} + ${eth_hlen}))")
+
+echo "raw truncate hlen - 1 (fails: EINVAL)"
+(! ./in_netns.sh ./psock_snd -t "$((${vnet_hlen} + ${eth_hlen} - 1))")
+
+
+# gso checks: implies -l, because with gso len must exceed gso_size
+
+echo "raw gso min size"
+./in_netns.sh ./psock_snd -v -c -g -l "${mss_exceeds}"
+
+echo "raw gso min size - 1 (fails)"
+(! ./in_netns.sh ./psock_snd -v -c -g -l "${mss}")
+
+echo "raw gso max size"
+./in_netns.sh ./psock_snd -v -c -g -l "${max_mss}"
+
+echo "raw gso max size + 1 (fails)"
+(! ./in_netns.sh ./psock_snd -v -c -g -l "${max_mss_exceeds}")
+
+echo "OK. All tests passed"
diff --git a/tools/testing/selftests/net/rtnetlink.sh b/tools/testing/selftests/net/rtnetlink.sh
index fb37678..0d7a44f 100755
--- a/tools/testing/selftests/net/rtnetlink.sh
+++ b/tools/testing/selftests/net/rtnetlink.sh
@@ -505,6 +505,108 @@ kci_test_macsec()
echo "PASS: macsec"
}
+#-------------------------------------------------------------------
+# Example commands
+# ip x s add proto esp src 14.0.0.52 dst 14.0.0.70 \
+# spi 0x07 mode transport reqid 0x07 replay-window 32 \
+# aead 'rfc4106(gcm(aes))' 1234567890123456dcba 128 \
+# sel src 14.0.0.52/24 dst 14.0.0.70/24
+# ip x p add dir out src 14.0.0.52/24 dst 14.0.0.70/24 \
+# tmpl proto esp src 14.0.0.52 dst 14.0.0.70 \
+# spi 0x07 mode transport reqid 0x07
+#
+# Subcommands not tested
+# ip x s update
+# ip x s allocspi
+# ip x s deleteall
+# ip x p update
+# ip x p deleteall
+# ip x p set
+#-------------------------------------------------------------------
+kci_test_ipsec()
+{
+ srcip="14.0.0.52"
+ dstip="14.0.0.70"
+ algo="aead rfc4106(gcm(aes)) 0x3132333435363738393031323334353664636261 128"
+
+ # flush to be sure there's nothing configured
+ ip x s flush ; ip x p flush
+ check_err $?
+
+ # start the monitor in the background
+ tmpfile=`mktemp ipsectestXXX`
+ ip x m > $tmpfile &
+ mpid=$!
+ sleep 0.2
+
+ ipsecid="proto esp src $srcip dst $dstip spi 0x07"
+ ip x s add $ipsecid \
+ mode transport reqid 0x07 replay-window 32 \
+ $algo sel src $srcip/24 dst $dstip/24
+ check_err $?
+
+ lines=`ip x s list | grep $srcip | grep $dstip | wc -l`
+ test $lines -eq 2
+ check_err $?
+
+ ip x s count | grep -q "SAD count 1"
+ check_err $?
+
+ lines=`ip x s get $ipsecid | grep $srcip | grep $dstip | wc -l`
+ test $lines -eq 2
+ check_err $?
+
+ ip x s delete $ipsecid
+ check_err $?
+
+ lines=`ip x s list | wc -l`
+ test $lines -eq 0
+ check_err $?
+
+ ipsecsel="dir out src $srcip/24 dst $dstip/24"
+ ip x p add $ipsecsel \
+ tmpl proto esp src $srcip dst $dstip \
+ spi 0x07 mode transport reqid 0x07
+ check_err $?
+
+ lines=`ip x p list | grep $srcip | grep $dstip | wc -l`
+ test $lines -eq 2
+ check_err $?
+
+ ip x p count | grep -q "SPD IN 0 OUT 1 FWD 0"
+ check_err $?
+
+ lines=`ip x p get $ipsecsel | grep $srcip | grep $dstip | wc -l`
+ test $lines -eq 2
+ check_err $?
+
+ ip x p delete $ipsecsel
+ check_err $?
+
+ lines=`ip x p list | wc -l`
+ test $lines -eq 0
+ check_err $?
+
+ # check the monitor results
+ kill $mpid
+ lines=`wc -l $tmpfile | cut "-d " -f1`
+ test $lines -eq 20
+ check_err $?
+ rm -rf $tmpfile
+
+ # clean up any leftovers
+ ip x s flush
+ check_err $?
+ ip x p flush
+ check_err $?
+
+ if [ $ret -ne 0 ]; then
+ echo "FAIL: ipsec"
+ return 1
+ fi
+ echo "PASS: ipsec"
+}
+
kci_test_gretap()
{
testns="testns"
@@ -758,6 +860,7 @@ kci_test_rtnl()
kci_test_vrf
kci_test_encap
kci_test_macsec
+ kci_test_ipsec
kci_del_dummy
}
diff --git a/tools/testing/selftests/net/tcp_inq.c b/tools/testing/selftests/net/tcp_inq.c
new file mode 100644
index 0000000..d044b29
--- /dev/null
+++ b/tools/testing/selftests/net/tcp_inq.c
@@ -0,0 +1,189 @@
+/*
+ * Copyright 2018 Google Inc.
+ * Author: Soheil Hassas Yeganeh (soheil@google.com)
+ *
+ * Simple example on how to use TCP_INQ and TCP_CM_INQ.
+ *
+ * License (GPLv2):
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. * See the GNU General Public License for
+ * more details.
+ */
+#define _GNU_SOURCE
+
+#include <error.h>
+#include <netinet/in.h>
+#include <netinet/tcp.h>
+#include <pthread.h>
+#include <stdio.h>
+#include <errno.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/socket.h>
+#include <unistd.h>
+
+#ifndef TCP_INQ
+#define TCP_INQ 36
+#endif
+
+#ifndef TCP_CM_INQ
+#define TCP_CM_INQ TCP_INQ
+#endif
+
+#define BUF_SIZE 8192
+#define CMSG_SIZE 32
+
+static int family = AF_INET6;
+static socklen_t addr_len = sizeof(struct sockaddr_in6);
+static int port = 4974;
+
+static void setup_loopback_addr(int family, struct sockaddr_storage *sockaddr)
+{
+ struct sockaddr_in6 *addr6 = (void *) sockaddr;
+ struct sockaddr_in *addr4 = (void *) sockaddr;
+
+ switch (family) {
+ case PF_INET:
+ memset(addr4, 0, sizeof(*addr4));
+ addr4->sin_family = AF_INET;
+ addr4->sin_addr.s_addr = htonl(INADDR_LOOPBACK);
+ addr4->sin_port = htons(port);
+ break;
+ case PF_INET6:
+ memset(addr6, 0, sizeof(*addr6));
+ addr6->sin6_family = AF_INET6;
+ addr6->sin6_addr = in6addr_loopback;
+ addr6->sin6_port = htons(port);
+ break;
+ default:
+ error(1, 0, "illegal family");
+ }
+}
+
+void *start_server(void *arg)
+{
+ int server_fd = (int)(unsigned long)arg;
+ struct sockaddr_in addr;
+ socklen_t addrlen = sizeof(addr);
+ char *buf;
+ int fd;
+ int r;
+
+ buf = malloc(BUF_SIZE);
+
+ for (;;) {
+ fd = accept(server_fd, (struct sockaddr *)&addr, &addrlen);
+ if (fd == -1) {
+ perror("accept");
+ break;
+ }
+ do {
+ r = send(fd, buf, BUF_SIZE, 0);
+ } while (r < 0 && errno == EINTR);
+ if (r < 0)
+ perror("send");
+ if (r != BUF_SIZE)
+ fprintf(stderr, "can only send %d bytes\n", r);
+ /* TCP_INQ can overestimate in-queue by one byte if we send
+ * the FIN packet. Sleep for 1 second, so that the client
+ * likely invoked recvmsg().
+ */
+ sleep(1);
+ close(fd);
+ }
+
+ free(buf);
+ close(server_fd);
+ pthread_exit(0);
+}
+
+int main(int argc, char *argv[])
+{
+ struct sockaddr_storage listen_addr, addr;
+ int c, one = 1, inq = -1;
+ pthread_t server_thread;
+ char cmsgbuf[CMSG_SIZE];
+ struct iovec iov[1];
+ struct cmsghdr *cm;
+ struct msghdr msg;
+ int server_fd, fd;
+ char *buf;
+
+ while ((c = getopt(argc, argv, "46p:")) != -1) {
+ switch (c) {
+ case '4':
+ family = PF_INET;
+ addr_len = sizeof(struct sockaddr_in);
+ break;
+ case '6':
+ family = PF_INET6;
+ addr_len = sizeof(struct sockaddr_in6);
+ break;
+ case 'p':
+ port = atoi(optarg);
+ break;
+ }
+ }
+
+ server_fd = socket(family, SOCK_STREAM, 0);
+ if (server_fd < 0)
+ error(1, errno, "server socket");
+ setup_loopback_addr(family, &listen_addr);
+ if (setsockopt(server_fd, SOL_SOCKET, SO_REUSEADDR,
+ &one, sizeof(one)) != 0)
+ error(1, errno, "setsockopt(SO_REUSEADDR)");
+ if (bind(server_fd, (const struct sockaddr *)&listen_addr,
+ addr_len) == -1)
+ error(1, errno, "bind");
+ if (listen(server_fd, 128) == -1)
+ error(1, errno, "listen");
+ if (pthread_create(&server_thread, NULL, start_server,
+ (void *)(unsigned long)server_fd) != 0)
+ error(1, errno, "pthread_create");
+
+ fd = socket(family, SOCK_STREAM, 0);
+ if (fd < 0)
+ error(1, errno, "client socket");
+ setup_loopback_addr(family, &addr);
+ if (connect(fd, (const struct sockaddr *)&addr, addr_len) == -1)
+ error(1, errno, "connect");
+ if (setsockopt(fd, SOL_TCP, TCP_INQ, &one, sizeof(one)) != 0)
+ error(1, errno, "setsockopt(TCP_INQ)");
+
+ msg.msg_name = NULL;
+ msg.msg_namelen = 0;
+ msg.msg_iov = iov;
+ msg.msg_iovlen = 1;
+ msg.msg_control = cmsgbuf;
+ msg.msg_controllen = sizeof(cmsgbuf);
+ msg.msg_flags = 0;
+
+ buf = malloc(BUF_SIZE);
+ iov[0].iov_base = buf;
+ iov[0].iov_len = BUF_SIZE / 2;
+
+ if (recvmsg(fd, &msg, 0) != iov[0].iov_len)
+ error(1, errno, "recvmsg");
+ if (msg.msg_flags & MSG_CTRUNC)
+ error(1, 0, "control message is truncated");
+
+ for (cm = CMSG_FIRSTHDR(&msg); cm; cm = CMSG_NXTHDR(&msg, cm))
+ if (cm->cmsg_level == SOL_TCP && cm->cmsg_type == TCP_CM_INQ)
+ inq = *((int *) CMSG_DATA(cm));
+
+ if (inq != BUF_SIZE - iov[0].iov_len) {
+ fprintf(stderr, "unexpected inq: %d\n", inq);
+ exit(1);
+ }
+
+ printf("PASSED\n");
+ free(buf);
+ close(fd);
+ return 0;
+}
diff --git a/tools/testing/selftests/net/tcp_mmap.c b/tools/testing/selftests/net/tcp_mmap.c
new file mode 100644
index 0000000..77f7627
--- /dev/null
+++ b/tools/testing/selftests/net/tcp_mmap.c
@@ -0,0 +1,447 @@
+/*
+ * Copyright 2018 Google Inc.
+ * Author: Eric Dumazet (edumazet@google.com)
+ *
+ * Reference program demonstrating tcp mmap() usage,
+ * and SO_RCVLOWAT hints for receiver.
+ *
+ * Note : NIC with header split is needed to use mmap() on TCP :
+ * Each incoming frame must be a multiple of PAGE_SIZE bytes of TCP payload.
+ *
+ * How to use on loopback interface :
+ *
+ * ifconfig lo mtu 61512 # 15*4096 + 40 (ipv6 header) + 32 (TCP with TS option header)
+ * tcp_mmap -s -z &
+ * tcp_mmap -H ::1 -z
+ *
+ * Or leave default lo mtu, but use -M option to set TCP_MAXSEG option to (4096 + 12)
+ * (4096 : page size on x86, 12: TCP TS option length)
+ * tcp_mmap -s -z -M $((4096+12)) &
+ * tcp_mmap -H ::1 -z -M $((4096+12))
+ *
+ * Note: -z option on sender uses MSG_ZEROCOPY, which forces a copy when packets go through loopback interface.
+ * We might use sendfile() instead, but really this test program is about mmap(), for receivers ;)
+ *
+ * $ ./tcp_mmap -s & # Without mmap()
+ * $ for i in {1..4}; do ./tcp_mmap -H ::1 -z ; done
+ * received 32768 MB (0 % mmap'ed) in 14.1157 s, 19.4732 Gbit
+ * cpu usage user:0.057 sys:7.815, 240.234 usec per MB, 65531 c-switches
+ * received 32768 MB (0 % mmap'ed) in 14.6833 s, 18.7204 Gbit
+ * cpu usage user:0.043 sys:8.103, 248.596 usec per MB, 65524 c-switches
+ * received 32768 MB (0 % mmap'ed) in 11.143 s, 24.6682 Gbit
+ * cpu usage user:0.044 sys:6.576, 202.026 usec per MB, 65519 c-switches
+ * received 32768 MB (0 % mmap'ed) in 14.9056 s, 18.4413 Gbit
+ * cpu usage user:0.036 sys:8.193, 251.129 usec per MB, 65530 c-switches
+ * $ kill %1 # kill tcp_mmap server
+ *
+ * $ ./tcp_mmap -s -z & # With mmap()
+ * $ for i in {1..4}; do ./tcp_mmap -H ::1 -z ; done
+ * received 32768 MB (99.9939 % mmap'ed) in 6.73792 s, 40.7956 Gbit
+ * cpu usage user:0.045 sys:2.827, 87.6465 usec per MB, 65532 c-switches
+ * received 32768 MB (99.9939 % mmap'ed) in 7.26732 s, 37.8238 Gbit
+ * cpu usage user:0.037 sys:3.087, 95.3369 usec per MB, 65532 c-switches
+ * received 32768 MB (99.9939 % mmap'ed) in 7.61661 s, 36.0893 Gbit
+ * cpu usage user:0.046 sys:3.559, 110.016 usec per MB, 65529 c-switches
+ * received 32768 MB (99.9939 % mmap'ed) in 7.43764 s, 36.9577 Gbit
+ * cpu usage user:0.035 sys:3.467, 106.873 usec per MB, 65530 c-switches
+ *
+ * License (GPLv2):
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. * See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
+ */
+#define _GNU_SOURCE
+#include <pthread.h>
+#include <sys/types.h>
+#include <fcntl.h>
+#include <error.h>
+#include <sys/socket.h>
+#include <sys/mman.h>
+#include <sys/resource.h>
+#include <unistd.h>
+#include <string.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <errno.h>
+#include <time.h>
+#include <sys/time.h>
+#include <netinet/in.h>
+#include <arpa/inet.h>
+#include <poll.h>
+#include <linux/tcp.h>
+#include <assert.h>
+
+#ifndef MSG_ZEROCOPY
+#define MSG_ZEROCOPY 0x4000000
+#endif
+
+#define FILE_SZ (1UL << 35)
+static int cfg_family = AF_INET6;
+static socklen_t cfg_alen = sizeof(struct sockaddr_in6);
+static int cfg_port = 8787;
+
+static int rcvbuf; /* Default: autotuning. Can be set with -r <integer> option */
+static int sndbuf; /* Default: autotuning. Can be set with -w <integer> option */
+static int zflg; /* zero copy option. (MSG_ZEROCOPY for sender, mmap() for receiver */
+static int xflg; /* hash received data (simple xor) (-h option) */
+static int keepflag; /* -k option: receiver shall keep all received file in memory (no munmap() calls) */
+
+static int chunk_size = 512*1024;
+
+unsigned long htotal;
+
+static inline void prefetch(const void *x)
+{
+#if defined(__x86_64__)
+ asm volatile("prefetcht0 %P0" : : "m" (*(const char *)x));
+#endif
+}
+
+void hash_zone(void *zone, unsigned int length)
+{
+ unsigned long temp = htotal;
+
+ while (length >= 8*sizeof(long)) {
+ prefetch(zone + 384);
+ temp ^= *(unsigned long *)zone;
+ temp ^= *(unsigned long *)(zone + sizeof(long));
+ temp ^= *(unsigned long *)(zone + 2*sizeof(long));
+ temp ^= *(unsigned long *)(zone + 3*sizeof(long));
+ temp ^= *(unsigned long *)(zone + 4*sizeof(long));
+ temp ^= *(unsigned long *)(zone + 5*sizeof(long));
+ temp ^= *(unsigned long *)(zone + 6*sizeof(long));
+ temp ^= *(unsigned long *)(zone + 7*sizeof(long));
+ zone += 8*sizeof(long);
+ length -= 8*sizeof(long);
+ }
+ while (length >= 1) {
+ temp ^= *(unsigned char *)zone;
+ zone += 1;
+ length--;
+ }
+ htotal = temp;
+}
+
+void *child_thread(void *arg)
+{
+ unsigned long total_mmap = 0, total = 0;
+ struct tcp_zerocopy_receive zc;
+ unsigned long delta_usec;
+ int flags = MAP_SHARED;
+ struct timeval t0, t1;
+ char *buffer = NULL;
+ void *addr = NULL;
+ double throughput;
+ struct rusage ru;
+ int lu, fd;
+
+ fd = (int)(unsigned long)arg;
+
+ gettimeofday(&t0, NULL);
+
+ fcntl(fd, F_SETFL, O_NDELAY);
+ buffer = malloc(chunk_size);
+ if (!buffer) {
+ perror("malloc");
+ goto error;
+ }
+ if (zflg) {
+ addr = mmap(NULL, chunk_size, PROT_READ, flags, fd, 0);
+ if (addr == (void *)-1)
+ zflg = 0;
+ }
+ while (1) {
+ struct pollfd pfd = { .fd = fd, .events = POLLIN, };
+ int sub;
+
+ poll(&pfd, 1, 10000);
+ if (zflg) {
+ socklen_t zc_len = sizeof(zc);
+ int res;
+
+ zc.address = (__u64)addr;
+ zc.length = chunk_size;
+ zc.recv_skip_hint = 0;
+ res = getsockopt(fd, IPPROTO_TCP, TCP_ZEROCOPY_RECEIVE,
+ &zc, &zc_len);
+ if (res == -1)
+ break;
+
+ if (zc.length) {
+ assert(zc.length <= chunk_size);
+ total_mmap += zc.length;
+ if (xflg)
+ hash_zone(addr, zc.length);
+ total += zc.length;
+ }
+ if (zc.recv_skip_hint) {
+ assert(zc.recv_skip_hint <= chunk_size);
+ lu = read(fd, buffer, zc.recv_skip_hint);
+ if (lu > 0) {
+ if (xflg)
+ hash_zone(buffer, lu);
+ total += lu;
+ }
+ }
+ continue;
+ }
+ sub = 0;
+ while (sub < chunk_size) {
+ lu = read(fd, buffer + sub, chunk_size - sub);
+ if (lu == 0)
+ goto end;
+ if (lu < 0)
+ break;
+ if (xflg)
+ hash_zone(buffer + sub, lu);
+ total += lu;
+ sub += lu;
+ }
+ }
+end:
+ gettimeofday(&t1, NULL);
+ delta_usec = (t1.tv_sec - t0.tv_sec) * 1000000 + t1.tv_usec - t0.tv_usec;
+
+ throughput = 0;
+ if (delta_usec)
+ throughput = total * 8.0 / (double)delta_usec / 1000.0;
+ getrusage(RUSAGE_THREAD, &ru);
+ if (total > 1024*1024) {
+ unsigned long total_usec;
+ unsigned long mb = total >> 20;
+ total_usec = 1000000*ru.ru_utime.tv_sec + ru.ru_utime.tv_usec +
+ 1000000*ru.ru_stime.tv_sec + ru.ru_stime.tv_usec;
+ printf("received %lg MB (%lg %% mmap'ed) in %lg s, %lg Gbit\n"
+ " cpu usage user:%lg sys:%lg, %lg usec per MB, %lu c-switches\n",
+ total / (1024.0 * 1024.0),
+ 100.0*total_mmap/total,
+ (double)delta_usec / 1000000.0,
+ throughput,
+ (double)ru.ru_utime.tv_sec + (double)ru.ru_utime.tv_usec / 1000000.0,
+ (double)ru.ru_stime.tv_sec + (double)ru.ru_stime.tv_usec / 1000000.0,
+ (double)total_usec/mb,
+ ru.ru_nvcsw);
+ }
+error:
+ free(buffer);
+ close(fd);
+ if (zflg)
+ munmap(addr, chunk_size);
+ pthread_exit(0);
+}
+
+static void apply_rcvsnd_buf(int fd)
+{
+ if (rcvbuf && setsockopt(fd, SOL_SOCKET,
+ SO_RCVBUF, &rcvbuf, sizeof(rcvbuf)) == -1) {
+ perror("setsockopt SO_RCVBUF");
+ }
+
+ if (sndbuf && setsockopt(fd, SOL_SOCKET,
+ SO_SNDBUF, &sndbuf, sizeof(sndbuf)) == -1) {
+ perror("setsockopt SO_SNDBUF");
+ }
+}
+
+
+static void setup_sockaddr(int domain, const char *str_addr,
+ struct sockaddr_storage *sockaddr)
+{
+ struct sockaddr_in6 *addr6 = (void *) sockaddr;
+ struct sockaddr_in *addr4 = (void *) sockaddr;
+
+ switch (domain) {
+ case PF_INET:
+ memset(addr4, 0, sizeof(*addr4));
+ addr4->sin_family = AF_INET;
+ addr4->sin_port = htons(cfg_port);
+ if (str_addr &&
+ inet_pton(AF_INET, str_addr, &(addr4->sin_addr)) != 1)
+ error(1, 0, "ipv4 parse error: %s", str_addr);
+ break;
+ case PF_INET6:
+ memset(addr6, 0, sizeof(*addr6));
+ addr6->sin6_family = AF_INET6;
+ addr6->sin6_port = htons(cfg_port);
+ if (str_addr &&
+ inet_pton(AF_INET6, str_addr, &(addr6->sin6_addr)) != 1)
+ error(1, 0, "ipv6 parse error: %s", str_addr);
+ break;
+ default:
+ error(1, 0, "illegal domain");
+ }
+}
+
+static void do_accept(int fdlisten)
+{
+ if (setsockopt(fdlisten, SOL_SOCKET, SO_RCVLOWAT,
+ &chunk_size, sizeof(chunk_size)) == -1) {
+ perror("setsockopt SO_RCVLOWAT");
+ }
+
+ apply_rcvsnd_buf(fdlisten);
+
+ while (1) {
+ struct sockaddr_in addr;
+ socklen_t addrlen = sizeof(addr);
+ pthread_t th;
+ int fd, res;
+
+ fd = accept(fdlisten, (struct sockaddr *)&addr, &addrlen);
+ if (fd == -1) {
+ perror("accept");
+ continue;
+ }
+ res = pthread_create(&th, NULL, child_thread,
+ (void *)(unsigned long)fd);
+ if (res) {
+ errno = res;
+ perror("pthread_create");
+ close(fd);
+ }
+ }
+}
+
+int main(int argc, char *argv[])
+{
+ struct sockaddr_storage listenaddr, addr;
+ unsigned int max_pacing_rate = 0;
+ unsigned long total = 0;
+ char *host = NULL;
+ int fd, c, on = 1;
+ char *buffer;
+ int sflg = 0;
+ int mss = 0;
+
+ while ((c = getopt(argc, argv, "46p:svr:w:H:zxkP:M:")) != -1) {
+ switch (c) {
+ case '4':
+ cfg_family = PF_INET;
+ cfg_alen = sizeof(struct sockaddr_in);
+ break;
+ case '6':
+ cfg_family = PF_INET6;
+ cfg_alen = sizeof(struct sockaddr_in6);
+ break;
+ case 'p':
+ cfg_port = atoi(optarg);
+ break;
+ case 'H':
+ host = optarg;
+ break;
+ case 's': /* server : listen for incoming connections */
+ sflg++;
+ break;
+ case 'r':
+ rcvbuf = atoi(optarg);
+ break;
+ case 'w':
+ sndbuf = atoi(optarg);
+ break;
+ case 'z':
+ zflg = 1;
+ break;
+ case 'M':
+ mss = atoi(optarg);
+ break;
+ case 'x':
+ xflg = 1;
+ break;
+ case 'k':
+ keepflag = 1;
+ break;
+ case 'P':
+ max_pacing_rate = atoi(optarg) ;
+ break;
+ default:
+ exit(1);
+ }
+ }
+ if (sflg) {
+ int fdlisten = socket(cfg_family, SOCK_STREAM, 0);
+
+ if (fdlisten == -1) {
+ perror("socket");
+ exit(1);
+ }
+ apply_rcvsnd_buf(fdlisten);
+ setsockopt(fdlisten, SOL_SOCKET, SO_REUSEADDR, &on, sizeof(on));
+
+ setup_sockaddr(cfg_family, host, &listenaddr);
+
+ if (mss &&
+ setsockopt(fdlisten, IPPROTO_TCP, TCP_MAXSEG,
+ &mss, sizeof(mss)) == -1) {
+ perror("setsockopt TCP_MAXSEG");
+ exit(1);
+ }
+ if (bind(fdlisten, (const struct sockaddr *)&listenaddr, cfg_alen) == -1) {
+ perror("bind");
+ exit(1);
+ }
+ if (listen(fdlisten, 128) == -1) {
+ perror("listen");
+ exit(1);
+ }
+ do_accept(fdlisten);
+ }
+ buffer = mmap(NULL, chunk_size, PROT_READ | PROT_WRITE,
+ MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
+ if (buffer == (char *)-1) {
+ perror("mmap");
+ exit(1);
+ }
+
+ fd = socket(AF_INET6, SOCK_STREAM, 0);
+ if (fd == -1) {
+ perror("socket");
+ exit(1);
+ }
+ apply_rcvsnd_buf(fd);
+
+ setup_sockaddr(cfg_family, host, &addr);
+
+ if (mss &&
+ setsockopt(fd, IPPROTO_TCP, TCP_MAXSEG, &mss, sizeof(mss)) == -1) {
+ perror("setsockopt TCP_MAXSEG");
+ exit(1);
+ }
+ if (connect(fd, (const struct sockaddr *)&addr, cfg_alen) == -1) {
+ perror("connect");
+ exit(1);
+ }
+ if (max_pacing_rate &&
+ setsockopt(fd, SOL_SOCKET, SO_MAX_PACING_RATE,
+ &max_pacing_rate, sizeof(max_pacing_rate)) == -1)
+ perror("setsockopt SO_MAX_PACING_RATE");
+
+ if (zflg && setsockopt(fd, SOL_SOCKET, SO_ZEROCOPY,
+ &on, sizeof(on)) == -1) {
+ perror("setsockopt SO_ZEROCOPY, (-z option disabled)");
+ zflg = 0;
+ }
+ while (total < FILE_SZ) {
+ long wr = FILE_SZ - total;
+
+ if (wr > chunk_size)
+ wr = chunk_size;
+ /* Note : we just want to fill the pipe with 0 bytes */
+ wr = send(fd, buffer, wr, zflg ? MSG_ZEROCOPY : 0);
+ if (wr <= 0)
+ break;
+ total += wr;
+ }
+ close(fd);
+ munmap(buffer, chunk_size);
+ return 0;
+}
diff --git a/tools/testing/selftests/net/udpgso.c b/tools/testing/selftests/net/udpgso.c
new file mode 100644
index 0000000..e279051
--- /dev/null
+++ b/tools/testing/selftests/net/udpgso.c
@@ -0,0 +1,693 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#define _GNU_SOURCE
+
+#include <stddef.h>
+#include <arpa/inet.h>
+#include <error.h>
+#include <errno.h>
+#include <net/if.h>
+#include <linux/in.h>
+#include <linux/netlink.h>
+#include <linux/rtnetlink.h>
+#include <netinet/if_ether.h>
+#include <netinet/ip.h>
+#include <netinet/ip6.h>
+#include <netinet/udp.h>
+#include <stdbool.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/ioctl.h>
+#include <sys/socket.h>
+#include <sys/stat.h>
+#include <sys/time.h>
+#include <sys/types.h>
+#include <unistd.h>
+
+#ifndef ETH_MAX_MTU
+#define ETH_MAX_MTU 0xFFFFU
+#endif
+
+#ifndef UDP_SEGMENT
+#define UDP_SEGMENT 103
+#endif
+
+#ifndef UDP_MAX_SEGMENTS
+#define UDP_MAX_SEGMENTS (1 << 6UL)
+#endif
+
+#define CONST_MTU_TEST 1500
+
+#define CONST_HDRLEN_V4 (sizeof(struct iphdr) + sizeof(struct udphdr))
+#define CONST_HDRLEN_V6 (sizeof(struct ip6_hdr) + sizeof(struct udphdr))
+
+#define CONST_MSS_V4 (CONST_MTU_TEST - CONST_HDRLEN_V4)
+#define CONST_MSS_V6 (CONST_MTU_TEST - CONST_HDRLEN_V6)
+
+#define CONST_MAX_SEGS_V4 (ETH_MAX_MTU / CONST_MSS_V4)
+#define CONST_MAX_SEGS_V6 (ETH_MAX_MTU / CONST_MSS_V6)
+
+static bool cfg_do_ipv4;
+static bool cfg_do_ipv6;
+static bool cfg_do_connected;
+static bool cfg_do_connectionless;
+static bool cfg_do_msgmore;
+static bool cfg_do_setsockopt;
+static int cfg_specific_test_id = -1;
+
+static const char cfg_ifname[] = "lo";
+static unsigned short cfg_port = 9000;
+
+static char buf[ETH_MAX_MTU];
+
+struct testcase {
+ int tlen; /* send() buffer size, may exceed mss */
+ bool tfail; /* send() call is expected to fail */
+ int gso_len; /* mss after applying gso */
+ int r_num_mss; /* recv(): number of calls of full mss */
+ int r_len_last; /* recv(): size of last non-mss dgram, if any */
+};
+
+const struct in6_addr addr6 = IN6ADDR_LOOPBACK_INIT;
+const struct in_addr addr4 = { .s_addr = __constant_htonl(INADDR_LOOPBACK + 2) };
+
+struct testcase testcases_v4[] = {
+ {
+ /* no GSO: send a single byte */
+ .tlen = 1,
+ .r_len_last = 1,
+ },
+ {
+ /* no GSO: send a single MSS */
+ .tlen = CONST_MSS_V4,
+ .r_num_mss = 1,
+ },
+ {
+ /* no GSO: send a single MSS + 1B: fail */
+ .tlen = CONST_MSS_V4 + 1,
+ .tfail = true,
+ },
+ {
+ /* send a single MSS: will fail with GSO, because the segment
+ * logic in udp4_ufo_fragment demands a gso skb to be > MTU
+ */
+ .tlen = CONST_MSS_V4,
+ .gso_len = CONST_MSS_V4,
+ .tfail = true,
+ .r_num_mss = 1,
+ },
+ {
+ /* send a single MSS + 1B */
+ .tlen = CONST_MSS_V4 + 1,
+ .gso_len = CONST_MSS_V4,
+ .r_num_mss = 1,
+ .r_len_last = 1,
+ },
+ {
+ /* send exactly 2 MSS */
+ .tlen = CONST_MSS_V4 * 2,
+ .gso_len = CONST_MSS_V4,
+ .r_num_mss = 2,
+ },
+ {
+ /* send 2 MSS + 1B */
+ .tlen = (CONST_MSS_V4 * 2) + 1,
+ .gso_len = CONST_MSS_V4,
+ .r_num_mss = 2,
+ .r_len_last = 1,
+ },
+ {
+ /* send MAX segs */
+ .tlen = (ETH_MAX_MTU / CONST_MSS_V4) * CONST_MSS_V4,
+ .gso_len = CONST_MSS_V4,
+ .r_num_mss = (ETH_MAX_MTU / CONST_MSS_V4),
+ },
+
+ {
+ /* send MAX bytes */
+ .tlen = ETH_MAX_MTU - CONST_HDRLEN_V4,
+ .gso_len = CONST_MSS_V4,
+ .r_num_mss = CONST_MAX_SEGS_V4,
+ .r_len_last = ETH_MAX_MTU - CONST_HDRLEN_V4 -
+ (CONST_MAX_SEGS_V4 * CONST_MSS_V4),
+ },
+ {
+ /* send MAX + 1: fail */
+ .tlen = ETH_MAX_MTU - CONST_HDRLEN_V4 + 1,
+ .gso_len = CONST_MSS_V4,
+ .tfail = true,
+ },
+ {
+ /* send a single 1B MSS: will fail, see single MSS above */
+ .tlen = 1,
+ .gso_len = 1,
+ .tfail = true,
+ .r_num_mss = 1,
+ },
+ {
+ /* send 2 1B segments */
+ .tlen = 2,
+ .gso_len = 1,
+ .r_num_mss = 2,
+ },
+ {
+ /* send 2B + 2B + 1B segments */
+ .tlen = 5,
+ .gso_len = 2,
+ .r_num_mss = 2,
+ .r_len_last = 1,
+ },
+ {
+ /* send max number of min sized segments */
+ .tlen = UDP_MAX_SEGMENTS - CONST_HDRLEN_V4,
+ .gso_len = 1,
+ .r_num_mss = UDP_MAX_SEGMENTS - CONST_HDRLEN_V4,
+ },
+ {
+ /* send max number + 1 of min sized segments: fail */
+ .tlen = UDP_MAX_SEGMENTS - CONST_HDRLEN_V4 + 1,
+ .gso_len = 1,
+ .tfail = true,
+ },
+ {
+ /* EOL */
+ }
+};
+
+#ifndef IP6_MAX_MTU
+#define IP6_MAX_MTU (ETH_MAX_MTU + sizeof(struct ip6_hdr))
+#endif
+
+struct testcase testcases_v6[] = {
+ {
+ /* no GSO: send a single byte */
+ .tlen = 1,
+ .r_len_last = 1,
+ },
+ {
+ /* no GSO: send a single MSS */
+ .tlen = CONST_MSS_V6,
+ .r_num_mss = 1,
+ },
+ {
+ /* no GSO: send a single MSS + 1B: fail */
+ .tlen = CONST_MSS_V6 + 1,
+ .tfail = true,
+ },
+ {
+ /* send a single MSS: will fail with GSO, because the segment
+ * logic in udp4_ufo_fragment demands a gso skb to be > MTU
+ */
+ .tlen = CONST_MSS_V6,
+ .gso_len = CONST_MSS_V6,
+ .tfail = true,
+ .r_num_mss = 1,
+ },
+ {
+ /* send a single MSS + 1B */
+ .tlen = CONST_MSS_V6 + 1,
+ .gso_len = CONST_MSS_V6,
+ .r_num_mss = 1,
+ .r_len_last = 1,
+ },
+ {
+ /* send exactly 2 MSS */
+ .tlen = CONST_MSS_V6 * 2,
+ .gso_len = CONST_MSS_V6,
+ .r_num_mss = 2,
+ },
+ {
+ /* send 2 MSS + 1B */
+ .tlen = (CONST_MSS_V6 * 2) + 1,
+ .gso_len = CONST_MSS_V6,
+ .r_num_mss = 2,
+ .r_len_last = 1,
+ },
+ {
+ /* send MAX segs */
+ .tlen = (IP6_MAX_MTU / CONST_MSS_V6) * CONST_MSS_V6,
+ .gso_len = CONST_MSS_V6,
+ .r_num_mss = (IP6_MAX_MTU / CONST_MSS_V6),
+ },
+
+ {
+ /* send MAX bytes */
+ .tlen = IP6_MAX_MTU - CONST_HDRLEN_V6,
+ .gso_len = CONST_MSS_V6,
+ .r_num_mss = CONST_MAX_SEGS_V6,
+ .r_len_last = IP6_MAX_MTU - CONST_HDRLEN_V6 -
+ (CONST_MAX_SEGS_V6 * CONST_MSS_V6),
+ },
+ {
+ /* send MAX + 1: fail */
+ .tlen = IP6_MAX_MTU - CONST_HDRLEN_V6 + 1,
+ .gso_len = CONST_MSS_V6,
+ .tfail = true,
+ },
+ {
+ /* send a single 1B MSS: will fail, see single MSS above */
+ .tlen = 1,
+ .gso_len = 1,
+ .tfail = true,
+ .r_num_mss = 1,
+ },
+ {
+ /* send 2 1B segments */
+ .tlen = 2,
+ .gso_len = 1,
+ .r_num_mss = 2,
+ },
+ {
+ /* send 2B + 2B + 1B segments */
+ .tlen = 5,
+ .gso_len = 2,
+ .r_num_mss = 2,
+ .r_len_last = 1,
+ },
+ {
+ /* send max number of min sized segments */
+ .tlen = UDP_MAX_SEGMENTS - CONST_HDRLEN_V6,
+ .gso_len = 1,
+ .r_num_mss = UDP_MAX_SEGMENTS - CONST_HDRLEN_V6,
+ },
+ {
+ /* send max number + 1 of min sized segments: fail */
+ .tlen = UDP_MAX_SEGMENTS - CONST_HDRLEN_V6 + 1,
+ .gso_len = 1,
+ .tfail = true,
+ },
+ {
+ /* EOL */
+ }
+};
+
+static unsigned int get_device_mtu(int fd, const char *ifname)
+{
+ struct ifreq ifr;
+
+ memset(&ifr, 0, sizeof(ifr));
+
+ strcpy(ifr.ifr_name, ifname);
+
+ if (ioctl(fd, SIOCGIFMTU, &ifr))
+ error(1, errno, "ioctl get mtu");
+
+ return ifr.ifr_mtu;
+}
+
+static void __set_device_mtu(int fd, const char *ifname, unsigned int mtu)
+{
+ struct ifreq ifr;
+
+ memset(&ifr, 0, sizeof(ifr));
+
+ ifr.ifr_mtu = mtu;
+ strcpy(ifr.ifr_name, ifname);
+
+ if (ioctl(fd, SIOCSIFMTU, &ifr))
+ error(1, errno, "ioctl set mtu");
+}
+
+static void set_device_mtu(int fd, int mtu)
+{
+ int val;
+
+ val = get_device_mtu(fd, cfg_ifname);
+ fprintf(stderr, "device mtu (orig): %u\n", val);
+
+ __set_device_mtu(fd, cfg_ifname, mtu);
+ val = get_device_mtu(fd, cfg_ifname);
+ if (val != mtu)
+ error(1, 0, "unable to set device mtu to %u\n", val);
+
+ fprintf(stderr, "device mtu (test): %u\n", val);
+}
+
+static void set_pmtu_discover(int fd, bool is_ipv4)
+{
+ int level, name, val;
+
+ if (is_ipv4) {
+ level = SOL_IP;
+ name = IP_MTU_DISCOVER;
+ val = IP_PMTUDISC_DO;
+ } else {
+ level = SOL_IPV6;
+ name = IPV6_MTU_DISCOVER;
+ val = IPV6_PMTUDISC_DO;
+ }
+
+ if (setsockopt(fd, level, name, &val, sizeof(val)))
+ error(1, errno, "setsockopt path mtu");
+}
+
+static unsigned int get_path_mtu(int fd, bool is_ipv4)
+{
+ socklen_t vallen;
+ unsigned int mtu;
+ int ret;
+
+ vallen = sizeof(mtu);
+ if (is_ipv4)
+ ret = getsockopt(fd, SOL_IP, IP_MTU, &mtu, &vallen);
+ else
+ ret = getsockopt(fd, SOL_IPV6, IPV6_MTU, &mtu, &vallen);
+
+ if (ret)
+ error(1, errno, "getsockopt mtu");
+
+
+ fprintf(stderr, "path mtu (read): %u\n", mtu);
+ return mtu;
+}
+
+/* very wordy version of system("ip route add dev lo mtu 1500 127.0.0.3/32") */
+static void set_route_mtu(int mtu, bool is_ipv4)
+{
+ struct sockaddr_nl nladdr = { .nl_family = AF_NETLINK };
+ struct nlmsghdr *nh;
+ struct rtattr *rta;
+ struct rtmsg *rt;
+ char data[NLMSG_ALIGN(sizeof(*nh)) +
+ NLMSG_ALIGN(sizeof(*rt)) +
+ NLMSG_ALIGN(RTA_LENGTH(sizeof(addr6))) +
+ NLMSG_ALIGN(RTA_LENGTH(sizeof(int))) +
+ NLMSG_ALIGN(RTA_LENGTH(0) + RTA_LENGTH(sizeof(int)))];
+ int fd, ret, alen, off = 0;
+
+ alen = is_ipv4 ? sizeof(addr4) : sizeof(addr6);
+
+ fd = socket(AF_NETLINK, SOCK_RAW, NETLINK_ROUTE);
+ if (fd == -1)
+ error(1, errno, "socket netlink");
+
+ memset(data, 0, sizeof(data));
+
+ nh = (void *)data;
+ nh->nlmsg_type = RTM_NEWROUTE;
+ nh->nlmsg_flags = NLM_F_REQUEST | NLM_F_CREATE;
+ off += NLMSG_ALIGN(sizeof(*nh));
+
+ rt = (void *)(data + off);
+ rt->rtm_family = is_ipv4 ? AF_INET : AF_INET6;
+ rt->rtm_table = RT_TABLE_MAIN;
+ rt->rtm_dst_len = alen << 3;
+ rt->rtm_protocol = RTPROT_BOOT;
+ rt->rtm_scope = RT_SCOPE_UNIVERSE;
+ rt->rtm_type = RTN_UNICAST;
+ off += NLMSG_ALIGN(sizeof(*rt));
+
+ rta = (void *)(data + off);
+ rta->rta_type = RTA_DST;
+ rta->rta_len = RTA_LENGTH(alen);
+ if (is_ipv4)
+ memcpy(RTA_DATA(rta), &addr4, alen);
+ else
+ memcpy(RTA_DATA(rta), &addr6, alen);
+ off += NLMSG_ALIGN(rta->rta_len);
+
+ rta = (void *)(data + off);
+ rta->rta_type = RTA_OIF;
+ rta->rta_len = RTA_LENGTH(sizeof(int));
+ *((int *)(RTA_DATA(rta))) = 1; //if_nametoindex("lo");
+ off += NLMSG_ALIGN(rta->rta_len);
+
+ /* MTU is a subtype in a metrics type */
+ rta = (void *)(data + off);
+ rta->rta_type = RTA_METRICS;
+ rta->rta_len = RTA_LENGTH(0) + RTA_LENGTH(sizeof(int));
+ off += NLMSG_ALIGN(rta->rta_len);
+
+ /* now fill MTU subtype. Note that it fits within above rta_len */
+ rta = (void *)(((char *) rta) + RTA_LENGTH(0));
+ rta->rta_type = RTAX_MTU;
+ rta->rta_len = RTA_LENGTH(sizeof(int));
+ *((int *)(RTA_DATA(rta))) = mtu;
+
+ nh->nlmsg_len = off;
+
+ ret = sendto(fd, data, off, 0, (void *)&nladdr, sizeof(nladdr));
+ if (ret != off)
+ error(1, errno, "send netlink: %uB != %uB\n", ret, off);
+
+ if (close(fd))
+ error(1, errno, "close netlink");
+
+ fprintf(stderr, "route mtu (test): %u\n", mtu);
+}
+
+static bool __send_one(int fd, struct msghdr *msg, int flags)
+{
+ int ret;
+
+ ret = sendmsg(fd, msg, flags);
+ if (ret == -1 &&
+ (errno == EMSGSIZE || errno == ENOMEM || errno == EINVAL))
+ return false;
+ if (ret == -1)
+ error(1, errno, "sendmsg");
+ if (ret != msg->msg_iov->iov_len)
+ error(1, 0, "sendto: %d != %lu", ret, msg->msg_iov->iov_len);
+ if (msg->msg_flags)
+ error(1, 0, "sendmsg: return flags 0x%x\n", msg->msg_flags);
+
+ return true;
+}
+
+static bool send_one(int fd, int len, int gso_len,
+ struct sockaddr *addr, socklen_t alen)
+{
+ char control[CMSG_SPACE(sizeof(uint16_t))] = {0};
+ struct msghdr msg = {0};
+ struct iovec iov = {0};
+ struct cmsghdr *cm;
+
+ iov.iov_base = buf;
+ iov.iov_len = len;
+
+ msg.msg_iov = &iov;
+ msg.msg_iovlen = 1;
+
+ msg.msg_name = addr;
+ msg.msg_namelen = alen;
+
+ if (gso_len && !cfg_do_setsockopt) {
+ msg.msg_control = control;
+ msg.msg_controllen = sizeof(control);
+
+ cm = CMSG_FIRSTHDR(&msg);
+ cm->cmsg_level = SOL_UDP;
+ cm->cmsg_type = UDP_SEGMENT;
+ cm->cmsg_len = CMSG_LEN(sizeof(uint16_t));
+ *((uint16_t *) CMSG_DATA(cm)) = gso_len;
+ }
+
+ /* If MSG_MORE, send 1 byte followed by remainder */
+ if (cfg_do_msgmore && len > 1) {
+ iov.iov_len = 1;
+ if (!__send_one(fd, &msg, MSG_MORE))
+ error(1, 0, "send 1B failed");
+
+ iov.iov_base++;
+ iov.iov_len = len - 1;
+ }
+
+ return __send_one(fd, &msg, 0);
+}
+
+static int recv_one(int fd, int flags)
+{
+ int ret;
+
+ ret = recv(fd, buf, sizeof(buf), flags);
+ if (ret == -1 && errno == EAGAIN && (flags & MSG_DONTWAIT))
+ return 0;
+ if (ret == -1)
+ error(1, errno, "recv");
+
+ return ret;
+}
+
+static void run_one(struct testcase *test, int fdt, int fdr,
+ struct sockaddr *addr, socklen_t alen)
+{
+ int i, ret, val, mss;
+ bool sent;
+
+ fprintf(stderr, "ipv%d tx:%d gso:%d %s\n",
+ addr->sa_family == AF_INET ? 4 : 6,
+ test->tlen, test->gso_len,
+ test->tfail ? "(fail)" : "");
+
+ val = test->gso_len;
+ if (cfg_do_setsockopt) {
+ if (setsockopt(fdt, SOL_UDP, UDP_SEGMENT, &val, sizeof(val)))
+ error(1, errno, "setsockopt udp segment");
+ }
+
+ sent = send_one(fdt, test->tlen, test->gso_len, addr, alen);
+ if (sent && test->tfail)
+ error(1, 0, "send succeeded while expecting failure");
+ if (!sent && !test->tfail)
+ error(1, 0, "send failed while expecting success");
+ if (!sent)
+ return;
+
+ if (test->gso_len)
+ mss = test->gso_len;
+ else
+ mss = addr->sa_family == AF_INET ? CONST_MSS_V4 : CONST_MSS_V6;
+
+
+ /* Recv all full MSS datagrams */
+ for (i = 0; i < test->r_num_mss; i++) {
+ ret = recv_one(fdr, 0);
+ if (ret != mss)
+ error(1, 0, "recv.%d: %d != %d", i, ret, mss);
+ }
+
+ /* Recv the non-full last datagram, if tlen was not a multiple of mss */
+ if (test->r_len_last) {
+ ret = recv_one(fdr, 0);
+ if (ret != test->r_len_last)
+ error(1, 0, "recv.%d: %d != %d (last)",
+ i, ret, test->r_len_last);
+ }
+
+ /* Verify received all data */
+ ret = recv_one(fdr, MSG_DONTWAIT);
+ if (ret)
+ error(1, 0, "recv: unexpected datagram");
+}
+
+static void run_all(int fdt, int fdr, struct sockaddr *addr, socklen_t alen)
+{
+ struct testcase *tests, *test;
+
+ tests = addr->sa_family == AF_INET ? testcases_v4 : testcases_v6;
+
+ for (test = tests; test->tlen; test++) {
+ /* if a specific test is given, then skip all others */
+ if (cfg_specific_test_id == -1 ||
+ cfg_specific_test_id == test - tests)
+ run_one(test, fdt, fdr, addr, alen);
+ }
+}
+
+static void run_test(struct sockaddr *addr, socklen_t alen)
+{
+ struct timeval tv = { .tv_usec = 100 * 1000 };
+ int fdr, fdt, val;
+
+ fdr = socket(addr->sa_family, SOCK_DGRAM, 0);
+ if (fdr == -1)
+ error(1, errno, "socket r");
+
+ if (bind(fdr, addr, alen))
+ error(1, errno, "bind");
+
+ /* Have tests fail quickly instead of hang */
+ if (setsockopt(fdr, SOL_SOCKET, SO_RCVTIMEO, &tv, sizeof(tv)))
+ error(1, errno, "setsockopt rcv timeout");
+
+ fdt = socket(addr->sa_family, SOCK_DGRAM, 0);
+ if (fdt == -1)
+ error(1, errno, "socket t");
+
+ /* Do not fragment these datagrams: only succeed if GSO works */
+ set_pmtu_discover(fdt, addr->sa_family == AF_INET);
+
+ if (cfg_do_connectionless) {
+ set_device_mtu(fdt, CONST_MTU_TEST);
+ run_all(fdt, fdr, addr, alen);
+ }
+
+ if (cfg_do_connected) {
+ set_device_mtu(fdt, CONST_MTU_TEST + 100);
+ set_route_mtu(CONST_MTU_TEST, addr->sa_family == AF_INET);
+
+ if (connect(fdt, addr, alen))
+ error(1, errno, "connect");
+
+ val = get_path_mtu(fdt, addr->sa_family == AF_INET);
+ if (val != CONST_MTU_TEST)
+ error(1, 0, "bad path mtu %u\n", val);
+
+ run_all(fdt, fdr, addr, 0 /* use connected addr */);
+ }
+
+ if (close(fdt))
+ error(1, errno, "close t");
+ if (close(fdr))
+ error(1, errno, "close r");
+}
+
+static void run_test_v4(void)
+{
+ struct sockaddr_in addr = {0};
+
+ addr.sin_family = AF_INET;
+ addr.sin_port = htons(cfg_port);
+ addr.sin_addr = addr4;
+
+ run_test((void *)&addr, sizeof(addr));
+}
+
+static void run_test_v6(void)
+{
+ struct sockaddr_in6 addr = {0};
+
+ addr.sin6_family = AF_INET6;
+ addr.sin6_port = htons(cfg_port);
+ addr.sin6_addr = addr6;
+
+ run_test((void *)&addr, sizeof(addr));
+}
+
+static void parse_opts(int argc, char **argv)
+{
+ int c;
+
+ while ((c = getopt(argc, argv, "46cCmst:")) != -1) {
+ switch (c) {
+ case '4':
+ cfg_do_ipv4 = true;
+ break;
+ case '6':
+ cfg_do_ipv6 = true;
+ break;
+ case 'c':
+ cfg_do_connected = true;
+ break;
+ case 'C':
+ cfg_do_connectionless = true;
+ break;
+ case 'm':
+ cfg_do_msgmore = true;
+ break;
+ case 's':
+ cfg_do_setsockopt = true;
+ break;
+ case 't':
+ cfg_specific_test_id = strtoul(optarg, NULL, 0);
+ break;
+ default:
+ error(1, 0, "%s: parse error", argv[0]);
+ }
+ }
+}
+
+int main(int argc, char **argv)
+{
+ parse_opts(argc, argv);
+
+ if (cfg_do_ipv4)
+ run_test_v4();
+ if (cfg_do_ipv6)
+ run_test_v6();
+
+ fprintf(stderr, "OK\n");
+ return 0;
+}
diff --git a/tools/testing/selftests/net/udpgso.sh b/tools/testing/selftests/net/udpgso.sh
new file mode 100755
index 0000000..fec24f5
--- /dev/null
+++ b/tools/testing/selftests/net/udpgso.sh
@@ -0,0 +1,29 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0
+#
+# Run a series of udpgso regression tests
+
+echo "ipv4 cmsg"
+./in_netns.sh ./udpgso -4 -C
+
+echo "ipv4 setsockopt"
+./in_netns.sh ./udpgso -4 -C -s
+
+echo "ipv6 cmsg"
+./in_netns.sh ./udpgso -6 -C
+
+echo "ipv6 setsockopt"
+./in_netns.sh ./udpgso -6 -C -s
+
+echo "ipv4 connected"
+./in_netns.sh ./udpgso -4 -c
+
+# blocked on 2nd loopback address
+# echo "ipv6 connected"
+# ./in_netns.sh ./udpgso -6 -c
+
+echo "ipv4 msg_more"
+./in_netns.sh ./udpgso -4 -C -m
+
+echo "ipv6 msg_more"
+./in_netns.sh ./udpgso -6 -C -m
diff --git a/tools/testing/selftests/net/udpgso_bench.sh b/tools/testing/selftests/net/udpgso_bench.sh
new file mode 100755
index 0000000..792fa4d
--- /dev/null
+++ b/tools/testing/selftests/net/udpgso_bench.sh
@@ -0,0 +1,74 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0
+#
+# Run a series of udpgso benchmarks
+
+wake_children() {
+ local -r jobs="$(jobs -p)"
+
+ if [[ "${jobs}" != "" ]]; then
+ kill -1 ${jobs} 2>/dev/null
+ fi
+}
+trap wake_children EXIT
+
+run_one() {
+ local -r args=$@
+
+ ./udpgso_bench_rx &
+ ./udpgso_bench_rx -t &
+
+ ./udpgso_bench_tx ${args}
+}
+
+run_in_netns() {
+ local -r args=$@
+
+ ./in_netns.sh $0 __subprocess ${args}
+}
+
+run_udp() {
+ local -r args=$@
+
+ echo "udp"
+ run_in_netns ${args}
+
+ echo "udp gso"
+ run_in_netns ${args} -S
+
+ echo "udp gso zerocopy"
+ run_in_netns ${args} -S -z
+}
+
+run_tcp() {
+ local -r args=$@
+
+ echo "tcp"
+ run_in_netns ${args} -t
+
+ echo "tcp zerocopy"
+ run_in_netns ${args} -t -z
+}
+
+run_all() {
+ local -r core_args="-l 4"
+ local -r ipv4_args="${core_args} -4 -D 127.0.0.1"
+ local -r ipv6_args="${core_args} -6 -D ::1"
+
+ echo "ipv4"
+ run_tcp "${ipv4_args}"
+ run_udp "${ipv4_args}"
+
+ echo "ipv6"
+ run_tcp "${ipv4_args}"
+ run_udp "${ipv6_args}"
+}
+
+if [[ $# -eq 0 ]]; then
+ run_all
+elif [[ $1 == "__subprocess" ]]; then
+ shift
+ run_one $@
+else
+ run_in_netns $@
+fi
diff --git a/tools/testing/selftests/net/udpgso_bench_rx.c b/tools/testing/selftests/net/udpgso_bench_rx.c
new file mode 100644
index 0000000..727cf67
--- /dev/null
+++ b/tools/testing/selftests/net/udpgso_bench_rx.c
@@ -0,0 +1,265 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#define _GNU_SOURCE
+
+#include <arpa/inet.h>
+#include <error.h>
+#include <errno.h>
+#include <limits.h>
+#include <linux/errqueue.h>
+#include <linux/if_packet.h>
+#include <linux/socket.h>
+#include <linux/sockios.h>
+#include <net/ethernet.h>
+#include <net/if.h>
+#include <netinet/ip.h>
+#include <netinet/ip6.h>
+#include <netinet/tcp.h>
+#include <netinet/udp.h>
+#include <poll.h>
+#include <sched.h>
+#include <stdbool.h>
+#include <stdio.h>
+#include <stdint.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/ioctl.h>
+#include <sys/socket.h>
+#include <sys/stat.h>
+#include <sys/time.h>
+#include <sys/types.h>
+#include <sys/wait.h>
+#include <unistd.h>
+
+static int cfg_port = 8000;
+static bool cfg_tcp;
+static bool cfg_verify;
+
+static bool interrupted;
+static unsigned long packets, bytes;
+
+static void sigint_handler(int signum)
+{
+ if (signum == SIGINT)
+ interrupted = true;
+}
+
+static unsigned long gettimeofday_ms(void)
+{
+ struct timeval tv;
+
+ gettimeofday(&tv, NULL);
+ return (tv.tv_sec * 1000) + (tv.tv_usec / 1000);
+}
+
+static void do_poll(int fd)
+{
+ struct pollfd pfd;
+ int ret;
+
+ pfd.events = POLLIN;
+ pfd.revents = 0;
+ pfd.fd = fd;
+
+ do {
+ ret = poll(&pfd, 1, 10);
+ if (ret == -1)
+ error(1, errno, "poll");
+ if (ret == 0)
+ continue;
+ if (pfd.revents != POLLIN)
+ error(1, errno, "poll: 0x%x expected 0x%x\n",
+ pfd.revents, POLLIN);
+ } while (!ret && !interrupted);
+}
+
+static int do_socket(bool do_tcp)
+{
+ struct sockaddr_in6 addr = {0};
+ int fd, val;
+
+ fd = socket(PF_INET6, cfg_tcp ? SOCK_STREAM : SOCK_DGRAM, 0);
+ if (fd == -1)
+ error(1, errno, "socket");
+
+ val = 1 << 21;
+ if (setsockopt(fd, SOL_SOCKET, SO_RCVBUF, &val, sizeof(val)))
+ error(1, errno, "setsockopt rcvbuf");
+ val = 1;
+ if (setsockopt(fd, SOL_SOCKET, SO_REUSEPORT, &val, sizeof(val)))
+ error(1, errno, "setsockopt reuseport");
+
+ addr.sin6_family = PF_INET6;
+ addr.sin6_port = htons(cfg_port);
+ addr.sin6_addr = in6addr_any;
+ if (bind(fd, (void *) &addr, sizeof(addr)))
+ error(1, errno, "bind");
+
+ if (do_tcp) {
+ int accept_fd = fd;
+
+ if (listen(accept_fd, 1))
+ error(1, errno, "listen");
+
+ do_poll(accept_fd);
+
+ fd = accept(accept_fd, NULL, NULL);
+ if (fd == -1)
+ error(1, errno, "accept");
+ if (close(accept_fd))
+ error(1, errno, "close accept fd");
+ }
+
+ return fd;
+}
+
+/* Flush all outstanding bytes for the tcp receive queue */
+static void do_flush_tcp(int fd)
+{
+ int ret;
+
+ while (true) {
+ /* MSG_TRUNC flushes up to len bytes */
+ ret = recv(fd, NULL, 1 << 21, MSG_TRUNC | MSG_DONTWAIT);
+ if (ret == -1 && errno == EAGAIN)
+ return;
+ if (ret == -1)
+ error(1, errno, "flush");
+ if (ret == 0) {
+ /* client detached */
+ exit(0);
+ }
+
+ packets++;
+ bytes += ret;
+ }
+
+}
+
+static char sanitized_char(char val)
+{
+ return (val >= 'a' && val <= 'z') ? val : '.';
+}
+
+static void do_verify_udp(const char *data, int len)
+{
+ char cur = data[0];
+ int i;
+
+ /* verify contents */
+ if (cur < 'a' || cur > 'z')
+ error(1, 0, "data initial byte out of range");
+
+ for (i = 1; i < len; i++) {
+ if (cur == 'z')
+ cur = 'a';
+ else
+ cur++;
+
+ if (data[i] != cur)
+ error(1, 0, "data[%d]: len %d, %c(%hhu) != %c(%hhu)\n",
+ i, len,
+ sanitized_char(data[i]), data[i],
+ sanitized_char(cur), cur);
+ }
+}
+
+/* Flush all outstanding datagrams. Verify first few bytes of each. */
+static void do_flush_udp(int fd)
+{
+ static char rbuf[ETH_DATA_LEN];
+ int ret, len, budget = 256;
+
+ len = cfg_verify ? sizeof(rbuf) : 0;
+ while (budget--) {
+ /* MSG_TRUNC will make return value full datagram length */
+ ret = recv(fd, rbuf, len, MSG_TRUNC | MSG_DONTWAIT);
+ if (ret == -1 && errno == EAGAIN)
+ return;
+ if (ret == -1)
+ error(1, errno, "recv");
+ if (len) {
+ if (ret == 0)
+ error(1, errno, "recv: 0 byte datagram\n");
+
+ do_verify_udp(rbuf, ret);
+ }
+
+ packets++;
+ bytes += ret;
+ }
+}
+
+static void usage(const char *filepath)
+{
+ error(1, 0, "Usage: %s [-tv] [-p port]", filepath);
+}
+
+static void parse_opts(int argc, char **argv)
+{
+ int c;
+
+ while ((c = getopt(argc, argv, "ptv")) != -1) {
+ switch (c) {
+ case 'p':
+ cfg_port = htons(strtoul(optarg, NULL, 0));
+ break;
+ case 't':
+ cfg_tcp = true;
+ break;
+ case 'v':
+ cfg_verify = true;
+ break;
+ }
+ }
+
+ if (optind != argc)
+ usage(argv[0]);
+
+ if (cfg_tcp && cfg_verify)
+ error(1, 0, "TODO: implement verify mode for tcp");
+}
+
+static void do_recv(void)
+{
+ unsigned long tnow, treport;
+ int fd;
+
+ fd = do_socket(cfg_tcp);
+
+ treport = gettimeofday_ms() + 1000;
+ do {
+ do_poll(fd);
+
+ if (cfg_tcp)
+ do_flush_tcp(fd);
+ else
+ do_flush_udp(fd);
+
+ tnow = gettimeofday_ms();
+ if (tnow > treport) {
+ if (packets)
+ fprintf(stderr,
+ "%s rx: %6lu MB/s %8lu calls/s\n",
+ cfg_tcp ? "tcp" : "udp",
+ bytes >> 20, packets);
+ bytes = packets = 0;
+ treport = tnow + 1000;
+ }
+
+ } while (!interrupted);
+
+ if (close(fd))
+ error(1, errno, "close");
+}
+
+int main(int argc, char **argv)
+{
+ parse_opts(argc, argv);
+
+ signal(SIGINT, sigint_handler);
+
+ do_recv();
+
+ return 0;
+}
diff --git a/tools/testing/selftests/net/udpgso_bench_tx.c b/tools/testing/selftests/net/udpgso_bench_tx.c
new file mode 100644
index 0000000..e821564
--- /dev/null
+++ b/tools/testing/selftests/net/udpgso_bench_tx.c
@@ -0,0 +1,420 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#define _GNU_SOURCE
+
+#include <arpa/inet.h>
+#include <errno.h>
+#include <error.h>
+#include <netinet/if_ether.h>
+#include <netinet/in.h>
+#include <netinet/ip.h>
+#include <netinet/ip6.h>
+#include <netinet/udp.h>
+#include <poll.h>
+#include <sched.h>
+#include <signal.h>
+#include <stdbool.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/socket.h>
+#include <sys/time.h>
+#include <sys/types.h>
+#include <unistd.h>
+
+#ifndef ETH_MAX_MTU
+#define ETH_MAX_MTU 0xFFFFU
+#endif
+
+#ifndef UDP_SEGMENT
+#define UDP_SEGMENT 103
+#endif
+
+#ifndef SO_ZEROCOPY
+#define SO_ZEROCOPY 60
+#endif
+
+#ifndef MSG_ZEROCOPY
+#define MSG_ZEROCOPY 0x4000000
+#endif
+
+#define NUM_PKT 100
+
+static bool cfg_cache_trash;
+static int cfg_cpu = -1;
+static int cfg_connected = true;
+static int cfg_family = PF_UNSPEC;
+static uint16_t cfg_mss;
+static int cfg_payload_len = (1472 * 42);
+static int cfg_port = 8000;
+static int cfg_runtime_ms = -1;
+static bool cfg_segment;
+static bool cfg_sendmmsg;
+static bool cfg_tcp;
+static bool cfg_zerocopy;
+
+static socklen_t cfg_alen;
+static struct sockaddr_storage cfg_dst_addr;
+
+static bool interrupted;
+static char buf[NUM_PKT][ETH_MAX_MTU];
+
+static void sigint_handler(int signum)
+{
+ if (signum == SIGINT)
+ interrupted = true;
+}
+
+static unsigned long gettimeofday_ms(void)
+{
+ struct timeval tv;
+
+ gettimeofday(&tv, NULL);
+ return (tv.tv_sec * 1000) + (tv.tv_usec / 1000);
+}
+
+static int set_cpu(int cpu)
+{
+ cpu_set_t mask;
+
+ CPU_ZERO(&mask);
+ CPU_SET(cpu, &mask);
+ if (sched_setaffinity(0, sizeof(mask), &mask))
+ error(1, 0, "setaffinity %d", cpu);
+
+ return 0;
+}
+
+static void setup_sockaddr(int domain, const char *str_addr, void *sockaddr)
+{
+ struct sockaddr_in6 *addr6 = (void *) sockaddr;
+ struct sockaddr_in *addr4 = (void *) sockaddr;
+
+ switch (domain) {
+ case PF_INET:
+ addr4->sin_family = AF_INET;
+ addr4->sin_port = htons(cfg_port);
+ if (inet_pton(AF_INET, str_addr, &(addr4->sin_addr)) != 1)
+ error(1, 0, "ipv4 parse error: %s", str_addr);
+ break;
+ case PF_INET6:
+ addr6->sin6_family = AF_INET6;
+ addr6->sin6_port = htons(cfg_port);
+ if (inet_pton(AF_INET6, str_addr, &(addr6->sin6_addr)) != 1)
+ error(1, 0, "ipv6 parse error: %s", str_addr);
+ break;
+ default:
+ error(1, 0, "illegal domain");
+ }
+}
+
+static void flush_zerocopy(int fd)
+{
+ struct msghdr msg = {0}; /* flush */
+ int ret;
+
+ while (1) {
+ ret = recvmsg(fd, &msg, MSG_ERRQUEUE);
+ if (ret == -1 && errno == EAGAIN)
+ break;
+ if (ret == -1)
+ error(1, errno, "errqueue");
+ if (msg.msg_flags != (MSG_ERRQUEUE | MSG_CTRUNC))
+ error(1, 0, "errqueue: flags 0x%x\n", msg.msg_flags);
+ msg.msg_flags = 0;
+ }
+}
+
+static int send_tcp(int fd, char *data)
+{
+ int ret, done = 0, count = 0;
+
+ while (done < cfg_payload_len) {
+ ret = send(fd, data + done, cfg_payload_len - done,
+ cfg_zerocopy ? MSG_ZEROCOPY : 0);
+ if (ret == -1)
+ error(1, errno, "write");
+
+ done += ret;
+ count++;
+ }
+
+ return count;
+}
+
+static int send_udp(int fd, char *data)
+{
+ int ret, total_len, len, count = 0;
+
+ total_len = cfg_payload_len;
+
+ while (total_len) {
+ len = total_len < cfg_mss ? total_len : cfg_mss;
+
+ ret = sendto(fd, data, len, cfg_zerocopy ? MSG_ZEROCOPY : 0,
+ cfg_connected ? NULL : (void *)&cfg_dst_addr,
+ cfg_connected ? 0 : cfg_alen);
+ if (ret == -1)
+ error(1, errno, "write");
+ if (ret != len)
+ error(1, errno, "write: %uB != %uB\n", ret, len);
+
+ total_len -= len;
+ count++;
+ }
+
+ return count;
+}
+
+static int send_udp_sendmmsg(int fd, char *data)
+{
+ const int max_nr_msg = ETH_MAX_MTU / ETH_DATA_LEN;
+ struct mmsghdr mmsgs[max_nr_msg];
+ struct iovec iov[max_nr_msg];
+ unsigned int off = 0, left;
+ int i = 0, ret;
+
+ memset(mmsgs, 0, sizeof(mmsgs));
+
+ left = cfg_payload_len;
+ while (left) {
+ if (i == max_nr_msg)
+ error(1, 0, "sendmmsg: exceeds max_nr_msg");
+
+ iov[i].iov_base = data + off;
+ iov[i].iov_len = cfg_mss < left ? cfg_mss : left;
+
+ mmsgs[i].msg_hdr.msg_iov = iov + i;
+ mmsgs[i].msg_hdr.msg_iovlen = 1;
+
+ off += iov[i].iov_len;
+ left -= iov[i].iov_len;
+ i++;
+ }
+
+ ret = sendmmsg(fd, mmsgs, i, cfg_zerocopy ? MSG_ZEROCOPY : 0);
+ if (ret == -1)
+ error(1, errno, "sendmmsg");
+
+ return ret;
+}
+
+static void send_udp_segment_cmsg(struct cmsghdr *cm)
+{
+ uint16_t *valp;
+
+ cm->cmsg_level = SOL_UDP;
+ cm->cmsg_type = UDP_SEGMENT;
+ cm->cmsg_len = CMSG_LEN(sizeof(cfg_mss));
+ valp = (void *)CMSG_DATA(cm);
+ *valp = cfg_mss;
+}
+
+static int send_udp_segment(int fd, char *data)
+{
+ char control[CMSG_SPACE(sizeof(cfg_mss))] = {0};
+ struct msghdr msg = {0};
+ struct iovec iov = {0};
+ int ret;
+
+ iov.iov_base = data;
+ iov.iov_len = cfg_payload_len;
+
+ msg.msg_iov = &iov;
+ msg.msg_iovlen = 1;
+
+ msg.msg_control = control;
+ msg.msg_controllen = sizeof(control);
+ send_udp_segment_cmsg(CMSG_FIRSTHDR(&msg));
+
+ msg.msg_name = (void *)&cfg_dst_addr;
+ msg.msg_namelen = cfg_alen;
+
+ ret = sendmsg(fd, &msg, cfg_zerocopy ? MSG_ZEROCOPY : 0);
+ if (ret == -1)
+ error(1, errno, "sendmsg");
+ if (ret != iov.iov_len)
+ error(1, 0, "sendmsg: %u != %lu\n", ret, iov.iov_len);
+
+ return 1;
+}
+
+static void usage(const char *filepath)
+{
+ error(1, 0, "Usage: %s [-46cmStuz] [-C cpu] [-D dst ip] [-l secs] [-p port] [-s sendsize]",
+ filepath);
+}
+
+static void parse_opts(int argc, char **argv)
+{
+ int max_len, hdrlen;
+ int c;
+
+ while ((c = getopt(argc, argv, "46cC:D:l:mp:s:Stuz")) != -1) {
+ switch (c) {
+ case '4':
+ if (cfg_family != PF_UNSPEC)
+ error(1, 0, "Pass one of -4 or -6");
+ cfg_family = PF_INET;
+ cfg_alen = sizeof(struct sockaddr_in);
+ break;
+ case '6':
+ if (cfg_family != PF_UNSPEC)
+ error(1, 0, "Pass one of -4 or -6");
+ cfg_family = PF_INET6;
+ cfg_alen = sizeof(struct sockaddr_in6);
+ break;
+ case 'c':
+ cfg_cache_trash = true;
+ break;
+ case 'C':
+ cfg_cpu = strtol(optarg, NULL, 0);
+ break;
+ case 'D':
+ setup_sockaddr(cfg_family, optarg, &cfg_dst_addr);
+ break;
+ case 'l':
+ cfg_runtime_ms = strtoul(optarg, NULL, 10) * 1000;
+ break;
+ case 'm':
+ cfg_sendmmsg = true;
+ break;
+ case 'p':
+ cfg_port = strtoul(optarg, NULL, 0);
+ break;
+ case 's':
+ cfg_payload_len = strtoul(optarg, NULL, 0);
+ break;
+ case 'S':
+ cfg_segment = true;
+ break;
+ case 't':
+ cfg_tcp = true;
+ break;
+ case 'u':
+ cfg_connected = false;
+ break;
+ case 'z':
+ cfg_zerocopy = true;
+ break;
+ }
+ }
+
+ if (optind != argc)
+ usage(argv[0]);
+
+ if (cfg_family == PF_UNSPEC)
+ error(1, 0, "must pass one of -4 or -6");
+ if (cfg_tcp && !cfg_connected)
+ error(1, 0, "connectionless tcp makes no sense");
+ if (cfg_segment && cfg_sendmmsg)
+ error(1, 0, "cannot combine segment offload and sendmmsg");
+
+ if (cfg_family == PF_INET)
+ hdrlen = sizeof(struct iphdr) + sizeof(struct udphdr);
+ else
+ hdrlen = sizeof(struct ip6_hdr) + sizeof(struct udphdr);
+
+ cfg_mss = ETH_DATA_LEN - hdrlen;
+ max_len = ETH_MAX_MTU - hdrlen;
+
+ if (cfg_payload_len > max_len)
+ error(1, 0, "payload length %u exceeds max %u",
+ cfg_payload_len, max_len);
+}
+
+static void set_pmtu_discover(int fd, bool is_ipv4)
+{
+ int level, name, val;
+
+ if (is_ipv4) {
+ level = SOL_IP;
+ name = IP_MTU_DISCOVER;
+ val = IP_PMTUDISC_DO;
+ } else {
+ level = SOL_IPV6;
+ name = IPV6_MTU_DISCOVER;
+ val = IPV6_PMTUDISC_DO;
+ }
+
+ if (setsockopt(fd, level, name, &val, sizeof(val)))
+ error(1, errno, "setsockopt path mtu");
+}
+
+int main(int argc, char **argv)
+{
+ unsigned long num_msgs, num_sends;
+ unsigned long tnow, treport, tstop;
+ int fd, i, val;
+
+ parse_opts(argc, argv);
+
+ if (cfg_cpu > 0)
+ set_cpu(cfg_cpu);
+
+ for (i = 0; i < sizeof(buf[0]); i++)
+ buf[0][i] = 'a' + (i % 26);
+ for (i = 1; i < NUM_PKT; i++)
+ memcpy(buf[i], buf[0], sizeof(buf[0]));
+
+ signal(SIGINT, sigint_handler);
+
+ fd = socket(cfg_family, cfg_tcp ? SOCK_STREAM : SOCK_DGRAM, 0);
+ if (fd == -1)
+ error(1, errno, "socket");
+
+ if (cfg_zerocopy) {
+ val = 1;
+ if (setsockopt(fd, SOL_SOCKET, SO_ZEROCOPY, &val, sizeof(val)))
+ error(1, errno, "setsockopt zerocopy");
+ }
+
+ if (cfg_connected &&
+ connect(fd, (void *)&cfg_dst_addr, cfg_alen))
+ error(1, errno, "connect");
+
+ if (cfg_segment)
+ set_pmtu_discover(fd, cfg_family == PF_INET);
+
+ num_msgs = num_sends = 0;
+ tnow = gettimeofday_ms();
+ tstop = tnow + cfg_runtime_ms;
+ treport = tnow + 1000;
+
+ i = 0;
+ do {
+ if (cfg_tcp)
+ num_sends += send_tcp(fd, buf[i]);
+ else if (cfg_segment)
+ num_sends += send_udp_segment(fd, buf[i]);
+ else if (cfg_sendmmsg)
+ num_sends += send_udp_sendmmsg(fd, buf[i]);
+ else
+ num_sends += send_udp(fd, buf[i]);
+ num_msgs++;
+
+ if (cfg_zerocopy && ((num_msgs & 0xF) == 0))
+ flush_zerocopy(fd);
+
+ tnow = gettimeofday_ms();
+ if (tnow > treport) {
+ fprintf(stderr,
+ "%s tx: %6lu MB/s %8lu calls/s %6lu msg/s\n",
+ cfg_tcp ? "tcp" : "udp",
+ (num_msgs * cfg_payload_len) >> 20,
+ num_sends, num_msgs);
+ num_msgs = num_sends = 0;
+ treport = tnow + 1000;
+ }
+
+ /* cold cache when writing buffer */
+ if (cfg_cache_trash)
+ i = ++i < NUM_PKT ? i : 0;
+
+ } while (!interrupted && (cfg_runtime_ms == -1 || tnow < tstop));
+
+ if (close(fd))
+ error(1, errno, "close");
+
+ return 0;
+}
OpenPOWER on IntegriCloud