diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2016-07-27 12:03:20 -0700 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2016-07-27 12:03:20 -0700 |
commit | 468fc7ed5537615efe671d94248446ac24679773 (patch) | |
tree | 27bc9de792e863d6ec1630927b77ac9e7dabb38a /samples | |
parent | 08fd8c17686c6b09fa410a26d516548dd80ff147 (diff) | |
parent | 36232012344b8db67052432742deaf17f82e70e6 (diff) | |
download | op-kernel-dev-468fc7ed5537615efe671d94248446ac24679773.zip op-kernel-dev-468fc7ed5537615efe671d94248446ac24679773.tar.gz |
Merge git://git.kernel.org/pub/scm/linux/kernel/git/davem/net-next
Pull networking updates from David Miller:
1) Unified UDP encapsulation offload methods for drivers, from
Alexander Duyck.
2) Make DSA binding more sane, from Andrew Lunn.
3) Support QCA9888 chips in ath10k, from Anilkumar Kolli.
4) Several workqueue usage cleanups, from Bhaktipriya Shridhar.
5) Add XDP (eXpress Data Path), essentially running BPF programs on RX
packets as soon as the device sees them, with the option to mirror
the packet on TX via the same interface. From Brenden Blanco and
others.
6) Allow qdisc/class stats dumps to run lockless, from Eric Dumazet.
7) Add VLAN support to b53 and bcm_sf2, from Florian Fainelli.
8) Simplify netlink conntrack entry layout, from Florian Westphal.
9) Add ipv4 forwarding support to mlxsw spectrum driver, from Ido
Schimmel, Yotam Gigi, and Jiri Pirko.
10) Add SKB array infrastructure and convert tun and macvtap over to it.
From Michael S Tsirkin and Jason Wang.
11) Support qdisc packet injection in pktgen, from John Fastabend.
12) Add neighbour monitoring framework to TIPC, from Jon Paul Maloy.
13) Add NV congestion control support to TCP, from Lawrence Brakmo.
14) Add GSO support to SCTP, from Marcelo Ricardo Leitner.
15) Allow GRO and RPS to function on macsec devices, from Paolo Abeni.
16) Support MPLS over IPV4, from Simon Horman.
* git://git.kernel.org/pub/scm/linux/kernel/git/davem/net-next: (1622 commits)
xgene: Fix build warning with ACPI disabled.
be2net: perform temperature query in adapter regardless of its interface state
l2tp: Correctly return -EBADF from pppol2tp_getname.
net/mlx5_core/health: Remove deprecated create_singlethread_workqueue
net: ipmr/ip6mr: update lastuse on entry change
macsec: ensure rx_sa is set when validation is disabled
tipc: dump monitor attributes
tipc: add a function to get the bearer name
tipc: get monitor threshold for the cluster
tipc: make cluster size threshold for monitoring configurable
tipc: introduce constants for tipc address validation
net: neigh: disallow transition to NUD_STALE if lladdr is unchanged in neigh_update()
MAINTAINERS: xgene: Add driver and documentation path
Documentation: dtb: xgene: Add MDIO node
dtb: xgene: Add MDIO node
drivers: net: xgene: ethtool: Use phy_ethtool_gset and sset
drivers: net: xgene: Use exported functions
drivers: net: xgene: Enable MDIO driver
drivers: net: xgene: Add backward compatibility
drivers: net: phy: xgene: Add MDIO driver
...
Diffstat (limited to 'samples')
23 files changed, 1178 insertions, 140 deletions
diff --git a/samples/bpf/Makefile b/samples/bpf/Makefile index 0bf2478..90ebf7d 100644 --- a/samples/bpf/Makefile +++ b/samples/bpf/Makefile @@ -14,12 +14,16 @@ hostprogs-y += tracex3 hostprogs-y += tracex4 hostprogs-y += tracex5 hostprogs-y += tracex6 +hostprogs-y += test_probe_write_user hostprogs-y += trace_output hostprogs-y += lathist hostprogs-y += offwaketime hostprogs-y += spintest hostprogs-y += map_perf_test hostprogs-y += test_overhead +hostprogs-y += test_cgrp2_array_pin +hostprogs-y += xdp1 +hostprogs-y += xdp2 test_verifier-objs := test_verifier.o libbpf.o test_maps-objs := test_maps.o libbpf.o @@ -34,12 +38,17 @@ tracex3-objs := bpf_load.o libbpf.o tracex3_user.o tracex4-objs := bpf_load.o libbpf.o tracex4_user.o tracex5-objs := bpf_load.o libbpf.o tracex5_user.o tracex6-objs := bpf_load.o libbpf.o tracex6_user.o +test_probe_write_user-objs := bpf_load.o libbpf.o test_probe_write_user_user.o trace_output-objs := bpf_load.o libbpf.o trace_output_user.o lathist-objs := bpf_load.o libbpf.o lathist_user.o offwaketime-objs := bpf_load.o libbpf.o offwaketime_user.o spintest-objs := bpf_load.o libbpf.o spintest_user.o map_perf_test-objs := bpf_load.o libbpf.o map_perf_test_user.o test_overhead-objs := bpf_load.o libbpf.o test_overhead_user.o +test_cgrp2_array_pin-objs := libbpf.o test_cgrp2_array_pin.o +xdp1-objs := bpf_load.o libbpf.o xdp1_user.o +# reuse xdp1 source intentionally +xdp2-objs := bpf_load.o libbpf.o xdp1_user.o # Tell kbuild to always build the programs always := $(hostprogs-y) @@ -52,6 +61,7 @@ always += tracex3_kern.o always += tracex4_kern.o always += tracex5_kern.o always += tracex6_kern.o +always += test_probe_write_user_kern.o always += trace_output_kern.o always += tcbpf1_kern.o always += lathist_kern.o @@ -61,6 +71,9 @@ always += map_perf_test_kern.o always += test_overhead_tp_kern.o always += test_overhead_kprobe_kern.o always += parse_varlen.o parse_simple.o parse_ldabs.o +always += test_cgrp2_tc_kern.o +always += xdp1_kern.o +always += xdp2_kern.o HOSTCFLAGS += -I$(objtree)/usr/include @@ -75,12 +88,15 @@ HOSTLOADLIBES_tracex3 += -lelf HOSTLOADLIBES_tracex4 += -lelf -lrt HOSTLOADLIBES_tracex5 += -lelf HOSTLOADLIBES_tracex6 += -lelf +HOSTLOADLIBES_test_probe_write_user += -lelf HOSTLOADLIBES_trace_output += -lelf -lrt HOSTLOADLIBES_lathist += -lelf HOSTLOADLIBES_offwaketime += -lelf HOSTLOADLIBES_spintest += -lelf HOSTLOADLIBES_map_perf_test += -lelf -lrt HOSTLOADLIBES_test_overhead += -lelf -lrt +HOSTLOADLIBES_xdp1 += -lelf +HOSTLOADLIBES_xdp2 += -lelf # Allows pointing LLC/CLANG to a LLVM backend with bpf support, redefine on cmdline: # make samples/bpf/ LLC=~/git/llvm/build/bin/llc CLANG=~/git/llvm/build/bin/clang diff --git a/samples/bpf/bpf_helpers.h b/samples/bpf/bpf_helpers.h index 7904a2a..217c8d507 100644 --- a/samples/bpf/bpf_helpers.h +++ b/samples/bpf/bpf_helpers.h @@ -41,6 +41,8 @@ static int (*bpf_perf_event_output)(void *ctx, void *map, int index, void *data, (void *) BPF_FUNC_perf_event_output; static int (*bpf_get_stackid)(void *ctx, void *map, int flags) = (void *) BPF_FUNC_get_stackid; +static int (*bpf_probe_write_user)(void *dst, void *src, int size) = + (void *) BPF_FUNC_probe_write_user; /* llvm builtin functions that eBPF C program may use to * emit BPF_LD_ABS and BPF_LD_IND instructions @@ -70,6 +72,8 @@ static int (*bpf_l3_csum_replace)(void *ctx, int off, int from, int to, int flag (void *) BPF_FUNC_l3_csum_replace; static int (*bpf_l4_csum_replace)(void *ctx, int off, int from, int to, int flags) = (void *) BPF_FUNC_l4_csum_replace; +static int (*bpf_skb_in_cgroup)(void *ctx, void *map, int index) = + (void *) BPF_FUNC_skb_in_cgroup; #if defined(__x86_64__) diff --git a/samples/bpf/bpf_load.c b/samples/bpf/bpf_load.c index 022af71..0cfda23 100644 --- a/samples/bpf/bpf_load.c +++ b/samples/bpf/bpf_load.c @@ -50,6 +50,7 @@ static int load_and_attach(const char *event, struct bpf_insn *prog, int size) bool is_kprobe = strncmp(event, "kprobe/", 7) == 0; bool is_kretprobe = strncmp(event, "kretprobe/", 10) == 0; bool is_tracepoint = strncmp(event, "tracepoint/", 11) == 0; + bool is_xdp = strncmp(event, "xdp", 3) == 0; enum bpf_prog_type prog_type; char buf[256]; int fd, efd, err, id; @@ -66,6 +67,8 @@ static int load_and_attach(const char *event, struct bpf_insn *prog, int size) prog_type = BPF_PROG_TYPE_KPROBE; } else if (is_tracepoint) { prog_type = BPF_PROG_TYPE_TRACEPOINT; + } else if (is_xdp) { + prog_type = BPF_PROG_TYPE_XDP; } else { printf("Unknown event '%s'\n", event); return -1; @@ -79,6 +82,9 @@ static int load_and_attach(const char *event, struct bpf_insn *prog, int size) prog_fd[prog_cnt++] = fd; + if (is_xdp) + return 0; + if (is_socket) { event += 6; if (*event != '/') @@ -319,6 +325,7 @@ int load_bpf_file(char *path) if (memcmp(shname_prog, "kprobe/", 7) == 0 || memcmp(shname_prog, "kretprobe/", 10) == 0 || memcmp(shname_prog, "tracepoint/", 11) == 0 || + memcmp(shname_prog, "xdp", 3) == 0 || memcmp(shname_prog, "socket", 6) == 0) load_and_attach(shname_prog, insns, data_prog->d_size); } @@ -336,6 +343,7 @@ int load_bpf_file(char *path) if (memcmp(shname, "kprobe/", 7) == 0 || memcmp(shname, "kretprobe/", 10) == 0 || memcmp(shname, "tracepoint/", 11) == 0 || + memcmp(shname, "xdp", 3) == 0 || memcmp(shname, "socket", 6) == 0) load_and_attach(shname, data->d_buf, data->d_size); } diff --git a/samples/bpf/sockex2_user.c b/samples/bpf/sockex2_user.c index 29a276d..8a4085c 100644 --- a/samples/bpf/sockex2_user.c +++ b/samples/bpf/sockex2_user.c @@ -5,6 +5,7 @@ #include "bpf_load.h" #include <unistd.h> #include <arpa/inet.h> +#include <sys/resource.h> struct pair { __u64 packets; @@ -13,11 +14,13 @@ struct pair { int main(int ac, char **argv) { + struct rlimit r = {RLIM_INFINITY, RLIM_INFINITY}; char filename[256]; FILE *f; int i, sock; snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]); + setrlimit(RLIMIT_MEMLOCK, &r); if (load_bpf_file(filename)) { printf("%s", bpf_log_buf); diff --git a/samples/bpf/sockex3_user.c b/samples/bpf/sockex3_user.c index 2617772..d4184ab 100644 --- a/samples/bpf/sockex3_user.c +++ b/samples/bpf/sockex3_user.c @@ -5,6 +5,7 @@ #include "bpf_load.h" #include <unistd.h> #include <arpa/inet.h> +#include <sys/resource.h> struct flow_keys { __be32 src; @@ -23,11 +24,13 @@ struct pair { int main(int argc, char **argv) { + struct rlimit r = {RLIM_INFINITY, RLIM_INFINITY}; char filename[256]; FILE *f; int i, sock; snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]); + setrlimit(RLIMIT_MEMLOCK, &r); if (load_bpf_file(filename)) { printf("%s", bpf_log_buf); diff --git a/samples/bpf/test_cgrp2_array_pin.c b/samples/bpf/test_cgrp2_array_pin.c new file mode 100644 index 0000000..70e86f7 --- /dev/null +++ b/samples/bpf/test_cgrp2_array_pin.c @@ -0,0 +1,109 @@ +/* Copyright (c) 2016 Facebook + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public + * License as published by the Free Software Foundation. + */ +#include <linux/unistd.h> +#include <linux/bpf.h> + +#include <stdio.h> +#include <stdint.h> +#include <unistd.h> +#include <string.h> +#include <errno.h> +#include <fcntl.h> + +#include "libbpf.h" + +static void usage(void) +{ + printf("Usage: test_cgrp2_array_pin [...]\n"); + printf(" -F <file> File to pin an BPF cgroup array\n"); + printf(" -U <file> Update an already pinned BPF cgroup array\n"); + printf(" -v <value> Full path of the cgroup2\n"); + printf(" -h Display this help\n"); +} + +int main(int argc, char **argv) +{ + const char *pinned_file = NULL, *cg2 = NULL; + int create_array = 1; + int array_key = 0; + int array_fd = -1; + int cg2_fd = -1; + int ret = -1; + int opt; + + while ((opt = getopt(argc, argv, "F:U:v:")) != -1) { + switch (opt) { + /* General args */ + case 'F': + pinned_file = optarg; + break; + case 'U': + pinned_file = optarg; + create_array = 0; + break; + case 'v': + cg2 = optarg; + break; + default: + usage(); + goto out; + } + } + + if (!cg2 || !pinned_file) { + usage(); + goto out; + } + + cg2_fd = open(cg2, O_RDONLY); + if (cg2_fd < 0) { + fprintf(stderr, "open(%s,...): %s(%d)\n", + cg2, strerror(errno), errno); + goto out; + } + + if (create_array) { + array_fd = bpf_create_map(BPF_MAP_TYPE_CGROUP_ARRAY, + sizeof(uint32_t), sizeof(uint32_t), + 1, 0); + if (array_fd < 0) { + fprintf(stderr, + "bpf_create_map(BPF_MAP_TYPE_CGROUP_ARRAY,...): %s(%d)\n", + strerror(errno), errno); + goto out; + } + } else { + array_fd = bpf_obj_get(pinned_file); + if (array_fd < 0) { + fprintf(stderr, "bpf_obj_get(%s): %s(%d)\n", + pinned_file, strerror(errno), errno); + goto out; + } + } + + ret = bpf_update_elem(array_fd, &array_key, &cg2_fd, 0); + if (ret) { + perror("bpf_update_elem"); + goto out; + } + + if (create_array) { + ret = bpf_obj_pin(array_fd, pinned_file); + if (ret) { + fprintf(stderr, "bpf_obj_pin(..., %s): %s(%d)\n", + pinned_file, strerror(errno), errno); + goto out; + } + } + +out: + if (array_fd != -1) + close(array_fd); + if (cg2_fd != -1) + close(cg2_fd); + return ret; +} diff --git a/samples/bpf/test_cgrp2_tc.sh b/samples/bpf/test_cgrp2_tc.sh new file mode 100755 index 0000000..0b119ee --- /dev/null +++ b/samples/bpf/test_cgrp2_tc.sh @@ -0,0 +1,184 @@ +#!/bin/bash + +MY_DIR=$(dirname $0) +# Details on the bpf prog +BPF_CGRP2_ARRAY_NAME='test_cgrp2_array_pin' +BPF_PROG="$MY_DIR/test_cgrp2_tc_kern.o" +BPF_SECTION='filter' + +[ -z "$TC" ] && TC='tc' +[ -z "$IP" ] && IP='ip' + +# Names of the veth interface, net namespace...etc. +HOST_IFC='ve' +NS_IFC='vens' +NS='ns' + +find_mnt() { + cat /proc/mounts | \ + awk '{ if ($3 == "'$1'" && mnt == "") { mnt = $2 }} END { print mnt }' +} + +# Init cgroup2 vars +init_cgrp2_vars() { + CGRP2_ROOT=$(find_mnt cgroup2) + if [ -z "$CGRP2_ROOT" ] + then + CGRP2_ROOT='/mnt/cgroup2' + MOUNT_CGRP2="yes" + fi + CGRP2_TC="$CGRP2_ROOT/tc" + CGRP2_TC_LEAF="$CGRP2_TC/leaf" +} + +# Init bpf fs vars +init_bpf_fs_vars() { + local bpf_fs_root=$(find_mnt bpf) + [ -n "$bpf_fs_root" ] || return -1 + BPF_FS_TC_SHARE="$bpf_fs_root/tc/globals" +} + +setup_cgrp2() { + case $1 in + start) + if [ "$MOUNT_CGRP2" == 'yes' ] + then + [ -d $CGRP2_ROOT ] || mkdir -p $CGRP2_ROOT + mount -t cgroup2 none $CGRP2_ROOT || return $? + fi + mkdir -p $CGRP2_TC_LEAF + ;; + *) + rmdir $CGRP2_TC_LEAF && rmdir $CGRP2_TC + [ "$MOUNT_CGRP2" == 'yes' ] && umount $CGRP2_ROOT + ;; + esac +} + +setup_bpf_cgrp2_array() { + local bpf_cgrp2_array="$BPF_FS_TC_SHARE/$BPF_CGRP2_ARRAY_NAME" + case $1 in + start) + $MY_DIR/test_cgrp2_array_pin -U $bpf_cgrp2_array -v $CGRP2_TC + ;; + *) + [ -d "$BPF_FS_TC_SHARE" ] && rm -f $bpf_cgrp2_array + ;; + esac +} + +setup_net() { + case $1 in + start) + $IP link add $HOST_IFC type veth peer name $NS_IFC || return $? + $IP link set dev $HOST_IFC up || return $? + sysctl -q net.ipv6.conf.$HOST_IFC.accept_dad=0 + + $IP netns add ns || return $? + $IP link set dev $NS_IFC netns ns || return $? + $IP -n $NS link set dev $NS_IFC up || return $? + $IP netns exec $NS sysctl -q net.ipv6.conf.$NS_IFC.accept_dad=0 + $TC qdisc add dev $HOST_IFC clsact || return $? + $TC filter add dev $HOST_IFC egress bpf da obj $BPF_PROG sec $BPF_SECTION || return $? + ;; + *) + $IP netns del $NS + $IP link del $HOST_IFC + ;; + esac +} + +run_in_cgrp() { + # Fork another bash and move it under the specified cgroup. + # It makes the cgroup cleanup easier at the end of the test. + cmd='echo $$ > ' + cmd="$cmd $1/cgroup.procs; exec $2" + bash -c "$cmd" +} + +do_test() { + run_in_cgrp $CGRP2_TC_LEAF "ping -6 -c3 ff02::1%$HOST_IFC >& /dev/null" + local dropped=$($TC -s qdisc show dev $HOST_IFC | tail -3 | \ + awk '/drop/{print substr($7, 0, index($7, ",")-1)}') + if [[ $dropped -eq 0 ]] + then + echo "FAIL" + return 1 + else + echo "Successfully filtered $dropped packets" + return 0 + fi +} + +do_exit() { + if [ "$DEBUG" == "yes" ] && [ "$MODE" != 'cleanuponly' ] + then + echo "------ DEBUG ------" + echo "mount: "; mount | egrep '(cgroup2|bpf)'; echo + echo "$CGRP2_TC_LEAF: "; ls -l $CGRP2_TC_LEAF; echo + if [ -d "$BPF_FS_TC_SHARE" ] + then + echo "$BPF_FS_TC_SHARE: "; ls -l $BPF_FS_TC_SHARE; echo + fi + echo "Host net:" + $IP netns + $IP link show dev $HOST_IFC + $IP -6 a show dev $HOST_IFC + $TC -s qdisc show dev $HOST_IFC + echo + echo "$NS net:" + $IP -n $NS link show dev $NS_IFC + $IP -n $NS -6 link show dev $NS_IFC + echo "------ DEBUG ------" + echo + fi + + if [ "$MODE" != 'nocleanup' ] + then + setup_net stop + setup_bpf_cgrp2_array stop + setup_cgrp2 stop + fi +} + +init_cgrp2_vars +init_bpf_fs_vars + +while [[ $# -ge 1 ]] +do + a="$1" + case $a in + debug) + DEBUG='yes' + shift 1 + ;; + cleanup-only) + MODE='cleanuponly' + shift 1 + ;; + no-cleanup) + MODE='nocleanup' + shift 1 + ;; + *) + echo "test_cgrp2_tc [debug] [cleanup-only | no-cleanup]" + echo " debug: Print cgrp and network setup details at the end of the test" + echo " cleanup-only: Try to cleanup things from last test. No test will be run" + echo " no-cleanup: Run the test but don't do cleanup at the end" + echo "[Note: If no arg is given, it will run the test and do cleanup at the end]" + echo + exit -1 + ;; + esac +done + +trap do_exit 0 + +[ "$MODE" == 'cleanuponly' ] && exit + +setup_cgrp2 start || exit $? +setup_net start || exit $? +init_bpf_fs_vars || exit $? +setup_bpf_cgrp2_array start || exit $? +do_test +echo diff --git a/samples/bpf/test_cgrp2_tc_kern.c b/samples/bpf/test_cgrp2_tc_kern.c new file mode 100644 index 0000000..2732c37 --- /dev/null +++ b/samples/bpf/test_cgrp2_tc_kern.c @@ -0,0 +1,69 @@ +/* Copyright (c) 2016 Facebook + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public + * License as published by the Free Software Foundation. + */ +#include <uapi/linux/if_ether.h> +#include <uapi/linux/in6.h> +#include <uapi/linux/ipv6.h> +#include <uapi/linux/pkt_cls.h> +#include <uapi/linux/bpf.h> +#include "bpf_helpers.h" + +/* copy of 'struct ethhdr' without __packed */ +struct eth_hdr { + unsigned char h_dest[ETH_ALEN]; + unsigned char h_source[ETH_ALEN]; + unsigned short h_proto; +}; + +#define PIN_GLOBAL_NS 2 +struct bpf_elf_map { + __u32 type; + __u32 size_key; + __u32 size_value; + __u32 max_elem; + __u32 flags; + __u32 id; + __u32 pinning; +}; + +struct bpf_elf_map SEC("maps") test_cgrp2_array_pin = { + .type = BPF_MAP_TYPE_CGROUP_ARRAY, + .size_key = sizeof(uint32_t), + .size_value = sizeof(uint32_t), + .pinning = PIN_GLOBAL_NS, + .max_elem = 1, +}; + +SEC("filter") +int handle_egress(struct __sk_buff *skb) +{ + void *data = (void *)(long)skb->data; + struct eth_hdr *eth = data; + struct ipv6hdr *ip6h = data + sizeof(*eth); + void *data_end = (void *)(long)skb->data_end; + char dont_care_msg[] = "dont care %04x %d\n"; + char pass_msg[] = "pass\n"; + char reject_msg[] = "reject\n"; + + /* single length check */ + if (data + sizeof(*eth) + sizeof(*ip6h) > data_end) + return TC_ACT_OK; + + if (eth->h_proto != htons(ETH_P_IPV6) || + ip6h->nexthdr != IPPROTO_ICMPV6) { + bpf_trace_printk(dont_care_msg, sizeof(dont_care_msg), + eth->h_proto, ip6h->nexthdr); + return TC_ACT_OK; + } else if (bpf_skb_in_cgroup(skb, &test_cgrp2_array_pin, 0) != 1) { + bpf_trace_printk(pass_msg, sizeof(pass_msg)); + return TC_ACT_OK; + } else { + bpf_trace_printk(reject_msg, sizeof(reject_msg)); + return TC_ACT_SHOT; + } +} + +char _license[] SEC("license") = "GPL"; diff --git a/samples/bpf/test_probe_write_user_kern.c b/samples/bpf/test_probe_write_user_kern.c new file mode 100644 index 0000000..3a677c8 --- /dev/null +++ b/samples/bpf/test_probe_write_user_kern.c @@ -0,0 +1,52 @@ +/* Copyright (c) 2016 Sargun Dhillon <sargun@sargun.me> + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public + * License as published by the Free Software Foundation. + */ +#include <linux/skbuff.h> +#include <linux/netdevice.h> +#include <uapi/linux/bpf.h> +#include <linux/version.h> +#include "bpf_helpers.h" + +struct bpf_map_def SEC("maps") dnat_map = { + .type = BPF_MAP_TYPE_HASH, + .key_size = sizeof(struct sockaddr_in), + .value_size = sizeof(struct sockaddr_in), + .max_entries = 256, +}; + +/* kprobe is NOT a stable ABI + * kernel functions can be removed, renamed or completely change semantics. + * Number of arguments and their positions can change, etc. + * In such case this bpf+kprobe example will no longer be meaningful + * + * This example sits on a syscall, and the syscall ABI is relatively stable + * of course, across platforms, and over time, the ABI may change. + */ +SEC("kprobe/sys_connect") +int bpf_prog1(struct pt_regs *ctx) +{ + struct sockaddr_in new_addr, orig_addr = {}; + struct sockaddr_in *mapped_addr; + void *sockaddr_arg = (void *)PT_REGS_PARM2(ctx); + int sockaddr_len = (int)PT_REGS_PARM3(ctx); + + if (sockaddr_len > sizeof(orig_addr)) + return 0; + + if (bpf_probe_read(&orig_addr, sizeof(orig_addr), sockaddr_arg) != 0) + return 0; + + mapped_addr = bpf_map_lookup_elem(&dnat_map, &orig_addr); + if (mapped_addr != NULL) { + memcpy(&new_addr, mapped_addr, sizeof(new_addr)); + bpf_probe_write_user(sockaddr_arg, &new_addr, + sizeof(new_addr)); + } + return 0; +} + +char _license[] SEC("license") = "GPL"; +u32 _version SEC("version") = LINUX_VERSION_CODE; diff --git a/samples/bpf/test_probe_write_user_user.c b/samples/bpf/test_probe_write_user_user.c new file mode 100644 index 0000000..a44bf34 --- /dev/null +++ b/samples/bpf/test_probe_write_user_user.c @@ -0,0 +1,78 @@ +#include <stdio.h> +#include <assert.h> +#include <linux/bpf.h> +#include <unistd.h> +#include "libbpf.h" +#include "bpf_load.h" +#include <sys/socket.h> +#include <string.h> +#include <netinet/in.h> +#include <arpa/inet.h> + +int main(int ac, char **argv) +{ + int serverfd, serverconnfd, clientfd; + socklen_t sockaddr_len; + struct sockaddr serv_addr, mapped_addr, tmp_addr; + struct sockaddr_in *serv_addr_in, *mapped_addr_in, *tmp_addr_in; + char filename[256]; + char *ip; + + serv_addr_in = (struct sockaddr_in *)&serv_addr; + mapped_addr_in = (struct sockaddr_in *)&mapped_addr; + tmp_addr_in = (struct sockaddr_in *)&tmp_addr; + + snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]); + + if (load_bpf_file(filename)) { + printf("%s", bpf_log_buf); + return 1; + } + + assert((serverfd = socket(AF_INET, SOCK_STREAM, 0)) > 0); + assert((clientfd = socket(AF_INET, SOCK_STREAM, 0)) > 0); + + /* Bind server to ephemeral port on lo */ + memset(&serv_addr, 0, sizeof(serv_addr)); + serv_addr_in->sin_family = AF_INET; + serv_addr_in->sin_port = 0; + serv_addr_in->sin_addr.s_addr = htonl(INADDR_LOOPBACK); + + assert(bind(serverfd, &serv_addr, sizeof(serv_addr)) == 0); + + sockaddr_len = sizeof(serv_addr); + assert(getsockname(serverfd, &serv_addr, &sockaddr_len) == 0); + ip = inet_ntoa(serv_addr_in->sin_addr); + printf("Server bound to: %s:%d\n", ip, ntohs(serv_addr_in->sin_port)); + + memset(&mapped_addr, 0, sizeof(mapped_addr)); + mapped_addr_in->sin_family = AF_INET; + mapped_addr_in->sin_port = htons(5555); + mapped_addr_in->sin_addr.s_addr = inet_addr("255.255.255.255"); + + assert(!bpf_update_elem(map_fd[0], &mapped_addr, &serv_addr, BPF_ANY)); + + assert(listen(serverfd, 5) == 0); + + ip = inet_ntoa(mapped_addr_in->sin_addr); + printf("Client connecting to: %s:%d\n", + ip, ntohs(mapped_addr_in->sin_port)); + assert(connect(clientfd, &mapped_addr, sizeof(mapped_addr)) == 0); + + sockaddr_len = sizeof(tmp_addr); + ip = inet_ntoa(tmp_addr_in->sin_addr); + assert((serverconnfd = accept(serverfd, &tmp_addr, &sockaddr_len)) > 0); + printf("Server received connection from: %s:%d\n", + ip, ntohs(tmp_addr_in->sin_port)); + + sockaddr_len = sizeof(tmp_addr); + assert(getpeername(clientfd, &tmp_addr, &sockaddr_len) == 0); + ip = inet_ntoa(tmp_addr_in->sin_addr); + printf("Client's peer address: %s:%d\n", + ip, ntohs(tmp_addr_in->sin_port)); + + /* Is the server's getsockname = the socket getpeername */ + assert(memcmp(&serv_addr, &tmp_addr, sizeof(struct sockaddr_in)) == 0); + + return 0; +} diff --git a/samples/bpf/xdp1_kern.c b/samples/bpf/xdp1_kern.c new file mode 100644 index 0000000..2197421 --- /dev/null +++ b/samples/bpf/xdp1_kern.c @@ -0,0 +1,93 @@ +/* Copyright (c) 2016 PLUMgrid + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public + * License as published by the Free Software Foundation. + */ +#define KBUILD_MODNAME "foo" +#include <uapi/linux/bpf.h> +#include <linux/in.h> +#include <linux/if_ether.h> +#include <linux/if_packet.h> +#include <linux/if_vlan.h> +#include <linux/ip.h> +#include <linux/ipv6.h> +#include "bpf_helpers.h" + +struct bpf_map_def SEC("maps") rxcnt = { + .type = BPF_MAP_TYPE_PERCPU_ARRAY, + .key_size = sizeof(u32), + .value_size = sizeof(long), + .max_entries = 256, +}; + +static int parse_ipv4(void *data, u64 nh_off, void *data_end) +{ + struct iphdr *iph = data + nh_off; + + if (iph + 1 > data_end) + return 0; + return iph->protocol; +} + +static int parse_ipv6(void *data, u64 nh_off, void *data_end) +{ + struct ipv6hdr *ip6h = data + nh_off; + + if (ip6h + 1 > data_end) + return 0; + return ip6h->nexthdr; +} + +SEC("xdp1") +int xdp_prog1(struct xdp_md *ctx) +{ + void *data_end = (void *)(long)ctx->data_end; + void *data = (void *)(long)ctx->data; + struct ethhdr *eth = data; + int rc = XDP_DROP; + long *value; + u16 h_proto; + u64 nh_off; + u32 ipproto; + + nh_off = sizeof(*eth); + if (data + nh_off > data_end) + return rc; + + h_proto = eth->h_proto; + + if (h_proto == htons(ETH_P_8021Q) || h_proto == htons(ETH_P_8021AD)) { + struct vlan_hdr *vhdr; + + vhdr = data + nh_off; + nh_off += sizeof(struct vlan_hdr); + if (data + nh_off > data_end) + return rc; + h_proto = vhdr->h_vlan_encapsulated_proto; + } + if (h_proto == htons(ETH_P_8021Q) || h_proto == htons(ETH_P_8021AD)) { + struct vlan_hdr *vhdr; + + vhdr = data + nh_off; + nh_off += sizeof(struct vlan_hdr); + if (data + nh_off > data_end) + return rc; + h_proto = vhdr->h_vlan_encapsulated_proto; + } + + if (h_proto == htons(ETH_P_IP)) + ipproto = parse_ipv4(data, nh_off, data_end); + else if (h_proto == htons(ETH_P_IPV6)) + ipproto = parse_ipv6(data, nh_off, data_end); + else + ipproto = 0; + + value = bpf_map_lookup_elem(&rxcnt, &ipproto); + if (value) + *value += 1; + + return rc; +} + +char _license[] SEC("license") = "GPL"; diff --git a/samples/bpf/xdp1_user.c b/samples/bpf/xdp1_user.c new file mode 100644 index 0000000..a5e109e --- /dev/null +++ b/samples/bpf/xdp1_user.c @@ -0,0 +1,181 @@ +/* Copyright (c) 2016 PLUMgrid + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public + * License as published by the Free Software Foundation. + */ +#include <linux/bpf.h> +#include <linux/netlink.h> +#include <linux/rtnetlink.h> +#include <assert.h> +#include <errno.h> +#include <signal.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <sys/socket.h> +#include <unistd.h> +#include "bpf_load.h" +#include "libbpf.h" + +static int set_link_xdp_fd(int ifindex, int fd) +{ + struct sockaddr_nl sa; + int sock, seq = 0, len, ret = -1; + char buf[4096]; + struct nlattr *nla, *nla_xdp; + struct { + struct nlmsghdr nh; + struct ifinfomsg ifinfo; + char attrbuf[64]; + } req; + struct nlmsghdr *nh; + struct nlmsgerr *err; + + memset(&sa, 0, sizeof(sa)); + sa.nl_family = AF_NETLINK; + + sock = socket(AF_NETLINK, SOCK_RAW, NETLINK_ROUTE); + if (sock < 0) { + printf("open netlink socket: %s\n", strerror(errno)); + return -1; + } + + if (bind(sock, (struct sockaddr *)&sa, sizeof(sa)) < 0) { + printf("bind to netlink: %s\n", strerror(errno)); + goto cleanup; + } + + memset(&req, 0, sizeof(req)); + req.nh.nlmsg_len = NLMSG_LENGTH(sizeof(struct ifinfomsg)); + req.nh.nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK; + req.nh.nlmsg_type = RTM_SETLINK; + req.nh.nlmsg_pid = 0; + req.nh.nlmsg_seq = ++seq; + req.ifinfo.ifi_family = AF_UNSPEC; + req.ifinfo.ifi_index = ifindex; + nla = (struct nlattr *)(((char *)&req) + + NLMSG_ALIGN(req.nh.nlmsg_len)); + nla->nla_type = NLA_F_NESTED | 43/*IFLA_XDP*/; + + nla_xdp = (struct nlattr *)((char *)nla + NLA_HDRLEN); + nla_xdp->nla_type = 1/*IFLA_XDP_FD*/; + nla_xdp->nla_len = NLA_HDRLEN + sizeof(int); + memcpy((char *)nla_xdp + NLA_HDRLEN, &fd, sizeof(fd)); + nla->nla_len = NLA_HDRLEN + nla_xdp->nla_len; + + req.nh.nlmsg_len += NLA_ALIGN(nla->nla_len); + + if (send(sock, &req, req.nh.nlmsg_len, 0) < 0) { + printf("send to netlink: %s\n", strerror(errno)); + goto cleanup; + } + + len = recv(sock, buf, sizeof(buf), 0); + if (len < 0) { + printf("recv from netlink: %s\n", strerror(errno)); + goto cleanup; + } + + for (nh = (struct nlmsghdr *)buf; NLMSG_OK(nh, len); + nh = NLMSG_NEXT(nh, len)) { + if (nh->nlmsg_pid != getpid()) { + printf("Wrong pid %d, expected %d\n", + nh->nlmsg_pid, getpid()); + goto cleanup; + } + if (nh->nlmsg_seq != seq) { + printf("Wrong seq %d, expected %d\n", + nh->nlmsg_seq, seq); + goto cleanup; + } + switch (nh->nlmsg_type) { + case NLMSG_ERROR: + err = (struct nlmsgerr *)NLMSG_DATA(nh); + if (!err->error) + continue; + printf("nlmsg error %s\n", strerror(-err->error)); + goto cleanup; + case NLMSG_DONE: + break; + } + } + + ret = 0; + +cleanup: + close(sock); + return ret; +} + +static int ifindex; + +static void int_exit(int sig) +{ + set_link_xdp_fd(ifindex, -1); + exit(0); +} + +/* simple per-protocol drop counter + */ +static void poll_stats(int interval) +{ + unsigned int nr_cpus = sysconf(_SC_NPROCESSORS_CONF); + const unsigned int nr_keys = 256; + __u64 values[nr_cpus], prev[nr_keys][nr_cpus]; + __u32 key; + int i; + + memset(prev, 0, sizeof(prev)); + + while (1) { + sleep(interval); + + for (key = 0; key < nr_keys; key++) { + __u64 sum = 0; + + assert(bpf_lookup_elem(map_fd[0], &key, values) == 0); + for (i = 0; i < nr_cpus; i++) + sum += (values[i] - prev[key][i]); + if (sum) + printf("proto %u: %10llu pkt/s\n", + key, sum / interval); + memcpy(prev[key], values, sizeof(values)); + } + } +} + +int main(int ac, char **argv) +{ + char filename[256]; + + snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]); + + if (ac != 2) { + printf("usage: %s IFINDEX\n", argv[0]); + return 1; + } + + ifindex = strtoul(argv[1], NULL, 0); + + if (load_bpf_file(filename)) { + printf("%s", bpf_log_buf); + return 1; + } + + if (!prog_fd[0]) { + printf("load_bpf_file: %s\n", strerror(errno)); + return 1; + } + + signal(SIGINT, int_exit); + + if (set_link_xdp_fd(ifindex, prog_fd[0]) < 0) { + printf("link set xdp fd failed\n"); + return 1; + } + + poll_stats(2); + + return 0; +} diff --git a/samples/bpf/xdp2_kern.c b/samples/bpf/xdp2_kern.c new file mode 100644 index 0000000..e012888 --- /dev/null +++ b/samples/bpf/xdp2_kern.c @@ -0,0 +1,114 @@ +/* Copyright (c) 2016 PLUMgrid + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public + * License as published by the Free Software Foundation. + */ +#define KBUILD_MODNAME "foo" +#include <uapi/linux/bpf.h> +#include <linux/in.h> +#include <linux/if_ether.h> +#include <linux/if_packet.h> +#include <linux/if_vlan.h> +#include <linux/ip.h> +#include <linux/ipv6.h> +#include "bpf_helpers.h" + +struct bpf_map_def SEC("maps") rxcnt = { + .type = BPF_MAP_TYPE_PERCPU_ARRAY, + .key_size = sizeof(u32), + .value_size = sizeof(long), + .max_entries = 256, +}; + +static void swap_src_dst_mac(void *data) +{ + unsigned short *p = data; + unsigned short dst[3]; + + dst[0] = p[0]; + dst[1] = p[1]; + dst[2] = p[2]; + p[0] = p[3]; + p[1] = p[4]; + p[2] = p[5]; + p[3] = dst[0]; + p[4] = dst[1]; + p[5] = dst[2]; +} + +static int parse_ipv4(void *data, u64 nh_off, void *data_end) +{ + struct iphdr *iph = data + nh_off; + + if (iph + 1 > data_end) + return 0; + return iph->protocol; +} + +static int parse_ipv6(void *data, u64 nh_off, void *data_end) +{ + struct ipv6hdr *ip6h = data + nh_off; + + if (ip6h + 1 > data_end) + return 0; + return ip6h->nexthdr; +} + +SEC("xdp1") +int xdp_prog1(struct xdp_md *ctx) +{ + void *data_end = (void *)(long)ctx->data_end; + void *data = (void *)(long)ctx->data; + struct ethhdr *eth = data; + int rc = XDP_DROP; + long *value; + u16 h_proto; + u64 nh_off; + u32 ipproto; + + nh_off = sizeof(*eth); + if (data + nh_off > data_end) + return rc; + + h_proto = eth->h_proto; + + if (h_proto == htons(ETH_P_8021Q) || h_proto == htons(ETH_P_8021AD)) { + struct vlan_hdr *vhdr; + + vhdr = data + nh_off; + nh_off += sizeof(struct vlan_hdr); + if (data + nh_off > data_end) + return rc; + h_proto = vhdr->h_vlan_encapsulated_proto; + } + if (h_proto == htons(ETH_P_8021Q) || h_proto == htons(ETH_P_8021AD)) { + struct vlan_hdr *vhdr; + + vhdr = data + nh_off; + nh_off += sizeof(struct vlan_hdr); + if (data + nh_off > data_end) + return rc; + h_proto = vhdr->h_vlan_encapsulated_proto; + } + + if (h_proto == htons(ETH_P_IP)) + ipproto = parse_ipv4(data, nh_off, data_end); + else if (h_proto == htons(ETH_P_IPV6)) + ipproto = parse_ipv6(data, nh_off, data_end); + else + ipproto = 0; + + value = bpf_map_lookup_elem(&rxcnt, &ipproto); + if (value) + *value += 1; + + if (ipproto == IPPROTO_UDP) { + swap_src_dst_mac(data); + rc = XDP_TX; + } + + return rc; +} + +char _license[] SEC("license") = "GPL"; diff --git a/samples/pktgen/parameters.sh b/samples/pktgen/parameters.sh index 33b70fd..f70ea7d 100644 --- a/samples/pktgen/parameters.sh +++ b/samples/pktgen/parameters.sh @@ -14,12 +14,13 @@ function usage() { echo " -b : (\$BURST) HW level bursting of SKBs" echo " -v : (\$VERBOSE) verbose" echo " -x : (\$DEBUG) debug" + echo " -6 : (\$IP6) IPv6" echo "" } ## --- Parse command line arguments / parameters --- ## echo "Commandline options:" -while getopts "s:i:d:m:t:c:b:vxh" option; do +while getopts "s:i:d:m:t:c:b:vxh6" option; do case $option in i) # interface export DEV=$OPTARG @@ -59,6 +60,10 @@ while getopts "s:i:d:m:t:c:b:vxh" option; do export DEBUG=yes info "Debug mode: DEBUG=$DEBUG" ;; + 6) + export IP6=6 + info "IP6: IP6=$IP6" + ;; h|?|*) usage; err 2 "[ERROR] Unknown parameters!!!" diff --git a/samples/pktgen/pktgen.conf-1-1-flows b/samples/pktgen/pktgen.conf-1-1-flows deleted file mode 100755 index 081749c..0000000 --- a/samples/pktgen/pktgen.conf-1-1-flows +++ /dev/null @@ -1,67 +0,0 @@ -#!/bin/bash - -#modprobe pktgen - - -function pgset() { - local result - - echo $1 > $PGDEV - - result=`cat $PGDEV | fgrep "Result: OK:"` - if [ "$result" = "" ]; then - cat $PGDEV | fgrep Result: - fi -} - -# Config Start Here ----------------------------------------------------------- - - -# thread config -# Each CPU has its own thread. One CPU example. We add eth1. - -PGDEV=/proc/net/pktgen/kpktgend_0 - echo "Removing all devices" - pgset "rem_device_all" - echo "Adding eth1" - pgset "add_device eth1" - - -# device config -# delay 0 -# We need to do alloc for every skb since we cannot clone here. - -CLONE_SKB="clone_skb 0" -# NIC adds 4 bytes CRC -PKT_SIZE="pkt_size 60" - -# COUNT 0 means forever -#COUNT="count 0" -COUNT="count 10000000" -DELAY="delay 0" - -PGDEV=/proc/net/pktgen/eth1 - echo "Configuring $PGDEV" - pgset "$COUNT" - pgset "$CLONE_SKB" - pgset "$PKT_SIZE" - pgset "$DELAY" - # Random address with in the min-max range - pgset "flag IPDST_RND" - pgset "dst_min 10.0.0.0" - pgset "dst_max 10.255.255.255" - - # 8k Concurrent flows at 4 pkts - pgset "flows 8192" - pgset "flowlen 4" - - pgset "dst_mac 00:04:23:08:91:dc" - -# Time to run -PGDEV=/proc/net/pktgen/pgctrl - - echo "Running... ctrl^C to stop" - trap true INT - pgset "start" - echo "Done" - cat /proc/net/pktgen/eth1 diff --git a/samples/pktgen/pktgen.conf-1-1-rdos b/samples/pktgen/pktgen.conf-1-1-rdos deleted file mode 100755 index c7553be..0000000 --- a/samples/pktgen/pktgen.conf-1-1-rdos +++ /dev/null @@ -1,64 +0,0 @@ -#!/bin/bash - -#modprobe pktgen - - -function pgset() { - local result - - echo $1 > $PGDEV - - result=`cat $PGDEV | fgrep "Result: OK:"` - if [ "$result" = "" ]; then - cat $PGDEV | fgrep Result: - fi -} - -# Config Start Here ----------------------------------------------------------- - - -# thread config -# Each CPU has its own thread. One CPU example. We add eth1. - -PGDEV=/proc/net/pktgen/kpktgend_0 - echo "Removing all devices" - pgset "rem_device_all" - echo "Adding eth1" - pgset "add_device eth1" - - -# device config -# delay 0 - -# We need to do alloc for every skb since we cannot clone here. - -CLONE_SKB="clone_skb 0" -# NIC adds 4 bytes CRC -PKT_SIZE="pkt_size 60" - -# COUNT 0 means forever -#COUNT="count 0" -COUNT="count 10000000" -DELAY="delay 0" - -PGDEV=/proc/net/pktgen/eth1 - echo "Configuring $PGDEV" - pgset "$COUNT" - pgset "$CLONE_SKB" - pgset "$PKT_SIZE" - pgset "$DELAY" - # Random address with in the min-max range - pgset "flag IPDST_RND" - pgset "dst_min 10.0.0.0" - pgset "dst_max 10.255.255.255" - - pgset "dst_mac 00:04:23:08:91:dc" - -# Time to run -PGDEV=/proc/net/pktgen/pgctrl - - echo "Running... ctrl^C to stop" - trap true INT - pgset "start" - echo "Done" - cat /proc/net/pktgen/eth1 diff --git a/samples/pktgen/pktgen_bench_xmit_mode_netif_receive.sh b/samples/pktgen/pktgen_bench_xmit_mode_netif_receive.sh index cb15903..f3e1bedf 100755 --- a/samples/pktgen/pktgen_bench_xmit_mode_netif_receive.sh +++ b/samples/pktgen/pktgen_bench_xmit_mode_netif_receive.sh @@ -34,7 +34,9 @@ root_check_run_with_sudo "$@" source ${basedir}/parameters.sh # Using invalid DST_MAC will cause the packets to get dropped in # ip_rcv() which is part of the test -[ -z "$DEST_IP" ] && DEST_IP="198.18.0.42" +if [ -z "$DEST_IP" ]; then + [ -z "$IP6" ] && DEST_IP="198.18.0.42" || DEST_IP="FD00::1" +fi [ -z "$DST_MAC" ] && DST_MAC="90:e2:ba:ff:ff:ff" [ -z "$BURST" ] && BURST=1024 @@ -64,7 +66,7 @@ for ((thread = 0; thread < $THREADS; thread++)); do # Destination pg_set $dev "dst_mac $DST_MAC" - pg_set $dev "dst $DEST_IP" + pg_set $dev "dst$IP6 $DEST_IP" # Inject packet into RX path of stack pg_set $dev "xmit_mode netif_receive" diff --git a/samples/pktgen/pktgen_bench_xmit_mode_queue_xmit.sh b/samples/pktgen/pktgen_bench_xmit_mode_queue_xmit.sh new file mode 100755 index 0000000..cc102e9 --- /dev/null +++ b/samples/pktgen/pktgen_bench_xmit_mode_queue_xmit.sh @@ -0,0 +1,68 @@ +#!/bin/bash +# +# Benchmark script: +# - developed for benchmarking egress qdisc path, derived (more +# like cut'n'pasted) from ingress benchmark script. +# +# Script for injecting packets into egress qdisc path of the stack +# with pktgen "xmit_mode queue_xmit". +# +basedir=`dirname $0` +source ${basedir}/functions.sh +root_check_run_with_sudo "$@" + +# Parameter parsing via include +source ${basedir}/parameters.sh +if [ -z "$DEST_IP" ]; then + [ -z "$IP6" ] && DEST_IP="198.18.0.42" || DEST_IP="FD00::1" +fi +[ -z "$DST_MAC" ] && DST_MAC="90:e2:ba:ff:ff:ff" + +# Burst greater than 1 are invalid for queue_xmit mode +if [[ -n "$BURST" ]]; then + err 1 "Bursting not supported for this mode" +fi + +# Base Config +DELAY="0" # Zero means max speed +COUNT="10000000" # Zero means indefinitely + +# General cleanup everything since last run +pg_ctrl "reset" + +# Threads are specified with parameter -t value in $THREADS +for ((thread = 0; thread < $THREADS; thread++)); do + # The device name is extended with @name, using thread number to + # make then unique, but any name will do. + dev=${DEV}@${thread} + + # Add remove all other devices and add_device $dev to thread + pg_thread $thread "rem_device_all" + pg_thread $thread "add_device" $dev + + # Base config of dev + pg_set $dev "flag QUEUE_MAP_CPU" + pg_set $dev "count $COUNT" + pg_set $dev "pkt_size $PKT_SIZE" + pg_set $dev "delay $DELAY" + pg_set $dev "flag NO_TIMESTAMP" + + # Destination + pg_set $dev "dst_mac $DST_MAC" + pg_set $dev "dst$IP6 $DEST_IP" + + # Inject packet into TX qdisc egress path of stack + pg_set $dev "xmit_mode queue_xmit" +done + +# start_run +echo "Running... ctrl^C to stop" >&2 +pg_ctrl "start" +echo "Done" >&2 + +# Print results +for ((thread = 0; thread < $THREADS; thread++)); do + dev=${DEV}@${thread} + echo "Device: $dev" + cat /proc/net/pktgen/$dev | grep -A2 "Result:" +done diff --git a/samples/pktgen/pktgen_sample01_simple.sh b/samples/pktgen/pktgen_sample01_simple.sh index 8c9d318..29ef4ba 100755 --- a/samples/pktgen/pktgen_sample01_simple.sh +++ b/samples/pktgen/pktgen_sample01_simple.sh @@ -14,7 +14,9 @@ root_check_run_with_sudo "$@" source ${basedir}/parameters.sh # # Set some default params, if they didn't get set -[ -z "$DEST_IP" ] && DEST_IP="198.18.0.42" +if [ -z "$DEST_IP" ]; then + [ -z "$IP6" ] && DEST_IP="198.18.0.42" || DEST_IP="FD00::1" +fi [ -z "$CLONE_SKB" ] && CLONE_SKB="0" # Example enforce param "-m" for dst_mac [ -z "$DST_MAC" ] && usage && err 2 "Must specify -m dst_mac" @@ -54,7 +56,7 @@ pg_set $DEV "flag NO_TIMESTAMP" # Destination pg_set $DEV "dst_mac $DST_MAC" -pg_set $DEV "dst $DEST_IP" +pg_set $DEV "dst$IP6 $DEST_IP" # Setup random UDP port src range pg_set $DEV "flag UDPSRC_RND" diff --git a/samples/pktgen/pktgen_sample02_multiqueue.sh b/samples/pktgen/pktgen_sample02_multiqueue.sh index 32467ae..c88a161 100755 --- a/samples/pktgen/pktgen_sample02_multiqueue.sh +++ b/samples/pktgen/pktgen_sample02_multiqueue.sh @@ -23,7 +23,9 @@ UDP_MIN=9 UDP_MAX=109 # (example of setting default params in your script) -[ -z "$DEST_IP" ] && DEST_IP="198.18.0.42" +if [ -z "$DEST_IP" ]; then + [ -z "$IP6" ] && DEST_IP="198.18.0.42" || DEST_IP="FD00::1" +fi [ -z "$DST_MAC" ] && DST_MAC="90:e2:ba:ff:ff:ff" # General cleanup everything since last run @@ -54,7 +56,7 @@ for ((thread = 0; thread < $THREADS; thread++)); do # Destination pg_set $dev "dst_mac $DST_MAC" - pg_set $dev "dst $DEST_IP" + pg_set $dev "dst$IP6 $DEST_IP" # Setup random UDP port src range pg_set $dev "flag UDPSRC_RND" diff --git a/samples/pktgen/pktgen_sample03_burst_single_flow.sh b/samples/pktgen/pktgen_sample03_burst_single_flow.sh index 775f5d0..80cf8f5 100755 --- a/samples/pktgen/pktgen_sample03_burst_single_flow.sh +++ b/samples/pktgen/pktgen_sample03_burst_single_flow.sh @@ -25,7 +25,9 @@ root_check_run_with_sudo "$@" # Parameter parsing via include source ${basedir}/parameters.sh # Set some default params, if they didn't get set -[ -z "$DEST_IP" ] && DEST_IP="198.18.0.42" +if [ -z "$DEST_IP" ]; then + [ -z "$IP6" ] && DEST_IP="198.18.0.42" || DEST_IP="FD00::1" +fi [ -z "$DST_MAC" ] && DST_MAC="90:e2:ba:ff:ff:ff" [ -z "$BURST" ] && BURST=32 [ -z "$CLONE_SKB" ] && CLONE_SKB="100000" @@ -55,7 +57,7 @@ for ((thread = 0; thread < $THREADS; thread++)); do # Destination pg_set $dev "dst_mac $DST_MAC" - pg_set $dev "dst $DEST_IP" + pg_set $dev "dst$IP6 $DEST_IP" # Setup burst, for easy testing -b 0 disable bursting # (internally in pktgen default and minimum burst=1) diff --git a/samples/pktgen/pktgen_sample04_many_flows.sh b/samples/pktgen/pktgen_sample04_many_flows.sh new file mode 100755 index 0000000..f60412e --- /dev/null +++ b/samples/pktgen/pktgen_sample04_many_flows.sh @@ -0,0 +1,93 @@ +#!/bin/bash +# +# Script example for many flows testing +# +# Number of simultaneous flows limited by variable $FLOWS +# and number of packets per flow controlled by variable $FLOWLEN +# +basedir=`dirname $0` +source ${basedir}/functions.sh +root_check_run_with_sudo "$@" + +# Parameter parsing via include +source ${basedir}/parameters.sh +# Set some default params, if they didn't get set +[ -z "$DEST_IP" ] && DEST_IP="198.18.0.42" +[ -z "$DST_MAC" ] && DST_MAC="90:e2:ba:ff:ff:ff" +[ -z "$CLONE_SKB" ] && CLONE_SKB="0" + +# NOTICE: Script specific settings +# ======= +# Limiting the number of concurrent flows ($FLOWS) +# and also set how many packets each flow contains ($FLOWLEN) +# +[ -z "$FLOWS" ] && FLOWS="8000" +[ -z "$FLOWLEN" ] && FLOWLEN="10" + +# Base Config +DELAY="0" # Zero means max speed +COUNT="0" # Zero means indefinitely + +if [[ -n "$BURST" ]]; then + err 1 "Bursting not supported for this mode" +fi + +# General cleanup everything since last run +pg_ctrl "reset" + +# Threads are specified with parameter -t value in $THREADS +for ((thread = 0; thread < $THREADS; thread++)); do + dev=${DEV}@${thread} + + # Add remove all other devices and add_device $dev to thread + pg_thread $thread "rem_device_all" + pg_thread $thread "add_device" $dev + + # Base config + pg_set $dev "flag QUEUE_MAP_CPU" + pg_set $dev "count $COUNT" + pg_set $dev "clone_skb $CLONE_SKB" + pg_set $dev "pkt_size $PKT_SIZE" + pg_set $dev "delay $DELAY" + pg_set $dev "flag NO_TIMESTAMP" + + # Single destination + pg_set $dev "dst_mac $DST_MAC" + pg_set $dev "dst $DEST_IP" + + # Randomize source IP-addresses + pg_set $dev "flag IPSRC_RND" + pg_set $dev "src_min 198.18.0.0" + pg_set $dev "src_max 198.19.255.255" + + # Limit number of flows (max 65535) + pg_set $dev "flows $FLOWS" + # + # How many packets a flow will send, before flow "entry" is + # re-generated/setup. + pg_set $dev "flowlen $FLOWLEN" + # + # Flag FLOW_SEQ will cause $FLOWLEN packets from the same flow + # being send back-to-back, before next flow is selected + # incrementally. This helps lookup caches, and is more realistic. + # + pg_set $dev "flag FLOW_SEQ" + +done + +# Run if user hits control-c +function print_result() { + # Print results + for ((thread = 0; thread < $THREADS; thread++)); do + dev=${DEV}@${thread} + echo "Device: $dev" + cat /proc/net/pktgen/$dev | grep -A2 "Result:" + done +} +# trap keyboard interrupt (Ctrl-C) +trap true SIGINT + +echo "Running... ctrl^C to stop" >&2 +pg_ctrl "start" + +print_result diff --git a/samples/pktgen/pktgen_sample05_flow_per_thread.sh b/samples/pktgen/pktgen_sample05_flow_per_thread.sh new file mode 100755 index 0000000..32ad818 --- /dev/null +++ b/samples/pktgen/pktgen_sample05_flow_per_thread.sh @@ -0,0 +1,81 @@ +#!/bin/bash +# +# Script will generate one flow per thread (-t N) +# - Same destination IP +# - Fake source IPs for each flow (fixed based on thread number) +# +# Useful for scale testing on receiver, to see whether silo'ing flows +# works and scales. For optimal scalability (on receiver) each +# separate-flow should not access shared variables/data. This script +# helps magnify any of these scaling issues by overloading the receiver. +# +basedir=`dirname $0` +source ${basedir}/functions.sh +root_check_run_with_sudo "$@" + +# Parameter parsing via include +source ${basedir}/parameters.sh +# Set some default params, if they didn't get set +[ -z "$DEST_IP" ] && DEST_IP="198.18.0.42" +[ -z "$DST_MAC" ] && DST_MAC="90:e2:ba:ff:ff:ff" +[ -z "$CLONE_SKB" ] && CLONE_SKB="0" +[ -z "$BURST" ] && BURST=32 + + +# Base Config +DELAY="0" # Zero means max speed +COUNT="0" # Zero means indefinitely + +# General cleanup everything since last run +pg_ctrl "reset" + +# Threads are specified with parameter -t value in $THREADS +for ((thread = 0; thread < $THREADS; thread++)); do + dev=${DEV}@${thread} + + # Add remove all other devices and add_device $dev to thread + pg_thread $thread "rem_device_all" + pg_thread $thread "add_device" $dev + + # Base config + pg_set $dev "flag QUEUE_MAP_CPU" + pg_set $dev "count $COUNT" + pg_set $dev "clone_skb $CLONE_SKB" + pg_set $dev "pkt_size $PKT_SIZE" + pg_set $dev "delay $DELAY" + pg_set $dev "flag NO_TIMESTAMP" + + # Single destination + pg_set $dev "dst_mac $DST_MAC" + pg_set $dev "dst $DEST_IP" + + # Setup source IP-addresses based on thread number + pg_set $dev "src_min 198.18.$((thread+1)).1" + pg_set $dev "src_max 198.18.$((thread+1)).1" + + # Setup burst, for easy testing -b 0 disable bursting + # (internally in pktgen default and minimum burst=1) + if [[ ${BURST} -ne 0 ]]; then + pg_set $dev "burst $BURST" + else + info "$dev: Not using burst" + fi + +done + +# Run if user hits control-c +function print_result() { + # Print results + for ((thread = 0; thread < $THREADS; thread++)); do + dev=${DEV}@${thread} + echo "Device: $dev" + cat /proc/net/pktgen/$dev | grep -A2 "Result:" + done +} +# trap keyboard interrupt (Ctrl-C) +trap true SIGINT + +echo "Running... ctrl^C to stop" >&2 +pg_ctrl "start" + +print_result |