summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--drivers/net/geneve.c10
-rw-r--r--drivers/net/vxlan.c38
-rw-r--r--include/net/checksum.h5
-rw-r--r--include/net/ip_tunnels.h15
-rw-r--r--include/uapi/linux/bpf.h13
-rw-r--r--net/core/filter.c119
-rw-r--r--net/ipv4/ip_gre.c10
7 files changed, 165 insertions, 45 deletions
diff --git a/drivers/net/geneve.c b/drivers/net/geneve.c
index bc5da35..6a0cbbe 100644
--- a/drivers/net/geneve.c
+++ b/drivers/net/geneve.c
@@ -775,10 +775,10 @@ static struct rtable *geneve_get_v4_rt(struct sk_buff *skb,
struct flowi4 *fl4,
struct ip_tunnel_info *info)
{
+ bool use_cache = ip_tunnel_dst_cache_usable(skb, info);
struct geneve_dev *geneve = netdev_priv(dev);
struct dst_cache *dst_cache;
struct rtable *rt = NULL;
- bool use_cache = true;
__u8 tos;
memset(fl4, 0, sizeof(*fl4));
@@ -804,7 +804,6 @@ static struct rtable *geneve_get_v4_rt(struct sk_buff *skb,
dst_cache = &geneve->dst_cache;
}
- use_cache = use_cache && !skb->mark;
if (use_cache) {
rt = dst_cache_get_ip4(dst_cache, &fl4->saddr);
if (rt)
@@ -832,11 +831,11 @@ static struct dst_entry *geneve_get_v6_dst(struct sk_buff *skb,
struct flowi6 *fl6,
struct ip_tunnel_info *info)
{
+ bool use_cache = ip_tunnel_dst_cache_usable(skb, info);
struct geneve_dev *geneve = netdev_priv(dev);
struct geneve_sock *gs6 = geneve->sock6;
struct dst_entry *dst = NULL;
struct dst_cache *dst_cache;
- bool use_cache = true;
__u8 prio;
memset(fl6, 0, sizeof(*fl6));
@@ -862,7 +861,6 @@ static struct dst_entry *geneve_get_v6_dst(struct sk_buff *skb,
dst_cache = &geneve->dst_cache;
}
- use_cache = use_cache && !skb->mark;
if (use_cache) {
dst = dst_cache_get_ip6(dst_cache, &fl6->saddr);
if (dst)
@@ -940,7 +938,7 @@ static netdev_tx_t geneve_xmit_skb(struct sk_buff *skb, struct net_device *dev,
u8 vni[3];
tunnel_id_to_vni(key->tun_id, vni);
- if (key->tun_flags & TUNNEL_GENEVE_OPT)
+ if (info->options_len)
opts = ip_tunnel_info_opts(info);
if (key->tun_flags & TUNNEL_CSUM)
@@ -1027,7 +1025,7 @@ static netdev_tx_t geneve6_xmit_skb(struct sk_buff *skb, struct net_device *dev,
u8 vni[3];
tunnel_id_to_vni(key->tun_id, vni);
- if (key->tun_flags & TUNNEL_GENEVE_OPT)
+ if (info->options_len)
opts = ip_tunnel_info_opts(info);
if (key->tun_flags & TUNNEL_CSUM)
diff --git a/drivers/net/vxlan.c b/drivers/net/vxlan.c
index fc998a3..2399099 100644
--- a/drivers/net/vxlan.c
+++ b/drivers/net/vxlan.c
@@ -1756,17 +1756,15 @@ static struct rtable *vxlan_get_route(struct vxlan_dev *vxlan,
struct sk_buff *skb, int oif, u8 tos,
__be32 daddr, __be32 *saddr,
struct dst_cache *dst_cache,
- struct ip_tunnel_info *info)
+ const struct ip_tunnel_info *info)
{
+ bool use_cache = ip_tunnel_dst_cache_usable(skb, info);
struct rtable *rt = NULL;
- bool use_cache = false;
struct flowi4 fl4;
- /* when the ip_tunnel_info is availble, the tos used for lookup is
- * packet independent, so we can use the cache
- */
- if (!skb->mark && (!tos || info)) {
- use_cache = true;
+ if (tos && !info)
+ use_cache = false;
+ if (use_cache) {
rt = dst_cache_get_ip4(dst_cache, saddr);
if (rt)
return rt;
@@ -1791,16 +1789,20 @@ static struct rtable *vxlan_get_route(struct vxlan_dev *vxlan,
#if IS_ENABLED(CONFIG_IPV6)
static struct dst_entry *vxlan6_get_route(struct vxlan_dev *vxlan,
- struct sk_buff *skb, int oif,
+ struct sk_buff *skb, int oif, u8 tos,
const struct in6_addr *daddr,
struct in6_addr *saddr,
- struct dst_cache *dst_cache)
+ struct dst_cache *dst_cache,
+ const struct ip_tunnel_info *info)
{
+ bool use_cache = ip_tunnel_dst_cache_usable(skb, info);
struct dst_entry *ndst;
struct flowi6 fl6;
int err;
- if (!skb->mark) {
+ if (tos && !info)
+ use_cache = false;
+ if (use_cache) {
ndst = dst_cache_get_ip6(dst_cache, saddr);
if (ndst)
return ndst;
@@ -1808,6 +1810,7 @@ static struct dst_entry *vxlan6_get_route(struct vxlan_dev *vxlan,
memset(&fl6, 0, sizeof(fl6));
fl6.flowi6_oif = oif;
+ fl6.flowi6_tos = RT_TOS(tos);
fl6.daddr = *daddr;
fl6.saddr = vxlan->cfg.saddr.sin6.sin6_addr;
fl6.flowi6_mark = skb->mark;
@@ -1820,7 +1823,7 @@ static struct dst_entry *vxlan6_get_route(struct vxlan_dev *vxlan,
return ERR_PTR(err);
*saddr = fl6.saddr;
- if (!skb->mark)
+ if (use_cache)
dst_cache_set_ip6(dst_cache, ndst, saddr);
return ndst;
}
@@ -2016,9 +2019,9 @@ static void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev,
sk = vxlan->vn6_sock->sock->sk;
ndst = vxlan6_get_route(vxlan, skb,
- rdst ? rdst->remote_ifindex : 0,
+ rdst ? rdst->remote_ifindex : 0, tos,
&dst->sin6.sin6_addr, &saddr,
- dst_cache);
+ dst_cache, info);
if (IS_ERR(ndst)) {
netdev_dbg(dev, "no route to %pI6\n",
&dst->sin6.sin6_addr);
@@ -2053,6 +2056,7 @@ static void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev,
if (!info)
udp_sum = !(flags & VXLAN_F_UDP_ZERO_CSUM6_TX);
+ tos = ip_tunnel_ecn_encap(tos, old_iph, skb);
ttl = ttl ? : ip6_dst_hoplimit(ndst);
skb_scrub_packet(skb, xnet);
err = vxlan_build_skb(skb, ndst, sizeof(struct ipv6hdr),
@@ -2062,8 +2066,8 @@ static void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev,
return;
}
udp_tunnel6_xmit_skb(ndst, sk, skb, dev,
- &saddr, &dst->sin6.sin6_addr,
- 0, ttl, src_port, dst_port, !udp_sum);
+ &saddr, &dst->sin6.sin6_addr, tos, ttl,
+ src_port, dst_port, !udp_sum);
#endif
}
@@ -2385,9 +2389,9 @@ static int vxlan_fill_metadata_dst(struct net_device *dev, struct sk_buff *skb)
if (!vxlan->vn6_sock)
return -EINVAL;
- ndst = vxlan6_get_route(vxlan, skb, 0,
+ ndst = vxlan6_get_route(vxlan, skb, 0, info->key.tos,
&info->key.u.ipv6.dst,
- &info->key.u.ipv6.src, NULL);
+ &info->key.u.ipv6.src, NULL, info);
if (IS_ERR(ndst))
return PTR_ERR(ndst);
dst_release(ndst);
diff --git a/include/net/checksum.h b/include/net/checksum.h
index 10a16b5..abffc64 100644
--- a/include/net/checksum.h
+++ b/include/net/checksum.h
@@ -120,6 +120,11 @@ static inline __wsum csum_partial_ext(const void *buff, int len, __wsum sum)
#define CSUM_MANGLED_0 ((__force __sum16)0xffff)
+static inline void csum_replace_by_diff(__sum16 *sum, __wsum diff)
+{
+ *sum = csum_fold(csum_add(diff, ~csum_unfold(*sum)));
+}
+
static inline void csum_replace4(__sum16 *sum, __be32 from, __be32 to)
{
__wsum tmp = csum_sub(~csum_unfold(*sum), (__force __wsum)from);
diff --git a/include/net/ip_tunnels.h b/include/net/ip_tunnels.h
index 5f28b60..e1395d7 100644
--- a/include/net/ip_tunnels.h
+++ b/include/net/ip_tunnels.h
@@ -140,6 +140,7 @@ struct ip_tunnel {
#define TUNNEL_CRIT_OPT __cpu_to_be16(0x0400)
#define TUNNEL_GENEVE_OPT __cpu_to_be16(0x0800)
#define TUNNEL_VXLAN_OPT __cpu_to_be16(0x1000)
+#define TUNNEL_NOCACHE __cpu_to_be16(0x2000)
#define TUNNEL_OPTIONS_PRESENT (TUNNEL_GENEVE_OPT | TUNNEL_VXLAN_OPT)
@@ -206,6 +207,20 @@ static inline void ip_tunnel_key_init(struct ip_tunnel_key *key,
0, sizeof(*key) - IP_TUNNEL_KEY_SIZE);
}
+static inline bool
+ip_tunnel_dst_cache_usable(const struct sk_buff *skb,
+ const struct ip_tunnel_info *info)
+{
+ if (skb->mark)
+ return false;
+ if (!info)
+ return true;
+ if (info->key.tun_flags & TUNNEL_NOCACHE)
+ return false;
+
+ return true;
+}
+
static inline unsigned short ip_tunnel_info_af(const struct ip_tunnel_info
*tun_info)
{
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index ee21932..9221f65 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -298,6 +298,17 @@ enum bpf_func_id {
* Return: csum result
*/
BPF_FUNC_csum_diff,
+
+ /**
+ * bpf_skb_[gs]et_tunnel_opt(skb, opt, size)
+ * retrieve or populate tunnel options metadata
+ * @skb: pointer to skb
+ * @opt: pointer to raw tunnel option data
+ * @size: size of @opt
+ * Return: 0 on success for set, option size for get
+ */
+ BPF_FUNC_skb_get_tunnel_opt,
+ BPF_FUNC_skb_set_tunnel_opt,
__BPF_FUNC_MAX_ID,
};
@@ -305,6 +316,7 @@ enum bpf_func_id {
/* BPF_FUNC_skb_store_bytes flags. */
#define BPF_F_RECOMPUTE_CSUM (1ULL << 0)
+#define BPF_F_INVALIDATE_HASH (1ULL << 1)
/* BPF_FUNC_l3_csum_replace and BPF_FUNC_l4_csum_replace flags.
* First 4 bits are for passing the header field size.
@@ -329,6 +341,7 @@ enum bpf_func_id {
/* BPF_FUNC_skb_set_tunnel_key flags. */
#define BPF_F_ZERO_CSUM_TX (1ULL << 1)
+#define BPF_F_DONT_FRAGMENT (1ULL << 2)
/* user accessible mirror of in-kernel sk_buff.
* new fields can only be added to the end of this structure
diff --git a/net/core/filter.c b/net/core/filter.c
index 69f4ffc..a66dc03 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -1353,7 +1353,7 @@ static u64 bpf_skb_store_bytes(u64 r1, u64 r2, u64 r3, u64 r4, u64 flags)
unsigned int len = (unsigned int) r4;
void *ptr;
- if (unlikely(flags & ~(BPF_F_RECOMPUTE_CSUM)))
+ if (unlikely(flags & ~(BPF_F_RECOMPUTE_CSUM | BPF_F_INVALIDATE_HASH)))
return -EINVAL;
/* bpf verifier guarantees that:
@@ -1384,11 +1384,13 @@ static u64 bpf_skb_store_bytes(u64 r1, u64 r2, u64 r3, u64 r4, u64 flags)
if (flags & BPF_F_RECOMPUTE_CSUM)
skb_postpush_rcsum(skb, ptr, len);
+ if (flags & BPF_F_INVALIDATE_HASH)
+ skb_clear_hash(skb);
return 0;
}
-const struct bpf_func_proto bpf_skb_store_bytes_proto = {
+static const struct bpf_func_proto bpf_skb_store_bytes_proto = {
.func = bpf_skb_store_bytes,
.gpl_only = false,
.ret_type = RET_INTEGER,
@@ -1419,7 +1421,7 @@ static u64 bpf_skb_load_bytes(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5)
return 0;
}
-const struct bpf_func_proto bpf_skb_load_bytes_proto = {
+static const struct bpf_func_proto bpf_skb_load_bytes_proto = {
.func = bpf_skb_load_bytes,
.gpl_only = false,
.ret_type = RET_INTEGER,
@@ -1447,6 +1449,12 @@ static u64 bpf_l3_csum_replace(u64 r1, u64 r2, u64 from, u64 to, u64 flags)
return -EFAULT;
switch (flags & BPF_F_HDR_FIELD_MASK) {
+ case 0:
+ if (unlikely(from != 0))
+ return -EINVAL;
+
+ csum_replace_by_diff(ptr, to);
+ break;
case 2:
csum_replace2(ptr, from, to);
break;
@@ -1464,7 +1472,7 @@ static u64 bpf_l3_csum_replace(u64 r1, u64 r2, u64 from, u64 to, u64 flags)
return 0;
}
-const struct bpf_func_proto bpf_l3_csum_replace_proto = {
+static const struct bpf_func_proto bpf_l3_csum_replace_proto = {
.func = bpf_l3_csum_replace,
.gpl_only = false,
.ret_type = RET_INTEGER,
@@ -1523,7 +1531,7 @@ static u64 bpf_l4_csum_replace(u64 r1, u64 r2, u64 from, u64 to, u64 flags)
return 0;
}
-const struct bpf_func_proto bpf_l4_csum_replace_proto = {
+static const struct bpf_func_proto bpf_l4_csum_replace_proto = {
.func = bpf_l4_csum_replace,
.gpl_only = false,
.ret_type = RET_INTEGER,
@@ -1562,7 +1570,7 @@ static u64 bpf_csum_diff(u64 r1, u64 from_size, u64 r3, u64 to_size, u64 seed)
return csum_partial(sp->diff, diff_size, seed);
}
-const struct bpf_func_proto bpf_csum_diff_proto = {
+static const struct bpf_func_proto bpf_csum_diff_proto = {
.func = bpf_csum_diff,
.gpl_only = false,
.ret_type = RET_INTEGER,
@@ -1600,7 +1608,7 @@ static u64 bpf_clone_redirect(u64 r1, u64 ifindex, u64 flags, u64 r4, u64 r5)
return dev_queue_xmit(skb2);
}
-const struct bpf_func_proto bpf_clone_redirect_proto = {
+static const struct bpf_func_proto bpf_clone_redirect_proto = {
.func = bpf_clone_redirect,
.gpl_only = false,
.ret_type = RET_INTEGER,
@@ -1652,7 +1660,7 @@ int skb_do_redirect(struct sk_buff *skb)
return dev_queue_xmit(skb);
}
-const struct bpf_func_proto bpf_redirect_proto = {
+static const struct bpf_func_proto bpf_redirect_proto = {
.func = bpf_redirect,
.gpl_only = false,
.ret_type = RET_INTEGER,
@@ -1791,7 +1799,7 @@ static u64 bpf_skb_get_tunnel_key(u64 r1, u64 r2, u64 size, u64 flags, u64 r5)
return 0;
}
-const struct bpf_func_proto bpf_skb_get_tunnel_key_proto = {
+static const struct bpf_func_proto bpf_skb_get_tunnel_key_proto = {
.func = bpf_skb_get_tunnel_key,
.gpl_only = false,
.ret_type = RET_INTEGER,
@@ -1801,6 +1809,32 @@ const struct bpf_func_proto bpf_skb_get_tunnel_key_proto = {
.arg4_type = ARG_ANYTHING,
};
+static u64 bpf_skb_get_tunnel_opt(u64 r1, u64 r2, u64 size, u64 r4, u64 r5)
+{
+ struct sk_buff *skb = (struct sk_buff *) (long) r1;
+ u8 *to = (u8 *) (long) r2;
+ const struct ip_tunnel_info *info = skb_tunnel_info(skb);
+
+ if (unlikely(!info ||
+ !(info->key.tun_flags & TUNNEL_OPTIONS_PRESENT)))
+ return -ENOENT;
+ if (unlikely(size < info->options_len))
+ return -ENOMEM;
+
+ ip_tunnel_info_opts_get(to, info);
+
+ return info->options_len;
+}
+
+static const struct bpf_func_proto bpf_skb_get_tunnel_opt_proto = {
+ .func = bpf_skb_get_tunnel_opt,
+ .gpl_only = false,
+ .ret_type = RET_INTEGER,
+ .arg1_type = ARG_PTR_TO_CTX,
+ .arg2_type = ARG_PTR_TO_STACK,
+ .arg3_type = ARG_CONST_STACK_SIZE,
+};
+
static struct metadata_dst __percpu *md_dst;
static u64 bpf_skb_set_tunnel_key(u64 r1, u64 r2, u64 size, u64 flags, u64 r5)
@@ -1811,7 +1845,8 @@ static u64 bpf_skb_set_tunnel_key(u64 r1, u64 r2, u64 size, u64 flags, u64 r5)
u8 compat[sizeof(struct bpf_tunnel_key)];
struct ip_tunnel_info *info;
- if (unlikely(flags & ~(BPF_F_TUNINFO_IPV6 | BPF_F_ZERO_CSUM_TX)))
+ if (unlikely(flags & ~(BPF_F_TUNINFO_IPV6 | BPF_F_ZERO_CSUM_TX |
+ BPF_F_DONT_FRAGMENT)))
return -EINVAL;
if (unlikely(size != sizeof(struct bpf_tunnel_key))) {
switch (size) {
@@ -1835,7 +1870,10 @@ static u64 bpf_skb_set_tunnel_key(u64 r1, u64 r2, u64 size, u64 flags, u64 r5)
info = &md->u.tun_info;
info->mode = IP_TUNNEL_INFO_TX;
- info->key.tun_flags = TUNNEL_KEY | TUNNEL_CSUM;
+ info->key.tun_flags = TUNNEL_KEY | TUNNEL_CSUM | TUNNEL_NOCACHE;
+ if (flags & BPF_F_DONT_FRAGMENT)
+ info->key.tun_flags |= TUNNEL_DONT_FRAGMENT;
+
info->key.tun_id = cpu_to_be64(from->tunnel_id);
info->key.tos = from->tunnel_tos;
info->key.ttl = from->tunnel_ttl;
@@ -1853,7 +1891,7 @@ static u64 bpf_skb_set_tunnel_key(u64 r1, u64 r2, u64 size, u64 flags, u64 r5)
return 0;
}
-const struct bpf_func_proto bpf_skb_set_tunnel_key_proto = {
+static const struct bpf_func_proto bpf_skb_set_tunnel_key_proto = {
.func = bpf_skb_set_tunnel_key,
.gpl_only = false,
.ret_type = RET_INTEGER,
@@ -1863,17 +1901,58 @@ const struct bpf_func_proto bpf_skb_set_tunnel_key_proto = {
.arg4_type = ARG_ANYTHING,
};
-static const struct bpf_func_proto *bpf_get_skb_set_tunnel_key_proto(void)
+#define BPF_TUNLEN_MAX 255
+
+static u64 bpf_skb_set_tunnel_opt(u64 r1, u64 r2, u64 size, u64 r4, u64 r5)
+{
+ struct sk_buff *skb = (struct sk_buff *) (long) r1;
+ u8 *from = (u8 *) (long) r2;
+ struct ip_tunnel_info *info = skb_tunnel_info(skb);
+ const struct metadata_dst *md = this_cpu_ptr(md_dst);
+
+ if (unlikely(info != &md->u.tun_info || (size & (sizeof(u32) - 1))))
+ return -EINVAL;
+ if (unlikely(size > BPF_TUNLEN_MAX))
+ return -ENOMEM;
+
+ ip_tunnel_info_opts_set(info, from, size);
+
+ return 0;
+}
+
+static const struct bpf_func_proto bpf_skb_set_tunnel_opt_proto = {
+ .func = bpf_skb_set_tunnel_opt,
+ .gpl_only = false,
+ .ret_type = RET_INTEGER,
+ .arg1_type = ARG_PTR_TO_CTX,
+ .arg2_type = ARG_PTR_TO_STACK,
+ .arg3_type = ARG_CONST_STACK_SIZE,
+};
+
+static const struct bpf_func_proto *
+bpf_get_skb_set_tunnel_proto(enum bpf_func_id which)
{
if (!md_dst) {
- /* race is not possible, since it's called from
- * verifier that is holding verifier mutex
+ BUILD_BUG_ON(FIELD_SIZEOF(struct ip_tunnel_info,
+ options_len) != 1);
+
+ /* Race is not possible, since it's called from verifier
+ * that is holding verifier mutex.
*/
- md_dst = metadata_dst_alloc_percpu(0, GFP_KERNEL);
+ md_dst = metadata_dst_alloc_percpu(BPF_TUNLEN_MAX,
+ GFP_KERNEL);
if (!md_dst)
return NULL;
}
- return &bpf_skb_set_tunnel_key_proto;
+
+ switch (which) {
+ case BPF_FUNC_skb_set_tunnel_key:
+ return &bpf_skb_set_tunnel_key_proto;
+ case BPF_FUNC_skb_set_tunnel_opt:
+ return &bpf_skb_set_tunnel_opt_proto;
+ default:
+ return NULL;
+ }
}
static const struct bpf_func_proto *
@@ -1927,7 +2006,11 @@ tc_cls_act_func_proto(enum bpf_func_id func_id)
case BPF_FUNC_skb_get_tunnel_key:
return &bpf_skb_get_tunnel_key_proto;
case BPF_FUNC_skb_set_tunnel_key:
- return bpf_get_skb_set_tunnel_key_proto();
+ return bpf_get_skb_set_tunnel_proto(func_id);
+ case BPF_FUNC_skb_get_tunnel_opt:
+ return &bpf_skb_get_tunnel_opt_proto;
+ case BPF_FUNC_skb_set_tunnel_opt:
+ return bpf_get_skb_set_tunnel_proto(func_id);
case BPF_FUNC_redirect:
return &bpf_redirect_proto;
case BPF_FUNC_get_route_realm:
diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c
index 202437d..31936d3 100644
--- a/net/ipv4/ip_gre.c
+++ b/net/ipv4/ip_gre.c
@@ -527,11 +527,12 @@ static void gre_fb_xmit(struct sk_buff *skb, struct net_device *dev)
{
struct ip_tunnel_info *tun_info;
const struct ip_tunnel_key *key;
+ struct rtable *rt = NULL;
struct flowi4 fl;
- struct rtable *rt;
int min_headroom;
int tunnel_hlen;
__be16 df, flags;
+ bool use_cache;
int err;
tun_info = skb_tunnel_info(skb);
@@ -540,13 +541,14 @@ static void gre_fb_xmit(struct sk_buff *skb, struct net_device *dev)
goto err_free_skb;
key = &tun_info->key;
- rt = !skb->mark ? dst_cache_get_ip4(&tun_info->dst_cache, &fl.saddr) :
- NULL;
+ use_cache = ip_tunnel_dst_cache_usable(skb, tun_info);
+ if (use_cache)
+ rt = dst_cache_get_ip4(&tun_info->dst_cache, &fl.saddr);
if (!rt) {
rt = gre_get_rt(skb, dev, &fl, key);
if (IS_ERR(rt))
goto err_free_skb;
- if (!skb->mark)
+ if (use_cache)
dst_cache_set_ip4(&tun_info->dst_cache, &rt->dst,
fl.saddr);
}
OpenPOWER on IntegriCloud