From 2d17d8d79e77ff3f1b35b87522fc72fa562260ff Mon Sep 17 00:00:00 2001 From: Song Liu Date: Thu, 14 Dec 2017 17:17:56 -0800 Subject: xdp: linearize skb in netif_receive_generic_xdp() In netif_receive_generic_xdp(), it is necessary to linearize all nonlinear skb. However, in current implementation, skb with troom <= 0 are not linearized. This patch fixes this by calling skb_linearize() for all nonlinear skb. Fixes: de8f3a83b0a0 ("bpf: add meta pointer for direct access") Signed-off-by: Song Liu Acked-by: Martin KaFai Lau Cc: Daniel Borkmann Cc: Alexei Starovoitov Signed-off-by: Daniel Borkmann --- net/core/dev.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net/core') diff --git a/net/core/dev.c b/net/core/dev.c index f47e96b..01ee854 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -3904,7 +3904,7 @@ static u32 netif_receive_generic_xdp(struct sk_buff *skb, hroom > 0 ? ALIGN(hroom, NET_SKB_PAD) : 0, troom > 0 ? troom + 128 : 0, GFP_ATOMIC)) goto do_drop; - if (troom > 0 && __skb_linearize(skb)) + if (skb_linearize(skb)) goto do_drop; } -- cgit v1.1 From 21b5944350052d2583e82dd59b19a9ba94a007f0 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Tue, 19 Dec 2017 11:27:56 -0600 Subject: net: Fix double free and memory corruption in get_net_ns_by_id() (I can trivially verify that that idr_remove in cleanup_net happens after the network namespace count has dropped to zero --EWB) Function get_net_ns_by_id() does not check for net::count after it has found a peer in netns_ids idr. It may dereference a peer, after its count has already been finaly decremented. This leads to double free and memory corruption: put_net(peer) rtnl_lock() atomic_dec_and_test(&peer->count) [count=0] ... __put_net(peer) get_net_ns_by_id(net, id) spin_lock(&cleanup_list_lock) list_add(&net->cleanup_list, &cleanup_list) spin_unlock(&cleanup_list_lock) queue_work() peer = idr_find(&net->netns_ids, id) | get_net(peer) [count=1] | ... | (use after final put) v ... cleanup_net() ... spin_lock(&cleanup_list_lock) ... list_replace_init(&cleanup_list, ..) ... spin_unlock(&cleanup_list_lock) ... ... ... ... put_net(peer) ... atomic_dec_and_test(&peer->count) [count=0] ... spin_lock(&cleanup_list_lock) ... list_add(&net->cleanup_list, &cleanup_list) ... spin_unlock(&cleanup_list_lock) ... queue_work() ... rtnl_unlock() rtnl_lock() ... for_each_net(tmp) { ... id = __peernet2id(tmp, peer) ... spin_lock_irq(&tmp->nsid_lock) ... idr_remove(&tmp->netns_ids, id) ... ... ... net_drop_ns() ... net_free(peer) ... } ... | v cleanup_net() ... (Second free of peer) Also, put_net() on the right cpu may reorder with left's cpu list_replace_init(&cleanup_list, ..), and then cleanup_list will be corrupted. Since cleanup_net() is executed in worker thread, while put_net(peer) can happen everywhere, there should be enough time for concurrent get_net_ns_by_id() to pick the peer up, and the race does not seem to be unlikely. The patch fixes the problem in standard way. (Also, there is possible problem in peernet2id_alloc(), which requires check for net::count under nsid_lock and maybe_get_net(peer), but in current stable kernel it's used under rtnl_lock() and it has to be safe. Openswitch begun to use peernet2id_alloc(), and possibly it should be fixed too. While this is not in stable kernel yet, so I'll send a separate message to netdev@ later). Cc: Nicolas Dichtel Signed-off-by: Kirill Tkhai Fixes: 0c7aecd4bde4 "netns: add rtnl cmd to add and get peer netns ids" Reviewed-by: Andrey Ryabinin Reviewed-by: "Eric W. Biederman" Signed-off-by: Eric W. Biederman Reviewed-by: Eric Dumazet Acked-by: Nicolas Dichtel Signed-off-by: David S. Miller --- net/core/net_namespace.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net/core') diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c index b797832..60a71be 100644 --- a/net/core/net_namespace.c +++ b/net/core/net_namespace.c @@ -267,7 +267,7 @@ struct net *get_net_ns_by_id(struct net *net, int id) spin_lock_bh(&net->nsid_lock); peer = idr_find(&net->netns_ids, id); if (peer) - get_net(peer); + peer = maybe_get_net(peer); spin_unlock_bh(&net->nsid_lock); rcu_read_unlock(); -- cgit v1.1 From 268b790679422a89e9ab0685d9f291edae780c98 Mon Sep 17 00:00:00 2001 From: Willem de Bruijn Date: Wed, 20 Dec 2017 17:37:49 -0500 Subject: skbuff: orphan frags before zerocopy clone Call skb_zerocopy_clone after skb_orphan_frags, to avoid duplicate calls to skb_uarg(skb)->callback for the same data. skb_zerocopy_clone associates skb_shinfo(skb)->uarg from frag_skb with each segment. This is only safe for uargs that do refcounting, which is those that pass skb_orphan_frags without dropping their shared frags. For others, skb_orphan_frags drops the user frags and sets the uarg to NULL, after which sock_zerocopy_clone has no effect. Qemu hangs were reported due to duplicate vhost_net_zerocopy_callback calls for the same data causing the vhost_net_ubuf_ref_>refcount to drop below zero. Link: http://lkml.kernel.org/r/ Fixes: 1f8b977ab32d ("sock: enable MSG_ZEROCOPY") Reported-by: Andreas Hartmann Reported-by: David Hill Signed-off-by: Willem de Bruijn Signed-off-by: David S. Miller --- net/core/skbuff.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'net/core') diff --git a/net/core/skbuff.c b/net/core/skbuff.c index a592ca0..edf40ac 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -3654,8 +3654,6 @@ normal: skb_shinfo(nskb)->tx_flags |= skb_shinfo(head_skb)->tx_flags & SKBTX_SHARED_FRAG; - if (skb_zerocopy_clone(nskb, head_skb, GFP_ATOMIC)) - goto err; while (pos < offset + len) { if (i >= nfrags) { @@ -3681,6 +3679,8 @@ normal: if (unlikely(skb_orphan_frags(frag_skb, GFP_ATOMIC))) goto err; + if (skb_zerocopy_clone(nskb, frag_skb, GFP_ATOMIC)) + goto err; *nskb_frag = *frag; __skb_frag_ref(nskb_frag); -- cgit v1.1 From b90ddd568792bcb0054eaf0f61785c8f80c3bd1c Mon Sep 17 00:00:00 2001 From: Willem de Bruijn Date: Wed, 20 Dec 2017 17:37:50 -0500 Subject: skbuff: skb_copy_ubufs must release uarg even without user frags skb_copy_ubufs creates a private copy of frags[] to release its hold on user frags, then calls uarg->callback to notify the owner. Call uarg->callback even when no frags exist. This edge case can happen when zerocopy_sg_from_iter finds enough room in skb_headlen to copy all the data. Fixes: 3ece782693c4 ("sock: skb_copy_ubufs support for compound pages") Signed-off-by: Willem de Bruijn Signed-off-by: David S. Miller --- net/core/skbuff.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'net/core') diff --git a/net/core/skbuff.c b/net/core/skbuff.c index edf40ac..a3cb0be 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -1178,7 +1178,7 @@ int skb_copy_ubufs(struct sk_buff *skb, gfp_t gfp_mask) u32 d_off; if (!num_frags) - return 0; + goto release; if (skb_shared(skb) || skb_unclone(skb, gfp_mask)) return -EINVAL; @@ -1238,6 +1238,7 @@ int skb_copy_ubufs(struct sk_buff *skb, gfp_t gfp_mask) __skb_fill_page_desc(skb, new_frags - 1, head, 0, d_off); skb_shinfo(skb)->nr_frags = new_frags; +release: skb_zcopy_clear(skb, false); return 0; } -- cgit v1.1