From d92191aa84e5f187d543867c3d54b38f294833fa Mon Sep 17 00:00:00 2001
From: Pablo Neira Ayuso
Date: Wed, 21 Mar 2018 13:55:42 +0100
Subject: netfilter: nf_tables: cache device name in flowtable object

Devices going away have to grab the nfnl_lock from the netdev event
path to avoid races with control plane updates. However, netlink dumps
in netfilter do not hold the nfnl_lock mutex. Cache the device name in
the flowtable objects to avoid a use-after-free on a device that is
going away.

Signed-off-by: Pablo Neira Ayuso
---
 include/net/netfilter/nf_tables.h | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'include/net')

diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h
index 663b015..30eb065 100644
--- a/include/net/netfilter/nf_tables.h
+++ b/include/net/netfilter/nf_tables.h
@@ -1068,6 +1068,8 @@ struct nft_object_ops {
 int nft_register_obj(struct nft_object_type *obj_type);
 void nft_unregister_obj(struct nft_object_type *obj_type);
 
+#define NFT_FLOWTABLE_DEVICE_MAX	8
+
 /**
  * struct nft_flowtable - nf_tables flow table
  *
@@ -1080,6 +1082,7 @@ void nft_unregister_obj(struct nft_object_type *obj_type);
  * @genmask: generation mask
  * @use: number of references to this flow table
  * @handle: unique object handle
+ * @dev_name: array of device names
  * @data: rhashtable and garbage collector
  * @ops: array of hooks
  */
@@ -1093,6 +1096,7 @@ struct nft_flowtable {
	u32				genmask:2,
					use:30;
	u64				handle;
+	char				*dev_name[NFT_FLOWTABLE_DEVICE_MAX];
	/* runtime data below here */
	struct nf_hook_ops		*ops ____cacheline_aligned;
	struct nf_flowtable		data;
--
cgit v1.1
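To make the pattern concrete, here is a minimal standalone C sketch of
the idea behind this patch -- not the kernel implementation; the types
and helpers (struct flowtable, flowtable_add_dev(), flowtable_dump())
are invented for illustration. The name is copied while the device is
known to be alive, so a later dump never dereferences the device
object itself:

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#define FLOWTABLE_DEVICE_MAX 8	/* mirrors NFT_FLOWTABLE_DEVICE_MAX */

struct device {
	char name[16];
};

struct flowtable {
	struct device *devs[FLOWTABLE_DEVICE_MAX];	/* may go away */
	char *dev_name[FLOWTABLE_DEVICE_MAX];		/* cached copies */
	int ndevs;
};

static int flowtable_add_dev(struct flowtable *ft, struct device *dev)
{
	if (ft->ndevs >= FLOWTABLE_DEVICE_MAX)
		return -1;
	/* Copy the name while the device is known to be alive. */
	ft->dev_name[ft->ndevs] = strdup(dev->name);
	ft->devs[ft->ndevs] = dev;
	ft->ndevs++;
	return 0;
}

static void flowtable_dump(const struct flowtable *ft)
{
	/* Reads only the cached copy, never ft->devs[i], so this is
	 * safe even after the event path has freed the device. */
	for (int i = 0; i < ft->ndevs; i++)
		printf("dev %s\n", ft->dev_name[i]);
}

int main(void)
{
	struct device *eth0 = calloc(1, sizeof(*eth0));
	struct flowtable ft = { .ndevs = 0 };

	snprintf(eth0->name, sizeof(eth0->name), "eth0");
	flowtable_add_dev(&ft, eth0);
	free(eth0);		/* device goes away... */
	flowtable_dump(&ft);	/* ...but the dump still has the name */
	return 0;
}

The patch above adds exactly this kind of storage: dev_name[] holds
per-device name copies that the netlink dump path can read without
taking nfnl_lock.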
From eb82a994479245a79647d302f9b4eb8e7c9d7ca6 Mon Sep 17 00:00:00 2001
From: John Fastabend
Date: Sat, 24 Mar 2018 22:25:06 -0700
Subject: net: sched, fix OOO packets with pfifo_fast

After the qdisc lock was dropped in pfifo_fast, we allow multiple
enqueue threads and dequeue threads to run in parallel. On the enqueue
side the skb bit ooo_okay is used to ensure all related skbs are
enqueued in-order. On the dequeue side, though, there is no similar
logic.

What we observe is that with fewer queues than CPUs it is possible to
re-order packets when two instances of __qdisc_run() are running in
parallel. Each thread dequeues an skb, and whichever thread calls the
ndo op first gets its skb onto the wire.

This doesn't typically happen because qdisc_run() is usually triggered
by the same core that did the enqueue. However, drivers will trigger
__netif_schedule() when queues transition from stopped to awake, using
the netif_tx_wake_* APIs. When this happens, netif_schedule() calls
qdisc_run() on the same CPU that ran netif_tx_wake_*, which is usually
the interrupt completion context. That CPU is selected by the irq
affinity, which is unrelated to the enqueue operations.

To resolve this, add a RUNNING bit to the qdisc so that only a single
dequeue per qdisc can be in progress at a time. Enqueue and dequeue
operations can still run in parallel, and on multi-queue NICs we can
still have one dequeue in flight per qdisc, which is typically one per
CPU.

Fixes: c5ad119fb6c0 ("net: sched: pfifo_fast use skb_array")
Reported-by: Jakob Unterwurzacher
Signed-off-by: John Fastabend
Signed-off-by: David S. Miller
---
 include/net/sch_generic.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/net')

diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h
index 2092d33..8da3267 100644
--- a/include/net/sch_generic.h
+++ b/include/net/sch_generic.h
@@ -30,6 +30,7 @@ struct qdisc_rate_table {
 enum qdisc_state_t {
	__QDISC_STATE_SCHED,
	__QDISC_STATE_DEACTIVATED,
+	__QDISC_STATE_RUNNING,
 };
 
 struct qdisc_size_table {
--
cgit v1.1

From b85ab56c3f81c5a24b5a5213374f549df06430da Mon Sep 17 00:00:00 2001
From: Cong Wang
Date: Mon, 26 Mar 2018 15:08:33 -0700
Subject: llc: properly handle dev_queue_xmit() return value

llc_conn_send_pdu() pushes the skb onto the write queue and calls
llc_conn_send_pdus() to flush it out. However, the status of
dev_queue_xmit() is not returned to the caller, in this case
llc_conn_state_process().

llc_conn_state_process() needs to hold the skb whether the transmit
succeeds or fails, because it still uses the skb afterwards; therefore
we should hold the skb before dev_queue_xmit() when that skb is the
one being processed by llc_conn_state_process(). Other callers can
simply pass NULL and keep ignoring the return value, as they do today.

Reported-by: Noam Rathaus
Signed-off-by: Cong Wang
Signed-off-by: David S. Miller
---
 include/net/llc_conn.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/net')

diff --git a/include/net/llc_conn.h b/include/net/llc_conn.h
index fe994d2..5c40f11 100644
--- a/include/net/llc_conn.h
+++ b/include/net/llc_conn.h
@@ -103,7 +103,7 @@ void llc_sk_reset(struct sock *sk);
 
 /* Access to a connection */
 int llc_conn_state_process(struct sock *sk, struct sk_buff *skb);
-void llc_conn_send_pdu(struct sock *sk, struct sk_buff *skb);
+int llc_conn_send_pdu(struct sock *sk, struct sk_buff *skb);
 void llc_conn_rtn_pdu(struct sock *sk, struct sk_buff *skb);
 void llc_conn_resend_i_pdu_as_cmd(struct sock *sk, u8 nr, u8 first_p_bit);
 void llc_conn_resend_i_pdu_as_rsp(struct sock *sk, u8 nr, u8 first_f_bit);
--
cgit v1.1
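Referring back to the pfifo_fast patch above, here is a minimal
standalone sketch of how a RUNNING bit serializes the dequeue side --
illustrative only, with atomic_flag standing in for the
__QDISC_STATE_RUNNING bit and the helpers loosely mirroring the
kernel's qdisc_run_begin()/qdisc_run_end():

#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

struct qdisc {
	atomic_flag running;	/* stands in for __QDISC_STATE_RUNNING */
	/* ... queues, stats, ... */
};

/* Claim the dequeue loop; fails if another CPU already owns it. */
static bool qdisc_run_begin(struct qdisc *q)
{
	return !atomic_flag_test_and_set_explicit(&q->running,
						  memory_order_acquire);
}

static void qdisc_run_end(struct qdisc *q)
{
	atomic_flag_clear_explicit(&q->running, memory_order_release);
}

static void qdisc_run(struct qdisc *q)
{
	if (!qdisc_run_begin(q))
		return;	/* another thread is already dequeueing */

	/* __qdisc_run() body: pull skbs and hand them to the driver,
	 * in queue order, with no second dequeuer racing to the ndo op. */
	printf("dequeue loop owned by exactly one thread\n");

	qdisc_run_end(q);
}

int main(void)
{
	struct qdisc q = { .running = ATOMIC_FLAG_INIT };

	qdisc_run(&q);
	return 0;
}

Note that enqueue never touches the flag, so enqueue/dequeue
concurrency is preserved; the bit only prevents two dequeue loops from
running on the same qdisc at once.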
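Likewise for the llc patch: a standalone sketch of why the caller must
pin the skb before transmitting. The refcount helpers below are
invented stand-ins for skb_get()/kfree_skb(); the shape of the fix is
that the send path takes an extra reference on behalf of
llc_conn_state_process() and propagates the transmit status back:

#include <stdio.h>
#include <stdlib.h>

struct skb {
	int refcnt;
	/* ... payload ... */
};

static struct skb *skb_hold(struct skb *skb)	/* like skb_get() */
{
	skb->refcnt++;
	return skb;
}

static void skb_release(struct skb *skb)	/* like kfree_skb() */
{
	if (--skb->refcnt == 0)
		free(skb);
}

/* Transmit consumes one reference, on success or failure. */
static int xmit(struct skb *skb)
{
	int rc = 0;	/* pretend the wire always accepts it */

	skb_release(skb);
	return rc;
}

/* Like the fixed llc_conn_send_pdu(): pin the skb, report the status. */
static int send_pdu(struct skb *skb)
{
	skb_hold(skb);	/* caller still needs the skb afterwards */
	return xmit(skb);
}

int main(void)
{
	struct skb *skb = calloc(1, sizeof(*skb));

	skb->refcnt = 1;
	if (send_pdu(skb) == 0)
		printf("sent; skb is still valid here\n");
	skb_release(skb);	/* drop the caller's reference */
	return 0;
}

Without the extra hold, the transmit consumes the last reference while
llc_conn_state_process() still has the pointer in hand, which is the
use-after-free the prototype change guards against.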