From 14e567615679a9999ce6bf4f23d6c9e00f03e00e Mon Sep 17 00:00:00 2001 From: Liping Zhang Date: Mon, 17 Apr 2017 21:18:55 +0800 Subject: netfilter: ctnetlink: drop the incorrect cthelper module request First, when creating a new ct, we will invoke request_module to try to load the related inkernel cthelper. So there's no need to call the request_module again when updating the ct helpinfo. Second, ctnetlink_change_helper may be called with rcu_read_lock held, i.e. rcu_read_lock -> nfqnl_recv_verdict -> nfqnl_ct_parse -> ctnetlink_glue_parse -> ctnetlink_glue_parse_ct -> ctnetlink_change_helper. But the request_module invocation may sleep, so we can't call it with the rcu_read_lock held. Remove it now. Signed-off-by: Liping Zhang Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_conntrack_netlink.c | 17 +---------------- 1 file changed, 1 insertion(+), 16 deletions(-) (limited to 'net/netfilter/nf_conntrack_netlink.c') diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c index dc7dfd6..48c1845 100644 --- a/net/netfilter/nf_conntrack_netlink.c +++ b/net/netfilter/nf_conntrack_netlink.c @@ -1512,23 +1512,8 @@ static int ctnetlink_change_helper(struct nf_conn *ct, helper = __nf_conntrack_helper_find(helpname, nf_ct_l3num(ct), nf_ct_protonum(ct)); - if (helper == NULL) { -#ifdef CONFIG_MODULES - spin_unlock_bh(&nf_conntrack_expect_lock); - - if (request_module("nfct-helper-%s", helpname) < 0) { - spin_lock_bh(&nf_conntrack_expect_lock); - return -EOPNOTSUPP; - } - - spin_lock_bh(&nf_conntrack_expect_lock); - helper = __nf_conntrack_helper_find(helpname, nf_ct_l3num(ct), - nf_ct_protonum(ct)); - if (helper) - return -EAGAIN; -#endif + if (helper == NULL) return -EOPNOTSUPP; - } if (help) { if (help->helper == helper) { -- cgit v1.1 From 88be4c09d9008f9ff337cbf48c5d0f06c8f872e7 Mon Sep 17 00:00:00 2001 From: Liping Zhang Date: Mon, 17 Apr 2017 21:18:56 +0800 Subject: netfilter: ctnetlink: fix deadlock due to acquire _expect_lock twice Currently, ctnetlink_change_conntrack is always protected by _expect_lock, but this will cause a deadlock when deleting the helper from a conntrack, as the _expect_lock will be acquired again by nf_ct_remove_expectations: CPU0 ---- lock(nf_conntrack_expect_lock); lock(nf_conntrack_expect_lock); *** DEADLOCK *** May be due to missing lock nesting notation 2 locks held by lt-conntrack_gr/12853: #0: (&table[i].mutex){+.+.+.}, at: [] nfnetlink_rcv_msg+0x399/0x6a9 [nfnetlink] #1: (nf_conntrack_expect_lock){+.....}, at: [] ctnetlink_new_conntrack+0x17f/0x408 [nf_conntrack_netlink] Call Trace: dump_stack+0x85/0xc2 __lock_acquire+0x1608/0x1680 ? ctnetlink_parse_tuple_proto+0x10f/0x1c0 [nf_conntrack_netlink] lock_acquire+0x100/0x1f0 ? nf_ct_remove_expectations+0x32/0x90 [nf_conntrack] _raw_spin_lock_bh+0x3f/0x50 ? nf_ct_remove_expectations+0x32/0x90 [nf_conntrack] nf_ct_remove_expectations+0x32/0x90 [nf_conntrack] ctnetlink_change_helper+0xc6/0x190 [nf_conntrack_netlink] ctnetlink_new_conntrack+0x1b2/0x408 [nf_conntrack_netlink] nfnetlink_rcv_msg+0x60a/0x6a9 [nfnetlink] ? nfnetlink_rcv_msg+0x1b9/0x6a9 [nfnetlink] ? nfnetlink_bind+0x1a0/0x1a0 [nfnetlink] netlink_rcv_skb+0xa4/0xc0 nfnetlink_rcv+0x87/0x770 [nfnetlink] Since the operations are unrelated to nf_ct_expect, so we can drop the _expect_lock. Also note, after removing the _expect_lock protection, another CPU may invoke nf_conntrack_helper_unregister, so we should use rcu_read_lock to protect __nf_conntrack_helper_find invoked by ctnetlink_change_helper. Fixes: ca7433df3a67 ("netfilter: conntrack: seperate expect locking from nf_conntrack_lock") Signed-off-by: Liping Zhang Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_conntrack_netlink.c | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) (limited to 'net/netfilter/nf_conntrack_netlink.c') diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c index 48c1845..e5f9777 100644 --- a/net/netfilter/nf_conntrack_netlink.c +++ b/net/netfilter/nf_conntrack_netlink.c @@ -1510,23 +1510,29 @@ static int ctnetlink_change_helper(struct nf_conn *ct, return 0; } + rcu_read_lock(); helper = __nf_conntrack_helper_find(helpname, nf_ct_l3num(ct), nf_ct_protonum(ct)); - if (helper == NULL) + if (helper == NULL) { + rcu_read_unlock(); return -EOPNOTSUPP; + } if (help) { if (help->helper == helper) { /* update private helper data if allowed. */ if (helper->from_nlattr) helper->from_nlattr(helpinfo, ct); - return 0; + err = 0; } else - return -EBUSY; + err = -EBUSY; + } else { + /* we cannot set a helper for an existing conntrack */ + err = -EOPNOTSUPP; } - /* we cannot set a helper for an existing conntrack */ - return -EOPNOTSUPP; + rcu_read_unlock(); + return err; } static int ctnetlink_change_timeout(struct nf_conn *ct, @@ -1945,9 +1951,7 @@ static int ctnetlink_new_conntrack(struct net *net, struct sock *ctnl, err = -EEXIST; ct = nf_ct_tuplehash_to_ctrack(h); if (!(nlh->nlmsg_flags & NLM_F_EXCL)) { - spin_lock_bh(&nf_conntrack_expect_lock); err = ctnetlink_change_conntrack(ct, cda); - spin_unlock_bh(&nf_conntrack_expect_lock); if (err == 0) { nf_conntrack_eventmask_report((1 << IPCT_REPLY) | (1 << IPCT_ASSURED) | @@ -2342,11 +2346,7 @@ ctnetlink_glue_parse(const struct nlattr *attr, struct nf_conn *ct) if (ret < 0) return ret; - spin_lock_bh(&nf_conntrack_expect_lock); - ret = ctnetlink_glue_parse_ct((const struct nlattr **)cda, ct); - spin_unlock_bh(&nf_conntrack_expect_lock); - - return ret; + return ctnetlink_glue_parse_ct((const struct nlattr **)cda, ct); } static int ctnetlink_glue_exp_parse(const struct nlattr * const *cda, -- cgit v1.1 From 53b56da83d7899de375a9de153fd7f5397de85e6 Mon Sep 17 00:00:00 2001 From: Liping Zhang Date: Mon, 17 Apr 2017 21:18:57 +0800 Subject: netfilter: ctnetlink: make it safer when updating ct->status After converting to use rcu for conntrack hash, one CPU may update the ct->status via ctnetlink, while another CPU may process the packets and update the ct->status. So the non-atomic operation "ct->status |= status;" via ctnetlink becomes unsafe, and this may clear the IPS_DYING_BIT bit set by another CPU unexpectedly. For example: CPU0 CPU1 ctnetlink_change_status __nf_conntrack_find_get old = ct->status nf_ct_gc_expired - nf_ct_kill - test_and_set_bit(IPS_DYING_BIT new = old | status; - ct->status = new; <-- oops, _DYING_ is cleared! Now using a series of atomic bit operation to solve the above issue. Also note, user shouldn't set IPS_TEMPLATE, IPS_SEQ_ADJUST directly, so make these two bits be unchangable too. If we set the IPS_TEMPLATE_BIT, ct will be freed by nf_ct_tmpl_free, but actually it is alloced by nf_conntrack_alloc. If we set the IPS_SEQ_ADJUST_BIT, this may cause the NULL pointer deference, as the nfct_seqadj(ct) maybe NULL. Last, add some comments to describe the logic change due to the commit a963d710f367 ("netfilter: ctnetlink: Fix regression in CTA_STATUS processing"), which makes me feel a little confusing. Fixes: 76507f69c44e ("[NETFILTER]: nf_conntrack: use RCU for conntrack hash") Signed-off-by: Liping Zhang Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_conntrack_netlink.c | 33 ++++++++++++++++++++++++--------- 1 file changed, 24 insertions(+), 9 deletions(-) (limited to 'net/netfilter/nf_conntrack_netlink.c') diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c index e5f9777..86deed6 100644 --- a/net/netfilter/nf_conntrack_netlink.c +++ b/net/netfilter/nf_conntrack_netlink.c @@ -1419,6 +1419,24 @@ ctnetlink_parse_nat_setup(struct nf_conn *ct, } #endif +static void +__ctnetlink_change_status(struct nf_conn *ct, unsigned long on, + unsigned long off) +{ + unsigned int bit; + + /* Ignore these unchangable bits */ + on &= ~IPS_UNCHANGEABLE_MASK; + off &= ~IPS_UNCHANGEABLE_MASK; + + for (bit = 0; bit < __IPS_MAX_BIT; bit++) { + if (on & (1 << bit)) + set_bit(bit, &ct->status); + else if (off & (1 << bit)) + clear_bit(bit, &ct->status); + } +} + static int ctnetlink_change_status(struct nf_conn *ct, const struct nlattr * const cda[]) { @@ -1438,10 +1456,7 @@ ctnetlink_change_status(struct nf_conn *ct, const struct nlattr * const cda[]) /* ASSURED bit can only be set */ return -EBUSY; - /* Be careful here, modifying NAT bits can screw up things, - * so don't let users modify them directly if they don't pass - * nf_nat_range. */ - ct->status |= status & ~(IPS_NAT_DONE_MASK | IPS_NAT_MASK); + __ctnetlink_change_status(ct, status, 0); return 0; } @@ -1628,7 +1643,7 @@ ctnetlink_change_seq_adj(struct nf_conn *ct, if (ret < 0) return ret; - ct->status |= IPS_SEQ_ADJUST; + set_bit(IPS_SEQ_ADJUST_BIT, &ct->status); } if (cda[CTA_SEQ_ADJ_REPLY]) { @@ -1637,7 +1652,7 @@ ctnetlink_change_seq_adj(struct nf_conn *ct, if (ret < 0) return ret; - ct->status |= IPS_SEQ_ADJUST; + set_bit(IPS_SEQ_ADJUST_BIT, &ct->status); } return 0; @@ -2289,10 +2304,10 @@ ctnetlink_update_status(struct nf_conn *ct, const struct nlattr * const cda[]) /* This check is less strict than ctnetlink_change_status() * because callers often flip IPS_EXPECTED bits when sending * an NFQA_CT attribute to the kernel. So ignore the - * unchangeable bits but do not error out. + * unchangeable bits but do not error out. Also user programs + * are allowed to clear the bits that they are allowed to change. */ - ct->status = (status & ~IPS_UNCHANGEABLE_MASK) | - (ct->status & IPS_UNCHANGEABLE_MASK); + __ctnetlink_change_status(ct, status, ~status); return 0; } -- cgit v1.1 From 64f3967c7aacbfa1f0614cdb1a23e3f7e76eb61b Mon Sep 17 00:00:00 2001 From: Liping Zhang Date: Mon, 17 Apr 2017 21:18:58 +0800 Subject: netfilter: ctnetlink: acquire ct->lock before operating nf_ct_seqadj We should acquire the ct->lock before accessing or modifying the nf_ct_seqadj, as another CPU may modify the nf_ct_seqadj at the same time during its packet proccessing. Signed-off-by: Liping Zhang Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_conntrack_netlink.c | 21 +++++++++++++++------ 1 file changed, 15 insertions(+), 6 deletions(-) (limited to 'net/netfilter/nf_conntrack_netlink.c') diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c index 86deed6..78f8c9a 100644 --- a/net/netfilter/nf_conntrack_netlink.c +++ b/net/netfilter/nf_conntrack_netlink.c @@ -417,8 +417,7 @@ nla_put_failure: return -1; } -static int ctnetlink_dump_ct_seq_adj(struct sk_buff *skb, - const struct nf_conn *ct) +static int ctnetlink_dump_ct_seq_adj(struct sk_buff *skb, struct nf_conn *ct) { struct nf_conn_seqadj *seqadj = nfct_seqadj(ct); struct nf_ct_seqadj *seq; @@ -426,15 +425,20 @@ static int ctnetlink_dump_ct_seq_adj(struct sk_buff *skb, if (!(ct->status & IPS_SEQ_ADJUST) || !seqadj) return 0; + spin_lock_bh(&ct->lock); seq = &seqadj->seq[IP_CT_DIR_ORIGINAL]; if (dump_ct_seq_adj(skb, seq, CTA_SEQ_ADJ_ORIG) == -1) - return -1; + goto err; seq = &seqadj->seq[IP_CT_DIR_REPLY]; if (dump_ct_seq_adj(skb, seq, CTA_SEQ_ADJ_REPLY) == -1) - return -1; + goto err; + spin_unlock_bh(&ct->lock); return 0; +err: + spin_unlock_bh(&ct->lock); + return -1; } static int ctnetlink_dump_id(struct sk_buff *skb, const struct nf_conn *ct) @@ -1637,11 +1641,12 @@ ctnetlink_change_seq_adj(struct nf_conn *ct, if (!seqadj) return 0; + spin_lock_bh(&ct->lock); if (cda[CTA_SEQ_ADJ_ORIG]) { ret = change_seq_adj(&seqadj->seq[IP_CT_DIR_ORIGINAL], cda[CTA_SEQ_ADJ_ORIG]); if (ret < 0) - return ret; + goto err; set_bit(IPS_SEQ_ADJUST_BIT, &ct->status); } @@ -1650,12 +1655,16 @@ ctnetlink_change_seq_adj(struct nf_conn *ct, ret = change_seq_adj(&seqadj->seq[IP_CT_DIR_REPLY], cda[CTA_SEQ_ADJ_REPLY]); if (ret < 0) - return ret; + goto err; set_bit(IPS_SEQ_ADJUST_BIT, &ct->status); } + spin_unlock_bh(&ct->lock); return 0; +err: + spin_unlock_bh(&ct->lock); + return ret; } static int -- cgit v1.1