From 14e567615679a9999ce6bf4f23d6c9e00f03e00e Mon Sep 17 00:00:00 2001
From: Liping Zhang <zlpnobody@gmail.com>
Date: Mon, 17 Apr 2017 21:18:55 +0800
Subject: netfilter: ctnetlink: drop the incorrect cthelper module request

First, when creating a new ct, we will invoke request_module to try to
load the related inkernel cthelper. So there's no need to call the
request_module again when updating the ct helpinfo.

Second, ctnetlink_change_helper may be called with rcu_read_lock held,
i.e. rcu_read_lock -> nfqnl_recv_verdict -> nfqnl_ct_parse ->
ctnetlink_glue_parse -> ctnetlink_glue_parse_ct ->
ctnetlink_change_helper. But the request_module invocation may sleep,
so we can't call it with the rcu_read_lock held.

Remove it now.

Signed-off-by: Liping Zhang <zlpnobody@gmail.com>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 net/netfilter/nf_conntrack_netlink.c | 17 +----------------
 1 file changed, 1 insertion(+), 16 deletions(-)

(limited to 'net/netfilter/nf_conntrack_netlink.c')
diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c
index dc7dfd6..48c1845 100644
--- a/net/netfilter/nf_conntrack_netlink.c
+++ b/net/netfilter/nf_conntrack_netlink.c
@@ -1512,23 +1512,8 @@ static int ctnetlink_change_helper(struct nf_conn *ct,
 
 	helper = __nf_conntrack_helper_find(helpname, nf_ct_l3num(ct),
 					    nf_ct_protonum(ct));
-	if (helper == NULL) {
-#ifdef CONFIG_MODULES
-		spin_unlock_bh(&nf_conntrack_expect_lock);
-
-		if (request_module("nfct-helper-%s", helpname) < 0) {
-			spin_lock_bh(&nf_conntrack_expect_lock);
-			return -EOPNOTSUPP;
-		}
-
-		spin_lock_bh(&nf_conntrack_expect_lock);
-		helper = __nf_conntrack_helper_find(helpname, nf_ct_l3num(ct),
-						    nf_ct_protonum(ct));
-		if (helper)
-			return -EAGAIN;
-#endif
+	if (helper == NULL)
 		return -EOPNOTSUPP;
-	}
 
 	if (help) {
 		if (help->helper == helper) {
-- 
cgit v1.1


From 88be4c09d9008f9ff337cbf48c5d0f06c8f872e7 Mon Sep 17 00:00:00 2001
From: Liping Zhang <zlpnobody@gmail.com>
Date: Mon, 17 Apr 2017 21:18:56 +0800
Subject: netfilter: ctnetlink: fix deadlock due to acquire _expect_lock twice

Currently, ctnetlink_change_conntrack is always protected by _expect_lock,
but this will cause a deadlock when deleting the helper from a conntrack,
as the _expect_lock will be acquired again by nf_ct_remove_expectations:

         CPU0
        ----
  lock(nf_conntrack_expect_lock);
  lock(nf_conntrack_expect_lock);

  *** DEADLOCK ***
  May be due to missing lock nesting notation

  2 locks held by lt-conntrack_gr/12853:
  #0:  (&table[i].mutex){+.+.+.}, at: [<ffffffffa05e2009>]
       nfnetlink_rcv_msg+0x399/0x6a9 [nfnetlink]
  #1:  (nf_conntrack_expect_lock){+.....}, at: [<ffffffffa05f2c1f>]
       ctnetlink_new_conntrack+0x17f/0x408 [nf_conntrack_netlink]

  Call Trace:
   dump_stack+0x85/0xc2
   __lock_acquire+0x1608/0x1680
   ? ctnetlink_parse_tuple_proto+0x10f/0x1c0 [nf_conntrack_netlink]
   lock_acquire+0x100/0x1f0
   ? nf_ct_remove_expectations+0x32/0x90 [nf_conntrack]
   _raw_spin_lock_bh+0x3f/0x50
   ? nf_ct_remove_expectations+0x32/0x90 [nf_conntrack]
   nf_ct_remove_expectations+0x32/0x90 [nf_conntrack]
   ctnetlink_change_helper+0xc6/0x190 [nf_conntrack_netlink]
   ctnetlink_new_conntrack+0x1b2/0x408 [nf_conntrack_netlink]
   nfnetlink_rcv_msg+0x60a/0x6a9 [nfnetlink]
   ? nfnetlink_rcv_msg+0x1b9/0x6a9 [nfnetlink]
   ? nfnetlink_bind+0x1a0/0x1a0 [nfnetlink]
   netlink_rcv_skb+0xa4/0xc0
   nfnetlink_rcv+0x87/0x770 [nfnetlink]

Since the operations are unrelated to nf_ct_expect, so we can drop the
_expect_lock. Also note, after removing the _expect_lock protection,
another CPU may invoke nf_conntrack_helper_unregister, so we should
use rcu_read_lock to protect __nf_conntrack_helper_find invoked by
ctnetlink_change_helper.

Fixes: ca7433df3a67 ("netfilter: conntrack: seperate expect locking from nf_conntrack_lock")
Signed-off-by: Liping Zhang <zlpnobody@gmail.com>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 net/netfilter/nf_conntrack_netlink.c | 24 ++++++++++++------------
 1 file changed, 12 insertions(+), 12 deletions(-)

(limited to 'net/netfilter/nf_conntrack_netlink.c')

diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c
index 48c1845..e5f9777 100644
--- a/net/netfilter/nf_conntrack_netlink.c
+++ b/net/netfilter/nf_conntrack_netlink.c
@@ -1510,23 +1510,29 @@ static int ctnetlink_change_helper(struct nf_conn *ct,
 		return 0;
 	}
 
+	rcu_read_lock();
 	helper = __nf_conntrack_helper_find(helpname, nf_ct_l3num(ct),
 					    nf_ct_protonum(ct));
-	if (helper == NULL)
+	if (helper == NULL) {
+		rcu_read_unlock();
 		return -EOPNOTSUPP;
+	}
 
 	if (help) {
 		if (help->helper == helper) {
 			/* update private helper data if allowed. */
 			if (helper->from_nlattr)
 				helper->from_nlattr(helpinfo, ct);
-			return 0;
+			err = 0;
 		} else
-			return -EBUSY;
+			err = -EBUSY;
+	} else {
+		/* we cannot set a helper for an existing conntrack */
+		err = -EOPNOTSUPP;
 	}
 
-	/* we cannot set a helper for an existing conntrack */
-	return -EOPNOTSUPP;
+	rcu_read_unlock();
+	return err;
 }
 
 static int ctnetlink_change_timeout(struct nf_conn *ct,
@@ -1945,9 +1951,7 @@ static int ctnetlink_new_conntrack(struct net *net, struct sock *ctnl,
 	err = -EEXIST;
 	ct = nf_ct_tuplehash_to_ctrack(h);
 	if (!(nlh->nlmsg_flags & NLM_F_EXCL)) {
-		spin_lock_bh(&nf_conntrack_expect_lock);
 		err = ctnetlink_change_conntrack(ct, cda);
-		spin_unlock_bh(&nf_conntrack_expect_lock);
 		if (err == 0) {
 			nf_conntrack_eventmask_report((1 << IPCT_REPLY) |
 						      (1 << IPCT_ASSURED) |
@@ -2342,11 +2346,7 @@ ctnetlink_glue_parse(const struct nlattr *attr, struct nf_conn *ct)
 	if (ret < 0)
 		return ret;
 
-	spin_lock_bh(&nf_conntrack_expect_lock);
-	ret = ctnetlink_glue_parse_ct((const struct nlattr **)cda, ct);
-	spin_unlock_bh(&nf_conntrack_expect_lock);
-
-	return ret;
+	return ctnetlink_glue_parse_ct((const struct nlattr **)cda, ct);
 }
 
 static int ctnetlink_glue_exp_parse(const struct nlattr * const *cda,
-- 
cgit v1.1


From 53b56da83d7899de375a9de153fd7f5397de85e6 Mon Sep 17 00:00:00 2001
From: Liping Zhang <zlpnobody@gmail.com>
Date: Mon, 17 Apr 2017 21:18:57 +0800
Subject: netfilter: ctnetlink: make it safer when updating ct->status

After converting to use rcu for conntrack hash, one CPU may update
the ct->status via ctnetlink, while another CPU may process the
packets and update the ct->status.

So the non-atomic operation "ct->status |= status;" via ctnetlink
becomes unsafe, and this may clear the IPS_DYING_BIT bit set by
another CPU unexpectedly. For example:
         CPU0                            CPU1
  ctnetlink_change_status        __nf_conntrack_find_get
      old = ct->status              nf_ct_gc_expired
          -                         nf_ct_kill
          -                      test_and_set_bit(IPS_DYING_BIT
      new = old | status;                 -
  ct->status = new; <-- oops, _DYING_ is cleared!

Now using a series of atomic bit operation to solve the above issue.

Also note, user shouldn't set IPS_TEMPLATE, IPS_SEQ_ADJUST directly,
so make these two bits be unchangable too.

If we set the IPS_TEMPLATE_BIT, ct will be freed by nf_ct_tmpl_free,
but actually it is alloced by nf_conntrack_alloc.
If we set the IPS_SEQ_ADJUST_BIT, this may cause the NULL pointer
deference, as the nfct_seqadj(ct) maybe NULL.

Last, add some comments to describe the logic change due to the
commit a963d710f367 ("netfilter: ctnetlink: Fix regression in CTA_STATUS
processing"), which makes me feel a little confusing.

Fixes: 76507f69c44e ("[NETFILTER]: nf_conntrack: use RCU for conntrack hash")
Signed-off-by: Liping Zhang <zlpnobody@gmail.com>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 net/netfilter/nf_conntrack_netlink.c | 33 ++++++++++++++++++++++++---------
 1 file changed, 24 insertions(+), 9 deletions(-)

(limited to 'net/netfilter/nf_conntrack_netlink.c')

diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c
index e5f9777..86deed6 100644
--- a/net/netfilter/nf_conntrack_netlink.c
+++ b/net/netfilter/nf_conntrack_netlink.c
@@ -1419,6 +1419,24 @@ ctnetlink_parse_nat_setup(struct nf_conn *ct,
 }
 #endif
 
+static void
+__ctnetlink_change_status(struct nf_conn *ct, unsigned long on,
+			  unsigned long off)
+{
+	unsigned int bit;
+
+	/* Ignore these unchangable bits */
+	on &= ~IPS_UNCHANGEABLE_MASK;
+	off &= ~IPS_UNCHANGEABLE_MASK;
+
+	for (bit = 0; bit < __IPS_MAX_BIT; bit++) {
+		if (on & (1 << bit))
+			set_bit(bit, &ct->status);
+		else if (off & (1 << bit))
+			clear_bit(bit, &ct->status);
+	}
+}
+
 static int
 ctnetlink_change_status(struct nf_conn *ct, const struct nlattr * const cda[])
 {
@@ -1438,10 +1456,7 @@ ctnetlink_change_status(struct nf_conn *ct, const struct nlattr * const cda[])
 		/* ASSURED bit can only be set */
 		return -EBUSY;
 
-	/* Be careful here, modifying NAT bits can screw up things,
-	 * so don't let users modify them directly if they don't pass
-	 * nf_nat_range. */
-	ct->status |= status & ~(IPS_NAT_DONE_MASK | IPS_NAT_MASK);
+	__ctnetlink_change_status(ct, status, 0);
 	return 0;
 }
 
@@ -1628,7 +1643,7 @@ ctnetlink_change_seq_adj(struct nf_conn *ct,
 		if (ret < 0)
 			return ret;
 
-		ct->status |= IPS_SEQ_ADJUST;
+		set_bit(IPS_SEQ_ADJUST_BIT, &ct->status);
 	}
 
 	if (cda[CTA_SEQ_ADJ_REPLY]) {
@@ -1637,7 +1652,7 @@ ctnetlink_change_seq_adj(struct nf_conn *ct,
 		if (ret < 0)
 			return ret;
 
-		ct->status |= IPS_SEQ_ADJUST;
+		set_bit(IPS_SEQ_ADJUST_BIT, &ct->status);
 	}
 
 	return 0;
@@ -2289,10 +2304,10 @@ ctnetlink_update_status(struct nf_conn *ct, const struct nlattr * const cda[])
 	/* This check is less strict than ctnetlink_change_status()
 	 * because callers often flip IPS_EXPECTED bits when sending
 	 * an NFQA_CT attribute to the kernel.  So ignore the
-	 * unchangeable bits but do not error out.
+	 * unchangeable bits but do not error out. Also user programs
+	 * are allowed to clear the bits that they are allowed to change.
 	 */
-	ct->status = (status & ~IPS_UNCHANGEABLE_MASK) |
-		     (ct->status & IPS_UNCHANGEABLE_MASK);
+	__ctnetlink_change_status(ct, status, ~status);
 	return 0;
 }
 
-- 
cgit v1.1


From 64f3967c7aacbfa1f0614cdb1a23e3f7e76eb61b Mon Sep 17 00:00:00 2001
From: Liping Zhang <zlpnobody@gmail.com>
Date: Mon, 17 Apr 2017 21:18:58 +0800
Subject: netfilter: ctnetlink: acquire ct->lock before operating nf_ct_seqadj

We should acquire the ct->lock before accessing or modifying the
nf_ct_seqadj, as another CPU may modify the nf_ct_seqadj at the same
time during its packet proccessing.

Signed-off-by: Liping Zhang <zlpnobody@gmail.com>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 net/netfilter/nf_conntrack_netlink.c | 21 +++++++++++++++------
 1 file changed, 15 insertions(+), 6 deletions(-)

(limited to 'net/netfilter/nf_conntrack_netlink.c')

diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c
index 86deed6..78f8c9a 100644
--- a/net/netfilter/nf_conntrack_netlink.c
+++ b/net/netfilter/nf_conntrack_netlink.c
@@ -417,8 +417,7 @@ nla_put_failure:
 	return -1;
 }
 
-static int ctnetlink_dump_ct_seq_adj(struct sk_buff *skb,
-				     const struct nf_conn *ct)
+static int ctnetlink_dump_ct_seq_adj(struct sk_buff *skb, struct nf_conn *ct)
 {
 	struct nf_conn_seqadj *seqadj = nfct_seqadj(ct);
 	struct nf_ct_seqadj *seq;
@@ -426,15 +425,20 @@ static int ctnetlink_dump_ct_seq_adj(struct sk_buff *skb,
 	if (!(ct->status & IPS_SEQ_ADJUST) || !seqadj)
 		return 0;
 
+	spin_lock_bh(&ct->lock);
 	seq = &seqadj->seq[IP_CT_DIR_ORIGINAL];
 	if (dump_ct_seq_adj(skb, seq, CTA_SEQ_ADJ_ORIG) == -1)
-		return -1;
+		goto err;
 
 	seq = &seqadj->seq[IP_CT_DIR_REPLY];
 	if (dump_ct_seq_adj(skb, seq, CTA_SEQ_ADJ_REPLY) == -1)
-		return -1;
+		goto err;
 
+	spin_unlock_bh(&ct->lock);
 	return 0;
+err:
+	spin_unlock_bh(&ct->lock);
+	return -1;
 }
 
 static int ctnetlink_dump_id(struct sk_buff *skb, const struct nf_conn *ct)
@@ -1637,11 +1641,12 @@ ctnetlink_change_seq_adj(struct nf_conn *ct,
 	if (!seqadj)
 		return 0;
 
+	spin_lock_bh(&ct->lock);
 	if (cda[CTA_SEQ_ADJ_ORIG]) {
 		ret = change_seq_adj(&seqadj->seq[IP_CT_DIR_ORIGINAL],
 				     cda[CTA_SEQ_ADJ_ORIG]);
 		if (ret < 0)
-			return ret;
+			goto err;
 
 		set_bit(IPS_SEQ_ADJUST_BIT, &ct->status);
 	}
@@ -1650,12 +1655,16 @@ ctnetlink_change_seq_adj(struct nf_conn *ct,
 		ret = change_seq_adj(&seqadj->seq[IP_CT_DIR_REPLY],
 				     cda[CTA_SEQ_ADJ_REPLY]);
 		if (ret < 0)
-			return ret;
+			goto err;
 
 		set_bit(IPS_SEQ_ADJUST_BIT, &ct->status);
 	}
 
+	spin_unlock_bh(&ct->lock);
 	return 0;
+err:
+	spin_unlock_bh(&ct->lock);
+	return ret;
 }
 
 static int
-- 
cgit v1.1