From 625cdd78d119d5848ac3c47d129bdf5f23f64120 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Sat, 11 May 2013 19:13:49 +0300 Subject: svcauth_gss: fix error code in use_gss_proxy() This should return zero on success and -EBUSY on error so the type needs to be int instead of bool. Signed-off-by: Dan Carpenter Signed-off-by: J. Bruce Fields --- net/sunrpc/auth_gss/svcauth_gss.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/sunrpc/auth_gss/svcauth_gss.c b/net/sunrpc/auth_gss/svcauth_gss.c index 871c73c..2c6a1ec 100644 --- a/net/sunrpc/auth_gss/svcauth_gss.c +++ b/net/sunrpc/auth_gss/svcauth_gss.c @@ -1287,7 +1287,7 @@ static bool use_gss_proxy(struct net *net) #ifdef CONFIG_PROC_FS -static bool set_gss_proxy(struct net *net, int type) +static int set_gss_proxy(struct net *net, int type) { struct sunrpc_net *sn = net_generic(net, sunrpc_net_id); int ret = 0; -- cgit v1.1 From d36ccb9cec22a09a12d9ef8234d634f840d7ae4b Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Wed, 15 May 2013 10:46:33 -0700 Subject: SUNRPC: Fix a bug in gss_create_upcall If wait_event_interruptible_timeout() is successful, it returns the number of seconds remaining until the timeout. In that case, we should be retrying the upcall. Signed-off-by: Trond Myklebust --- net/sunrpc/auth_gss/auth_gss.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/sunrpc/auth_gss/auth_gss.c b/net/sunrpc/auth_gss/auth_gss.c index 7da6b45..f17f3c5 100644 --- a/net/sunrpc/auth_gss/auth_gss.c +++ b/net/sunrpc/auth_gss/auth_gss.c @@ -563,11 +563,12 @@ gss_create_upcall(struct gss_auth *gss_auth, struct gss_cred *gss_cred) struct rpc_cred *cred = &gss_cred->gc_base; struct gss_upcall_msg *gss_msg; DEFINE_WAIT(wait); - int err = 0; + int err; dprintk("RPC: %s for uid %u\n", __func__, from_kuid(&init_user_ns, cred->cr_uid)); retry: + err = 0; gss_msg = gss_setup_upcall(gss_auth->client, gss_auth, cred); if (PTR_ERR(gss_msg) == -EAGAIN) { err = wait_event_interruptible_timeout(pipe_version_waitqueue, @@ -576,7 +577,7 @@ retry: warn_gssd(); err = -EACCES; } - if (err) + if (err < 0) goto out; goto retry; } -- cgit v1.1 From abfdbd53a4e28844ad953b313f017f55edbb85b7 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Wed, 15 May 2013 11:28:54 -0700 Subject: SUNRPC: Faster detection if gssd is actually running Recent changes to the NFS security flavour negotiation mean that we have a stronger dependency on rpc.gssd. If the latter is not running, because the user failed to start it, then we time out and mark the container as not having an instance. We then use that information to time out faster the next time. If, on the other hand, the rpc.gssd successfully binds to an rpc_pipe, then we mark the container as having an rpc.gssd instance. Signed-off-by: Trond Myklebust --- net/sunrpc/auth_gss/auth_gss.c | 13 ++++++++++++- net/sunrpc/netns.h | 2 ++ net/sunrpc/rpc_pipe.c | 4 ++++ 3 files changed, 18 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/sunrpc/auth_gss/auth_gss.c b/net/sunrpc/auth_gss/auth_gss.c index f17f3c5..3aff72f 100644 --- a/net/sunrpc/auth_gss/auth_gss.c +++ b/net/sunrpc/auth_gss/auth_gss.c @@ -52,6 +52,8 @@ #include #include +#include "../netns.h" + static const struct rpc_authops authgss_ops; static const struct rpc_credops gss_credops; @@ -559,9 +561,12 @@ out: static inline int gss_create_upcall(struct gss_auth *gss_auth, struct gss_cred *gss_cred) { + struct net *net = rpc_net_ns(gss_auth->client); + struct sunrpc_net *sn = net_generic(net, sunrpc_net_id); struct rpc_pipe *pipe; struct rpc_cred *cred = &gss_cred->gc_base; struct gss_upcall_msg *gss_msg; + unsigned long timeout; DEFINE_WAIT(wait); int err; @@ -569,11 +574,17 @@ gss_create_upcall(struct gss_auth *gss_auth, struct gss_cred *gss_cred) __func__, from_kuid(&init_user_ns, cred->cr_uid)); retry: err = 0; + /* Default timeout is 15s unless we know that gssd is not running */ + timeout = 15 * HZ; + if (!sn->gssd_running) + timeout = HZ >> 2; gss_msg = gss_setup_upcall(gss_auth->client, gss_auth, cred); if (PTR_ERR(gss_msg) == -EAGAIN) { err = wait_event_interruptible_timeout(pipe_version_waitqueue, - pipe_version >= 0, 15*HZ); + pipe_version >= 0, timeout); if (pipe_version < 0) { + if (err == 0) + sn->gssd_running = 0; warn_gssd(); err = -EACCES; } diff --git a/net/sunrpc/netns.h b/net/sunrpc/netns.h index 7111a4c..0827f64 100644 --- a/net/sunrpc/netns.h +++ b/net/sunrpc/netns.h @@ -29,6 +29,8 @@ struct sunrpc_net { struct rpc_clnt *gssp_clnt; int use_gss_proxy; struct proc_dir_entry *use_gssp_proc; + + unsigned int gssd_running; }; extern int sunrpc_net_id; diff --git a/net/sunrpc/rpc_pipe.c b/net/sunrpc/rpc_pipe.c index a9129f8..a370762 100644 --- a/net/sunrpc/rpc_pipe.c +++ b/net/sunrpc/rpc_pipe.c @@ -216,11 +216,14 @@ rpc_destroy_inode(struct inode *inode) static int rpc_pipe_open(struct inode *inode, struct file *filp) { + struct net *net = inode->i_sb->s_fs_info; + struct sunrpc_net *sn = net_generic(net, sunrpc_net_id); struct rpc_pipe *pipe; int first_open; int res = -ENXIO; mutex_lock(&inode->i_mutex); + sn->gssd_running = 1; pipe = RPC_I(inode)->pipe; if (pipe == NULL) goto out; @@ -1069,6 +1072,7 @@ void rpc_pipefs_init_net(struct net *net) struct sunrpc_net *sn = net_generic(net, sunrpc_net_id); mutex_init(&sn->pipefs_sb_lock); + sn->gssd_running = 1; } /* -- cgit v1.1 From 2aed8b476f3478be140df92bbfb182978e835504 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Wed, 15 May 2013 10:27:32 -0700 Subject: SUNRPC: Convert auth_gss pipe detection to work in namespaces This seems to have been overlooked when we did the namespace conversion. If a container is running a legacy version of rpc.gssd then it will be disrupted if the global 'pipe_version' is set by a container running the new version of rpc.gssd. Signed-off-by: Trond Myklebust --- net/sunrpc/auth_gss/auth_gss.c | 46 +++++++++++++++++++++++++----------------- net/sunrpc/netns.h | 2 ++ net/sunrpc/rpc_pipe.c | 1 + 3 files changed, 30 insertions(+), 19 deletions(-) (limited to 'net') diff --git a/net/sunrpc/auth_gss/auth_gss.c b/net/sunrpc/auth_gss/auth_gss.c index 3aff72f..fc2f78d 100644 --- a/net/sunrpc/auth_gss/auth_gss.c +++ b/net/sunrpc/auth_gss/auth_gss.c @@ -87,8 +87,6 @@ struct gss_auth { }; /* pipe_version >= 0 if and only if someone has a pipe open. */ -static int pipe_version = -1; -static atomic_t pipe_users = ATOMIC_INIT(0); static DEFINE_SPINLOCK(pipe_version_lock); static struct rpc_wait_queue pipe_version_rpc_waitqueue; static DECLARE_WAIT_QUEUE_HEAD(pipe_version_waitqueue); @@ -268,24 +266,27 @@ struct gss_upcall_msg { char databuf[UPCALL_BUF_LEN]; }; -static int get_pipe_version(void) +static int get_pipe_version(struct net *net) { + struct sunrpc_net *sn = net_generic(net, sunrpc_net_id); int ret; spin_lock(&pipe_version_lock); - if (pipe_version >= 0) { - atomic_inc(&pipe_users); - ret = pipe_version; + if (sn->pipe_version >= 0) { + atomic_inc(&sn->pipe_users); + ret = sn->pipe_version; } else ret = -EAGAIN; spin_unlock(&pipe_version_lock); return ret; } -static void put_pipe_version(void) +static void put_pipe_version(struct net *net) { - if (atomic_dec_and_lock(&pipe_users, &pipe_version_lock)) { - pipe_version = -1; + struct sunrpc_net *sn = net_generic(net, sunrpc_net_id); + + if (atomic_dec_and_lock(&sn->pipe_users, &pipe_version_lock)) { + sn->pipe_version = -1; spin_unlock(&pipe_version_lock); } } @@ -293,9 +294,10 @@ static void put_pipe_version(void) static void gss_release_msg(struct gss_upcall_msg *gss_msg) { + struct net *net = rpc_net_ns(gss_msg->auth->client); if (!atomic_dec_and_test(&gss_msg->count)) return; - put_pipe_version(); + put_pipe_version(net); BUG_ON(!list_empty(&gss_msg->list)); if (gss_msg->ctx != NULL) gss_put_ctx(gss_msg->ctx); @@ -441,7 +443,10 @@ static void gss_encode_msg(struct gss_upcall_msg *gss_msg, struct rpc_clnt *clnt, const char *service_name) { - if (pipe_version == 0) + struct net *net = rpc_net_ns(clnt); + struct sunrpc_net *sn = net_generic(net, sunrpc_net_id); + + if (sn->pipe_version == 0) gss_encode_v0_msg(gss_msg); else /* pipe_version == 1 */ gss_encode_v1_msg(gss_msg, clnt, service_name); @@ -457,7 +462,7 @@ gss_alloc_msg(struct gss_auth *gss_auth, struct rpc_clnt *clnt, gss_msg = kzalloc(sizeof(*gss_msg), GFP_NOFS); if (gss_msg == NULL) return ERR_PTR(-ENOMEM); - vers = get_pipe_version(); + vers = get_pipe_version(rpc_net_ns(clnt)); if (vers < 0) { kfree(gss_msg); return ERR_PTR(vers); @@ -581,8 +586,8 @@ retry: gss_msg = gss_setup_upcall(gss_auth->client, gss_auth, cred); if (PTR_ERR(gss_msg) == -EAGAIN) { err = wait_event_interruptible_timeout(pipe_version_waitqueue, - pipe_version >= 0, timeout); - if (pipe_version < 0) { + sn->pipe_version >= 0, timeout); + if (sn->pipe_version < 0) { if (err == 0) sn->gssd_running = 0; warn_gssd(); @@ -719,20 +724,22 @@ out: static int gss_pipe_open(struct inode *inode, int new_version) { + struct net *net = inode->i_sb->s_fs_info; + struct sunrpc_net *sn = net_generic(net, sunrpc_net_id); int ret = 0; spin_lock(&pipe_version_lock); - if (pipe_version < 0) { + if (sn->pipe_version < 0) { /* First open of any gss pipe determines the version: */ - pipe_version = new_version; + sn->pipe_version = new_version; rpc_wake_up(&pipe_version_rpc_waitqueue); wake_up(&pipe_version_waitqueue); - } else if (pipe_version != new_version) { + } else if (sn->pipe_version != new_version) { /* Trying to open a pipe of a different version */ ret = -EBUSY; goto out; } - atomic_inc(&pipe_users); + atomic_inc(&sn->pipe_users); out: spin_unlock(&pipe_version_lock); return ret; @@ -752,6 +759,7 @@ static int gss_pipe_open_v1(struct inode *inode) static void gss_pipe_release(struct inode *inode) { + struct net *net = inode->i_sb->s_fs_info; struct rpc_pipe *pipe = RPC_I(inode)->pipe; struct gss_upcall_msg *gss_msg; @@ -770,7 +778,7 @@ restart: } spin_unlock(&pipe->lock); - put_pipe_version(); + put_pipe_version(net); } static void diff --git a/net/sunrpc/netns.h b/net/sunrpc/netns.h index 0827f64..74d948f 100644 --- a/net/sunrpc/netns.h +++ b/net/sunrpc/netns.h @@ -28,6 +28,8 @@ struct sunrpc_net { wait_queue_head_t gssp_wq; struct rpc_clnt *gssp_clnt; int use_gss_proxy; + int pipe_version; + atomic_t pipe_users; struct proc_dir_entry *use_gssp_proc; unsigned int gssd_running; diff --git a/net/sunrpc/rpc_pipe.c b/net/sunrpc/rpc_pipe.c index a370762..e7ce4b3 100644 --- a/net/sunrpc/rpc_pipe.c +++ b/net/sunrpc/rpc_pipe.c @@ -1073,6 +1073,7 @@ void rpc_pipefs_init_net(struct net *net) mutex_init(&sn->pipefs_sb_lock); sn->gssd_running = 1; + sn->pipe_version = -1; } /* -- cgit v1.1 From 6211dd12da609bc6893b9c3182630b494737ec4b Mon Sep 17 00:00:00 2001 From: Stanislaw Gruszka Date: Fri, 17 May 2013 13:43:04 +0200 Subject: mac80211: fix direct probe auth We send direct probe to broadcast address, as some APs do not respond to unicast PROBE frames when unassociated. Broadcast frames are not acked, so we can not use that for trigger MLME state machine, but we need to use old timeout mechanism. This fixes authentication timed out like below: [ 1024.671974] wlan6: authenticate with 54:e6:fc:98:63:fe [ 1024.694125] wlan6: direct probe to 54:e6:fc:98:63:fe (try 1/3) [ 1024.695450] wlan6: direct probe to 54:e6:fc:98:63:fe (try 2/3) [ 1024.700586] wlan6: send auth to 54:e6:fc:98:63:fe (try 3/3) [ 1024.701441] wlan6: authentication with 54:e6:fc:98:63:fe timed out With fix, we have: [ 4524.198978] wlan6: authenticate with 54:e6:fc:98:63:fe [ 4524.220692] wlan6: direct probe to 54:e6:fc:98:63:fe (try 1/3) [ 4524.421784] wlan6: send auth to 54:e6:fc:98:63:fe (try 2/3) [ 4524.423272] wlan6: authenticated [ 4524.423811] wlan6: associate with 54:e6:fc:98:63:fe (try 1/3) [ 4524.427492] wlan6: RX AssocResp from 54:e6:fc:98:63:fe (capab=0x431 status=0 aid=1) Cc: stable@vger.kernel.org # 3.9 Signed-off-by: Stanislaw Gruszka Signed-off-by: Johannes Berg --- net/mac80211/mlme.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) (limited to 'net') diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index a46e490..a8c2130 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -3321,10 +3321,6 @@ static int ieee80211_probe_auth(struct ieee80211_sub_if_data *sdata) if (WARN_ON_ONCE(!auth_data)) return -EINVAL; - if (local->hw.flags & IEEE80211_HW_REPORTS_TX_ACK_STATUS) - tx_flags = IEEE80211_TX_CTL_REQ_TX_STATUS | - IEEE80211_TX_INTFL_MLME_CONN_TX; - auth_data->tries++; if (auth_data->tries > IEEE80211_AUTH_MAX_TRIES) { @@ -3358,6 +3354,10 @@ static int ieee80211_probe_auth(struct ieee80211_sub_if_data *sdata) auth_data->expected_transaction = trans; } + if (local->hw.flags & IEEE80211_HW_REPORTS_TX_ACK_STATUS) + tx_flags = IEEE80211_TX_CTL_REQ_TX_STATUS | + IEEE80211_TX_INTFL_MLME_CONN_TX; + ieee80211_send_auth(sdata, trans, auth_data->algorithm, status, auth_data->data, auth_data->data_len, auth_data->bss->bssid, @@ -3381,12 +3381,12 @@ static int ieee80211_probe_auth(struct ieee80211_sub_if_data *sdata) * will not answer to direct packet in unassociated state. */ ieee80211_send_probe_req(sdata, NULL, ssidie + 2, ssidie[1], - NULL, 0, (u32) -1, true, tx_flags, + NULL, 0, (u32) -1, true, 0, auth_data->bss->channel, false); rcu_read_unlock(); } - if (!(local->hw.flags & IEEE80211_HW_REPORTS_TX_ACK_STATUS)) { + if (tx_flags == 0) { auth_data->timeout = jiffies + IEEE80211_AUTH_TIMEOUT; ifmgd->auth_data->timeout_started = true; run_again(ifmgd, auth_data->timeout); -- cgit v1.1 From 7c055881de33b5d8ebb6d12150eaf338a0546199 Mon Sep 17 00:00:00 2001 From: Paul Bolle Date: Sun, 12 May 2013 23:21:24 +0200 Subject: NFC: Remove commented out LLCP related Makefile line The Kconfig symbol NFC_LLCP was removed in commit 30cc458765 ("NFC: Move LLCP code to the NFC top level diirectory"). But the reference to its macro in this Makefile was only commented out. Remove it now. Signed-off-by: Paul Bolle Signed-off-by: Samuel Ortiz --- net/nfc/Makefile | 1 - 1 file changed, 1 deletion(-) (limited to 'net') diff --git a/net/nfc/Makefile b/net/nfc/Makefile index fb799de..a76f453 100644 --- a/net/nfc/Makefile +++ b/net/nfc/Makefile @@ -5,7 +5,6 @@ obj-$(CONFIG_NFC) += nfc.o obj-$(CONFIG_NFC_NCI) += nci/ obj-$(CONFIG_NFC_HCI) += hci/ -#obj-$(CONFIG_NFC_LLCP) += llcp/ nfc-objs := core.o netlink.o af_nfc.o rawsock.o llcp_core.o llcp_commands.o \ llcp_sock.o -- cgit v1.1 From a3c3cac5d31879cd9ae2de7874dc6544ca704aec Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Wed, 22 May 2013 12:57:24 -0400 Subject: SUNRPC: Prevent an rpc_task wakeup race The lockless RPC_IS_QUEUED() test in __rpc_execute means that we need to be careful about ordering the calls to rpc_test_and_set_running(task) and rpc_clear_queued(task). If we get the order wrong, then we may end up testing the RPC_TASK_RUNNING flag after __rpc_execute() has looped and changed the state of the rpc_task. Signed-off-by: Trond Myklebust Cc: stable@vger.kernel.org --- net/sunrpc/sched.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/sunrpc/sched.c b/net/sunrpc/sched.c index f8529fc..5356b12 100644 --- a/net/sunrpc/sched.c +++ b/net/sunrpc/sched.c @@ -324,11 +324,17 @@ EXPORT_SYMBOL_GPL(__rpc_wait_for_completion_task); * Note: If the task is ASYNC, and is being made runnable after sitting on an * rpc_wait_queue, this must be called with the queue spinlock held to protect * the wait queue operation. + * Note the ordering of rpc_test_and_set_running() and rpc_clear_queued(), + * which is needed to ensure that __rpc_execute() doesn't loop (due to the + * lockless RPC_IS_QUEUED() test) before we've had a chance to test + * the RPC_TASK_RUNNING flag. */ static void rpc_make_runnable(struct rpc_task *task) { + bool need_wakeup = !rpc_test_and_set_running(task); + rpc_clear_queued(task); - if (rpc_test_and_set_running(task)) + if (!need_wakeup) return; if (RPC_IS_ASYNC(task)) { INIT_WORK(&task->u.tk_work, rpc_async_schedule); -- cgit v1.1 From 2a7851bffb008ff4882eee673da74718997b4265 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Fri, 17 May 2013 03:56:10 +0000 Subject: netfilter: add nf_ipv6_ops hook to fix xt_addrtype with IPv6 Quoting https://bugzilla.netfilter.org/show_bug.cgi?id=812: [ ip6tables -m addrtype ] When I tried to use in the nat/PREROUTING it messes up the routing cache even if the rule didn't matched at all. [..] If I remove the --limit-iface-in from the non-working scenario, so just use the -m addrtype --dst-type LOCAL it works! This happens when LOCAL type matching is requested with --limit-iface-in, and the default ipv6 route is via the interface the packet we test arrived on. Because xt_addrtype uses ip6_route_output, the ipv6 routing implementation creates an unwanted cached entry, and the packet won't make it to the real/expected destination. Silently ignoring --limit-iface-in makes the routing work but it breaks rule matching (--dst-type LOCAL with limit-iface-in is supposed to only match if the dst address is configured on the incoming interface; without --limit-iface-in it will match if the address is reachable via lo). The test should call ipv6_chk_addr() instead. However, this would add a link-time dependency on ipv6. There are two possible solutions: 1) Revert the commit that moved ipt_addrtype to xt_addrtype, and put ipv6 specific code into ip6t_addrtype. 2) add new "nf_ipv6_ops" struct to register pointers to ipv6 functions. While the former might seem preferable, Pablo pointed out that there are more xt modules with link-time dependeny issues regarding ipv6, so lets go for 2). Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- net/ipv6/addrconf.c | 2 +- net/ipv6/netfilter.c | 7 +++++++ net/netfilter/core.c | 2 ++ net/netfilter/xt_addrtype.c | 27 ++++++++++++++++----------- 4 files changed, 26 insertions(+), 12 deletions(-) (limited to 'net') diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index d1ab6ab..d1b2d80 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -1487,7 +1487,7 @@ static int ipv6_count_addresses(struct inet6_dev *idev) } int ipv6_chk_addr(struct net *net, const struct in6_addr *addr, - struct net_device *dev, int strict) + const struct net_device *dev, int strict) { struct inet6_ifaddr *ifp; unsigned int hash = inet6_addr_hash(addr); diff --git a/net/ipv6/netfilter.c b/net/ipv6/netfilter.c index 72836f4..95f3f1d 100644 --- a/net/ipv6/netfilter.c +++ b/net/ipv6/netfilter.c @@ -10,6 +10,7 @@ #include #include #include +#include #include #include #include @@ -186,6 +187,10 @@ static __sum16 nf_ip6_checksum_partial(struct sk_buff *skb, unsigned int hook, return csum; }; +static const struct nf_ipv6_ops ipv6ops = { + .chk_addr = ipv6_chk_addr, +}; + static const struct nf_afinfo nf_ip6_afinfo = { .family = AF_INET6, .checksum = nf_ip6_checksum, @@ -198,6 +203,7 @@ static const struct nf_afinfo nf_ip6_afinfo = { int __init ipv6_netfilter_init(void) { + RCU_INIT_POINTER(nf_ipv6_ops, &ipv6ops); return nf_register_afinfo(&nf_ip6_afinfo); } @@ -206,5 +212,6 @@ int __init ipv6_netfilter_init(void) */ void ipv6_netfilter_fini(void) { + RCU_INIT_POINTER(nf_ipv6_ops, NULL); nf_unregister_afinfo(&nf_ip6_afinfo); } diff --git a/net/netfilter/core.c b/net/netfilter/core.c index 07c865a..857ca9f 100644 --- a/net/netfilter/core.c +++ b/net/netfilter/core.c @@ -30,6 +30,8 @@ static DEFINE_MUTEX(afinfo_mutex); const struct nf_afinfo __rcu *nf_afinfo[NFPROTO_NUMPROTO] __read_mostly; EXPORT_SYMBOL(nf_afinfo); +const struct nf_ipv6_ops __rcu *nf_ipv6_ops __read_mostly; +EXPORT_SYMBOL_GPL(nf_ipv6_ops); int nf_register_afinfo(const struct nf_afinfo *afinfo) { diff --git a/net/netfilter/xt_addrtype.c b/net/netfilter/xt_addrtype.c index 49c5ff7..68ff29f 100644 --- a/net/netfilter/xt_addrtype.c +++ b/net/netfilter/xt_addrtype.c @@ -22,6 +22,7 @@ #include #endif +#include #include #include @@ -33,12 +34,12 @@ MODULE_ALIAS("ip6t_addrtype"); #if IS_ENABLED(CONFIG_IP6_NF_IPTABLES) static u32 match_lookup_rt6(struct net *net, const struct net_device *dev, - const struct in6_addr *addr) + const struct in6_addr *addr, u16 mask) { const struct nf_afinfo *afinfo; struct flowi6 flow; struct rt6_info *rt; - u32 ret; + u32 ret = 0; int route_err; memset(&flow, 0, sizeof(flow)); @@ -49,12 +50,19 @@ static u32 match_lookup_rt6(struct net *net, const struct net_device *dev, rcu_read_lock(); afinfo = nf_get_afinfo(NFPROTO_IPV6); - if (afinfo != NULL) + if (afinfo != NULL) { + const struct nf_ipv6_ops *v6ops; + + if (dev && (mask & XT_ADDRTYPE_LOCAL)) { + v6ops = nf_get_ipv6_ops(); + if (v6ops && v6ops->chk_addr(net, addr, dev, true)) + ret = XT_ADDRTYPE_LOCAL; + } route_err = afinfo->route(net, (struct dst_entry **)&rt, - flowi6_to_flowi(&flow), !!dev); - else + flowi6_to_flowi(&flow), false); + } else { route_err = 1; - + } rcu_read_unlock(); if (route_err) @@ -62,15 +70,12 @@ static u32 match_lookup_rt6(struct net *net, const struct net_device *dev, if (rt->rt6i_flags & RTF_REJECT) ret = XT_ADDRTYPE_UNREACHABLE; - else - ret = 0; - if (rt->rt6i_flags & RTF_LOCAL) + if (dev == NULL && rt->rt6i_flags & RTF_LOCAL) ret |= XT_ADDRTYPE_LOCAL; if (rt->rt6i_flags & RTF_ANYCAST) ret |= XT_ADDRTYPE_ANYCAST; - dst_release(&rt->dst); return ret; } @@ -90,7 +95,7 @@ static bool match_type6(struct net *net, const struct net_device *dev, if ((XT_ADDRTYPE_LOCAL | XT_ADDRTYPE_ANYCAST | XT_ADDRTYPE_UNREACHABLE) & mask) - return !!(mask & match_lookup_rt6(net, dev, addr)); + return !!(mask & match_lookup_rt6(net, dev, addr, mask)); return true; } -- cgit v1.1 From 4f36ea6eed2081340c7a7aa98c73187ecfccebff Mon Sep 17 00:00:00 2001 From: Chen Gang Date: Thu, 23 May 2013 01:50:46 +0000 Subject: netfilter: ipt_ULOG: fix non-null terminated string in the nf_log path If nf_log uses ipt_ULOG as logging output, we can deliver non-null terminated strings to user-space since the maximum length of the prefix that is passed by nf_log is NF_LOG_PREFIXLEN but pm->prefix is 32 bytes long (ULOG_PREFIX_LEN). This is actually happening already from nf_conntrack_tcp if ipt_ULOG is used, since it is passing strings longer than 32 bytes. Signed-off-by: Chen Gang Signed-off-by: Pablo Neira Ayuso --- net/ipv4/netfilter/ipt_ULOG.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/ipv4/netfilter/ipt_ULOG.c b/net/ipv4/netfilter/ipt_ULOG.c index cf08218..ff4b781 100644 --- a/net/ipv4/netfilter/ipt_ULOG.c +++ b/net/ipv4/netfilter/ipt_ULOG.c @@ -231,8 +231,10 @@ static void ipt_ulog_packet(struct net *net, put_unaligned(tv.tv_usec, &pm->timestamp_usec); put_unaligned(skb->mark, &pm->mark); pm->hook = hooknum; - if (prefix != NULL) - strncpy(pm->prefix, prefix, sizeof(pm->prefix)); + if (prefix != NULL) { + strncpy(pm->prefix, prefix, sizeof(pm->prefix) - 1); + pm->prefix[sizeof(pm->prefix) - 1] = '\0'; + } else if (loginfo->prefix[0] != '\0') strncpy(pm->prefix, loginfo->prefix, sizeof(pm->prefix)); else -- cgit v1.1 From c815797663b72e3ac1736f1886538152bc48e4af Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Thu, 23 May 2013 18:10:21 +0200 Subject: cfg80211: check wdev->netdev in connection work If a P2P-Device is present and another virtual interface triggers the connection work, the system crash because it tries to check if the P2P-Device's netdev (which doesn't exist) is up. Skip any wdevs that have no netdev to fix this. Cc: stable@vger.kernel.org Reported-by: YanBo Signed-off-by: Johannes Berg --- net/wireless/sme.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'net') diff --git a/net/wireless/sme.c b/net/wireless/sme.c index 8b5eddf..3ed35c3 100644 --- a/net/wireless/sme.c +++ b/net/wireless/sme.c @@ -231,6 +231,9 @@ void cfg80211_conn_work(struct work_struct *work) mutex_lock(&rdev->sched_scan_mtx); list_for_each_entry(wdev, &rdev->wdev_list, list) { + if (!wdev->netdev) + continue; + wdev_lock(wdev); if (!netif_running(wdev->netdev)) { wdev_unlock(wdev); -- cgit v1.1 From 2b436312f0919c05804fed5aa4b7f255db196e7a Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Thu, 23 May 2013 21:04:38 +0200 Subject: mac80211: fix queue handling crash The code I added in "mac80211: don't start new netdev queues if driver stopped" crashes for monitor and AP VLAN interfaces because while they have a netdev, they don't have queues set up by the driver. To fix the crash, exclude these from queue accounting here and just start their netdev queues unconditionally. For monitor, this is the best we can do, as we can redirect frames there to any other interface and don't know which one that will since it can be different for each frame. For AP VLAN interfaces, we can do better later and actually properly track the queue status. Not doing this is really a separate bug though. Reported-by: Ilan Peer Reported-by: Jouni Malinen Signed-off-by: Johannes Berg --- net/mac80211/iface.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/mac80211/iface.c b/net/mac80211/iface.c index 60f1ce5..68f51c3 100644 --- a/net/mac80211/iface.c +++ b/net/mac80211/iface.c @@ -653,7 +653,11 @@ int ieee80211_do_open(struct wireless_dev *wdev, bool coming_up) ieee80211_recalc_ps(local, -1); - if (dev) { + if (sdata->vif.type == NL80211_IFTYPE_MONITOR || + sdata->vif.type == NL80211_IFTYPE_AP_VLAN) { + /* XXX: for AP_VLAN, actually track AP queues */ + netif_tx_start_all_queues(dev); + } else if (dev) { unsigned long flags; int n_acs = IEEE80211_NUM_ACS; int ac; -- cgit v1.1 From 4325d724cd91643c727ca4cb063e8bb19989950b Mon Sep 17 00:00:00 2001 From: Felix Fietkau Date: Thu, 23 May 2013 15:05:59 +0200 Subject: cfg80211: fix reporting 64-bit station info tx bytes Copy & paste mistake - STATION_INFO_TX_BYTES64 is the name of the flag, not NL80211_STA_INFO_TX_BYTES64. Signed-off-by: Felix Fietkau Signed-off-by: Johannes Berg --- net/wireless/nl80211.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index dfdb5e6..d5aed3b 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -3411,7 +3411,7 @@ static int nl80211_send_station(struct sk_buff *msg, u32 portid, u32 seq, (u32)sinfo->rx_bytes)) goto nla_put_failure; if ((sinfo->filled & (STATION_INFO_TX_BYTES | - NL80211_STA_INFO_TX_BYTES64)) && + STATION_INFO_TX_BYTES64)) && nla_put_u32(msg, NL80211_STA_INFO_TX_BYTES, (u32)sinfo->tx_bytes)) goto nla_put_failure; -- cgit v1.1 From a622260254ee481747cceaaa8609985b29a31565 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 24 May 2013 05:49:58 +0000 Subject: ip_tunnel: fix kernel panic with icmp_dest_unreach Daniel Petre reported crashes in icmp_dst_unreach() with following call graph: #3 [ffff88003fc03938] __stack_chk_fail at ffffffff81037f77 #4 [ffff88003fc03948] icmp_send at ffffffff814d5fec #5 [ffff88003fc03ae8] ipv4_link_failure at ffffffff814a1795 #6 [ffff88003fc03af8] ipgre_tunnel_xmit at ffffffff814e7965 #7 [ffff88003fc03b78] dev_hard_start_xmit at ffffffff8146e032 #8 [ffff88003fc03bc8] sch_direct_xmit at ffffffff81487d66 #9 [ffff88003fc03c08] __qdisc_run at ffffffff81487efd #10 [ffff88003fc03c48] dev_queue_xmit at ffffffff8146e5a7 #11 [ffff88003fc03c88] ip_finish_output at ffffffff814ab596 Daniel found a similar problem mentioned in http://lkml.indiana.edu/hypermail/linux/kernel/1007.0/00961.html And indeed this is the root cause : skb->cb[] contains data fooling IP stack. We must clear IPCB in ip_tunnel_xmit() sooner in case dst_link_failure() is called. Or else skb->cb[] might contain garbage from GSO segmentation layer. A similar fix was tested on linux-3.9, but gre code was refactored in linux-3.10. I'll send patches for stable kernels as well. Many thanks to Daniel for providing reports, patches and testing ! Reported-by: Daniel Petre Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/ipv4/ip_tunnel.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/ipv4/ip_tunnel.c b/net/ipv4/ip_tunnel.c index e4147ec..be2f8da 100644 --- a/net/ipv4/ip_tunnel.c +++ b/net/ipv4/ip_tunnel.c @@ -503,6 +503,7 @@ void ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev, inner_iph = (const struct iphdr *)skb_inner_network_header(skb); + memset(IPCB(skb), 0, sizeof(*IPCB(skb))); dst = tnl_params->daddr; if (dst == 0) { /* NBMA tunnel */ @@ -658,7 +659,6 @@ void ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev, skb_dst_drop(skb); skb_dst_set(skb, &rt->dst); - memset(IPCB(skb), 0, sizeof(*IPCB(skb))); /* Push down and install the IP header. */ skb_push(skb, sizeof(struct iphdr)); -- cgit v1.1 From 1351c5d3b189a487fbacd5cdf2dc3e6faf12c682 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Thu, 23 May 2013 23:09:56 +0200 Subject: mac80211: assign AP_VLAN hw queues correctly A lot of code in mac80211 assumes that the hw queues are set up correctly for all interfaces (except for monitor) but this isn't true for AP_VLAN interfaces. Fix this by copying the AP master configuration when an AP VLAN is brought up, after this the AP interface can't change its configuration any more and needs to be brought down to change it, which also forces AP_VLAN interfaces down, so just copying in open() is sufficient. Reported-by: Jouni Malinen Signed-off-by: Johannes Berg --- net/mac80211/iface.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'net') diff --git a/net/mac80211/iface.c b/net/mac80211/iface.c index 68f51c3..00e2238 100644 --- a/net/mac80211/iface.c +++ b/net/mac80211/iface.c @@ -474,6 +474,9 @@ int ieee80211_do_open(struct wireless_dev *wdev, bool coming_up) master->control_port_protocol; sdata->control_port_no_encrypt = master->control_port_no_encrypt; + sdata->vif.cab_queue = master->vif.cab_queue; + memcpy(sdata->vif.hw_queue, master->vif.hw_queue, + sizeof(sdata->vif.hw_queue)); break; } case NL80211_IFTYPE_AP: -- cgit v1.1 From c8aa22db0112f640ac6631347f850879c621840b Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Fri, 24 May 2013 01:06:09 +0200 Subject: mac80211: close AP_VLAN interfaces before unregistering all Since Eric's commit efe117ab8 ("Speedup ieee80211_remove_interfaces") there's a bug in mac80211 when it unregisters with AP_VLAN interfaces up. If the AP_VLAN interface was registered after the AP it belongs to (which is the typical case) and then we get into this code path, unregister_netdevice_many() will crash because it isn't prepared to deal with interfaces being closed in the middle of it. Exactly this happens though, because we iterate the list, find the AP master this AP_VLAN belongs to and dev_close() the dependent VLANs. After this, unregister_netdevice_many() won't pick up the fact that the AP_VLAN is already down and will do it again, causing a crash. Cc: stable@vger.kernel.org [2.6.33+] Cc: Eric Dumazet Signed-off-by: Johannes Berg --- net/mac80211/iface.c | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'net') diff --git a/net/mac80211/iface.c b/net/mac80211/iface.c index 00e2238..ceef644 100644 --- a/net/mac80211/iface.c +++ b/net/mac80211/iface.c @@ -1703,6 +1703,15 @@ void ieee80211_remove_interfaces(struct ieee80211_local *local) ASSERT_RTNL(); + /* + * Close all AP_VLAN interfaces first, as otherwise they + * might be closed while the AP interface they belong to + * is closed, causing unregister_netdevice_many() to crash. + */ + list_for_each_entry(sdata, &local->interfaces, list) + if (sdata->vif.type == NL80211_IFTYPE_AP_VLAN) + dev_close(sdata->dev); + mutex_lock(&local->iflist_mtx); list_for_each_entry_safe(sdata, tmp, &local->interfaces, list) { list_del(&sdata->list); -- cgit v1.1 From ac20976dcaeea3e77e40e9aac8f3799d2a22ea2b Mon Sep 17 00:00:00 2001 From: Helmut Schaa Date: Mon, 27 May 2013 10:43:09 +0200 Subject: mac80211: Allow single vif mac address change with addr_mask When changing the MAC address of a single vif mac80211 will check if the new address fits into the address mask specified by the driver. This only needs to be done when using multiple BSSIDs. Hence, check the new address only against all other vifs. Also fix the MAC address assignment on new interfaces if the user changed the address of a vif such that perm_addr is not covered by addr_mask anymore. Resolves: https://bugzilla.kernel.org/show_bug.cgi?id=57371 Signed-off-by: Helmut Schaa Signed-off-by: Jakub Kicinski Reported-by: Alessandro Lannocca Cc: Alessandro Lannocca Cc: Bruno Randolf Signed-off-by: Johannes Berg --- net/mac80211/iface.c | 26 ++++++++++++++++++++------ 1 file changed, 20 insertions(+), 6 deletions(-) (limited to 'net') diff --git a/net/mac80211/iface.c b/net/mac80211/iface.c index ceef644..98d20c0 100644 --- a/net/mac80211/iface.c +++ b/net/mac80211/iface.c @@ -159,9 +159,10 @@ static int ieee80211_change_mtu(struct net_device *dev, int new_mtu) return 0; } -static int ieee80211_verify_mac(struct ieee80211_local *local, u8 *addr) +static int ieee80211_verify_mac(struct ieee80211_sub_if_data *sdata, u8 *addr) { - struct ieee80211_sub_if_data *sdata; + struct ieee80211_local *local = sdata->local; + struct ieee80211_sub_if_data *iter; u64 new, mask, tmp; u8 *m; int ret = 0; @@ -181,11 +182,14 @@ static int ieee80211_verify_mac(struct ieee80211_local *local, u8 *addr) mutex_lock(&local->iflist_mtx); - list_for_each_entry(sdata, &local->interfaces, list) { - if (sdata->vif.type == NL80211_IFTYPE_MONITOR) + list_for_each_entry(iter, &local->interfaces, list) { + if (iter == sdata) continue; - m = sdata->vif.addr; + if (iter->vif.type == NL80211_IFTYPE_MONITOR) + continue; + + m = iter->vif.addr; tmp = ((u64)m[0] << 5*8) | ((u64)m[1] << 4*8) | ((u64)m[2] << 3*8) | ((u64)m[3] << 2*8) | ((u64)m[4] << 1*8) | ((u64)m[5] << 0*8); @@ -209,7 +213,7 @@ static int ieee80211_change_mac(struct net_device *dev, void *addr) if (ieee80211_sdata_running(sdata)) return -EBUSY; - ret = ieee80211_verify_mac(sdata->local, sa->sa_data); + ret = ieee80211_verify_mac(sdata, sa->sa_data); if (ret) return ret; @@ -1486,7 +1490,17 @@ static void ieee80211_assign_perm_addr(struct ieee80211_local *local, break; } + /* + * Pick address of existing interface in case user changed + * MAC address manually, default to perm_addr. + */ m = local->hw.wiphy->perm_addr; + list_for_each_entry(sdata, &local->interfaces, list) { + if (sdata->vif.type == NL80211_IFTYPE_MONITOR) + continue; + m = sdata->vif.addr; + break; + } start = ((u64)m[0] << 5*8) | ((u64)m[1] << 4*8) | ((u64)m[2] << 3*8) | ((u64)m[3] << 2*8) | ((u64)m[4] << 1*8) | ((u64)m[5] << 0*8); -- cgit v1.1 From dc7b3eb900aab02e5cafbca3948d005be13fb4a5 Mon Sep 17 00:00:00 2001 From: Grzegorz Lyczba Date: Mon, 13 May 2013 23:56:24 +0200 Subject: ipvs: Fix reuse connection if real server is dead Expire cached connection for new TCP/SCTP connection if real server is down. Otherwise, IPVS uses the dead server for the reused connection, instead of a new working one. Signed-off-by: Grzegorz Lyczba Acked-by: Hans Schillstrom Acked-by: Julian Anastasov Signed-off-by: Simon Horman Signed-off-by: Pablo Neira Ayuso --- net/netfilter/ipvs/ip_vs_core.c | 35 +++++++++++++++++++++++++++++++++++ 1 file changed, 35 insertions(+) (limited to 'net') diff --git a/net/netfilter/ipvs/ip_vs_core.c b/net/netfilter/ipvs/ip_vs_core.c index 085b588..05565d2 100644 --- a/net/netfilter/ipvs/ip_vs_core.c +++ b/net/netfilter/ipvs/ip_vs_core.c @@ -1001,6 +1001,32 @@ static inline int is_tcp_reset(const struct sk_buff *skb, int nh_len) return th->rst; } +static inline bool is_new_conn(const struct sk_buff *skb, + struct ip_vs_iphdr *iph) +{ + switch (iph->protocol) { + case IPPROTO_TCP: { + struct tcphdr _tcph, *th; + + th = skb_header_pointer(skb, iph->len, sizeof(_tcph), &_tcph); + if (th == NULL) + return false; + return th->syn; + } + case IPPROTO_SCTP: { + sctp_chunkhdr_t *sch, schunk; + + sch = skb_header_pointer(skb, iph->len + sizeof(sctp_sctphdr_t), + sizeof(schunk), &schunk); + if (sch == NULL) + return false; + return sch->type == SCTP_CID_INIT; + } + default: + return false; + } +} + /* Handle response packets: rewrite addresses and send away... */ static unsigned int @@ -1612,6 +1638,15 @@ ip_vs_in(unsigned int hooknum, struct sk_buff *skb, int af) * Check if the packet belongs to an existing connection entry */ cp = pp->conn_in_get(af, skb, &iph, 0); + + if (unlikely(sysctl_expire_nodest_conn(ipvs)) && cp && cp->dest && + unlikely(!atomic_read(&cp->dest->weight)) && !iph.fragoffs && + is_new_conn(skb, &iph)) { + ip_vs_conn_expire_now(cp); + __ip_vs_conn_put(cp); + cp = NULL; + } + if (unlikely(!cp) && !iph.fragoffs) { /* No (second) fragments need to enter here, as nf_defrag_ipv6 * replayed fragment zero will already have created the cp -- cgit v1.1 From f96ef988cc603487c03a6de07807b06cbe641829 Mon Sep 17 00:00:00 2001 From: Michal Kubecek Date: Tue, 28 May 2013 08:26:49 +0200 Subject: ipv4: fix redirect handling for TCP packets Unlike ipv4_redirect() and ipv4_sk_redirect(), ip_do_redirect() doesn't call __build_flow_key() directly but via ip_rt_build_flow_key() wrapper. This leads to __build_flow_key() getting pointer to IPv4 header of the ICMP redirect packet rather than pointer to the embedded IPv4 header of the packet initiating the redirect. As a result, handling of ICMP redirects initiated by TCP packets is broken. Issue was introduced by 4895c771c ("ipv4: Add FIB nexthop exceptions.") Signed-off-by: Michal Kubecek Signed-off-by: David S. Miller --- net/ipv4/route.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 550781a..d35bbf0 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -737,10 +737,15 @@ static void ip_do_redirect(struct dst_entry *dst, struct sock *sk, struct sk_buf { struct rtable *rt; struct flowi4 fl4; + const struct iphdr *iph = (const struct iphdr *) skb->data; + int oif = skb->dev->ifindex; + u8 tos = RT_TOS(iph->tos); + u8 prot = iph->protocol; + u32 mark = skb->mark; rt = (struct rtable *) dst; - ip_rt_build_flow_key(&fl4, sk, skb); + __build_flow_key(&fl4, sk, iph, oif, tos, prot, mark, 0); __ip_do_redirect(rt, skb, &fl4, true); } -- cgit v1.1 From b161c144404c18f6a9e20e46b63828ae3c2eb093 Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Fri, 24 May 2013 09:47:49 -0400 Subject: svcrpc: implement O_NONBLOCK behavior for use-gss-proxy Somebody noticed LTP was complaining about O_NONBLOCK opens of /proc/net/rpc/use-gss-proxy succeeding and then a following read hanging. I'm not convinced LTP really has any business opening random proc files and expecting them to behave a certain way. Maybe this isn't really a bug. But in any case the O_NONBLOCK behavior could be useful for someone that wants to test whether gss-proxy is up without waiting. Reported-by: Jan Stancek Signed-off-by: J. Bruce Fields --- net/sunrpc/auth_gss/svcauth_gss.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/sunrpc/auth_gss/svcauth_gss.c b/net/sunrpc/auth_gss/svcauth_gss.c index 2c6a1ec..29b4ba9 100644 --- a/net/sunrpc/auth_gss/svcauth_gss.c +++ b/net/sunrpc/auth_gss/svcauth_gss.c @@ -1317,10 +1317,12 @@ static inline bool gssp_ready(struct sunrpc_net *sn) return false; } -static int wait_for_gss_proxy(struct net *net) +static int wait_for_gss_proxy(struct net *net, struct file *file) { struct sunrpc_net *sn = net_generic(net, sunrpc_net_id); + if (file->f_flags & O_NONBLOCK && !gssp_ready(sn)) + return -EAGAIN; return wait_event_interruptible(sn->gssp_wq, gssp_ready(sn)); } @@ -1362,7 +1364,7 @@ static ssize_t read_gssp(struct file *file, char __user *buf, size_t len; int ret; - ret = wait_for_gss_proxy(net); + ret = wait_for_gss_proxy(net, file); if (ret) return ret; -- cgit v1.1 From 1be374a0518a288147c6a7398792583200a67261 Mon Sep 17 00:00:00 2001 From: Andy Lutomirski Date: Wed, 22 May 2013 14:07:44 -0700 Subject: net: Block MSG_CMSG_COMPAT in send(m)msg and recv(m)msg To: linux-kernel@vger.kernel.org Cc: x86@kernel.org, trinity@vger.kernel.org, Andy Lutomirski , netdev@vger.kernel.org, "David S. Miller" Subject: [PATCH 5/5] net: Block MSG_CMSG_COMPAT in send(m)msg and recv(m)msg MSG_CMSG_COMPAT is (AFAIK) not intended to be part of the API -- it's a hack that steals a bit to indicate to other networking code that a compat entry was used. So don't allow it from a non-compat syscall. This prevents an oops when running this code: int main() { int s; struct sockaddr_in addr; struct msghdr *hdr; char *highpage = mmap((void*)(TASK_SIZE_MAX - 4096), 4096, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS | MAP_FIXED, -1, 0); if (highpage == MAP_FAILED) err(1, "mmap"); s = socket(AF_INET, SOCK_DGRAM, IPPROTO_UDP); if (s == -1) err(1, "socket"); addr.sin_family = AF_INET; addr.sin_port = htons(1); addr.sin_addr.s_addr = htonl(INADDR_LOOPBACK); if (connect(s, (struct sockaddr*)&addr, sizeof(addr)) != 0) err(1, "connect"); void *evil = highpage + 4096 - COMPAT_MSGHDR_SIZE; printf("Evil address is %p\n", evil); if (syscall(__NR_sendmmsg, s, evil, 1, MSG_CMSG_COMPAT) < 0) err(1, "sendmmsg"); return 0; } Cc: David S. Miller Signed-off-by: Andy Lutomirski Signed-off-by: David S. Miller --- net/socket.c | 33 +++++++++++++++++++++++++++++++-- 1 file changed, 31 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/socket.c b/net/socket.c index 6b94633..9ff6366 100644 --- a/net/socket.c +++ b/net/socket.c @@ -2075,8 +2075,12 @@ SYSCALL_DEFINE3(sendmsg, int, fd, struct msghdr __user *, msg, unsigned int, fla { int fput_needed, err; struct msghdr msg_sys; - struct socket *sock = sockfd_lookup_light(fd, &err, &fput_needed); + struct socket *sock; + + if (flags & MSG_CMSG_COMPAT) + return -EINVAL; + sock = sockfd_lookup_light(fd, &err, &fput_needed); if (!sock) goto out; @@ -2149,6 +2153,8 @@ int __sys_sendmmsg(int fd, struct mmsghdr __user *mmsg, unsigned int vlen, SYSCALL_DEFINE4(sendmmsg, int, fd, struct mmsghdr __user *, mmsg, unsigned int, vlen, unsigned int, flags) { + if (flags & MSG_CMSG_COMPAT) + return -EINVAL; return __sys_sendmmsg(fd, mmsg, vlen, flags); } @@ -2249,8 +2255,12 @@ SYSCALL_DEFINE3(recvmsg, int, fd, struct msghdr __user *, msg, { int fput_needed, err; struct msghdr msg_sys; - struct socket *sock = sockfd_lookup_light(fd, &err, &fput_needed); + struct socket *sock; + + if (flags & MSG_CMSG_COMPAT) + return -EINVAL; + sock = sockfd_lookup_light(fd, &err, &fput_needed); if (!sock) goto out; @@ -2375,6 +2385,9 @@ SYSCALL_DEFINE5(recvmmsg, int, fd, struct mmsghdr __user *, mmsg, int datagrams; struct timespec timeout_sys; + if (flags & MSG_CMSG_COMPAT) + return -EINVAL; + if (!timeout) return __sys_recvmmsg(fd, mmsg, vlen, flags, NULL); @@ -2492,15 +2505,31 @@ SYSCALL_DEFINE2(socketcall, int, call, unsigned long __user *, args) (int __user *)a[4]); break; case SYS_SENDMSG: + if (a[2] & MSG_CMSG_COMPAT) { + err = -EINVAL; + break; + } err = sys_sendmsg(a0, (struct msghdr __user *)a1, a[2]); break; case SYS_SENDMMSG: + if (a[3] & MSG_CMSG_COMPAT) { + err = -EINVAL; + break; + } err = sys_sendmmsg(a0, (struct mmsghdr __user *)a1, a[2], a[3]); break; case SYS_RECVMSG: + if (a[2] & MSG_CMSG_COMPAT) { + err = -EINVAL; + break; + } err = sys_recvmsg(a0, (struct msghdr __user *)a1, a[2]); break; case SYS_RECVMMSG: + if (a[3] & MSG_CMSG_COMPAT) { + err = -EINVAL; + break; + } err = sys_recvmmsg(a0, (struct mmsghdr __user *)a1, a[2], a[3], (struct timespec __user *)a[4]); break; -- cgit v1.1 From 456db6a4d495f40777da6f1f32f62f13026f52db Mon Sep 17 00:00:00 2001 From: Federico Vaga Date: Tue, 28 May 2013 05:02:44 +0000 Subject: net/core/sock.c: add missing VSOCK string in af_family_*_key_strings The three arrays of strings: af_family_key_strings, af_family_slock_key_strings and af_family_clock_key_strings have not VSOCK's string Signed-off-by: Federico Vaga Signed-off-by: David S. Miller --- net/core/sock.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/core/sock.c b/net/core/sock.c index 6ba327d..88868a9 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -210,7 +210,7 @@ static const char *const af_family_key_strings[AF_MAX+1] = { "sk_lock-AF_TIPC" , "sk_lock-AF_BLUETOOTH", "sk_lock-IUCV" , "sk_lock-AF_RXRPC" , "sk_lock-AF_ISDN" , "sk_lock-AF_PHONET" , "sk_lock-AF_IEEE802154", "sk_lock-AF_CAIF" , "sk_lock-AF_ALG" , - "sk_lock-AF_NFC" , "sk_lock-AF_MAX" + "sk_lock-AF_NFC" , "sk_lock-AF_VSOCK" , "sk_lock-AF_MAX" }; static const char *const af_family_slock_key_strings[AF_MAX+1] = { "slock-AF_UNSPEC", "slock-AF_UNIX" , "slock-AF_INET" , @@ -226,7 +226,7 @@ static const char *const af_family_slock_key_strings[AF_MAX+1] = { "slock-AF_TIPC" , "slock-AF_BLUETOOTH", "slock-AF_IUCV" , "slock-AF_RXRPC" , "slock-AF_ISDN" , "slock-AF_PHONET" , "slock-AF_IEEE802154", "slock-AF_CAIF" , "slock-AF_ALG" , - "slock-AF_NFC" , "slock-AF_MAX" + "slock-AF_NFC" , "slock-AF_VSOCK" ,"slock-AF_MAX" }; static const char *const af_family_clock_key_strings[AF_MAX+1] = { "clock-AF_UNSPEC", "clock-AF_UNIX" , "clock-AF_INET" , @@ -242,7 +242,7 @@ static const char *const af_family_clock_key_strings[AF_MAX+1] = { "clock-AF_TIPC" , "clock-AF_BLUETOOTH", "clock-AF_IUCV" , "clock-AF_RXRPC" , "clock-AF_ISDN" , "clock-AF_PHONET" , "clock-AF_IEEE802154", "clock-AF_CAIF" , "clock-AF_ALG" , - "clock-AF_NFC" , "clock-AF_MAX" + "clock-AF_NFC" , "clock-AF_VSOCK" , "clock-AF_MAX" }; /* -- cgit v1.1 From d660164d79b67f879db35a7d61e47d3b99bc714e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Michal=20Kube=C4=8Dek?= Date: Tue, 28 May 2013 22:37:03 +0000 Subject: netfilter: xt_LOG: fix mark logging for IPv6 packets In dump_ipv6_packet(), the "recurse" parameter is zero only if dumping contents of a packet embedded into an ICMPv6 error message. Therefore we want to log packet mark if recurse is non-zero, not when it is zero. Signed-off-by: Michal Kubecek Signed-off-by: Pablo Neira Ayuso --- net/netfilter/xt_LOG.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/netfilter/xt_LOG.c b/net/netfilter/xt_LOG.c index 491c7d8..5ab2484 100644 --- a/net/netfilter/xt_LOG.c +++ b/net/netfilter/xt_LOG.c @@ -737,7 +737,7 @@ static void dump_ipv6_packet(struct sbuff *m, dump_sk_uid_gid(m, skb->sk); /* Max length: 16 "MARK=0xFFFFFFFF " */ - if (!recurse && skb->mark) + if (recurse && skb->mark) sb_add(m, "MARK=0x%x ", skb->mark); } -- cgit v1.1 From afe3c3fd5392b2f0066930abc5dbd3f4b14a0f13 Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Fri, 24 May 2013 17:24:34 -0400 Subject: svcrpc: fix failures to handle -1 uid's and gid's As of f025adf191924e3a75ce80e130afcd2485b53bb8 "sunrpc: Properly decode kuids and kgids in RPC_AUTH_UNIX credentials" any rpc containing a -1 (0xffff) uid or gid would fail with a badcred error. Reported symptoms were xmbc clients failing on upgrade of the NFS server; examination of the network trace showed them sending -1 as the gid. Reported-by: Julian Sikorski Tested-by: Julian Sikorski Cc: "Eric W. Biederman" Cc: stable@vger.kernel.org Signed-off-by: J. Bruce Fields --- net/sunrpc/svcauth_unix.c | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) (limited to 'net') diff --git a/net/sunrpc/svcauth_unix.c b/net/sunrpc/svcauth_unix.c index c3f9e1e..06bdf5a 100644 --- a/net/sunrpc/svcauth_unix.c +++ b/net/sunrpc/svcauth_unix.c @@ -810,11 +810,15 @@ svcauth_unix_accept(struct svc_rqst *rqstp, __be32 *authp) goto badcred; argv->iov_base = (void*)((__be32*)argv->iov_base + slen); /* skip machname */ argv->iov_len -= slen*4; - + /* + * Note: we skip uid_valid()/gid_valid() checks here for + * backwards compatibility with clients that use -1 id's. + * Instead, -1 uid or gid is later mapped to the + * (export-specific) anonymous id by nfsd_setuser. + * Supplementary gid's will be left alone. + */ cred->cr_uid = make_kuid(&init_user_ns, svc_getnl(argv)); /* uid */ cred->cr_gid = make_kgid(&init_user_ns, svc_getnl(argv)); /* gid */ - if (!uid_valid(cred->cr_uid) || !gid_valid(cred->cr_gid)) - goto badcred; slen = svc_getnl(argv); /* gids length */ if (slen > 16 || (len -= (slen + 2)*4) < 0) goto badcred; @@ -823,8 +827,6 @@ svcauth_unix_accept(struct svc_rqst *rqstp, __be32 *authp) return SVC_CLOSE; for (i = 0; i < slen; i++) { kgid_t kgid = make_kgid(&init_user_ns, svc_getnl(argv)); - if (!gid_valid(kgid)) - goto badcred; GROUP_AT(cred->cr_group_info, i) = kgid; } if (svc_getu32(argv) != htonl(RPC_AUTH_NULL) || svc_getu32(argv) != 0) { -- cgit v1.1 From a70b9641e6a90d6821e4354a2c2fede74015db29 Mon Sep 17 00:00:00 2001 From: Jan Beulich Date: Wed, 29 May 2013 13:33:51 +0100 Subject: ipvs: ip_vs_sh: fix build kfree_rcu() requires offsetof(..., rcu_head) < 4096, which can get violated with a sufficiently high CONFIG_IP_VS_SH_TAB_BITS. Signed-off-by: Jan Beulich Signed-off-by: Simon Horman Signed-off-by: Pablo Neira Ayuso --- net/netfilter/ipvs/ip_vs_sh.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/netfilter/ipvs/ip_vs_sh.c b/net/netfilter/ipvs/ip_vs_sh.c index 0df269d..a65edfe4 100644 --- a/net/netfilter/ipvs/ip_vs_sh.c +++ b/net/netfilter/ipvs/ip_vs_sh.c @@ -67,8 +67,8 @@ struct ip_vs_sh_bucket { #define IP_VS_SH_TAB_MASK (IP_VS_SH_TAB_SIZE - 1) struct ip_vs_sh_state { - struct ip_vs_sh_bucket buckets[IP_VS_SH_TAB_SIZE]; struct rcu_head rcu_head; + struct ip_vs_sh_bucket buckets[IP_VS_SH_TAB_SIZE]; }; /* -- cgit v1.1 From fda3f402f446e82204266f4a3bf26912f2d55e75 Mon Sep 17 00:00:00 2001 From: Nicolas Dichtel Date: Fri, 31 May 2013 05:37:57 +0000 Subject: snmp6: remove IPSTATS_MIB_CSUMERRORS This stat is not relevant in IPv6, there is no checksum in IPv6 header. Just leave a comment to explain the hole. Signed-off-by: Nicolas Dichtel Acked-by: Eric Dumazet Signed-off-by: David S. Miller --- net/ipv6/proc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/ipv6/proc.c b/net/ipv6/proc.c index f3c1ff4..51c3285 100644 --- a/net/ipv6/proc.c +++ b/net/ipv6/proc.c @@ -90,7 +90,7 @@ static const struct snmp_mib snmp6_ipstats_list[] = { SNMP_MIB_ITEM("Ip6OutMcastOctets", IPSTATS_MIB_OUTMCASTOCTETS), SNMP_MIB_ITEM("Ip6InBcastOctets", IPSTATS_MIB_INBCASTOCTETS), SNMP_MIB_ITEM("Ip6OutBcastOctets", IPSTATS_MIB_OUTBCASTOCTETS), - SNMP_MIB_ITEM("InCsumErrors", IPSTATS_MIB_CSUMERRORS), + /* IPSTATS_MIB_CSUMERRORS is not relevant in IPv6 (no checksum) */ SNMP_MIB_SENTINEL }; -- cgit v1.1 From 9747ba6636be8a7e8ba83a1fb231d061ca318e4f Mon Sep 17 00:00:00 2001 From: Jay Vosburgh Date: Fri, 31 May 2013 11:57:26 +0000 Subject: net/core: __hw_addr_create_ex does not initialize sync_cnt The sync_cnt field is not being initialized, which can result in arbitrary values in the field. Fixed by initializing it to zero. Signed-off-by: Jay Vosburgh Reviewed-by: Vlad Yasevich Tested-by: Shawn Bohrer Signed-off-by: David S. Miller --- net/core/dev_addr_lists.c | 1 + 1 file changed, 1 insertion(+) (limited to 'net') diff --git a/net/core/dev_addr_lists.c b/net/core/dev_addr_lists.c index c013f38..1f919d9 100644 --- a/net/core/dev_addr_lists.c +++ b/net/core/dev_addr_lists.c @@ -39,6 +39,7 @@ static int __hw_addr_create_ex(struct netdev_hw_addr_list *list, ha->refcount = 1; ha->global_use = global; ha->synced = sync; + ha->sync_cnt = 0; list_add_tail_rcu(&ha->list, &list->list); list->count++; -- cgit v1.1 From 60ba834c2fb65f2fafee47e03c258fac579d0591 Mon Sep 17 00:00:00 2001 From: Jay Vosburgh Date: Fri, 31 May 2013 11:57:27 +0000 Subject: net/core: __hw_addr_unsync_one "from" address not marked synced When an address is added to a subordinate interface (the "to" list), the address entry in the "from" list is not marked "synced" as the entry added to the "to" list is. When performing the unsync operation (e.g., dev_mc_unsync), __hw_addr_unsync_one calls __hw_addr_del_entry with the "synced" parameter set to true for the case when the address reference is being released from the "from" list. This causes a test inside to fail, with the result being that the reference count on the "from" address is not properly decremeted and the address on the "from" list will never be freed. Correct this by having __hw_addr_unsync_one call the __hw_addr_del_entry function with the "sync" flag set to false for the "remove from the from list" case. Signed-off-by: Jay Vosburgh Reviewed-by: Vlad Yasevich Tested-by: Shawn Bohrer Signed-off-by: David S. Miller --- net/core/dev_addr_lists.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/core/dev_addr_lists.c b/net/core/dev_addr_lists.c index 1f919d9..c858e81 100644 --- a/net/core/dev_addr_lists.c +++ b/net/core/dev_addr_lists.c @@ -160,7 +160,8 @@ static void __hw_addr_unsync_one(struct netdev_hw_addr_list *to_list, if (err) return; ha->sync_cnt--; - __hw_addr_del_entry(from_list, ha, false, true); + /* address on from list is not marked synced */ + __hw_addr_del_entry(from_list, ha, false, false); } static int __hw_addr_sync_multiple(struct netdev_hw_addr_list *to_list, -- cgit v1.1 From 29ca2f8fcc721517b83d0a560c47cee2dde827a6 Mon Sep 17 00:00:00 2001 From: Jay Vosburgh Date: Fri, 31 May 2013 11:57:28 +0000 Subject: net/core: __hw_addr_sync_one / _multiple broken Currently, __hw_addr_sync_one is called in a loop by __hw_addr_sync_multiple to sync each of a "from" device's hw addresses to a "to" device. __hw_addr_sync_one calls __hw_addr_add_ex to attempt to add each address. __hw_addr_add_ex is called with global=false, and sync=true. __hw_addr_add_ex checks to see if the new address matches an address already on the list. If so, it tests global and sync. In this case, sync=true, and it then checks if the address is already synced, and if so, returns 0. This 0 return causes __hw_addr_sync_one to increment the sync_cnt and refcount for the "from" list's address entry, even though the address is already synced and has a reference and sync_cnt. This will cause the sync_cnt and refcount to increment without bound every time an addresses is added to the "from" device and synced to the "to" device. The fix here has two parts: First, when __hw_addr_add_ex finds the address already exists and is synced, return -EEXIST instead of 0. Second, __hw_addr_sync_one checks the error return for -EEXIST, and if so, it (a) does not add a refcount/sync_cnt, and (b) returns 0 itself so that __hw_addr_sync_multiple will not return an error. Signed-off-by: Jay Vosburgh Reviewed-by: Vlad Yasevich Tested-by: Shawn Bohrer Signed-off-by: David S. Miller --- net/core/dev_addr_lists.c | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/net/core/dev_addr_lists.c b/net/core/dev_addr_lists.c index c858e81..8e2c2ef 100644 --- a/net/core/dev_addr_lists.c +++ b/net/core/dev_addr_lists.c @@ -67,7 +67,7 @@ static int __hw_addr_add_ex(struct netdev_hw_addr_list *list, } if (sync) { if (ha->synced) - return 0; + return -EEXIST; else ha->synced = true; } @@ -140,10 +140,13 @@ static int __hw_addr_sync_one(struct netdev_hw_addr_list *to_list, err = __hw_addr_add_ex(to_list, ha->addr, addr_len, ha->type, false, true); - if (err) + if (err && err != -EEXIST) return err; - ha->sync_cnt++; - ha->refcount++; + + if (!err) { + ha->sync_cnt++; + ha->refcount++; + } return 0; } -- cgit v1.1 From b190a50875b95e58ebe2b00ed3bf7f1d44961471 Mon Sep 17 00:00:00 2001 From: Jay Vosburgh Date: Fri, 31 May 2013 11:57:29 +0000 Subject: net/core: dev_mc_sync_multiple calls wrong helper The dev_mc_sync_multiple function is currently calling __hw_addr_sync, and not __hw_addr_sync_multiple. This will result in addresses only being synced to the first device from the set. Corrected by calling the _multiple variant. Signed-off-by: Jay Vosburgh Reviewed-by: Vlad Yasevich Tested-by: Shawn Bohrer Signed-off-by: David S. Miller --- net/core/dev_addr_lists.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/core/dev_addr_lists.c b/net/core/dev_addr_lists.c index 8e2c2ef..6cda4e2 100644 --- a/net/core/dev_addr_lists.c +++ b/net/core/dev_addr_lists.c @@ -801,7 +801,7 @@ int dev_mc_sync_multiple(struct net_device *to, struct net_device *from) return -EINVAL; netif_addr_lock_nested(to); - err = __hw_addr_sync(&to->mc, &from->mc, to->addr_len); + err = __hw_addr_sync_multiple(&to->mc, &from->mc, to->addr_len); if (!err) __dev_set_rx_mode(to); netif_addr_unlock(to); -- cgit v1.1 From 1e2bd517c108816220f262d7954b697af03b5f9c Mon Sep 17 00:00:00 2001 From: Pravin B Shelar Date: Thu, 30 May 2013 06:45:27 +0000 Subject: udp6: Fix udp fragmentation for tunnel traffic. udp6 over GRE tunnel does not work after to GRE tso changes. GRE tso handler passes inner packet but keeps track of outer header start in SKB_GSO_CB(skb)->mac_offset. udp6 fragment need to take care of outer header, which start at the mac_offset, while adding fragment header. This bug is introduced by commit 68c3316311 (GRE: Add TCP segmentation offload for GRE). Reported-by: Dmitry Kravkov Signed-off-by: Pravin B Shelar Tested-by: Dmitry Kravkov Signed-off-by: David S. Miller --- net/ipv6/udp_offload.c | 20 ++++++++++++-------- 1 file changed, 12 insertions(+), 8 deletions(-) (limited to 'net') diff --git a/net/ipv6/udp_offload.c b/net/ipv6/udp_offload.c index 3bb3a89..d3cfaf9 100644 --- a/net/ipv6/udp_offload.c +++ b/net/ipv6/udp_offload.c @@ -46,11 +46,12 @@ static struct sk_buff *udp6_ufo_fragment(struct sk_buff *skb, unsigned int mss; unsigned int unfrag_ip6hlen, unfrag_len; struct frag_hdr *fptr; - u8 *mac_start, *prevhdr; + u8 *packet_start, *prevhdr; u8 nexthdr; u8 frag_hdr_sz = sizeof(struct frag_hdr); int offset; __wsum csum; + int tnl_hlen; mss = skb_shinfo(skb)->gso_size; if (unlikely(skb->len <= mss)) @@ -83,9 +84,11 @@ static struct sk_buff *udp6_ufo_fragment(struct sk_buff *skb, skb->ip_summed = CHECKSUM_NONE; /* Check if there is enough headroom to insert fragment header. */ - if ((skb_mac_header(skb) < skb->head + frag_hdr_sz) && - pskb_expand_head(skb, frag_hdr_sz, 0, GFP_ATOMIC)) - goto out; + tnl_hlen = skb_tnl_header_len(skb); + if (skb_headroom(skb) < (tnl_hlen + frag_hdr_sz)) { + if (gso_pskb_expand_head(skb, tnl_hlen + frag_hdr_sz)) + goto out; + } /* Find the unfragmentable header and shift it left by frag_hdr_sz * bytes to insert fragment header. @@ -93,11 +96,12 @@ static struct sk_buff *udp6_ufo_fragment(struct sk_buff *skb, unfrag_ip6hlen = ip6_find_1stfragopt(skb, &prevhdr); nexthdr = *prevhdr; *prevhdr = NEXTHDR_FRAGMENT; - unfrag_len = skb_network_header(skb) - skb_mac_header(skb) + - unfrag_ip6hlen; - mac_start = skb_mac_header(skb); - memmove(mac_start-frag_hdr_sz, mac_start, unfrag_len); + unfrag_len = (skb_network_header(skb) - skb_mac_header(skb)) + + unfrag_ip6hlen + tnl_hlen; + packet_start = (u8 *) skb->head + SKB_GSO_CB(skb)->mac_offset; + memmove(packet_start-frag_hdr_sz, packet_start, unfrag_len); + SKB_GSO_CB(skb)->mac_offset -= frag_hdr_sz; skb->mac_header -= frag_hdr_sz; skb->network_header -= frag_hdr_sz; -- cgit v1.1 From e4c1721642bbd42d8142f4811cde0588c28db51d Mon Sep 17 00:00:00 2001 From: Paul Moore Date: Wed, 29 May 2013 07:36:25 +0000 Subject: xfrm: force a garbage collection after deleting a policy In some cases after deleting a policy from the SPD the policy would remain in the dst/flow/route cache for an extended period of time which caused problems for SELinux as its dynamic network access controls key off of the number of XFRM policy and state entries. This patch corrects this problem by forcing a XFRM garbage collection whenever a policy is sucessfully removed. Reported-by: Ondrej Moris Signed-off-by: Paul Moore Signed-off-by: David S. Miller --- net/key/af_key.c | 4 ++++ net/xfrm/xfrm_policy.c | 3 ++- net/xfrm/xfrm_user.c | 2 ++ 3 files changed, 8 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/key/af_key.c b/net/key/af_key.c index 5b1e5af..c5fbd75 100644 --- a/net/key/af_key.c +++ b/net/key/af_key.c @@ -2366,6 +2366,8 @@ static int pfkey_spddelete(struct sock *sk, struct sk_buff *skb, const struct sa out: xfrm_pol_put(xp); + if (err == 0) + xfrm_garbage_collect(net); return err; } @@ -2615,6 +2617,8 @@ static int pfkey_spdget(struct sock *sk, struct sk_buff *skb, const struct sadb_ out: xfrm_pol_put(xp); + if (delete && err == 0) + xfrm_garbage_collect(net); return err; } diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index 23cea0f..ea970b8 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -2557,11 +2557,12 @@ static void __xfrm_garbage_collect(struct net *net) } } -static void xfrm_garbage_collect(struct net *net) +void xfrm_garbage_collect(struct net *net) { flow_cache_flush(); __xfrm_garbage_collect(net); } +EXPORT_SYMBOL(xfrm_garbage_collect); static void xfrm_garbage_collect_deferred(struct net *net) { diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c index aa77874..3f565e4 100644 --- a/net/xfrm/xfrm_user.c +++ b/net/xfrm/xfrm_user.c @@ -1681,6 +1681,8 @@ static int xfrm_get_policy(struct sk_buff *skb, struct nlmsghdr *nlh, out: xfrm_pol_put(xp); + if (delete && err == 0) + xfrm_garbage_collect(net); return err; } -- cgit v1.1 From 01cb71d2d47b78354358e4bb938bb06323e17498 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Sun, 2 Jun 2013 13:55:05 +0000 Subject: net_sched: restore "overhead xxx" handling commit 56b765b79 ("htb: improved accuracy at high rates") broke the "overhead xxx" handling, as well as the "linklayer atm" attribute. tc class add ... htb rate X ceil Y linklayer atm overhead 10 This patch restores the "overhead xxx" handling, for htb, tbf and act_police The "linklayer atm" thing needs a separate fix. Reported-by: Jesper Dangaard Brouer Signed-off-by: Eric Dumazet Cc: Vimalkumar Cc: Jiri Pirko Signed-off-by: David S. Miller --- net/sched/act_police.c | 8 ++++---- net/sched/sch_generic.c | 8 +++++--- net/sched/sch_htb.c | 8 ++++---- net/sched/sch_tbf.c | 8 ++++---- 4 files changed, 17 insertions(+), 15 deletions(-) (limited to 'net') diff --git a/net/sched/act_police.c b/net/sched/act_police.c index 823463a..189e3c5 100644 --- a/net/sched/act_police.c +++ b/net/sched/act_police.c @@ -231,14 +231,14 @@ override: } if (R_tab) { police->rate_present = true; - psched_ratecfg_precompute(&police->rate, R_tab->rate.rate); + psched_ratecfg_precompute(&police->rate, &R_tab->rate); qdisc_put_rtab(R_tab); } else { police->rate_present = false; } if (P_tab) { police->peak_present = true; - psched_ratecfg_precompute(&police->peak, P_tab->rate.rate); + psched_ratecfg_precompute(&police->peak, &P_tab->rate); qdisc_put_rtab(P_tab); } else { police->peak_present = false; @@ -376,9 +376,9 @@ tcf_act_police_dump(struct sk_buff *skb, struct tc_action *a, int bind, int ref) }; if (police->rate_present) - opt.rate.rate = psched_ratecfg_getrate(&police->rate); + psched_ratecfg_getrate(&opt.rate, &police->rate); if (police->peak_present) - opt.peakrate.rate = psched_ratecfg_getrate(&police->peak); + psched_ratecfg_getrate(&opt.peakrate, &police->peak); if (nla_put(skb, TCA_POLICE_TBF, sizeof(opt), &opt)) goto nla_put_failure; if (police->tcfp_result && diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c index eac7e0e..2022408 100644 --- a/net/sched/sch_generic.c +++ b/net/sched/sch_generic.c @@ -898,14 +898,16 @@ void dev_shutdown(struct net_device *dev) WARN_ON(timer_pending(&dev->watchdog_timer)); } -void psched_ratecfg_precompute(struct psched_ratecfg *r, u32 rate) +void psched_ratecfg_precompute(struct psched_ratecfg *r, + const struct tc_ratespec *conf) { u64 factor; u64 mult; int shift; - r->rate_bps = (u64)rate << 3; - r->shift = 0; + memset(r, 0, sizeof(*r)); + r->overhead = conf->overhead; + r->rate_bps = (u64)conf->rate << 3; r->mult = 1; /* * Calibrate mult, shift so that token counting is accurate diff --git a/net/sched/sch_htb.c b/net/sched/sch_htb.c index 79b1876..f87fb85 100644 --- a/net/sched/sch_htb.c +++ b/net/sched/sch_htb.c @@ -1090,9 +1090,9 @@ static int htb_dump_class(struct Qdisc *sch, unsigned long arg, memset(&opt, 0, sizeof(opt)); - opt.rate.rate = psched_ratecfg_getrate(&cl->rate); + psched_ratecfg_getrate(&opt.rate, &cl->rate); opt.buffer = PSCHED_NS2TICKS(cl->buffer); - opt.ceil.rate = psched_ratecfg_getrate(&cl->ceil); + psched_ratecfg_getrate(&opt.ceil, &cl->ceil); opt.cbuffer = PSCHED_NS2TICKS(cl->cbuffer); opt.quantum = cl->quantum; opt.prio = cl->prio; @@ -1459,8 +1459,8 @@ static int htb_change_class(struct Qdisc *sch, u32 classid, cl->prio = TC_HTB_NUMPRIO - 1; } - psched_ratecfg_precompute(&cl->rate, hopt->rate.rate); - psched_ratecfg_precompute(&cl->ceil, hopt->ceil.rate); + psched_ratecfg_precompute(&cl->rate, &hopt->rate); + psched_ratecfg_precompute(&cl->ceil, &hopt->ceil); cl->buffer = PSCHED_TICKS2NS(hopt->buffer); cl->cbuffer = PSCHED_TICKS2NS(hopt->buffer); diff --git a/net/sched/sch_tbf.c b/net/sched/sch_tbf.c index c8388f3..e478d31 100644 --- a/net/sched/sch_tbf.c +++ b/net/sched/sch_tbf.c @@ -298,9 +298,9 @@ static int tbf_change(struct Qdisc *sch, struct nlattr *opt) q->tokens = q->buffer; q->ptokens = q->mtu; - psched_ratecfg_precompute(&q->rate, rtab->rate.rate); + psched_ratecfg_precompute(&q->rate, &rtab->rate); if (ptab) { - psched_ratecfg_precompute(&q->peak, ptab->rate.rate); + psched_ratecfg_precompute(&q->peak, &ptab->rate); q->peak_present = true; } else { q->peak_present = false; @@ -350,9 +350,9 @@ static int tbf_dump(struct Qdisc *sch, struct sk_buff *skb) goto nla_put_failure; opt.limit = q->limit; - opt.rate.rate = psched_ratecfg_getrate(&q->rate); + psched_ratecfg_getrate(&opt.rate, &q->rate); if (q->peak_present) - opt.peakrate.rate = psched_ratecfg_getrate(&q->peak); + psched_ratecfg_getrate(&opt.peakrate, &q->peak); else memset(&opt.peakrate, 0, sizeof(opt.peakrate)); opt.mtu = PSCHED_NS2TICKS(q->mtu); -- cgit v1.1 From 534c877928a16ae5f9776436a497109639bf67dc Mon Sep 17 00:00:00 2001 From: Gao feng Date: Sun, 2 Jun 2013 22:16:21 +0000 Subject: ipv6: assign rt6_info to inet6_ifaddr in init_loopback Commit 25fb6ca4ed9cad72f14f61629b68dc03c0d9713f "net IPv6 : Fix broken IPv6 routing table after loopback down-up" forgot to assign rt6_info to the inet6_ifaddr. When disable the net device, the rt6_info which allocated in init_loopback will not be destroied in __ipv6_ifa_notify. This will trigger the waring message below [23527.916091] unregister_netdevice: waiting for tap0 to become free. Usage count = 1 Reported-by: Arkadiusz Miskiewicz Signed-off-by: Gao feng Signed-off-by: David S. Miller --- net/ipv6/addrconf.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index d1b2d80..1bbf744 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -2658,8 +2658,10 @@ static void init_loopback(struct net_device *dev) sp_rt = addrconf_dst_alloc(idev, &sp_ifa->addr, 0); /* Failure cases are ignored */ - if (!IS_ERR(sp_rt)) + if (!IS_ERR(sp_rt)) { + sp_ifa->rt = sp_rt; ip6_ins_rt(sp_rt); + } } read_unlock_bh(&idev->lock); } -- cgit v1.1 From 5e71d9d77c07fa7d4c42287a177f7b738d0cd4b9 Mon Sep 17 00:00:00 2001 From: Pablo Neira Date: Mon, 3 Jun 2013 09:28:43 +0000 Subject: net: fix sk_buff head without data area Eric Dumazet spotted that we have to check skb->head instead of skb->data as skb->head points to the beginning of the data area of the skbuff. Similarly, we have to initialize the skb->head pointer, not skb->data in __alloc_skb_head. After this fix, netlink crashes in the release path of the sk_buff, so let's fix that as well. This bug was introduced in (0ebd0ac net: add function to allocate sk_buff head without data area). Reported-by: Eric Dumazet Signed-off-by: Pablo Neira Ayuso Signed-off-by: David S. Miller --- net/core/skbuff.c | 4 ++-- net/netlink/af_netlink.c | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/core/skbuff.c b/net/core/skbuff.c index af9185d..cfd777b 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -195,7 +195,7 @@ struct sk_buff *__alloc_skb_head(gfp_t gfp_mask, int node) * the tail pointer in struct sk_buff! */ memset(skb, 0, offsetof(struct sk_buff, tail)); - skb->data = NULL; + skb->head = NULL; skb->truesize = sizeof(struct sk_buff); atomic_set(&skb->users, 1); @@ -611,7 +611,7 @@ static void skb_release_head_state(struct sk_buff *skb) static void skb_release_all(struct sk_buff *skb) { skb_release_head_state(skb); - if (likely(skb->data)) + if (likely(skb->head)) skb_release_data(skb); } diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c index 12ac6b4..d0b3dd6 100644 --- a/net/netlink/af_netlink.c +++ b/net/netlink/af_netlink.c @@ -747,7 +747,7 @@ static void netlink_skb_destructor(struct sk_buff *skb) atomic_dec(&ring->pending); sock_put(sk); - skb->data = NULL; + skb->head = NULL; } #endif if (skb->sk != NULL) -- cgit v1.1 From 5343a7f8be11951cb3095b91e8e4eb506cfacc0f Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 4 Jun 2013 07:11:48 +0000 Subject: net_sched: htb: do not mix 1ns and 64ns time units commit 56b765b79 ("htb: improved accuracy at high rates") added another regression for low rates, because it mixes 1ns and 64ns time units. So the maximum delay (mbuffer) was not 60 second, but 937 ms. Lets convert all time fields to 1ns as 64bit arches are becoming the norm. Reported-by: Jesper Dangaard Brouer Signed-off-by: Eric Dumazet Tested-by: Jesper Dangaard Brouer Signed-off-by: David S. Miller --- net/sched/sch_htb.c | 34 +++++++++++++++++----------------- 1 file changed, 17 insertions(+), 17 deletions(-) (limited to 'net') diff --git a/net/sched/sch_htb.c b/net/sched/sch_htb.c index f87fb85..adaedd7 100644 --- a/net/sched/sch_htb.c +++ b/net/sched/sch_htb.c @@ -109,7 +109,7 @@ struct htb_class { } un; struct rb_node node[TC_HTB_NUMPRIO]; /* node for self or feed tree */ struct rb_node pq_node; /* node for event queue */ - psched_time_t pq_key; + s64 pq_key; int prio_activity; /* for which prios are we active */ enum htb_cmode cmode; /* current mode of the class */ @@ -121,10 +121,10 @@ struct htb_class { /* token bucket parameters */ struct psched_ratecfg rate; struct psched_ratecfg ceil; - s64 buffer, cbuffer; /* token bucket depth/rate */ - psched_tdiff_t mbuffer; /* max wait time */ - s64 tokens, ctokens; /* current number of tokens */ - psched_time_t t_c; /* checkpoint time */ + s64 buffer, cbuffer; /* token bucket depth/rate */ + s64 mbuffer; /* max wait time */ + s64 tokens, ctokens; /* current number of tokens */ + s64 t_c; /* checkpoint time */ }; struct htb_sched { @@ -141,15 +141,15 @@ struct htb_sched { struct rb_root wait_pq[TC_HTB_MAXDEPTH]; /* time of nearest event per level (row) */ - psched_time_t near_ev_cache[TC_HTB_MAXDEPTH]; + s64 near_ev_cache[TC_HTB_MAXDEPTH]; int defcls; /* class where unclassified flows go to */ /* filters for qdisc itself */ struct tcf_proto *filter_list; - int rate2quantum; /* quant = rate / rate2quantum */ - psched_time_t now; /* cached dequeue time */ + int rate2quantum; /* quant = rate / rate2quantum */ + s64 now; /* cached dequeue time */ struct qdisc_watchdog watchdog; /* non shaped skbs; let them go directly thru */ @@ -664,8 +664,8 @@ static void htb_charge_class(struct htb_sched *q, struct htb_class *cl, * next pending event (0 for no event in pq, q->now for too many events). * Note: Applied are events whose have cl->pq_key <= q->now. */ -static psched_time_t htb_do_events(struct htb_sched *q, int level, - unsigned long start) +static s64 htb_do_events(struct htb_sched *q, int level, + unsigned long start) { /* don't run for longer than 2 jiffies; 2 is used instead of * 1 to simplify things when jiffy is going to be incremented @@ -857,7 +857,7 @@ static struct sk_buff *htb_dequeue(struct Qdisc *sch) struct sk_buff *skb; struct htb_sched *q = qdisc_priv(sch); int level; - psched_time_t next_event; + s64 next_event; unsigned long start_at; /* try to dequeue direct packets as high prio (!) to minimize cpu work */ @@ -880,7 +880,7 @@ ok: for (level = 0; level < TC_HTB_MAXDEPTH; level++) { /* common case optimization - skip event handler quickly */ int m; - psched_time_t event; + s64 event; if (q->now >= q->near_ev_cache[level]) { event = htb_do_events(q, level, start_at); @@ -1117,8 +1117,8 @@ htb_dump_class_stats(struct Qdisc *sch, unsigned long arg, struct gnet_dump *d) if (!cl->level && cl->un.leaf.q) cl->qstats.qlen = cl->un.leaf.q->q.qlen; - cl->xstats.tokens = cl->tokens; - cl->xstats.ctokens = cl->ctokens; + cl->xstats.tokens = PSCHED_NS2TICKS(cl->tokens); + cl->xstats.ctokens = PSCHED_NS2TICKS(cl->ctokens); if (gnet_stats_copy_basic(d, &cl->bstats) < 0 || gnet_stats_copy_rate_est(d, NULL, &cl->rate_est) < 0 || @@ -1200,7 +1200,7 @@ static void htb_parent_to_leaf(struct htb_sched *q, struct htb_class *cl, parent->un.leaf.q = new_q ? new_q : &noop_qdisc; parent->tokens = parent->buffer; parent->ctokens = parent->cbuffer; - parent->t_c = psched_get_time(); + parent->t_c = ktime_to_ns(ktime_get()); parent->cmode = HTB_CAN_SEND; } @@ -1417,8 +1417,8 @@ static int htb_change_class(struct Qdisc *sch, u32 classid, /* set class to be in HTB_CAN_SEND state */ cl->tokens = PSCHED_TICKS2NS(hopt->buffer); cl->ctokens = PSCHED_TICKS2NS(hopt->cbuffer); - cl->mbuffer = 60 * PSCHED_TICKS_PER_SEC; /* 1min */ - cl->t_c = psched_get_time(); + cl->mbuffer = 60ULL * NSEC_PER_SEC; /* 1min */ + cl->t_c = ktime_to_ns(ktime_get()); cl->cmode = HTB_CAN_SEND; /* attach to the hash list and parent's family */ -- cgit v1.1