From 38516ab59fbc5b3bb278cf5e1fe2867c70cff32e Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Tue, 20 Apr 2010 17:04:50 -0400 Subject: tracing: Let tracepoints have data passed to tracepoint callbacks This patch adds data to be passed to tracepoint callbacks. The created functions from DECLARE_TRACE() now need a mandatory data parameter. For example: DECLARE_TRACE(mytracepoint, int value, value) Will create the register function: int register_trace_mytracepoint((void(*)(void *data, int value))probe, void *data); As the first argument, all callbacks (probes) must take a (void *data) parameter. So a callback for the above tracepoint will look like: void myprobe(void *data, int value) { } The callback may choose to ignore the data parameter. This change allows callbacks to register a private data pointer along with the function probe. void mycallback(void *data, int value); register_trace_mytracepoint(mycallback, mydata); Then the mycallback() will receive the "mydata" as the first parameter before the args. A more detailed example: DECLARE_TRACE(mytracepoint, TP_PROTO(int status), TP_ARGS(status)); /* In the C file */ DEFINE_TRACE(mytracepoint, TP_PROTO(int status), TP_ARGS(status)); [...] trace_mytracepoint(status); /* In a file registering this tracepoint */ int my_callback(void *data, int status) { struct my_struct my_data = data; [...] } [...] my_data = kmalloc(sizeof(*my_data), GFP_KERNEL); init_my_data(my_data); register_trace_mytracepoint(my_callback, my_data); The same callback can also be registered to the same tracepoint as long as the data registered is different. Note, the data must also be used to unregister the callback: unregister_trace_mytracepoint(my_callback, my_data); Because of the data parameter, tracepoints declared this way can not have no args. That is: DECLARE_TRACE(mytracepoint, TP_PROTO(void), TP_ARGS()); will cause an error. If no arguments are needed, a new macro can be used instead: DECLARE_TRACE_NOARGS(mytracepoint); Since there are no arguments, the proto and args fields are left out. This is part of a series to make the tracepoint footprint smaller: text data bss dec hex filename 4913961 1088356 861512 6863829 68bbd5 vmlinux.orig 4914025 1088868 861512 6864405 68be15 vmlinux.class 4918492 1084612 861512 6864616 68bee8 vmlinux.tracepoint Again, this patch also increases the size of the kernel, but lays the ground work for decreasing it. v5: Fixed net/core/drop_monitor.c to handle these updates. v4: Moved the DECLARE_TRACE() DECLARE_TRACE_NOARGS out of the #ifdef CONFIG_TRACE_POINTS, since the two are the same in both cases. The __DECLARE_TRACE() is what changes. Thanks to Frederic Weisbecker for pointing this out. v3: Made all register_* functions require data to be passed and all callbacks to take a void * parameter as its first argument. This makes the calling functions comply with C standards. Also added more comments to the modifications of DECLARE_TRACE(). v2: Made the DECLARE_TRACE() have the ability to pass arguments and added a new DECLARE_TRACE_NOARGS() for tracepoints that do not need any arguments. Acked-by: Mathieu Desnoyers Acked-by: Masami Hiramatsu Acked-by: Frederic Weisbecker Cc: Neil Horman Cc: David S. Miller Signed-off-by: Steven Rostedt --- net/core/drop_monitor.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) (limited to 'net') diff --git a/net/core/drop_monitor.c b/net/core/drop_monitor.c index cf208d8..ad41529 100644 --- a/net/core/drop_monitor.c +++ b/net/core/drop_monitor.c @@ -172,12 +172,12 @@ out: return; } -static void trace_kfree_skb_hit(struct sk_buff *skb, void *location) +static void trace_kfree_skb_hit(void *ignore, struct sk_buff *skb, void *location) { trace_drop_common(skb, location); } -static void trace_napi_poll_hit(struct napi_struct *napi) +static void trace_napi_poll_hit(void *ignore, struct napi_struct *napi) { struct dm_hw_stat_delta *new_stat; @@ -225,12 +225,12 @@ static int set_all_monitor_traces(int state) switch (state) { case TRACE_ON: - rc |= register_trace_kfree_skb(trace_kfree_skb_hit); - rc |= register_trace_napi_poll(trace_napi_poll_hit); + rc |= register_trace_kfree_skb(trace_kfree_skb_hit, NULL); + rc |= register_trace_napi_poll(trace_napi_poll_hit, NULL); break; case TRACE_OFF: - rc |= unregister_trace_kfree_skb(trace_kfree_skb_hit); - rc |= unregister_trace_napi_poll(trace_napi_poll_hit); + rc |= unregister_trace_kfree_skb(trace_kfree_skb_hit, NULL); + rc |= unregister_trace_napi_poll(trace_napi_poll_hit, NULL); tracepoint_synchronize_unregister(); -- cgit v1.1 From a1d7c1b4b8dfbc5ecadcff9284d64bb6ad4c0196 Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Fri, 14 May 2010 21:18:17 +0200 Subject: netfilter: nf_ct_sip: handle non-linear skbs Handle non-linear skbs by linearizing them instead of silently failing. Long term the helper should be fixed to either work with non-linear skbs directly by using the string search API or work on a copy of the data. Based on patch by Jason Gunthorpe Signed-off-by: Patrick McHardy --- net/netfilter/nf_conntrack_sip.c | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) (limited to 'net') diff --git a/net/netfilter/nf_conntrack_sip.c b/net/netfilter/nf_conntrack_sip.c index b20f427..53d8922 100644 --- a/net/netfilter/nf_conntrack_sip.c +++ b/net/netfilter/nf_conntrack_sip.c @@ -1393,10 +1393,8 @@ static int sip_help_tcp(struct sk_buff *skb, unsigned int protoff, nf_ct_refresh(ct, skb, sip_timeout * HZ); - if (skb_is_nonlinear(skb)) { - pr_debug("Copy of skbuff not supported yet.\n"); - return NF_ACCEPT; - } + if (unlikely(skb_linearize(skb))) + return NF_DROP; dptr = skb->data + dataoff; datalen = skb->len - dataoff; @@ -1455,10 +1453,8 @@ static int sip_help_udp(struct sk_buff *skb, unsigned int protoff, nf_ct_refresh(ct, skb, sip_timeout * HZ); - if (skb_is_nonlinear(skb)) { - pr_debug("Copy of skbuff not supported yet.\n"); - return NF_ACCEPT; - } + if (unlikely(skb_linearize(skb))) + return NF_DROP; dptr = skb->data + dataoff; datalen = skb->len - dataoff; -- cgit v1.1 From dc3f5e68f846eec38fb31d78f0b6e83633ad375e Mon Sep 17 00:00:00 2001 From: "Michael S. Tsirkin" Date: Tue, 13 Apr 2010 16:11:50 +0300 Subject: trans_virtio: use virtqueue_xxx wrappers Switch trans_virtio to new virtqueue_xxx wrappers. Signed-off-by: Michael S. Tsirkin Signed-off-by: Rusty Russell --- net/9p/trans_virtio.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/9p/trans_virtio.c b/net/9p/trans_virtio.c index 7eb78ec..dcfbe99 100644 --- a/net/9p/trans_virtio.c +++ b/net/9p/trans_virtio.c @@ -137,7 +137,7 @@ static void req_done(struct virtqueue *vq) P9_DPRINTK(P9_DEBUG_TRANS, ": request done\n"); - while ((rc = chan->vq->vq_ops->get_buf(chan->vq, &len)) != NULL) { + while ((rc = virtqueue_get_buf(chan->vq, &len)) != NULL) { P9_DPRINTK(P9_DEBUG_TRANS, ": rc %p\n", rc); P9_DPRINTK(P9_DEBUG_TRANS, ": lookup tag %d\n", rc->tag); req = p9_tag_lookup(chan->client, rc->tag); @@ -209,13 +209,13 @@ p9_virtio_request(struct p9_client *client, struct p9_req_t *req) req->status = REQ_STATUS_SENT; - if (chan->vq->vq_ops->add_buf(chan->vq, chan->sg, out, in, req->tc) < 0) { + if (virtqueue_add_buf(chan->vq, chan->sg, out, in, req->tc) < 0) { P9_DPRINTK(P9_DEBUG_TRANS, "9p debug: virtio rpc add_buf returned failure"); return -EIO; } - chan->vq->vq_ops->kick(chan->vq); + virtqueue_kick(chan->vq); P9_DPRINTK(P9_DEBUG_TRANS, "9p debug: virtio request kicked\n"); return 0; -- cgit v1.1 From fc350777c705a39a312728ac5e8a6f164a828f5d Mon Sep 17 00:00:00 2001 From: Joerg Marx Date: Thu, 20 May 2010 15:55:30 +0200 Subject: netfilter: nf_conntrack: fix a race in __nf_conntrack_confirm against nf_ct_get_next_corpse() This race was triggered by a 'conntrack -F' command running in parallel to the insertion of a hash for a new connection. Losing this race led to a dead conntrack entry effectively blocking traffic for a particular connection until timeout or flushing the conntrack hashes again. Now the check for an already dying connection is done inside the lock. Signed-off-by: Joerg Marx Signed-off-by: Patrick McHardy --- net/netfilter/nf_conntrack_core.c | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'net') diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c index b83c530..eeeb8bc 100644 --- a/net/netfilter/nf_conntrack_core.c +++ b/net/netfilter/nf_conntrack_core.c @@ -424,6 +424,16 @@ __nf_conntrack_confirm(struct sk_buff *skb) spin_lock_bh(&nf_conntrack_lock); + /* We have to check the DYING flag inside the lock to prevent + a race against nf_ct_get_next_corpse() possibly called from + user context, else we insert an already 'dead' hash, blocking + further use of that particular connection -JM */ + + if (unlikely(nf_ct_is_dying(ct))) { + spin_unlock_bh(&nf_conntrack_lock); + return NF_ACCEPT; + } + /* See if there's one in the list already, including reverse: NAT could have grabbed it without realizing, since we're not in the hash. If there is, we lost race. */ -- cgit v1.1 From 622e0ca1cd4d459f5af4f2c65f4dc0dd823cb4c3 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Thu, 20 May 2010 23:07:56 -0700 Subject: gro: Fix bogus gso_size on the first fraglist entry When GRO produces fraglist entries, and the resulting skb hits an interface that is incapable of TSO but capable of FRAGLIST, we end up producing a bogus packet with gso_size non-zero. This was reported in the field with older versions of KVM that did not set the TSO bits on tuntap. This patch fixes that. Reported-by: Igor Zhang Signed-off-by: Herbert Xu Signed-off-by: David S. Miller --- net/core/skbuff.c | 1 + 1 file changed, 1 insertion(+) (limited to 'net') diff --git a/net/core/skbuff.c b/net/core/skbuff.c index c543dd2..4c11000 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -2718,6 +2718,7 @@ int skb_gro_receive(struct sk_buff **head, struct sk_buff *skb) *NAPI_GRO_CB(nskb) = *NAPI_GRO_CB(p); skb_shinfo(nskb)->frag_list = p; skb_shinfo(nskb)->gso_size = pinfo->gso_size; + pinfo->gso_size = 0; skb_header_release(p); nskb->prev = p; -- cgit v1.1 From 76cc8b13a6e41b537fd262b600da1571314add62 Mon Sep 17 00:00:00 2001 From: Tom Herbert Date: Thu, 20 May 2010 18:37:59 +0000 Subject: net: fix problem in dequeuing from input_pkt_queue Fix some issues introduced in batch skb dequeuing for input_pkt_queue. The primary issue it that the queue head must be incremented only after a packet has been processed, that is only after __netif_receive_skb has been called. This is needed for the mechanism to prevent OOO packet in RFS. Also when flushing the input_pkt_queue and process_queue, the process queue should be done first to prevent OOO packets. Because the input_pkt_queue has been effectively split into two queues, the calculation of the tail ptr is no longer correct. The correct value would be head+input_pkt_queue->len+process_queue->len. To avoid this calculation we added an explict input_queue_tail in softnet_data. The tail value is simply incremented when queuing to input_pkt_queue. Signed-off-by: Tom Herbert Acked-by: Eric Dumazet Signed-off-by: David S. Miller --- net/core/dev.c | 28 +++++++++++++++------------- 1 file changed, 15 insertions(+), 13 deletions(-) (limited to 'net') diff --git a/net/core/dev.c b/net/core/dev.c index 6c82065..0aab66d 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -2426,10 +2426,7 @@ static int enqueue_to_backlog(struct sk_buff *skb, int cpu, if (skb_queue_len(&sd->input_pkt_queue)) { enqueue: __skb_queue_tail(&sd->input_pkt_queue, skb); -#ifdef CONFIG_RPS - *qtail = sd->input_queue_head + - skb_queue_len(&sd->input_pkt_queue); -#endif + input_queue_tail_incr_save(sd, qtail); rps_unlock(sd); local_irq_restore(flags); return NET_RX_SUCCESS; @@ -2964,7 +2961,7 @@ static void flush_backlog(void *arg) if (skb->dev == dev) { __skb_unlink(skb, &sd->input_pkt_queue); kfree_skb(skb); - input_queue_head_add(sd, 1); + input_queue_head_incr(sd); } } rps_unlock(sd); @@ -2973,6 +2970,7 @@ static void flush_backlog(void *arg) if (skb->dev == dev) { __skb_unlink(skb, &sd->process_queue); kfree_skb(skb); + input_queue_head_incr(sd); } } } @@ -3328,18 +3326,20 @@ static int process_backlog(struct napi_struct *napi, int quota) while ((skb = __skb_dequeue(&sd->process_queue))) { local_irq_enable(); __netif_receive_skb(skb); - if (++work >= quota) - return work; local_irq_disable(); + input_queue_head_incr(sd); + if (++work >= quota) { + local_irq_enable(); + return work; + } } rps_lock(sd); qlen = skb_queue_len(&sd->input_pkt_queue); - if (qlen) { - input_queue_head_add(sd, qlen); + if (qlen) skb_queue_splice_tail_init(&sd->input_pkt_queue, &sd->process_queue); - } + if (qlen < quota - work) { /* * Inline a custom version of __napi_complete(). @@ -5679,12 +5679,14 @@ static int dev_cpu_callback(struct notifier_block *nfb, local_irq_enable(); /* Process offline CPU's input_pkt_queue */ - while ((skb = __skb_dequeue(&oldsd->input_pkt_queue))) { + while ((skb = __skb_dequeue(&oldsd->process_queue))) { netif_rx(skb); - input_queue_head_add(oldsd, 1); + input_queue_head_incr(oldsd); } - while ((skb = __skb_dequeue(&oldsd->process_queue))) + while ((skb = __skb_dequeue(&oldsd->input_pkt_queue))) { netif_rx(skb); + input_queue_head_incr(oldsd); + } return NOTIFY_OK; } -- cgit v1.1 From 22fe88d3d85850267ff4535b465794a5768f868a Mon Sep 17 00:00:00 2001 From: Sujith Date: Thu, 13 May 2010 10:34:08 +0530 Subject: cfg80211: Fix signal_type comparison signal_type is enum cfg80211_signal_type. This fixes the gcc warning: scan.c: In function `cfg80211_inform_bss': scan.c:518:6: warning: comparison between `enum cfg80211_signal_type' and `enum nl80211_bss' scan.c: In function `cfg80211_inform_bss_frame': scan.c:574:6: warning: comparison between `enum cfg80211_signal_type' and `enum nl80211_bss' Signed-off-by: Sujith Signed-off-by: John W. Linville --- net/wireless/scan.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/wireless/scan.c b/net/wireless/scan.c index a026c6d..58401d2 100644 --- a/net/wireless/scan.c +++ b/net/wireless/scan.c @@ -515,7 +515,7 @@ cfg80211_inform_bss(struct wiphy *wiphy, privsz = wiphy->bss_priv_size; - if (WARN_ON(wiphy->signal_type == NL80211_BSS_SIGNAL_UNSPEC && + if (WARN_ON(wiphy->signal_type == CFG80211_SIGNAL_TYPE_UNSPEC && (signal < 0 || signal > 100))) return NULL; @@ -571,7 +571,7 @@ cfg80211_inform_bss_frame(struct wiphy *wiphy, u.probe_resp.variable); size_t privsz = wiphy->bss_priv_size; - if (WARN_ON(wiphy->signal_type == NL80211_BSS_SIGNAL_UNSPEC && + if (WARN_ON(wiphy->signal_type == CFG80211_SIGNAL_TYPE_UNSPEC && (signal < 0 || signal > 100))) return NULL; -- cgit v1.1 From 9fbc630c89fd210e15ffe84fd6e968a2d39000b0 Mon Sep 17 00:00:00 2001 From: Felix Fietkau Date: Sat, 15 May 2010 15:46:17 +0200 Subject: cfg80211: fix crash in cfg80211_set_freq() Since wdev can be NULL, check it before dereferencing it Signed-off-by: Felix Fietkau Signed-off-by: John W. Linville --- net/wireless/chan.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/wireless/chan.c b/net/wireless/chan.c index d92d088..b01a6f6 100644 --- a/net/wireless/chan.c +++ b/net/wireless/chan.c @@ -50,7 +50,7 @@ int cfg80211_set_freq(struct cfg80211_registered_device *rdev, struct ieee80211_channel *chan; int result; - if (wdev->iftype == NL80211_IFTYPE_MONITOR) + if (wdev && wdev->iftype == NL80211_IFTYPE_MONITOR) wdev = NULL; if (wdev) { -- cgit v1.1 From 579d7534ca83235794b6d9ef3cd473ffc14e9d42 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Tue, 18 May 2010 14:36:34 +0200 Subject: cfg80211: add missing braces Specifying a valid channel type will get goto out rather than continuing, due to missing braces. This affects both remain on channel and action frame TX commands. Signed-off-by: Johannes Berg Signed-off-by: John W. Linville --- net/wireless/nl80211.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index aaa1aad..db71150 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -4443,9 +4443,10 @@ static int nl80211_remain_on_channel(struct sk_buff *skb, if (channel_type != NL80211_CHAN_NO_HT && channel_type != NL80211_CHAN_HT20 && channel_type != NL80211_CHAN_HT40PLUS && - channel_type != NL80211_CHAN_HT40MINUS) + channel_type != NL80211_CHAN_HT40MINUS) { err = -EINVAL; goto out; + } } freq = nla_get_u32(info->attrs[NL80211_ATTR_WIPHY_FREQ]); @@ -4717,9 +4718,10 @@ static int nl80211_action(struct sk_buff *skb, struct genl_info *info) if (channel_type != NL80211_CHAN_NO_HT && channel_type != NL80211_CHAN_HT20 && channel_type != NL80211_CHAN_HT40PLUS && - channel_type != NL80211_CHAN_HT40MINUS) + channel_type != NL80211_CHAN_HT40MINUS) { err = -EINVAL; goto out; + } } freq = nla_get_u32(info->attrs[NL80211_ATTR_WIPHY_FREQ]); -- cgit v1.1 From 35f3d14dbbc58447c61e38a162ea10add6b31dc7 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Thu, 20 May 2010 10:43:18 +0200 Subject: pipe: add support for shrinking and growing pipes This patch adds F_GETPIPE_SZ and F_SETPIPE_SZ fcntl() actions for growing and shrinking the size of a pipe and adjusts pipe.c and splice.c (and relay and network splice) usage to work with these larger (or smaller) pipes. Signed-off-by: Jens Axboe --- net/core/skbuff.c | 38 +++++++++++++++++++++----------------- 1 file changed, 21 insertions(+), 17 deletions(-) (limited to 'net') diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 93c4e06..9319817 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -1417,12 +1417,13 @@ new_page: /* * Fill page/offset/length into spd, if it can hold more pages. */ -static inline int spd_fill_page(struct splice_pipe_desc *spd, struct page *page, +static inline int spd_fill_page(struct splice_pipe_desc *spd, + struct pipe_inode_info *pipe, struct page *page, unsigned int *len, unsigned int offset, struct sk_buff *skb, int linear, struct sock *sk) { - if (unlikely(spd->nr_pages == PIPE_BUFFERS)) + if (unlikely(spd->nr_pages == pipe->buffers)) return 1; if (linear) { @@ -1458,7 +1459,8 @@ static inline int __splice_segment(struct page *page, unsigned int poff, unsigned int plen, unsigned int *off, unsigned int *len, struct sk_buff *skb, struct splice_pipe_desc *spd, int linear, - struct sock *sk) + struct sock *sk, + struct pipe_inode_info *pipe) { if (!*len) return 1; @@ -1481,7 +1483,7 @@ static inline int __splice_segment(struct page *page, unsigned int poff, /* the linear region may spread across several pages */ flen = min_t(unsigned int, flen, PAGE_SIZE - poff); - if (spd_fill_page(spd, page, &flen, poff, skb, linear, sk)) + if (spd_fill_page(spd, pipe, page, &flen, poff, skb, linear, sk)) return 1; __segment_seek(&page, &poff, &plen, flen); @@ -1496,9 +1498,9 @@ static inline int __splice_segment(struct page *page, unsigned int poff, * Map linear and fragment data from the skb to spd. It reports failure if the * pipe is full or if we already spliced the requested length. */ -static int __skb_splice_bits(struct sk_buff *skb, unsigned int *offset, - unsigned int *len, struct splice_pipe_desc *spd, - struct sock *sk) +static int __skb_splice_bits(struct sk_buff *skb, struct pipe_inode_info *pipe, + unsigned int *offset, unsigned int *len, + struct splice_pipe_desc *spd, struct sock *sk) { int seg; @@ -1508,7 +1510,7 @@ static int __skb_splice_bits(struct sk_buff *skb, unsigned int *offset, if (__splice_segment(virt_to_page(skb->data), (unsigned long) skb->data & (PAGE_SIZE - 1), skb_headlen(skb), - offset, len, skb, spd, 1, sk)) + offset, len, skb, spd, 1, sk, pipe)) return 1; /* @@ -1518,7 +1520,7 @@ static int __skb_splice_bits(struct sk_buff *skb, unsigned int *offset, const skb_frag_t *f = &skb_shinfo(skb)->frags[seg]; if (__splice_segment(f->page, f->page_offset, f->size, - offset, len, skb, spd, 0, sk)) + offset, len, skb, spd, 0, sk, pipe)) return 1; } @@ -1535,8 +1537,8 @@ int skb_splice_bits(struct sk_buff *skb, unsigned int offset, struct pipe_inode_info *pipe, unsigned int tlen, unsigned int flags) { - struct partial_page partial[PIPE_BUFFERS]; - struct page *pages[PIPE_BUFFERS]; + struct partial_page partial[PIPE_DEF_BUFFERS]; + struct page *pages[PIPE_DEF_BUFFERS]; struct splice_pipe_desc spd = { .pages = pages, .partial = partial, @@ -1546,12 +1548,16 @@ int skb_splice_bits(struct sk_buff *skb, unsigned int offset, }; struct sk_buff *frag_iter; struct sock *sk = skb->sk; + int ret = 0; + + if (splice_grow_spd(pipe, &spd)) + return -ENOMEM; /* * __skb_splice_bits() only fails if the output has no room left, * so no point in going over the frag_list for the error case. */ - if (__skb_splice_bits(skb, &offset, &tlen, &spd, sk)) + if (__skb_splice_bits(skb, pipe, &offset, &tlen, &spd, sk)) goto done; else if (!tlen) goto done; @@ -1562,14 +1568,12 @@ int skb_splice_bits(struct sk_buff *skb, unsigned int offset, skb_walk_frags(skb, frag_iter) { if (!tlen) break; - if (__skb_splice_bits(frag_iter, &offset, &tlen, &spd, sk)) + if (__skb_splice_bits(frag_iter, pipe, &offset, &tlen, &spd, sk)) break; } done: if (spd.nr_pages) { - int ret; - /* * Drop the socket lock, otherwise we have reverse * locking dependencies between sk_lock and i_mutex @@ -1582,10 +1586,10 @@ done: release_sock(sk); ret = splice_to_pipe(pipe, &spd); lock_sock(sk); - return ret; } - return 0; + splice_shrink_spd(pipe, &spd); + return ret; } /** -- cgit v1.1 From c56e4acf55c804cbeea0ddb696ef698c73d39826 Mon Sep 17 00:00:00 2001 From: Sripathi Kodi Date: Thu, 25 Mar 2010 12:40:35 +0000 Subject: 9p: VFS switches for 9p2000.L: protocol and client changes Prepare p9pdu_read/write functions to handle multiple protocols. Signed-off-by: Sripathi Kodi Signed-off-by: Eric Van Hensbergen --- net/9p/client.c | 3 ++- net/9p/protocol.c | 6 ++++-- 2 files changed, 6 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/9p/client.c b/net/9p/client.c index 0aa79fa..e2d3146 100644 --- a/net/9p/client.c +++ b/net/9p/client.c @@ -1321,7 +1321,8 @@ static int p9_client_statsize(struct p9_wstat *wst, int proto_version) if (wst->muid) ret += strlen(wst->muid); - if (proto_version == p9_proto_2000u) { + if ((proto_version == p9_proto_2000u) || + (proto_version == p9_proto_2000L)) { ret += 2+4+4+4; /* extension[s] n_uid[4] n_gid[4] n_muid[4] */ if (wst->extension) ret += strlen(wst->extension); diff --git a/net/9p/protocol.c b/net/9p/protocol.c index e7541d5..77d3aab 100644 --- a/net/9p/protocol.c +++ b/net/9p/protocol.c @@ -341,7 +341,8 @@ p9pdu_vreadf(struct p9_fcall *pdu, int proto_version, const char *fmt, } break; case '?': - if (proto_version != p9_proto_2000u) + if ((proto_version != p9_proto_2000u) && + (proto_version != p9_proto_2000L)) return 0; break; default: @@ -488,7 +489,8 @@ p9pdu_vwritef(struct p9_fcall *pdu, int proto_version, const char *fmt, } break; case '?': - if (proto_version != p9_proto_2000u) + if ((proto_version != p9_proto_2000u) && + (proto_version != p9_proto_2000L)) return 0; break; default: -- cgit v1.1 From bda8e7752063cdbdd1d308bc1705400a8cec1aeb Mon Sep 17 00:00:00 2001 From: Sripathi Kodi Date: Thu, 25 Mar 2010 12:45:30 +0000 Subject: 9p: add 9P2000.L statfs operation I made a V2 of this patch on top of my patches for VFS switches. The change was adding v9fs_statfs pointer to v9fs_super_ops_dotl instead of v9fs_super_ops. statfs - get file system statistics size[4] Tstatfs tag[2] fid[4] size[4] Rstatfs tag[2] type[4] bsize[4] blocks[8] bfree[8] bavail[8] files[8] ffree[8] fsid[8] namelen[4] The statfs message is used to request file system information returned by the statfs(2) system call, which is used by df(1) to report file system and disk space usage. Signed-off-by: Jim Garlick Signed-off-by: Sripathi Kodi Signed-off-by: Eric Van Hensbergen --- net/9p/client.c | 39 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 39 insertions(+) (limited to 'net') diff --git a/net/9p/client.c b/net/9p/client.c index e2d3146..430a1c4 100644 --- a/net/9p/client.c +++ b/net/9p/client.c @@ -1365,3 +1365,42 @@ error: return err; } EXPORT_SYMBOL(p9_client_wstat); + +int p9_client_statfs(struct p9_fid *fid, struct p9_rstatfs *sb) +{ + int err; + struct p9_req_t *req; + struct p9_client *clnt; + + err = 0; + clnt = fid->clnt; + + P9_DPRINTK(P9_DEBUG_9P, ">>> TSTATFS fid %d\n", fid->fid); + + req = p9_client_rpc(clnt, P9_TSTATFS, "d", fid->fid); + if (IS_ERR(req)) { + err = PTR_ERR(req); + goto error; + } + + err = p9pdu_readf(req->rc, clnt->proto_version, "ddqqqqqqd", &sb->type, + &sb->bsize, &sb->blocks, &sb->bfree, &sb->bavail, + &sb->files, &sb->ffree, &sb->fsid, &sb->namelen); + if (err) { + p9pdu_dump(1, req->rc); + p9_free_req(clnt, req); + goto error; + } + + P9_DPRINTK(P9_DEBUG_9P, "<<< RSTATFS fid %d type 0x%lx bsize %ld " + "blocks %llu bfree %llu bavail %llu files %llu ffree %llu " + "fsid %llu namelen %ld\n", + fid->fid, (long unsigned int)sb->type, (long int)sb->bsize, + sb->blocks, sb->bfree, sb->bavail, sb->files, sb->ffree, + sb->fsid, (long int)sb->namelen); + + p9_free_req(clnt, req); +error: + return err; +} +EXPORT_SYMBOL(p9_client_statfs); -- cgit v1.1 From 4681dbdacb5cdc4d3273c3a97a1858d6e00a5fe7 Mon Sep 17 00:00:00 2001 From: Sripathi Kodi Date: Thu, 25 Mar 2010 12:47:26 +0000 Subject: 9p: add 9P2000.L rename operation I made a V2 of this patch on top of my patches for VFS switches. All the changes were due to change in some offsets. rename - change name of file or directory size[4] Trename tag[2] fid[4] newdirfid[4] name[s] size[4] Rrename tag[2] The rename message is used to change the name of a file, possibly moving it to a new directory. The 9P wstat message can only rename a file within the same directory. Signed-off-by: Jim Garlick Signed-off-by: Sripathi Kodi Signed-off-by: Eric Van Hensbergen --- net/9p/client.c | 28 ++++++++++++++++++++++++++++ 1 file changed, 28 insertions(+) (limited to 'net') diff --git a/net/9p/client.c b/net/9p/client.c index 430a1c4..37c8da0 100644 --- a/net/9p/client.c +++ b/net/9p/client.c @@ -1404,3 +1404,31 @@ error: return err; } EXPORT_SYMBOL(p9_client_statfs); + +int p9_client_rename(struct p9_fid *fid, struct p9_fid *newdirfid, char *name) +{ + int err; + struct p9_req_t *req; + struct p9_client *clnt; + + err = 0; + clnt = fid->clnt; + + P9_DPRINTK(P9_DEBUG_9P, ">>> TRENAME fid %d newdirfid %d name %s\n", + fid->fid, newdirfid->fid, name); + + req = p9_client_rpc(clnt, P9_TRENAME, "dds", fid->fid, + newdirfid->fid, name); + if (IS_ERR(req)) { + err = PTR_ERR(req); + goto error; + } + + P9_DPRINTK(P9_DEBUG_9P, "<<< RRENAME fid %d\n", fid->fid); + + p9_free_req(clnt, req); +error: + return err; +} +EXPORT_SYMBOL(p9_client_rename); + -- cgit v1.1 From 674b604cdd389252d89a14133b6ebf80165d1d55 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Wed, 19 May 2010 15:08:17 +0200 Subject: sunrpc: Pushdown the bkl from ioctl Pushdown the bkl to rpc_pipe_ioctl. Signed-off-by: Frederic Weisbecker Cc: "J. Bruce Fields" Cc: Neil Brown Cc: Nfs Cc: Thomas Gleixner Cc: John Kacur Cc: Arnd Bergmann --- net/sunrpc/rpc_pipe.c | 18 +++++++++++++++--- 1 file changed, 15 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/sunrpc/rpc_pipe.c b/net/sunrpc/rpc_pipe.c index 20e30c6..95ccbcf 100644 --- a/net/sunrpc/rpc_pipe.c +++ b/net/sunrpc/rpc_pipe.c @@ -27,6 +27,7 @@ #include #include #include +#include static struct vfsmount *rpc_mount __read_mostly; static int rpc_mount_count; @@ -309,8 +310,7 @@ rpc_pipe_poll(struct file *filp, struct poll_table_struct *wait) } static int -rpc_pipe_ioctl(struct inode *ino, struct file *filp, - unsigned int cmd, unsigned long arg) +rpc_pipe_ioctl_unlocked(struct file *filp, unsigned int cmd, unsigned long arg) { struct rpc_inode *rpci = RPC_I(filp->f_path.dentry->d_inode); int len; @@ -331,13 +331,25 @@ rpc_pipe_ioctl(struct inode *ino, struct file *filp, } } +static long +rpc_pipe_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) +{ + long ret; + + lock_kernel(); + ret = rpc_pipe_ioctl_unlocked(filp, cmd, arg); + unlock_kernel(); + + return ret; +} + static const struct file_operations rpc_pipe_fops = { .owner = THIS_MODULE, .llseek = no_llseek, .read = rpc_pipe_read, .write = rpc_pipe_write, .poll = rpc_pipe_poll, - .ioctl = rpc_pipe_ioctl, + .unlocked_ioctl = rpc_pipe_ioctl, .open = rpc_pipe_open, .release = rpc_pipe_release, }; -- cgit v1.1 From 9918ff26b301e9a57f25fb12b44a46ad0c1e8f8f Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Wed, 19 May 2010 15:08:17 +0200 Subject: sunrpc: Pushdown the bkl from sunrpc cache ioctl Pushdown the bkl to cache_ioctl_pipefs. Signed-off-by: Frederic Weisbecker Cc: "J. Bruce Fields" Cc: Neil Brown Cc: Nfs Cc: Thomas Gleixner Cc: John Kacur Cc: Arnd Bergmann --- net/sunrpc/cache.c | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/sunrpc/cache.c b/net/sunrpc/cache.c index 39bddba..91b1ade 100644 --- a/net/sunrpc/cache.c +++ b/net/sunrpc/cache.c @@ -33,6 +33,7 @@ #include #include #include +#include #define RPCDBG_FACILITY RPCDBG_CACHE @@ -1525,12 +1526,18 @@ static unsigned int cache_poll_pipefs(struct file *filp, poll_table *wait) return cache_poll(filp, wait, cd); } -static int cache_ioctl_pipefs(struct inode *inode, struct file *filp, +static long cache_ioctl_pipefs(struct file *filp, unsigned int cmd, unsigned long arg) { + struct inode *inode = filp->f_dentry->d_inode; struct cache_detail *cd = RPC_I(inode)->private; + long ret; - return cache_ioctl(inode, filp, cmd, arg, cd); + lock_kernel(); + ret = cache_ioctl(inode, filp, cmd, arg, cd); + unlock_kernel(); + + return ret; } static int cache_open_pipefs(struct inode *inode, struct file *filp) @@ -1553,7 +1560,7 @@ const struct file_operations cache_file_operations_pipefs = { .read = cache_read_pipefs, .write = cache_write_pipefs, .poll = cache_poll_pipefs, - .ioctl = cache_ioctl_pipefs, /* for FIONREAD */ + .unlocked_ioctl = cache_ioctl_pipefs, /* for FIONREAD */ .open = cache_open_pipefs, .release = cache_release_pipefs, }; -- cgit v1.1 From a6c0f8217c17d46da22fa56923f3cbd03615cb7c Mon Sep 17 00:00:00 2001 From: Denis Kirjanov Date: Sun, 23 May 2010 05:45:45 +0000 Subject: ieee802154: Fix possible NULL pointer dereference in wpan_phy_alloc Check for NULL pointer after kzalloc Signed-off-by: Denis Kirjanov Signed-off-by: David S. Miller --- net/ieee802154/wpan-class.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/ieee802154/wpan-class.c b/net/ieee802154/wpan-class.c index 3d803a1..1627ef2 100644 --- a/net/ieee802154/wpan-class.c +++ b/net/ieee802154/wpan-class.c @@ -147,13 +147,15 @@ struct wpan_phy *wpan_phy_alloc(size_t priv_size) struct wpan_phy *phy = kzalloc(sizeof(*phy) + priv_size, GFP_KERNEL); + if (!phy) + goto out; mutex_lock(&wpan_phy_mutex); phy->idx = wpan_phy_idx++; if (unlikely(!wpan_phy_idx_valid(phy->idx))) { wpan_phy_idx--; mutex_unlock(&wpan_phy_mutex); kfree(phy); - return NULL; + goto out; } mutex_unlock(&wpan_phy_mutex); @@ -168,6 +170,9 @@ struct wpan_phy *wpan_phy_alloc(size_t priv_size) phy->current_page = 0; /* for compatibility */ return phy; + +out: + return NULL; } EXPORT_SYMBOL(wpan_phy_alloc); -- cgit v1.1 From 53b0f08042f04813cd1a7473dacd3edfacb28eb3 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Sat, 22 May 2010 20:37:44 +0000 Subject: net_sched: Fix qdisc_notify() Ben Pfaff reported a kernel oops and provided a test program to reproduce it. https://kerneltrap.org/mailarchive/linux-netdev/2010/5/21/6277805 tc_fill_qdisc() should not be called for builtin qdisc, or it dereference a NULL pointer to get device ifindex. Fix is to always use tc_qdisc_dump_ignore() before calling tc_fill_qdisc(). Reported-by: Ben Pfaff Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/sched/sch_api.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) (limited to 'net') diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c index fe35c1f..b9e8c3b 100644 --- a/net/sched/sch_api.c +++ b/net/sched/sch_api.c @@ -1195,6 +1195,11 @@ nla_put_failure: return -1; } +static bool tc_qdisc_dump_ignore(struct Qdisc *q) +{ + return (q->flags & TCQ_F_BUILTIN) ? true : false; +} + static int qdisc_notify(struct net *net, struct sk_buff *oskb, struct nlmsghdr *n, u32 clid, struct Qdisc *old, struct Qdisc *new) @@ -1206,11 +1211,11 @@ static int qdisc_notify(struct net *net, struct sk_buff *oskb, if (!skb) return -ENOBUFS; - if (old && old->handle) { + if (old && !tc_qdisc_dump_ignore(old)) { if (tc_fill_qdisc(skb, old, clid, pid, n->nlmsg_seq, 0, RTM_DELQDISC) < 0) goto err_out; } - if (new) { + if (new && !tc_qdisc_dump_ignore(new)) { if (tc_fill_qdisc(skb, new, clid, pid, n->nlmsg_seq, old ? NLM_F_REPLACE : 0, RTM_NEWQDISC) < 0) goto err_out; } @@ -1223,11 +1228,6 @@ err_out: return -EINVAL; } -static bool tc_qdisc_dump_ignore(struct Qdisc *q) -{ - return (q->flags & TCQ_F_BUILTIN) ? true : false; -} - static int tc_dump_qdisc_root(struct Qdisc *root, struct sk_buff *skb, struct netlink_callback *cb, int *q_idx_p, int s_q_idx) -- cgit v1.1 From eb1669aed950cb5f34622bcceba66bef5980e97a Mon Sep 17 00:00:00 2001 From: Mike Frysinger Date: Fri, 21 May 2010 10:45:20 +0000 Subject: net-caif: drop redundant Kconfig entries There is already a submenu entry that is always displayed, so there is no need to also show a dedicated CAIF comment. Drop dead commented code while we're here, and change the submenu text to better match the style everyone else is using. Signed-off-by: Mike Frysinger Signed-off-by: David S. Miller --- net/caif/Kconfig | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) (limited to 'net') diff --git a/net/caif/Kconfig b/net/caif/Kconfig index cd1daf6..ed65178 100644 --- a/net/caif/Kconfig +++ b/net/caif/Kconfig @@ -2,10 +2,8 @@ # CAIF net configurations # -#menu "CAIF Support" -comment "CAIF Support" menuconfig CAIF - tristate "Enable CAIF support" + tristate "CAIF support" select CRC_CCITT default n ---help--- @@ -45,4 +43,3 @@ config CAIF_NETDEV If unsure say Y. endif -#endmenu -- cgit v1.1 From 253683bbfb6bc5864417c8c35cb6ef13b5e259e6 Mon Sep 17 00:00:00 2001 From: David Howells Date: Fri, 21 May 2010 02:25:27 +0000 Subject: rtnetlink: Fix error handling in do_setlink() Commit c02db8c6290bb992442fec1407643c94cc414375: Author: Chris Wright Date: Sun May 16 01:05:45 2010 -0700 Subject: rtnetlink: make SR-IOV VF interface symmetric adds broken error handling to do_setlink() in net/core/rtnetlink.c. The problem is the following chunk of code: if (tb[IFLA_VFINFO_LIST]) { struct nlattr *attr; int rem; nla_for_each_nested(attr, tb[IFLA_VFINFO_LIST], rem) { if (nla_type(attr) != IFLA_VF_INFO) ----> goto errout; err = do_setvfinfo(dev, attr); if (err < 0) goto errout; modified = 1; } } which can get to errout without setting err, resulting in the following error: net/core/rtnetlink.c: In function 'do_setlink': net/core/rtnetlink.c:904: warning: 'err' may be used uninitialized in this function Change the code to return -EINVAL in this case. Note that this might not be the appropriate error though. Signed-off-by: David Howells cc: Chris Wright cc: David S. Miller Acked-by: Chris Wright Signed-off-by: David S. Miller --- net/core/rtnetlink.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index e4b9870..7ab86f3 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -1199,8 +1199,10 @@ static int do_setlink(struct net_device *dev, struct ifinfomsg *ifm, struct nlattr *attr; int rem; nla_for_each_nested(attr, tb[IFLA_VFINFO_LIST], rem) { - if (nla_type(attr) != IFLA_VF_INFO) + if (nla_type(attr) != IFLA_VF_INFO) { + err = -EINVAL; goto errout; + } err = do_setvfinfo(dev, attr); if (err < 0) goto errout; -- cgit v1.1 From 8ce6cebc2f126f3ecf2d80746ea54245adf18057 Mon Sep 17 00:00:00 2001 From: Daniel Lezcano Date: Wed, 19 May 2010 10:12:19 +0000 Subject: net-2.6 : V2 - fix dev_get_valid_name the commit: commit d90310243fd750240755e217c5faa13e24f41536 Author: Octavian Purdila Date: Wed Nov 18 02:36:59 2009 +0000 net: device name allocation cleanups introduced a bug when there is a hash collision making impossible to rename a device with eth%d. This bug is very hard to reproduce and appears rarely. The problem is coming from we don't pass a temporary buffer to __dev_alloc_name but 'dev->name' which is modified by the function. A detailed explanation is here: http://marc.info/?l=linux-netdev&m=127417784011987&w=2 Changelog: V2 : replaced strings comparison by pointers comparison Signed-off-by: Daniel Lezcano Reviewed-by: Octavian Purdila Signed-off-by: David S. Miller --- net/core/dev.c | 20 ++++++++++++-------- 1 file changed, 12 insertions(+), 8 deletions(-) (limited to 'net') diff --git a/net/core/dev.c b/net/core/dev.c index 0aab66d..07a48e2 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -954,18 +954,22 @@ int dev_alloc_name(struct net_device *dev, const char *name) } EXPORT_SYMBOL(dev_alloc_name); -static int dev_get_valid_name(struct net *net, const char *name, char *buf, - bool fmt) +static int dev_get_valid_name(struct net_device *dev, const char *name, bool fmt) { + struct net *net; + + BUG_ON(!dev_net(dev)); + net = dev_net(dev); + if (!dev_valid_name(name)) return -EINVAL; if (fmt && strchr(name, '%')) - return __dev_alloc_name(net, name, buf); + return dev_alloc_name(dev, name); else if (__dev_get_by_name(net, name)) return -EEXIST; - else if (buf != name) - strlcpy(buf, name, IFNAMSIZ); + else if (dev->name != name) + strlcpy(dev->name, name, IFNAMSIZ); return 0; } @@ -997,7 +1001,7 @@ int dev_change_name(struct net_device *dev, const char *newname) memcpy(oldname, dev->name, IFNAMSIZ); - err = dev_get_valid_name(net, newname, dev->name, 1); + err = dev_get_valid_name(dev, newname, 1); if (err < 0) return err; @@ -4965,7 +4969,7 @@ int register_netdevice(struct net_device *dev) } } - ret = dev_get_valid_name(net, dev->name, dev->name, 0); + ret = dev_get_valid_name(dev, dev->name, 0); if (ret) goto err_uninit; @@ -5574,7 +5578,7 @@ int dev_change_net_namespace(struct net_device *dev, struct net *net, const char /* We get here if we can't use the current device name */ if (!pat) goto out; - if (dev_get_valid_name(net, pat, dev->name, 1)) + if (dev_get_valid_name(dev, pat, 1)) goto out; } -- cgit v1.1 From 9e4b816bc31962ebbb8784d602acd5fa25a08ad8 Mon Sep 17 00:00:00 2001 From: Sjur Braendeland Date: Fri, 21 May 2010 02:16:07 +0000 Subject: caif: Bugfix - wait_ev*_timeout returns long. Discovered bug when testing on 64bit architecture. Fixed by using long to store result from wait_event_interruptible_timeout. Signed-off-by: Sjur Braendeland Signed-off-by: David S. Miller --- net/caif/caif_socket.c | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) (limited to 'net') diff --git a/net/caif/caif_socket.c b/net/caif/caif_socket.c index c3a70c5..77e9956 100644 --- a/net/caif/caif_socket.c +++ b/net/caif/caif_socket.c @@ -920,17 +920,17 @@ wait_connect: timeo = sock_sndtimeo(sk, flags & O_NONBLOCK); release_sock(sk); - err = wait_event_interruptible_timeout(*sk_sleep(sk), + err = -ERESTARTSYS; + timeo = wait_event_interruptible_timeout(*sk_sleep(sk), sk->sk_state != CAIF_CONNECTING, timeo); lock_sock(sk); - if (err < 0) + if (timeo < 0) goto out; /* -ERESTARTSYS */ - if (err == 0 && sk->sk_state != CAIF_CONNECTED) { - err = -ETIMEDOUT; - goto out; - } + err = -ETIMEDOUT; + if (timeo == 0 && sk->sk_state != CAIF_CONNECTED) + goto out; if (sk->sk_state != CAIF_CONNECTED) { sock->state = SS_UNCONNECTED; err = sock_error(sk); @@ -945,7 +945,6 @@ out: return err; } - /* * caif_release() - Disconnect a CAIF Socket * Copied and modified af_irda.c:irda_release(). -- cgit v1.1 From 7aecf4944f2c05aafb73b4820e469c74b4ec8517 Mon Sep 17 00:00:00 2001 From: Sjur Braendeland Date: Fri, 21 May 2010 02:16:08 +0000 Subject: caif: Bugfix - use standard Linux lists Discovered bug when running high number of parallel connect requests. Replace buggy home brewed list with linux/list.h. Signed-off-by: Sjur Braendeland Signed-off-by: David S. Miller --- net/caif/cfctrl.c | 92 ++++++++++++++++--------------------------------------- 1 file changed, 26 insertions(+), 66 deletions(-) (limited to 'net') diff --git a/net/caif/cfctrl.c b/net/caif/cfctrl.c index 0ffe1e1..fcfda98 100644 --- a/net/caif/cfctrl.c +++ b/net/caif/cfctrl.c @@ -44,13 +44,14 @@ struct cflayer *cfctrl_create(void) dev_info.id = 0xff; memset(this, 0, sizeof(*this)); cfsrvl_init(&this->serv, 0, &dev_info); - spin_lock_init(&this->info_list_lock); atomic_set(&this->req_seq_no, 1); atomic_set(&this->rsp_seq_no, 1); this->serv.layer.receive = cfctrl_recv; sprintf(this->serv.layer.name, "ctrl"); this->serv.layer.ctrlcmd = cfctrl_ctrlcmd; spin_lock_init(&this->loop_linkid_lock); + spin_lock_init(&this->info_list_lock); + INIT_LIST_HEAD(&this->list); this->loop_linkid = 1; return &this->serv.layer; } @@ -112,20 +113,10 @@ bool cfctrl_req_eq(struct cfctrl_request_info *r1, void cfctrl_insert_req(struct cfctrl *ctrl, struct cfctrl_request_info *req) { - struct cfctrl_request_info *p; spin_lock(&ctrl->info_list_lock); - req->next = NULL; atomic_inc(&ctrl->req_seq_no); req->sequence_no = atomic_read(&ctrl->req_seq_no); - if (ctrl->first_req == NULL) { - ctrl->first_req = req; - spin_unlock(&ctrl->info_list_lock); - return; - } - p = ctrl->first_req; - while (p->next != NULL) - p = p->next; - p->next = req; + list_add_tail(&req->list, &ctrl->list); spin_unlock(&ctrl->info_list_lock); } @@ -133,46 +124,28 @@ void cfctrl_insert_req(struct cfctrl *ctrl, struct cfctrl_request_info *cfctrl_remove_req(struct cfctrl *ctrl, struct cfctrl_request_info *req) { - struct cfctrl_request_info *p; - struct cfctrl_request_info *ret; + struct cfctrl_request_info *p, *tmp, *first; spin_lock(&ctrl->info_list_lock); - if (ctrl->first_req == NULL) { - spin_unlock(&ctrl->info_list_lock); - return NULL; - } - - if (cfctrl_req_eq(req, ctrl->first_req)) { - ret = ctrl->first_req; - caif_assert(ctrl->first_req); - atomic_set(&ctrl->rsp_seq_no, - ctrl->first_req->sequence_no); - ctrl->first_req = ctrl->first_req->next; - spin_unlock(&ctrl->info_list_lock); - return ret; - } + first = list_first_entry(&ctrl->list, struct cfctrl_request_info, list); - p = ctrl->first_req; - - while (p->next != NULL) { - if (cfctrl_req_eq(req, p->next)) { - pr_warning("CAIF: %s(): Requests are not " + list_for_each_entry_safe(p, tmp, &ctrl->list, list) { + if (cfctrl_req_eq(req, p)) { + if (p != first) + pr_warning("CAIF: %s(): Requests are not " "received in order\n", __func__); - ret = p->next; + atomic_set(&ctrl->rsp_seq_no, - p->next->sequence_no); - p->next = p->next->next; - spin_unlock(&ctrl->info_list_lock); - return ret; + p->sequence_no); + list_del(&p->list); + goto out; } - p = p->next; } + p = NULL; +out: spin_unlock(&ctrl->info_list_lock); - - pr_warning("CAIF: %s(): Request does not match\n", - __func__); - return NULL; + return p; } struct cfctrl_rsp *cfctrl_get_respfuncs(struct cflayer *layer) @@ -388,31 +361,18 @@ void cfctrl_getstartreason_req(struct cflayer *layer) void cfctrl_cancel_req(struct cflayer *layr, struct cflayer *adap_layer) { - struct cfctrl_request_info *p, *req; + struct cfctrl_request_info *p, *tmp; struct cfctrl *ctrl = container_obj(layr); spin_lock(&ctrl->info_list_lock); - - if (ctrl->first_req == NULL) { - spin_unlock(&ctrl->info_list_lock); - return; - } - - if (ctrl->first_req->client_layer == adap_layer) { - - req = ctrl->first_req; - ctrl->first_req = ctrl->first_req->next; - kfree(req); - } - - p = ctrl->first_req; - while (p != NULL && p->next != NULL) { - if (p->next->client_layer == adap_layer) { - - req = p->next; - p->next = p->next->next; - kfree(p->next); + pr_warning("CAIF: %s(): enter\n", __func__); + + list_for_each_entry_safe(p, tmp, &ctrl->list, list) { + if (p->client_layer == adap_layer) { + pr_warning("CAIF: %s(): cancel req :%d\n", __func__, + p->sequence_no); + list_del(&p->list); + kfree(p); } - p = p->next; } spin_unlock(&ctrl->info_list_lock); @@ -634,7 +594,7 @@ static void cfctrl_ctrlcmd(struct cflayer *layr, enum caif_ctrlcmd ctrl, case _CAIF_CTRLCMD_PHYIF_FLOW_OFF_IND: case CAIF_CTRLCMD_FLOW_OFF_IND: spin_lock(&this->info_list_lock); - if (this->first_req != NULL) { + if (!list_empty(&this->list)) { pr_debug("CAIF: %s(): Received flow off in " "control layer", __func__); } -- cgit v1.1 From 638e628a600a5c542d46dfb06771cf9c229ef5f3 Mon Sep 17 00:00:00 2001 From: Sjur Braendeland Date: Fri, 21 May 2010 02:16:09 +0000 Subject: caif: Bugfix - handle mem-allocation failures Discovered bugs when injecting slab allocation failures. Add checks on all memory allocation. Signed-off-by: Sjur Braendeland Signed-off-by: David S. Miller --- net/caif/cfpkt_skbuff.c | 25 +++++++++++++++++-------- net/caif/cfserl.c | 3 ++- net/caif/cfsrvl.c | 6 ++++++ 3 files changed, 25 insertions(+), 9 deletions(-) (limited to 'net') diff --git a/net/caif/cfpkt_skbuff.c b/net/caif/cfpkt_skbuff.c index 83fff2f..a6fdf89 100644 --- a/net/caif/cfpkt_skbuff.c +++ b/net/caif/cfpkt_skbuff.c @@ -238,6 +238,7 @@ int cfpkt_add_head(struct cfpkt *pkt, const void *data2, u16 len) struct sk_buff *lastskb; u8 *to; const u8 *data = data2; + int ret; if (unlikely(is_erronous(pkt))) return -EPROTO; if (unlikely(skb_headroom(skb) < len)) { @@ -246,9 +247,10 @@ int cfpkt_add_head(struct cfpkt *pkt, const void *data2, u16 len) } /* Make sure data is writable */ - if (unlikely(skb_cow_data(skb, 0, &lastskb) < 0)) { + ret = skb_cow_data(skb, 0, &lastskb); + if (unlikely(ret < 0)) { PKT_ERROR(pkt, "cfpkt_add_head: cow failed\n"); - return -EPROTO; + return ret; } to = skb_push(skb, len); @@ -316,6 +318,8 @@ EXPORT_SYMBOL(cfpkt_setlen); struct cfpkt *cfpkt_create_uplink(const unsigned char *data, unsigned int len) { struct cfpkt *pkt = cfpkt_create_pfx(len + PKT_POSTFIX, PKT_PREFIX); + if (!pkt) + return NULL; if (unlikely(data != NULL)) cfpkt_add_body(pkt, data, len); return pkt; @@ -344,12 +348,13 @@ struct cfpkt *cfpkt_append(struct cfpkt *dstpkt, if (dst->tail + neededtailspace > dst->end) { /* Create a dumplicate of 'dst' with more tail space */ + struct cfpkt *tmppkt; dstlen = skb_headlen(dst); createlen = dstlen + neededtailspace; - tmp = pkt_to_skb( - cfpkt_create(createlen + PKT_PREFIX + PKT_POSTFIX)); - if (!tmp) + tmppkt = cfpkt_create(createlen + PKT_PREFIX + PKT_POSTFIX); + if (tmppkt == NULL) return NULL; + tmp = pkt_to_skb(tmppkt); skb_set_tail_pointer(tmp, dstlen); tmp->len = dstlen; memcpy(tmp->data, dst->data, dstlen); @@ -368,6 +373,7 @@ struct cfpkt *cfpkt_split(struct cfpkt *pkt, u16 pos) { struct sk_buff *skb2; struct sk_buff *skb = pkt_to_skb(pkt); + struct cfpkt *tmppkt; u8 *split = skb->data + pos; u16 len2nd = skb_tail_pointer(skb) - split; @@ -381,9 +387,12 @@ struct cfpkt *cfpkt_split(struct cfpkt *pkt, u16 pos) } /* Create a new packet for the second part of the data */ - skb2 = pkt_to_skb( - cfpkt_create_pfx(len2nd + PKT_PREFIX + PKT_POSTFIX, - PKT_PREFIX)); + tmppkt = cfpkt_create_pfx(len2nd + PKT_PREFIX + PKT_POSTFIX, + PKT_PREFIX); + if (tmppkt == NULL) + return NULL; + skb2 = pkt_to_skb(tmppkt); + if (skb2 == NULL) return NULL; diff --git a/net/caif/cfserl.c b/net/caif/cfserl.c index 06029ea..cb4325a 100644 --- a/net/caif/cfserl.c +++ b/net/caif/cfserl.c @@ -67,6 +67,8 @@ static int cfserl_receive(struct cflayer *l, struct cfpkt *newpkt) layr->incomplete_frm = cfpkt_append(layr->incomplete_frm, newpkt, expectlen); pkt = layr->incomplete_frm; + if (pkt == NULL) + return -ENOMEM; } else { pkt = newpkt; } @@ -154,7 +156,6 @@ static int cfserl_receive(struct cflayer *l, struct cfpkt *newpkt) if (layr->usestx) { if (tail_pkt != NULL) pkt = cfpkt_append(pkt, tail_pkt, 0); - /* Start search for next STX if frame failed */ continue; } else { diff --git a/net/caif/cfsrvl.c b/net/caif/cfsrvl.c index aff31f3..6e5b7079 100644 --- a/net/caif/cfsrvl.c +++ b/net/caif/cfsrvl.c @@ -123,6 +123,12 @@ static int cfservl_modemcmd(struct cflayer *layr, enum caif_modemcmd ctrl) struct caif_payload_info *info; u8 flow_off = SRVL_FLOW_OFF; pkt = cfpkt_create(SRVL_CTRL_PKT_SIZE); + if (!pkt) { + pr_warning("CAIF: %s(): Out of memory\n", + __func__); + return -ENOMEM; + } + if (cfpkt_add_head(pkt, &flow_off, 1) < 0) { pr_err("CAIF: %s(): Packet is erroneous!\n", __func__); -- cgit v1.1 From ca6a09f25cde1d2d86f8e821cf69f4f650c30dbf Mon Sep 17 00:00:00 2001 From: Sjur Braendeland Date: Fri, 21 May 2010 02:16:10 +0000 Subject: caif: Bugfix - Poll can't return POLLHUP while connecting. Discovered bug when testing async connect. While connecting poll should not return POLLHUP, but POLLOUT when connected. Also fixed the sysfs flow-control-counters. Signed-off-by: Sjur Braendeland Signed-off-by: David S. Miller --- net/caif/caif_socket.c | 21 ++++++--------------- 1 file changed, 6 insertions(+), 15 deletions(-) (limited to 'net') diff --git a/net/caif/caif_socket.c b/net/caif/caif_socket.c index 77e9956..732897d 100644 --- a/net/caif/caif_socket.c +++ b/net/caif/caif_socket.c @@ -138,7 +138,7 @@ void caif_flow_ctrl(struct sock *sk, int mode) { struct caifsock *cf_sk; cf_sk = container_of(sk, struct caifsock, sk); - if (cf_sk->layer.dn) + if (cf_sk->layer.dn && cf_sk->layer.dn->modemcmd) cf_sk->layer.dn->modemcmd(cf_sk->layer.dn, mode); } @@ -162,9 +162,8 @@ int caif_queue_rcv_skb(struct sock *sk, struct sk_buff *skb) atomic_read(&cf_sk->sk.sk_rmem_alloc), sk_rcvbuf_lowwater(cf_sk)); set_rx_flow_off(cf_sk); - if (cf_sk->layer.dn) - cf_sk->layer.dn->modemcmd(cf_sk->layer.dn, - CAIF_MODEMCMD_FLOW_OFF_REQ); + dbfs_atomic_inc(&cnt.num_rx_flow_off); + caif_flow_ctrl(sk, CAIF_MODEMCMD_FLOW_OFF_REQ); } err = sk_filter(sk, skb); @@ -175,9 +174,8 @@ int caif_queue_rcv_skb(struct sock *sk, struct sk_buff *skb) trace_printk("CAIF: %s():" " sending flow OFF due to rmem_schedule\n", __func__); - if (cf_sk->layer.dn) - cf_sk->layer.dn->modemcmd(cf_sk->layer.dn, - CAIF_MODEMCMD_FLOW_OFF_REQ); + dbfs_atomic_inc(&cnt.num_rx_flow_off); + caif_flow_ctrl(sk, CAIF_MODEMCMD_FLOW_OFF_REQ); } skb->dev = NULL; skb_set_owner_r(skb, sk); @@ -285,16 +283,13 @@ static void caif_check_flow_release(struct sock *sk) { struct caifsock *cf_sk = container_of(sk, struct caifsock, sk); - if (cf_sk->layer.dn == NULL || cf_sk->layer.dn->modemcmd == NULL) - return; if (rx_flow_is_on(cf_sk)) return; if (atomic_read(&sk->sk_rmem_alloc) <= sk_rcvbuf_lowwater(cf_sk)) { dbfs_atomic_inc(&cnt.num_rx_flow_on); set_rx_flow_on(cf_sk); - cf_sk->layer.dn->modemcmd(cf_sk->layer.dn, - CAIF_MODEMCMD_FLOW_ON_REQ); + caif_flow_ctrl(sk, CAIF_MODEMCMD_FLOW_ON_REQ); } } /* @@ -1018,10 +1013,6 @@ static unsigned int caif_poll(struct file *file, (sk->sk_shutdown & RCV_SHUTDOWN)) mask |= POLLIN | POLLRDNORM; - /* Connection-based need to check for termination and startup */ - if (sk->sk_state == CAIF_DISCONNECTED) - mask |= POLLHUP; - /* * we set writable also when the other side has shut down the * connection. This prevents stuck sockets. -- cgit v1.1 From a9a8f1070d8733b37418b3a2d58df4e771b61f88 Mon Sep 17 00:00:00 2001 From: Sjur Braendeland Date: Fri, 21 May 2010 02:16:11 +0000 Subject: caif: Bugfix - missing spin_unlock Splint found missing spin_unlock. Corrected this an some other trivial split warnings. Signed-off-by: Sjur Braendeland Signed-off-by: David S. Miller --- net/caif/caif_socket.c | 10 +++++----- net/caif/cfmuxl.c | 3 ++- 2 files changed, 7 insertions(+), 6 deletions(-) (limited to 'net') diff --git a/net/caif/caif_socket.c b/net/caif/caif_socket.c index 732897d..691a571 100644 --- a/net/caif/caif_socket.c +++ b/net/caif/caif_socket.c @@ -60,7 +60,7 @@ struct debug_fs_counter { atomic_t num_rx_flow_off; atomic_t num_rx_flow_on; }; -struct debug_fs_counter cnt; +static struct debug_fs_counter cnt; #define dbfs_atomic_inc(v) atomic_inc(v) #define dbfs_atomic_dec(v) atomic_dec(v) #else @@ -128,13 +128,13 @@ static void caif_read_unlock(struct sock *sk) mutex_unlock(&cf_sk->readlock); } -int sk_rcvbuf_lowwater(struct caifsock *cf_sk) +static int sk_rcvbuf_lowwater(struct caifsock *cf_sk) { /* A quarter of full buffer is used a low water mark */ return cf_sk->sk.sk_rcvbuf / 4; } -void caif_flow_ctrl(struct sock *sk, int mode) +static void caif_flow_ctrl(struct sock *sk, int mode) { struct caifsock *cf_sk; cf_sk = container_of(sk, struct caifsock, sk); @@ -146,7 +146,7 @@ void caif_flow_ctrl(struct sock *sk, int mode) * Copied from sock.c:sock_queue_rcv_skb(), but changed so packets are * not dropped, but CAIF is sending flow off instead. */ -int caif_queue_rcv_skb(struct sock *sk, struct sk_buff *skb) +static int caif_queue_rcv_skb(struct sock *sk, struct sk_buff *skb) { int err; int skb_len; @@ -1184,7 +1184,7 @@ static struct net_proto_family caif_family_ops = { .owner = THIS_MODULE, }; -int af_caif_init(void) +static int af_caif_init(void) { int err = sock_register(&caif_family_ops); if (!err) diff --git a/net/caif/cfmuxl.c b/net/caif/cfmuxl.c index 7372f27..80c8d33 100644 --- a/net/caif/cfmuxl.c +++ b/net/caif/cfmuxl.c @@ -174,10 +174,11 @@ struct cflayer *cfmuxl_remove_uplayer(struct cflayer *layr, u8 id) spin_lock(&muxl->receive_lock); up = get_up(muxl, id); if (up == NULL) - return NULL; + goto out; memset(muxl->up_cache, 0, sizeof(muxl->up_cache)); list_del(&up->node); cfsrvl_put(up); +out: spin_unlock(&muxl->receive_lock); return up; } -- cgit v1.1 From dcda138d2f27e32bd0d6250cc42839b0d70bb4b8 Mon Sep 17 00:00:00 2001 From: Sjur Braendeland Date: Fri, 21 May 2010 02:16:12 +0000 Subject: caif: Bugfix - use MSG_TRUNC in receive Fixed handling when skb don't fit in user buffer, instead of returning -EMSGSIZE, the buffer is truncated (just as unix seqpakcet does). Signed-off-by: Sjur Braendeland Signed-off-by: David S. Miller --- net/caif/caif_socket.c | 47 ++++++++++++++++++----------------------------- 1 file changed, 18 insertions(+), 29 deletions(-) (limited to 'net') diff --git a/net/caif/caif_socket.c b/net/caif/caif_socket.c index 691a571..3d0e095 100644 --- a/net/caif/caif_socket.c +++ b/net/caif/caif_socket.c @@ -292,53 +292,42 @@ static void caif_check_flow_release(struct sock *sk) caif_flow_ctrl(sk, CAIF_MODEMCMD_FLOW_ON_REQ); } } + /* - * Copied from sock.c:sock_queue_rcv_skb(), and added check that user buffer - * has sufficient size. + * Copied from unix_dgram_recvmsg, but removed credit checks, + * changed locking, address handling and added MSG_TRUNC. */ - static int caif_seqpkt_recvmsg(struct kiocb *iocb, struct socket *sock, - struct msghdr *m, size_t buf_len, int flags) + struct msghdr *m, size_t len, int flags) { struct sock *sk = sock->sk; struct sk_buff *skb; - int ret = 0; - int len; + int ret; + int copylen; - if (unlikely(!buf_len)) - return -EINVAL; + ret = -EOPNOTSUPP; + if (m->msg_flags&MSG_OOB) + goto read_error; skb = skb_recv_datagram(sk, flags, 0 , &ret); if (!skb) goto read_error; - - len = skb->len; - - if (skb && skb->len > buf_len && !(flags & MSG_PEEK)) { - len = buf_len; - /* - * Push skb back on receive queue if buffer too small. - * This has a built-in race where multi-threaded receive - * may get packet in wrong order, but multiple read does - * not really guarantee ordered delivery anyway. - * Let's optimize for speed without taking locks. - */ - - skb_queue_head(&sk->sk_receive_queue, skb); - ret = -EMSGSIZE; - goto read_error; + copylen = skb->len; + if (len < copylen) { + m->msg_flags |= MSG_TRUNC; + copylen = len; } - ret = skb_copy_datagram_iovec(skb, 0, m->msg_iov, len); + ret = skb_copy_datagram_iovec(skb, 0, m->msg_iov, copylen); if (ret) - goto read_error; + goto out_free; + ret = (flags & MSG_TRUNC) ? skb->len : copylen; +out_free: skb_free_datagram(sk, skb); - caif_check_flow_release(sk); - - return len; + return ret; read_error: return ret; -- cgit v1.1 From f845172531fb7410c7fb7780b1a6e51ee6df7d52 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Mon, 24 May 2010 00:12:34 -0700 Subject: cls_cgroup: Store classid in struct sock Up until now cls_cgroup has relied on fetching the classid out of the current executing thread. This runs into trouble when a packet processing is delayed in which case it may execute out of another thread's context. Furthermore, even when a packet is not delayed we may fail to classify it if soft IRQs have been disabled, because this scenario is indistinguishable from one where a packet unrelated to the current thread is processed by a real soft IRQ. In fact, the current semantics is inherently broken, as a single skb may be constructed out of the writes of two different tasks. A different manifestation of this problem is when the TCP stack transmits in response of an incoming ACK. This is currently unclassified. As we already have a concept of packet ownership for accounting purposes in the skb->sk pointer, this is a natural place to store the classid in a persistent manner. This patch adds the cls_cgroup classid in struct sock, filling up an existing hole on 64-bit :) The value is set at socket creation time. So all sockets created via socket(2) automatically gains the ID of the thread creating it. Whenever another process touches the socket by either reading or writing to it, we will change the socket classid to that of the process if it has a valid (non-zero) classid. For sockets created on inbound connections through accept(2), we inherit the classid of the original listening socket through sk_clone, possibly preceding the actual accept(2) call. In order to minimise risks, I have not made this the authoritative classid. For now it is only used as a backup when we execute with soft IRQs disabled. Once we're completely happy with its semantics we can use it as the sole classid. Footnote: I have rearranged the error path on cls_group module creation. If we didn't do this, then there is a window where someone could create a tc rule using cls_group before the cgroup subsystem has been registered. Signed-off-by: Herbert Xu Signed-off-by: David S. Miller --- net/core/sock.c | 18 ++++++++++++++++++ net/sched/cls_cgroup.c | 50 ++++++++++++++++++++++++++++++++++---------------- net/socket.c | 9 +++++++++ 3 files changed, 61 insertions(+), 16 deletions(-) (limited to 'net') diff --git a/net/core/sock.c b/net/core/sock.c index bf88a16..a05ae7f 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -123,6 +123,7 @@ #include #include #include +#include #include @@ -217,6 +218,11 @@ __u32 sysctl_rmem_default __read_mostly = SK_RMEM_MAX; int sysctl_optmem_max __read_mostly = sizeof(unsigned long)*(2*UIO_MAXIOV+512); EXPORT_SYMBOL(sysctl_optmem_max); +#if defined(CONFIG_CGROUPS) && !defined(CONFIG_NET_CLS_CGROUP) +int net_cls_subsys_id = -1; +EXPORT_SYMBOL_GPL(net_cls_subsys_id); +#endif + static int sock_set_timeout(long *timeo_p, char __user *optval, int optlen) { struct timeval tv; @@ -1050,6 +1056,16 @@ static void sk_prot_free(struct proto *prot, struct sock *sk) module_put(owner); } +#ifdef CONFIG_CGROUPS +void sock_update_classid(struct sock *sk) +{ + u32 classid = task_cls_classid(current); + + if (classid && classid != sk->sk_classid) + sk->sk_classid = classid; +} +#endif + /** * sk_alloc - All socket objects are allocated here * @net: the applicable net namespace @@ -1073,6 +1089,8 @@ struct sock *sk_alloc(struct net *net, int family, gfp_t priority, sock_lock_init(sk); sock_net_set(sk, get_net(net)); atomic_set(&sk->sk_wmem_alloc, 1); + + sock_update_classid(sk); } return sk; diff --git a/net/sched/cls_cgroup.c b/net/sched/cls_cgroup.c index 2211803..78ef2c5 100644 --- a/net/sched/cls_cgroup.c +++ b/net/sched/cls_cgroup.c @@ -16,14 +16,11 @@ #include #include #include +#include #include #include - -struct cgroup_cls_state -{ - struct cgroup_subsys_state css; - u32 classid; -}; +#include +#include static struct cgroup_subsys_state *cgrp_create(struct cgroup_subsys *ss, struct cgroup *cgrp); @@ -112,6 +109,10 @@ static int cls_cgroup_classify(struct sk_buff *skb, struct tcf_proto *tp, struct cls_cgroup_head *head = tp->root; u32 classid; + rcu_read_lock(); + classid = task_cls_state(current)->classid; + rcu_read_unlock(); + /* * Due to the nature of the classifier it is required to ignore all * packets originating from softirq context as accessing `current' @@ -122,12 +123,12 @@ static int cls_cgroup_classify(struct sk_buff *skb, struct tcf_proto *tp, * calls by looking at the number of nested bh disable calls because * softirqs always disables bh. */ - if (softirq_count() != SOFTIRQ_OFFSET) - return -1; - - rcu_read_lock(); - classid = task_cls_state(current)->classid; - rcu_read_unlock(); + if (softirq_count() != SOFTIRQ_OFFSET) { + /* If there is an sk_classid we'll use that. */ + if (!skb->sk) + return -1; + classid = skb->sk->sk_classid; + } if (!classid) return -1; @@ -289,18 +290,35 @@ static struct tcf_proto_ops cls_cgroup_ops __read_mostly = { static int __init init_cgroup_cls(void) { - int ret = register_tcf_proto_ops(&cls_cgroup_ops); - if (ret) - return ret; + int ret; + ret = cgroup_load_subsys(&net_cls_subsys); if (ret) - unregister_tcf_proto_ops(&cls_cgroup_ops); + goto out; + +#ifndef CONFIG_NET_CLS_CGROUP + /* We can't use rcu_assign_pointer because this is an int. */ + smp_wmb(); + net_cls_subsys_id = net_cls_subsys.subsys_id; +#endif + + ret = register_tcf_proto_ops(&cls_cgroup_ops); + if (ret) + cgroup_unload_subsys(&net_cls_subsys); + +out: return ret; } static void __exit exit_cgroup_cls(void) { unregister_tcf_proto_ops(&cls_cgroup_ops); + +#ifndef CONFIG_NET_CLS_CGROUP + net_cls_subsys_id = -1; + synchronize_rcu(); +#endif + cgroup_unload_subsys(&net_cls_subsys); } diff --git a/net/socket.c b/net/socket.c index f9f7d08..367d547 100644 --- a/net/socket.c +++ b/net/socket.c @@ -94,6 +94,7 @@ #include #include +#include #include #include @@ -558,6 +559,8 @@ static inline int __sock_sendmsg(struct kiocb *iocb, struct socket *sock, struct sock_iocb *si = kiocb_to_siocb(iocb); int err; + sock_update_classid(sock->sk); + si->sock = sock; si->scm = NULL; si->msg = msg; @@ -684,6 +687,8 @@ static inline int __sock_recvmsg_nosec(struct kiocb *iocb, struct socket *sock, { struct sock_iocb *si = kiocb_to_siocb(iocb); + sock_update_classid(sock->sk); + si->sock = sock; si->scm = NULL; si->msg = msg; @@ -777,6 +782,8 @@ static ssize_t sock_splice_read(struct file *file, loff_t *ppos, if (unlikely(!sock->ops->splice_read)) return -EINVAL; + sock_update_classid(sock->sk); + return sock->ops->splice_read(sock, ppos, pipe, len, flags); } @@ -3069,6 +3076,8 @@ int kernel_setsockopt(struct socket *sock, int level, int optname, int kernel_sendpage(struct socket *sock, struct page *page, int offset, size_t size, int flags) { + sock_update_classid(sock->sk); + if (sock->ops->sendpage) return sock->ops->sendpage(sock, page, offset, size, flags); -- cgit v1.1 From 8286274284e15b11b0f531b6ceeef21fbe00a8dd Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Mon, 24 May 2010 00:14:10 -0700 Subject: tun: Update classid on packet injection This patch makes tun update its socket classid every time we inject a packet into the network stack. This is so that any updates made by the admin to the process writing packets to tun is effected. Signed-off-by: Herbert Xu Signed-off-by: David S. Miller --- net/core/sock.c | 1 + 1 file changed, 1 insertion(+) (limited to 'net') diff --git a/net/core/sock.c b/net/core/sock.c index a05ae7f..37fe9b6 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -1064,6 +1064,7 @@ void sock_update_classid(struct sock *sk) if (classid && classid != sk->sk_classid) sk->sk_classid = classid; } +EXPORT_SYMBOL(sock_update_classid); #endif /** -- cgit v1.1 From a69eee4988752c7196677958b4ed8f4c2b28499a Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Mon, 24 May 2010 07:45:43 -0700 Subject: Revert "ath9k: Group Key fix for VAPs" This reverts commit 03ceedea972a82d343fa5c2528b3952fa9e615d5, since it breaks resume from suspend-to-ram on Rafael's Acer Ferrari One. NetworkManager thinks everything is ok, but it can't connect to the AP to get an IP address after the resume. In fact, it even breaks resume for non-ath9k chipsets: reverting it also fixes Rafael's Toshiba Protege R500 with the iwlagn driver. As Johannes says: "Indeed, this patch needs to be reverted. That mac80211 change is wrong and completely unnecessary." Reported-and-requested-by: Rafael J. Wysocki Acked-by: Johannes Berg Cc: Daniel Yingqiang Ma Cc: John W. Linville Cc: David Miller Signed-off-by: Linus Torvalds --- net/mac80211/key.c | 1 - 1 file changed, 1 deletion(-) (limited to 'net') diff --git a/net/mac80211/key.c b/net/mac80211/key.c index 8d4b417..e8f6e3b 100644 --- a/net/mac80211/key.c +++ b/net/mac80211/key.c @@ -140,7 +140,6 @@ static void ieee80211_key_enable_hw_accel(struct ieee80211_key *key) struct ieee80211_sub_if_data, u.ap); - key->conf.ap_addr = sdata->dev->dev_addr; ret = drv_set_key(key->local, SET_KEY, sdata, sta, &key->conf); if (!ret) { -- cgit v1.1 From 3dc3fc52ea1537f5f37ab301d2b1468a0e79988f Mon Sep 17 00:00:00 2001 From: "John W. Linville" Date: Mon, 24 May 2010 13:36:37 -0400 Subject: Revert "ath9k: Group Key fix for VAPs" This reverts commit 03ceedea972a82d343fa5c2528b3952fa9e615d5. This patch was reported to cause a regression in which connectivity is lost and cannot be reestablished after a suspend/resume cycle. Signed-off-by: John W. Linville --- net/mac80211/key.c | 1 - 1 file changed, 1 deletion(-) (limited to 'net') diff --git a/net/mac80211/key.c b/net/mac80211/key.c index 8d4b417..e8f6e3b 100644 --- a/net/mac80211/key.c +++ b/net/mac80211/key.c @@ -140,7 +140,6 @@ static void ieee80211_key_enable_hw_accel(struct ieee80211_key *key) struct ieee80211_sub_if_data, u.ap); - key->conf.ap_addr = sdata->dev->dev_addr; ret = drv_set_key(key->local, SET_KEY, sdata, sta, &key->conf); if (!ret) { -- cgit v1.1 From a0c9101c05389e69a5382967667ca686a8d8fbd3 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Fri, 21 May 2010 11:28:41 -0700 Subject: wireless: fix sta_info.h kernel-doc warnings Fix sta_info.h kernel-doc warnings: Warning(net/mac80211/sta_info.h:164): No description found for parameter 'tid_active_rx[STA_TID_NUM]' Warning(net/mac80211/sta_info.h:164): Excess struct/union/enum/typedef member 'tid_state_rx' description in 'sta_ampdu_mlme' Signed-off-by: Randy Dunlap Acked-by: Johannes Berg Signed-off-by: John W. Linville --- net/mac80211/sta_info.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/mac80211/sta_info.h b/net/mac80211/sta_info.h index 48a5e80..df9d455 100644 --- a/net/mac80211/sta_info.h +++ b/net/mac80211/sta_info.h @@ -145,7 +145,7 @@ enum plink_state { /** * struct sta_ampdu_mlme - STA aggregation information. * - * @tid_state_rx: TID's state in Rx session state machine. + * @tid_active_rx: TID's state in Rx session state machine. * @tid_rx: aggregation info for Rx per TID * @tid_state_tx: TID's state in Tx session state machine. * @tid_tx: aggregation info for Tx per TID -- cgit v1.1 From d9b52dc6fd1fbb2bad645cbc86a60f984c1cb179 Mon Sep 17 00:00:00 2001 From: Yoichi Yuasa Date: Mon, 24 May 2010 18:37:02 -0700 Subject: net/dccp: expansion of error code size Because MIPS's EDQUOT value is 1133(0x46d). It's larger than u8. Signed-off-by: Yoichi Yuasa Signed-off-by: David S. Miller --- net/dccp/input.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/dccp/input.c b/net/dccp/input.c index 58f7bc1..6beb6a7 100644 --- a/net/dccp/input.c +++ b/net/dccp/input.c @@ -124,9 +124,9 @@ static int dccp_rcv_closereq(struct sock *sk, struct sk_buff *skb) return queued; } -static u8 dccp_reset_code_convert(const u8 code) +static u16 dccp_reset_code_convert(const u8 code) { - const u8 error_code[] = { + const u16 error_code[] = { [DCCP_RESET_CODE_CLOSED] = 0, /* normal termination */ [DCCP_RESET_CODE_UNSPECIFIED] = 0, /* nothing known */ [DCCP_RESET_CODE_ABORTED] = ECONNRESET, @@ -148,7 +148,7 @@ static u8 dccp_reset_code_convert(const u8 code) static void dccp_rcv_reset(struct sock *sk, struct sk_buff *skb) { - u8 err = dccp_reset_code_convert(dccp_hdr_reset(skb)->dccph_reset_code); + u16 err = dccp_reset_code_convert(dccp_hdr_reset(skb)->dccph_reset_code); sk->sk_err = err; -- cgit v1.1 From 4be929be34f9bdeffa40d815d32d7d60d2c7f03b Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Mon, 24 May 2010 14:33:03 -0700 Subject: kernel-wide: replace USHORT_MAX, SHORT_MAX and SHORT_MIN with USHRT_MAX, SHRT_MAX and SHRT_MIN - C99 knows about USHRT_MAX/SHRT_MAX/SHRT_MIN, not USHORT_MAX/SHORT_MAX/SHORT_MIN. - Make SHRT_MIN of type s16, not int, for consistency. [akpm@linux-foundation.org: fix drivers/dma/timb_dma.c] [akpm@linux-foundation.org: fix security/keys/keyring.c] Signed-off-by: Alexey Dobriyan Acked-by: WANG Cong Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- net/9p/protocol.c | 2 +- net/dccp/options.c | 2 +- net/ipv4/udp.c | 8 ++++---- net/mac80211/sta_info.c | 2 +- net/sunrpc/rpcb_clnt.c | 2 +- 5 files changed, 8 insertions(+), 8 deletions(-) (limited to 'net') diff --git a/net/9p/protocol.c b/net/9p/protocol.c index 77d3aab..149f821 100644 --- a/net/9p/protocol.c +++ b/net/9p/protocol.c @@ -394,7 +394,7 @@ p9pdu_vwritef(struct p9_fcall *pdu, int proto_version, const char *fmt, const char *sptr = va_arg(ap, const char *); int16_t len = 0; if (sptr) - len = MIN(strlen(sptr), USHORT_MAX); + len = MIN(strlen(sptr), USHRT_MAX); errcode = p9pdu_writef(pdu, proto_version, "w", len); diff --git a/net/dccp/options.c b/net/dccp/options.c index 1b08cae..07395f8 100644 --- a/net/dccp/options.c +++ b/net/dccp/options.c @@ -296,7 +296,7 @@ static inline u8 dccp_ndp_len(const u64 ndp) { if (likely(ndp <= 0xFF)) return 1; - return likely(ndp <= USHORT_MAX) ? 2 : (ndp <= UINT_MAX ? 4 : 6); + return likely(ndp <= USHRT_MAX) ? 2 : (ndp <= UINT_MAX ? 4 : 6); } int dccp_insert_option(struct sock *sk, struct sk_buff *skb, diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index 9de6a69..baeec29 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -1686,8 +1686,8 @@ int udp_lib_setsockopt(struct sock *sk, int level, int optname, return -ENOPROTOOPT; if (val != 0 && val < 8) /* Illegal coverage: use default (8) */ val = 8; - else if (val > USHORT_MAX) - val = USHORT_MAX; + else if (val > USHRT_MAX) + val = USHRT_MAX; up->pcslen = val; up->pcflag |= UDPLITE_SEND_CC; break; @@ -1700,8 +1700,8 @@ int udp_lib_setsockopt(struct sock *sk, int level, int optname, return -ENOPROTOOPT; if (val != 0 && val < 8) /* Avoid silly minimal values. */ val = 8; - else if (val > USHORT_MAX) - val = USHORT_MAX; + else if (val > USHRT_MAX) + val = USHRT_MAX; up->pcrlen = val; up->pcflag |= UDPLITE_RECV_CC; break; diff --git a/net/mac80211/sta_info.c b/net/mac80211/sta_info.c index 7301975..ba9360a 100644 --- a/net/mac80211/sta_info.c +++ b/net/mac80211/sta_info.c @@ -259,7 +259,7 @@ struct sta_info *sta_info_alloc(struct ieee80211_sub_if_data *sdata, skb_queue_head_init(&sta->tx_filtered); for (i = 0; i < NUM_RX_DATA_QUEUES; i++) - sta->last_seq_ctrl[i] = cpu_to_le16(USHORT_MAX); + sta->last_seq_ctrl[i] = cpu_to_le16(USHRT_MAX); #ifdef CONFIG_MAC80211_VERBOSE_DEBUG printk(KERN_DEBUG "%s: Allocated STA %pM\n", diff --git a/net/sunrpc/rpcb_clnt.c b/net/sunrpc/rpcb_clnt.c index 1211053..dac219a 100644 --- a/net/sunrpc/rpcb_clnt.c +++ b/net/sunrpc/rpcb_clnt.c @@ -783,7 +783,7 @@ static int rpcb_dec_getport(struct rpc_rqst *req, __be32 *p, port = ntohl(*p); dprintk("RPC: %5u PMAP_%s result: %lu\n", task->tk_pid, task->tk_msg.rpc_proc->p_name, port); - if (unlikely(port > USHORT_MAX)) + if (unlikely(port > USHRT_MAX)) return -EIO; rpcb->r_port = port; -- cgit v1.1 From ef7ffe8f06895312aeb08a5f8a1d4c90e34335ea Mon Sep 17 00:00:00 2001 From: Alex Riesen Date: Mon, 24 May 2010 14:33:05 -0700 Subject: sunrpc: use formatting of module name in SUNRPC gcc-4.3.3 produces the warning: "format not a string literal and no format arguments" Signed-off-by: Alex Riesen Cc: Trond Myklebust Cc: Chuck Lever Cc: David S. Miller Acked-by: Tom Talpey Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- net/sunrpc/xprt.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) (limited to 'net') diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c index 3fc3253..dcd0132 100644 --- a/net/sunrpc/xprt.c +++ b/net/sunrpc/xprt.c @@ -166,7 +166,6 @@ EXPORT_SYMBOL_GPL(xprt_unregister_transport); int xprt_load_transport(const char *transport_name) { struct xprt_class *t; - char module_name[sizeof t->name + 5]; int result; result = 0; @@ -178,9 +177,7 @@ int xprt_load_transport(const char *transport_name) } } spin_unlock(&xprt_list_lock); - strcpy(module_name, "xprt"); - strncat(module_name, transport_name, sizeof t->name); - result = request_module(module_name); + result = request_module("xprt%s", transport_name); out: return result; } -- cgit v1.1 From e513480e28cdfd868755f05c1a654fcfcee58070 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?R=C3=A9mi=20Denis-Courmont?= Date: Tue, 25 May 2010 16:08:39 -0700 Subject: Phonet: fix potential use-after-free in pep_sock_close() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit sk_common_release() might destroy our last reference to the socket. So an extra temporary reference is needed during cleanup. Signed-off-by: Rémi Denis-Courmont Signed-off-by: David S. Miller --- net/phonet/pep.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'net') diff --git a/net/phonet/pep.c b/net/phonet/pep.c index af4d38b..7b048a3 100644 --- a/net/phonet/pep.c +++ b/net/phonet/pep.c @@ -626,6 +626,7 @@ static void pep_sock_close(struct sock *sk, long timeout) struct pep_sock *pn = pep_sk(sk); int ifindex = 0; + sock_hold(sk); /* keep a reference after sk_common_release() */ sk_common_release(sk); lock_sock(sk); @@ -644,6 +645,7 @@ static void pep_sock_close(struct sock *sk, long timeout) if (ifindex) gprs_detach(sk); + sock_put(sk); } static int pep_wait_connreq(struct sock *sk, int noblock) -- cgit v1.1 From ed0f160ad674407adb3aba499444f71c83289c63 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Wed, 26 May 2010 00:38:56 -0700 Subject: ipmr: off by one in __ipmr_fill_mroute() This fixes a smatch warning: net/ipv4/ipmr.c +1917 __ipmr_fill_mroute(12) error: buffer overflow '(mrt)->vif_table' 32 <= 32 The ipv6 version had the same issue. Signed-off-by: Dan Carpenter Signed-off-by: David S. Miller --- net/ipv4/ipmr.c | 2 +- net/ipv6/ip6mr.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c index 4588910..856123f 100644 --- a/net/ipv4/ipmr.c +++ b/net/ipv4/ipmr.c @@ -1911,7 +1911,7 @@ static int __ipmr_fill_mroute(struct mr_table *mrt, struct sk_buff *skb, struct rtattr *mp_head; /* If cache is unresolved, don't try to parse IIF and OIF */ - if (c->mfc_parent > MAXVIFS) + if (c->mfc_parent >= MAXVIFS) return -ENOENT; if (VIF_EXISTS(mrt, c->mfc_parent)) diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c index bd9e7d3..073071f 100644 --- a/net/ipv6/ip6mr.c +++ b/net/ipv6/ip6mr.c @@ -2017,7 +2017,7 @@ static int __ip6mr_fill_mroute(struct mr6_table *mrt, struct sk_buff *skb, struct rtattr *mp_head; /* If cache is unresolved, don't try to parse IIF and OIF */ - if (c->mf6c_parent > MAXMIFS) + if (c->mf6c_parent >= MAXMIFS) return -ENOENT; if (MIF_EXISTS(mrt, c->mf6c_parent)) -- cgit v1.1 From 0a68b0bed08eeb7ec62e0125f17856b1ccb1ea4b Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Wed, 26 May 2010 08:42:24 -0400 Subject: sunrpc: fix leak on error on socket xprt setup Also collect exit code together while we're at it. Signed-off-by: J. Bruce Fields Cc: Trond Myklebust Signed-off-by: Trond Myklebust --- net/sunrpc/xprtsock.c | 29 ++++++++++++++++++----------- 1 file changed, 18 insertions(+), 11 deletions(-) (limited to 'net') diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c index b7cd8cc..2a96751 100644 --- a/net/sunrpc/xprtsock.c +++ b/net/sunrpc/xprtsock.c @@ -2293,6 +2293,7 @@ static struct rpc_xprt *xs_setup_udp(struct xprt_create *args) struct sockaddr *addr = args->dstaddr; struct rpc_xprt *xprt; struct sock_xprt *transport; + struct rpc_xprt *ret; xprt = xs_setup_xprt(args, xprt_udp_slot_table_entries); if (IS_ERR(xprt)) @@ -2330,8 +2331,8 @@ static struct rpc_xprt *xs_setup_udp(struct xprt_create *args) xs_format_peer_addresses(xprt, "udp", RPCBIND_NETID_UDP6); break; default: - kfree(xprt); - return ERR_PTR(-EAFNOSUPPORT); + ret = ERR_PTR(-EAFNOSUPPORT); + goto out_err; } if (xprt_bound(xprt)) @@ -2346,10 +2347,11 @@ static struct rpc_xprt *xs_setup_udp(struct xprt_create *args) if (try_module_get(THIS_MODULE)) return xprt; - + ret = ERR_PTR(-EINVAL); +out_err: kfree(xprt->slot); kfree(xprt); - return ERR_PTR(-EINVAL); + return ret; } static const struct rpc_timeout xs_tcp_default_timeout = { @@ -2368,6 +2370,7 @@ static struct rpc_xprt *xs_setup_tcp(struct xprt_create *args) struct sockaddr *addr = args->dstaddr; struct rpc_xprt *xprt; struct sock_xprt *transport; + struct rpc_xprt *ret; xprt = xs_setup_xprt(args, xprt_tcp_slot_table_entries); if (IS_ERR(xprt)) @@ -2403,8 +2406,8 @@ static struct rpc_xprt *xs_setup_tcp(struct xprt_create *args) xs_format_peer_addresses(xprt, "tcp", RPCBIND_NETID_TCP6); break; default: - kfree(xprt); - return ERR_PTR(-EAFNOSUPPORT); + ret = ERR_PTR(-EAFNOSUPPORT); + goto out_err; } if (xprt_bound(xprt)) @@ -2420,10 +2423,11 @@ static struct rpc_xprt *xs_setup_tcp(struct xprt_create *args) if (try_module_get(THIS_MODULE)) return xprt; - + ret = ERR_PTR(-EINVAL); +out_err: kfree(xprt->slot); kfree(xprt); - return ERR_PTR(-EINVAL); + return ret; } /** @@ -2437,6 +2441,7 @@ static struct rpc_xprt *xs_setup_bc_tcp(struct xprt_create *args) struct rpc_xprt *xprt; struct sock_xprt *transport; struct svc_sock *bc_sock; + struct rpc_xprt *ret; xprt = xs_setup_xprt(args, xprt_tcp_slot_table_entries); if (IS_ERR(xprt)) @@ -2476,8 +2481,8 @@ static struct rpc_xprt *xs_setup_bc_tcp(struct xprt_create *args) RPCBIND_NETID_TCP6); break; default: - kfree(xprt); - return ERR_PTR(-EAFNOSUPPORT); + ret = ERR_PTR(-EAFNOSUPPORT); + goto out_err; } if (xprt_bound(xprt)) @@ -2499,9 +2504,11 @@ static struct rpc_xprt *xs_setup_bc_tcp(struct xprt_create *args) if (try_module_get(THIS_MODULE)) return xprt; + ret = ERR_PTR(-EINVAL); +out_err: kfree(xprt->slot); kfree(xprt); - return ERR_PTR(-EINVAL); + return ret; } static struct xprt_class xs_udp_transport = { -- cgit v1.1 From a56635a56f2afb3d22d9ce07e8f8d69537416b2d Mon Sep 17 00:00:00 2001 From: Julia Lawall Date: Wed, 26 May 2010 05:56:48 +0000 Subject: net/iucv: Add missing spin_unlock Add a spin_unlock missing on the error path. There seems like no reason why the lock should continue to be held if the kzalloc fail. The semantic match that finds this problem is as follows: (http://coccinelle.lip6.fr/) // @@ expression E1; @@ * spin_lock(E1,...); <+... when != E1 if (...) { ... when != E1 * return ...; } ...+> * spin_unlock(E1,...); // Signed-off-by: Julia Lawall Signed-off-by: David S. Miller --- net/iucv/af_iucv.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/iucv/af_iucv.c b/net/iucv/af_iucv.c index c8b4599..9637e45 100644 --- a/net/iucv/af_iucv.c +++ b/net/iucv/af_iucv.c @@ -1619,7 +1619,7 @@ static void iucv_callback_rx(struct iucv_path *path, struct iucv_message *msg) save_message: save_msg = kzalloc(sizeof(struct sock_msg_q), GFP_ATOMIC | GFP_DMA); if (!save_msg) - return; + goto out_unlock; save_msg->path = path; save_msg->msg = *msg; -- cgit v1.1 From 8a74ad60a546b13bd1096b2a61a7a5c6fd9ae17c Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 26 May 2010 19:20:18 +0000 Subject: net: fix lock_sock_bh/unlock_sock_bh This new sock lock primitive was introduced to speedup some user context socket manipulation. But it is unsafe to protect two threads, one using regular lock_sock/release_sock, one using lock_sock_bh/unlock_sock_bh This patch changes lock_sock_bh to be careful against 'owned' state. If owned is found to be set, we must take the slow path. lock_sock_bh() now returns a boolean to say if the slow path was taken, and this boolean is used at unlock_sock_bh time to call the appropriate unlock function. After this change, BH are either disabled or enabled during the lock_sock_bh/unlock_sock_bh protected section. This might be misleading, so we rename these functions to lock_sock_fast()/unlock_sock_fast(). Reported-by: Anton Blanchard Signed-off-by: Eric Dumazet Tested-by: Anton Blanchard Signed-off-by: David S. Miller --- net/core/datagram.c | 6 ++++-- net/core/sock.c | 33 +++++++++++++++++++++++++++++++++ net/ipv4/udp.c | 14 ++++++++------ net/ipv6/udp.c | 5 +++-- 4 files changed, 48 insertions(+), 10 deletions(-) (limited to 'net') diff --git a/net/core/datagram.c b/net/core/datagram.c index e0097531..f5b6f43 100644 --- a/net/core/datagram.c +++ b/net/core/datagram.c @@ -229,15 +229,17 @@ EXPORT_SYMBOL(skb_free_datagram); void skb_free_datagram_locked(struct sock *sk, struct sk_buff *skb) { + bool slow; + if (likely(atomic_read(&skb->users) == 1)) smp_rmb(); else if (likely(!atomic_dec_and_test(&skb->users))) return; - lock_sock_bh(sk); + slow = lock_sock_fast(sk); skb_orphan(skb); sk_mem_reclaim_partial(sk); - unlock_sock_bh(sk); + unlock_sock_fast(sk, slow); /* skb is now orphaned, can be freed outside of locked section */ __kfree_skb(skb); diff --git a/net/core/sock.c b/net/core/sock.c index 37fe9b6..2cf7f9f 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -2007,6 +2007,39 @@ void release_sock(struct sock *sk) } EXPORT_SYMBOL(release_sock); +/** + * lock_sock_fast - fast version of lock_sock + * @sk: socket + * + * This version should be used for very small section, where process wont block + * return false if fast path is taken + * sk_lock.slock locked, owned = 0, BH disabled + * return true if slow path is taken + * sk_lock.slock unlocked, owned = 1, BH enabled + */ +bool lock_sock_fast(struct sock *sk) +{ + might_sleep(); + spin_lock_bh(&sk->sk_lock.slock); + + if (!sk->sk_lock.owned) + /* + * Note : We must disable BH + */ + return false; + + __lock_sock(sk); + sk->sk_lock.owned = 1; + spin_unlock(&sk->sk_lock.slock); + /* + * The sk_lock has mutex_lock() semantics here: + */ + mutex_acquire(&sk->sk_lock.dep_map, 0, 0, _RET_IP_); + local_bh_enable(); + return true; +} +EXPORT_SYMBOL(lock_sock_fast); + int sock_get_timestamp(struct sock *sk, struct timeval __user *userstamp) { struct timeval tv; diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index 9de6a69..b9d0d40 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -1063,10 +1063,11 @@ static unsigned int first_packet_length(struct sock *sk) spin_unlock_bh(&rcvq->lock); if (!skb_queue_empty(&list_kill)) { - lock_sock_bh(sk); + bool slow = lock_sock_fast(sk); + __skb_queue_purge(&list_kill); sk_mem_reclaim_partial(sk); - unlock_sock_bh(sk); + unlock_sock_fast(sk, slow); } return res; } @@ -1123,6 +1124,7 @@ int udp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, int peeked; int err; int is_udplite = IS_UDPLITE(sk); + bool slow; /* * Check any passed addresses @@ -1197,10 +1199,10 @@ out: return err; csum_copy_err: - lock_sock_bh(sk); + slow = lock_sock_fast(sk); if (!skb_kill_datagram(sk, skb, flags)) UDP_INC_STATS_USER(sock_net(sk), UDP_MIB_INERRORS, is_udplite); - unlock_sock_bh(sk); + unlock_sock_fast(sk, slow); if (noblock) return -EAGAIN; @@ -1625,9 +1627,9 @@ int udp_rcv(struct sk_buff *skb) void udp_destroy_sock(struct sock *sk) { - lock_sock_bh(sk); + bool slow = lock_sock_fast(sk); udp_flush_pending_frames(sk); - unlock_sock_bh(sk); + unlock_sock_fast(sk, slow); } /* diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index 3d7a2c0..87be586 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -328,6 +328,7 @@ int udpv6_recvmsg(struct kiocb *iocb, struct sock *sk, int err; int is_udplite = IS_UDPLITE(sk); int is_udp4; + bool slow; if (addr_len) *addr_len=sizeof(struct sockaddr_in6); @@ -424,7 +425,7 @@ out: return err; csum_copy_err: - lock_sock_bh(sk); + slow = lock_sock_fast(sk); if (!skb_kill_datagram(sk, skb, flags)) { if (is_udp4) UDP_INC_STATS_USER(sock_net(sk), @@ -433,7 +434,7 @@ csum_copy_err: UDP6_INC_STATS_USER(sock_net(sk), UDP_MIB_INERRORS, is_udplite); } - unlock_sock_bh(sk); + unlock_sock_fast(sk, slow); if (flags & MSG_DONTWAIT) return -EAGAIN; -- cgit v1.1 From 92e99a98bb44ccce6c902ea077b49cafcfb80025 Mon Sep 17 00:00:00 2001 From: Akinobu Mita Date: Wed, 26 May 2010 14:43:33 -0700 Subject: iucv: convert cpu notifier to return encapsulate errno value By the previous modification, the cpu notifier can return encapsulate errno value. This converts the cpu notifiers for iucv. Signed-off-by: Akinobu Mita Cc: Ursula Braun Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- net/iucv/iucv.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/net/iucv/iucv.c b/net/iucv/iucv.c index fd8b283..f28ad2cc 100644 --- a/net/iucv/iucv.c +++ b/net/iucv/iucv.c @@ -632,13 +632,14 @@ static int __cpuinit iucv_cpu_notify(struct notifier_block *self, iucv_irq_data[cpu] = kmalloc_node(sizeof(struct iucv_irq_data), GFP_KERNEL|GFP_DMA, cpu_to_node(cpu)); if (!iucv_irq_data[cpu]) - return NOTIFY_BAD; + return notifier_from_errno(-ENOMEM); + iucv_param[cpu] = kmalloc_node(sizeof(union iucv_param), GFP_KERNEL|GFP_DMA, cpu_to_node(cpu)); if (!iucv_param[cpu]) { kfree(iucv_irq_data[cpu]); iucv_irq_data[cpu] = NULL; - return NOTIFY_BAD; + return notifier_from_errno(-ENOMEM); } iucv_param_irq[cpu] = kmalloc_node(sizeof(union iucv_param), GFP_KERNEL|GFP_DMA, cpu_to_node(cpu)); @@ -647,7 +648,7 @@ static int __cpuinit iucv_cpu_notify(struct notifier_block *self, iucv_param[cpu] = NULL; kfree(iucv_irq_data[cpu]); iucv_irq_data[cpu] = NULL; - return NOTIFY_BAD; + return notifier_from_errno(-ENOMEM); } break; case CPU_UP_CANCELED: @@ -677,7 +678,7 @@ static int __cpuinit iucv_cpu_notify(struct notifier_block *self, cpu_clear(cpu, cpumask); if (cpus_empty(cpumask)) /* Can't offline last IUCV enabled cpu. */ - return NOTIFY_BAD; + return notifier_from_errno(-EINVAL); smp_call_function_single(cpu, iucv_retrieve_cpu, NULL, 1); if (cpus_empty(iucv_irq_cpumask)) smp_call_function_single(first_cpu(iucv_buffer_cpumask), -- cgit v1.1 From a47311380e094bb201be8a818370c73c3f52122c Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 27 May 2010 16:09:39 -0700 Subject: net: fix __neigh_event_send() commit 7fee226ad23 (net: add a noref bit on skb dst) missed one spot where an skb is enqueued, with a possibly not refcounted dst entry. __neigh_event_send() inserts skb into arp_queue, so we must make sure dst entry is refcounted, or dst entry can be freed by garbage collector after caller exits from rcu protected section. Reported-by: Ingo Molnar Tested-by: Ingo Molnar Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/core/neighbour.c | 1 + 1 file changed, 1 insertion(+) (limited to 'net') diff --git a/net/core/neighbour.c b/net/core/neighbour.c index bff3790..6ba1c0e 100644 --- a/net/core/neighbour.c +++ b/net/core/neighbour.c @@ -934,6 +934,7 @@ int __neigh_event_send(struct neighbour *neigh, struct sk_buff *skb) kfree_skb(buff); NEIGH_CACHE_STAT_INC(neigh->tbl, unres_discards); } + skb_dst_force(skb); __skb_queue_tail(&neigh->arp_queue, skb); } rc = 1; -- cgit v1.1 From 0aa68271510ae2b221d4b60892103837be63afe4 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Thu, 27 May 2010 16:14:30 -0700 Subject: ipv6: Add GSO support on forwarding path Currently we disallow GSO packets on the IPv6 forward path. This patch fixes this. Note that I discovered that our existing GSO MTU checks (e.g., IPv4 forwarding) are buggy in that they skip the check altogether, when they really should be checking gso_size + header instead. I have also been lazy here in that I haven't bothered to segment the GSO packet by hand before generating an ICMP message. Someone should add that to be 100% correct. Reported-by: Ralf Baechle Signed-off-by: Herbert Xu Signed-off-by: David S. Miller --- net/ipv6/ip6_output.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index cd963f6..89425af 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -507,7 +507,7 @@ int ip6_forward(struct sk_buff *skb) if (mtu < IPV6_MIN_MTU) mtu = IPV6_MIN_MTU; - if (skb->len > mtu) { + if (skb->len > mtu && !skb_is_gso(skb)) { /* Again, force OUTPUT device used as source address */ skb->dev = dst->dev; icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu); -- cgit v1.1 From 50636af715ac1ceb1872bd29a4bdcc68975c3263 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 28 May 2010 03:41:17 -0700 Subject: xt_tee: use skb_dst_drop() After commit 7fee226a (net: add a noref bit on skb dst), its wrong to use : dst_release(skb_dst(skb)), since we could decrement a refcount while skb dst was not refcounted. We should use skb_dst_drop(skb) instead. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/netfilter/xt_TEE.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/netfilter/xt_TEE.c b/net/netfilter/xt_TEE.c index d7920d9f..859d9fd 100644 --- a/net/netfilter/xt_TEE.c +++ b/net/netfilter/xt_TEE.c @@ -76,7 +76,7 @@ tee_tg_route4(struct sk_buff *skb, const struct xt_tee_tginfo *info) if (ip_route_output_key(net, &rt, &fl) != 0) return false; - dst_release(skb_dst(skb)); + skb_dst_drop(skb); skb_dst_set(skb, &rt->u.dst); skb->dev = rt->u.dst.dev; skb->protocol = htons(ETH_P_IP); @@ -157,7 +157,7 @@ tee_tg_route6(struct sk_buff *skb, const struct xt_tee_tginfo *info) if (dst == NULL) return false; - dst_release(skb_dst(skb)); + skb_dst_drop(skb); skb_dst_set(skb, dst); skb->dev = dst->dev; skb->protocol = htons(ETH_P_IPV6); -- cgit v1.1 From 8ca9418350eccd5dd2659931807c1901224dd638 Mon Sep 17 00:00:00 2001 From: Scott Feldman Date: Fri, 28 May 2010 03:42:18 -0700 Subject: netlink: bug fix: don't overrun skbs on vf_port dump Noticed by Patrick McHardy: was continuing to fill skb after a nla_put_failure, ignoring the size calculated by upper layer. Now, return -EMSGSIZE on any overruns, but also allow netdev to fail ndo_get_vf_port with error other than -EMSGSIZE, thus unwinding nest. Signed-off-by: Scott Feldman Signed-off-by: David S. Miller --- net/core/rtnetlink.c | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) (limited to 'net') diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index 7ab86f3..7331bb2 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -722,14 +722,13 @@ static int rtnl_vf_ports_fill(struct sk_buff *skb, struct net_device *dev) for (vf = 0; vf < dev_num_vf(dev->dev.parent); vf++) { vf_port = nla_nest_start(skb, IFLA_VF_PORT); - if (!vf_port) { - nla_nest_cancel(skb, vf_ports); - return -EMSGSIZE; - } + if (!vf_port) + goto nla_put_failure; NLA_PUT_U32(skb, IFLA_PORT_VF, vf); err = dev->netdev_ops->ndo_get_vf_port(dev, vf, skb); + if (err == -EMSGSIZE) + goto nla_put_failure; if (err) { -nla_put_failure: nla_nest_cancel(skb, vf_port); continue; } @@ -739,6 +738,10 @@ nla_put_failure: nla_nest_end(skb, vf_ports); return 0; + +nla_put_failure: + nla_nest_cancel(skb, vf_ports); + return -EMSGSIZE; } static int rtnl_port_self_fill(struct sk_buff *skb, struct net_device *dev) @@ -753,7 +756,7 @@ static int rtnl_port_self_fill(struct sk_buff *skb, struct net_device *dev) err = dev->netdev_ops->ndo_get_vf_port(dev, PORT_SELF_VF, skb); if (err) { nla_nest_cancel(skb, port_self); - return err; + return (err == -EMSGSIZE) ? err : 0; } nla_nest_end(skb, port_self); -- cgit v1.1 From 045de01a174d9f0734f657eb4b3313d89b4fd5ad Mon Sep 17 00:00:00 2001 From: Scott Feldman Date: Fri, 28 May 2010 03:42:43 -0700 Subject: netlink: bug fix: wrong size was calculated for vfinfo list blob The wrong size was being calculated for vfinfo. In one case, it was over- calculating using nlmsg_total_size on attrs, in another case, it was under-calculating by assuming ifla_vf_* structs are packed together, but each struct is it's own attr w/ hdr (and padding). Signed-off-by: Scott Feldman Signed-off-by: David S. Miller --- net/core/rtnetlink.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) (limited to 'net') diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index 7331bb2..1a2af24 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -650,11 +650,12 @@ static inline int rtnl_vfinfo_size(const struct net_device *dev) if (dev->dev.parent && dev_is_pci(dev->dev.parent)) { int num_vfs = dev_num_vf(dev->dev.parent); - size_t size = nlmsg_total_size(sizeof(struct nlattr)); - size += nlmsg_total_size(num_vfs * sizeof(struct nlattr)); - size += num_vfs * (sizeof(struct ifla_vf_mac) + - sizeof(struct ifla_vf_vlan) + - sizeof(struct ifla_vf_tx_rate)); + size_t size = nla_total_size(sizeof(struct nlattr)); + size += nla_total_size(num_vfs * sizeof(struct nlattr)); + size += num_vfs * + (nla_total_size(sizeof(struct ifla_vf_mac)) + + nla_total_size(sizeof(struct ifla_vf_vlan)) + + nla_total_size(sizeof(struct ifla_vf_tx_rate))); return size; } else return 0; -- cgit v1.1 From 368a07d26ae99c80678a968946744fd83e7708d0 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Fri, 28 May 2010 14:26:23 +0200 Subject: mac80211: make a function static sparse correctly complains that __ieee80211_get_channel_mode is not static. Signed-off-by: Johannes Berg Signed-off-by: John W. Linville --- net/mac80211/chan.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/mac80211/chan.c b/net/mac80211/chan.c index 5d218c5..32be11e 100644 --- a/net/mac80211/chan.c +++ b/net/mac80211/chan.c @@ -5,7 +5,7 @@ #include #include "ieee80211_i.h" -enum ieee80211_chan_mode +static enum ieee80211_chan_mode __ieee80211_get_channel_mode(struct ieee80211_local *local, struct ieee80211_sub_if_data *ignore) { -- cgit v1.1 From 6057fd78a8dcce6269f029b967051d5a2e9b0895 Mon Sep 17 00:00:00 2001 From: Brian Haley Date: Fri, 28 May 2010 23:02:35 -0700 Subject: IPv6: fix Mobile IPv6 regression Commit f4f914b5 (net: ipv6 bind to device issue) caused a regression with Mobile IPv6 when it changed the meaning of fl->oif to become a strict requirement of the route lookup. Instead, only force strict mode when sk->sk_bound_dev_if is set on the calling socket, getting the intended behavior and fixing the regression. Tested-by: Arnaud Ebalard Signed-off-by: Brian Haley Signed-off-by: David S. Miller --- net/ipv6/route.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 294cbe8..252d761 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -814,7 +814,7 @@ struct dst_entry * ip6_route_output(struct net *net, struct sock *sk, { int flags = 0; - if (fl->oif || rt6_need_strict(&fl->fl6_dst)) + if ((sk && sk->sk_bound_dev_if) || rt6_need_strict(&fl->fl6_dst)) flags |= RT6_LOOKUP_F_IFACE; if (!ipv6_addr_any(&fl->fl6_src)) -- cgit v1.1 From 5b0daa3474d52bed906c4d5e92b44e10148c6972 Mon Sep 17 00:00:00 2001 From: Changli Gao Date: Sat, 29 May 2010 00:12:13 -0700 Subject: skb: make skb_recycle_check() return a bool value Signed-off-by: Changli Gao Signed-off-by: David S. Miller --- net/core/skbuff.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) (limited to 'net') diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 4c11000..4667d4d 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -482,22 +482,22 @@ EXPORT_SYMBOL(consume_skb); * reference count dropping and cleans up the skbuff as if it * just came from __alloc_skb(). */ -int skb_recycle_check(struct sk_buff *skb, int skb_size) +bool skb_recycle_check(struct sk_buff *skb, int skb_size) { struct skb_shared_info *shinfo; if (irqs_disabled()) - return 0; + return false; if (skb_is_nonlinear(skb) || skb->fclone != SKB_FCLONE_UNAVAILABLE) - return 0; + return false; skb_size = SKB_DATA_ALIGN(skb_size + NET_SKB_PAD); if (skb_end_pointer(skb) - skb->head < skb_size) - return 0; + return false; if (skb_shared(skb) || skb_cloned(skb)) - return 0; + return false; skb_release_head_state(skb); @@ -509,7 +509,7 @@ int skb_recycle_check(struct sk_buff *skb, int skb_size) skb->data = skb->head + NET_SKB_PAD; skb_reset_tail_pointer(skb); - return 1; + return true; } EXPORT_SYMBOL(skb_recycle_check); -- cgit v1.1 From 5daf47bb4e708fde32c1856a0d049e3c3d03c36c Mon Sep 17 00:00:00 2001 From: Julia Lawall Date: Wed, 26 May 2010 05:54:21 +0000 Subject: net/rds: Add missing mutex_unlock Add a mutex_unlock missing on the error path. In each case, whenever the label out is reached from elsewhere in the function, mutex is not locked. The semantic match that finds this problem is as follows: (http://coccinelle.lip6.fr/) // @@ expression E1; @@ * mutex_lock(E1); <+... when != E1 if (...) { ... when != E1 * return ...; } ...+> * mutex_unlock(E1); // Signed-off-by: Julia Lawall Reviewed-by: Zach Brown Acked-by: Andy Grover Signed-off-by: David S. Miller --- net/rds/ib_cm.c | 1 + net/rds/iw_cm.c | 1 + 2 files changed, 2 insertions(+) (limited to 'net') diff --git a/net/rds/ib_cm.c b/net/rds/ib_cm.c index 10ed0d5..f688327 100644 --- a/net/rds/ib_cm.c +++ b/net/rds/ib_cm.c @@ -475,6 +475,7 @@ int rds_ib_cm_handle_connect(struct rdma_cm_id *cm_id, err = rds_ib_setup_qp(conn); if (err) { rds_ib_conn_error(conn, "rds_ib_setup_qp failed (%d)\n", err); + mutex_unlock(&conn->c_cm_lock); goto out; } diff --git a/net/rds/iw_cm.c b/net/rds/iw_cm.c index a9d951b..b5dd6ac 100644 --- a/net/rds/iw_cm.c +++ b/net/rds/iw_cm.c @@ -452,6 +452,7 @@ int rds_iw_cm_handle_connect(struct rdma_cm_id *cm_id, err = rds_iw_setup_qp(conn); if (err) { rds_iw_conn_error(conn, "rds_iw_setup_qp failed (%d)\n", err); + mutex_unlock(&conn->c_cm_lock); goto out; } -- cgit v1.1 From 97dc875f90a7b88a9fa476c256345c0d40fcdf6c Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Wed, 26 May 2010 05:16:48 +0000 Subject: caif: unlock on error path in cfserl_receive() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit There was an spin_unlock missing on the error path. The spin_lock was tucked in with the declarations so it was hard to spot. I added a new line. Signed-off-by: Dan Carpenter Acked-by: Sjur Brændeland Signed-off-by: David S. Miller --- net/caif/cfserl.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/caif/cfserl.c b/net/caif/cfserl.c index cb4325a..965c5ba 100644 --- a/net/caif/cfserl.c +++ b/net/caif/cfserl.c @@ -59,16 +59,18 @@ static int cfserl_receive(struct cflayer *l, struct cfpkt *newpkt) u8 stx = CFSERL_STX; int ret; u16 expectlen = 0; + caif_assert(newpkt != NULL); spin_lock(&layr->sync); if (layr->incomplete_frm != NULL) { - layr->incomplete_frm = cfpkt_append(layr->incomplete_frm, newpkt, expectlen); pkt = layr->incomplete_frm; - if (pkt == NULL) + if (pkt == NULL) { + spin_unlock(&layr->sync); return -ENOMEM; + } } else { pkt = newpkt; } -- cgit v1.1 From 7dfde179c38056b91d51e60f3d50902387f27c84 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?R=C3=A9mi=20Denis-Courmont?= Date: Wed, 26 May 2010 00:44:44 +0000 Subject: Phonet: listening socket lock protects the connected socket list MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The accept()'d socket need to be unhashed while the (listen()'ing) socket lock is held. This fixes a race condition that could lead to an OOPS. Signed-off-by: Rémi Denis-Courmont Signed-off-by: David S. Miller --- net/phonet/pep.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/phonet/pep.c b/net/phonet/pep.c index 7b048a3..94d72e8 100644 --- a/net/phonet/pep.c +++ b/net/phonet/pep.c @@ -1045,12 +1045,12 @@ static void pep_sock_unhash(struct sock *sk) lock_sock(sk); if ((1 << sk->sk_state) & ~(TCPF_CLOSE|TCPF_LISTEN)) { skparent = pn->listener; - sk_del_node_init(sk); release_sock(sk); - sk = skparent; pn = pep_sk(skparent); - lock_sock(sk); + lock_sock(skparent); + sk_del_node_init(sk); + sk = skparent; } /* Unhash a listening sock only when it is closed * and all of its active connected pipes are closed. */ -- cgit v1.1 From 2903037400a26e7c0cc93ab75a7d62abfacdf485 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Sat, 29 May 2010 00:20:48 -0700 Subject: net: fix sk_forward_alloc corruptions As David found out, sock_queue_err_skb() should be called with socket lock hold, or we risk sk_forward_alloc corruption, since we use non atomic operations to update this field. This patch adds bh_lock_sock()/bh_unlock_sock() pair to three spots. (BH already disabled) 1) skb_tstamp_tx() 2) Before calling ip_icmp_error(), in __udp4_lib_err() 3) Before calling ipv6_icmp_error(), in __udp6_lib_err() Reported-by: Anton Blanchard Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/core/skbuff.c | 4 ++++ net/ipv4/udp.c | 2 ++ net/ipv6/udp.c | 6 ++++-- 3 files changed, 10 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 4667d4d..f2913ae 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -2992,7 +2992,11 @@ void skb_tstamp_tx(struct sk_buff *orig_skb, memset(serr, 0, sizeof(*serr)); serr->ee.ee_errno = ENOMSG; serr->ee.ee_origin = SO_EE_ORIGIN_TIMESTAMPING; + + bh_lock_sock(sk); err = sock_queue_err_skb(sk, skb); + bh_unlock_sock(sk); + if (err) kfree_skb(skb); } diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index b9d0d40..acdc9be 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -634,7 +634,9 @@ void __udp4_lib_err(struct sk_buff *skb, u32 info, struct udp_table *udptable) if (!harderr || sk->sk_state != TCP_ESTABLISHED) goto out; } else { + bh_lock_sock(sk); ip_icmp_error(sk, skb, err, uh->dest, info, (u8 *)(uh+1)); + bh_unlock_sock(sk); } sk->sk_err = err; sk->sk_error_report(sk); diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index 87be586..3048f90 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -466,9 +466,11 @@ void __udp6_lib_err(struct sk_buff *skb, struct inet6_skb_parm *opt, if (sk->sk_state != TCP_ESTABLISHED && !np->recverr) goto out; - if (np->recverr) + if (np->recverr) { + bh_lock_sock(sk); ipv6_icmp_error(sk, skb, err, uh->dest, ntohl(info), (u8 *)(uh+1)); - + bh_unlock_sock(sk); + } sk->sk_err = err; sk->sk_error_report(sk); out: -- cgit v1.1 From c936e8bd1de2fa50c49e3df6fa5036bf07870b67 Mon Sep 17 00:00:00 2001 From: Xiaotian Feng Date: Mon, 31 May 2010 16:41:09 +0200 Subject: netfilter: don't xt_jumpstack_alloc twice in xt_register_table In xt_register_table, xt_jumpstack_alloc is called first, later xt_replace_table is used. But in xt_replace_table, xt_jumpstack_alloc will be used again. Then the memory allocated by previous xt_jumpstack_alloc will be leaked. We can simply remove the previous xt_jumpstack_alloc because there aren't any users of newinfo between xt_jumpstack_alloc and xt_replace_table. Signed-off-by: Xiaotian Feng Cc: Patrick McHardy Cc: "David S. Miller" Cc: Jan Engelhardt Cc: Andrew Morton Cc: Rusty Russell Cc: Alexey Dobriyan Acked-By: Jan Engelhardt Signed-off-by: Patrick McHardy --- net/netfilter/x_tables.c | 4 ---- 1 file changed, 4 deletions(-) (limited to 'net') diff --git a/net/netfilter/x_tables.c b/net/netfilter/x_tables.c index 445de70..47b1e79 100644 --- a/net/netfilter/x_tables.c +++ b/net/netfilter/x_tables.c @@ -844,10 +844,6 @@ struct xt_table *xt_register_table(struct net *net, struct xt_table_info *private; struct xt_table *t, *table; - ret = xt_jumpstack_alloc(newinfo); - if (ret < 0) - return ERR_PTR(ret); - /* Don't add one object to multiple lists. */ table = kmemdup(input_table, sizeof(struct xt_table), GFP_KERNEL); if (!table) { -- cgit v1.1 From 7489aec8eed4f2f1eb3b4d35763bd3ea30b32ef5 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 31 May 2010 16:41:35 +0200 Subject: netfilter: xtables: stackptr should be percpu commit f3c5c1bfd4 (netfilter: xtables: make ip_tables reentrant) introduced a performance regression, because stackptr array is shared by all cpus, adding cache line ping pongs. (16 cpus share a 64 bytes cache line) Fix this using alloc_percpu() Signed-off-by: Eric Dumazet Acked-By: Jan Engelhardt Signed-off-by: Patrick McHardy --- net/ipv4/netfilter/ip_tables.c | 2 +- net/ipv6/netfilter/ip6_tables.c | 2 +- net/netfilter/x_tables.c | 13 +++---------- 3 files changed, 5 insertions(+), 12 deletions(-) (limited to 'net') diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c index 63958f3..4b6c5ca 100644 --- a/net/ipv4/netfilter/ip_tables.c +++ b/net/ipv4/netfilter/ip_tables.c @@ -336,7 +336,7 @@ ipt_do_table(struct sk_buff *skb, cpu = smp_processor_id(); table_base = private->entries[cpu]; jumpstack = (struct ipt_entry **)private->jumpstack[cpu]; - stackptr = &private->stackptr[cpu]; + stackptr = per_cpu_ptr(private->stackptr, cpu); origptr = *stackptr; e = get_entry(table_base, private->hook_entry[hook]); diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c index 6f517bd..9d2d68f 100644 --- a/net/ipv6/netfilter/ip6_tables.c +++ b/net/ipv6/netfilter/ip6_tables.c @@ -363,7 +363,7 @@ ip6t_do_table(struct sk_buff *skb, cpu = smp_processor_id(); table_base = private->entries[cpu]; jumpstack = (struct ip6t_entry **)private->jumpstack[cpu]; - stackptr = &private->stackptr[cpu]; + stackptr = per_cpu_ptr(private->stackptr, cpu); origptr = *stackptr; e = get_entry(table_base, private->hook_entry[hook]); diff --git a/net/netfilter/x_tables.c b/net/netfilter/x_tables.c index 47b1e79..e34622f 100644 --- a/net/netfilter/x_tables.c +++ b/net/netfilter/x_tables.c @@ -699,10 +699,8 @@ void xt_free_table_info(struct xt_table_info *info) vfree(info->jumpstack); else kfree(info->jumpstack); - if (sizeof(unsigned int) * nr_cpu_ids > PAGE_SIZE) - vfree(info->stackptr); - else - kfree(info->stackptr); + + free_percpu(info->stackptr); kfree(info); } @@ -753,14 +751,9 @@ static int xt_jumpstack_alloc(struct xt_table_info *i) unsigned int size; int cpu; - size = sizeof(unsigned int) * nr_cpu_ids; - if (size > PAGE_SIZE) - i->stackptr = vmalloc(size); - else - i->stackptr = kmalloc(size, GFP_KERNEL); + i->stackptr = alloc_percpu(unsigned int); if (i->stackptr == NULL) return -ENOMEM; - memset(i->stackptr, 0, size); size = sizeof(void **) * nr_cpu_ids; if (size > PAGE_SIZE) -- cgit v1.1 From b1faf5666438090a4dc4fceac8502edc7788b7e3 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 31 May 2010 23:44:05 -0700 Subject: net: sock_queue_err_skb() dont mess with sk_forward_alloc Correct sk_forward_alloc handling for error_queue would need to use a backlog of frames that softirq handler could not deliver because socket is owned by user thread. Or extend backlog processing to be able to process normal and error packets. Another possibility is to not use mem charge for error queue, this is what I implemented in this patch. Note: this reverts commit 29030374 (net: fix sk_forward_alloc corruptions), since we dont need to lock socket anymore. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/core/skbuff.c | 30 ++++++++++++++++++++++++++++-- net/ipv4/udp.c | 6 ++---- net/ipv6/udp.c | 6 ++---- 3 files changed, 32 insertions(+), 10 deletions(-) (limited to 'net') diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 4e7ac09..9f07e74 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -2965,6 +2965,34 @@ int skb_cow_data(struct sk_buff *skb, int tailbits, struct sk_buff **trailer) } EXPORT_SYMBOL_GPL(skb_cow_data); +static void sock_rmem_free(struct sk_buff *skb) +{ + struct sock *sk = skb->sk; + + atomic_sub(skb->truesize, &sk->sk_rmem_alloc); +} + +/* + * Note: We dont mem charge error packets (no sk_forward_alloc changes) + */ +int sock_queue_err_skb(struct sock *sk, struct sk_buff *skb) +{ + if (atomic_read(&sk->sk_rmem_alloc) + skb->truesize >= + (unsigned)sk->sk_rcvbuf) + return -ENOMEM; + + skb_orphan(skb); + skb->sk = sk; + skb->destructor = sock_rmem_free; + atomic_add(skb->truesize, &sk->sk_rmem_alloc); + + skb_queue_tail(&sk->sk_error_queue, skb); + if (!sock_flag(sk, SOCK_DEAD)) + sk->sk_data_ready(sk, skb->len); + return 0; +} +EXPORT_SYMBOL(sock_queue_err_skb); + void skb_tstamp_tx(struct sk_buff *orig_skb, struct skb_shared_hwtstamps *hwtstamps) { @@ -2997,9 +3025,7 @@ void skb_tstamp_tx(struct sk_buff *orig_skb, serr->ee.ee_errno = ENOMSG; serr->ee.ee_origin = SO_EE_ORIGIN_TIMESTAMPING; - bh_lock_sock(sk); err = sock_queue_err_skb(sk, skb); - bh_unlock_sock(sk); if (err) kfree_skb(skb); diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index 50678f9..eec4ff4 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -633,11 +633,9 @@ void __udp4_lib_err(struct sk_buff *skb, u32 info, struct udp_table *udptable) if (!inet->recverr) { if (!harderr || sk->sk_state != TCP_ESTABLISHED) goto out; - } else { - bh_lock_sock(sk); + } else ip_icmp_error(sk, skb, err, uh->dest, info, (u8 *)(uh+1)); - bh_unlock_sock(sk); - } + sk->sk_err = err; sk->sk_error_report(sk); out: diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index 3048f90..87be586 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -466,11 +466,9 @@ void __udp6_lib_err(struct sk_buff *skb, struct inet6_skb_parm *opt, if (sk->sk_state != TCP_ESTABLISHED && !np->recverr) goto out; - if (np->recverr) { - bh_lock_sock(sk); + if (np->recverr) ipv6_icmp_error(sk, skb, err, uh->dest, ntohl(info), (u8 *)(uh+1)); - bh_unlock_sock(sk); - } + sk->sk_err = err; sk->sk_error_report(sk); out: -- cgit v1.1 From 288fcee8b7aa98796d96cd5b1b2e8005639328bf Mon Sep 17 00:00:00 2001 From: Joe Perches Date: Mon, 31 May 2010 23:48:19 -0700 Subject: net/ipv4/tcp_input.c: fix compilation breakage when FASTRETRANS_DEBUG > 1 Commit: c720c7e8383aff1cb219bddf474ed89d850336e3 missed these. Signed-off-by: Joe Perches Acked-by: Eric Dumazet Signed-off-by: David S. Miller --- net/ipv4/tcp_input.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 3e6dafc..548d575 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -2639,7 +2639,7 @@ static void DBGUNDO(struct sock *sk, const char *msg) if (sk->sk_family == AF_INET) { printk(KERN_DEBUG "Undo %s %pI4/%u c%u l%u ss%u/%u p%u\n", msg, - &inet->daddr, ntohs(inet->dport), + &inet->inet_daddr, ntohs(inet->inet_dport), tp->snd_cwnd, tcp_left_out(tp), tp->snd_ssthresh, tp->prior_ssthresh, tp->packets_out); @@ -2649,7 +2649,7 @@ static void DBGUNDO(struct sock *sk, const char *msg) struct ipv6_pinfo *np = inet6_sk(sk); printk(KERN_DEBUG "Undo %s %pI6/%u c%u l%u ss%u/%u p%u\n", msg, - &np->daddr, ntohs(inet->dport), + &np->daddr, ntohs(inet->inet_dport), tp->snd_cwnd, tcp_left_out(tp), tp->snd_ssthresh, tp->prior_ssthresh, tp->packets_out); -- cgit v1.1 From 8ae5977ff95c03fe6c36a5721c57dcb4bfe4f290 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Sun, 30 May 2010 14:52:58 +0200 Subject: mac80211: fix blockack-req processing Daniel reported that the paged RX changes had broken blockack request frame processing due to using data that wasn't really part of the skb data. Fix this using skb_copy_bits() for the needed data. As a side effect, this adds a check on processing too short frames, which previously this code could do. Reported-by: Daniel Halperin Signed-off-by: Johannes Berg Acked-by: Daniel Halperin Signed-off-by: John W. Linville --- net/mac80211/rx.c | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c index 6e2a7bc..5e0b654 100644 --- a/net/mac80211/rx.c +++ b/net/mac80211/rx.c @@ -1818,17 +1818,26 @@ ieee80211_rx_h_ctrl(struct ieee80211_rx_data *rx, struct sk_buff_head *frames) return RX_CONTINUE; if (ieee80211_is_back_req(bar->frame_control)) { + struct { + __le16 control, start_seq_num; + } __packed bar_data; + if (!rx->sta) return RX_DROP_MONITOR; + + if (skb_copy_bits(skb, offsetof(struct ieee80211_bar, control), + &bar_data, sizeof(bar_data))) + return RX_DROP_MONITOR; + spin_lock(&rx->sta->lock); - tid = le16_to_cpu(bar->control) >> 12; + tid = le16_to_cpu(bar_data.control) >> 12; if (!rx->sta->ampdu_mlme.tid_active_rx[tid]) { spin_unlock(&rx->sta->lock); return RX_DROP_MONITOR; } tid_agg_rx = rx->sta->ampdu_mlme.tid_rx[tid]; - start_seq_num = le16_to_cpu(bar->start_seq_num) >> 4; + start_seq_num = le16_to_cpu(bar_data.start_seq_num) >> 4; /* reset session timer */ if (tid_agg_rx->timeout) -- cgit v1.1 From 51a0d38de26226f2779912d92f155b93d539da9a Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Mon, 31 May 2010 12:00:12 +0200 Subject: mac80211: fix dialog token allocator The dialog token allocator has apparently been broken since b83f4e15 ("mac80211: fix deadlock in sta->lock") because it got moved out under the spinlock. Fix it. Signed-off-by: Johannes Berg Signed-off-by: John W. Linville --- net/mac80211/agg-tx.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/mac80211/agg-tx.c b/net/mac80211/agg-tx.c index c163d0a..98258b7 100644 --- a/net/mac80211/agg-tx.c +++ b/net/mac80211/agg-tx.c @@ -332,14 +332,16 @@ int ieee80211_start_tx_ba_session(struct ieee80211_sta *pubsta, u16 tid) IEEE80211_QUEUE_STOP_REASON_AGGREGATION); spin_unlock(&local->ampdu_lock); - spin_unlock_bh(&sta->lock); - /* send an addBA request */ + /* prepare tid data */ sta->ampdu_mlme.dialog_token_allocator++; sta->ampdu_mlme.tid_tx[tid]->dialog_token = sta->ampdu_mlme.dialog_token_allocator; sta->ampdu_mlme.tid_tx[tid]->ssn = start_seq_num; + spin_unlock_bh(&sta->lock); + + /* send AddBA request */ ieee80211_send_addba_request(sdata, pubsta->addr, tid, sta->ampdu_mlme.tid_tx[tid]->dialog_token, sta->ampdu_mlme.tid_tx[tid]->ssn, -- cgit v1.1 From fafeeb6c80e3842c6dc19d05de09a23f23eef0d8 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 1 Jun 2010 10:04:49 +0000 Subject: xfrm: force a dst reference in __xfrm_route_forward() Packets going through __xfrm_route_forward() have a not refcounted dst entry, since we enabled a noref forwarding path. xfrm_lookup() might incorrectly release this dst entry. It's a bit late to make invasive changes in xfrm_lookup(), so lets force a refcount in this path. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/xfrm/xfrm_policy.c | 1 + 1 file changed, 1 insertion(+) (limited to 'net') diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index d965a2b..4bf27d9 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -2153,6 +2153,7 @@ int __xfrm_route_forward(struct sk_buff *skb, unsigned short family) return 0; } + skb_dst_force(skb); dst = skb_dst(skb); res = xfrm_lookup(net, &dst, &fl, NULL, 0) == 0; -- cgit v1.1 From 194dbcc8a1a97cbac9a619a563e5f6b7f7d5a485 Mon Sep 17 00:00:00 2001 From: John Fastabend Date: Wed, 12 May 2010 21:31:06 +0000 Subject: net: init_vlan should not copy slave or master flags The vlan device should not copy the slave or master flags from the real device. It is not in the bond until added nor is it a master. Signed-off-by: John Fastabend Signed-off-by: David S. Miller --- net/8021q/vlan_dev.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/8021q/vlan_dev.c b/net/8021q/vlan_dev.c index 55be908..5298426 100644 --- a/net/8021q/vlan_dev.c +++ b/net/8021q/vlan_dev.c @@ -708,7 +708,8 @@ static int vlan_dev_init(struct net_device *dev) netif_carrier_off(dev); /* IFF_BROADCAST|IFF_MULTICAST; ??? */ - dev->flags = real_dev->flags & ~(IFF_UP | IFF_PROMISC | IFF_ALLMULTI); + dev->flags = real_dev->flags & ~(IFF_UP | IFF_PROMISC | IFF_ALLMULTI | + IFF_MASTER | IFF_SLAVE); dev->iflink = real_dev->ifindex; dev->state = (real_dev->state & ((1<<__LINK_STATE_NOCARRIER) | (1<<__LINK_STATE_DORMANT))) | -- cgit v1.1 From 2df4a0fa1540c460ec69788ab2a901cc72a75644 Mon Sep 17 00:00:00 2001 From: John Fastabend Date: Wed, 12 May 2010 21:31:11 +0000 Subject: net: fix conflict between null_or_orig and null_or_bond If a skb is received on an inactive bond that does not meet the special cases checked for by skb_bond_should_drop it should only be delivered to exact matches as the comment in netif_receive_skb() says. However because null_or_bond could also be null this is not always true. This patch renames null_or_bond to orig_or_bond and initializes it to orig_dev. This keeps the intent of null_or_bond to pass frames received on VLAN interfaces stacked on bonding interfaces without invalidating the statement for null_or_orig. Signed-off-by: John Fastabend Signed-off-by: David S. Miller --- net/core/dev.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/net/core/dev.c b/net/core/dev.c index 1845b08..d03470f 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -2795,7 +2795,7 @@ static int __netif_receive_skb(struct sk_buff *skb) struct net_device *orig_dev; struct net_device *master; struct net_device *null_or_orig; - struct net_device *null_or_bond; + struct net_device *orig_or_bond; int ret = NET_RX_DROP; __be16 type; @@ -2868,10 +2868,10 @@ ncls: * device that may have registered for a specific ptype. The * handler may have to adjust skb->dev and orig_dev. */ - null_or_bond = NULL; + orig_or_bond = orig_dev; if ((skb->dev->priv_flags & IFF_802_1Q_VLAN) && (vlan_dev_real_dev(skb->dev)->priv_flags & IFF_BONDING)) { - null_or_bond = vlan_dev_real_dev(skb->dev); + orig_or_bond = vlan_dev_real_dev(skb->dev); } type = skb->protocol; @@ -2879,7 +2879,7 @@ ncls: &ptype_base[ntohs(type) & PTYPE_HASH_MASK], list) { if (ptype->type == type && (ptype->dev == null_or_orig || ptype->dev == skb->dev || ptype->dev == orig_dev || - ptype->dev == null_or_bond)) { + ptype->dev == orig_or_bond)) { if (pt_prev) ret = deliver_skb(skb, pt_prev, orig_dev); pt_prev = ptype; -- cgit v1.1 From 33c29dde7d04dc0ec0edb649d20ccf1351c13a06 Mon Sep 17 00:00:00 2001 From: Changli Gao Date: Sat, 29 May 2010 14:26:59 +0000 Subject: act_nat: fix the wrong checksum when addr isn't in old_addr/mask fix the wrong checksum when addr isn't in old_addr/mask For TCP and UDP packets, when addr isn't in old_addr/mask we don't do SNAT or DNAT, and we should not update layer 4 checksum. Signed-off-by: Changli Gao ---- net/sched/act_nat.c | 4 ++++ 1 file changed, 4 insertions(+) Signed-off-by: David S. Miller --- net/sched/act_nat.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'net') diff --git a/net/sched/act_nat.c b/net/sched/act_nat.c index d885ba3..5709494 100644 --- a/net/sched/act_nat.c +++ b/net/sched/act_nat.c @@ -159,6 +159,9 @@ static int tcf_nat(struct sk_buff *skb, struct tc_action *a, iph->daddr = new_addr; csum_replace4(&iph->check, addr, new_addr); + } else if ((iph->frag_off & htons(IP_OFFSET)) || + iph->protocol != IPPROTO_ICMP) { + goto out; } ihl = iph->ihl * 4; @@ -247,6 +250,7 @@ static int tcf_nat(struct sk_buff *skb, struct tc_action *a, break; } +out: return action; drop: -- cgit v1.1 From edafe502404f3669d364b6e96d79b54067b634b4 Mon Sep 17 00:00:00 2001 From: Daniele Lacamera Date: Wed, 2 Jun 2010 02:02:04 +0000 Subject: TCP: tcp_hybla: Fix integer overflow in slow start increment For large values of rtt, 2^rho operation may overflow u32. Clamp down the increment to 2^16. Signed-off-by: Daniele Lacamera Signed-off-by: David S. Miller --- net/ipv4/tcp_hybla.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/ipv4/tcp_hybla.c b/net/ipv4/tcp_hybla.c index c209e05..377bc93 100644 --- a/net/ipv4/tcp_hybla.c +++ b/net/ipv4/tcp_hybla.c @@ -126,8 +126,8 @@ static void hybla_cong_avoid(struct sock *sk, u32 ack, u32 in_flight) * calculate 2^fract in a <<7 value. */ is_slowstart = 1; - increment = ((1 << ca->rho) * hybla_fraction(rho_fractions)) - - 128; + increment = ((1 << min(ca->rho, 16U)) * + hybla_fraction(rho_fractions)) - 128; } else { /* * congestion avoidance -- cgit v1.1 From fbc2e7d9cf49e0bf89b9e91fd60a06851a855c5d Mon Sep 17 00:00:00 2001 From: Changli Gao Date: Wed, 2 Jun 2010 07:32:42 -0700 Subject: cls_u32: use skb_header_pointer() to dereference data safely use skb_header_pointer() to dereference data safely the original skb->data dereference isn't safe, as there isn't any skb->len or skb_is_nonlinear() check. skb_header_pointer() is used instead in this patch. And when the skb isn't long enough, we terminate the function u32_classify() immediately with -1. Signed-off-by: Changli Gao Signed-off-by: David S. Miller --- net/sched/cls_u32.c | 49 ++++++++++++++++++++++++++++++++++++------------- 1 file changed, 36 insertions(+), 13 deletions(-) (limited to 'net') diff --git a/net/sched/cls_u32.c b/net/sched/cls_u32.c index 9627542..4f52214 100644 --- a/net/sched/cls_u32.c +++ b/net/sched/cls_u32.c @@ -98,11 +98,11 @@ static int u32_classify(struct sk_buff *skb, struct tcf_proto *tp, struct tcf_re { struct { struct tc_u_knode *knode; - u8 *ptr; + unsigned int off; } stack[TC_U32_MAXDEPTH]; struct tc_u_hnode *ht = (struct tc_u_hnode*)tp->root; - u8 *ptr = skb_network_header(skb); + unsigned int off = skb_network_offset(skb); struct tc_u_knode *n; int sdepth = 0; int off2 = 0; @@ -134,8 +134,14 @@ next_knode: #endif for (i = n->sel.nkeys; i>0; i--, key++) { - - if ((*(__be32*)(ptr+key->off+(off2&key->offmask))^key->val)&key->mask) { + unsigned int toff; + __be32 *data, _data; + + toff = off + key->off + (off2 & key->offmask); + data = skb_header_pointer(skb, toff, 4, &_data); + if (!data) + goto out; + if ((*data ^ key->val) & key->mask) { n = n->next; goto next_knode; } @@ -174,29 +180,45 @@ check_terminal: if (sdepth >= TC_U32_MAXDEPTH) goto deadloop; stack[sdepth].knode = n; - stack[sdepth].ptr = ptr; + stack[sdepth].off = off; sdepth++; ht = n->ht_down; sel = 0; - if (ht->divisor) - sel = ht->divisor&u32_hash_fold(*(__be32*)(ptr+n->sel.hoff), &n->sel,n->fshift); - + if (ht->divisor) { + __be32 *data, _data; + + data = skb_header_pointer(skb, off + n->sel.hoff, 4, + &_data); + if (!data) + goto out; + sel = ht->divisor & u32_hash_fold(*data, &n->sel, + n->fshift); + } if (!(n->sel.flags&(TC_U32_VAROFFSET|TC_U32_OFFSET|TC_U32_EAT))) goto next_ht; if (n->sel.flags&(TC_U32_OFFSET|TC_U32_VAROFFSET)) { off2 = n->sel.off + 3; - if (n->sel.flags&TC_U32_VAROFFSET) - off2 += ntohs(n->sel.offmask & *(__be16*)(ptr+n->sel.offoff)) >>n->sel.offshift; + if (n->sel.flags & TC_U32_VAROFFSET) { + __be16 *data, _data; + + data = skb_header_pointer(skb, + off + n->sel.offoff, + 2, &_data); + if (!data) + goto out; + off2 += ntohs(n->sel.offmask & *data) >> + n->sel.offshift; + } off2 &= ~3; } if (n->sel.flags&TC_U32_EAT) { - ptr += off2; + off += off2; off2 = 0; } - if (ptr < skb_tail_pointer(skb)) + if (off < skb->len) goto next_ht; } @@ -204,9 +226,10 @@ check_terminal: if (sdepth--) { n = stack[sdepth].knode; ht = n->ht_up; - ptr = stack[sdepth].ptr; + off = stack[sdepth].off; goto check_terminal; } +out: return -1; deadloop: -- cgit v1.1 From db2c24175d149b55784f7cb2c303622ce962c1ae Mon Sep 17 00:00:00 2001 From: Changli Gao Date: Wed, 2 Jun 2010 04:55:02 +0000 Subject: act_pedit: access skb->data safely access skb->data safely we should use skb_header_pointer() and skb_store_bits() to access skb->data to handle small or non-linear skbs. Signed-off-by: Changli Gao ---- net/sched/act_pedit.c | 24 ++++++++++++++---------- 1 file changed, 14 insertions(+), 10 deletions(-) Signed-off-by: David S. Miller --- net/sched/act_pedit.c | 24 ++++++++++++++---------- 1 file changed, 14 insertions(+), 10 deletions(-) (limited to 'net') diff --git a/net/sched/act_pedit.c b/net/sched/act_pedit.c index fdbd0b7..50e3d94 100644 --- a/net/sched/act_pedit.c +++ b/net/sched/act_pedit.c @@ -125,7 +125,7 @@ static int tcf_pedit(struct sk_buff *skb, struct tc_action *a, { struct tcf_pedit *p = a->priv; int i, munged = 0; - u8 *pptr; + unsigned int off; if (!(skb->tc_verd & TC_OK2MUNGE)) { /* should we set skb->cloned? */ @@ -134,7 +134,7 @@ static int tcf_pedit(struct sk_buff *skb, struct tc_action *a, } } - pptr = skb_network_header(skb); + off = skb_network_offset(skb); spin_lock(&p->tcf_lock); @@ -144,17 +144,17 @@ static int tcf_pedit(struct sk_buff *skb, struct tc_action *a, struct tc_pedit_key *tkey = p->tcfp_keys; for (i = p->tcfp_nkeys; i > 0; i--, tkey++) { - u32 *ptr; + u32 *ptr, _data; int offset = tkey->off; if (tkey->offmask) { - if (skb->len > tkey->at) { - char *j = pptr + tkey->at; - offset += ((*j & tkey->offmask) >> - tkey->shift); - } else { + char *d, _d; + + d = skb_header_pointer(skb, off + tkey->at, 1, + &_d); + if (!d) goto bad; - } + offset += (*d & tkey->offmask) >> tkey->shift; } if (offset % 4) { @@ -169,9 +169,13 @@ static int tcf_pedit(struct sk_buff *skb, struct tc_action *a, goto bad; } - ptr = (u32 *)(pptr+offset); + ptr = skb_header_pointer(skb, off + offset, 4, &_data); + if (!ptr) + goto bad; /* just do it, baby */ *ptr = ((*ptr & tkey->mask) ^ tkey->val); + if (ptr == &_data) + skb_store_bits(skb, off + offset, ptr, 4); munged++; } -- cgit v1.1 From 8b9a4e6e442756f670ef507f09bbc6c11dc0fca6 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Fri, 28 May 2010 15:22:58 +0200 Subject: mac80211: process station blockack action frames from work Processing an association response could take a bit of time while we set up the hardware etc. During that time, the AP might already send a blockack request. If this happens very quickly on a fairly slow machine, we can end up processing the blockack request before the association processing has finished. Since the blockack processing cannot sleep right now, we also cannot make it wait in the driver. As a result, sometimes on slow machines the iwlagn driver gets totally confused, and no traffic can pass when the aggregation setup was done before the assoc setup completed. I'm working on a proper fix for this, which involves queuing all blockack category action frames from a work struct, and also allowing the ampdu_action driver callback to sleep, which will generally clean up the code and make things easier. However, this is a very involved and complex change. To fix the problem at hand in a way that can also be backported to stable, I've come up with this patch. Here, I simply process all aggregation action frames from the managed interface skb queue, which means their processing will be serialized with processing the association response, thereby fixing the problem. Signed-off-by: Johannes Berg Cc: stable@kernel.org Signed-off-by: John W. Linville --- net/mac80211/mlme.c | 52 +++++++++++++++++++++++++++++++++++++++++++++------- net/mac80211/rx.c | 3 +++ 2 files changed, 48 insertions(+), 7 deletions(-) (limited to 'net') diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index 0839c4e..3310e70 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -1692,14 +1692,52 @@ static void ieee80211_sta_rx_queued_mgmt(struct ieee80211_sub_if_data *sdata, rma = ieee80211_rx_mgmt_disassoc(sdata, mgmt, skb->len); break; case IEEE80211_STYPE_ACTION: - if (mgmt->u.action.category != WLAN_CATEGORY_SPECTRUM_MGMT) + switch (mgmt->u.action.category) { + case WLAN_CATEGORY_BACK: { + struct ieee80211_local *local = sdata->local; + int len = skb->len; + struct sta_info *sta; + + rcu_read_lock(); + sta = sta_info_get(sdata, mgmt->sa); + if (!sta) { + rcu_read_unlock(); + break; + } + + local_bh_disable(); + + switch (mgmt->u.action.u.addba_req.action_code) { + case WLAN_ACTION_ADDBA_REQ: + if (len < (IEEE80211_MIN_ACTION_SIZE + + sizeof(mgmt->u.action.u.addba_req))) + break; + ieee80211_process_addba_request(local, sta, mgmt, len); + break; + case WLAN_ACTION_ADDBA_RESP: + if (len < (IEEE80211_MIN_ACTION_SIZE + + sizeof(mgmt->u.action.u.addba_resp))) + break; + ieee80211_process_addba_resp(local, sta, mgmt, len); + break; + case WLAN_ACTION_DELBA: + if (len < (IEEE80211_MIN_ACTION_SIZE + + sizeof(mgmt->u.action.u.delba))) + break; + ieee80211_process_delba(sdata, sta, mgmt, len); + break; + } + local_bh_enable(); + rcu_read_unlock(); break; - - ieee80211_sta_process_chanswitch(sdata, - &mgmt->u.action.u.chan_switch.sw_elem, - (void *)ifmgd->associated->priv, - rx_status->mactime); - break; + } + case WLAN_CATEGORY_SPECTRUM_MGMT: + ieee80211_sta_process_chanswitch(sdata, + &mgmt->u.action.u.chan_switch.sw_elem, + (void *)ifmgd->associated->priv, + rx_status->mactime); + break; + } } mutex_unlock(&ifmgd->mtx); diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c index 5e0b654..be9abc2 100644 --- a/net/mac80211/rx.c +++ b/net/mac80211/rx.c @@ -1944,6 +1944,9 @@ ieee80211_rx_h_action(struct ieee80211_rx_data *rx) if (len < IEEE80211_MIN_ACTION_SIZE + 1) break; + if (sdata->vif.type == NL80211_IFTYPE_STATION) + return ieee80211_sta_rx_mgmt(sdata, rx->skb); + switch (mgmt->u.action.u.addba_req.action_code) { case WLAN_ACTION_ADDBA_REQ: if (len < (IEEE80211_MIN_ACTION_SIZE + -- cgit v1.1 From 8764ab2ca7ab5055e1ca80f9cfa4970c34acb804 Mon Sep 17 00:00:00 2001 From: Steffen Klassert Date: Fri, 4 Jun 2010 01:57:38 +0000 Subject: net: check for refcount if pop a stacked dst_entry xfrm triggers a warning if dst_pop() drops a refcount on a noref dst. This patch changes dst_pop() to skb_dst_pop(). skb_dst_pop() drops the refcnt only on a refcounted dst. Also we don't clone the child dst_entry, so it is not refcounted and we can use skb_dst_set_noref() in xfrm_output_one(). Signed-off-by: Steffen Klassert Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/xfrm/xfrm_output.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/xfrm/xfrm_output.c b/net/xfrm/xfrm_output.c index 6a32915..a3cca0a 100644 --- a/net/xfrm/xfrm_output.c +++ b/net/xfrm/xfrm_output.c @@ -95,13 +95,13 @@ resume: goto error_nolock; } - dst = dst_pop(dst); + dst = skb_dst_pop(skb); if (!dst) { XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTERROR); err = -EHOSTUNREACH; goto error_nolock; } - skb_dst_set(skb, dst); + skb_dst_set_noref(skb, dst); x = dst->xfrm; } while (x && !(x->outer_mode->flags & XFRM_MODE_FLAG_TUNNEL)); -- cgit v1.1 From 57f1553ee5d9f093660cc49098f494e17ed11668 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Thu, 3 Jun 2010 00:42:30 +0000 Subject: syncookies: remove Kconfig text line about disabled-by-default syncookies default to on since e994b7c901ded7200b525a707c6da71f2cf6d4bb (tcp: Don't make syn cookies initial setting depend on CONFIG_SYSCTL). Signed-off-by: Florian Westphal Signed-off-by: David S. Miller --- net/ipv4/Kconfig | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'net') diff --git a/net/ipv4/Kconfig b/net/ipv4/Kconfig index 8e3a1fd..7c3a7d1 100644 --- a/net/ipv4/Kconfig +++ b/net/ipv4/Kconfig @@ -303,7 +303,7 @@ config ARPD If unsure, say N. config SYN_COOKIES - bool "IP: TCP syncookie support (disabled per default)" + bool "IP: TCP syncookie support" ---help--- Normal TCP/IP networking is open to an attack known as "SYN flooding". This denial-of-service attack prevents legitimate remote @@ -328,13 +328,13 @@ config SYN_COOKIES server is really overloaded. If this happens frequently better turn them off. - If you say Y here, note that SYN cookies aren't enabled by default; - you can enable them by saying Y to "/proc file system support" and + If you say Y here, you can disable SYN cookies at run time by + saying Y to "/proc file system support" and "Sysctl support" below and executing the command - echo 1 >/proc/sys/net/ipv4/tcp_syncookies + echo 0 > /proc/sys/net/ipv4/tcp_syncookies - at boot time after the /proc file system has been mounted. + after the /proc file system has been mounted. If unsure, say N. -- cgit v1.1 From ca55158c6ecb7832a6ad80ac44a14d23bab8cdfc Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 3 Jun 2010 09:03:58 +0000 Subject: rps: tcp: fix rps_sock_flow_table table updates I believe a moderate SYN flood attack can corrupt RFS flow table (rps_sock_flow_table), making RPS/RFS much less effective. Even in a normal situation, server handling short lived sessions suffer from bad steering for the first data packet of a session, if another SYN packet is received for another session. We do following action in tcp_v4_rcv() : sock_rps_save_rxhash(sk, skb->rxhash); We should _not_ do this if sk is a LISTEN socket, as about each packet received on a LISTEN socket has a different rxhash than previous one. -> RPS_NO_CPU markers are spread all over rps_sock_flow_table. Also, it makes sense to protect sk->rxhash field changes with socket lock (We currently can change it even if user thread owns the lock and might use rxhash) This patch moves sock_rps_save_rxhash() to a sock locked section, and only for non LISTEN sockets. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/ipv4/tcp_ipv4.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 202cf09..fe193e5 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -1555,6 +1555,7 @@ int tcp_v4_do_rcv(struct sock *sk, struct sk_buff *skb) #endif if (sk->sk_state == TCP_ESTABLISHED) { /* Fast path */ + sock_rps_save_rxhash(sk, skb->rxhash); TCP_CHECK_TIMER(sk); if (tcp_rcv_established(sk, skb, tcp_hdr(skb), skb->len)) { rsk = sk; @@ -1579,7 +1580,9 @@ int tcp_v4_do_rcv(struct sock *sk, struct sk_buff *skb) } return 0; } - } + } else + sock_rps_save_rxhash(sk, skb->rxhash); + TCP_CHECK_TIMER(sk); if (tcp_rcv_state_process(sk, skb, tcp_hdr(skb), skb->len)) { @@ -1672,8 +1675,6 @@ process: skb->dev = NULL; - sock_rps_save_rxhash(sk, skb->rxhash); - bh_lock_sock_nested(sk); ret = 0; if (!sock_owned_by_user(sk)) { -- cgit v1.1 From c44649216522cd607a4027d2ebf4a8147d3fa94c Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 3 Jun 2010 05:45:47 +0000 Subject: tcp: use correct net ns in cookie_v4_check() Its better to make a route lookup in appropriate namespace. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/ipv4/syncookies.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/ipv4/syncookies.c b/net/ipv4/syncookies.c index 5c24db4..9f6b222 100644 --- a/net/ipv4/syncookies.c +++ b/net/ipv4/syncookies.c @@ -347,7 +347,7 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb, { .sport = th->dest, .dport = th->source } } }; security_req_classify_flow(req, &fl); - if (ip_route_output_key(&init_net, &rt, &fl)) { + if (ip_route_output_key(sock_net(sk), &rt, &fl)) { reqsk_free(req); goto out; } -- cgit v1.1 From 72e09ad107e78d69ff4d3b97a69f0aad2b77280f Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Sat, 5 Jun 2010 03:03:30 -0700 Subject: ipv6: avoid high order allocations With mtu=9000, mld_newpack() use order-2 GFP_ATOMIC allocations, that are very unreliable, on machines where PAGE_SIZE=4K Limit allocated skbs to be at most one page. (order-0 allocations) Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/ipv6/mcast.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/ipv6/mcast.c b/net/ipv6/mcast.c index 59f1881..ab1622d 100644 --- a/net/ipv6/mcast.c +++ b/net/ipv6/mcast.c @@ -1356,7 +1356,10 @@ static struct sk_buff *mld_newpack(struct net_device *dev, int size) IPV6_TLV_PADN, 0 }; /* we assume size > sizeof(ra) here */ - skb = sock_alloc_send_skb(sk, size + LL_ALLOCATED_SPACE(dev), 1, &err); + size += LL_ALLOCATED_SPACE(dev); + /* limit our allocations to order-0 page */ + size = min_t(int, size, SKB_MAX_ORDER(0, 0)); + skb = sock_alloc_send_skb(sk, size, 1, &err); if (!skb) return NULL; -- cgit v1.1 From 8ffb335e8d696affc04f963bf73ce2196f80edb9 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Sun, 6 Jun 2010 15:34:40 -0700 Subject: ip6mr: fix a typo in ip6mr_for_each_table() Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/ipv6/ip6mr.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c index 073071f..89c0b07 100644 --- a/net/ipv6/ip6mr.c +++ b/net/ipv6/ip6mr.c @@ -120,7 +120,7 @@ static void mroute_clean_tables(struct mr6_table *mrt); static void ipmr_expire_process(unsigned long arg); #ifdef CONFIG_IPV6_MROUTE_MULTIPLE_TABLES -#define ip6mr_for_each_table(mrt, met) \ +#define ip6mr_for_each_table(mrt, net) \ list_for_each_entry_rcu(mrt, &net->ipv6.mr6_tables, list) static struct mr6_table *ip6mr_get_table(struct net *net, u32 id) -- cgit v1.1 From 035320d54758e21227987e3aae0d46e7a04f4ddc Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Sun, 6 Jun 2010 23:48:40 +0000 Subject: ipmr: dont corrupt lists ipmr_rules_exit() and ip6mr_rules_exit() free a list of items, but forget to properly remove these items from list. List head is not changed and still points to freed memory. This can trigger a fault later when icmpv6_sk_exit() is called. Fix is to either reinit list, or use list_del() to properly remove items from list before freeing them. bugzilla report : https://bugzilla.kernel.org/show_bug.cgi?id=16120 Introduced by commit d1db275dd3f6e4 (ipv6: ip6mr: support multiple tables) and commit f0ad0860d01e (ipv4: ipmr: support multiple tables) Reported-by: Alex Zhavnerchik Signed-off-by: Eric Dumazet CC: Patrick McHardy Signed-off-by: David S. Miller --- net/ipv4/ipmr.c | 4 +++- net/ipv6/ip6mr.c | 4 +++- 2 files changed, 6 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c index 856123f..757f25eb 100644 --- a/net/ipv4/ipmr.c +++ b/net/ipv4/ipmr.c @@ -267,8 +267,10 @@ static void __net_exit ipmr_rules_exit(struct net *net) { struct mr_table *mrt, *next; - list_for_each_entry_safe(mrt, next, &net->ipv4.mr_tables, list) + list_for_each_entry_safe(mrt, next, &net->ipv4.mr_tables, list) { + list_del(&mrt->list); kfree(mrt); + } fib_rules_unregister(net->ipv4.mr_rules_ops); } #else diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c index 89c0b07..66078da 100644 --- a/net/ipv6/ip6mr.c +++ b/net/ipv6/ip6mr.c @@ -254,8 +254,10 @@ static void __net_exit ip6mr_rules_exit(struct net *net) { struct mr6_table *mrt, *next; - list_for_each_entry_safe(mrt, next, &net->ipv6.mr6_tables, list) + list_for_each_entry_safe(mrt, next, &net->ipv6.mr6_tables, list) { + list_del(&mrt->list); ip6mr_free_table(mrt); + } fib_rules_unregister(net->ipv6.mr6_rules_ops); } #else -- cgit v1.1 From 35dd0509b21e4b5bab36b9eb80c8dab0322f5007 Mon Sep 17 00:00:00 2001 From: Holger Schurig Date: Mon, 7 Jun 2010 16:33:49 +0200 Subject: mac80211: fix function pointer check This makes "iw wlan0 dump survey" work again with mac80211-based drivers that support it, e.g. ath5k. Signed-off-by: Holger Schurig Signed-off-by: John W. Linville --- net/mac80211/driver-ops.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/mac80211/driver-ops.h b/net/mac80211/driver-ops.h index 4f22713..9c1da08 100644 --- a/net/mac80211/driver-ops.h +++ b/net/mac80211/driver-ops.h @@ -349,7 +349,7 @@ static inline int drv_get_survey(struct ieee80211_local *local, int idx, struct survey_info *survey) { int ret = -EOPNOTSUPP; - if (local->ops->conf_tx) + if (local->ops->get_survey) ret = local->ops->get_survey(&local->hw, idx, survey); /* trace_drv_get_survey(local, idx, survey, ret); */ return ret; -- cgit v1.1 From b054b747a694927879c94dd11af54d04346aed7d Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Mon, 7 Jun 2010 21:50:07 +0200 Subject: mac80211: fix deauth before assoc When we receive a deauthentication frame before having successfully associated, we neither print a message nor abort assocation. The former makes it hard to debug, while the latter later causes a warning in cfg80211 when, as will typically be the case, association timed out. This warning was reported by many, e.g. in https://bugzilla.kernel.org/show_bug.cgi?id=15981, but I couldn't initially pinpoint it. I verified the fix by hacking hostapd to send a deauth frame instead of an association response. Cc: stable@kernel.org Signed-off-by: Johannes Berg Tested-by: Miles Lane Signed-off-by: John W. Linville --- net/mac80211/mlme.c | 40 ++++++++++++++++++++++++++++++++++++++-- 1 file changed, 38 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index 3310e70..f803f8b 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -1760,9 +1760,45 @@ static void ieee80211_sta_rx_queued_mgmt(struct ieee80211_sub_if_data *sdata, mutex_unlock(&ifmgd->mtx); if (skb->len >= 24 + 2 /* mgmt + deauth reason */ && - (fc & IEEE80211_FCTL_STYPE) == IEEE80211_STYPE_DEAUTH) - cfg80211_send_deauth(sdata->dev, (u8 *)mgmt, skb->len); + (fc & IEEE80211_FCTL_STYPE) == IEEE80211_STYPE_DEAUTH) { + struct ieee80211_local *local = sdata->local; + struct ieee80211_work *wk; + + mutex_lock(&local->work_mtx); + list_for_each_entry(wk, &local->work_list, list) { + if (wk->sdata != sdata) + continue; + + if (wk->type != IEEE80211_WORK_ASSOC) + continue; + + if (memcmp(mgmt->bssid, wk->filter_ta, ETH_ALEN)) + continue; + if (memcmp(mgmt->sa, wk->filter_ta, ETH_ALEN)) + continue; + /* + * Printing the message only here means we can't + * spuriously print it, but it also means that it + * won't be printed when the frame comes in before + * we even tried to associate or in similar cases. + * + * Ultimately, I suspect cfg80211 should print the + * messages instead. + */ + printk(KERN_DEBUG + "%s: deauthenticated from %pM (Reason: %u)\n", + sdata->name, mgmt->bssid, + le16_to_cpu(mgmt->u.deauth.reason_code)); + + list_del_rcu(&wk->list); + free_work(wk); + break; + } + mutex_unlock(&local->work_mtx); + + cfg80211_send_deauth(sdata->dev, (u8 *)mgmt, skb->len); + } out: kfree_skb(skb); } -- cgit v1.1 From 08c801f8d45387a1b46066aad1789a9bb9c4b645 Mon Sep 17 00:00:00 2001 From: Tim Gardner Date: Tue, 8 Jun 2010 17:51:27 -0600 Subject: net: Print num_rx_queues imbalance warning only when there are allocated queues BugLink: http://bugs.launchpad.net/bugs/591416 There are a number of network drivers (bridge, bonding, etc) that are not yet receive multi-queue enabled and use alloc_netdev(), so don't print a num_rx_queues imbalance warning in that case. Also, only print the warning once for those drivers that _are_ multi-queue enabled. Signed-off-by: Tim Gardner Acked-by: Eric Dumazet --- net/core/dev.c | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) (limited to 'net') diff --git a/net/core/dev.c b/net/core/dev.c index d03470f..14a8568 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -2253,11 +2253,9 @@ static int get_rps_cpu(struct net_device *dev, struct sk_buff *skb, if (skb_rx_queue_recorded(skb)) { u16 index = skb_get_rx_queue(skb); if (unlikely(index >= dev->num_rx_queues)) { - if (net_ratelimit()) { - pr_warning("%s received packet on queue " - "%u, but number of RX queues is %u\n", - dev->name, index, dev->num_rx_queues); - } + WARN_ONCE(dev->num_rx_queues > 1, "%s received packet " + "on queue %u, but number of RX queues is %u\n", + dev->name, index, dev->num_rx_queues); goto done; } rxqueue = dev->_rx + index; -- cgit v1.1 From aea34e7ae7a40bc72f9f11b5658160dfb4b90c48 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Mon, 7 Jun 2010 04:51:58 +0000 Subject: caif: fix a couple range checks The extra ! character means that these conditions are always false. Signed-off-by: Dan Carpenter Acked-by: Sjur Braendeland Signed-off-by: David S. Miller --- net/caif/cfrfml.c | 2 +- net/caif/cfveil.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/caif/cfrfml.c b/net/caif/cfrfml.c index cd2830f..fd27b17 100644 --- a/net/caif/cfrfml.c +++ b/net/caif/cfrfml.c @@ -83,7 +83,7 @@ static int cfrfml_transmit(struct cflayer *layr, struct cfpkt *pkt) if (!cfsrvl_ready(service, &ret)) return ret; - if (!cfpkt_getlen(pkt) > CAIF_MAX_PAYLOAD_SIZE) { + if (cfpkt_getlen(pkt) > CAIF_MAX_PAYLOAD_SIZE) { pr_err("CAIF: %s():Packet too large - size=%d\n", __func__, cfpkt_getlen(pkt)); return -EOVERFLOW; diff --git a/net/caif/cfveil.c b/net/caif/cfveil.c index 0fd827f..e04f7d9 100644 --- a/net/caif/cfveil.c +++ b/net/caif/cfveil.c @@ -84,7 +84,7 @@ static int cfvei_transmit(struct cflayer *layr, struct cfpkt *pkt) return ret; caif_assert(layr->dn != NULL); caif_assert(layr->dn->transmit != NULL); - if (!cfpkt_getlen(pkt) > CAIF_MAX_PAYLOAD_SIZE) { + if (cfpkt_getlen(pkt) > CAIF_MAX_PAYLOAD_SIZE) { pr_warning("CAIF: %s(): Packet too large - size=%d\n", __func__, cfpkt_getlen(pkt)); return -EOVERFLOW; -- cgit v1.1 From 00d9d6a185de89edc0649ca4ead58f0283dfcbac Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 7 Jun 2010 22:24:44 +0000 Subject: ipv6: fix ICMP6_MIB_OUTERRORS In commit 1f8438a85366 (icmp: Account for ICMP out errors), I did a typo on IPV6 side, using ICMP6_MIB_OUTMSGS instead of ICMP6_MIB_OUTERRORS Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/ipv6/icmp.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c index ce79929..03e62f9 100644 --- a/net/ipv6/icmp.c +++ b/net/ipv6/icmp.c @@ -483,7 +483,7 @@ route_done: np->tclass, NULL, &fl, (struct rt6_info*)dst, MSG_DONTWAIT, np->dontfrag); if (err) { - ICMP6_INC_STATS_BH(net, idev, ICMP6_MIB_OUTMSGS); + ICMP6_INC_STATS_BH(net, idev, ICMP6_MIB_OUTERRORS); ip6_flush_pending_frames(sk); goto out_put; } @@ -565,7 +565,7 @@ static void icmpv6_echo_reply(struct sk_buff *skb) np->dontfrag); if (err) { - ICMP6_INC_STATS_BH(net, idev, ICMP6_MIB_OUTMSGS); + ICMP6_INC_STATS_BH(net, idev, ICMP6_MIB_OUTERRORS); ip6_flush_pending_frames(sk); goto out_put; } -- cgit v1.1 From 597a264b1a9c7e36d1728f677c66c5c1f7e3b837 Mon Sep 17 00:00:00 2001 From: John Fastabend Date: Thu, 3 Jun 2010 09:30:11 +0000 Subject: net: deliver skbs on inactive slaves to exact matches Currently, the accelerated receive path for VLAN's will drop packets if the real device is an inactive slave and is not one of the special pkts tested for in skb_bond_should_drop(). This behavior is different then the non-accelerated path and for pkts over a bonded vlan. For example, vlanx -> bond0 -> ethx will be dropped in the vlan path and not delivered to any packet handlers at all. However, bond0 -> vlanx -> ethx and bond0 -> ethx will be delivered to handlers that match the exact dev, because the VLAN path checks the real_dev which is not a slave and netif_recv_skb() doesn't drop frames but only delivers them to exact matches. This patch adds a sk_buff flag which is used for tagging skbs that would previously been dropped and allows the skb to continue to skb_netif_recv(). Here we add logic to check for the deliver_no_wcard flag and if it is set only deliver to handlers that match exactly. This makes both paths above consistent and gives pkt handlers a way to identify skbs that come from inactive slaves. Without this patch in some configurations skbs will be delivered to handlers with exact matches and in others be dropped out right in the vlan path. I have tested the following 4 configurations in failover modes and load balancing modes. # bond0 -> ethx # vlanx -> bond0 -> ethx # bond0 -> vlanx -> ethx # bond0 -> ethx | vlanx -> -- Signed-off-by: John Fastabend Signed-off-by: David S. Miller --- net/8021q/vlan_core.c | 4 ++-- net/core/dev.c | 17 ++++++++++++++--- 2 files changed, 16 insertions(+), 5 deletions(-) (limited to 'net') diff --git a/net/8021q/vlan_core.c b/net/8021q/vlan_core.c index bd537fc..50f58f5f 100644 --- a/net/8021q/vlan_core.c +++ b/net/8021q/vlan_core.c @@ -12,7 +12,7 @@ int __vlan_hwaccel_rx(struct sk_buff *skb, struct vlan_group *grp, return NET_RX_DROP; if (skb_bond_should_drop(skb, ACCESS_ONCE(skb->dev->master))) - goto drop; + skb->deliver_no_wcard = 1; skb->skb_iif = skb->dev->ifindex; __vlan_hwaccel_put_tag(skb, vlan_tci); @@ -84,7 +84,7 @@ vlan_gro_common(struct napi_struct *napi, struct vlan_group *grp, struct sk_buff *p; if (skb_bond_should_drop(skb, ACCESS_ONCE(skb->dev->master))) - goto drop; + skb->deliver_no_wcard = 1; skb->skb_iif = skb->dev->ifindex; __vlan_hwaccel_put_tag(skb, vlan_tci); diff --git a/net/core/dev.c b/net/core/dev.c index 14a8568..2b3bf53 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -2810,13 +2810,24 @@ static int __netif_receive_skb(struct sk_buff *skb) if (!skb->skb_iif) skb->skb_iif = skb->dev->ifindex; + /* + * bonding note: skbs received on inactive slaves should only + * be delivered to pkt handlers that are exact matches. Also + * the deliver_no_wcard flag will be set. If packet handlers + * are sensitive to duplicate packets these skbs will need to + * be dropped at the handler. The vlan accel path may have + * already set the deliver_no_wcard flag. + */ null_or_orig = NULL; orig_dev = skb->dev; master = ACCESS_ONCE(orig_dev->master); - if (master) { - if (skb_bond_should_drop(skb, master)) + if (skb->deliver_no_wcard) + null_or_orig = orig_dev; + else if (master) { + if (skb_bond_should_drop(skb, master)) { + skb->deliver_no_wcard = 1; null_or_orig = orig_dev; /* deliver only exact match */ - else + } else skb->dev = master; } -- cgit v1.1 From ae638c47dc040b8def16d05dc6acdd527628f231 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 8 Jun 2010 23:39:10 +0000 Subject: pkt_sched: gen_estimator: add a new lock gen_kill_estimator() / gen_new_estimator() is not always called with RTNL held. net/netfilter/xt_RATEEST.c is one user of these API that do not hold RTNL, so random corruptions can occur between "tc" and "iptables". Add a new fine grained lock instead of trying to use RTNL in netfilter. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/core/gen_estimator.c | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/core/gen_estimator.c b/net/core/gen_estimator.c index cf8e703..785e527 100644 --- a/net/core/gen_estimator.c +++ b/net/core/gen_estimator.c @@ -107,6 +107,7 @@ static DEFINE_RWLOCK(est_lock); /* Protects against soft lockup during large deletion */ static struct rb_root est_root = RB_ROOT; +static DEFINE_SPINLOCK(est_tree_lock); static void est_timer(unsigned long arg) { @@ -201,7 +202,6 @@ struct gen_estimator *gen_find_node(const struct gnet_stats_basic_packed *bstats * * Returns 0 on success or a negative error code. * - * NOTE: Called under rtnl_mutex */ int gen_new_estimator(struct gnet_stats_basic_packed *bstats, struct gnet_stats_rate_est *rate_est, @@ -232,6 +232,7 @@ int gen_new_estimator(struct gnet_stats_basic_packed *bstats, est->last_packets = bstats->packets; est->avpps = rate_est->pps<<10; + spin_lock(&est_tree_lock); if (!elist[idx].timer.function) { INIT_LIST_HEAD(&elist[idx].list); setup_timer(&elist[idx].timer, est_timer, idx); @@ -242,6 +243,7 @@ int gen_new_estimator(struct gnet_stats_basic_packed *bstats, list_add_rcu(&est->list, &elist[idx].list); gen_add_node(est); + spin_unlock(&est_tree_lock); return 0; } @@ -261,13 +263,13 @@ static void __gen_kill_estimator(struct rcu_head *head) * * Removes the rate estimator specified by &bstats and &rate_est. * - * NOTE: Called under rtnl_mutex */ void gen_kill_estimator(struct gnet_stats_basic_packed *bstats, struct gnet_stats_rate_est *rate_est) { struct gen_estimator *e; + spin_lock(&est_tree_lock); while ((e = gen_find_node(bstats, rate_est))) { rb_erase(&e->node, &est_root); @@ -278,6 +280,7 @@ void gen_kill_estimator(struct gnet_stats_basic_packed *bstats, list_del_rcu(&e->list); call_rcu(&e->e_rcu, __gen_kill_estimator); } + spin_unlock(&est_tree_lock); } EXPORT_SYMBOL(gen_kill_estimator); @@ -312,8 +315,14 @@ EXPORT_SYMBOL(gen_replace_estimator); bool gen_estimator_active(const struct gnet_stats_basic_packed *bstats, const struct gnet_stats_rate_est *rate_est) { + bool res; + ASSERT_RTNL(); - return gen_find_node(bstats, rate_est) != NULL; + spin_lock(&est_tree_lock); + res = gen_find_node(bstats, rate_est) != NULL; + spin_unlock(&est_tree_lock); + + return res; } EXPORT_SYMBOL(gen_estimator_active); -- cgit v1.1 From 07a0f0f07a68014c92c752a5598102372bddf46e Mon Sep 17 00:00:00 2001 From: Daniel Turull Date: Thu, 10 Jun 2010 23:08:11 -0700 Subject: pktgen: Fix accuracy of inter-packet delay. This patch correct a bug in the delay of pktgen. It makes sure the inter-packet interval is accurate. Signed-off-by: Daniel Turull Signed-off-by: Robert Olsson Signed-off-by: David S. Miller --- net/core/pktgen.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/core/pktgen.c b/net/core/pktgen.c index 2ad68da..1dacd7b 100644 --- a/net/core/pktgen.c +++ b/net/core/pktgen.c @@ -2170,7 +2170,7 @@ static void spin(struct pktgen_dev *pkt_dev, ktime_t spin_until) end_time = ktime_now(); pkt_dev->idle_acc += ktime_to_ns(ktime_sub(end_time, start_time)); - pkt_dev->next_tx = ktime_add_ns(end_time, pkt_dev->delay); + pkt_dev->next_tx = ktime_add_ns(spin_until, pkt_dev->delay); } static inline void set_pkt_overhead(struct pktgen_dev *pkt_dev) -- cgit v1.1