From 97077c4a9868fce8ac151512cde5d24fc1144f24 Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Wed, 17 Aug 2005 12:03:32 -0700 Subject: [IPV6]: Fix raw socket hardware checksum failures When packets hit raw sockets the csum update isn't done yet, do it manually. Packets can also reach rawv6_rcv on the output path through ip6_call_ra_chain, in this case skb->ip_summed is CHECKSUM_NONE and this codepath isn't executed. Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- net/ipv6/raw.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'net') diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c index e2b848e..1d4d75b 100644 --- a/net/ipv6/raw.c +++ b/net/ipv6/raw.c @@ -328,6 +328,8 @@ int rawv6_rcv(struct sock *sk, struct sk_buff *skb) if (skb->ip_summed != CHECKSUM_UNNECESSARY) { if (skb->ip_summed == CHECKSUM_HW) { + skb_postpull_rcsum(skb, skb->nh.raw, + skb->h.raw - skb->nh.raw); skb->ip_summed = CHECKSUM_UNNECESSARY; if (csum_ipv6_magic(&skb->nh.ipv6h->saddr, &skb->nh.ipv6h->daddr, -- cgit v1.1 From 35d59efd105b3b7c1b5878dcc9d1749f41f9740f Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Wed, 17 Aug 2005 12:03:59 -0700 Subject: [TCP]: Fix bug #5070: kernel BUG at net/ipv4/tcp_output.c:864 1) We send out a normal sized packet with TSO on to start off. 2) ICMP is received indicating a smaller MTU. 3) We send the current sk_send_head which needs to be fragmented since it was created before the ICMP event. The first fragment is then sent out. At this point the remaining fragment is allocated by tcp_fragment. However, its size is padded to fit the L1 cache-line size therefore creating tail-room up to 124 bytes long. This fragment will also be sitting at sk_send_head. 4) tcp_sendmsg is called again and it stores data in the tail-room of the fragment. 5) tcp_push_one is called by tcp_sendmsg which then calls tso_fragment since the packet as a whole exceeds the MTU. 
At this point we have a packet that has data in the head area being fed to tso_fragment which bombs out. My take on this is that we shouldn't ever call tcp_fragment on a TSO socket for a packet that is yet to be transmitted since this creates a packet on sk_send_head that cannot be extended. So here is a patch to change it so that tso_fragment is always used in this case. Signed-off-by: Herbert Xu Signed-off-by: David S. Miller --- net/ipv4/tcp_output.c | 39 ++++++++++++++++++++------------------- 1 file changed, 20 insertions(+), 19 deletions(-) (limited to 'net') diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 3ed6fc1..566045e 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -861,7 +861,8 @@ static int tso_fragment(struct sock *sk, struct sk_buff *skb, unsigned int len, u16 flags; /* All of a TSO frame must be composed of paged data. */ - BUG_ON(skb->len != skb->data_len); + if (skb->len != skb->data_len) + return tcp_fragment(sk, skb, len, mss_now); buff = sk_stream_alloc_pskb(sk, 0, 0, GFP_ATOMIC); if (unlikely(buff == NULL)) @@ -974,6 +975,8 @@ static int tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle) sent_pkts = 0; while ((skb = sk->sk_send_head)) { + unsigned int limit; + tso_segs = tcp_init_tso_segs(sk, skb, mss_now); BUG_ON(!tso_segs); @@ -994,9 +997,10 @@ static int tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle) break; } + limit = mss_now; if (tso_segs > 1) { - u32 limit = tcp_window_allows(tp, skb, - mss_now, cwnd_quota); + limit = tcp_window_allows(tp, skb, + mss_now, cwnd_quota); if (skb->len < limit) { unsigned int trim = skb->len % mss_now; @@ -1004,15 +1008,12 @@ static int tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle) if (trim) limit = skb->len - trim; } - if (skb->len > limit) { - if (tso_fragment(sk, skb, limit, mss_now)) - break; - } - } else if (unlikely(skb->len > mss_now)) { - if (unlikely(tcp_fragment(sk, skb, mss_now, mss_now))) - break; } + if 
(skb->len > limit && + unlikely(tso_fragment(sk, skb, limit, mss_now))) + break; + TCP_SKB_CB(skb)->when = tcp_time_stamp; if (unlikely(tcp_transmit_skb(sk, skb_clone(skb, GFP_ATOMIC)))) @@ -1064,11 +1065,14 @@ void tcp_push_one(struct sock *sk, unsigned int mss_now) cwnd_quota = tcp_snd_test(sk, skb, mss_now, TCP_NAGLE_PUSH); if (likely(cwnd_quota)) { + unsigned int limit; + BUG_ON(!tso_segs); + limit = mss_now; if (tso_segs > 1) { - u32 limit = tcp_window_allows(tp, skb, - mss_now, cwnd_quota); + limit = tcp_window_allows(tp, skb, + mss_now, cwnd_quota); if (skb->len < limit) { unsigned int trim = skb->len % mss_now; @@ -1076,15 +1080,12 @@ void tcp_push_one(struct sock *sk, unsigned int mss_now) if (trim) limit = skb->len - trim; } - if (skb->len > limit) { - if (unlikely(tso_fragment(sk, skb, limit, mss_now))) - return; - } - } else if (unlikely(skb->len > mss_now)) { - if (unlikely(tcp_fragment(sk, skb, mss_now, mss_now))) - return; } + if (skb->len > limit && + unlikely(tso_fragment(sk, skb, limit, mss_now))) + return; + /* Send it out now. */ TCP_SKB_CB(skb)->when = tcp_time_stamp; -- cgit v1.1 From bfd272b1ca1164382eabaa9986aad822adb91eb2 Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Wed, 17 Aug 2005 12:04:22 -0700 Subject: [IPV6]: Fix SKB leak in ip6_input_finish() Changing it to how ip_input handles should fix it. Signed-off-by: Patrick McHardy Signed-off-by: David S. 
Miller --- net/ipv6/ip6_input.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/net/ipv6/ip6_input.c b/net/ipv6/ip6_input.c index 866f107..10fbb50 100644 --- a/net/ipv6/ip6_input.c +++ b/net/ipv6/ip6_input.c @@ -198,12 +198,13 @@ resubmit: if (!raw_sk) { if (xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb)) { IP6_INC_STATS_BH(IPSTATS_MIB_INUNKNOWNPROTOS); - icmpv6_param_prob(skb, ICMPV6_UNK_NEXTHDR, nhoff); + icmpv6_send(skb, ICMPV6_PARAMPROB, + ICMPV6_UNK_NEXTHDR, nhoff, + skb->dev); } - } else { + } else IP6_INC_STATS_BH(IPSTATS_MIB_INDELIVERS); - kfree_skb(skb); - } + kfree_skb(skb); } rcu_read_unlock(); return 0; -- cgit v1.1 From 1f07247de51efd30c88ad8e3e06a8b5382fc7d35 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Wed, 17 Aug 2005 12:05:27 -0700 Subject: [DECNET]: Fix RCU race condition in dn_neigh_construct(). Signed-off-by: Paul E. McKenney Signed-off-by: David S. Miller --- net/decnet/dn_neigh.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/decnet/dn_neigh.c b/net/decnet/dn_neigh.c index f32dba9..8d0cc3c 100644 --- a/net/decnet/dn_neigh.c +++ b/net/decnet/dn_neigh.c @@ -148,12 +148,12 @@ static int dn_neigh_construct(struct neighbour *neigh) __neigh_parms_put(neigh->parms); neigh->parms = neigh_parms_clone(parms); - rcu_read_unlock(); if (dn_db->use_long) neigh->ops = &dn_long_ops; else neigh->ops = &dn_short_ops; + rcu_read_unlock(); if (dn->flags & DN_NDFLAG_P3) neigh->ops = &dn_phase3_ops; -- cgit v1.1 From 001dd250c1c68667a5c3b74979fa614e2edc9ceb Mon Sep 17 00:00:00 2001 From: Jay Vosburgh Date: Thu, 18 Aug 2005 14:04:51 -0700 Subject: [TOKENRING]: Use interrupt-safe locking with rif_lock. Change operations on rif_lock from spin_{un}lock_bh to spin_{un}lock_irq{save,restore} equivalents. Some of the rif_lock critical sections are called from interrupt context via tr_type_trans->tr_add_rif_info. 
The TR NIC drivers call tr_type_trans from their packet receive handlers. Signed-off-by: Jay Vosburgh Signed-off-by: John W. Linville Signed-off-by: David S. Miller --- net/802/tr.c | 22 ++++++++++++---------- 1 file changed, 12 insertions(+), 10 deletions(-) (limited to 'net') diff --git a/net/802/tr.c b/net/802/tr.c index a755e88..1bb7dc1 100644 --- a/net/802/tr.c +++ b/net/802/tr.c @@ -251,10 +251,11 @@ void tr_source_route(struct sk_buff *skb,struct trh_hdr *trh,struct net_device * unsigned int hash; struct rif_cache *entry; unsigned char *olddata; + unsigned long flags; static const unsigned char mcast_func_addr[] = {0xC0,0x00,0x00,0x04,0x00,0x00}; - spin_lock_bh(&rif_lock); + spin_lock_irqsave(&rif_lock, flags); /* * Broadcasts are single route as stated in RFC 1042 @@ -323,7 +324,7 @@ printk("source routing for %02X:%02X:%02X:%02X:%02X:%02X\n",trh->daddr[0], else slack = 18 - ((ntohs(trh->rcf) & TR_RCF_LEN_MASK)>>8); olddata = skb->data; - spin_unlock_bh(&rif_lock); + spin_unlock_irqrestore(&rif_lock, flags); skb_pull(skb, slack); memmove(skb->data, olddata, sizeof(struct trh_hdr) - slack); @@ -337,10 +338,11 @@ printk("source routing for %02X:%02X:%02X:%02X:%02X:%02X\n",trh->daddr[0], static void tr_add_rif_info(struct trh_hdr *trh, struct net_device *dev) { unsigned int hash, rii_p = 0; + unsigned long flags; struct rif_cache *entry; - spin_lock_bh(&rif_lock); + spin_lock_irqsave(&rif_lock, flags); /* * Firstly see if the entry exists @@ -378,7 +380,7 @@ printk("adding rif_entry: addr:%02X:%02X:%02X:%02X:%02X:%02X rcf:%04X\n", if(!entry) { printk(KERN_DEBUG "tr.c: Couldn't malloc rif cache entry !\n"); - spin_unlock_bh(&rif_lock); + spin_unlock_irqrestore(&rif_lock, flags); return; } @@ -420,7 +422,7 @@ printk("updating rif_entry: addr:%02X:%02X:%02X:%02X:%02X:%02X rcf:%04X\n", } entry->last_used=jiffies; } - spin_unlock_bh(&rif_lock); + spin_unlock_irqrestore(&rif_lock, flags); } /* @@ -430,9 +432,9 @@ printk("updating rif_entry: 
addr:%02X:%02X:%02X:%02X:%02X:%02X rcf:%04X\n", static void rif_check_expire(unsigned long dummy) { int i; - unsigned long next_interval = jiffies + sysctl_tr_rif_timeout/2; + unsigned long flags, next_interval = jiffies + sysctl_tr_rif_timeout/2; - spin_lock_bh(&rif_lock); + spin_lock_irqsave(&rif_lock, flags); for(i =0; i < RIF_TABLE_SIZE; i++) { struct rif_cache *entry, **pentry; @@ -454,7 +456,7 @@ static void rif_check_expire(unsigned long dummy) } } - spin_unlock_bh(&rif_lock); + spin_unlock_irqrestore(&rif_lock, flags); mod_timer(&rif_timer, next_interval); @@ -485,7 +487,7 @@ static struct rif_cache *rif_get_idx(loff_t pos) static void *rif_seq_start(struct seq_file *seq, loff_t *pos) { - spin_lock_bh(&rif_lock); + spin_lock_irq(&rif_lock); return *pos ? rif_get_idx(*pos - 1) : SEQ_START_TOKEN; } @@ -516,7 +518,7 @@ static void *rif_seq_next(struct seq_file *seq, void *v, loff_t *pos) static void rif_seq_stop(struct seq_file *seq, void *v) { - spin_unlock_bh(&rif_lock); + spin_unlock_irq(&rif_lock); } static int rif_seq_show(struct seq_file *seq, void *v) -- cgit v1.1 From cb94c62c252796f42bb83fe40960d12f3ea5a82a Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Thu, 18 Aug 2005 14:05:44 -0700 Subject: [IPV4]: Fix DST leak in icmp_push_reply() Based upon a bug report and initial patch by Ollie Wild. Signed-off-by: Patrick McHardy Signed-off-by: David S. 
Miller --- net/ipv4/icmp.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) (limited to 'net') diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c index 3d78464..badfc58 100644 --- a/net/ipv4/icmp.c +++ b/net/ipv4/icmp.c @@ -349,12 +349,12 @@ static void icmp_push_reply(struct icmp_bxm *icmp_param, { struct sk_buff *skb; - ip_append_data(icmp_socket->sk, icmp_glue_bits, icmp_param, - icmp_param->data_len+icmp_param->head_len, - icmp_param->head_len, - ipc, rt, MSG_DONTWAIT); - - if ((skb = skb_peek(&icmp_socket->sk->sk_write_queue)) != NULL) { + if (ip_append_data(icmp_socket->sk, icmp_glue_bits, icmp_param, + icmp_param->data_len+icmp_param->head_len, + icmp_param->head_len, + ipc, rt, MSG_DONTWAIT) < 0) + ip_flush_pending_frames(icmp_socket->sk); + else if ((skb = skb_peek(&icmp_socket->sk->sk_write_queue)) != NULL) { struct icmphdr *icmph = skb->h.icmph; unsigned int csum = 0; struct sk_buff *skb1; -- cgit v1.1 From 6fc8b9e7c60d4a3d4d7f1189f74e37651f5610e6 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Thu, 18 Aug 2005 14:36:59 -0700 Subject: [IPCOMP]: Fix false smp_processor_id warning This patch fixes a false-positive from debug_smp_processor_id(). The processor ID is only used to look up crypto_tfm objects. Any processor ID is acceptable here as long as it is one that is iterated on by for_each_cpu(). Signed-off-by: Herbert Xu Signed-off-by: David S. Miller --- net/ipv4/ipcomp.c | 2 +- net/ipv6/ipcomp6.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/ipv4/ipcomp.c b/net/ipv4/ipcomp.c index 2065944..7ded6e6 100644 --- a/net/ipv4/ipcomp.c +++ b/net/ipv4/ipcomp.c @@ -358,7 +358,7 @@ static struct crypto_tfm **ipcomp_alloc_tfms(const char *alg_name) int cpu; /* This can be any valid CPU ID so we don't need locking. 
*/ - cpu = smp_processor_id(); + cpu = raw_smp_processor_id(); list_for_each_entry(pos, &ipcomp_tfms_list, list) { struct crypto_tfm *tfm; diff --git a/net/ipv6/ipcomp6.c b/net/ipv6/ipcomp6.c index 423feb4..135383e 100644 --- a/net/ipv6/ipcomp6.c +++ b/net/ipv6/ipcomp6.c @@ -354,7 +354,7 @@ static struct crypto_tfm **ipcomp6_alloc_tfms(const char *alg_name) int cpu; /* This can be any valid CPU ID so we don't need locking. */ - cpu = smp_processor_id(); + cpu = raw_smp_processor_id(); list_for_each_entry(pos, &ipcomp6_tfms_list, list) { struct crypto_tfm *tfm; -- cgit v1.1 From fd841326d73096ad79be9c3fa348f9ad04541cc2 Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Sat, 20 Aug 2005 17:38:40 -0700 Subject: [NETFILTER]: Fix ECN target TCP marking An incorrect check made it bail out before doing anything. Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- net/ipv4/netfilter/ipt_ECN.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/net/ipv4/netfilter/ipt_ECN.c b/net/ipv4/netfilter/ipt_ECN.c index ada9911..d3250a3 100644 --- a/net/ipv4/netfilter/ipt_ECN.c +++ b/net/ipv4/netfilter/ipt_ECN.c @@ -61,10 +61,10 @@ set_ect_tcp(struct sk_buff **pskb, const struct ipt_ECN_info *einfo, int inward) if (!tcph) return 0; - if (!(einfo->operation & IPT_ECN_OP_SET_ECE - || tcph->ece == einfo->proto.tcp.ece) - && (!(einfo->operation & IPT_ECN_OP_SET_CWR - || tcph->cwr == einfo->proto.tcp.cwr))) + if ((!(einfo->operation & IPT_ECN_OP_SET_ECE) || + tcph->ece == einfo->proto.tcp.ece) && + ((!(einfo->operation & IPT_ECN_OP_SET_CWR) || + tcph->cwr == einfo->proto.tcp.cwr))) return 1; if (!skb_ip_make_writable(pskb, (*pskb)->nh.iph->ihl*4+sizeof(*tcph))) -- cgit v1.1 From f93592ff4fa4a55aa7640d435fa93338e190294d Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Sat, 20 Aug 2005 17:39:15 -0700 Subject: [NETFILTER]: Fix HW checksum handling in ECN target Signed-off-by: Patrick McHardy Signed-off-by: David S. 
Miller --- net/ipv4/netfilter/ipt_ECN.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/net/ipv4/netfilter/ipt_ECN.c b/net/ipv4/netfilter/ipt_ECN.c index d3250a3..94a0ce1 100644 --- a/net/ipv4/netfilter/ipt_ECN.c +++ b/net/ipv4/netfilter/ipt_ECN.c @@ -71,6 +71,10 @@ set_ect_tcp(struct sk_buff **pskb, const struct ipt_ECN_info *einfo, int inward) return 0; tcph = (void *)(*pskb)->nh.iph + (*pskb)->nh.iph->ihl*4; + if ((*pskb)->ip_summed == CHECKSUM_HW && + skb_checksum_help(*pskb, inward)) + return 0; + diffs[0] = ((u_int16_t *)tcph)[6]; if (einfo->operation & IPT_ECN_OP_SET_ECE) tcph->ece = einfo->proto.tcp.ece; @@ -79,13 +83,10 @@ set_ect_tcp(struct sk_buff **pskb, const struct ipt_ECN_info *einfo, int inward) diffs[1] = ((u_int16_t *)tcph)[6]; diffs[0] = diffs[0] ^ 0xFFFF; - if ((*pskb)->ip_summed != CHECKSUM_HW) + if ((*pskb)->ip_summed != CHECKSUM_UNNECESSARY) tcph->check = csum_fold(csum_partial((char *)diffs, sizeof(diffs), tcph->check^0xFFFF)); - else - if (skb_checksum_help(*pskb, inward)) - return 0; (*pskb)->nfcache |= NFC_ALTERED; return 1; } -- cgit v1.1 From 7e71af49d46e4c25f17a2c8f53d62ffd14f01007 Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Sat, 20 Aug 2005 17:40:41 -0700 Subject: [NETFILTER]: Fix HW checksum handling in TCPMSS target Most importantly, remove bogus BUG() in receive path. Signed-off-by: Patrick McHardy Signed-off-by: David S. 
Miller --- net/ipv4/netfilter/ipt_TCPMSS.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/ipv4/netfilter/ipt_TCPMSS.c b/net/ipv4/netfilter/ipt_TCPMSS.c index 1049050..7b84a25 100644 --- a/net/ipv4/netfilter/ipt_TCPMSS.c +++ b/net/ipv4/netfilter/ipt_TCPMSS.c @@ -61,6 +61,10 @@ ipt_tcpmss_target(struct sk_buff **pskb, if (!skb_ip_make_writable(pskb, (*pskb)->len)) return NF_DROP; + if ((*pskb)->ip_summed == CHECKSUM_HW && + skb_checksum_help(*pskb, out == NULL)) + return NF_DROP; + iph = (*pskb)->nh.iph; tcplen = (*pskb)->len - iph->ihl*4; @@ -186,9 +190,6 @@ ipt_tcpmss_target(struct sk_buff **pskb, newmss); retmodified: - /* We never hw checksum SYN packets. */ - BUG_ON((*pskb)->ip_summed == CHECKSUM_HW); - (*pskb)->nfcache |= NFC_UNKNOWN | NFC_ALTERED; return IPT_CONTINUE; } -- cgit v1.1 From 14869c388673e8db3348ab3706fa6485d0f0cf95 Mon Sep 17 00:00:00 2001 From: Dmitry Yusupov Date: Tue, 23 Aug 2005 10:09:27 -0700 Subject: [TCP]: Do TSO deferral even if tail SKB can go out now. If the tail SKB fits into the window, it is still beneficial to defer until the goal percentage of the window is available. This gives the application time to feed more data into the send queue and thus results in larger TSO frames going out. Patch from Dmitry Yusupov. Signed-off-by: David S. Miller --- net/ipv4/tcp_output.c | 4 ---- 1 file changed, 4 deletions(-) (limited to 'net') diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 566045e..dd30dd1 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -925,10 +925,6 @@ static int tcp_tso_should_defer(struct sock *sk, struct tcp_sock *tp, struct sk_ limit = min(send_win, cong_win); - /* If sk_send_head can be sent fully now, just do it. 
*/ - if (skb->len <= limit) - return 0; - if (sysctl_tcp_tso_win_divisor) { u32 chunk = min(tp->snd_wnd, tp->snd_cwnd * tp->mss_cache); -- cgit v1.1 From c3a20692ca5c8eb8cf5d0f489d4fc839ce7593d1 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Tue, 23 Aug 2005 10:09:53 -0700 Subject: [RPC]: Kill bogus kmap in krb5 While I was going through the crypto users recently, I noticed this bogus kmap in sunrpc. It's totally unnecessary since the crypto layer will do its own kmap before touching the data. Besides, the kmap is throwing the return value away. Signed-off-by: Herbert Xu Signed-off-by: David S. Miller --- net/sunrpc/auth_gss/gss_krb5_crypto.c | 2 -- 1 file changed, 2 deletions(-) (limited to 'net') diff --git a/net/sunrpc/auth_gss/gss_krb5_crypto.c b/net/sunrpc/auth_gss/gss_krb5_crypto.c index 24c21f2..5a7265a 100644 --- a/net/sunrpc/auth_gss/gss_krb5_crypto.c +++ b/net/sunrpc/auth_gss/gss_krb5_crypto.c @@ -185,9 +185,7 @@ make_checksum(s32 cksumtype, char *header, int hdrlen, struct xdr_buf *body, sg->page = body->pages[i]; sg->offset = offset; sg->length = thislen; - kmap(sg->page); /* XXX kmap_atomic? */ crypto_digest_update(tfm, sg, 1); - kunmap(sg->page); len -= thislen; i++; offset = 0; -- cgit v1.1 From 1344a41637114485fac7afa1505bce2ff862807a Mon Sep 17 00:00:00 2001 From: Dave Johnson Date: Tue, 23 Aug 2005 10:10:15 -0700 Subject: [IPV4]: Fix negative timer loop with lots of ipv4 peers. From: Dave Johnson Found this bug while doing some scaling testing that created 500K inet peers. peer_check_expire() in net/ipv4/inetpeer.c isn't using inet_peer_gc_mintime correctly and will end up creating an expire timer with less than the minimum duration, and even zero/negative if enough active peers are present. If >65K peers, the timer will be less than inet_peer_gc_mintime, and with >70K peers, the timer duration will reach zero and go negative. The timer handler will continue to schedule another zero/negative timer in a loop until peers can be aged. 
This can continue for at least a few minutes or even longer if the peers remain active due to arriving packets while the loop is occurring. Bug is present in both 2.4 and 2.6. Same patch will apply to both just fine. Signed-off-by: Andrew Morton Signed-off-by: David S. Miller --- net/ipv4/inetpeer.c | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/net/ipv4/inetpeer.c b/net/ipv4/inetpeer.c index 9547395..ab18a85 100644 --- a/net/ipv4/inetpeer.c +++ b/net/ipv4/inetpeer.c @@ -450,10 +450,13 @@ static void peer_check_expire(unsigned long dummy) /* Trigger the timer after inet_peer_gc_mintime .. inet_peer_gc_maxtime * interval depending on the total number of entries (more entries, * less interval). */ - peer_periodic_timer.expires = jiffies - + inet_peer_gc_maxtime - - (inet_peer_gc_maxtime - inet_peer_gc_mintime) / HZ * - peer_total / inet_peer_threshold * HZ; + if (peer_total >= inet_peer_threshold) + peer_periodic_timer.expires = jiffies + inet_peer_gc_mintime; + else + peer_periodic_timer.expires = jiffies + + inet_peer_gc_maxtime + - (inet_peer_gc_maxtime - inet_peer_gc_mintime) / HZ * + peer_total / inet_peer_threshold * HZ; add_timer(&peer_periodic_timer); } -- cgit v1.1 From 66a79a19a7c582efd99bb143c3a59fbda006eb39 Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Tue, 23 Aug 2005 10:10:35 -0700 Subject: [NETFILTER]: Fix HW checksum handling in ip_queue/ip6_queue The checksum needs to be filled in on output, after mangling a packet ip_summed needs to be reset. Signed-off-by: Patrick McHardy Signed-off-by: David S. 
Miller --- net/ipv4/netfilter/ip_queue.c | 7 +++++++ net/ipv6/netfilter/ip6_queue.c | 7 +++++++ 2 files changed, 14 insertions(+) (limited to 'net') diff --git a/net/ipv4/netfilter/ip_queue.c b/net/ipv4/netfilter/ip_queue.c index eda1fba..c6baa81 100644 --- a/net/ipv4/netfilter/ip_queue.c +++ b/net/ipv4/netfilter/ip_queue.c @@ -214,6 +214,12 @@ ipq_build_packet_message(struct ipq_queue_entry *entry, int *errp) break; case IPQ_COPY_PACKET: + if (entry->skb->ip_summed == CHECKSUM_HW && + (*errp = skb_checksum_help(entry->skb, + entry->info->outdev == NULL))) { + read_unlock_bh(&queue_lock); + return NULL; + } if (copy_range == 0 || copy_range > entry->skb->len) data_len = entry->skb->len; else @@ -385,6 +391,7 @@ ipq_mangle_ipv4(ipq_verdict_msg_t *v, struct ipq_queue_entry *e) if (!skb_ip_make_writable(&e->skb, v->data_len)) return -ENOMEM; memcpy(e->skb->data, v->payload, v->data_len); + e->skb->ip_summed = CHECKSUM_NONE; e->skb->nfcache |= NFC_ALTERED; /* diff --git a/net/ipv6/netfilter/ip6_queue.c b/net/ipv6/netfilter/ip6_queue.c index 5493180..a16df5b 100644 --- a/net/ipv6/netfilter/ip6_queue.c +++ b/net/ipv6/netfilter/ip6_queue.c @@ -211,6 +211,12 @@ ipq_build_packet_message(struct ipq_queue_entry *entry, int *errp) break; case IPQ_COPY_PACKET: + if (entry->skb->ip_summed == CHECKSUM_HW && + (*errp = skb_checksum_help(entry->skb, + entry->info->outdev == NULL))) { + read_unlock_bh(&queue_lock); + return NULL; + } if (copy_range == 0 || copy_range > entry->skb->len) data_len = entry->skb->len; else @@ -381,6 +387,7 @@ ipq_mangle_ipv6(ipq_verdict_msg_t *v, struct ipq_queue_entry *e) if (!skb_ip_make_writable(&e->skb, v->data_len)) return -ENOMEM; memcpy(e->skb->data, v->payload, v->data_len); + e->skb->ip_summed = CHECKSUM_NONE; e->skb->nfcache |= NFC_ALTERED; /* -- cgit v1.1 From 53b924b31fa53ac3007df3fef6870d5074a9adf8 Mon Sep 17 00:00:00 2001 From: Ralf Baechle Date: Tue, 23 Aug 2005 10:11:30 -0700 Subject: [NET]: Fix socket bitop damage The socket flag 
cleanups that went into 2.6.12-rc1 are basically ORing the flags of an old socket into the socket just being created. Unfortunately that one was just initialized by sock_init_data(), so already has SOCK_ZAPPED set. As a result zapped sockets are created and all incoming connections will fail due to this bug which again was carefully replicated to at least AX.25, NET/ROM or ROSE. In order to keep the abstraction alive I've introduced sock_copy_flags() to copy the socket flags from one socket to another and used that instead of the bitwise copy thing. Anyway, the idea here has probably been to copy all flags, so sock_copy_flags() should be the right thing. With this the ham radio protocols are usable again, so I hope this will make it into 2.6.13. Signed-off-by: Ralf Baechle DL5RB Signed-off-by: David S. Miller --- net/ax25/af_ax25.c | 7 +------ net/netrom/af_netrom.c | 7 +------ net/rose/af_rose.c | 7 +------ 3 files changed, 3 insertions(+), 18 deletions(-) (limited to 'net') diff --git a/net/ax25/af_ax25.c b/net/ax25/af_ax25.c index 707097d..7d8ecad 100644 --- a/net/ax25/af_ax25.c +++ b/net/ax25/af_ax25.c @@ -875,12 +875,7 @@ struct sock *ax25_make_new(struct sock *osk, struct ax25_dev *ax25_dev) sk->sk_sndbuf = osk->sk_sndbuf; sk->sk_state = TCP_ESTABLISHED; sk->sk_sleep = osk->sk_sleep; - - if (sock_flag(osk, SOCK_DBG)) - sock_set_flag(sk, SOCK_DBG); - - if (sock_flag(osk, SOCK_ZAPPED)) - sock_set_flag(sk, SOCK_ZAPPED); + sock_copy_flags(sk, osk); oax25 = ax25_sk(osk); diff --git a/net/netrom/af_netrom.c b/net/netrom/af_netrom.c index 31ed4a9..5385835 100644 --- a/net/netrom/af_netrom.c +++ b/net/netrom/af_netrom.c @@ -459,12 +459,7 @@ static struct sock *nr_make_new(struct sock *osk) sk->sk_sndbuf = osk->sk_sndbuf; sk->sk_state = TCP_ESTABLISHED; sk->sk_sleep = osk->sk_sleep; - - if (sock_flag(osk, SOCK_ZAPPED)) - sock_set_flag(sk, SOCK_ZAPPED); - - if (sock_flag(osk, SOCK_DBG)) - sock_set_flag(sk, SOCK_DBG); + sock_copy_flags(sk, osk); 
skb_queue_head_init(&nr->ack_queue); skb_queue_head_init(&nr->reseq_queue); diff --git a/net/rose/af_rose.c b/net/rose/af_rose.c index 7eb6a5b..3fe7e56 100644 --- a/net/rose/af_rose.c +++ b/net/rose/af_rose.c @@ -556,12 +556,7 @@ static struct sock *rose_make_new(struct sock *osk) sk->sk_sndbuf = osk->sk_sndbuf; sk->sk_state = TCP_ESTABLISHED; sk->sk_sleep = osk->sk_sleep; - - if (sock_flag(osk, SOCK_ZAPPED)) - sock_set_flag(sk, SOCK_ZAPPED); - - if (sock_flag(osk, SOCK_DBG)) - sock_set_flag(sk, SOCK_DBG); + sock_copy_flags(sk, osk); init_timer(&rose->timer); init_timer(&rose->idletimer); -- cgit v1.1 From 01d7dd0e9f8c5f1888619d2649c7da389232b408 Mon Sep 17 00:00:00 2001 From: Ralf Baechle Date: Tue, 23 Aug 2005 10:11:45 -0700 Subject: [AX25]: UID fixes o Brown paperbag bug - ax25_findbyuid() was always returning a NULL pointer as the result. Breaks ROSE completely and AX.25 if UID policy set to deny. o While the list structure of AX.25's UID to callsign mapping table was properly protected by a spinlock, its elements were not refcounted resulting in a race between removal and usage of an element. Signed-off-by: Ralf Baechle DL5RB Signed-off-by: David S. 
Miller --- net/ax25/af_ax25.c | 20 +++++++----- net/ax25/ax25_route.c | 12 +++++--- net/ax25/ax25_uid.c | 83 ++++++++++++++++++++++---------------------------- net/netrom/af_netrom.c | 24 ++++++++++----- net/rose/af_rose.c | 20 +++++++----- 5 files changed, 84 insertions(+), 75 deletions(-) (limited to 'net') diff --git a/net/ax25/af_ax25.c b/net/ax25/af_ax25.c index 7d8ecad..a5c94f1 100644 --- a/net/ax25/af_ax25.c +++ b/net/ax25/af_ax25.c @@ -1002,7 +1002,8 @@ static int ax25_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) struct sock *sk = sock->sk; struct full_sockaddr_ax25 *addr = (struct full_sockaddr_ax25 *)uaddr; ax25_dev *ax25_dev = NULL; - ax25_address *call; + ax25_uid_assoc *user; + ax25_address call; ax25_cb *ax25; int err = 0; @@ -1021,9 +1022,15 @@ static int ax25_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) if (addr->fsa_ax25.sax25_family != AF_AX25) return -EINVAL; - call = ax25_findbyuid(current->euid); - if (call == NULL && ax25_uid_policy && !capable(CAP_NET_ADMIN)) { - return -EACCES; + user = ax25_findbyuid(current->euid); + if (user) { + call = user->call; + ax25_uid_put(user); + } else { + if (ax25_uid_policy && !capable(CAP_NET_ADMIN)) + return -EACCES; + + call = addr->fsa_ax25.sax25_call; } lock_sock(sk); @@ -1034,10 +1041,7 @@ static int ax25_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) goto out; } - if (call == NULL) - ax25->source_addr = addr->fsa_ax25.sax25_call; - else - ax25->source_addr = *call; + ax25->source_addr = call; /* * User already set interface with SO_BINDTODEVICE diff --git a/net/ax25/ax25_route.c b/net/ax25/ax25_route.c index 44b99b1f..c288526 100644 --- a/net/ax25/ax25_route.c +++ b/net/ax25/ax25_route.c @@ -422,8 +422,8 @@ static inline void ax25_adjust_path(ax25_address *addr, ax25_digi *digipeat) */ int ax25_rt_autobind(ax25_cb *ax25, ax25_address *addr) { + ax25_uid_assoc *user; ax25_route *ax25_rt; - ax25_address *call; int err; if ((ax25_rt = 
ax25_get_route(addr, NULL)) == NULL) @@ -434,16 +434,18 @@ int ax25_rt_autobind(ax25_cb *ax25, ax25_address *addr) goto put; } - if ((call = ax25_findbyuid(current->euid)) == NULL) { + user = ax25_findbyuid(current->euid); + if (user) { + ax25->source_addr = user->call; + ax25_uid_put(user); + } else { if (ax25_uid_policy && !capable(CAP_NET_BIND_SERVICE)) { err = -EPERM; goto put; } - call = (ax25_address *)ax25->ax25_dev->dev->dev_addr; + ax25->source_addr = *(ax25_address *)ax25->ax25_dev->dev->dev_addr; } - ax25->source_addr = *call; - if (ax25_rt->digipeat != NULL) { if ((ax25->digipeat = kmalloc(sizeof(ax25_digi), GFP_ATOMIC)) == NULL) { err = -ENOMEM; diff --git a/net/ax25/ax25_uid.c b/net/ax25/ax25_uid.c index cea6b7d..a8b3822 100644 --- a/net/ax25/ax25_uid.c +++ b/net/ax25/ax25_uid.c @@ -28,6 +28,7 @@ #include #include #include +#include #include #include #include @@ -41,38 +42,41 @@ * Callsign/UID mapper. This is in kernel space for security on multi-amateur machines. */ -static ax25_uid_assoc *ax25_uid_list; +HLIST_HEAD(ax25_uid_list); static DEFINE_RWLOCK(ax25_uid_lock); int ax25_uid_policy = 0; -ax25_address *ax25_findbyuid(uid_t uid) +ax25_uid_assoc *ax25_findbyuid(uid_t uid) { - ax25_uid_assoc *ax25_uid; - ax25_address *res = NULL; + ax25_uid_assoc *ax25_uid, *res = NULL; + struct hlist_node *node; read_lock(&ax25_uid_lock); - for (ax25_uid = ax25_uid_list; ax25_uid != NULL; ax25_uid = ax25_uid->next) { + ax25_uid_for_each(ax25_uid, node, &ax25_uid_list) { if (ax25_uid->uid == uid) { - res = &ax25_uid->call; + ax25_uid_hold(ax25_uid); + res = ax25_uid; break; } } read_unlock(&ax25_uid_lock); - return NULL; + return res; } int ax25_uid_ioctl(int cmd, struct sockaddr_ax25 *sax) { - ax25_uid_assoc *s, *ax25_uid; + ax25_uid_assoc *ax25_uid; + struct hlist_node *node; + ax25_uid_assoc *user; unsigned long res; switch (cmd) { case SIOCAX25GETUID: res = -ENOENT; read_lock(&ax25_uid_lock); - for (ax25_uid = ax25_uid_list; ax25_uid != NULL; ax25_uid = 
ax25_uid->next) { + ax25_uid_for_each(ax25_uid, node, &ax25_uid_list) { if (ax25cmp(&sax->sax25_call, &ax25_uid->call) == 0) { res = ax25_uid->uid; break; @@ -85,19 +89,22 @@ int ax25_uid_ioctl(int cmd, struct sockaddr_ax25 *sax) case SIOCAX25ADDUID: if (!capable(CAP_NET_ADMIN)) return -EPERM; - if (ax25_findbyuid(sax->sax25_uid)) + user = ax25_findbyuid(sax->sax25_uid); + if (user) { + ax25_uid_put(user); return -EEXIST; + } if (sax->sax25_uid == 0) return -EINVAL; if ((ax25_uid = kmalloc(sizeof(*ax25_uid), GFP_KERNEL)) == NULL) return -ENOMEM; + atomic_set(&ax25_uid->refcount, 1); ax25_uid->uid = sax->sax25_uid; ax25_uid->call = sax->sax25_call; write_lock(&ax25_uid_lock); - ax25_uid->next = ax25_uid_list; - ax25_uid_list = ax25_uid; + hlist_add_head(&ax25_uid->uid_node, &ax25_uid_list); write_unlock(&ax25_uid_lock); return 0; @@ -106,34 +113,21 @@ int ax25_uid_ioctl(int cmd, struct sockaddr_ax25 *sax) if (!capable(CAP_NET_ADMIN)) return -EPERM; + ax25_uid = NULL; write_lock(&ax25_uid_lock); - for (ax25_uid = ax25_uid_list; ax25_uid != NULL; ax25_uid = ax25_uid->next) { - if (ax25cmp(&sax->sax25_call, &ax25_uid->call) == 0) { + ax25_uid_for_each(ax25_uid, node, &ax25_uid_list) { + if (ax25cmp(&sax->sax25_call, &ax25_uid->call) == 0) break; - } } if (ax25_uid == NULL) { write_unlock(&ax25_uid_lock); return -ENOENT; } - if ((s = ax25_uid_list) == ax25_uid) { - ax25_uid_list = s->next; - write_unlock(&ax25_uid_lock); - kfree(ax25_uid); - return 0; - } - while (s != NULL && s->next != NULL) { - if (s->next == ax25_uid) { - s->next = ax25_uid->next; - write_unlock(&ax25_uid_lock); - kfree(ax25_uid); - return 0; - } - s = s->next; - } + hlist_del_init(&ax25_uid->uid_node); + ax25_uid_put(ax25_uid); write_unlock(&ax25_uid_lock); - return -ENOENT; + return 0; default: return -EINVAL; @@ -147,13 +141,11 @@ int ax25_uid_ioctl(int cmd, struct sockaddr_ax25 *sax) static void *ax25_uid_seq_start(struct seq_file *seq, loff_t *pos) { struct ax25_uid_assoc *pt; - int i = 1; + 
struct hlist_node *node; + int i = 0; read_lock(&ax25_uid_lock); - if (*pos == 0) - return SEQ_START_TOKEN; - - for (pt = ax25_uid_list; pt != NULL; pt = pt->next) { + ax25_uid_for_each(pt, node, &ax25_uid_list) { if (i == *pos) return pt; ++i; @@ -164,8 +156,9 @@ static void *ax25_uid_seq_start(struct seq_file *seq, loff_t *pos) static void *ax25_uid_seq_next(struct seq_file *seq, void *v, loff_t *pos) { ++*pos; - return (v == SEQ_START_TOKEN) ? ax25_uid_list : - ((struct ax25_uid_assoc *) v)->next; + + return hlist_entry(((ax25_uid_assoc *)v)->uid_node.next, + ax25_uid_assoc, uid_node); } static void ax25_uid_seq_stop(struct seq_file *seq, void *v) @@ -179,7 +172,6 @@ static int ax25_uid_seq_show(struct seq_file *seq, void *v) seq_printf(seq, "Policy: %d\n", ax25_uid_policy); else { struct ax25_uid_assoc *pt = v; - seq_printf(seq, "%6d %s\n", pt->uid, ax2asc(&pt->call)); } @@ -213,16 +205,13 @@ struct file_operations ax25_uid_fops = { */ void __exit ax25_uid_free(void) { - ax25_uid_assoc *s, *ax25_uid; + ax25_uid_assoc *ax25_uid; + struct hlist_node *node; write_lock(&ax25_uid_lock); - ax25_uid = ax25_uid_list; - while (ax25_uid != NULL) { - s = ax25_uid; - ax25_uid = ax25_uid->next; - - kfree(s); + ax25_uid_for_each(ax25_uid, node, &ax25_uid_list) { + hlist_del_init(&ax25_uid->uid_node); + ax25_uid_put(ax25_uid); } - ax25_uid_list = NULL; write_unlock(&ax25_uid_lock); } diff --git a/net/netrom/af_netrom.c b/net/netrom/af_netrom.c index 5385835..162a85f 100644 --- a/net/netrom/af_netrom.c +++ b/net/netrom/af_netrom.c @@ -536,7 +536,8 @@ static int nr_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) struct nr_sock *nr = nr_sk(sk); struct full_sockaddr_ax25 *addr = (struct full_sockaddr_ax25 *)uaddr; struct net_device *dev; - ax25_address *user, *source; + ax25_uid_assoc *user; + ax25_address *source; lock_sock(sk); if (!sock_flag(sk, SOCK_ZAPPED)) { @@ -575,16 +576,19 @@ static int nr_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) } 
else { source = &addr->fsa_ax25.sax25_call; - if ((user = ax25_findbyuid(current->euid)) == NULL) { + user = ax25_findbyuid(current->euid); + if (user) { + nr->user_addr = user->call; + ax25_uid_put(user); + } else { if (ax25_uid_policy && !capable(CAP_NET_BIND_SERVICE)) { release_sock(sk); dev_put(dev); return -EPERM; } - user = source; + nr->user_addr = *source; } - nr->user_addr = *user; nr->source_addr = *source; } @@ -604,7 +608,8 @@ static int nr_connect(struct socket *sock, struct sockaddr *uaddr, struct sock *sk = sock->sk; struct nr_sock *nr = nr_sk(sk); struct sockaddr_ax25 *addr = (struct sockaddr_ax25 *)uaddr; - ax25_address *user, *source = NULL; + ax25_address *source = NULL; + ax25_uid_assoc *user; struct net_device *dev; lock_sock(sk); @@ -645,16 +650,19 @@ static int nr_connect(struct socket *sock, struct sockaddr *uaddr, } source = (ax25_address *)dev->dev_addr; - if ((user = ax25_findbyuid(current->euid)) == NULL) { + user = ax25_findbyuid(current->euid); + if (user) { + nr->user_addr = user->call; + ax25_uid_put(user); + } else { if (ax25_uid_policy && !capable(CAP_NET_ADMIN)) { dev_put(dev); release_sock(sk); return -EPERM; } - user = source; + nr->user_addr = *source; } - nr->user_addr = *user; nr->source_addr = *source; nr->device = dev; diff --git a/net/rose/af_rose.c b/net/rose/af_rose.c index 3fe7e56..5480caf 100644 --- a/net/rose/af_rose.c +++ b/net/rose/af_rose.c @@ -626,7 +626,8 @@ static int rose_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) struct rose_sock *rose = rose_sk(sk); struct sockaddr_rose *addr = (struct sockaddr_rose *)uaddr; struct net_device *dev; - ax25_address *user, *source; + ax25_address *source; + ax25_uid_assoc *user; int n; if (!sock_flag(sk, SOCK_ZAPPED)) @@ -651,14 +652,17 @@ static int rose_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) source = &addr->srose_call; - if ((user = ax25_findbyuid(current->euid)) == NULL) { + user = ax25_findbyuid(current->euid); + if (user) { + 
rose->source_call = user->call; + ax25_uid_put(user); + } else { if (ax25_uid_policy && !capable(CAP_NET_BIND_SERVICE)) return -EACCES; - user = source; + rose->source_call = *source; } rose->source_addr = addr->srose_addr; - rose->source_call = *user; rose->device = dev; rose->source_ndigis = addr->srose_ndigis; @@ -685,8 +689,8 @@ static int rose_connect(struct socket *sock, struct sockaddr *uaddr, int addr_le struct rose_sock *rose = rose_sk(sk); struct sockaddr_rose *addr = (struct sockaddr_rose *)uaddr; unsigned char cause, diagnostic; - ax25_address *user; struct net_device *dev; + ax25_uid_assoc *user; int n; if (sk->sk_state == TCP_ESTABLISHED && sock->state == SS_CONNECTING) { @@ -736,12 +740,14 @@ static int rose_connect(struct socket *sock, struct sockaddr *uaddr, int addr_le if ((dev = rose_dev_first()) == NULL) return -ENETUNREACH; - if ((user = ax25_findbyuid(current->euid)) == NULL) + user = ax25_findbyuid(current->euid); + if (!user) return -EINVAL; memcpy(&rose->source_addr, dev->dev_addr, ROSE_ADDR_LEN); - rose->source_call = *user; + rose->source_call = user->call; rose->device = dev; + ax25_uid_put(user); rose_insert_socket(sk); /* Finish the bind */ } -- cgit v1.1 From d2287f844187158e5eddd0d5de8e95bd607abcb7 Mon Sep 17 00:00:00 2001 From: Vlad Yasevich Date: Tue, 23 Aug 2005 10:12:04 -0700 Subject: [SCTP]: Add SENTINEL to SCTP MIB stats Add SNMP_MIB_SENTINEL to the definition of the sctp_snmp_list so that the output routine in proc correctly terminates. This was causing some problems running on ia64 systems. Signed-off-by: Vlad Yasevich Signed-off-by: David S. 
Miller --- net/sctp/proc.c | 1 + 1 file changed, 1 insertion(+) (limited to 'net') diff --git a/net/sctp/proc.c b/net/sctp/proc.c index 98d49ec..b74f777 100644 --- a/net/sctp/proc.c +++ b/net/sctp/proc.c @@ -57,6 +57,7 @@ static struct snmp_mib sctp_snmp_list[] = { SNMP_MIB_ITEM("SctpReasmUsrMsgs", SCTP_MIB_REASMUSRMSGS), SNMP_MIB_ITEM("SctpOutSCTPPacks", SCTP_MIB_OUTSCTPPACKS), SNMP_MIB_ITEM("SctpInSCTPPacks", SCTP_MIB_INSCTPPACKS), + SNMP_MIB_SENTINEL }; /* Return the current value of a particular entry in the mib by adding its -- cgit v1.1 From 0fbbeb1ba43bd04f0f1d4f161b7f72437a1c8a03 Mon Sep 17 00:00:00 2001 From: Thomas Graf Date: Tue, 23 Aug 2005 10:12:44 -0700 Subject: [PKT_SCHED]: Fix missing qdisc_destroy() in qdisc_create_dflt() qdisc_create_dflt() fails to destroy the newly allocated default qdisc if the initialization fails, resulting in leaks of all kinds. The only caller in mainline which may trigger this bug is sch_tbf.c in tbf_create_dflt_qdisc(). Note: qdisc_create_dflt() doesn't fulfill the official locking requirements of qdisc_destroy(), but since the qdisc could never be seen by the outside world this doesn't matter, and it can stay as-is until the locking of pkt_sched is cleaned up. Signed-off-by: Thomas Graf Signed-off-by: David S. Miller --- net/sched/sch_generic.c | 1 + 1 file changed, 1 insertion(+) (limited to 'net') diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c index 8edefd5d..0d066c9 100644 --- a/net/sched/sch_generic.c +++ b/net/sched/sch_generic.c @@ -438,6 +438,7 @@ struct Qdisc * qdisc_create_dflt(struct net_device *dev, struct Qdisc_ops *ops) if (!ops->init || ops->init(sch, NULL) == 0) return sch; + qdisc_destroy(sch); errout: return NULL; } -- cgit v1.1 From 89ebd197eb2cd31d6187db344d5117064e19fdde Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Tue, 23 Aug 2005 10:13:06 -0700 Subject: [TCP]: Unconditionally clear TCP_NAGLE_PUSH in skb_entail().
Intention of this bit is to force pushing of the existing send queue when TCP_CORK or TCP_NODELAY state changes via setsockopt(). But it's easy to create a situation where the bit never clears. For example, if the send queue starts empty: 1) set TCP_NODELAY 2) clear TCP_NODELAY 3) set TCP_CORK 4) do small write() The current code will leave TCP_NAGLE_PUSH set after that sequence. Unconditionally clearing the bit when new data is added via skb_entail() solves the problem. Signed-off-by: David S. Miller --- net/ipv4/tcp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index ddb6ce4..69b1fcf 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -584,7 +584,7 @@ static inline void skb_entail(struct sock *sk, struct tcp_sock *tp, sk_charge_skb(sk, skb); if (!sk->sk_send_head) sk->sk_send_head = skb; - else if (tp->nonagle&TCP_NAGLE_PUSH) + if (tp->nonagle & TCP_NAGLE_PUSH) tp->nonagle &= ~TCP_NAGLE_PUSH; } -- cgit v1.1 From d5d283751ef3c05b6766501a46800cbee84959d6 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Tue, 23 Aug 2005 10:49:54 -0700 Subject: [TCP]: Document non-trivial locking path in tcp_v{4,6}_get_port(). This trips up a lot of folks reading this code. Put an unlikely() around the port-exhaustion test for good measure. Signed-off-by: David S. Miller --- net/ipv4/tcp_ipv4.c | 9 +++++++-- net/ipv6/tcp_ipv6.c | 9 +++++++-- 2 files changed, 14 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 5d91213..67c6708 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -242,9 +242,14 @@ static int tcp_v4_get_port(struct sock *sk, unsigned short snum) tcp_port_rover = rover; spin_unlock(&tcp_portalloc_lock); - /* Exhausted local port range during search? */ + /* Exhausted local port range during search? 
It is not + * possible for us to be holding one of the bind hash + * locks if this test triggers, because if 'remaining' + * drops to zero, we broke out of the do/while loop at + * the top level, not from the 'break;' statement. + */ ret = 1; - if (remaining <= 0) + if (unlikely(remaining <= 0)) goto fail; /* OK, here is the one we will use. HEAD is diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index f6e288d..ef29cfd 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -158,9 +158,14 @@ static int tcp_v6_get_port(struct sock *sk, unsigned short snum) tcp_port_rover = rover; spin_unlock(&tcp_portalloc_lock); - /* Exhausted local port range during search? */ + /* Exhausted local port range during search? It is not + * possible for us to be holding one of the bind hash + * locks if this test triggers, because if 'remaining' + * drops to zero, we broke out of the do/while loop at + * the top level, not from the 'break;' statement. + */ ret = 1; - if (remaining <= 0) + if (unlikely(remaining <= 0)) goto fail; /* OK, here is the one we will use. */ -- cgit v1.1 From dc16aaf29d64b8c5e0b88f49a4d541edf5b61e42 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Tue, 23 Aug 2005 10:50:09 -0700 Subject: [ROSE]: Fix missing unlocks in rose_route_frame() Noticed by Coverity checker. Signed-off-by: David S. Miller --- net/rose/rose_route.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/rose/rose_route.c b/net/rose/rose_route.c index ff73ebb..46b2321 100644 --- a/net/rose/rose_route.c +++ b/net/rose/rose_route.c @@ -994,8 +994,10 @@ int rose_route_frame(struct sk_buff *skb, ax25_cb *ax25) * 1. The frame isn't for us, * 2. It isn't "owned" by any existing route. 
*/ - if (frametype != ROSE_CALL_REQUEST) /* XXX */ - return 0; + if (frametype != ROSE_CALL_REQUEST) { /* XXX */ + ret = 0; + goto out; + } len = (((skb->data[3] >> 4) & 0x0F) + 1) / 2; len += (((skb->data[3] >> 0) & 0x0F) + 1) / 2; -- cgit v1.1 From c1cc168442a943ed3997f6543db87c061987f9d7 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Tue, 23 Aug 2005 14:55:32 -0700 Subject: [ROSE]: Fix typo in rose_route_frame() locking fix. Signed-off-by: David S. Miller --- net/rose/rose_route.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/rose/rose_route.c b/net/rose/rose_route.c index 46b2321..25da6f6 100644 --- a/net/rose/rose_route.c +++ b/net/rose/rose_route.c @@ -995,7 +995,7 @@ int rose_route_frame(struct sk_buff *skb, ax25_cb *ax25) * 2. It isn't "owned" by any existing route. */ if (frametype != ROSE_CALL_REQUEST) { /* XXX */ - ret = 0; + res = 0; goto out; } -- cgit v1.1 From 06c7427021f1cc83703f14659d8405ca773ba1ef Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Tue, 23 Aug 2005 22:06:09 -0700 Subject: [FIB_TRIE]: Don't ignore negative results from fib_semantic_match When a semantic match occurs either success, not found or an error (for matching unreachable routes/blackholes) is returned. fib_trie ignores the errors and looks for a different matching route. Treat results other than "no match" as success and end lookup. Signed-off-by: Patrick McHardy Signed-off-by: David S. 
Miller --- net/ipv4/fib_trie.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) (limited to 'net') diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c index a701405..45efd5f 100644 --- a/net/ipv4/fib_trie.c +++ b/net/ipv4/fib_trie.c @@ -1333,9 +1333,9 @@ err:; } static inline int check_leaf(struct trie *t, struct leaf *l, t_key key, int *plen, const struct flowi *flp, - struct fib_result *res, int *err) + struct fib_result *res) { - int i; + int err, i; t_key mask; struct leaf_info *li; struct hlist_head *hhead = &l->list; @@ -1348,18 +1348,18 @@ static inline int check_leaf(struct trie *t, struct leaf *l, t_key key, int *pl if (l->key != (key & mask)) continue; - if (((*err) = fib_semantic_match(&li->falh, flp, res, l->key, mask, i)) == 0) { + if ((err = fib_semantic_match(&li->falh, flp, res, l->key, mask, i)) <= 0) { *plen = i; #ifdef CONFIG_IP_FIB_TRIE_STATS t->stats.semantic_match_passed++; #endif - return 1; + return err; } #ifdef CONFIG_IP_FIB_TRIE_STATS t->stats.semantic_match_miss++; #endif } - return 0; + return 1; } static int @@ -1386,7 +1386,7 @@ fn_trie_lookup(struct fib_table *tb, const struct flowi *flp, struct fib_result /* Just a leaf? */ if (IS_LEAF(n)) { - if (check_leaf(t, (struct leaf *)n, key, &plen, flp, res, &ret)) + if ((ret = check_leaf(t, (struct leaf *)n, key, &plen, flp, res)) <= 0) goto found; goto failed; } @@ -1508,7 +1508,7 @@ fn_trie_lookup(struct fib_table *tb, const struct flowi *flp, struct fib_result continue; } if (IS_LEAF(n)) { - if (check_leaf(t, (struct leaf *)n, key, &plen, flp, res, &ret)) + if ((ret = check_leaf(t, (struct leaf *)n, key, &plen, flp, res)) <= 0) goto found; } backtrace: -- cgit v1.1