From 0b760113a3a155269a3fba93a409c640031dd68f Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 31 May 2011 15:15:34 -0400 Subject: NLM: Don't hang forever on NLM unlock requests If the NLM daemon is killed on the NFS server, we can currently end up hanging forever on an 'unlock' request, instead of aborting. Basically, if the rpcbind request fails, or the server keeps returning garbage, we really want to quit instead of retrying. Tested-by: Vasily Averin Signed-off-by: Trond Myklebust Cc: stable@kernel.org --- net/sunrpc/clnt.c | 3 +++ net/sunrpc/sched.c | 1 + 2 files changed, 4 insertions(+) (limited to 'net') diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c index b84d739..566bcfd 100644 --- a/net/sunrpc/clnt.c +++ b/net/sunrpc/clnt.c @@ -1175,6 +1175,9 @@ call_bind_status(struct rpc_task *task) status = -EOPNOTSUPP; break; } + if (task->tk_rebind_retry == 0) + break; + task->tk_rebind_retry--; rpc_delay(task, 3*HZ); goto retry_timeout; case -ETIMEDOUT: diff --git a/net/sunrpc/sched.c b/net/sunrpc/sched.c index 6b43ee7..a27406b 100644 --- a/net/sunrpc/sched.c +++ b/net/sunrpc/sched.c @@ -792,6 +792,7 @@ static void rpc_init_task(struct rpc_task *task, const struct rpc_task_setup *ta /* Initialize retry counters */ task->tk_garb_retry = 2; task->tk_cred_retry = 2; + task->tk_rebind_retry = 2; task->tk_priority = task_setup_data->priority - RPC_PRIORITY_LOW; task->tk_owner = current->tgid; -- cgit v1.1 From 5afa9133cfe67f1bfead6049a9640c9262a7101c Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Fri, 17 Jun 2011 10:14:59 -0400 Subject: SUNRPC: Ensure the RPC client only quits on fatal signals Fix a couple of instances where we were exiting the RPC client on arbitrary signals. We should only do so on fatal signals. Cc: stable@kernel.org Signed-off-by: Trond Myklebust --- net/sunrpc/auth_gss/auth_gss.c | 4 ++-- net/sunrpc/clnt.c | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/sunrpc/auth_gss/auth_gss.c b/net/sunrpc/auth_gss/auth_gss.c index 339ba64..5daf6cc 100644 --- a/net/sunrpc/auth_gss/auth_gss.c +++ b/net/sunrpc/auth_gss/auth_gss.c @@ -577,13 +577,13 @@ retry: } inode = &gss_msg->inode->vfs_inode; for (;;) { - prepare_to_wait(&gss_msg->waitqueue, &wait, TASK_INTERRUPTIBLE); + prepare_to_wait(&gss_msg->waitqueue, &wait, TASK_KILLABLE); spin_lock(&inode->i_lock); if (gss_msg->ctx != NULL || gss_msg->msg.errno < 0) { break; } spin_unlock(&inode->i_lock); - if (signalled()) { + if (fatal_signal_pending(current)) { err = -ERESTARTSYS; goto out_intr; } diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c index 566bcfd..8c91415 100644 --- a/net/sunrpc/clnt.c +++ b/net/sunrpc/clnt.c @@ -1061,7 +1061,7 @@ call_allocate(struct rpc_task *task) dprintk("RPC: %5u rpc_buffer allocation failed\n", task->tk_pid); - if (RPC_IS_ASYNC(task) || !signalled()) { + if (RPC_IS_ASYNC(task) || !fatal_signal_pending(current)) { task->tk_action = call_allocate; rpc_delay(task, HZ>>4); return; -- cgit v1.1 From 32c90254ed4a0c698caa0794ebb4de63fcc69631 Mon Sep 17 00:00:00 2001 From: Xufeng Zhang Date: Tue, 21 Jun 2011 10:43:39 +0000 Subject: ipv6/udp: Use the correct variable to determine non-blocking condition udpv6_recvmsg() function is not using the correct variable to determine whether or not the socket is in non-blocking operation, this will lead to unexpected behavior when a UDP checksum error occurs. Consider a non-blocking udp receive scenario: when udpv6_recvmsg() is called by sock_common_recvmsg(), MSG_DONTWAIT bit of flags variable in udpv6_recvmsg() is cleared by "flags & ~MSG_DONTWAIT" in this call: err = sk->sk_prot->recvmsg(iocb, sk, msg, size, flags & MSG_DONTWAIT, flags & ~MSG_DONTWAIT, &addr_len); i.e. with udpv6_recvmsg() getting these values: int noblock = flags & MSG_DONTWAIT int flags = flags & ~MSG_DONTWAIT So, when udp checksum error occurs, the execution will go to csum_copy_err, and then the problem happens: csum_copy_err: ............... if (flags & MSG_DONTWAIT) return -EAGAIN; goto try_again; ............... But it will always go to try_again as MSG_DONTWAIT has been cleared from flags at call time -- only noblock contains the original value of MSG_DONTWAIT, so the test should be: if (noblock) return -EAGAIN; This is also consistent with what the ipv4/udp code does. Signed-off-by: Xufeng Zhang Signed-off-by: Paul Gortmaker Signed-off-by: David S. Miller --- net/ipv6/udp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index 41f8c9c..1e7a43f 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -453,7 +453,7 @@ csum_copy_err: } unlock_sock_fast(sk, slow); - if (flags & MSG_DONTWAIT) + if (noblock) return -EAGAIN; goto try_again; } -- cgit v1.1 From 9cfaa8def1c795a512bc04f2aec333b03724ca2e Mon Sep 17 00:00:00 2001 From: Xufeng Zhang Date: Tue, 21 Jun 2011 10:43:40 +0000 Subject: udp/recvmsg: Clear MSG_TRUNC flag when starting over for a new packet Consider this scenario: When the size of the first received udp packet is bigger than the receive buffer, MSG_TRUNC bit is set in msg->msg_flags. However, if checksum error happens and this is a blocking socket, it will goto try_again loop to receive the next packet. But if the size of the next udp packet is smaller than receive buffer, MSG_TRUNC flag should not be set, but because MSG_TRUNC bit is not cleared in msg->msg_flags before receive the next packet, MSG_TRUNC is still set, which is wrong. Fix this problem by clearing MSG_TRUNC flag when starting over for a new packet. Signed-off-by: Xufeng Zhang Signed-off-by: Paul Gortmaker Signed-off-by: David S. Miller --- net/ipv4/udp.c | 3 +++ net/ipv6/udp.c | 3 +++ 2 files changed, 6 insertions(+) (limited to 'net') diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index abca870..48cd88e 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -1249,6 +1249,9 @@ csum_copy_err: if (noblock) return -EAGAIN; + + /* starting over for a new packet */ + msg->msg_flags &= ~MSG_TRUNC; goto try_again; } diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index 1e7a43f..328985c 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -455,6 +455,9 @@ csum_copy_err: if (noblock) return -EAGAIN; + + /* starting over for a new packet */ + msg->msg_flags &= ~MSG_TRUNC; goto try_again; } -- cgit v1.1 From bd4265fe365c0f3945dd5ff1527e52bbe2bedfa2 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Thu, 23 Jun 2011 02:39:12 +0000 Subject: bridge: Only flood unregistered groups to routers The bridge currently floods packets to groups that we have never seen before to all ports. This is not required by RFC4541 and in fact it is not desirable in environment where traffic to unregistered group is always present. This patch changes the behaviour so that we only send traffic to unregistered groups to ports marked as routers. The user can always force flooding behaviour to any given port by marking it as a router. Note that this change does not apply to traffic to 224.0.0.X as traffic to those groups must always be flooded to all ports. Signed-off-by: Herbert Xu Signed-off-by: David S. Miller --- net/bridge/br_multicast.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/bridge/br_multicast.c b/net/bridge/br_multicast.c index 29b9812..2d85ca7 100644 --- a/net/bridge/br_multicast.c +++ b/net/bridge/br_multicast.c @@ -1379,8 +1379,11 @@ static int br_multicast_ipv4_rcv(struct net_bridge *br, if (unlikely(ip_fast_csum((u8 *)iph, iph->ihl))) return -EINVAL; - if (iph->protocol != IPPROTO_IGMP) + if (iph->protocol != IPPROTO_IGMP) { + if ((iph->daddr & IGMP_LOCAL_GROUP_MASK) != IGMP_LOCAL_GROUP) + BR_INPUT_SKB_CB(skb)->mrouters_only = 1; return 0; + } len = ntohs(iph->tot_len); if (skb->len < len || len < ip_hdrlen(skb)) -- cgit v1.1 From 33f99dc7fd948bbc808a24a0989c167f8973b643 Mon Sep 17 00:00:00 2001 From: Steffen Klassert Date: Wed, 22 Jun 2011 01:04:37 +0000 Subject: ipv4: Fix packet size calculation in __ip_append_data Git commit 59104f06 (ip: take care of last fragment in ip_append_data) added a check to see if we exceed the mtu when we add trailer_len. However, the mtu is already subtracted by the trailer length when the xfrm transfomation bundles are set up. So IPsec packets with mtu size get fragmented, or if the DF bit is set the packets will not be send even though they match the mtu perfectly fine. This patch actually reverts commit 59104f06. Signed-off-by: Steffen Klassert Signed-off-by: David S. Miller --- net/ipv4/ip_output.c | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) (limited to 'net') diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index a8024ea..6b894d4 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c @@ -888,12 +888,9 @@ alloc_new_skb: * because we have no idea what fragment will be * the last. */ - if (datalen == length + fraggap) { + if (datalen == length + fraggap) alloclen += rt->dst.trailer_len; - /* make sure mtu is not reached */ - if (datalen > mtu - fragheaderlen - rt->dst.trailer_len) - datalen -= ALIGN(rt->dst.trailer_len, 8); - } + if (transhdrlen) { skb = sock_alloc_send_skb(sk, alloclen + hh_len + 15, -- cgit v1.1 From 353e5c9abd900de3d1a40925386ffe4abf76111e Mon Sep 17 00:00:00 2001 From: Steffen Klassert Date: Wed, 22 Jun 2011 01:05:37 +0000 Subject: ipv4: Fix IPsec slowpath fragmentation problem ip_append_data() builds packets based on the mtu from dst_mtu(rt->dst.path). On IPsec the effective mtu is lower because we need to add the protocol headers and trailers later when we do the IPsec transformations. So after the IPsec transformations the packet might be too big, which leads to a slowpath fragmentation then. This patch fixes this by building the packets based on the lower IPsec mtu from dst_mtu(&rt->dst) and adapts the exthdr handling to this. Signed-off-by: Steffen Klassert Signed-off-by: David S. Miller --- net/ipv4/ip_output.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'net') diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index 6b894d4..4a7e16b 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c @@ -802,8 +802,6 @@ static int __ip_append_data(struct sock *sk, skb = skb_peek_tail(queue); exthdrlen = !skb ? rt->dst.header_len : 0; - length += exthdrlen; - transhdrlen += exthdrlen; mtu = cork->fragsize; hh_len = LL_RESERVED_SPACE(rt->dst.dev); @@ -883,6 +881,8 @@ alloc_new_skb: else alloclen = fraglen; + alloclen += exthdrlen; + /* The last fragment gets additional space at tail. * Note, with MSG_MORE we overallocate on fragments, * because we have no idea what fragment will be @@ -923,11 +923,11 @@ alloc_new_skb: /* * Find where to start putting bytes. */ - data = skb_put(skb, fraglen); + data = skb_put(skb, fraglen + exthdrlen); skb_set_network_header(skb, exthdrlen); skb->transport_header = (skb->network_header + fragheaderlen); - data += fragheaderlen; + data += fragheaderlen + exthdrlen; if (fraggap) { skb->csum = skb_copy_and_csum_bits( @@ -1061,7 +1061,7 @@ static int ip_setup_cork(struct sock *sk, struct inet_cork *cork, */ *rtp = NULL; cork->fragsize = inet->pmtudisc == IP_PMTUDISC_PROBE ? - rt->dst.dev->mtu : dst_mtu(rt->dst.path); + rt->dst.dev->mtu : dst_mtu(&rt->dst); cork->dst = &rt->dst; cork->length = 0; cork->tx_flags = ipc->tx_flags; -- cgit v1.1 From ed6e4ef836d425bc35e33bf20fcec95e68203afa Mon Sep 17 00:00:00 2001 From: Julian Anastasov Date: Sat, 18 Jun 2011 07:53:59 +0000 Subject: netfilter: Fix ip_route_me_harder triggering ip_rt_bug Avoid creating input routes with ip_route_me_harder. It does not work for locally generated packets. Instead, restrict sockets to provide valid saddr for output route (or unicast saddr for transparent proxy). For other traffic allow saddr to be unicast or local but if callers forget to check saddr type use 0 for the output route. The resulting handling should be: - REJECT TCP: - in INPUT we can provide addr_type = RTN_LOCAL but better allow rejecting traffic delivered with local route (no IP address => use RTN_UNSPEC to allow also RTN_UNICAST). - FORWARD: RTN_UNSPEC => allow RTN_LOCAL/RTN_UNICAST saddr, add fix to ignore RTN_BROADCAST and RTN_MULTICAST - OUTPUT: RTN_UNSPEC - NAT, mangle, ip_queue, nf_ip_reroute: RTN_UNSPEC in LOCAL_OUT - IPVS: - use RTN_LOCAL in LOCAL_OUT and FORWARD after SNAT to restrict saddr to be local Signed-off-by: Julian Anastasov Signed-off-by: David S. Miller --- net/ipv4/netfilter.c | 60 +++++++++++++++-------------------------- net/ipv4/netfilter/ipt_REJECT.c | 14 +++------- 2 files changed, 26 insertions(+), 48 deletions(-) (limited to 'net') diff --git a/net/ipv4/netfilter.c b/net/ipv4/netfilter.c index 4614bab..2e97e3e 100644 --- a/net/ipv4/netfilter.c +++ b/net/ipv4/netfilter.c @@ -17,51 +17,35 @@ int ip_route_me_harder(struct sk_buff *skb, unsigned addr_type) const struct iphdr *iph = ip_hdr(skb); struct rtable *rt; struct flowi4 fl4 = {}; - unsigned long orefdst; + __be32 saddr = iph->saddr; + __u8 flags = 0; unsigned int hh_len; - unsigned int type; - type = inet_addr_type(net, iph->saddr); - if (skb->sk && inet_sk(skb->sk)->transparent) - type = RTN_LOCAL; - if (addr_type == RTN_UNSPEC) - addr_type = type; + if (!skb->sk && addr_type != RTN_LOCAL) { + if (addr_type == RTN_UNSPEC) + addr_type = inet_addr_type(net, saddr); + if (addr_type == RTN_LOCAL || addr_type == RTN_UNICAST) + flags |= FLOWI_FLAG_ANYSRC; + else + saddr = 0; + } /* some non-standard hacks like ipt_REJECT.c:send_reset() can cause * packets with foreign saddr to appear on the NF_INET_LOCAL_OUT hook. */ - if (addr_type == RTN_LOCAL) { - fl4.daddr = iph->daddr; - if (type == RTN_LOCAL) - fl4.saddr = iph->saddr; - fl4.flowi4_tos = RT_TOS(iph->tos); - fl4.flowi4_oif = skb->sk ? skb->sk->sk_bound_dev_if : 0; - fl4.flowi4_mark = skb->mark; - fl4.flowi4_flags = skb->sk ? inet_sk_flowi_flags(skb->sk) : 0; - rt = ip_route_output_key(net, &fl4); - if (IS_ERR(rt)) - return -1; - - /* Drop old route. */ - skb_dst_drop(skb); - skb_dst_set(skb, &rt->dst); - } else { - /* non-local src, find valid iif to satisfy - * rp-filter when calling ip_route_input. */ - fl4.daddr = iph->saddr; - rt = ip_route_output_key(net, &fl4); - if (IS_ERR(rt)) - return -1; + fl4.daddr = iph->daddr; + fl4.saddr = saddr; + fl4.flowi4_tos = RT_TOS(iph->tos); + fl4.flowi4_oif = skb->sk ? skb->sk->sk_bound_dev_if : 0; + fl4.flowi4_mark = skb->mark; + fl4.flowi4_flags = skb->sk ? inet_sk_flowi_flags(skb->sk) : flags; + rt = ip_route_output_key(net, &fl4); + if (IS_ERR(rt)) + return -1; - orefdst = skb->_skb_refdst; - if (ip_route_input(skb, iph->daddr, iph->saddr, - RT_TOS(iph->tos), rt->dst.dev) != 0) { - dst_release(&rt->dst); - return -1; - } - dst_release(&rt->dst); - refdst_drop(orefdst); - } + /* Drop old route. */ + skb_dst_drop(skb); + skb_dst_set(skb, &rt->dst); if (skb_dst(skb)->error) return -1; diff --git a/net/ipv4/netfilter/ipt_REJECT.c b/net/ipv4/netfilter/ipt_REJECT.c index 1ff79e5..51f13f8 100644 --- a/net/ipv4/netfilter/ipt_REJECT.c +++ b/net/ipv4/netfilter/ipt_REJECT.c @@ -40,7 +40,6 @@ static void send_reset(struct sk_buff *oldskb, int hook) struct iphdr *niph; const struct tcphdr *oth; struct tcphdr _otcph, *tcph; - unsigned int addr_type; /* IP header checks: fragment. */ if (ip_hdr(oldskb)->frag_off & htons(IP_OFFSET)) @@ -55,6 +54,9 @@ static void send_reset(struct sk_buff *oldskb, int hook) if (oth->rst) return; + if (skb_rtable(oldskb)->rt_flags & (RTCF_BROADCAST | RTCF_MULTICAST)) + return; + /* Check checksum */ if (nf_ip_checksum(oldskb, hook, ip_hdrlen(oldskb), IPPROTO_TCP)) return; @@ -101,19 +103,11 @@ static void send_reset(struct sk_buff *oldskb, int hook) nskb->csum_start = (unsigned char *)tcph - nskb->head; nskb->csum_offset = offsetof(struct tcphdr, check); - addr_type = RTN_UNSPEC; - if (hook != NF_INET_FORWARD -#ifdef CONFIG_BRIDGE_NETFILTER - || (nskb->nf_bridge && nskb->nf_bridge->mask & BRNF_BRIDGED) -#endif - ) - addr_type = RTN_LOCAL; - /* ip_route_me_harder expects skb->dst to be set */ skb_dst_set_noref(nskb, skb_dst(oldskb)); nskb->protocol = htons(ETH_P_IP); - if (ip_route_me_harder(nskb, addr_type)) + if (ip_route_me_harder(nskb, RTN_UNSPEC)) goto free_nskb; niph->ttl = ip4_dst_hoplimit(skb_dst(nskb)); -- cgit v1.1 From 11d53b4990226247a950e2b1ccfa4cf93bfbc822 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Fri, 24 Jun 2011 15:23:34 -0700 Subject: ipv6: Don't change dst->flags using assignments. This blows away any flags already set in the entry. Signed-off-by: David S. Miller --- net/ipv6/route.c | 12 ++---------- 1 file changed, 2 insertions(+), 10 deletions(-) (limited to 'net') diff --git a/net/ipv6/route.c b/net/ipv6/route.c index de2b1de..c2af4da 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -1062,14 +1062,6 @@ struct dst_entry *icmp6_dst_alloc(struct net_device *dev, dst_metric_set(&rt->dst, RTAX_HOPLIMIT, 255); rt->dst.output = ip6_output; -#if 0 /* there's no chance to use these for ndisc */ - rt->dst.flags = ipv6_addr_type(addr) & IPV6_ADDR_UNICAST - ? DST_HOST - : 0; - ipv6_addr_copy(&rt->rt6i_dst.addr, addr); - rt->rt6i_dst.plen = 128; -#endif - spin_lock_bh(&icmp6_dst_lock); rt->dst.next = icmp6_dst_gc_list; icmp6_dst_gc_list = &rt->dst; @@ -1244,7 +1236,7 @@ int ip6_route_add(struct fib6_config *cfg) ipv6_addr_prefix(&rt->rt6i_dst.addr, &cfg->fc_dst, cfg->fc_dst_len); rt->rt6i_dst.plen = cfg->fc_dst_len; if (rt->rt6i_dst.plen == 128) - rt->dst.flags = DST_HOST; + rt->dst.flags |= DST_HOST; #ifdef CONFIG_IPV6_SUBTREES ipv6_addr_prefix(&rt->rt6i_src.addr, &cfg->fc_src, cfg->fc_src_len); @@ -2025,7 +2017,7 @@ struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev, in6_dev_hold(idev); - rt->dst.flags = DST_HOST; + rt->dst.flags |= DST_HOST; rt->dst.input = ip6_input; rt->dst.output = ip6_output; rt->rt6i_idev = idev; -- cgit v1.1 From 957c665f37007de93ccbe45902a23143724170d0 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Fri, 24 Jun 2011 15:25:00 -0700 Subject: ipv6: Don't put artificial limit on routing table size. IPV6, unlike IPV4, doesn't have a routing cache. Routing table entries, as well as clones made in response to route lookup requests, all live in the same table. And all of these things are together collected in the destination cache table for ipv6. This means that routing table entries count against the garbage collection limits, even though such entries cannot ever be reclaimed and are added explicitly by the administrator (rather than being created in response to lookups). Therefore it makes no sense to count ipv6 routing table entries against the GC limits. Add a DST_NOCOUNT destination cache entry flag, and skip the counting if it is set. Use this flag bit in ipv6 when adding routing table entries. Signed-off-by: David S. Miller --- net/core/dst.c | 6 ++++-- net/ipv6/route.c | 13 +++++++------ 2 files changed, 11 insertions(+), 8 deletions(-) (limited to 'net') diff --git a/net/core/dst.c b/net/core/dst.c index 9ccca03..6135f36 100644 --- a/net/core/dst.c +++ b/net/core/dst.c @@ -190,7 +190,8 @@ void *dst_alloc(struct dst_ops *ops, struct net_device *dev, dst->lastuse = jiffies; dst->flags = flags; dst->next = NULL; - dst_entries_add(ops, 1); + if (!(flags & DST_NOCOUNT)) + dst_entries_add(ops, 1); return dst; } EXPORT_SYMBOL(dst_alloc); @@ -243,7 +244,8 @@ again: neigh_release(neigh); } - dst_entries_add(dst->ops, -1); + if (!(dst->flags & DST_NOCOUNT)) + dst_entries_add(dst->ops, -1); if (dst->ops->destroy) dst->ops->destroy(dst); diff --git a/net/ipv6/route.c b/net/ipv6/route.c index c2af4da..0ef1f08 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -228,9 +228,10 @@ static struct rt6_info ip6_blk_hole_entry_template = { /* allocate dst with ip6_dst_ops */ static inline struct rt6_info *ip6_dst_alloc(struct dst_ops *ops, - struct net_device *dev) + struct net_device *dev, + int flags) { - struct rt6_info *rt = dst_alloc(ops, dev, 0, 0, 0); + struct rt6_info *rt = dst_alloc(ops, dev, 0, 0, flags); memset(&rt->rt6i_table, 0, sizeof(*rt) - sizeof(struct dst_entry)); @@ -1042,7 +1043,7 @@ struct dst_entry *icmp6_dst_alloc(struct net_device *dev, if (unlikely(idev == NULL)) return NULL; - rt = ip6_dst_alloc(&net->ipv6.ip6_dst_ops, dev); + rt = ip6_dst_alloc(&net->ipv6.ip6_dst_ops, dev, 0); if (unlikely(rt == NULL)) { in6_dev_put(idev); goto out; @@ -1206,7 +1207,7 @@ int ip6_route_add(struct fib6_config *cfg) goto out; } - rt = ip6_dst_alloc(&net->ipv6.ip6_dst_ops, NULL); + rt = ip6_dst_alloc(&net->ipv6.ip6_dst_ops, NULL, DST_NOCOUNT); if (rt == NULL) { err = -ENOMEM; @@ -1726,7 +1727,7 @@ static struct rt6_info * ip6_rt_copy(struct rt6_info *ort) { struct net *net = dev_net(ort->rt6i_dev); struct rt6_info *rt = ip6_dst_alloc(&net->ipv6.ip6_dst_ops, - ort->dst.dev); + ort->dst.dev, 0); if (rt) { rt->dst.input = ort->dst.input; @@ -2005,7 +2006,7 @@ struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev, { struct net *net = dev_net(idev->dev); struct rt6_info *rt = ip6_dst_alloc(&net->ipv6.ip6_dst_ops, - net->loopback_dev); + net->loopback_dev, 0); struct neighbour *neigh; if (rt == NULL) { -- cgit v1.1 From 12fdb4d3babcde43834c54dee22a69bb73adbae7 Mon Sep 17 00:00:00 2001 From: Steffen Klassert Date: Wed, 29 Jun 2011 23:18:20 +0000 Subject: xfrm: Remove family arg from xfrm_bundle_ok The family arg is not used any more, so remove it. Signed-off-by: Steffen Klassert Signed-off-by: David S. Miller --- net/xfrm/xfrm_policy.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index 9bec2e8..5ce74a3 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -50,7 +50,7 @@ static struct xfrm_policy_afinfo *xfrm_policy_get_afinfo(unsigned short family); static void xfrm_policy_put_afinfo(struct xfrm_policy_afinfo *afinfo); static void xfrm_init_pmtu(struct dst_entry *dst); static int stale_bundle(struct dst_entry *dst); -static int xfrm_bundle_ok(struct xfrm_dst *xdst, int family); +static int xfrm_bundle_ok(struct xfrm_dst *xdst); static struct xfrm_policy *__xfrm_policy_unlink(struct xfrm_policy *pol, @@ -2241,7 +2241,7 @@ static struct dst_entry *xfrm_dst_check(struct dst_entry *dst, u32 cookie) static int stale_bundle(struct dst_entry *dst) { - return !xfrm_bundle_ok((struct xfrm_dst *)dst, AF_UNSPEC); + return !xfrm_bundle_ok((struct xfrm_dst *)dst); } void xfrm_dst_ifdown(struct dst_entry *dst, struct net_device *dev) @@ -2313,7 +2313,7 @@ static void xfrm_init_pmtu(struct dst_entry *dst) * still valid. */ -static int xfrm_bundle_ok(struct xfrm_dst *first, int family) +static int xfrm_bundle_ok(struct xfrm_dst *first) { struct dst_entry *dst = &first->u.dst; struct xfrm_dst *last; -- cgit v1.1 From c146066ab80267c3305de5dda6a4083f06df9265 Mon Sep 17 00:00:00 2001 From: Steffen Klassert Date: Wed, 29 Jun 2011 23:19:32 +0000 Subject: ipv4: Don't use ufo handling on later transformed packets We might call ip_ufo_append_data() for packets that will be IPsec transformed later. This function should be used just for real udp packets. So we check for rt->dst.header_len which is only nonzero on IPsec handling and call ip_ufo_append_data() just if rt->dst.header_len is zero. Signed-off-by: Steffen Klassert Signed-off-by: David S. Miller --- net/ipv4/ip_output.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index 4a7e16b..84f26e8 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c @@ -828,7 +828,7 @@ static int __ip_append_data(struct sock *sk, cork->length += length; if (((length > mtu) || (skb && skb_is_gso(skb))) && (sk->sk_protocol == IPPROTO_UDP) && - (rt->dst.dev->features & NETIF_F_UFO)) { + (rt->dst.dev->features & NETIF_F_UFO) && !rt->dst.header_len) { err = ip_ufo_append_data(sk, queue, getfrag, from, length, hh_len, fragheaderlen, transhdrlen, mtu, flags); -- cgit v1.1 From b00897b881f775040653955fda99dcf7c167b382 Mon Sep 17 00:00:00 2001 From: Steffen Klassert Date: Wed, 29 Jun 2011 23:20:41 +0000 Subject: xfrm4: Don't call icmp_send on local error Calling icmp_send() on a local message size error leads to an incorrect update of the path mtu. So use ip_local_error() instead to notify the socket about the error. Signed-off-by: Steffen Klassert Signed-off-by: David S. Miller --- net/ipv4/xfrm4_output.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/ipv4/xfrm4_output.c b/net/ipv4/xfrm4_output.c index 2d51840..327a617 100644 --- a/net/ipv4/xfrm4_output.c +++ b/net/ipv4/xfrm4_output.c @@ -32,7 +32,12 @@ static int xfrm4_tunnel_check_size(struct sk_buff *skb) dst = skb_dst(skb); mtu = dst_mtu(dst); if (skb->len > mtu) { - icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu)); + if (skb->sk) + ip_local_error(skb->sk, EMSGSIZE, ip_hdr(skb)->daddr, + inet_sk(skb->sk)->inet_dport, mtu); + else + icmp_send(skb, ICMP_DEST_UNREACH, + ICMP_FRAG_NEEDED, htonl(mtu)); ret = -EMSGSIZE; } out: -- cgit v1.1 From c349a528cd47e2272ded0ea358363855e86180da Mon Sep 17 00:00:00 2001 From: Marcus Meissner Date: Mon, 4 Jul 2011 01:30:29 +0000 Subject: net: bind() fix error return on wrong address family Hi, Reinhard Max also pointed out that the error should EAFNOSUPPORT according to POSIX. The Linux manpages have it as EINVAL, some other OSes (Minix, HPUX, perhaps BSD) use EAFNOSUPPORT. Windows uses WSAEFAULT according to MSDN. Other protocols error values in their af bind() methods in current mainline git as far as a brief look shows: EAFNOSUPPORT: atm, appletalk, l2tp, llc, phonet, rxrpc EINVAL: ax25, bluetooth, decnet, econet, ieee802154, iucv, netlink, netrom, packet, rds, rose, unix, x25, No check?: can/raw, ipv6/raw, irda, l2tp/l2tp_ip Ciao, Marcus Signed-off-by: Marcus Meissner Cc: Reinhard Max Signed-off-by: David S. Miller --- net/ipv4/af_inet.c | 4 +++- net/ipv6/af_inet6.c | 2 +- 2 files changed, 4 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index eae1f67..ef1528a 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -465,8 +465,10 @@ int inet_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) if (addr_len < sizeof(struct sockaddr_in)) goto out; - if (addr->sin_family != AF_INET) + if (addr->sin_family != AF_INET) { + err = -EAFNOSUPPORT; goto out; + } chk_addr_ret = inet_addr_type(sock_net(sk), addr->sin_addr.s_addr); diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c index d450a2f..3b5669a 100644 --- a/net/ipv6/af_inet6.c +++ b/net/ipv6/af_inet6.c @@ -274,7 +274,7 @@ int inet6_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) return -EINVAL; if (addr->sin6_family != AF_INET6) - return -EINVAL; + return -EAFNOSUPPORT; addr_type = ipv6_addr_type(&addr->sin6_addr); if ((addr_type & IPV6_ADDR_MULTICAST) && sock->type == SOCK_STREAM) -- cgit v1.1 From 44661462ee1ee3c922754fc1f246867f0d01e7ea Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Tue, 5 Jul 2011 13:58:33 +0000 Subject: bridge: Always flood broadcast packets As is_multicast_ether_addr returns true on broadcast packets as well, we need to explicitly exclude broadcast packets so that they're always flooded. This wasn't an issue before as broadcast packets were considered to be an unregistered multicast group, which were always flooded. However, as we now only flood such packets to router ports, this is no longer acceptable. Reported-by: Michael Guntsche Signed-off-by: Herbert Xu Signed-off-by: David S. Miller --- net/bridge/br_device.c | 4 +++- net/bridge/br_input.c | 6 ++++-- 2 files changed, 7 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/bridge/br_device.c b/net/bridge/br_device.c index c188c80..32b8f9f 100644 --- a/net/bridge/br_device.c +++ b/net/bridge/br_device.c @@ -49,7 +49,9 @@ netdev_tx_t br_dev_xmit(struct sk_buff *skb, struct net_device *dev) skb_pull(skb, ETH_HLEN); rcu_read_lock(); - if (is_multicast_ether_addr(dest)) { + if (is_broadcast_ether_addr(dest)) + br_flood_deliver(br, skb); + else if (is_multicast_ether_addr(dest)) { if (unlikely(netpoll_tx_running(dev))) { br_flood_deliver(br, skb); goto out; diff --git a/net/bridge/br_input.c b/net/bridge/br_input.c index f3ac1e8..f06ee39 100644 --- a/net/bridge/br_input.c +++ b/net/bridge/br_input.c @@ -60,7 +60,7 @@ int br_handle_frame_finish(struct sk_buff *skb) br = p->br; br_fdb_update(br, p, eth_hdr(skb)->h_source); - if (is_multicast_ether_addr(dest) && + if (!is_broadcast_ether_addr(dest) && is_multicast_ether_addr(dest) && br_multicast_rcv(br, p, skb)) goto drop; @@ -77,7 +77,9 @@ int br_handle_frame_finish(struct sk_buff *skb) dst = NULL; - if (is_multicast_ether_addr(dest)) { + if (is_broadcast_ether_addr(dest)) + skb2 = skb; + else if (is_multicast_ether_addr(dest)) { mdst = br_mdb_get(br, skb); if (mdst || BR_INPUT_SKB_CB_MROUTERS_ONLY(skb)) { if ((mdst && mdst->mglist) || -- cgit v1.1 From 712ae51afd55b20c04c5383d02ba5d10233313b1 Mon Sep 17 00:00:00 2001 From: Shan Wei Date: Tue, 5 Jul 2011 20:43:12 -0700 Subject: net: vlan: enable soft features regardless of underlying device MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit If gso/gro feature of underlying device is turned off, then new created vlan device never can turn gso/gro on. Although underlying device don't support TSO, we still should use software segments for vlan device. Signed-off-by: Shan Wei Signed-off-by: David S. Miller --- net/8021q/vlan_dev.c | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'net') diff --git a/net/8021q/vlan_dev.c b/net/8021q/vlan_dev.c index 7ea5cf9..86bff9b 100644 --- a/net/8021q/vlan_dev.c +++ b/net/8021q/vlan_dev.c @@ -586,9 +586,14 @@ static void vlan_dev_uninit(struct net_device *dev) static u32 vlan_dev_fix_features(struct net_device *dev, u32 features) { struct net_device *real_dev = vlan_dev_info(dev)->real_dev; + u32 old_features = features; features &= real_dev->features; features &= real_dev->vlan_features; + + if (old_features & NETIF_F_SOFT_FEATURES) + features |= old_features & NETIF_F_SOFT_FEATURES; + if (dev_ethtool_get_rx_csum(real_dev)) features |= NETIF_F_RXCSUM; features |= NETIF_F_LLTX; -- cgit v1.1