diff options
Diffstat (limited to 'net/ipv4')
-rw-r--r-- | net/ipv4/af_inet.c | 24 | ||||
-rw-r--r-- | net/ipv4/devinet.c | 3 | ||||
-rw-r--r-- | net/ipv4/fib_frontend.c | 1 | ||||
-rw-r--r-- | net/ipv4/fib_hash.c | 1 | ||||
-rw-r--r-- | net/ipv4/fib_lookup.h | 3 | ||||
-rw-r--r-- | net/ipv4/fib_rules.c | 4 | ||||
-rw-r--r-- | net/ipv4/fib_semantics.c | 3 | ||||
-rw-r--r-- | net/ipv4/fib_trie.c | 3 | ||||
-rw-r--r-- | net/ipv4/igmp.c | 2 | ||||
-rw-r--r-- | net/ipv4/inet_diag.c | 2 | ||||
-rw-r--r-- | net/ipv4/inet_timewait_sock.c | 23 | ||||
-rw-r--r-- | net/ipv4/ip_gre.c | 1 | ||||
-rw-r--r-- | net/ipv4/ip_input.c | 13 | ||||
-rw-r--r-- | net/ipv4/ip_output.c | 12 | ||||
-rw-r--r-- | net/ipv4/ip_sockglue.c | 84 | ||||
-rw-r--r-- | net/ipv4/ipconfig.c | 41 | ||||
-rw-r--r-- | net/ipv4/ipip.c | 1 | ||||
-rw-r--r-- | net/ipv4/proc.c | 10 | ||||
-rw-r--r-- | net/ipv4/syncookies.c | 5 | ||||
-rw-r--r-- | net/ipv4/tcp.c | 47 | ||||
-rw-r--r-- | net/ipv4/tcp_input.c | 100 | ||||
-rw-r--r-- | net/ipv4/tcp_ipv4.c | 4 | ||||
-rw-r--r-- | net/ipv4/tcp_output.c | 2 |
23 files changed, 247 insertions, 142 deletions
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index 7f03373..d873621 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -375,6 +375,7 @@ lookup_protocol: inet->uc_ttl = -1; inet->mc_loop = 1; inet->mc_ttl = 1; + inet->mc_all = 1; inet->mc_index = 0; inet->mc_list = NULL; @@ -1003,8 +1004,6 @@ void inet_register_protosw(struct inet_protosw *p) out: spin_unlock_bh(&inetsw_lock); - synchronize_net(); - return; out_permanent: @@ -1248,13 +1247,20 @@ static struct sk_buff **inet_gro_receive(struct sk_buff **head, struct sk_buff **pp = NULL; struct sk_buff *p; struct iphdr *iph; + unsigned int hlen; + unsigned int off; + unsigned int id; int flush = 1; int proto; - int id; - iph = skb_gro_header(skb, sizeof(*iph)); - if (unlikely(!iph)) - goto out; + off = skb_gro_offset(skb); + hlen = off + sizeof(*iph); + iph = skb_gro_header_fast(skb, off); + if (skb_gro_header_hard(skb, hlen)) { + iph = skb_gro_header_slow(skb, hlen, off); + if (unlikely(!iph)) + goto out; + } proto = iph->protocol & (MAX_INET_PROTOS - 1); @@ -1269,9 +1275,9 @@ static struct sk_buff **inet_gro_receive(struct sk_buff **head, if (unlikely(ip_fast_csum((u8 *)iph, iph->ihl))) goto out_unlock; - flush = ntohs(iph->tot_len) != skb_gro_len(skb) || - iph->frag_off != htons(IP_DF); - id = ntohs(iph->id); + id = ntohl(*(u32 *)&iph->id); + flush = (u16)((ntohl(*(u32 *)iph) ^ skb_gro_len(skb)) | (id ^ IP_DF)); + id >>= 16; for (p = *head; p; p = p->next) { struct iphdr *iph2; diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c index 126bb91..3863c3a 100644 --- a/net/ipv4/devinet.c +++ b/net/ipv4/devinet.c @@ -1347,7 +1347,8 @@ static int devinet_sysctl_forward(ctl_table *ctl, int write, struct net *net = ctl->extra2; if (valp != &IPV4_DEVCONF_DFLT(net, FORWARDING)) { - rtnl_lock(); + if (!rtnl_trylock()) + return restart_syscall(); if (valp == &IPV4_DEVCONF_ALL(net, FORWARDING)) { inet_forward_change(net); } else if (*valp) { diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c index cafcc49..e2f9505 100644 --- a/net/ipv4/fib_frontend.c +++ b/net/ipv4/fib_frontend.c @@ -40,7 +40,6 @@ #include <net/route.h> #include <net/tcp.h> #include <net/sock.h> -#include <net/icmp.h> #include <net/arp.h> #include <net/ip_fib.h> #include <net/rtnetlink.h> diff --git a/net/ipv4/fib_hash.c b/net/ipv4/fib_hash.c index ded8c44..ecd3945 100644 --- a/net/ipv4/fib_hash.c +++ b/net/ipv4/fib_hash.c @@ -263,7 +263,6 @@ fn_hash_lookup(struct fib_table *tb, const struct flowi *flp, struct fib_result err = fib_semantic_match(&f->fn_alias, flp, res, - f->fn_key, fz->fz_mask, fz->fz_order); if (err <= 0) goto out; diff --git a/net/ipv4/fib_lookup.h b/net/ipv4/fib_lookup.h index 2c1623d..637b133 100644 --- a/net/ipv4/fib_lookup.h +++ b/net/ipv4/fib_lookup.h @@ -22,8 +22,7 @@ struct fib_alias { /* Exported by fib_semantics.c */ extern int fib_semantic_match(struct list_head *head, const struct flowi *flp, - struct fib_result *res, __be32 zone, __be32 mask, - int prefixlen); + struct fib_result *res, int prefixlen); extern void fib_release_info(struct fib_info *); extern struct fib_info *fib_create_info(struct fib_config *cfg); extern int fib_nh_match(struct fib_config *cfg, struct fib_info *fi); diff --git a/net/ipv4/fib_rules.c b/net/ipv4/fib_rules.c index 6080d71..92d9d97 100644 --- a/net/ipv4/fib_rules.c +++ b/net/ipv4/fib_rules.c @@ -134,7 +134,7 @@ static const struct nla_policy fib4_rule_policy[FRA_MAX+1] = { }; static int fib4_rule_configure(struct fib_rule *rule, struct sk_buff *skb, - struct nlmsghdr *nlh, struct fib_rule_hdr *frh, + struct fib_rule_hdr *frh, struct nlattr **tb) { struct net *net = sock_net(skb->sk); @@ -209,7 +209,7 @@ static int fib4_rule_compare(struct fib_rule *rule, struct fib_rule_hdr *frh, } static int fib4_rule_fill(struct fib_rule *rule, struct sk_buff *skb, - struct nlmsghdr *nlh, struct fib_rule_hdr *frh) + struct fib_rule_hdr *frh) { struct fib4_rule *rule4 = (struct fib4_rule *) rule; diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c index f831df5..9b096d6 100644 --- a/net/ipv4/fib_semantics.c +++ b/net/ipv4/fib_semantics.c @@ -866,8 +866,7 @@ failure: /* Note! fib_semantic_match intentionally uses RCU list functions. */ int fib_semantic_match(struct list_head *head, const struct flowi *flp, - struct fib_result *res, __be32 zone, __be32 mask, - int prefixlen) + struct fib_result *res, int prefixlen) { struct fib_alias *fa; int nh_sel = 0; diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c index 33c7c85..538d2a9 100644 --- a/net/ipv4/fib_trie.c +++ b/net/ipv4/fib_trie.c @@ -1351,8 +1351,7 @@ static int check_leaf(struct trie *t, struct leaf *l, if (l->key != (key & ntohl(mask))) continue; - err = fib_semantic_match(&li->falh, flp, res, - htonl(l->key), mask, plen); + err = fib_semantic_match(&li->falh, flp, res, plen); #ifdef CONFIG_IP_FIB_TRIE_STATS if (err <= 0) diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c index 9eb6219..e6058a5 100644 --- a/net/ipv4/igmp.c +++ b/net/ipv4/igmp.c @@ -2196,7 +2196,7 @@ int ip_mc_sf_allow(struct sock *sk, __be32 loc_addr, __be32 rmt_addr, int dif) break; } if (!pmc) - return 1; + return inet->mc_all; psl = pmc->sflist; if (!psl) return pmc->sfmode == MCAST_EXCLUDE; diff --git a/net/ipv4/inet_diag.c b/net/ipv4/inet_diag.c index 588a779..b0b2735 100644 --- a/net/ipv4/inet_diag.c +++ b/net/ipv4/inet_diag.c @@ -198,8 +198,6 @@ static int inet_twsk_diag_fill(struct inet_timewait_sock *tw, tmo = 0; r->idiag_family = tw->tw_family; - r->idiag_state = tw->tw_state; - r->idiag_timer = 0; r->idiag_retrans = 0; r->id.idiag_if = tw->tw_bound_dev_if; r->id.idiag_cookie[0] = (u32)(unsigned long)tw; diff --git a/net/ipv4/inet_timewait_sock.c b/net/ipv4/inet_timewait_sock.c index 8554d0e..68a8d89 100644 --- a/net/ipv4/inet_timewait_sock.c +++ b/net/ipv4/inet_timewait_sock.c @@ -49,19 +49,22 @@ static void __inet_twsk_kill(struct inet_timewait_sock *tw, inet_twsk_put(tw); } -void inet_twsk_put(struct inet_timewait_sock *tw) +static noinline void inet_twsk_free(struct inet_timewait_sock *tw) { - if (atomic_dec_and_test(&tw->tw_refcnt)) { - struct module *owner = tw->tw_prot->owner; - twsk_destructor((struct sock *)tw); + struct module *owner = tw->tw_prot->owner; + twsk_destructor((struct sock *)tw); #ifdef SOCK_REFCNT_DEBUG - printk(KERN_DEBUG "%s timewait_sock %p released\n", - tw->tw_prot->name, tw); + pr_debug("%s timewait_sock %p released\n", tw->tw_prot->name, tw); #endif - release_net(twsk_net(tw)); - kmem_cache_free(tw->tw_prot->twsk_prot->twsk_slab, tw); - module_put(owner); - } + release_net(twsk_net(tw)); + kmem_cache_free(tw->tw_prot->twsk_prot->twsk_slab, tw); + module_put(owner); +} + +void inet_twsk_put(struct inet_timewait_sock *tw) +{ + if (atomic_dec_and_test(&tw->tw_refcnt)) + inet_twsk_free(tw); } EXPORT_SYMBOL_GPL(inet_twsk_put); diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c index e62510d..77436e2 100644 --- a/net/ipv4/ip_gre.c +++ b/net/ipv4/ip_gre.c @@ -1238,6 +1238,7 @@ static void ipgre_tunnel_setup(struct net_device *dev) dev->iflink = 0; dev->addr_len = 4; dev->features |= NETIF_F_NETNS_LOCAL; + dev->priv_flags &= ~IFF_XMIT_DST_RELEASE; } static int ipgre_tunnel_init(struct net_device *dev) diff --git a/net/ipv4/ip_input.c b/net/ipv4/ip_input.c index 1a58a6f..40f6206 100644 --- a/net/ipv4/ip_input.c +++ b/net/ipv4/ip_input.c @@ -358,10 +358,12 @@ static int ip_rcv_finish(struct sk_buff *skb) goto drop; rt = skb->rtable; - if (rt->rt_type == RTN_MULTICAST) - IP_INC_STATS_BH(dev_net(rt->u.dst.dev), IPSTATS_MIB_INMCASTPKTS); - else if (rt->rt_type == RTN_BROADCAST) - IP_INC_STATS_BH(dev_net(rt->u.dst.dev), IPSTATS_MIB_INBCASTPKTS); + if (rt->rt_type == RTN_MULTICAST) { + IP_UPD_PO_STATS_BH(dev_net(rt->u.dst.dev), IPSTATS_MIB_INMCAST, + skb->len); + } else if (rt->rt_type == RTN_BROADCAST) + IP_UPD_PO_STATS_BH(dev_net(rt->u.dst.dev), IPSTATS_MIB_INBCAST, + skb->len); return dst_input(skb); @@ -384,7 +386,8 @@ int ip_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt, if (skb->pkt_type == PACKET_OTHERHOST) goto drop; - IP_INC_STATS_BH(dev_net(dev), IPSTATS_MIB_INRECEIVES); + + IP_UPD_PO_STATS_BH(dev_net(dev), IPSTATS_MIB_IN, skb->len); if ((skb = skb_share_check(skb, GFP_ATOMIC)) == NULL) { IP_INC_STATS_BH(dev_net(dev), IPSTATS_MIB_INDISCARDS); diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index 3e7e910..ea19c37 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c @@ -181,10 +181,10 @@ static inline int ip_finish_output2(struct sk_buff *skb) struct net_device *dev = dst->dev; unsigned int hh_len = LL_RESERVED_SPACE(dev); - if (rt->rt_type == RTN_MULTICAST) - IP_INC_STATS(dev_net(dev), IPSTATS_MIB_OUTMCASTPKTS); - else if (rt->rt_type == RTN_BROADCAST) - IP_INC_STATS(dev_net(dev), IPSTATS_MIB_OUTBCASTPKTS); + if (rt->rt_type == RTN_MULTICAST) { + IP_UPD_PO_STATS(dev_net(dev), IPSTATS_MIB_OUTMCAST, skb->len); + } else if (rt->rt_type == RTN_BROADCAST) + IP_UPD_PO_STATS(dev_net(dev), IPSTATS_MIB_OUTBCAST, skb->len); /* Be paranoid, rather than too clever. */ if (unlikely(skb_headroom(skb) < hh_len && dev->header_ops)) { @@ -244,7 +244,7 @@ int ip_mc_output(struct sk_buff *skb) /* * If the indicated interface is up and running, send the packet. */ - IP_INC_STATS(dev_net(dev), IPSTATS_MIB_OUTREQUESTS); + IP_UPD_PO_STATS(dev_net(dev), IPSTATS_MIB_OUT, skb->len); skb->dev = dev; skb->protocol = htons(ETH_P_IP); @@ -298,7 +298,7 @@ int ip_output(struct sk_buff *skb) { struct net_device *dev = skb->dst->dev; - IP_INC_STATS(dev_net(dev), IPSTATS_MIB_OUTREQUESTS); + IP_UPD_PO_STATS(dev_net(dev), IPSTATS_MIB_OUT, skb->len); skb->dev = dev; skb->protocol = htons(ETH_P_IP); diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c index 43c0585..cb49936 100644 --- a/net/ipv4/ip_sockglue.c +++ b/net/ipv4/ip_sockglue.c @@ -157,38 +157,39 @@ void ip_cmsg_recv(struct msghdr *msg, struct sk_buff *skb) /* Ordered by supposed usage frequency */ if (flags & 1) ip_cmsg_recv_pktinfo(msg, skb); - if ((flags>>=1) == 0) + if ((flags >>= 1) == 0) return; if (flags & 1) ip_cmsg_recv_ttl(msg, skb); - if ((flags>>=1) == 0) + if ((flags >>= 1) == 0) return; if (flags & 1) ip_cmsg_recv_tos(msg, skb); - if ((flags>>=1) == 0) + if ((flags >>= 1) == 0) return; if (flags & 1) ip_cmsg_recv_opts(msg, skb); - if ((flags>>=1) == 0) + if ((flags >>= 1) == 0) return; if (flags & 1) ip_cmsg_recv_retopts(msg, skb); - if ((flags>>=1) == 0) + if ((flags >>= 1) == 0) return; if (flags & 1) ip_cmsg_recv_security(msg, skb); - if ((flags>>=1) == 0) + if ((flags >>= 1) == 0) return; if (flags & 1) ip_cmsg_recv_dstaddr(msg, skb); } +EXPORT_SYMBOL(ip_cmsg_recv); int ip_cmsg_send(struct net *net, struct msghdr *msg, struct ipcm_cookie *ipc) { @@ -203,7 +204,8 @@ int ip_cmsg_send(struct net *net, struct msghdr *msg, struct ipcm_cookie *ipc) switch (cmsg->cmsg_type) { case IP_RETOPTS: err = cmsg->cmsg_len - CMSG_ALIGN(sizeof(struct cmsghdr)); - err = ip_options_get(net, &ipc->opt, CMSG_DATA(cmsg), err < 40 ? err : 40); + err = ip_options_get(net, &ipc->opt, CMSG_DATA(cmsg), + err < 40 ? err : 40); if (err) return err; break; @@ -238,7 +240,8 @@ int ip_cmsg_send(struct net *net, struct msghdr *msg, struct ipcm_cookie *ipc) struct ip_ra_chain *ip_ra_chain; DEFINE_RWLOCK(ip_ra_lock); -int ip_ra_control(struct sock *sk, unsigned char on, void (*destructor)(struct sock *)) +int ip_ra_control(struct sock *sk, unsigned char on, + void (*destructor)(struct sock *)) { struct ip_ra_chain *ra, *new_ra, **rap; @@ -248,7 +251,7 @@ int ip_ra_control(struct sock *sk, unsigned char on, void (*destructor)(struct s new_ra = on ? kmalloc(sizeof(*new_ra), GFP_KERNEL) : NULL; write_lock_bh(&ip_ra_lock); - for (rap = &ip_ra_chain; (ra=*rap) != NULL; rap = &ra->next) { + for (rap = &ip_ra_chain; (ra = *rap) != NULL; rap = &ra->next) { if (ra->sk == sk) { if (on) { write_unlock_bh(&ip_ra_lock); @@ -416,7 +419,8 @@ int ip_recv_error(struct sock *sk, struct msghdr *msg, int len) /* Reset and regenerate socket error */ spin_lock_bh(&sk->sk_error_queue.lock); sk->sk_err = 0; - if ((skb2 = skb_peek(&sk->sk_error_queue)) != NULL) { + skb2 = skb_peek(&sk->sk_error_queue); + if (skb2 != NULL) { sk->sk_err = SKB_EXT_ERR(skb2)->ee.ee_errno; spin_unlock_bh(&sk->sk_error_queue.lock); sk->sk_error_report(sk); @@ -431,8 +435,8 @@ out: /* - * Socket option code for IP. This is the end of the line after any TCP,UDP etc options on - * an IP socket. + * Socket option code for IP. This is the end of the line after any + * TCP,UDP etc options on an IP socket. */ static int do_ip_setsockopt(struct sock *sk, int level, @@ -449,6 +453,7 @@ static int do_ip_setsockopt(struct sock *sk, int level, (1<<IP_ROUTER_ALERT) | (1<<IP_FREEBIND) | (1<<IP_PASSSEC) | (1<<IP_TRANSPARENT))) || optname == IP_MULTICAST_TTL || + optname == IP_MULTICAST_ALL || optname == IP_MULTICAST_LOOP || optname == IP_RECVORIGDSTADDR) { if (optlen >= sizeof(int)) { @@ -474,7 +479,7 @@ static int do_ip_setsockopt(struct sock *sk, int level, switch (optname) { case IP_OPTIONS: { - struct ip_options * opt = NULL; + struct ip_options *opt = NULL; if (optlen > 40 || optlen < 0) goto e_inval; err = ip_options_get_from_user(sock_net(sk), &opt, @@ -556,9 +561,9 @@ static int do_ip_setsockopt(struct sock *sk, int level, } break; case IP_TTL: - if (optlen<1) + if (optlen < 1) goto e_inval; - if (val != -1 && (val < 1 || val>255)) + if (val != -1 && (val < 0 || val > 255)) goto e_inval; inet->uc_ttl = val; break; @@ -570,7 +575,7 @@ static int do_ip_setsockopt(struct sock *sk, int level, inet->hdrincl = val ? 1 : 0; break; case IP_MTU_DISCOVER: - if (val<0 || val>3) + if (val < 0 || val > 3) goto e_inval; inet->pmtudisc = val; break; @@ -582,7 +587,7 @@ static int do_ip_setsockopt(struct sock *sk, int level, case IP_MULTICAST_TTL: if (sk->sk_type == SOCK_STREAM) goto e_inval; - if (optlen<1) + if (optlen < 1) goto e_inval; if (val == -1) val = 1; @@ -591,7 +596,7 @@ static int do_ip_setsockopt(struct sock *sk, int level, inet->mc_ttl = val; break; case IP_MULTICAST_LOOP: - if (optlen<1) + if (optlen < 1) goto e_inval; inet->mc_loop = !!val; break; @@ -613,7 +618,8 @@ static int do_ip_setsockopt(struct sock *sk, int level, } else { memset(&mreq, 0, sizeof(mreq)); if (optlen >= sizeof(struct in_addr) && - copy_from_user(&mreq.imr_address, optval, sizeof(struct in_addr))) + copy_from_user(&mreq.imr_address, optval, + sizeof(struct in_addr))) break; } @@ -677,7 +683,6 @@ static int do_ip_setsockopt(struct sock *sk, int level, } case IP_MSFILTER: { - extern int sysctl_igmp_max_msf; struct ip_msfilter *msf; if (optlen < IP_MSFILTER_SIZE(0)) @@ -831,7 +836,6 @@ static int do_ip_setsockopt(struct sock *sk, int level, } case MCAST_MSFILTER: { - extern int sysctl_igmp_max_msf; struct sockaddr_in *psin; struct ip_msfilter *msf = NULL; struct group_filter *gsf = NULL; @@ -849,9 +853,9 @@ static int do_ip_setsockopt(struct sock *sk, int level, break; } err = -EFAULT; - if (copy_from_user(gsf, optval, optlen)) { + if (copy_from_user(gsf, optval, optlen)) goto mc_msf_out; - } + /* numsrc >= (4G-140)/128 overflow in 32 bits */ if (gsf->gf_numsrc >= 0x1ffffff || gsf->gf_numsrc > sysctl_igmp_max_msf) { @@ -879,7 +883,7 @@ static int do_ip_setsockopt(struct sock *sk, int level, msf->imsf_fmode = gsf->gf_fmode; msf->imsf_numsrc = gsf->gf_numsrc; err = -EADDRNOTAVAIL; - for (i=0; i<gsf->gf_numsrc; ++i) { + for (i = 0; i < gsf->gf_numsrc; ++i) { psin = (struct sockaddr_in *)&gsf->gf_slist[i]; if (psin->sin_family != AF_INET) @@ -890,17 +894,24 @@ static int do_ip_setsockopt(struct sock *sk, int level, gsf = NULL; err = ip_mc_msfilter(sk, msf, ifindex); - mc_msf_out: +mc_msf_out: kfree(msf); kfree(gsf); break; } + case IP_MULTICAST_ALL: + if (optlen < 1) + goto e_inval; + if (val != 0 && val != 1) + goto e_inval; + inet->mc_all = val; + break; case IP_ROUTER_ALERT: err = ip_ra_control(sk, val ? 1 : 0, NULL); break; case IP_FREEBIND: - if (optlen<1) + if (optlen < 1) goto e_inval; inet->freebind = !!val; break; @@ -957,6 +968,7 @@ int ip_setsockopt(struct sock *sk, int level, #endif return err; } +EXPORT_SYMBOL(ip_setsockopt); #ifdef CONFIG_COMPAT int compat_ip_setsockopt(struct sock *sk, int level, int optname, @@ -986,13 +998,12 @@ int compat_ip_setsockopt(struct sock *sk, int level, int optname, #endif return err; } - EXPORT_SYMBOL(compat_ip_setsockopt); #endif /* - * Get the options. Note for future reference. The GET of IP options gets the - * _received_ ones. The set sets the _sent_ ones. + * Get the options. Note for future reference. The GET of IP options gets + * the _received_ ones. The set sets the _sent_ ones. */ static int do_ip_getsockopt(struct sock *sk, int level, int optname, @@ -1143,10 +1154,14 @@ static int do_ip_getsockopt(struct sock *sk, int level, int optname, return -EFAULT; } err = ip_mc_gsfget(sk, &gsf, - (struct group_filter __user *)optval, optlen); + (struct group_filter __user *)optval, + optlen); release_sock(sk); return err; } + case IP_MULTICAST_ALL: + val = inet->mc_all; + break; case IP_PKTOPTIONS: { struct msghdr msg; @@ -1187,7 +1202,7 @@ static int do_ip_getsockopt(struct sock *sk, int level, int optname, } release_sock(sk); - if (len < sizeof(int) && len > 0 && val>=0 && val<=255) { + if (len < sizeof(int) && len > 0 && val >= 0 && val <= 255) { unsigned char ucval = (unsigned char)val; len = 1; if (put_user(len, optlen)) @@ -1230,6 +1245,7 @@ int ip_getsockopt(struct sock *sk, int level, #endif return err; } +EXPORT_SYMBOL(ip_getsockopt); #ifdef CONFIG_COMPAT int compat_ip_getsockopt(struct sock *sk, int level, int optname, @@ -1262,11 +1278,5 @@ int compat_ip_getsockopt(struct sock *sk, int level, int optname, #endif return err; } - EXPORT_SYMBOL(compat_ip_getsockopt); #endif - -EXPORT_SYMBOL(ip_cmsg_recv); - -EXPORT_SYMBOL(ip_getsockopt); -EXPORT_SYMBOL(ip_setsockopt); diff --git a/net/ipv4/ipconfig.c b/net/ipv4/ipconfig.c index 88bf051..f8d04c2 100644 --- a/net/ipv4/ipconfig.c +++ b/net/ipv4/ipconfig.c @@ -160,6 +160,9 @@ static char user_dev_name[IFNAMSIZ] __initdata = { 0, }; /* Protocols supported by available interfaces */ static int ic_proto_have_if __initdata = 0; +/* MTU for boot device */ +static int ic_dev_mtu __initdata = 0; + #ifdef IPCONFIG_DYNAMIC static DEFINE_SPINLOCK(ic_recv_lock); static volatile int ic_got_reply __initdata = 0; /* Proto(s) that replied */ @@ -286,7 +289,7 @@ set_sockaddr(struct sockaddr_in *sin, __be32 addr, __be16 port) sin->sin_port = port; } -static int __init ic_dev_ioctl(unsigned int cmd, struct ifreq *arg) +static int __init ic_devinet_ioctl(unsigned int cmd, struct ifreq *arg) { int res; @@ -297,6 +300,17 @@ static int __init ic_dev_ioctl(unsigned int cmd, struct ifreq *arg) return res; } +static int __init ic_dev_ioctl(unsigned int cmd, struct ifreq *arg) +{ + int res; + + mm_segment_t oldfs = get_fs(); + set_fs(get_ds()); + res = dev_ioctl(&init_net, cmd, (struct ifreq __user *) arg); + set_fs(oldfs); + return res; +} + static int __init ic_route_ioctl(unsigned int cmd, struct rtentry *arg) { int res; @@ -321,20 +335,31 @@ static int __init ic_setup_if(void) memset(&ir, 0, sizeof(ir)); strcpy(ir.ifr_ifrn.ifrn_name, ic_dev->name); set_sockaddr(sin, ic_myaddr, 0); - if ((err = ic_dev_ioctl(SIOCSIFADDR, &ir)) < 0) { + if ((err = ic_devinet_ioctl(SIOCSIFADDR, &ir)) < 0) { printk(KERN_ERR "IP-Config: Unable to set interface address (%d).\n", err); return -1; } set_sockaddr(sin, ic_netmask, 0); - if ((err = ic_dev_ioctl(SIOCSIFNETMASK, &ir)) < 0) { + if ((err = ic_devinet_ioctl(SIOCSIFNETMASK, &ir)) < 0) { printk(KERN_ERR "IP-Config: Unable to set interface netmask (%d).\n", err); return -1; } set_sockaddr(sin, ic_myaddr | ~ic_netmask, 0); - if ((err = ic_dev_ioctl(SIOCSIFBRDADDR, &ir)) < 0) { + if ((err = ic_devinet_ioctl(SIOCSIFBRDADDR, &ir)) < 0) { printk(KERN_ERR "IP-Config: Unable to set interface broadcast address (%d).\n", err); return -1; } + /* Handle the case where we need non-standard MTU on the boot link (a network + * using jumbo frames, for instance). If we can't set the mtu, don't error + * out, we'll try to muddle along. + */ + if (ic_dev_mtu != 0) { + strcpy(ir.ifr_name, ic_dev->name); + ir.ifr_mtu = ic_dev_mtu; + if ((err = ic_dev_ioctl(SIOCSIFMTU, &ir)) < 0) + printk(KERN_ERR "IP-Config: Unable to set interface mtu to %d (%d).\n", + ic_dev_mtu, err); + } return 0; } @@ -623,6 +648,7 @@ ic_dhcp_init_options(u8 *options) 12, /* Host name */ 15, /* Domain name */ 17, /* Boot path */ + 26, /* MTU */ 40, /* NIS domain name */ }; @@ -798,6 +824,7 @@ static void __init ic_do_bootp_ext(u8 *ext) { u8 servers; int i; + u16 mtu; #ifdef IPCONFIG_DEBUG u8 *c; @@ -837,6 +864,10 @@ static void __init ic_do_bootp_ext(u8 *ext) if (!root_server_path[0]) ic_bootp_string(root_server_path, ext+1, *ext, sizeof(root_server_path)); break; + case 26: /* Interface MTU */ + memcpy(&mtu, ext+1, sizeof(mtu)); + ic_dev_mtu = ntohs(mtu); + break; case 40: /* NIS Domain name (_not_ DNS) */ ic_bootp_string(utsname()->domainname, ext+1, *ext, __NEW_UTS_LEN); break; @@ -1403,6 +1434,8 @@ static int __init ip_auto_config(void) printk(",\n bootserver=%pI4", &ic_servaddr); printk(", rootserver=%pI4", &root_server_addr); printk(", rootpath=%s", root_server_path); + if (ic_dev_mtu) + printk(", mtu=%d", ic_dev_mtu); printk("\n"); #endif /* !SILENT */ diff --git a/net/ipv4/ipip.c b/net/ipv4/ipip.c index 9054139..bb2f1b1 100644 --- a/net/ipv4/ipip.c +++ b/net/ipv4/ipip.c @@ -713,6 +713,7 @@ static void ipip_tunnel_setup(struct net_device *dev) dev->iflink = 0; dev->addr_len = 4; dev->features |= NETIF_F_NETNS_LOCAL; + dev->priv_flags &= ~IFF_XMIT_DST_RELEASE; } static void ipip_tunnel_init(struct net_device *dev) diff --git a/net/ipv4/proc.c b/net/ipv4/proc.c index cf0cdee..f25542c 100644 --- a/net/ipv4/proc.c +++ b/net/ipv4/proc.c @@ -90,14 +90,14 @@ static const struct file_operations sockstat_seq_fops = { /* snmp items */ static const struct snmp_mib snmp4_ipstats_list[] = { - SNMP_MIB_ITEM("InReceives", IPSTATS_MIB_INRECEIVES), + SNMP_MIB_ITEM("InReceives", IPSTATS_MIB_INPKTS), SNMP_MIB_ITEM("InHdrErrors", IPSTATS_MIB_INHDRERRORS), SNMP_MIB_ITEM("InAddrErrors", IPSTATS_MIB_INADDRERRORS), SNMP_MIB_ITEM("ForwDatagrams", IPSTATS_MIB_OUTFORWDATAGRAMS), SNMP_MIB_ITEM("InUnknownProtos", IPSTATS_MIB_INUNKNOWNPROTOS), SNMP_MIB_ITEM("InDiscards", IPSTATS_MIB_INDISCARDS), SNMP_MIB_ITEM("InDelivers", IPSTATS_MIB_INDELIVERS), - SNMP_MIB_ITEM("OutRequests", IPSTATS_MIB_OUTREQUESTS), + SNMP_MIB_ITEM("OutRequests", IPSTATS_MIB_OUTPKTS), SNMP_MIB_ITEM("OutDiscards", IPSTATS_MIB_OUTDISCARDS), SNMP_MIB_ITEM("OutNoRoutes", IPSTATS_MIB_OUTNOROUTES), SNMP_MIB_ITEM("ReasmTimeout", IPSTATS_MIB_REASMTIMEOUT), @@ -118,6 +118,12 @@ static const struct snmp_mib snmp4_ipextstats_list[] = { SNMP_MIB_ITEM("OutMcastPkts", IPSTATS_MIB_OUTMCASTPKTS), SNMP_MIB_ITEM("InBcastPkts", IPSTATS_MIB_INBCASTPKTS), SNMP_MIB_ITEM("OutBcastPkts", IPSTATS_MIB_OUTBCASTPKTS), + SNMP_MIB_ITEM("InOctets", IPSTATS_MIB_INOCTETS), + SNMP_MIB_ITEM("OutOctets", IPSTATS_MIB_OUTOCTETS), + SNMP_MIB_ITEM("InMcastOctets", IPSTATS_MIB_INMCASTOCTETS), + SNMP_MIB_ITEM("OutMcastOctets", IPSTATS_MIB_OUTMCASTOCTETS), + SNMP_MIB_ITEM("InBcastOctets", IPSTATS_MIB_INBCASTOCTETS), + SNMP_MIB_ITEM("OutBcastOctets", IPSTATS_MIB_OUTBCASTOCTETS), SNMP_MIB_SENTINEL }; diff --git a/net/ipv4/syncookies.c b/net/ipv4/syncookies.c index b35a950..cd2b97f 100644 --- a/net/ipv4/syncookies.c +++ b/net/ipv4/syncookies.c @@ -161,13 +161,12 @@ static __u16 const msstab[] = { */ __u32 cookie_v4_init_sequence(struct sock *sk, struct sk_buff *skb, __u16 *mssp) { - struct tcp_sock *tp = tcp_sk(sk); const struct iphdr *iph = ip_hdr(skb); const struct tcphdr *th = tcp_hdr(skb); int mssind; const __u16 mss = *mssp; - tp->last_synq_overflow = jiffies; + tcp_synq_overflow(sk); /* XXX sort msstab[] by probability? Binary search? */ for (mssind = 0; mss > msstab[mssind + 1]; mssind++) @@ -268,7 +267,7 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb, if (!sysctl_tcp_syncookies || !th->ack) goto out; - if (time_after(jiffies, tp->last_synq_overflow + TCP_TIMEOUT_INIT) || + if (tcp_synq_no_recent_overflow(sk) || (mss = cookie_check(skb, cookie)) == 0) { NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_SYNCOOKIESFAILED); goto out; diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 7a0f0b2..17b89c5 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -439,12 +439,14 @@ int tcp_ioctl(struct sock *sk, int cmd, unsigned long arg) !tp->urg_data || before(tp->urg_seq, tp->copied_seq) || !before(tp->urg_seq, tp->rcv_nxt)) { + struct sk_buff *skb; + answ = tp->rcv_nxt - tp->copied_seq; /* Subtract 1, if FIN is in queue. */ - if (answ && !skb_queue_empty(&sk->sk_receive_queue)) - answ -= - tcp_hdr((struct sk_buff *)sk->sk_receive_queue.prev)->fin; + skb = skb_peek_tail(&sk->sk_receive_queue); + if (answ && skb) + answ -= tcp_hdr(skb)->fin; } else answ = tp->urg_seq - tp->copied_seq; release_sock(sk); @@ -1382,11 +1384,7 @@ int tcp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, /* Next get a buffer. */ - skb = skb_peek(&sk->sk_receive_queue); - do { - if (!skb) - break; - + skb_queue_walk(&sk->sk_receive_queue, skb) { /* Now that we have two receive queues this * shouldn't happen. */ @@ -1403,8 +1401,7 @@ int tcp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, if (tcp_hdr(skb)->fin) goto found_fin_ok; WARN_ON(!(flags & MSG_PEEK)); - skb = skb->next; - } while (skb != (struct sk_buff *)&sk->sk_receive_queue); + } /* Well, if we have backlog, try to process it now yet. */ @@ -2518,20 +2515,30 @@ struct sk_buff **tcp_gro_receive(struct sk_buff **head, struct sk_buff *skb) unsigned int thlen; unsigned int flags; unsigned int mss = 1; + unsigned int hlen; + unsigned int off; int flush = 1; int i; - th = skb_gro_header(skb, sizeof(*th)); - if (unlikely(!th)) - goto out; + off = skb_gro_offset(skb); + hlen = off + sizeof(*th); + th = skb_gro_header_fast(skb, off); + if (skb_gro_header_hard(skb, hlen)) { + th = skb_gro_header_slow(skb, hlen, off); + if (unlikely(!th)) + goto out; + } thlen = th->doff * 4; if (thlen < sizeof(*th)) goto out; - th = skb_gro_header(skb, thlen); - if (unlikely(!th)) - goto out; + hlen = off + thlen; + if (skb_gro_header_hard(skb, hlen)) { + th = skb_gro_header_slow(skb, hlen, off); + if (unlikely(!th)) + goto out; + } skb_gro_pull(skb, thlen); @@ -2544,7 +2551,7 @@ struct sk_buff **tcp_gro_receive(struct sk_buff **head, struct sk_buff *skb) th2 = tcp_hdr(p); - if ((th->source ^ th2->source) | (th->dest ^ th2->dest)) { + if (*(u32 *)&th->source ^ *(u32 *)&th2->source) { NAPI_GRO_CB(p)->same_flow = 0; continue; } @@ -2559,14 +2566,14 @@ found: flush |= flags & TCP_FLAG_CWR; flush |= (flags ^ tcp_flag_word(th2)) & ~(TCP_FLAG_CWR | TCP_FLAG_FIN | TCP_FLAG_PSH); - flush |= (th->ack_seq ^ th2->ack_seq) | (th->window ^ th2->window); - for (i = sizeof(*th); !flush && i < thlen; i += 4) + flush |= th->ack_seq ^ th2->ack_seq; + for (i = sizeof(*th); i < thlen; i += 4) flush |= *(u32 *)((u8 *)th + i) ^ *(u32 *)((u8 *)th2 + i); mss = skb_shinfo(p)->gso_size; - flush |= (len > mss) | !len; + flush |= (len - 1) >= mss; flush |= (ntohl(th2->seq) + skb_gro_len(p)) ^ ntohl(th->seq); if (flush || skb_gro_receive(head, skb)) { diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index eec3e6f..2bdb0da 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -77,7 +77,7 @@ int sysctl_tcp_window_scaling __read_mostly = 1; int sysctl_tcp_sack __read_mostly = 1; int sysctl_tcp_fack __read_mostly = 1; int sysctl_tcp_reordering __read_mostly = TCP_FASTRETRANS_THRESH; -int sysctl_tcp_ecn __read_mostly; +int sysctl_tcp_ecn __read_mostly = 2; int sysctl_tcp_dsack __read_mostly = 1; int sysctl_tcp_app_win __read_mostly = 31; int sysctl_tcp_adv_win_scale __read_mostly = 2; @@ -4426,7 +4426,7 @@ drop: } __skb_queue_head(&tp->out_of_order_queue, skb); } else { - struct sk_buff *skb1 = tp->out_of_order_queue.prev; + struct sk_buff *skb1 = skb_peek_tail(&tp->out_of_order_queue); u32 seq = TCP_SKB_CB(skb)->seq; u32 end_seq = TCP_SKB_CB(skb)->end_seq; @@ -4443,15 +4443,18 @@ drop: } /* Find place to insert this segment. */ - do { + while (1) { if (!after(TCP_SKB_CB(skb1)->seq, seq)) break; - } while ((skb1 = skb1->prev) != - (struct sk_buff *)&tp->out_of_order_queue); + if (skb_queue_is_first(&tp->out_of_order_queue, skb1)) { + skb1 = NULL; + break; + } + skb1 = skb_queue_prev(&tp->out_of_order_queue, skb1); + } /* Do skb overlap to previous one? */ - if (skb1 != (struct sk_buff *)&tp->out_of_order_queue && - before(seq, TCP_SKB_CB(skb1)->end_seq)) { + if (skb1 && before(seq, TCP_SKB_CB(skb1)->end_seq)) { if (!after(end_seq, TCP_SKB_CB(skb1)->end_seq)) { /* All the bits are present. Drop. */ __kfree_skb(skb); @@ -4463,15 +4466,26 @@ drop: tcp_dsack_set(sk, seq, TCP_SKB_CB(skb1)->end_seq); } else { - skb1 = skb1->prev; + if (skb_queue_is_first(&tp->out_of_order_queue, + skb1)) + skb1 = NULL; + else + skb1 = skb_queue_prev( + &tp->out_of_order_queue, + skb1); } } - __skb_queue_after(&tp->out_of_order_queue, skb1, skb); + if (!skb1) + __skb_queue_head(&tp->out_of_order_queue, skb); + else + __skb_queue_after(&tp->out_of_order_queue, skb1, skb); /* And clean segments covered by new one as whole. */ - while ((skb1 = skb->next) != - (struct sk_buff *)&tp->out_of_order_queue && - after(end_seq, TCP_SKB_CB(skb1)->seq)) { + while (!skb_queue_is_last(&tp->out_of_order_queue, skb)) { + skb1 = skb_queue_next(&tp->out_of_order_queue, skb); + + if (!after(end_seq, TCP_SKB_CB(skb1)->seq)) + break; if (before(end_seq, TCP_SKB_CB(skb1)->end_seq)) { tcp_dsack_extend(sk, TCP_SKB_CB(skb1)->seq, end_seq); @@ -4492,7 +4506,10 @@ add_sack: static struct sk_buff *tcp_collapse_one(struct sock *sk, struct sk_buff *skb, struct sk_buff_head *list) { - struct sk_buff *next = skb->next; + struct sk_buff *next = NULL; + + if (!skb_queue_is_last(list, skb)) + next = skb_queue_next(list, skb); __skb_unlink(skb, list); __kfree_skb(skb); @@ -4503,6 +4520,9 @@ static struct sk_buff *tcp_collapse_one(struct sock *sk, struct sk_buff *skb, /* Collapse contiguous sequence of skbs head..tail with * sequence numbers start..end. + * + * If tail is NULL, this means until the end of the list. + * * Segments with FIN/SYN are not collapsed (only because this * simplifies code) */ @@ -4511,15 +4531,23 @@ tcp_collapse(struct sock *sk, struct sk_buff_head *list, struct sk_buff *head, struct sk_buff *tail, u32 start, u32 end) { - struct sk_buff *skb; + struct sk_buff *skb, *n; + bool end_of_skbs; /* First, check that queue is collapsible and find * the point where collapsing can be useful. */ - for (skb = head; skb != tail;) { + skb = head; +restart: + end_of_skbs = true; + skb_queue_walk_from_safe(list, skb, n) { + if (skb == tail) + break; /* No new bits? It is possible on ofo queue. */ if (!before(start, TCP_SKB_CB(skb)->end_seq)) { skb = tcp_collapse_one(sk, skb, list); - continue; + if (!skb) + break; + goto restart; } /* The first skb to collapse is: @@ -4529,16 +4557,24 @@ tcp_collapse(struct sock *sk, struct sk_buff_head *list, */ if (!tcp_hdr(skb)->syn && !tcp_hdr(skb)->fin && (tcp_win_from_space(skb->truesize) > skb->len || - before(TCP_SKB_CB(skb)->seq, start) || - (skb->next != tail && - TCP_SKB_CB(skb)->end_seq != TCP_SKB_CB(skb->next)->seq))) + before(TCP_SKB_CB(skb)->seq, start))) { + end_of_skbs = false; break; + } + + if (!skb_queue_is_last(list, skb)) { + struct sk_buff *next = skb_queue_next(list, skb); + if (next != tail && + TCP_SKB_CB(skb)->end_seq != TCP_SKB_CB(next)->seq) { + end_of_skbs = false; + break; + } + } /* Decided to skip this, advance start seq. */ start = TCP_SKB_CB(skb)->end_seq; - skb = skb->next; } - if (skb == tail || tcp_hdr(skb)->syn || tcp_hdr(skb)->fin) + if (end_of_skbs || tcp_hdr(skb)->syn || tcp_hdr(skb)->fin) return; while (before(start, end)) { @@ -4583,7 +4619,8 @@ tcp_collapse(struct sock *sk, struct sk_buff_head *list, } if (!before(start, TCP_SKB_CB(skb)->end_seq)) { skb = tcp_collapse_one(sk, skb, list); - if (skb == tail || + if (!skb || + skb == tail || tcp_hdr(skb)->syn || tcp_hdr(skb)->fin) return; @@ -4610,17 +4647,21 @@ static void tcp_collapse_ofo_queue(struct sock *sk) head = skb; for (;;) { - skb = skb->next; + struct sk_buff *next = NULL; + + if (!skb_queue_is_last(&tp->out_of_order_queue, skb)) + next = skb_queue_next(&tp->out_of_order_queue, skb); + skb = next; /* Segment is terminated when we see gap or when * we are at the end of all the queue. */ - if (skb == (struct sk_buff *)&tp->out_of_order_queue || + if (!skb || after(TCP_SKB_CB(skb)->seq, end) || before(TCP_SKB_CB(skb)->end_seq, start)) { tcp_collapse(sk, &tp->out_of_order_queue, head, skb, start, end); head = skb; - if (skb == (struct sk_buff *)&tp->out_of_order_queue) + if (!skb) break; /* Start new segment */ start = TCP_SKB_CB(skb)->seq; @@ -4681,10 +4722,11 @@ static int tcp_prune_queue(struct sock *sk) tp->rcv_ssthresh = min(tp->rcv_ssthresh, 4U * tp->advmss); tcp_collapse_ofo_queue(sk); - tcp_collapse(sk, &sk->sk_receive_queue, - sk->sk_receive_queue.next, - (struct sk_buff *)&sk->sk_receive_queue, - tp->copied_seq, tp->rcv_nxt); + if (!skb_queue_empty(&sk->sk_receive_queue)) + tcp_collapse(sk, &sk->sk_receive_queue, + skb_peek(&sk->sk_receive_queue), + NULL, + tp->copied_seq, tp->rcv_nxt); sk_mem_reclaim(sk); if (atomic_read(&sk->sk_rmem_alloc) <= sk->sk_rcvbuf) diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 5d427f8..fc79e341 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -1593,7 +1593,7 @@ process: #endif { if (!tcp_prequeue(sk, skb)) - ret = tcp_v4_do_rcv(sk, skb); + ret = tcp_v4_do_rcv(sk, skb); } } else sk_add_backlog(sk, skb); @@ -2343,7 +2343,7 @@ void tcp4_proc_exit(void) struct sk_buff **tcp4_gro_receive(struct sk_buff **head, struct sk_buff *skb) { - struct iphdr *iph = ip_hdr(skb); + struct iphdr *iph = skb_gro_network_header(skb); switch (skb->ip_summed) { case CHECKSUM_COMPLETE: diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 59aec60..79c39dc 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -288,7 +288,7 @@ static inline void TCP_ECN_send_syn(struct sock *sk, struct sk_buff *skb) struct tcp_sock *tp = tcp_sk(sk); tp->ecn_flags = 0; - if (sysctl_tcp_ecn) { + if (sysctl_tcp_ecn == 1) { TCP_SKB_CB(skb)->flags |= TCPCB_FLAG_ECE | TCPCB_FLAG_CWR; tp->ecn_flags = TCP_ECN_OK; } |