summaryrefslogtreecommitdiffstats
path: root/net/ipv4
diff options
context:
space:
mode:
Diffstat (limited to 'net/ipv4')
-rw-r--r--net/ipv4/af_inet.c11
-rw-r--r--net/ipv4/ah4.c8
-rw-r--r--net/ipv4/arp.c8
-rw-r--r--net/ipv4/igmp.c16
-rw-r--r--net/ipv4/inet_connection_sock.c17
-rw-r--r--net/ipv4/inet_diag.c31
-rw-r--r--net/ipv4/ip_forward.c2
-rw-r--r--net/ipv4/ip_gre.c4
-rw-r--r--net/ipv4/ip_options.c4
-rw-r--r--net/ipv4/ip_sockglue.c35
-rw-r--r--net/ipv4/ipconfig.c15
-rw-r--r--net/ipv4/ipip.c2
-rw-r--r--net/ipv4/netfilter/Kconfig1
-rw-r--r--net/ipv4/ping.c2
-rw-r--r--net/ipv4/proc.c9
-rw-r--r--net/ipv4/raw.c10
-rw-r--r--net/ipv4/route.c143
-rw-r--r--net/ipv4/tcp.c3
-rw-r--r--net/ipv4/tcp_input.c44
-rw-r--r--net/ipv4/tcp_ipv4.c1
-rw-r--r--net/ipv4/tcp_minisocks.c6
-rw-r--r--net/ipv4/tcp_output.c2
-rw-r--r--net/ipv4/udp.c6
23 files changed, 214 insertions, 166 deletions
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index 1b5096a..15dc4c4 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -1250,7 +1250,8 @@ out:
return err;
}
-static struct sk_buff *inet_gso_segment(struct sk_buff *skb, u32 features)
+static struct sk_buff *inet_gso_segment(struct sk_buff *skb,
+ netdev_features_t features)
{
struct sk_buff *segs = ERR_PTR(-EINVAL);
struct iphdr *iph;
@@ -1572,9 +1573,9 @@ static __net_init int ipv4_mib_init_net(struct net *net)
sizeof(struct icmp_mib),
__alignof__(struct icmp_mib)) < 0)
goto err_icmp_mib;
- if (snmp_mib_init((void __percpu **)net->mib.icmpmsg_statistics,
- sizeof(struct icmpmsg_mib),
- __alignof__(struct icmpmsg_mib)) < 0)
+ net->mib.icmpmsg_statistics = kzalloc(sizeof(struct icmpmsg_mib),
+ GFP_KERNEL);
+ if (!net->mib.icmpmsg_statistics)
goto err_icmpmsg_mib;
tcp_mib_init(net);
@@ -1598,7 +1599,7 @@ err_tcp_mib:
static __net_exit void ipv4_mib_exit_net(struct net *net)
{
- snmp_mib_free((void __percpu **)net->mib.icmpmsg_statistics);
+ kfree(net->mib.icmpmsg_statistics);
snmp_mib_free((void __percpu **)net->mib.icmp_statistics);
snmp_mib_free((void __percpu **)net->mib.udplite_statistics);
snmp_mib_free((void __percpu **)net->mib.udp_statistics);
diff --git a/net/ipv4/ah4.c b/net/ipv4/ah4.c
index c1f4154..36d1440 100644
--- a/net/ipv4/ah4.c
+++ b/net/ipv4/ah4.c
@@ -136,8 +136,6 @@ static void ah_output_done(struct crypto_async_request *base, int err)
memcpy(top_iph+1, iph+1, top_iph->ihl*4 - sizeof(struct iphdr));
}
- err = ah->nexthdr;
-
kfree(AH_SKB_CB(skb)->tmp);
xfrm_output_resume(skb, err);
}
@@ -264,12 +262,12 @@ static void ah_input_done(struct crypto_async_request *base, int err)
if (err)
goto out;
+ err = ah->nexthdr;
+
skb->network_header += ah_hlen;
memcpy(skb_network_header(skb), work_iph, ihl);
__skb_pull(skb, ah_hlen + ihl);
skb_set_transport_header(skb, -ihl);
-
- err = ah->nexthdr;
out:
kfree(AH_SKB_CB(skb)->tmp);
xfrm_input_resume(skb, err);
@@ -371,8 +369,6 @@ static int ah_input(struct xfrm_state *x, struct sk_buff *skb)
if (err == -EINPROGRESS)
goto out;
- if (err == -EBUSY)
- err = NET_XMIT_DROP;
goto out_free;
}
diff --git a/net/ipv4/arp.c b/net/ipv4/arp.c
index 96a164a..5c29ac5 100644
--- a/net/ipv4/arp.c
+++ b/net/ipv4/arp.c
@@ -177,7 +177,7 @@ struct neigh_table arp_tbl = {
.gc_staletime = 60 * HZ,
.reachable_time = 30 * HZ,
.delay_probe_time = 5 * HZ,
- .queue_len = 3,
+ .queue_len_bytes = 64*1024,
.ucast_probes = 3,
.mcast_probes = 3,
.anycast_delay = 1 * HZ,
@@ -592,16 +592,18 @@ struct sk_buff *arp_create(int type, int ptype, __be32 dest_ip,
struct sk_buff *skb;
struct arphdr *arp;
unsigned char *arp_ptr;
+ int hlen = LL_RESERVED_SPACE(dev);
+ int tlen = dev->needed_tailroom;
/*
* Allocate a buffer
*/
- skb = alloc_skb(arp_hdr_len(dev) + LL_ALLOCATED_SPACE(dev), GFP_ATOMIC);
+ skb = alloc_skb(arp_hdr_len(dev) + hlen + tlen, GFP_ATOMIC);
if (skb == NULL)
return NULL;
- skb_reserve(skb, LL_RESERVED_SPACE(dev));
+ skb_reserve(skb, hlen);
skb_reset_network_header(skb);
arp = (struct arphdr *) skb_put(skb, arp_hdr_len(dev));
skb->dev = dev;
diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c
index c7472ef..313ad93 100644
--- a/net/ipv4/igmp.c
+++ b/net/ipv4/igmp.c
@@ -304,9 +304,11 @@ static struct sk_buff *igmpv3_newpack(struct net_device *dev, int size)
struct igmpv3_report *pig;
struct net *net = dev_net(dev);
struct flowi4 fl4;
+ int hlen = LL_RESERVED_SPACE(dev);
+ int tlen = dev->needed_tailroom;
while (1) {
- skb = alloc_skb(size + LL_ALLOCATED_SPACE(dev),
+ skb = alloc_skb(size + hlen + tlen,
GFP_ATOMIC | __GFP_NOWARN);
if (skb)
break;
@@ -327,7 +329,7 @@ static struct sk_buff *igmpv3_newpack(struct net_device *dev, int size)
skb_dst_set(skb, &rt->dst);
skb->dev = dev;
- skb_reserve(skb, LL_RESERVED_SPACE(dev));
+ skb_reserve(skb, hlen);
skb_reset_network_header(skb);
pip = ip_hdr(skb);
@@ -647,6 +649,7 @@ static int igmp_send_report(struct in_device *in_dev, struct ip_mc_list *pmc,
__be32 group = pmc ? pmc->multiaddr : 0;
struct flowi4 fl4;
__be32 dst;
+ int hlen, tlen;
if (type == IGMPV3_HOST_MEMBERSHIP_REPORT)
return igmpv3_send_report(in_dev, pmc);
@@ -661,7 +664,9 @@ static int igmp_send_report(struct in_device *in_dev, struct ip_mc_list *pmc,
if (IS_ERR(rt))
return -1;
- skb = alloc_skb(IGMP_SIZE+LL_ALLOCATED_SPACE(dev), GFP_ATOMIC);
+ hlen = LL_RESERVED_SPACE(dev);
+ tlen = dev->needed_tailroom;
+ skb = alloc_skb(IGMP_SIZE + hlen + tlen, GFP_ATOMIC);
if (skb == NULL) {
ip_rt_put(rt);
return -1;
@@ -669,7 +674,7 @@ static int igmp_send_report(struct in_device *in_dev, struct ip_mc_list *pmc,
skb_dst_set(skb, &rt->dst);
- skb_reserve(skb, LL_RESERVED_SPACE(dev));
+ skb_reserve(skb, hlen);
skb_reset_network_header(skb);
iph = ip_hdr(skb);
@@ -1716,7 +1721,8 @@ static int ip_mc_add_src(struct in_device *in_dev, __be32 *pmca, int sfmode,
if (err) {
int j;
- pmc->sfcount[sfmode]--;
+ if (!delta)
+ pmc->sfcount[sfmode]--;
for (j=0; j<i; j++)
(void) ip_mc_del1_src(pmc, sfmode, &psfsrc[j]);
} else if (isexclude != (pmc->sfcount[MCAST_EXCLUDE] != 0)) {
diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c
index c14d88a..a598768 100644
--- a/net/ipv4/inet_connection_sock.c
+++ b/net/ipv4/inet_connection_sock.c
@@ -588,10 +588,19 @@ void inet_csk_reqsk_queue_prune(struct sock *parent,
}
EXPORT_SYMBOL_GPL(inet_csk_reqsk_queue_prune);
-struct sock *inet_csk_clone(struct sock *sk, const struct request_sock *req,
- const gfp_t priority)
+/**
+ * inet_csk_clone_lock - clone an inet socket, and lock its clone
+ * @sk: the socket to clone
+ * @req: request_sock
+ * @priority: for allocation (%GFP_KERNEL, %GFP_ATOMIC, etc)
+ *
+ * Caller must unlock socket even in error path (bh_unlock_sock(newsk))
+ */
+struct sock *inet_csk_clone_lock(const struct sock *sk,
+ const struct request_sock *req,
+ const gfp_t priority)
{
- struct sock *newsk = sk_clone(sk, priority);
+ struct sock *newsk = sk_clone_lock(sk, priority);
if (newsk != NULL) {
struct inet_connection_sock *newicsk = inet_csk(newsk);
@@ -615,7 +624,7 @@ struct sock *inet_csk_clone(struct sock *sk, const struct request_sock *req,
}
return newsk;
}
-EXPORT_SYMBOL_GPL(inet_csk_clone);
+EXPORT_SYMBOL_GPL(inet_csk_clone_lock);
/*
* At this point, there should be no process reference to this
diff --git a/net/ipv4/inet_diag.c b/net/ipv4/inet_diag.c
index f5e2bda..0a46c54 100644
--- a/net/ipv4/inet_diag.c
+++ b/net/ipv4/inet_diag.c
@@ -108,9 +108,6 @@ static int inet_csk_diag_fill(struct sock *sk,
icsk->icsk_ca_ops->name);
}
- if ((ext & (1 << (INET_DIAG_TOS - 1))) && (sk->sk_family != AF_INET6))
- RTA_PUT_U8(skb, INET_DIAG_TOS, inet->tos);
-
r->idiag_family = sk->sk_family;
r->idiag_state = sk->sk_state;
r->idiag_timer = 0;
@@ -125,16 +122,20 @@ static int inet_csk_diag_fill(struct sock *sk,
r->id.idiag_src[0] = inet->inet_rcv_saddr;
r->id.idiag_dst[0] = inet->inet_daddr;
+ /* IPv6 dual-stack sockets use inet->tos for IPv4 connections,
+ * hence this needs to be included regardless of socket family.
+ */
+ if (ext & (1 << (INET_DIAG_TOS - 1)))
+ RTA_PUT_U8(skb, INET_DIAG_TOS, inet->tos);
+
#if defined(CONFIG_IPV6) || defined (CONFIG_IPV6_MODULE)
if (r->idiag_family == AF_INET6) {
const struct ipv6_pinfo *np = inet6_sk(sk);
- ipv6_addr_copy((struct in6_addr *)r->id.idiag_src,
- &np->rcv_saddr);
- ipv6_addr_copy((struct in6_addr *)r->id.idiag_dst,
- &np->daddr);
- if (ext & (1 << (INET_DIAG_TOS - 1)))
- RTA_PUT_U8(skb, INET_DIAG_TOS, np->tclass);
+ *(struct in6_addr *)r->id.idiag_src = np->rcv_saddr;
+ *(struct in6_addr *)r->id.idiag_dst = np->daddr;
+ if (ext & (1 << (INET_DIAG_TCLASS - 1)))
+ RTA_PUT_U8(skb, INET_DIAG_TCLASS, np->tclass);
}
#endif
@@ -224,10 +225,8 @@ static int inet_twsk_diag_fill(struct inet_timewait_sock *tw,
const struct inet6_timewait_sock *tw6 =
inet6_twsk((struct sock *)tw);
- ipv6_addr_copy((struct in6_addr *)r->id.idiag_src,
- &tw6->tw_v6_rcv_saddr);
- ipv6_addr_copy((struct in6_addr *)r->id.idiag_dst,
- &tw6->tw_v6_daddr);
+ *(struct in6_addr *)r->id.idiag_src = tw6->tw_v6_rcv_saddr;
+ *(struct in6_addr *)r->id.idiag_dst = tw6->tw_v6_daddr;
}
#endif
nlh->nlmsg_len = skb_tail_pointer(skb) - previous_tail;
@@ -603,10 +602,8 @@ static int inet_diag_fill_req(struct sk_buff *skb, struct sock *sk,
r->idiag_inode = 0;
#if defined(CONFIG_IPV6) || defined (CONFIG_IPV6_MODULE)
if (r->idiag_family == AF_INET6) {
- ipv6_addr_copy((struct in6_addr *)r->id.idiag_src,
- &inet6_rsk(req)->loc_addr);
- ipv6_addr_copy((struct in6_addr *)r->id.idiag_dst,
- &inet6_rsk(req)->rmt_addr);
+ *(struct in6_addr *)r->id.idiag_src = inet6_rsk(req)->loc_addr;
+ *(struct in6_addr *)r->id.idiag_dst = inet6_rsk(req)->rmt_addr;
}
#endif
nlh->nlmsg_len = skb_tail_pointer(skb) - b;
diff --git a/net/ipv4/ip_forward.c b/net/ipv4/ip_forward.c
index 3b34d1c..29a07b6 100644
--- a/net/ipv4/ip_forward.c
+++ b/net/ipv4/ip_forward.c
@@ -84,7 +84,7 @@ int ip_forward(struct sk_buff *skb)
rt = skb_rtable(skb);
- if (opt->is_strictroute && ip_hdr(skb)->daddr != rt->rt_gateway)
+ if (opt->is_strictroute && opt->nexthop != rt->rt_gateway)
goto sr_failed;
if (unlikely(skb->len > dst_mtu(&rt->dst) && !skb_is_gso(skb) &&
diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c
index d55110e..2b32296 100644
--- a/net/ipv4/ip_gre.c
+++ b/net/ipv4/ip_gre.c
@@ -171,7 +171,7 @@ struct pcpu_tstats {
unsigned long rx_bytes;
unsigned long tx_packets;
unsigned long tx_bytes;
-};
+} __attribute__((aligned(4*sizeof(unsigned long))));
static struct net_device_stats *ipgre_get_stats(struct net_device *dev)
{
@@ -835,6 +835,8 @@ static netdev_tx_t ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev
if (skb_headroom(skb) < max_headroom || skb_shared(skb)||
(skb_cloned(skb) && !skb_clone_writable(skb, 0))) {
struct sk_buff *new_skb = skb_realloc_headroom(skb, max_headroom);
+ if (max_headroom > dev->needed_headroom)
+ dev->needed_headroom = max_headroom;
if (!new_skb) {
ip_rt_put(rt);
dev->stats.tx_dropped++;
diff --git a/net/ipv4/ip_options.c b/net/ipv4/ip_options.c
index ec93335..1e60f76 100644
--- a/net/ipv4/ip_options.c
+++ b/net/ipv4/ip_options.c
@@ -568,12 +568,13 @@ void ip_forward_options(struct sk_buff *skb)
) {
if (srrptr + 3 > srrspace)
break;
- if (memcmp(&ip_hdr(skb)->daddr, &optptr[srrptr-1], 4) == 0)
+ if (memcmp(&opt->nexthop, &optptr[srrptr-1], 4) == 0)
break;
}
if (srrptr + 3 <= srrspace) {
opt->is_changed = 1;
ip_rt_get_source(&optptr[srrptr-1], skb, rt);
+ ip_hdr(skb)->daddr = opt->nexthop;
optptr[2] = srrptr+4;
} else if (net_ratelimit())
printk(KERN_CRIT "ip_forward(): Argh! Destination lost!\n");
@@ -640,6 +641,7 @@ int ip_options_rcv_srr(struct sk_buff *skb)
}
if (srrptr <= srrspace) {
opt->srr_is_hit = 1;
+ opt->nexthop = nexthop;
opt->is_changed = 1;
}
return 0;
diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c
index 09ff51b..80d5fa4 100644
--- a/net/ipv4/ip_sockglue.c
+++ b/net/ipv4/ip_sockglue.c
@@ -55,20 +55,13 @@
/*
* SOL_IP control messages.
*/
+#define PKTINFO_SKB_CB(__skb) ((struct in_pktinfo *)((__skb)->cb))
static void ip_cmsg_recv_pktinfo(struct msghdr *msg, struct sk_buff *skb)
{
- struct in_pktinfo info;
- struct rtable *rt = skb_rtable(skb);
+ struct in_pktinfo info = *PKTINFO_SKB_CB(skb);
info.ipi_addr.s_addr = ip_hdr(skb)->daddr;
- if (rt) {
- info.ipi_ifindex = rt->rt_iif;
- info.ipi_spec_dst.s_addr = rt->rt_spec_dst;
- } else {
- info.ipi_ifindex = 0;
- info.ipi_spec_dst.s_addr = 0;
- }
put_cmsg(msg, SOL_IP, IP_PKTINFO, sizeof(info), &info);
}
@@ -992,20 +985,28 @@ e_inval:
}
/**
- * ip_queue_rcv_skb - Queue an skb into sock receive queue
+ * ipv4_pktinfo_prepare - transfert some info from rtable to skb
* @sk: socket
* @skb: buffer
*
- * Queues an skb into socket receive queue. If IP_CMSG_PKTINFO option
- * is not set, we drop skb dst entry now, while dst cache line is hot.
+ * To support IP_CMSG_PKTINFO option, we store rt_iif and rt_spec_dst
+ * in skb->cb[] before dst drop.
+ * This way, receiver doesnt make cache line misses to read rtable.
*/
-int ip_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
+void ipv4_pktinfo_prepare(struct sk_buff *skb)
{
- if (!(inet_sk(sk)->cmsg_flags & IP_CMSG_PKTINFO))
- skb_dst_drop(skb);
- return sock_queue_rcv_skb(sk, skb);
+ struct in_pktinfo *pktinfo = PKTINFO_SKB_CB(skb);
+ const struct rtable *rt = skb_rtable(skb);
+
+ if (rt) {
+ pktinfo->ipi_ifindex = rt->rt_iif;
+ pktinfo->ipi_spec_dst.s_addr = rt->rt_spec_dst;
+ } else {
+ pktinfo->ipi_ifindex = 0;
+ pktinfo->ipi_spec_dst.s_addr = 0;
+ }
+ skb_dst_drop(skb);
}
-EXPORT_SYMBOL(ip_queue_rcv_skb);
int ip_setsockopt(struct sock *sk, int level,
int optname, char __user *optval, unsigned int optlen)
diff --git a/net/ipv4/ipconfig.c b/net/ipv4/ipconfig.c
index 0da2afc..915eb52 100644
--- a/net/ipv4/ipconfig.c
+++ b/net/ipv4/ipconfig.c
@@ -763,13 +763,15 @@ static void __init ic_bootp_send_if(struct ic_device *d, unsigned long jiffies_d
struct sk_buff *skb;
struct bootp_pkt *b;
struct iphdr *h;
+ int hlen = LL_RESERVED_SPACE(dev);
+ int tlen = dev->needed_tailroom;
/* Allocate packet */
- skb = alloc_skb(sizeof(struct bootp_pkt) + LL_ALLOCATED_SPACE(dev) + 15,
+ skb = alloc_skb(sizeof(struct bootp_pkt) + hlen + tlen + 15,
GFP_KERNEL);
if (!skb)
return;
- skb_reserve(skb, LL_RESERVED_SPACE(dev));
+ skb_reserve(skb, hlen);
b = (struct bootp_pkt *) skb_put(skb, sizeof(struct bootp_pkt));
memset(b, 0, sizeof(struct bootp_pkt));
@@ -822,8 +824,13 @@ static void __init ic_bootp_send_if(struct ic_device *d, unsigned long jiffies_d
skb->dev = dev;
skb->protocol = htons(ETH_P_IP);
if (dev_hard_header(skb, dev, ntohs(skb->protocol),
- dev->broadcast, dev->dev_addr, skb->len) < 0 ||
- dev_queue_xmit(skb) < 0)
+ dev->broadcast, dev->dev_addr, skb->len) < 0) {
+ kfree_skb(skb);
+ printk("E");
+ return;
+ }
+
+ if (dev_queue_xmit(skb) < 0)
printk("E");
}
diff --git a/net/ipv4/ipip.c b/net/ipv4/ipip.c
index 065effd..9490690 100644
--- a/net/ipv4/ipip.c
+++ b/net/ipv4/ipip.c
@@ -148,7 +148,7 @@ struct pcpu_tstats {
unsigned long rx_bytes;
unsigned long tx_packets;
unsigned long tx_bytes;
-};
+} __attribute__((aligned(4*sizeof(unsigned long))));
static struct net_device_stats *ipip_get_stats(struct net_device *dev)
{
diff --git a/net/ipv4/netfilter/Kconfig b/net/ipv4/netfilter/Kconfig
index 1dfc18a..f19f218 100644
--- a/net/ipv4/netfilter/Kconfig
+++ b/net/ipv4/netfilter/Kconfig
@@ -325,7 +325,6 @@ config IP_NF_TARGET_TTL
# raw + specific targets
config IP_NF_RAW
tristate 'raw table support (required for NOTRACK/TRACE)'
- depends on NETFILTER_ADVANCED
help
This option adds a `raw' table to iptables. This table is the very
first in the netfilter framework and hooks in at the PREROUTING
diff --git a/net/ipv4/ping.c b/net/ipv4/ping.c
index a06f73f..43d4c3b 100644
--- a/net/ipv4/ping.c
+++ b/net/ipv4/ping.c
@@ -339,7 +339,6 @@ void ping_err(struct sk_buff *skb, u32 info)
sk = ping_v4_lookup(net, iph->daddr, iph->saddr,
ntohs(icmph->un.echo.id), skb->dev->ifindex);
if (sk == NULL) {
- ICMP_INC_STATS_BH(net, ICMP_MIB_INERRORS);
pr_debug("no socket, dropping\n");
return; /* No socket for error */
}
@@ -679,7 +678,6 @@ static int ping_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
pr_debug("ping_queue_rcv_skb(sk=%p,sk->num=%d,skb=%p)\n",
inet_sk(sk), inet_sk(sk)->inet_num, skb);
if (sock_queue_rcv_skb(sk, skb) < 0) {
- ICMP_INC_STATS_BH(sock_net(sk), ICMP_MIB_INERRORS);
kfree_skb(skb);
pr_debug("ping_queue_rcv_skb -> failed\n");
return -1;
diff --git a/net/ipv4/proc.c b/net/ipv4/proc.c
index 466ea8b..961eed4 100644
--- a/net/ipv4/proc.c
+++ b/net/ipv4/proc.c
@@ -288,7 +288,7 @@ static void icmpmsg_put(struct seq_file *seq)
count = 0;
for (i = 0; i < ICMPMSG_MIB_MAX; i++) {
- val = snmp_fold_field((void __percpu **) net->mib.icmpmsg_statistics, i);
+ val = atomic_long_read(&net->mib.icmpmsg_statistics->mibs[i]);
if (val) {
type[count] = i;
vals[count++] = val;
@@ -307,6 +307,7 @@ static void icmp_put(struct seq_file *seq)
{
int i;
struct net *net = seq->private;
+ atomic_long_t *ptr = net->mib.icmpmsg_statistics->mibs;
seq_puts(seq, "\nIcmp: InMsgs InErrors");
for (i=0; icmpmibmap[i].name != NULL; i++)
@@ -319,15 +320,13 @@ static void icmp_put(struct seq_file *seq)
snmp_fold_field((void __percpu **) net->mib.icmp_statistics, ICMP_MIB_INERRORS));
for (i=0; icmpmibmap[i].name != NULL; i++)
seq_printf(seq, " %lu",
- snmp_fold_field((void __percpu **) net->mib.icmpmsg_statistics,
- icmpmibmap[i].index));
+ atomic_long_read(ptr + icmpmibmap[i].index));
seq_printf(seq, " %lu %lu",
snmp_fold_field((void __percpu **) net->mib.icmp_statistics, ICMP_MIB_OUTMSGS),
snmp_fold_field((void __percpu **) net->mib.icmp_statistics, ICMP_MIB_OUTERRORS));
for (i=0; icmpmibmap[i].name != NULL; i++)
seq_printf(seq, " %lu",
- snmp_fold_field((void __percpu **) net->mib.icmpmsg_statistics,
- icmpmibmap[i].index | 0x100));
+ atomic_long_read(ptr + (icmpmibmap[i].index | 0x100)));
}
/*
diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c
index 007e2eb..3ccda5a 100644
--- a/net/ipv4/raw.c
+++ b/net/ipv4/raw.c
@@ -292,7 +292,8 @@ static int raw_rcv_skb(struct sock * sk, struct sk_buff * skb)
{
/* Charge it to the socket. */
- if (ip_queue_rcv_skb(sk, skb) < 0) {
+ ipv4_pktinfo_prepare(skb);
+ if (sock_queue_rcv_skb(sk, skb) < 0) {
kfree_skb(skb);
return NET_RX_DROP;
}
@@ -327,6 +328,7 @@ static int raw_send_hdrinc(struct sock *sk, struct flowi4 *fl4,
unsigned int iphlen;
int err;
struct rtable *rt = *rtp;
+ int hlen, tlen;
if (length > rt->dst.dev->mtu) {
ip_local_error(sk, EMSGSIZE, fl4->daddr, inet->inet_dport,
@@ -336,12 +338,14 @@ static int raw_send_hdrinc(struct sock *sk, struct flowi4 *fl4,
if (flags&MSG_PROBE)
goto out;
+ hlen = LL_RESERVED_SPACE(rt->dst.dev);
+ tlen = rt->dst.dev->needed_tailroom;
skb = sock_alloc_send_skb(sk,
- length + LL_ALLOCATED_SPACE(rt->dst.dev) + 15,
+ length + hlen + tlen + 15,
flags & MSG_DONTWAIT, &err);
if (skb == NULL)
goto error;
- skb_reserve(skb, LL_RESERVED_SPACE(rt->dst.dev));
+ skb_reserve(skb, hlen);
skb->priority = sk->sk_priority;
skb->mark = sk->sk_mark;
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index 155138d..fb47c8f 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -138,7 +138,7 @@ static int rt_chain_length_max __read_mostly = 20;
static struct dst_entry *ipv4_dst_check(struct dst_entry *dst, u32 cookie);
static unsigned int ipv4_default_advmss(const struct dst_entry *dst);
-static unsigned int ipv4_default_mtu(const struct dst_entry *dst);
+static unsigned int ipv4_mtu(const struct dst_entry *dst);
static void ipv4_dst_destroy(struct dst_entry *dst);
static struct dst_entry *ipv4_negative_advice(struct dst_entry *dst);
static void ipv4_link_failure(struct sk_buff *skb);
@@ -193,7 +193,7 @@ static struct dst_ops ipv4_dst_ops = {
.gc = rt_garbage_collect,
.check = ipv4_dst_check,
.default_advmss = ipv4_default_advmss,
- .default_mtu = ipv4_default_mtu,
+ .mtu = ipv4_mtu,
.cow_metrics = ipv4_cow_metrics,
.destroy = ipv4_dst_destroy,
.ifdown = ipv4_dst_ifdown,
@@ -1304,16 +1304,42 @@ static void rt_del(unsigned hash, struct rtable *rt)
spin_unlock_bh(rt_hash_lock_addr(hash));
}
+static int check_peer_redir(struct dst_entry *dst, struct inet_peer *peer)
+{
+ struct rtable *rt = (struct rtable *) dst;
+ __be32 orig_gw = rt->rt_gateway;
+ struct neighbour *n, *old_n;
+
+ dst_confirm(&rt->dst);
+
+ rt->rt_gateway = peer->redirect_learned.a4;
+
+ n = ipv4_neigh_lookup(&rt->dst, &rt->rt_gateway);
+ if (IS_ERR(n))
+ return PTR_ERR(n);
+ old_n = xchg(&rt->dst._neighbour, n);
+ if (old_n)
+ neigh_release(old_n);
+ if (!n || !(n->nud_state & NUD_VALID)) {
+ if (n)
+ neigh_event_send(n, NULL);
+ rt->rt_gateway = orig_gw;
+ return -EAGAIN;
+ } else {
+ rt->rt_flags |= RTCF_REDIRECTED;
+ call_netevent_notifiers(NETEVENT_NEIGH_UPDATE, n);
+ }
+ return 0;
+}
+
/* called in rcu_read_lock() section */
void ip_rt_redirect(__be32 old_gw, __be32 daddr, __be32 new_gw,
__be32 saddr, struct net_device *dev)
{
int s, i;
struct in_device *in_dev = __in_dev_get_rcu(dev);
- struct rtable *rt;
__be32 skeys[2] = { saddr, 0 };
int ikeys[2] = { dev->ifindex, 0 };
- struct flowi4 fl4;
struct inet_peer *peer;
struct net *net;
@@ -1336,33 +1362,42 @@ void ip_rt_redirect(__be32 old_gw, __be32 daddr, __be32 new_gw,
goto reject_redirect;
}
- memset(&fl4, 0, sizeof(fl4));
- fl4.daddr = daddr;
for (s = 0; s < 2; s++) {
for (i = 0; i < 2; i++) {
- fl4.flowi4_oif = ikeys[i];
- fl4.saddr = skeys[s];
- rt = __ip_route_output_key(net, &fl4);
- if (IS_ERR(rt))
- continue;
-
- if (rt->dst.error || rt->dst.dev != dev ||
- rt->rt_gateway != old_gw) {
- ip_rt_put(rt);
- continue;
- }
+ unsigned int hash;
+ struct rtable __rcu **rthp;
+ struct rtable *rt;
+
+ hash = rt_hash(daddr, skeys[s], ikeys[i], rt_genid(net));
+
+ rthp = &rt_hash_table[hash].chain;
+
+ while ((rt = rcu_dereference(*rthp)) != NULL) {
+ rthp = &rt->dst.rt_next;
+
+ if (rt->rt_key_dst != daddr ||
+ rt->rt_key_src != skeys[s] ||
+ rt->rt_oif != ikeys[i] ||
+ rt_is_input_route(rt) ||
+ rt_is_expired(rt) ||
+ !net_eq(dev_net(rt->dst.dev), net) ||
+ rt->dst.error ||
+ rt->dst.dev != dev ||
+ rt->rt_gateway != old_gw)
+ continue;
- if (!rt->peer)
- rt_bind_peer(rt, rt->rt_dst, 1);
+ if (!rt->peer)
+ rt_bind_peer(rt, rt->rt_dst, 1);
- peer = rt->peer;
- if (peer) {
- peer->redirect_learned.a4 = new_gw;
- atomic_inc(&__rt_peer_genid);
+ peer = rt->peer;
+ if (peer) {
+ if (peer->redirect_learned.a4 != new_gw) {
+ peer->redirect_learned.a4 = new_gw;
+ atomic_inc(&__rt_peer_genid);
+ }
+ check_peer_redir(&rt->dst, peer);
+ }
}
-
- ip_rt_put(rt);
- return;
}
}
return;
@@ -1649,33 +1684,6 @@ static void ip_rt_update_pmtu(struct dst_entry *dst, u32 mtu)
}
}
-static int check_peer_redir(struct dst_entry *dst, struct inet_peer *peer)
-{
- struct rtable *rt = (struct rtable *) dst;
- __be32 orig_gw = rt->rt_gateway;
- struct neighbour *n, *old_n;
-
- dst_confirm(&rt->dst);
-
- rt->rt_gateway = peer->redirect_learned.a4;
-
- n = ipv4_neigh_lookup(&rt->dst, &rt->rt_gateway);
- if (IS_ERR(n))
- return PTR_ERR(n);
- old_n = xchg(&rt->dst._neighbour, n);
- if (old_n)
- neigh_release(old_n);
- if (!n || !(n->nud_state & NUD_VALID)) {
- if (n)
- neigh_event_send(n, NULL);
- rt->rt_gateway = orig_gw;
- return -EAGAIN;
- } else {
- rt->rt_flags |= RTCF_REDIRECTED;
- call_netevent_notifiers(NETEVENT_NEIGH_UPDATE, n);
- }
- return 0;
-}
static struct dst_entry *ipv4_dst_check(struct dst_entry *dst, u32 cookie)
{
@@ -1806,12 +1814,17 @@ static unsigned int ipv4_default_advmss(const struct dst_entry *dst)
return advmss;
}
-static unsigned int ipv4_default_mtu(const struct dst_entry *dst)
+static unsigned int ipv4_mtu(const struct dst_entry *dst)
{
- unsigned int mtu = dst->dev->mtu;
+ const struct rtable *rt = (const struct rtable *) dst;
+ unsigned int mtu = dst_metric_raw(dst, RTAX_MTU);
+
+ if (mtu && rt_is_output_route(rt))
+ return mtu;
+
+ mtu = dst->dev->mtu;
if (unlikely(dst_metric_locked(dst, RTAX_MTU))) {
- const struct rtable *rt = (const struct rtable *) dst;
if (rt->rt_gateway != rt->rt_dst && mtu > 576)
mtu = 576;
@@ -2747,9 +2760,11 @@ static struct dst_entry *ipv4_blackhole_dst_check(struct dst_entry *dst, u32 coo
return NULL;
}
-static unsigned int ipv4_blackhole_default_mtu(const struct dst_entry *dst)
+static unsigned int ipv4_blackhole_mtu(const struct dst_entry *dst)
{
- return 0;
+ unsigned int mtu = dst_metric_raw(dst, RTAX_MTU);
+
+ return mtu ? : dst->dev->mtu;
}
static void ipv4_rt_blackhole_update_pmtu(struct dst_entry *dst, u32 mtu)
@@ -2767,7 +2782,7 @@ static struct dst_ops ipv4_dst_blackhole_ops = {
.protocol = cpu_to_be16(ETH_P_IP),
.destroy = ipv4_dst_destroy,
.check = ipv4_blackhole_dst_check,
- .default_mtu = ipv4_blackhole_default_mtu,
+ .mtu = ipv4_blackhole_mtu,
.default_advmss = ipv4_default_advmss,
.update_pmtu = ipv4_rt_blackhole_update_pmtu,
.cow_metrics = ipv4_rt_blackhole_cow_metrics,
@@ -2845,7 +2860,7 @@ static int rt_fill_info(struct net *net,
struct rtable *rt = skb_rtable(skb);
struct rtmsg *r;
struct nlmsghdr *nlh;
- long expires = 0;
+ unsigned long expires = 0;
const struct inet_peer *peer = rt->peer;
u32 id = 0, ts = 0, tsage = 0, error;
@@ -2902,8 +2917,12 @@ static int rt_fill_info(struct net *net,
tsage = get_seconds() - peer->tcp_ts_stamp;
}
expires = ACCESS_ONCE(peer->pmtu_expires);
- if (expires)
- expires -= jiffies;
+ if (expires) {
+ if (time_before(jiffies, expires))
+ expires -= jiffies;
+ else
+ expires = 0;
+ }
}
if (rt_is_input_route(rt)) {
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 34f5db1..50c3596 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -2653,7 +2653,8 @@ int compat_tcp_getsockopt(struct sock *sk, int level, int optname,
EXPORT_SYMBOL(compat_tcp_getsockopt);
#endif
-struct sk_buff *tcp_tso_segment(struct sk_buff *skb, u32 features)
+struct sk_buff *tcp_tso_segment(struct sk_buff *skb,
+ netdev_features_t features)
{
struct sk_buff *segs = ERR_PTR(-EINVAL);
struct tcphdr *th;
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 52b5c2d..78dd38c 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -2858,7 +2858,7 @@ static void tcp_try_keep_open(struct sock *sk)
struct tcp_sock *tp = tcp_sk(sk);
int state = TCP_CA_Open;
- if (tcp_left_out(tp) || tcp_any_retrans_done(sk) || tp->undo_marker)
+ if (tcp_left_out(tp) || tcp_any_retrans_done(sk))
state = TCP_CA_Disorder;
if (inet_csk(sk)->icsk_ca_state != state) {
@@ -2881,7 +2881,8 @@ static void tcp_try_to_open(struct sock *sk, int flag)
if (inet_csk(sk)->icsk_ca_state != TCP_CA_CWR) {
tcp_try_keep_open(sk);
- tcp_moderate_cwnd(tp);
+ if (inet_csk(sk)->icsk_ca_state != TCP_CA_Open)
+ tcp_moderate_cwnd(tp);
} else {
tcp_cwnd_down(sk, flag);
}
@@ -3009,11 +3010,11 @@ static void tcp_update_cwnd_in_recovery(struct sock *sk, int newly_acked_sacked,
* tcp_xmit_retransmit_queue().
*/
static void tcp_fastretrans_alert(struct sock *sk, int pkts_acked,
- int newly_acked_sacked, int flag)
+ int newly_acked_sacked, bool is_dupack,
+ int flag)
{
struct inet_connection_sock *icsk = inet_csk(sk);
struct tcp_sock *tp = tcp_sk(sk);
- int is_dupack = !(flag & (FLAG_SND_UNA_ADVANCED | FLAG_NOT_DUP));
int do_lost = is_dupack || ((flag & FLAG_DATA_SACKED) &&
(tcp_fackets_out(tp) > tp->reordering));
int fast_rexmit = 0, mib_idx;
@@ -3066,17 +3067,6 @@ static void tcp_fastretrans_alert(struct sock *sk, int pkts_acked,
}
break;
- case TCP_CA_Disorder:
- tcp_try_undo_dsack(sk);
- if (!tp->undo_marker ||
- /* For SACK case do not Open to allow to undo
- * catching for all duplicate ACKs. */
- tcp_is_reno(tp) || tp->snd_una != tp->high_seq) {
- tp->undo_marker = 0;
- tcp_set_ca_state(sk, TCP_CA_Open);
- }
- break;
-
case TCP_CA_Recovery:
if (tcp_is_reno(tp))
tcp_reset_reno_sack(tp);
@@ -3117,7 +3107,7 @@ static void tcp_fastretrans_alert(struct sock *sk, int pkts_acked,
tcp_add_reno_sack(sk);
}
- if (icsk->icsk_ca_state == TCP_CA_Disorder)
+ if (icsk->icsk_ca_state <= TCP_CA_Disorder)
tcp_try_undo_dsack(sk);
if (!tcp_time_to_recover(sk)) {
@@ -3681,10 +3671,12 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag)
u32 prior_snd_una = tp->snd_una;
u32 ack_seq = TCP_SKB_CB(skb)->seq;
u32 ack = TCP_SKB_CB(skb)->ack_seq;
+ bool is_dupack = false;
u32 prior_in_flight;
u32 prior_fackets;
int prior_packets;
int prior_sacked = tp->sacked_out;
+ int pkts_acked = 0;
int newly_acked_sacked = 0;
int frto_cwnd = 0;
@@ -3757,6 +3749,7 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag)
/* See if we can take anything off of the retransmit queue. */
flag |= tcp_clean_rtx_queue(sk, prior_fackets, prior_snd_una);
+ pkts_acked = prior_packets - tp->packets_out;
newly_acked_sacked = (prior_packets - prior_sacked) -
(tp->packets_out - tp->sacked_out);
@@ -3771,8 +3764,9 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag)
if ((flag & FLAG_DATA_ACKED) && !frto_cwnd &&
tcp_may_raise_cwnd(sk, flag))
tcp_cong_avoid(sk, ack, prior_in_flight);
- tcp_fastretrans_alert(sk, prior_packets - tp->packets_out,
- newly_acked_sacked, flag);
+ is_dupack = !(flag & (FLAG_SND_UNA_ADVANCED | FLAG_NOT_DUP));
+ tcp_fastretrans_alert(sk, pkts_acked, newly_acked_sacked,
+ is_dupack, flag);
} else {
if ((flag & FLAG_DATA_ACKED) && !frto_cwnd)
tcp_cong_avoid(sk, ack, prior_in_flight);
@@ -3784,6 +3778,10 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag)
return 1;
no_queue:
+ /* If data was DSACKed, see if we can undo a cwnd reduction. */
+ if (flag & FLAG_DSACKING_ACK)
+ tcp_fastretrans_alert(sk, pkts_acked, newly_acked_sacked,
+ is_dupack, flag);
/* If this ack opens up a zero window, clear backoff. It was
* being used to time the probes, and is probably far higher than
* it needs to be for normal retransmission.
@@ -3797,10 +3795,14 @@ invalid_ack:
return -1;
old_ack:
+ /* If data was SACKed, tag it and see if we should send more data.
+ * If data was DSACKed, see if we can undo a cwnd reduction.
+ */
if (TCP_SKB_CB(skb)->sacked) {
- tcp_sacktag_write_queue(sk, skb, prior_snd_una);
- if (icsk->icsk_ca_state == TCP_CA_Open)
- tcp_try_keep_open(sk);
+ flag |= tcp_sacktag_write_queue(sk, skb, prior_snd_una);
+ newly_acked_sacked = tp->sacked_out - prior_sacked;
+ tcp_fastretrans_alert(sk, pkts_acked, newly_acked_sacked,
+ is_dupack, flag);
}
SOCK_DEBUG(sk, "Ack %u before %u:%u\n", ack, tp->snd_una, tp->snd_nxt);
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index a744315..a9db4b1 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -1510,6 +1510,7 @@ exit:
NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENDROPS);
return NULL;
put_and_exit:
+ tcp_clear_xmit_timers(newsk);
bh_unlock_sock(newsk);
sock_put(newsk);
goto exit;
diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c
index 66363b6..945efff 100644
--- a/net/ipv4/tcp_minisocks.c
+++ b/net/ipv4/tcp_minisocks.c
@@ -343,8 +343,8 @@ void tcp_time_wait(struct sock *sk, int state, int timeo)
tw->tw_ipv6_offset = inet6_tw_offset(sk->sk_prot);
tw6 = inet6_twsk((struct sock *)tw);
- ipv6_addr_copy(&tw6->tw_v6_daddr, &np->daddr);
- ipv6_addr_copy(&tw6->tw_v6_rcv_saddr, &np->rcv_saddr);
+ tw6->tw_v6_daddr = np->daddr;
+ tw6->tw_v6_rcv_saddr = np->rcv_saddr;
tw->tw_tclass = np->tclass;
tw->tw_ipv6only = np->ipv6only;
}
@@ -425,7 +425,7 @@ static inline void TCP_ECN_openreq_child(struct tcp_sock *tp,
*/
struct sock *tcp_create_openreq_child(struct sock *sk, struct request_sock *req, struct sk_buff *skb)
{
- struct sock *newsk = inet_csk_clone(sk, req, GFP_ATOMIC);
+ struct sock *newsk = inet_csk_clone_lock(sk, req, GFP_ATOMIC);
if (newsk != NULL) {
const struct inet_request_sock *ireq = inet_rsk(req);
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 980b98f..63170e29 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -1382,7 +1382,7 @@ static inline int tcp_minshall_check(const struct tcp_sock *tp)
/* Return 0, if packet can be sent now without violation Nagle's rules:
* 1. It is full sized.
* 2. Or it contains FIN. (already checked by caller)
- * 3. Or TCP_NODELAY was set.
+ * 3. Or TCP_CORK is not set, and TCP_NODELAY is set.
* 4. Or TCP_CORK is not set, and all sent packets are ACKed.
* With Minshall's modification: all sent small packets are ACKed.
*/
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index ab0966d..b867ea2 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -1357,7 +1357,7 @@ static int __udp_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
if (inet_sk(sk)->inet_daddr)
sock_rps_save_rxhash(sk, skb);
- rc = ip_queue_rcv_skb(sk, skb);
+ rc = sock_queue_rcv_skb(sk, skb);
if (rc < 0) {
int is_udplite = IS_UDPLITE(sk);
@@ -1473,6 +1473,7 @@ int udp_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
rc = 0;
+ ipv4_pktinfo_prepare(skb);
bh_lock_sock(sk);
if (!sock_owned_by_user(sk))
rc = __udp_queue_rcv_skb(sk, skb);
@@ -2246,7 +2247,8 @@ int udp4_ufo_send_check(struct sk_buff *skb)
return 0;
}
-struct sk_buff *udp4_ufo_fragment(struct sk_buff *skb, u32 features)
+struct sk_buff *udp4_ufo_fragment(struct sk_buff *skb,
+ netdev_features_t features)
{
struct sk_buff *segs = ERR_PTR(-EINVAL);
unsigned int mss;
OpenPOWER on IntegriCloud