summaryrefslogtreecommitdiffstats
path: root/net/ipv4
diff options
context:
space:
mode:
authorJiri Kosina <jkosina@suse.cz>2011-07-11 14:15:48 +0200
committerJiri Kosina <jkosina@suse.cz>2011-07-11 14:15:55 +0200
commitb7e9c223be8ce335e30f2cf6ba588e6a4092275c (patch)
tree2d1e3b75606abc18df7ad65e51ac3f90cd68b38d /net/ipv4
parentc172d82500a6cf3c32d1e650722a1055d72ce858 (diff)
parente3bbfa78bab125f58b831b5f7f45b5a305091d72 (diff)
downloadop-kernel-dev-b7e9c223be8ce335e30f2cf6ba588e6a4092275c.zip
op-kernel-dev-b7e9c223be8ce335e30f2cf6ba588e6a4092275c.tar.gz
Merge branch 'master' into for-next
Sync with Linus' tree to be able to apply pending patches that are based on newer code already present upstream.
Diffstat (limited to 'net/ipv4')
-rw-r--r--net/ipv4/af_inet.c5
-rw-r--r--net/ipv4/inet_diag.c14
-rw-r--r--net/ipv4/ip_output.c25
-rw-r--r--net/ipv4/netfilter.c60
-rw-r--r--net/ipv4/netfilter/ip_queue.c6
-rw-r--r--net/ipv4/netfilter/ip_tables.c2
-rw-r--r--net/ipv4/netfilter/ipt_CLUSTERIP.c6
-rw-r--r--net/ipv4/netfilter/ipt_MASQUERADE.c2
-rw-r--r--net/ipv4/netfilter/ipt_REJECT.c14
-rw-r--r--net/ipv4/netfilter/ipt_ecn.c7
-rw-r--r--net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c6
-rw-r--r--net/ipv4/netfilter/nf_conntrack_proto_icmp.c2
-rw-r--r--net/ipv4/netfilter/nf_nat_core.c2
-rw-r--r--net/ipv4/netfilter/nf_nat_helper.c2
-rw-r--r--net/ipv4/netfilter/nf_nat_rule.c2
-rw-r--r--net/ipv4/netfilter/nf_nat_standalone.c4
-rw-r--r--net/ipv4/route.c82
-rw-r--r--net/ipv4/tcp.c10
-rw-r--r--net/ipv4/tcp_ipv4.c1
-rw-r--r--net/ipv4/udp.c13
-rw-r--r--net/ipv4/xfrm4_output.c7
21 files changed, 128 insertions, 144 deletions
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index 9c19260..ef1528a 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -465,8 +465,10 @@ int inet_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
if (addr_len < sizeof(struct sockaddr_in))
goto out;
- if (addr->sin_family != AF_INET)
+ if (addr->sin_family != AF_INET) {
+ err = -EAFNOSUPPORT;
goto out;
+ }
chk_addr_ret = inet_addr_type(sock_net(sk), addr->sin_addr.s_addr);
@@ -676,6 +678,7 @@ int inet_accept(struct socket *sock, struct socket *newsock, int flags)
lock_sock(sk2);
+ sock_rps_record_flow(sk2);
WARN_ON(!((1 << sk2->sk_state) &
(TCPF_ESTABLISHED | TCPF_CLOSE_WAIT | TCPF_CLOSE)));
diff --git a/net/ipv4/inet_diag.c b/net/ipv4/inet_diag.c
index 6ffe94c..3267d38 100644
--- a/net/ipv4/inet_diag.c
+++ b/net/ipv4/inet_diag.c
@@ -437,7 +437,7 @@ static int valid_cc(const void *bc, int len, int cc)
return 0;
if (cc == len)
return 1;
- if (op->yes < 4)
+ if (op->yes < 4 || op->yes & 3)
return 0;
len -= op->yes;
bc += op->yes;
@@ -447,11 +447,11 @@ static int valid_cc(const void *bc, int len, int cc)
static int inet_diag_bc_audit(const void *bytecode, int bytecode_len)
{
- const unsigned char *bc = bytecode;
+ const void *bc = bytecode;
int len = bytecode_len;
while (len > 0) {
- struct inet_diag_bc_op *op = (struct inet_diag_bc_op *)bc;
+ const struct inet_diag_bc_op *op = bc;
//printk("BC: %d %d %d {%d} / %d\n", op->code, op->yes, op->no, op[1].no, len);
switch (op->code) {
@@ -462,22 +462,20 @@ static int inet_diag_bc_audit(const void *bytecode, int bytecode_len)
case INET_DIAG_BC_S_LE:
case INET_DIAG_BC_D_GE:
case INET_DIAG_BC_D_LE:
- if (op->yes < 4 || op->yes > len + 4)
- return -EINVAL;
case INET_DIAG_BC_JMP:
- if (op->no < 4 || op->no > len + 4)
+ if (op->no < 4 || op->no > len + 4 || op->no & 3)
return -EINVAL;
if (op->no < len &&
!valid_cc(bytecode, bytecode_len, len - op->no))
return -EINVAL;
break;
case INET_DIAG_BC_NOP:
- if (op->yes < 4 || op->yes > len + 4)
- return -EINVAL;
break;
default:
return -EINVAL;
}
+ if (op->yes < 4 || op->yes > len + 4 || op->yes & 3)
+ return -EINVAL;
bc += op->yes;
len -= op->yes;
}
diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c
index 98af369..84f26e8 100644
--- a/net/ipv4/ip_output.c
+++ b/net/ipv4/ip_output.c
@@ -799,9 +799,9 @@ static int __ip_append_data(struct sock *sk,
int csummode = CHECKSUM_NONE;
struct rtable *rt = (struct rtable *)cork->dst;
- exthdrlen = transhdrlen ? rt->dst.header_len : 0;
- length += exthdrlen;
- transhdrlen += exthdrlen;
+ skb = skb_peek_tail(queue);
+
+ exthdrlen = !skb ? rt->dst.header_len : 0;
mtu = cork->fragsize;
hh_len = LL_RESERVED_SPACE(rt->dst.dev);
@@ -825,12 +825,10 @@ static int __ip_append_data(struct sock *sk,
!exthdrlen)
csummode = CHECKSUM_PARTIAL;
- skb = skb_peek_tail(queue);
-
cork->length += length;
if (((length > mtu) || (skb && skb_is_gso(skb))) &&
(sk->sk_protocol == IPPROTO_UDP) &&
- (rt->dst.dev->features & NETIF_F_UFO)) {
+ (rt->dst.dev->features & NETIF_F_UFO) && !rt->dst.header_len) {
err = ip_ufo_append_data(sk, queue, getfrag, from, length,
hh_len, fragheaderlen, transhdrlen,
mtu, flags);
@@ -883,17 +881,16 @@ alloc_new_skb:
else
alloclen = fraglen;
+ alloclen += exthdrlen;
+
/* The last fragment gets additional space at tail.
* Note, with MSG_MORE we overallocate on fragments,
* because we have no idea what fragment will be
* the last.
*/
- if (datalen == length + fraggap) {
+ if (datalen == length + fraggap)
alloclen += rt->dst.trailer_len;
- /* make sure mtu is not reached */
- if (datalen > mtu - fragheaderlen - rt->dst.trailer_len)
- datalen -= ALIGN(rt->dst.trailer_len, 8);
- }
+
if (transhdrlen) {
skb = sock_alloc_send_skb(sk,
alloclen + hh_len + 15,
@@ -926,11 +923,11 @@ alloc_new_skb:
/*
* Find where to start putting bytes.
*/
- data = skb_put(skb, fraglen);
+ data = skb_put(skb, fraglen + exthdrlen);
skb_set_network_header(skb, exthdrlen);
skb->transport_header = (skb->network_header +
fragheaderlen);
- data += fragheaderlen;
+ data += fragheaderlen + exthdrlen;
if (fraggap) {
skb->csum = skb_copy_and_csum_bits(
@@ -1064,7 +1061,7 @@ static int ip_setup_cork(struct sock *sk, struct inet_cork *cork,
*/
*rtp = NULL;
cork->fragsize = inet->pmtudisc == IP_PMTUDISC_PROBE ?
- rt->dst.dev->mtu : dst_mtu(rt->dst.path);
+ rt->dst.dev->mtu : dst_mtu(&rt->dst);
cork->dst = &rt->dst;
cork->length = 0;
cork->tx_flags = ipc->tx_flags;
diff --git a/net/ipv4/netfilter.c b/net/ipv4/netfilter.c
index 4614bab..2e97e3e 100644
--- a/net/ipv4/netfilter.c
+++ b/net/ipv4/netfilter.c
@@ -17,51 +17,35 @@ int ip_route_me_harder(struct sk_buff *skb, unsigned addr_type)
const struct iphdr *iph = ip_hdr(skb);
struct rtable *rt;
struct flowi4 fl4 = {};
- unsigned long orefdst;
+ __be32 saddr = iph->saddr;
+ __u8 flags = 0;
unsigned int hh_len;
- unsigned int type;
- type = inet_addr_type(net, iph->saddr);
- if (skb->sk && inet_sk(skb->sk)->transparent)
- type = RTN_LOCAL;
- if (addr_type == RTN_UNSPEC)
- addr_type = type;
+ if (!skb->sk && addr_type != RTN_LOCAL) {
+ if (addr_type == RTN_UNSPEC)
+ addr_type = inet_addr_type(net, saddr);
+ if (addr_type == RTN_LOCAL || addr_type == RTN_UNICAST)
+ flags |= FLOWI_FLAG_ANYSRC;
+ else
+ saddr = 0;
+ }
/* some non-standard hacks like ipt_REJECT.c:send_reset() can cause
* packets with foreign saddr to appear on the NF_INET_LOCAL_OUT hook.
*/
- if (addr_type == RTN_LOCAL) {
- fl4.daddr = iph->daddr;
- if (type == RTN_LOCAL)
- fl4.saddr = iph->saddr;
- fl4.flowi4_tos = RT_TOS(iph->tos);
- fl4.flowi4_oif = skb->sk ? skb->sk->sk_bound_dev_if : 0;
- fl4.flowi4_mark = skb->mark;
- fl4.flowi4_flags = skb->sk ? inet_sk_flowi_flags(skb->sk) : 0;
- rt = ip_route_output_key(net, &fl4);
- if (IS_ERR(rt))
- return -1;
-
- /* Drop old route. */
- skb_dst_drop(skb);
- skb_dst_set(skb, &rt->dst);
- } else {
- /* non-local src, find valid iif to satisfy
- * rp-filter when calling ip_route_input. */
- fl4.daddr = iph->saddr;
- rt = ip_route_output_key(net, &fl4);
- if (IS_ERR(rt))
- return -1;
+ fl4.daddr = iph->daddr;
+ fl4.saddr = saddr;
+ fl4.flowi4_tos = RT_TOS(iph->tos);
+ fl4.flowi4_oif = skb->sk ? skb->sk->sk_bound_dev_if : 0;
+ fl4.flowi4_mark = skb->mark;
+ fl4.flowi4_flags = skb->sk ? inet_sk_flowi_flags(skb->sk) : flags;
+ rt = ip_route_output_key(net, &fl4);
+ if (IS_ERR(rt))
+ return -1;
- orefdst = skb->_skb_refdst;
- if (ip_route_input(skb, iph->daddr, iph->saddr,
- RT_TOS(iph->tos), rt->dst.dev) != 0) {
- dst_release(&rt->dst);
- return -1;
- }
- dst_release(&rt->dst);
- refdst_drop(orefdst);
- }
+ /* Drop old route. */
+ skb_dst_drop(skb);
+ skb_dst_set(skb, &rt->dst);
if (skb_dst(skb)->error)
return -1;
diff --git a/net/ipv4/netfilter/ip_queue.c b/net/ipv4/netfilter/ip_queue.c
index d2c1311..5c9b9d9 100644
--- a/net/ipv4/netfilter/ip_queue.c
+++ b/net/ipv4/netfilter/ip_queue.c
@@ -203,7 +203,8 @@ ipq_build_packet_message(struct nf_queue_entry *entry, int *errp)
else
pmsg->outdev_name[0] = '\0';
- if (entry->indev && entry->skb->dev) {
+ if (entry->indev && entry->skb->dev &&
+ entry->skb->mac_header != entry->skb->network_header) {
pmsg->hw_type = entry->skb->dev->type;
pmsg->hw_addrlen = dev_parse_header(entry->skb,
pmsg->hw_addr);
@@ -402,7 +403,8 @@ ipq_dev_drop(int ifindex)
static inline void
__ipq_rcv_skb(struct sk_buff *skb)
{
- int status, type, pid, flags, nlmsglen, skblen;
+ int status, type, pid, flags;
+ unsigned int nlmsglen, skblen;
struct nlmsghdr *nlh;
skblen = skb->len;
diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c
index 7647438..24e556e 100644
--- a/net/ipv4/netfilter/ip_tables.c
+++ b/net/ipv4/netfilter/ip_tables.c
@@ -566,7 +566,7 @@ check_entry(const struct ipt_entry *e, const char *name)
const struct xt_entry_target *t;
if (!ip_checkentry(&e->ip)) {
- duprintf("ip check failed %p %s.\n", e, par->match->name);
+ duprintf("ip check failed %p %s.\n", e, name);
return -EINVAL;
}
diff --git a/net/ipv4/netfilter/ipt_CLUSTERIP.c b/net/ipv4/netfilter/ipt_CLUSTERIP.c
index d609ac3..5c9e97c 100644
--- a/net/ipv4/netfilter/ipt_CLUSTERIP.c
+++ b/net/ipv4/netfilter/ipt_CLUSTERIP.c
@@ -307,7 +307,7 @@ clusterip_tg(struct sk_buff *skb, const struct xt_action_param *par)
* error messages (RELATED) and information requests (see below) */
if (ip_hdr(skb)->protocol == IPPROTO_ICMP &&
(ctinfo == IP_CT_RELATED ||
- ctinfo == IP_CT_RELATED + IP_CT_IS_REPLY))
+ ctinfo == IP_CT_RELATED_REPLY))
return XT_CONTINUE;
/* ip_conntrack_icmp guarantees us that we only have ICMP_ECHO,
@@ -321,12 +321,12 @@ clusterip_tg(struct sk_buff *skb, const struct xt_action_param *par)
ct->mark = hash;
break;
case IP_CT_RELATED:
- case IP_CT_RELATED+IP_CT_IS_REPLY:
+ case IP_CT_RELATED_REPLY:
/* FIXME: we don't handle expectations at the
* moment. they can arrive on a different node than
* the master connection (e.g. FTP passive mode) */
case IP_CT_ESTABLISHED:
- case IP_CT_ESTABLISHED+IP_CT_IS_REPLY:
+ case IP_CT_ESTABLISHED_REPLY:
break;
default:
break;
diff --git a/net/ipv4/netfilter/ipt_MASQUERADE.c b/net/ipv4/netfilter/ipt_MASQUERADE.c
index d2ed9dc..9931152 100644
--- a/net/ipv4/netfilter/ipt_MASQUERADE.c
+++ b/net/ipv4/netfilter/ipt_MASQUERADE.c
@@ -60,7 +60,7 @@ masquerade_tg(struct sk_buff *skb, const struct xt_action_param *par)
nat = nfct_nat(ct);
NF_CT_ASSERT(ct && (ctinfo == IP_CT_NEW || ctinfo == IP_CT_RELATED ||
- ctinfo == IP_CT_RELATED + IP_CT_IS_REPLY));
+ ctinfo == IP_CT_RELATED_REPLY));
/* Source address is 0.0.0.0 - locally generated packet that is
* probably not supposed to be masqueraded.
diff --git a/net/ipv4/netfilter/ipt_REJECT.c b/net/ipv4/netfilter/ipt_REJECT.c
index 1ff79e5..51f13f8 100644
--- a/net/ipv4/netfilter/ipt_REJECT.c
+++ b/net/ipv4/netfilter/ipt_REJECT.c
@@ -40,7 +40,6 @@ static void send_reset(struct sk_buff *oldskb, int hook)
struct iphdr *niph;
const struct tcphdr *oth;
struct tcphdr _otcph, *tcph;
- unsigned int addr_type;
/* IP header checks: fragment. */
if (ip_hdr(oldskb)->frag_off & htons(IP_OFFSET))
@@ -55,6 +54,9 @@ static void send_reset(struct sk_buff *oldskb, int hook)
if (oth->rst)
return;
+ if (skb_rtable(oldskb)->rt_flags & (RTCF_BROADCAST | RTCF_MULTICAST))
+ return;
+
/* Check checksum */
if (nf_ip_checksum(oldskb, hook, ip_hdrlen(oldskb), IPPROTO_TCP))
return;
@@ -101,19 +103,11 @@ static void send_reset(struct sk_buff *oldskb, int hook)
nskb->csum_start = (unsigned char *)tcph - nskb->head;
nskb->csum_offset = offsetof(struct tcphdr, check);
- addr_type = RTN_UNSPEC;
- if (hook != NF_INET_FORWARD
-#ifdef CONFIG_BRIDGE_NETFILTER
- || (nskb->nf_bridge && nskb->nf_bridge->mask & BRNF_BRIDGED)
-#endif
- )
- addr_type = RTN_LOCAL;
-
/* ip_route_me_harder expects skb->dst to be set */
skb_dst_set_noref(nskb, skb_dst(oldskb));
nskb->protocol = htons(ETH_P_IP);
- if (ip_route_me_harder(nskb, addr_type))
+ if (ip_route_me_harder(nskb, RTN_UNSPEC))
goto free_nskb;
niph->ttl = ip4_dst_hoplimit(skb_dst(nskb));
diff --git a/net/ipv4/netfilter/ipt_ecn.c b/net/ipv4/netfilter/ipt_ecn.c
index af6e9c7..2b57e52 100644
--- a/net/ipv4/netfilter/ipt_ecn.c
+++ b/net/ipv4/netfilter/ipt_ecn.c
@@ -25,7 +25,8 @@ MODULE_LICENSE("GPL");
static inline bool match_ip(const struct sk_buff *skb,
const struct ipt_ecn_info *einfo)
{
- return (ip_hdr(skb)->tos & IPT_ECN_IP_MASK) == einfo->ip_ect;
+ return ((ip_hdr(skb)->tos & IPT_ECN_IP_MASK) == einfo->ip_ect) ^
+ !!(einfo->invert & IPT_ECN_OP_MATCH_IP);
}
static inline bool match_tcp(const struct sk_buff *skb,
@@ -76,8 +77,6 @@ static bool ecn_mt(const struct sk_buff *skb, struct xt_action_param *par)
return false;
if (info->operation & (IPT_ECN_OP_MATCH_ECE|IPT_ECN_OP_MATCH_CWR)) {
- if (ip_hdr(skb)->protocol != IPPROTO_TCP)
- return false;
if (!match_tcp(skb, info, &par->hotdrop))
return false;
}
@@ -97,7 +96,7 @@ static int ecn_mt_check(const struct xt_mtchk_param *par)
return -EINVAL;
if (info->operation & (IPT_ECN_OP_MATCH_ECE|IPT_ECN_OP_MATCH_CWR) &&
- ip->proto != IPPROTO_TCP) {
+ (ip->proto != IPPROTO_TCP || ip->invflags & IPT_INV_PROTO)) {
pr_info("cannot match TCP bits in rule for non-tcp packets\n");
return -EINVAL;
}
diff --git a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c
index 5a03c02..de9da21 100644
--- a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c
+++ b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c
@@ -101,7 +101,7 @@ static unsigned int ipv4_confirm(unsigned int hooknum,
/* This is where we call the helper: as the packet goes out. */
ct = nf_ct_get(skb, &ctinfo);
- if (!ct || ctinfo == IP_CT_RELATED + IP_CT_IS_REPLY)
+ if (!ct || ctinfo == IP_CT_RELATED_REPLY)
goto out;
help = nfct_help(ct);
@@ -121,7 +121,9 @@ static unsigned int ipv4_confirm(unsigned int hooknum,
return ret;
}
- if (test_bit(IPS_SEQ_ADJUST_BIT, &ct->status)) {
+ /* adjust seqs for loopback traffic only in outgoing direction */
+ if (test_bit(IPS_SEQ_ADJUST_BIT, &ct->status) &&
+ !nf_is_loopback_packet(skb)) {
typeof(nf_nat_seq_adjust_hook) seq_adjust;
seq_adjust = rcu_dereference(nf_nat_seq_adjust_hook);
diff --git a/net/ipv4/netfilter/nf_conntrack_proto_icmp.c b/net/ipv4/netfilter/nf_conntrack_proto_icmp.c
index 7404bde..ab5b27a 100644
--- a/net/ipv4/netfilter/nf_conntrack_proto_icmp.c
+++ b/net/ipv4/netfilter/nf_conntrack_proto_icmp.c
@@ -160,7 +160,7 @@ icmp_error_message(struct net *net, struct nf_conn *tmpl, struct sk_buff *skb,
/* Update skb to refer to this connection */
skb->nfct = &nf_ct_tuplehash_to_ctrack(h)->ct_general;
skb->nfctinfo = *ctinfo;
- return -NF_ACCEPT;
+ return NF_ACCEPT;
}
/* Small and modified version of icmp_rcv */
diff --git a/net/ipv4/netfilter/nf_nat_core.c b/net/ipv4/netfilter/nf_nat_core.c
index 9c71b27..3346de5 100644
--- a/net/ipv4/netfilter/nf_nat_core.c
+++ b/net/ipv4/netfilter/nf_nat_core.c
@@ -433,7 +433,7 @@ int nf_nat_icmp_reply_translation(struct nf_conn *ct,
/* Must be RELATED */
NF_CT_ASSERT(skb->nfctinfo == IP_CT_RELATED ||
- skb->nfctinfo == IP_CT_RELATED+IP_CT_IS_REPLY);
+ skb->nfctinfo == IP_CT_RELATED_REPLY);
/* Redirects on non-null nats must be dropped, else they'll
start talking to each other without our translation, and be
diff --git a/net/ipv4/netfilter/nf_nat_helper.c b/net/ipv4/netfilter/nf_nat_helper.c
index 99cfa28..ebc5f88 100644
--- a/net/ipv4/netfilter/nf_nat_helper.c
+++ b/net/ipv4/netfilter/nf_nat_helper.c
@@ -160,7 +160,7 @@ static void nf_nat_csum(struct sk_buff *skb, const struct iphdr *iph, void *data
if (skb->ip_summed != CHECKSUM_PARTIAL) {
if (!(rt->rt_flags & RTCF_LOCAL) &&
- skb->dev->features & NETIF_F_V4_CSUM) {
+ (!skb->dev || skb->dev->features & NETIF_F_V4_CSUM)) {
skb->ip_summed = CHECKSUM_PARTIAL;
skb->csum_start = skb_headroom(skb) +
skb_network_offset(skb) +
diff --git a/net/ipv4/netfilter/nf_nat_rule.c b/net/ipv4/netfilter/nf_nat_rule.c
index 21c3042..733c9ab 100644
--- a/net/ipv4/netfilter/nf_nat_rule.c
+++ b/net/ipv4/netfilter/nf_nat_rule.c
@@ -53,7 +53,7 @@ ipt_snat_target(struct sk_buff *skb, const struct xt_action_param *par)
/* Connection must be valid and new. */
NF_CT_ASSERT(ct && (ctinfo == IP_CT_NEW || ctinfo == IP_CT_RELATED ||
- ctinfo == IP_CT_RELATED + IP_CT_IS_REPLY));
+ ctinfo == IP_CT_RELATED_REPLY));
NF_CT_ASSERT(par->out != NULL);
return nf_nat_setup_info(ct, &mr->range[0], IP_NAT_MANIP_SRC);
diff --git a/net/ipv4/netfilter/nf_nat_standalone.c b/net/ipv4/netfilter/nf_nat_standalone.c
index 7317bdf..483b76d 100644
--- a/net/ipv4/netfilter/nf_nat_standalone.c
+++ b/net/ipv4/netfilter/nf_nat_standalone.c
@@ -116,7 +116,7 @@ nf_nat_fn(unsigned int hooknum,
switch (ctinfo) {
case IP_CT_RELATED:
- case IP_CT_RELATED+IP_CT_IS_REPLY:
+ case IP_CT_RELATED_REPLY:
if (ip_hdr(skb)->protocol == IPPROTO_ICMP) {
if (!nf_nat_icmp_reply_translation(ct, ctinfo,
hooknum, skb))
@@ -144,7 +144,7 @@ nf_nat_fn(unsigned int hooknum,
default:
/* ESTABLISHED */
NF_CT_ASSERT(ctinfo == IP_CT_ESTABLISHED ||
- ctinfo == (IP_CT_ESTABLISHED+IP_CT_IS_REPLY));
+ ctinfo == IP_CT_ESTABLISHED_REPLY);
}
return nf_nat_packet(ct, ctinfo, hooknum, skb);
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index 52b0b95..aa13ef1 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -1316,6 +1316,23 @@ reject_redirect:
;
}
+static bool peer_pmtu_expired(struct inet_peer *peer)
+{
+ unsigned long orig = ACCESS_ONCE(peer->pmtu_expires);
+
+ return orig &&
+ time_after_eq(jiffies, orig) &&
+ cmpxchg(&peer->pmtu_expires, orig, 0) == orig;
+}
+
+static bool peer_pmtu_cleaned(struct inet_peer *peer)
+{
+ unsigned long orig = ACCESS_ONCE(peer->pmtu_expires);
+
+ return orig &&
+ cmpxchg(&peer->pmtu_expires, orig, 0) == orig;
+}
+
static struct dst_entry *ipv4_negative_advice(struct dst_entry *dst)
{
struct rtable *rt = (struct rtable *)dst;
@@ -1331,14 +1348,8 @@ static struct dst_entry *ipv4_negative_advice(struct dst_entry *dst)
rt_genid(dev_net(dst->dev)));
rt_del(hash, rt);
ret = NULL;
- } else if (rt->peer &&
- rt->peer->pmtu_expires &&
- time_after_eq(jiffies, rt->peer->pmtu_expires)) {
- unsigned long orig = rt->peer->pmtu_expires;
-
- if (cmpxchg(&rt->peer->pmtu_expires, orig, 0) == orig)
- dst_metric_set(dst, RTAX_MTU,
- rt->peer->pmtu_orig);
+ } else if (rt->peer && peer_pmtu_expired(rt->peer)) {
+ dst_metric_set(dst, RTAX_MTU, rt->peer->pmtu_orig);
}
}
return ret;
@@ -1531,8 +1542,10 @@ unsigned short ip_rt_frag_needed(struct net *net, const struct iphdr *iph,
static void check_peer_pmtu(struct dst_entry *dst, struct inet_peer *peer)
{
- unsigned long expires = peer->pmtu_expires;
+ unsigned long expires = ACCESS_ONCE(peer->pmtu_expires);
+ if (!expires)
+ return;
if (time_before(jiffies, expires)) {
u32 orig_dst_mtu = dst_mtu(dst);
if (peer->pmtu_learned < orig_dst_mtu) {
@@ -1555,10 +1568,11 @@ static void ip_rt_update_pmtu(struct dst_entry *dst, u32 mtu)
rt_bind_peer(rt, rt->rt_dst, 1);
peer = rt->peer;
if (peer) {
+ unsigned long pmtu_expires = ACCESS_ONCE(peer->pmtu_expires);
+
if (mtu < ip_rt_min_pmtu)
mtu = ip_rt_min_pmtu;
- if (!peer->pmtu_expires || mtu < peer->pmtu_learned) {
- unsigned long pmtu_expires;
+ if (!pmtu_expires || mtu < peer->pmtu_learned) {
pmtu_expires = jiffies + ip_rt_mtu_expires;
if (!pmtu_expires)
@@ -1612,13 +1626,14 @@ static struct dst_entry *ipv4_dst_check(struct dst_entry *dst, u32 cookie)
rt_bind_peer(rt, rt->rt_dst, 0);
peer = rt->peer;
- if (peer && peer->pmtu_expires)
+ if (peer) {
check_peer_pmtu(dst, peer);
- if (peer && peer->redirect_learned.a4 &&
- peer->redirect_learned.a4 != rt->rt_gateway) {
- if (check_peer_redir(dst, peer))
- return NULL;
+ if (peer->redirect_learned.a4 &&
+ peer->redirect_learned.a4 != rt->rt_gateway) {
+ if (check_peer_redir(dst, peer))
+ return NULL;
+ }
}
rt->rt_peer_genid = rt_peer_genid();
@@ -1649,14 +1664,8 @@ static void ipv4_link_failure(struct sk_buff *skb)
icmp_send(skb, ICMP_DEST_UNREACH, ICMP_HOST_UNREACH, 0);
rt = skb_rtable(skb);
- if (rt &&
- rt->peer &&
- rt->peer->pmtu_expires) {
- unsigned long orig = rt->peer->pmtu_expires;
-
- if (cmpxchg(&rt->peer->pmtu_expires, orig, 0) == orig)
- dst_metric_set(&rt->dst, RTAX_MTU, rt->peer->pmtu_orig);
- }
+ if (rt && rt->peer && peer_pmtu_cleaned(rt->peer))
+ dst_metric_set(&rt->dst, RTAX_MTU, rt->peer->pmtu_orig);
}
static int ip_rt_bug(struct sk_buff *skb)
@@ -1770,8 +1779,7 @@ static void rt_init_metrics(struct rtable *rt, const struct flowi4 *fl4,
sizeof(u32) * RTAX_MAX);
dst_init_metrics(&rt->dst, peer->metrics, false);
- if (peer->pmtu_expires)
- check_peer_pmtu(&rt->dst, peer);
+ check_peer_pmtu(&rt->dst, peer);
if (peer->redirect_learned.a4 &&
peer->redirect_learned.a4 != rt->rt_gateway) {
rt->rt_gateway = peer->redirect_learned.a4;
@@ -1894,9 +1902,7 @@ static int ip_route_input_mc(struct sk_buff *skb, __be32 daddr, __be32 saddr,
hash = rt_hash(daddr, saddr, dev->ifindex, rt_genid(dev_net(dev)));
rth = rt_intern_hash(hash, rth, skb, dev->ifindex);
- err = 0;
- if (IS_ERR(rth))
- err = PTR_ERR(rth);
+ return IS_ERR(rth) ? PTR_ERR(rth) : 0;
e_nobufs:
return -ENOBUFS;
@@ -2775,7 +2781,8 @@ static int rt_fill_info(struct net *net,
struct rtable *rt = skb_rtable(skb);
struct rtmsg *r;
struct nlmsghdr *nlh;
- long expires;
+ long expires = 0;
+ const struct inet_peer *peer = rt->peer;
u32 id = 0, ts = 0, tsage = 0, error;
nlh = nlmsg_put(skb, pid, seq, event, sizeof(*r), flags);
@@ -2823,15 +2830,16 @@ static int rt_fill_info(struct net *net,
NLA_PUT_BE32(skb, RTA_MARK, rt->rt_mark);
error = rt->dst.error;
- expires = (rt->peer && rt->peer->pmtu_expires) ?
- rt->peer->pmtu_expires - jiffies : 0;
- if (rt->peer) {
+ if (peer) {
inet_peer_refcheck(rt->peer);
- id = atomic_read(&rt->peer->ip_id_count) & 0xffff;
- if (rt->peer->tcp_ts_stamp) {
- ts = rt->peer->tcp_ts;
- tsage = get_seconds() - rt->peer->tcp_ts_stamp;
+ id = atomic_read(&peer->ip_id_count) & 0xffff;
+ if (peer->tcp_ts_stamp) {
+ ts = peer->tcp_ts;
+ tsage = get_seconds() - peer->tcp_ts_stamp;
}
+ expires = ACCESS_ONCE(peer->pmtu_expires);
+ if (expires)
+ expires -= jiffies;
}
if (rt_is_input_route(rt)) {
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 054a59d..46febca 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -3220,7 +3220,7 @@ __setup("thash_entries=", set_thash_entries);
void __init tcp_init(void)
{
struct sk_buff *skb = NULL;
- unsigned long nr_pages, limit;
+ unsigned long limit;
int i, max_share, cnt;
unsigned long jiffy = jiffies;
@@ -3277,13 +3277,7 @@ void __init tcp_init(void)
sysctl_tcp_max_orphans = cnt / 2;
sysctl_max_syn_backlog = max(128, cnt / 256);
- /* Set the pressure threshold to be a fraction of global memory that
- * is up to 1/2 at 256 MB, decreasing toward zero with the amount of
- * memory, with a floor of 128 pages.
- */
- nr_pages = totalram_pages - totalhigh_pages;
- limit = min(nr_pages, 1UL<<(28-PAGE_SHIFT)) >> (20-PAGE_SHIFT);
- limit = (limit * (nr_pages >> (20-PAGE_SHIFT))) >> (PAGE_SHIFT-11);
+ limit = nr_free_buffer_pages() / 8;
limit = max(limit, 128UL);
sysctl_tcp_mem[0] = limit / 4 * 3;
sysctl_tcp_mem[1] = limit;
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index a7d6671..708dc20 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -1589,6 +1589,7 @@ int tcp_v4_do_rcv(struct sock *sk, struct sk_buff *skb)
goto discard;
if (nsk != sk) {
+ sock_rps_save_rxhash(nsk, skb->rxhash);
if (tcp_child_process(sk, nsk, skb)) {
rsk = nsk;
goto reset;
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index abca870..198f75b 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -1249,6 +1249,9 @@ csum_copy_err:
if (noblock)
return -EAGAIN;
+
+ /* starting over for a new packet */
+ msg->msg_flags &= ~MSG_TRUNC;
goto try_again;
}
@@ -2206,16 +2209,10 @@ void __init udp_table_init(struct udp_table *table, const char *name)
void __init udp_init(void)
{
- unsigned long nr_pages, limit;
+ unsigned long limit;
udp_table_init(&udp_table, "UDP");
- /* Set the pressure threshold up by the same strategy of TCP. It is a
- * fraction of global memory that is up to 1/2 at 256 MB, decreasing
- * toward zero with the amount of memory, with a floor of 128 pages.
- */
- nr_pages = totalram_pages - totalhigh_pages;
- limit = min(nr_pages, 1UL<<(28-PAGE_SHIFT)) >> (20-PAGE_SHIFT);
- limit = (limit * (nr_pages >> (20-PAGE_SHIFT))) >> (PAGE_SHIFT-11);
+ limit = nr_free_buffer_pages() / 8;
limit = max(limit, 128UL);
sysctl_udp_mem[0] = limit / 4 * 3;
sysctl_udp_mem[1] = limit;
diff --git a/net/ipv4/xfrm4_output.c b/net/ipv4/xfrm4_output.c
index 2d51840..327a617 100644
--- a/net/ipv4/xfrm4_output.c
+++ b/net/ipv4/xfrm4_output.c
@@ -32,7 +32,12 @@ static int xfrm4_tunnel_check_size(struct sk_buff *skb)
dst = skb_dst(skb);
mtu = dst_mtu(dst);
if (skb->len > mtu) {
- icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu));
+ if (skb->sk)
+ ip_local_error(skb->sk, EMSGSIZE, ip_hdr(skb)->daddr,
+ inet_sk(skb->sk)->inet_dport, mtu);
+ else
+ icmp_send(skb, ICMP_DEST_UNREACH,
+ ICMP_FRAG_NEEDED, htonl(mtu));
ret = -EMSGSIZE;
}
out:
OpenPOWER on IntegriCloud