summaryrefslogtreecommitdiffstats
path: root/net/ipv4
diff options
context:
space:
mode:
authorEric Dumazet <edumazet@google.com>2012-06-26 23:14:15 +0000
committerDavid S. Miller <davem@davemloft.net>2012-06-27 15:34:24 -0700
commitc074da2810c118b3812f32d6754bd9ead2f169e7 (patch)
tree772c7fbb9da464f9afd6d56e9e610157ed665e8f /net/ipv4
parent93040ae5cc8dcc893eca4a4366dc8415af278edf (diff)
downloadop-kernel-dev-c074da2810c118b3812f32d6754bd9ead2f169e7.zip
op-kernel-dev-c074da2810c118b3812f32d6754bd9ead2f169e7.tar.gz
ipv4: tcp: dont cache unconfirmed intput dst
DDOS synflood attacks hit badly IP route cache. On typical machines, this cache is allowed to hold up to 8 Millions dst entries, 256 bytes for each, for a total of 2GB of memory. rt_garbage_collect() triggers and tries to cleanup things. Eventually route cache is disabled but machine is under fire and might OOM and crash. This patch exploits the new TCP early demux, to set a nocache boolean in case incoming TCP frame is for a not yet ESTABLISHED or TIMEWAIT socket. This 'nocache' boolean is then used in case dst entry is not found in route cache, to create an unhashed dst entry (DST_NOCACHE) SYN-cookie-ACK sent use a similar mechanism (ipv4: tcp: dont cache output dst for syncookies), so after this patch, a machine is able to absorb a DDOS synflood attack without polluting its IP route cache. Signed-off-by: Eric Dumazet <edumazet@google.com> Cc: Hans Schillstrom <hans.schillstrom@ericsson.com> Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'net/ipv4')
-rw-r--r--net/ipv4/arp.c2
-rw-r--r--net/ipv4/ip_fragment.c2
-rw-r--r--net/ipv4/ip_input.c5
-rw-r--r--net/ipv4/route.c8
-rw-r--r--net/ipv4/tcp_ipv4.c4
-rw-r--r--net/ipv4/xfrm4_input.c2
6 files changed, 14 insertions, 9 deletions
diff --git a/net/ipv4/arp.c b/net/ipv4/arp.c
index 2e560f0..6a97959 100644
--- a/net/ipv4/arp.c
+++ b/net/ipv4/arp.c
@@ -828,7 +828,7 @@ static int arp_process(struct sk_buff *skb)
}
if (arp->ar_op == htons(ARPOP_REQUEST) &&
- ip_route_input_noref(skb, tip, sip, 0, dev) == 0) {
+ ip_route_input_noref(skb, tip, sip, 0, dev, false) == 0) {
rt = skb_rtable(skb);
addr_type = rt->rt_type;
diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c
index 8d07c97..978d55f 100644
--- a/net/ipv4/ip_fragment.c
+++ b/net/ipv4/ip_fragment.c
@@ -259,7 +259,7 @@ static void ip_expire(unsigned long arg)
skb_dst_drop(head);
iph = ip_hdr(head);
err = ip_route_input_noref(head, iph->daddr, iph->saddr,
- iph->tos, head->dev);
+ iph->tos, head->dev, false);
if (err)
goto out_rcu_unlock;
diff --git a/net/ipv4/ip_input.c b/net/ipv4/ip_input.c
index 2a39204..7be54c8 100644
--- a/net/ipv4/ip_input.c
+++ b/net/ipv4/ip_input.c
@@ -326,6 +326,7 @@ static int ip_rcv_finish(struct sk_buff *skb)
*/
if (skb_dst(skb) == NULL) {
int err = -ENOENT;
+ bool nocache = false;
if (sysctl_ip_early_demux) {
const struct net_protocol *ipprot;
@@ -334,13 +335,13 @@ static int ip_rcv_finish(struct sk_buff *skb)
rcu_read_lock();
ipprot = rcu_dereference(inet_protos[protocol]);
if (ipprot && ipprot->early_demux)
- err = ipprot->early_demux(skb);
+ err = ipprot->early_demux(skb, &nocache);
rcu_read_unlock();
}
if (err) {
err = ip_route_input_noref(skb, iph->daddr, iph->saddr,
- iph->tos, skb->dev);
+ iph->tos, skb->dev, nocache);
if (unlikely(err)) {
if (err == -EXDEV)
NET_INC_STATS_BH(dev_net(skb->dev),
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index 81533e3..fdc7900 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -2214,7 +2214,7 @@ static int ip_mkroute_input(struct sk_buff *skb,
*/
static int ip_route_input_slow(struct sk_buff *skb, __be32 daddr, __be32 saddr,
- u8 tos, struct net_device *dev)
+ u8 tos, struct net_device *dev, bool nocache)
{
struct fib_result res;
struct in_device *in_dev = __in_dev_get_rcu(dev);
@@ -2353,6 +2353,8 @@ local_input:
rth->dst.error= -err;
rth->rt_flags &= ~RTCF_LOCAL;
}
+ if (nocache)
+ rth->dst.flags |= DST_NOCACHE;
hash = rt_hash(daddr, saddr, fl4.flowi4_iif, rt_genid(net));
rth = rt_intern_hash(hash, rth, skb, fl4.flowi4_iif);
err = 0;
@@ -2395,7 +2397,7 @@ martian_source_keep_err:
}
int ip_route_input_common(struct sk_buff *skb, __be32 daddr, __be32 saddr,
- u8 tos, struct net_device *dev, bool noref)
+ u8 tos, struct net_device *dev, bool noref, bool nocache)
{
struct rtable *rth;
unsigned int hash;
@@ -2471,7 +2473,7 @@ skip_cache:
rcu_read_unlock();
return -EINVAL;
}
- res = ip_route_input_slow(skb, daddr, saddr, tos, dev);
+ res = ip_route_input_slow(skb, daddr, saddr, tos, dev, nocache);
rcu_read_unlock();
return res;
}
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index 1781dc6..33aabd4 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -1673,7 +1673,7 @@ csum_err:
}
EXPORT_SYMBOL(tcp_v4_do_rcv);
-int tcp_v4_early_demux(struct sk_buff *skb)
+int tcp_v4_early_demux(struct sk_buff *skb, bool *no_dst_cache)
{
struct net *net = dev_net(skb->dev);
const struct iphdr *iph;
@@ -1719,6 +1719,8 @@ int tcp_v4_early_demux(struct sk_buff *skb)
}
}
}
+ } else {
+ *no_dst_cache = true;
}
out_err:
diff --git a/net/ipv4/xfrm4_input.c b/net/ipv4/xfrm4_input.c
index 06814b6..eee636b 100644
--- a/net/ipv4/xfrm4_input.c
+++ b/net/ipv4/xfrm4_input.c
@@ -28,7 +28,7 @@ static inline int xfrm4_rcv_encap_finish(struct sk_buff *skb)
const struct iphdr *iph = ip_hdr(skb);
if (ip_route_input_noref(skb, iph->daddr, iph->saddr,
- iph->tos, skb->dev))
+ iph->tos, skb->dev, false))
goto drop;
}
return dst_input(skb);
OpenPOWER on IntegriCloud