summaryrefslogtreecommitdiffstats
path: root/net/ipv4/route.c
diff options
context:
space:
mode:
Diffstat (limited to 'net/ipv4/route.c')
-rw-r--r--net/ipv4/route.c233
1 files changed, 155 insertions, 78 deletions
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index 6bcb8fc..c035251 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -147,6 +147,7 @@ static void ip_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
struct sk_buff *skb, u32 mtu);
static void ip_do_redirect(struct dst_entry *dst, struct sock *sk,
struct sk_buff *skb);
+static void ipv4_dst_destroy(struct dst_entry *dst);
static void ipv4_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
int how)
@@ -170,6 +171,7 @@ static struct dst_ops ipv4_dst_ops = {
.default_advmss = ipv4_default_advmss,
.mtu = ipv4_mtu,
.cow_metrics = ipv4_cow_metrics,
+ .destroy = ipv4_dst_destroy,
.ifdown = ipv4_dst_ifdown,
.negative_advice = ipv4_negative_advice,
.link_failure = ipv4_link_failure,
@@ -444,7 +446,7 @@ static inline int ip_rt_proc_init(void)
}
#endif /* CONFIG_PROC_FS */
-static inline int rt_is_expired(struct rtable *rth)
+static inline bool rt_is_expired(const struct rtable *rth)
{
return rth->rt_genid != rt_genid(dev_net(rth->dst.dev));
}
@@ -587,11 +589,17 @@ static void ip_rt_build_flow_key(struct flowi4 *fl4, const struct sock *sk,
build_sk_flow_key(fl4, sk);
}
-static DEFINE_SEQLOCK(fnhe_seqlock);
+static inline void rt_free(struct rtable *rt)
+{
+ call_rcu(&rt->dst.rcu_head, dst_rcu_free);
+}
+
+static DEFINE_SPINLOCK(fnhe_lock);
static struct fib_nh_exception *fnhe_oldest(struct fnhe_hash_bucket *hash)
{
struct fib_nh_exception *fnhe, *oldest;
+ struct rtable *orig;
oldest = rcu_dereference(hash->chain);
for (fnhe = rcu_dereference(oldest->fnhe_next); fnhe;
@@ -599,6 +607,11 @@ static struct fib_nh_exception *fnhe_oldest(struct fnhe_hash_bucket *hash)
if (time_before(fnhe->fnhe_stamp, oldest->fnhe_stamp))
oldest = fnhe;
}
+ orig = rcu_dereference(oldest->fnhe_rth);
+ if (orig) {
+ RCU_INIT_POINTER(oldest->fnhe_rth, NULL);
+ rt_free(orig);
+ }
return oldest;
}
@@ -620,7 +633,7 @@ static void update_or_create_fnhe(struct fib_nh *nh, __be32 daddr, __be32 gw,
int depth;
u32 hval = fnhe_hashfun(daddr);
- write_seqlock_bh(&fnhe_seqlock);
+ spin_lock_bh(&fnhe_lock);
hash = nh->nh_exceptions;
if (!hash) {
@@ -667,7 +680,7 @@ static void update_or_create_fnhe(struct fib_nh *nh, __be32 daddr, __be32 gw,
fnhe->fnhe_stamp = jiffies;
out_unlock:
- write_sequnlock_bh(&fnhe_seqlock);
+ spin_unlock_bh(&fnhe_lock);
return;
}
@@ -1164,67 +1177,126 @@ static struct fib_nh_exception *find_exception(struct fib_nh *nh, __be32 daddr)
return NULL;
}
-static void rt_bind_exception(struct rtable *rt, struct fib_nh_exception *fnhe,
+static bool rt_bind_exception(struct rtable *rt, struct fib_nh_exception *fnhe,
__be32 daddr)
{
- __be32 fnhe_daddr, gw;
- unsigned long expires;
- unsigned int seq;
- u32 pmtu;
-
-restart:
- seq = read_seqbegin(&fnhe_seqlock);
- fnhe_daddr = fnhe->fnhe_daddr;
- gw = fnhe->fnhe_gw;
- pmtu = fnhe->fnhe_pmtu;
- expires = fnhe->fnhe_expires;
- if (read_seqretry(&fnhe_seqlock, seq))
- goto restart;
-
- if (daddr != fnhe_daddr)
- return;
+ bool ret = false;
+
+ spin_lock_bh(&fnhe_lock);
- if (pmtu) {
- unsigned long diff = expires - jiffies;
+ if (daddr == fnhe->fnhe_daddr) {
+ struct rtable *orig;
- if (time_before(jiffies, expires)) {
- rt->rt_pmtu = pmtu;
- dst_set_expires(&rt->dst, diff);
+ if (fnhe->fnhe_pmtu) {
+ unsigned long expires = fnhe->fnhe_expires;
+ unsigned long diff = expires - jiffies;
+
+ if (time_before(jiffies, expires)) {
+ rt->rt_pmtu = fnhe->fnhe_pmtu;
+ dst_set_expires(&rt->dst, diff);
+ }
}
+ if (fnhe->fnhe_gw) {
+ rt->rt_flags |= RTCF_REDIRECTED;
+ rt->rt_gateway = fnhe->fnhe_gw;
+ }
+
+ orig = rcu_dereference(fnhe->fnhe_rth);
+ rcu_assign_pointer(fnhe->fnhe_rth, rt);
+ if (orig)
+ rt_free(orig);
+
+ fnhe->fnhe_stamp = jiffies;
+ ret = true;
+ } else {
+ /* Routes we intend to cache in nexthop exception have
+ * the DST_NOCACHE bit clear. However, if we are
+ * unsuccessful at storing this route into the cache
+ * we really need to set it.
+ */
+ rt->dst.flags |= DST_NOCACHE;
}
- if (gw) {
- rt->rt_flags |= RTCF_REDIRECTED;
- rt->rt_gateway = gw;
- }
- fnhe->fnhe_stamp = jiffies;
-}
+ spin_unlock_bh(&fnhe_lock);
-static inline void rt_release_rcu(struct rcu_head *head)
-{
- struct dst_entry *dst = container_of(head, struct dst_entry, rcu_head);
- dst_release(dst);
+ return ret;
}
-static void rt_cache_route(struct fib_nh *nh, struct rtable *rt)
+static bool rt_cache_route(struct fib_nh *nh, struct rtable *rt)
{
- struct rtable *orig, *prev, **p = &nh->nh_rth_output;
-
- if (rt_is_input_route(rt))
- p = &nh->nh_rth_input;
+ struct rtable *orig, *prev, **p;
+ bool ret = true;
+ if (rt_is_input_route(rt)) {
+ p = (struct rtable **)&nh->nh_rth_input;
+ } else {
+ if (!nh->nh_pcpu_rth_output)
+ goto nocache;
+ p = (struct rtable **)__this_cpu_ptr(nh->nh_pcpu_rth_output);
+ }
orig = *p;
prev = cmpxchg(p, orig, rt);
if (prev == orig) {
- dst_clone(&rt->dst);
if (orig)
- call_rcu_bh(&orig->dst.rcu_head, rt_release_rcu);
+ rt_free(orig);
+ } else {
+ /* Routes we intend to cache in the FIB nexthop have
+ * the DST_NOCACHE bit clear. However, if we are
+ * unsuccessful at storing this route into the cache
+ * we really need to set it.
+ */
+nocache:
+ rt->dst.flags |= DST_NOCACHE;
+ ret = false;
}
+
+ return ret;
}
-static bool rt_cache_valid(struct rtable *rt)
+static DEFINE_SPINLOCK(rt_uncached_lock);
+static LIST_HEAD(rt_uncached_list);
+
+static void rt_add_uncached_list(struct rtable *rt)
{
- return (rt && rt->dst.obsolete == DST_OBSOLETE_FORCE_CHK);
+ spin_lock_bh(&rt_uncached_lock);
+ list_add_tail(&rt->rt_uncached, &rt_uncached_list);
+ spin_unlock_bh(&rt_uncached_lock);
+}
+
+static void ipv4_dst_destroy(struct dst_entry *dst)
+{
+ struct rtable *rt = (struct rtable *) dst;
+
+ if (dst->flags & DST_NOCACHE) {
+ spin_lock_bh(&rt_uncached_lock);
+ list_del(&rt->rt_uncached);
+ spin_unlock_bh(&rt_uncached_lock);
+ }
+}
+
+void rt_flush_dev(struct net_device *dev)
+{
+ if (!list_empty(&rt_uncached_list)) {
+ struct net *net = dev_net(dev);
+ struct rtable *rt;
+
+ spin_lock_bh(&rt_uncached_lock);
+ list_for_each_entry(rt, &rt_uncached_list, rt_uncached) {
+ if (rt->dst.dev != dev)
+ continue;
+ rt->dst.dev = net->loopback_dev;
+ dev_hold(rt->dst.dev);
+ dev_put(dev);
+ }
+ spin_unlock_bh(&rt_uncached_lock);
+ }
+}
+
+static bool rt_cache_valid(const struct rtable *rt)
+{
+ return rt &&
+ rt->dst.obsolete == DST_OBSOLETE_FORCE_CHK &&
+ !rt_is_expired(rt);
}
static void rt_set_nexthop(struct rtable *rt, __be32 daddr,
@@ -1232,20 +1304,24 @@ static void rt_set_nexthop(struct rtable *rt, __be32 daddr,
struct fib_nh_exception *fnhe,
struct fib_info *fi, u16 type, u32 itag)
{
+ bool cached = false;
+
if (fi) {
struct fib_nh *nh = &FIB_RES_NH(*res);
if (nh->nh_gw && nh->nh_scope == RT_SCOPE_LINK)
rt->rt_gateway = nh->nh_gw;
- if (unlikely(fnhe))
- rt_bind_exception(rt, fnhe, daddr);
dst_init_metrics(&rt->dst, fi->fib_metrics, true);
#ifdef CONFIG_IP_ROUTE_CLASSID
rt->dst.tclassid = nh->nh_tclassid;
#endif
- if (!(rt->dst.flags & DST_HOST))
- rt_cache_route(nh, rt);
+ if (unlikely(fnhe))
+ cached = rt_bind_exception(rt, fnhe, daddr);
+ else if (!(rt->dst.flags & DST_NOCACHE))
+ cached = rt_cache_route(nh, rt);
}
+ if (unlikely(!cached))
+ rt_add_uncached_list(rt);
#ifdef CONFIG_IP_ROUTE_CLASSID
#ifdef CONFIG_IP_MULTIPLE_TABLES
@@ -1259,7 +1335,7 @@ static struct rtable *rt_dst_alloc(struct net_device *dev,
bool nopolicy, bool noxfrm, bool will_cache)
{
return dst_alloc(&ipv4_dst_ops, dev, 1, DST_OBSOLETE_FORCE_CHK,
- (will_cache ? 0 : DST_HOST) | DST_NOCACHE |
+ (will_cache ? 0 : (DST_HOST | DST_NOCACHE)) |
(nopolicy ? DST_NOPOLICY : 0) |
(noxfrm ? DST_NOXFRM : 0));
}
@@ -1312,6 +1388,7 @@ static int ip_route_input_mc(struct sk_buff *skb, __be32 daddr, __be32 saddr,
rth->rt_iif = 0;
rth->rt_pmtu = 0;
rth->rt_gateway = 0;
+ INIT_LIST_HEAD(&rth->rt_uncached);
if (our) {
rth->dst.input= ip_local_deliver;
rth->rt_flags |= RTCF_LOCAL;
@@ -1364,8 +1441,7 @@ static void ip_handle_martian_source(struct net_device *dev,
static int __mkroute_input(struct sk_buff *skb,
const struct fib_result *res,
struct in_device *in_dev,
- __be32 daddr, __be32 saddr, u32 tos,
- struct rtable **result)
+ __be32 daddr, __be32 saddr, u32 tos)
{
struct rtable *rth;
int err;
@@ -1414,9 +1490,9 @@ static int __mkroute_input(struct sk_buff *skb,
do_cache = false;
if (res->fi) {
if (!itag) {
- rth = FIB_RES_NH(*res).nh_rth_input;
+ rth = rcu_dereference(FIB_RES_NH(*res).nh_rth_input);
if (rt_cache_valid(rth)) {
- dst_hold(&rth->dst);
+ skb_dst_set_noref(skb, &rth->dst);
goto out;
}
do_cache = true;
@@ -1438,13 +1514,14 @@ static int __mkroute_input(struct sk_buff *skb,
rth->rt_iif = 0;
rth->rt_pmtu = 0;
rth->rt_gateway = 0;
+ INIT_LIST_HEAD(&rth->rt_uncached);
rth->dst.input = ip_forward;
rth->dst.output = ip_output;
rt_set_nexthop(rth, daddr, res, NULL, res->fi, res->type, itag);
+ skb_dst_set(skb, &rth->dst);
out:
- *result = rth;
err = 0;
cleanup:
return err;
@@ -1456,21 +1533,13 @@ static int ip_mkroute_input(struct sk_buff *skb,
struct in_device *in_dev,
__be32 daddr, __be32 saddr, u32 tos)
{
- struct rtable *rth = NULL;
- int err;
-
#ifdef CONFIG_IP_ROUTE_MULTIPATH
if (res->fi && res->fi->fib_nhs > 1)
fib_select_multipath(res);
#endif
/* create a routing cache entry */
- err = __mkroute_input(skb, res, in_dev, daddr, saddr, tos, &rth);
- if (err)
- return err;
-
- skb_dst_set(skb, &rth->dst);
- return 0;
+ return __mkroute_input(skb, res, in_dev, daddr, saddr, tos);
}
/*
@@ -1584,10 +1653,11 @@ local_input:
do_cache = false;
if (res.fi) {
if (!itag) {
- rth = FIB_RES_NH(res).nh_rth_input;
+ rth = rcu_dereference(FIB_RES_NH(res).nh_rth_input);
if (rt_cache_valid(rth)) {
- dst_hold(&rth->dst);
- goto set_and_out;
+ skb_dst_set_noref(skb, &rth->dst);
+ err = 0;
+ goto out;
}
do_cache = true;
}
@@ -1611,6 +1681,7 @@ local_input:
rth->rt_iif = 0;
rth->rt_pmtu = 0;
rth->rt_gateway = 0;
+ INIT_LIST_HEAD(&rth->rt_uncached);
if (res.type == RTN_UNREACHABLE) {
rth->dst.input= ip_error;
rth->dst.error= -err;
@@ -1618,7 +1689,6 @@ local_input:
}
if (do_cache)
rt_cache_route(&FIB_RES_NH(res), rth);
-set_and_out:
skb_dst_set(skb, &rth->dst);
err = 0;
goto out;
@@ -1656,8 +1726,8 @@ martian_source_keep_err:
goto out;
}
-int ip_route_input(struct sk_buff *skb, __be32 daddr, __be32 saddr,
- u8 tos, struct net_device *dev)
+int ip_route_input_noref(struct sk_buff *skb, __be32 daddr, __be32 saddr,
+ u8 tos, struct net_device *dev)
{
int res;
@@ -1700,7 +1770,7 @@ int ip_route_input(struct sk_buff *skb, __be32 daddr, __be32 saddr,
rcu_read_unlock();
return res;
}
-EXPORT_SYMBOL(ip_route_input);
+EXPORT_SYMBOL(ip_route_input_noref);
/* called with rcu_read_lock() */
static struct rtable *__mkroute_output(const struct fib_result *res,
@@ -1750,19 +1820,23 @@ static struct rtable *__mkroute_output(const struct fib_result *res,
fnhe = NULL;
if (fi) {
+ struct rtable __rcu **prth;
+
fnhe = find_exception(&FIB_RES_NH(*res), fl4->daddr);
- if (!fnhe) {
- rth = FIB_RES_NH(*res).nh_rth_output;
- if (rt_cache_valid(rth)) {
- dst_hold(&rth->dst);
- return rth;
- }
+ if (fnhe)
+ prth = &fnhe->fnhe_rth;
+ else
+ prth = __this_cpu_ptr(FIB_RES_NH(*res).nh_pcpu_rth_output);
+ rth = rcu_dereference(*prth);
+ if (rt_cache_valid(rth)) {
+ dst_hold(&rth->dst);
+ return rth;
}
}
rth = rt_dst_alloc(dev_out,
IN_DEV_CONF_GET(in_dev, NOPOLICY),
IN_DEV_CONF_GET(in_dev, NOXFRM),
- fi && !fnhe);
+ fi);
if (!rth)
return ERR_PTR(-ENOBUFS);
@@ -1775,6 +1849,7 @@ static struct rtable *__mkroute_output(const struct fib_result *res,
rth->rt_iif = orig_oif ? : 0;
rth->rt_pmtu = 0;
rth->rt_gateway = 0;
+ INIT_LIST_HEAD(&rth->rt_uncached);
RT_CACHE_STAT_INC(out_slow_tot);
@@ -2054,6 +2129,8 @@ struct dst_entry *ipv4_blackhole_route(struct net *net, struct dst_entry *dst_or
rt->rt_type = ort->rt_type;
rt->rt_gateway = ort->rt_gateway;
+ INIT_LIST_HEAD(&rt->rt_uncached);
+
dst_free(new);
}
OpenPOWER on IntegriCloud