summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author: glebius <glebius@FreeBSD.org> 2012-07-04 07:37:53 +0000
committer: glebius <glebius@FreeBSD.org> 2012-07-04 07:37:53 +0000
commit: 418a04b4676ef1730c653057e7bea6e0d8ff06aa (patch)
tree: 5a6dfb251b44f09882eafc14a986f989520add8e
parent: 31a966157854178ad501d2f8901feb3c53aba299 (diff)
download: FreeBSD-src-418a04b4676ef1730c653057e7bea6e0d8ff06aa.zip
          FreeBSD-src-418a04b4676ef1730c653057e7bea6e0d8ff06aa.tar.gz
When ip_output()/ip6_output() is supplied a struct route *ro argument,
it skips the FLOWTABLE lookup. However, a non-NULL ro has a dual meaning here: it may be supplied to provide a route, or it may be supplied to store and return to the caller the route that ip_output()/ip6_output() finds. In the latter case, skipping the FLOWTABLE lookup is a pessimisation.

The difference between a struct route filled by FLOWTABLE and one filled by the rtalloc() family is that the former doesn't hold a reference on its rtentry. The reference is held by the flow entry, and it will be released later. Thus, a route filled by FLOWTABLE shouldn't be passed to the RTFREE() macro.

- Introduce a new flag for struct route/route_in6 that marks a route as not holding a reference on its rtentry.
- Introduce a new macro RO_RTFREE() that cleans up a struct route depending on its kind.
- All callers of ip_output()/ip6_output() that supply a non-NULL but empty route should use RO_RTFREE() to free the results of the lookup.
- ip_output()/ip6_output() now always do the FLOWTABLE lookup when ro->ro_rt == NULL.

Tested by: tuexen (SCTP part)
-rw-r--r--  sys/net/flowtable.c         3
-rw-r--r--  sys/net/route.h            15
-rw-r--r--  sys/netinet/ip_input.c      3
-rw-r--r--  sys/netinet/ip_output.c    44
-rw-r--r--  sys/netinet/sctp_output.c   9
-rw-r--r--  sys/netinet6/ip6_output.c  25
-rw-r--r--  sys/netinet6/nd6_nbr.c     12
7 files changed, 58 insertions, 53 deletions
diff --git a/sys/net/flowtable.c b/sys/net/flowtable.c
index 8501b18..0e50377 100644
--- a/sys/net/flowtable.c
+++ b/sys/net/flowtable.c
@@ -619,6 +619,7 @@ flow_to_route(struct flentry *fle, struct route *ro)
sin->sin_addr.s_addr = hashkey[2];
ro->ro_rt = __DEVOLATILE(struct rtentry *, fle->f_rt);
ro->ro_lle = __DEVOLATILE(struct llentry *, fle->f_lle);
+ ro->ro_flags |= RT_NORTREF;
}
#endif /* INET */
@@ -826,7 +827,7 @@ flow_to_route_in6(struct flentry *fle, struct route_in6 *ro)
memcpy(&sin6->sin6_addr, &hashkey[5], sizeof (struct in6_addr));
ro->ro_rt = __DEVOLATILE(struct rtentry *, fle->f_rt);
ro->ro_lle = __DEVOLATILE(struct llentry *, fle->f_lle);
-
+ ro->ro_flags |= RT_NORTREF;
}
#endif /* INET6 */
diff --git a/sys/net/route.h b/sys/net/route.h
index 1de222a..f12ed810 100644
--- a/sys/net/route.h
+++ b/sys/net/route.h
@@ -54,7 +54,8 @@ struct route {
struct sockaddr ro_dst;
};
-#define RT_CACHING_CONTEXT 0x1
+#define RT_CACHING_CONTEXT 0x1 /* XXX: not used anywhere */
+#define RT_NORTREF 0x2 /* doesn't hold reference on ro_rt */
/*
* These numbers are used by reliable protocols for determining
@@ -341,6 +342,18 @@ struct rt_addrinfo {
RTFREE_LOCKED(_rt); \
} while (0)
+#define RO_RTFREE(_ro) do { \
+ if ((_ro)->ro_rt) { \
+ if ((_ro)->ro_flags & RT_NORTREF) { \
+ (_ro)->ro_flags &= ~RT_NORTREF; \
+ (_ro)->ro_rt = NULL; \
+ } else { \
+ RT_LOCK((_ro)->ro_rt); \
+ RTFREE_LOCKED((_ro)->ro_rt); \
+ } \
+ } \
+} while (0)
+
struct radix_node_head *rt_tables_get_rnh(int, int);
struct ifmultiaddr;
diff --git a/sys/netinet/ip_input.c b/sys/netinet/ip_input.c
index ab78fa2..e9388a0 100644
--- a/sys/netinet/ip_input.c
+++ b/sys/netinet/ip_input.c
@@ -1495,8 +1495,7 @@ ip_forward(struct mbuf *m, int srcrt)
if (error == EMSGSIZE && ro.ro_rt)
mtu = ro.ro_rt->rt_rmx.rmx_mtu;
- if (ro.ro_rt)
- RTFREE(ro.ro_rt);
+ RO_RTFREE(&ro);
if (error)
IPSTAT_INC(ips_cantforward);
diff --git a/sys/netinet/ip_output.c b/sys/netinet/ip_output.c
index d13c397..cc38dcf 100644
--- a/sys/netinet/ip_output.c
+++ b/sys/netinet/ip_output.c
@@ -105,6 +105,10 @@ extern struct protosw inetsw[];
* ip_len and ip_off are in host format.
* The mbuf chain containing the packet will be freed.
* The mbuf opt, if present, will not be freed.
+ * If route ro is present and has ro_rt initialized, route lookup would be
+ * skipped and ro->ro_rt would be used. If ro is present but ro->ro_rt is NULL,
+ * then result of route lookup is stored in ro->ro_rt.
+ *
* In the IP forwarding case, the packet will arrive with options already
* inserted, so must have a NULL opt pointer.
*/
@@ -119,7 +123,6 @@ ip_output(struct mbuf *m, struct mbuf *opt, struct route *ro, int flags,
int mtu;
int n; /* scratchpad */
int error = 0;
- int nortfree = 0;
struct sockaddr_in *dst;
struct in_ifaddr *ia = NULL;
int isbroadcast, sw_csum;
@@ -146,24 +149,23 @@ ip_output(struct mbuf *m, struct mbuf *opt, struct route *ro, int flags,
if (ro == NULL) {
ro = &iproute;
bzero(ro, sizeof (*ro));
+ }
#ifdef FLOWTABLE
- {
- struct flentry *fle;
+ if (ro->ro_rt == NULL) {
+ struct flentry *fle;
- /*
- * The flow table returns route entries valid for up to 30
- * seconds; we rely on the remainder of ip_output() taking no
- * longer than that long for the stability of ro_rt. The
- * flow ID assignment must have happened before this point.
- */
- if ((fle = flowtable_lookup_mbuf(V_ip_ft, m, AF_INET)) != NULL) {
- flow_to_route(fle, ro);
- nortfree = 1;
- }
- }
-#endif
+ /*
+ * The flow table returns route entries valid for up to 30
+ * seconds; we rely on the remainder of ip_output() taking no
+ * longer than that long for the stability of ro_rt. The
+ * flow ID assignment must have happened before this point.
+ */
+ fle = flowtable_lookup_mbuf(V_ip_ft, m, AF_INET);
+ if (fle != NULL)
+ flow_to_route(fle, ro);
}
+#endif
if (opt) {
int len = 0;
@@ -209,10 +211,9 @@ again:
!RT_LINK_IS_UP(rte->rt_ifp) ||
dst->sin_family != AF_INET ||
dst->sin_addr.s_addr != ip->ip_dst.s_addr)) {
- if (!nortfree)
- RTFREE(rte);
- rte = ro->ro_rt = (struct rtentry *)NULL;
- ro->ro_lle = (struct llentry *)NULL;
+ RO_RTFREE(ro);
+ ro->ro_lle = NULL;
+ rte = NULL;
}
#ifdef IPFIREWALL_FORWARD
if (rte == NULL && fwd_tag == NULL) {
@@ -672,9 +673,8 @@ passout:
IPSTAT_INC(ips_fragmented);
done:
- if (ro == &iproute && ro->ro_rt && !nortfree) {
- RTFREE(ro->ro_rt);
- }
+ if (ro == &iproute)
+ RO_RTFREE(ro);
if (ia != NULL)
ifa_free(&ia->ia_ifa);
return (error);
diff --git a/sys/netinet/sctp_output.c b/sys/netinet/sctp_output.c
index 9886887..f99fd0b 100644
--- a/sys/netinet/sctp_output.c
+++ b/sys/netinet/sctp_output.c
@@ -4156,10 +4156,7 @@ sctp_lowlevel_chunk_output(struct sctp_inpcb *inp,
SCTPDBG(SCTP_DEBUG_OUTPUT3, "IP output returns %d\n", ret);
if (net == NULL) {
/* free tempy routes */
- if (ro->ro_rt) {
- RTFREE(ro->ro_rt);
- ro->ro_rt = NULL;
- }
+ RO_RTFREE(ro);
} else {
/*
* PMTU check versus smallest asoc MTU goes
@@ -4513,9 +4510,7 @@ sctp_lowlevel_chunk_output(struct sctp_inpcb *inp,
}
if (net == NULL) {
/* Now if we had a temp route free it */
- if (ro->ro_rt) {
- RTFREE(ro->ro_rt);
- }
+ RO_RTFREE(ro);
} else {
/*
* PMTU check versus smallest asoc MTU goes
diff --git a/sys/netinet6/ip6_output.c b/sys/netinet6/ip6_output.c
index df4e1b0..e1569b0 100644
--- a/sys/netinet6/ip6_output.c
+++ b/sys/netinet6/ip6_output.c
@@ -213,6 +213,9 @@ in6_delayed_cksum(struct mbuf *m, uint32_t plen, u_short offset)
* This function may modify ver and hlim only.
* The mbuf chain containing the packet will be freed.
* The mbuf opt, if present, will not be freed.
+ * If route_in6 ro is present and has ro_rt initialized, route lookup would be
+ * skipped and ro->ro_rt would be used. If ro is present but ro->ro_rt is NULL,
+ * then result of route lookup is stored in ro->ro_rt.
*
* type of "mtu": rt_rmx.rmx_mtu is u_long, ifnet.ifr_mtu is int, and
* nd_ifinfo.linkmtu is u_int32_t. so we use u_long to hold largest one,
@@ -243,7 +246,6 @@ ip6_output(struct mbuf *m0, struct ip6_pktopts *opt,
struct in6_addr finaldst, src0, dst0;
u_int32_t zone;
struct route_in6 *ro_pmtu = NULL;
- int flevalid = 0;
int hdrsplit = 0;
int needipsec = 0;
int sw_csum, tso;
@@ -506,7 +508,7 @@ skip_ipsec2:;
ro = &opt->ip6po_route;
dst = (struct sockaddr_in6 *)&ro->ro_dst;
#ifdef FLOWTABLE
- if (ro == &ip6route) {
+ if (ro->ro_rt == NULL) {
struct flentry *fle;
/*
@@ -515,11 +517,9 @@ skip_ipsec2:;
* longer than that long for the stability of ro_rt. The
* flow ID assignment must have happened before this point.
*/
- if ((fle = flowtable_lookup_mbuf(V_ip6_ft, m, AF_INET6)) != NULL) {
+ fle = flowtable_lookup_mbuf(V_ip6_ft, m, AF_INET6);
+ if (fle != NULL)
flow_to_route_in6(fle, ro);
- if (ro->ro_rt != NULL && ro->ro_lle != NULL)
- flevalid = 1;
- }
}
#endif
again:
@@ -627,7 +627,7 @@ again:
dst_sa.sin6_family = AF_INET6;
dst_sa.sin6_len = sizeof(dst_sa);
dst_sa.sin6_addr = ip6->ip6_dst;
- if (flevalid) {
+ if (ro->ro_rt) {
rt = ro->ro_rt;
ifp = ro->ro_rt->rt_ifp;
} else if ((error = in6_selectroute_fib(&dst_sa, opt, im6o, ro,
@@ -1182,13 +1182,10 @@ sendorfree:
V_ip6stat.ip6s_fragmented++;
done:
- if (ro == &ip6route && ro->ro_rt && flevalid == 0) {
- /* brace necessary for RTFREE */
- RTFREE(ro->ro_rt);
- } else if (ro_pmtu == &ip6route && ro_pmtu->ro_rt &&
- ((flevalid == 0) || (ro_pmtu != ro))) {
- RTFREE(ro_pmtu->ro_rt);
- }
+ if (ro == &ip6route)
+ RO_RTFREE(ro);
+ if (ro_pmtu == &ip6route)
+ RO_RTFREE(ro_pmtu);
#ifdef IPSEC
if (sp != NULL)
KEY_FREESP(&sp);
diff --git a/sys/netinet6/nd6_nbr.c b/sys/netinet6/nd6_nbr.c
index 4b80dc2..a203558 100644
--- a/sys/netinet6/nd6_nbr.c
+++ b/sys/netinet6/nd6_nbr.c
@@ -595,9 +595,9 @@ nd6_ns_output(struct ifnet *ifp, const struct in6_addr *daddr6,
icmp6_ifstat_inc(ifp, ifs6_out_neighborsolicit);
ICMP6STAT_INC(icp6s_outhist[ND_NEIGHBOR_SOLICIT]);
- if (ro.ro_rt) { /* we don't cache this route. */
- RTFREE(ro.ro_rt);
- }
+ /* We don't cache this route. */
+ RO_RTFREE(&ro);
+
return;
bad:
@@ -1124,9 +1124,9 @@ nd6_na_output_fib(struct ifnet *ifp, const struct in6_addr *daddr6_0,
icmp6_ifstat_inc(ifp, ifs6_out_neighboradvert);
ICMP6STAT_INC(icp6s_outhist[ND_NEIGHBOR_ADVERT]);
- if (ro.ro_rt) { /* we don't cache this route. */
- RTFREE(ro.ro_rt);
- }
+ /* We don't cache this route. */
+ RO_RTFREE(&ro);
+
return;
bad:
OpenPOWER on IntegriCloud