diff options
author | gnn <gnn@FreeBSD.org> | 2016-03-24 07:54:56 +0000 |
---|---|---|
committer | gnn <gnn@FreeBSD.org> | 2016-03-24 07:54:56 +0000 |
commit | c3d5404bbe9b51c8373832a220b2568fc8b806fe (patch) | |
tree | e930aaa09aa8f724ba7ca61b6ec317d3c5bf9cee /sys/netinet | |
parent | a9f48f4d565cc70d5eb24a4b37b79b0f870226de (diff) | |
download | FreeBSD-src-c3d5404bbe9b51c8373832a220b2568fc8b806fe.zip FreeBSD-src-c3d5404bbe9b51c8373832a220b2568fc8b806fe.tar.gz |
FreeBSD previously provided route caching for TCP (and UDP). Re-add
route caching for TCP, with some improvements. In particular, invalidate
the route cache when a new route is added, since the new route might be a
better match. The cache is automatically invalidated if the old route is deleted.
Submitted by: Mike Karels
Reviewed by: gnn
Differential Revision: https://reviews.freebsd.org/D4306
Diffstat (limited to 'sys/netinet')
-rw-r--r-- | sys/netinet/in_pcb.c | 22 | ||||
-rw-r--r-- | sys/netinet/in_pcb.h | 10 | ||||
-rw-r--r-- | sys/netinet/ip_output.c | 29 | ||||
-rw-r--r-- | sys/netinet/tcp_output.c | 10 | ||||
-rw-r--r-- | sys/netinet/tcp_subr.c | 12 | ||||
-rw-r--r-- | sys/netinet/tcp_timer.c | 2 | ||||
-rw-r--r-- | sys/netinet/udp_usrreq.c | 42 |
7 files changed, 98 insertions, 29 deletions
diff --git a/sys/netinet/in_pcb.c b/sys/netinet/in_pcb.c index ec42e67..50e5d6e 100644 --- a/sys/netinet/in_pcb.c +++ b/sys/netinet/in_pcb.c @@ -1298,6 +1298,11 @@ in_pcbfree(struct inpcb *inp) if (inp->inp_moptions != NULL) inp_freemoptions(inp->inp_moptions); #endif + if (inp->inp_route.ro_rt) { + RTFREE(inp->inp_route.ro_rt); + inp->inp_route.ro_rt = (struct rtentry *)NULL; + } + inp->inp_vflag = 0; inp->inp_flags2 |= INP_FREED; crfree(inp->inp_cred); @@ -2225,6 +2230,23 @@ in_pcbremlists(struct inpcb *inp) } /* + * Check for alternatives when higher level complains + * about service problems. For now, invalidate cached + * routing information. If the route was created dynamically + * (by a redirect), time to try a default gateway again. + */ +void +in_losing(struct inpcb *inp) +{ + + if (inp->inp_route.ro_rt) { + RTFREE(inp->inp_route.ro_rt); + inp->inp_route.ro_rt = (struct rtentry *)NULL; + } + return; +} + +/* * A set label operation has occurred at the socket layer, propagate the * label change into the in_pcb for the socket. 
*/ diff --git a/sys/netinet/in_pcb.h b/sys/netinet/in_pcb.h index 54eab88..9c77a51 100644 --- a/sys/netinet/in_pcb.h +++ b/sys/netinet/in_pcb.h @@ -42,6 +42,7 @@ #include <sys/_lock.h> #include <sys/_mutex.h> #include <sys/_rwlock.h> +#include <net/route.h> #ifdef _KERNEL #include <sys/lock.h> @@ -238,8 +239,14 @@ struct inpcb { #define inp_zero_size offsetof(struct inpcb, inp_gencnt) inp_gen_t inp_gencnt; /* (c) generation count */ struct llentry *inp_lle; /* cached L2 information */ - struct rtentry *inp_rt; /* cached L3 information */ struct rwlock inp_lock; + rt_gen_t inp_rt_cookie; /* generation for route entry */ + union { /* cached L3 information */ + struct route inpu_route; + struct route_in6 inpu_route6; + } inp_rtu; +#define inp_route inp_rtu.inpu_route +#define inp_route6 inp_rtu.inpu_route6 }; #define inp_fport inp_inc.inc_fport #define inp_lport inp_inc.inc_lport @@ -722,6 +729,7 @@ void in_pcbrehash_mbuf(struct inpcb *, struct mbuf *); int in_pcbrele(struct inpcb *); int in_pcbrele_rlocked(struct inpcb *); int in_pcbrele_wlocked(struct inpcb *); +void in_losing(struct inpcb *); void in_pcbsetsolabel(struct socket *so); int in_getpeeraddr(struct socket *so, struct sockaddr **nam); int in_getsockaddr(struct socket *so, struct sockaddr **nam); diff --git a/sys/netinet/ip_output.c b/sys/netinet/ip_output.c index fa225fc..afdf1a7 100644 --- a/sys/netinet/ip_output.c +++ b/sys/netinet/ip_output.c @@ -282,17 +282,36 @@ ip_output(struct mbuf *m, struct mbuf *opt, struct route *ro, int flags, gw = dst = (struct sockaddr_in *)&ro->ro_dst; fibnum = (inp != NULL) ? inp->inp_inc.inc_fibnum : M_GETFIB(m); rte = ro->ro_rt; - /* - * The address family should also be checked in case of sharing - * the cache with IPv6. 
- */ - if (rte == NULL || dst->sin_family != AF_INET) { + if (rte == NULL) { bzero(dst, sizeof(*dst)); dst->sin_family = AF_INET; dst->sin_len = sizeof(*dst); dst->sin_addr = ip->ip_dst; } again: + /* + * Validate route against routing table additions; + * a better/more specific route might have been added. + */ + if (inp) + RT_VALIDATE(ro, &inp->inp_rt_cookie, fibnum); + /* + * If there is a cached route, + * check that it is to the same destination + * and is still up. If not, free it and try again. + * The address family should also be checked in case of sharing the + * cache with IPv6. + * Also check whether routing cache needs invalidation. + */ + rte = ro->ro_rt; + if (rte && ((rte->rt_flags & RTF_UP) == 0 || + rte->rt_ifp == NULL || + !RT_LINK_IS_UP(rte->rt_ifp) || + dst->sin_family != AF_INET || + dst->sin_addr.s_addr != ip->ip_dst.s_addr)) { + RTFREE(rte); + rte = ro->ro_rt = (struct rtentry *)NULL; + } ia = NULL; have_ia_ref = 0; /* diff --git a/sys/netinet/tcp_output.c b/sys/netinet/tcp_output.c index d7fb75d..2043fc9 100644 --- a/sys/netinet/tcp_output.c +++ b/sys/netinet/tcp_output.c @@ -1379,9 +1379,6 @@ send: #endif #ifdef INET { - struct route ro; - - bzero(&ro, sizeof(ro)); ip->ip_len = htons(m->m_pkthdr.len); #ifdef INET6 if (tp->t_inpcb->inp_vflag & INP_IPV6PROTO) @@ -1412,13 +1409,12 @@ send: tcp_pcap_add(th, m, &(tp->t_outpkts)); #endif - error = ip_output(m, tp->t_inpcb->inp_options, &ro, + error = ip_output(m, tp->t_inpcb->inp_options, &tp->t_inpcb->inp_route, ((so->so_options & SO_DONTROUTE) ? 
IP_ROUTETOIF : 0), 0, tp->t_inpcb); - if (error == EMSGSIZE && ro.ro_rt != NULL) - mtu = ro.ro_rt->rt_mtu; - RO_RTFREE(&ro); + if (error == EMSGSIZE && tp->t_inpcb->inp_route.ro_rt != NULL) + mtu = tp->t_inpcb->inp_route.ro_rt->rt_mtu; } #endif /* INET */ diff --git a/sys/netinet/tcp_subr.c b/sys/netinet/tcp_subr.c index 6635bb7..877f7f8 100644 --- a/sys/netinet/tcp_subr.c +++ b/sys/netinet/tcp_subr.c @@ -1632,6 +1632,10 @@ tcp_notify(struct inpcb *inp, int error) if (tp->t_state == TCPS_ESTABLISHED && (error == EHOSTUNREACH || error == ENETUNREACH || error == EHOSTDOWN)) { + if (inp->inp_route.ro_rt) { + RTFREE(inp->inp_route.ro_rt); + inp->inp_route.ro_rt = (struct rtentry *)NULL; + } return (inp); } else if (tp->t_state < TCPS_ESTABLISHED && tp->t_rxtshift > 3 && tp->t_softerror) { @@ -1926,11 +1930,11 @@ tcp_ctlinput(int cmd, struct sockaddr *sa, void *vip) else if (V_icmp_may_rst && (cmd == PRC_UNREACH_ADMIN_PROHIB || cmd == PRC_UNREACH_PORT || cmd == PRC_TIMXCEED_INTRANS) && ip) notify = tcp_drop_syn_sent; - /* - * Redirects don't need to be handled up here. - */ - else if (PRC_IS_REDIRECT(cmd)) + else if (PRC_IS_REDIRECT(cmd)) { + /* signal EHOSTDOWN, as it flushes the cached route */ + in_pcbnotifyall(&tcbinfo, faddr, EHOSTDOWN, notify); return; + } /* * Hostdead is ugly because it goes linearly through all PCBs. 
* XXX: We never get this from ICMP, otherwise it makes an diff --git a/sys/netinet/tcp_timer.c b/sys/netinet/tcp_timer.c index 046feba..172c394 100644 --- a/sys/netinet/tcp_timer.c +++ b/sys/netinet/tcp_timer.c @@ -786,7 +786,9 @@ tcp_timer_rexmt(void * xtp) #ifdef INET6 if ((tp->t_inpcb->inp_vflag & INP_IPV6) != 0) in6_losing(tp->t_inpcb); + else #endif + in_losing(tp->t_inpcb); tp->t_rttvar += (tp->t_srtt >> TCP_RTT_SHIFT); tp->t_srtt = 0; } diff --git a/sys/netinet/udp_usrreq.c b/sys/netinet/udp_usrreq.c index f474a54..ecd1e0c 100644 --- a/sys/netinet/udp_usrreq.c +++ b/sys/netinet/udp_usrreq.c @@ -740,6 +740,11 @@ udp_notify(struct inpcb *inp, int errno) * or a write lock, but a read lock is sufficient. */ INP_LOCK_ASSERT(inp); + if ((errno == EHOSTUNREACH || errno == ENETUNREACH || + errno == EHOSTDOWN) && inp->inp_route.ro_rt) { + RTFREE(inp->inp_route.ro_rt); + inp->inp_route.ro_rt = (struct rtentry *)NULL; + } inp->inp_socket->so_error = errno; sorwakeup(inp->inp_socket); @@ -761,11 +766,11 @@ udp_common_ctlinput(int cmd, struct sockaddr *sa, void *vip, if (sa->sa_family != AF_INET || faddr.s_addr == INADDR_ANY) return; - /* - * Redirects don't need to be handled up here. - */ - if (PRC_IS_REDIRECT(cmd)) + if (PRC_IS_REDIRECT(cmd)) { + /* signal EHOSTDOWN, as it flushes the cached route */ + in_pcbnotifyall(&udbinfo, faddr, EHOSTDOWN, udp_notify); return; + } /* * Hostdead is ugly because it goes linearly through all PCBs. 
@@ -1116,7 +1121,7 @@ udp_output(struct inpcb *inp, struct mbuf *m, struct sockaddr *addr, int error = 0; int ipflags; u_short fport, lport; - int unlock_udbinfo; + int unlock_udbinfo, unlock_inp; u_char tos; uint8_t pr; uint16_t cscov = 0; @@ -1137,7 +1142,15 @@ udp_output(struct inpcb *inp, struct mbuf *m, struct sockaddr *addr, } src.sin_family = 0; - INP_RLOCK(inp); + sin = (struct sockaddr_in *)addr; + if (sin == NULL || + (inp->inp_laddr.s_addr == INADDR_ANY && inp->inp_lport == 0)) { + INP_WLOCK(inp); + unlock_inp = UH_WLOCKED; + } else { + INP_RLOCK(inp); + unlock_inp = UH_RLOCKED; + } tos = inp->inp_ip_tos; if (control != NULL) { /* @@ -1145,7 +1158,10 @@ udp_output(struct inpcb *inp, struct mbuf *m, struct sockaddr *addr, * stored in a single mbuf. */ if (control->m_next) { - INP_RUNLOCK(inp); + if (unlock_inp == UH_WLOCKED) + INP_WUNLOCK(inp); + else + INP_RUNLOCK(inp); m_freem(control); m_freem(m); return (EINVAL); @@ -1220,7 +1236,10 @@ udp_output(struct inpcb *inp, struct mbuf *m, struct sockaddr *addr, m_freem(control); } if (error) { - INP_RUNLOCK(inp); + if (unlock_inp == UH_WLOCKED) + INP_WUNLOCK(inp); + else + INP_RUNLOCK(inp); m_freem(m); return (error); } @@ -1246,8 +1265,6 @@ udp_output(struct inpcb *inp, struct mbuf *m, struct sockaddr *addr, sin = (struct sockaddr_in *)addr; if (sin != NULL && (inp->inp_laddr.s_addr == INADDR_ANY && inp->inp_lport == 0)) { - INP_RUNLOCK(inp); - INP_WLOCK(inp); INP_HASH_WLOCK(pcbinfo); unlock_udbinfo = UH_WLOCKED; } else if ((sin != NULL && ( @@ -1514,9 +1531,10 @@ udp_output(struct inpcb *inp, struct mbuf *m, struct sockaddr *addr, else if (unlock_udbinfo == UH_RLOCKED) INP_HASH_RUNLOCK(pcbinfo); UDP_PROBE(send, NULL, inp, &ui->ui_i, inp, &ui->ui_u); - error = ip_output(m, inp->inp_options, NULL, ipflags, + error = ip_output(m, inp->inp_options, + (unlock_inp == UH_WLOCKED ? 
&inp->inp_route : NULL), ipflags, inp->inp_moptions, inp); - if (unlock_udbinfo == UH_WLOCKED) + if (unlock_inp == UH_WLOCKED) INP_WUNLOCK(inp); else INP_RUNLOCK(inp); |