31 files changed, 1686 insertions, 1153 deletions
diff --git a/sys/conf/files b/sys/conf/files
index 63c378b..8eee001 100644
--- a/sys/conf/files
+++ b/sys/conf/files
@@ -1457,6 +1457,7 @@ netinet/ip_mroute.c	optional mrouting
 netinet/ip_output.c	optional inet
 netinet/raw_ip.c	optional inet
 netinet/tcp_debug.c	optional tcpdebug
+netinet/tcp_hostcache.c	optional inet
 netinet/tcp_input.c	optional inet
 netinet/tcp_output.c	optional inet
 netinet/tcp_subr.c	optional inet
diff --git a/sys/net/if_faith.c b/sys/net/if_faith.c
index 07216b5..a8da4ad 100644
--- a/sys/net/if_faith.c
+++ b/sys/net/if_faith.c
@@ -270,17 +270,8 @@ faithrtrequest(cmd, rt, info)
 	struct rt_addrinfo *info;
 {
 	RT_LOCK_ASSERT(rt);
-
-	if (rt) {
-		rt->rt_rmx.rmx_mtu = rt->rt_ifp->if_mtu; /* for ISO */
-		/*
-		 * For optimal performance, the send and receive buffers
-		 * should be at least twice the MTU plus a little more for
-		 * overhead.
-		 */
-		rt->rt_rmx.rmx_recvpipe =
-			rt->rt_rmx.rmx_sendpipe = 3 * FAITHMTU;
-	}
+	if (rt)
+		rt->rt_rmx.rmx_mtu = rt->rt_ifp->if_mtu;
 }
 
 /*
diff --git a/sys/net/if_loop.c b/sys/net/if_loop.c
index afe0a73..9a54af4 100644
--- a/sys/net/if_loop.c
+++ b/sys/net/if_loop.c
@@ -329,17 +329,8 @@ lortrequest(cmd, rt, info)
 	struct rt_addrinfo *info;
 {
 	RT_LOCK_ASSERT(rt);
-
-	if (rt) {
-		rt->rt_rmx.rmx_mtu = rt->rt_ifp->if_mtu; /* for ISO */
-		/*
-		 * For optimal performance, the send and receive buffers
-		 * should be at least twice the MTU plus a little more for
-		 * overhead.
-		 */
-		rt->rt_rmx.rmx_recvpipe =
-			rt->rt_rmx.rmx_sendpipe = 3 * LOMTU;
-	}
+	if (rt)
+		rt->rt_rmx.rmx_mtu = rt->rt_ifp->if_mtu;
 }
 
 /*
diff --git a/sys/net/route.h b/sys/net/route.h
index 8fff560..34c33eb 100644
--- a/sys/net/route.h
+++ b/sys/net/route.h
@@ -58,6 +58,12 @@ struct route {
  * These numbers are used by reliable protocols for determining
  * retransmission behavior and are included in the routing structure.
  */
+struct rt_metrics_lite {
+	u_long	rmx_mtu;	/* MTU for this path */
+	u_long	rmx_expire;	/* lifetime for route, e.g. redirect */
+	u_long	rmx_pksent;	/* packets sent using this route */
+};
+
 struct rt_metrics {
 	u_long	rmx_locks;	/* Kernel must leave these values alone */
 	u_long	rmx_mtu;	/* MTU for this path */
@@ -104,10 +110,10 @@ struct rtentry {
 	long	rt_refcnt;		/* # held references */
 	u_long	rt_flags;		/* up/down?, host/net */
 	struct	ifnet *rt_ifp;		/* the answer: interface to use */
-	struct	ifaddr *rt_ifa;		/* the answer: interface to use */
+	struct	ifaddr *rt_ifa;		/* the answer: interface address to use */
 	struct	sockaddr *rt_genmask;	/* for generation of cloned routes */
 	caddr_t	rt_llinfo;		/* pointer to link level info cache */
-	struct	rt_metrics rt_rmx;	/* metrics used by rx'ing protocols */
+	struct	rt_metrics_lite rt_rmx;	/* metrics used by rx'ing protocols */
 	struct	rtentry *rt_gwroute;	/* implied entry for gatewayed routes */
 	int	(*rt_output)(struct ifnet *, struct mbuf *, struct sockaddr *,
 		    struct rtentry *);
diff --git a/sys/net/rtsock.c b/sys/net/rtsock.c
index 4fba1a2..3290c0c 100644
--- a/sys/net/rtsock.c
+++ b/sys/net/rtsock.c
@@ -87,7 +87,8 @@ static int	sysctl_dumpentry(struct radix_node *rn, void *vw);
 static int	sysctl_iflist(int af, struct walkarg *w);
 static int	sysctl_ifmalist(int af, struct walkarg *w);
 static int	route_output(struct mbuf *, struct socket *);
-static void	rt_setmetrics(u_long, struct rt_metrics *, struct rt_metrics *);
+static void	rt_setmetrics(u_long, struct rt_metrics *, struct rt_metrics_lite *);
+static void	rt_getmetrics(struct rt_metrics_lite *, struct rt_metrics *);
 static void	rt_dispatch(struct mbuf *, struct sockaddr *);
 
 /*
@@ -355,9 +356,6 @@ route_output(m, so)
 			RT_LOCK(saved_nrt);
 			rt_setmetrics(rtm->rtm_inits,
 				&rtm->rtm_rmx, &saved_nrt->rt_rmx);
-			saved_nrt->rt_rmx.rmx_locks &= ~(rtm->rtm_inits);
-			saved_nrt->rt_rmx.rmx_locks |=
-				(rtm->rtm_inits & rtm->rtm_rmx.rmx_locks);
 			RT_REMREF(saved_nrt);
 			saved_nrt->rt_genmask = info.rti_info[RTAX_GENMASK];
 			RT_UNLOCK(saved_nrt);
@@ -428,7 +426,7 @@ route_output(m, so)
 			(void)rt_msg2(rtm->rtm_type, &info, (caddr_t)rtm,
 				(struct walkarg *)0);
 			rtm->rtm_flags = rt->rt_flags;
-			rtm->rtm_rmx = rt->rt_rmx;
+			rt_getmetrics(&rt->rt_rmx, &rtm->rtm_rmx);
 			rtm->rtm_addrs = info.rti_addrs;
 			break;
 
@@ -478,9 +476,7 @@ route_output(m, so)
 				rt->rt_genmask = info.rti_info[RTAX_GENMASK];
 			/* FALLTHROUGH */
 		case RTM_LOCK:
-			rt->rt_rmx.rmx_locks &= ~(rtm->rtm_inits);
-			rt->rt_rmx.rmx_locks |=
-				(rtm->rtm_inits & rtm->rtm_rmx.rmx_locks);
+			/* We don't support locks anymore */
 			break;
 		}
 		RT_UNLOCK(rt);
@@ -542,20 +538,28 @@ flush:
 }
 
 static void
-rt_setmetrics(u_long which, struct rt_metrics *in, struct rt_metrics *out)
+rt_setmetrics(u_long which, struct rt_metrics *in, struct rt_metrics_lite *out)
 {
 #define metric(f, e) if (which & (f)) out->e = in->e;
-	metric(RTV_RPIPE, rmx_recvpipe);
-	metric(RTV_SPIPE, rmx_sendpipe);
-	metric(RTV_SSTHRESH, rmx_ssthresh);
-	metric(RTV_RTT, rmx_rtt);
-	metric(RTV_RTTVAR, rmx_rttvar);
-	metric(RTV_HOPCOUNT, rmx_hopcount);
+	/*
+	 * Only these are stored in the routing entry since introduction
+	 * of tcp hostcache. The rest is ignored.
+	 */
 	metric(RTV_MTU, rmx_mtu);
 	metric(RTV_EXPIRE, rmx_expire);
 #undef metric
 }
 
+static void
+rt_getmetrics(struct rt_metrics_lite *in, struct rt_metrics *out)
+{
+#define metric(e) out->e = in->e;
+	bzero(out, sizeof(*out));
+	metric(rmx_mtu);
+	metric(rmx_expire);
+#undef metric
+}
+
 #define ROUNDUP(a) \
 	((a) > 0 ? (1 + (((a) - 1) | (sizeof(long) - 1))) : sizeof(long))
 
@@ -948,8 +952,8 @@ sysctl_dumpentry(struct radix_node *rn, void *vw)
 		struct rt_msghdr *rtm = (struct rt_msghdr *)w->w_tmem;
 
 		rtm->rtm_flags = rt->rt_flags;
-		rtm->rtm_use = rt->rt_use;
-		rtm->rtm_rmx = rt->rt_rmx;
+		rtm->rtm_use = rt->rt_rmx.rmx_pksent;
+		rt_getmetrics(&rt->rt_rmx, &rtm->rtm_rmx);
 		rtm->rtm_index = rt->rt_ifp->if_index;
 		rtm->rtm_errno = rtm->rtm_pid = rtm->rtm_seq = 0;
 		rtm->rtm_addrs = info.rti_addrs;
diff --git a/sys/netinet/in_pcb.c b/sys/netinet/in_pcb.c
index 11735ec..898c0d4 100644
--- a/sys/netinet/in_pcb.c
+++ b/sys/netinet/in_pcb.c
@@ -561,7 +561,6 @@ in_pcbconnect_setup(inp, nam, laddrp, lportp, faddrp, fportp, oinpp, td)
 		if (error)
 			return (error);
 	}
-
 	if (!TAILQ_EMPTY(&in_ifaddrhead)) {
 		/*
 		 * If the destination address is INADDR_ANY,
@@ -579,32 +578,20 @@ in_pcbconnect_setup(inp, nam, laddrp, lportp, faddrp, fportp, oinpp, td)
 			    &in_ifaddrhead)->ia_broadaddr)->sin_addr;
 	}
 	if (laddr.s_addr == INADDR_ANY) {
-		register struct route *ro;
+		struct route sro;
 
+		bzero(&sro, sizeof(sro));	/* ro_rt = NULL, clean ro_dst */
 		ia = (struct in_ifaddr *)0;
 		/*
-		 * If route is known or can be allocated now,
-		 * our src addr is taken from the i/f, else punt.
-		 * Note that we should check the address family of the cached
-		 * destination, in case of sharing the cache with IPv6.
+		 * If route is known our src addr is taken from the i/f,
+		 * else punt.
 		 */
-		ro = &inp->inp_route;
-		if (ro->ro_rt && ((ro->ro_rt->rt_flags & RTF_UP) == 0 ||
-		    ro->ro_dst.sa_family != AF_INET ||
-		    satosin(&ro->ro_dst)->sin_addr.s_addr != faddr.s_addr ||
-		    inp->inp_socket->so_options & SO_DONTROUTE)) {
-			RTFREE(ro->ro_rt);
-			ro->ro_rt = (struct rtentry *)0;
-		}
-		if ((inp->inp_socket->so_options & SO_DONTROUTE) == 0 && /*XXX*/
-		    (ro->ro_rt == (struct rtentry *)0 ||
-		    ro->ro_rt->rt_ifp == (struct ifnet *)0)) {
-			/* No route yet, so try to acquire one */
-			bzero(&ro->ro_dst, sizeof(struct sockaddr_in));
-			ro->ro_dst.sa_family = AF_INET;
-			ro->ro_dst.sa_len = sizeof(struct sockaddr_in);
-			((struct sockaddr_in *)&ro->ro_dst)->sin_addr = faddr;
-			rtalloc(ro);
+		if ((inp->inp_socket->so_options & SO_DONTROUTE) == 0) {
+			/* Find out route to destination */
+			sro.ro_dst.sa_family = AF_INET;
+			sro.ro_dst.sa_len = sizeof(struct sockaddr_in);
+			((struct sockaddr_in *)&sro.ro_dst)->sin_addr = faddr;
+			rtalloc_ign(&sro, RTF_CLONING);
 		}
 		/*
 		 * If we found a route, use the address
@@ -612,8 +599,10 @@ in_pcbconnect_setup(inp, nam, laddrp, lportp, faddrp, fportp, oinpp, td)
 		 * unless it is the loopback (in case a route
 		 * to our address on another net goes to loopback).
 		 */
-		if (ro->ro_rt && !(ro->ro_rt->rt_ifp->if_flags & IFF_LOOPBACK))
-			ia = ifatoia(ro->ro_rt->rt_ifa);
+		if (sro.ro_rt && !(sro.ro_rt->rt_ifp->if_flags & IFF_LOOPBACK))
+			ia = ifatoia(sro.ro_rt->rt_ifa);
+		if (sro.ro_rt)
+			RTFREE(sro.ro_rt);
 		if (ia == 0) {
 			bzero(&sa, sizeof(sa));
 			sa.sin_addr = faddr;
@@ -706,8 +695,6 @@ in_pcbdetach(inp)
 	}
 	if (inp->inp_options)
 		(void)m_free(inp->inp_options);
-	if (inp->inp_route.ro_rt)
-		RTFREE(inp->inp_route.ro_rt);
 	ip_freemoptions(inp->inp_moptions);
 	inp->inp_vflag = 0;
 	INP_LOCK_DESTROY(inp);
@@ -884,62 +871,6 @@ in_pcbpurgeif0(pcbinfo, ifp)
 }
 
 /*
- * Check for alternatives when higher level complains
- * about service problems.  For now, invalidate cached
- * routing information.  If the route was created dynamically
- * (by a redirect), time to try a default gateway again.
- */
-void
-in_losing(inp)
-	struct inpcb *inp;
-{
-	register struct rtentry *rt;
-	struct rt_addrinfo info;
-
-	INP_LOCK_ASSERT(inp);
-
-	if ((rt = inp->inp_route.ro_rt)) {
-		RT_LOCK(rt);
-		inp->inp_route.ro_rt = NULL;
-		bzero((caddr_t)&info, sizeof(info));
-		info.rti_flags = rt->rt_flags;
-		info.rti_info[RTAX_DST] = rt_key(rt);
-		info.rti_info[RTAX_GATEWAY] = rt->rt_gateway;
-		info.rti_info[RTAX_NETMASK] = rt_mask(rt);
-		rt_missmsg(RTM_LOSING, &info, rt->rt_flags, 0);
-		if (rt->rt_flags & RTF_DYNAMIC)
-			rtexpunge(rt);
-		RTFREE_LOCKED(rt);
-		/*
-		 * A new route can be allocated
-		 * the next time output is attempted.
-		 */
-	}
-}
-
-/*
- * After a routing change, flush old routing
- * and allocate a (hopefully) better one.
- */
-struct inpcb *
-in_rtchange(inp, errno)
-	register struct inpcb *inp;
-	int errno;
-{
-	INP_LOCK_ASSERT(inp);
-
-	if (inp->inp_route.ro_rt) {
-		RTFREE(inp->inp_route.ro_rt);
-		inp->inp_route.ro_rt = 0;
-		/*
-		 * A new route can be allocated the next time
-		 * output is attempted.
-		 */
-	}
-	return inp;
-}
-
-/*
  * Lookup a PCB based on the local address and port.
  */
 struct inpcb *
diff --git a/sys/netinet/in_pcb.h b/sys/netinet/in_pcb.h
index 8a6717c..5e93328 100644
--- a/sys/netinet/in_pcb.h
+++ b/sys/netinet/in_pcb.h
@@ -94,31 +94,22 @@ struct in_endpoints {
 
 /*
  * XXX
- * At some point struct route should possibly change to:
- *   struct rtentry *rt
- *   struct in_endpoints *ie; 
+ * the defines for inc_* are hacks and should be changed to direct references
  */
 struct in_conninfo {
 	u_int8_t	inc_flags;
 	u_int8_t	inc_len;
 	u_int16_t	inc_pad;	/* XXX alignment for in_endpoints */
-	/* protocol dependent part; cached route */
+	/* protocol dependent part */
 	struct	in_endpoints inc_ie;
-	union {
-		/* placeholder for routing entry */
-		struct	route inc4_route;
-		struct	route_in6 inc6_route;
-	} inc_dependroute;
 };
 #define inc_isipv6	inc_flags	/* temp compatability */
 #define	inc_fport	inc_ie.ie_fport
 #define	inc_lport	inc_ie.ie_lport
 #define	inc_faddr	inc_ie.ie_faddr
 #define	inc_laddr	inc_ie.ie_laddr
-#define	inc_route	inc_dependroute.inc4_route
 #define	inc6_faddr	inc_ie.ie6_faddr
 #define	inc6_laddr	inc_ie.ie6_laddr
-#define	inc6_route	inc_dependroute.inc6_route
 
 struct	icmp6_filter;
 
@@ -157,7 +148,6 @@ struct inpcb {
 #define inp_lport	inp_inc.inc_lport
 #define	inp_faddr	inp_inc.inc_faddr
 #define	inp_laddr	inp_inc.inc_laddr
-#define	inp_route	inp_inc.inc_route
 #define	inp_ip_tos	inp_depend4.inp4_ip_tos
 #define	inp_options	inp_depend4.inp4_options
 #define	inp_moptions	inp_depend4.inp4_moptions
@@ -182,7 +172,7 @@ struct inpcb {
 
 #define	in6p_faddr	inp_inc.inc6_faddr
 #define	in6p_laddr	inp_inc.inc6_laddr
-#define	in6p_route	inp_inc.inc6_route
+#define	in6p_ip6_hlim	inp_depend6.inp6_hlim
 #define	in6p_hops	inp_depend6.inp6_hops	/* default hop limit */
 #define	in6p_ip6_nxt	inp_ip_p
 #define	in6p_flowinfo	inp_flow
@@ -347,9 +337,6 @@ extern int	ipport_hifirstauto;
 extern int	ipport_hilastauto;
 
 void	in_pcbpurgeif0(struct inpcbinfo *, struct ifnet *);
-void	in_losing(struct inpcb *);
-struct inpcb *
-	in_rtchange(struct inpcb *, int);
 int	in_pcballoc(struct socket *, struct inpcbinfo *, struct thread *);
 int	in_pcbbind(struct inpcb *, struct sockaddr *, struct thread *);
 int	in_pcbbind_setup(struct inpcb *, struct sockaddr *, in_addr_t *,
diff --git a/sys/netinet/in_rmx.c b/sys/netinet/in_rmx.c
index 4625030..ea11792 100644
--- a/sys/netinet/in_rmx.c
+++ b/sys/netinet/in_rmx.c
@@ -98,8 +98,7 @@ in_addroute(void *v_arg, void *n_arg, struct radix_node_head *head,
 	if (IN_MULTICAST(ntohl(sin->sin_addr.s_addr)))
 		rt->rt_flags |= RTF_MULTICAST;
 
-	if (!rt->rt_rmx.rmx_mtu && !(rt->rt_rmx.rmx_locks & RTV_MTU) &&
-	    rt->rt_ifp)
+	if (!rt->rt_rmx.rmx_mtu && rt->rt_ifp)
 		rt->rt_rmx.rmx_mtu = rt->rt_ifp->if_mtu;
 
 	ret = rn_addroute(v_arg, n_arg, head, treenodes);
diff --git a/sys/netinet/ip_divert.c b/sys/netinet/ip_divert.c
index 172021b..bd777dd 100644
--- a/sys/netinet/ip_divert.c
+++ b/sys/netinet/ip_divert.c
@@ -336,7 +336,7 @@ div_output(struct socket *so, struct mbuf *m,
 			ipstat.ips_rawout++;			/* XXX */
 
 			error = ip_output((struct mbuf *)&divert_tag,
-				    inp->inp_options, &inp->inp_route,
+				    inp->inp_options, NULL,
 				    (so->so_options & SO_DONTROUTE) |
 				    IP_ALLOWBROADCAST | IP_RAWOUTPUT,
 				    inp->inp_moptions, NULL);
@@ -527,11 +527,8 @@ div_ctlinput(int cmd, struct sockaddr *sa, void *vip)
 	faddr = ((struct sockaddr_in *)sa)->sin_addr;
 	if (sa->sa_family != AF_INET || faddr.s_addr == INADDR_ANY)
         	return;
-	if (PRC_IS_REDIRECT(cmd)) {
-		/* flush held routes */
-		in_pcbnotifyall(&divcbinfo, faddr,
-			inetctlerrmap[cmd], in_rtchange);
-	}
+	if (PRC_IS_REDIRECT(cmd))
+		return;
 }
 
 static int
diff --git a/sys/netinet/ip_fw2.c b/sys/netinet/ip_fw2.c
index 5d3e3da..999d064 100644
--- a/sys/netinet/ip_fw2.c
+++ b/sys/netinet/ip_fw2.c
@@ -466,10 +466,13 @@ verify_rev_path(struct in_addr src, struct ifnet *ifp)
 		rtalloc_ign(&ro, RTF_CLONING);
 	}
 
-	if ((ro.ro_rt == NULL) || (ifp == NULL) ||
-	    (ro.ro_rt->rt_ifp->if_index != ifp->if_index))
+	if (ro.ro_rt == NULL)
 		return 0;
-
+	if ((ifp == NULL) || (ro.ro_rt->rt_ifp->if_index != ifp->if_index)) {
+		RTFREE(ro.ro_rt);
+		return 0;
+	}
+	RTFREE(ro.ro_rt);
 	return 1;
 }
 
diff --git a/sys/netinet/ip_icmp.c b/sys/netinet/ip_icmp.c
index f94e7b9..b84d689 100644
--- a/sys/netinet/ip_icmp.c
+++ b/sys/netinet/ip_icmp.c
@@ -52,11 +52,15 @@
 #include <net/route.h>
 
 #include <netinet/in.h>
+#include <netinet/in_pcb.h>
 #include <netinet/in_systm.h>
 #include <netinet/in_var.h>
 #include <netinet/ip.h>
 #include <netinet/ip_icmp.h>
 #include <netinet/ip_var.h>
+#include <netinet/tcp.h>
+#include <netinet/tcp_var.h>
+#include <netinet/tcpip.h>
 #include <netinet/icmp_var.h>
 
 #ifdef IPSEC
@@ -395,7 +399,7 @@ icmp_input(m, off)
 			printf("deliver to protocol %d\n", icp->icmp_ip.ip_p);
 #endif
 		icmpsrc.sin_addr = icp->icmp_ip.ip_dst;
-#if 1
+
 		/*
 		 * MTU discovery:
 		 * If we got a needfrag and there is a host route to the
@@ -405,40 +409,37 @@ icmp_input(m, off)
 		 * notice that the MTU has changed and adapt accordingly.
 		 * If no new MTU was suggested, then we guess a new one
 		 * less than the current value.  If the new MTU is 
-		 * unreasonably small (arbitrarily set at 296), then
-		 * we reset the MTU to the interface value and enable the
-		 * lock bit, indicating that we are no longer doing MTU
-		 * discovery.
+		 * unreasonably small, then we don't update the MTU value.
+		 *
+		 * XXX: All this should be done in tcp_mtudisc() because
+		 * the way we do it now, everyone can send us bogus ICMP
+		 * MSGSIZE packets for any destination. By doing this far
+		 * higher in the chain we have a matching tcp connection.
+		 * Thus spoofing is much harder. However there is no easy
+		 * non-hackish way to pass the new MTU up to tcp_mtudisc().
+		 * Also see next XXX regarding IPv4 AH TCP.
 		 */
 		if (code == PRC_MSGSIZE) {
-			struct rtentry *rt;
 			int mtu;
+			struct in_conninfo inc;
+
+			bzero(&inc, sizeof(inc));
+			inc.inc_flags = 0; /* IPv4 */
+			inc.inc_faddr = icmpsrc.sin_addr;
+
+			mtu = ntohs(icp->icmp_nextmtu);
+			if (!mtu)
+				mtu = ip_next_mtu(mtu, 1);
+
+			if (mtu >= 256 + sizeof(struct tcpiphdr))
+				tcp_hc_updatemtu(&inc, mtu);
 
-			rt = rtalloc1((struct sockaddr *)&icmpsrc, 0,
-				      RTF_CLONING);
-			if (rt && (rt->rt_flags & RTF_HOST)
-			    && !(rt->rt_rmx.rmx_locks & RTV_MTU)) {
-				mtu = ntohs(icp->icmp_nextmtu);
-				if (!mtu)
-					mtu = ip_next_mtu(rt->rt_rmx.rmx_mtu,
-							  1);
 #ifdef DEBUG_MTUDISC
-				printf("MTU for %s reduced to %d\n",
-					inet_ntoa(icmpsrc.sin_addr), mtu);
+			printf("MTU for %s reduced to %d\n",
+				inet_ntoa(icmpsrc.sin_addr), mtu);
 #endif
-				if (mtu < 296) {
-					/* rt->rt_rmx.rmx_mtu =
-						rt->rt_ifp->if_mtu; */
-					rt->rt_rmx.rmx_locks |= RTV_MTU;
-				} else if (rt->rt_rmx.rmx_mtu > mtu) {
-					rt->rt_rmx.rmx_mtu = mtu;
-				}
-			}
-			if (rt)
-				rtfree(rt);
 		}
 
-#endif
 		/*
 		 * XXX if the packet contains [IPv4 AH TCP], we can't make a
 		 * notification to TCP layer.
@@ -785,7 +786,6 @@ iptime()
 	return (htonl(t));
 }
 
-#if 1
 /*
  * Return the next larger or smaller MTU plateau (table from RFC 1191)
  * given current value MTU.  If DIR is less than zero, a larger plateau
@@ -823,7 +823,6 @@ ip_next_mtu(mtu, dir)
 		}
 	}
 }
-#endif
 
 
 /*
diff --git a/sys/netinet/ip_input.c b/sys/netinet/ip_input.c
index df67d22..3d528f4 100644
--- a/sys/netinet/ip_input.c
+++ b/sys/netinet/ip_input.c
@@ -1612,22 +1612,22 @@ struct in_ifaddr *
 ip_rtaddr(dst)
 	struct in_addr dst;
 {
+	struct route sro;
 	struct sockaddr_in *sin;
 	struct in_ifaddr *ifa;
-	struct route ro;
 
-	bzero(&ro, sizeof(ro));
-	sin = (struct sockaddr_in *)&ro.ro_dst;
+	bzero(&sro, sizeof(sro));	/* ro_rt = NULL, clean ro_dst */
+	sin = (struct sockaddr_in *)&sro.ro_dst;
 	sin->sin_family = AF_INET;
 	sin->sin_len = sizeof(*sin);
 	sin->sin_addr = dst;
-	rtalloc_ign(&ro, RTF_CLONING);
+	rtalloc_ign(&sro, RTF_CLONING);
 
-	if (ro.ro_rt == 0)
+	if (sro.ro_rt == NULL)
 		return ((struct in_ifaddr *)0);
 
-	ifa = ifatoia(ro.ro_rt->rt_ifa);
-	RTFREE(ro.ro_rt);
+	ifa = ifatoia(sro.ro_rt->rt_ifa);
+	RTFREE(sro.ro_rt);
 	return ifa;
 }
 
 
@@ -1879,7 +1879,7 @@ ip_forward(struct mbuf *m, int srcrt, struct sockaddr_in *next_hop)
 		struct route ro;
 		struct rtentry *rt;
 
-		bzero(&ro, sizeof(ro));
+		bzero(&ro, sizeof(ro));	/* ro_rt = NULL, clean ro_dst */
 		sin = (struct sockaddr_in *)&ro.ro_dst;
 		sin->sin_family = AF_INET;
 		sin->sin_len = sizeof(*sin);
diff --git a/sys/netinet/ip_output.c b/sys/netinet/ip_output.c
index cdf8b87..0a11524 100644
--- a/sys/netinet/ip_output.c
+++ b/sys/netinet/ip_output.c
@@ -302,13 +302,9 @@ ip_output(struct mbuf *m0, struct mbuf *opt, struct route *ro,
 		isbroadcast = 0;	/* fool gcc */
 	} else {
 		/*
-		 * If this is the case, we probably don't want to allocate
-		 * a protocol-cloned route since we didn't get one from the
-		 * ULP.  This lets TCP do its thing, while not burdening
-		 * forwarding or ICMP with the overhead of cloning a route.
-		 * Of course, we still want to do any cloning requested by
-		 * the link layer, as this is probably required in all cases
-		 * for correct operation (as it is for ARP).
+		 * We want to do any cloning requested by the link layer,
+		 * as this is probably required in all cases for correct
+		 * operation (as it is for ARP).
 		 */
 		if (ro->ro_rt == 0)
 			rtalloc(ro);
@@ -319,7 +315,7 @@ ip_output(struct mbuf *m0, struct mbuf *opt, struct route *ro,
 		}
 		ia = ifatoia(ro->ro_rt->rt_ifa);
 		ifp = ro->ro_rt->rt_ifp;
-		ro->ro_rt->rt_use++;
+		ro->ro_rt->rt_rmx.rmx_pksent++;
 		if (ro->ro_rt->rt_flags & RTF_GATEWAY)
 			dst = (struct sockaddr_in *)ro->ro_rt->rt_gateway;
 		if (ro->ro_rt->rt_flags & RTF_HOST)
@@ -931,16 +927,14 @@ spd_done:
 				ip_input((struct mbuf *)&tag);
 				goto done;
 			}
-			/* Some of the logic for this was
+			/*
+			 * Some of the logic for this was
 			 * nicked from above.
-			 *
-			 * This rewrites the cached route in a local PCB.
-			 * Is this what we want to do?
 			 */
 			bcopy(dst, &ro_fwd->ro_dst, sizeof(*dst));
 
 			ro_fwd->ro_rt = 0;
-			rtalloc(ro_fwd);
+			rtalloc_ign(ro_fwd, RTF_CLONING);
 
 			if (ro_fwd->ro_rt == 0) {
 				ipstat.ips_noroute++;
@@ -950,7 +944,7 @@ spd_done:
 
 			ia = ifatoia(ro_fwd->ro_rt->rt_ifa);
 			ifp = ro_fwd->ro_rt->rt_ifp;
-			ro_fwd->ro_rt->rt_use++;
+			ro_fwd->ro_rt->rt_rmx.rmx_pksent++;
 			if (ro_fwd->ro_rt->rt_flags & RTF_GATEWAY)
 				dst = (struct sockaddr_in *)
 					ro_fwd->ro_rt->rt_gateway;
@@ -1045,7 +1039,6 @@ pass:
 		 * routes when the MTU is changed.
 		 */
 		if ((ro->ro_rt->rt_flags & (RTF_UP | RTF_HOST)) &&
-		    !(ro->ro_rt->rt_rmx.rmx_locks & RTV_MTU) &&
 		    (ro->ro_rt->rt_rmx.rmx_mtu > ifp->if_mtu)) {
 			ro->ro_rt->rt_rmx.rmx_mtu = ifp->if_mtu;
 		}
@@ -1983,7 +1976,7 @@ ip_setmoptions(sopt, imop)
 			dst->sin_len = sizeof(*dst);
 			dst->sin_family = AF_INET;
 			dst->sin_addr = mreq.imr_multiaddr;
-			rtalloc(&ro);
+			rtalloc_ign(&ro, RTF_CLONING);
 			if (ro.ro_rt == NULL) {
 				error = EADDRNOTAVAIL;
 				splx(s);
diff --git a/sys/netinet/raw_ip.c b/sys/netinet/raw_ip.c
index 632e00a..0a76a7f 100644
--- a/sys/netinet/raw_ip.c
+++ b/sys/netinet/raw_ip.c
@@ -302,7 +302,7 @@ rip_output(struct mbuf *m, struct socket *so, u_long dst)
 	if (inp->inp_flags & INP_ONESBCAST)
 		flags |= IP_SENDONES;
 
-	return (ip_output(m, inp->inp_options, &inp->inp_route, flags,
+	return (ip_output(m, inp->inp_options, NULL, flags,
 			  inp->inp_moptions, inp));
 }
 
diff --git a/sys/netinet/tcp_hostcache.c b/sys/netinet/tcp_hostcache.c
new file mode 100644
index 0000000..461ce85
--- /dev/null
+++ b/sys/netinet/tcp_hostcache.c
@@ -0,0 +1,728 @@
+/*
+ * Copyright (c) 2002 Andre Oppermann, Internet Business Solutions AG
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. The name of the author may not be used to endorse or promote
+ *    products derived from this software without specific prior written
+ *    permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+
+/*
+ * The tcp_hostcache moves the tcp specific cached metrics from the routing
+ * table into a dedicated structure indexed by the remote IP address. It
+ * keeps information on the measured tcp parameters of past tcp sessions
+ * to have better initial start values for following connections from the
+ * same source. Depending on the network parameters (delay, bandwidth, max
+ * MTU, congestion window) between local and remote site this can lead to
+ * significant speedups for new tcp connections after the first one.
+ *
+ * Due to this new tcp_hostcache all tcp specific metrics information in
+ * the routing table has been removed. The INPCB no longer keeps a pointer
+ * to the routing entry and protocol initiated route cloning has been
+ * removed as well. With these changes the routing table has gone back
+ * to being more lightweight and only carries information related to packet
+ * forwarding.
+ *
+ * Tcp_hostcache is designed for multiple concurrent access in SMP
+ * environments and high contention. All bucket rows have their own
+ * lock and thus multiple lookups and modifies can be done at the same
+ * time as long as they are in different bucket rows. If a request for
+ * insertion of a new record can't be satisfied it simply returns an
+ * empty structure. Nobody and nothing shall ever point directly to
+ * any entry in tcp_hostcache. All communication is done in an object
+ * oriented way and only functions of tcp_hostcache will manipulate hostcache
+ * entries. Otherwise we are unable to achieve good behaviour in concurrent
+ * access situations. Since tcp_hostcache is only caching information there
+ * are no fatal consequences if we either can't satisfy any particular request
+ * or have to drop/overwrite an existing entry because of bucket limit
+ * memory constraints.
+ */
+
+/*
+ * Many thanks to jlemon for basic structure of tcp_syncache which is being
+ * followed here.
+ */
+
+#include "opt_inet6.h"
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/kernel.h>
+#include <sys/lock.h>
+#include <sys/mutex.h>
+#include <sys/malloc.h>
+#include <sys/socket.h>
+#include <sys/socketvar.h>
+#include <sys/sysctl.h>
+
+#include <net/if.h>
+
+#include <netinet/in.h>
+#include <netinet/in_systm.h>
+#include <netinet/ip.h>
+#include <netinet/in_var.h>
+#include <netinet/in_pcb.h>
+#include <netinet/ip_var.h>
+#ifdef INET6
+#include <netinet/ip6.h>
+#include <netinet6/ip6_var.h>
+#endif
+#include <netinet/tcp.h>
+#include <netinet/tcp_var.h>
+#ifdef INET6
+#include <netinet6/tcp6_var.h>
+#endif
+
+#include <vm/uma.h>
+
+
+TAILQ_HEAD(hc_qhead, hc_metrics);
+
+struct hc_head {
+	struct hc_qhead	hch_bucket;	/* tail queue of entries in this row */
+	u_int		hch_length;	/* row length, checked against bucket_limit */
+	struct mtx	hch_mtx;	/* lock for this bucket row */
+};
+
+struct hc_metrics {
+	/* housekeeping */
+	TAILQ_ENTRY(hc_metrics) rmx_q;
+	struct	hc_head *rmx_head; /* head of bucket tail queue */
+	struct	in_addr ip4;	/* IP address */
+	struct	in6_addr ip6;	/* IP6 address */
+	/* endpoint specific values for tcp */
+	u_long	rmx_mtu;	/* MTU for this path */
+	u_long	rmx_ssthresh;	/* outbound gateway buffer limit */
+	u_long	rmx_rtt;	/* estimated round trip time */
+	u_long	rmx_rttvar;	/* estimated rtt variance */
+	u_long	rmx_bandwidth;	/* estimated bandwidth */
+	u_long	rmx_cwnd;	/* congestion window */
+	u_long	rmx_sendpipe;	/* outbound delay-bandwidth product */
+	u_long	rmx_recvpipe;	/* inbound delay-bandwidth product */
+	struct	rmxp_tao rmx_tao; /* TAO cache for T/TCP */
+	/* tcp hostcache internal data */
+	int	rmx_expire;	/* lifetime for object */
+	u_long	rmx_hits;	/* number of hits */
+	u_long	rmx_updates;	/* number of updates */
+};
+
+/* Arbitrary values; products are parenthesized so they expand safely */
+#define TCP_HOSTCACHE_HASHSIZE		512
+#define TCP_HOSTCACHE_BUCKETLIMIT	30
+#define TCP_HOSTCACHE_EXPIRE		(60*60)	/* one hour */
+#define TCP_HOSTCACHE_PRUNE		(5*60)	/* every 5 minutes */
+
+struct tcp_hostcache {
+	struct	hc_head *hashbase;	/* array of hashsize bucket rows */
+	uma_zone_t zone;		/* UMA zone for the hc_metrics entries */
+	u_int	hashsize;		/* number of bucket rows, power of 2 */
+	u_int	hashmask;		/* hashsize - 1, applied to hash values */
+	u_int	bucket_limit;		/* per-bucket entry limit */
+	u_int	cache_count;		/* current number of entries */
+	u_int	cache_limit;		/* overall entry limit */
+	int	expire;			/* expire time of entries */
+	int	purgeall;		/* expire all entries on next purge run */
+};
+static struct tcp_hostcache tcp_hostcache;
+
+static struct callout tcp_hc_callout;
+
+static struct hc_metrics *tcp_hc_lookup(struct in_conninfo *);
+static struct hc_metrics *tcp_hc_insert(struct in_conninfo *);
+static int sysctl_tcp_hc_list(SYSCTL_HANDLER_ARGS);
+static void tcp_hc_purge(void *);
+
+SYSCTL_NODE(_net_inet_tcp, OID_AUTO, hostcache, CTLFLAG_RW, 0, "TCP Host cache");
+
+SYSCTL_INT(_net_inet_tcp_hostcache, OID_AUTO, cachelimit, CTLFLAG_RDTUN,
+     &tcp_hostcache.cache_limit, 0, "Overall entry limit for hostcache");
+
+SYSCTL_INT(_net_inet_tcp_hostcache, OID_AUTO, hashsize, CTLFLAG_RDTUN,
+     &tcp_hostcache.hashsize, 0, "Size of TCP hostcache hashtable");
+
+SYSCTL_INT(_net_inet_tcp_hostcache, OID_AUTO, bucketlimit, CTLFLAG_RDTUN,
+     &tcp_hostcache.bucket_limit, 0, "Per-bucket hash limit for hostcache");
+
+SYSCTL_INT(_net_inet_tcp_hostcache, OID_AUTO, count, CTLFLAG_RD,
+     &tcp_hostcache.cache_count, 0, "Current number of entries in hostcache");
+
+SYSCTL_INT(_net_inet_tcp_hostcache, OID_AUTO, expire, CTLFLAG_RW,
+     &tcp_hostcache.expire, 0, "Expire time of TCP hostcache entries");
+
+SYSCTL_INT(_net_inet_tcp_hostcache, OID_AUTO, purge, CTLFLAG_RW,
+     &tcp_hostcache.purgeall, 0, "Expire all entries on next purge run");
+
+SYSCTL_PROC(_net_inet_tcp_hostcache, OID_AUTO, list,
+	CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_SKIP, 0, 0,
+	sysctl_tcp_hc_list, "A", "List of all hostcache entries");
+
+
+static MALLOC_DEFINE(M_HOSTCACHE, "hostcache", "TCP hostcache");
+
+#define HOSTCACHE_HASH(ip) \
+	(((ip)->s_addr ^ ((ip)->s_addr >> 7) ^ ((ip)->s_addr >> 17)) & 	\
+	  tcp_hostcache.hashmask)
+
+/* XXX: What is the recommended hash to get good entropy for IPv6 addresses? */
+#define HOSTCACHE_HASH6(ip6)	 			\
+	(((ip6)->s6_addr32[0] ^				\
+	  (ip6)->s6_addr32[1] ^				\
+	  (ip6)->s6_addr32[2] ^				\
+	  (ip6)->s6_addr32[3]) &			\
+	 tcp_hostcache.hashmask)
+
+#define THC_LOCK(lp)		mtx_lock(lp)
+#define THC_UNLOCK(lp)		mtx_unlock(lp)
+
+/*
+ * Initialize the TCP hostcache: apply defaults and loader tunables, set up
+ * the hash rows and entry zone, and schedule the periodic purge run.
+ */
+void
+tcp_hc_init(void)
+{
+	u_int i;
+
+	/* Defaults; the loader tunables below may override them. */
+	tcp_hostcache.cache_count = 0;
+	tcp_hostcache.hashsize = TCP_HOSTCACHE_HASHSIZE;
+	tcp_hostcache.bucket_limit = TCP_HOSTCACHE_BUCKETLIMIT;
+	tcp_hostcache.expire = TCP_HOSTCACHE_EXPIRE;
+
+	TUNABLE_INT_FETCH("net.inet.tcp.hostcache.hashsize",
+	    &tcp_hostcache.hashsize);
+	TUNABLE_INT_FETCH("net.inet.tcp.hostcache.bucketlimit",
+	    &tcp_hostcache.bucket_limit);
+	/* powerof2(0) is true, so an empty table must be rejected explicitly. */
+	if (tcp_hostcache.hashsize == 0 || !powerof2(tcp_hostcache.hashsize)) {
+		printf("WARNING: hostcache hash size is not a power of 2.\n");
+		tcp_hostcache.hashsize = TCP_HOSTCACHE_HASHSIZE;
+	}
+	tcp_hostcache.hashmask = tcp_hostcache.hashsize - 1;
+
+	/*
+	 * Derive the default overall limit from the final table geometry so
+	 * that tuning hashsize or bucketlimit alone scales it as well.
+	 */
+	tcp_hostcache.cache_limit =
+	    tcp_hostcache.hashsize * tcp_hostcache.bucket_limit;
+	TUNABLE_INT_FETCH("net.inet.tcp.hostcache.cachelimit",
+	    &tcp_hostcache.cache_limit);
+
+	/* Allocate the hash table. */
+	tcp_hostcache.hashbase = (struct hc_head *)
+	    malloc(tcp_hostcache.hashsize * sizeof(struct hc_head),
+		   M_HOSTCACHE, M_WAITOK | M_ZERO);
+
+	/* Initialize the hash buckets; every row carries its own mutex. */
+	for (i = 0; i < tcp_hostcache.hashsize; i++) {
+		TAILQ_INIT(&tcp_hostcache.hashbase[i].hch_bucket);
+		tcp_hostcache.hashbase[i].hch_length = 0;
+		mtx_init(&tcp_hostcache.hashbase[i].hch_mtx, "tcp_hc_entry",
+			  NULL, MTX_DEF);
+	}
+
+	/* Zone for the hostcache entries, capped at the overall limit. */
+	tcp_hostcache.zone = uma_zcreate("hostcache", sizeof(struct hc_metrics),
+	    NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_NOFREE);
+	uma_zone_set_max(tcp_hostcache.zone, tcp_hostcache.cache_limit);
+
+	/* Set up periodic cache cleanup. */
+	callout_init(&tcp_hc_callout, CALLOUT_MPSAFE);
+	callout_reset(&tcp_hc_callout, TCP_HOSTCACHE_PRUNE * hz, tcp_hc_purge, 0);
+}
+
+/*
+ * Internal function: find the hostcache entry for the foreign address of
+ * the given connection.
+ *
+ * On a hit the entry is returned with its bucket row still locked and the
+ * caller has to unlock the row once it is done with the entry.  On a miss
+ * the row is unlocked here and NULL is returned.
+ */
+static struct hc_metrics *
+tcp_hc_lookup(struct in_conninfo *inc)
+{
+	struct hc_metrics *match;
+	struct hc_head *row;
+	int idx;
+
+	KASSERT(inc != NULL, ("tcp_hc_lookup with NULL in_conninfo pointer"));
+
+	/*
+	 * The bucket row is selected by hashing the foreign ip address.
+	 */
+	idx = inc->inc_isipv6 ? HOSTCACHE_HASH6(&inc->inc6_faddr) :
+	    HOSTCACHE_HASH(&inc->inc_faddr);
+	row = &tcp_hostcache.hashbase[idx];
+
+	/*
+	 * Acquire the lock for this bucket row.  It stays held when an
+	 * entry is handed back to the caller.
+	 */
+	THC_LOCK(&row->hch_mtx);
+
+	/*
+	 * Walk the row and skip every entry whose address, compared in the
+	 * address family of the request, differs from the one looked for.
+	 */
+	TAILQ_FOREACH(match, &row->hch_bucket, rmx_q) {
+		if (inc->inc_isipv6) {
+			if (memcmp(&inc->inc6_faddr, &match->ip6,
+			    sizeof(inc->inc6_faddr)) != 0)
+				continue;
+		} else {
+			if (memcmp(&inc->inc_faddr, &match->ip4,
+			    sizeof(inc->inc_faddr)) != 0)
+				continue;
+		}
+		return match;
+	}
+
+	/*
+	 * Nothing found; the lock is not passed on in this case.
+	 */
+	THC_UNLOCK(&row->hch_mtx);
+	return NULL;
+}
+
+/*
+ * Internal function: insert an entry into the hostcache or return NULL
+ * if no entry can be allocated or recycled.
+ *
+ * If an entry has been returned, the caller becomes responsible for
+ * unlocking the bucket row after he is done reading/modifying the entry.
+ */
+static struct hc_metrics *
+tcp_hc_insert(struct in_conninfo *inc)
+{
+	int hash;
+	struct hc_head *hc_head;
+	struct hc_metrics *hc_entry;
+
+	KASSERT(inc != NULL, ("tcp_hc_insert with NULL in_conninfo pointer"));
+
+	/* Hash the foreign ip address to select the bucket row. */
+	if (inc->inc_isipv6)
+		hash = HOSTCACHE_HASH6(&inc->inc6_faddr);
+	else
+		hash = HOSTCACHE_HASH(&inc->inc_faddr);
+
+	hc_head = &tcp_hostcache.hashbase[hash];
+
+	/*
+	 * Acquire the lock for this bucket row.  We release it ourselves
+	 * if no entry can be provided, otherwise the caller has to unlock
+	 * after he is done.
+	 */
+	THC_LOCK(&hc_head->hch_mtx);
+
+	/* If a limit is reached reuse the least recently updated element. */
+	if (hc_head->hch_length >= tcp_hostcache.bucket_limit ||
+	    tcp_hostcache.cache_count >= tcp_hostcache.cache_limit) {
+		hc_entry = TAILQ_LAST(&hc_head->hch_bucket, hc_qhead);
+		/*
+		 * The global limit can be hit while this row is still
+		 * empty; then there is nothing to recycle here and we balk.
+		 */
+		if (hc_entry == NULL) {
+			THC_UNLOCK(&hc_head->hch_mtx);
+			return NULL;
+		}
+		/*
+		 * Recycle the element in place instead of freeing it and
+		 * allocating it again.  It must leave the list and the
+		 * counters first: it is zeroed, re-linked at the head and
+		 * counted again below.  We may drop something that is
+		 * still "in-use" but we can be "lossy".
+		 */
+		TAILQ_REMOVE(&hc_head->hch_bucket, hc_entry, rmx_q);
+		hc_head->hch_length--;
+		tcp_hostcache.cache_count--;
+		tcpstat.tcps_hc_bucketoverflow++;
+	} else {
+		/* Allocate a new entry, or balk if not possible. */
+		hc_entry = uma_zalloc(tcp_hostcache.zone, M_NOWAIT);
+		if (hc_entry == NULL) {
+			THC_UNLOCK(&hc_head->hch_mtx);
+			return NULL;
+		}
+	}
+
+	/*
+	 * Initialize basic information of hostcache entry.
+	 * Note that bcopy() takes (source, destination, length).
+	 */
+	bzero(hc_entry, sizeof(*hc_entry));
+	if (inc->inc_isipv6)
+		bcopy(&inc->inc6_faddr, &hc_entry->ip6, sizeof(hc_entry->ip6));
+	else
+		hc_entry->ip4 = inc->inc_faddr;
+	hc_entry->rmx_head = hc_head;
+	hc_entry->rmx_expire = tcp_hostcache.expire;
+
+	/*
+	 * Put it upfront
+	 */
+	TAILQ_INSERT_HEAD(&hc_head->hch_bucket, hc_entry, rmx_q);
+	hc_head->hch_length++;
+	tcp_hostcache.cache_count++;
+	tcpstat.tcps_hc_added++;
+
+	return hc_entry;
+}
+
+/*
+ * External function: lookup an entry in the hostcache and fill out the
+ * supplied tcp metrics structure.  Fills in null when no entry was found
+ * or a value is not set.
+ */
+void
+tcp_hc_get(struct in_conninfo *inc, struct hc_metrics_lite *hc_metrics_lite)
+{
+	struct hc_metrics *hit = tcp_hc_lookup(inc);
+
+	/*
+	 * A miss hands back an all-zero structure; tcp_hc_lookup() has
+	 * already released the bucket row in that case.
+	 */
+	if (hit == NULL) {
+		bzero(hc_metrics_lite, sizeof(*hc_metrics_lite));
+		return;
+	}
+
+	/*
+	 * Account for the hit and restart the expiry countdown.
+	 */
+	hit->rmx_expire = tcp_hostcache.expire;
+	hit->rmx_hits++;
+
+	/*
+	 * Copy out the metrics while the bucket row is still locked.
+	 */
+	hc_metrics_lite->rmx_recvpipe = hit->rmx_recvpipe;
+	hc_metrics_lite->rmx_sendpipe = hit->rmx_sendpipe;
+	hc_metrics_lite->rmx_cwnd = hit->rmx_cwnd;
+	hc_metrics_lite->rmx_bandwidth = hit->rmx_bandwidth;
+	hc_metrics_lite->rmx_rttvar = hit->rmx_rttvar;
+	hc_metrics_lite->rmx_rtt = hit->rmx_rtt;
+	hc_metrics_lite->rmx_ssthresh = hit->rmx_ssthresh;
+	hc_metrics_lite->rmx_mtu = hit->rmx_mtu;
+
+	THC_UNLOCK(&hit->rmx_head->hch_mtx);
+}
+
+/*
+ * External function: lookup an entry in the hostcache and return the
+ * discovered path mtu.  Returns 0 if no entry is found or no value is set.
+ */
+u_long
+tcp_hc_getmtu(struct in_conninfo *inc)
+{
+	struct hc_metrics *hit = tcp_hc_lookup(inc);
+	u_long cached_mtu = 0;
+
+	/* On a miss the bucket row is already unlocked and 0 is returned. */
+	if (hit != NULL) {
+		/* Count the hit and restart the expiry countdown. */
+		hit->rmx_hits++;
+		hit->rmx_expire = tcp_hostcache.expire;
+		cached_mtu = hit->rmx_mtu;
+		/* tcp_hc_lookup() returned with the bucket row locked. */
+		THC_UNLOCK(&hit->rmx_head->hch_mtx);
+	}
+	return (cached_mtu);
+}
+
+/*
+ * External function: lookup an entry in the hostcache and fill out the
+ * supplied t/tcp tao structure.  Fills in null when no entry was found
+ * or a value is not set.
+ */
+void
+tcp_hc_gettao(struct in_conninfo *inc, struct rmxp_tao *tao)
+{
+	struct hc_metrics *hc_entry;
+
+	hc_entry = tcp_hc_lookup(inc);
+	if (hc_entry == NULL) {
+		bzero(tao, sizeof(*tao));	/* miss: nothing cached */
+		return;
+	}
+	hc_entry->rmx_hits++;
+	hc_entry->rmx_expire = tcp_hostcache.expire; /* start over again */
+	/* bcopy() is (src, dst, len): copy the cached TAO out to the caller. */
+	bcopy(&hc_entry->rmx_tao, tao, sizeof(*tao));
+	THC_UNLOCK(&hc_entry->rmx_head->hch_mtx);
+}
+
+/*
+ * External function: update the mtu value of an entry in the hostcache.
+ * Creates a new entry if none was found.
+ */
+void
+tcp_hc_updatemtu(struct in_conninfo *inc, u_long mtu)
+{
+	struct hc_metrics *ent;
+	struct hc_head *row;
+
+	/*
+	 * Use the existing entry for this peer, or try to create one.
+	 * Both helpers return with the bucket row locked on success and
+	 * unlocked on failure, so there is nothing to undo if we give up.
+	 */
+	ent = tcp_hc_lookup(inc);
+	if (ent == NULL && (ent = tcp_hc_insert(inc)) == NULL)
+		return;
+	row = ent->rmx_head;
+
+	/*
+	 * Record the new MTU, count the update and restart the expiry
+	 * countdown.
+	 */
+	ent->rmx_mtu = mtu;
+	ent->rmx_updates++;
+	ent->rmx_expire = tcp_hostcache.expire;
+
+	/*
+	 * Move the entry to the front of its row so that the next lookup
+	 * finds it sooner.
+	 */
+	TAILQ_REMOVE(&row->hch_bucket, ent, rmx_q);
+	TAILQ_INSERT_HEAD(&row->hch_bucket, ent, rmx_q);
+
+	/* Done with the entry: release the bucket row. */
+	THC_UNLOCK(&row->hch_mtx);
+}
+
+/*
+ * External function: update the tcp metrics of an entry in the hostcache.
+ * Creates a new entry if none was found.
+ */
+void
+tcp_hc_update(struct in_conninfo *inc, struct hc_metrics_lite *hcml)
+{
+	struct hc_metrics *ent;
+	struct hc_head *row;
+
+	/*
+	 * Use the existing entry for this peer, or try to create one.
+	 * Both helpers return with the bucket row locked on success and
+	 * unlocked on failure, so there is nothing to undo if we give up.
+	 */
+	ent = tcp_hc_lookup(inc);
+	if (ent == NULL && (ent = tcp_hc_insert(inc)) == NULL)
+		return;
+	row = ent->rmx_head;
+
+	/*
+	 * Count the update and restart the expiry countdown.
+	 */
+	ent->rmx_updates++;
+	ent->rmx_expire = tcp_hostcache.expire;
+
+	/*
+	 * Fold one sample into the cache.  A zero sample means "no value"
+	 * and is filtered out before this macro is used; the first non-zero
+	 * sample is stored as is and every later one is averaged with the
+	 * cached value, giving old and new equal weight.
+	 */
+#define	HC_BLEND(field) do {						\
+	if (ent->field == 0)						\
+		ent->field = hcml->field;				\
+	else								\
+		ent->field = (ent->field + hcml->field) / 2;		\
+} while (0)
+
+	/*
+	 * These three metrics are also accounted for in tcpstat.
+	 */
+	if (hcml->rmx_rtt != 0) {
+		HC_BLEND(rmx_rtt);
+		tcpstat.tcps_cachedrtt++;
+	}
+	if (hcml->rmx_rttvar != 0) {
+		HC_BLEND(rmx_rttvar);
+		tcpstat.tcps_cachedrttvar++;
+	}
+	if (hcml->rmx_ssthresh != 0) {
+		HC_BLEND(rmx_ssthresh);
+		tcpstat.tcps_cachedssthresh++;
+	}
+
+	/*
+	 * The remaining metrics are cached without being counted: no
+	 * tcpstat counter (tcps_cachedbandwidth, tcps_cachedcwnd,
+	 * tcps_cachedsendpipe, tcps_cachedrecvpipe) is incremented here.
+	 */
+	if (hcml->rmx_bandwidth != 0)
+		HC_BLEND(rmx_bandwidth);
+	if (hcml->rmx_cwnd != 0)
+		HC_BLEND(rmx_cwnd);
+	if (hcml->rmx_sendpipe != 0)
+		HC_BLEND(rmx_sendpipe);
+	if (hcml->rmx_recvpipe != 0)
+		HC_BLEND(rmx_recvpipe);
+#undef HC_BLEND
+
+	/*
+	 * Move the entry to the front of its row so that the next lookup
+	 * finds it sooner, then release the row.
+	 */
+	TAILQ_REMOVE(&row->hch_bucket, ent, rmx_q);
+	TAILQ_INSERT_HEAD(&row->hch_bucket, ent, rmx_q);
+
+	THC_UNLOCK(&row->hch_mtx);
+}
+
+/*
+ * External function: update the t/tcp tao of an entry in the hostcache.
+ * Creates a new entry if none was found.
+ */
+void
+tcp_hc_updatetao(struct in_conninfo *inc, int field, tcp_cc ccount, u_short mss)
+{
+	struct hc_metrics *ent;
+	struct hc_head *row;
+
+	/* Find the peer's entry or create it; give up if neither works. */
+	ent = tcp_hc_lookup(inc);
+	if (ent == NULL && (ent = tcp_hc_insert(inc)) == NULL)
+		return;
+	row = ent->rmx_head;
+
+	/* Count the update and restart the expiry countdown. */
+	ent->rmx_expire = tcp_hostcache.expire;
+	ent->rmx_updates++;
+
+	/*
+	 * Store the one TAO member selected by 'field' (ccount for tao_cc
+	 * and tao_ccsent, mss for tao_mssopt); other selectors store nothing.
+	 */
+	if (field == TCP_HC_TAO_CC)
+		ent->rmx_tao.tao_cc = ccount;
+	else if (field == TCP_HC_TAO_CCSENT)
+		ent->rmx_tao.tao_ccsent = ccount;
+	else if (field == TCP_HC_TAO_MSSOPT)
+		ent->rmx_tao.tao_mssopt = mss;
+
+	/* Move the entry to the front of its row, then release the row. */
+	TAILQ_REMOVE(&row->hch_bucket, ent, rmx_q);
+	TAILQ_INSERT_HEAD(&row->hch_bucket, ent, rmx_q);
+	THC_UNLOCK(&row->hch_mtx);
+}
+
+/*
+ * Sysctl function: prints all hostcache entries in unsorted order.  Output
+ * is capped to the buffer sized on entry; overlong lines are truncated.
+ */
+static int
+sysctl_tcp_hc_list(SYSCTL_HANDLER_ARGS)
+{
+	const int linesize = 128;
+	int bufsize, len, i, error;
+	char *p, *buf;
+	struct hc_metrics *hc_entry;
+
+	bufsize = linesize * (tcp_hostcache.cache_count + 1);
+	p = buf = (char *)malloc(bufsize, M_TEMP, M_WAITOK|M_ZERO);
+
+	p += snprintf(p, linesize,
+		"\nIP address        MTU  SSTRESH      RTT   RTTVAR BANDWIDTH "
+		"    CWND SENDPIPE RECVPIPE HITS  UPD  EXP\n");
+
+#define msec(u) (((u) + 500) / 1000)
+	for (i = 0; i < tcp_hostcache.hashsize; i++) {
+		THC_LOCK(&tcp_hostcache.hashbase[i].hch_mtx);
+		TAILQ_FOREACH(hc_entry, &tcp_hostcache.hashbase[i].hch_bucket,
+			      rmx_q) {
+			/* Entries added since sizing must not overflow buf. */
+			if (bufsize - (p - buf) < linesize)
+				break;
+			len = snprintf(p, linesize,
+			    "%-15s %5lu %8lu %6lums %6lums %9lu %8lu %8lu %8lu "
+			    "%4lu %4lu %4i\n",
+			    hc_entry->ip4.s_addr ? inet_ntoa(hc_entry->ip4) :
+#ifdef INET6
+				ip6_sprintf(&hc_entry->ip6),
+#else
+				"IPv6?",
+#endif
+			    hc_entry->rmx_mtu,
+			    hc_entry->rmx_ssthresh,
+			    msec(hc_entry->rmx_rtt *
+				(RTM_RTTUNIT / (hz * TCP_RTT_SCALE))),
+			    msec(hc_entry->rmx_rttvar *
+				(RTM_RTTUNIT / (hz * TCP_RTT_SCALE))),
+			    hc_entry->rmx_bandwidth * hz * 8,
+			    hc_entry->rmx_cwnd,
+			    hc_entry->rmx_sendpipe,
+			    hc_entry->rmx_recvpipe,
+			    hc_entry->rmx_hits,
+			    hc_entry->rmx_updates,
+			    hc_entry->rmx_expire);
+			p += (len < linesize) ? len : linesize - 1;
+		}
+		THC_UNLOCK(&tcp_hostcache.hashbase[i].hch_mtx);
+	}
+#undef msec
+	error = SYSCTL_OUT(req, buf, p - buf);
+	free(buf, M_TEMP);
+	return(error);
+}
+
+/*
+ * Expire and purge (old|all) entries in the tcp_hostcache.  Runs periodically
+ * from the callout; each successor is read before an entry may be freed.
+ */
+static void
+tcp_hc_purge(void *arg)
+{
+	struct hc_metrics *hc_entry, *hc_next;
+	int i, all = (intptr_t)arg;
+
+	if (tcp_hostcache.purgeall) {
+		all = 1;
+		tcp_hostcache.purgeall = 0;
+	}
+
+	for (i = 0; i < tcp_hostcache.hashsize; i++) {
+		THC_LOCK(&tcp_hostcache.hashbase[i].hch_mtx);
+		hc_next = TAILQ_FIRST(&tcp_hostcache.hashbase[i].hch_bucket);
+		while ((hc_entry = hc_next) != NULL) {
+			hc_next = TAILQ_NEXT(hc_entry, rmx_q); /* before free */
+			if (all || hc_entry->rmx_expire <= 0) {
+				TAILQ_REMOVE(&tcp_hostcache.hashbase[i].hch_bucket,
+					      hc_entry, rmx_q);
+				uma_zfree(tcp_hostcache.zone, hc_entry);
+				tcp_hostcache.hashbase[i].hch_length--;
+				tcp_hostcache.cache_count--;
+			} else
+				hc_entry->rmx_expire -= TCP_HOSTCACHE_PRUNE;
+		}
+		THC_UNLOCK(&tcp_hostcache.hashbase[i].hch_mtx);
+	}
+	callout_reset(&tcp_hc_callout, TCP_HOSTCACHE_PRUNE * hz, tcp_hc_purge, 0);
+}
diff --git a/sys/netinet/tcp_input.c b/sys/netinet/tcp_input.c
index a247138..eca5cb2 100644
--- a/sys/netinet/tcp_input.c
+++ b/sys/netinet/tcp_input.c
@@ -154,9 +154,8 @@ static int	 tcp_timewait(struct tcptw *, struct tcpopt *,
 #define ND6_HINT(tp) \
 do { \
 	if ((tp) && (tp)->t_inpcb && \
-	    ((tp)->t_inpcb->inp_vflag & INP_IPV6) != 0 && \
-	    (tp)->t_inpcb->in6p_route.ro_rt) \
-		nd6_nud_hint((tp)->t_inpcb->in6p_route.ro_rt, NULL, 0); \
+	    ((tp)->t_inpcb->inp_vflag & INP_IPV6) != 0) \
+		nd6_nud_hint(NULL, NULL, 0); \
 } while (0)
 #else
 #define ND6_HINT(tp)
@@ -358,8 +357,7 @@ tcp_input(m, off0)
 	int todrop, acked, ourfinisacked, needoutput = 0;
 	u_long tiwin;
 	struct tcpopt to;		/* options in this segment */
-	struct rmxp_tao *taop;		/* pointer to our TAO cache entry */
-	struct rmxp_tao	tao_noncached;	/* in case there's no cached entry */
+	struct rmxp_tao tao;		/* our TAO cache entry */
 	int headlocked = 0;
 	struct sockaddr_in *next_hop = NULL;
 	int rstreason; /* For badport_bandlim accounting purposes */
@@ -389,6 +387,7 @@ tcp_input(m, off0)
 #ifdef INET6
 	isipv6 = (mtod(m, struct ip *)->ip_v == 6) ? 1 : 0;
 #endif
+	bzero(&tao, sizeof(tao));
 	bzero((char *)&to, sizeof(to));
 
 	tcpstat.tcps_rcvtotal++;
@@ -707,11 +706,9 @@ findpcb:
 		if (isipv6) {
 			inc.inc6_faddr = ip6->ip6_src;
 			inc.inc6_laddr = ip6->ip6_dst;
-			inc.inc6_route.ro_rt = NULL;		/* XXX */
 		} else {
 			inc.inc_faddr = ip->ip_src;
 			inc.inc_laddr = ip->ip_dst;
-			inc.inc_route.ro_rt = NULL;		/* XXX */
 		}
 		inc.inc_fport = th->th_sport;
 		inc.inc_lport = th->th_dport;
@@ -916,7 +913,7 @@ findpcb:
 	}
 after_listen:
 
-/* XXX temp debugging */
+	/* XXX temp debugging */
 	/* should not happen - syncache should pick up these connections */
 	if (tp->t_state == TCPS_LISTEN)
 		panic("tcp_input: TCPS_LISTEN");
@@ -930,8 +927,9 @@ after_listen:
 		callout_reset(tp->tt_keep, tcp_keepidle, tcp_timer_keep, tp);
 
 	/*
-	 * Process options.
-	 * XXX this is tradtitional behavior, may need to be cleaned up.
+	 * Process options only when we get SYN/ACK back. The SYN case
+	 * for incoming connections is handled in tcp_syncache.
+	 * XXX this is traditional behavior, may need to be cleaned up.
 	 */
 	tcp_dooptions(&to, optp, optlen, thflags & TH_SYN);
 	if (thflags & TH_SYN) {
@@ -1179,10 +1177,8 @@ after_listen:
 	 *	continue processing rest of data/controls, beginning with URG
 	 */
 	case TCPS_SYN_SENT:
-		if ((taop = tcp_gettaocache(&inp->inp_inc)) == NULL) {
-			taop = &tao_noncached;
-			bzero(taop, sizeof(*taop));
-		}
+		if (tcp_do_rfc1644)
+			tcp_hc_gettao(&inp->inp_inc, &tao);
 
 		if ((thflags & TH_ACK) &&
 		    (SEQ_LEQ(th->th_ack, tp->iss) ||
@@ -1195,7 +1191,7 @@ after_listen:
 			 * Our new SYN, when it arrives, will serve as the
 			 * needed ACK.
 			 */
-			if (taop->tao_ccsent != 0)
+			if (tao.tao_ccsent != 0)
 				goto drop;
 			else {
 				rstreason = BANDLIM_UNLIMITED;
@@ -1225,7 +1221,7 @@ after_listen:
 			 */
 			if (to.to_flags & TOF_CCECHO) {
 				if (tp->cc_send != to.to_ccecho) {
-					if (taop->tao_ccsent != 0)
+					if (tao.tao_ccsent != 0)
 						goto drop;
 					else {
 						rstreason = BANDLIM_UNLIMITED;
@@ -1246,8 +1242,8 @@ after_listen:
 				tp->rcv_scale = tp->request_r_scale;
 			}
 			/* Segment is acceptable, update cache if undefined. */
-			if (taop->tao_ccsent == 0)
-				taop->tao_ccsent = to.to_ccecho;
+			if (tao.tao_ccsent == 0 && tcp_do_rfc1644)
+				tcp_hc_updatetao(&inp->inp_inc, TCP_HC_TAO_CCSENT, to.to_ccecho, 0);
 
 			tp->rcv_adv += tp->rcv_wnd;
 			tp->snd_una++;		/* SYN is acked */
@@ -1290,14 +1286,16 @@ after_listen:
 			tp->t_flags |= TF_ACKNOW;
 			callout_stop(tp->tt_rexmt);
 			if (to.to_flags & TOF_CC) {
-				if (taop->tao_cc != 0 &&
-				    CC_GT(to.to_cc, taop->tao_cc)) {
+				if (tao.tao_cc != 0 &&
+				    CC_GT(to.to_cc, tao.tao_cc)) {
 					/*
 					 * update cache and make transition:
 					 *        SYN-SENT -> ESTABLISHED*
 					 *        SYN-SENT* -> FIN-WAIT-1*
 					 */
-					taop->tao_cc = to.to_cc;
+					tao.tao_cc = to.to_cc;
+					tcp_hc_updatetao(&inp->inp_inc,
+						TCP_HC_TAO_CC, to.to_cc, 0);
 					tp->t_starttime = ticks;
 					if (tp->t_flags & TF_NEEDFIN) {
 						tp->t_state = TCPS_FIN_WAIT_1;
@@ -1313,8 +1311,12 @@ after_listen:
 				} else
 					tp->t_state = TCPS_SYN_RECEIVED;
 			} else {
-				/* CC.NEW or no option => invalidate cache */
-				taop->tao_cc = 0;
+				if (tcp_do_rfc1644) {
+					/* CC.NEW or no option => invalidate cache */
+					tao.tao_cc = 0;
+					tcp_hc_updatetao(&inp->inp_inc,
+						TCP_HC_TAO_CC, to.to_cc, 0);
+				}
 				tp->t_state = TCPS_SYN_RECEIVED;
 			}
 		}
@@ -1682,13 +1684,14 @@ trimthenstep6:
 		}
 		/*
 		 * Upon successful completion of 3-way handshake,
-		 * update cache.CC if it was undefined, pass any queued
-		 * data to the user, and advance state appropriately.
+		 * update cache.CC, pass any queued data to the user,
+		 * and advance state appropriately.
 		 */
-		if ((taop = tcp_gettaocache(&inp->inp_inc)) != NULL &&
-		    taop->tao_cc == 0)
-			taop->tao_cc = tp->cc_recv;
-
+		if (tcp_do_rfc1644) {
+			tao.tao_cc = tp->cc_recv;
+			tcp_hc_updatetao(&inp->inp_inc, TCP_HC_TAO_CC,
+					 tp->cc_recv, 0);
+		}
 		/*
 		 * Make transitions:
 		 *      SYN-RECEIVED  -> ESTABLISHED
@@ -2611,25 +2614,26 @@ tcp_xmit_timer(tp, rtt)
  * are present.  Store the upper limit of the length of options plus
  * data in maxopd.
  *
- * NOTE that this routine is only called when we process an incoming
- * segment, for outgoing segments only tcp_mssopt is called.
  *
  * In case of T/TCP, we call this routine during implicit connection
  * setup as well (offer = -1), to initialize maxseg from the cached
  * MSS of our peer.
+ *
+ * NOTE that this routine is only called when we process an incoming
+ * segment. Outgoing SYN/ACK MSS settings are handled in tcp_mssopt().
  */
 void
 tcp_mss(tp, offer)
 	struct tcpcb *tp;
 	int offer;
 {
-	register struct rtentry *rt;
-	struct ifnet *ifp;
-	register int rtt, mss;
+	int rtt, mss;
 	u_long bufsize;
+	u_long maxmtu;
 	struct inpcb *inp = tp->t_inpcb;
 	struct socket *so;
-	struct rmxp_tao *taop;
+	struct hc_metrics_lite metrics;
+	struct rmxp_tao tao;
 	int origoffer = offer;
 #ifdef INET6
 	int isipv6 = ((inp->inp_vflag & INP_IPV6) != 0) ? 1 : 0;
@@ -2637,96 +2641,96 @@ tcp_mss(tp, offer)
 			    sizeof (struct ip6_hdr) + sizeof (struct tcphdr) :
 			    sizeof (struct tcpiphdr);
 #else
-	const int isipv6 = 0;
-	const size_t min_protoh = sizeof (struct tcpiphdr);
+	const size_t min_protoh = sizeof(struct tcpiphdr);
 #endif
+	bzero(&tao, sizeof(tao));
 
-	if (isipv6)
-		rt = tcp_rtlookup6(&inp->inp_inc);
-	else
-		rt = tcp_rtlookup(&inp->inp_inc);
-	if (rt == NULL) {
-		tp->t_maxopd = tp->t_maxseg =
-				isipv6 ? tcp_v6mssdflt : tcp_mssdflt;
-		return;
+	/* initialize */
+#ifdef INET6
+	if (isipv6) {
+		maxmtu = tcp_maxmtu6(&inp->inp_inc);
+		tp->t_maxopd = tp->t_maxseg = tcp_v6mssdflt;
+	} else
+#endif
+	{
+		maxmtu = tcp_maxmtu(&inp->inp_inc);
+		tp->t_maxopd = tp->t_maxseg = tcp_mssdflt;
 	}
-	ifp = rt->rt_ifp;
 	so = inp->inp_socket;
 
-	taop = rmx_taop(rt->rt_rmx);
 	/*
-	 * Offer == -1 means that we didn't receive SYN yet,
-	 * use cached value in that case;
+	 * no route to sender, take default mss and return
 	 */
-	if (offer == -1)
-		offer = taop->tao_mssopt;
-	/*
-	 * Offer == 0 means that there was no MSS on the SYN segment,
-	 * in this case we use tcp_mssdflt.
-	 */
-	if (offer == 0)
-		offer = isipv6 ? tcp_v6mssdflt : tcp_mssdflt;
-	else
-		/*
-		 * Sanity check: make sure that maxopd will be large
-		 * enough to allow some data on segments even is the
-		 * all the option space is used (40bytes).  Otherwise
-		 * funny things may happen in tcp_output.
-		 */
-		offer = max(offer, 64);
-	taop->tao_mssopt = offer;
+	if (maxmtu == 0)
+		return;
+
+	/* what have we got? */
+	switch (offer) {
+		case 0:
+			/*
+			 * Offer == 0 means that there was no MSS on the SYN
+			 * segment, in this case we use tcp_mssdflt.
+			 */
+			offer =
+#ifdef INET6
+				isipv6 ? tcp_v6mssdflt :
+#endif
+				tcp_mssdflt;
+			break;
+
+		case -1:
+			/*
+			 * Offer == -1 means that we didn't receive SYN yet,
+			 * use cached value in that case;
+			 */
+			if (tcp_do_rfc1644)
+				tcp_hc_gettao(&inp->inp_inc, &tao);
+			if (tao.tao_mssopt != 0)
+				offer = tao.tao_mssopt;
+			/* FALLTHROUGH */
+
+		default:
+			/*
+			 * Sanity check: make sure that maxopd will be large
+			 * enough to allow some data on segments even if the
+			 * all the option space is used (40bytes).  Otherwise
+			 * funny things may happen in tcp_output.
+			 */
+			offer = max(offer, 64);
+			if (tcp_do_rfc1644)
+				tcp_hc_updatetao(&inp->inp_inc,
+						 TCP_HC_TAO_MSSOPT, 0, offer);
+	}
 
 	/*
-	 * While we're here, check if there's an initial rtt
-	 * or rttvar.  Convert from the route-table units
-	 * to scaled multiples of the slow timeout timer.
+	 * rmx information is now retrieved from tcp_hostcache
 	 */
-	if (tp->t_srtt == 0 && (rtt = rt->rt_rmx.rmx_rtt)) {
-		/*
-		 * XXX the lock bit for RTT indicates that the value
-		 * is also a minimum value; this is subject to time.
-		 */
-		if (rt->rt_rmx.rmx_locks & RTV_RTT)
-			tp->t_rttmin = rtt / (RTM_RTTUNIT / hz);
-		tp->t_srtt = rtt / (RTM_RTTUNIT / (hz * TCP_RTT_SCALE));
-		tp->t_rttbest = tp->t_srtt + TCP_RTT_SCALE;
-		tcpstat.tcps_usedrtt++;
-		if (rt->rt_rmx.rmx_rttvar) {
-			tp->t_rttvar = rt->rt_rmx.rmx_rttvar /
-			    (RTM_RTTUNIT / (hz * TCP_RTTVAR_SCALE));
-			tcpstat.tcps_usedrttvar++;
-		} else {
-			/* default variation is +- 1 rtt */
-			tp->t_rttvar =
-			    tp->t_srtt * TCP_RTTVAR_SCALE / TCP_RTT_SCALE;
-		}
-		TCPT_RANGESET(tp->t_rxtcur,
-			      ((tp->t_srtt >> 2) + tp->t_rttvar) >> 1,
-			      tp->t_rttmin, TCPTV_REXMTMAX);
-	}
+	tcp_hc_get(&inp->inp_inc, &metrics);
+
 	/*
-	 * if there's an mtu associated with the route, use it
+	 * if there's a discovered mtu in the tcp hostcache, use it
 	 * else, use the link mtu.
 	 */
-	if (rt->rt_rmx.rmx_mtu)
-		mss = rt->rt_rmx.rmx_mtu - min_protoh;
+	if (metrics.rmx_mtu)
+		mss = metrics.rmx_mtu - min_protoh;
 	else {
 #ifdef INET6
-		mss = (isipv6 ? IN6_LINKMTU(rt->rt_ifp) : ifp->if_mtu)
-			- min_protoh;
-#else
-		mss = ifp->if_mtu - min_protoh;
-#endif
-#ifdef INET6
 		if (isipv6) {
-			if (!in6_localaddr(&inp->in6p_faddr))
+			mss = maxmtu - min_protoh;
+			if (!path_mtu_discovery &&
+			    !in6_localaddr(&inp->in6p_faddr))
 				mss = min(mss, tcp_v6mssdflt);
 		} else
 #endif
-			if (!in_localaddr(inp->inp_faddr))
+		{
+			mss = maxmtu - min_protoh;
+			if (!path_mtu_discovery &&
+			    !in_localaddr(inp->inp_faddr))
 				mss = min(mss, tcp_mssdflt);
+		}
 	}
 	mss = min(mss, offer);
+
 	/*
 	 * maxopd stores the maximum length of data AND options
 	 * in a segment; maxseg is the amount of data in a normal
@@ -2749,6 +2753,7 @@ tcp_mss(tp, offer)
 	    (origoffer == -1 ||
 	     (tp->t_flags & TF_RCVD_CC) == TF_RCVD_CC))
 		mss -= TCPOLEN_CC_APPA;
+	tp->t_maxseg = mss;
 
 #if	(MCLBYTES & (MCLBYTES - 1)) == 0
 		if (mss > MCLBYTES)
@@ -2757,15 +2762,18 @@ tcp_mss(tp, offer)
 		if (mss > MCLBYTES)
 			mss = mss / MCLBYTES * MCLBYTES;
 #endif
+	tp->t_maxseg = mss;
+
 	/*
-	 * If there's a pipesize, change the socket buffer
-	 * to that size.  Make the socket buffers an integral
-	 * number of mss units; if the mss is larger than
-	 * the socket buffer, decrease the mss.
+	 * If there's a pipesize, change the socket buffer to that size,
+	 * don't change if sb_hiwat is different than default (then it
+	 * has been changed on purpose with setsockopt).
+	 * Make the socket buffers an integral number of mss units;
+	 * if the mss is larger than the socket buffer, decrease the mss.
 	 */
-#ifdef RTV_SPIPE
-	if ((bufsize = rt->rt_rmx.rmx_sendpipe) == 0)
-#endif
+	if ((so->so_snd.sb_hiwat == tcp_sendspace) && metrics.rmx_sendpipe)
+		bufsize = metrics.rmx_sendpipe;
+	else
 		bufsize = so->so_snd.sb_hiwat;
 	if (bufsize < mss)
 		mss = bufsize;
@@ -2778,9 +2786,9 @@ tcp_mss(tp, offer)
 	}
 	tp->t_maxseg = mss;
 
-#ifdef RTV_RPIPE
-	if ((bufsize = rt->rt_rmx.rmx_recvpipe) == 0)
-#endif
+	if ((so->so_rcv.sb_hiwat == tcp_recvspace) && metrics.rmx_recvpipe)
+		bufsize = metrics.rmx_recvpipe;
+	else
 		bufsize = so->so_rcv.sb_hiwat;
 	if (bufsize > mss) {
 		bufsize = roundup(bufsize, mss);
@@ -2789,62 +2797,110 @@ tcp_mss(tp, offer)
 		if (bufsize > so->so_rcv.sb_hiwat)
 			(void)sbreserve(&so->so_rcv, bufsize, so, NULL);
 	}
+	/*
+	 * While we're here, check the others too
+	 */
+	if (tp->t_srtt == 0 && (rtt = metrics.rmx_rtt)) {
+		tp->t_srtt = rtt;
+		tp->t_rttbest = tp->t_srtt + TCP_RTT_SCALE;
+		tcpstat.tcps_usedrtt++;
+		if (metrics.rmx_rttvar) {
+			tp->t_rttvar = metrics.rmx_rttvar;
+			tcpstat.tcps_usedrttvar++;
+		} else {
+			/* default variation is +- 1 rtt */
+			tp->t_rttvar =
+			    tp->t_srtt * TCP_RTTVAR_SCALE / TCP_RTT_SCALE;
+		}
+		TCPT_RANGESET(tp->t_rxtcur,
+			      ((tp->t_srtt >> 2) + tp->t_rttvar) >> 1,
+			      tp->t_rttmin, TCPTV_REXMTMAX);
+	}
+	if (metrics.rmx_ssthresh) {
+		/*
+		 * There's some sort of gateway or interface
+		 * buffer limit on the path.  Use this to set
+		 * the slow start threshhold, but set the
+		 * threshold to no less than 2*mss.
+		 */
+		tp->snd_ssthresh = max(2 * mss, metrics.rmx_ssthresh);
+		tcpstat.tcps_usedssthresh++;
+	}
+	if (metrics.rmx_bandwidth)
+		tp->snd_bandwidth = metrics.rmx_bandwidth;
 
 	/*
 	 * Set the slow-start flight size depending on whether this
 	 * is a local network or not.
+	 *
+	 * Extend this so we cache the cwnd too and retrieve it here.
+	 * Make cwnd even bigger than RFC3390 suggests but only if we
+	 * have previous experience with the remote host. Be careful
+	 * not to make cwnd bigger than remote receive window or our own
+	 * send socket buffer. Maybe put some additional upper bound
+	 * on the retrieved cwnd. Should do incremental updates to
+	 * hostcache when cwnd collapses so the next connection doesn't
+	 * overload the path again.
+	 *
+	 * RFC3390 says only do this if SYN or SYN/ACK didn't get lost.
+	 * We currently check only in syncache_socket for that.
 	 */
+#define TCP_METRICS_CWND
+#ifdef TCP_METRICS_CWND
+	if (metrics.rmx_cwnd)
+		tp->snd_cwnd = max(mss,
+				min(metrics.rmx_cwnd / 2,
+				 min(tp->snd_wnd, so->so_snd.sb_hiwat)));
+	else
+#endif
 	if (tcp_do_rfc3390)
 		tp->snd_cwnd = min(4 * mss, max(2 * mss, 4380));
+#ifdef INET6
 	else if ((isipv6 && in6_localaddr(&inp->in6p_faddr)) ||
 	    (!isipv6 && in_localaddr(inp->inp_faddr)))
 		tp->snd_cwnd = mss * ss_fltsz_local;
+#endif
 	else
 		tp->snd_cwnd = mss * ss_fltsz;
-
-	if (rt->rt_rmx.rmx_ssthresh) {
-		/*
-		 * There's some sort of gateway or interface
-		 * buffer limit on the path.  Use this to set
-		 * the slow start threshhold, but set the
-		 * threshold to no less than 2*mss.
-		 */
-		tp->snd_ssthresh = max(2 * mss, rt->rt_rmx.rmx_ssthresh);
-		tcpstat.tcps_usedssthresh++;
-	}
 }
 
 /*
  * Determine the MSS option to send on an outgoing SYN.
  */
 int
-tcp_mssopt(tp)
-	struct tcpcb *tp;
+tcp_mssopt(inc)
+	struct in_conninfo *inc;
 {
-	struct rtentry *rt;
+	int mss = 0;
+	u_long maxmtu = 0;
+	u_long thcmtu = 0;
+	size_t min_protoh;
 #ifdef INET6
-	int isipv6 = ((tp->t_inpcb->inp_vflag & INP_IPV6) != 0) ? 1 : 0;
-	size_t min_protoh = isipv6 ?
-			    sizeof (struct ip6_hdr) + sizeof (struct tcphdr) :
-			    sizeof (struct tcpiphdr);
-#else
-	const int isipv6 = 0;
-	const size_t min_protoh = sizeof (struct tcpiphdr);
+	int isipv6 = inc->inc_isipv6 ? 1 : 0;
 #endif
 
-	if (isipv6)
-		rt = tcp_rtlookup6(&tp->t_inpcb->inp_inc);
-	else
-		rt = tcp_rtlookup(&tp->t_inpcb->inp_inc);
-	if (rt == NULL)
-		return (isipv6 ? tcp_v6mssdflt : tcp_mssdflt);
+	KASSERT(inc != NULL, ("tcp_mssopt with NULL in_conninfo pointer"));
 
 #ifdef INET6
-	return (isipv6 ? IN6_LINKMTU(rt->rt_ifp) :
-		rt->rt_ifp->if_mtu - min_protoh);
-#else
-	return (rt->rt_ifp->if_mtu - min_protoh);
+	if (isipv6) {
+		mss = tcp_v6mssdflt;
+		maxmtu = tcp_maxmtu6(inc);
+		thcmtu = tcp_hc_getmtu(inc); /* IPv4 and IPv6 */
+		min_protoh = sizeof(struct ip6_hdr) + sizeof(struct tcphdr);
+	} else
 #endif
+	{
+		mss = tcp_mssdflt;
+		maxmtu = tcp_maxmtu(inc);
+		thcmtu = tcp_hc_getmtu(inc); /* IPv4 and IPv6 */
+		min_protoh = sizeof(struct tcpiphdr);
+	}
+	if (maxmtu && thcmtu)
+		mss = min(maxmtu, thcmtu) - min_protoh;
+	else if (maxmtu || thcmtu)
+		mss = max(maxmtu, thcmtu) - min_protoh;
+
+	return (mss);
 }
 
 
diff --git a/sys/netinet/tcp_output.c b/sys/netinet/tcp_output.c
index a48ec4a..a8b8e53 100644
--- a/sys/netinet/tcp_output.c
+++ b/sys/netinet/tcp_output.c
@@ -125,11 +125,12 @@ tcp_output(struct tcpcb *tp)
 #if 0
 	int maxburst = TCP_MAXBURST;
 #endif
-	struct rmxp_tao *taop;
+	struct rmxp_tao tao;
 #ifdef INET6
 	struct ip6_hdr *ip6 = NULL;
 	int isipv6;
 
+	bzero(&tao, sizeof(tao));
 	isipv6 = (tp->t_inpcb->inp_vflag & INP_IPV6) != 0;
 #endif
 
@@ -232,7 +233,6 @@ again:
 	 */
 	len = (long)ulmin(so->so_snd.sb_cc, win) - off;
 
-	taop = tcp_gettaocache(&tp->t_inpcb->inp_inc);
 
 	/*
 	 * Lop off SYN bit if it has already been sent.  However, if this
@@ -242,8 +242,10 @@ again:
 	if ((flags & TH_SYN) && SEQ_GT(tp->snd_nxt, tp->snd_una)) {
 		flags &= ~TH_SYN;
 		off--, len++;
+		if (tcp_do_rfc1644)
+			tcp_hc_gettao(&tp->t_inpcb->inp_inc, &tao);
 		if (len > 0 && tp->t_state == TCPS_SYN_SENT &&
-		    (taop == NULL || taop->tao_ccsent == 0))
+		     tao.tao_ccsent == 0)
 			return 0;
 	}
 
@@ -429,7 +431,7 @@ send:
 
 			opt[0] = TCPOPT_MAXSEG;
 			opt[1] = TCPOLEN_MAXSEG;
-			mss = htons((u_short) tcp_mssopt(tp));
+			mss = htons((u_short) tcp_mssopt(&tp->t_inpcb->inp_inc));
 			(void)memcpy(opt + 2, &mss, sizeof(mss));
 			optlen = TCPOLEN_MAXSEG;
 
@@ -872,10 +874,7 @@ send:
 		 * Also, desired default hop limit might be changed via
 		 * Neighbor Discovery.
 		 */
-		ip6->ip6_hlim = in6_selecthlim(tp->t_inpcb,
-					       tp->t_inpcb->in6p_route.ro_rt ?
-					       tp->t_inpcb->in6p_route.ro_rt->rt_ifp
-					       : NULL);
+		ip6->ip6_hlim = in6_selecthlim(tp->t_inpcb, NULL);
 
 		/* TODO: IPv6 IP6TOS_ECT bit on */
 #if defined(IPSEC) && !defined(FAST_IPSEC)
@@ -886,36 +885,27 @@ send:
 		}
 #endif /*IPSEC*/
 		error = ip6_output(m,
-			    tp->t_inpcb->in6p_outputopts,
-			    &tp->t_inpcb->in6p_route,
+			    tp->t_inpcb->in6p_outputopts, NULL,
 			    (so->so_options & SO_DONTROUTE), NULL, NULL,
 			    tp->t_inpcb);
 	} else
 #endif /* INET6 */
     {
-	struct rtentry *rt;
 	ip->ip_len = m->m_pkthdr.len;
 #ifdef INET6
  	if (INP_CHECK_SOCKAF(so, AF_INET6))
- 		ip->ip_ttl = in6_selecthlim(tp->t_inpcb,
- 					    tp->t_inpcb->in6p_route.ro_rt ?
- 					    tp->t_inpcb->in6p_route.ro_rt->rt_ifp
- 					    : NULL);
+ 		ip->ip_ttl = in6_selecthlim(tp->t_inpcb, NULL);
 #endif /* INET6 */
 	/*
-	 * See if we should do MTU discovery.  We do it only if the following
-	 * are true:
-	 *	1) we have a valid route to the destination
-	 *	2) the MTU is not locked (if it is, then discovery has been
-	 *	   disabled)
+	 * If we do path MTU discovery, then we set DF on every packet.
+	 * This might not be the best thing to do according to RFC3390
+	 * Section 2. However the tcp hostcache mitigates the problem
+	 * so it affects only the first tcp connection with a host.
 	 */
-	if (path_mtu_discovery
-	    && (rt = tp->t_inpcb->inp_route.ro_rt)
-	    && rt->rt_flags & RTF_UP
-	    && !(rt->rt_rmx.rmx_locks & RTV_MTU)) {
+	if (path_mtu_discovery)
 		ip->ip_off |= IP_DF;
-	}
-	error = ip_output(m, tp->t_inpcb->inp_options, &tp->t_inpcb->inp_route,
+
+	error = ip_output(m, tp->t_inpcb->inp_options, NULL,
 	    (so->so_options & SO_DONTROUTE), 0, tp->t_inpcb);
     }
 	if (error) {
diff --git a/sys/netinet/tcp_reass.c b/sys/netinet/tcp_reass.c
index a247138..eca5cb2 100644
--- a/sys/netinet/tcp_reass.c
+++ b/sys/netinet/tcp_reass.c
@@ -154,9 +154,8 @@ static int	 tcp_timewait(struct tcptw *, struct tcpopt *,
 #define ND6_HINT(tp) \
 do { \
 	if ((tp) && (tp)->t_inpcb && \
-	    ((tp)->t_inpcb->inp_vflag & INP_IPV6) != 0 && \
-	    (tp)->t_inpcb->in6p_route.ro_rt) \
-		nd6_nud_hint((tp)->t_inpcb->in6p_route.ro_rt, NULL, 0); \
+	    ((tp)->t_inpcb->inp_vflag & INP_IPV6) != 0) \
+		nd6_nud_hint(NULL, NULL, 0); \
 } while (0)
 #else
 #define ND6_HINT(tp)
@@ -358,8 +357,7 @@ tcp_input(m, off0)
 	int todrop, acked, ourfinisacked, needoutput = 0;
 	u_long tiwin;
 	struct tcpopt to;		/* options in this segment */
-	struct rmxp_tao *taop;		/* pointer to our TAO cache entry */
-	struct rmxp_tao	tao_noncached;	/* in case there's no cached entry */
+	struct rmxp_tao tao;		/* our TAO cache entry */
 	int headlocked = 0;
 	struct sockaddr_in *next_hop = NULL;
 	int rstreason; /* For badport_bandlim accounting purposes */
@@ -389,6 +387,7 @@ tcp_input(m, off0)
 #ifdef INET6
 	isipv6 = (mtod(m, struct ip *)->ip_v == 6) ? 1 : 0;
 #endif
+	bzero(&tao, sizeof(tao));
 	bzero((char *)&to, sizeof(to));
 
 	tcpstat.tcps_rcvtotal++;
@@ -707,11 +706,9 @@ findpcb:
 		if (isipv6) {
 			inc.inc6_faddr = ip6->ip6_src;
 			inc.inc6_laddr = ip6->ip6_dst;
-			inc.inc6_route.ro_rt = NULL;		/* XXX */
 		} else {
 			inc.inc_faddr = ip->ip_src;
 			inc.inc_laddr = ip->ip_dst;
-			inc.inc_route.ro_rt = NULL;		/* XXX */
 		}
 		inc.inc_fport = th->th_sport;
 		inc.inc_lport = th->th_dport;
@@ -916,7 +913,7 @@ findpcb:
 	}
 after_listen:
 
-/* XXX temp debugging */
+	/* XXX temp debugging */
 	/* should not happen - syncache should pick up these connections */
 	if (tp->t_state == TCPS_LISTEN)
 		panic("tcp_input: TCPS_LISTEN");
@@ -930,8 +927,9 @@ after_listen:
 		callout_reset(tp->tt_keep, tcp_keepidle, tcp_timer_keep, tp);
 
 	/*
-	 * Process options.
-	 * XXX this is tradtitional behavior, may need to be cleaned up.
+	 * Process options only when we get SYN/ACK back. The SYN case
+	 * for incoming connections is handled in tcp_syncache.
+	 * XXX this is traditional behavior, may need to be cleaned up.
 	 */
 	tcp_dooptions(&to, optp, optlen, thflags & TH_SYN);
 	if (thflags & TH_SYN) {
@@ -1179,10 +1177,8 @@ after_listen:
 	 *	continue processing rest of data/controls, beginning with URG
 	 */
 	case TCPS_SYN_SENT:
-		if ((taop = tcp_gettaocache(&inp->inp_inc)) == NULL) {
-			taop = &tao_noncached;
-			bzero(taop, sizeof(*taop));
-		}
+		if (tcp_do_rfc1644)
+			tcp_hc_gettao(&inp->inp_inc, &tao);
 
 		if ((thflags & TH_ACK) &&
 		    (SEQ_LEQ(th->th_ack, tp->iss) ||
@@ -1195,7 +1191,7 @@ after_listen:
 			 * Our new SYN, when it arrives, will serve as the
 			 * needed ACK.
 			 */
-			if (taop->tao_ccsent != 0)
+			if (tao.tao_ccsent != 0)
 				goto drop;
 			else {
 				rstreason = BANDLIM_UNLIMITED;
@@ -1225,7 +1221,7 @@ after_listen:
 			 */
 			if (to.to_flags & TOF_CCECHO) {
 				if (tp->cc_send != to.to_ccecho) {
-					if (taop->tao_ccsent != 0)
+					if (tao.tao_ccsent != 0)
 						goto drop;
 					else {
 						rstreason = BANDLIM_UNLIMITED;
@@ -1246,8 +1242,8 @@ after_listen:
 				tp->rcv_scale = tp->request_r_scale;
 			}
 			/* Segment is acceptable, update cache if undefined. */
-			if (taop->tao_ccsent == 0)
-				taop->tao_ccsent = to.to_ccecho;
+			if (tao.tao_ccsent == 0 && tcp_do_rfc1644)
+				tcp_hc_updatetao(&inp->inp_inc, TCP_HC_TAO_CCSENT, to.to_ccecho, 0);
 
 			tp->rcv_adv += tp->rcv_wnd;
 			tp->snd_una++;		/* SYN is acked */
@@ -1290,14 +1286,16 @@ after_listen:
 			tp->t_flags |= TF_ACKNOW;
 			callout_stop(tp->tt_rexmt);
 			if (to.to_flags & TOF_CC) {
-				if (taop->tao_cc != 0 &&
-				    CC_GT(to.to_cc, taop->tao_cc)) {
+				if (tao.tao_cc != 0 &&
+				    CC_GT(to.to_cc, tao.tao_cc)) {
 					/*
 					 * update cache and make transition:
 					 *        SYN-SENT -> ESTABLISHED*
 					 *        SYN-SENT* -> FIN-WAIT-1*
 					 */
-					taop->tao_cc = to.to_cc;
+					tao.tao_cc = to.to_cc;
+					tcp_hc_updatetao(&inp->inp_inc,
+						TCP_HC_TAO_CC, to.to_cc, 0);
 					tp->t_starttime = ticks;
 					if (tp->t_flags & TF_NEEDFIN) {
 						tp->t_state = TCPS_FIN_WAIT_1;
@@ -1313,8 +1311,12 @@ after_listen:
 				} else
 					tp->t_state = TCPS_SYN_RECEIVED;
 			} else {
-				/* CC.NEW or no option => invalidate cache */
-				taop->tao_cc = 0;
+				if (tcp_do_rfc1644) {
+					/* CC.NEW or no option => invalidate cache */
+					tao.tao_cc = 0;
+					tcp_hc_updatetao(&inp->inp_inc,
+						TCP_HC_TAO_CC, to.to_cc, 0);
+				}
 				tp->t_state = TCPS_SYN_RECEIVED;
 			}
 		}
@@ -1682,13 +1684,14 @@ trimthenstep6:
 		}
 		/*
 		 * Upon successful completion of 3-way handshake,
-		 * update cache.CC if it was undefined, pass any queued
-		 * data to the user, and advance state appropriately.
+		 * update cache.CC, pass any queued data to the user,
+		 * and advance state appropriately.
 		 */
-		if ((taop = tcp_gettaocache(&inp->inp_inc)) != NULL &&
-		    taop->tao_cc == 0)
-			taop->tao_cc = tp->cc_recv;
-
+		if (tcp_do_rfc1644) {
+			tao.tao_cc = tp->cc_recv;
+			tcp_hc_updatetao(&inp->inp_inc, TCP_HC_TAO_CC,
+					 tp->cc_recv, 0);
+		}
 		/*
 		 * Make transitions:
 		 *      SYN-RECEIVED  -> ESTABLISHED
@@ -2611,25 +2614,26 @@ tcp_xmit_timer(tp, rtt)
  * are present.  Store the upper limit of the length of options plus
  * data in maxopd.
  *
- * NOTE that this routine is only called when we process an incoming
- * segment, for outgoing segments only tcp_mssopt is called.
  *
  * In case of T/TCP, we call this routine during implicit connection
  * setup as well (offer = -1), to initialize maxseg from the cached
  * MSS of our peer.
+ *
+ * NOTE that this routine is only called when we process an incoming
+ * segment. Outgoing SYN/ACK MSS settings are handled in tcp_mssopt().
  */
 void
 tcp_mss(tp, offer)
 	struct tcpcb *tp;
 	int offer;
 {
-	register struct rtentry *rt;
-	struct ifnet *ifp;
-	register int rtt, mss;
+	int rtt, mss;
 	u_long bufsize;
+	u_long maxmtu;
 	struct inpcb *inp = tp->t_inpcb;
 	struct socket *so;
-	struct rmxp_tao *taop;
+	struct hc_metrics_lite metrics;
+	struct rmxp_tao tao;
 	int origoffer = offer;
 #ifdef INET6
 	int isipv6 = ((inp->inp_vflag & INP_IPV6) != 0) ? 1 : 0;
@@ -2637,96 +2641,96 @@ tcp_mss(tp, offer)
 			    sizeof (struct ip6_hdr) + sizeof (struct tcphdr) :
 			    sizeof (struct tcpiphdr);
 #else
-	const int isipv6 = 0;
-	const size_t min_protoh = sizeof (struct tcpiphdr);
+	const size_t min_protoh = sizeof(struct tcpiphdr);
 #endif
+	bzero(&tao, sizeof(tao));
 
-	if (isipv6)
-		rt = tcp_rtlookup6(&inp->inp_inc);
-	else
-		rt = tcp_rtlookup(&inp->inp_inc);
-	if (rt == NULL) {
-		tp->t_maxopd = tp->t_maxseg =
-				isipv6 ? tcp_v6mssdflt : tcp_mssdflt;
-		return;
+	/* initialize */
+#ifdef INET6
+	if (isipv6) {
+		maxmtu = tcp_maxmtu6(&inp->inp_inc);
+		tp->t_maxopd = tp->t_maxseg = tcp_v6mssdflt;
+	} else
+#endif
+	{
+		maxmtu = tcp_maxmtu(&inp->inp_inc);
+		tp->t_maxopd = tp->t_maxseg = tcp_mssdflt;
 	}
-	ifp = rt->rt_ifp;
 	so = inp->inp_socket;
 
-	taop = rmx_taop(rt->rt_rmx);
 	/*
-	 * Offer == -1 means that we didn't receive SYN yet,
-	 * use cached value in that case;
+	 * no route to sender, take default mss and return
 	 */
-	if (offer == -1)
-		offer = taop->tao_mssopt;
-	/*
-	 * Offer == 0 means that there was no MSS on the SYN segment,
-	 * in this case we use tcp_mssdflt.
-	 */
-	if (offer == 0)
-		offer = isipv6 ? tcp_v6mssdflt : tcp_mssdflt;
-	else
-		/*
-		 * Sanity check: make sure that maxopd will be large
-		 * enough to allow some data on segments even is the
-		 * all the option space is used (40bytes).  Otherwise
-		 * funny things may happen in tcp_output.
-		 */
-		offer = max(offer, 64);
-	taop->tao_mssopt = offer;
+	if (maxmtu == 0)
+		return;
+
+	/* what have we got? */
+	switch (offer) {
+		case 0:
+			/*
+			 * Offer == 0 means that there was no MSS on the SYN
+			 * segment, in this case we use tcp_mssdflt.
+			 */
+			offer =
+#ifdef INET6
+				isipv6 ? tcp_v6mssdflt :
+#endif
+				tcp_mssdflt;
+			break;
+
+		case -1:
+			/*
+			 * Offer == -1 means that we didn't receive SYN yet,
+			 * use cached value in that case;
+			 */
+			if (tcp_do_rfc1644)
+				tcp_hc_gettao(&inp->inp_inc, &tao);
+			if (tao.tao_mssopt != 0)
+				offer = tao.tao_mssopt;
+			/* FALLTHROUGH */
+
+		default:
+			/*
+			 * Sanity check: make sure that maxopd will be large
+			 * enough to allow some data on segments even if
+			 * all the option space is used (40 bytes).  Otherwise
+			 * funny things may happen in tcp_output.
+			 */
+			offer = max(offer, 64);
+			if (tcp_do_rfc1644)
+				tcp_hc_updatetao(&inp->inp_inc,
+						 TCP_HC_TAO_MSSOPT, 0, offer);
+	}
 
 	/*
-	 * While we're here, check if there's an initial rtt
-	 * or rttvar.  Convert from the route-table units
-	 * to scaled multiples of the slow timeout timer.
+	 * rmx information is now retrieved from tcp_hostcache
 	 */
-	if (tp->t_srtt == 0 && (rtt = rt->rt_rmx.rmx_rtt)) {
-		/*
-		 * XXX the lock bit for RTT indicates that the value
-		 * is also a minimum value; this is subject to time.
-		 */
-		if (rt->rt_rmx.rmx_locks & RTV_RTT)
-			tp->t_rttmin = rtt / (RTM_RTTUNIT / hz);
-		tp->t_srtt = rtt / (RTM_RTTUNIT / (hz * TCP_RTT_SCALE));
-		tp->t_rttbest = tp->t_srtt + TCP_RTT_SCALE;
-		tcpstat.tcps_usedrtt++;
-		if (rt->rt_rmx.rmx_rttvar) {
-			tp->t_rttvar = rt->rt_rmx.rmx_rttvar /
-			    (RTM_RTTUNIT / (hz * TCP_RTTVAR_SCALE));
-			tcpstat.tcps_usedrttvar++;
-		} else {
-			/* default variation is +- 1 rtt */
-			tp->t_rttvar =
-			    tp->t_srtt * TCP_RTTVAR_SCALE / TCP_RTT_SCALE;
-		}
-		TCPT_RANGESET(tp->t_rxtcur,
-			      ((tp->t_srtt >> 2) + tp->t_rttvar) >> 1,
-			      tp->t_rttmin, TCPTV_REXMTMAX);
-	}
+	tcp_hc_get(&inp->inp_inc, &metrics);
+
 	/*
-	 * if there's an mtu associated with the route, use it
+	 * if there's a discovered mtu in the tcp hostcache, use it
 	 * else, use the link mtu.
 	 */
-	if (rt->rt_rmx.rmx_mtu)
-		mss = rt->rt_rmx.rmx_mtu - min_protoh;
+	if (metrics.rmx_mtu)
+		mss = metrics.rmx_mtu - min_protoh;
 	else {
 #ifdef INET6
-		mss = (isipv6 ? IN6_LINKMTU(rt->rt_ifp) : ifp->if_mtu)
-			- min_protoh;
-#else
-		mss = ifp->if_mtu - min_protoh;
-#endif
-#ifdef INET6
 		if (isipv6) {
-			if (!in6_localaddr(&inp->in6p_faddr))
+			mss = maxmtu - min_protoh;
+			if (!path_mtu_discovery &&
+			    !in6_localaddr(&inp->in6p_faddr))
 				mss = min(mss, tcp_v6mssdflt);
 		} else
 #endif
-			if (!in_localaddr(inp->inp_faddr))
+		{
+			mss = maxmtu - min_protoh;
+			if (!path_mtu_discovery &&
+			    !in_localaddr(inp->inp_faddr))
 				mss = min(mss, tcp_mssdflt);
+		}
 	}
 	mss = min(mss, offer);
+
 	/*
 	 * maxopd stores the maximum length of data AND options
 	 * in a segment; maxseg is the amount of data in a normal
@@ -2749,6 +2753,7 @@ tcp_mss(tp, offer)
 	    (origoffer == -1 ||
 	     (tp->t_flags & TF_RCVD_CC) == TF_RCVD_CC))
 		mss -= TCPOLEN_CC_APPA;
+	tp->t_maxseg = mss;
 
 #if	(MCLBYTES & (MCLBYTES - 1)) == 0
 		if (mss > MCLBYTES)
@@ -2757,15 +2762,18 @@ tcp_mss(tp, offer)
 		if (mss > MCLBYTES)
 			mss = mss / MCLBYTES * MCLBYTES;
 #endif
+	tp->t_maxseg = mss;
+
 	/*
-	 * If there's a pipesize, change the socket buffer
-	 * to that size.  Make the socket buffers an integral
-	 * number of mss units; if the mss is larger than
-	 * the socket buffer, decrease the mss.
+	 * If there's a pipesize, change the socket buffer to that size,
+	 * don't change if sb_hiwat is different than default (then it
+	 * has been changed on purpose with setsockopt).
+	 * Make the socket buffers an integral number of mss units;
+	 * if the mss is larger than the socket buffer, decrease the mss.
 	 */
-#ifdef RTV_SPIPE
-	if ((bufsize = rt->rt_rmx.rmx_sendpipe) == 0)
-#endif
+	if ((so->so_snd.sb_hiwat == tcp_sendspace) && metrics.rmx_sendpipe)
+		bufsize = metrics.rmx_sendpipe;
+	else
 		bufsize = so->so_snd.sb_hiwat;
 	if (bufsize < mss)
 		mss = bufsize;
@@ -2778,9 +2786,9 @@ tcp_mss(tp, offer)
 	}
 	tp->t_maxseg = mss;
 
-#ifdef RTV_RPIPE
-	if ((bufsize = rt->rt_rmx.rmx_recvpipe) == 0)
-#endif
+	if ((so->so_rcv.sb_hiwat == tcp_recvspace) && metrics.rmx_recvpipe)
+		bufsize = metrics.rmx_recvpipe;
+	else
 		bufsize = so->so_rcv.sb_hiwat;
 	if (bufsize > mss) {
 		bufsize = roundup(bufsize, mss);
@@ -2789,62 +2797,110 @@ tcp_mss(tp, offer)
 		if (bufsize > so->so_rcv.sb_hiwat)
 			(void)sbreserve(&so->so_rcv, bufsize, so, NULL);
 	}
+	/*
+	 * While we're here, apply the other cached metrics too
+	 */
+	if (tp->t_srtt == 0 && (rtt = metrics.rmx_rtt)) {
+		tp->t_srtt = rtt;
+		tp->t_rttbest = tp->t_srtt + TCP_RTT_SCALE;
+		tcpstat.tcps_usedrtt++;
+		if (metrics.rmx_rttvar) {
+			tp->t_rttvar = metrics.rmx_rttvar;
+			tcpstat.tcps_usedrttvar++;
+		} else {
+			/* default variation is +- 1 rtt */
+			tp->t_rttvar =
+			    tp->t_srtt * TCP_RTTVAR_SCALE / TCP_RTT_SCALE;
+		}
+		TCPT_RANGESET(tp->t_rxtcur,
+			      ((tp->t_srtt >> 2) + tp->t_rttvar) >> 1,
+			      tp->t_rttmin, TCPTV_REXMTMAX);
+	}
+	if (metrics.rmx_ssthresh) {
+		/*
+		 * There's some sort of gateway or interface
+		 * buffer limit on the path.  Use this to set
+		 * the slow start threshold, but set the
+		 * threshold to no less than 2*mss.
+		 */
+		tp->snd_ssthresh = max(2 * mss, metrics.rmx_ssthresh);
+		tcpstat.tcps_usedssthresh++;
+	}
+	if (metrics.rmx_bandwidth)
+		tp->snd_bandwidth = metrics.rmx_bandwidth;
 
 	/*
 	 * Set the slow-start flight size depending on whether this
 	 * is a local network or not.
+	 *
+	 * Extend this so we cache the cwnd too and retrieve it here.
+	 * Make cwnd even bigger than RFC3390 suggests but only if we
+	 * have previous experience with the remote host. Be careful
+	 * not to make cwnd bigger than the remote receive window or our
+	 * own send socket buffer. Maybe put some additional upper bound
+	 * on the retrieved cwnd. Should do incremental updates to
+	 * hostcache when cwnd collapses so the next connection doesn't
+	 * overload the path again.
+	 *
+	 * RFC3390 says only do this if SYN or SYN/ACK didn't get lost.
+	 * We currently check only in syncache_socket for that.
 	 */
+#define TCP_METRICS_CWND
+#ifdef TCP_METRICS_CWND
+	if (metrics.rmx_cwnd)
+		tp->snd_cwnd = max(mss,
+				min(metrics.rmx_cwnd / 2,
+				 min(tp->snd_wnd, so->so_snd.sb_hiwat)));
+	else
+#endif
 	if (tcp_do_rfc3390)
 		tp->snd_cwnd = min(4 * mss, max(2 * mss, 4380));
+#ifdef INET6
 	else if ((isipv6 && in6_localaddr(&inp->in6p_faddr)) ||
 	    (!isipv6 && in_localaddr(inp->inp_faddr)))
 		tp->snd_cwnd = mss * ss_fltsz_local;
+#endif
 	else
 		tp->snd_cwnd = mss * ss_fltsz;
-
-	if (rt->rt_rmx.rmx_ssthresh) {
-		/*
-		 * There's some sort of gateway or interface
-		 * buffer limit on the path.  Use this to set
-		 * the slow start threshhold, but set the
-		 * threshold to no less than 2*mss.
-		 */
-		tp->snd_ssthresh = max(2 * mss, rt->rt_rmx.rmx_ssthresh);
-		tcpstat.tcps_usedssthresh++;
-	}
 }
 
 /*
  * Determine the MSS option to send on an outgoing SYN.
  */
 int
-tcp_mssopt(tp)
-	struct tcpcb *tp;
+tcp_mssopt(inc)
+	struct in_conninfo *inc;
 {
-	struct rtentry *rt;
+	int mss = 0;
+	u_long maxmtu = 0;
+	u_long thcmtu = 0;
+	size_t min_protoh;
 #ifdef INET6
-	int isipv6 = ((tp->t_inpcb->inp_vflag & INP_IPV6) != 0) ? 1 : 0;
-	size_t min_protoh = isipv6 ?
-			    sizeof (struct ip6_hdr) + sizeof (struct tcphdr) :
-			    sizeof (struct tcpiphdr);
-#else
-	const int isipv6 = 0;
-	const size_t min_protoh = sizeof (struct tcpiphdr);
+	int isipv6 = inc->inc_isipv6 ? 1 : 0;
 #endif
 
-	if (isipv6)
-		rt = tcp_rtlookup6(&tp->t_inpcb->inp_inc);
-	else
-		rt = tcp_rtlookup(&tp->t_inpcb->inp_inc);
-	if (rt == NULL)
-		return (isipv6 ? tcp_v6mssdflt : tcp_mssdflt);
+	KASSERT(inc != NULL, ("tcp_mssopt with NULL in_conninfo pointer"));
 
 #ifdef INET6
-	return (isipv6 ? IN6_LINKMTU(rt->rt_ifp) :
-		rt->rt_ifp->if_mtu - min_protoh);
-#else
-	return (rt->rt_ifp->if_mtu - min_protoh);
+	if (isipv6) {
+		mss = tcp_v6mssdflt;
+		maxmtu = tcp_maxmtu6(inc);
+		thcmtu = tcp_hc_getmtu(inc); /* IPv4 and IPv6 */
+		min_protoh = sizeof(struct ip6_hdr) + sizeof(struct tcphdr);
+	} else
 #endif
+	{
+		mss = tcp_mssdflt;
+		maxmtu = tcp_maxmtu(inc);
+		thcmtu = tcp_hc_getmtu(inc); /* IPv4 and IPv6 */
+		min_protoh = sizeof(struct tcpiphdr);
+	}
+	if (maxmtu && thcmtu)
+		mss = min(maxmtu, thcmtu) - min_protoh;
+	else if (maxmtu || thcmtu)
+		mss = max(maxmtu, thcmtu) - min_protoh;
+
+	return (mss);
 }
 
 
diff --git a/sys/netinet/tcp_subr.c b/sys/netinet/tcp_subr.c
index 7ce06f6..dfd6de1 100644
--- a/sys/netinet/tcp_subr.c
+++ b/sys/netinet/tcp_subr.c
@@ -76,6 +76,7 @@
 #include <netinet/ip_var.h>
 #ifdef INET6
 #include <netinet6/ip6_var.h>
+#include <netinet6/nd6.h>
 #endif
 #include <netinet/tcp.h>
 #include <netinet/tcp_fsm.h>
@@ -177,7 +178,6 @@ static int	tcp_inflight_stab = 20;
 SYSCTL_INT(_net_inet_tcp, OID_AUTO, inflight_stab, CTLFLAG_RW,
     &tcp_inflight_stab, 0, "Inflight Algorithm Stabilization 20 = 2 packets");
 
-static void	tcp_cleartaocache(void);
 static struct inpcb *tcp_notify(struct inpcb *, int);
 static void	tcp_discardcb(struct tcpcb *);
 
@@ -215,7 +215,6 @@ tcp_init()
 	int hashsize = TCBHASHSIZE;
 	
 	tcp_ccgen = 1;
-	tcp_cleartaocache();
 
 	tcp_delacktime = TCPTV_DELACK;
 	tcp_keepinit = TCPTV_KEEP_INIT;
@@ -262,6 +261,7 @@ tcp_init()
 	uma_zone_set_max(tcptw_zone, maxsockets / 5);
 	tcp_timer_init();
 	syncache_init();
+	tcp_hc_init();
 }
 
 /*
@@ -367,18 +367,14 @@ tcp_respond(tp, ipgen, th, m, ack, seq, flags)
 {
 	register int tlen;
 	int win = 0;
-	struct route *ro = 0;
-	struct route sro;
 	struct ip *ip;
 	struct tcphdr *nth;
 #ifdef INET6
-	struct route_in6 *ro6 = 0;
-	struct route_in6 sro6;
 	struct ip6_hdr *ip6;
 	int isipv6;
 #endif /* INET6 */
 	int ipflags = 0;
-	struct inpcb *inp;
+	struct inpcb *inp = NULL;
 
 	KASSERT(tp != NULL || m != NULL, ("tcp_respond: tp and m both NULL"));
 
@@ -398,24 +394,6 @@ tcp_respond(tp, ipgen, th, m, ack, seq, flags)
 			if (win > (long)TCP_MAXWIN << tp->rcv_scale)
 				win = (long)TCP_MAXWIN << tp->rcv_scale;
 		}
-#ifdef INET6
-		if (isipv6)
-			ro6 = &inp->in6p_route;
-		else
-#endif /* INET6 */
-		ro = &inp->inp_route;
-	} else {
-		inp = NULL;
-#ifdef INET6
-		if (isipv6) {
-			ro6 = &sro6;
-			bzero(ro6, sizeof *ro6);
-		} else
-#endif /* INET6 */
-	      {
-		ro = &sro;
-		bzero(ro, sizeof *ro);
-	      }
 	}
 	if (m == 0) {
 		m = m_gethdr(M_DONTWAIT, MT_HEADER);
@@ -516,10 +494,7 @@ tcp_respond(tp, ipgen, th, m, ack, seq, flags)
 		nth->th_sum = in6_cksum(m, IPPROTO_TCP,
 					sizeof(struct ip6_hdr),
 					tlen - sizeof(struct ip6_hdr));
-		ip6->ip6_hlim = in6_selecthlim(inp,
-					       ro6 && ro6->ro_rt ?
-					       ro6->ro_rt->rt_ifp :
-					       NULL);
+		ip6->ip6_hlim = in6_selecthlim(tp ? tp->t_inpcb : NULL, NULL);
 	} else
 #endif /* INET6 */
       {
@@ -533,21 +508,11 @@ tcp_respond(tp, ipgen, th, m, ack, seq, flags)
 		tcp_trace(TA_OUTPUT, 0, tp, mtod(m, void *), th, 0);
 #endif
 #ifdef INET6
-	if (isipv6) {
-		(void) ip6_output(m, NULL, ro6, ipflags, NULL, NULL, inp);
-		if (ro6 == &sro6 && ro6->ro_rt) {
-			RTFREE(ro6->ro_rt);
-			ro6->ro_rt = NULL;
-		}
-	} else
+	if (isipv6)
+		(void) ip6_output(m, NULL, NULL, ipflags, NULL, NULL, inp);
+	else
 #endif /* INET6 */
-	{
-		(void) ip_output(m, NULL, ro, ipflags, NULL, inp);
-		if (ro == &sro && ro->ro_rt) {
-			RTFREE(ro->ro_rt);
-			ro->ro_rt = NULL;
-		}
-	}
+	(void) ip_output(m, NULL, NULL, ipflags, NULL, inp);
 }
 
 /*
@@ -647,8 +612,6 @@ tcp_discardcb(tp)
 #ifdef INET6
 	int isipv6 = (inp->inp_vflag & INP_IPV6) != 0;
 #endif /* INET6 */
-	struct rtentry *rt;
-	int dosavessthresh;
 
 	/*
 	 * Make sure that all of our timers are stopped before we
@@ -663,89 +626,34 @@ tcp_discardcb(tp)
 	/*
 	 * If we got enough samples through the srtt filter,
 	 * save the rtt and rttvar in the routing entry.
-	 * 'Enough' is arbitrarily defined as the 16 samples.
-	 * 16 samples is enough for the srtt filter to converge
-	 * to within 5% of the correct value; fewer samples and
-	 * we could save a very bogus rtt.
-	 *
-	 * Don't update the default route's characteristics and don't
-	 * update anything that the user "locked".
+	 * 'Enough' is arbitrarily defined as 4 rtt samples.
+	 * 4 samples is enough for the srtt filter to converge
+	 * to within enough % of the correct value; fewer samples
+	 * and we could save a bogus rtt. The danger is not high
+	 * as tcp quickly recovers from everything.
+	 * XXX: Works very well but needs some more statistics!
 	 */
-	if (tp->t_rttupdated >= 16) {
-		register u_long i = 0;
-#ifdef INET6
-		if (isipv6) {
-			struct sockaddr_in6 *sin6;
+	if (tp->t_rttupdated >= 4) {
+		struct hc_metrics_lite metrics;
+		u_long ssthresh;
 
-			if ((rt = inp->in6p_route.ro_rt) == NULL)
-				goto no_valid_rt;
-			sin6 = (struct sockaddr_in6 *)rt_key(rt);
-			if (IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr))
-				goto no_valid_rt;
-		}
-		else
-#endif /* INET6 */		
-		if ((rt = inp->inp_route.ro_rt) == NULL ||
-		    ((struct sockaddr_in *)rt_key(rt))->sin_addr.s_addr
-		    == INADDR_ANY)
-			goto no_valid_rt;
-
-		if ((rt->rt_rmx.rmx_locks & RTV_RTT) == 0) {
-			i = tp->t_srtt *
-			    (RTM_RTTUNIT / (hz * TCP_RTT_SCALE));
-			if (rt->rt_rmx.rmx_rtt && i)
-				/*
-				 * filter this update to half the old & half
-				 * the new values, converting scale.
-				 * See route.h and tcp_var.h for a
-				 * description of the scaling constants.
-				 */
-				rt->rt_rmx.rmx_rtt =
-				    (rt->rt_rmx.rmx_rtt + i) / 2;
-			else
-				rt->rt_rmx.rmx_rtt = i;
-			tcpstat.tcps_cachedrtt++;
-		}
-		if ((rt->rt_rmx.rmx_locks & RTV_RTTVAR) == 0) {
-			i = tp->t_rttvar *
-			    (RTM_RTTUNIT / (hz * TCP_RTTVAR_SCALE));
-			if (rt->rt_rmx.rmx_rttvar && i)
-				rt->rt_rmx.rmx_rttvar =
-				    (rt->rt_rmx.rmx_rttvar + i) / 2;
-			else
-				rt->rt_rmx.rmx_rttvar = i;
-			tcpstat.tcps_cachedrttvar++;
-		}
+		bzero(&metrics, sizeof(metrics));
 		/*
-		 * The old comment here said:
-		 * update the pipelimit (ssthresh) if it has been updated
-		 * already or if a pipesize was specified & the threshhold
-		 * got below half the pipesize.  I.e., wait for bad news
-		 * before we start updating, then update on both good
-		 * and bad news.
-		 *
-		 * But we want to save the ssthresh even if no pipesize is
-		 * specified explicitly in the route, because such
-		 * connections still have an implicit pipesize specified
-		 * by the global tcp_sendspace.  In the absence of a reliable
-		 * way to calculate the pipesize, it will have to do.
+		 * Update the ssthresh always when the conditions below
+		 * are satisfied. This gives us better new start value
+		 * for the congestion avoidance for new connections.
+		 * ssthresh is only set if packet loss occurred on a session.
 		 */
-		i = tp->snd_ssthresh;
-		if (rt->rt_rmx.rmx_sendpipe != 0)
-			dosavessthresh = (i < rt->rt_rmx.rmx_sendpipe / 2);
-		else
-			dosavessthresh = (i < so->so_snd.sb_hiwat / 2);
-		if (((rt->rt_rmx.rmx_locks & RTV_SSTHRESH) == 0 &&
-		     i != 0 && rt->rt_rmx.rmx_ssthresh != 0)
-		    || dosavessthresh) {
+		ssthresh = tp->snd_ssthresh;
+		if (ssthresh != 0 && ssthresh < so->so_snd.sb_hiwat / 2) {
 			/*
 			 * convert the limit from user data bytes to
 			 * packets then to packet data bytes.
 			 */
-			i = (i + tp->t_maxseg / 2) / tp->t_maxseg;
-			if (i < 2)
-				i = 2;
-			i *= (u_long)(tp->t_maxseg +
+			ssthresh = (ssthresh + tp->t_maxseg / 2) / tp->t_maxseg;
+			if (ssthresh < 2)
+				ssthresh = 2;
+			ssthresh *= (u_long)(tp->t_maxseg +
 #ifdef INET6
 				      (isipv6 ? sizeof (struct ip6_hdr) +
 					       sizeof (struct tcphdr) :
@@ -755,15 +663,21 @@ tcp_discardcb(tp)
 				       )
 #endif
 				      );
-			if (rt->rt_rmx.rmx_ssthresh)
-				rt->rt_rmx.rmx_ssthresh =
-				    (rt->rt_rmx.rmx_ssthresh + i) / 2;
-			else
-				rt->rt_rmx.rmx_ssthresh = i;
-			tcpstat.tcps_cachedssthresh++;
-		}
+		} else
+			ssthresh = 0;
+		metrics.rmx_ssthresh = ssthresh;
+
+		metrics.rmx_rtt = tp->t_srtt;
+		metrics.rmx_rttvar = tp->t_rttvar;
+		/* XXX: This wraps if the pipe is more than 4 Gbit per second */
+		metrics.rmx_bandwidth = tp->snd_bandwidth;
+		metrics.rmx_cwnd = tp->snd_cwnd;
+		metrics.rmx_sendpipe = 0; 
+		metrics.rmx_recvpipe = 0;
+
+		tcp_hc_update(&inp->inp_inc, &metrics);
 	}
-    no_valid_rt:
+
 	/* free the reassembly queue, if any */
 	while ((q = LIST_FIRST(&tp->t_segq)) != NULL) {
 		LIST_REMOVE(q, tqe_q);
@@ -1138,10 +1052,17 @@ tcp_ctlinput(cmd, sa, vip)
 		notify = tcp_drop_syn_sent;
 	else if (cmd == PRC_MSGSIZE)
 		notify = tcp_mtudisc;
-	else if (PRC_IS_REDIRECT(cmd)) {
-		ip = 0;
-		notify = in_rtchange;
-	} else if (cmd == PRC_HOSTDEAD)
+	/*
+	 * Redirects don't need to be handled up here.
+	 */
+	else if (PRC_IS_REDIRECT(cmd))
+		return;
+	/*
+	 * Hostdead is ugly because it goes linearly through all PCBs.
+	 * XXX: We never get this from ICMP, otherwise it makes an
+	 * excellent DoS attack on machines with many connections.
+	 */
+	else if (cmd == PRC_HOSTDEAD)
 		ip = 0;
 	else if ((unsigned)cmd >= PRC_NCMDS || inetctlerrmap[cmd] == 0)
 		return;
@@ -1379,23 +1300,28 @@ tcp_mtudisc(inp, errno)
 	int errno;
 {
 	struct tcpcb *tp = intotcpcb(inp);
-	struct rtentry *rt;
-	struct rmxp_tao *taop;
+	struct rmxp_tao tao;
 	struct socket *so = inp->inp_socket;
-	int offered;
+	u_int maxmtu;
+	u_int romtu;
 	int mss;
 #ifdef INET6
 	int isipv6 = (tp->t_inpcb->inp_vflag & INP_IPV6) != 0;
 #endif /* INET6 */
+	bzero(&tao, sizeof(tao));
 
 	if (tp) {
+		maxmtu = tcp_hc_getmtu(&inp->inp_inc); /* IPv4 and IPv6 */
+		romtu =
 #ifdef INET6
-		if (isipv6)
-			rt = tcp_rtlookup6(&inp->inp_inc);
-		else
+		    isipv6 ? tcp_maxmtu6(&inp->inp_inc) :
 #endif /* INET6 */
-		rt = tcp_rtlookup(&inp->inp_inc);
-		if (!rt || !rt->rt_rmx.rmx_mtu) {
+		    tcp_maxmtu(&inp->inp_inc);
+		if (!maxmtu)
+			maxmtu = romtu;
+		else
+			maxmtu = min(maxmtu, romtu);
+		if (!maxmtu) {
 			tp->t_maxopd = tp->t_maxseg =
 #ifdef INET6
 				isipv6 ? tcp_v6mssdflt :
@@ -1403,9 +1329,7 @@ tcp_mtudisc(inp, errno)
 				tcp_mssdflt;
 			return inp;
 		}
-		taop = rmx_taop(rt->rt_rmx);
-		offered = taop->tao_mssopt;
-		mss = rt->rt_rmx.rmx_mtu -
+		mss = maxmtu -
 #ifdef INET6
 			(isipv6 ?
 			 sizeof(struct ip6_hdr) + sizeof(struct tcphdr) :
@@ -1416,8 +1340,11 @@ tcp_mtudisc(inp, errno)
 #endif /* INET6 */
 			;
 
-		if (offered)
-			mss = min(mss, offered);
+		if (tcp_do_rfc1644) {
+			tcp_hc_gettao(&inp->inp_inc, &tao);
+			if (tao.tao_mssopt)
+				mss = min(mss, tao.tao_mssopt);
+		}
 		/*
 		 * XXX - The above conditional probably violates the TCP
 		 * spec.  The problem is that, since we don't know the
@@ -1471,50 +1398,65 @@ tcp_mtudisc(inp, errno)
  * is called by TCP routines that access the rmx structure and by tcp_mss
  * to get the interface MTU.
  */
-struct rtentry *
-tcp_rtlookup(inc)
+u_long 
+tcp_maxmtu(inc)
 	struct in_conninfo *inc;
 {
-	struct route *ro;
-	struct rtentry *rt;
-
-	ro = &inc->inc_route;
-	rt = ro->ro_rt;
-	if (rt == NULL || !(rt->rt_flags & RTF_UP)) {
-		/* No route yet, so try to acquire one */
-		if (inc->inc_faddr.s_addr != INADDR_ANY) {
-			ro->ro_dst.sa_family = AF_INET;
-			ro->ro_dst.sa_len = sizeof(struct sockaddr_in);
-			((struct sockaddr_in *) &ro->ro_dst)->sin_addr =
-			    inc->inc_faddr;
-			rtalloc(ro);
-			rt = ro->ro_rt;
-		}
+	struct route sro;
+	struct sockaddr_in *dst;
+	struct ifnet *ifp;
+	u_long maxmtu = 0;
+
+	KASSERT(inc != NULL, ("tcp_maxmtu with NULL in_conninfo pointer"));
+
+	sro.ro_rt = NULL;
+	if (inc->inc_faddr.s_addr != INADDR_ANY) {
+	        dst = (struct sockaddr_in *)&sro.ro_dst;
+		dst->sin_family = AF_INET;
+		dst->sin_len = sizeof(*dst);
+		dst->sin_addr = inc->inc_faddr;
+		rtalloc_ign(&sro, RTF_CLONING);
+	}
+	if (sro.ro_rt != NULL) {
+		ifp = sro.ro_rt->rt_ifp;
+		if (sro.ro_rt->rt_rmx.rmx_mtu == 0)
+			maxmtu = ifp->if_mtu;
+		else
+			maxmtu = min(sro.ro_rt->rt_rmx.rmx_mtu, ifp->if_mtu);
+		RTFREE(sro.ro_rt);
 	}
-	return rt;
+	return (maxmtu);
 }
 
 #ifdef INET6
-struct rtentry *
-tcp_rtlookup6(inc)
+u_long
+tcp_maxmtu6(inc)
 	struct in_conninfo *inc;
 {
-	struct route_in6 *ro6;
-	struct rtentry *rt;
-
-	ro6 = &inc->inc6_route;
-	rt = ro6->ro_rt;
-	if (rt == NULL || !(rt->rt_flags & RTF_UP)) {
-		/* No route yet, so try to acquire one */
-		if (!IN6_IS_ADDR_UNSPECIFIED(&inc->inc6_faddr)) {
-			ro6->ro_dst.sin6_family = AF_INET6;
-			ro6->ro_dst.sin6_len = sizeof(struct sockaddr_in6);
-			ro6->ro_dst.sin6_addr = inc->inc6_faddr;
-			rtalloc((struct route *)ro6);
-			rt = ro6->ro_rt;
-		}
+	struct route_in6 sro6;
+	struct ifnet *ifp;
+	u_long maxmtu = 0;
+
+	KASSERT(inc != NULL, ("tcp_maxmtu6 with NULL in_conninfo pointer"));
+
+	sro6.ro_rt = NULL;
+	if (!IN6_IS_ADDR_UNSPECIFIED(&inc->inc6_faddr)) {
+		sro6.ro_dst.sin6_family = AF_INET6;
+		sro6.ro_dst.sin6_len = sizeof(struct sockaddr_in6);
+		sro6.ro_dst.sin6_addr = inc->inc6_faddr;
+		rtalloc_ign((struct route *)&sro6, RTF_CLONING);
 	}
-	return rt;
+	if (sro6.ro_rt != NULL) {
+		ifp = sro6.ro_rt->rt_ifp;
+		if (sro6.ro_rt->rt_rmx.rmx_mtu == 0)
+			maxmtu = IN6_LINKMTU(sro6.ro_rt->rt_ifp);
+		else
+			maxmtu = min(sro6.ro_rt->rt_rmx.rmx_mtu,
+				     IN6_LINKMTU(sro6.ro_rt->rt_ifp));
+		RTFREE(sro6.ro_rt);
+	}
+
+	return (maxmtu);
 }
 #endif /* INET6 */
 
@@ -1563,45 +1505,6 @@ ipsec_hdrsiz_tcp(tp)
 #endif /*IPSEC*/
 
 /*
- * Return a pointer to the cached information about the remote host.
- * The cached information is stored in the protocol specific part of
- * the route metrics.
- */
-struct rmxp_tao *
-tcp_gettaocache(inc)
-	struct in_conninfo *inc;
-{
-	struct rtentry *rt;
-
-#ifdef INET6
-	if (inc->inc_isipv6)
-		rt = tcp_rtlookup6(inc);
-	else
-#endif /* INET6 */
-	rt = tcp_rtlookup(inc);
-
-	/* Make sure this is a host route and is up. */
-	if (rt == NULL ||
-	    (rt->rt_flags & (RTF_UP|RTF_HOST)) != (RTF_UP|RTF_HOST))
-		return NULL;
-
-	return rmx_taop(rt->rt_rmx);
-}
-
-/*
- * Clear all the TAO cache entries, called from tcp_init.
- *
- * XXX
- * This routine is just an empty one, because we assume that the routing
- * routing tables are initialized at the same time when TCP, so there is
- * nothing in the cache left over.
- */
-static void
-tcp_cleartaocache()
-{
-}
-
-/*
  * Move a TCP connection into TIME_WAIT state.
  *    tcbinfo is unlocked.
  *    inp is locked, and is unlocked before returning.
@@ -1822,9 +1725,8 @@ tcp_twrespond(struct tcptw *tw, struct socket *so, struct mbuf *msrc,
 	if (isipv6) {
 		th->th_sum = in6_cksum(m, IPPROTO_TCP, sizeof(struct ip6_hdr),
 		    sizeof(struct tcphdr) + optlen);
-		ip6->ip6_hlim = in6_selecthlim(inp, inp->in6p_route.ro_rt ?
-		    inp->in6p_route.ro_rt->rt_ifp : NULL);
-		error = ip6_output(m, inp->in6p_outputopts, &inp->in6p_route,
+		ip6->ip6_hlim = in6_selecthlim(inp, NULL);
+		error = ip6_output(m, inp->in6p_outputopts, NULL,
 		    (tw->tw_so_options & SO_DONTROUTE), NULL, NULL, inp);
 	} else
 #endif
@@ -1834,7 +1736,7 @@ tcp_twrespond(struct tcptw *tw, struct socket *so, struct mbuf *msrc,
 		m->m_pkthdr.csum_flags = CSUM_TCP;
 		m->m_pkthdr.csum_data = offsetof(struct tcphdr, th_sum);
 		ip->ip_len = m->m_pkthdr.len;
-		error = ip_output(m, inp->inp_options, &inp->inp_route,
+		error = ip_output(m, inp->inp_options, NULL,
 		    (tw->tw_so_options & SO_DONTROUTE), NULL, inp);
 	}
 	if (flags & TH_ACK)
diff --git a/sys/netinet/tcp_syncache.c b/sys/netinet/tcp_syncache.c
index 822ffeb..e2d96e9 100644
--- a/sys/netinet/tcp_syncache.c
+++ b/sys/netinet/tcp_syncache.c
@@ -202,29 +202,9 @@ static MALLOC_DEFINE(M_SYNCACHE, "syncache", "TCP syncache");
 static void
 syncache_free(struct syncache *sc)
 {
-	struct rtentry *rt;
-
 	if (sc->sc_ipopts)
 		(void) m_free(sc->sc_ipopts);
-#ifdef INET6
-	if (sc->sc_inc.inc_isipv6)
-		rt = sc->sc_route6.ro_rt;
-	else
-#endif
-		rt = sc->sc_route.ro_rt;
-	if (rt != NULL) {
-		/*
-		 * If this is the only reference to a protocol cloned 
-		 * route, remove it immediately.
-		 */
-		if (rt->rt_flags & RTF_WASCLONED &&
-		    (sc->sc_flags & SCF_KEEPROUTE) == 0 &&
-		    rt->rt_refcnt == 1)
-			rtrequest(RTM_DELETE, rt_key(rt),
-			    rt->rt_gateway, rt_mask(rt),
-			    rt->rt_flags, NULL);
-		RTFREE(rt);
-	}
+
 	uma_zfree(tcp_syncache.zone, sc);
 }
 
@@ -644,8 +624,6 @@ syncache_socket(sc, lso, m)
 		if (oinp->in6p_outputopts)
 			inp->in6p_outputopts =
 			    ip6_copypktopts(oinp->in6p_outputopts, M_NOWAIT);
-		inp->in6p_route = sc->sc_route6;
-		sc->sc_route6.ro_rt = NULL;
 
 		MALLOC(sin6, struct sockaddr_in6 *, sizeof *sin6,
 		    M_SONAME, M_NOWAIT | M_ZERO);
@@ -675,8 +653,6 @@ syncache_socket(sc, lso, m)
 			inp->inp_options = sc->sc_ipopts;
 			sc->sc_ipopts = NULL;
 		}
-		inp->inp_route = sc->sc_route;
-		sc->sc_route.ro_rt = NULL;
 
 		MALLOC(sin, struct sockaddr_in *, sizeof *sin,
 		    M_SONAME, M_NOWAIT | M_ZERO);
@@ -733,6 +709,10 @@ syncache_socket(sc, lso, m)
 		tp->cc_recv = sc->sc_cc_recv;
 	}
 
+	/*
+	 * Set up MSS and get cached values from tcp_hostcache.
+	 * This might overwrite some of the defaults we just set.
+	 */
 	tcp_mss(tp, sc->sc_peer_mss);
 
 	/*
@@ -811,10 +791,9 @@ resetandabort:
 #endif
 		m_freem(m);			/* XXX only needed for above */
 		tcpstat.tcps_sc_aborted++;
-	} else {
-		sc->sc_flags |= SCF_KEEPROUTE;
+	} else
 		tcpstat.tcps_sc_completed++;
-	}
+
 	if (sch == NULL)
 		syncache_free(sc);
 	else
@@ -849,13 +828,14 @@ syncache_add(inc, to, th, sop, m)
 	struct syncache *sc = NULL;
 	struct syncache_head *sch;
 	struct mbuf *ipopts = NULL;
-	struct rmxp_tao *taop;
+	struct rmxp_tao tao;
 	int i, win;
 
 	INP_INFO_WLOCK_ASSERT(&tcbinfo);
 
 	so = *sop;
 	tp = sototcpcb(so);
+	bzero(&tao, sizeof(tao));
 
 	/*
 	 * Remember the IP options, if any.
@@ -949,13 +929,11 @@ syncache_add(inc, to, th, sop, m)
 	if (inc->inc_isipv6) {
 		sc->sc_inc.inc6_faddr = inc->inc6_faddr;
 		sc->sc_inc.inc6_laddr = inc->inc6_laddr;
-		sc->sc_route6.ro_rt = NULL;
 	} else
 #endif
 	{
 		sc->sc_inc.inc_faddr = inc->inc_faddr;
 		sc->sc_inc.inc_laddr = inc->inc_laddr;
-		sc->sc_route.ro_rt = NULL;
 	}
 	sc->sc_irs = th->th_seq;
 	sc->sc_flags = 0;
@@ -1027,17 +1005,19 @@ syncache_add(inc, to, th, sop, m)
 	 *	processing: drop SYN, process data and FIN.
 	 * - otherwise do a normal 3-way handshake.
 	 */
-	taop = tcp_gettaocache(&sc->sc_inc);
+	if (tcp_do_rfc1644)
+		tcp_hc_gettao(&sc->sc_inc, &tao);
+
 	if ((to->to_flags & TOF_CC) != 0) {
 		if (((tp->t_flags & TF_NOPUSH) != 0) &&
-		    sc->sc_flags & SCF_CC && 
-		    taop != NULL && taop->tao_cc != 0 &&
-		    CC_GT(to->to_cc, taop->tao_cc)) {
+		    sc->sc_flags & SCF_CC && tao.tao_cc != 0 &&
+		    CC_GT(to->to_cc, tao.tao_cc)) {
 			sc->sc_rxtslot = 0;
 			so = syncache_socket(sc, *sop, m);
 			if (so != NULL) {
-				sc->sc_flags |= SCF_KEEPROUTE;
-				taop->tao_cc = to->to_cc;
+				tao.tao_cc = to->to_cc;
+				tcp_hc_updatetao(&sc->sc_inc, TCP_HC_TAO_CC,
+						 tao.tao_cc, 0);
 				*sop = so;
 			}
 			syncache_free(sc);
@@ -1047,9 +1027,13 @@ syncache_add(inc, to, th, sop, m)
 		/*
 		 * No CC option, but maybe CC.NEW: invalidate cached value.
 		 */
-		if (taop != NULL)
-			taop->tao_cc = 0;
+		if (tcp_do_rfc1644) {
+			tao.tao_cc = 0;
+			tcp_hc_updatetao(&sc->sc_inc, TCP_HC_TAO_CC,
+					 tao.tao_cc, 0);
+		}
 	}
+
 	/*
 	 * TAO test failed or there was no CC option,
 	 *    do a standard 3-way handshake.
@@ -1087,33 +1071,22 @@ syncache_respond(sc, m)
 	int optlen, error;
 	u_int16_t tlen, hlen, mssopt;
 	struct ip *ip = NULL;
-	struct rtentry *rt;
 	struct tcphdr *th;
 	struct inpcb *inp;
 #ifdef INET6
 	struct ip6_hdr *ip6 = NULL;
 #endif
 
+	hlen =
 #ifdef INET6
-	if (sc->sc_inc.inc_isipv6) {
-		rt = tcp_rtlookup6(&sc->sc_inc);
-		if (rt != NULL)
-			mssopt = rt->rt_ifp->if_mtu -
-			     (sizeof(struct ip6_hdr) + sizeof(struct tcphdr));
-		else 
-			mssopt = tcp_v6mssdflt;
-		hlen = sizeof(struct ip6_hdr);
-	} else
+	       (sc->sc_inc.inc_isipv6) ? sizeof(struct ip6_hdr) : 
 #endif
-	{
-		rt = tcp_rtlookup(&sc->sc_inc);
-		if (rt != NULL)
-			mssopt = rt->rt_ifp->if_mtu -
-			     (sizeof(struct ip) + sizeof(struct tcphdr));
-		else 
-			mssopt = tcp_mssdflt;
-		hlen = sizeof(struct ip);
-	}
+		sizeof(struct ip);
+
+	KASSERT((&sc->sc_inc) != NULL, ("syncache_respond with NULL in_conninfo pointer"));
+
+	/* Determine MSS we advertise to other end of connection */
+	mssopt = tcp_mssopt(&sc->sc_inc);
 
 	/* Compute the size of the TCP options. */
 	if (sc->sc_flags & SCF_NOOPT) {
@@ -1244,13 +1217,10 @@ syncache_respond(sc, m)
 
 #ifdef INET6
 	if (sc->sc_inc.inc_isipv6) {
-		struct route_in6 *ro6 = &sc->sc_route6;
-
 		th->th_sum = 0;
 		th->th_sum = in6_cksum(m, IPPROTO_TCP, hlen, tlen - hlen);
-		ip6->ip6_hlim = in6_selecthlim(NULL,
-		    ro6->ro_rt ? ro6->ro_rt->rt_ifp : NULL);
-		error = ip6_output(m, NULL, ro6, 0, NULL, NULL, inp);
+		ip6->ip6_hlim = in6_selecthlim(NULL, NULL);
+		error = ip6_output(m, NULL, NULL, 0, NULL, NULL, inp);
 	} else
 #endif
 	{
@@ -1268,7 +1238,7 @@ syncache_respond(sc, m)
 			    mtod(m, void *), th, 0);
 		}
 #endif
-		error = ip_output(m, sc->sc_ipopts, &sc->sc_route, 0, NULL,inp);
+		error = ip_output(m, sc->sc_ipopts, NULL, 0, NULL, inp);
 	}
 	INP_UNLOCK(inp);
 	return (error);
@@ -1435,13 +1405,11 @@ syncookie_lookup(inc, th, so)
 	if (inc->inc_isipv6) {
 		sc->sc_inc.inc6_faddr = inc->inc6_faddr;
 		sc->sc_inc.inc6_laddr = inc->inc6_laddr;
-		sc->sc_route6.ro_rt = NULL;
 	} else
 #endif
 	{
 		sc->sc_inc.inc_faddr = inc->inc_faddr;
 		sc->sc_inc.inc_laddr = inc->inc_laddr;
-		sc->sc_route.ro_rt = NULL;
 	}
 	sc->sc_irs = th->th_seq - 1;
 	sc->sc_iss = th->th_ack - 1;
diff --git a/sys/netinet/tcp_timer.c b/sys/netinet/tcp_timer.c
index 1a253ab..1eeb66e 100644
--- a/sys/netinet/tcp_timer.c
+++ b/sys/netinet/tcp_timer.c
@@ -551,10 +551,8 @@ tcp_timer_rexmt(xtp)
 	if ((tp->t_state == TCPS_SYN_SENT) && (tp->t_rxtshift == 3))
 		tp->t_flags &= ~(TF_REQ_SCALE|TF_REQ_TSTMP|TF_REQ_CC);
 	/*
-	 * If losing, let the lower level know and try for
-	 * a better route.  Also, if we backed off this far,
-	 * our srtt estimate is probably bogus.  Clobber it
-	 * so we'll take the next rtt measurement as our srtt;
+	 * If we backed off this far, our srtt estimate is probably bogus.
+	 * Clobber it so we'll take the next rtt measurement as our srtt;
 	 * move the current srtt into rttvar to keep the current
 	 * retransmit times until then.
 	 */
@@ -564,7 +562,5 @@ tcp_timer_rexmt(xtp)
 			in6_losing(tp->t_inpcb);
-		else
 #endif
-		in_losing(tp->t_inpcb);
 		tp->t_rttvar += (tp->t_srtt >> TCP_RTT_SHIFT);
 		tp->t_srtt = 0;
 	}
diff --git a/sys/netinet/tcp_timewait.c b/sys/netinet/tcp_timewait.c
index 7ce06f6..dfd6de1 100644
--- a/sys/netinet/tcp_timewait.c
+++ b/sys/netinet/tcp_timewait.c
@@ -76,6 +76,7 @@
 #include <netinet/ip_var.h>
 #ifdef INET6
 #include <netinet6/ip6_var.h>
+#include <netinet6/nd6.h>
 #endif
 #include <netinet/tcp.h>
 #include <netinet/tcp_fsm.h>
@@ -177,7 +178,6 @@ static int	tcp_inflight_stab = 20;
 SYSCTL_INT(_net_inet_tcp, OID_AUTO, inflight_stab, CTLFLAG_RW,
     &tcp_inflight_stab, 0, "Inflight Algorithm Stabilization 20 = 2 packets");
 
-static void	tcp_cleartaocache(void);
 static struct inpcb *tcp_notify(struct inpcb *, int);
 static void	tcp_discardcb(struct tcpcb *);
 
@@ -215,7 +215,6 @@ tcp_init()
 	int hashsize = TCBHASHSIZE;
 	
 	tcp_ccgen = 1;
-	tcp_cleartaocache();
 
 	tcp_delacktime = TCPTV_DELACK;
 	tcp_keepinit = TCPTV_KEEP_INIT;
@@ -262,6 +261,7 @@ tcp_init()
 	uma_zone_set_max(tcptw_zone, maxsockets / 5);
 	tcp_timer_init();
 	syncache_init();
+	tcp_hc_init();
 }
 
 /*
@@ -367,18 +367,14 @@ tcp_respond(tp, ipgen, th, m, ack, seq, flags)
 {
 	register int tlen;
 	int win = 0;
-	struct route *ro = 0;
-	struct route sro;
 	struct ip *ip;
 	struct tcphdr *nth;
 #ifdef INET6
-	struct route_in6 *ro6 = 0;
-	struct route_in6 sro6;
 	struct ip6_hdr *ip6;
 	int isipv6;
 #endif /* INET6 */
 	int ipflags = 0;
-	struct inpcb *inp;
+	struct inpcb *inp = NULL;
 
 	KASSERT(tp != NULL || m != NULL, ("tcp_respond: tp and m both NULL"));
 
@@ -398,24 +394,6 @@ tcp_respond(tp, ipgen, th, m, ack, seq, flags)
 			if (win > (long)TCP_MAXWIN << tp->rcv_scale)
 				win = (long)TCP_MAXWIN << tp->rcv_scale;
 		}
-#ifdef INET6
-		if (isipv6)
-			ro6 = &inp->in6p_route;
-		else
-#endif /* INET6 */
-		ro = &inp->inp_route;
-	} else {
-		inp = NULL;
-#ifdef INET6
-		if (isipv6) {
-			ro6 = &sro6;
-			bzero(ro6, sizeof *ro6);
-		} else
-#endif /* INET6 */
-	      {
-		ro = &sro;
-		bzero(ro, sizeof *ro);
-	      }
 	}
 	if (m == 0) {
 		m = m_gethdr(M_DONTWAIT, MT_HEADER);
@@ -516,10 +494,7 @@ tcp_respond(tp, ipgen, th, m, ack, seq, flags)
 		nth->th_sum = in6_cksum(m, IPPROTO_TCP,
 					sizeof(struct ip6_hdr),
 					tlen - sizeof(struct ip6_hdr));
-		ip6->ip6_hlim = in6_selecthlim(inp,
-					       ro6 && ro6->ro_rt ?
-					       ro6->ro_rt->rt_ifp :
-					       NULL);
+		ip6->ip6_hlim = in6_selecthlim(tp ? tp->t_inpcb : NULL, NULL);
 	} else
 #endif /* INET6 */
       {
@@ -533,21 +508,11 @@ tcp_respond(tp, ipgen, th, m, ack, seq, flags)
 		tcp_trace(TA_OUTPUT, 0, tp, mtod(m, void *), th, 0);
 #endif
 #ifdef INET6
-	if (isipv6) {
-		(void) ip6_output(m, NULL, ro6, ipflags, NULL, NULL, inp);
-		if (ro6 == &sro6 && ro6->ro_rt) {
-			RTFREE(ro6->ro_rt);
-			ro6->ro_rt = NULL;
-		}
-	} else
+	if (isipv6)
+		(void) ip6_output(m, NULL, NULL, ipflags, NULL, NULL, inp);
+	else
 #endif /* INET6 */
-	{
-		(void) ip_output(m, NULL, ro, ipflags, NULL, inp);
-		if (ro == &sro && ro->ro_rt) {
-			RTFREE(ro->ro_rt);
-			ro->ro_rt = NULL;
-		}
-	}
+	(void) ip_output(m, NULL, NULL, ipflags, NULL, inp);
 }
 
 /*
@@ -647,8 +612,6 @@ tcp_discardcb(tp)
 #ifdef INET6
 	int isipv6 = (inp->inp_vflag & INP_IPV6) != 0;
 #endif /* INET6 */
-	struct rtentry *rt;
-	int dosavessthresh;
 
 	/*
 	 * Make sure that all of our timers are stopped before we
@@ -663,89 +626,34 @@ tcp_discardcb(tp)
 	/*
 	 * If we got enough samples through the srtt filter,
 	 * save the rtt and rttvar in the routing entry.
-	 * 'Enough' is arbitrarily defined as the 16 samples.
-	 * 16 samples is enough for the srtt filter to converge
-	 * to within 5% of the correct value; fewer samples and
-	 * we could save a very bogus rtt.
-	 *
-	 * Don't update the default route's characteristics and don't
-	 * update anything that the user "locked".
+	 * 'Enough' is arbitrarily defined as 4 rtt samples.
+	 * 4 samples is enough for the srtt filter to converge
+	 * to within a reasonable margin of the correct value; fewer samples
+	 * and we could save a bogus rtt. The danger is not high
+	 * as tcp quickly recovers from everything.
+	 * XXX: Works very well but needs some more statistics!
 	 */
-	if (tp->t_rttupdated >= 16) {
-		register u_long i = 0;
-#ifdef INET6
-		if (isipv6) {
-			struct sockaddr_in6 *sin6;
+	if (tp->t_rttupdated >= 4) {
+		struct hc_metrics_lite metrics;
+		u_long ssthresh;
 
-			if ((rt = inp->in6p_route.ro_rt) == NULL)
-				goto no_valid_rt;
-			sin6 = (struct sockaddr_in6 *)rt_key(rt);
-			if (IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr))
-				goto no_valid_rt;
-		}
-		else
-#endif /* INET6 */		
-		if ((rt = inp->inp_route.ro_rt) == NULL ||
-		    ((struct sockaddr_in *)rt_key(rt))->sin_addr.s_addr
-		    == INADDR_ANY)
-			goto no_valid_rt;
-
-		if ((rt->rt_rmx.rmx_locks & RTV_RTT) == 0) {
-			i = tp->t_srtt *
-			    (RTM_RTTUNIT / (hz * TCP_RTT_SCALE));
-			if (rt->rt_rmx.rmx_rtt && i)
-				/*
-				 * filter this update to half the old & half
-				 * the new values, converting scale.
-				 * See route.h and tcp_var.h for a
-				 * description of the scaling constants.
-				 */
-				rt->rt_rmx.rmx_rtt =
-				    (rt->rt_rmx.rmx_rtt + i) / 2;
-			else
-				rt->rt_rmx.rmx_rtt = i;
-			tcpstat.tcps_cachedrtt++;
-		}
-		if ((rt->rt_rmx.rmx_locks & RTV_RTTVAR) == 0) {
-			i = tp->t_rttvar *
-			    (RTM_RTTUNIT / (hz * TCP_RTTVAR_SCALE));
-			if (rt->rt_rmx.rmx_rttvar && i)
-				rt->rt_rmx.rmx_rttvar =
-				    (rt->rt_rmx.rmx_rttvar + i) / 2;
-			else
-				rt->rt_rmx.rmx_rttvar = i;
-			tcpstat.tcps_cachedrttvar++;
-		}
+		bzero(&metrics, sizeof(metrics));
 		/*
-		 * The old comment here said:
-		 * update the pipelimit (ssthresh) if it has been updated
-		 * already or if a pipesize was specified & the threshhold
-		 * got below half the pipesize.  I.e., wait for bad news
-		 * before we start updating, then update on both good
-		 * and bad news.
-		 *
-		 * But we want to save the ssthresh even if no pipesize is
-		 * specified explicitly in the route, because such
-		 * connections still have an implicit pipesize specified
-		 * by the global tcp_sendspace.  In the absence of a reliable
-		 * way to calculate the pipesize, it will have to do.
+		 * Update the ssthresh always when the conditions below
+		 * are satisfied. This gives us better new start value
+		 * for the congestion avoidance for new connections.
+		 * ssthresh is only set if packet loss occurred on a session.
 		 */
-		i = tp->snd_ssthresh;
-		if (rt->rt_rmx.rmx_sendpipe != 0)
-			dosavessthresh = (i < rt->rt_rmx.rmx_sendpipe / 2);
-		else
-			dosavessthresh = (i < so->so_snd.sb_hiwat / 2);
-		if (((rt->rt_rmx.rmx_locks & RTV_SSTHRESH) == 0 &&
-		     i != 0 && rt->rt_rmx.rmx_ssthresh != 0)
-		    || dosavessthresh) {
+		ssthresh = tp->snd_ssthresh;
+		if (ssthresh != 0 && ssthresh < so->so_snd.sb_hiwat / 2) {
 			/*
 			 * convert the limit from user data bytes to
 			 * packets then to packet data bytes.
 			 */
-			i = (i + tp->t_maxseg / 2) / tp->t_maxseg;
-			if (i < 2)
-				i = 2;
-			i *= (u_long)(tp->t_maxseg +
+			ssthresh = (ssthresh + tp->t_maxseg / 2) / tp->t_maxseg;
+			if (ssthresh < 2)
+				ssthresh = 2;
+			ssthresh *= (u_long)(tp->t_maxseg +
 #ifdef INET6
 				      (isipv6 ? sizeof (struct ip6_hdr) +
 					       sizeof (struct tcphdr) :
@@ -755,15 +663,21 @@ tcp_discardcb(tp)
 				       )
 #endif
 				      );
-			if (rt->rt_rmx.rmx_ssthresh)
-				rt->rt_rmx.rmx_ssthresh =
-				    (rt->rt_rmx.rmx_ssthresh + i) / 2;
-			else
-				rt->rt_rmx.rmx_ssthresh = i;
-			tcpstat.tcps_cachedssthresh++;
-		}
+		} else
+			ssthresh = 0;
+		metrics.rmx_ssthresh = ssthresh;
+
+		metrics.rmx_rtt = tp->t_srtt;
+		metrics.rmx_rttvar = tp->t_rttvar;
+		/* XXX: This wraps if the pipe is more than 4 Gbit per second */
+		metrics.rmx_bandwidth = tp->snd_bandwidth;
+		metrics.rmx_cwnd = tp->snd_cwnd;
+		metrics.rmx_sendpipe = 0; 
+		metrics.rmx_recvpipe = 0;
+
+		tcp_hc_update(&inp->inp_inc, &metrics);
 	}
-    no_valid_rt:
+
 	/* free the reassembly queue, if any */
 	while ((q = LIST_FIRST(&tp->t_segq)) != NULL) {
 		LIST_REMOVE(q, tqe_q);
@@ -1138,10 +1052,17 @@ tcp_ctlinput(cmd, sa, vip)
 		notify = tcp_drop_syn_sent;
 	else if (cmd == PRC_MSGSIZE)
 		notify = tcp_mtudisc;
-	else if (PRC_IS_REDIRECT(cmd)) {
-		ip = 0;
-		notify = in_rtchange;
-	} else if (cmd == PRC_HOSTDEAD)
+	/*
+	 * Redirects don't need to be handled up here.
+	 */
+	else if (PRC_IS_REDIRECT(cmd))
+		return;
+	/*
+	 * Hostdead is ugly because it goes linearly through all PCBs.
+	 * XXX: We never get this from ICMP, otherwise it makes an
+	 * excellent DoS attack on machines with many connections.
+	 */
+	else if (cmd == PRC_HOSTDEAD)
 		ip = 0;
 	else if ((unsigned)cmd >= PRC_NCMDS || inetctlerrmap[cmd] == 0)
 		return;
@@ -1379,23 +1300,28 @@ tcp_mtudisc(inp, errno)
 	int errno;
 {
 	struct tcpcb *tp = intotcpcb(inp);
-	struct rtentry *rt;
-	struct rmxp_tao *taop;
+	struct rmxp_tao tao;
 	struct socket *so = inp->inp_socket;
-	int offered;
+	u_int maxmtu;
+	u_int romtu;
 	int mss;
 #ifdef INET6
 	int isipv6 = (tp->t_inpcb->inp_vflag & INP_IPV6) != 0;
 #endif /* INET6 */
+	bzero(&tao, sizeof(tao));
 
 	if (tp) {
+		maxmtu = tcp_hc_getmtu(&inp->inp_inc); /* IPv4 and IPv6 */
+		romtu =
 #ifdef INET6
-		if (isipv6)
-			rt = tcp_rtlookup6(&inp->inp_inc);
-		else
+		    isipv6 ? tcp_maxmtu6(&inp->inp_inc) :
 #endif /* INET6 */
-		rt = tcp_rtlookup(&inp->inp_inc);
-		if (!rt || !rt->rt_rmx.rmx_mtu) {
+		    tcp_maxmtu(&inp->inp_inc);
+		if (!maxmtu)
+			maxmtu = romtu;
+		else
+			maxmtu = min(maxmtu, romtu);
+		if (!maxmtu) {
 			tp->t_maxopd = tp->t_maxseg =
 #ifdef INET6
 				isipv6 ? tcp_v6mssdflt :
@@ -1403,9 +1329,7 @@ tcp_mtudisc(inp, errno)
 				tcp_mssdflt;
 			return inp;
 		}
-		taop = rmx_taop(rt->rt_rmx);
-		offered = taop->tao_mssopt;
-		mss = rt->rt_rmx.rmx_mtu -
+		mss = maxmtu -
 #ifdef INET6
 			(isipv6 ?
 			 sizeof(struct ip6_hdr) + sizeof(struct tcphdr) :
@@ -1416,8 +1340,11 @@ tcp_mtudisc(inp, errno)
 #endif /* INET6 */
 			;
 
-		if (offered)
-			mss = min(mss, offered);
+		if (tcp_do_rfc1644) {
+			tcp_hc_gettao(&inp->inp_inc, &tao);
+			if (tao.tao_mssopt)
+				mss = min(mss, tao.tao_mssopt);
+		}
 		/*
 		 * XXX - The above conditional probably violates the TCP
 		 * spec.  The problem is that, since we don't know the
@@ -1471,50 +1398,65 @@ tcp_mtudisc(inp, errno)
  * is called by TCP routines that access the rmx structure and by tcp_mss
  * to get the interface MTU.
  */
-struct rtentry *
-tcp_rtlookup(inc)
+u_long 
+tcp_maxmtu(inc)
 	struct in_conninfo *inc;
 {
-	struct route *ro;
-	struct rtentry *rt;
-
-	ro = &inc->inc_route;
-	rt = ro->ro_rt;
-	if (rt == NULL || !(rt->rt_flags & RTF_UP)) {
-		/* No route yet, so try to acquire one */
-		if (inc->inc_faddr.s_addr != INADDR_ANY) {
-			ro->ro_dst.sa_family = AF_INET;
-			ro->ro_dst.sa_len = sizeof(struct sockaddr_in);
-			((struct sockaddr_in *) &ro->ro_dst)->sin_addr =
-			    inc->inc_faddr;
-			rtalloc(ro);
-			rt = ro->ro_rt;
-		}
+	struct route sro;
+	struct sockaddr_in *dst;
+	struct ifnet *ifp;
+	u_long maxmtu = 0;
+
+	KASSERT(inc != NULL, ("tcp_maxmtu with NULL in_conninfo pointer"));
+
+	bzero(&sro, sizeof(sro));	/* no stack garbage in ro_dst */
+	if (inc->inc_faddr.s_addr != INADDR_ANY) {
+	        dst = (struct sockaddr_in *)&sro.ro_dst;
+		dst->sin_family = AF_INET;
+		dst->sin_len = sizeof(*dst);
+		dst->sin_addr = inc->inc_faddr;
+		rtalloc_ign(&sro, RTF_CLONING);
+	}
+	if (sro.ro_rt != NULL) {
+		ifp = sro.ro_rt->rt_ifp;
+		if (sro.ro_rt->rt_rmx.rmx_mtu == 0)
+			maxmtu = ifp->if_mtu;
+		else
+			maxmtu = min(sro.ro_rt->rt_rmx.rmx_mtu, ifp->if_mtu);
+		RTFREE(sro.ro_rt);
 	}
-	return rt;
+	return (maxmtu);
 }
 
 #ifdef INET6
-struct rtentry *
-tcp_rtlookup6(inc)
+u_long
+tcp_maxmtu6(inc)
 	struct in_conninfo *inc;
 {
-	struct route_in6 *ro6;
-	struct rtentry *rt;
-
-	ro6 = &inc->inc6_route;
-	rt = ro6->ro_rt;
-	if (rt == NULL || !(rt->rt_flags & RTF_UP)) {
-		/* No route yet, so try to acquire one */
-		if (!IN6_IS_ADDR_UNSPECIFIED(&inc->inc6_faddr)) {
-			ro6->ro_dst.sin6_family = AF_INET6;
-			ro6->ro_dst.sin6_len = sizeof(struct sockaddr_in6);
-			ro6->ro_dst.sin6_addr = inc->inc6_faddr;
-			rtalloc((struct route *)ro6);
-			rt = ro6->ro_rt;
-		}
+	struct route_in6 sro6;
+	struct ifnet *ifp;
+	u_long maxmtu = 0;
+
+	KASSERT(inc != NULL, ("tcp_maxmtu6 with NULL in_conninfo pointer"));
+
+	bzero(&sro6, sizeof(sro6));	/* no stack garbage in ro_dst */
+	if (!IN6_IS_ADDR_UNSPECIFIED(&inc->inc6_faddr)) {
+		sro6.ro_dst.sin6_family = AF_INET6;
+		sro6.ro_dst.sin6_len = sizeof(struct sockaddr_in6);
+		sro6.ro_dst.sin6_addr = inc->inc6_faddr;
+		rtalloc_ign((struct route *)&sro6, RTF_CLONING);
 	}
-	return rt;
+	if (sro6.ro_rt != NULL) {
+		ifp = sro6.ro_rt->rt_ifp;
+		if (sro6.ro_rt->rt_rmx.rmx_mtu == 0)
+			maxmtu = IN6_LINKMTU(sro6.ro_rt->rt_ifp);
+		else
+			maxmtu = min(sro6.ro_rt->rt_rmx.rmx_mtu,
+				     IN6_LINKMTU(sro6.ro_rt->rt_ifp));
+		RTFREE(sro6.ro_rt);
+	}
+
+	return (maxmtu);
 }
 #endif /* INET6 */
 
@@ -1563,45 +1505,6 @@ ipsec_hdrsiz_tcp(tp)
 #endif /*IPSEC*/
 
 /*
- * Return a pointer to the cached information about the remote host.
- * The cached information is stored in the protocol specific part of
- * the route metrics.
- */
-struct rmxp_tao *
-tcp_gettaocache(inc)
-	struct in_conninfo *inc;
-{
-	struct rtentry *rt;
-
-#ifdef INET6
-	if (inc->inc_isipv6)
-		rt = tcp_rtlookup6(inc);
-	else
-#endif /* INET6 */
-	rt = tcp_rtlookup(inc);
-
-	/* Make sure this is a host route and is up. */
-	if (rt == NULL ||
-	    (rt->rt_flags & (RTF_UP|RTF_HOST)) != (RTF_UP|RTF_HOST))
-		return NULL;
-
-	return rmx_taop(rt->rt_rmx);
-}
-
-/*
- * Clear all the TAO cache entries, called from tcp_init.
- *
- * XXX
- * This routine is just an empty one, because we assume that the routing
- * routing tables are initialized at the same time when TCP, so there is
- * nothing in the cache left over.
- */
-static void
-tcp_cleartaocache()
-{
-}
-
-/*
  * Move a TCP connection into TIME_WAIT state.
  *    tcbinfo is unlocked.
  *    inp is locked, and is unlocked before returning.
@@ -1822,9 +1725,8 @@ tcp_twrespond(struct tcptw *tw, struct socket *so, struct mbuf *msrc,
 	if (isipv6) {
 		th->th_sum = in6_cksum(m, IPPROTO_TCP, sizeof(struct ip6_hdr),
 		    sizeof(struct tcphdr) + optlen);
-		ip6->ip6_hlim = in6_selecthlim(inp, inp->in6p_route.ro_rt ?
-		    inp->in6p_route.ro_rt->rt_ifp : NULL);
-		error = ip6_output(m, inp->in6p_outputopts, &inp->in6p_route,
+		ip6->ip6_hlim = in6_selecthlim(inp, NULL);
+		error = ip6_output(m, inp->in6p_outputopts, NULL,
 		    (tw->tw_so_options & SO_DONTROUTE), NULL, NULL, inp);
 	} else
 #endif
@@ -1834,7 +1736,7 @@ tcp_twrespond(struct tcptw *tw, struct socket *so, struct mbuf *msrc,
 		m->m_pkthdr.csum_flags = CSUM_TCP;
 		m->m_pkthdr.csum_data = offsetof(struct tcphdr, th_sum);
 		ip->ip_len = m->m_pkthdr.len;
-		error = ip_output(m, inp->inp_options, &inp->inp_route,
+		error = ip_output(m, inp->inp_options, NULL,
 		    (tw->tw_so_options & SO_DONTROUTE), NULL, inp);
 	}
 	if (flags & TH_ACK)
diff --git a/sys/netinet/tcp_usrreq.c b/sys/netinet/tcp_usrreq.c
index 7035227..17566c8 100644
--- a/sys/netinet/tcp_usrreq.c
+++ b/sys/netinet/tcp_usrreq.c
@@ -848,12 +848,13 @@ tcp_connect(tp, nam, td)
 	struct inpcb *inp = tp->t_inpcb, *oinp;
 	struct socket *so = inp->inp_socket;
 	struct tcptw *otw;
-	struct rmxp_tao *taop;
-	struct rmxp_tao tao_noncached;
+	struct rmxp_tao tao;
 	struct in_addr laddr;
 	u_short lport;
 	int error;
 
+	bzero(&tao, sizeof(tao));
+
 	if (inp->inp_lport == 0) {
 		error = in_pcbbind(inp, (struct sockaddr *)0, td);
 		if (error)
@@ -902,20 +903,22 @@ tcp_connect(tp, nam, td)
 	 * Generate a CC value for this connection and
 	 * check whether CC or CCnew should be used.
 	 */
-	if ((taop = tcp_gettaocache(&tp->t_inpcb->inp_inc)) == NULL) {
-		taop = &tao_noncached;
-		bzero(taop, sizeof(*taop));
-	}
+	if (tcp_do_rfc1644)
+		tcp_hc_gettao(&inp->inp_inc, &tao);
 
 	tp->cc_send = CC_INC(tcp_ccgen);
-	if (taop->tao_ccsent != 0 &&
-	    CC_GEQ(tp->cc_send, taop->tao_ccsent)) {
-		taop->tao_ccsent = tp->cc_send;
+	if (tao.tao_ccsent != 0 &&
+	    CC_GEQ(tp->cc_send, tao.tao_ccsent)) {
+		tao.tao_ccsent = tp->cc_send;
 	} else {
-		taop->tao_ccsent = 0;
+		tao.tao_ccsent = 0;
 		tp->t_flags |= TF_SENDCCNEW;
 	}
 
+	if (tcp_do_rfc1644)
+		tcp_hc_updatetao(&inp->inp_inc, TCP_HC_TAO_CCSENT,
+				 tao.tao_ccsent, 0);
+
 	return 0;
 }
 
@@ -931,10 +934,11 @@ tcp6_connect(tp, nam, td)
 	struct tcptw *otw;
 	struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)nam;
 	struct in6_addr *addr6;
-	struct rmxp_tao *taop;
-	struct rmxp_tao tao_noncached;
+	struct rmxp_tao tao;
 	int error;
 
+	bzero(&tao, sizeof(tao));
+
 	if (inp->inp_lport == 0) {
 		error = in6_pcbbind(inp, (struct sockaddr *)0, td);
 		if (error)
@@ -991,19 +995,20 @@ tcp6_connect(tp, nam, td)
 	 * Generate a CC value for this connection and
 	 * check whether CC or CCnew should be used.
 	 */
-	if ((taop = tcp_gettaocache(&tp->t_inpcb->inp_inc)) == NULL) {
-		taop = &tao_noncached;
-		bzero(taop, sizeof(*taop));
-	}
+	if (tcp_do_rfc1644)
+		tcp_hc_gettao(&inp->inp_inc, &tao);
 
 	tp->cc_send = CC_INC(tcp_ccgen);
-	if (taop->tao_ccsent != 0 &&
-	    CC_GEQ(tp->cc_send, taop->tao_ccsent)) {
-		taop->tao_ccsent = tp->cc_send;
+	if (tao.tao_ccsent != 0 &&
+	    CC_GEQ(tp->cc_send, tao.tao_ccsent)) {
+		tao.tao_ccsent = tp->cc_send;
 	} else {
-		taop->tao_ccsent = 0;
+		tao.tao_ccsent = 0;
 		tp->t_flags |= TF_SENDCCNEW;
 	}
+	if (tcp_do_rfc1644)
+		tcp_hc_updatetao(&inp->inp_inc, TCP_HC_TAO_CCSENT,
+				 tao.tao_ccsent, 0);
 
 	return 0;
 }
diff --git a/sys/netinet/tcp_var.h b/sys/netinet/tcp_var.h
index 2e5b3fa..ddcfd3c 100644
--- a/sys/netinet/tcp_var.h
+++ b/sys/netinet/tcp_var.h
@@ -213,8 +213,6 @@ struct syncache {
 	struct 		tcpcb *sc_tp;		/* tcb for listening socket */
 	struct		mbuf *sc_ipopts;	/* source route */
 	struct 		in_conninfo sc_inc;	/* addresses */
-#define sc_route	sc_inc.inc_route
-#define sc_route6	sc_inc.inc6_route
 	u_int32_t	sc_tsrecent;
 	tcp_cc		sc_cc_send;		/* holds CC or CCnew */
 	tcp_cc		sc_cc_recv;
@@ -232,7 +230,6 @@ struct syncache {
 #define SCF_TIMESTAMP	0x04			/* negotiated timestamps */
 #define SCF_CC		0x08			/* negotiated CC */
 #define SCF_UNREACH	0x10			/* icmp unreachable received */
-#define SCF_KEEPROUTE	0x20			/* keep cloned route */
 	TAILQ_ENTRY(syncache)	sc_hash;
 	TAILQ_ENTRY(syncache)	sc_timerq;
 };
@@ -242,6 +239,17 @@ struct syncache_head {
 	u_int		sch_length;
 };
 
+struct hc_metrics_lite {	/* must stay in sync with hc_metrics */
+	u_long	rmx_mtu;	/* MTU for this path */
+	u_long	rmx_ssthresh;	/* outbound gateway buffer limit */
+	u_long	rmx_rtt;	/* estimated round trip time */
+	u_long	rmx_rttvar;	/* estimated rtt variance */
+	u_long	rmx_bandwidth;	/* estimated bandwidth */
+	u_long	rmx_cwnd;	/* congestion window */
+	u_long	rmx_sendpipe;   /* outbound delay-bandwidth product */
+	u_long	rmx_recvpipe;   /* inbound delay-bandwidth product */
+};
+
 struct tcptw {
 	struct inpcb	*tw_inpcb;	/* XXX back pointer to internet pcb */
 	tcp_seq		snd_nxt;
@@ -260,8 +268,7 @@ struct tcptw {
 };
  
 /*
- * The TAO cache entry which is stored in the protocol family specific
- * portion of the route metrics.
+ * The TAO cache entry which is stored in the tcp hostcache.
  */
 struct rmxp_tao {
 	tcp_cc	tao_cc;			/* latest CC in valid SYN */
@@ -274,7 +281,6 @@ struct rmxp_tao {
 #define	TAOF_UNDEF	0		/* we don't know yet */
 #endif /* notyet */
 };
-#define rmx_taop(r)	((struct rmxp_tao *)(r).rmx_filler)
 
 #define	intotcpcb(ip)	((struct tcpcb *)(ip)->inp_ppcb)
 #define	intotw(ip)	((struct tcptw *)(ip)->inp_ppcb)
@@ -401,6 +407,9 @@ struct	tcpstat {
 	u_long	tcps_sc_zonefail;	/* zalloc() failed */
 	u_long	tcps_sc_sendcookie;	/* SYN cookie sent */
 	u_long	tcps_sc_recvcookie;	/* SYN cookie received */
+
+	u_long	tcps_hc_added;		/* entry added to hostcache */
+	u_long	tcps_hc_bucketoverflow;	/* hostcache per bucket limit hit */
 };
 
 /*
@@ -451,6 +460,7 @@ struct	xtcpcb {
 	{ "pcblist", CTLTYPE_STRUCT }, \
 	{ "delacktime", CTLTYPE_INT }, \
 	{ "v6mssdflt", CTLTYPE_INT }, \
+	{ "maxid", CTLTYPE_INT }, \
 }
 
 
@@ -482,12 +492,12 @@ struct tcpcb *
 	 tcp_drop(struct tcpcb *, int);
 void	 tcp_drain(void);
 void	 tcp_fasttimo(void);
-struct rmxp_tao *
-	 tcp_gettaocache(struct in_conninfo *);
 void	 tcp_init(void);
 void	 tcp_input(struct mbuf *, int);
+u_long	 tcp_maxmtu(struct in_conninfo *);
+u_long	 tcp_maxmtu6(struct in_conninfo *);
 void	 tcp_mss(struct tcpcb *, int);
-int	 tcp_mssopt(struct tcpcb *);
+int	 tcp_mssopt(struct in_conninfo *);
 struct inpcb *	 
 	 tcp_drop_syn_sent(struct inpcb *, int);
 struct inpcb *
@@ -500,8 +510,6 @@ struct inpcb *
 void	 tcp_respond(struct tcpcb *, void *,
 	    struct tcphdr *, struct mbuf *, tcp_seq, tcp_seq, int);
 int	 tcp_twrespond(struct tcptw *, struct socket *, struct mbuf *, int);
-struct rtentry *
-	 tcp_rtlookup(struct in_conninfo *);
 void	 tcp_setpersist(struct tcpcb *);
 void	 tcp_slowtimo(void);
 struct tcptemp *
@@ -519,6 +527,20 @@ int	 syncache_add(struct in_conninfo *, struct tcpopt *,
 	     struct tcphdr *, struct socket **, struct mbuf *);
 void	 syncache_chkrst(struct in_conninfo *, struct tcphdr *);
 void	 syncache_badack(struct in_conninfo *);
+/*
+ * All tcp_hc_* functions are IPv4 and IPv6 (via in_conninfo)
+ */
+void	 tcp_hc_init(void);
+void	 tcp_hc_get(struct in_conninfo *, struct hc_metrics_lite *);
+u_long	 tcp_hc_getmtu(struct in_conninfo *);
+void	 tcp_hc_gettao(struct in_conninfo *, struct rmxp_tao *);
+void	 tcp_hc_updatemtu(struct in_conninfo *, u_long);
+void	 tcp_hc_update(struct in_conninfo *, struct hc_metrics_lite *);
+void	 tcp_hc_updatetao(struct in_conninfo *, int, tcp_cc, u_short);
+/* update which tao field */
+#define	TCP_HC_TAO_CC		0x1
+#define TCP_HC_TAO_CCSENT	0x2
+#define TCP_HC_TAO_MSSOPT	0x3
 
 extern	struct pr_usrreqs tcp_usrreqs;
 extern	u_long tcp_sendspace;
diff --git a/sys/netinet/udp_usrreq.c b/sys/netinet/udp_usrreq.c
index 60ec82b..62e6131 100644
--- a/sys/netinet/udp_usrreq.c
+++ b/sys/netinet/udp_usrreq.c
@@ -544,10 +544,17 @@ udp_ctlinput(cmd, sa, vip)
 	if (sa->sa_family != AF_INET || faddr.s_addr == INADDR_ANY)
         	return;
 
-	if (PRC_IS_REDIRECT(cmd)) {
-		ip = 0;
-		notify = in_rtchange;
-	} else if (cmd == PRC_HOSTDEAD)
+	/*
+	 * Redirects don't need to be handled up here.
+	 */
+	if (PRC_IS_REDIRECT(cmd))
+		return;
+	/*
+	 * Hostdead is ugly because it goes linearly through all PCBs.
+	 * XXX: We never get this from ICMP, otherwise it makes an
+	 * excellent DoS attack on machines with many connections.
+	 */
+	if (cmd == PRC_HOSTDEAD)
 		ip = 0;
 	else if ((unsigned)cmd >= PRC_NCMDS || inetctlerrmap[cmd] == 0)
 		return;
@@ -873,7 +880,7 @@ udp_output(inp, m, addr, control, td)
 	((struct ip *)ui)->ip_tos = inp->inp_ip_tos;	/* XXX */
 	udpstat.udps_opackets++;
 
-	error = ip_output(m, inp->inp_options, &inp->inp_route, ipflags,
+	error = ip_output(m, inp->inp_options, NULL, ipflags,
 	    inp->inp_moptions, inp);
 	return (error);
 
diff --git a/sys/netinet6/icmp6.c b/sys/netinet6/icmp6.c
index 997474e..6baa2db 100644
--- a/sys/netinet6/icmp6.c
+++ b/sys/netinet6/icmp6.c
@@ -94,6 +94,7 @@
 #include <netinet/in_var.h>
 #include <netinet/ip6.h>
 #include <netinet/icmp6.h>
+#include <netinet/tcp_var.h>
 #include <netinet6/in6_ifattach.h>
 #include <netinet6/in6_pcb.h>
 #include <netinet6/ip6protosw.h>
@@ -1105,8 +1106,7 @@ icmp6_mtudisc_update(ip6cp, validated)
 	struct icmp6_hdr *icmp6 = ip6cp->ip6c_icmp6;
 	struct mbuf *m = ip6cp->ip6c_m;	/* will be necessary for scope issue */
 	u_int mtu = ntohl(icmp6->icmp6_mtu);
-	struct rtentry *rt = NULL;
-	struct sockaddr_in6 sin6;
+	struct in_conninfo inc;
 
 #if 0
 	/*
@@ -1131,31 +1131,19 @@ icmp6_mtudisc_update(ip6cp, validated)
 	if (!validated)
 		return;
 
-	bzero(&sin6, sizeof(sin6));
-	sin6.sin6_family = PF_INET6;
-	sin6.sin6_len = sizeof(struct sockaddr_in6);
-	sin6.sin6_addr = *dst;
+	bzero(&inc, sizeof(inc));
+	inc.inc_isipv6 = 1;
+	inc.inc6_faddr = *dst;
 	/* XXX normally, this won't happen */
 	if (IN6_IS_ADDR_LINKLOCAL(dst)) {
-		sin6.sin6_addr.s6_addr16[1] =
+		inc.inc6_faddr.s6_addr16[1] =
 		    htons(m->m_pkthdr.rcvif->if_index);
 	}
-	/* sin6.sin6_scope_id = XXX: should be set if DST is a scoped addr */
-	rt = rtalloc1((struct sockaddr *)&sin6, 0, RTF_CLONING);
-
-	if (rt && (rt->rt_flags & RTF_HOST) &&
-	    !(rt->rt_rmx.rmx_locks & RTV_MTU)) {
-		if (mtu < IPV6_MMTU) {
-				/* xxx */
-			rt->rt_rmx.rmx_locks |= RTV_MTU;
-		} else if (mtu < rt->rt_ifp->if_mtu &&
-			   rt->rt_rmx.rmx_mtu > mtu) {
-			icmp6stat.icp6s_pmtuchg++;
-			rt->rt_rmx.rmx_mtu = mtu;
-		}
+
+	if (mtu >= IPV6_MMTU) {
+		tcp_hc_updatemtu(&inc, mtu);
+		icmp6stat.icp6s_pmtuchg++;
 	}
-	if (rt)
-		rtfree(rt);
 }
 
 /*
diff --git a/sys/netinet6/in6_pcb.c b/sys/netinet6/in6_pcb.c
index 5c7f1f2..b3d58e8 100644
--- a/sys/netinet6/in6_pcb.c
+++ b/sys/netinet6/in6_pcb.c
@@ -337,8 +337,7 @@ in6_pcbladdr(inp, nam, plocal_addr6)
 		 * Is it the intended behavior?
 		 */
 		*plocal_addr6 = in6_selectsrc(sin6, inp->in6p_outputopts,
-					      inp->in6p_moptions,
-					      &inp->in6p_route,
+					      inp->in6p_moptions, NULL,
 					      &inp->in6p_laddr, &error);
 		if (*plocal_addr6 == 0) {
 			if (error == 0)
@@ -351,10 +350,6 @@ in6_pcbladdr(inp, nam, plocal_addr6)
 		 * and exit to caller, that will do the lookup.
 		 */
 	}
-
-	if (inp->in6p_route.ro_rt)
-		ifp = inp->in6p_route.ro_rt->rt_ifp;
-
 	return (0);
 }
 
@@ -447,8 +442,6 @@ in6_pcbdetach(inp)
 
  	ip6_freepcbopts(inp->in6p_outputopts);
  	ip6_freemoptions(inp->in6p_moptions);
-	if (inp->in6p_route.ro_rt)
-		RTFREE(inp->in6p_route.ro_rt);
 	/* Check and free IPv4 related resources in case of mapped addr */
 	if (inp->inp_options)
 		(void)m_free(inp->inp_options);
@@ -830,26 +823,10 @@ void
 in6_losing(in6p)
 	struct inpcb *in6p;
 {
-	struct rtentry *rt;
-	struct rt_addrinfo info;
-
-	if ((rt = in6p->in6p_route.ro_rt) != NULL) {
-		RT_LOCK(rt);
-		in6p->in6p_route.ro_rt = NULL;
-		bzero((caddr_t)&info, sizeof(info));
-		info.rti_flags = rt->rt_flags;
-		info.rti_info[RTAX_DST] = rt_key(rt);
-		info.rti_info[RTAX_GATEWAY] = rt->rt_gateway;
-		info.rti_info[RTAX_NETMASK] = rt_mask(rt);
-		rt_missmsg(RTM_LOSING, &info, rt->rt_flags, 0);
-		if (rt->rt_flags & RTF_DYNAMIC)
-			rtexpunge(rt);
-		RTFREE_LOCKED(rt);
-		/*
-		 * A new route can be allocated
-		 * the next time output is attempted.
-		 */
-	}
+	/*
+	 * We don't cache a route pointer in the inpcb anymore
+	 */
+	return;
 }
 
 /*
@@ -861,14 +838,9 @@ in6_rtchange(inp, errno)
 	struct inpcb *inp;
 	int errno;
 {
-	if (inp->in6p_route.ro_rt) {
-		RTFREE(inp->in6p_route.ro_rt);
-		inp->in6p_route.ro_rt = 0;
-		/*
-		 * A new route can be allocated the next time
-		 * output is attempted.
-		 */
-	}
+	/*
+	 * We don't cache a route pointer in the inpcb anymore
+	 */
 	return inp;
 }
 
diff --git a/sys/netinet6/in6_rmx.c b/sys/netinet6/in6_rmx.c
index 09526b2..b68852d 100644
--- a/sys/netinet6/in6_rmx.c
+++ b/sys/netinet6/in6_rmx.c
@@ -141,8 +141,7 @@ in6_addroute(void *v_arg, void *n_arg, struct radix_node_head *head,
 		}
 	}
 
-	if (!rt->rt_rmx.rmx_mtu && !(rt->rt_rmx.rmx_locks & RTV_MTU)
-	    && rt->rt_ifp)
+	if (!rt->rt_rmx.rmx_mtu && rt->rt_ifp)
 		rt->rt_rmx.rmx_mtu = IN6_LINKMTU(rt->rt_ifp);
 
 	ret = rn_addroute(v_arg, n_arg, head, treenodes);
diff --git a/sys/netinet6/in6_src.c b/sys/netinet6/in6_src.c
index d584956..88ace1c 100644
--- a/sys/netinet6/in6_src.c
+++ b/sys/netinet6/in6_src.c
@@ -211,7 +211,6 @@ in6_selectsrc(dstsock, opts, mopts, ro, laddr, errorp)
 		    != 0) {
 			return (NULL);
 		}
-
 		/*
 		 * determine the appropriate zone id of the source based on
 		 * the zone of the destination and the outgoing interface.
@@ -449,12 +448,19 @@ in6_selectif(dstsock, opts, mopts, ro, retifp)
 	struct route_in6 *ro;
 	struct ifnet **retifp;
 {
-	int error, clone;
+	int error;
+	struct route_in6 sro;
 	struct rtentry *rt = NULL;
 
-	clone = IN6_IS_ADDR_MULTICAST(&dstsock->sin6_addr) ? 0 : 1;
+	if (ro == NULL) {
+		bzero(&sro, sizeof(sro));
+		ro = &sro;
+	}
+
 	if ((error = in6_selectroute(dstsock, opts, mopts, ro, retifp,
-				     &rt, clone)) != 0) {
+				     &rt, 0)) != 0) {
+		if (rt && rt == sro.ro_rt)
+			RTFREE(rt);
 		return (error);
 	}
 
@@ -476,7 +482,11 @@ in6_selectif(dstsock, opts, mopts, ro, retifp)
 	 * We thus reject the case here.
 	 */
 	if (rt && (rt->rt_flags & (RTF_REJECT | RTF_BLACKHOLE))) {
-		return (rt->rt_flags & RTF_HOST ? EHOSTUNREACH : ENETUNREACH);
+		int flags = (rt->rt_flags & RTF_HOST ? EHOSTUNREACH : ENETUNREACH);
+
+		if (rt && rt == sro.ro_rt)
+			RTFREE(rt);
+		return (flags);
 	}
 
 	/*
@@ -489,6 +499,8 @@ in6_selectif(dstsock, opts, mopts, ro, retifp)
 	if (rt && rt->rt_ifa && rt->rt_ifa->ifa_ifp)
 		*retifp = rt->rt_ifa->ifa_ifp;
 
+	if (rt && rt == sro.ro_rt)
+		RTFREE(rt);
 	return (0);
 }
 
@@ -623,6 +635,7 @@ in6_selectroute(dstsock, opts, mopts, ro, retifp, retrt, clone)
 			sa6 = (struct sockaddr_in6 *)&ro->ro_dst;
 			*sa6 = *dstsock;
 			sa6->sin6_scope_id = 0;
+
 			if (clone) {
 				rtalloc((struct route *)ro);
 			} else {
@@ -695,7 +708,7 @@ in6_selectroute(dstsock, opts, mopts, ro, retifp, retrt, clone)
  * 2. (If the outgoing interface is detected) the current
  *     hop limit of the interface specified by router advertisement.
  * 3. The system default hoplimit.
-*/
+ */
 int
 in6_selecthlim(in6p, ifp)
 	struct in6pcb *in6p;
@@ -705,8 +718,24 @@ in6_selecthlim(in6p, ifp)
 		return (in6p->in6p_hops);
 	else if (ifp)
 		return (ND_IFINFO(ifp)->chlim);
-	else
-		return (ip6_defhlim);
+	else if (in6p && !IN6_IS_ADDR_UNSPECIFIED(&in6p->in6p_faddr)) {
+		struct route_in6 ro6;
+		struct ifnet *lifp;
+
+		bzero(&ro6, sizeof(ro6));
+		ro6.ro_dst.sin6_family = AF_INET6;
+		ro6.ro_dst.sin6_len = sizeof(struct sockaddr_in6);
+		ro6.ro_dst.sin6_addr = in6p->in6p_faddr;
+		rtalloc((struct route *)&ro6);
+		if (ro6.ro_rt) {
+			lifp = ro6.ro_rt->rt_ifp;
+			RTFREE(ro6.ro_rt);
+			if (lifp)
+				return (ND_IFINFO(lifp)->chlim);
+		} else
+			return (ip6_defhlim);
+	}
+	return (ip6_defhlim);
 }
 
 /*
diff --git a/sys/netinet6/ip6_output.c b/sys/netinet6/ip6_output.c
index b95b197..3072851 100644
--- a/sys/netinet6/ip6_output.c
+++ b/sys/netinet6/ip6_output.c
@@ -96,6 +96,7 @@
 #include <netinet/icmp6.h>
 #include <netinet6/ip6_var.h>
 #include <netinet/in_pcb.h>
+#include <netinet/tcp_var.h>
 #include <netinet6/nd6.h>
 
 #ifdef IPSEC
@@ -661,7 +662,7 @@ skip_ipsec2:;
 		/* XXX rt not locked */
 		ia = ifatoia6(ro->ro_rt->rt_ifa);
 		ifp = ro->ro_rt->rt_ifp;
-		ro->ro_rt->rt_use++;
+		ro->ro_rt->rt_rmx.rmx_pksent++;
 		if (ro->ro_rt->rt_flags & RTF_GATEWAY)
 			dst = (struct sockaddr_in6 *)ro->ro_rt->rt_gateway;
 		m->m_flags &= ~(M_BCAST | M_MCAST);	/* just in case */
@@ -757,7 +758,7 @@ skip_ipsec2:;
 			}
 			ia = ifatoia6(ro->ro_rt->rt_ifa);
 			ifp = ro->ro_rt->rt_ifp;
-			ro->ro_rt->rt_use++;
+			ro->ro_rt->rt_rmx.rmx_pksent++;
 			RT_UNLOCK(ro->ro_rt);
 		}
 
@@ -1387,11 +1388,20 @@ ip6_getpmtu(ro_pmtu, ro, ifp, dst, mtup, alwaysfragp)
 	}
 	if (ro_pmtu->ro_rt) {
 		u_int32_t ifmtu;
+		struct in_conninfo inc;
+
+		bzero(&inc, sizeof(inc));
+		inc.inc_flags = 1; /* IPv6 */
+		inc.inc6_faddr = *dst;
 
 		if (ifp == NULL)
 			ifp = ro_pmtu->ro_rt->rt_ifp;
 		ifmtu = IN6_LINKMTU(ifp);
-		mtu = ro_pmtu->ro_rt->rt_rmx.rmx_mtu;
+		mtu = tcp_hc_getmtu(&inc);
+		if (mtu)
+			mtu = min(mtu, ro_pmtu->ro_rt->rt_rmx.rmx_mtu);
+		else
+			mtu = ro_pmtu->ro_rt->rt_rmx.rmx_mtu;
 		if (mtu == 0)
 			mtu = ifmtu;
 		else if (mtu < IPV6_MMTU) {
@@ -1415,8 +1425,7 @@ ip6_getpmtu(ro_pmtu, ro, ifp, dst, mtup, alwaysfragp)
 			 * field isn't locked).
 			 */
 			mtu = ifmtu;
-			if (!(ro_pmtu->ro_rt->rt_rmx.rmx_locks & RTV_MTU))
-				ro_pmtu->ro_rt->rt_rmx.rmx_mtu = mtu;
+			ro_pmtu->ro_rt->rt_rmx.rmx_mtu = mtu;
 		}
 	} else if (ifp) {
 		mtu = IN6_LINKMTU(ifp);
@@ -1993,7 +2002,9 @@ do { \
 			{
 				u_long pmtu = 0;
 				struct ip6_mtuinfo mtuinfo;
-				struct route_in6 *ro = (struct route_in6 *)&in6p->in6p_route;
+				struct route_in6 sro;
+
+				bzero(&sro, sizeof(sro));
 
 				if (!(so->so_state & SS_ISCONNECTED))
 					return (ENOTCONN);
@@ -2002,8 +2013,10 @@ do { \
 				 * routing, or optional information to specify
 				 * the outgoing interface.
 				 */
-				error = ip6_getpmtu(ro, NULL, NULL,
+				error = ip6_getpmtu(&sro, NULL, NULL,
 				    &in6p->in6p_faddr, &pmtu, NULL);
+				if (sro.ro_rt)
+					RTFREE(sro.ro_rt);
 				if (error)
 					break;
 				if (pmtu > IPV6_MAXPACKET)
diff --git a/sys/netinet6/udp6_output.c b/sys/netinet6/udp6_output.c
index 36a7fba..d905e84 100644
--- a/sys/netinet6/udp6_output.c
+++ b/sys/netinet6/udp6_output.c
@@ -203,8 +203,7 @@ udp6_output(in6p, m, addr6, control, td)
 
 		if (!IN6_IS_ADDR_V4MAPPED(faddr)) {
 			laddr = in6_selectsrc(sin6, in6p->in6p_outputopts,
-					      in6p->in6p_moptions,
-					      &in6p->in6p_route,
+					      in6p->in6p_moptions, NULL,
 					      &in6p->in6p_laddr, &error);
 		} else
 			laddr = &in6p->in6p_laddr;	/* XXX */
@@ -277,9 +276,7 @@ udp6_output(in6p, m, addr6, control, td)
 		ip6->ip6_plen	= htons((u_short)plen);
 #endif
 		ip6->ip6_nxt	= IPPROTO_UDP;
-		ip6->ip6_hlim	= in6_selecthlim(in6p,
-						 in6p->in6p_route.ro_rt ?
-						 in6p->in6p_route.ro_rt->rt_ifp : NULL);
+		ip6->ip6_hlim	= in6_selecthlim(in6p, NULL);
 		ip6->ip6_src	= *laddr;
 		ip6->ip6_dst	= *faddr;
 
@@ -297,7 +294,7 @@ udp6_output(in6p, m, addr6, control, td)
 			goto release;
 		}
 #endif /* IPSEC */
-		error = ip6_output(m, in6p->in6p_outputopts, &in6p->in6p_route,
+		error = ip6_output(m, in6p->in6p_outputopts, NULL,
 		    flags, in6p->in6p_moptions, NULL, in6p);
 		break;
 	case AF_INET: