100 files changed, 60186 insertions, 0 deletions
diff --git a/sys/netinet/accf_data.c b/sys/netinet/accf_data.c
new file mode 100644
index 0000000..b66e1c7
--- /dev/null
+++ b/sys/netinet/accf_data.c
@@ -0,0 +1,67 @@
+/*-
+ * Copyright (c) 2000 Alfred Perlstein <alfred@FreeBSD.org>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	$FreeBSD$
+ */
+
+#define ACCEPT_FILTER_MOD
+
+#include <sys/param.h>
+#include <sys/sysctl.h>
+#include <sys/kernel.h>
+#include <sys/socketvar.h>
+
+/* accept filter that holds a socket until data arrives */
+
+static void	sohasdata(struct socket *so, void *arg, int waitflag);
+
+static struct accept_filter accf_data_filter = {
+	"dataready",
+	sohasdata,
+	NULL,
+	NULL
+};
+
+static moduledata_t accf_data_mod = {
+	"accf_data",
+	accept_filt_generic_mod_event,
+	&accf_data_filter
+};
+
+DECLARE_MODULE(accf_data, accf_data_mod, SI_SUB_DRIVERS, SI_ORDER_MIDDLE);
+
+static void
+sohasdata(struct socket *so, void *arg, int waitflag)
+{
+
+	/* Accept-filter upcall: hand the socket over once soreadable() is true. */
+	if (soreadable(so)) {
+		/* Unhook the upcall before announcing the connection. */
+		so->so_rcv.sb_flags &= ~SB_UPCALL;
+		so->so_upcall = NULL;
+		soisconnected(so);
+	}
+	/* Otherwise nothing is touched, so the upcall stays installed. */
+}
diff --git a/sys/netinet/accf_http.c b/sys/netinet/accf_http.c
new file mode 100644
index 0000000..a9a8fb0
--- /dev/null
+++ b/sys/netinet/accf_http.c
@@ -0,0 +1,346 @@
+/*
+ * Copyright (c) 2000 Paycounter, Inc.
+ * Author: Alfred Perlstein <alfred@paycounter.com>, <alfred@FreeBSD.org>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	$FreeBSD$
+ */
+
+#define ACCEPT_FILTER_MOD
+
+#include <sys/param.h>
+#include <sys/kernel.h>
+#include <sys/sysctl.h>
+#include <sys/socketvar.h>
+#include <sys/mbuf.h>
+
+/* check for GET/HEAD */
+static void sohashttpget(struct socket *so, void *arg, int waitflag);
+/* check for HTTP/1.0 or HTTP/1.1 */
+static void soparsehttpvers(struct socket *so, void *arg, int waitflag);
+/* check for end of HTTP/1.x request */
+static void soishttpconnected(struct socket *so, void *arg, int waitflag);
+/* strcmp on an mbuf chain */
+static int mbufstrcmp(struct mbuf *m, struct mbuf *npkt, int offset, char *cmp);
+/* strncmp on an mbuf chain */
+static int mbufstrncmp(struct mbuf *m, struct mbuf *npkt, int offset,
+	int max, char *cmp);
+/* socketbuffer is full */
+static int sbfull(struct sockbuf *sb);
+
+static struct accept_filter accf_http_filter = {
+	"httpready",
+	sohashttpget,
+	NULL,
+	NULL
+};
+
+static moduledata_t accf_http_mod = {
+	"accf_http",
+	accept_filt_generic_mod_event,
+	&accf_http_filter
+};
+
+DECLARE_MODULE(accf_http, accf_http_mod, SI_SUB_DRIVERS, SI_ORDER_MIDDLE);
+
+static int parse_http_version = 1;
+
+SYSCTL_NODE(_net_inet_accf, OID_AUTO, http, CTLFLAG_RW, 0,
+"HTTP accept filter");
+SYSCTL_INT(_net_inet_accf_http, OID_AUTO, parsehttpversion, CTLFLAG_RW,
+&parse_http_version, 1,
+"Parse http version so that non 1.x requests work");
+
+#ifdef ACCF_HTTP_DEBUG
+#define DPRINT(fmt, args...) \
+	do {	\
+		printf("%s:%d: " fmt "\n", __func__, __LINE__ , ##args);	\
+	} while (0)
+#else
+#define DPRINT(fmt, args...)
+#endif
+
+static int
+sbfull(struct sockbuf *sb)
+{
+	/* full as soon as either the byte count or the mbuf count hits its limit */
+	DPRINT("sbfull, cc(%ld) >= hiwat(%ld): %d, mbcnt(%ld) >= mbmax(%ld): %d",
+	    sb->sb_cc, sb->sb_hiwat, sb->sb_cc >= sb->sb_hiwat,
+	    sb->sb_mbcnt, sb->sb_mbmax, sb->sb_mbcnt >= sb->sb_mbmax);
+	return (!(sb->sb_cc < sb->sb_hiwat && sb->sb_mbcnt < sb->sb_mbmax));
+}
+
+/*
+ * compare 'cmp' with the bytes at 'offset' in mbuf m and onwards (npkt is the
+ * next record, if any); 1 = all of 'cmp' matched, 0 = mismatch or data short
+ */
+static int
+mbufstrcmp(struct mbuf *m, struct mbuf *npkt, int offset, char *cmp)
+{
+	struct mbuf *n;
+
+	for (;m != NULL; m = n) {
+		n = npkt;
+		if (npkt)
+			npkt = npkt->m_nextpkt;
+		for (; m; m = m->m_next) {
+			for (; offset < m->m_len && *cmp != '\0'; offset++, cmp++) {
+				if (*cmp != *(mtod(m, char *) + offset))
+					return (0);
+			}
+			/* also catches a match that ends on the mbuf's last byte */
+			if (*cmp == '\0')
+				return (1);
+			offset = 0;
+		}
+	}
+	return (0);
+}
+
+/*
+ * as mbufstrcmp, but stop after 'max' characters: returns 1 when 'max' or
+ * 'cmp' is used up without a mismatch, 0 on a mismatch or if data ends first
+ */
+static int
+mbufstrncmp(struct mbuf *m, struct mbuf *npkt, int offset, int max, char *cmp)
+{
+	struct mbuf *n;
+
+	for (;m != NULL; m = n) {
+		n = npkt;
+		if (npkt)
+			npkt = npkt->m_nextpkt;
+		for (; m; m = m->m_next) {
+			for (; offset < m->m_len && max != 0 && *cmp != '\0';
+			    offset++, cmp++, max--) {
+				if (*cmp != *(mtod(m, char *) + offset))
+					return (0);
+			}
+			/* the data may end exactly where 'max' or 'cmp' does */
+			if (max == 0 || *cmp == '\0')
+				return (1);
+			offset = 0;
+		}
+	}
+	return (0);
+}
+
+#define STRSETUP(sptr, slen, str) \
+	do {	\
+		sptr = str;	\
+		slen = sizeof(str) - 1;	\
+	} while(0)
+
+static void
+sohashttpget(struct socket *so, void *arg, int waitflag)
+{
+	/* upcall: look for a leading "GET " or "HEAD " in the receive buffer */
+	if ((so->so_state & SS_CANTRCVMORE) == 0 && !sbfull(&so->so_rcv)) {
+		struct mbuf *m;
+		char *cmp;
+		int	cmplen, cc;
+
+		m = so->so_rcv.sb_mb;
+		cc = so->so_rcv.sb_cc - 1;	/* bytes after the first character */
+		if (cc < 1)
+			return;		/* too little data: leave the upcall as it is */
+		switch (*mtod(m, char *)) {	/* first byte picks the string to match */
+		case 'G':
+			STRSETUP(cmp, cmplen, "ET ");
+			break;
+		case 'H':
+			STRSETUP(cmp, cmplen, "EAD ");
+			break;
+		default:
+			goto fallout;
+		}
+		if (cc < cmplen) {	/* only part of the method has arrived */
+			if (mbufstrncmp(m, m->m_nextpkt, 1, cc, cmp) == 1) {
+				DPRINT("short cc (%d) but mbufstrncmp ok", cc);
+				return;
+			} else {
+				DPRINT("short cc (%d) mbufstrncmp failed", cc);
+				goto fallout;
+			}
+		}
+		if (mbufstrcmp(m, m->m_nextpkt, 1, cmp) == 1) {
+			DPRINT("mbufstrcmp ok");
+			/* the sysctl-backed flag selects the next parsing stage */
+			if (parse_http_version == 0)
+				soishttpconnected(so, arg, waitflag);
+			else
+				soparsehttpvers(so, arg, waitflag);
+			return;
+		}
+		DPRINT("mbufstrcmp bad");
+	}
+	/* not GET/HEAD, receive side shut down, or buffer full: stop filtering */
+fallout:
+	DPRINT("fallout");
+	so->so_upcall = NULL;
+	so->so_rcv.sb_flags &= ~SB_UPCALL;
+	soisconnected(so);
+	return;
+}
+
+static void
+soparsehttpvers(struct socket *so, void *arg, int waitflag)
+{
+	struct mbuf *m, *n;
+	int	i, cc, spaces, inspaces;
+	/* upcall: wait for the whole request only if its third field is HTTP/1.[01] */
+	if ((so->so_state & SS_CANTRCVMORE) != 0 || sbfull(&so->so_rcv))
+		goto fallout;
+	/* cc counts the bytes not yet scanned; spaces counts runs of ' ' seen */
+	m = so->so_rcv.sb_mb;
+	cc = so->so_rcv.sb_cc;
+	inspaces = spaces = 0;
+	for (m = so->so_rcv.sb_mb; m; m = n) {
+		n = m->m_nextpkt;
+		for (; m; m = m->m_next) {
+			for (i = 0; i < m->m_len; i++, cc--) {
+				switch (*(mtod(m, char *) + i)) {
+				case ' ':
+					/* consecutive spaces count as one separator */
+					if (!inspaces) {
+						spaces++;
+						inspaces = 1;
+					}
+					break;
+				case '\r':
+				case '\n':
+					/* line ended before a version field was found */
+					DPRINT("newline");
+					goto fallout;
+				default:
+					if (spaces == 2) {
+						/* make sure we have enough data left */
+						if (cc < sizeof("HTTP/1.0") - 1) {
+							if (mbufstrncmp(m, n, i, cc, "HTTP/1.") == 1) {
+								DPRINT("mbufstrncmp ok");
+								goto readmore;
+							} else {
+								DPRINT("mbufstrncmp bad");
+								goto fallout;
+							}
+						} else if (mbufstrcmp(m, n, i, "HTTP/1.0") == 1 ||
+									mbufstrcmp(m, n, i, "HTTP/1.1") == 1) {
+								DPRINT("mbufstrcmp ok");
+								soishttpconnected(so, arg, waitflag);
+								return;
+						} else {
+							DPRINT("mbufstrcmp bad");
+							goto fallout;
+						}
+					}
+					inspaces = 0;
+					break;
+				}
+			}
+		}
+	}
+readmore:
+	DPRINT("readmore");
+	/*
+	 * if we get here we have seen neither a newline nor anything
+	 * we don't understand, so run this parser again on the next upcall
+	 */
+	so->so_upcall = soparsehttpvers;
+	so->so_rcv.sb_flags |= SB_UPCALL;
+	return;
+
+fallout:
+	DPRINT("fallout");
+	so->so_upcall = NULL;
+	so->so_rcv.sb_flags &= ~SB_UPCALL;
+	soisconnected(so);
+	return;
+}
+
+
+#define NCHRS 3	/* number of trailing bytes examined below */
+
+static void
+soishttpconnected(struct socket *so, void *arg, int waitflag)
+{
+	char a, b, c;
+	struct mbuf *m, *n;
+	int ccleft, copied;
+
+	DPRINT("start");
+	if ((so->so_state & SS_CANTRCVMORE) != 0 || sbfull(&so->so_rcv))
+		goto gotit;
+
+	/*
+	 * Walk the socket buffer and copy its last NCHRS (3) bytes into a, b
+	 * and c, in that order.
+	 * copied - how many of those bytes have been copied so far
+	 * ccleft - how many bytes of the socket buffer lie beyond this mbuf
+	 * Loop over the mbufs subtracting each length from 'ccleft'; copying
+	 * starts once no more than NCHRS bytes remain beyond the current mbuf.
+	 */
+	copied = 0;
+	ccleft = so->so_rcv.sb_cc;
+	if (ccleft < NCHRS)
+		goto readmore;
+	a = b = c = '\0';
+	for (m = so->so_rcv.sb_mb; m; m = n) {
+		n = m->m_nextpkt;
+		for (; m; m = m->m_next) {
+			ccleft -= m->m_len;
+			if (ccleft <= NCHRS) {
+				char *src;
+				int tocopy;
+				/* part of the final NCHRS bytes that ends in this mbuf */
+				tocopy = (NCHRS - ccleft) - copied;
+				src = mtod(m, char *) + (m->m_len - tocopy);
+
+				while (tocopy--) {
+					switch (copied++) {
+					case 0:
+						a = *src++;
+						break;
+					case 1:
+						b = *src++;
+						break;
+					case 2:
+						c = *src++;
+						break;
+					}
+				}
+			}
+		}
+	}
+	/* the buffer must end in "\n\n" or "\n\r\n" */
+	if (c == '\n' && (b == '\n' || (b == '\r' && a == '\n'))) {
+		/* we have all request headers */
+		goto gotit;
+	}
+	/* no terminating blank line yet: keep this function as the upcall */
+readmore:
+	so->so_upcall = soishttpconnected;
+	so->so_rcv.sb_flags |= SB_UPCALL;
+	return;
+	/* headers complete, receive side shut down, or buffer full */
+gotit:
+	so->so_upcall = NULL;
+	so->so_rcv.sb_flags &= ~SB_UPCALL;
+	soisconnected(so);
+	return;
+}
diff --git a/sys/netinet/fil.c b/sys/netinet/fil.c
new file mode 100644
index 0000000..6d4622e
--- /dev/null
+++ b/sys/netinet/fil.c
@@ -0,0 +1,2132 @@
+/*
+ * Copyright (C) 1993-2000 by Darren Reed.
+ *
+ * Redistribution and use in source and binary forms are permitted
+ * provided that this notice is preserved and due credit is given
+ * to the original author and the contributors.
+ */
+#if !defined(lint)
+static const char sccsid[] = "@(#)fil.c	1.36 6/5/96 (C) 1993-2000 Darren Reed";
+/* static const char rcsid[] = "@(#)$Id: fil.c,v 2.3.2.16 2000/01/27 08:49:37 darrenr Exp $"; */
+static const char rcsid[] = "@(#)$FreeBSD$";
+#endif
+
+#include <sys/errno.h>
+#include <sys/types.h>
+#include <sys/param.h>
+#include <sys/time.h>
+#include <sys/file.h>
+#if defined(__NetBSD__) && (NetBSD >= 199905) && !defined(IPFILTER_LKM) && \
+    defined(_KERNEL)
+# include "opt_ipfilter_log.h"
+#endif
+#if (defined(KERNEL) || defined(_KERNEL)) && defined(__FreeBSD_version) && \
+    (__FreeBSD_version >= 220000)
+# if (__FreeBSD_version >= 400000)
+#  ifndef KLD_MODULE
+#   include "opt_inet6.h"
+#  endif
+#  if (__FreeBSD_version == 400019)
+#   define CSUM_DELAY_DATA
+#  endif
+# endif
+# include <sys/filio.h>
+# include <sys/fcntl.h>
+#else
+# include <sys/ioctl.h>
+#endif
+#if (defined(_KERNEL) || defined(KERNEL)) && !defined(linux)
+# include <sys/systm.h>
+#else
+# include <stdio.h>
+# include <string.h>
+# include <stdlib.h>
+#endif
+#include <sys/uio.h>
+#if !defined(__SVR4) && !defined(__svr4__)
+# ifndef linux
+#  include <sys/mbuf.h>
+# endif
+#else
+# include <sys/byteorder.h>
+# if SOLARIS2 < 5
+#  include <sys/dditypes.h>
+# endif
+#  include <sys/stream.h>
+#endif
+#ifndef linux
+# include <sys/protosw.h>
+# include <sys/socket.h>
+#endif
+#include <net/if.h>
+#ifdef sun
+# include <net/af.h>
+#endif
+#include <net/route.h>
+#include <netinet/in.h>
+#include <netinet/in_systm.h>
+#include <netinet/ip.h>
+#ifndef linux
+# include <netinet/ip_var.h>
+#endif
+#if defined(__sgi) && defined(IFF_DRVRLOCK) /* IRIX 6 */
+# include <sys/hashing.h>
+# include <netinet/in_var.h>
+#endif
+#include <netinet/tcp.h>
+#include <netinet/udp.h>
+#include <netinet/ip_icmp.h>
+#include "netinet/ip_compat.h"
+#ifdef	USE_INET6
+# include <netinet/icmp6.h>
+# if !SOLARIS && defined(_KERNEL)
+#  include <netinet6/in6_var.h>
+# endif
+#endif
+#include <netinet/tcpip.h>
+#include "netinet/ip_fil.h"
+#include "netinet/ip_proxy.h"
+#include "netinet/ip_nat.h"
+#include "netinet/ip_frag.h"
+#include "netinet/ip_state.h"
+#include "netinet/ip_auth.h"
+# if defined(__FreeBSD_version) && (__FreeBSD_version >= 300000)
+#  include <sys/malloc.h>
+#  if defined(_KERNEL) && !defined(IPFILTER_LKM)
+#   include "opt_ipfilter.h"
+#  endif
+# endif
+#ifndef	MIN
+# define	MIN(a,b)	(((a)<(b))?(a):(b))
+#endif
+#include "netinet/ipl.h"
+
+#include <machine/in_cksum.h>
+
+#ifndef	_KERNEL
+# include "ipf.h"
+# include "ipt.h"
+extern	int	opts;
+
+# define	FR_VERBOSE(verb_pr)			verbose verb_pr
+# define	FR_DEBUG(verb_pr)			debug verb_pr
+# define	IPLLOG(a, c, d, e)		ipllog()
+#else /* #ifndef _KERNEL */
+# define	FR_VERBOSE(verb_pr)
+# define	FR_DEBUG(verb_pr)
+# define	IPLLOG(a, c, d, e)		ipflog(a, c, d, e)
+# if SOLARIS || defined(__sgi)
+extern	KRWLOCK_T	ipf_mutex, ipf_auth, ipf_nat;
+extern	kmutex_t	ipf_rw;
+# endif
+# if SOLARIS
+#  define	FR_NEWAUTH(m, fi, ip, qif)	fr_newauth((mb_t *)m, fi, \
+							   ip, qif)
+# else /* SOLARIS */
+#  define	FR_NEWAUTH(m, fi, ip, qif)	fr_newauth((mb_t *)m, fi, ip)
+# endif /* SOLARIS || __sgi */
+#endif /* _KERNEL */
+
+
+struct	filterstats frstats[2] = {{0,0,0,0,0},{0,0,0,0,0}};
+struct	frentry	*ipfilter[2][2] = { { NULL, NULL }, { NULL, NULL } },
+#ifdef	USE_INET6
+		*ipfilter6[2][2] = { { NULL, NULL }, { NULL, NULL } },
+		*ipacct6[2][2] = { { NULL, NULL }, { NULL, NULL } },
+#endif
+		*ipacct[2][2] = { { NULL, NULL }, { NULL, NULL } };
+struct	frgroup *ipfgroups[3][2];
+int	fr_flags = IPF_LOGGING;
+int	fr_active = 0;
+int	fr_chksrc = 0;
+int	fr_minttl = 3;
+int	fr_minttllog = 1;
+#if defined(IPFILTER_DEFAULT_BLOCK)
+int	fr_pass = FR_NOMATCH|FR_BLOCK;
+#else
+int	fr_pass = (IPF_DEFAULT_PASS|FR_NOMATCH);
+#endif
+char	ipfilter_version[] = IPL_VERSION;
+
+fr_info_t	frcache[2];
+
+static	int	frflushlist __P((int, minor_t, int *, frentry_t **));
+#ifdef	_KERNEL
+static	void	frsynclist __P((frentry_t *));
+#endif
+
+
+/*
+ * bit values for identifying presence of individual IP options
+ */
+struct	optlist	ipopts[20] = {
+	{ IPOPT_NOP,	0x000001 },
+	{ IPOPT_RR,	0x000002 },
+	{ IPOPT_ZSU,	0x000004 },
+	{ IPOPT_MTUP,	0x000008 },
+	{ IPOPT_MTUR,	0x000010 },
+	{ IPOPT_ENCODE,	0x000020 },
+	{ IPOPT_TS,	0x000040 },
+	{ IPOPT_TR,	0x000080 },
+	{ IPOPT_SECURITY, 0x000100 },
+	{ IPOPT_LSRR,	0x000200 },
+	{ IPOPT_E_SEC,	0x000400 },
+	{ IPOPT_CIPSO,	0x000800 },
+	{ IPOPT_SATID,	0x001000 },
+	{ IPOPT_SSRR,	0x002000 },
+	{ IPOPT_ADDEXT,	0x004000 },
+	{ IPOPT_VISA,	0x008000 },
+	{ IPOPT_IMITD,	0x010000 },
+	{ IPOPT_EIP,	0x020000 },
+	{ IPOPT_FINN,	0x040000 },
+	{ 0,		0x000000 }
+};
+
+/*
+ * bit values for identifying presence of individual IP security options
+ */
+struct	optlist	secopt[8] = {
+	{ IPSO_CLASS_RES4,	0x01 },
+	{ IPSO_CLASS_TOPS,	0x02 },
+	{ IPSO_CLASS_SECR,	0x04 },
+	{ IPSO_CLASS_RES3,	0x08 },
+	{ IPSO_CLASS_CONF,	0x10 },
+	{ IPSO_CLASS_UNCL,	0x20 },
+	{ IPSO_CLASS_RES2,	0x40 },
+	{ IPSO_CLASS_RES1,	0x80 }
+};
+
+
+/*
+ * Fill in fin (and its fin_fi summary) from the IPv4/IPv6 header at ip for
+ * fin->fin_v: addresses, protocol, lengths, ports/ICMP type, option masks.
+ */
+void	fr_makefrip(hlen, ip, fin)
+int hlen;
+ip_t *ip;
+fr_info_t *fin;
+{
+	u_short secmsk = 0, auth = 0;
+	u_32_t optmsk = 0;	/* ipopts[] bits run past bit 15 */
+	int i, mv, ol, off, p, plen, v;
+	fr_ip_t *fi = &fin->fin_fi;
+	struct optlist *op;
+	u_char *s, opt;
+	tcphdr_t *tcp;
+
+	fin->fin_rev = 0;
+	fin->fin_fr = NULL;
+	fin->fin_tcpf = 0;
+	fin->fin_data[0] = 0;
+	fin->fin_data[1] = 0;
+	fin->fin_rule = -1;
+	fin->fin_group = -1;
+#ifdef	_KERNEL
+	fin->fin_icode = ipl_unreach;
+#endif
+	v = fin->fin_v;
+	fi->fi_v = v;
+	fin->fin_hlen = hlen;
+	if (v == 4) {
+		fin->fin_id = ip->ip_id;
+		fi->fi_tos = ip->ip_tos;
+		off = (ip->ip_off & IP_OFFMASK) << 3;
+		tcp = (tcphdr_t *)((char *)ip + hlen);
+		(*(((u_short *)fi) + 1)) = (*(((u_short *)ip) + 4));
+		fi->fi_src.i6[1] = 0;
+		fi->fi_src.i6[2] = 0;
+		fi->fi_src.i6[3] = 0;
+		fi->fi_dst.i6[1] = 0;
+		fi->fi_dst.i6[2] = 0;
+		fi->fi_dst.i6[3] = 0;
+		fi->fi_saddr = ip->ip_src.s_addr;
+		fi->fi_daddr = ip->ip_dst.s_addr;
+		p = ip->ip_p;
+		fi->fi_fl = (hlen > sizeof(ip_t)) ? FI_OPTIONS : 0;
+		if (ip->ip_off & 0x3fff)
+			fi->fi_fl |= FI_FRAG;
+		plen = ip->ip_len;
+		fin->fin_dlen = plen - hlen;
+	}
+#ifdef	USE_INET6
+	else if (v == 6) {
+		ip6_t *ip6 = (ip6_t *)ip;
+
+		off = 0;
+		p = ip6->ip6_nxt;
+		fi->fi_p = p;
+		fi->fi_ttl = ip6->ip6_hlim;
+		tcp = (tcphdr_t *)(ip6 + 1);
+		fi->fi_src.in6 = ip6->ip6_src;
+		fi->fi_dst.in6 = ip6->ip6_dst;
+		fin->fin_id = (u_short)(ip6->ip6_flow & 0xffff);
+		fi->fi_tos = 0;
+		fi->fi_fl = 0;
+		plen = ntohs(ip6->ip6_plen);
+		fin->fin_dlen = plen;
+	}
+#endif
+	else
+		return;
+
+	fin->fin_off = off;
+	fin->fin_plen = plen;
+	fin->fin_dp = (void *)tcp;
+
+	switch (p)
+	{
+#ifdef USE_INET6
+	case IPPROTO_ICMPV6 :
+	{
+		int minicmpsz = sizeof(struct icmp6_hdr);
+		struct icmp6_hdr *icmp6;
+
+		if (fin->fin_dlen > 1) {
+			fin->fin_data[0] = *(u_short *)tcp;
+
+			icmp6 = (struct icmp6_hdr *)tcp;
+
+			switch (icmp6->icmp6_type)
+			{
+			case ICMP6_ECHO_REPLY :
+			case ICMP6_ECHO_REQUEST :
+				minicmpsz = ICMP6ERR_MINPKTLEN;
+				break;
+			case ICMP6_DST_UNREACH :
+			case ICMP6_PACKET_TOO_BIG :
+			case ICMP6_TIME_EXCEEDED :
+			case ICMP6_PARAM_PROB :
+				minicmpsz = ICMP6ERR_IPICMPHLEN;
+				break;
+			default :
+				break;
+			}
+		}
+
+		if (!(plen >= hlen + minicmpsz))
+			fi->fi_fl |= FI_SHORT;
+
+		break;
+	}
+#endif
+	case IPPROTO_ICMP :
+	{
+		int minicmpsz = sizeof(struct icmp);
+		icmphdr_t *icmp;
+
+		if (!off && (fin->fin_dlen > 1)) {
+			fin->fin_data[0] = *(u_short *)tcp;
+
+			icmp = (icmphdr_t *)tcp;
+
+			if (icmp->icmp_type == ICMP_ECHOREPLY ||
+			    icmp->icmp_type == ICMP_ECHO)
+				minicmpsz = ICMP_MINLEN;
+
+			/*
+			 * type(1) + code(1) + cksum(2) + id(2) seq(2) +
+			 * 3*timestamp(3*4)
+			 */
+			else if (icmp->icmp_type == ICMP_TSTAMP ||
+				 icmp->icmp_type == ICMP_TSTAMPREPLY)
+				minicmpsz = 20;
+
+			/*
+			 * type(1) + code(1) + cksum(2) + id(2) seq(2) +
+			 * mask(4)
+			 */
+			else if (icmp->icmp_type == ICMP_MASKREQ ||
+				 icmp->icmp_type == ICMP_MASKREPLY)
+				minicmpsz = 12;
+		}
+
+		if ((!(plen >= hlen + minicmpsz) && !off) ||
+		    (off && off < sizeof(struct icmp)))
+			fi->fi_fl |= FI_SHORT;
+
+		break;
+	}
+	case IPPROTO_TCP :
+		fi->fi_fl |= FI_TCPUDP;
+#ifdef	USE_INET6
+		if (v == 6) {
+			if (plen < sizeof(struct tcphdr))
+				fi->fi_fl |= FI_SHORT;
+		} else
+#endif
+		if (v == 4) {
+			if ((!IPMINLEN(ip, tcphdr) && !off) ||
+			     (off && off < sizeof(struct tcphdr)))
+				fi->fi_fl |= FI_SHORT;
+		}
+		if (!(fi->fi_fl & FI_SHORT) && !off)
+			fin->fin_tcpf = tcp->th_flags;
+		goto getports;
+	case IPPROTO_UDP :
+		fi->fi_fl |= FI_TCPUDP;
+#ifdef	USE_INET6
+		if (v == 6) {
+			if (plen < sizeof(struct udphdr))
+				fi->fi_fl |= FI_SHORT;
+		} else
+#endif
+		if (v == 4) {
+			if ((!IPMINLEN(ip, udphdr) && !off) ||
+			    (off && off < sizeof(struct udphdr)))
+				fi->fi_fl |= FI_SHORT;
+		}
+getports:
+		if (!off && (fin->fin_dlen > 3)) {
+			fin->fin_data[0] = ntohs(tcp->th_sport);
+			fin->fin_data[1] = ntohs(tcp->th_dport);
+		}
+		break;
+	default :
+		break;
+	}
+
+#ifdef	USE_INET6
+	if (v == 6) {
+		fi->fi_optmsk = 0;
+		fi->fi_secmsk = 0;
+		fi->fi_auth = 0;
+		return;
+	}
+#endif
+	/* IPv4 only from here: walk the header options to build the masks */
+	for (s = (u_char *)(ip + 1), hlen -= (int)sizeof(*ip); hlen > 0; ) {
+		opt = *s;
+		if (opt == '\0')
+			break;
+		else if (opt == IPOPT_NOP)
+			ol = 1;
+		else {
+			if (hlen < 2)
+				break;
+			ol = (int)*(s + 1);
+			if (ol < 2 || ol > hlen)
+				break;
+		}
+		for (i = 9, mv = 4; mv >= 0; ) {	/* stepwise search of ipopts[] */
+			op = ipopts + i;
+			if (opt == (u_char)op->ol_val) {
+				optmsk |= op->ol_bit;
+				/* s[2..4] are read below, so the option must hold 5 bytes */
+				if (opt == IPOPT_SECURITY && ol > 4) {
+					struct optlist *sp;
+					u_char	sec;
+					int j, m;
+
+					sec = *(s + 2);	/* classification */
+					for (j = 3, m = 2; m >= 0; ) {
+						sp = secopt + j;
+						if (sec == sp->ol_val) {
+							secmsk |= sp->ol_bit;
+							auth = *(s + 3) * 256 + *(s + 4);
+							break;
+						}
+						if (sec < sp->ol_val)
+							j -= m--;
+						else
+							j += m--;
+					}
+				}
+				break;
+			}
+			if (opt < op->ol_val)
+				i -= mv--;
+			else
+				i += mv--;
+		}
+		hlen -= ol;
+		s += ol;
+	}
+	if (auth && !(auth & 0x0100))
+		auth &= 0xff00;
+	fi->fi_optmsk = optmsk;
+	fi->fi_secmsk = secmsk;
+	fi->fi_auth = auth;
+}
+
+
+/*
+ * check a packet's ports and TCP flags against ft: 1 = match, 0 = no match.
+ */
+int fr_tcpudpchk(ft, fin)
+frtuc_t *ft;
+fr_info_t *fin;
+{
+	register u_short po, tup;
+	register char i;
+	register int err = 1;
+
+	/*
+	 * Both ports should *always* be in the first fragment.
+	 * So far, I cannot find any cases where they can not be.
+	 *
+	 * compare destination ports
+	 */
+	if ((i = (int)ft->ftu_dcmp)) {
+		po = ft->ftu_dport;
+		tup = fin->fin_data[1];
+		/*
+		 * Do opposite test to that required and
+		 * continue if that succeeds.
+		 * Each "!--i" counts i down once, so only the branch whose
+		 * position equals the comparison code can fire.
+		 */
+		if (!--i && tup != po) /* EQUAL */
+			err = 0;
+		else if (!--i && tup == po) /* NOTEQUAL */
+			err = 0;
+		else if (!--i && tup >= po) /* LESSTHAN */
+			err = 0;
+		else if (!--i && tup <= po) /* GREATERTHAN */
+			err = 0;
+		else if (!--i && tup > po) /* LT or EQ */
+			err = 0;
+		else if (!--i && tup < po) /* GT or EQ */
+			err = 0;
+		else if (!--i &&	   /* Out of range */
+			 (tup >= po && tup <= ft->ftu_dtop))
+			err = 0;
+		else if (!--i &&	   /* In range */
+			 (tup <= po || tup >= ft->ftu_dtop))
+			err = 0;
+	}
+	/*
+	 * compare source ports
+	 */
+	if (err && (i = (int)ft->ftu_scmp)) {
+		po = ft->ftu_sport;
+		tup = fin->fin_data[0];
+		if (!--i && tup != po) /* EQUAL */
+			err = 0;
+		else if (!--i && tup == po) /* NOTEQUAL */
+			err = 0;
+		else if (!--i && tup >= po) /* LESSTHAN */
+			err = 0;
+		else if (!--i && tup <= po) /* GREATERTHAN */
+			err = 0;
+		else if (!--i && tup > po) /* LT or EQ */
+			err = 0;
+		else if (!--i && tup < po) /* GT or EQ */
+			err = 0;
+		else if (!--i &&	   /* Out of range */
+			 (tup >= po && tup <= ft->ftu_stop))
+			err = 0;
+		else if (!--i &&	   /* In range */
+			 (tup <= po || tup >= ft->ftu_stop))
+			err = 0;
+	}
+
+	/*
+	 * If we don't have all the TCP/UDP header, then how can we
+	 * expect to do any sort of match on it ?  If we were looking for
+	 * TCP flags, then NO match.  If not, then match (which should
+	 * satisfy the "short" class too).
+	 */
+	if (err && (fin->fin_fi.fi_p == IPPROTO_TCP)) {
+		if (fin->fin_fi.fi_fl & FI_SHORT)
+			return !(ft->ftu_tcpf | ft->ftu_tcpfm);
+		/*
+		 * Match the flags ?  If not, abort this match.
+		 */
+		if (ft->ftu_tcpfm &&
+		    ft->ftu_tcpf != (fin->fin_tcpf & ft->ftu_tcpfm)) {
+			FR_DEBUG(("f. %#x & %#x != %#x\n", fin->fin_tcpf,
+				 ft->ftu_tcpfm, ft->ftu_tcpf));
+			err = 0;
+		}
+	}
+	return err;
+}
+
+/*
+ * Walk the rule list that starts at fin->fin_fr and return the resulting
+ * pass flags; fin_fr, fin_rule and fin_group record the last rule matched.
+ * (Could be per interface, but that gets nasty without kernel source.)
+ */
+int fr_scanlist(pass, ip, fin, m)
+u_32_t pass;
+ip_t *ip;
+register fr_info_t *fin;
+void *m;
+{
+	register struct frentry *fr;
+	register fr_ip_t *fi = &fin->fin_fi;
+	int rulen, portcmp = 0, off, skip = 0, logged = 0;
+	u_32_t passt;
+
+	fr = fin->fin_fr;
+	fin->fin_fr = NULL;
+	fin->fin_rule = 0;
+	fin->fin_group = 0;
+	if (fin->fin_v == 4)
+		off = ip->ip_off & IP_OFFMASK;
+	else
+		off = 0;
+	pass |= (fi->fi_fl << 24);	/* packet flag bits go above bit 23 */
+
+	if ((fi->fi_fl & FI_TCPUDP) && (fin->fin_dlen > 3) && !off)
+		portcmp = 1;
+
+	for (rulen = 0; fr; fr = fr->fr_next, rulen++) {
+		if (skip) {	/* count set from fr_skip of an earlier match */
+			skip--;
+			continue;
+		}
+		/*
+		 * In all checks below, a null (zero) value in the
+		 * filter structure is taken to mean a wildcard.
+		 *
+		 * check that we are working for the right interface
+		 */
+#ifdef	_KERNEL
+# if BSD >= 199306
+		if (fin->fin_out != 0) {
+			if ((fr->fr_oifa &&
+			     fr->fr_oifa != ((mb_t *)m)->m_pkthdr.rcvif) ||
+			    (fr->fr_ifa && fr->fr_ifa != fin->fin_ifp))
+				continue;
+		} else
+# endif
+			if (fr->fr_ifa && fr->fr_ifa != fin->fin_ifp)
+				continue;
+#else
+		if (opts & (OPT_VERBOSE|OPT_DEBUG))
+			printf("\n");
+		FR_VERBOSE(("%c", (pass & FR_PASS) ? 'p' : 
+				  (pass & FR_AUTH) ? 'a' : 'b'));
+		if (fr->fr_ifa && fr->fr_ifa != fin->fin_ifp)
+			continue;
+		FR_VERBOSE((":i"));
+#endif
+		{
+			register u_32_t	*ld, *lm, *lip;
+			register int i;
+
+			lip = (u_32_t *)fi;
+			lm = (u_32_t *)&fr->fr_mip;
+			ld = (u_32_t *)&fr->fr_ip;
+			i = ((*lip & *lm) != *ld);
+			FR_DEBUG(("0. %#08x & %#08x != %#08x\n",
+				   *lip, *lm, *ld));
+			if (i)
+				continue;
+			/*
+			 * We now know whether the packet version and the
+			 * rule version match, along with protocol, ttl and
+			 * tos.
+			 */
+			lip++, lm++, ld++;
+			/*
+			 * Unrolled loops (4 each, for 32 bits).
+			 */
+			i |= ((*lip & *lm) != *ld) << 19;
+			FR_DEBUG(("1a. %#08x & %#08x != %#08x\n",
+				   *lip, *lm, *ld));
+			if (fi->fi_v == 6) {
+				lip++, lm++, ld++;
+				i |= ((*lip & *lm) != *ld) << 19;
+				FR_DEBUG(("1b. %#08x & %#08x != %#08x\n",
+					   *lip, *lm, *ld));
+				lip++, lm++, ld++;
+				i |= ((*lip & *lm) != *ld) << 19;
+				FR_DEBUG(("1c. %#08x & %#08x != %#08x\n",
+					   *lip, *lm, *ld));
+				lip++, lm++, ld++;
+				i |= ((*lip & *lm) != *ld) << 19;
+				FR_DEBUG(("1d. %#08x & %#08x != %#08x\n",
+					   *lip, *lm, *ld));
+			} else {
+				lip += 3;
+				lm += 3;
+				ld += 3;
+			}
+			/* a set FR_NOTSRCIP flag inverts the source result */
+			i ^= (fr->fr_flags & FR_NOTSRCIP);
+			if (i)
+				continue;
+			lip++, lm++, ld++;
+			i |= ((*lip & *lm) != *ld) << 20;
+			FR_DEBUG(("2a. %#08x & %#08x != %#08x\n",
+				   *lip, *lm, *ld));
+			if (fi->fi_v == 6) {
+				lip++, lm++, ld++;
+				i |= ((*lip & *lm) != *ld) << 20;
+				FR_DEBUG(("2b. %#08x & %#08x != %#08x\n",
+					   *lip, *lm, *ld));
+				lip++, lm++, ld++;
+				i |= ((*lip & *lm) != *ld) << 20;
+				FR_DEBUG(("2c. %#08x & %#08x != %#08x\n",
+					   *lip, *lm, *ld));
+				lip++, lm++, ld++;
+				i |= ((*lip & *lm) != *ld) << 20;
+				FR_DEBUG(("2d. %#08x & %#08x != %#08x\n",
+					   *lip, *lm, *ld));
+			} else {
+				lip += 3;
+				lm += 3;
+				ld += 3;
+			}
+			/* likewise FR_NOTDSTIP for the destination result */
+			i ^= (fr->fr_flags & FR_NOTDSTIP);
+			if (i)
+				continue;
+			lip++, lm++, ld++;
+			i |= ((*lip & *lm) != *ld);
+			FR_DEBUG(("3. %#08x & %#08x != %#08x\n",
+				   *lip, *lm, *ld));
+			lip++, lm++, ld++;
+			i |= ((*lip & *lm) != *ld);
+			FR_DEBUG(("4. %#08x & %#08x != %#08x\n",
+				   *lip, *lm, *ld));
+			if (i)
+				continue;
+		}
+
+		/*
+		 * If a fragment, then only the first has what we're looking
+		 * for here...
+		 */
+		if (!portcmp && (fr->fr_dcmp || fr->fr_scmp || fr->fr_tcpf ||
+				 fr->fr_tcpfm))
+			continue;
+		if (fi->fi_fl & FI_TCPUDP) {
+			if (!fr_tcpudpchk(&fr->fr_tuc, fin))
+				continue;
+		} else if (fr->fr_icmpm || fr->fr_icmp) {
+			if ((fi->fi_p != IPPROTO_ICMP) || off ||
+			    (fin->fin_dlen < 2))
+				continue;
+			if ((fin->fin_data[0] & fr->fr_icmpm) != fr->fr_icmp) {
+				FR_DEBUG(("i. %#x & %#x != %#x\n",
+					 fin->fin_data[0], fr->fr_icmpm,
+					 fr->fr_icmp));
+				continue;
+			}
+		}
+		FR_VERBOSE(("*"));
+		/*
+		 * Just log this packet...
+		 */
+		passt = fr->fr_flags;
+#if (BSD >= 199306) && (defined(_KERNEL) || defined(KERNEL))
+		if (securelevel <= 0)
+#endif
+			if ((passt & FR_CALLNOW) && fr->fr_func)
+				passt = (*fr->fr_func)(passt, ip, fin);
+		fin->fin_fr = fr;
+#ifdef  IPFILTER_LOG
+		if ((passt & FR_LOGMASK) == FR_LOG) {
+			if (!IPLLOG(passt, ip, fin, m)) {
+				if (passt & FR_LOGORBLOCK)
+					passt |= FR_BLOCK|FR_QUICK;
+				ATOMIC_INCL(frstats[fin->fin_out].fr_skip);
+			}
+			ATOMIC_INCL(frstats[fin->fin_out].fr_pkl);
+			logged = 1;
+		}
+#endif /* IPFILTER_LOG */
+		/* a log-only rule or one with a skip count leaves pass alone */
+		if (!(skip = fr->fr_skip) && (passt & FR_LOGMASK) != FR_LOG)
+			pass = passt;
+		FR_DEBUG(("pass %#x\n", pass));
+		ATOMIC_INCL(fr->fr_hits);
+		if (pass & FR_ACCOUNT)
+			fr->fr_bytes += (U_QUAD_T)ip->ip_len;
+		else
+			fin->fin_icode = fr->fr_icode;
+		fin->fin_rule = rulen;
+		fin->fin_group = fr->fr_group;
+		if (fr->fr_grp) {	/* scan this rule's group list recursively */
+			fin->fin_fr = fr->fr_grp;
+			pass = fr_scanlist(pass, ip, fin, m);
+			if (fin->fin_fr == NULL) {
+				fin->fin_rule = rulen;
+				fin->fin_group = fr->fr_group;
+				fin->fin_fr = fr;
+			}
+			if (pass & FR_DONTCACHE)
+				logged = 1;
+		}
+		if (pass & FR_QUICK)
+			break;
+	}
+	if (logged)	/* set by a log rule or a group scan that returned DONTCACHE */
+		pass |= FR_DONTCACHE;
+	return pass;
+}
+
+
+/*
+ * fr_check - filter check
+ * Decide, using the source and destination addresses/ports in a packet,
+ * whether or not to pass it on.
+ *
+ * Parameters:
+ *	ip   - start of the IP header (the version is read from ip->ip_v)
+ *	hlen - length of the IP header in bytes, as supplied by the caller
+ *	ifp  - interface the packet is associated with (stored in fin_ifp)
+ *	out  - non-zero for an outbound packet, zero for an inbound packet
+ *	qif  - (Solaris kernel builds only) queue information for the packet
+ *	mp   - address of the caller's packet pointer; rewritten when
+ *	       m_pullup() replaces the buffer and set to NULL when the packet
+ *	       has been handed to the auth queue or to ipfr_fastroute()
+ *
+ * Return value (kernel):
+ *	0 if the packet passes, otherwise "error", which starts out as
+ *	EHOSTUNREACH and becomes ECONNRESET for a blocked outbound packet with
+ *	FR_RETRST set.  -1 is returned if the headers cannot be pulled up and,
+ *	on Solaris, 2 is returned if ip is not 32-bit aligned.
+ * Return value (userland):
+ *	1 if no rule matched, 0 if the packet passes, -2 if it needs
+ *	authentication and -1 otherwise.
+ */
+int fr_check(ip, hlen, ifp, out
+#if defined(_KERNEL) && SOLARIS
+, qif, mp)
+qif_t *qif;
+#else
+, mp)
+#endif
+mb_t **mp;
+ip_t *ip;
+int hlen;
+void *ifp;
+int out;
+{
+	/*
+	 * The above really sucks, but short of writing a diff
+	 */
+	fr_info_t frinfo, *fc;
+	register fr_info_t *fin = &frinfo;
+	int changed, error = EHOSTUNREACH, v = ip->ip_v;
+	frentry_t *fr = NULL, *list;
+	u_32_t pass, apass;
+#if !SOLARIS || !defined(_KERNEL)
+	register mb_t *m = *mp;
+#endif
+
+#ifdef	_KERNEL
+	int p, len, drop = 0, logit = 0;
+	mb_t *mc = NULL;
+# if !defined(__SVR4) && !defined(__svr4__)
+#  ifdef __sgi
+	char hbuf[(0xf << 2) + sizeof(struct icmp) + sizeof(ip_t) + 8];
+#  endif
+	int up;
+
+#  ifdef M_CANFASTFWD
+	/*
+	 * XXX For now, IP Filter and fast-forwarding of cached flows
+	 * XXX are mutually exclusive.  Eventually, IP Filter should
+	 * XXX get a "can-fast-forward" filter rule.
+	 */
+	m->m_flags &= ~M_CANFASTFWD;
+#  endif /* M_CANFASTFWD */
+#  ifdef CSUM_DELAY_DATA
+	/*
+	 * disable delayed checksums.
+	 */
+	if (m->m_pkthdr.csum_flags & CSUM_DELAY_DATA) {
+		in_delayed_cksum(m);
+		m->m_pkthdr.csum_flags &= ~CSUM_DELAY_DATA;
+	}
+#  endif /* CSUM_DELAY_DATA */
+
+# ifdef	USE_INET6
+	if (v == 6) {
+		len = ntohs(((ip6_t*)ip)->ip6_plen);
+		p = ((ip6_t *)ip)->ip6_nxt;
+	} else
+# endif
+	{
+		p = ip->ip_p;
+		len = ip->ip_len;
+	}
+
+	/*
+	 * For the protocols whose headers are examined, work out how many
+	 * bytes ("up") must be contiguous at the front of the packet and pull
+	 * them up if the first mbuf is too short.
+	 */
+	if ((p == IPPROTO_TCP || p == IPPROTO_UDP || p == IPPROTO_ICMP
+# ifdef USE_INET6
+	    || (v == 6 && p == IPPROTO_ICMPV6)
+# endif
+	   )) {
+		int plen = 0;
+
+		/* only IPv6 or the first fragment carries a protocol header */
+		if ((v == 6) || (ip->ip_off & IP_OFFMASK) == 0)
+			switch(p)
+			{
+			case IPPROTO_TCP:
+				plen = sizeof(tcphdr_t);
+				break;
+			case IPPROTO_UDP:
+				plen = sizeof(udphdr_t);
+				break;
+			/* 96 - enough for complete ICMP error IP header */
+			case IPPROTO_ICMP:
+				plen = ICMPERR_MAXPKTLEN - sizeof(ip_t);
+				break;
+# ifdef USE_INET6
+			case IPPROTO_ICMPV6 :
+				/*
+				 * XXX does not take intermediate header
+				 * into account
+				 */
+				plen = ICMP6ERR_MINPKTLEN + 8 - sizeof(ip6_t);
+				break;
+# endif
+			}
+		up = MIN(hlen + plen, len);
+
+		if (up > m->m_len) {
+#  ifdef __sgi
+	/* Under IRIX, avoid m_pullup as it makes ping <hostname> panic */
+			if ((up > sizeof(hbuf)) || (m_length(m) < up)) {
+				ATOMIC_INCL(frstats[out].fr_pull[1]);
+				return -1;
+			}
+			m_copydata(m, 0, up, hbuf);
+			ATOMIC_INCL(frstats[out].fr_pull[0]);
+			ip = (ip_t *)hbuf;
+#  else /* __ sgi */
+#   ifndef linux
+			if ((*mp = m_pullup(m, up)) == 0) {
+				ATOMIC_INCL(frstats[out].fr_pull[1]);
+				return -1;
+			} else {
+				ATOMIC_INCL(frstats[out].fr_pull[0]);
+				m = *mp;
+				ip = mtod(m, ip_t *);
+			}
+#   endif /* !linux */
+#  endif /* __sgi */
+		} else
+			up = 0;
+	} else
+		up = 0;
+# endif /* !defined(__SVR4) && !defined(__svr4__) */
+# if SOLARIS
+	mb_t *m = qif->qf_m;
+
+	if ((u_int)ip & 0x3)
+		return 2;
+	fin->fin_qfm = m;
+	fin->fin_qif = qif;
+# endif
+#endif /* _KERNEL */
+
+#ifndef __FreeBSD__
+	/*
+	 * Be careful here: ip_id is in network byte order when called
+	 * from ip_output()
+	 */
+	if ((out) && (v == 4))
+		ip->ip_id = ntohs(ip->ip_id);
+#endif
+
+	changed = 0;
+	fin->fin_ifp = ifp;
+	fin->fin_v = v;
+	fin->fin_out = out;
+	fin->fin_mp = mp;
+	fr_makefrip(hlen, ip, fin);
+
+#ifdef _KERNEL
+# ifdef	USE_INET6
+	if (v == 6) {
+		ATOMIC_INCL(frstats[0].fr_ipv6[out]);
+		if (((ip6_t *)ip)->ip6_hlim < fr_minttl) {
+			ATOMIC_INCL(frstats[0].fr_badttl);
+			if (fr_minttllog)
+				logit = -2;
+		}
+	} else
+# endif
+	if (!out) {
+		if (fr_chksrc && !fr_verifysrc(ip->ip_src, ifp)) {
+			ATOMIC_INCL(frstats[0].fr_badsrc);
+			if (fr_chksrc == 2)
+				logit = -2;
+		} else if (ip->ip_ttl < fr_minttl) {
+			ATOMIC_INCL(frstats[0].fr_badttl);
+			if (fr_minttllog)
+				logit = -3;
+		}
+	}
+	/*
+	 * NOTE(review): "drop" is initialised to 0 and never assigned in this
+	 * function, so the branch below is not taken as the code stands.
+	 */
+	if (drop) {
+# ifdef	IPFILTER_LOG
+		if (logit) {
+			fin->fin_group = logit;
+			pass = FR_INQUE|FR_NOMATCH|FR_LOGB;
+			(void) IPLLOG(pass, ip, fin, m);
+		}
+# endif
+# if !SOLARIS
+		m_freem(m);
+# endif
+		return error;
+	}
+#endif
+	pass = fr_pass;
+	/*
+	 * Count a short packet exactly once; the counter used to be bumped a
+	 * second time after taking ipf_mutex, doubling the statistic.
+	 */
+	if (fin->fin_fi.fi_fl & FI_SHORT) {
+		ATOMIC_INCL(frstats[out].fr_short);
+	}
+
+	READ_ENTER(&ipf_mutex);
+
+	/*
+	 * Check auth now.  This, combined with the check below to see if apass
+	 * is 0 is to ensure that we don't count the packet twice, which can
+	 * otherwise occur when we reprocess it.  As it is, we only count it
+	 * after it has no auth. table matchup.  This also stops NAT from
+	 * occurring until after the packet has been auth'd.
+	 */
+	apass = fr_checkauth(ip, fin);
+
+	if (!out) {
+#ifdef	USE_INET6
+		if (v == 6)
+			list = ipacct6[0][fr_active];
+		else
+#endif
+			list = ipacct[0][fr_active];
+		changed = ip_natin(ip, fin);
+		if (!apass && (fin->fin_fr = list) &&
+		    (fr_scanlist(FR_NOMATCH, ip, fin, m) & FR_ACCOUNT)) {
+			ATOMIC_INCL(frstats[0].fr_acct);
+		}
+	}
+
+	if (apass || (!(fr = ipfr_knownfrag(ip, fin)) &&
+	    !(fr = fr_checkstate(ip, fin)))) {
+		/*
+		 * If a packet is found in the auth table, then skip checking
+		 * the access lists for permission but we do need to consider
+		 * the result as if it were from the ACL's.
+		 */
+		if (!apass) {
+			fc = frcache + out;
+			if (!bcmp((char *)fin, (char *)fc, FI_CSIZE)) {
+				/*
+				 * copy cached data so we can unlock the mutex
+				 * earlier.
+				 */
+				bcopy((char *)fc, (char *)fin, FI_COPYSIZE);
+				ATOMIC_INCL(frstats[out].fr_chit);
+				if ((fr = fin->fin_fr)) {
+					ATOMIC_INCL(fr->fr_hits);
+					pass = fr->fr_flags;
+				}
+			} else {
+#ifdef	USE_INET6
+				if (v == 6)
+					list = ipfilter6[out][fr_active];
+				else
+#endif
+					list = ipfilter[out][fr_active];
+				if ((fin->fin_fr = list))
+					pass = fr_scanlist(fr_pass, ip, fin, m);
+				/* remember the result unless told not to */
+				if (!(pass & (FR_KEEPSTATE|FR_DONTCACHE)))
+					bcopy((char *)fin, (char *)fc,
+					      FI_COPYSIZE);
+				if (pass & FR_NOMATCH) {
+					ATOMIC_INCL(frstats[out].fr_nom);
+				}
+			}
+			fr = fin->fin_fr;
+		} else
+			pass = apass;
+
+		/*
+		 * If we fail to add a packet to the authorization queue,
+		 * then we drop the packet later.  However, if it was added
+		 * then pretend we've dropped it already.
+		 */
+		if ((pass & FR_AUTH))
+			if (fr_newauth((mb_t *)m, fin, ip) != 0)
+#ifdef	_KERNEL
+				m = *mp = NULL;
+#else
+				;
+#endif
+
+		if (pass & FR_PREAUTH) {
+			READ_ENTER(&ipf_auth);
+			if ((fin->fin_fr = ipauth) &&
+			    (pass = fr_scanlist(0, ip, fin, m))) {
+				ATOMIC_INCL(fr_authstats.fas_hits);
+			} else {
+				ATOMIC_INCL(fr_authstats.fas_miss);
+			}
+			RWLOCK_EXIT(&ipf_auth);
+		}
+
+		fin->fin_fr = fr;
+		if ((pass & (FR_KEEPFRAG|FR_KEEPSTATE)) == FR_KEEPFRAG) {
+			if (fin->fin_fi.fi_fl & FI_FRAG) {
+				if (ipfr_newfrag(ip, fin, pass) == -1) {
+					ATOMIC_INCL(frstats[out].fr_bnfr);
+				} else {
+					ATOMIC_INCL(frstats[out].fr_nfr);
+				}
+			} else {
+				ATOMIC_INCL(frstats[out].fr_cfr);
+			}
+		}
+		if (pass & FR_KEEPSTATE) {
+			if (fr_addstate(ip, fin, 0) == NULL) {
+				ATOMIC_INCL(frstats[out].fr_bads);
+			} else {
+				ATOMIC_INCL(frstats[out].fr_ads);
+			}
+		}
+	} else if (fr != NULL) {
+		/* matched an existing fragment or state entry */
+		pass = fr->fr_flags;
+		if (pass & FR_LOGFIRST)
+			pass &= ~(FR_LOGFIRST|FR_LOG);
+	}
+
+#if (BSD >= 199306) && (defined(_KERNEL) || defined(KERNEL))
+	if (securelevel <= 0)
+#endif
+		if (fr && fr->fr_func && !(pass & FR_CALLNOW))
+			pass = (*fr->fr_func)(pass, ip, fin);
+
+	/*
+	 * Only count/translate packets which will be passed on, out the
+	 * interface.
+	 */
+	if (out && (pass & FR_PASS)) {
+#ifdef	USE_INET6
+		if (v == 6)
+			list = ipacct6[1][fr_active];
+		else
+#endif
+			list = ipacct[1][fr_active];
+		if ((fin->fin_fr = list) &&
+		    (fr_scanlist(FR_NOMATCH, ip, fin, m) & FR_ACCOUNT)) {
+			ATOMIC_INCL(frstats[1].fr_acct);
+		}
+		fin->fin_fr = fr;
+		changed = ip_natout(ip, fin);
+	} else
+		fin->fin_fr = fr;
+	RWLOCK_EXIT(&ipf_mutex);
+
+#ifdef	IPFILTER_LOG
+	if ((fr_flags & FF_LOGGING) || (pass & FR_LOGMASK)) {
+		if ((fr_flags & FF_LOGNOMATCH) && (pass & FR_NOMATCH)) {
+			pass |= FF_LOGNOMATCH;
+			ATOMIC_INCL(frstats[out].fr_npkl);
+			goto logit;
+		} else if (((pass & FR_LOGMASK) == FR_LOGP) ||
+		    ((pass & FR_PASS) && (fr_flags & FF_LOGPASS))) {
+			if ((pass & FR_LOGMASK) != FR_LOGP)
+				pass |= FF_LOGPASS;
+			ATOMIC_INCL(frstats[out].fr_ppkl);
+			goto logit;
+		} else if (((pass & FR_LOGMASK) == FR_LOGB) ||
+			   ((pass & FR_BLOCK) && (fr_flags & FF_LOGBLOCK))) {
+			if ((pass & FR_LOGMASK) != FR_LOGB)
+				pass |= FF_LOGBLOCK;
+			ATOMIC_INCL(frstats[out].fr_bpkl);
+logit:
+			/*
+			 * If logging fails and the rule says "log or block",
+			 * turn the pass into a block.
+			 */
+			if (!IPLLOG(pass, ip, fin, m)) {
+				ATOMIC_INCL(frstats[out].fr_skip);
+				if ((pass & (FR_PASS|FR_LOGORBLOCK)) ==
+				    (FR_PASS|FR_LOGORBLOCK))
+					pass ^= FR_PASS|FR_BLOCK;
+			}
+		}
+	}
+#endif /* IPFILTER_LOG */
+
+#ifndef __FreeBSD__
+	/* undo the byte order change made to ip_id on entry */
+	if ((out) && (v == 4))
+		ip->ip_id = htons(ip->ip_id);
+#endif
+
+#ifdef	_KERNEL
+	/*
+	 * Only allow FR_DUP to work if a rule matched - it makes no sense to
+	 * set FR_DUP as a "default" as there are no instructions about where
+	 * to send the packet.
+	 */
+	if (fr && (pass & FR_DUP))
+# if	SOLARIS
+		mc = dupmsg(m);
+# else
+#  ifndef linux
+		mc = m_copy(m, 0, M_COPYALL);
+#  else
+		;
+#  endif
+# endif
+#endif
+	if (pass & FR_PASS) {
+		ATOMIC_INCL(frstats[out].fr_pass);
+	} else if (pass & FR_BLOCK) {
+		ATOMIC_INCL(frstats[out].fr_block);
+		/*
+		 * Should we return an ICMP packet to indicate error
+		 * status passing through the packet filter ?
+		 * WARNING: ICMP error packets AND TCP RST packets should
+		 * ONLY be sent in response to incoming packets.  Sending them
+		 * in response to outbound packets can result in a panic on
+		 * some operating systems.
+		 */
+		if (!out) {
+#ifdef	_KERNEL
+			if (pass & FR_RETICMP) {
+				int dst;
+
+				if ((pass & FR_RETMASK) == FR_FAKEICMP)
+					dst = 1;
+				else
+					dst = 0;
+				send_icmp_err(ip, ICMP_UNREACH, fin, dst);
+				ATOMIC_INCL(frstats[0].fr_ret);
+			} else if (((pass & FR_RETMASK) == FR_RETRST) &&
+				   !(fin->fin_fi.fi_fl & FI_SHORT)) {
+				if (send_reset(ip, fin) == 0) {
+					ATOMIC_INCL(frstats[1].fr_ret);
+				}
+			}
+#else
+			if ((pass & FR_RETMASK) == FR_RETICMP) {
+				verbose("- ICMP unreachable sent\n");
+				ATOMIC_INCL(frstats[0].fr_ret);
+			} else if ((pass & FR_RETMASK) == FR_FAKEICMP) {
+				verbose("- forged ICMP unreachable sent\n");
+				ATOMIC_INCL(frstats[0].fr_ret);
+			} else if (((pass & FR_RETMASK) == FR_RETRST) &&
+				   !(fin->fin_fi.fi_fl & FI_SHORT)) {
+				verbose("- TCP RST sent\n");
+				ATOMIC_INCL(frstats[1].fr_ret);
+			}
+#endif
+		} else {
+			if (pass & FR_RETRST)
+				error = ECONNRESET;
+		}
+	}
+
+	/*
+	 * If we didn't drop off the bottom of the list of rules (and thus
+	 * the 'current' rule fr is not NULL), then we may have some extra
+	 * instructions about what to do with a packet.
+	 * Once we're finished return to our caller, freeing the packet if
+	 * we are dropping it (* BSD ONLY *).
+	 */
+	/* a NAT result of -1 turns a pass into a block */
+	if ((changed == -1) && (pass & FR_PASS)) {
+		pass &= ~FR_PASS;
+		pass |= FR_BLOCK;
+	}
+#if defined(_KERNEL)
+# if !SOLARIS
+#  if !defined(linux)
+	if (fr) {
+		frdest_t *fdp = &fr->fr_tif;
+
+		if (((pass & FR_FASTROUTE) && !out) ||
+		    (fdp->fd_ifp && fdp->fd_ifp != (struct ifnet *)-1)) {
+			if (ipfr_fastroute(m, fin, fdp) == 0)
+				m = *mp = NULL;
+		}
+		if (mc)
+			ipfr_fastroute(mc, fin, &fr->fr_dif);
+	}
+	if (!(pass & FR_PASS) && m)
+		m_freem(m);
+#   ifdef __sgi
+	else if (changed && up && m)
+		m_copyback(m, 0, up, hbuf);
+#   endif
+#  endif /* !linux */
+# else /* !SOLARIS */
+	if (fr) {
+		frdest_t *fdp = &fr->fr_tif;
+
+		if (((pass & FR_FASTROUTE) && !out) ||
+		    (fdp->fd_ifp && fdp->fd_ifp != (struct ifnet *)-1)) {
+			if (ipfr_fastroute(ip, m, mp, fin, fdp) == 0)
+				m = *mp = NULL;
+		}
+		if (mc)
+			ipfr_fastroute(ip, mc, mp, fin, &fr->fr_dif);
+	}
+# endif /* !SOLARIS */
+	return (pass & FR_PASS) ? 0 : error;
+#else /* _KERNEL */
+	if (pass & FR_NOMATCH)
+		return 1;
+	if (pass & FR_PASS)
+		return 0;
+	if (pass & FR_AUTH)
+		return -2;
+	return -1;
+#endif /* _KERNEL */
+}
+
+
+/*
+ * ipf_cksum
+ * Compute the 16-bit one's complement checksum over a buffer.
+ *
+ * addr - start of the data; should be 16bit aligned
+ * len  - length of the data in bytes
+ *
+ * Returns the complemented, folded sum as a u_short.  The data is summed
+ * as 16-bit words exactly as they sit in memory, so the result is ready to
+ * be stored back into the packet without a byte swap.
+ */
+u_short ipf_cksum(addr, len)
+register u_short *addr;
+register int len;
+{
+	register u_32_t sum = 0;
+	union {
+		u_char	c[2];
+		u_short	s;
+	} last;
+
+	for (sum = 0; len > 1; len -= 2)
+		sum += *addr++;
+
+	/*
+	 * mop up an odd byte, if necessary.  The byte is the first octet of
+	 * a 16-bit word padded with a zero octet, so build that word in memory
+	 * order rather than adding the bare byte value, which is only right
+	 * on little-endian hosts.
+	 */
+	if (len == 1) {
+		last.c[0] = *(u_char *)addr;
+		last.c[1] = 0;
+		sum += last.s;
+	}
+
+	/*
+	 * add back carry outs from top 16 bits to low 16 bits
+	 */
+	sum = (sum >> 16) + (sum & 0xffff);	/* add hi 16 to low 16 */
+	sum += (sum >> 16);			/* add carry */
+	return (u_short)(~sum);
+}
+
+
+/*
+ * fr_tcpsum - compute the TCP checksum for a packet
+ *
+ * m   - packet buffer holding the packet
+ * ip  - IP header of the packet
+ * tcp - TCP header of the packet
+ *
+ * The checksum field in the TCP header is saved, zeroed while the sum is
+ * computed and restored before returning; the computed value is returned.
+ * When KERNEL is not defined no sum is computed and 0 is returned.
+ *
+ * NB: This function assumes we've pullup'd enough for all of the IP header
+ * and the TCP header.  We also assume that data blocks aren't allocated in
+ * odd sizes.
+ *
+ * NOTE(review): this function tests KERNEL whereas the rest of the file
+ * mostly tests _KERNEL - confirm that both are defined for kernel builds.
+ */
+u_short fr_tcpsum(m, ip, tcp)
+mb_t *m;
+ip_t *ip;
+tcphdr_t *tcp;
+{
+	u_short *sp, slen, ts;
+	u_int sum, sum2;
+	int hlen;
+
+	/*
+	 * Add up IP Header portion
+	 * (protocol, TCP length and the source/destination addresses;
+	 * presumably ip_len is in host byte order here - TODO confirm)
+	 */
+	hlen = ip->ip_hl << 2;
+	slen = ip->ip_len - hlen;
+	sum = htons((u_short)ip->ip_p);
+	sum += htons(slen);
+	sp = (u_short *)&ip->ip_src;
+	sum += *sp++;	/* ip_src */
+	sum += *sp++;
+	sum += *sp++;	/* ip_dst */
+	sum += *sp++;
+	/* save the current checksum and zero the field for the computation */
+	ts = tcp->th_sum;
+	tcp->th_sum = 0;
+#ifdef	KERNEL
+# if SOLARIS
+	sum2 = ip_cksum(m, hlen, sum);	/* hlen == offset */
+	sum2 = (sum2 & 0xffff) + (sum2 >> 16);
+	sum2 = ~sum2 & 0xffff;
+# else /* SOLARIS */
+#  if defined(BSD) || defined(sun)
+	/*
+	 * Temporarily step the first mbuf past the IP header so that
+	 * in_cksum() sums slen bytes starting at the TCP header, then put
+	 * the mbuf back the way it was.
+	 */
+#   if BSD >= 199306
+	m->m_data += hlen;
+#   else
+	m->m_off += hlen;
+#   endif
+	m->m_len -= hlen;
+	sum2 = in_cksum(m, slen);
+	m->m_len += hlen;
+#   if BSD >= 199306
+	m->m_data -= hlen;
+#   else
+	m->m_off -= hlen;
+#   endif
+	/*
+	 * Both sum and sum2 are partial sums, so combine them together.
+	 */
+	sum = (sum & 0xffff) + (sum >> 16);
+	sum = ~sum & 0xffff;
+	sum2 += sum;
+	sum2 = (sum2 & 0xffff) + (sum2 >> 16);
+#  else /* defined(BSD) || defined(sun) */
+{
+	/* hand-rolled sum over the mbuf chain for the remaining platforms */
+	union {
+		u_char	c[2];
+		u_short	s;
+	} bytes;
+	u_short len = ip->ip_len;
+# if defined(__sgi)
+	int add;
+# endif
+
+	/*
+	 * Add up IP Header portion
+	 * (this restarts the sum, replacing the value computed above)
+	 */
+	sp = (u_short *)&ip->ip_src;
+	len -= (ip->ip_hl << 2);
+	sum = ntohs(IPPROTO_TCP);
+	sum += htons(len);
+	sum += *sp++;	/* ip_src */
+	sum += *sp++;
+	sum += *sp++;	/* ip_dst */
+	sum += *sp++;
+	/* IP options may sit between the addresses and the TCP header */
+	if (sp != (u_short *)tcp)
+		sp = (u_short *)tcp;
+	sum += *sp++;	/* sport */
+	sum += *sp++;	/* dport */
+	sum += *sp++;	/* seq */
+	sum += *sp++;
+	sum += *sp++;	/* ack */
+	sum += *sp++;
+	sum += *sp++;	/* off */
+	sum += *sp++;	/* win */
+	sum += *sp++;	/* checksum field - zeroed above, so this adds 0 */
+	sum += *sp++;	/* urp */
+
+# ifdef	__sgi
+	/*
+	 * In case we had to copy the IP & TCP header out of mbufs,
+	 * skip over the mbuf bits which are the header
+	 */
+	if ((caddr_t)ip != mtod(m, caddr_t)) {
+		hlen = (caddr_t)sp - (caddr_t)ip;
+		while (hlen) {
+			add = MIN(hlen, m->m_len);
+			sp = (u_short *)(mtod(m, caddr_t) + add);
+			hlen -= add;
+			if (add == m->m_len) {
+				m = m->m_next;
+				if (!hlen) {
+					if (!m)
+						break;
+					sp = mtod(m, u_short *);
+				}
+				PANIC((!m),("fr_tcpsum(1): not enough data"));
+			}
+		}
+	}
+# endif
+
+	/* nothing left to sum if the segment is just the basic TCP header */
+	if (!(len -= sizeof(*tcp)))
+		goto nodata;
+	while (len > 1) {
+		/* move on to the next mbuf once this one is used up */
+		if (((caddr_t)sp - mtod(m, caddr_t)) >= m->m_len) {
+			m = m->m_next;
+			PANIC((!m),("fr_tcpsum(2): not enough data"));
+			sp = mtod(m, u_short *);
+		}
+		/* a 16-bit word that straddles two mbufs is rebuilt by hand */
+		if (((caddr_t)(sp + 1) - mtod(m, caddr_t)) > m->m_len) {
+			bytes.c[0] = *(u_char *)sp;
+			m = m->m_next;
+			PANIC((!m),("fr_tcpsum(3): not enough data"));
+			sp = mtod(m, u_short *);
+			bytes.c[1] = *(u_char *)sp;
+			sum += bytes.s;
+			sp = (u_short *)((u_char *)sp + 1);
+		}
+		/* avoid a misaligned 16-bit load when sp is at an odd address */
+		if ((u_long)sp & 1) {
+			bcopy((char *)sp++, (char *)&bytes.s, sizeof(bytes.s));
+			sum += bytes.s;
+		} else
+			sum += *sp++;
+		len -= 2;
+	}
+	/* trailing odd byte */
+	if (len)
+		sum += ntohs(*(u_char *)sp << 8);
+nodata:
+	/* fold the carries back in and complement the result */
+	while (sum > 0xffff)
+		sum = (sum & 0xffff) + (sum >> 16);
+	sum2 = (u_short)(~sum & 0xffff);
+}
+#  endif /*  defined(BSD) || defined(sun) */
+# endif /* SOLARIS */
+#else /* KERNEL */
+	sum2 = 0;
+#endif /* KERNEL */
+	/* put the original checksum back into the header */
+	tcp->th_sum = ts;
+	return sum2;
+}
+
+
+#if defined(_KERNEL) && ( ((BSD < 199306) && !SOLARIS) || defined(__sgi) )
+/*
+ * Copyright (c) 1982, 1986, 1988, 1991, 1993
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)uipc_mbuf.c	8.2 (Berkeley) 1/4/94
+ * $Id: fil.c,v 2.35.2.30 2000/12/17 05:49:22 darrenr Exp $
+ */
+/*
+ * m_copydata - flatten part of an mbuf chain into a plain buffer.
+ *
+ * Skips the first "off" bytes of the chain headed by "m" and then copies
+ * the following "len" bytes to "cp".  Panics with "m_copydata" when either
+ * count is negative or when the chain ends before the request is met.
+ */
+void
+m_copydata(m, off, len, cp)
+	register mb_t *m;
+	register int off;
+	register int len;
+	caddr_t cp;
+{
+	register unsigned chunk;
+
+	if (off < 0 || len < 0)
+		panic("m_copydata");
+
+	/* step over the mbufs that lie wholly before the starting offset */
+	for (; off > 0; off -= m->m_len, m = m->m_next) {
+		if (m == 0)
+			panic("m_copydata");
+		if (off < m->m_len)
+			break;
+	}
+
+	/* copy piecewise; only the first piece starts part way into an mbuf */
+	for (; len > 0; m = m->m_next) {
+		if (m == 0)
+			panic("m_copydata");
+		chunk = MIN(m->m_len - off, len);
+		bcopy(mtod(m, caddr_t) + off, cp, chunk);
+		cp += chunk;
+		len -= chunk;
+		off = 0;
+	}
+}
+
+
+# ifndef linux
+/*
+ * Copy data from a buffer back into the indicated mbuf chain,
+ * starting "off" bytes from the beginning, extending the mbuf
+ * chain if necessary.
+ *
+ * m0  - head of the mbuf chain (nothing is done if it is null)
+ * off - byte offset into the chain at which to start writing
+ * len - number of bytes to copy
+ * cp  - source buffer
+ *
+ * If a new mbuf cannot be allocated the copy stops early without any
+ * indication to the caller.
+ */
+void
+m_copyback(m0, off, len, cp)
+	struct	mbuf *m0;
+	register int off;
+	register int len;
+	caddr_t cp;
+{
+	register int mlen;
+	register struct mbuf *m = m0, *n;
+	int totlen = 0;
+
+	if (m0 == 0)
+		return;
+	/*
+	 * Walk to the mbuf containing "off", appending mbufs obtained from
+	 * m_getclr() if the chain is too short to reach it.
+	 */
+	while (off > (mlen = m->m_len)) {
+		off -= mlen;
+		totlen += mlen;
+		if (m->m_next == 0) {
+			n = m_getclr(M_DONTWAIT, m->m_type);
+			if (n == 0)
+				goto out;
+			n->m_len = min(MLEN, len + off);
+			m->m_next = n;
+		}
+		m = m->m_next;
+	}
+	/*
+	 * Copy the data in, one mbuf at a time, appending further mbufs as
+	 * required.
+	 */
+	while (len > 0) {
+		mlen = min (m->m_len - off, len);
+		bcopy(cp, off + mtod(m, caddr_t), (unsigned)mlen);
+		cp += mlen;
+		len -= mlen;
+		mlen += off;
+		off = 0;
+		totlen += mlen;
+		if (len == 0)
+			break;
+		if (m->m_next == 0) {
+			n = m_get(M_DONTWAIT, m->m_type);
+			if (n == 0)
+				break;
+			n->m_len = min(MLEN, len);
+			m->m_next = n;
+		}
+		m = m->m_next;
+	}
+out:
+	/* the packet header length update is compiled out, so totlen is unused */
+#if 0
+	if (((m = m0)->m_flags & M_PKTHDR) && (m->m_pkthdr.len < totlen))
+		m->m_pkthdr.len = totlen;
+#endif
+	return;
+}
+# endif /* linux */
+#endif /* (_KERNEL) && ( ((BSD < 199306) && !SOLARIS) || __sgi) */
+
+
+/*
+ * fr_findgroup - look up a rule group by number.
+ *
+ * The list searched is ipfgroups[2][set] when "which" is IPL_LOGAUTH,
+ * ipfgroups[1][set] when "flags" has FR_ACCOUNT set and ipfgroups[0][set]
+ * when it has FR_OUTQUE or FR_INQUE set; with none of these NULL is returned
+ * and *fgpp is left untouched.  Only the low 16 bits of "num" are compared.
+ *
+ * Returns the matching group or NULL.  If fgpp is not NULL, *fgpp is set to
+ * the link that points at the match, or to the terminating link of the list
+ * when there is no match.
+ */
+frgroup_t *fr_findgroup(num, flags, which, set, fgpp)
+u_32_t num, flags;
+minor_t which;
+int set;
+frgroup_t ***fgpp;
+{
+	frgroup_t **link, *grp;
+	int type;
+
+	if (which == IPL_LOGAUTH)
+		type = 2;
+	else if (flags & FR_ACCOUNT)
+		type = 1;
+	else if (flags & (FR_OUTQUE|FR_INQUE))
+		type = 0;
+	else
+		return NULL;
+
+	num &= 0xffff;
+	link = &ipfgroups[type][set];
+	for (grp = *link; grp != NULL; grp = *link) {
+		if (grp->fg_num == num)
+			break;
+		link = &grp->fg_next;
+	}
+
+	if (fgpp != NULL)
+		*fgpp = link;
+	return grp;
+}
+
+
+/*
+ * fr_addgroup - create rule group "num" with "fp" as its head rule.
+ *
+ * num   - group number
+ * fp    - rule which heads the group; its fr_flags select the group list
+ * which - device unit the request came in on
+ * set   - rule set index
+ *
+ * Returns the existing group if one with that number is already present,
+ * the newly allocated group otherwise, or NULL if the group could not be
+ * created (no list matches the rule's flags or the allocation failed).
+ */
+frgroup_t *fr_addgroup(num, fp, which, set)
+u_32_t num;
+frentry_t *fp;
+minor_t which;
+int set;
+{
+	frgroup_t *fg, **fgp = NULL;
+
+	if ((fg = fr_findgroup(num, fp->fr_flags, which, set, &fgp)))
+		return fg;
+
+	/*
+	 * fr_findgroup() returns without storing an insertion point when the
+	 * flags do not select any group list; there is nowhere to link a new
+	 * group in that case.
+	 */
+	if (fgp == NULL)
+		return NULL;
+
+	KMALLOC(fg, frgroup_t *);
+	if (fg) {
+		fg->fg_num = num;
+		fg->fg_next = *fgp;
+		fg->fg_head = fp;
+		fg->fg_start = &fp->fr_grp;
+		*fgp = fg;
+	}
+	return fg;
+}
+
+
+/*
+ * fr_delgroup - unlink and free rule group "num", if it exists.
+ *
+ * The group is located with fr_findgroup() using the same arguments;
+ * nothing happens when no such group is found.
+ */
+void fr_delgroup(num, flags, which, set)
+u_32_t num, flags;
+minor_t which;
+int set;
+{
+	frgroup_t **link, *grp;
+
+	grp = fr_findgroup(num, flags, which, set, &link);
+	if (grp != NULL) {
+		/* splice the group out of its list before releasing it */
+		*link = grp->fg_next;
+		KFREE(grp);
+	}
+}
+
+
+
+/*
+ * recursively flush rules from the list, descending groups as they are
+ * encountered.  if a rule is the head of a group and it has lost all its
+ * group members, then also delete the group reference.
+ *
+ * set     - rule set index, passed on to fr_delgroup()
+ * unit    - device unit, passed on to fr_delgroup()
+ * nfreedp - running total of freed rules, updated at every level
+ * listp   - head of the list to flush; the list is empty on return
+ *
+ * Returns the number of rules freed from this list only (rules freed in
+ * nested groups are added to *nfreedp by the recursive calls).
+ */
+static int frflushlist(set, unit, nfreedp, listp)
+int set;
+minor_t unit;
+int *nfreedp;
+frentry_t **listp;
+{
+	register int freed = 0, i;
+	register frentry_t *fp;
+
+	while ((fp = *listp)) {
+		/* unlink the rule from the list first */
+		*listp = fp->fr_next;
+		if (fp->fr_grp) {
+			/*
+			 * Flush the rules grouped under this one and drop one
+			 * reference on it for each member that was freed.
+			 */
+			i = frflushlist(set, unit, nfreedp, &fp->fr_grp);
+			MUTEX_ENTER(&ipf_rw);
+			fp->fr_ref -= i;
+			MUTEX_EXIT(&ipf_rw);
+		}
+
+		/* drop the reference for the rule being removed from the list */
+		ATOMIC_DEC32(fp->fr_ref);
+		if (fp->fr_grhead) {
+			fr_delgroup(fp->fr_grhead, fp->fr_flags, 
+				    unit, set);
+			fp->fr_grhead = 0;
+		}
+		/* only free the rule once nothing else refers to it */
+		if (fp->fr_ref == 0) {
+			KFREE(fp);
+			freed++;
+		} else
+			fp->fr_next = NULL;
+	}
+	*nfreedp += freed;
+	return freed;
+}
+
+
+/*
+ * frflush - flush filter and accounting rules.
+ *
+ * unit  - device unit; anything other than IPL_LOGIPF is ignored
+ * flags - FR_OUTQUE and/or FR_INQUE select the direction(s) to flush and
+ *         FR_INACTIVE selects the inactive rule set instead of the active
+ *
+ * The rule cache is cleared and the lists are flushed with ipf_mutex held
+ * for writing.  Returns the number of rules freed.
+ */
+int frflush(unit, flags)
+minor_t unit;
+int flags;
+{
+	int flushed = 0, set, dir;
+
+	if (unit != IPL_LOGIPF)
+		return 0;
+
+	WRITE_ENTER(&ipf_mutex);
+	bzero((char *)frcache, sizeof(frcache[0]) * 2);
+
+	set = (flags & FR_INACTIVE) ? (1 - fr_active) : fr_active;
+
+	/* index 1 holds the outbound lists and index 0 the inbound ones */
+	for (dir = 1; dir >= 0; dir--) {
+		if (!(flags & (dir ? FR_OUTQUE : FR_INQUE)))
+			continue;
+#ifdef	USE_INET6
+		(void) frflushlist(set, unit, &flushed, &ipfilter6[dir][set]);
+		(void) frflushlist(set, unit, &flushed, &ipacct6[dir][set]);
+#endif
+		(void) frflushlist(set, unit, &flushed, &ipfilter[dir][set]);
+		(void) frflushlist(set, unit, &flushed, &ipacct[dir][set]);
+	}
+	RWLOCK_EXIT(&ipf_mutex);
+	return flushed;
+}
+
+
+/*
+ * memstr - find a byte sequence inside a buffer.
+ *
+ * src  - the sequence to look for, slen bytes long
+ * dst  - the buffer to search, dlen bytes long
+ *
+ * Returns a pointer to the first occurrence of src within dst, or NULL if
+ * it does not occur.
+ */
+char *memstr(src, dst, slen, dlen)
+char *src, *dst;
+int slen, dlen;
+{
+	/* stop once the remainder is too short to hold the sequence */
+	for (; dlen >= slen; dst++, dlen--)
+		if (bcmp(src, dst, slen) == 0)
+			return dst;
+	return NULL;
+}
+
+
+/*
+ * fixskip - adjust skip counts of the rules that precede "rp".
+ *
+ * listp     - head of the rule list
+ * rp        - rule being added or removed; nothing is done if it is not
+ *             on the list
+ * addremove - amount to add to each affected skip count
+ *
+ * A rule ahead of rp is adjusted when its skip count is non-zero and its
+ * position plus that count is at least rp's position in the list.
+ */
+void fixskip(listp, rp, addremove)
+frentry_t **listp, *rp;
+int addremove;
+{
+	frentry_t *cur;
+	int target = 0, idx = 0;
+
+	/* work out where rp sits in the list */
+	cur = *listp;
+	while ((cur != NULL) && (cur != rp)) {
+		target++;
+		cur = cur->fr_next;
+	}
+	if (cur == NULL)
+		return;
+
+	/* now fix up the rules in front of it */
+	cur = *listp;
+	while ((cur != NULL) && (cur != rp)) {
+		if ((cur->fr_skip != 0) && (idx + cur->fr_skip >= target))
+			cur->fr_skip += addremove;
+		idx++;
+		cur = cur->fr_next;
+	}
+}
+
+
+#ifdef	_KERNEL
+/*
+ * countbits - work out the prefix length of a netmask.
+ *
+ * The mask is passed through ntohl() and its leading 1 bits are counted.
+ * A mask made of that many 1 bits followed by 0 bits is then rebuilt; if
+ * it equals the converted mask the count is returned, otherwise -1.
+ */
+int	countbits(ip)
+u_32_t	ip;
+{
+	u_32_t	work;
+	int	ones = 0, pos;
+
+	ip = ntohl(ip);
+
+	/* count the run of 1's starting at the top bit */
+	work = ip;
+	while ((ones < 32) && (work & 0x80000000)) {
+		ones++;
+		work *= 2;
+	}
+
+	/* rebuild the contiguous mask which has that many leading 1's */
+	work = 0;
+	for (pos = 0; pos < 32; pos++) {
+		work *= 2;
+		if (pos < ones)
+			work++;
+	}
+
+	return (work == ip) ? ones : -1;
+}
+
+
+/*
+ * return the first IP Address associated with an interface
+ *
+ * v     - IP version of the address wanted (4 or 6)
+ * ifptr - the interface: an ill_t on Solaris, a struct ifnet elsewhere
+ * inp   - where the address is stored.  For v == 6 a whole struct in6_addr
+ *         is copied here, so the caller's buffer must be big enough for
+ *         that even though the parameter is typed as a struct in_addr.
+ *
+ * Returns 0 on success.  Where the interface's address list is searched,
+ * -1 is returned if no address of the requested family is found; for IPv6
+ * link-local and loopback addresses are passed over.
+ *
+ * NOTE(review): outside Solaris the first list entry (ifa) is dereferenced
+ * without a NULL check - confirm an interface always has an address entry.
+ * NOTE(review): on the (BSD < 199306) path inp6 is never assigned, so a
+ * v == 6 request there would copy from a NULL pointer - confirm USE_INET6
+ * is never enabled on such platforms.
+ */
+int fr_ifpaddr(v, ifptr, inp)
+int v;
+void *ifptr;
+struct in_addr *inp;
+{
+# ifdef	USE_INET6
+	struct in6_addr *inp6 = NULL;
+# endif
+# if SOLARIS
+	ill_t *ill = ifptr;
+# else
+	struct ifnet *ifp = ifptr;
+# endif
+	struct in_addr in;
+
+# if SOLARIS
+#  ifdef	USE_INET6
+	if (v == 6) {
+		struct in6_addr in6;
+
+		/*
+		 * First is always link local.
+		 */
+		if (ill->ill_ipif->ipif_next)
+			in6 = ill->ill_ipif->ipif_next->ipif_v6lcl_addr;
+		else
+			bzero((char *)&in6, sizeof(in6));
+		bcopy((char *)&in6, (char *)inp, sizeof(in6));
+	} else
+#  endif
+	{
+		in.s_addr = ill->ill_ipif->ipif_local_addr;
+		*inp = in;
+	}
+# else /* SOLARIS */
+#  if linux
+	;
+#  else /* linux */
+	struct sockaddr_in *sin;
+	struct ifaddr *ifa;
+
+	/* fetch the head of the interface's address list */
+#   if	(__FreeBSD_version >= 300000)
+	ifa = TAILQ_FIRST(&ifp->if_addrhead);
+#   else
+#    if defined(__NetBSD__) || defined(__OpenBSD__)
+	ifa = ifp->if_addrlist.tqh_first;
+#    else
+#     if defined(__sgi) && defined(IFF_DRVRLOCK) /* IRIX 6 */
+	ifa = &((struct in_ifaddr *)ifp->in_ifaddr)->ia_ifa;
+#     else
+	ifa = ifp->if_addrlist;
+#     endif
+#    endif /* __NetBSD__ || __OpenBSD__ */
+#   endif /* __FreeBSD_version >= 300000 */
+#   if (BSD < 199306) && !(/*IRIX6*/defined(__sgi) && defined(IFF_DRVRLOCK))
+	sin = (struct sockaddr_in *)&ifa->ifa_addr;
+#   else
+	sin = (struct sockaddr_in *)ifa->ifa_addr;
+	/* walk the list until an address of the wanted family turns up */
+	while (sin && ifa) {
+		if ((v == 4) && (sin->sin_family == AF_INET))
+			break;
+#    ifdef USE_INET6
+		if ((v == 6) && (sin->sin_family == AF_INET6)) {
+			inp6 = &((struct sockaddr_in6 *)sin)->sin6_addr;
+			if (!IN6_IS_ADDR_LINKLOCAL(inp6) &&
+			    !IN6_IS_ADDR_LOOPBACK(inp6))
+				break;
+		}
+#    endif
+#    if	(__FreeBSD_version >= 300000)
+		ifa = TAILQ_NEXT(ifa, ifa_link);
+#    else
+#     if defined(__NetBSD__) || defined(__OpenBSD__)
+		ifa = ifa->ifa_list.tqe_next;
+#     else
+		ifa = ifa->ifa_next;
+#     endif
+#    endif /* __FreeBSD_version >= 300000 */
+		if (ifa)
+			sin = (struct sockaddr_in *)ifa->ifa_addr;
+	}
+	/* running off the end of the list means nothing suitable was found */
+	if (ifa == NULL)
+		sin = NULL;
+	if (sin == NULL)
+		return -1;
+#   endif /* (BSD < 199306) && (!__sgi && IFF_DRVLOCK) */
+#    ifdef	USE_INET6
+	if (v == 6)
+		bcopy((char *)inp6, (char *)inp, sizeof(*inp6));
+	else
+#    endif
+	{
+		in = sin->sin_addr;
+		*inp = in;
+	}
+#  endif /* linux */
+# endif /* SOLARIS */
+	return 0;
+}
+
+
+/*
+ * frsynclist - refresh the interface pointers held by a list of rules.
+ *
+ * Every rule whose fr_ifa is set has it looked up again from fr_ifname;
+ * when the lookup fails the pointer is set to -1 rather than NULL.  Rules
+ * grouped under a rule (fr_grp) are processed recursively.
+ */
+static void frsynclist(fr)
+register frentry_t *fr;
+{
+	while (fr != NULL) {
+		if (fr->fr_ifa != NULL) {
+			fr->fr_ifa = GETUNIT(fr->fr_ifname, fr->fr_ip.fi_v);
+			if (fr->fr_ifa == NULL) {
+				fr->fr_ifa = (void *)-1;
+			}
+		}
+
+		if (fr->fr_grp != NULL) {
+			frsynclist(fr->fr_grp);
+		}
+
+		fr = fr->fr_next;
+	}
+}
+
+
+/*
+ * frsync - resynchronise with the system's list of interfaces.
+ *
+ * Except on Solaris, ip_natsync() and ip_statesync() are called for every
+ * interface on the system's list and ip_natsync() is called once more with
+ * an interface pointer of -1.  The interface pointers of all rules in the
+ * active filter and accounting lists are then refreshed with ipf_mutex held
+ * for writing.
+ */
+void frsync()
+{
+# if !SOLARIS
+	register struct ifnet *ifp;
+
+	/* the way the interface list is walked depends on the OS version */
+#  if defined(__OpenBSD__) || ((NetBSD >= 199511) && (NetBSD < 1991011)) || \
+     (defined(__FreeBSD_version) && (__FreeBSD_version >= 300000))
+#   if (NetBSD >= 199905) || defined(__OpenBSD__)
+	for (ifp = ifnet.tqh_first; ifp; ifp = ifp->if_list.tqe_next)
+#   else
+	for (ifp = ifnet.tqh_first; ifp; ifp = ifp->if_link.tqe_next)
+#   endif
+#  else
+	for (ifp = ifnet; ifp; ifp = ifp->if_next)
+#  endif
+	{
+		ip_natsync(ifp);
+		ip_statesync(ifp);
+	}
+	ip_natsync((struct ifnet *)-1);
+# endif
+
+	/* only the active rule set (fr_active) is resynchronised */
+	WRITE_ENTER(&ipf_mutex);
+	frsynclist(ipacct[0][fr_active]);
+	frsynclist(ipacct[1][fr_active]);
+	frsynclist(ipfilter[0][fr_active]);
+	frsynclist(ipfilter[1][fr_active]);
+#ifdef	USE_INET6
+	frsynclist(ipacct6[0][fr_active]);
+	frsynclist(ipacct6[1][fr_active]);
+	frsynclist(ipfilter6[0][fr_active]);
+	frsynclist(ipfilter6[1][fr_active]);
+#endif
+	RWLOCK_EXIT(&ipf_mutex);
+}
+
+
+/*
+ * ircopyptr - copy in "c" bytes from the address stored at "a" to "b".
+ *
+ * "a" holds a pointer rather than the data itself.  Outside Solaris that
+ * pointer is fetched with bcopy() because "a" refers to a char buffer on
+ * the kernel's local stack, which could end up being unaligned; on Solaris
+ * it is fetched with copyin().  The data it points at is then brought in
+ * with copyin().
+ *
+ * Returns 0 on success or EFAULT if a copyin() fails.
+ */
+int ircopyptr(a, b, c)
+void *a, *b;
+size_t c;
+{
+	caddr_t uaddr;
+
+#if SOLARIS
+	if (copyin(a, (char *)&uaddr, sizeof(uaddr)))
+		return EFAULT;
+#else
+	bcopy(a, &uaddr, sizeof(uaddr));
+#endif
+	if (copyin(uaddr, b, c) != 0)
+		return EFAULT;
+	return 0;
+}
+
+
+/*
+ * iwcopyptr - copy out "c" bytes from "a" to the address stored at "b".
+ *
+ * "b" holds a pointer rather than being the destination itself.  That
+ * pointer is fetched with copyin() on Solaris and with bcopy() elsewhere,
+ * and the data is then written to it with copyout().
+ *
+ * Returns 0 on success or EFAULT if the copyin()/copyout() fails.
+ */
+int iwcopyptr(a, b, c)
+void *a, *b;
+size_t c;
+{
+	caddr_t uaddr;
+
+#if SOLARIS
+	if (copyin(b, (char *)&uaddr, sizeof(uaddr)))
+		return EFAULT;
+#else
+	bcopy(b, &uaddr, sizeof(uaddr));
+#endif
+	if (copyout(a, uaddr, c) != 0)
+		return EFAULT;
+	return 0;
+}
+
+#else /* _KERNEL */
+
+
+/*
+ * return the first IP Address associated with an interface
+ *
+ * This is the version built when _KERNEL is not defined: it is a stub
+ * which ignores its arguments, leaves *inp untouched and returns 0.
+ */
+int fr_ifpaddr(v, ifptr, inp)
+int v;
+void *ifptr;
+struct in_addr *inp;
+{
+	return 0;
+}
+
+
+/*
+ * ircopyptr - non-kernel version: "a" holds a pointer; copy "c" bytes from
+ * the address it contains to "b".  Plain bcopy() is used throughout and the
+ * result is always 0.
+ */
+int ircopyptr(a, b, c)
+void *a, *b;
+size_t c;
+{
+	caddr_t from;
+
+	/* fetch the real source address, then the data it refers to */
+	bcopy(a, &from, sizeof(from));
+	bcopy(from, b, c);
+
+	return 0;
+}
+
+
+/*
+ * iwcopyptr - non-kernel version: "b" holds a pointer; copy "c" bytes from
+ * "a" to the address it contains.  Plain bcopy() is used throughout and the
+ * result is always 0.
+ */
+int iwcopyptr(a, b, c)
+void *a, *b;
+size_t c;
+{
+	caddr_t to;
+
+	/* fetch the real destination address, then write the data to it */
+	bcopy(b, &to, sizeof(to));
+	bcopy(a, to, c);
+
+	return 0;
+}
+
+
+#endif
+
+
+/*
+ * fr_lock - exchange a lock value with the caller.
+ *
+ * data  - caller's buffer: supplies the new value and receives the old one
+ * lockp - the lock variable to update
+ *
+ * The new value is read from "data" with IRCOPY(), the current value of
+ * *lockp is written back to "data" with IWCOPY() and only then is *lockp
+ * changed.  If either copy fails its error is returned and *lockp is left
+ * alone; otherwise 0 is returned.
+ */
+int fr_lock(data, lockp)
+caddr_t data;
+int *lockp;
+{
+	int newval, error;
+
+	error = IRCOPY(data, (caddr_t)&newval, sizeof(newval));
+	if (error)
+		return error;
+
+	error = IWCOPY((caddr_t)lockp, data, sizeof(*lockp));
+	if (error)
+		return error;
+
+	*lockp = newval;
+	return 0;
+}
+
+
+/*
+ * fr_getstat - fill in a friostat_t with the filter's current status.
+ *
+ * fiop - structure to fill in
+ *
+ * Copies both sets of filter statistics, the four lock variables, the
+ * heads of the filter, accounting and group lists for both rule sets (the
+ * IPv6 list heads are NULL when USE_INET6 is not defined), the active set
+ * number, the fast-route counters, the running flag, whether logging is
+ * compiled in, the default pass value and the version string.
+ */
+void fr_getstat(fiop)
+friostat_t *fiop;
+{
+	bcopy((char *)frstats, (char *)fiop->f_st, sizeof(filterstats_t) * 2);
+	fiop->f_locks[0] = fr_state_lock;
+	fiop->f_locks[1] = fr_nat_lock;
+	fiop->f_locks[2] = fr_frag_lock;
+	fiop->f_locks[3] = fr_auth_lock;
+	fiop->f_fin[0] = ipfilter[0][0];
+	fiop->f_fin[1] = ipfilter[0][1];
+	fiop->f_fout[0] = ipfilter[1][0];
+	fiop->f_fout[1] = ipfilter[1][1];
+	fiop->f_acctin[0] = ipacct[0][0];
+	fiop->f_acctin[1] = ipacct[0][1];
+	fiop->f_acctout[0] = ipacct[1][0];
+	fiop->f_acctout[1] = ipacct[1][1];
+#ifdef	USE_INET6
+	fiop->f_fin6[0] = ipfilter6[0][0];
+	fiop->f_fin6[1] = ipfilter6[0][1];
+	fiop->f_fout6[0] = ipfilter6[1][0];
+	fiop->f_fout6[1] = ipfilter6[1][1];
+	fiop->f_acctin6[0] = ipacct6[0][0];
+	fiop->f_acctin6[1] = ipacct6[0][1];
+	fiop->f_acctout6[0] = ipacct6[1][0];
+	fiop->f_acctout6[1] = ipacct6[1][1];
+#else
+	fiop->f_fin6[0] = NULL;
+	fiop->f_fin6[1] = NULL;
+	fiop->f_fout6[0] = NULL;
+	fiop->f_fout6[1] = NULL;
+	fiop->f_acctin6[0] = NULL;
+	fiop->f_acctin6[1] = NULL;
+	fiop->f_acctout6[0] = NULL;
+	fiop->f_acctout6[1] = NULL;
+#endif
+	fiop->f_active = fr_active;
+	fiop->f_froute[0] = ipl_frouteok[0];
+	fiop->f_froute[1] = ipl_frouteok[1];
+
+	fiop->f_running = fr_running;
+	fiop->f_groups[0][0] = ipfgroups[0][0];
+	fiop->f_groups[0][1] = ipfgroups[0][1];
+	fiop->f_groups[1][0] = ipfgroups[1][0];
+	fiop->f_groups[1][1] = ipfgroups[1][1];
+	fiop->f_groups[2][0] = ipfgroups[2][0];
+	fiop->f_groups[2][1] = ipfgroups[2][1];
+#ifdef  IPFILTER_LOG
+	fiop->f_logging = 1;
+#else
+	fiop->f_logging = 0;
+#endif
+	fiop->f_defpass = fr_pass;
+	/*
+	 * strncpy() does not terminate the destination when the source fills
+	 * it completely, so make sure the version is always a valid string.
+	 */
+	strncpy(fiop->f_version, ipfilter_version, sizeof(fiop->f_version));
+	fiop->f_version[sizeof(fiop->f_version) - 1] = '\0';
+}
+
+
+#ifdef	USE_INET6
+/*
+ * Table mapping ICMP (IPv4) message types to ICMPv6 message types.  It is
+ * indexed by the ICMP type; an entry of -1 marks a type for which no ICMPv6
+ * type is given here.
+ */
+int icmptoicmp6types[ICMP_MAXTYPE+1] = {
+	ICMP6_ECHO_REPLY,	/* 0: ICMP_ECHOREPLY */
+	-1,			/* 1: UNUSED */
+	-1,			/* 2: UNUSED */
+	ICMP6_DST_UNREACH,	/* 3: ICMP_UNREACH */
+	-1,			/* 4: ICMP_SOURCEQUENCH */
+	ND_REDIRECT,		/* 5: ICMP_REDIRECT */
+	-1,			/* 6: UNUSED */
+	-1,			/* 7: UNUSED */
+	ICMP6_ECHO_REQUEST,	/* 8: ICMP_ECHO */
+	-1,			/* 9: UNUSED */
+	-1,			/* 10: UNUSED */
+	ICMP6_TIME_EXCEEDED,	/* 11: ICMP_TIMXCEED */
+	ICMP6_PARAM_PROB,	/* 12: ICMP_PARAMPROB */
+	-1,			/* 13: ICMP_TSTAMP */
+	-1,			/* 14: ICMP_TSTAMPREPLY */
+	-1,			/* 15: ICMP_IREQ */
+	-1,			/* 16: ICMP_IREQREPLY */
+	-1,			/* 17: ICMP_MASKREQ */
+	-1,			/* 18: ICMP_MASKREPLY */
+};
+
+
+/*
+ * Table mapping ICMP (IPv4) "destination unreachable" codes to ICMPv6
+ * destination unreachable codes.  It is indexed by the ICMP code; an entry
+ * of -1 marks a code for which no ICMPv6 code is given here.
+ */
+int	icmptoicmp6unreach[ICMP_MAX_UNREACH] = {
+	ICMP6_DST_UNREACH_ADDR,		/* 0: ICMP_UNREACH_NET */
+	ICMP6_DST_UNREACH_ADDR,		/* 1: ICMP_UNREACH_HOST */
+	-1,				/* 2: ICMP_UNREACH_PROTOCOL */
+	ICMP6_DST_UNREACH_NOPORT,	/* 3: ICMP_UNREACH_PORT */
+	-1,				/* 4: ICMP_UNREACH_NEEDFRAG */
+	ICMP6_DST_UNREACH_NOTNEIGHBOR,	/* 5: ICMP_UNREACH_SRCFAIL */
+	ICMP6_DST_UNREACH_ADDR,		/* 6: ICMP_UNREACH_NET_UNKNOWN */
+	ICMP6_DST_UNREACH_ADDR,		/* 7: ICMP_UNREACH_HOST_UNKNOWN */
+	-1,				/* 8: ICMP_UNREACH_ISOLATED */
+	ICMP6_DST_UNREACH_ADMIN,	/* 9: ICMP_UNREACH_NET_PROHIB */
+	ICMP6_DST_UNREACH_ADMIN,	/* 10: ICMP_UNREACH_HOST_PROHIB */
+	-1,				/* 11: ICMP_UNREACH_TOSNET */
+	-1,				/* 12: ICMP_UNREACH_TOSHOST */
+	ICMP6_DST_UNREACH_ADMIN,	/* 13: ICMP_UNREACH_ADMIN_PROHIBIT */
+};
+#endif
diff --git a/sys/netinet/icmp6.h b/sys/netinet/icmp6.h
new file mode 100644
index 0000000..3625ee4
--- /dev/null
+++ b/sys/netinet/icmp6.h
@@ -0,0 +1,661 @@
+/*	$FreeBSD$	*/
+/*	$KAME: icmp6.h,v 1.18 2000/07/03 02:51:08 itojun Exp $	*/
+
+/*
+ * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. Neither the name of the project nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+/*
+ * Copyright (c) 1982, 1986, 1993
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)ip_icmp.h	8.1 (Berkeley) 6/10/93
+ */
+
+#ifndef _NETINET_ICMP6_H_
+#define _NETINET_ICMP6_H_
+
+#define ICMPV6_PLD_MAXLEN	1232	/* IPV6_MMTU - sizeof(struct ip6_hdr)
+					   - sizeof(struct icmp6_hdr) */
+
+struct icmp6_hdr {
+	u_int8_t	icmp6_type;	/* type field */
+	u_int8_t	icmp6_code;	/* code field */
+	u_int16_t	icmp6_cksum;	/* checksum field */
+	union {
+		u_int32_t	icmp6_un_data32[1]; /* type-specific field */
+		u_int16_t	icmp6_un_data16[2]; /* type-specific field */
+		u_int8_t	icmp6_un_data8[4];  /* type-specific field */
+	} icmp6_dataun;
+};
+
+#define icmp6_data32	icmp6_dataun.icmp6_un_data32
+#define icmp6_data16	icmp6_dataun.icmp6_un_data16
+#define icmp6_data8	icmp6_dataun.icmp6_un_data8
+#define icmp6_pptr	icmp6_data32[0]		/* parameter prob */
+#define icmp6_mtu	icmp6_data32[0]		/* packet too big */
+#define icmp6_id	icmp6_data16[0]		/* echo request/reply */
+#define icmp6_seq	icmp6_data16[1]		/* echo request/reply */
+#define icmp6_maxdelay	icmp6_data16[0]		/* mcast group membership */
+
+#define ICMP6_DST_UNREACH		1	/* dest unreachable, codes: */
+#define ICMP6_PACKET_TOO_BIG		2	/* packet too big */
+#define ICMP6_TIME_EXCEEDED		3	/* time exceeded, code: */
+#define ICMP6_PARAM_PROB		4	/* ip6 header bad */
+
+#define ICMP6_ECHO_REQUEST		128	/* echo service */
+#define ICMP6_ECHO_REPLY		129	/* echo reply */
+#define ICMP6_MEMBERSHIP_QUERY		130	/* group membership query */
+#define MLD6_LISTENER_QUERY		130 	/* multicast listener query */
+#define ICMP6_MEMBERSHIP_REPORT		131	/* group membership report */
+#define MLD6_LISTENER_REPORT		131	/* multicast listener report */
+#define ICMP6_MEMBERSHIP_REDUCTION	132	/* group membership termination */
+#define MLD6_LISTENER_DONE		132	/* multicast listener done */
+
+#define ND_ROUTER_SOLICIT		133	/* router solicitation */
+#define ND_ROUTER_ADVERT		134	/* router advertisement */
+#define ND_NEIGHBOR_SOLICIT		135	/* neighbor solicitation */
+#define ND_NEIGHBOR_ADVERT		136	/* neighbor advertisement */
+#define ND_REDIRECT			137	/* redirect */
+
+#define ICMP6_ROUTER_RENUMBERING	138	/* router renumbering */
+
+#define ICMP6_WRUREQUEST		139	/* who are you request */
+#define ICMP6_WRUREPLY			140	/* who are you reply */
+#define ICMP6_FQDN_QUERY		139	/* FQDN query */
+#define ICMP6_FQDN_REPLY		140	/* FQDN reply */
+#define ICMP6_NI_QUERY			139	/* node information request */
+#define ICMP6_NI_REPLY			140	/* node information reply */
+
+/* The definitions below are experimental. TBA */
+#define MLD6_MTRACE_RESP		141	/* mtrace response(to sender) */
+#define MLD6_MTRACE			142	/* mtrace messages */
+
+#define ICMP6_MAXTYPE			142
+
+#define ICMP6_DST_UNREACH_NOROUTE	0	/* no route to destination */
+#define ICMP6_DST_UNREACH_ADMIN	 	1	/* administratively prohibited */
+#define ICMP6_DST_UNREACH_NOTNEIGHBOR	2	/* not a neighbor(obsolete) */
+#define ICMP6_DST_UNREACH_BEYONDSCOPE	2	/* beyond scope of source address */
+#define ICMP6_DST_UNREACH_ADDR		3	/* address unreachable */
+#define ICMP6_DST_UNREACH_NOPORT	4	/* port unreachable */
+
+#define ICMP6_TIME_EXCEED_TRANSIT 	0	/* ttl==0 in transit */
+#define ICMP6_TIME_EXCEED_REASSEMBLY	1	/* ttl==0 in reass */
+
+#define ICMP6_PARAMPROB_HEADER 	 	0	/* erroneous header field */
+#define ICMP6_PARAMPROB_NEXTHEADER	1	/* unrecognized next header */
+#define ICMP6_PARAMPROB_OPTION		2	/* unrecognized option */
+
+#define ICMP6_INFOMSG_MASK		0x80	/* all informational messages */
+
+#define ICMP6_NI_SUBJ_IPV6	0	/* Query Subject is an IPv6 address */
+#define ICMP6_NI_SUBJ_FQDN	1	/* Query Subject is a Domain name */
+#define ICMP6_NI_SUBJ_IPV4	2	/* Query Subject is an IPv4 address */
+
+#define ICMP6_NI_SUCESS		0	/* node information successful reply */
+#define ICMP6_NI_REFUSED	1	/* node information request is refused */
+#define ICMP6_NI_UNKNOWN	2	/* unknown Qtype */
+
+#define ICMP6_ROUTER_RENUMBERING_COMMAND  0	/* rr command */
+#define ICMP6_ROUTER_RENUMBERING_RESULT   1	/* rr result */
+#define ICMP6_ROUTER_RENUMBERING_SEQNUM_RESET   255	/* rr seq num reset */
+
+/* Used in kernel only */
+#define ND_REDIRECT_ONLINK	0	/* redirect to an on-link node */
+#define ND_REDIRECT_ROUTER	1	/* redirect to a better router */
+
+/*
+ * Multicast Listener Discovery
+ */
+struct mld6_hdr {
+	struct icmp6_hdr	mld6_hdr;
+	struct in6_addr		mld6_addr; /* multicast address */
+};
+
+#define mld6_type	mld6_hdr.icmp6_type
+#define mld6_code	mld6_hdr.icmp6_code
+#define mld6_cksum	mld6_hdr.icmp6_cksum
+#define mld6_maxdelay	mld6_hdr.icmp6_data16[0]
+#define mld6_reserved	mld6_hdr.icmp6_data16[1]
+
+/*
+ * Neighbor Discovery
+ */
+
+struct nd_router_solicit {	/* router solicitation */
+	struct icmp6_hdr 	nd_rs_hdr;
+	/* could be followed by options */
+};
+
+#define nd_rs_type	nd_rs_hdr.icmp6_type
+#define nd_rs_code	nd_rs_hdr.icmp6_code
+#define nd_rs_cksum	nd_rs_hdr.icmp6_cksum
+#define nd_rs_reserved	nd_rs_hdr.icmp6_data32[0]
+
+struct nd_router_advert {	/* router advertisement */
+	struct icmp6_hdr	nd_ra_hdr;
+	u_int32_t		nd_ra_reachable;	/* reachable time */
+	u_int32_t		nd_ra_retransmit;	/* retransmit timer */
+	/* could be followed by options */
+};
+
+#define nd_ra_type		nd_ra_hdr.icmp6_type
+#define nd_ra_code		nd_ra_hdr.icmp6_code
+#define nd_ra_cksum		nd_ra_hdr.icmp6_cksum
+#define nd_ra_curhoplimit	nd_ra_hdr.icmp6_data8[0]
+#define nd_ra_flags_reserved	nd_ra_hdr.icmp6_data8[1]
+#define ND_RA_FLAG_MANAGED	0x80
+#define ND_RA_FLAG_OTHER	0x40
+#define nd_ra_router_lifetime	nd_ra_hdr.icmp6_data16[1]
+
+struct nd_neighbor_solicit {	/* neighbor solicitation */
+	struct icmp6_hdr	nd_ns_hdr;
+	struct in6_addr		nd_ns_target;	/* target address */
+	/* could be followed by options */
+};
+
+#define nd_ns_type		nd_ns_hdr.icmp6_type
+#define nd_ns_code		nd_ns_hdr.icmp6_code
+#define nd_ns_cksum		nd_ns_hdr.icmp6_cksum
+#define nd_ns_reserved		nd_ns_hdr.icmp6_data32[0]
+
+struct nd_neighbor_advert {	/* neighbor advertisement */
+	struct icmp6_hdr	nd_na_hdr;
+	struct in6_addr		nd_na_target;	/* target address */
+	/* could be followed by options */
+};
+
+#define nd_na_type		nd_na_hdr.icmp6_type
+#define nd_na_code		nd_na_hdr.icmp6_code
+#define nd_na_cksum		nd_na_hdr.icmp6_cksum
+#define nd_na_flags_reserved	nd_na_hdr.icmp6_data32[0]
+#if BYTE_ORDER == BIG_ENDIAN
+#define ND_NA_FLAG_ROUTER		0x80000000
+#define ND_NA_FLAG_SOLICITED		0x40000000
+#define ND_NA_FLAG_OVERRIDE		0x20000000
+#else
+#if BYTE_ORDER == LITTLE_ENDIAN
+#define ND_NA_FLAG_ROUTER		0x80
+#define ND_NA_FLAG_SOLICITED		0x40
+#define ND_NA_FLAG_OVERRIDE		0x20
+#endif
+#endif
+
+struct nd_redirect {		/* redirect */
+	struct icmp6_hdr	nd_rd_hdr;
+	struct in6_addr		nd_rd_target;	/* target address */
+	struct in6_addr		nd_rd_dst;	/* destination address */
+	/* could be followed by options */
+};
+
+#define nd_rd_type		nd_rd_hdr.icmp6_type
+#define nd_rd_code		nd_rd_hdr.icmp6_code
+#define nd_rd_cksum		nd_rd_hdr.icmp6_cksum
+#define nd_rd_reserved		nd_rd_hdr.icmp6_data32[0]
+
+struct nd_opt_hdr {		/* Neighbor discovery option header */
+	u_int8_t	nd_opt_type;
+	u_int8_t	nd_opt_len;
+	/* followed by option-specific data */
+};
+
+#define ND_OPT_SOURCE_LINKADDR		1
+#define ND_OPT_TARGET_LINKADDR		2
+#define ND_OPT_PREFIX_INFORMATION	3
+#define ND_OPT_REDIRECTED_HEADER	4
+#define ND_OPT_MTU			5
+
+struct nd_opt_prefix_info {	/* prefix information */
+	u_int8_t	nd_opt_pi_type;
+	u_int8_t	nd_opt_pi_len;
+	u_int8_t	nd_opt_pi_prefix_len;
+	u_int8_t	nd_opt_pi_flags_reserved;
+	u_int32_t	nd_opt_pi_valid_time;
+	u_int32_t	nd_opt_pi_preferred_time;
+	u_int32_t	nd_opt_pi_reserved2;
+	struct in6_addr	nd_opt_pi_prefix;
+};
+
+#define ND_OPT_PI_FLAG_ONLINK		0x80
+#define ND_OPT_PI_FLAG_AUTO		0x40
+
+struct nd_opt_rd_hdr {		/* redirected header */
+	u_int8_t	nd_opt_rh_type;
+	u_int8_t	nd_opt_rh_len;
+	u_int16_t	nd_opt_rh_reserved1;
+	u_int32_t	nd_opt_rh_reserved2;
+	/* followed by IP header and data */
+};
+
+struct nd_opt_mtu {		/* MTU option */
+	u_int8_t	nd_opt_mtu_type;
+	u_int8_t	nd_opt_mtu_len;
+	u_int16_t	nd_opt_mtu_reserved;
+	u_int32_t	nd_opt_mtu_mtu;
+};
+
+/*
+ * icmp6 namelookup
+ */
+
+struct icmp6_namelookup {
+	struct icmp6_hdr 	icmp6_nl_hdr;
+	u_int8_t	icmp6_nl_nonce[8];
+	int32_t		icmp6_nl_ttl;
+#if 0
+	u_int8_t	icmp6_nl_len;
+	u_int8_t	icmp6_nl_name[3];
+#endif
+	/* could be followed by options */
+};
+
+/*
+ * icmp6 node information
+ */
+struct icmp6_nodeinfo {
+	struct icmp6_hdr icmp6_ni_hdr;
+	u_int8_t icmp6_ni_nonce[8];
+	/* could be followed by reply data */
+};
+
+#define ni_type		icmp6_ni_hdr.icmp6_type
+#define ni_code		icmp6_ni_hdr.icmp6_code
+#define ni_cksum	icmp6_ni_hdr.icmp6_cksum
+#define ni_qtype	icmp6_ni_hdr.icmp6_data16[0]
+#define ni_flags	icmp6_ni_hdr.icmp6_data16[1]
+
+#define NI_QTYPE_NOOP		0 /* NOOP  */
+#define NI_QTYPE_SUPTYPES	1 /* Supported Qtypes */
+#define NI_QTYPE_FQDN		2 /* FQDN */
+#define NI_QTYPE_NODEADDR	3 /* Node Addresses. XXX: spec says 2, but it may be a typo... */
+
+#if BYTE_ORDER == BIG_ENDIAN
+#define NI_SUPTYPE_FLAG_COMPRESS	0x1
+#define NI_FQDN_FLAG_VALIDTTL		0x1
+#elif BYTE_ORDER == LITTLE_ENDIAN
+#define NI_SUPTYPE_FLAG_COMPRESS	0x0100
+#define NI_FQDN_FLAG_VALIDTTL		0x0100
+#endif
+
+#ifdef NAME_LOOKUPS_04
+#if BYTE_ORDER == BIG_ENDIAN
+#define NI_NODEADDR_FLAG_LINKLOCAL	0x1
+#define NI_NODEADDR_FLAG_SITELOCAL	0x2
+#define NI_NODEADDR_FLAG_GLOBAL		0x4
+#define NI_NODEADDR_FLAG_ALL		0x8
+#define NI_NODEADDR_FLAG_TRUNCATE	0x10
+#define NI_NODEADDR_FLAG_ANYCAST	0x20 /* just experimental. not in spec */
+#elif BYTE_ORDER == LITTLE_ENDIAN
+#define NI_NODEADDR_FLAG_LINKLOCAL	0x0100
+#define NI_NODEADDR_FLAG_SITELOCAL	0x0200
+#define NI_NODEADDR_FLAG_GLOBAL		0x0400
+#define NI_NODEADDR_FLAG_ALL		0x0800
+#define NI_NODEADDR_FLAG_TRUNCATE	0x1000
+#define NI_NODEADDR_FLAG_ANYCAST	0x2000 /* just experimental. not in spec */
+#endif
+#else  /* draft-ietf-ipngwg-icmp-name-lookups-05 (and later?) */
+#if BYTE_ORDER == BIG_ENDIAN
+#define NI_NODEADDR_FLAG_TRUNCATE	0x1
+#define NI_NODEADDR_FLAG_ALL		0x2
+#define NI_NODEADDR_FLAG_COMPAT		0x4
+#define NI_NODEADDR_FLAG_LINKLOCAL	0x8
+#define NI_NODEADDR_FLAG_SITELOCAL	0x10
+#define NI_NODEADDR_FLAG_GLOBAL		0x20
+#define NI_NODEADDR_FLAG_ANYCAST	0x40 /* just experimental. not in spec */
+#elif BYTE_ORDER == LITTLE_ENDIAN
+#define NI_NODEADDR_FLAG_TRUNCATE	0x0100
+#define NI_NODEADDR_FLAG_ALL		0x0200
+#define NI_NODEADDR_FLAG_COMPAT		0x0400
+#define NI_NODEADDR_FLAG_LINKLOCAL	0x0800
+#define NI_NODEADDR_FLAG_SITELOCAL	0x1000
+#define NI_NODEADDR_FLAG_GLOBAL		0x2000
+#define NI_NODEADDR_FLAG_ANYCAST	0x4000 /* just experimental. not in spec */
+#endif
+#endif
+
+struct ni_reply_fqdn {
+	u_int32_t ni_fqdn_ttl;	/* TTL */
+	u_int8_t ni_fqdn_namelen; /* length in octets of the FQDN */
+	u_int8_t ni_fqdn_name[3]; /* XXX: alignment */
+};
+
+/*
+ * Router Renumbering. as router-renum-08.txt
+ */
+struct icmp6_router_renum {	/* router renumbering header */
+	struct icmp6_hdr	rr_hdr;
+	u_int8_t	rr_segnum;
+	u_int8_t	rr_flags;
+	u_int16_t	rr_maxdelay;
+	u_int32_t	rr_reserved;
+};
+#define ICMP6_RR_FLAGS_SEGNUM		0x80
+#define ICMP6_RR_FLAGS_TEST		0x40
+#define ICMP6_RR_FLAGS_REQRESULT	0x20
+#define ICMP6_RR_FLAGS_FORCEAPPLY	0x10
+#define ICMP6_RR_FLAGS_SPECSITE		0x08
+#define ICMP6_RR_FLAGS_PREVDONE		0x04
+
+#define rr_type		rr_hdr.icmp6_type
+#define rr_code		rr_hdr.icmp6_code
+#define rr_cksum	rr_hdr.icmp6_cksum
+#define rr_seqnum 	rr_hdr.icmp6_data32[0]
+
+struct rr_pco_match {		/* match prefix part */
+	u_int8_t	rpm_code;
+	u_int8_t	rpm_len;
+	u_int8_t	rpm_ordinal;
+	u_int8_t	rpm_matchlen;
+	u_int8_t	rpm_minlen;
+	u_int8_t	rpm_maxlen;
+	u_int16_t	rpm_reserved;
+	struct	in6_addr	rpm_prefix;
+};
+
+#define RPM_PCO_ADD		1
+#define RPM_PCO_CHANGE		2
+#define RPM_PCO_SETGLOBAL	3
+#define RPM_PCO_MAX		4
+
+struct rr_pco_use {		/* use prefix part */
+	u_int8_t	rpu_uselen;
+	u_int8_t	rpu_keeplen;
+	u_int8_t	rpu_ramask;
+	u_int8_t	rpu_raflags;
+	u_int32_t	rpu_vltime;
+	u_int32_t	rpu_pltime;
+	u_int32_t	rpu_flags;
+	struct	in6_addr rpu_prefix;
+};
+#define ICMP6_RR_PCOUSE_RAFLAGS_ONLINK	0x80
+#define ICMP6_RR_PCOUSE_RAFLAGS_AUTO	0x40
+
+#if BYTE_ORDER == BIG_ENDIAN
+#define ICMP6_RR_PCOUSE_FLAGS_DECRVLTIME     0x80000000
+#define ICMP6_RR_PCOUSE_FLAGS_DECRPLTIME     0x40000000
+#elif BYTE_ORDER == LITTLE_ENDIAN
+#define ICMP6_RR_PCOUSE_FLAGS_DECRVLTIME     0x80
+#define ICMP6_RR_PCOUSE_FLAGS_DECRPLTIME     0x40
+#endif
+
+struct rr_result {		/* router renumbering result message */
+	u_int16_t	rrr_flags;
+	u_int8_t	rrr_ordinal;
+	u_int8_t	rrr_matchedlen;
+	u_int32_t	rrr_ifid;
+	struct	in6_addr rrr_prefix;
+};
+#if BYTE_ORDER == BIG_ENDIAN	/* masks for rrr_flags as stored (network order) */
+#define ICMP6_RR_RESULT_FLAGS_OOB		0x0002
+#define ICMP6_RR_RESULT_FLAGS_FORBIDDEN		0x0001
+#elif BYTE_ORDER == LITTLE_ENDIAN	/* same wire bits, byte-swapped in the 16-bit field */
+#define ICMP6_RR_RESULT_FLAGS_OOB		0x0200
+#define ICMP6_RR_RESULT_FLAGS_FORBIDDEN		0x0100
+#endif
+
+/*
+ * icmp6 filter structures.
+ */
+
+struct icmp6_filter {
+	u_int32_t icmp6_filt[8];
+};
+
+#ifdef _KERNEL
+#define	ICMP6_FILTER_SETPASSALL(filterp) \
+do {								\
+	int i; u_char *p;					\
+	p = (u_char *)filterp;					\
+	for (i = 0; i < sizeof(struct icmp6_filter); i++)	\
+		p[i] = 0xff;					\
+} while (0)
+#define	ICMP6_FILTER_SETBLOCKALL(filterp) \
+	bzero(filterp, sizeof(struct icmp6_filter))
+#else /* _KERNEL */
+#define	ICMP6_FILTER_SETPASSALL(filterp) \
+	memset(filterp, 0xff, sizeof(struct icmp6_filter))
+#define	ICMP6_FILTER_SETBLOCKALL(filterp) \
+	memset(filterp, 0x00, sizeof(struct icmp6_filter))
+#endif /* _KERNEL */
+/* Filter bitmap: word = type >> 5, bit = type & 31; 1U keeps the bit-31 shift well defined. */
+#define	ICMP6_FILTER_SETPASS(type, filterp) \
+	(((filterp)->icmp6_filt[(type) >> 5]) |= (1U << ((type) & 31)))
+#define	ICMP6_FILTER_SETBLOCK(type, filterp) \
+	(((filterp)->icmp6_filt[(type) >> 5]) &= ~(1U << ((type) & 31)))
+#define	ICMP6_FILTER_WILLPASS(type, filterp) \
+	((((filterp)->icmp6_filt[(type) >> 5]) & (1U << ((type) & 31))) != 0)
+#define	ICMP6_FILTER_WILLBLOCK(type, filterp) \
+	((((filterp)->icmp6_filt[(type) >> 5]) & (1U << ((type) & 31))) == 0)
+
+/*
+ * Variables related to this implementation
+ * of the internet control message protocol version 6.
+ */
+struct icmp6errstat {
+	u_quad_t icp6errs_dst_unreach_noroute;
+	u_quad_t icp6errs_dst_unreach_admin;
+	u_quad_t icp6errs_dst_unreach_beyondscope;
+	u_quad_t icp6errs_dst_unreach_addr;
+	u_quad_t icp6errs_dst_unreach_noport;
+	u_quad_t icp6errs_packet_too_big;
+	u_quad_t icp6errs_time_exceed_transit;
+	u_quad_t icp6errs_time_exceed_reassembly;
+	u_quad_t icp6errs_paramprob_header;
+	u_quad_t icp6errs_paramprob_nextheader;
+	u_quad_t icp6errs_paramprob_option;
+	u_quad_t icp6errs_redirect; /* we regard redirect as an error here */
+	u_quad_t icp6errs_unknown;
+};
+
+struct icmp6stat {
+/* statistics related to icmp6 packets generated */
+	u_quad_t icp6s_error;		/* # of calls to icmp6_error */
+	u_quad_t icp6s_canterror;	/* no error 'cuz old was icmp */
+	u_quad_t icp6s_toofreq;		/* no error 'cuz rate limitation */
+	u_quad_t icp6s_outhist[256];
+/* statistics related to input message processed */
+	u_quad_t icp6s_badcode;		/* icmp6_code out of range */
+	u_quad_t icp6s_tooshort;	/* packet < sizeof(struct icmp6_hdr) */
+	u_quad_t icp6s_checksum;	/* bad checksum */
+	u_quad_t icp6s_badlen;		/* calculated bound mismatch */
+	u_quad_t icp6s_reflect;		/* number of responses */
+	u_quad_t icp6s_inhist[256];	
+	u_quad_t icp6s_nd_toomanyopt;	/* too many ND options */
+	struct icmp6errstat icp6s_outerrhist;
+#define icp6s_odst_unreach_noroute \
+	icp6s_outerrhist.icp6errs_dst_unreach_noroute
+#define icp6s_odst_unreach_admin icp6s_outerrhist.icp6errs_dst_unreach_admin
+#define icp6s_odst_unreach_beyondscope \
+	icp6s_outerrhist.icp6errs_dst_unreach_beyondscope
+#define icp6s_odst_unreach_addr icp6s_outerrhist.icp6errs_dst_unreach_addr
+#define icp6s_odst_unreach_noport icp6s_outerrhist.icp6errs_dst_unreach_noport
+#define icp6s_opacket_too_big icp6s_outerrhist.icp6errs_packet_too_big
+#define icp6s_otime_exceed_transit \
+	icp6s_outerrhist.icp6errs_time_exceed_transit
+#define icp6s_otime_exceed_reassembly \
+	icp6s_outerrhist.icp6errs_time_exceed_reassembly
+#define icp6s_oparamprob_header icp6s_outerrhist.icp6errs_paramprob_header
+#define icp6s_oparamprob_nextheader \
+	icp6s_outerrhist.icp6errs_paramprob_nextheader
+#define icp6s_oparamprob_option icp6s_outerrhist.icp6errs_paramprob_option
+#define icp6s_oredirect icp6s_outerrhist.icp6errs_redirect
+#define icp6s_ounknown icp6s_outerrhist.icp6errs_unknown
+};
+
+/*
+ * Names for ICMPv6 sysctl objects
+ */
+#define ICMPV6CTL_STATS		1
+#define ICMPV6CTL_REDIRACCEPT	2	/* accept/process redirects */
+#define ICMPV6CTL_REDIRTIMEOUT	3	/* redirect cache time */
+#define ICMPV6CTL_ERRRATELIMIT	5	/* ICMPv6 error rate limitation */
+#define ICMPV6CTL_ND6_PRUNE	6
+#define ICMPV6CTL_ND6_DELAY	8
+#define ICMPV6CTL_ND6_UMAXTRIES	9
+#define ICMPV6CTL_ND6_MMAXTRIES		10
+#define ICMPV6CTL_ND6_USELOOPBACK	11
+/*#define ICMPV6CTL_ND6_PROXYALL	12	obsoleted, do not reuse here */
+#define ICMPV6CTL_NODEINFO	13
+#define ICMPV6CTL_ERRPPSLIMIT	14	/* ICMPv6 error pps limitation */
+#define ICMPV6CTL_ND6_MAXNUDHINT	15
+#define ICMPV6CTL_MAXID		16
+
+#define ICMPV6CTL_NAMES { \
+	{ 0, 0 }, \
+	{ 0, 0 }, \
+	{ "rediraccept", CTLTYPE_INT }, \
+	{ "redirtimeout", CTLTYPE_INT }, \
+	{ 0, 0 }, \
+	{ "errratelimit", CTLTYPE_INT }, \
+	{ "nd6_prune", CTLTYPE_INT }, \
+	{ 0, 0 }, \
+	{ "nd6_delay", CTLTYPE_INT }, \
+	{ "nd6_umaxtries", CTLTYPE_INT }, \
+	{ "nd6_mmaxtries", CTLTYPE_INT }, \
+	{ "nd6_useloopback", CTLTYPE_INT }, \
+	{ 0, 0 }, \
+	{ "nodeinfo", CTLTYPE_INT }, \
+	{ "errppslimit", CTLTYPE_INT }, \
+	{ "nd6_maxnudhint", CTLTYPE_INT }, \
+}
+
+#define RTF_PROBEMTU	RTF_PROTO1
+
+#ifdef _KERNEL
+# ifdef __STDC__
+struct	rtentry;
+struct	rttimer;
+struct	in6_multi;
+# endif
+void	icmp6_init __P((void));
+void	icmp6_paramerror __P((struct mbuf *, int));
+void	icmp6_error __P((struct mbuf *, int, int, int));
+int	icmp6_input __P((struct mbuf **, int *, int));
+void	icmp6_fasttimo __P((void));
+void	icmp6_reflect __P((struct mbuf *, size_t));
+void	icmp6_prepare __P((struct mbuf *));
+void	icmp6_redirect_input __P((struct mbuf *, int));
+void	icmp6_redirect_output __P((struct mbuf *, struct rtentry *));
+
+/* XXX: is this the right place for these macros? */
+#define icmp6_ifstat_inc(ifp, tag) \
+do {								\
+	if ((ifp) && (ifp)->if_index <= if_index			\
+	 && (ifp)->if_index < icmp6_ifstatmax			\
+	 && icmp6_ifstat && icmp6_ifstat[(ifp)->if_index]) {	\
+		icmp6_ifstat[(ifp)->if_index]->tag++;		\
+	}							\
+} while (0)
+
+#define icmp6_ifoutstat_inc(ifp, type, code) \
+do { \
+		icmp6_ifstat_inc(ifp, ifs6_out_msg); \
+ 		if (type < ICMP6_INFOMSG_MASK) \
+ 			icmp6_ifstat_inc(ifp, ifs6_out_error); \
+		switch(type) { \
+		 case ICMP6_DST_UNREACH: \
+			 icmp6_ifstat_inc(ifp, ifs6_out_dstunreach); \
+			 if (code == ICMP6_DST_UNREACH_ADMIN) \
+				 icmp6_ifstat_inc(ifp, ifs6_out_adminprohib); \
+			 break; \
+		 case ICMP6_PACKET_TOO_BIG: \
+			 icmp6_ifstat_inc(ifp, ifs6_out_pkttoobig); \
+			 break; \
+		 case ICMP6_TIME_EXCEEDED: \
+			 icmp6_ifstat_inc(ifp, ifs6_out_timeexceed); \
+			 break; \
+		 case ICMP6_PARAM_PROB: \
+			 icmp6_ifstat_inc(ifp, ifs6_out_paramprob); \
+			 break; \
+		 case ICMP6_ECHO_REQUEST: \
+			 icmp6_ifstat_inc(ifp, ifs6_out_echo); \
+			 break; \
+		 case ICMP6_ECHO_REPLY: \
+			 icmp6_ifstat_inc(ifp, ifs6_out_echoreply); \
+			 break; \
+		 case MLD6_LISTENER_QUERY: \
+			 icmp6_ifstat_inc(ifp, ifs6_out_mldquery); \
+			 break; \
+		 case MLD6_LISTENER_REPORT: \
+			 icmp6_ifstat_inc(ifp, ifs6_out_mldreport); \
+			 break; \
+		 case MLD6_LISTENER_DONE: \
+			 icmp6_ifstat_inc(ifp, ifs6_out_mlddone); \
+			 break; \
+		 case ND_ROUTER_SOLICIT: \
+			 icmp6_ifstat_inc(ifp, ifs6_out_routersolicit); \
+			 break; \
+		 case ND_ROUTER_ADVERT: \
+			 icmp6_ifstat_inc(ifp, ifs6_out_routeradvert); \
+			 break; \
+		 case ND_NEIGHBOR_SOLICIT: \
+			 icmp6_ifstat_inc(ifp, ifs6_out_neighborsolicit); \
+			 break; \
+		 case ND_NEIGHBOR_ADVERT: \
+			 icmp6_ifstat_inc(ifp, ifs6_out_neighboradvert); \
+			 break; \
+		 case ND_REDIRECT: \
+			 icmp6_ifstat_inc(ifp, ifs6_out_redirect); \
+			 break; \
+		} \
+} while (0)
+
+extern int	icmp6_rediraccept;	/* accept/process redirects */
+extern int	icmp6_redirtimeout;	/* cache time for redirect routes */
+#endif /* _KERNEL */
+
+#endif /* not _NETINET_ICMP6_H_ */
diff --git a/sys/netinet/icmp_var.h b/sys/netinet/icmp_var.h
new file mode 100644
index 0000000..814e932
--- /dev/null
+++ b/sys/netinet/icmp_var.h
@@ -0,0 +1,89 @@
+/*
+ * Copyright (c) 1982, 1986, 1993
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)icmp_var.h	8.1 (Berkeley) 6/10/93
+ * $FreeBSD$
+ */
+
+#ifndef _NETINET_ICMP_VAR_H_
+#define _NETINET_ICMP_VAR_H_
+
+
+/*
+ * Variables related to this implementation
+ * of the internet control message protocol.
+ */
+struct	icmpstat {
+/* statistics related to icmp packets generated */
+	u_long	icps_error;		/* # of calls to icmp_error */
+	u_long	icps_oldshort;		/* no error 'cuz old ip too short */
+	u_long	icps_oldicmp;		/* no error 'cuz old was icmp */
+	u_long	icps_outhist[ICMP_MAXTYPE + 1];
+/* statistics related to input messages processed */
+ 	u_long	icps_badcode;		/* icmp_code out of range */
+	u_long	icps_tooshort;		/* packet < ICMP_MINLEN */
+	u_long	icps_checksum;		/* bad checksum */
+	u_long	icps_badlen;		/* calculated bound mismatch */
+	u_long	icps_reflect;		/* number of responses */
+	u_long	icps_inhist[ICMP_MAXTYPE + 1];
+	u_long	icps_bmcastecho; 	/* b/mcast echo requests dropped */
+	u_long	icps_bmcasttstamp; 	/* b/mcast tstamp requests dropped */
+};
+
+/*
+ * Names for ICMP sysctl objects
+ */
+#define	ICMPCTL_MASKREPL	1	/* allow replies to netmask requests */
+#define	ICMPCTL_STATS		2	/* statistics (read-only) */
+#define ICMPCTL_ICMPLIM		3
+#define ICMPCTL_MAXID		4
+
+#define ICMPCTL_NAMES { \
+	{ 0, 0 }, \
+	{ "maskrepl", CTLTYPE_INT }, \
+	{ "stats", CTLTYPE_STRUCT }, \
+	{ "icmplim", CTLTYPE_INT }, \
+}
+
+#ifdef _KERNEL
+SYSCTL_DECL(_net_inet_icmp);
+extern int badport_bandlim __P((int));
+#define BANDLIM_UNLIMITED -1
+#define BANDLIM_ICMP_UNREACH 0
+#define BANDLIM_ICMP_ECHO 1 
+#define BANDLIM_ICMP_TSTAMP 2
+#define BANDLIM_RST_CLOSEDPORT 3 /* No connection, and no listeners */
+#define BANDLIM_RST_OPENPORT 4   /* No connection, listener */
+#define BANDLIM_MAX 4
+#endif
+
+#endif
diff --git a/sys/netinet/if_atm.c b/sys/netinet/if_atm.c
new file mode 100644
index 0000000..04b49bf
--- /dev/null
+++ b/sys/netinet/if_atm.c
@@ -0,0 +1,280 @@
+/*      $NetBSD: if_atm.c,v 1.6 1996/10/13 02:03:01 christos Exp $       */
+
+/*
+ *
+ * Copyright (c) 1996 Charles D. Cranor and Washington University.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *      This product includes software developed by Charles D. Cranor and 
+ *	Washington University.
+ * 4. The name of the author may not be used to endorse or promote products
+ *    derived from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+
+/*
+ * IP <=> ATM address resolution.
+ */
+
+#include "opt_inet.h"
+#include "opt_inet6.h"
+#include "opt_natm.h"
+
+#if defined(INET) || defined(INET6)
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/queue.h>
+#include <sys/mbuf.h>
+#include <sys/socket.h>
+#include <sys/sockio.h>
+#include <sys/syslog.h>
+
+#include <net/if.h>
+#include <net/if_dl.h>
+#include <net/route.h>
+#include <net/if_atm.h>
+
+#include <netinet/in.h>
+#include <netinet/if_atm.h>
+
+#ifdef NATM
+#include <netnatm/natm.h>
+#endif
+
+
+#define SDL(s) ((struct sockaddr_dl *)(s))	/* view a sockaddr pointer as sockaddr_dl */
+
+/*
+ * atm_rtrequest: handle ATM rt request (in support of generic code)
+ *   inputs: "req" = request code (RTM_ADD/RTM_DELETE; RTM_RESOLVE only logs)
+ *           "rt" = route entry (gateway routes are ignored)
+ *           "sa" = sockaddr (unused); a failed RTM_ADD deletes the route
+ */
+
+void
+atm_rtrequest(req, rt, sa)
+	int req;
+	register struct rtentry *rt;
+	struct sockaddr *sa;
+{
+	register struct sockaddr *gate = rt->rt_gateway;
+	struct atm_pseudoioctl api;	/* argument block for the driver ioctls */
+#ifdef NATM
+	struct sockaddr_in *sin;
+	struct natmpcb *npcb = NULL;	/* stays NULL until a VC is reserved */
+	struct atm_pseudohdr *aph;
+#endif
+	static struct sockaddr_dl null_sdl = {sizeof(null_sdl), AF_LINK};
+
+	if (rt->rt_flags & RTF_GATEWAY)   /* link level requests only */
+		return;
+
+	switch (req) {
+
+	case RTM_RESOLVE: /* resolve: only happens when cloning */
+		printf("atm_rtrequest: RTM_RESOLVE request detected?\n");
+		break;
+
+	case RTM_ADD:
+
+		/*
+		 * route added by a command (e.g. ifconfig, route, arp...).
+		 *
+		 * first check to see if this is not a host route, in which
+		 * case we are being called via "ifconfig" to set the address.
+		 */
+
+		if ((rt->rt_flags & RTF_HOST) == 0) {
+			rt_setgate(rt,rt_key(rt),(struct sockaddr *)&null_sdl);
+			gate = rt->rt_gateway;	/* rt_setgate installed a new one */
+			SDL(gate)->sdl_type = rt->rt_ifp->if_type;
+			SDL(gate)->sdl_index = rt->rt_ifp->if_index;
+			break;
+		}
+
+		if ((rt->rt_flags & RTF_CLONING) != 0) {
+			printf("atm_rtrequest: cloning route detected?\n");
+			break;
+		}
+		if (gate->sa_family != AF_LINK ||
+		    gate->sa_len < sizeof(null_sdl)) {
+			/* newline-terminated, like the twin message in ARP */
+			log(LOG_DEBUG, "atm_rtrequest: bad gateway value\n");
+			break;
+		}
+
+#ifdef DIAGNOSTIC
+		if (rt->rt_ifp->if_ioctl == NULL) panic("atm null ioctl");
+#endif
+
+#ifdef NATM
+		/*
+		 * let native ATM know we are using this VCI/VPI
+		 * (i.e. reserve it)
+		 */
+		sin = (struct sockaddr_in *) rt_key(rt);
+		if (sin->sin_family != AF_INET)
+			goto failed;
+		aph = (struct atm_pseudohdr *) LLADDR(SDL(gate));
+		npcb = npcb_add(NULL, rt->rt_ifp, ATM_PH_VCI(aph),
+						ATM_PH_VPI(aph));
+		if (npcb == NULL)
+			goto failed;
+		npcb->npcb_flags |= NPCB_IP;
+		npcb->ipaddr.s_addr = sin->sin_addr.s_addr;
+		/* XXX: move npcb to llinfo when ATM ARP is ready */
+		rt->rt_llinfo = (caddr_t) npcb;
+		rt->rt_flags |= RTF_LLINFO;
+#endif
+		/*
+		 * let the lower level know this circuit is active
+		 */
+		bcopy(LLADDR(SDL(gate)), &api.aph, sizeof(api.aph));
+		api.rxhand = NULL;
+		if (rt->rt_ifp->if_ioctl(rt->rt_ifp, SIOCATMENA,
+							(caddr_t)&api) != 0) {
+			printf("atm: couldn't add VC\n");
+			goto failed;
+		}
+
+		SDL(gate)->sdl_type = rt->rt_ifp->if_type;
+		SDL(gate)->sdl_index = rt->rt_ifp->if_index;
+
+		break;
+
+failed:		/* RTM_ADD only: release the reservation, drop the route */
+#ifdef NATM
+		if (npcb) {
+			npcb_free(npcb, NPCB_DESTROY);
+			rt->rt_llinfo = NULL;
+			rt->rt_flags &= ~RTF_LLINFO;
+		}
+#endif
+		rtrequest(RTM_DELETE, rt_key(rt), (struct sockaddr *)0,
+			rt_mask(rt), 0, (struct rtentry **) 0);
+		break;
+
+	case RTM_DELETE:
+
+#ifdef NATM
+		/*
+		 * tell native ATM we are done with this VC
+		 */
+
+		if (rt->rt_flags & RTF_LLINFO) {
+			npcb_free((struct natmpcb *)rt->rt_llinfo,
+								NPCB_DESTROY);
+			rt->rt_llinfo = NULL;
+			rt->rt_flags &= ~RTF_LLINFO;
+		}
+#endif
+		/*
+		 * tell the lower layer to disable this circuit
+		 */
+
+		bcopy(LLADDR(SDL(gate)), &api.aph, sizeof(api.aph));
+		api.rxhand = NULL;
+		(void)rt->rt_ifp->if_ioctl(rt->rt_ifp, SIOCATMDIS,
+							(caddr_t)&api);
+
+		break;
+	}
+}
+
+/*
+ * atmresolve:
+ *   inputs:
+ *     [1] "rt" = the link level route to use (or null if need to look one up)
+ *     [2] "m" = mbuf containing the data to be sent
+ *     [3] "dst" = sockaddr_in (IP) address of dest.
+ *   output:
+ *     [4] "desten" = ATM pseudo header which we will fill in VPI/VCI info
+ *   return:
+ *     0 == resolve FAILED; note that "m" gets m_freem'd in this case
+ *     1 == resolve OK; desten contains result
+ *
+ *   XXX: will need more work if we wish to support ATMARP in the kernel,
+ *   but this is enough for PVCs entered via the "route" command.
+ */
+
+int
+atmresolve(rt, m, dst, desten)
+
+register struct rtentry *rt;
+struct mbuf *m;
+register struct sockaddr *dst;
+register struct atm_pseudohdr *desten;	/* OUT */
+
+{
+	struct sockaddr_dl *sdl;
+
+	if (m->m_flags & (M_BCAST|M_MCAST)) {
+		log(LOG_INFO,
+		    "atmresolve: BCAST/MCAST packet detected/dumped\n");
+		goto bad;
+	}
+
+	if (rt == NULL) {
+		rt = RTALLOC1(dst, 0);
+		if (rt == NULL) goto bad; /* failed */
+		rt->rt_refcnt--;	/* don't keep LL references */
+		if ((rt->rt_flags & RTF_GATEWAY) != 0 ||
+			(rt->rt_flags & RTF_LLINFO) == 0 ||
+			/* XXX: are we using LLINFO? */
+			rt->rt_gateway->sa_family != AF_LINK) {
+				goto bad;
+		}
+	}
+
+	/*
+	 * note that rt_gateway is a sockaddr_dl which contains the
+	 * atm_pseudohdr data structure for this route.   we currently
+	 * don't need any rt_llinfo info (but will if we want to support
+	 * ATM ARP [c.f. if_ether.c]).
+	 */
+
+	sdl = SDL(rt->rt_gateway);
+
+	/*
+	 * The gateway is usable only if it is an AF_LINK address whose
+	 * length is exactly that of an ATM pseudo header; nothing is retried.
+	 */
+
+	if (sdl->sdl_family == AF_LINK && sdl->sdl_alen == sizeof(*desten)) {
+		bcopy(LLADDR(sdl), desten, sdl->sdl_alen);
+		return(1);	/* ok, go for it! */
+	}
+
+	/*
+	 * we got an entry, but it doesn't have valid link address
+	 * info in it (it is prob. the interface route, which has
+	 * sdl_alen == 0).    dump packet.  (fall through to "bad").
+	 */
+
+bad:
+	m_freem(m);
+	return(0);
+}
+#endif /* INET || INET6 */
diff --git a/sys/netinet/if_atm.h b/sys/netinet/if_atm.h
new file mode 100644
index 0000000..b448253
--- /dev/null
+++ b/sys/netinet/if_atm.h
@@ -0,0 +1,47 @@
+/* $FreeBSD$ */
+/*      $NetBSD: if_atm.h,v 1.2 1996/07/03 17:17:17 chuck Exp $       */
+
+/*
+ *
+ * Copyright (c) 1996 Charles D. Cranor and Washington University.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *      This product includes software developed by Charles D. Cranor and 
+ * 	Washington University.
+ * 4. The name of the author may not be used to endorse or promote products
+ *    derived from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/*
+ * if_atm.h: prototypes for the IP <=> ATM address resolution routines
+ */
+/* Forward declarations: only pointers to these types are used below. */
+struct atm_pseudohdr;
+struct mbuf;
+struct rtentry;
+struct sockaddr;
+/* Route request hook and resolver; atmresolve() returns 1 if resolved, else 0. */
+void atm_rtrequest __P((int, struct rtentry *, struct sockaddr *));
+int atmresolve __P((struct rtentry *, struct mbuf *, struct sockaddr *, 
+		struct atm_pseudohdr *));
diff --git a/sys/netinet/if_ether.c b/sys/netinet/if_ether.c
new file mode 100644
index 0000000..de5d906
--- /dev/null
+++ b/sys/netinet/if_ether.c
@@ -0,0 +1,847 @@
+/*
+ * Copyright (c) 1982, 1986, 1988, 1993
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)if_ether.c	8.1 (Berkeley) 6/10/93
+ * $FreeBSD$
+ */
+
+/*
+ * Ethernet address resolution protocol.
+ * TODO:
+ *	add "inuse/lock" bit (or ref. count) along with valid bit
+ */
+
+#include "opt_inet.h"
+#include "opt_bdg.h"
+
+#include <sys/param.h>
+#include <sys/kernel.h>
+#include <sys/queue.h>
+#include <sys/sysctl.h>
+#include <sys/systm.h>
+#include <sys/mbuf.h>
+#include <sys/malloc.h>
+#include <sys/socket.h>
+#include <sys/syslog.h>
+
+#include <net/if.h>
+#include <net/if_dl.h>
+#include <net/if_types.h>
+#include <net/route.h>
+#include <net/netisr.h>
+#include <net/if_llc.h>
+#ifdef BRIDGE
+#include <net/ethernet.h>
+#include <net/bridge.h>
+#endif
+
+#include <netinet/in.h>
+#include <netinet/in_var.h>
+#include <netinet/if_ether.h>
+
+#include <net/iso88025.h>
+
+#define SIN(s) ((struct sockaddr_in *)(s))	/* view a sockaddr pointer as sockaddr_in */
+#define SDL(s) ((struct sockaddr_dl *)(s))	/* view a sockaddr pointer as sockaddr_dl */
+
+SYSCTL_DECL(_net_link_ether);
+SYSCTL_NODE(_net_link_ether, PF_INET, inet, CTLFLAG_RW, 0, "");
+
+/* timer values (all in seconds) */
+static int arpt_prune = (5*60*1); /* walk list every 5 minutes */
+static int arpt_keep = (20*60); /* once resolved, good for 20 more minutes */
+static int arpt_down = 20;	/* once declared down, don't send for 20 sec */
+/* The three timers are exported read-write below the node declared above. */
+SYSCTL_INT(_net_link_ether_inet, OID_AUTO, prune_intvl, CTLFLAG_RW,
+	   &arpt_prune, 0, "");
+SYSCTL_INT(_net_link_ether_inet, OID_AUTO, max_age, CTLFLAG_RW, 
+	   &arpt_keep, 0, "");
+SYSCTL_INT(_net_link_ether_inet, OID_AUTO, host_down_time, CTLFLAG_RW,
+	   &arpt_down, 0, "");
+
+#define	rt_expire rt_rmx.rmx_expire	/* shorthand for use on a struct rtentry */
+/* Per-route ARP state; arp_rtrequest() stores a pointer to it in rt_llinfo. */
+struct llinfo_arp {
+	LIST_ENTRY(llinfo_arp) la_le;	/* linkage on the llinfo_arp list */
+	struct	rtentry *la_rt;		/* route this entry belongs to */
+	struct	mbuf *la_hold;		/* last packet until resolved/timeout */
+	long	la_asked;		/* # of requests sent; arpresolve() resets it */
+#define la_timer la_rt->rt_rmx.rmx_expire /* deletion time in seconds */
+};
+
+static	LIST_HEAD(, llinfo_arp) llinfo_arp;	/* every ARP entry; walked by arptimer() */
+
+struct	ifqueue arpintrq;		/* input queue drained by arpintr() */
+static int	arp_inuse, arp_allocated;	/* live entries / entries ever allocated */
+
+static int	arp_maxtries = 5;	/* requests sent before the route is rejected */
+static int	useloopback = 1; /* use loopback interface for local traffic */
+static int	arp_proxyall = 0;	/* nonzero: proxy even without a proxy entry */
+
+SYSCTL_INT(_net_link_ether_inet, OID_AUTO, maxtries, CTLFLAG_RW,
+	   &arp_maxtries, 0, "");
+SYSCTL_INT(_net_link_ether_inet, OID_AUTO, useloopback, CTLFLAG_RW,
+	   &useloopback, 0, "");
+SYSCTL_INT(_net_link_ether_inet, OID_AUTO, proxyall, CTLFLAG_RW,
+	   &arp_proxyall, 0, "");
+
+static void	arp_init __P((void));	/* run through SYSINIT */
+static void	arp_rtrequest __P((int, struct rtentry *, struct sockaddr *));
+static void	arprequest __P((struct arpcom *,	/* (ac, sip, tip, enaddr) */
+			struct in_addr *, struct in_addr *, u_char *));
+static void	arpintr __P((void));	/* registered for NETISR_ARP */
+static void	arptfree __P((struct llinfo_arp *));
+static void	arptimer __P((void *));	/* timeout() callback */
+static struct llinfo_arp
+		*arplookup __P((u_long, int, int));	/* (addr, create, proxy) */
+#ifdef INET
+static void	in_arpinput __P((struct mbuf *));
+#endif /* INET */
+
+/*
+ * Periodic timeout handler: rearm itself, then expire stale ARP entries.
+ */
+/* ARGSUSED */
+static void
+arptimer(ignored_arg)
+	void *ignored_arg;
+{
+	struct llinfo_arp *cur, *next;
+	struct rtentry *rt;
+	int s = splnet();
+
+	timeout(arptimer, (caddr_t)0, arpt_prune * hz);
+	for (cur = LIST_FIRST(&llinfo_arp); cur != NULL; cur = next) {
+		next = LIST_NEXT(cur, la_le);	/* fetch before cur is handed off */
+		rt = cur->la_rt;
+		if (rt->rt_expire != 0 && rt->rt_expire <= time_second)
+			arptfree(cur);	/* expiry time has passed */
+	}
+	splx(s);
+}
+
+/*
+ * Route hook for ARP (parallel to llc_rtrequest): keeps rt_llinfo in step.
+ */
+static void
+arp_rtrequest(req, rt, sa)
+	int req;
+	register struct rtentry *rt;
+	struct sockaddr *sa;
+{
+	register struct sockaddr *gate = rt->rt_gateway;
+	register struct llinfo_arp *la = (struct llinfo_arp *)rt->rt_llinfo;
+	static struct sockaddr_dl null_sdl = {sizeof(null_sdl), AF_LINK};
+	static int arpinit_done;	/* set once the first call did the setup */
+
+	if (!arpinit_done) {	/* first call: init list, start timer, hook netisr */
+		arpinit_done = 1;
+		LIST_INIT(&llinfo_arp);
+		timeout(arptimer, (caddr_t)0, hz);
+		register_netisr(NETISR_ARP, arpintr);
+	}
+	if (rt->rt_flags & RTF_GATEWAY)	/* nothing to do for gateway routes */
+		return;
+	switch (req) {
+
+	case RTM_ADD:
+		/*
+		 * XXX: If this is a manually added route to interface
+		 * such as older version of routed or gated might provide,
+		 * restore cloning bit.
+		 */
+		if ((rt->rt_flags & RTF_HOST) == 0 &&
+		    SIN(rt_mask(rt))->sin_addr.s_addr != 0xffffffff)
+			rt->rt_flags |= RTF_CLONING;
+		if (rt->rt_flags & RTF_CLONING) {
+			/*
+			 * Case 1: This route should come from a route to iface.
+			 */
+			rt_setgate(rt, rt_key(rt),
+					(struct sockaddr *)&null_sdl);
+			gate = rt->rt_gateway;	/* re-read after rt_setgate */
+			SDL(gate)->sdl_type = rt->rt_ifp->if_type;
+			SDL(gate)->sdl_index = rt->rt_ifp->if_index;
+			rt->rt_expire = time_second;
+			break;
+		}
+		/* Announce a new entry if requested. */
+		if (rt->rt_flags & RTF_ANNOUNCE)
+			arprequest((struct arpcom *)rt->rt_ifp,
+			    &SIN(rt_key(rt))->sin_addr,
+			    &SIN(rt_key(rt))->sin_addr,
+			    (u_char *)LLADDR(SDL(gate)));
+		/*FALLTHROUGH*/
+	case RTM_RESOLVE:
+		if (gate->sa_family != AF_LINK ||
+		    gate->sa_len < sizeof(null_sdl)) {
+			log(LOG_DEBUG, "arp_rtrequest: bad gateway value\n");
+			break;
+		}
+		SDL(gate)->sdl_type = rt->rt_ifp->if_type;
+		SDL(gate)->sdl_index = rt->rt_ifp->if_index;
+		if (la != 0)
+			break; /* This happens on a route change */
+		/*
+		 * Case 2:  This route may come from cloning, or a manual route
+		 * add with a LL address.
+		 */
+		R_Malloc(la, struct llinfo_arp *, sizeof(*la));
+		rt->rt_llinfo = (caddr_t)la;	/* stored even if la is 0 */
+		if (la == 0) {
+			log(LOG_DEBUG, "arp_rtrequest: malloc failed\n");
+			break;
+		}
+		arp_inuse++, arp_allocated++;
+		Bzero(la, sizeof(*la));
+		la->la_rt = rt;
+		rt->rt_flags |= RTF_LLINFO;
+		LIST_INSERT_HEAD(&llinfo_arp, la, la_le);
+
+#ifdef INET
+		/*
+		 * This keeps the multicast addresses from showing up
+		 * in `arp -a' listings as unresolved.  It's not actually
+		 * functional.  Then the same for broadcast.
+		 */
+		if (IN_MULTICAST(ntohl(SIN(rt_key(rt))->sin_addr.s_addr))) {
+			ETHER_MAP_IP_MULTICAST(&SIN(rt_key(rt))->sin_addr,
+					       LLADDR(SDL(gate)));
+			SDL(gate)->sdl_alen = 6;
+			rt->rt_expire = 0;
+		}
+		if (in_broadcast(SIN(rt_key(rt))->sin_addr, rt->rt_ifp)) {
+			memcpy(LLADDR(SDL(gate)), etherbroadcastaddr, 6);
+			SDL(gate)->sdl_alen = 6;
+			rt->rt_expire = 0;
+		}
+#endif
+		/* Route to this interface's own address: fill in our MAC. */
+		if (SIN(rt_key(rt))->sin_addr.s_addr ==
+		    (IA_SIN(rt->rt_ifa))->sin_addr.s_addr) {
+		    /*
+		     * This test used to be
+		     *	if (loif.if_flags & IFF_UP)
+		     * It allowed local traffic to be forced
+		     * through the hardware by configuring the loopback down.
+		     * However, it causes problems during network configuration
+		     * for boards that can't receive packets they send.
+		     * It is now necessary to clear "useloopback" and remove
+		     * the route to force traffic out to the hardware.
+		     */
+			rt->rt_expire = 0;
+			Bcopy(((struct arpcom *)rt->rt_ifp)->ac_enaddr,
+				LLADDR(SDL(gate)), SDL(gate)->sdl_alen = 6);
+			if (useloopback)
+				rt->rt_ifp = loif;
+
+		}
+		break;
+
+	case RTM_DELETE:	/* unlink and free the llinfo and any held packet */
+		if (la == 0)
+			break;
+		arp_inuse--;
+		LIST_REMOVE(la, la_le);
+		rt->rt_llinfo = 0;
+		rt->rt_flags &= ~RTF_LLINFO;
+		if (la->la_hold)
+			m_freem(la->la_hold);
+		Free((caddr_t)la);
+	}
+}
+
+/*
+ * Broadcast an ARP request (nothing is sent if no mbuf is available).
+ * Caller specifies the arp header's:
+ *	- source ip address ("sip") and target ip address ("tip")
+ *	- source ethernet address ("enaddr")
+ */
+static void
+arprequest(ac, sip, tip, enaddr)
+	register struct arpcom *ac;
+	register struct in_addr *sip, *tip;
+	register u_char *enaddr;
+{
+	register struct mbuf *m;
+	register struct ether_header *eh;
+	register struct ether_arp *ea;
+	struct sockaddr sa;	/* link-level addressing handed to if_output */
+	static u_char	llcx[] = { 0x82, 0x40, LLC_SNAP_LSAP, LLC_SNAP_LSAP,
+				   LLC_UI, 0x00, 0x00, 0x00, 0x08, 0x06 };
+
+	if ((m = m_gethdr(M_DONTWAIT, MT_DATA)) == NULL)
+		return;
+	m->m_pkthdr.rcvif = (struct ifnet *)0;
+	switch (ac->ac_if.if_type) {
+	case IFT_ISO88025:	/* token ring: llcx bytes precede the ARP body */
+		m->m_len = sizeof(*ea) + sizeof(llcx);
+		m->m_pkthdr.len = sizeof(*ea) + sizeof(llcx);
+		MH_ALIGN(m, sizeof(*ea) + sizeof(llcx));
+		(void)memcpy(mtod(m, caddr_t), llcx, sizeof(llcx));
+		(void)memcpy(sa.sa_data, etherbroadcastaddr, 6);
+		(void)memcpy(sa.sa_data + 6, enaddr, 6);
+		sa.sa_data[6] |= TR_RII;
+		sa.sa_data[12] = TR_AC;
+		sa.sa_data[13] = TR_LLC_FRAME;
+		ea = (struct ether_arp *)(mtod(m, char *) + sizeof(llcx));
+		bzero((caddr_t)ea, sizeof (*ea));
+		ea->arp_hrd = htons(ARPHRD_IEEE802);
+		break;
+	case IFT_FDDI:
+	case IFT_ETHER:
+		/*
+		 * This may not be correct for types not explicitly
+		 * listed, but this is our best guess
+		 */
+	default:
+		m->m_len = sizeof(*ea);
+		m->m_pkthdr.len = sizeof(*ea);
+		MH_ALIGN(m, sizeof(*ea));
+		ea = mtod(m, struct ether_arp *);
+		eh = (struct ether_header *)sa.sa_data;
+		bzero((caddr_t)ea, sizeof (*ea));
+		/* if_output will not swap */
+		eh->ether_type = htons(ETHERTYPE_ARP);
+		(void)memcpy(eh->ether_dhost, etherbroadcastaddr,
+		    sizeof(eh->ether_dhost));
+		ea->arp_hrd = htons(ARPHRD_ETHER);
+		break;
+	}
+	ea->arp_pro = htons(ETHERTYPE_IP);
+	ea->arp_hln = sizeof(ea->arp_sha);	/* hardware address length */
+	ea->arp_pln = sizeof(ea->arp_spa);	/* protocol address length */
+	ea->arp_op = htons(ARPOP_REQUEST);
+	(void)memcpy(ea->arp_sha, enaddr, sizeof(ea->arp_sha));
+	(void)memcpy(ea->arp_spa, sip, sizeof(ea->arp_spa));
+	(void)memcpy(ea->arp_tpa, tip, sizeof(ea->arp_tpa));
+	sa.sa_family = AF_UNSPEC;
+	sa.sa_len = sizeof(sa);
+	(*ac->ac_if.if_output)(&ac->ac_if, m, &sa, (struct rtentry *)0);
+}
+
+/*
+ * Resolve an IP address into an ethernet address.  If success,
+ * desten is filled in.  If there is no entry in arptab,
+ * set one up and broadcast a request for the IP address.
+ * Hold onto this mbuf and resend it once the address
+ * is finally resolved.  A return value of 1 indicates
+ * that desten has been filled in and the packet should be sent
+ * normally; a 0 return indicates that the packet has been
+ * taken over here, either now or for later transmission.
+ */
+int
+arpresolve(ac, rt, m, dst, desten, rt0)
+	register struct arpcom *ac;
+	register struct rtentry *rt;
+	struct mbuf *m;
+	register struct sockaddr *dst;
+	register u_char *desten;
+	struct rtentry *rt0;	/* not referenced in this function */
+{
+	register struct llinfo_arp *la = 0;
+	struct sockaddr_dl *sdl;
+
+	if (m->m_flags & M_BCAST) {	/* broadcast */
+		(void)memcpy(desten, etherbroadcastaddr, sizeof(etherbroadcastaddr));
+		return (1);
+	}
+	if (m->m_flags & M_MCAST) {	/* multicast */
+		ETHER_MAP_IP_MULTICAST(&SIN(dst)->sin_addr, desten);
+		return(1);
+	}
+	if (rt)
+		la = (struct llinfo_arp *)rt->rt_llinfo;
+	if (la == 0) {	/* no llinfo on the route: look up, creating if needed */
+		la = arplookup(SIN(dst)->sin_addr.s_addr, 1, 0);
+		if (la)
+			rt = la->la_rt;
+	}
+	if (la == 0 || rt == 0) {	/* nothing to resolve with: drop the packet */
+		log(LOG_DEBUG, "arpresolve: can't allocate llinfo for %s%s%s\n",
+			inet_ntoa(SIN(dst)->sin_addr), la ? "la" : "",
+				rt ? "rt" : "");
+		m_freem(m);
+		return (0);
+	}
+	sdl = SDL(rt->rt_gateway);
+	/*
+	 * Check the address family and length is valid, the address
+	 * is resolved; otherwise, try to resolve.
+	 */
+	if ((rt->rt_expire == 0 || rt->rt_expire > time_second) &&
+	    sdl->sdl_family == AF_LINK && sdl->sdl_alen != 0) {
+		bcopy(LLADDR(sdl), desten, sdl->sdl_alen);
+		return 1;
+	}
+	/*
+	 * There is an arptab entry, but no ethernet address
+	 * response yet.  Replace the held mbuf with this
+	 * latest one.
+	 */
+	if (la->la_hold)
+		m_freem(la->la_hold);
+	la->la_hold = m;
+	if (rt->rt_expire) {	/* entries with rt_expire == 0 are never re-asked */
+		rt->rt_flags &= ~RTF_REJECT;
+		/* rt_expire doubles as "time of last request": one per second */
+		if (la->la_asked == 0 || rt->rt_expire != time_second) {
+			rt->rt_expire = time_second;
+			if (la->la_asked++ < arp_maxtries)
+			    arprequest(ac,
+			        &SIN(rt->rt_ifa->ifa_addr)->sin_addr,
+				&SIN(dst)->sin_addr, ac->ac_enaddr);
+			else {	/* give up: reject for arpt_down seconds, restart count */
+				rt->rt_flags |= RTF_REJECT;
+				rt->rt_expire += arpt_down;
+				la->la_asked = 0;
+			}
+
+		}
+	}
+	return (0);
+}
+
+/*
+ * Drain the ARP input queue: do the common length and type checks,
+ * then hand each packet to the protocol-specific routine.
+ */
+static void
+arpintr()
+{
+	struct mbuf *pkt;
+	struct arphdr *hdr;
+	int ipl;
+
+	while (arpintrq.ifq_head != NULL) {
+		ipl = splimp();
+		IF_DEQUEUE(&arpintrq, pkt);
+		splx(ipl);
+		if (pkt == NULL || !(pkt->m_flags & M_PKTHDR))
+			panic("arpintr");
+
+		if (pkt->m_len < sizeof(struct arphdr)) {
+			pkt = m_pullup(pkt, sizeof(struct arphdr));
+			if (pkt == NULL) {
+				log(LOG_ERR,
+				    "arp: runt packet -- m_pullup failed\n");
+				continue;
+			}
+		}
+		hdr = mtod(pkt, struct arphdr *);
+
+		if (!(ntohs(hdr->ar_hrd) == ARPHRD_ETHER ||
+		    ntohs(hdr->ar_hrd) == ARPHRD_IEEE802)) {
+			log(LOG_ERR,
+			    "arp: unknown hardware address format (0x%2D)\n",
+			    (unsigned char *)&hdr->ar_hrd, "");
+			m_freem(pkt);
+			continue;
+		}
+		if (pkt->m_pkthdr.len < sizeof(struct arphdr) +
+		    2 * hdr->ar_hln + 2 * hdr->ar_pln) {
+			log(LOG_ERR, "arp: runt packet\n");
+			m_freem(pkt);
+			continue;
+		}
+#ifdef INET
+		if (ntohs(hdr->ar_pro) == ETHERTYPE_IP) {
+			in_arpinput(pkt);	/* takes over the mbuf */
+			continue;
+		}
+#endif
+		m_freem(pkt);	/* protocol we do not handle */
+	}
+}
+
+#ifdef INET
+/*
+ * ARP for Internet protocols on 10 Mb/s Ethernet.
+ * Algorithm is that given in RFC 826.
+ * In addition, a sanity check is performed on the sender
+ * protocol address, to catch impersonators.
+ * We no longer handle negotiations for use of trailer protocol:
+ * Formerly, ARP replied for protocol type ETHERTYPE_TRAIL sent
+ * along with IP replies if we wanted trailers sent to us,
+ * and also sent them in response to IP replies.
+ * This allowed either end to announce the desire to receive
+ * trailer packets.
+ * We no longer reply to requests for ETHERTYPE_TRAIL protocol either,
+ * but formerly didn't normally send requests.
+ */
+static int log_arp_wrong_iface = 1;	/* nonzero (the default): log such packets */
+
+SYSCTL_INT(_net_link_ether_inet, OID_AUTO, log_arp_wrong_iface, CTLFLAG_RW,
+	&log_arp_wrong_iface, 0,
+	"log arp packets arriving on the wrong interface");
+
+/*
+ * Handle one received IP ARP packet: update the cache, then answer requests.
+ */
+static void
+in_arpinput(m)
+	struct mbuf *m;
+{
+	register struct ether_arp *ea;
+	register struct arpcom *ac = (struct arpcom *)m->m_pkthdr.rcvif;
+	struct ether_header *eh;
+	struct iso88025_header *th = (struct iso88025_header *)0;
+	register struct llinfo_arp *la = 0;
+	register struct rtentry *rt;
+	struct in_ifaddr *ia, *maybe_ia = 0;
+	struct sockaddr_dl *sdl;
+	struct sockaddr sa;
+	struct in_addr isaddr, itaddr, myaddr;
+	int op, rif_len;
+
+	if (m->m_len < sizeof(struct ether_arp) &&
+	    (m = m_pullup(m, sizeof(struct ether_arp))) == NULL) {
+		log(LOG_ERR, "in_arp: runt packet -- m_pullup failed\n");
+		return;
+	}
+
+	ea = mtod(m, struct ether_arp *);
+	op = ntohs(ea->arp_op);
+	(void)memcpy(&isaddr, ea->arp_spa, sizeof (isaddr));
+	(void)memcpy(&itaddr, ea->arp_tpa, sizeof (itaddr));
+	TAILQ_FOREACH(ia, &in_ifaddrhead, ia_link) {
+		/*
+		 * When bridging, check the address irrespective of the
+		 * receive interface (may change with interface clusters).
+		 */
+#ifdef BRIDGE
+#define BRIDGE_TEST (do_bridge)
+#else
+#define BRIDGE_TEST (0) /* cc will optimise the test away */
+#endif
+		if ((BRIDGE_TEST) || (ia->ia_ifp == &ac->ac_if)) {
+			maybe_ia = ia;
+			if ((itaddr.s_addr == ia->ia_addr.sin_addr.s_addr) ||
+			     (isaddr.s_addr == ia->ia_addr.sin_addr.s_addr)) {
+				break;
+			}
+		}
+	}
+	if (maybe_ia == 0) {	/* no address configured on this interface */
+		m_freem(m);
+		return;
+	}
+	/* ia is NULL when the loop ran to the end without an exact match. */
+	myaddr = ia ? ia->ia_addr.sin_addr : maybe_ia->ia_addr.sin_addr;
+	if (!bcmp((caddr_t)ea->arp_sha, (caddr_t)ac->ac_enaddr,
+	    sizeof (ea->arp_sha))) {
+		m_freem(m);	/* it's from me, ignore it. */
+		return;
+	}
+	if (!bcmp((caddr_t)ea->arp_sha, (caddr_t)etherbroadcastaddr,
+	    sizeof (ea->arp_sha))) {
+		log(LOG_ERR,
+		    "arp: ether address is broadcast for IP address %s!\n",
+		    inet_ntoa(isaddr));
+		m_freem(m);
+		return;
+	}
+	if (isaddr.s_addr == myaddr.s_addr) {
+		log(LOG_ERR,
+		   "arp: %6D is using my IP address %s!\n",
+		   ea->arp_sha, ":", inet_ntoa(isaddr));
+		itaddr = myaddr;
+		goto reply;
+	}
+	/* Create a cache entry for the sender only if we are the target. */
+	la = arplookup(isaddr.s_addr, itaddr.s_addr == myaddr.s_addr, 0);
+	if (la && (rt = la->la_rt) && (sdl = SDL(rt->rt_gateway))) {
+		/* the following is not an error when doing bridging */
+		if (!BRIDGE_TEST && rt->rt_ifp != &ac->ac_if) {
+			if (log_arp_wrong_iface)
+				log(LOG_ERR, "arp: %s is on %s%d but got reply from %6D on %s%d\n",
+				    inet_ntoa(isaddr),
+				    rt->rt_ifp->if_name, rt->rt_ifp->if_unit,
+				    ea->arp_sha, ":",
+				    ac->ac_if.if_name, ac->ac_if.if_unit);
+			goto reply;
+		}
+		if (sdl->sdl_alen &&
+		    bcmp((caddr_t)ea->arp_sha, LLADDR(sdl), sdl->sdl_alen)) {
+			if (rt->rt_expire)
+			    log(LOG_INFO, "arp: %s moved from %6D to %6D on %s%d\n",
+				inet_ntoa(isaddr), (u_char *)LLADDR(sdl), ":",
+				ea->arp_sha, ":",
+				ac->ac_if.if_name, ac->ac_if.if_unit);
+			else {
+			    log(LOG_ERR,
+				"arp: %6D attempts to modify permanent entry for %s on %s%d\n",
+				ea->arp_sha, ":", inet_ntoa(isaddr),
+				ac->ac_if.if_name, ac->ac_if.if_unit);
+			    goto reply;
+			}
+		}
+		(void)memcpy(LLADDR(sdl), ea->arp_sha, sizeof(ea->arp_sha));
+		sdl->sdl_alen = sizeof(ea->arp_sha);
+		sdl->sdl_rcf = (u_short)0;
+		/*
+		 * If we receive an arp from a token-ring station over a
+		 * token-ring nic then try to save the source routing info.
+		 */
+		if (ac->ac_if.if_type == IFT_ISO88025) {
+			th = (struct iso88025_header *)m->m_pkthdr.header;
+			rif_len = TR_RCF_RIFLEN(th->rcf);
+			if ((th->iso88025_shost[0] & TR_RII) &&
+			    (rif_len > 2)) {
+				sdl->sdl_rcf = th->rcf;
+				sdl->sdl_rcf ^= htons(TR_RCF_DIR);
+				memcpy(sdl->sdl_route, th->rd, rif_len - 2);
+				sdl->sdl_rcf &= ~htons(TR_RCF_BCST_MASK);
+				/*
+				 * Set up source routing information for
+				 * reply packet (XXX)
+				 */
+				m->m_data -= rif_len;
+				m->m_len  += rif_len;
+				m->m_pkthdr.len += rif_len;
+			} else {
+				th->iso88025_shost[0] &= ~TR_RII;
+			}
+			m->m_data -= 8;
+			m->m_len  += 8;
+			m->m_pkthdr.len += 8;
+			th->rcf = sdl->sdl_rcf;
+		} else {
+			sdl->sdl_rcf = (u_short)0;
+		}
+		if (rt->rt_expire)
+			rt->rt_expire = time_second + arpt_keep;
+		rt->rt_flags &= ~RTF_REJECT;
+		la->la_asked = 0;
+		if (la->la_hold) {	/* send the packet that was waiting on this */
+			(*ac->ac_if.if_output)(&ac->ac_if, la->la_hold,
+				rt_key(rt), rt);
+			la->la_hold = 0;
+		}
+	}
+reply:
+	if (op != ARPOP_REQUEST) {
+		m_freem(m);
+		return;
+	}
+	if (itaddr.s_addr == myaddr.s_addr) {
+		/* I am the target */
+		(void)memcpy(ea->arp_tha, ea->arp_sha, sizeof(ea->arp_sha));
+		(void)memcpy(ea->arp_sha, ac->ac_enaddr, sizeof(ea->arp_sha));
+	} else {
+		la = arplookup(itaddr.s_addr, 0, SIN_PROXY);
+		if (la == NULL) {
+			struct sockaddr_in sin;
+
+			if (!arp_proxyall) {
+				m_freem(m);
+				return;
+			}
+
+			bzero(&sin, sizeof sin);
+			sin.sin_family = AF_INET;
+			sin.sin_len = sizeof sin;
+			sin.sin_addr = itaddr;
+
+			rt = rtalloc1((struct sockaddr *)&sin, 0, 0UL);
+			if (!rt) {
+				m_freem(m);
+				return;
+			}
+			/*
+			 * Don't send proxies for nodes on the same interface
+			 * as this one came out of, or we'll get into a fight
+			 * over who claims what Ether address.
+			 */
+			if (rt->rt_ifp == &ac->ac_if) {
+				rtfree(rt);
+				m_freem(m);
+				return;
+			}
+			(void)memcpy(ea->arp_tha, ea->arp_sha, sizeof(ea->arp_sha));
+			(void)memcpy(ea->arp_sha, ac->ac_enaddr, sizeof(ea->arp_sha));
+			rtfree(rt);
+
+			/*
+			 * Also check that the sender is on the interface we
+			 * expect it to be on. This avoids ARP chaos if an
+			 * interface is connected to the wrong network.
+			 */
+			sin.sin_addr = isaddr;
+
+			rt = rtalloc1((struct sockaddr *)&sin, 0, 0UL);
+			if (!rt) {
+				m_freem(m);
+				return;
+			}
+			if (rt->rt_ifp != &ac->ac_if) {
+				log(LOG_INFO, "arp_proxy: ignoring request"
+				    " from %s via %s%d, expecting %s%d\n",
+				    inet_ntoa(isaddr), ac->ac_if.if_name,
+				    ac->ac_if.if_unit, rt->rt_ifp->if_name,
+				    rt->rt_ifp->if_unit);
+				rtfree(rt);
+				m_freem(m);
+				return;
+			}
+			rtfree(rt);
+
+#ifdef DEBUG_PROXY
+			printf("arp: proxying for %s\n",
+			       inet_ntoa(itaddr));
+#endif
+		} else {
+			rt = la->la_rt;
+			(void)memcpy(ea->arp_tha, ea->arp_sha, sizeof(ea->arp_sha));
+			sdl = SDL(rt->rt_gateway);
+			(void)memcpy(ea->arp_sha, LLADDR(sdl), sizeof(ea->arp_sha));
+		}
+	}
+
+	(void)memcpy(ea->arp_tpa, ea->arp_spa, sizeof(ea->arp_spa));
+	(void)memcpy(ea->arp_spa, &itaddr, sizeof(ea->arp_spa));
+	ea->arp_op = htons(ARPOP_REPLY);
+	ea->arp_pro = htons(ETHERTYPE_IP); /* let's be sure! */
+	switch (ac->ac_if.if_type) {
+	case IFT_ISO88025:
+		if (th == NULL) {	/* got to "reply" before th was set up */
+			m_freem(m);	/* drop rather than write through NULL */
+			return;
+		}
+		/* Re-arrange the source/dest address */
+		memcpy(th->iso88025_dhost, th->iso88025_shost,
+		    sizeof(th->iso88025_dhost));
+		memcpy(th->iso88025_shost, ac->ac_enaddr,
+		    sizeof(th->iso88025_shost));
+		/* Set the source routing bit if necessary */
+		if (th->iso88025_dhost[0] & TR_RII) {
+			th->iso88025_dhost[0] &= ~TR_RII;
+			if (TR_RCF_RIFLEN(th->rcf) > 2)
+				th->iso88025_shost[0] |= TR_RII;
+		}
+		/* Copy the addresses, ac and fc into sa_data */
+		memcpy(sa.sa_data, th->iso88025_dhost,
+		    sizeof(th->iso88025_dhost) * 2);
+		sa.sa_data[(sizeof(th->iso88025_dhost) * 2)] = TR_AC;
+		sa.sa_data[(sizeof(th->iso88025_dhost) * 2) + 1] = TR_LLC_FRAME;
+		break;
+	case IFT_ETHER:
+	case IFT_FDDI:
+	/* Best guess for types not explicitly listed; may not be correct. */
+	default:
+		eh = (struct ether_header *)sa.sa_data;
+		(void)memcpy(eh->ether_dhost, ea->arp_tha,
+		    sizeof(eh->ether_dhost));
+		eh->ether_type = htons(ETHERTYPE_ARP);
+		break;
+	}
+	sa.sa_family = AF_UNSPEC;
+	sa.sa_len = sizeof(sa);
+	(*ac->ac_if.if_output)(&ac->ac_if, m, &sa, (struct rtentry *)0);
+}
+#endif
+
+/*
+ * Expire an ARP entry: reset it if its route is still in use, else delete it.
+ */
+static void
+arptfree(la)
+	register struct llinfo_arp *la;
+{
+	struct rtentry *route = la->la_rt;
+	struct sockaddr_dl *lladdr;
+	if (route == NULL)
+		panic("arptfree");
+	/* Only a route that is still referenced is kept and invalidated. */
+	lladdr = route->rt_refcnt > 0 ? SDL(route->rt_gateway) : NULL;
+	if (lladdr == NULL || lladdr->sdl_family != AF_LINK) {
+		rtrequest(RTM_DELETE, rt_key(route), (struct sockaddr *)0,
+		    rt_mask(route), 0, (struct rtentry **)0);
+		return;
+	}
+	lladdr->sdl_alen = 0;		/* zero length == unresolved */
+	la->la_asked = 0, route->rt_flags &= ~RTF_REJECT;
+}
+/*
+ * Lookup or enter a new address in arptab.  Returns the entry's llinfo,
+ * or 0 when no usable link-level route exists for "addr".
+ */
+static struct llinfo_arp *
+arplookup(addr, create, proxy)
+	u_long addr;
+	int create, proxy;
+{
+	static struct sockaddr_inarp sin = {sizeof(sin), AF_INET };
+	struct rtentry *rt;
+	const char *why;
+
+	sin.sin_addr.s_addr = addr;
+	sin.sin_other = proxy ? SIN_PROXY : 0;
+	rt = rtalloc1((struct sockaddr *)&sin, create, 0UL);
+	if (rt == NULL)
+		return (NULL);
+	rt->rt_refcnt--;	/* the reference from rtalloc1 is not kept */
+
+	if (rt->rt_flags & RTF_GATEWAY)
+		why = "host is not on local network";
+	else if (!(rt->rt_flags & RTF_LLINFO))
+		why = "could not allocate llinfo";
+	else if (rt->rt_gateway->sa_family != AF_LINK)
+		why = "gateway route is not ours";
+	else
+		return ((struct llinfo_arp *)rt->rt_llinfo);
+
+	/* Unusable route: complain only if we were asked to create one. */
+	if (create)
+		log(LOG_DEBUG, "arplookup %s failed: %s\n",
+		    inet_ntoa(sin.sin_addr), why);
+	return (NULL);
+}
+
+void
+arp_ifinit(ac, ifa)
+	struct arpcom *ac;
+	struct ifaddr *ifa;
+{
+	struct in_addr *self = &IA_SIN(ifa)->sin_addr;	/* address of this ifaddr */
+	if (ntohl(self->s_addr) != INADDR_ANY)	/* ask for our own address */
+		arprequest(ac, self, self, ac->ac_enaddr);
+	ifa->ifa_rtrequest = arp_rtrequest;	/* ARP handles this ifaddr's routes */
+	ifa->ifa_flags |= RTF_CLONING;
+}
+
+static void
+arp_init(void)
+{
+	/* Prepare the ARP input queue: its mutex, then its length limit. */
+	mtx_init(&arpintrq.ifq_mtx, "arp_inq", MTX_DEF);
+	arpintrq.ifq_maxlen = 50;
+}
+
+SYSINIT(arp, SI_SUB_PROTO_DOMAIN, SI_ORDER_ANY, arp_init, 0);
diff --git a/sys/netinet/if_ether.h b/sys/netinet/if_ether.h
new file mode 100644
index 0000000..2d590ad
--- /dev/null
+++ b/sys/netinet/if_ether.h
@@ -0,0 +1,122 @@
+/*
+ * Copyright (c) 1982, 1986, 1993
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)if_ether.h	8.3 (Berkeley) 5/2/95
+ * $FreeBSD$
+ */
+
+#ifndef _NETINET_IF_ETHER_H_
+#define _NETINET_IF_ETHER_H_
+
+#include <net/ethernet.h>
+#include <net/if_arp.h>
+
+/*
+ * Macro to map an IP multicast address to an Ethernet multicast address.
+ * The high-order 25 bits of the Ethernet address are statically assigned,
+ * and the low-order 23 bits are taken from the low end of the IP address.
+ *
+ * Both arguments are evaluated more than once, so they must be free of
+ * side effects.  Each is parenthesised in the expansion so that an
+ * expression argument (e.g. "p + 1") is cast and indexed as a whole.
+ * The expansion is a brace block, not a single statement.
+ */
+#define ETHER_MAP_IP_MULTICAST(ipaddr, enaddr) \
+	/* struct in_addr *ipaddr; */ \
+	/* u_char enaddr[ETHER_ADDR_LEN];	   */ \
+{ \
+	(enaddr)[0] = 0x01; \
+	(enaddr)[1] = 0x00; \
+	(enaddr)[2] = 0x5e; \
+	(enaddr)[3] = ((u_char *)(ipaddr))[1] & 0x7f; \
+	(enaddr)[4] = ((u_char *)(ipaddr))[2]; \
+	(enaddr)[5] = ((u_char *)(ipaddr))[3]; \
+}
+/*
+ * Macro to map an IP6 multicast address to an Ethernet multicast address.
+ * The high-order 16 bits of the Ethernet address are statically assigned,
+ * and the low-order 32 bits are taken from the low end of the IP6 address.
+ *
+ * Both arguments are evaluated more than once, so they must be free of
+ * side effects.  Each is parenthesised in the expansion so that an
+ * expression argument is cast and indexed as a whole.  The expansion is
+ * a brace block, not a single statement.
+ */
+#define ETHER_MAP_IPV6_MULTICAST(ip6addr, enaddr)			\
+/* struct	in6_addr *ip6addr; */					\
+/* u_char	enaddr[ETHER_ADDR_LEN]; */				\
+{                                                                       \
+	(enaddr)[0] = 0x33;						\
+	(enaddr)[1] = 0x33;						\
+	(enaddr)[2] = ((u_char *)(ip6addr))[12];			\
+	(enaddr)[3] = ((u_char *)(ip6addr))[13];			\
+	(enaddr)[4] = ((u_char *)(ip6addr))[14];			\
+	(enaddr)[5] = ((u_char *)(ip6addr))[15];			\
+}
+
+/*
+ * Ethernet Address Resolution Protocol.
+ *
+ * See RFC 826 for protocol description.  Structure below is adapted
+ * to resolving internet addresses.  Field names used correspond to
+ * RFC 826.
+ */
+/* ARP packet carrying Ethernet hardware and 4-byte protocol addresses. */
+struct	ether_arp {
+	struct	arphdr ea_hdr;	/* fixed-size header */
+	u_char	arp_sha[ETHER_ADDR_LEN];	/* sender hardware address */
+	u_char	arp_spa[4];	/* sender protocol address */
+	u_char	arp_tha[ETHER_ADDR_LEN];	/* target hardware address */
+	u_char	arp_tpa[4];	/* target protocol address */
+};
+/* Shorthand names that reach through ea_hdr to the arphdr fields. */
+#define	arp_hrd	ea_hdr.ar_hrd
+#define	arp_pro	ea_hdr.ar_pro
+#define	arp_hln	ea_hdr.ar_hln
+#define	arp_pln	ea_hdr.ar_pln
+#define	arp_op	ea_hdr.ar_op
+
+/*
+ * Socket address used as the key for ARP route lookups.  sin_other
+ * carries flag bits; SIN_PROXY is the only one defined here.
+ * NOTE(review): the leading fields appear to mirror struct sockaddr_in
+ * so the two can be cast to each other -- confirm against netinet/in.h.
+ */
+struct sockaddr_inarp {
+	u_char	sin_len;
+	u_char	sin_family;
+	u_short sin_port;
+	struct	in_addr sin_addr;
+	struct	in_addr sin_srcaddr;
+	u_short	sin_tos;
+	u_short	sin_other;
+#define SIN_PROXY 1
+};
+/*
+ * IP and ethernet specific routing flags
+ */
+#define	RTF_USETRAILERS	RTF_PROTO1	/* use trailers */
+#define RTF_ANNOUNCE	RTF_PROTO2	/* announce new arp entry */
+
+/* Declarations visible only to kernel code. */
+#ifdef	_KERNEL
+extern u_char	ether_ipmulticast_min[ETHER_ADDR_LEN];
+extern u_char	ether_ipmulticast_max[ETHER_ADDR_LEN];
+extern struct	ifqueue arpintrq;	/* ARP input queue */
+
+int	arpresolve __P((struct arpcom *, struct rtentry *, struct mbuf *,
+			struct sockaddr *, u_char *, struct rtentry *));
+void	arp_ifinit __P((struct arpcom *, struct ifaddr *));
+#endif
+
+#endif
diff --git a/sys/netinet/if_fddi.h b/sys/netinet/if_fddi.h
new file mode 100644
index 0000000..9f83882
--- /dev/null
+++ b/sys/netinet/if_fddi.h
@@ -0,0 +1,87 @@
+/*
+ * Copyright (c) 1982, 1986, 1993
+ *	The Regents of the University of California.  All rights reserved.
+ * Copyright (c) 1995 Matt Thomas (thomas@lkg.dec.com)
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)if_fddi.h	8.1 (Berkeley) 6/10/93
+ * $FreeBSD$
+ */
+
+#ifndef _NETINET_IF_FDDI_H_
+#define _NETINET_IF_FDDI_H_
+
+/*
+ * Structure of an 100Mb/s FDDI header.
+ */
+struct	fddi_header {
+	u_char	fddi_fc;	/* presumably frame control; see FDDIFC_* */
+	u_char	fddi_dhost[6];	/* presumably destination address -- confirm */
+	u_char	fddi_shost[6];	/* presumably source address -- confirm */
+};
+
+/* Size limits; NOTE(review): units are presumably bytes -- confirm. */
+#define	FDDIIPMTU		4352
+#define	FDDIMTU			4470
+#define	FDDIMIN			3
+
+/* Bit masks that together cover all eight bits of a byte. */
+#define	FDDIFC_C		0x80	/* 0b10000000 */
+#define	FDDIFC_L		0x40	/* 0b01000000 */
+#define	FDDIFC_F		0x30	/* 0b00110000 */
+#define	FDDIFC_Z		0x0F	/* 0b00001111 */
+
+/*
+ * NOTE(review): presumably complete frame-control values and the
+ * priority levels combined with them -- confirm against the driver.
+ */
+#define	FDDIFC_LLC_ASYNC	0x50
+#define	FDDIFC_LLC_PRIO0	0
+#define	FDDIFC_LLC_PRIO1	1
+#define	FDDIFC_LLC_PRIO2	2
+#define	FDDIFC_LLC_PRIO3	3
+#define	FDDIFC_LLC_PRIO4	4
+#define	FDDIFC_LLC_PRIO5	5
+#define	FDDIFC_LLC_PRIO6	6
+#define	FDDIFC_LLC_PRIO7	7
+#define FDDIFC_LLC_SYNC         0xd0
+#define	FDDIFC_SMT		0x40
+
+#if defined(_KERNEL)
+/* FDDI reuses the Ethernet objects and helpers under fddi_* names. */
+#define	fddibroadcastaddr	etherbroadcastaddr
+#define	fddi_ipmulticast_min	ether_ipmulticast_min
+#define	fddi_ipmulticast_max	ether_ipmulticast_max
+#define	fddi_addmulti		ether_addmulti
+#define	fddi_delmulti		ether_delmulti
+#define	fddi_sprintf		ether_sprintf
+
+/* FDDI attach, input and output entry points. */
+void    fddi_ifattach __P((struct ifnet *));
+void    fddi_input __P((struct ifnet *, struct fddi_header *, struct mbuf *));
+int     fddi_output __P((struct ifnet *,
+           struct mbuf *, struct sockaddr *, struct rtentry *)); 
+
+#endif
+
+#endif
diff --git a/sys/netinet/igmp.c b/sys/netinet/igmp.c
new file mode 100644
index 0000000..77be25b
--- /dev/null
+++ b/sys/netinet/igmp.c
@@ -0,0 +1,487 @@
+/*
+ * Copyright (c) 1988 Stephen Deering.
+ * Copyright (c) 1992, 1993
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * Stephen Deering of Stanford University.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)igmp.c	8.1 (Berkeley) 7/19/93
+ * $FreeBSD$
+ */
+
+/*
+ * Internet Group Management Protocol (IGMP) routines.
+ *
+ * Written by Steve Deering, Stanford, May 1988.
+ * Modified by Rosen Sharma, Stanford, Aug 1994.
+ * Modified by Bill Fenner, Xerox PARC, Feb 1995.
+ * Modified to fully comply to IGMPv2 by Bill Fenner, Oct 1995.
+ *
+ * MULTICAST Revision: 3.5.1.4
+ */
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/malloc.h>
+#include <sys/mbuf.h>
+#include <sys/socket.h>
+#include <sys/protosw.h>
+#include <sys/kernel.h>
+#include <sys/sysctl.h>
+
+#include <net/if.h>
+#include <net/route.h>
+
+#include <netinet/in.h>
+#include <netinet/in_var.h>
+#include <netinet/in_systm.h>
+#include <netinet/ip.h>
+#include <netinet/ip_var.h>
+#include <netinet/igmp.h>
+#include <netinet/igmp_var.h>
+
+#include <machine/in_cksum.h>
+
+/* malloc type used for the router_info records allocated in find_rti */
+static MALLOC_DEFINE(M_IGMP, "igmp", "igmp state");
+
+static struct router_info *
+		find_rti __P((struct ifnet *ifp));
+
+/* IGMP counters, exported read-only through the sysctl node below */
+static struct igmpstat igmpstat;
+
+SYSCTL_STRUCT(_net_inet_igmp, IGMPCTL_STATS, stats, CTLFLAG_RD,
+	&igmpstat, igmpstat, "");
+
+/* non-zero while at least one membership report timer is pending */
+static int igmp_timers_are_running;
+/* all-hosts and all-routers group addresses in network byte order */
+static u_long igmp_all_hosts_group;
+static u_long igmp_all_rtrs_group;
+/* mbuf holding the Router Alert option handed to ip_output */
+static struct mbuf *router_alert;
+/* head of the singly linked list of per-interface router_info records */
+static struct router_info *Head;
+
+static void igmp_sendpkt __P((struct in_multi *, int, unsigned long));
+
+/*
+ * Initialise IGMP state: cache the all-hosts and all-routers group
+ * addresses in network byte order, clear the timer flag and the
+ * router_info list, and build the Router Alert option mbuf used for
+ * outgoing packets.
+ *
+ * The mbuf is requested with M_DONTWAIT, so the allocation may fail.
+ * In that case router_alert is left NULL and a message is printed
+ * instead of dereferencing a null pointer.
+ * NOTE(review): igmp_sendpkt then passes a NULL option mbuf to
+ * ip_output; confirm that ip_output treats NULL as "no options".
+ */
+void
+igmp_init()
+{
+	struct ipoption *ra;
+
+	/*
+	 * To avoid byte-swapping the same value over and over again.
+	 */
+	igmp_all_hosts_group = htonl(INADDR_ALLHOSTS_GROUP);
+	igmp_all_rtrs_group = htonl(INADDR_ALLRTRS_GROUP);
+
+	igmp_timers_are_running = 0;
+	Head = (struct router_info *) 0;
+
+	/*
+	 * Construct a Router Alert option to use in outgoing packets
+	 */
+	MGET(router_alert, M_DONTWAIT, MT_DATA);
+	if (router_alert == NULL) {
+		printf("igmp_init: no mbuf for Router Alert option\n");
+		return;
+	}
+	ra = mtod(router_alert, struct ipoption *);
+	ra->ipopt_dst.s_addr = 0;
+	ra->ipopt_list[0] = IPOPT_RA;	/* Router Alert Option */
+	ra->ipopt_list[1] = 0x04;	/* 4 bytes long */
+	ra->ipopt_list[2] = 0x00;
+	ra->ipopt_list[3] = 0x00;
+	/* option data is preceded by the first-hop destination field */
+	router_alert->m_len = sizeof(ra->ipopt_dst) + ra->ipopt_list[1];
+}
+
+/*
+ * Return the router_info record for interface ifp, creating one if
+ * none exists yet.  A new record starts as IGMP_V2_ROUTER with a zero
+ * age and is pushed on the front of the list.
+ *
+ * The record is allocated with M_NOWAIT, so allocation can fail; in
+ * that case NULL is returned and the list is left untouched.  Callers
+ * must check for NULL.
+ */
+static struct router_info *
+find_rti(ifp)
+	struct ifnet *ifp;
+{
+	register struct router_info *rti;
+
+#ifdef IGMP_DEBUG
+	printf("[igmp.c, _find_rti] --> entering \n");
+#endif
+	for (rti = Head; rti != NULL; rti = rti->rti_next) {
+		if (rti->rti_ifp == ifp) {
+#ifdef IGMP_DEBUG
+			printf("[igmp.c, _find_rti] --> found old entry \n");
+#endif
+			return rti;
+		}
+	}
+
+	MALLOC(rti, struct router_info *, sizeof *rti, M_IGMP, M_NOWAIT);
+	if (rti == NULL) {
+#ifdef IGMP_DEBUG
+		printf("[igmp.c, _find_rti] --> no memory for entry \n");
+#endif
+		return NULL;
+	}
+	rti->rti_ifp = ifp;
+	rti->rti_type = IGMP_V2_ROUTER;
+	rti->rti_time = 0;
+	rti->rti_next = Head;
+	Head = rti;
+#ifdef IGMP_DEBUG
+	printf("[igmp.c, _find_rti] --> created an entry \n");
+#endif
+	return rti;
+}
+
+/*
+ * Process a received IGMP packet.
+ *
+ * m     - packet, starting at the IP header
+ * off   - length of the IP header in bytes
+ * proto - protocol number, handed on unchanged to rip_input
+ *
+ * The packet is length- and checksum-validated, then membership
+ * queries and reports update the per-group timers and state.  Invalid
+ * packets are counted and freed; everything else is finally passed to
+ * rip_input.  If no router_info record can be obtained for the
+ * receiving interface the packet is dropped rather than dereferencing
+ * a null pointer.
+ */
+void
+igmp_input(m, off, proto)
+	register struct mbuf *m;
+	int off, proto;
+{
+	register int iphlen = off;
+	register struct igmp *igmp;
+	register struct ip *ip;
+	register int igmplen;
+	register struct ifnet *ifp = m->m_pkthdr.rcvif;
+	register int minlen;
+	register struct in_multi *inm;
+	register struct in_ifaddr *ia;
+	struct in_multistep step;
+	struct router_info *rti;
+	
+	int timer; /** timer value in the igmp query header **/
+
+	++igmpstat.igps_rcv_total;
+
+	ip = mtod(m, struct ip *);
+	igmplen = ip->ip_len;
+
+	/*
+	 * Validate lengths
+	 */
+	if (igmplen < IGMP_MINLEN) {
+		++igmpstat.igps_rcv_tooshort;
+		m_freem(m);
+		return;
+	}
+	minlen = iphlen + IGMP_MINLEN;
+	if ((m->m_flags & M_EXT || m->m_len < minlen) &&
+	    (m = m_pullup(m, minlen)) == 0) {
+		++igmpstat.igps_rcv_tooshort;
+		return;
+	}
+
+	/*
+	 * Validate checksum
+	 */
+	m->m_data += iphlen;
+	m->m_len -= iphlen;
+	igmp = mtod(m, struct igmp *);
+	if (in_cksum(m, igmplen)) {
+		++igmpstat.igps_rcv_badsum;
+		m_freem(m);
+		return;
+	}
+	m->m_data -= iphlen;
+	m->m_len += iphlen;
+
+	ip = mtod(m, struct ip *);
+	timer = igmp->igmp_code * PR_FASTHZ / IGMP_TIMER_SCALE;
+	if (timer == 0)
+		timer = 1;
+	rti = find_rti(ifp);
+	if (rti == NULL) {
+		/* no router_info record available: drop the packet */
+		m_freem(m);
+		return;
+	}
+
+	/*
+	 * In the IGMPv2 specification, there are 3 states and a flag.
+	 *
+	 * In Non-Member state, we simply don't have a membership record.
+	 * In Delaying Member state, our timer is running (inm->inm_timer)
+	 * In Idle Member state, our timer is not running (inm->inm_timer==0)
+	 *
+	 * The flag is inm->inm_state, it is set to IGMP_OTHERMEMBER if
+	 * we have heard a report from another member, or IGMP_IREPORTEDLAST
+	 * if I sent the last report.
+	 */
+	switch (igmp->igmp_type) {
+
+	case IGMP_MEMBERSHIP_QUERY:
+		++igmpstat.igps_rcv_queries;
+
+		if (ifp->if_flags & IFF_LOOPBACK)
+			break;
+
+		if (igmp->igmp_code == 0) {
+			/*
+			 * Old router.  Remember that the querier on this
+			 * interface is old, and set the timer to the
+			 * value in RFC 1112.
+			 */
+
+			rti->rti_type = IGMP_V1_ROUTER;
+			rti->rti_time = 0;
+
+			timer = IGMP_MAX_HOST_REPORT_DELAY * PR_FASTHZ;
+
+			if (ip->ip_dst.s_addr != igmp_all_hosts_group ||
+			    igmp->igmp_group.s_addr != 0) {
+				++igmpstat.igps_rcv_badqueries;
+				m_freem(m);
+				return;
+			}
+		} else {
+			/*
+			 * New router.  Simply do the new validity check.
+			 */
+			
+			if (igmp->igmp_group.s_addr != 0 &&
+			    !IN_MULTICAST(ntohl(igmp->igmp_group.s_addr))) {
+				++igmpstat.igps_rcv_badqueries;
+				m_freem(m);
+				return;
+			}
+		}
+
+		/*
+		 * - Start the timers in all of our membership records
+		 *   that the query applies to for the interface on
+		 *   which the query arrived excl. those that belong
+		 *   to the "all-hosts" group (224.0.0.1).
+		 * - Restart any timer that is already running but has
+		 *   a value longer than the requested timeout.
+		 * - Use the value specified in the query message as
+		 *   the maximum timeout.
+		 */
+		IN_FIRST_MULTI(step, inm);
+		while (inm != NULL) {
+			if (inm->inm_ifp == ifp &&
+			    inm->inm_addr.s_addr != igmp_all_hosts_group &&
+			    (igmp->igmp_group.s_addr == 0 ||
+			     igmp->igmp_group.s_addr == inm->inm_addr.s_addr)) {
+				if (inm->inm_timer == 0 ||
+				    inm->inm_timer > timer) {
+					inm->inm_timer =
+						IGMP_RANDOM_DELAY(timer);
+					igmp_timers_are_running = 1;
+				}
+			}
+			IN_NEXT_MULTI(step, inm);
+		}
+
+		break;
+
+	case IGMP_V1_MEMBERSHIP_REPORT:
+	case IGMP_V2_MEMBERSHIP_REPORT:
+		/*
+		 * For fast leave to work, we have to know that we are the
+		 * last person to send a report for this group.  Reports
+		 * can potentially get looped back if we are a multicast
+		 * router, so discard reports sourced by me.
+		 */
+		IFP_TO_IA(ifp, ia);
+		if (ia && ip->ip_src.s_addr == IA_SIN(ia)->sin_addr.s_addr)
+			break;
+
+		++igmpstat.igps_rcv_reports;
+
+		if (ifp->if_flags & IFF_LOOPBACK)
+			break;
+
+		if (!IN_MULTICAST(ntohl(igmp->igmp_group.s_addr))) {
+			++igmpstat.igps_rcv_badreports;
+			m_freem(m);
+			return;
+		}
+
+		/*
+		 * KLUDGE: if the IP source address of the report has an
+		 * unspecified (i.e., zero) subnet number, as is allowed for
+		 * a booting host, replace it with the correct subnet number
+		 * so that a process-level multicast routing demon can
+		 * determine which subnet it arrived from.  This is necessary
+		 * to compensate for the lack of any way for a process to
+		 * determine the arrival interface of an incoming packet.
+		 */
+		if ((ntohl(ip->ip_src.s_addr) & IN_CLASSA_NET) == 0)
+			if (ia) ip->ip_src.s_addr = htonl(ia->ia_subnet);
+
+		/*
+		 * If we belong to the group being reported, stop
+		 * our timer for that group.
+		 */
+		IN_LOOKUP_MULTI(igmp->igmp_group, ifp, inm);
+
+		if (inm != NULL) {
+			inm->inm_timer = 0;
+			++igmpstat.igps_rcv_ourreports;
+
+			inm->inm_state = IGMP_OTHERMEMBER;
+		}
+
+		break;
+	}
+
+	/*
+	 * Pass all valid IGMP packets up to any process(es) listening
+	 * on a raw IGMP socket.
+	 */
+	rip_input(m, off, proto);
+}
+
+/*
+ * Called when a group membership record is created.  For the
+ * all-hosts group and for loopback interfaces no report is sent and
+ * the record is left idle.  Otherwise the interface's router_info is
+ * looked up, an initial report of the matching version is sent, and
+ * the report timer is started.
+ *
+ * If no router_info record can be obtained, inm_rti stays NULL and the
+ * record is left idle instead of dereferencing a null pointer.
+ */
+void
+igmp_joingroup(inm)
+	struct in_multi *inm;
+{
+	int s = splnet();
+
+	if (inm->inm_addr.s_addr == igmp_all_hosts_group
+	    || inm->inm_ifp->if_flags & IFF_LOOPBACK) {
+		inm->inm_timer = 0;
+		inm->inm_state = IGMP_OTHERMEMBER;
+	} else {
+		inm->inm_rti = find_rti(inm->inm_ifp);
+		if (inm->inm_rti == NULL) {
+			/* XXX no way to report the failure to the caller */
+			inm->inm_timer = 0;
+			inm->inm_state = IGMP_OTHERMEMBER;
+		} else {
+			igmp_sendpkt(inm, inm->inm_rti->rti_type, 0);
+			inm->inm_timer = IGMP_RANDOM_DELAY(
+					IGMP_MAX_HOST_REPORT_DELAY*PR_FASTHZ);
+			inm->inm_state = IGMP_IREPORTEDLAST;
+			igmp_timers_are_running = 1;
+		}
+	}
+	splx(s);
+}
+
+/*
+ * Called when a group membership record is given up.  A leave-group
+ * message is sent to the all-routers group only if we sent the last
+ * report for the group, the group is not all-hosts, the interface is
+ * not a loopback, and the querier is not recorded as an IGMPv1 router.
+ */
+void
+igmp_leavegroup(inm)
+	struct in_multi *inm;
+{
+	if (inm->inm_state != IGMP_IREPORTEDLAST)
+		return;
+	if (inm->inm_addr.s_addr == igmp_all_hosts_group)
+		return;
+	if (inm->inm_ifp->if_flags & IFF_LOOPBACK)
+		return;
+	if (inm->inm_rti->rti_type == IGMP_V1_ROUTER)
+		return;
+
+	igmp_sendpkt(inm, IGMP_V2_LEAVE_GROUP, igmp_all_rtrs_group);
+}
+
+/*
+ * Fast timeout handler.  Walks every membership record, counts down
+ * running report timers, and sends a membership report for each timer
+ * that reaches zero.  igmp_timers_are_running is recomputed so later
+ * calls can return early when nothing is pending.
+ *
+ * A record whose inm_rti is NULL gets one more find_rti attempt when
+ * its timer expires; if that also yields nothing the report is
+ * skipped rather than dereferencing a null pointer.
+ */
+void
+igmp_fasttimo()
+{
+	register struct in_multi *inm;
+	struct in_multistep step;
+	int s;
+
+	/*
+	 * Quick check to see if any work needs to be done, in order
+	 * to minimize the overhead of fasttimo processing.
+	 */
+
+	if (!igmp_timers_are_running)
+		return;
+
+	s = splnet();
+	igmp_timers_are_running = 0;
+	IN_FIRST_MULTI(step, inm);
+	while (inm != NULL) {
+		if (inm->inm_timer == 0) {
+			/* do nothing */
+		} else if (--inm->inm_timer == 0) {
+			if (inm->inm_rti == NULL)
+				inm->inm_rti = find_rti(inm->inm_ifp);
+			if (inm->inm_rti != NULL) {
+				igmp_sendpkt(inm, inm->inm_rti->rti_type, 0);
+				inm->inm_state = IGMP_IREPORTEDLAST;
+			}
+		} else {
+			igmp_timers_are_running = 1;
+		}
+		IN_NEXT_MULTI(step, inm);
+	}
+	splx(s);
+}
+
+/*
+ * Slow timeout handler.  Ages every router_info record currently
+ * marked IGMP_V1_ROUTER and flips it back to IGMP_V2_ROUTER once its
+ * age reaches IGMP_AGE_THRESHOLD.
+ */
+void
+igmp_slowtimo()
+{
+	int s = splnet();
+	register struct router_info *entry;
+
+#ifdef IGMP_DEBUG
+	printf("[igmp.c,_slowtimo] -- > entering \n");
+#endif
+	for (entry = Head; entry != NULL; entry = entry->rti_next) {
+		if (entry->rti_type != IGMP_V1_ROUTER)
+			continue;
+		if (++entry->rti_time >= IGMP_AGE_THRESHOLD)
+			entry->rti_type = IGMP_V2_ROUTER;
+	}
+#ifdef IGMP_DEBUG
+	printf("[igmp.c,_slowtimo] -- > exiting \n");
+#endif
+	splx(s);
+}
+
+/* route structure handed to ip_output on every send */
+static struct route igmprt;
+
+/*
+ * Build and transmit one IGMP message for membership record inm.
+ *
+ * type - IGMP message type placed in the header
+ * addr - IP destination; when zero the group address itself is used
+ *
+ * Returns silently if no mbuf is available.  The send counter is
+ * bumped after ip_output regardless of its result.
+ */
+static void
+igmp_sendpkt(inm, type, addr)
+	struct in_multi *inm;
+	int type;
+	unsigned long addr;
+{
+	struct ip_moptions imo;
+	struct mbuf *m;
+	struct igmp *igmp;
+	struct ip *ip;
+
+	MGETHDR(m, M_DONTWAIT, MT_HEADER);
+	if (m == NULL)
+		return;
+
+	m->m_pkthdr.rcvif = loif;
+	m->m_pkthdr.len = sizeof(struct ip) + IGMP_MINLEN;
+	MH_ALIGN(m, IGMP_MINLEN + sizeof(struct ip));
+
+	/* step past the IP header so the checksum covers IGMP only */
+	m->m_data += sizeof(struct ip);
+	m->m_len = IGMP_MINLEN;
+	igmp = mtod(m, struct igmp *);
+	igmp->igmp_type = type;
+	igmp->igmp_code = 0;
+	igmp->igmp_group = inm->inm_addr;
+	igmp->igmp_cksum = 0;
+	igmp->igmp_cksum = in_cksum(m, IGMP_MINLEN);
+
+	/* step back and fill in the IP header */
+	m->m_data -= sizeof(struct ip);
+	m->m_len += sizeof(struct ip);
+	ip = mtod(m, struct ip *);
+	ip->ip_tos = 0;
+	ip->ip_len = sizeof(struct ip) + IGMP_MINLEN;
+	ip->ip_off = 0;
+	ip->ip_p = IPPROTO_IGMP;
+	ip->ip_src.s_addr = INADDR_ANY;
+	ip->ip_dst.s_addr = addr ? addr : igmp->igmp_group.s_addr;
+
+	imo.imo_multicast_ifp = inm->inm_ifp;
+	imo.imo_multicast_ttl = 1;
+	imo.imo_multicast_vif = -1;
+	/*
+	 * Request loopback of the report if we are acting as a multicast
+	 * router, so that the process-level routing demon can hear it.
+	 */
+	imo.imo_multicast_loop = (ip_mrouter != NULL);
+
+	/*
+	 * XXX
+	 * Do we have to worry about reentrancy here?  Don't think so.
+	 */
+	ip_output(m, router_alert, &igmprt, 0, &imo);
+
+	++igmpstat.igps_snd_reports;
+}
diff --git a/sys/netinet/igmp.h b/sys/netinet/igmp.h
new file mode 100644
index 0000000..7d943d6
--- /dev/null
+++ b/sys/netinet/igmp.h
@@ -0,0 +1,96 @@
+/*
+ * Copyright (c) 1988 Stephen Deering.
+ * Copyright (c) 1992, 1993
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * Stephen Deering of Stanford University.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)igmp.h	8.1 (Berkeley) 6/10/93
+ * $FreeBSD$
+ */
+
+#ifndef _NETINET_IGMP_H_
+#define _NETINET_IGMP_H_
+
+/*
+ * Internet Group Management Protocol (IGMP) definitions.
+ *
+ * Written by Steve Deering, Stanford, May 1988.
+ *
+ * MULTICAST Revision: 3.5.1.2
+ */
+
+/*
+ * IGMP packet format.
+ *
+ * The field order is the on-the-wire layout and must not change.
+ */
+struct igmp {
+	u_char		igmp_type;	/* version & type of IGMP message  */
+	u_char		igmp_code;	/* subtype for routing msgs        */
+	u_short		igmp_cksum;	/* IP-style checksum               */
+	struct in_addr	igmp_group;	/* group address being reported    */
+};					/*  (zero for queries)             */
+
+#define IGMP_MINLEN		     8
+
+/*
+ * Message types, including version number.
+ */
+#define IGMP_MEMBERSHIP_QUERY   	0x11	/* membership query         */
+#define IGMP_V1_MEMBERSHIP_REPORT	0x12	/* Ver. 1 membership report */
+#define IGMP_V2_MEMBERSHIP_REPORT	0x16	/* Ver. 2 membership report */
+#define IGMP_V2_LEAVE_GROUP		0x17	/* Leave-group message	    */
+
+#define IGMP_DVMRP			0x13	/* DVMRP routing message    */
+#define IGMP_PIM			0x14	/* PIM routing message	    */
+
+#define IGMP_MTRACE_RESP		0x1e  /* traceroute resp.(to sender)*/
+#define IGMP_MTRACE			0x1f  /* mcast traceroute messages  */
+
+#define IGMP_MAX_HOST_REPORT_DELAY   10    /* max delay for response to     */
+					   /*  query (in seconds) according */
+					   /*  to RFC1112                   */
+
+
+#define IGMP_TIMER_SCALE     10		/* denotes that the igmp code field */
+					/* specifies time in 10th of seconds*/
+
+/*
+ * The following four definitions are for backwards compatibility.
+ * They should be removed as soon as all applications are updated to
+ * use the new constant names.
+ */
+#define IGMP_HOST_MEMBERSHIP_QUERY	IGMP_MEMBERSHIP_QUERY
+#define IGMP_HOST_MEMBERSHIP_REPORT	IGMP_V1_MEMBERSHIP_REPORT
+#define IGMP_HOST_NEW_MEMBERSHIP_REPORT	IGMP_V2_MEMBERSHIP_REPORT
+#define IGMP_HOST_LEAVE_MESSAGE		IGMP_V2_LEAVE_GROUP
+
+#endif /* _NETINET_IGMP_H_ */
diff --git a/sys/netinet/igmp_var.h b/sys/netinet/igmp_var.h
new file mode 100644
index 0000000..5688f24
--- /dev/null
+++ b/sys/netinet/igmp_var.h
@@ -0,0 +1,109 @@
+/*
+ * Copyright (c) 1988 Stephen Deering.
+ * Copyright (c) 1992, 1993
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * Stephen Deering of Stanford University.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	from: @(#)igmp_var.h	8.1 (Berkeley) 7/19/93
+ * $FreeBSD$
+ */
+
+#ifndef _NETINET_IGMP_VAR_H_
+#define _NETINET_IGMP_VAR_H_
+
+/*
+ * Internet Group Management Protocol (IGMP),
+ * implementation-specific definitions.
+ *
+ * Written by Steve Deering, Stanford, May 1988.
+ *
+ * MULTICAST Revision: 3.5.1.3
+ */
+
+/* IGMP statistics counters; visible outside the _KERNEL-only section. */
+struct igmpstat {
+	u_int	igps_rcv_total;		/* total IGMP messages received */
+	u_int	igps_rcv_tooshort;	/* received with too few bytes */
+	u_int	igps_rcv_badsum;	/* received with bad checksum */
+	u_int	igps_rcv_queries;	/* received membership queries */
+	u_int	igps_rcv_badqueries;	/* received invalid queries */
+	u_int	igps_rcv_reports;	/* received membership reports */
+	u_int	igps_rcv_badreports;	/* received invalid reports */
+	u_int	igps_rcv_ourreports;	/* received reports for our groups */
+	u_int	igps_snd_reports;	/* sent membership reports */
+};
+
+#ifdef _KERNEL
+/* Random delay in the range 1..X; X must be non-zero (used as a modulus). */
+#define IGMP_RANDOM_DELAY(X) (random() % (X) + 1)
+
+/*
+ * States for IGMPv2's leave processing
+ */
+#define IGMP_OTHERMEMBER			0
+#define IGMP_IREPORTEDLAST			1
+
+/*
+ * We must remember what version the subnet's querier is.
+ * We conveniently use the IGMP message type for the proper
+ * membership report to keep this state.
+ */
+#define IGMP_V1_ROUTER				IGMP_V1_MEMBERSHIP_REPORT
+#define IGMP_V2_ROUTER				IGMP_V2_MEMBERSHIP_REPORT
+
+/*
+ * Revert to new router if we haven't heard from an old router in
+ * this amount of time.
+ */
+#define IGMP_AGE_THRESHOLD			540
+
+void	igmp_init __P((void));
+void	igmp_input __P((struct mbuf *, int, int));
+void	igmp_joingroup __P((struct in_multi *));
+void	igmp_leavegroup __P((struct in_multi *));
+void	igmp_fasttimo __P((void));
+void	igmp_slowtimo __P((void));
+
+SYSCTL_DECL(_net_inet_igmp);
+
+#endif
+
+/*
+ * Names for IGMP sysctl objects
+ */
+#define IGMPCTL_STATS		1	/* statistics (read-only) */
+#define IGMPCTL_MAXID		2
+
+#define IGMPCTL_NAMES { \
+	{ 0, 0 }, \
+	{ "stats", CTLTYPE_STRUCT }, \
+}
+#endif
diff --git a/sys/netinet/in.c b/sys/netinet/in.c
new file mode 100644
index 0000000..95abe3f
--- /dev/null
+++ b/sys/netinet/in.c
@@ -0,0 +1,861 @@
+/*
+ * Copyright (c) 1982, 1986, 1991, 1993
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)in.c	8.4 (Berkeley) 1/9/95
+ * $FreeBSD$
+ */
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/sockio.h>
+#include <sys/malloc.h>
+#include <sys/socket.h>
+#include <sys/kernel.h>
+#include <sys/sysctl.h>
+
+#include <net/if.h>
+#include <net/if_types.h>
+#include <net/route.h>
+
+#include <netinet/in.h>
+#include <netinet/in_var.h>
+
+#include <netinet/igmp_var.h>
+
+#include "gif.h"
+#if NGIF > 0
+#include <net/if_gif.h>
+#endif
+
+static MALLOC_DEFINE(M_IPMADDR, "in_multi", "internet multicast address");	/* type used for struct in_multi */
+
+static int in_mask2len __P((struct in_addr *));		/* netmask -> prefix length */
+static void in_len2mask __P((struct in_addr *, int));	/* prefix length -> netmask */
+static int in_lifaddr_ioctl __P((struct socket *, u_long, caddr_t,
+	struct ifnet *, struct proc *));
+
+static void	in_socktrim __P((struct sockaddr_in *));
+static int	in_ifinit __P((struct ifnet *,
+	    struct in_ifaddr *, struct sockaddr_in *, int));
+
+static int subnetsarelocal = 0;	/* nonzero: in_localaddr() matches on ia_net, not ia_subnet */
+SYSCTL_INT(_net_inet_ip, OID_AUTO, subnets_are_local, CTLFLAG_RW, 
+	&subnetsarelocal, 0, "");
+
+struct in_multihead in_multihead; /* list of in_multi records; XXX BSS initialization */
+
+/*
+ * Decide whether an internet address belongs to a ``local'' host, i.e.
+ * one on a network that some configured interface address is attached to.
+ * With subnetsarelocal set the whole network (ia_net/ia_netmask) counts;
+ * otherwise only the directly-connected subnet does.  Returns 1 or 0.
+ */
+int
+in_localaddr(in)
+	struct in_addr in;
+{
+	register struct in_ifaddr *ifaddr;
+	register u_long host = ntohl(in.s_addr);
+	int wide = (subnetsarelocal != 0);	/* sampled once per call */
+
+	TAILQ_FOREACH(ifaddr, &in_ifaddrhead, ia_link) {
+		if (wide) {
+			if ((host & ifaddr->ia_netmask) == ifaddr->ia_net)
+				return (1);
+		} else if ((host & ifaddr->ia_subnetmask) == ifaddr->ia_subnet)
+			return (1);
+	}
+
+	return (0);
+}
+
+/*
+ * Tell whether datagrams addressed to `in' may be forwarded.  Returns 0
+ * for experimental and multicast addresses and for class A network 0 or
+ * the loopback network; returns 1 for every other destination.
+ */
+int
+in_canforward(in)
+	struct in_addr in;
+{
+	register u_long dst = ntohl(in.s_addr);
+	register u_long classa_net;
+
+	if (IN_MULTICAST(dst) || IN_EXPERIMENTAL(dst))
+		return (0);
+	if (!IN_CLASSA(dst))
+		return (1);
+	classa_net = dst & IN_CLASSA_NET;
+	if (classa_net == (IN_LOOPBACKNET << IN_CLASSA_NSHIFT))
+		return (0);
+	return (classa_net != 0);
+}
+
+/*
+ * Trim a mask in a sockaddr: set sin_len so it ends at the last non-zero
+ * byte of sin_addr, or to 0 when every byte of sin_addr is zero.
+ */
+static void
+in_socktrim(ap)
+	struct sockaddr_in *ap;
+{
+	register char *first = (char *)&ap->sin_addr, *scan;
+
+	ap->sin_len = 0;
+	for (scan = (char *)(&ap->sin_addr + 1) - 1; scan >= first; scan--)
+		if (*scan != 0) {
+			ap->sin_len = scan - (char *)ap + 1;
+			return;
+		}
+}
+
+static int
+in_mask2len(mask)
+	struct in_addr *mask;
+{
+	/* Convert a netmask into the count of its leading one bits. */
+	u_char *octet = (u_char *)mask;
+	int full, bits;
+
+	/* whole octets of ones first ... */
+	full = 0;
+	while (full < sizeof(*mask) && octet[full] == 0xff)
+		full++;
+	/* ... then the leading one bits of the first octet that is not 0xff */
+	bits = 0;
+	if (full < sizeof(*mask)) {
+		while (bits < 8 && (octet[full] & (0x80 >> bits)) != 0)
+			bits++;
+	}
+
+	return full * 8 + bits;
+}
+
+static void
+in_len2mask(mask, len)
+	struct in_addr *mask;
+	int len;
+{
+	/* Build the netmask whose first `len' bits are ones. */
+	u_char *octet = (u_char *)mask;
+	int n = 0;
+
+	bzero(mask, sizeof(*mask));
+	while (n < len / 8)
+		octet[n++] = 0xff;
+	if ((len % 8) != 0)
+		octet[n] = (0xff00 >> (len % 8)) & 0xff;
+}
+
+static int in_interfaces;	/* number of external internet interfaces */
+
+/*
+ * Generic internet control operations (ioctl's) on interface addresses.
+ * Ifp is 0 if not an interface-specific ioctl.  Returns 0 or an errno value.
+ */
+/* ARGSUSED */
+int
+in_control(so, cmd, data, ifp, p)
+	struct socket *so;
+	u_long cmd;
+	caddr_t data;
+	register struct ifnet *ifp;
+	struct proc *p;
+{
+	register struct ifreq *ifr = (struct ifreq *)data;
+	register struct in_ifaddr *ia = 0, *iap;	/* ia: address entry operated on */
+	register struct ifaddr *ifa;
+	struct in_ifaddr *oia;
+	struct in_aliasreq *ifra = (struct in_aliasreq *)data;	/* same buffer as ifr */
+	struct sockaddr_in oldaddr;
+	int error, hostIsNew, maskIsNew, s;
+	u_long i;
+
+#if NGIF > 0
+        if (ifp && ifp->if_type == IFT_GIF) {
+                switch (cmd) {
+                case SIOCSIFPHYADDR:
+		case SIOCDIFPHYADDR:
+			if (p &&
+			    (error = suser(p)) != 0)
+        			return(error);	/* else FALLTHROUGH to gif_ioctl() */
+                case SIOCGIFPSRCADDR:
+                case SIOCGIFPDSTADDR:
+                        return gif_ioctl(ifp, cmd, data);
+                }
+        }
+#endif
+
+	switch (cmd) {	/* SIOC[AGD]LIFADDR are delegated to in_lifaddr_ioctl() */
+	case SIOCALIFADDR:
+	case SIOCDLIFADDR:
+		if (p && (error = suser(p)) != 0)
+			return error;
+		/*fall through*/
+	case SIOCGLIFADDR:
+		if (!ifp)
+			return EINVAL;
+		return in_lifaddr_ioctl(so, cmd, data, ifp, p);
+	}
+
+	/*
+	 * Find address for this interface, if it exists.
+	 *
+	 * If an alias address was specified, find that one instead of
+	 * the first one on the interface.
+	 */
+	if (ifp)
+		TAILQ_FOREACH(iap, &in_ifaddrhead, ia_link)
+			if (iap->ia_ifp == ifp) {
+				if (((struct sockaddr_in *)&ifr->ifr_addr)->sin_addr.s_addr ==
+				    iap->ia_addr.sin_addr.s_addr) {
+					ia = iap;
+					break;
+				} else if (ia == NULL) {	/* first address on ifp is the fallback */
+					ia = iap;
+					if (ifr->ifr_addr.sa_family != AF_INET)
+						break;
+				}
+			}
+
+	switch (cmd) {	/* first pass: checks, then locate or allocate ia */
+
+	case SIOCAIFADDR:
+	case SIOCDIFADDR:
+		if (ifp == 0)
+			return (EADDRNOTAVAIL);
+		if (ifra->ifra_addr.sin_family == AF_INET) {
+			for (oia = ia; ia; ia = TAILQ_NEXT(ia, ia_link)) {
+				if (ia->ia_ifp == ifp  &&
+				    ia->ia_addr.sin_addr.s_addr ==
+				    ifra->ifra_addr.sin_addr.s_addr)
+					break;
+			}
+			if ((ifp->if_flags & IFF_POINTOPOINT)
+			    && (cmd == SIOCAIFADDR)
+			    && (ifra->ifra_dstaddr.sin_addr.s_addr
+				== INADDR_ANY)) {
+				return EDESTADDRREQ;
+			}
+		}
+		if (cmd == SIOCDIFADDR && ia == 0)
+			return (EADDRNOTAVAIL);
+		/* FALLTHROUGH */
+	case SIOCSIFADDR:
+	case SIOCSIFNETMASK:
+	case SIOCSIFDSTADDR:
+		if (p && (error = suser(p)) != 0)
+			return error;
+
+		if (ifp == 0)
+			return (EADDRNOTAVAIL);
+		if (ia == (struct in_ifaddr *)0) {	/* no entry yet: create and link one */
+			ia = (struct in_ifaddr *)
+				malloc(sizeof *ia, M_IFADDR, M_WAITOK | M_ZERO);
+			if (ia == (struct in_ifaddr *)NULL)
+				return (ENOBUFS);
+			/*
+			 * Protect from ipintr() traversing address list
+			 * while we're modifying it.
+			 */
+			s = splnet();
+			
+			TAILQ_INSERT_TAIL(&in_ifaddrhead, ia, ia_link);
+			ifa = &ia->ia_ifa;
+			TAILQ_INSERT_TAIL(&ifp->if_addrhead, ifa, ifa_link);
+
+			ifa->ifa_addr = (struct sockaddr *)&ia->ia_addr;
+			ifa->ifa_dstaddr = (struct sockaddr *)&ia->ia_dstaddr;
+			ifa->ifa_netmask = (struct sockaddr *)&ia->ia_sockmask;
+			ia->ia_sockmask.sin_len = 8;
+			if (ifp->if_flags & IFF_BROADCAST) {
+				ia->ia_broadaddr.sin_len = sizeof(ia->ia_addr);
+				ia->ia_broadaddr.sin_family = AF_INET;
+			}
+			ia->ia_ifp = ifp;
+			if (!(ifp->if_flags & IFF_LOOPBACK))
+				in_interfaces++;
+			splx(s);
+		}
+		break;
+
+	case SIOCSIFBRDADDR:
+		if (p && (error = suser(p)) != 0)
+			return error;
+		/* FALLTHROUGH */
+
+	case SIOCGIFADDR:
+	case SIOCGIFNETMASK:
+	case SIOCGIFDSTADDR:
+	case SIOCGIFBRDADDR:
+		if (ia == (struct in_ifaddr *)0)
+			return (EADDRNOTAVAIL);
+		break;
+	}
+	switch (cmd) {	/* second pass: carry out the request */
+
+	case SIOCGIFADDR:
+		*((struct sockaddr_in *)&ifr->ifr_addr) = ia->ia_addr;
+		break;
+
+	case SIOCGIFBRDADDR:
+		if ((ifp->if_flags & IFF_BROADCAST) == 0)
+			return (EINVAL);
+		*((struct sockaddr_in *)&ifr->ifr_dstaddr) = ia->ia_broadaddr;
+		break;
+
+	case SIOCGIFDSTADDR:
+		if ((ifp->if_flags & IFF_POINTOPOINT) == 0)
+			return (EINVAL);
+		*((struct sockaddr_in *)&ifr->ifr_dstaddr) = ia->ia_dstaddr;
+		break;
+
+	case SIOCGIFNETMASK:
+		*((struct sockaddr_in *)&ifr->ifr_addr) = ia->ia_sockmask;
+		break;
+
+	case SIOCSIFDSTADDR:
+		if ((ifp->if_flags & IFF_POINTOPOINT) == 0)
+			return (EINVAL);
+		oldaddr = ia->ia_dstaddr;	/* kept so a driver refusal can be rolled back */
+		ia->ia_dstaddr = *(struct sockaddr_in *)&ifr->ifr_dstaddr;
+		if (ifp->if_ioctl && (error = (*ifp->if_ioctl)
+					(ifp, SIOCSIFDSTADDR, (caddr_t)ia))) {
+			ia->ia_dstaddr = oldaddr;
+			return (error);
+		}
+		if (ia->ia_flags & IFA_ROUTE) {	/* replace the host route to the old peer */
+			ia->ia_ifa.ifa_dstaddr = (struct sockaddr *)&oldaddr;
+			rtinit(&(ia->ia_ifa), (int)RTM_DELETE, RTF_HOST);
+			ia->ia_ifa.ifa_dstaddr =
+					(struct sockaddr *)&ia->ia_dstaddr;
+			rtinit(&(ia->ia_ifa), (int)RTM_ADD, RTF_HOST|RTF_UP);
+		}
+		break;
+
+	case SIOCSIFBRDADDR:
+		if ((ifp->if_flags & IFF_BROADCAST) == 0)
+			return (EINVAL);
+		ia->ia_broadaddr = *(struct sockaddr_in *)&ifr->ifr_broadaddr;
+		break;
+
+	case SIOCSIFADDR:
+		return (in_ifinit(ifp, ia,
+		    (struct sockaddr_in *) &ifr->ifr_addr, 1));
+
+	case SIOCSIFNETMASK:
+		i = ifra->ifra_addr.sin_addr.s_addr;
+		ia->ia_subnetmask = ntohl(ia->ia_sockmask.sin_addr.s_addr = i);	/* sockmask: net order; subnetmask: host order */
+		break;
+
+	case SIOCAIFADDR:
+		maskIsNew = 0;
+		hostIsNew = 1;
+		error = 0;
+		if (ia->ia_addr.sin_family == AF_INET) {
+			if (ifra->ifra_addr.sin_len == 0) {
+				ifra->ifra_addr = ia->ia_addr;
+				hostIsNew = 0;
+			} else if (ifra->ifra_addr.sin_addr.s_addr ==
+					       ia->ia_addr.sin_addr.s_addr)
+				hostIsNew = 0;
+		}
+		if (ifra->ifra_mask.sin_len) {
+			in_ifscrub(ifp, ia);
+			ia->ia_sockmask = ifra->ifra_mask;
+			ia->ia_subnetmask =
+			     ntohl(ia->ia_sockmask.sin_addr.s_addr);
+			maskIsNew = 1;
+		}
+		if ((ifp->if_flags & IFF_POINTOPOINT) &&
+		    (ifra->ifra_dstaddr.sin_family == AF_INET)) {
+			in_ifscrub(ifp, ia);
+			ia->ia_dstaddr = ifra->ifra_dstaddr;
+			maskIsNew  = 1; /* We lie; but the effect's the same */
+		}
+		if (ifra->ifra_addr.sin_family == AF_INET &&
+		    (hostIsNew || maskIsNew))
+			error = in_ifinit(ifp, ia, &ifra->ifra_addr, 0);
+		if ((ifp->if_flags & IFF_BROADCAST) &&
+		    (ifra->ifra_broadaddr.sin_family == AF_INET))
+			ia->ia_broadaddr = ifra->ifra_broadaddr;
+		return (error);
+
+	case SIOCDIFADDR:
+		/*
+		 * in_ifscrub kills the interface route.
+		 */
+		in_ifscrub(ifp, ia);
+		/*
+		 * in_ifadown gets rid of all the rest of
+		 * the routes.  This is not quite the right
+		 * thing to do, but at least if we are running
+		 * a routing process they will come back.
+		 */
+		in_ifadown(&ia->ia_ifa, 1);
+
+		/*
+		 * Protect from ipintr() traversing address list
+		 * while we're modifying it.
+		 */
+		s = splnet();
+
+		ifa = &ia->ia_ifa;
+		TAILQ_REMOVE(&ifp->if_addrhead, ifa, ifa_link);
+		oia = ia;
+		TAILQ_REMOVE(&in_ifaddrhead, oia, ia_link);
+		IFAFREE(&oia->ia_ifa);
+		splx(s);
+		break;
+
+	default:	/* anything else is handed to the interface's own ioctl */
+		if (ifp == 0 || ifp->if_ioctl == 0)
+			return (EOPNOTSUPP);
+		return ((*ifp->if_ioctl)(ifp, cmd, data));
+	}
+	return (0);
+}
+
+/*
+ * SIOC[GAD]LIFADDR, implemented on top of in_control().
+ *	SIOCGLIFADDR: get first address. (?!?)
+ *	SIOCGLIFADDR with IFLR_PREFIX:
+ *		get first address that matches the specified prefix.
+ *	SIOCALIFADDR: add the specified address.
+ *	SIOCALIFADDR with IFLR_PREFIX:
+ *		EINVAL since we can't deduce hostid part of the address.
+ *	SIOCDLIFADDR: delete the specified address.
+ *	SIOCDLIFADDR with IFLR_PREFIX:
+ *		delete the first address that matches the specified prefix.
+ * return values:
+ *	EINVAL on invalid parameters
+ *	EADDRNOTAVAIL on prefix match failed/specified address not found
+ *	other values may be returned from in_control()
+ */
+static int
+in_lifaddr_ioctl(so, cmd, data, ifp, p)
+	struct socket *so;
+	u_long cmd;
+	caddr_t	data;
+	struct ifnet *ifp;
+	struct proc *p;
+{
+	struct if_laddrreq *iflr = (struct if_laddrreq *)data;
+	struct ifaddr *ifa;
+
+	/* sanity checks */
+	if (!data || !ifp) {
+		panic("invalid argument to in_lifaddr_ioctl");
+		/*NOTREACHED*/
+	}
+
+	switch (cmd) {
+	case SIOCGLIFADDR:
+		/* address must be specified on GET with IFLR_PREFIX */
+		if ((iflr->flags & IFLR_PREFIX) == 0)
+			break;
+		/*FALLTHROUGH*/
+	case SIOCALIFADDR:
+	case SIOCDLIFADDR:
+		/* address must be specified on ADD and DELETE */
+		if (iflr->addr.ss_family != AF_INET)
+			return EINVAL;
+		if (iflr->addr.ss_len != sizeof(struct sockaddr_in))
+			return EINVAL;
+		/* XXX need improvement */
+		if (iflr->dstaddr.ss_family
+		 && iflr->dstaddr.ss_family != AF_INET)
+			return EINVAL;
+		if (iflr->dstaddr.ss_family
+		 && iflr->dstaddr.ss_len != sizeof(struct sockaddr_in))
+			return EINVAL;
+		break;
+	default: /*shouldn't happen*/
+		return EOPNOTSUPP;
+	}
+	if (sizeof(struct in_addr) * 8 < iflr->prefixlen)	/* at most 32 bits */
+		return EINVAL;
+
+	switch (cmd) {
+	case SIOCALIFADDR:
+	    {
+		struct in_aliasreq ifra;
+
+		if (iflr->flags & IFLR_PREFIX)
+			return EINVAL;
+
+		/* copy args to in_aliasreq, perform ioctl(SIOCAIFADDR). */
+		bzero(&ifra, sizeof(ifra));
+		bcopy(iflr->iflr_name, ifra.ifra_name,
+			sizeof(ifra.ifra_name));
+
+		bcopy(&iflr->addr, &ifra.ifra_addr, iflr->addr.ss_len);
+
+		if (iflr->dstaddr.ss_family) {	/*XXX*/
+			bcopy(&iflr->dstaddr, &ifra.ifra_dstaddr,
+				iflr->dstaddr.ss_len);
+		}
+
+		ifra.ifra_mask.sin_family = AF_INET;
+		ifra.ifra_mask.sin_len = sizeof(struct sockaddr_in);
+		in_len2mask(&ifra.ifra_mask.sin_addr, iflr->prefixlen);
+
+		return in_control(so, SIOCAIFADDR, (caddr_t)&ifra, ifp, p);
+	    }
+	case SIOCGLIFADDR:
+	case SIOCDLIFADDR:
+	    {
+		struct in_ifaddr *ia;
+		struct in_addr mask, candidate, match;	/* match is only read when cmp != 0 */
+		struct sockaddr_in *sin;
+		int cmp;	/* 0: take the first AF_INET address; 1: compare under mask */
+
+		bzero(&mask, sizeof(mask));
+		if (iflr->flags & IFLR_PREFIX) {
+			/* lookup a prefix rather than address. */
+			in_len2mask(&mask, iflr->prefixlen);
+
+			sin = (struct sockaddr_in *)&iflr->addr;
+			match.s_addr = sin->sin_addr.s_addr;
+			match.s_addr &= mask.s_addr;
+
+			/* if you set extra bits, that's wrong */
+			if (match.s_addr != sin->sin_addr.s_addr)
+				return EINVAL;
+
+			cmp = 1;
+		} else {
+			if (cmd == SIOCGLIFADDR) {
+				/* on getting an address, take the 1st match */
+				cmp = 0;	/*XXX*/
+			} else {
+				/* on deleting an address, do exact match */
+				in_len2mask(&mask, 32);
+				sin = (struct sockaddr_in *)&iflr->addr;
+				match.s_addr = sin->sin_addr.s_addr;
+
+				cmp = 1;
+			}
+		}
+
+		TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link)	{
+			if (ifa->ifa_addr->sa_family != AF_INET)	/* only IPv4 entries are candidates */
+				continue;
+			if (!cmp)
+				break;
+			candidate.s_addr = ((struct sockaddr_in *)ifa->ifa_addr)->sin_addr.s_addr;
+			candidate.s_addr &= mask.s_addr;
+			if (candidate.s_addr == match.s_addr)
+				break;
+		}
+		if (!ifa)	/* list exhausted without a match */
+			return EADDRNOTAVAIL;
+		ia = (struct in_ifaddr *)ifa;
+
+		if (cmd == SIOCGLIFADDR) {
+			/* fill in the if_laddrreq structure */
+			bcopy(&ia->ia_addr, &iflr->addr, ia->ia_addr.sin_len);
+
+			if ((ifp->if_flags & IFF_POINTOPOINT) != 0) {
+				bcopy(&ia->ia_dstaddr, &iflr->dstaddr,
+					ia->ia_dstaddr.sin_len);
+			} else
+				bzero(&iflr->dstaddr, sizeof(iflr->dstaddr));
+
+			iflr->prefixlen =
+				in_mask2len(&ia->ia_sockmask.sin_addr);
+
+			iflr->flags = 0;	/*XXX*/
+
+			return 0;
+		} else {
+			struct in_aliasreq ifra;
+
+			/* fill in_aliasreq and do ioctl(SIOCDIFADDR) */
+			bzero(&ifra, sizeof(ifra));
+			bcopy(iflr->iflr_name, ifra.ifra_name,
+				sizeof(ifra.ifra_name));
+
+			bcopy(&ia->ia_addr, &ifra.ifra_addr,
+				ia->ia_addr.sin_len);
+			if ((ifp->if_flags & IFF_POINTOPOINT) != 0) {
+				bcopy(&ia->ia_dstaddr, &ifra.ifra_dstaddr,
+					ia->ia_dstaddr.sin_len);
+			}
+			bcopy(&ia->ia_sockmask, &ifra.ifra_mask,	/* mask goes in ifra_mask, not over dstaddr */
+				ia->ia_sockmask.sin_len);
+
+			return in_control(so, SIOCDIFADDR, (caddr_t)&ifra,
+					  ifp, p);
+		}
+	    }
+	}
+
+	return EOPNOTSUPP;	/*just for safety*/
+}
+
+/*
+ * Remove the route installed for interface address `ia', if any (IFA_ROUTE
+ * set), and clear IFA_ROUTE.  Loopback and point-to-point use a host route.
+ */
+void
+in_ifscrub(ifp, ia)
+	register struct ifnet *ifp;
+	register struct in_ifaddr *ia;
+{
+	int rtflags;
+
+	if (!(ia->ia_flags & IFA_ROUTE))
+		return;
+	rtflags = (ifp->if_flags & (IFF_LOOPBACK|IFF_POINTOPOINT)) ? RTF_HOST : 0;
+	rtinit(&ia->ia_ifa, (int)RTM_DELETE, rtflags);
+	ia->ia_flags &= ~IFA_ROUTE;
+}
+
+/*
+ * Initialize an interface's internet address and routing table entry.
+ * If scrub is set, the old address's route is deleted before the new one is added.
+ */
+static int
+in_ifinit(ifp, ia, sin, scrub)
+	register struct ifnet *ifp;
+	register struct in_ifaddr *ia;
+	struct sockaddr_in *sin;
+	int scrub;
+{
+	register u_long i = ntohl(sin->sin_addr.s_addr);	/* new address, host order */
+	struct sockaddr_in oldaddr;
+	int s = splimp(), flags = RTF_UP, error;
+
+	oldaddr = ia->ia_addr;	/* kept for rollback and for scrubbing the old route */
+	ia->ia_addr = *sin;
+	/*
+	 * Give the interface a chance to initialize
+	 * if this is its first address,
+	 * and to validate the address if necessary.
+	 */
+	if (ifp->if_ioctl &&
+	    (error = (*ifp->if_ioctl)(ifp, SIOCSIFADDR, (caddr_t)ia))) {
+		splx(s);
+		ia->ia_addr = oldaddr;
+		return (error);
+	}
+	splx(s);
+	if (scrub) {	/* point ifa_addr at the old address just long enough to scrub */
+		ia->ia_ifa.ifa_addr = (struct sockaddr *)&oldaddr;
+		in_ifscrub(ifp, ia);
+		ia->ia_ifa.ifa_addr = (struct sockaddr *)&ia->ia_addr;
+	}
+	if (IN_CLASSA(i))	/* classful default for ia_netmask */
+		ia->ia_netmask = IN_CLASSA_NET;
+	else if (IN_CLASSB(i))
+		ia->ia_netmask = IN_CLASSB_NET;
+	else
+		ia->ia_netmask = IN_CLASSC_NET;
+	/*
+	 * The subnet mask usually includes at least the standard network part,
+	 * but may be smaller in the case of supernetting.
+	 * If it is set, we believe it.
+	 */
+	if (ia->ia_subnetmask == 0) {
+		ia->ia_subnetmask = ia->ia_netmask;
+		ia->ia_sockmask.sin_addr.s_addr = htonl(ia->ia_subnetmask);
+	} else
+		ia->ia_netmask &= ia->ia_subnetmask;
+	ia->ia_net = i & ia->ia_netmask;
+	ia->ia_subnet = i & ia->ia_subnetmask;
+	in_socktrim(&ia->ia_sockmask);
+	/*
+	 * Add route for the network.
+	 */
+	ia->ia_ifa.ifa_metric = ifp->if_metric;
+	if (ifp->if_flags & IFF_BROADCAST) {
+		ia->ia_broadaddr.sin_addr.s_addr =
+			htonl(ia->ia_subnet | ~ia->ia_subnetmask);
+		ia->ia_netbroadcast.s_addr =
+			htonl(ia->ia_net | ~ ia->ia_netmask);
+	} else if (ifp->if_flags & IFF_LOOPBACK) {
+		ia->ia_ifa.ifa_dstaddr = ia->ia_ifa.ifa_addr;
+		flags |= RTF_HOST;
+	} else if (ifp->if_flags & IFF_POINTOPOINT) {
+		if (ia->ia_dstaddr.sin_family != AF_INET)
+			return (0);	/* no destination yet: no route, no multicast join */
+		flags |= RTF_HOST;
+	}
+	if ((error = rtinit(&(ia->ia_ifa), (int)RTM_ADD, flags)) == 0)
+		ia->ia_flags |= IFA_ROUTE;
+
+	/*
+	 * If the interface supports multicast, join the "all hosts"
+	 * multicast group on that interface.
+	 */
+	if (ifp->if_flags & IFF_MULTICAST) {
+		struct in_addr addr;
+
+		addr.s_addr = htonl(INADDR_ALLHOSTS_GROUP);
+		in_addmulti(&addr, ifp);
+	}
+	return (error);	/* result of rtinit(); the multicast join does not affect it */
+}
+
+
+/*
+ * Return 1 if `in' might be a local broadcast address: the limited
+ * broadcast or the any address, or, on a broadcast-capable interface,
+ * the broadcast, net-broadcast, subnet or net number of one of its
+ * AF_INET addresses whose subnet mask is not all ones.  Else return 0.
+ */
+int
+in_broadcast(in, ifp)
+	struct in_addr in;
+	struct ifnet *ifp;
+{
+	register struct ifaddr *ifa;
+	register struct in_ifaddr *inaddr;
+	u_long host;
+
+	if (in.s_addr == INADDR_ANY || in.s_addr == INADDR_BROADCAST)
+		return 1;
+	if (!(ifp->if_flags & IFF_BROADCAST))
+		return 0;
+	host = ntohl(in.s_addr);
+
+	TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
+		if (ifa->ifa_addr->sa_family != AF_INET)
+			continue;
+		inaddr = (struct in_ifaddr *)ifa;
+		/*
+		 * An all-ones subnet mask only exists when an interface
+		 * gets a secondary address; such entries never match.
+		 */
+		if (inaddr->ia_subnetmask == (u_long)0xffffffff)
+			continue;
+		/* The last two tests catch old-style (host 0) broadcast. */
+		if (in.s_addr == inaddr->ia_broadaddr.sin_addr.s_addr ||
+		    in.s_addr == inaddr->ia_netbroadcast.s_addr ||
+		    host == inaddr->ia_subnet || host == inaddr->ia_net)
+			return 1;
+	}
+
+	return (0);
+}
+/*
+ * Add an address to the list of IP multicast addresses for a given interface.
+ * Returns the (new or already existing) in_multi record, or NULL on failure.
+ */
+struct in_multi *
+in_addmulti(ap, ifp)
+	register struct in_addr *ap;
+	register struct ifnet *ifp;
+{
+	register struct in_multi *inm;
+	int error;
+	struct sockaddr_in sin;
+	struct ifmultiaddr *ifma;
+	int s = splnet();
+
+	/*
+	 * Call generic routine to add membership or increment
+	 * refcount.  It wants addresses in the form of a sockaddr,
+	 * so we build one here (being careful to zero the unused bytes).
+	 */
+	bzero(&sin, sizeof sin);
+	sin.sin_family = AF_INET;
+	sin.sin_len = sizeof sin;
+	sin.sin_addr = *ap;
+	error = if_addmulti(ifp, (struct sockaddr *)&sin, &ifma);
+	if (error) {
+		splx(s);
+		return 0;
+	}
+
+	/*
+	 * If ifma->ifma_protospec is null, then if_addmulti() created
+	 * a new record.  Otherwise, we are done.
+	 */
+	if (ifma->ifma_protospec != 0) {
+		splx(s);
+		return ifma->ifma_protospec;
+	}
+
+	/* XXX - if_addmulti uses M_WAITOK.  Can this really be called
+	   at interrupt time?  If so, need to fix if_addmulti. XXX */
+	inm = (struct in_multi *)malloc(sizeof(*inm), M_IPMADDR,
+	    M_NOWAIT | M_ZERO);
+	if (inm == NULL) {
+		if_delmulti(ifp, (struct sockaddr *)&sin);	/* undo if_addmulti(): don't leak the membership */
+		splx(s);
+		return (NULL);
+	}
+
+	inm->inm_addr = *ap;
+	inm->inm_ifp = ifp;
+	inm->inm_ifma = ifma;
+	ifma->ifma_protospec = inm;
+	LIST_INSERT_HEAD(&in_multihead, inm, inm_link);
+
+	/* Let IGMP know that we have joined a new IP multicast group. */
+	igmp_joingroup(inm);
+	splx(s);
+	return (inm);
+}
+
+/*
+ * Delete a multicast address record (the in_multi is freed only when ifma_refcount is 1).
+ */
+void
+in_delmulti(inm)
+	register struct in_multi *inm;
+{
+	struct ifmultiaddr *ifma = inm->inm_ifma;
+	struct in_multi my_inm;	/* stack copy handed to IGMP after inm is freed */
+	int s = splnet();
+
+	my_inm.inm_ifp = NULL ; /* don't send the leave msg */
+	if (ifma->ifma_refcount == 1) {
+		/*
+		 * No remaining claims to this record; let IGMP know that
+		 * we are leaving the multicast group.
+		 * But do it after the if_delmulti() which might reset
+		 * the interface and nuke the packet.
+		 */
+		my_inm = *inm ;	/* copy sets inm_ifp, which enables the leave below */
+		ifma->ifma_protospec = 0;
+		LIST_REMOVE(inm, inm_link);
+		free(inm, M_IPMADDR);
+	}
+	/* XXX - should be separate API for when we have an ifma? */
+	if_delmulti(ifma->ifma_ifp, ifma->ifma_addr);
+	if (my_inm.inm_ifp != NULL)	/* only set when the record was freed above */
+		igmp_leavegroup(&my_inm);
+	splx(s);
+}
diff --git a/sys/netinet/in.h b/sys/netinet/in.h
new file mode 100644
index 0000000..65f8a8e
--- /dev/null
+++ b/sys/netinet/in.h
@@ -0,0 +1,492 @@
+/*
+ * Copyright (c) 1982, 1986, 1990, 1993
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)in.h	8.3 (Berkeley) 1/3/94
+ * $FreeBSD$
+ */
+
+#ifndef _NETINET_IN_H_
+#define _NETINET_IN_H_
+
+/*
+ * Constants and structures defined by the internet system,
+ * Per RFC 790, September 1981, and numerous additions.
+ */
+
+/*
+ * Protocols (RFC 1700)
+ */
+#define	IPPROTO_IP		0		/* dummy for IP */
+#define	IPPROTO_HOPOPTS		0		/* IP6 hop-by-hop options */
+#define	IPPROTO_ICMP		1		/* control message protocol */
+#define	IPPROTO_IGMP		2		/* group mgmt protocol */
+#define	IPPROTO_GGP		3		/* gateway^2 (deprecated) */
+#define IPPROTO_IPV4		4 		/* IPv4 encapsulation */
+#define IPPROTO_IPIP		IPPROTO_IPV4	/* for compatibility */
+#define	IPPROTO_TCP		6		/* tcp */
+#define	IPPROTO_ST		7		/* Stream protocol II */
+#define	IPPROTO_EGP		8		/* exterior gateway protocol */
+#define	IPPROTO_PIGP		9		/* private interior gateway */
+#define	IPPROTO_RCCMON		10		/* BBN RCC Monitoring */
+#define	IPPROTO_NVPII		11		/* network voice protocol */
+#define	IPPROTO_PUP		12		/* pup */
+#define	IPPROTO_ARGUS		13		/* Argus */
+#define	IPPROTO_EMCON		14		/* EMCON */
+#define	IPPROTO_XNET		15		/* Cross Net Debugger */
+#define	IPPROTO_CHAOS		16		/* Chaos */
+#define	IPPROTO_UDP		17		/* user datagram protocol */
+#define	IPPROTO_MUX		18		/* Multiplexing */
+#define	IPPROTO_MEAS		19		/* DCN Measurement Subsystems */
+#define	IPPROTO_HMP		20		/* Host Monitoring */
+#define	IPPROTO_PRM		21		/* Packet Radio Measurement */
+#define	IPPROTO_IDP		22		/* xns idp */
+#define	IPPROTO_TRUNK1		23		/* Trunk-1 */
+#define	IPPROTO_TRUNK2		24		/* Trunk-2 */
+#define	IPPROTO_LEAF1		25		/* Leaf-1 */
+#define	IPPROTO_LEAF2		26		/* Leaf-2 */
+#define	IPPROTO_RDP		27		/* Reliable Data */
+#define	IPPROTO_IRTP		28		/* Reliable Transaction */
+#define	IPPROTO_TP		29 		/* tp-4 w/ class negotiation */
+#define	IPPROTO_BLT		30		/* Bulk Data Transfer */
+#define	IPPROTO_NSP		31		/* Network Services */
+#define	IPPROTO_INP		32		/* Merit Internodal */
+#define	IPPROTO_SEP		33		/* Sequential Exchange */
+#define	IPPROTO_3PC		34		/* Third Party Connect */
+#define	IPPROTO_IDPR		35		/* InterDomain Policy Routing */
+#define	IPPROTO_XTP		36		/* XTP */
+#define	IPPROTO_DDP		37		/* Datagram Delivery */
+#define	IPPROTO_CMTP		38		/* Control Message Transport */
+#define	IPPROTO_TPXX		39		/* TP++ Transport */
+#define	IPPROTO_IL		40		/* IL transport protocol */
+#define	IPPROTO_IPV6		41		/* IP6 header */
+#define	IPPROTO_SDRP		42		/* Source Demand Routing */
+#define	IPPROTO_ROUTING		43		/* IP6 routing header */
+#define	IPPROTO_FRAGMENT	44		/* IP6 fragmentation header */
+#define	IPPROTO_IDRP		45		/* InterDomain Routing */
+#define	IPPROTO_RSVP		46 		/* resource reservation */
+#define	IPPROTO_GRE		47		/* General Routing Encap. */
+#define	IPPROTO_MHRP		48		/* Mobile Host Routing */
+#define	IPPROTO_BHA		49		/* BHA */
+#define	IPPROTO_ESP		50		/* IP6 Encap Sec. Payload */
+#define	IPPROTO_AH		51		/* IP6 Auth Header */
+#define	IPPROTO_INLSP		52		/* Integ. Net Layer Security */
+#define	IPPROTO_SWIPE		53		/* IP with encryption */
+#define	IPPROTO_NHRP		54		/* Next Hop Resolution */
+#define IPPROTO_MOBILE		55		/* IP Mobility */
+#define IPPROTO_TLSP		56		/* Transport Layer Security */
+#define IPPROTO_SKIP		57		/* SKIP */
+#define	IPPROTO_ICMPV6		58		/* ICMP6 */
+#define	IPPROTO_NONE		59		/* IP6 no next header */
+#define	IPPROTO_DSTOPTS		60		/* IP6 destination option */
+#define	IPPROTO_AHIP		61		/* any host internal protocol */
+#define	IPPROTO_CFTP		62		/* CFTP */
+#define	IPPROTO_HELLO		63		/* "hello" routing protocol */
+#define	IPPROTO_SATEXPAK	64		/* SATNET/Backroom EXPAK */
+#define	IPPROTO_KRYPTOLAN	65		/* Kryptolan */
+#define	IPPROTO_RVD		66		/* Remote Virtual Disk */
+#define	IPPROTO_IPPC		67		/* Pluribus Packet Core */
+#define	IPPROTO_ADFS		68		/* Any distributed FS */
+#define	IPPROTO_SATMON		69		/* Satnet Monitoring */
+#define	IPPROTO_VISA		70		/* VISA Protocol */
+#define	IPPROTO_IPCV		71		/* Packet Core Utility */
+#define	IPPROTO_CPNX		72		/* Comp. Prot. Net. Executive */
+#define	IPPROTO_CPHB		73		/* Comp. Prot. HeartBeat */
+#define	IPPROTO_WSN		74		/* Wang Span Network */
+#define	IPPROTO_PVP		75		/* Packet Video Protocol */
+#define	IPPROTO_BRSATMON	76		/* BackRoom SATNET Monitoring */
+#define	IPPROTO_ND		77		/* Sun net disk proto (temp.) */
+#define	IPPROTO_WBMON		78		/* WIDEBAND Monitoring */
+#define	IPPROTO_WBEXPAK		79		/* WIDEBAND EXPAK */
+#define	IPPROTO_EON		80		/* ISO cnlp */
+#define	IPPROTO_VMTP		81		/* VMTP */
+#define	IPPROTO_SVMTP		82		/* Secure VMTP */
+#define	IPPROTO_VINES		83		/* Banyan VINES */
+#define	IPPROTO_TTP		84		/* TTP */
+#define	IPPROTO_IGP		85		/* NSFNET-IGP */
+#define	IPPROTO_DGP		86		/* dissimilar gateway prot. */
+#define	IPPROTO_TCF		87		/* TCF */
+#define	IPPROTO_IGRP		88		/* Cisco/GXS IGRP */
+#define	IPPROTO_OSPFIGP		89		/* OSPFIGP */
+#define	IPPROTO_SRPC		90		/* Strite RPC protocol */
+#define	IPPROTO_LARP		91		/* Locus Address Resolution */
+#define	IPPROTO_MTP		92		/* Multicast Transport */
+#define	IPPROTO_AX25		93		/* AX.25 Frames */
+#define	IPPROTO_IPEIP		94		/* IP encapsulated in IP */
+#define	IPPROTO_MICP		95		/* Mobile Int.ing control */
+#define	IPPROTO_SCCSP		96		/* Semaphore Comm. security */
+#define	IPPROTO_ETHERIP		97		/* Ethernet IP encapsulation */
+#define	IPPROTO_ENCAP		98		/* encapsulation header */
+#define	IPPROTO_APES		99		/* any private encr. scheme */
+#define	IPPROTO_GMTP		100		/* GMTP */
+#define	IPPROTO_IPCOMP		108		/* payload compression (IPComp) */
+/* 101-254: Partly Unassigned (IPPROTO_IPCOMP above is in this range) */
+#define	IPPROTO_PIM		103		/* Protocol Independent Mcast */
+#define	IPPROTO_PGM		113		/* PGM */
+/* 255: Reserved */
+/* BSD Private, local use, namespace incursion */
+#define	IPPROTO_DIVERT		254		/* divert pseudo-protocol */
+#define	IPPROTO_RAW		255		/* raw IP packet */
+#define	IPPROTO_MAX		256
+
+/* last return value of *_input(), meaning "all work for this pkt is done". */
+#define	IPPROTO_DONE		257
+
+/*
+ * Local port number conventions:
+ *
+ * When a user does a bind(2) or connect(2) with a port number of zero,
+ * a non-conflicting local port address is chosen.
+ * The default range is IPPORT_RESERVED through
+ * IPPORT_USERRESERVED, although that is settable by sysctl.
+ *
+ * A user may set the IPPROTO_IP option IP_PORTRANGE to change this
+ * default assignment range.
+ *
+ * The value IP_PORTRANGE_DEFAULT causes the default behavior.
+ *
+ * The value IP_PORTRANGE_HIGH changes the range of candidate port numbers
+ * into the "high" range.  These are reserved for client outbound connections
+ * which do not want to be filtered by any firewalls.
+ *
+ * The value IP_PORTRANGE_LOW changes the range to the "low" area
+ * that is (by convention) restricted to privileged processes.  This
+ * convention is based on "vouchsafe" principles only.  It is only secure
+ * if you trust the remote host to restrict these ports.
+ *
+ * The default range of ports and the high range can be changed by
+ * sysctl(3).  (net.inet.ip.port{hi,low}{first,last}_auto)
+ *
+ * Changing those values has bad security implications if you are
+ * using a stateless firewall that is allowing packets outside of that
+ * range in order to allow transparent outgoing connections.
+ *
+ * Such a firewall configuration will generally depend on the use of these
+ * default values.  If you change them, you may find your Security
+ * Administrator looking for you with a heavy object.
+ *
+ * For a slightly more orthodox text view on this:
+ *
+ *            ftp://ftp.isi.edu/in-notes/iana/assignments/port-numbers
+ *
+ *    port numbers are divided into three ranges:
+ *
+ *                0 -  1023 Well Known Ports
+ *             1024 - 49151 Registered Ports
+ *            49152 - 65535 Dynamic and/or Private Ports
+ *
+ */
+
+/*
+ * Ports < IPPORT_RESERVED are reserved for
+ * privileged processes (e.g. root).         (IP_PORTRANGE_LOW)
+ * Ports > IPPORT_USERRESERVED are reserved
+ * for servers, not necessarily privileged.  (IP_PORTRANGE_DEFAULT)
+ */
+#define	IPPORT_RESERVED		1024	/* ports below this are privileged */
+#define	IPPORT_USERRESERVED	5000	/* end of the default automatic range */
+
+/*
+ * Default local port range to use by setting IP_PORTRANGE_HIGH
+ */
+#define	IPPORT_HIFIRSTAUTO	49152
+#define	IPPORT_HILASTAUTO	65535
+
+/*
+ * Scanning for a free reserved port returns a value below IPPORT_RESERVED,
+ * but higher than IPPORT_RESERVEDSTART.  Traditionally the start value was
+ * 512, but that conflicts with some well-known services, and firewalls may
+ * have a fit if we use their ports.
+ */
+#define IPPORT_RESERVEDSTART	600
+
+/*
+ * Internet address (a structure for historical reasons)
+ */
+struct in_addr {
+	in_addr_t s_addr;	/* IPv4 address (network byte order? TODO confirm) */
+};
+
+/*
+ * Definitions of bits in internet address integers.  Each IN_*() test
+ * casts (i) to u_int32_t and looks only at its high-order bits.  On subnets,
+ * the host/net split follows the subnet mask, not the masks here.
+ */
+#define	IN_CLASSA(i)		(((u_int32_t)(i) & 0x80000000) == 0)
+#define	IN_CLASSA_NET		0xff000000
+#define	IN_CLASSA_NSHIFT	24
+#define	IN_CLASSA_HOST		0x00ffffff
+#define	IN_CLASSA_MAX		128
+
+#define	IN_CLASSB(i)		(((u_int32_t)(i) & 0xc0000000) == 0x80000000)
+#define	IN_CLASSB_NET		0xffff0000
+#define	IN_CLASSB_NSHIFT	16
+#define	IN_CLASSB_HOST		0x0000ffff
+#define	IN_CLASSB_MAX		65536
+
+#define	IN_CLASSC(i)		(((u_int32_t)(i) & 0xe0000000) == 0xc0000000)
+#define	IN_CLASSC_NET		0xffffff00
+#define	IN_CLASSC_NSHIFT	8
+#define	IN_CLASSC_HOST		0x000000ff
+
+#define	IN_CLASSD(i)		(((u_int32_t)(i) & 0xf0000000) == 0xe0000000)
+#define	IN_CLASSD_NET		0xf0000000	/* These ones aren't really */
+#define	IN_CLASSD_NSHIFT	28		/* net and host fields, but */
+#define	IN_CLASSD_HOST		0x0fffffff	/* routing needn't know.    */
+#define	IN_MULTICAST(i)		IN_CLASSD(i)
+/* IN_EXPERIMENTAL and IN_BADCLASS are the same test: top four bits all set. */
+#define	IN_EXPERIMENTAL(i)	(((u_int32_t)(i) & 0xf0000000) == 0xf0000000)
+#define	IN_BADCLASS(i)		(((u_int32_t)(i) & 0xf0000000) == 0xf0000000)
+
+#define	INADDR_ANY		(u_int32_t)0x00000000	/* 0.0.0.0 */
+#define	INADDR_LOOPBACK		(u_int32_t)0x7f000001	/* 127.0.0.1 */
+#define	INADDR_BROADCAST	(u_int32_t)0xffffffff	/* must be masked */
+#ifndef _KERNEL
+#define	INADDR_NONE		0xffffffff		/* -1 return */
+#endif
+
+#define	INADDR_UNSPEC_GROUP	(u_int32_t)0xe0000000	/* 224.0.0.0 */
+#define	INADDR_ALLHOSTS_GROUP	(u_int32_t)0xe0000001	/* 224.0.0.1 */
+#define	INADDR_ALLRTRS_GROUP	(u_int32_t)0xe0000002	/* 224.0.0.2 */
+#define	INADDR_MAX_LOCAL_GROUP	(u_int32_t)0xe00000ff	/* 224.0.0.255 */
+
+#define	IN_LOOPBACKNET		127			/* official! */
+
+/*
+ * Socket address, internet style.
+ */
+struct sockaddr_in {
+	u_char	sin_len;		/* callers set sizeof(struct sockaddr_in) */
+	u_char	sin_family;		/* address family; AF_INET */
+	u_short	sin_port;		/* port (byte order: TODO confirm) */
+	struct	in_addr sin_addr;	/* the IPv4 address */
+	char	sin_zero[8];		/* unused bytes; callers zero them */
+};
+
+#define	INET_ADDRSTRLEN                 16	/* fits "255.255.255.255" + NUL */
+
+/*
+ * Structure used to describe IP options.
+ * Used to store options internally, to pass them to a process,
+ * or to restore options retrieved earlier.
+ * The ip_dst is used for the first-hop gateway when using a source route
+ * (this gets put into the header proper).
+ */
+struct ip_opts {
+	struct	in_addr ip_dst;		/* first hop; 0 without source route */
+	char	ip_opts[40];		/* actually variable in size */
+};
+
+/*
+ * Options for use with [gs]etsockopt at the IP level.
+ * First word of comment is data type; bool is stored in int.
+ */
+#define	IP_OPTIONS		1    /* buf/ip_opts; set/get IP options */
+#define	IP_HDRINCL		2    /* int; header is included with data */
+#define	IP_TOS			3    /* int; IP type of service and preced. */
+#define	IP_TTL			4    /* int; IP time to live */
+#define	IP_RECVOPTS		5    /* bool; receive all IP opts w/dgram */
+#define	IP_RECVRETOPTS		6    /* bool; receive IP opts for response */
+#define	IP_RECVDSTADDR		7    /* bool; receive IP dst addr w/dgram */
+#define	IP_RETOPTS		8    /* ip_opts; set/get IP options */
+#define	IP_MULTICAST_IF		9    /* u_char; set/get IP multicast i/f  */
+#define	IP_MULTICAST_TTL	10   /* u_char; set/get IP multicast ttl */
+#define	IP_MULTICAST_LOOP	11   /* u_char; set/get IP multicast loopback */
+#define	IP_ADD_MEMBERSHIP	12   /* ip_mreq; add an IP group membership */
+#define	IP_DROP_MEMBERSHIP	13   /* ip_mreq; drop an IP group membership */
+#define IP_MULTICAST_VIF	14   /* set/get IP mcast virt. iface */
+#define IP_RSVP_ON		15   /* enable RSVP in kernel */
+#define IP_RSVP_OFF		16   /* disable RSVP in kernel */
+#define IP_RSVP_VIF_ON		17   /* set RSVP per-vif socket */
+#define IP_RSVP_VIF_OFF		18   /* unset RSVP per-vif socket */
+#define IP_PORTRANGE		19   /* int; range to choose for unspec port */
+#define	IP_RECVIF		20   /* bool; receive reception i/f w/dgram */
+/* for IPSEC */
+#define	IP_IPSEC_POLICY		21   /* int; set/get security policy */
+#define	IP_FAITH		22   /* bool; accept FAITH'ed connections */
+
+#define	IP_FW_ADD     		50   /* add a firewall rule to chain */
+#define	IP_FW_DEL    		51   /* delete a firewall rule from chain */
+#define	IP_FW_FLUSH   		52   /* flush firewall rule chain */
+#define	IP_FW_ZERO    		53   /* clear single/all firewall counter(s) */
+#define	IP_FW_GET     		54   /* get entire firewall rule chain */
+#define	IP_FW_RESETLOG		55   /* reset logging counters */
+
+#define	IP_DUMMYNET_CONFIGURE	60   /* add/configure a dummynet pipe */
+#define	IP_DUMMYNET_DEL		61   /* delete a dummynet pipe from chain */
+#define	IP_DUMMYNET_FLUSH	62   /* flush dummynet */
+#define	IP_DUMMYNET_GET		64   /* get all dummynet pipes (63 is unused) */
+
+/*
+ * Defaults and limits for options
+ */
+#define	IP_DEFAULT_MULTICAST_TTL  1	/* normally limit m'casts to 1 hop  */
+#define	IP_DEFAULT_MULTICAST_LOOP 1	/* normally hear sends if a member  */
+#define	IP_MAX_MEMBERSHIPS	20	/* max group memberships per socket */
+
+/*
+ * Argument structure for IP_ADD_MEMBERSHIP and IP_DROP_MEMBERSHIP.
+ */
+struct ip_mreq {
+	struct	in_addr imr_multiaddr;	/* multicast group to add or drop */
+	struct	in_addr imr_interface;	/* local IP address of interface */
+};
+
+/*
+ * Argument for IP_PORTRANGE:
+ * - which range to search when port is unspecified at bind() or connect()
+ */
+#define	IP_PORTRANGE_DEFAULT	0	/* default range */
+#define	IP_PORTRANGE_HIGH	1	/* "high" - request firewall bypass */
+#define	IP_PORTRANGE_LOW	2	/* "low" - ports < IPPORT_RESERVED */
+
+/*
+ * Definitions for inet sysctl operations.
+ *
+ * Third level is protocol number.
+ * Fourth level is desired variable within that protocol.
+ */
+#define	IPPROTO_MAXID	(IPPROTO_AH + 1)	/* don't list to IPPROTO_MAX */
+
+#define	CTL_IPPROTO_NAMES { \
+	{ "ip", CTLTYPE_NODE }, \
+	{ "icmp", CTLTYPE_NODE }, \
+	{ "igmp", CTLTYPE_NODE }, \
+	{ "ggp", CTLTYPE_NODE }, \
+	{ 0, 0 }, \
+	{ 0, 0 }, \
+	{ "tcp", CTLTYPE_NODE }, \
+	{ 0, 0 }, \
+	{ "egp", CTLTYPE_NODE }, \
+	{ 0, 0 }, \
+	{ 0, 0 }, \
+	{ 0, 0 }, \
+	{ "pup", CTLTYPE_NODE }, \
+	{ 0, 0 }, \
+	{ 0, 0 }, \
+	{ 0, 0 }, \
+	{ 0, 0 }, \
+	{ "udp", CTLTYPE_NODE }, \
+	{ 0, 0 }, \
+	{ 0, 0 }, \
+	{ 0, 0 }, \
+	{ 0, 0 }, \
+	{ "idp", CTLTYPE_NODE }, \
+	{ 0, 0 }, \
+	{ 0, 0 }, \
+	{ 0, 0 }, \
+	{ 0, 0 }, \
+	{ 0, 0 }, \
+	{ 0, 0 }, \
+	{ 0, 0 }, \
+	{ 0, 0 }, \
+	{ 0, 0 }, \
+	{ 0, 0 }, \
+	{ 0, 0 }, \
+	{ 0, 0 }, \
+	{ 0, 0 }, \
+	{ 0, 0 }, \
+	{ 0, 0 }, \
+	{ 0, 0 }, \
+	{ 0, 0 }, \
+	{ 0, 0 }, \
+	{ 0, 0 }, \
+	{ 0, 0 }, \
+	{ 0, 0 }, \
+	{ 0, 0 }, \
+	{ 0, 0 }, \
+	{ 0, 0 }, \
+	{ 0, 0 }, \
+	{ 0, 0 }, \
+	{ 0, 0 }, \
+	{ 0, 0 }, \
+	{ "ipsec", CTLTYPE_NODE }, \
+}
+
+/* Names for IP sysctl objects; each value is also its IPCTL_NAMES index. */
+#define	IPCTL_FORWARDING	1	/* act as router */
+#define	IPCTL_SENDREDIRECTS	2	/* may send redirects when forwarding */
+#define	IPCTL_DEFTTL		3	/* default TTL */
+#ifdef notyet
+#define	IPCTL_DEFMTU		4	/* default MTU */
+#endif
+#define IPCTL_RTEXPIRE		5	/* cloned route expiration time */
+#define IPCTL_RTMINEXPIRE	6	/* min value for expiration time */
+#define IPCTL_RTMAXCACHE	7	/* trigger level for dynamic expire */
+#define	IPCTL_SOURCEROUTE	8	/* may perform source routes */
+#define	IPCTL_DIRECTEDBROADCAST	9	/* may re-broadcast received packets */
+#define IPCTL_INTRQMAXLEN	10	/* max length of netisr queue */
+#define	IPCTL_INTRQDROPS	11	/* number of netisr q drops */
+#define	IPCTL_STATS		12	/* ipstat structure */
+#define	IPCTL_ACCEPTSOURCEROUTE	13	/* may accept source routed packets */
+#define	IPCTL_FASTFORWARDING	14	/* use fast IP forwarding code */
+#define	IPCTL_KEEPFAITH		15	/* FAITH IPv4->IPv6 translator ctl */
+#define	IPCTL_GIF_TTL		16	/* default TTL for gif encap packet */
+#define	IPCTL_MAXID		17	/* one more than the highest id above */
+
+#define	IPCTL_NAMES { \
+	{ 0, 0 }, \
+	{ "forwarding", CTLTYPE_INT }, \
+	{ "redirect", CTLTYPE_INT }, \
+	{ "ttl", CTLTYPE_INT }, \
+	{ "mtu", CTLTYPE_INT }, /* slot 4; IPCTL_DEFMTU is still notyet */ \
+	{ "rtexpire", CTLTYPE_INT }, \
+	{ "rtminexpire", CTLTYPE_INT }, \
+	{ "rtmaxcache", CTLTYPE_INT }, \
+	{ "sourceroute", CTLTYPE_INT }, \
+	{ "directed-broadcast", CTLTYPE_INT }, \
+	{ "intr-queue-maxlen", CTLTYPE_INT }, \
+	{ "intr-queue-drops", CTLTYPE_INT }, \
+	{ "stats", CTLTYPE_STRUCT }, \
+	{ "accept_sourceroute", CTLTYPE_INT }, \
+	{ "fastforwarding", CTLTYPE_INT }, \
+	{ "keepfaith", CTLTYPE_INT }, /* was missing: IPCTL_KEEPFAITH */ \
+	{ "gifttl", CTLTYPE_INT }, /* was missing: IPCTL_GIF_TTL */ \
+}
+
+/* INET6 stuff */
+#define	__KAME_NETINET_IN_H_INCLUDED_
+#include <netinet6/in6.h>
+#undef __KAME_NETINET_IN_H_INCLUDED_
+
+#ifdef _KERNEL
+struct ifnet; struct mbuf;	/* forward declarations for Standard C */
+struct proc;
+
+int	 in_broadcast __P((struct in_addr, struct ifnet *));
+int	 in_canforward __P((struct in_addr));
+int	 in_localaddr __P((struct in_addr));
+char 	*inet_ntoa __P((struct in_addr)); /* in libkern */
+char	*inet_ntoa_r __P((struct in_addr ina, char *buf)); /* in libkern */
+
+#endif
+
+#endif
diff --git a/sys/netinet/in_cksum.c b/sys/netinet/in_cksum.c
new file mode 100644
index 0000000..eaf1493
--- /dev/null
+++ b/sys/netinet/in_cksum.c
@@ -0,0 +1,150 @@
+/*
+ * Copyright (c) 1988, 1992, 1993
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)in_cksum.c	8.1 (Berkeley) 6/10/93
+ * $FreeBSD$
+ */
+
+#include <sys/param.h>
+#include <sys/mbuf.h>
+
+/*
+ * Checksum routine for Internet Protocol family headers (Portable Version).
+ *
+ * This routine is very heavily used in the network
+ * code and should be modified for each CPU to be as fast as possible.
+ */
+
+#define ADDCARRY(x)  (x > 65535 ? x -= 65535 : x)
+#define REDUCE {l_util.l = sum; sum = l_util.s[0] + l_util.s[1]; ADDCARRY(sum);}
+
+int
+in_cksum(m, len)
+	register struct mbuf *m;	/* mbuf chain to checksum */
+	register int len;		/* number of bytes to cover */
+{
+	register u_short *w;		/* next 16-bit word to add */
+	register int sum = 0;		/* running sum; REDUCE folds it */
+	register int mlen = 0;		/* bytes left here; -1: odd byte held */
+	int byte_swapped = 0;		/* set while sum is shifted by 8 */
+
+	union {
+		char	c[2];
+		u_short	s;
+	} s_util;			/* joins two lone bytes into a word */
+	union {
+		u_short s[2];
+		long	l;
+	} l_util;			/* scratch used by REDUCE */
+
+	for (;m && len; m = m->m_next) {
+		if (m->m_len == 0)
+			continue;
+		w = mtod(m, u_short *);
+		if (mlen == -1) {
+			/*
+			 * The first byte of this mbuf is the continuation
+			 * of a word spanning between this mbuf and the
+			 * last mbuf.
+			 *
+			 * s_util.c[0] was already saved while scanning the
+			 * previous mbuf.
+			 */
+			s_util.c[1] = *(char *)w;
+			sum += s_util.s;
+			w = (u_short *)((char *)w + 1);
+			mlen = m->m_len - 1;
+			len--;
+		} else
+			mlen = m->m_len;
+		if (len < mlen)
+			mlen = len;
+		len -= mlen;
+		/*
+		 * Force to even boundary; the odd byte is held in s_util.c[0].
+		 */
+		if ((1 & (int) w) && (mlen > 0)) {
+			REDUCE;
+			sum <<= 8;
+			s_util.c[0] = *(u_char *)w;
+			w = (u_short *)((char *)w + 1);
+			mlen--;
+			byte_swapped = 1;
+		}
+		/*
+		 * Unroll the loop to make overhead from
+		 * branches &c small.
+		 */
+		while ((mlen -= 32) >= 0) {
+			sum += w[0]; sum += w[1]; sum += w[2]; sum += w[3];
+			sum += w[4]; sum += w[5]; sum += w[6]; sum += w[7];
+			sum += w[8]; sum += w[9]; sum += w[10]; sum += w[11];
+			sum += w[12]; sum += w[13]; sum += w[14]; sum += w[15];
+			w += 16;
+		}
+		mlen += 32;
+		while ((mlen -= 8) >= 0) {
+			sum += w[0]; sum += w[1]; sum += w[2]; sum += w[3];
+			w += 4;
+		}
+		mlen += 8;
+		if (mlen == 0 && byte_swapped == 0)
+			continue;
+		REDUCE;
+		while ((mlen -= 2) >= 0) {
+			sum += *w++;
+		}
+		if (byte_swapped) {
+			REDUCE;
+			sum <<= 8;
+			byte_swapped = 0;
+			if (mlen == -1) {
+				s_util.c[1] = *(char *)w;
+				sum += s_util.s;
+				mlen = 0;
+			} else
+				mlen = -1;
+		} else if (mlen == -1)
+			s_util.c[0] = *(char *)w;
+	}
+	if (len)			/* chain ended before len bytes */
+		printf("cksum: out of data\n");
+	if (mlen == -1) {
+		/* The last mbuf has odd # of bytes. Follow the
+		   standard (the odd byte may be shifted left by 8 bits
+		   or not as determined by endian-ness of the machine) */
+		s_util.c[1] = 0;
+		sum += s_util.s;
+	}
+	REDUCE;
+	return (~sum & 0xffff);		/* complement of the folded sum */
+}
diff --git a/sys/netinet/in_gif.c b/sys/netinet/in_gif.c
new file mode 100644
index 0000000..17955ad
--- /dev/null
+++ b/sys/netinet/in_gif.c
@@ -0,0 +1,382 @@
+/*	$FreeBSD$	*/
+/*	$KAME: in_gif.c,v 1.44 2000/08/15 07:24:24 itojun Exp $	*/
+
+/*
+ * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. Neither the name of the project nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include "opt_mrouting.h"
+#include "opt_inet.h"
+#include "opt_inet6.h"
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/socket.h>
+#include <sys/sockio.h>
+#include <sys/mbuf.h>
+#include <sys/errno.h>
+#include <sys/kernel.h>
+#include <sys/sysctl.h>
+
+#include <sys/malloc.h>
+
+#include <net/if.h>
+#include <net/route.h>
+
+#include <netinet/in.h>
+#include <netinet/in_systm.h>
+#include <netinet/ip.h>
+#include <netinet/ip_var.h>
+#include <netinet/in_gif.h>
+#include <netinet/in_var.h>
+#include <netinet/ip_encap.h>
+#include <netinet/ip_ecn.h>
+
+#ifdef INET6
+#include <netinet/ip6.h>
+#endif
+
+#ifdef MROUTING
+#include <netinet/ip_mroute.h>
+#endif /* MROUTING */
+
+#include <net/if_gif.h>	
+
+#include "gif.h"
+
+#include <machine/stdarg.h>
+
+#include <net/net_osdep.h>
+
+#if NGIF > 0
+int ip_gif_ttl = GIF_TTL;	/* outer-header TTL; see in_gif_output() */
+#else
+int ip_gif_ttl = 0;		/* NGIF is not > 0 */
+#endif
+SYSCTL_INT(_net_inet_ip, IPCTL_GIF_TTL, gifttl, CTLFLAG_RW,
+	&ip_gif_ttl,	0, "");	/* read/write; description left empty */
+
+int
+in_gif_output(ifp, family, m, rt)
+	struct ifnet	*ifp;
+	int		family;
+	struct mbuf	*m;
+	struct rtentry *rt;
+{
+	register struct gif_softc *sc = (struct gif_softc*)ifp;
+	struct sockaddr_in *ro_dst = (struct sockaddr_in *)&sc->gif_ro.ro_dst;
+	struct sockaddr_in *psrc = (struct sockaddr_in *)sc->gif_psrc;
+	struct sockaddr_in *pdst = (struct sockaddr_in *)sc->gif_pdst;
+	struct ip outer;	/* encapsulating IP header, host byte ordered */
+	u_int8_t tos;		/* TOS (IPv6: traffic class) of the payload */
+	int proto;		/* outer ip_p value */
+
+	/*
+	 * Wrap packet m (address family `family') in an IPv4 header and hand
+	 * it to ip_output() along the route cached in the softc.  rt is only
+	 * consulted in multi-destination mode.  Returns an errno value on
+	 * local failure, otherwise whatever ip_output() returns.
+	 */
+	if (psrc == NULL || pdst == NULL ||
+	    psrc->sin_family != AF_INET || pdst->sin_family != AF_INET) {
+		m_freem(m);
+		return EAFNOSUPPORT;
+	}
+
+	/* Choose the outer protocol and read the payload's TOS byte. */
+	switch (family) {
+#ifdef INET
+	case AF_INET:
+	    {
+		struct ip *inner4;
+
+		proto = IPPROTO_IPV4;
+		if (m->m_len < sizeof(*inner4) &&
+		    (m = m_pullup(m, sizeof(*inner4))) == NULL)
+			return ENOBUFS;
+		inner4 = mtod(m, struct ip *);
+		tos = inner4->ip_tos;
+		break;
+	    }
+#endif /*INET*/
+#ifdef INET6
+	case AF_INET6:
+	    {
+		struct ip6_hdr *inner6;
+
+		proto = IPPROTO_IPV6;
+		if (m->m_len < sizeof(*inner6) &&
+		    (m = m_pullup(m, sizeof(*inner6))) == NULL)
+			return ENOBUFS;
+		inner6 = mtod(m, struct ip6_hdr *);
+		tos = (ntohl(inner6->ip6_flow) >> 20) & 0xff;
+		break;
+	    }
+#endif /*INET6*/
+	default:
+#ifdef DEBUG
+		printf("in_gif_output: warning: unknown family %d passed\n",
+			family);
+#endif
+		m_freem(m);
+		return EAFNOSUPPORT;
+	}
+
+	/*
+	 * Fill in the outer header.  The destination is the configured peer
+	 * when there is one; failing that, multi-destination mode
+	 * (IFF_LINK0) may take it from the gateway of the caller's route,
+	 * for IPv4 payloads only.
+	 */
+	bzero(&outer, sizeof(outer));
+	outer.ip_src = psrc->sin_addr;
+	if (pdst->sin_addr.s_addr != INADDR_ANY)
+		outer.ip_dst = pdst->sin_addr;
+	else if ((ifp->if_flags & IFF_LINK0) != 0 && rt != NULL) {
+		if (family != AF_INET) {
+			m_freem(m);
+			return EINVAL;	/*XXX*/
+		}
+		outer.ip_dst =
+		    ((struct sockaddr_in *)(rt->rt_gateway))->sin_addr;
+	} else {
+		m_freem(m);
+		return ENETUNREACH;
+	}
+	outer.ip_p = proto;
+	/* version will be set in ip_output() */
+	outer.ip_ttl = ip_gif_ttl;
+	outer.ip_len = m->m_pkthdr.len + sizeof(struct ip);
+	if (ifp->if_flags & IFF_LINK1)
+		ip_ecn_ingress(ECN_ALLOWED, &outer.ip_tos, &tos);
+
+	/*
+	 * Prepend the new IP header and make sure it is contiguous.
+	 */
+	M_PREPEND(m, sizeof(struct ip), M_DONTWAIT);
+	if (m != NULL && m->m_len < sizeof(struct ip))
+		m = m_pullup(m, sizeof(struct ip));
+	if (m == NULL) {	/* the message below embeds __LINE__ */
+		printf("ENOBUFS in in_gif_output %d\n", __LINE__);
+		return ENOBUFS;
+	}
+	bcopy(&outer, mtod(m, struct ip *), sizeof(struct ip));
+
+	/*
+	 * The cached route is keyed on the configured peer address.  When
+	 * that no longer matches, re-key the cache and drop the stale route.
+	 */
+	if (ro_dst->sin_family != pdst->sin_family ||
+	    ro_dst->sin_addr.s_addr != pdst->sin_addr.s_addr) {
+		ro_dst->sin_family = pdst->sin_family;
+		ro_dst->sin_len = sizeof(struct sockaddr_in);
+		ro_dst->sin_addr = pdst->sin_addr;
+		if (sc->gif_ro.ro_rt) {
+			RTFREE(sc->gif_ro.ro_rt);
+			sc->gif_ro.ro_rt = NULL;
+		}
+#if 0
+		sc->gif_if.if_mtu = GIF_MTU;
+#endif
+	}
+
+	if (sc->gif_ro.ro_rt == NULL) {
+		rtalloc(&sc->gif_ro);
+		/* NOTE(review): a route looping back through ifp stays cached,
+		   so later packets skip this test -- confirm intent. */
+		if (sc->gif_ro.ro_rt == NULL ||
+		    sc->gif_ro.ro_rt->rt_ifp == ifp) {
+			/* no route, or infinite encapsulation: punt. */
+			m_freem(m);
+			return ENETUNREACH;	/*XXX*/
+		}
+#if 0
+		ifp->if_mtu = sc->gif_ro.ro_rt->rt_ifp->if_mtu
+			- sizeof(struct ip);
+#endif
+	}
+
+	return ip_output(m, NULL, &sc->gif_ro, 0, NULL);
+}
+
+void
+#if __STDC__
+in_gif_input(struct mbuf *m, ...)
+#else
+in_gif_input(m, va_alist)
+	struct mbuf *m;
+	va_dcl
+#endif
+{
+	struct ifnet *gifp;	/* gif interface the packet belongs to */
+	struct ip *oip;		/* outer IPv4 header */
+	u_int8_t outer_tos;	/* TOS byte saved from the outer header */
+	int off, proto, af;
+	va_list ap;
+
+	/*
+	 * Decapsulate m and pass the payload to gif_input().  The variable
+	 * arguments are two ints: off, the number of leading bytes to trim
+	 * away, and proto, the protocol number of the payload.
+	 */
+	va_start(ap, m);
+	off = va_arg(ap, int);
+	proto = va_arg(ap, int);
+	va_end(ap);
+
+	oip = mtod(m, struct ip *);
+	/* Drop the packet unless it maps to a gif interface that is up. */
+	gifp = (struct ifnet *)encap_getarg(m);
+	if (gifp == NULL || (gifp->if_flags & IFF_UP) == 0) {
+		m_freem(m);
+		ipstat.ips_nogif++;
+		return;
+	}
+
+	/* Remember the outer TOS, then trim the first off bytes. */
+	outer_tos = oip->ip_tos;
+	m_adj(m, off);
+
+	switch (proto) {
+#ifdef INET
+	case IPPROTO_IPV4:
+	    {
+		struct ip *ip4;
+		af = AF_INET;
+		if (m->m_len < sizeof(*ip4) &&
+		    (m = m_pullup(m, sizeof(*ip4))) == NULL)
+			return;
+		ip4 = mtod(m, struct ip *);
+		if (gifp->if_flags & IFF_LINK1)
+			ip_ecn_egress(ECN_ALLOWED, &outer_tos, &ip4->ip_tos);
+		break;
+	    }
+#endif
+#ifdef INET6
+	case IPPROTO_IPV6:
+	    {
+		struct ip6_hdr *ip6;
+		u_int8_t itos;	/* traffic class of the inner header */
+		af = AF_INET6;
+		if (m->m_len < sizeof(*ip6) &&
+		    (m = m_pullup(m, sizeof(*ip6))) == NULL)
+			return;
+		ip6 = mtod(m, struct ip6_hdr *);
+		itos = (ntohl(ip6->ip6_flow) >> 20) & 0xff;
+		if (gifp->if_flags & IFF_LINK1)
+			ip_ecn_egress(ECN_ALLOWED, &outer_tos, &itos);
+		/* store the (possibly updated) class back */
+		ip6->ip6_flow &= ~htonl(0xff << 20);
+		ip6->ip6_flow |= htonl((u_int32_t)itos << 20);
+		break;
+	    }
+#endif /* INET6 */
+	default:
+		ipstat.ips_nogif++;
+		m_freem(m);
+		return;
+	}
+	gif_input(m, af, gifp);
+}
+
+/*
+ * we know that we are in IFF_UP, outer address available, and outer family
+ * matched the physical addr family.  see gif_encapcheck().
+ */
+int
+gif_encapcheck4(m, off, proto, arg)
+	const struct mbuf *m;
+	int off;
+	int proto;
+	void *arg;
+{
+	struct gif_softc *sc = (struct gif_softc *)arg;
+	struct sockaddr_in *src, *dst;	/* configured tunnel endpoints */
+	struct in_ifaddr *ia4;
+	struct ip ip;			/* copy of the outer header */
+	u_int32_t osrc;			/* outer source, host byte order */
+
+	/*
+	 * Decide whether packet m belongs to tunnel `arg': returns 0 if not,
+	 * else a priority (32 in IFF_LINK0 mode, otherwise 64).  off and proto
+	 * are not examined.  Sanity checks were already done by the caller.
+	 */
+	src = (struct sockaddr_in *)sc->gif_psrc;
+	dst = (struct sockaddr_in *)sc->gif_pdst;
+
+	/* LINTED const cast */
+	m_copydata((struct mbuf *)m, 0, sizeof(ip), (caddr_t)&ip);
+
+	/* The outer destination must be our end of the tunnel... */
+	if (src->sin_addr.s_addr != ip.ip_dst.s_addr)
+		return 0;
+	/*
+	 * ...and the outer source must be the configured peer, unless a
+	 * wildcard peer in multi-destination (IFF_LINK0) mode accepts any.
+	 */
+	if (dst->sin_addr.s_addr != ip.ip_src.s_addr &&
+	    ((sc->gif_if.if_flags & IFF_LINK0) == 0 ||
+	     dst->sin_addr.s_addr != INADDR_ANY))
+		return 0;
+
+	/* martian filters on outer source - NOT done in ip_input! */
+	osrc = ntohl(ip.ip_src.s_addr);
+	if (IN_MULTICAST(osrc))
+		return 0;
+	if ((osrc >> 24) == 0 || (osrc >> 24) == 127 || (osrc >> 24) == 255)
+		return 0;
+
+	/* reject packets with broadcast on source */
+	TAILQ_FOREACH(ia4, &in_ifaddrhead, ia_link) {
+		if ((ia4->ia_ifa.ifa_ifp->if_flags & IFF_BROADCAST) != 0 &&
+		    ip.ip_src.s_addr == ia4->ia_broadaddr.sin_addr.s_addr)
+			return 0;
+	}
+
+	/* ingress filters on outer source */
+	if ((m->m_flags & M_PKTHDR) != 0 && m->m_pkthdr.rcvif) {
+		struct sockaddr_in sin;
+		struct rtentry *rt;
+		int wrong_if;	/* route back to the source uses another if */
+
+		bzero(&sin, sizeof(sin));
+		sin.sin_family = AF_INET;
+		sin.sin_len = sizeof(struct sockaddr_in);
+		sin.sin_addr = ip.ip_src;
+		rt = rtalloc1((struct sockaddr *)&sin, 0, 0UL);
+		if (rt == NULL)
+			return 0;
+		wrong_if = (rt->rt_ifp != m->m_pkthdr.rcvif);
+		rtfree(rt);
+		if (wrong_if)
+			return 0;
+	}
+
+	/* prioritize: IFF_LINK0 mode is less preferred */
+	return (sc->gif_if.if_flags & IFF_LINK0) ? 32 : 32 * 2;
+}
diff --git a/sys/netinet/in_gif.h b/sys/netinet/in_gif.h
new file mode 100644
index 0000000..de03c6e
--- /dev/null
+++ b/sys/netinet/in_gif.h
@@ -0,0 +1,44 @@
+/*	$FreeBSD$	*/
+/*	$KAME: in_gif.h,v 1.5 2000/04/14 08:36:02 itojun Exp $	*/
+
+/*
+ * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. Neither the name of the project nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#ifndef _NETINET_IN_GIF_H_
+#define _NETINET_IN_GIF_H_
+
+#define GIF_TTL		30	/* presumably the default for ip_gif_ttl -- TODO confirm */
+
+extern int ip_gif_ttl;
+/* gif_encapcheck4() returns 0 to reject a packet, else a match priority. */
+void in_gif_input __P((struct mbuf *, ...));
+int in_gif_output __P((struct ifnet *, int, struct mbuf *, struct rtentry *));
+int gif_encapcheck4 __P((const struct mbuf *, int, int, void *));
+
+#endif /*_NETINET_IN_GIF_H_*/
diff --git a/sys/netinet/in_hostcache.c b/sys/netinet/in_hostcache.c
new file mode 100644
index 0000000..36a92fd
--- /dev/null
+++ b/sys/netinet/in_hostcache.c
@@ -0,0 +1,157 @@
+/*
+ * Copyright 1997 Massachusetts Institute of Technology
+ *
+ * Permission to use, copy, modify, and distribute this software and
+ * its documentation for any purpose and without fee is hereby
+ * granted, provided that both the above copyright notice and this
+ * permission notice appear in all copies, that both the above
+ * copyright notice and this permission notice appear in all
+ * supporting documentation, and that the name of M.I.T. not be used
+ * in advertising or publicity pertaining to distribution of the
+ * software without specific, written prior permission.  M.I.T. makes
+ * no representations about the suitability of this software for any
+ * purpose.  It is provided "as is" without express or implied
+ * warranty.
+ * 
+ * THIS SOFTWARE IS PROVIDED BY M.I.T. ``AS IS''.  M.I.T. DISCLAIMS
+ * ALL EXPRESS OR IMPLIED WARRANTIES WITH REGARD TO THIS SOFTWARE,
+ * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. IN NO EVENT
+ * SHALL M.I.T. BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+ * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/malloc.h>
+#include <sys/socket.h>
+#include <sys/socketvar.h>
+
+#include <net/hostcache.h>
+#include <net/if.h>
+#include <net/if_var.h>
+#include <net/route.h>
+
+#include <netinet/in.h>
+#include <netinet/in_hostcache.h>
+#include <netinet/tcp.h>
+#include <netinet/tcp_timer.h>
+#include <netinet/tcp_var.h>
+
+/*
+ * Manage the IP per-host cache (really a thin veneer over the generic
+ * per-host cache code).
+ */
+
+/* Fetch the entry cached for `sin' -- can be called from interrupt context. */
+struct in_hcentry *
+inhc_lookup(struct sockaddr_in *sin)
+{
+	struct sockaddr *key = (struct sockaddr *)sin;
+
+	/* hc_get() yields a struct hcentry; callers see it as in_hcentry. */
+	return ((struct in_hcentry *)hc_get(key));
+}
+
+/* Look up and possibly create an entry -- must be called from user mode. */
+struct in_hcentry *
+inhc_alloc(struct sockaddr_in *sin)
+{
+	struct in_hcentry *inhc;
+	struct rtentry *rt;
+	int error;
+	/* xxx mutual exclusion for smp */
+
+	inhc = inhc_lookup(sin);
+	if (inhc != 0)
+		return inhc;
+	/* Cache miss: inhc is NULL from here on, so route on `sin' itself. */
+	rt = rtalloc1((struct sockaddr *)sin, 1, 0);
+	if (rt == 0)
+		return 0;
+
+	MALLOC(inhc, struct in_hcentry *, sizeof *inhc, M_HOSTCACHE,
+		M_WAITOK | M_ZERO);
+	inhc->inhc_hc.hc_host = dup_sockaddr((struct sockaddr *)sin, 1);
+	if (in_broadcast(sin->sin_addr, rt->rt_ifp))
+		inhc->inhc_flags |= INHC_BROADCAST;
+	else if (((struct sockaddr_in *)rt->rt_ifa->ifa_addr)->sin_addr.s_addr
+		== sin->sin_addr.s_addr)
+		inhc->inhc_flags |= INHC_LOCAL;
+	else if (IN_MULTICAST(ntohl(sin->sin_addr.s_addr)))
+		inhc->inhc_flags |= INHC_MULTICAST;
+	inhc->inhc_pmtu = rt->rt_rmx.rmx_mtu;	/* seed metrics from the route */
+	inhc->inhc_recvpipe = rt->rt_rmx.rmx_recvpipe;
+	inhc->inhc_sendpipe = rt->rt_rmx.rmx_sendpipe;
+	inhc->inhc_ssthresh = rt->rt_rmx.rmx_ssthresh;
+	if (rt->rt_rmx.rmx_locks & RTV_RTT)
+		inhc->inhc_rttmin = rt->rt_rmx.rmx_rtt
+			/ (RTM_RTTUNIT / TCP_RTT_SCALE);
+	inhc->inhc_hc.hc_rt = rt;	/* the entry keeps the route reference */
+	error = hc_insert(&inhc->inhc_hc);
+	if (error != 0) {
+		RTFREE(rt);
+		FREE(inhc, M_HOSTCACHE);  /* NOTE(review): hc_host copy not freed */
+		return 0;
+	}
+	/*
+	 * We don't return the structure directly because hc_get() needs
+	 * to be allowed to do its own processing.
+	 */
+	return (inhc_lookup(sin));
+}
+
+/*
+ * Van Jacobson's hash for IPv4 addresses, reduced to a bucket index in
+ * [0, nbuckets).  nbuckets must be non-zero (a power of two is intended).
+ */
+static u_long
+inhc_hash(struct sockaddr *sa, u_long nbuckets)
+{
+	u_long ip;
+
+	ip = ((struct sockaddr_in *)sa)->sin_addr.s_addr;
+	return ((ip ^ (ip >> 23) ^ (ip >> 17)) % nbuckets);
+}
+
+/*
+ * Delete callback registered in inhc_cb.  Does no work and always returns
+ * 0: the caller has already ensured that no references remain.
+ */
+static int
+inhc_delete(struct hcentry *hc)
+{
+	return 0;
+}
+
+/*
+ * Return the next increment for the number of buckets in the hash table:
+ * oldsize doubled while oldsize is below 512.  Zero means ``do not bump''.
+ */
+static u_long
+inhc_bump(u_long oldsize)
+{
+	u_long next = (oldsize < 512) ? (oldsize << 1) : 0;
+
+	return (next);
+}
+
+static struct hccallback inhc_cb = {
+	inhc_hash, inhc_delete, inhc_bump	/* hash, delete and bump hooks */
+};
+
+int
+inhc_init(void)
+{
+	/* Register the AF_INET hooks; 128 is presumably the initial table size. */
+	return (hc_init(AF_INET, &inhc_cb, 128, 0));
+}
+
diff --git a/sys/netinet/in_hostcache.h b/sys/netinet/in_hostcache.h
new file mode 100644
index 0000000..710756a
--- /dev/null
+++ b/sys/netinet/in_hostcache.h
@@ -0,0 +1,83 @@
+/*
+ * Copyright 1997 Massachusetts Institute of Technology
+ *
+ * Permission to use, copy, modify, and distribute this software and
+ * its documentation for any purpose and without fee is hereby
+ * granted, provided that both the above copyright notice and this
+ * permission notice appear in all copies, that both the above
+ * copyright notice and this permission notice appear in all
+ * supporting documentation, and that the name of M.I.T. not be used
+ * in advertising or publicity pertaining to distribution of the
+ * software without specific, written prior permission.  M.I.T. makes
+ * no representations about the suitability of this software for any
+ * purpose.  It is provided "as is" without express or implied
+ * warranty.
+ * 
+ * THIS SOFTWARE IS PROVIDED BY M.I.T. ``AS IS''.  M.I.T. DISCLAIMS
+ * ALL EXPRESS OR IMPLIED WARRANTIES WITH REGARD TO THIS SOFTWARE,
+ * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. IN NO EVENT
+ * SHALL M.I.T. BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+ * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+
+#ifndef _NETINET_IN_HOSTCACHE_H
+#define	_NETINET_IN_HOSTCACHE_H	1
+
+/*
+ * This file defines the particular structures contained in the host cache
+ * for the use of IP.
+ */
+
+/*
+ * An IP host cache entry.  Note that we include the srtt/var here,
+ * with the expectation that it might be used to keep a persistent,
+ * cross-connection view of this statistic.
+ */
+struct in_hcentry {
+	struct	hcentry inhc_hc;	/* generic entry; inhc_lookup casts from it */
+	u_long	inhc_pmtu;	/* inhc_alloc: from route rmx_mtu */
+	u_long	inhc_recvpipe;	/* inhc_alloc: from route rmx_recvpipe */
+	u_long	inhc_sendpipe;	/* inhc_alloc: from route rmx_sendpipe */
+	u_long	inhc_pksent;
+	u_long	inhc_flags;	/* INHC_* bits */
+	u_long	inhc_ssthresh;	/* inhc_alloc: from route rmx_ssthresh */
+	int	inhc_srtt;	/* VJ RTT estimator */
+	int	inhc_srttvar;	/* VJ */
+	u_int	inhc_rttmin;	/* VJ */
+	int	inhc_rxt;	/* TCP retransmit timeout */
+	u_long	inhc_cc;	/* deliberate type pun with tcp_cc */
+	u_long	inhc_ccsent;	/* as above */
+	u_short	inhc_mssopt;
+};
+
+#define	inhc_addr(inhc)	((struct sockaddr_in *)(inhc)->inhc_hc.hc_host)
+
+/* Flags for inhc_flags... */
+#define	INHC_LOCAL	0x0001	/* this address is local */
+#define	INHC_BROADCAST	0x0002	/* this address is broadcast */
+#define	INHC_MULTICAST	0x0004	/* this address is multicast */
+#define	INHC_REDUCEDMTU	0x0008	/* we reduced the mtu via PMTU discovery */
+
+#ifdef _KERNEL
+/*
+ * inhc_alloc can block while adding a new entry to the cache;
+ * inhc_lookup does not add new entries and so can be called
+ * in non-process context.
+ */
+struct	in_hcentry *inhc_alloc(struct sockaddr_in *sin);
+int	inhc_init(void);
+struct	in_hcentry *inhc_lookup(struct sockaddr_in *sin);
+#define	inhc_ref(inhc)	(hc_ref(&(inhc)->inhc_hc))
+#define	inhc_rele(inhc)	(hc_rele(&(inhc)->inhc_hc))
+#endif
+
+#endif /* _NETINET_IN_HOSTCACHE_H */
diff --git a/sys/netinet/in_pcb.c b/sys/netinet/in_pcb.c
new file mode 100644
index 0000000..b24b404
--- /dev/null
+++ b/sys/netinet/in_pcb.c
@@ -0,0 +1,1014 @@
+/*
+ * Copyright (c) 1982, 1986, 1991, 1993, 1995
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)in_pcb.c	8.4 (Berkeley) 5/24/95
+ * $FreeBSD$
+ */
+
+#include "opt_ipsec.h"
+#include "opt_inet6.h"
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/malloc.h>
+#include <sys/mbuf.h>
+#include <sys/domain.h>
+#include <sys/protosw.h>
+#include <sys/socket.h>
+#include <sys/socketvar.h>
+#include <sys/proc.h>
+#include <sys/jail.h>
+#include <sys/kernel.h>
+#include <sys/sysctl.h>
+
+#include <machine/limits.h>
+
+#include <vm/vm_zone.h>
+
+#include <net/if.h>
+#include <net/if_types.h>
+#include <net/route.h>
+
+#include <netinet/in.h>
+#include <netinet/in_pcb.h>
+#include <netinet/in_var.h>
+#include <netinet/ip_var.h>
+#ifdef INET6
+#include <netinet/ip6.h>
+#include <netinet6/ip6_var.h>
+#endif /* INET6 */
+
+#include "faith.h"
+
+#ifdef IPSEC
+#include <netinet6/ipsec.h>
+#include <netkey/key.h>
+#endif /* IPSEC */
+
+struct	in_addr zeroin_addr;
+
+/*
+ * Bounds of the three automatic port ranges used by in_pcbbind() when no
+ * port is given: low (INP_LOWPORT), default, and high (INP_HIGHPORT).
+ */
+int	ipport_lowfirstauto  = IPPORT_RESERVED - 1;	/* 1023 */
+int	ipport_lowlastauto = IPPORT_RESERVEDSTART;	/* 600 */
+int	ipport_firstauto = IPPORT_RESERVED;		/* 1024 */
+int	ipport_lastauto  = IPPORT_USERRESERVED;		/* 5000 */
+int	ipport_hifirstauto = IPPORT_HIFIRSTAUTO;	/* 49152 */
+int	ipport_hilastauto  = IPPORT_HILASTAUTO;		/* 65535 */
+
+#define RANGECHK(var, min, max) \
+	if ((var) < (min)) { (var) = (min); } \
+	else if ((var) > (max)) { (var) = (max); }
+
+static int
+sysctl_net_ipport_check(SYSCTL_HANDLER_ARGS)
+{
+	int error = sysctl_handle_int(oidp,
+		oidp->oid_arg1, oidp->oid_arg2, req);
+	if (!error) {
+		RANGECHK(ipport_lowfirstauto, 1, IPPORT_RESERVED - 1);
+		RANGECHK(ipport_lowlastauto, 1, IPPORT_RESERVED - 1);
+		RANGECHK(ipport_firstauto, IPPORT_RESERVED, USHRT_MAX);
+		RANGECHK(ipport_lastauto, IPPORT_RESERVED, USHRT_MAX);
+		RANGECHK(ipport_hifirstauto, IPPORT_RESERVED, USHRT_MAX);
+		RANGECHK(ipport_hilastauto, IPPORT_RESERVED, USHRT_MAX);
+	}
+	return error;
+}
+
+#undef RANGECHK
+
+SYSCTL_NODE(_net_inet_ip, IPPROTO_IP, portrange, CTLFLAG_RW, 0, "IP Ports");
+/* All six bounds below share sysctl_net_ipport_check as their handler. */
+SYSCTL_PROC(_net_inet_ip_portrange, OID_AUTO, lowfirst, CTLTYPE_INT|CTLFLAG_RW,
+	   &ipport_lowfirstauto, 0, &sysctl_net_ipport_check, "I", "");
+SYSCTL_PROC(_net_inet_ip_portrange, OID_AUTO, lowlast, CTLTYPE_INT|CTLFLAG_RW,
+	   &ipport_lowlastauto, 0, &sysctl_net_ipport_check, "I", "");
+SYSCTL_PROC(_net_inet_ip_portrange, OID_AUTO, first, CTLTYPE_INT|CTLFLAG_RW,
+	   &ipport_firstauto, 0, &sysctl_net_ipport_check, "I", "");
+SYSCTL_PROC(_net_inet_ip_portrange, OID_AUTO, last, CTLTYPE_INT|CTLFLAG_RW,
+	   &ipport_lastauto, 0, &sysctl_net_ipport_check, "I", "");
+SYSCTL_PROC(_net_inet_ip_portrange, OID_AUTO, hifirst, CTLTYPE_INT|CTLFLAG_RW,
+	   &ipport_hifirstauto, 0, &sysctl_net_ipport_check, "I", "");
+SYSCTL_PROC(_net_inet_ip_portrange, OID_AUTO, hilast, CTLTYPE_INT|CTLFLAG_RW,
+	   &ipport_hilastauto, 0, &sysctl_net_ipport_check, "I", "");
+
+/*
+ * in_pcb.c: manage the Protocol Control Blocks.
+ *
+ * NOTE: It is assumed that most of these functions will be called at
+ * splnet(). XXX - There are, unfortunately, a few exceptions to this
+ * rule that should be fixed.
+ */
+
+/*
+ * Allocate a zeroed PCB from pcbinfo's zone and link it to `so' and to
+ * the pcbinfo list.  Returns 0, or ENOBUFS if zalloc() fails; `p' is unused.
+ */
+int
+in_pcballoc(so, pcbinfo, p)
+	struct socket *so;
+	struct inpcbinfo *pcbinfo;
+	struct proc *p;
+{
+	register struct inpcb *inp = zalloc(pcbinfo->ipi_zone);
+
+	if (!inp)
+		return (ENOBUFS);
+	bzero((caddr_t)inp, sizeof(*inp));
+	inp->inp_socket = so;
+	inp->inp_pcbinfo = pcbinfo;
+	inp->inp_gencnt = ++pcbinfo->ipi_gencnt;
+#if defined(INET6)
+	if (!ip6_mapped_addr_on)
+		inp->inp_flags |= IN6P_BINDV6ONLY;
+	else
+		inp->inp_flags &= ~IN6P_BINDV6ONLY;
+#endif
+	LIST_INSERT_HEAD(pcbinfo->listhead, inp, inp_list);
+	pcbinfo->ipi_count++;
+	so->so_pcb = (caddr_t)inp;
+	return (0);
+}
+/* Bind inp to the address/port in nam, or to an automatic port if none given. */
+int
+in_pcbbind(inp, nam, p)
+	register struct inpcb *inp;
+	struct sockaddr *nam;
+	struct proc *p;
+{
+	register struct socket *so = inp->inp_socket;
+	unsigned short *lastport;	/* pcbinfo's last-used slot for the range */
+	struct sockaddr_in *sin;
+	struct inpcbinfo *pcbinfo = inp->inp_pcbinfo;
+	u_short lport = 0;		/* network byte order; 0 = pick one */
+	int wild = 0, reuseport = (so->so_options & SO_REUSEPORT);
+	int error, prison = 0;
+
+	if (TAILQ_EMPTY(&in_ifaddrhead)) /* XXX broken! */
+		return (EADDRNOTAVAIL);
+	if (inp->inp_lport || inp->inp_laddr.s_addr != INADDR_ANY)
+		return (EINVAL);	/* already has a local port or address */
+	if ((so->so_options & (SO_REUSEADDR|SO_REUSEPORT)) == 0)
+		wild = 1;
+	if (nam) {
+		sin = (struct sockaddr_in *)nam;
+		if (nam->sa_len != sizeof (*sin))
+			return (EINVAL);
+#ifdef notdef
+		/*
+		 * We should check the family, but old programs
+		 * incorrectly fail to initialize it.
+		 */
+		if (sin->sin_family != AF_INET)
+			return (EAFNOSUPPORT);
+#endif
+		if (sin->sin_addr.s_addr != INADDR_ANY)
+			if (prison_ip(p->p_ucred, 0, &sin->sin_addr.s_addr))
+				return(EINVAL);
+		lport = sin->sin_port;
+		if (IN_MULTICAST(ntohl(sin->sin_addr.s_addr))) {
+			/*
+			 * Treat SO_REUSEADDR as SO_REUSEPORT for multicast;
+			 * allow complete duplication of binding if
+			 * SO_REUSEPORT is set, or if SO_REUSEADDR is set
+			 * and a multicast address is bound on both
+			 * new and duplicated sockets.
+			 */
+			if (so->so_options & SO_REUSEADDR)
+				reuseport = SO_REUSEADDR|SO_REUSEPORT;
+		} else if (sin->sin_addr.s_addr != INADDR_ANY) {
+			sin->sin_port = 0;		/* yech... */
+			if (ifa_ifwithaddr((struct sockaddr *)sin) == 0)
+				return (EADDRNOTAVAIL);
+		}
+		if (lport) {
+			struct inpcb *t;
+			/* GROSS: ports below IPPORT_RESERVED need privilege */
+			if (ntohs(lport) < IPPORT_RESERVED && p &&
+			    suser_xxx(0, p, PRISON_ROOT))
+				return (EACCES);
+			if (p && jailed(p->p_ucred))
+				prison = 1;
+			if (so->so_cred->cr_uid != 0 &&
+			    !IN_MULTICAST(ntohl(sin->sin_addr.s_addr))) {
+				t = in_pcblookup_local(inp->inp_pcbinfo,
+				    sin->sin_addr, lport,
+				    prison ? 0 :  INPLOOKUP_WILDCARD);
+				if (t &&
+				    (ntohl(sin->sin_addr.s_addr) != INADDR_ANY ||
+				     ntohl(t->inp_laddr.s_addr) != INADDR_ANY ||
+				     (t->inp_socket->so_options &
+					 SO_REUSEPORT) == 0) &&
+				    (so->so_cred->cr_uid !=
+				     t->inp_socket->so_cred->cr_uid)) {
+#if defined(INET6)
+					if ((inp->inp_flags &
+					     IN6P_BINDV6ONLY) != 0 ||
+					    ntohl(sin->sin_addr.s_addr) !=
+					    INADDR_ANY ||
+					    ntohl(t->inp_laddr.s_addr) !=
+					    INADDR_ANY ||
+					    INP_SOCKAF(so) ==
+					    INP_SOCKAF(t->inp_socket))
+#endif /* defined(INET6) */
+					return (EADDRINUSE);
+				}
+			}
+			if (prison &&
+			    prison_ip(p->p_ucred, 0, &sin->sin_addr.s_addr))
+				return (EADDRNOTAVAIL);
+			t = in_pcblookup_local(pcbinfo, sin->sin_addr,
+			    lport, prison ? 0 : wild);
+			if (t &&
+			    (reuseport & t->inp_socket->so_options) == 0) {
+#if defined(INET6)
+				if ((inp->inp_flags & IN6P_BINDV6ONLY) != 0 ||
+				    ntohl(sin->sin_addr.s_addr) !=
+				    INADDR_ANY ||
+				    ntohl(t->inp_laddr.s_addr) !=
+				    INADDR_ANY ||
+				    INP_SOCKAF(so) ==
+				    INP_SOCKAF(t->inp_socket))
+#endif /* defined(INET6) */
+				return (EADDRINUSE);
+			}
+		}
+		inp->inp_laddr = sin->sin_addr;
+	}
+	if (lport == 0) {	/* no port requested (or nam == NULL): pick one */
+		ushort first, last;
+		int count;
+
+		if (inp->inp_laddr.s_addr != INADDR_ANY)
+			if (prison_ip(p->p_ucred, 0, &inp->inp_laddr.s_addr )) {
+				inp->inp_laddr.s_addr = INADDR_ANY;
+				return (EINVAL);
+			}
+		inp->inp_flags |= INP_ANONPORT;
+
+		if (inp->inp_flags & INP_HIGHPORT) {
+			first = ipport_hifirstauto;	/* sysctl */
+			last  = ipport_hilastauto;
+			lastport = &pcbinfo->lasthi;
+		} else if (inp->inp_flags & INP_LOWPORT) {
+			if (p && (error = suser_xxx(0, p, PRISON_ROOT))) {
+				inp->inp_laddr.s_addr = INADDR_ANY;
+				return error;
+			}
+			first = ipport_lowfirstauto;	/* 1023 */
+			last  = ipport_lowlastauto;	/* 600 */
+			lastport = &pcbinfo->lastlow;
+		} else {
+			first = ipport_firstauto;	/* sysctl */
+			last  = ipport_lastauto;
+			lastport = &pcbinfo->lastport;
+		}
+		/*
+		 * Simple check to ensure all ports are not used up causing
+		 * a deadlock here.
+		 *
+		 * We split the two cases (up and down) so that the direction
+		 * is not being tested on each round of the loop.
+		 */
+		if (first > last) {
+			/*
+			 * counting down
+			 */
+			count = first - last;
+
+			do {
+				if (count-- < 0) {	/* completely used? */
+					inp->inp_laddr.s_addr = INADDR_ANY;
+					return (EADDRNOTAVAIL);
+				}
+				--*lastport;
+				if (*lastport > first || *lastport < last)
+					*lastport = first;
+				lport = htons(*lastport);
+			} while (in_pcblookup_local(pcbinfo,
+				 inp->inp_laddr, lport, wild));
+		} else {
+			/*
+			 * counting up
+			 */
+			count = last - first;
+
+			do {
+				if (count-- < 0) {	/* completely used? */
+					/*
+					 * Undo any address bind that may have
+					 * occurred above.
+					 */
+					inp->inp_laddr.s_addr = INADDR_ANY;
+					return (EADDRNOTAVAIL);
+				}
+				++*lastport;
+				if (*lastport < first || *lastport > last)
+					*lastport = first;
+				lport = htons(*lastport);
+			} while (in_pcblookup_local(pcbinfo,
+				 inp->inp_laddr, lport, wild));
+		}
+	}
+	inp->inp_lport = lport;	/* NOTE(review): p is dereferenced unchecked below */
+	if (prison_ip(p->p_ucred, 0, &inp->inp_laddr.s_addr)) {
+		inp->inp_laddr.s_addr = INADDR_ANY;
+		inp->inp_lport = 0;
+		return(EINVAL);
+	}
+	if (in_pcbinshash(inp) != 0) {	/* undo the bind if hashing fails */
+		inp->inp_laddr.s_addr = INADDR_ANY;
+		inp->inp_lport = 0;
+		return (EAGAIN);
+	}
+	return (0);
+}
+
+/*
+ *   Inner subroutine of in_pcbconnect(): validate the remote address in
+ *   'nam' and, when the PCB has no local address yet, choose the local
+ *   interface address to use for reaching it.
+ *
+ *   Returns 0 on success, else EINVAL, EAFNOSUPPORT or EADDRNOTAVAIL.
+ *   *plocal_sin is written only when inp->inp_laddr is INADDR_ANY.  The
+ *   address in 'nam' may be rewritten (INADDR_ANY / INADDR_BROADCAST).
+ *   Split out of the old in_pcbconnect() so that T/TCP can use a slightly
+ *   different outer routine without changing every protocol.
+ */
+
+int
+in_pcbladdr(inp, nam, plocal_sin)
+	register struct inpcb *inp;
+	struct sockaddr *nam;
+	struct sockaddr_in **plocal_sin;
+{
+	struct in_ifaddr *ia;
+	register struct sockaddr_in *sin = (struct sockaddr_in *)nam;
+
+	if (nam->sa_len != sizeof (*sin))
+		return (EINVAL);
+	if (sin->sin_family != AF_INET)
+		return (EAFNOSUPPORT);
+	if (sin->sin_port == 0)
+		return (EADDRNOTAVAIL);	/* a remote port is mandatory */
+	if (!TAILQ_EMPTY(&in_ifaddrhead)) {
+		/*
+		 * If the destination address is INADDR_ANY,
+		 * use the primary local address.
+		 * If the supplied address is INADDR_BROADCAST,
+		 * and the primary interface supports broadcast,
+		 * choose the broadcast address for that interface.
+		 */
+#define	satosin(sa)	((struct sockaddr_in *)(sa))
+#define sintosa(sin)	((struct sockaddr *)(sin))
+#define ifatoia(ifa)	((struct in_ifaddr *)(ifa))
+		if (sin->sin_addr.s_addr == INADDR_ANY)
+		    sin->sin_addr = IA_SIN(TAILQ_FIRST(&in_ifaddrhead))->sin_addr;
+		else if (sin->sin_addr.s_addr == (u_long)INADDR_BROADCAST &&
+		  (TAILQ_FIRST(&in_ifaddrhead)->ia_ifp->if_flags & IFF_BROADCAST))
+		    sin->sin_addr = satosin(&TAILQ_FIRST(&in_ifaddrhead)->ia_broadaddr)->sin_addr;
+	}
+	if (inp->inp_laddr.s_addr == INADDR_ANY) {
+		register struct route *ro;
+
+		ia = (struct in_ifaddr *)0;
+		/*
+		 * If route is known or can be allocated now,
+		 * our src addr is taken from the i/f, else punt.
+		 */
+		ro = &inp->inp_route;
+		if (ro->ro_rt &&
+		    (satosin(&ro->ro_dst)->sin_addr.s_addr !=
+			sin->sin_addr.s_addr ||
+		    inp->inp_socket->so_options & SO_DONTROUTE)) {
+			RTFREE(ro->ro_rt);	/* cached route no longer applies */
+			ro->ro_rt = (struct rtentry *)0;
+		}
+		if ((inp->inp_socket->so_options & SO_DONTROUTE) == 0 && /*XXX*/
+		    (ro->ro_rt == (struct rtentry *)0 ||
+		    ro->ro_rt->rt_ifp == (struct ifnet *)0)) {
+			/* No route yet, so try to acquire one */
+			ro->ro_dst.sa_family = AF_INET;
+			ro->ro_dst.sa_len = sizeof(struct sockaddr_in);
+			((struct sockaddr_in *) &ro->ro_dst)->sin_addr =
+				sin->sin_addr;
+			rtalloc(ro);
+		}
+		/*
+		 * If we found a route, use the address
+		 * corresponding to the outgoing interface
+		 * unless it is the loopback (in case a route
+		 * to our address on another net goes to loopback).
+		 */
+		if (ro->ro_rt && !(ro->ro_rt->rt_ifp->if_flags & IFF_LOOPBACK))
+			ia = ifatoia(ro->ro_rt->rt_ifa);
+		if (ia == 0) {
+			u_short fport = sin->sin_port;
+
+			sin->sin_port = 0;	/* look up by address only */
+			ia = ifatoia(ifa_ifwithdstaddr(sintosa(sin)));
+			if (ia == 0)
+				ia = ifatoia(ifa_ifwithnet(sintosa(sin)));
+			sin->sin_port = fport;	/* restore the caller's port */
+			if (ia == 0)
+				ia = TAILQ_FIRST(&in_ifaddrhead);
+			if (ia == 0)
+				return (EADDRNOTAVAIL);
+		}
+		/*
+		 * If the destination address is multicast and an outgoing
+		 * interface has been set as a multicast option, use the
+		 * address of that interface as our source address.
+		 */
+		if (IN_MULTICAST(ntohl(sin->sin_addr.s_addr)) &&
+		    inp->inp_moptions != NULL) {
+			struct ip_moptions *imo;
+			struct ifnet *ifp;
+
+			imo = inp->inp_moptions;
+			if (imo->imo_multicast_ifp != NULL) {
+				ifp = imo->imo_multicast_ifp;
+				TAILQ_FOREACH(ia, &in_ifaddrhead, ia_link)
+					if (ia->ia_ifp == ifp)
+						break;
+				if (ia == 0)
+					return (EADDRNOTAVAIL);
+			}
+		}
+	/*
+	 * Don't do pcblookup call here; return interface in plocal_sin
+	 * and exit to caller, that will do the lookup.
+	 */
+		*plocal_sin = &ia->ia_addr;
+
+	}
+	return(0);
+}
+
+/*
+ * Outer subroutine: connect the PCB to the address and port in nam (both
+ * must be specified).  A jailed socket with no local address is first
+ * bound to its prison IP; otherwise a missing local address/port is picked
+ * here.  Returns 0, EADDRINUSE if the 4-tuple already exists, or the error
+ * from in_pcbbind()/in_pcbladdr().
+ */
+int
+in_pcbconnect(inp, nam, p)
+	register struct inpcb *inp;
+	struct sockaddr *nam;
+	struct proc *p;
+{
+	struct sockaddr_in *ifaddr;	/* set by in_pcbladdr() if laddr is unset */
+	struct sockaddr_in *sin = (struct sockaddr_in *)nam;
+	struct sockaddr_in sa;
+	struct ucred *cred;
+	int error;
+
+	cred = inp->inp_socket->so_cred;
+	if (inp->inp_laddr.s_addr == INADDR_ANY && jailed(cred)) {
+		bzero(&sa, sizeof (sa));
+		sa.sin_addr.s_addr = htonl(cred->cr_prison->pr_ip);
+		sa.sin_len=sizeof (sa);
+		sa.sin_family = AF_INET;
+		error = in_pcbbind(inp, (struct sockaddr *)&sa, p);
+		if (error)
+		    return (error);
+	}
+	/*
+	 *   Call inner routine, to assign local interface address.
+	 */
+	if ((error = in_pcbladdr(inp, nam, &ifaddr)) != 0)
+		return(error);
+
+	if (in_pcblookup_hash(inp->inp_pcbinfo, sin->sin_addr, sin->sin_port,
+	    inp->inp_laddr.s_addr ? inp->inp_laddr : ifaddr->sin_addr,
+	    inp->inp_lport, 0, NULL) != NULL) {
+		return (EADDRINUSE);
+	}
+	if (inp->inp_laddr.s_addr == INADDR_ANY) {
+		if (inp->inp_lport == 0) {
+			error = in_pcbbind(inp, (struct sockaddr *)0, p);
+			if (error)
+				return (error);
+		}
+		inp->inp_laddr = ifaddr->sin_addr;
+	}
+	inp->inp_faddr = sin->sin_addr;
+	inp->inp_fport = sin->sin_port;
+	in_pcbrehash(inp);	/* hash position depends on the new foreign side */
+	return (0);
+}
+/* Clear the foreign address/port, rehash, and detach if SS_NOFDREF is set. */
+void
+in_pcbdisconnect(inp)
+	struct inpcb *inp;
+{
+
+	inp->inp_fport = 0;
+	inp->inp_faddr.s_addr = INADDR_ANY;
+	in_pcbrehash(inp);
+	if ((inp->inp_socket->so_state & SS_NOFDREF) != 0)
+		in_pcbdetach(inp);
+}
+/* Tear down a PCB: unlink it, sofree() its socket, drop route/options, free. */
+void
+in_pcbdetach(inp)
+	struct inpcb *inp;
+{
+	struct socket *so = inp->inp_socket;
+	struct inpcbinfo *ipi = inp->inp_pcbinfo;
+	struct rtentry *rt  = inp->inp_route.ro_rt;
+
+#ifdef IPSEC
+	ipsec4_delete_pcbpolicy(inp);
+#endif /*IPSEC*/
+	inp->inp_gencnt = ++ipi->ipi_gencnt;
+	in_pcbremlists(inp);
+	so->so_pcb = 0;
+	sofree(so);
+	if (inp->inp_options)
+		(void)m_free(inp->inp_options);
+	if (rt) {
+		/*
+		 * route deletion requires reference count to be <= zero
+		 */
+		if ((rt->rt_flags & RTF_DELCLONE) &&
+		    (rt->rt_flags & RTF_WASCLONED) &&
+		    (rt->rt_refcnt <= 1)) {
+			rt->rt_refcnt--;	/* give up this PCB's reference */
+			rt->rt_flags &= ~RTF_UP;
+			rtrequest(RTM_DELETE, rt_key(rt),
+				  rt->rt_gateway, rt_mask(rt),
+				  rt->rt_flags, (struct rtentry **)0);
+		}
+		else
+			rtfree(rt);
+	}
+	ip_freemoptions(inp->inp_moptions);
+	inp->inp_vflag = 0;
+	zfree(ipi->ipi_zone, inp);	/* inp must not be touched after this */
+}
+
+/*
+ * The calling convention of in_setsockaddr() and in_setpeeraddr() was
+ * modified to match the pru_sockaddr() and pru_peeraddr() entry points
+ * in struct pr_usrreqs, so that protocols can just reference them directly
+ * without the need for a wrapper function.  The socket must have a valid
+ * (i.e., non-nil) PCB, but it should be impossible to get an invalid one
+ * except through a kernel programming error, so it is acceptable to panic
+ * (or in this case trap) if the PCB is invalid.  (Actually, we don't trap
+ * because there actually /is/ a programming error somewhere... XXX)
+ */
+int
+in_setsockaddr(so, nam)
+	struct socket *so;
+	struct sockaddr **nam;
+{
+	int s;
+	register struct inpcb *inp;
+	register struct sockaddr_in *sin;
+
+	/*
+	 * Do the malloc first in case it blocks.
+	 */
+	MALLOC(sin, struct sockaddr_in *, sizeof *sin, M_SONAME,
+		M_WAITOK | M_ZERO);
+	sin->sin_family = AF_INET;
+	sin->sin_len = sizeof(*sin);
+
+	s = splnet();
+	inp = sotoinpcb(so);
+	if (!inp) {
+		splx(s);
+		free(sin, M_SONAME);	/* *nam is left untouched on failure */
+		return ECONNRESET;
+	}
+	sin->sin_port = inp->inp_lport;	/* local port and address */
+	sin->sin_addr = inp->inp_laddr;
+	splx(s);
+
+	*nam = (struct sockaddr *)sin;	/* M_SONAME buffer handed to caller */
+	return 0;
+}
+/* Return the PCB's foreign address/port in a new sockaddr_in via *nam. */
+int
+in_setpeeraddr(so, nam)
+	struct socket *so;
+	struct sockaddr **nam;
+{
+	int s;
+	struct inpcb *inp;
+	register struct sockaddr_in *sin;
+
+	/*
+	 * Do the malloc first in case it blocks.
+	 */
+	MALLOC(sin, struct sockaddr_in *, sizeof *sin, M_SONAME,
+		M_WAITOK | M_ZERO);
+	sin->sin_family = AF_INET;
+	sin->sin_len = sizeof(*sin);
+
+	s = splnet();
+	inp = sotoinpcb(so);
+	if (!inp) {
+		splx(s);
+		free(sin, M_SONAME);	/* *nam is left untouched on failure */
+		return ECONNRESET;
+	}
+	sin->sin_port = inp->inp_fport;	/* foreign port and address */
+	sin->sin_addr = inp->inp_faddr;
+	splx(s);
+
+	*nam = (struct sockaddr *)sin;	/* M_SONAME buffer handed to caller */
+	return 0;
+}
+/* Call notify(inp, errno) on every PCB in head whose foreign addr is faddr. */
+void
+in_pcbnotifyall(head, faddr, errno, notify)
+	struct inpcbhead *head;
+	struct in_addr faddr;
+	int errno;
+	void (*notify) __P((struct inpcb *, int));
+{
+	struct inpcb *inp, *ninp;
+	int s;
+
+	s = splnet();
+	for (inp = LIST_FIRST(head); inp != NULL; inp = ninp) {
+		/* Fetch the successor first (presumably notify may unlink inp). */
+		ninp = LIST_NEXT(inp, inp_list);
+#ifdef INET6
+		if ((inp->inp_vflag & INP_IPV4) == 0)
+			continue;
+#endif
+		if (inp->inp_faddr.s_addr == faddr.s_addr &&
+		    inp->inp_socket != NULL)
+			(*notify)(inp, errno);
+	}
+	splx(s);
+}
+
+/*
+ * Called when a higher level complains about service problems.  Drops
+ * the PCB's cached route and reports RTM_LOSING via rt_missmsg().  A
+ * route marked RTF_DYNAMIC (created by a redirect) is also deleted, so
+ * that a default gateway can be tried again.
+ */
+void
+in_losing(inp)
+	struct inpcb *inp;
+{
+	register struct rtentry *rt;
+	struct rt_addrinfo info;
+
+	if ((rt = inp->inp_route.ro_rt)) {
+		inp->inp_route.ro_rt = 0;	/* forget the cached route */
+		bzero((caddr_t)&info, sizeof(info));
+		info.rti_info[RTAX_DST] =
+			(struct sockaddr *)&inp->inp_route.ro_dst;
+		info.rti_info[RTAX_GATEWAY] = rt->rt_gateway;
+		info.rti_info[RTAX_NETMASK] = rt_mask(rt);
+		rt_missmsg(RTM_LOSING, &info, rt->rt_flags, 0);
+		if (rt->rt_flags & RTF_DYNAMIC)
+			(void) rtrequest(RTM_DELETE, rt_key(rt),
+				rt->rt_gateway, rt_mask(rt), rt->rt_flags,
+				(struct rtentry **)0);
+		else	/* NOTE(review): no rtfree() on the RTF_DYNAMIC path */
+		/*
+		 * A new route can be allocated
+		 * the next time output is attempted.
+		 */
+			rtfree(rt);
+	}
+}
+
+/*
+ * After a routing change, release the cached route (if any) so that a
+ * (hopefully) better one gets allocated.  The `errno' argument is
+ * accepted but not used.
+ */
+void
+in_rtchange(inp, errno)
+	register struct inpcb *inp;
+	int errno;
+{
+	struct route *ro = &inp->inp_route;
+
+	/* A new route can be allocated the next time output is attempted. */
+	if (ro->ro_rt == 0)
+		return;
+	rtfree(ro->ro_rt);
+	ro->ro_rt = 0;
+}
+
+/*
+ * Find a PCB by local address and port.
+ *
+ * pcbinfo	tables to search
+ * laddr	local address wanted
+ * lport_arg	local port wanted; narrowed to u_short and compared
+ *		directly against inp_lport
+ * wild_okay	non-zero selects best-fit matching, zero an exact match
+ *
+ * Without wild_okay only an exact match is accepted: a PCB with no
+ * foreign address whose local address and port equal laddr/lport.
+ * With wild_okay the best fit among the PCBs bound to lport is returned,
+ * "best" being the candidate that scores the fewest wildcard mismatches.
+ * Returns NULL when nothing matches.
+ */
+struct inpcb *
+in_pcblookup_local(pcbinfo, laddr, lport_arg, wild_okay)
+	struct inpcbinfo *pcbinfo;
+	struct in_addr laddr;
+	u_int lport_arg;
+	int wild_okay;
+{
+	struct inpcb *inp, *best = NULL;
+	struct inpcbhead *bucket;
+	struct inpcbporthead *portbucket;
+	struct inpcbport *phd;
+	int score, bestscore = 3;
+	u_short lport = lport_arg;
+
+	if (!wild_okay) {
+		/*
+		 * Such PCBs are looked up in the bucket for a wildcard
+		 * foreign address and a foreign port of zero.
+		 */
+		bucket = &pcbinfo->hashbase[INP_PCBHASH(INADDR_ANY, lport, 0,
+		    pcbinfo->hashmask)];
+		LIST_FOREACH(inp, bucket, inp_hash) {
+#ifdef INET6
+			if (!(inp->inp_vflag & INP_IPV4))
+				continue;
+#endif
+			if (inp->inp_lport == lport &&
+			    inp->inp_laddr.s_addr == laddr.s_addr &&
+			    inp->inp_faddr.s_addr == INADDR_ANY)
+				return (inp);
+		}
+		return (NULL);
+	}
+
+	/*
+	 * Best fit.  First find the per-port list head for lport; if
+	 * there is none, no PCB is bound to this port.
+	 */
+	portbucket = &pcbinfo->porthashbase[INP_PCBPORTHASH(lport,
+	    pcbinfo->porthashmask)];
+	LIST_FOREACH(phd, portbucket, phd_hash) {
+		if (phd->phd_port == lport)
+			break;
+	}
+	if (phd == NULL)
+		return (NULL);
+
+	LIST_FOREACH(inp, &phd->phd_pcblist, inp_portlist) {
+#ifdef INET6
+		if (!(inp->inp_vflag & INP_IPV4))
+			continue;
+#endif
+		/* One point if the PCB has a foreign address ... */
+		score = (inp->inp_faddr.s_addr != INADDR_ANY) ? 1 : 0;
+		if (inp->inp_laddr.s_addr == INADDR_ANY) {
+			/* ... one if only the PCB's local address is wild ... */
+			if (laddr.s_addr != INADDR_ANY)
+				score++;
+		} else if (laddr.s_addr == INADDR_ANY) {
+			/* ... and one if only the requested address is wild. */
+			score++;
+		} else if (inp->inp_laddr.s_addr != laddr.s_addr) {
+			/* Two different specific addresses never match. */
+			continue;
+		}
+		if (score < bestscore) {
+			best = inp;
+			bestscore = score;
+			if (bestscore == 0)
+				break;
+		}
+	}
+	return (best);
+}
+
+/*
+ * Lookup PCB in hash list: exact match first, then (if asked) wildcards.
+ */
+struct inpcb *
+in_pcblookup_hash(pcbinfo, faddr, fport_arg, laddr, lport_arg, wildcard,
+		  ifp)
+	struct inpcbinfo *pcbinfo;	/* hash table to search */
+	struct in_addr faddr, laddr;	/* foreign and local address */
+	u_int fport_arg, lport_arg;	/* foreign and local port */
+	int wildcard;			/* non-zero: also try wildcard PCBs */
+	struct ifnet *ifp;		/* may be NULL; used by the FAITH check */
+{
+	struct inpcbhead *head;
+	register struct inpcb *inp;
+	u_short fport = fport_arg, lport = lport_arg;
+
+	/*
+	 * First look for an exact match on all four of faddr/fport/laddr/lport.
+	 */
+	head = &pcbinfo->hashbase[INP_PCBHASH(faddr.s_addr, lport, fport, pcbinfo->hashmask)];
+	LIST_FOREACH(inp, head, inp_hash) {
+#ifdef INET6
+		if ((inp->inp_vflag & INP_IPV4) == 0)
+			continue;
+#endif
+		if (inp->inp_faddr.s_addr == faddr.s_addr &&
+		    inp->inp_laddr.s_addr == laddr.s_addr &&
+		    inp->inp_fport == fport &&
+		    inp->inp_lport == lport) {
+			/*
+			 * Found.
+			 */
+			return (inp);
+		}
+	}
+	if (wildcard) {
+		struct inpcb *local_wild = NULL;	/* last INADDR_ANY-bound hit */
+#if defined(INET6)
+		struct inpcb *local_wild_mapped = NULL;	/* same, AF_INET6 socket */
+#endif /* defined(INET6) */
+
+		head = &pcbinfo->hashbase[INP_PCBHASH(INADDR_ANY, lport, 0, pcbinfo->hashmask)];
+		LIST_FOREACH(inp, head, inp_hash) {
+#ifdef INET6
+			if ((inp->inp_vflag & INP_IPV4) == 0)
+				continue;
+#endif
+			if (inp->inp_faddr.s_addr == INADDR_ANY &&
+			    inp->inp_lport == lport) {
+#if defined(NFAITH) && NFAITH > 0
+				if (ifp && ifp->if_type == IFT_FAITH &&
+				    (inp->inp_flags & INP_FAITH) == 0)
+					continue;	/* FAITH ifp, PCB lacks INP_FAITH */
+#endif
+				if (inp->inp_laddr.s_addr == laddr.s_addr)
+					return (inp);	/* exact local address */
+				else if (inp->inp_laddr.s_addr == INADDR_ANY) {
+#if defined(INET6)
+					if (INP_CHECK_SOCKAF(inp->inp_socket,
+							     AF_INET6))
+						local_wild_mapped = inp;
+					else
+#endif /* defined(INET6) */
+					local_wild = inp;	/* the else branch under INET6 */
+				}
+			}
+		}
+#if defined(INET6)
+		if (local_wild == NULL)	/* no non-AF_INET6 wildcard was seen */
+			return (local_wild_mapped);
+#endif /* defined(INET6) */
+		return (local_wild);
+	}
+
+	/*
+	 * Not found (and wildcard matching was not requested).
+	 */
+	return (NULL);
+}
+
+/*
+ * Enter a PCB into the connection hash and onto the per-port list for
+ * its local port, creating the port list head if this is the first PCB
+ * on that port.  Returns 0, or ENOBUFS if the head cannot be allocated
+ * (in which case the PCB has not been linked anywhere).
+ */
+int
+in_pcbinshash(inp)
+	struct inpcb *inp;
+{
+	struct inpcbinfo *pcbinfo = inp->inp_pcbinfo;
+	struct inpcbporthead *portbucket;
+	struct inpcbhead *bucket;
+	struct inpcbport *phd;
+	u_int32_t faddr_key;
+
+	/* An IPv6 PCB contributes the last 32-bit word of its foreign address. */
+	faddr_key = inp->inp_faddr.s_addr;
+#ifdef INET6
+	if (inp->inp_vflag & INP_IPV6)
+		faddr_key = inp->in6p_faddr.s6_addr32[3] /* XXX */;
+#endif /* INET6 */
+
+	bucket = &pcbinfo->hashbase[INP_PCBHASH(faddr_key, inp->inp_lport,
+	    inp->inp_fport, pcbinfo->hashmask)];
+	portbucket = &pcbinfo->porthashbase[INP_PCBPORTHASH(inp->inp_lport,
+	    pcbinfo->porthashmask)];
+
+	/* Is there already a list head for this local port? */
+	LIST_FOREACH(phd, portbucket, phd_hash) {
+		if (phd->phd_port == inp->inp_lport)
+			break;
+	}
+
+	if (phd == NULL) {
+		/* No: allocate one with M_NOWAIT and hook it in. */
+		MALLOC(phd, struct inpcbport *, sizeof(struct inpcbport),
+		    M_PCB, M_NOWAIT);
+		if (phd == NULL)
+			return (ENOBUFS); /* XXX */
+		phd->phd_port = inp->inp_lport;
+		LIST_INIT(&phd->phd_pcblist);
+		LIST_INSERT_HEAD(portbucket, phd, phd_hash);
+	}
+
+	inp->inp_phd = phd;
+	LIST_INSERT_HEAD(&phd->phd_pcblist, inp, inp_portlist);
+	LIST_INSERT_HEAD(bucket, inp, inp_hash);
+	return (0);
+}
+
+/*
+ * Re-file a PCB in the connection hash after its foreign address or
+ * foreign port has changed.  The per-port list is left alone, so the
+ * local port must not change once in_pcbinshash() has been called.
+ */
+void
+in_pcbrehash(inp)
+	struct inpcb *inp;
+{
+	struct inpcbinfo *pcbinfo = inp->inp_pcbinfo;
+	struct inpcbhead *bucket;
+	u_int32_t faddr_key;
+
+	faddr_key = inp->inp_faddr.s_addr;
+#ifdef INET6
+	if (inp->inp_vflag & INP_IPV6)
+		faddr_key = inp->in6p_faddr.s6_addr32[3] /* XXX */;
+#endif /* INET6 */
+
+	bucket = &pcbinfo->hashbase[INP_PCBHASH(faddr_key, inp->inp_lport,
+	    inp->inp_fport, pcbinfo->hashmask)];
+
+	/* Unlink from the old bucket, then insert at the head of the new. */
+	LIST_REMOVE(inp, inp_hash);
+	LIST_INSERT_HEAD(bucket, inp, inp_hash);
+}
+
+/* Unlink a PCB from the hash, per-port and all-PCB lists. */
+void
+in_pcbremlists(inp)
+	struct inpcb *inp;
+{
+	struct inpcbinfo *pcbinfo = inp->inp_pcbinfo;
+	struct inpcbport *phd = inp->inp_phd;
+
+	inp->inp_gencnt = ++pcbinfo->ipi_gencnt;  /* stamp a new generation */
+	if (inp->inp_lport != 0) {
+		LIST_REMOVE(inp, inp_hash);
+		LIST_REMOVE(inp, inp_portlist);
+		/* Last PCB on this port: the list head goes away too. */
+		if (LIST_FIRST(&phd->phd_pcblist) == NULL) {
+			LIST_REMOVE(phd, phd_hash);
+			free(phd, M_PCB);
+		}
+	}
+	LIST_REMOVE(inp, inp_list);
+	pcbinfo->ipi_count--;
+}
+
+/* Return 1 if p is jailed and inp's local address is not the jail's, else 0. */
+int
+prison_xinpcb(struct proc *p, struct inpcb *inp)
+{
+	if (jailed(p->p_ucred) &&
+	    ntohl(inp->inp_laddr.s_addr) != p->p_ucred->cr_prison->pr_ip)
+		return (1);
+	return (0);
+}
diff --git a/sys/netinet/in_pcb.h b/sys/netinet/in_pcb.h
new file mode 100644
index 0000000..f4abb4d
--- /dev/null
+++ b/sys/netinet/in_pcb.h
@@ -0,0 +1,302 @@
+/*
+ * Copyright (c) 1982, 1986, 1990, 1993
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)in_pcb.h	8.1 (Berkeley) 6/10/93
+ * $FreeBSD$
+ */
+
+#ifndef _NETINET_IN_PCB_H_
+#define _NETINET_IN_PCB_H_
+
+#include <sys/queue.h>
+
+
+#include <netinet6/ipsec.h> /* for IPSEC */
+
+#define	in6pcb		inpcb	/* for KAME src sync over BSD*'s */
+#define	in6p_sp		inp_sp	/* for KAME src sync over BSD*'s */
+
+/*
+ * Common structure pcb for internet protocol implementation.
+ * Here are stored pointers to local and foreign host table
+ * entries, local and foreign socket numbers, and pointers
+ * up (to a socket structure) and down (to a protocol-specific
+ * control block).
+ */
+LIST_HEAD(inpcbhead, inpcb);		/* head of a list of PCBs */
+LIST_HEAD(inpcbporthead, inpcbport);	/* head of a list of inpcbport */
+typedef	u_quad_t	inp_gen_t;	/* type of inp_gencnt */
+
+/*
+ * A PCB whose AF_INET6 local address is unspecified can also receive
+ * AF_INET input packets, so the unspecified AF_INET6 local address doubles
+ * as the unspecified AF_INET one by way of this structure (same as INRIA).
+ */
+struct in_addr_4in6 {
+	u_int32_t	ia46_pad32[3];	/* padding ahead of the IPv4 address */
+	struct	in_addr	ia46_addr4;	/* IPv4 address (inp_faddr/inp_laddr) */
+};
+
+/*
+ * NB: the zone allocator is type-stable EXCEPT FOR THE FIRST TWO LONGS
+ * of the structure.  Therefore, it is important that the members in
+ * that position not contain any information which is required to be
+ * stable.
+ */
+struct	icmp6_filter;	/* forward declaration; only pointed to below */
+
+struct inpcb {
+	LIST_ENTRY(inpcb) inp_hash; /* hash list */
+	u_short	inp_fport;		/* foreign port */
+	u_short	inp_lport;		/* local port */
+	LIST_ENTRY(inpcb) inp_list; /* list for all PCBs of this proto */
+	u_int32_t	inp_flow;	/* aliased below as in6p_flowinfo */
+
+	/* protocol dependent part, local and foreign addr */
+	union {
+		/* foreign host table entry */
+		struct	in_addr_4in6 inp46_foreign;
+		struct	in6_addr inp6_foreign;
+	} inp_dependfaddr;
+	union {
+		/* local host table entry */
+		struct	in_addr_4in6 inp46_local;
+		struct	in6_addr inp6_local;
+	} inp_dependladdr;
+
+	caddr_t	inp_ppcb;		/* pointer to per-protocol pcb */
+	struct	inpcbinfo *inp_pcbinfo;	/* PCB list info */
+	struct	socket *inp_socket;	/* back pointer to socket */
+					/* (the local-port list link is inp_portlist, below) */
+	int	inp_flags;		/* generic IP/datagram flags */
+
+	/* protocol dependent part; cached route */
+	union {
+		/* placeholder for routing entry */
+		struct	route inp4_route;
+		struct	route_in6 inp6_route;
+	} inp_dependroute;
+
+	struct	inpcbpolicy *inp_sp; /* for IPSEC */
+	u_char	inp_vflag;		/* INP_IPV4 and/or INP_IPV6 */
+#define	INP_IPV4	0x1
+#define	INP_IPV6	0x2
+	u_char	inp_ip_ttl;		/* time to live proto */
+	u_char	inp_ip_p;		/* protocol proto */
+
+	/* protocol dependent part; options */
+	struct {
+		u_char	inp4_ip_tos;		/* type of service proto */
+		struct	mbuf *inp4_options;	/* IP options */
+		struct	ip_moptions *inp4_moptions; /* IP multicast options */
+	} inp_depend4;
+#define	inp_faddr	inp_dependfaddr.inp46_foreign.ia46_addr4
+#define	inp_laddr	inp_dependladdr.inp46_local.ia46_addr4
+#define	inp_route	inp_dependroute.inp4_route
+#define	inp_ip_tos	inp_depend4.inp4_ip_tos
+#define	inp_options	inp_depend4.inp4_options
+#define	inp_moptions	inp_depend4.inp4_moptions
+	struct {
+		/* IP options */
+		struct	mbuf *inp6_options;
+		/* IP6 options for outgoing packets */
+		struct	ip6_pktopts *inp6_outputopts;
+		/* IP multicast options */
+		struct	ip6_moptions *inp6_moptions;
+		/* ICMPv6 code type filter */
+		struct	icmp6_filter *inp6_icmp6filt;
+		/* IPV6_CHECKSUM setsockopt */
+		int	inp6_cksum;
+		u_short	inp6_ifindex;	/* presumably an interface index - confirm */
+		short	inp6_hops;	/* default hop limit (see in6p_hops) */
+		u_int8_t	inp6_hlim;	/* aliased below as in6p_ip6_hlim */
+	} inp_depend6;
+	LIST_ENTRY(inpcb) inp_portlist;	/* list for this PCB's local port */
+	struct	inpcbport *inp_phd;	/* head of this list */
+	inp_gen_t	inp_gencnt;	/* generation count of this instance */
+#define	in6p_faddr	inp_dependfaddr.inp6_foreign
+#define	in6p_laddr	inp_dependladdr.inp6_local
+#define	in6p_route	inp_dependroute.inp6_route
+#define	in6p_ip6_hlim	inp_depend6.inp6_hlim
+#define	in6p_hops	inp_depend6.inp6_hops	/* default hop limit */
+#define	in6p_ip6_nxt	inp_ip_p
+#define	in6p_flowinfo	inp_flow
+#define	in6p_vflag	inp_vflag
+#define	in6p_options	inp_depend6.inp6_options
+#define	in6p_outputopts	inp_depend6.inp6_outputopts
+#define	in6p_moptions	inp_depend6.inp6_moptions
+#define	in6p_icmp6filt	inp_depend6.inp6_icmp6filt
+#define	in6p_cksum	inp_depend6.inp6_cksum
+#define	inp6_ifindex	inp_depend6.inp6_ifindex /* NOTE(review): inp6_, not in6p_, prefix */
+#define	in6p_flags	inp_flags  /* for KAME src sync over BSD*'s */
+#define	in6p_socket	inp_socket  /* for KAME src sync over BSD*'s */
+#define	in6p_lport	inp_lport  /* for KAME src sync over BSD*'s */
+#define	in6p_fport	inp_fport  /* for KAME src sync over BSD*'s */
+#define	in6p_ppcb	inp_ppcb  /* for KAME src sync over BSD*'s */
+};
+/*
+ * The range of the generation count (inp_gen_t, a u_quad_t) is about
+ * 1.8e19 if u_quad_t is 64 bits wide.  We would have to create several
+ * hundred billion connections per second for it to roll over in a year.
+ * This seems sufficiently unlikely that the possibility is simply ignored.
+ */
+
+/*
+ * Interface exported to userland by various protocols which use inpcbs.
+ * Hack alert -- only defined if _SYS_SOCKETVAR_H_ (struct xsocket) is in scope.
+ */
+#ifdef _SYS_SOCKETVAR_H_
+struct	xinpcb {
+	size_t	xi_len;		/* length of this structure */
+	struct	inpcb xi_inp;	/* a whole struct inpcb, by value */
+	struct	xsocket xi_socket;	/* a whole struct xsocket, by value */
+	u_quad_t	xi_alignment_hack; /* presumably alignment padding - confirm */
+};
+
+struct	xinpgen {
+	size_t	xig_len;	/* length of this structure */
+	u_int	xig_count;	/* number of PCBs at this time */
+	inp_gen_t xig_gen;	/* generation count at this time */
+	so_gen_t xig_sogen;	/* socket generation count at this time */
+};
+#endif /* _SYS_SOCKETVAR_H_ */
+
+struct inpcbport {			/* list head for one local port */
+	LIST_ENTRY(inpcbport) phd_hash;	/* link in a porthashbase bucket */
+	struct inpcbhead phd_pcblist;	/* PCBs linked through inp_portlist */
+	u_short phd_port;		/* the port, compared with inp_lport */
+};
+
+struct inpcbinfo {		/* XXX documentation, prefixes */
+	struct	inpcbhead *hashbase;	/* buckets indexed by INP_PCBHASH() */
+	u_long	hashmask;		/* mask handed to INP_PCBHASH() */
+	struct	inpcbporthead *porthashbase; /* indexed by INP_PCBPORTHASH() */
+	u_long	porthashmask;		/* mask handed to INP_PCBPORTHASH() */
+	struct	inpcbhead *listhead;	/* presumably the inp_list head - confirm */
+	u_short	lastport;	/* NOTE(review): presumably port-allocation state */
+	u_short	lastlow;	/* NOTE(review): as lastport; confirm at users */
+	u_short	lasthi;		/* NOTE(review): as lastport; confirm at users */
+	struct	vm_zone *ipi_zone; /* zone to allocate pcbs from */
+	u_int	ipi_count;	/* number of pcbs in this list */
+	u_quad_t ipi_gencnt;	/* current generation count */
+};
+
+#define INP_PCBHASH(faddr, lport, fport, mask) \
+	(((faddr) ^ ((faddr) >> 16) ^ ntohs((lport) ^ (fport))) & (mask)) /* NB: faddr is evaluated twice */
+#define INP_PCBPORTHASH(lport, mask) \
+	(ntohs((lport)) & (mask))	/* bucket index from the local port alone */
+
+/* flags in inp_flags (INP_* in the low bits, IN6P_* from 0x010000 up): */
+#define	INP_RECVOPTS		0x01	/* receive incoming IP options */
+#define	INP_RECVRETOPTS		0x02	/* receive IP options for reply */
+#define	INP_RECVDSTADDR		0x04	/* receive IP dst address */
+#define	INP_HDRINCL		0x08	/* user supplies entire IP header */
+#define	INP_HIGHPORT		0x10	/* user wants "high" port binding */
+#define	INP_LOWPORT		0x20	/* user wants "low" port binding */
+#define	INP_ANONPORT		0x40	/* port chosen for user */
+#define	INP_RECVIF		0x80	/* receive incoming interface */
+#define	INP_MTUDISC		0x100	/* user can do MTU discovery */
+#define	INP_FAITH		0x200	/* accept FAITH'ed connections */
+#define	IN6P_PKTINFO		0x010000
+#define	IN6P_HOPLIMIT		0x020000
+#define	IN6P_NEXTHOP		0x040000
+#define	IN6P_HOPOPTS		0x080000
+#define	IN6P_DSTOPTS		0x100000
+#define	IN6P_RTHDR		0x200000
+#define	IN6P_BINDV6ONLY		0x400000
+#define	INP_CONTROLOPTS		(INP_RECVOPTS|INP_RECVRETOPTS|INP_RECVDSTADDR|\
+					INP_RECVIF|\
+				 IN6P_PKTINFO|IN6P_HOPLIMIT|IN6P_NEXTHOP|\
+				 IN6P_HOPOPTS|IN6P_DSTOPTS|IN6P_RTHDR)
+
+#define	INP_UNMAPPABLEOPTS	(IN6P_HOPOPTS|IN6P_DSTOPTS|IN6P_RTHDR)
+
+ /* IN6P_ spellings of the INP_ flags, for KAME src sync over BSD*'s */
+#define	IN6P_RECVOPTS		INP_RECVOPTS
+#define	IN6P_RECVRETOPTS	INP_RECVRETOPTS
+#define	IN6P_RECVDSTADDR	INP_RECVDSTADDR
+#define	IN6P_HDRINCL		INP_HDRINCL
+#define	IN6P_HIGHPORT		INP_HIGHPORT
+#define	IN6P_LOWPORT		INP_LOWPORT
+#define	IN6P_ANONPORT		INP_ANONPORT
+#define	IN6P_RECVIF		INP_RECVIF
+#define	IN6P_MTUDISC		INP_MTUDISC
+#define	IN6P_FAITH		INP_FAITH
+#define	IN6P_CONTROLOPTS INP_CONTROLOPTS
+	/*
+	 * socket AF version is {newer than,or include}
+	 * actual datagram AF version (presumably about INP_SOCKAF below)
+	 */
+
+#define	INPLOOKUP_WILDCARD	1
+#define	sotoinpcb(so)	((struct inpcb *)(so)->so_pcb)
+#define	sotoin6pcb(so)	sotoinpcb(so) /* for KAME src sync over BSD*'s */
+/* Address family of so's protocol domain; argument and result parenthesized. */
+#define	INP_SOCKAF(so)	((so)->so_proto->pr_domain->dom_family)
+/* Non-zero when the address family of so's protocol domain equals af. */
+#define	INP_CHECK_SOCKAF(so, af)	(INP_SOCKAF(so) == (af))
+
+#ifdef _KERNEL
+extern int	ipport_lowfirstauto;
+extern int	ipport_lowlastauto;
+extern int	ipport_firstauto;
+extern int	ipport_lastauto;
+extern int	ipport_hifirstauto;
+extern int	ipport_hilastauto;
+
+void	in_losing __P((struct inpcb *));	/* drop route, send RTM_LOSING */
+void	in_rtchange __P((struct inpcb *, int));	/* free the cached route */
+int	in_pcballoc __P((struct socket *, struct inpcbinfo *, struct proc *));
+int	in_pcbbind __P((struct inpcb *, struct sockaddr *, struct proc *));
+int	in_pcbconnect __P((struct inpcb *, struct sockaddr *, struct proc *));
+void	in_pcbdetach __P((struct inpcb *));
+void	in_pcbdisconnect __P((struct inpcb *));
+int	in_pcbinshash __P((struct inpcb *));	/* returns 0 or ENOBUFS */
+int	in_pcbladdr __P((struct inpcb *, struct sockaddr *,
+	    struct sockaddr_in **));
+struct inpcb *
+	in_pcblookup_local __P((struct inpcbinfo *,
+	    struct in_addr, u_int, int));	/* laddr, lport, wild_okay */
+struct inpcb *
+	in_pcblookup_hash __P((struct inpcbinfo *,
+			       struct in_addr, u_int, struct in_addr, u_int,
+			       int, struct ifnet *)); /* faddr, fport, laddr, lport, wildcard, ifp */
+void	in_pcbnotifyall __P((struct inpcbhead *, struct in_addr,
+	    int, void (*)(struct inpcb *, int)));
+void	in_pcbrehash __P((struct inpcb *));	/* after faddr/fport change */
+int	in_setpeeraddr __P((struct socket *so, struct sockaddr **nam));
+int	in_setsockaddr __P((struct socket *so, struct sockaddr **nam));
+void	in_pcbremlists __P((struct inpcb *inp));	/* unlink from all lists */
+int	prison_xinpcb __P((struct proc *p, struct inpcb *inp)); /* returns 0 or 1 */
+#endif /* _KERNEL */
+
+#endif /* !_NETINET_IN_PCB_H_ */
diff --git a/sys/netinet/in_proto.c b/sys/netinet/in_proto.c
new file mode 100644
index 0000000..02e6313
--- /dev/null
+++ b/sys/netinet/in_proto.c
@@ -0,0 +1,255 @@
+/*
+ * Copyright (c) 1982, 1986, 1993
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)in_proto.c	8.2 (Berkeley) 2/9/95
+ * $FreeBSD$
+ */
+
+#include "opt_ipdivert.h"
+#include "opt_ipx.h"
+#include "opt_ipsec.h"
+#include "opt_inet6.h"
+
+#include <sys/param.h>
+#include <sys/kernel.h>
+#include <sys/socket.h>
+#include <sys/domain.h>
+#include <sys/protosw.h>
+#include <sys/queue.h>
+#include <sys/sysctl.h>
+
+#include <net/if.h>
+#include <net/route.h>
+
+#include <netinet/in.h>
+#include <netinet/in_systm.h>
+#include <netinet/ip.h>
+#include <netinet/ip_var.h>
+#include <netinet/ip_icmp.h>
+#include <netinet/igmp_var.h>
+#include <netinet/tcp.h>
+#include <netinet/tcp_timer.h>
+#include <netinet/tcp_var.h>
+#include <netinet/udp.h>
+#include <netinet/udp_var.h>
+#include <netinet/ip_encap.h>
+
+#include <netinet/ipprotosw.h>
+
+/*
+ * TCP/IP protocol family: IP, ICMP, UDP, TCP.
+ */
+
+#ifdef IPSEC
+#include <netinet6/ipsec.h>
+#include <netinet6/ah.h>
+#ifdef IPSEC_ESP
+#include <netinet6/esp.h>
+#endif
+#endif /* IPSEC */
+
+#include "gif.h"
+#if NGIF > 0
+#include <netinet/in_gif.h>
+#endif
+
+#include "stf.h"
+#if NSTF > 0
+#include <net/if_stf.h>
+#endif
+
+#ifdef IPXIP
+#include <netipx/ipx_ip.h>
+#endif
+
+#ifdef NSIP
+#include <netns/ns.h>
+#include <netns/ns_if.h>
+#endif
+
+extern	struct domain inetdomain;	/* defined further down in this file */
+static	struct pr_usrreqs nousrreqs;	/* no initializer: zero-filled */
+
+struct ipprotosw inetsw[] = {	/* positional; see struct ipprotosw for slots */
+{ 0,		&inetdomain,	0,		0,
+  0,		0,		0,		0,
+  0,
+  ip_init,	0,		ip_slowtimo,	ip_drain,
+  &nousrreqs
+},
+{ SOCK_DGRAM,	&inetdomain,	IPPROTO_UDP,	PR_ATOMIC|PR_ADDR,
+  udp_input,	0,		udp_ctlinput,	ip_ctloutput,
+  0,
+  udp_init,	0,		0,		0,
+  &udp_usrreqs
+},
+{ SOCK_STREAM,	&inetdomain,	IPPROTO_TCP,
+	PR_CONNREQUIRED|PR_IMPLOPCL|PR_WANTRCVD,
+  tcp_input,	0,		tcp_ctlinput,	tcp_ctloutput,
+  0,
+  tcp_init,	0,		tcp_slowtimo,	tcp_drain,
+  &tcp_usrreqs
+},
+{ SOCK_RAW,	&inetdomain,	IPPROTO_RAW,	PR_ATOMIC|PR_ADDR,
+  rip_input,	0,		rip_ctlinput,	rip_ctloutput,
+  0,
+  0,		0,		0,		0,
+  &rip_usrreqs
+},
+{ SOCK_RAW,	&inetdomain,	IPPROTO_ICMP,	PR_ATOMIC|PR_ADDR,
+  icmp_input,	0,		0,		rip_ctloutput,
+  0,
+  0,		0,		0,		0,
+  &rip_usrreqs
+},
+{ SOCK_RAW,	&inetdomain,	IPPROTO_IGMP,	PR_ATOMIC|PR_ADDR,
+  igmp_input,	0,		0,		rip_ctloutput,
+  0,
+  igmp_init,	igmp_fasttimo,	igmp_slowtimo,	0,
+  &rip_usrreqs
+},
+{ SOCK_RAW,	&inetdomain,	IPPROTO_RSVP,	PR_ATOMIC|PR_ADDR,
+  rsvp_input,	0,		0,		rip_ctloutput,
+  0,
+  0,		0,		0,		0,
+  &rip_usrreqs
+},
+#ifdef IPSEC
+{ SOCK_RAW,	&inetdomain,	IPPROTO_AH,	PR_ATOMIC|PR_ADDR,
+  ah4_input,	0,	 	0,		0,
+  0,	  
+  0,		0,		0,		0,
+  &nousrreqs
+},
+#ifdef IPSEC_ESP
+{ SOCK_RAW,	&inetdomain,	IPPROTO_ESP,	PR_ATOMIC|PR_ADDR,
+  esp4_input,	0,	 	0,		0,
+  0,	  
+  0,		0,		0,		0,
+  &nousrreqs
+},
+#endif
+#endif /* IPSEC */
+{ SOCK_RAW,	&inetdomain,	IPPROTO_IPV4,	PR_ATOMIC|PR_ADDR,
+  encap4_input,	0,	 	0,		rip_ctloutput,
+  0,
+  encap_init,		0,		0,		0,
+  &nousrreqs
+},
+# ifdef INET6
+{ SOCK_RAW,	&inetdomain,	IPPROTO_IPV6,	PR_ATOMIC|PR_ADDR,
+  encap4_input,	0,	 	0,		rip_ctloutput,
+  0,
+  0,		0,		0,		0,	/* encap_init is on the IPPROTO_IPV4 entry only */
+  &nousrreqs
+},
+#endif
+#ifdef IPDIVERT
+{ SOCK_RAW,	&inetdomain,	IPPROTO_DIVERT,	PR_ATOMIC|PR_ADDR,
+  div_input,	0,	 	0,		ip_ctloutput,
+  0,
+  div_init,	0,		0,		0,
+  &div_usrreqs,
+},
+#endif
+#ifdef IPXIP
+{ SOCK_RAW,	&inetdomain,	IPPROTO_IDP,	PR_ATOMIC|PR_ADDR,
+  ipxip_input,	0,		ipxip_ctlinput,	0,
+  0,
+  0,		0,		0,		0,
+  &rip_usrreqs
+},
+#endif
+#ifdef NSIP
+{ SOCK_RAW,	&inetdomain,	IPPROTO_IDP,	PR_ATOMIC|PR_ADDR, /* NOTE(review): same protocol as the IPXIP entry */
+  idpip_input,	0,		nsip_ctlinput,	0,
+  0,
+  0,		0,		0,		0,
+  &rip_usrreqs
+},
+#endif
+	/* raw wildcard: protocol 0, kept as the last entry of the table */
+{ SOCK_RAW,	&inetdomain,	0,		PR_ATOMIC|PR_ADDR,
+  rip_input,	0,		0,		rip_ctloutput,
+  0,
+  rip_init,	0,		0,		0,
+  &rip_usrreqs
+},
+};
+
+#if NGIF > 0
+struct ipprotosw in_gif_protosw =	/* stand-alone entry, not in inetsw[] */
+{ SOCK_RAW,	&inetdomain,	0/*IPPROTO_IPV[46]*/,	PR_ATOMIC|PR_ADDR,
+  in_gif_input, rip_output,	0,		rip_ctloutput, /* 2nd slot is 0 throughout inetsw[] */
+  0,
+  0,            0,              0,              0,
+  &rip_usrreqs
+};
+#endif /*NGIF*/
+
+#if NSTF > 0
+struct ipprotosw in_stf_protosw =	/* stand-alone entry, not in inetsw[] */
+{ SOCK_RAW,	&inetdomain,	IPPROTO_IPV6,	PR_ATOMIC|PR_ADDR,
+  in_stf_input, rip_output,	0,		rip_ctloutput, /* 2nd slot is 0 throughout inetsw[] */
+  0,
+  0,            0,              0,              0,
+  &rip_usrreqs
+};
+#endif /*NSTF*/
+
+extern int in_inithead __P((void **, int));	/* defined outside this file */
+
+struct domain inetdomain =	/* positional; see struct domain for the slots */
+    { AF_INET, "internet", 0, 0, 0, 
+      (struct protosw *)inetsw,	/* first entry of the table above */
+      (struct protosw *)&inetsw[sizeof(inetsw)/sizeof(inetsw[0])], 0, /* one past the last entry */
+      in_inithead, 32, sizeof(struct sockaddr_in) /* NOTE(review): meaning of 32 not visible here */
+    };
+
+DOMAIN_SET(inet);
+
+SYSCTL_NODE(_net,      PF_INET,		inet,	CTLFLAG_RW, 0,
+	"Internet Family");	/* parent (_net_inet) of the nodes below */
+
+SYSCTL_NODE(_net_inet, IPPROTO_IP,	ip,	CTLFLAG_RW, 0,	"IP");
+SYSCTL_NODE(_net_inet, IPPROTO_ICMP,	icmp,	CTLFLAG_RW, 0,	"ICMP");
+SYSCTL_NODE(_net_inet, IPPROTO_UDP,	udp,	CTLFLAG_RW, 0,	"UDP");
+SYSCTL_NODE(_net_inet, IPPROTO_TCP,	tcp,	CTLFLAG_RW, 0,	"TCP");
+SYSCTL_NODE(_net_inet, IPPROTO_IGMP,	igmp,	CTLFLAG_RW, 0,	"IGMP");
+#ifdef IPSEC
+SYSCTL_NODE(_net_inet, IPPROTO_AH,	ipsec,	CTLFLAG_RW, 0,	"IPSEC"); /* NB: numbered IPPROTO_AH */
+#endif /* IPSEC */
+SYSCTL_NODE(_net_inet, IPPROTO_RAW,	raw,	CTLFLAG_RW, 0,	"RAW");
+#ifdef IPDIVERT
+SYSCTL_NODE(_net_inet, IPPROTO_DIVERT,	divert,	CTLFLAG_RW, 0,	"DIVERT");
+#endif
+
diff --git a/sys/netinet/in_rmx.c b/sys/netinet/in_rmx.c
new file mode 100644
index 0000000..bfd65e6
--- /dev/null
+++ b/sys/netinet/in_rmx.c
@@ -0,0 +1,428 @@
+/*
+ * Copyright 1994, 1995 Massachusetts Institute of Technology
+ *
+ * Permission to use, copy, modify, and distribute this software and
+ * its documentation for any purpose and without fee is hereby
+ * granted, provided that both the above copyright notice and this
+ * permission notice appear in all copies, that both the above
+ * copyright notice and this permission notice appear in all
+ * supporting documentation, and that the name of M.I.T. not be used
+ * in advertising or publicity pertaining to distribution of the
+ * software without specific, written prior permission.  M.I.T. makes
+ * no representations about the suitability of this software for any
+ * purpose.  It is provided "as is" without express or implied
+ * warranty.
+ *
+ * THIS SOFTWARE IS PROVIDED BY M.I.T. ``AS IS''.  M.I.T. DISCLAIMS
+ * ALL EXPRESS OR IMPLIED WARRANTIES WITH REGARD TO THIS SOFTWARE,
+ * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. IN NO EVENT
+ * SHALL M.I.T. BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+ * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+
+/*
+ * This code does two things necessary for the enhanced TCP metrics to
+ * function in a useful manner:
+ *  1) It marks all non-host routes as `cloning', thus ensuring that
+ *     every actual reference to such a route actually gets turned
+ *     into a reference to a host route to the specific destination
+ *     requested.
+ *  2) When such routes lose all their references, it arranges for them
+ *     to be deleted in some random collection of circumstances, so that
+ *     a large quantity of stale routing data is not kept in kernel memory
+ *     indefinitely.  See in_rtqtimo() below for the exact mechanism.
+ */
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/kernel.h>
+#include <sys/sysctl.h>
+#include <sys/socket.h>
+#include <sys/mbuf.h>
+#include <sys/syslog.h>
+
+#include <net/if.h>
+#include <net/route.h>
+#include <netinet/in.h>
+#include <netinet/in_var.h>
+#include <netinet/ip_var.h>
+
+extern int	in_inithead __P((void **head, int off));
+
+#define RTPRF_OURS		RTF_PROTO3	/* set on routes we manage */
+
+/*
+ * Insert hook: fix up rt's flags and MTU, then add via rn_addroute().
+ */
+static struct radix_node *
+in_addroute(void *v_arg, void *n_arg, struct radix_node_head *head,
+	    struct radix_node *treenodes)
+{
+	struct rtentry *rt = (struct rtentry *)treenodes;
+	struct sockaddr_in *sin = (struct sockaddr_in *)rt_key(rt);
+	struct radix_node *ret;
+
+	/*
+	 * Tag multicast; other non-host, non-cloning routes get PRCLONING.
+	 */
+	if(IN_MULTICAST(ntohl(sin->sin_addr.s_addr)))
+		rt->rt_flags |= RTF_MULTICAST;
+
+	if(!(rt->rt_flags & (RTF_HOST | RTF_CLONING | RTF_MULTICAST))) {
+		rt->rt_flags |= RTF_PRCLONING;
+	}
+
+	/*
+	 * A little bit of help for both IP output and input:
+	 *   For host routes, we make sure that RTF_BROADCAST
+	 *   is set for anything that looks like a broadcast address.
+	 *   This way, we can avoid an expensive call to in_broadcast()
+	 *   in ip_output() most of the time (because the route passed
+	 *   to ip_output() is almost always a host route).
+	 *
+	 *   We also do the same for local addresses, with the thought
+	 *   that this might one day be used to speed up ip_input().
+	 *
+	 * We also mark routes to multicast addresses as such, because
+	 * it's easy to do and might be useful (but this is much more
+	 * dubious since it's so easy to inspect the address).  (This
+	 * is done above.)
+	 */
+	if (rt->rt_flags & RTF_HOST) {
+		if (in_broadcast(sin->sin_addr, rt->rt_ifp)) {
+			rt->rt_flags |= RTF_BROADCAST;
+		} else {
+#define satosin(sa) ((struct sockaddr_in *)sa)
+			if (satosin(rt->rt_ifa->ifa_addr)->sin_addr.s_addr
+			    == sin->sin_addr.s_addr)
+				rt->rt_flags |= RTF_LOCAL;
+#undef satosin
+		}
+	}
+
+	if (!rt->rt_rmx.rmx_mtu && !(rt->rt_rmx.rmx_locks & RTV_MTU) 
+	    && rt->rt_ifp)
+		rt->rt_rmx.rmx_mtu = rt->rt_ifp->if_mtu;
+
+	ret = rn_addroute(v_arg, n_arg, head, treenodes);
+	if (ret == NULL && rt->rt_flags & RTF_HOST) {
+		struct rtentry *rt2;
+		/*
+		 * We are trying to add a host route, but can't.
+		 * Find out if it is because of an
+		 * ARP entry and delete it if so.
+		 */
+		rt2 = rtalloc1((struct sockaddr *)sin, 0,
+				RTF_CLONING | RTF_PRCLONING);
+		if (rt2) {
+			if (rt2->rt_flags & RTF_LLINFO &&
+				rt2->rt_flags & RTF_HOST &&
+				rt2->rt_gateway &&
+				rt2->rt_gateway->sa_family == AF_LINK) {
+				rtrequest(RTM_DELETE,
+					  (struct sockaddr *)rt_key(rt2),
+					  rt2->rt_gateway,
+					  rt_mask(rt2), rt2->rt_flags, 0);
+				ret = rn_addroute(v_arg, n_arg, head,
+					treenodes);	/* retry the insert */
+			}
+			RTFREE(rt2);
+		}
+	}
+
+	/*
+	 * If the new route created successfully, and we are forwarding,
+	 * and there is a cached route, free it.  Otherwise, we may end
+	 * up using the wrong route.
+	 */
+	if (ret != NULL && ipforwarding && ipforward_rt.ro_rt) {
+		RTFREE(ipforward_rt.ro_rt);
+		ipforward_rt.ro_rt = 0;
+	}
+
+	return ret;
+}
+
+/*
+ * Lookup hook, the counterpart of in_clsroute(): when a route that we
+ * had queued for expiry picks up its first reference again, take it
+ * back out of our management and cancel its expiration time.
+ */
+static struct radix_node *
+in_matroute(void *v_arg, struct radix_node_head *head)
+{
+	struct radix_node *match = rn_match(v_arg, head);
+	struct rtentry *rt = (struct rtentry *)match;
+
+	/* Unreferenced and flagged as ours: this lookup revives it. */
+	if (rt != NULL && rt->rt_refcnt == 0 &&
+	    (rt->rt_flags & RTPRF_OURS) != 0) {
+		rt->rt_flags &= ~RTPRF_OURS;
+		rt->rt_rmx.rmx_expire = 0;
+	}
+	return match;
+}
+
+static int rtq_reallyold = 60*60;
+	/* seconds in_clsroute() grants before expiry; one hour is ``really old'' */
+SYSCTL_INT(_net_inet_ip, IPCTL_RTEXPIRE, rtexpire, CTLFLAG_RW, 
+    &rtq_reallyold , 0, 
+    "Default expiration time on dynamically learned routes");
+				   
+static int rtq_minreallyold = 10;
+	/* in_rtqtimo() never automatically cranks rtq_reallyold down to less */
+SYSCTL_INT(_net_inet_ip, IPCTL_RTMINEXPIRE, rtminexpire, CTLFLAG_RW, 
+    &rtq_minreallyold , 0, 
+    "Minimum time to attempt to hold onto dynamically learned routes");
+				   
+static int rtq_toomany = 128;
+	/* more than 128 routes left after a pass is ``too many'' for in_rtqtimo() */
+SYSCTL_INT(_net_inet_ip, IPCTL_RTMAXCACHE, rtmaxcache, CTLFLAG_RW, 
+    &rtq_toomany , 0, "Upper limit on dynamically learned routes");
+
+/*
+ * Close hook, run when the last reference to a route is dropped: claim
+ * eligible routes as belonging to us so that they can be timed out.
+ */
+static void
+in_clsroute(struct radix_node *rn, struct radix_node_head *head)
+{
+	struct rtentry *rt = (struct rtentry *)rn;
+
+	/* Prophylactic measure: routes that are not up are left alone. */
+	if ((rt->rt_flags & RTF_UP) == 0)
+		return;
+
+	/* Only RTF_HOST routes without RTF_LLINFO are candidates ... */
+	if ((rt->rt_flags & (RTF_LLINFO | RTF_HOST)) != RTF_HOST)
+		return;
+
+	/* ... and only cloned ones that we have not already claimed. */
+	if ((rt->rt_flags & (RTF_WASCLONED | RTPRF_OURS)) != RTF_WASCLONED)
+		return;
+
+	/*
+	 * As requested by David Greenman: with rtq_reallyold set to 0
+	 * the route is deleted right away instead of waiting for a
+	 * timeout cycle to kill it.
+	 */
+	if (rtq_reallyold == 0) {
+		rtrequest(RTM_DELETE, (struct sockaddr *)rt_key(rt),
+		    rt->rt_gateway, rt_mask(rt), rt->rt_flags, 0);
+		return;
+	}
+	rt->rt_flags |= RTPRF_OURS;
+	rt->rt_rmx.rmx_expire = time_second + rtq_reallyold;
+}
+
+struct rtqk_arg {
+	struct radix_node_head *rnh;	/* tree being walked (set by callers) */
+	int draining;			/* nonzero: delete even unexpired routes */
+	int killed;			/* routes in_rtqkill() deleted so far */
+	int found;			/* RTPRF_OURS routes seen so far */
+	int updating;			/* nonzero: clamp expiry to rtq_reallyold */
+	time_t nextstop;		/* lowered to soonest surviving expiry */
+};
+
+/*
+ * Tree-walk callback that gets rid of old routes we manage (those
+ * flagged RTPRF_OURS).  When draining, it deletes every such route,
+ * even when the timeout is not expired yet.  When updating, it makes
+ * sure that nothing keeps a timeout longer than the current value of
+ * rtq_reallyold.  Counters in *rock are updated; always returns 0.
+ */
+static int
+in_rtqkill(struct radix_node *rn, void *rock)
+{
+	struct rtqk_arg *ap = rock;
+	struct rtentry *rt = (struct rtentry *)rn;
+	int err;
+
+	/* Routes not flagged RTPRF_OURS are none of our business. */
+	if ((rt->rt_flags & RTPRF_OURS) == 0)
+		return 0;
+	ap->found++;
+
+	if (!ap->draining && rt->rt_rmx.rmx_expire > time_second) {
+		/* Survivor: clamp its expiry if asked, note when it is due. */
+		if (ap->updating &&
+		    rt->rt_rmx.rmx_expire - time_second > rtq_reallyold)
+			rt->rt_rmx.rmx_expire = time_second + rtq_reallyold;
+		ap->nextstop = lmin(ap->nextstop, rt->rt_rmx.rmx_expire);
+		return 0;
+	}
+
+	/* Draining or expired: the route must be unreferenced by now. */
+	if (rt->rt_refcnt > 0)
+		panic("rtqkill route really not free");
+
+	err = rtrequest(RTM_DELETE, (struct sockaddr *)rt_key(rt),
+	    rt->rt_gateway, rt_mask(rt), rt->rt_flags, 0);
+	if (err) {
+		log(LOG_WARNING, "in_rtqkill: error %d\n", err);
+	} else {
+		ap->killed++;
+	}
+
+	return 0;
+}
+
+#define RTQ_TIMEOUT	(60*10)	/* run no less than once every ten minutes */
+static int rtq_timeout = RTQ_TIMEOUT;	/* seconds; upper bound on in_rtqtimo() gap */
+
+static void
+in_rtqtimo(void *rock)
+{
+	static time_t last_adjusted_timeout = 0;
+	struct radix_node_head *rnh = rock;
+	struct rtqk_arg arg;
+	struct timeval atv;
+	int s;
+
+	/* First pass: plain expiry run, neither draining nor updating. */
+	arg.rnh = rnh;
+	arg.found = arg.killed = 0;
+	arg.draining = arg.updating = 0;
+	arg.nextstop = time_second + rtq_timeout;
+	s = splnet();
+	rnh->rnh_walktree(rnh, in_rtqkill, &arg);
+	splx(s);
+
+	/*
+	 * Attempt to be somewhat dynamic about this: if ``too many'' of our
+	 * routes are still sitting around, crank rtq_reallyold down to two
+	 * thirds (never below rtq_minreallyold) and walk again.  We adjust
+	 * at most once per rtq_timeout seconds, to avoid cranking too hard.
+	 */
+	if (arg.found - arg.killed > rtq_toomany &&
+	    time_second - last_adjusted_timeout >= rtq_timeout &&
+	    rtq_reallyold > rtq_minreallyold) {
+		rtq_reallyold = 2*rtq_reallyold / 3;
+		if (rtq_reallyold < rtq_minreallyold)
+			rtq_reallyold = rtq_minreallyold;
+		last_adjusted_timeout = time_second;
+#ifdef DIAGNOSTIC
+		log(LOG_DEBUG, "in_rtqtimo: adjusted rtq_reallyold to %d\n",
+		    rtq_reallyold);
+#endif
+
+		/* Second pass, this time clamping expiry times. */
+		arg.found = arg.killed = 0;
+		arg.updating = 1;
+		s = splnet();
+		rnh->rnh_walktree(rnh, in_rtqkill, &arg);
+		splx(s);
+	}
+
+	/* Rearm ourselves to fire at arg.nextstop. */
+	atv.tv_sec = arg.nextstop - time_second;
+	atv.tv_usec = 0;
+	timeout(in_rtqtimo, rock, tvtohz(&atv));
+}
+
+void
+in_rtqdrain(void)
+{
+	struct radix_node_head *rnh = rt_tables[AF_INET];
+	struct rtqk_arg arg;
+	int s;
+
+	arg.rnh = rnh;
+	arg.found = arg.killed = arg.updating = 0;
+	arg.nextstop = 0;
+	arg.draining = 1;	/* in_rtqkill() deletes even unexpired routes */
+	s = splnet();
+	rnh->rnh_walktree(rnh, in_rtqkill, &arg);
+	splx(s);
+}
+
+/*
+ * Initialize our routing tree; the real AF_INET table also gets our
+ * add/match/close hooks.  Returns 0 if rn_inithead() fails, else 1.
+ */
+int
+in_inithead(void **head, int off)
+{
+	struct radix_node_head *rnh;
+
+	if (rn_inithead(head, off) == 0)
+		return 0;
+
+	if (head == (void **)&rt_tables[AF_INET]) {	/* BOGUS! */
+		rnh = *head;	/* only do this for the real routing table */
+		rnh->rnh_addaddr = in_addroute;
+		rnh->rnh_matchaddr = in_matroute;
+		rnh->rnh_close = in_clsroute;
+		in_rtqtimo(rnh);	/* kick off timeout first time */
+	}
+	return 1;
+}
+
+
+/*
+ * This zaps old routes when the interface goes down or interface
+ * address is deleted.  In the latter case, it deletes static routes
+ * that point to this address.  If we don't do this, we may end up
+ * using the old address in the future.  The ones we always want to
+ * get rid of are things like ARP entries, since the user might down
+ * the interface, walk over to a completely different network, and
+ * plug back in.
+ */
+struct in_ifadown_arg {
+	struct radix_node_head *rnh;	/* table being walked (set by in_ifadown) */
+	struct ifaddr *ifa;		/* routes using this address are zapped */
+	int del;			/* nonzero: RTF_STATIC routes go too */
+};
+
+static int
+in_ifadownkill(struct radix_node *rn, void *xap)
+{
+	struct in_ifadown_arg *ap = xap;
+	struct rtentry *rt = (struct rtentry *)rn;
+	int err;
+
+	/* Skip routes on other addresses and, unless deleting, static ones. */
+	if (rt->rt_ifa != ap->ifa ||
+	    (!ap->del && (rt->rt_flags & RTF_STATIC)))
+		return 0;
+
+	/*
+	 * Disable the automatic prune that rtrequest() would do for
+	 * cloning routes: it would blow away the pointers rn_walktree()
+	 * needs in order to continue our descent.  We end up deleting
+	 * all the routes rtrequest() would have pruned anyway.
+	 */
+	rt->rt_flags &= ~(RTF_CLONING | RTF_PRCLONING);
+	err = rtrequest(RTM_DELETE, (struct sockaddr *)rt_key(rt),
+	    rt->rt_gateway, rt_mask(rt), rt->rt_flags, 0);
+	if (err)
+		log(LOG_WARNING, "in_ifadownkill: error %d\n", err);
+
+	return 0;
+}
+
+int
+in_ifadown(struct ifaddr *ifa, int delete)
+{
+	struct in_ifadown_arg arg;
+	struct radix_node_head *rnh;
+
+	if (ifa->ifa_addr->sa_family != AF_INET)
+		return 1;		/* not an AF_INET address: nothing done */
+
+	rnh = rt_tables[AF_INET];
+	arg.rnh = rnh;
+	arg.ifa = ifa;
+	arg.del = delete;
+	rnh->rnh_walktree(rnh, in_ifadownkill, &arg);
+	ifa->ifa_flags &= ~IFA_ROUTE;
+	return 0;
+}
diff --git a/sys/netinet/in_systm.h b/sys/netinet/in_systm.h
new file mode 100644
index 0000000..13fa81d
--- /dev/null
+++ b/sys/netinet/in_systm.h
@@ -0,0 +1,62 @@
+/*
+ * Copyright (c) 1982, 1986, 1993
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)in_systm.h	8.1 (Berkeley) 6/10/93
+ * $FreeBSD$
+ */
+
+#ifndef _NETINET_IN_SYSTM_H_
+#define _NETINET_IN_SYSTM_H_
+
+/*
+ * Miscellaneous internetwork
+ * definitions for kernel.
+ */
+
+/*
+ * Network types.
+ *
+ * Internally the system keeps counters in the headers with the bytes
+ * swapped so that VAX instructions will work on them.  It reverses
+ * the bytes before transmission at each protocol level.  The n_ types
+ * represent the types with the bytes in ``high-ender'' order.
+ */
+typedef u_int16_t n_short;		/* short as received from the net */
+typedef u_int32_t n_long;		/* long as received from the net */
+
+typedef	u_int32_t n_time;		/* ms since 00:00 GMT, byte rev */
+
+#ifdef _KERNEL
+n_time	 iptime __P((void));
+#endif
+
+#endif
diff --git a/sys/netinet/in_var.h b/sys/netinet/in_var.h
new file mode 100644
index 0000000..f5c1464
--- /dev/null
+++ b/sys/netinet/in_var.h
@@ -0,0 +1,235 @@
+/*
+ * Copyright (c) 1985, 1986, 1993
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)in_var.h	8.2 (Berkeley) 1/9/95
+ * $FreeBSD$
+ */
+
+#ifndef _NETINET_IN_VAR_H_
+#define _NETINET_IN_VAR_H_
+
+#include <sys/queue.h>
+
+/*
+ * Interface address, Internet version.  One of these structures
+ * is allocated for each Internet address on an interface.
+ * The ifaddr structure contains the protocol-independent part
+ * of the structure and is assumed to be first.
+ */
+struct in_ifaddr {
+	struct	ifaddr ia_ifa;		/* protocol-independent info */
+#define	ia_ifp		ia_ifa.ifa_ifp
+#define ia_flags	ia_ifa.ifa_flags
+					/* ia_{,sub}net{,mask} in host order */
+	u_long	ia_net;			/* network number of interface */
+	u_long	ia_netmask;		/* mask of net part */
+	u_long	ia_subnet;		/* subnet number, including net */
+	u_long	ia_subnetmask;		/* mask of subnet part */
+	struct	in_addr ia_netbroadcast; /* to recognize net broadcasts */
+	TAILQ_ENTRY(in_ifaddr) ia_link;	/* tailq macro glue */
+	struct	sockaddr_in ia_addr;	/* reserve space for interface name */
+	struct	sockaddr_in ia_dstaddr; /* reserve space for broadcast addr */
+#define	ia_broadaddr	ia_dstaddr
+	struct	sockaddr_in ia_sockmask; /* reserve space for general netmask */
+};
+
+struct	in_aliasreq {
+	char	ifra_name[IFNAMSIZ];		/* if name, e.g. "en0" */
+	struct	sockaddr_in ifra_addr;
+	struct	sockaddr_in ifra_broadaddr;
+#define ifra_dstaddr ifra_broadaddr
+	struct	sockaddr_in ifra_mask;
+};
+/*
+ * Given a pointer to an in_ifaddr (ifaddr),
+ * return a pointer to the addr as a sockaddr_in.
+ */
+#define IA_SIN(ia)    (&(((struct in_ifaddr *)(ia))->ia_addr))
+#define IA_DSTSIN(ia) (&(((struct in_ifaddr *)(ia))->ia_dstaddr))
+
+/* Bits of address `in' (host order) outside ifa's subnet mask. */
+#define IN_LNAOF(in, ifa) \
+	((ntohl((in).s_addr) & ~((struct in_ifaddr *)(ifa))->ia_subnetmask))
+
+#ifdef	_KERNEL
+extern	TAILQ_HEAD(in_ifaddrhead, in_ifaddr) in_ifaddrhead;
+extern	struct	ifqueue	ipintrq;		/* ip packet input queue */
+extern	struct	in_addr zeroin_addr;
+extern	u_char	inetctlerrmap[];
+
+/*
+ * Set ifp to the interface owning addr, or NULL.  Point-to-point links are
+ * matched on their destination address first, on their local address last.
+ */
+#define INADDR_TO_IFP(addr, ifp) \
+	/* struct in_addr addr; */ \
+	/* struct ifnet *ifp; */ \
+{ \
+	register struct in_ifaddr *ia; \
+\
+	for (ia = TAILQ_FIRST(&in_ifaddrhead); \
+	    ia != NULL && ((ia->ia_ifp->if_flags & IFF_POINTOPOINT)? \
+		IA_DSTSIN(ia):IA_SIN(ia))->sin_addr.s_addr != (addr).s_addr; \
+	    ia = TAILQ_NEXT(ia, ia_link)) \
+		 continue; \
+	if (ia == NULL) \
+	    TAILQ_FOREACH(ia, &in_ifaddrhead, ia_link) \
+		    if (ia->ia_ifp->if_flags & IFF_POINTOPOINT && \
+			IA_SIN(ia)->sin_addr.s_addr == (addr).s_addr) \
+			    break; \
+	(ifp) = (ia == NULL) ? NULL : ia->ia_ifp; \
+}
+
+/*
+ * Macro for finding the internet address structure (in_ifaddr) corresponding
+ * to a given interface (ifnet structure).
+ */
+#define IFP_TO_IA(ifp, ia) \
+	/* struct ifnet *ifp; */ \
+	/* struct in_ifaddr *ia; */ \
+{ \
+	for ((ia) = TAILQ_FIRST(&in_ifaddrhead); \
+	    (ia) != NULL && (ia)->ia_ifp != (ifp); \
+	    (ia) = TAILQ_NEXT((ia), ia_link)) \
+		continue; \
+}
+#endif
+
+/*
+ * This information should be part of the ifnet structure but we don't wish
+ * to change that - as it might break a number of things
+ */
+
+struct router_info {
+	struct ifnet *rti_ifp;
+	int    rti_type; /* type of router which is querier on this interface */
+	int    rti_time; /* # of slow timeouts since last old query */
+	struct router_info *rti_next;
+};
+
+/*
+ * Internet multicast address structure.  There is one of these for each IP
+ * multicast group to which this host belongs on a given network interface.
+ * For every entry on the interface's if_multiaddrs list which represents
+ * an IP multicast group, there is one of these structures.  They are also
+ * kept on a system-wide list to make it easier to keep our legacy IGMP code
+ * compatible with the rest of the world (see IN_FIRST_MULTI et al, below).
+ */
+struct in_multi {
+	LIST_ENTRY(in_multi) inm_link;	/* queue macro glue */
+	struct	in_addr inm_addr;	/* IP multicast address, convenience */
+	struct	ifnet *inm_ifp;		/* back pointer to ifnet */
+	struct	ifmultiaddr *inm_ifma;	/* back pointer to ifmultiaddr */
+	u_int	inm_timer;		/* IGMP membership report timer */
+	u_int	inm_state;		/*  state of the membership */
+	struct	router_info *inm_rti;	/* router info*/
+};
+
+#ifdef _KERNEL
+
+#ifdef SYSCTL_DECL
+SYSCTL_DECL(_net_inet_ip);
+SYSCTL_DECL(_net_inet_raw);
+#endif
+
+extern LIST_HEAD(in_multihead, in_multi) in_multihead;
+
+/*
+ * Structure used by macros below to remember position when stepping through
+ * all of the in_multi records.
+ */
+struct in_multistep {
+	struct in_multi *i_inm;
+};
+
+/*
+ * Macro for looking up the in_multi record for a given IP multicast address
+ * on a given interface.  If no matching record is found, "inm" is set null.
+ */
+#define IN_LOOKUP_MULTI(addr, ifp, inm) \
+	/* struct in_addr addr; */ \
+	/* struct ifnet *ifp; */ \
+	/* struct in_multi *inm; */ \
+do { \
+	register struct ifmultiaddr *ifma; \
+\
+	TAILQ_FOREACH(ifma, &((ifp)->if_multiaddrs), ifma_link) { \
+		if (ifma->ifma_addr->sa_family == AF_INET \
+		    && ((struct sockaddr_in *)ifma->ifma_addr)->sin_addr.s_addr == \
+		    (addr).s_addr) \
+			break; \
+	} \
+	(inm) = ifma ? ifma->ifma_protospec : 0; \
+} while(0)
+
+/*
+ * Macro to step through all of the in_multi records, one at a time.
+ * The current position is remembered in "step", which the caller must
+ * provide.  IN_FIRST_MULTI(), below, must be called to initialize "step"
+ * and get the first record.  Both macros return a NULL "inm" when there
+ * are no remaining records.
+ */
+#define IN_NEXT_MULTI(step, inm) \
+	/* struct in_multistep  step; */ \
+	/* struct in_multi *inm; */ \
+do { \
+	if (((inm) = (step).i_inm) != NULL) \
+		(step).i_inm = LIST_NEXT((step).i_inm, inm_link); \
+} while(0)
+
+#define IN_FIRST_MULTI(step, inm) \
+	/* struct in_multistep step; */ \
+	/* struct in_multi *inm; */ \
+do { \
+	(step).i_inm = LIST_FIRST(&in_multihead); \
+	IN_NEXT_MULTI((step), (inm)); \
+} while(0)
+
+struct	route;
+struct	in_multi *in_addmulti __P((struct in_addr *, struct ifnet *));
+void	in_delmulti __P((struct in_multi *));
+int	in_control __P((struct socket *, u_long, caddr_t, struct ifnet *,
+			struct proc *));
+void	in_rtqdrain __P((void));
+void	ip_input __P((struct mbuf *));
+int	in_ifadown __P((struct ifaddr *ifa, int));
+void	in_ifscrub __P((struct ifnet *, struct in_ifaddr *));
+int	ipflow_fastforward __P((struct mbuf *));
+void	ipflow_create __P((const struct route *, struct mbuf *));
+void	ipflow_slowtimo __P((void));
+
+#endif /* _KERNEL */
+
+/* INET6 stuff */
+#include <netinet6/in6_var.h>
+
+#endif /* _NETINET_IN_VAR_H_ */
diff --git a/sys/netinet/ip.h b/sys/netinet/ip.h
new file mode 100644
index 0000000..7f92ba7
--- /dev/null
+++ b/sys/netinet/ip.h
@@ -0,0 +1,188 @@
+/*
+ * Copyright (c) 1982, 1986, 1993
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)ip.h	8.2 (Berkeley) 6/1/94
+ * $FreeBSD$
+ */
+
+#ifndef _NETINET_IP_H_
+#define _NETINET_IP_H_
+
+/*
+ * Definitions for internet protocol version 4.
+ * Per RFC 791, September 1981.
+ */
+#define	IPVERSION	4
+
+/*
+ * Structure of an internet header, naked of options.
+ */
+struct ip {
+#ifdef _IP_VHL
+	u_char	ip_vhl;			/* version << 4 | header length >> 2 */
+#else
+#if BYTE_ORDER == LITTLE_ENDIAN
+	u_int	ip_hl:4,		/* header length */
+		ip_v:4;			/* version */
+#endif
+#if BYTE_ORDER == BIG_ENDIAN
+	u_int	ip_v:4,			/* version */
+		ip_hl:4;		/* header length */
+#endif
+#endif /* not _IP_VHL */
+	u_char	ip_tos;			/* type of service */
+	u_short	ip_len;			/* total length */
+	u_short	ip_id;			/* identification */
+	u_short	ip_off;			/* fragment offset field */
+#define	IP_RF 0x8000			/* reserved fragment flag */
+#define	IP_DF 0x4000			/* dont fragment flag */
+#define	IP_MF 0x2000			/* more fragments flag */
+#define	IP_OFFMASK 0x1fff		/* mask for fragmenting bits */
+	u_char	ip_ttl;			/* time to live */
+	u_char	ip_p;			/* protocol */
+	u_short	ip_sum;			/* checksum */
+	struct	in_addr ip_src,ip_dst;	/* source and dest address */
+};
+
+#ifdef _IP_VHL
+#define	IP_MAKE_VHL(v, hl)	((v) << 4 | (hl))
+#define	IP_VHL_HL(vhl)		((vhl) & 0x0f)
+#define	IP_VHL_V(vhl)		((vhl) >> 4)
+#define	IP_VHL_BORING		0x45
+#endif
+
+#define	IP_MAXPACKET	65535		/* maximum packet size */
+
+/*
+ * Definitions for IP type of service (ip_tos)
+ */
+#define	IPTOS_LOWDELAY		0x10
+#define	IPTOS_THROUGHPUT	0x08
+#define	IPTOS_RELIABILITY	0x04
+#define	IPTOS_MINCOST		0x02
+/* ECN bits proposed by Sally Floyd */
+#define	IPTOS_CE		0x01	/* congestion experienced */
+#define	IPTOS_ECT		0x02	/* ECN-capable transport */
+
+
+/*
+ * Definitions for IP precedence (also in ip_tos) (hopefully unused)
+ */
+#define	IPTOS_PREC_NETCONTROL		0xe0
+#define	IPTOS_PREC_INTERNETCONTROL	0xc0
+#define	IPTOS_PREC_CRITIC_ECP		0xa0
+#define	IPTOS_PREC_FLASHOVERRIDE	0x80
+#define	IPTOS_PREC_FLASH		0x60
+#define	IPTOS_PREC_IMMEDIATE		0x40
+#define	IPTOS_PREC_PRIORITY		0x20
+#define	IPTOS_PREC_ROUTINE		0x00
+
+/*
+ * Definitions for options.
+ */
+#define	IPOPT_COPIED(o)		((o)&0x80)
+#define	IPOPT_CLASS(o)		((o)&0x60)
+#define	IPOPT_NUMBER(o)		((o)&0x1f)
+
+#define	IPOPT_CONTROL		0x00
+#define	IPOPT_RESERVED1		0x20
+#define	IPOPT_DEBMEAS		0x40
+#define	IPOPT_RESERVED2		0x60
+
+#define	IPOPT_EOL		0		/* end of option list */
+#define	IPOPT_NOP		1		/* no operation */
+
+#define	IPOPT_RR		7		/* record packet route */
+#define	IPOPT_TS		68		/* timestamp */
+#define	IPOPT_SECURITY		130		/* provide s,c,h,tcc */
+#define	IPOPT_LSRR		131		/* loose source route */
+#define	IPOPT_SATID		136		/* satnet id */
+#define	IPOPT_SSRR		137		/* strict source route */
+#define	IPOPT_RA		148		/* router alert */
+
+/*
+ * Offsets to fields in options other than EOL and NOP.
+ */
+#define	IPOPT_OPTVAL		0		/* option ID */
+#define	IPOPT_OLEN		1		/* option length */
+#define IPOPT_OFFSET		2		/* offset within option */
+#define	IPOPT_MINOFF		4		/* min value of above */
+
+/*
+ * Time stamp option structure.
+ */
+struct	ip_timestamp {
+	u_char	ipt_code;		/* IPOPT_TS */
+	u_char	ipt_len;		/* size of structure (variable) */
+	u_char	ipt_ptr;		/* index of current entry */
+#if BYTE_ORDER == LITTLE_ENDIAN
+	u_int	ipt_flg:4,		/* flags, see below */
+		ipt_oflw:4;		/* overflow counter */
+#endif
+#if BYTE_ORDER == BIG_ENDIAN
+	u_int	ipt_oflw:4,		/* overflow counter */
+		ipt_flg:4;		/* flags, see below */
+#endif
+	union ipt_timestamp {
+		n_long	ipt_time[1];
+		struct	ipt_ta {
+			struct in_addr ipt_addr;
+			n_long ipt_time;
+		} ipt_ta[1];
+	} ipt_timestamp;
+};
+
+/* flag bits for ipt_flg */
+#define	IPOPT_TS_TSONLY		0		/* timestamps only */
+#define	IPOPT_TS_TSANDADDR	1		/* timestamps and addresses */
+#define	IPOPT_TS_PRESPEC	3		/* specified modules only */
+
+/* bits for security (not byte swapped) */
+#define	IPOPT_SECUR_UNCLASS	0x0000
+#define	IPOPT_SECUR_CONFID	0xf135
+#define	IPOPT_SECUR_EFTO	0x789a
+#define	IPOPT_SECUR_MMMM	0xbc4d
+#define	IPOPT_SECUR_RESTR	0xaf13
+#define	IPOPT_SECUR_SECRET	0xd788
+#define	IPOPT_SECUR_TOPSECRET	0x6bc5
+
+/*
+ * Internet implementation parameters.
+ */
+#define	MAXTTL		255		/* maximum time to live (seconds) */
+#define	IPDEFTTL	64		/* default ttl, from RFC 1340 */
+#define	IPFRAGTTL	60		/* time to live for frags, slowhz */
+#define	IPTTLDEC	1		/* subtracted when forwarding */
+
+#define	IP_MSS		576		/* default maximum segment size */
+
+#endif
diff --git a/sys/netinet/ip6.h b/sys/netinet/ip6.h
new file mode 100644
index 0000000..77d1ab6
--- /dev/null
+++ b/sys/netinet/ip6.h
@@ -0,0 +1,296 @@
+/*	$FreeBSD$	*/
+/*	$KAME: ip6.h,v 1.9 2000/07/02 21:01:32 itojun Exp $	*/
+
+/*
+ * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. Neither the name of the project nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+/*
+ * Copyright (c) 1982, 1986, 1993
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)ip.h	8.1 (Berkeley) 6/10/93
+ */
+
+#ifndef _NETINET_IP6_H_
+#define _NETINET_IP6_H_
+
+/*
+ * Definition for internet protocol version 6.
+ * RFC 2460
+ */
+
+struct ip6_hdr {
+	union {
+		struct ip6_hdrctl {
+			u_int32_t ip6_un1_flow;	/* 20 bits of flow-ID */
+			u_int16_t ip6_un1_plen;	/* payload length */
+			u_int8_t  ip6_un1_nxt;	/* next header */
+			u_int8_t  ip6_un1_hlim;	/* hop limit */
+		} ip6_un1;
+		u_int8_t ip6_un2_vfc;	/* 4 bits version, top 4 bits class */
+	} ip6_ctlun;
+	struct in6_addr ip6_src;	/* source address */
+	struct in6_addr ip6_dst;	/* destination address */
+};
+
+#define ip6_vfc		ip6_ctlun.ip6_un2_vfc
+#define ip6_flow	ip6_ctlun.ip6_un1.ip6_un1_flow
+#define ip6_plen	ip6_ctlun.ip6_un1.ip6_un1_plen
+#define ip6_nxt		ip6_ctlun.ip6_un1.ip6_un1_nxt
+#define ip6_hlim	ip6_ctlun.ip6_un1.ip6_un1_hlim
+#define ip6_hops	ip6_ctlun.ip6_un1.ip6_un1_hlim
+
+#define IPV6_VERSION		0x60
+#define IPV6_VERSION_MASK	0xf0
+
+#if BYTE_ORDER == BIG_ENDIAN
+#define IPV6_FLOWINFO_MASK	0x0fffffff	/* flow info (28 bits) */
+#define IPV6_FLOWLABEL_MASK	0x000fffff	/* flow label (20 bits) */
+#else
+#if BYTE_ORDER == LITTLE_ENDIAN
+#define IPV6_FLOWINFO_MASK	0xffffff0f	/* flow info (28 bits) */
+#define IPV6_FLOWLABEL_MASK	0xffff0f00	/* flow label (20 bits) */
+#endif /* LITTLE_ENDIAN */
+#endif
+/* ECN bits proposed by Sally Floyd */
+#define IP6TOS_CE		0x01	/* congestion experienced */
+#define IP6TOS_ECT		0x02	/* ECN-capable transport */
+
+/*
+ * Extension Headers
+ */
+
+struct	ip6_ext {
+	u_char	ip6e_nxt;
+	u_char	ip6e_len;
+};
+
+/* Hop-by-Hop options header */
+/* XXX should we pad it to force alignment on an 8-byte boundary? */
+struct ip6_hbh {
+	u_int8_t ip6h_nxt;	/* next header */
+	u_int8_t ip6h_len;	/* length in units of 8 octets */
+	/* followed by options */
+};
+
+/* Destination options header */
+/* XXX should we pad it to force alignment on an 8-byte boundary? */
+struct ip6_dest {
+	u_int8_t ip6d_nxt;	/* next header */
+	u_int8_t ip6d_len;	/* length in units of 8 octets */
+	/* followed by options */
+};
+
+/* Option types and related macros */
+#define IP6OPT_PAD1		0x00	/* 00 0 00000 */
+#define IP6OPT_PADN		0x01	/* 00 0 00001 */
+#define IP6OPT_JUMBO		0xC2	/* 11 0 00010 = 194 */
+#define IP6OPT_JUMBO_LEN	6
+#define IP6OPT_RTALERT		0x05	/* 00 0 00101 */
+#define IP6OPT_RTALERT_LEN	4
+#define IP6OPT_RTALERT_MLD	0	/* Datagram contains an MLD message */
+#define IP6OPT_RTALERT_RSVP	1	/* Datagram contains an RSVP message */
+#define IP6OPT_RTALERT_ACTNET	2 	/* contains an Active Networks msg */
+#define IP6OPT_MINLEN		2
+
+#define IP6OPT_TYPE(o)		((o) & 0xC0)
+#define IP6OPT_TYPE_SKIP	0x00
+#define IP6OPT_TYPE_DISCARD	0x40
+#define IP6OPT_TYPE_FORCEICMP	0x80
+#define IP6OPT_TYPE_ICMP	0xC0
+
+#define IP6OPT_MUTABLE		0x20
+
+/* Routing header */
+struct ip6_rthdr {
+	u_int8_t  ip6r_nxt;	/* next header */
+	u_int8_t  ip6r_len;	/* length in units of 8 octets */
+	u_int8_t  ip6r_type;	/* routing type */
+	u_int8_t  ip6r_segleft;	/* segments left */
+	/* followed by routing type specific data */
+};
+
+/* Type 0 Routing header */
+struct ip6_rthdr0 {
+	u_int8_t  ip6r0_nxt;		/* next header */
+	u_int8_t  ip6r0_len;		/* length in units of 8 octets */
+	u_int8_t  ip6r0_type;		/* always zero */
+	u_int8_t  ip6r0_segleft;	/* segments left */
+	u_int8_t  ip6r0_reserved;	/* reserved field */
+	u_int8_t  ip6r0_slmap[3];	/* strict/loose bit map */
+	struct in6_addr  ip6r0_addr[1];	/* up to 23 addresses */
+};
+
+/* Fragment header */
+struct ip6_frag {
+	u_int8_t  ip6f_nxt;		/* next header */
+	u_int8_t  ip6f_reserved;	/* reserved field */
+	u_int16_t ip6f_offlg;		/* offset, reserved, and flag */
+	u_int32_t ip6f_ident;		/* identification */
+};
+
+#if BYTE_ORDER == BIG_ENDIAN
+#define IP6F_OFF_MASK		0xfff8	/* mask out offset from _offlg */
+#define IP6F_RESERVED_MASK	0x0006	/* reserved bits in ip6f_offlg */
+#define IP6F_MORE_FRAG		0x0001	/* more-fragments flag */
+#else /* BYTE_ORDER == LITTLE_ENDIAN */
+#define IP6F_OFF_MASK		0xf8ff	/* mask out offset from _offlg */
+#define IP6F_RESERVED_MASK	0x0600	/* reserved bits in ip6f_offlg */
+#define IP6F_MORE_FRAG		0x0100	/* more-fragments flag */
+#endif /* BYTE_ORDER == LITTLE_ENDIAN */
+
+/*
+ * Internet implementation parameters.
+ */
+#define IPV6_MAXHLIM	255	/* maximum hoplimit */
+#define IPV6_DEFHLIM	64	/* default hlim */
+#define IPV6_FRAGTTL	120	/* ttl for fragment packets, in slowtimo tick */
+#define IPV6_HLIMDEC	1	/* subtracted when forwarding */
+
+#define IPV6_MMTU	1280	/* minimal MTU and reassembly. 1024 + 256 */
+#define IPV6_MAXPACKET	65535	/* ip6 max packet size without Jumbo payload*/
+
+#ifdef _KERNEL
+/*
+ * IP6_EXTHDR_CHECK ensures that the first off + hlen bytes (IPv6 header,
+ * extension headers and the target TCP/UDP/ICMP6 header) are contiguous in
+ * the leading mbuf.  On failure it bumps an ip6stat counter and executes
+ * "return ret" in the caller.  KAME requires drivers to store incoming data
+ * into one internal mbuf or one or more external mbufs (never into two or
+ * more internal mbufs), so the third case should never match.
+ */
+
+#define IP6_EXTHDR_CHECK(m, off, hlen, ret)				\
+do {									\
+    if ((m)->m_next != NULL) {						\
+	if (((m)->m_flags & M_LOOP) &&					\
+	    ((m)->m_len < (off) + (hlen)) &&				\
+	    (((m) = m_pullup((m), (off) + (hlen))) == NULL)) {		\
+		ip6stat.ip6s_exthdrtoolong++;				\
+		return ret;						\
+	} else if ((m)->m_flags & M_EXT) {				\
+		if ((m)->m_len < (off) + (hlen)) {			\
+			ip6stat.ip6s_exthdrtoolong++;			\
+			m_freem(m);					\
+			return ret;					\
+		}							\
+	} else {							\
+		if ((m)->m_len < (off) + (hlen)) {			\
+			ip6stat.ip6s_exthdrtoolong++;			\
+			m_freem(m);					\
+			return ret;					\
+		}							\
+	}								\
+    } else {								\
+	if ((m)->m_len < (off) + (hlen)) {				\
+		ip6stat.ip6s_tooshort++;				\
+		in6_ifstat_inc((m)->m_pkthdr.rcvif, ifs6_in_truncated);	\
+		m_freem(m);						\
+		return ret;						\
+	}								\
+    }									\
+} while (0)
+
+/*
+ * IP6_EXTHDR_GET ensures that an intermediate protocol header ("len" bytes
+ * starting at "off") is located in a single mbuf, in a contiguous memory
+ * region.  A pointer to the region is stored in the pointer variable "val",
+ * cast to type "typ".
+ * IP6_EXTHDR_GET0 does the same, except that it aligns the structure at the
+ * very top of the mbuf.  GET0 is more likely to make a memory copy than GET.
+ *
+ * XXX we're now testing this, needs m_pulldown()
+ */
+#define IP6_EXTHDR_GET(val, typ, m, off, len) \
+do {									\
+	struct mbuf *t;							\
+	int tmp;							\
+	if ((m)->m_len >= (off) + (len))				\
+		(val) = (typ)(mtod((m), caddr_t) + (off));		\
+	else {								\
+		t = m_pulldown((m), (off), (len), &tmp);		\
+		if (t) {						\
+			if (t->m_len < tmp + (len))			\
+				panic("m_pulldown malfunction");	\
+			(val) = (typ)(mtod(t, caddr_t) + tmp);		\
+		} else {						\
+			(val) = (typ)NULL;				\
+			(m) = NULL;					\
+		}							\
+	}								\
+} while (0)
+
+#define IP6_EXTHDR_GET0(val, typ, m, off, len) \
+do {									\
+	struct mbuf *t;							\
+	if ((off) == 0)							\
+		(val) = (typ)mtod(m, caddr_t);				\
+	else {								\
+		t = m_pulldown((m), (off), (len), NULL);		\
+		if (t) {						\
+			if (t->m_len < (len))				\
+				panic("m_pulldown malfunction");	\
+			(val) = (typ)mtod(t, caddr_t);			\
+		} else {						\
+			(val) = (typ)NULL;				\
+			(m) = NULL;					\
+		}							\
+	}								\
+} while (0)
+#endif /*_KERNEL*/
+
+#endif /* not _NETINET_IP6_H_ */
diff --git a/sys/netinet/ip_auth.c b/sys/netinet/ip_auth.c
new file mode 100644
index 0000000..2931cc6
--- /dev/null
+++ b/sys/netinet/ip_auth.c
@@ -0,0 +1,543 @@
+/*
+ * Copyright (C) 1998-2000 by Darren Reed & Guido van Rooij.
+ *
+ * Redistribution and use in source and binary forms are permitted
+ * provided that this notice is preserved and due credit is given
+ * to the original author and the contributors.
+ */
+#if !defined(lint)
+/*static const char rcsid[] = "@(#)$Id: ip_auth.c,v 2.1.2.2 2000/01/16 10:12:14 darrenr Exp $";*/
+static const char rcsid[] = "@(#)$FreeBSD$";
+#endif
+
+#include <sys/errno.h>
+#include <sys/types.h>
+#include <sys/param.h>
+#include <sys/time.h>
+#include <sys/file.h>
+#if !defined(_KERNEL) && !defined(KERNEL)
+# include <stdio.h>
+# include <stdlib.h>
+# include <string.h>
+#endif
+#if (defined(KERNEL) || defined(_KERNEL)) && (__FreeBSD_version >= 220000)
+# include <sys/filio.h>
+# include <sys/fcntl.h>
+#else
+# include <sys/ioctl.h>
+#endif
+#include <sys/uio.h>
+#ifndef linux
+# include <sys/protosw.h>
+#endif
+#include <sys/socket.h>
+#if (defined(_KERNEL) || defined(KERNEL)) && !defined(linux)
+# include <sys/systm.h>
+#endif
+#if !defined(__SVR4) && !defined(__svr4__)
+# ifndef linux
+#  include <sys/mbuf.h>
+# endif
+#else
+# include <sys/filio.h>
+# include <sys/byteorder.h>
+# ifdef _KERNEL
+#  include <sys/dditypes.h>
+# endif
+# include <sys/stream.h>
+# include <sys/kmem.h>
+#endif
+#if (_BSDI_VERSION >= 199802) || (__FreeBSD_version >= 400000)
+# include <sys/queue.h>
+#endif
+#if defined(__NetBSD__) || defined(__OpenBSD__) || defined(bsdi)
+# include <machine/cpu.h>
+#endif
+#include <net/if.h>
+#ifdef sun
+# include <net/af.h>
+#endif
+#include <net/route.h>
+#include <netinet/in.h>
+#include <netinet/in_systm.h>
+#include <netinet/ip.h>
+#ifndef	KERNEL
+# define	KERNEL
+# define	NOT_KERNEL
+#endif
+#ifndef linux
+# include <netinet/ip_var.h>
+#endif
+#ifdef	NOT_KERNEL
+# undef	KERNEL
+#endif
+#ifdef __sgi
+# ifdef IFF_DRVRLOCK /* IRIX6 */
+#  include <sys/hashing.h>
+# endif
+#endif
+#include <netinet/tcp.h>
+#if defined(__sgi) && !defined(IFF_DRVRLOCK) /* IRIX < 6 */
+extern struct ifqueue   ipintrq;                /* ip packet input queue */
+#else
+# ifndef linux
+#  if __FreeBSD_version >= 300000
+#   include <net/if_var.h>
+#  endif
+#  include <netinet/in_var.h>
+#  include <netinet/tcp_fsm.h>
+# endif
+#endif
+#include <netinet/udp.h>
+#include <netinet/ip_icmp.h>
+#include "netinet/ip_compat.h"
+#include <netinet/tcpip.h>
+#include "netinet/ip_fil.h"
+#include "netinet/ip_auth.h"
+#if !SOLARIS && !defined(linux)
+# include <net/netisr.h>
+# ifdef __FreeBSD__
+#  include <machine/cpufunc.h>
+# endif
+#endif
+#if (__FreeBSD_version >= 300000)
+# include <sys/malloc.h>
+# if (defined(_KERNEL) || defined(KERNEL)) && !defined(IPFILTER_LKM)
+#  include <sys/libkern.h>
+#  include <sys/systm.h>
+# endif
+#endif
+
+
+
+#if (SOLARIS || defined(__sgi)) && defined(_KERNEL)
+extern KRWLOCK_T ipf_auth;
+extern kmutex_t ipf_authmx;
+# if SOLARIS
+extern kcondvar_t ipfauthwait;
+# endif
+#endif
+#ifdef linux
+static struct wait_queue *ipfauthwait = NULL;
+#endif
+
+int	fr_authsize = FR_NUMAUTH;
+int	fr_authused = 0;
+int	fr_defaultauthage = 600;
+int	fr_auth_lock = 0;
+fr_authstat_t	fr_authstats;
+static frauth_t fr_auth[FR_NUMAUTH];
+mb_t	*fr_authpkts[FR_NUMAUTH];
+static int	fr_authstart = 0, fr_authend = 0, fr_authnext = 0;
+static frauthent_t	*fae_list = NULL;
+frentry_t	*ipauth = NULL;
+
+
+/*
+ * Check if a packet has authorization and return the stored result, or 0 if
+ * there is none.  If the result would cause a feedback loop (i.e. it is
+ * empty or would end up returning FR_AUTH) then return FR_BLOCK instead.
+ */
+u_32_t fr_checkauth(ip, fin)
+ip_t *ip;
+fr_info_t *fin;
+{
+	u_short id = ip->ip_id;
+	u_32_t pass;
+	int i;
+
+	if (fr_auth_lock)
+		return 0;	/* fr_auth_lock is set: skip the lookup */
+
+	READ_ENTER(&ipf_auth);
+	for (i = fr_authstart; i != fr_authend; ) {
+		/*
+		 * fra_index becomes -2 only after an SIOCAUTHR has stored a
+		 * verdict in fra_pass; only those entries can be matched
+		 * against a packet that is being seen again.
+		 */
+		if ((fr_auth[i].fra_index == -2) &&
+		    (id == fr_auth[i].fra_info.fin_id) &&
+		    !bcmp((char *)fin,(char *)&fr_auth[i].fra_info,FI_CSIZE)) {
+			/*
+			 * Avoid feedback loop: no verdict or FR_AUTH => block.
+			 */
+			if (!(pass = fr_auth[i].fra_pass) || (pass & FR_AUTH))
+				pass = FR_BLOCK;
+			RWLOCK_EXIT(&ipf_auth);
+			WRITE_ENTER(&ipf_auth);
+			fr_authstats.fas_hits++;
+			fr_auth[i].fra_index = -1;	/* slot is consumed */
+			fr_authused--;
+			if (i == fr_authstart) {
+				/* move the start past consumed (-1) slots */
+				while (fr_auth[i].fra_index == -1) {
+					i++;
+					if (i == FR_NUMAUTH)
+						i = 0;
+					fr_authstart = i;
+					if (i == fr_authend)
+						break;
+				}
+				if (fr_authstart == fr_authend) {
+					fr_authnext = 0;
+					fr_authstart = fr_authend = 0;
+				}
+			}
+			RWLOCK_EXIT(&ipf_auth);
+			return pass;
+		}
+		i++;
+		if (i == FR_NUMAUTH)
+			i = 0;
+	}
+	fr_authstats.fas_miss++;
+	RWLOCK_EXIT(&ipf_auth);
+	return 0;
+}
+
+
+/*
+ * Check if we have room in the auth array to hold details for another packet.
+ * If we do, store it, wake up any user programs which are waiting to hear
+ * about these events and return 1; otherwise (or if locked) return 0.
+ */
+int fr_newauth(m, fin, ip)
+mb_t *m;
+fr_info_t *fin;
+ip_t *ip;
+{
+#if defined(_KERNEL) && SOLARIS
+	qif_t *qif = fin->fin_qif;
+#endif
+	int i;
+
+	if (fr_auth_lock)
+		return 0;
+
+	WRITE_ENTER(&ipf_auth);
+	if (fr_authstart > fr_authend) {	/* end has wrapped: no room */
+		fr_authstats.fas_nospace++;
+		RWLOCK_EXIT(&ipf_auth);
+		return 0;
+	} else {
+		if ((fr_authstart == 0) && (fr_authend == FR_NUMAUTH - 1)) {
+			fr_authstats.fas_nospace++;
+			RWLOCK_EXIT(&ipf_auth);
+			return 0;
+		}
+	}
+
+	fr_authstats.fas_added++;
+	fr_authused++;
+	i = fr_authend++;	/* claim the slot at the current end */
+	if (fr_authend == FR_NUMAUTH)
+		fr_authend = 0;
+	RWLOCK_EXIT(&ipf_auth);	/* NOTE(review): slot i is filled in unlocked */
+	fr_auth[i].fra_index = i;
+	fr_auth[i].fra_pass = 0;	/* no verdict yet */
+	fr_auth[i].fra_age = fr_defaultauthage;
+	bcopy((char *)fin, (char *)&fr_auth[i].fra_info, sizeof(*fin));
+#if !defined(sparc) && !defined(m68k)
+	/*
+	 * No need to copyback here as we want to undo the changes, not keep
+	 * them.
+	 */
+# if SOLARIS && defined(_KERNEL)
+	if ((ip == (ip_t *)m->b_rptr) && (ip->ip_v == 4))
+# endif
+	{
+		register u_short bo;
+
+		bo = ip->ip_len;
+		ip->ip_len = htons(bo);
+# if !SOLARIS && !defined(__NetBSD__) && !defined(__FreeBSD__)
+		/* 4.4BSD converts this in ip_input.c, but I don't in solaris.c */
+		bo = ip->ip_id;
+		ip->ip_id = htons(bo);
+# endif
+		bo = ip->ip_off;
+		ip->ip_off = htons(bo);
+	}
+#endif
+#if SOLARIS && defined(_KERNEL)
+	m->b_rptr -= qif->qf_off;
+	fr_authpkts[i] = *(mblk_t **)fin->fin_mp;
+	fr_auth[i].fra_q = qif->qf_q;
+	cv_signal(&ipfauthwait);
+#else
+	fr_authpkts[i] = m;
+# if defined(linux) && defined(_KERNEL)
+	wake_up_interruptible(&ipfauthwait);
+# else
+	WAKEUP(&fr_authnext);
+# endif
+#endif
+	return 1;	/* the packet is now held in fr_authpkts[i] */
+}
+
+/*
+ * Service authentication ioctl "cmd" on "data"; returns 0 or an error code.
+ */
+int fr_auth_ioctl(data, cmd, fr, frptr)
+caddr_t data;
+#if defined(__NetBSD__) || defined(__OpenBSD__) || (FreeBSD_version >= 300003)
+u_long cmd;
+#else
+int cmd;
+#endif
+frentry_t *fr, **frptr;
+{
+	mb_t *m;
+	frauth_t auth, *au = &auth;
+	frauthent_t *fae, **faep;
+	int i, error = 0;
+
+	switch (cmd)
+	{
+	case SIOCSTLCK :
+		error = fr_lock(data, &fr_auth_lock);
+		break;
+	case SIOCINIFR :
+	case SIOCRMIFR :
+	case SIOCADIFR :
+		error = EINVAL;
+		break;
+	case SIOCINAFR :
+		error = EINVAL;
+		break;
+	case SIOCRMAFR :
+	case SIOCADAFR :
+		for (faep = &fae_list; (fae = *faep); )
+			if (&fae->fae_fr == fr)
+				break;
+			else
+				faep = &fae->fae_next;
+		if (cmd == SIOCRMAFR) {
+			if (!fae)
+				error = ESRCH;
+			else {
+				WRITE_ENTER(&ipf_auth);
+				*faep = fae->fae_next;
+				*frptr = fr->fr_next;
+				RWLOCK_EXIT(&ipf_auth);
+				KFREE(fae);
+			}
+		} else {
+			KMALLOC(fae, frauthent_t *);
+			if (fae != NULL) {
+				bcopy((char *)fr, (char *)&fae->fae_fr,
+				      sizeof(*fr));
+				WRITE_ENTER(&ipf_auth);
+				fae->fae_age = fr_defaultauthage;
+				fae->fae_fr.fr_hits = 0;
+				fae->fae_fr.fr_next = *frptr;
+				*frptr = &fae->fae_fr;
+				fae->fae_next = *faep;
+				*faep = fae;
+				ipauth = &fae_list->fae_fr;
+				RWLOCK_EXIT(&ipf_auth);
+			} else
+				error = ENOMEM;
+		}
+		break;
+	case SIOCATHST:
+		READ_ENTER(&ipf_auth);
+		fr_authstats.fas_faelist = fae_list;
+		RWLOCK_EXIT(&ipf_auth);
+		error = IWCOPYPTR((char *)&fr_authstats, data,
+				   sizeof(fr_authstats));
+		break;
+	case SIOCAUTHW:
+fr_authioctlloop:
+		READ_ENTER(&ipf_auth);
+		if ((fr_authnext != fr_authend) && fr_authpkts[fr_authnext]) {
+			error = IWCOPYPTR((char *)&fr_auth[fr_authnext], data,
+					  sizeof(frauth_t));
+			RWLOCK_EXIT(&ipf_auth);
+			if (error)
+				break;
+			WRITE_ENTER(&ipf_auth);
+			fr_authnext++;
+			if (fr_authnext == FR_NUMAUTH)
+				fr_authnext = 0;
+			RWLOCK_EXIT(&ipf_auth);
+			return 0;
+		}
+#ifdef	_KERNEL
+# if	SOLARIS
+		mutex_enter(&ipf_authmx);
+		if (!cv_wait_sig(&ipfauthwait, &ipf_authmx))
+			error = EINTR;	/* fall through: ipf_auth is released below */
+		mutex_exit(&ipf_authmx);
+# else
+#  ifdef linux
+		interruptible_sleep_on(&ipfauthwait);
+		if (current->signal & ~current->blocked)
+			error = -EINTR;
+#  else
+		error = SLEEP(&fr_authnext, "fr_authnext");
+# endif
+# endif
+#endif
+		RWLOCK_EXIT(&ipf_auth);
+		if (!error)
+			goto fr_authioctlloop;
+		break;
+	case SIOCAUTHR:
+		error = IRCOPYPTR(data, (caddr_t)&auth, sizeof(auth));
+		if (error)
+			return error;
+		WRITE_ENTER(&ipf_auth);
+		i = au->fra_index;
+		if ((i < 0) || (i >= FR_NUMAUTH) ||	/* fr_auth[] bounds */
+		    (fr_auth[i].fra_info.fin_id != au->fra_info.fin_id)) {
+			RWLOCK_EXIT(&ipf_auth);
+			return EINVAL;
+		}
+		m = fr_authpkts[i];
+		fr_auth[i].fra_index = -2;
+		fr_auth[i].fra_pass = au->fra_pass;
+		fr_authpkts[i] = NULL;
+#ifdef	_KERNEL
+		RWLOCK_EXIT(&ipf_auth);
+# ifndef linux
+		if (m && au->fra_info.fin_out) {
+#  if SOLARIS
+			error = fr_qout(fr_auth[i].fra_q, m);
+#  else /* SOLARIS */
+#   if (_BSDI_VERSION >= 199802) || defined(__OpenBSD__)
+			error = ip_output(m, NULL, NULL, IP_FORWARDING, NULL,
+					  NULL);
+#   else
+			error = ip_output(m, NULL, NULL, IP_FORWARDING, NULL);
+#   endif
+#  endif /* SOLARIS */
+			if (error)
+				fr_authstats.fas_sendfail++;
+			else
+				fr_authstats.fas_sendok++;
+		} else if (m) {
+# if SOLARIS
+			error = fr_qin(fr_auth[i].fra_q, m);
+# else /* SOLARIS */
+			if (! IF_HANDOFF(&ipintrq, m, NULL))
+				error = ENOBUFS;
+			else
+				schednetisr(NETISR_IP);
+# endif /* SOLARIS */
+			if (error)
+				fr_authstats.fas_quefail++;
+			else
+				fr_authstats.fas_queok++;
+		} else
+			error = EINVAL;
+# endif
+# if SOLARIS
+		if (error)
+			error = EINVAL;
+# else
+		/*
+		 * If we experience an error which will result in the packet
+		 * not being processed, make sure we advance to the next one.
+		 */
+		if (error == ENOBUFS) {
+			fr_authused--;
+			fr_auth[i].fra_index = -1;
+			fr_auth[i].fra_pass = 0;
+			if (i == fr_authstart) {
+				while (fr_auth[i].fra_index == -1) {
+					i++;
+					if (i == FR_NUMAUTH)
+						i = 0;
+					fr_authstart = i;
+					if (i == fr_authend)
+						break;
+				}
+				if (fr_authstart == fr_authend) {
+					fr_authnext = 0;
+					fr_authstart = fr_authend = 0;
+				}
+			}
+		}
+# endif
+#endif /* _KERNEL */
+		break;
+	default :
+		error = EINVAL;
+		break;
+	}
+	return error;
+}
+
+
+#ifdef	_KERNEL
+/*
+ * Free all saved packets and every fae_list entry, then clear ipauth.
+ */
+void fr_authunload()
+{
+	register int i;
+	register frauthent_t *fae, **faep;
+	mb_t *m;
+
+	WRITE_ENTER(&ipf_auth);
+	for (i = 0; i < FR_NUMAUTH; i++) {
+		if ((m = fr_authpkts[i])) {
+			FREE_MB_T(m);
+			fr_authpkts[i] = NULL;
+			fr_auth[i].fra_index = -1;
+		}
+	}
+
+
+	for (faep = &fae_list; (fae = *faep); ) {
+		*faep = fae->fae_next;	/* unlink the head before freeing it */
+		KFREE(fae);
+	}
+	ipauth = NULL;
+	RWLOCK_EXIT(&ipf_auth);
+}
+
+
+/*
+ * Age held packets and auth rules, freeing any whose age reaches zero.
+ * Timeouts are set in expectation of this being called twice per second.
+ */
+void fr_authexpire()
+{
+	register int i;
+	register frauth_t *fra;
+	register frauthent_t *fae, **faep;
+	mb_t *m;
+#if !SOLARIS
+	int s;
+#endif
+
+	if (fr_auth_lock)
+		return;
+
+	SPL_NET(s);
+	WRITE_ENTER(&ipf_auth);
+	for (i = 0, fra = fr_auth; i < FR_NUMAUTH; i++, fra++) {
+		if ((!--fra->fra_age) && (m = fr_authpkts[i])) {
+			FREE_MB_T(m);
+			fr_authpkts[i] = NULL;
+			fr_auth[i].fra_index = -1;
+			fr_authstats.fas_expire++;
+			fr_authused--;
+		}
+	}
+
+	for (faep = &fae_list; (fae = *faep); ) {
+		if (!--fae->fae_age) {
+			*faep = fae->fae_next;
+			KFREE(fae);
+			fr_authstats.fas_expire++;
+		} else
+			faep = &fae->fae_next;
+	}
+	ipauth = fae_list ? &fae_list->fae_fr : NULL;	/* list may be empty */
+	RWLOCK_EXIT(&ipf_auth);
+	SPL_X(s);
+}
+#endif
diff --git a/sys/netinet/ip_auth.h b/sys/netinet/ip_auth.h
new file mode 100644
index 0000000..2851c3d
--- /dev/null
+++ b/sys/netinet/ip_auth.h
@@ -0,0 +1,63 @@
+/*
+ * Copyright (C) 1997-2000 by Darren Reed & Guido Van Rooij.
+ *
+ * Redistribution and use in source and binary forms are permitted
+ * provided that this notice is preserved and due credit is given
+ * to the original author and the contributors.
+ *
+ * $Id: ip_auth.h,v 2.1 1999/08/04 17:29:54 darrenr Exp $
+ * $FreeBSD$
+ *
+ */
+#ifndef	__IP_AUTH_H__
+#define	__IP_AUTH_H__
+
+#define FR_NUMAUTH      32
+
+typedef struct  frauth {
+	int	fra_age;
+	int	fra_index;
+	u_32_t	fra_pass;
+	fr_info_t	fra_info;
+#if SOLARIS
+	queue_t	*fra_q;
+#endif
+} frauth_t;
+
+typedef	struct	frauthent  {
+	struct	frentry	fae_fr;
+	struct	frauthent	*fae_next;
+	u_long	fae_age;
+} frauthent_t;
+
+typedef struct  fr_authstat {
+	U_QUAD_T	fas_hits;
+	U_QUAD_T	fas_miss;
+	u_long		fas_nospace;
+	u_long		fas_added;
+	u_long		fas_sendfail;
+	u_long		fas_sendok;
+	u_long		fas_queok;
+	u_long		fas_quefail;
+	u_long		fas_expire;
+	frauthent_t	*fas_faelist;
+} fr_authstat_t;
+
+
+extern	frentry_t	*ipauth;
+extern	struct fr_authstat	fr_authstats;
+extern	int	fr_defaultauthage;
+extern	int	fr_authsize;
+extern	int	fr_authused;
+extern	int	fr_auth_lock;
+extern	u_32_t	fr_checkauth __P((ip_t *, fr_info_t *));
+extern	void	fr_authexpire __P((void));
+extern	void	fr_authunload __P((void));
+extern	mb_t	*fr_authpkts[];
+extern	int	fr_newauth __P((mb_t *, fr_info_t *, ip_t *));
+#if defined(__NetBSD__) || defined(__OpenBSD__)
+extern	int	fr_auth_ioctl __P((caddr_t, u_long, frentry_t *, frentry_t **));
+#else
+extern	int	fr_auth_ioctl __P((caddr_t, int, frentry_t *, frentry_t **));
+#endif
+#endif	/* __IP_AUTH_H__ */
diff --git a/sys/netinet/ip_compat.h b/sys/netinet/ip_compat.h
new file mode 100644
index 0000000..aefd50d
--- /dev/null
+++ b/sys/netinet/ip_compat.h
@@ -0,0 +1,1013 @@
+/*
+ * Copyright (C) 1993-2000 by Darren Reed.
+ *
+ * Redistribution and use in source and binary forms are permitted
+ * provided that this notice is preserved and due credit is given
+ * to the original author and the contributors.
+ *
+ * @(#)ip_compat.h	1.8 1/14/96
+ * $Id: ip_compat.h,v 2.26.2.9 2001/01/14 14:58:01 darrenr Exp $
+ * $FreeBSD$
+ */
+
+#ifndef	__IP_COMPAT_H__
+#define	__IP_COMPAT_H__
+
+#ifndef	__P
+# ifdef	__STDC__
+#  define	__P(x)  x
+# else
+#  define	__P(x)  ()
+# endif
+#endif
+#ifndef	__STDC__
+# undef		const
+# define	const
+#endif
+
+#ifndef	SOLARIS
+#define	SOLARIS	(defined(sun) && (defined(__svr4__) || defined(__SVR4)))
+#endif
+#if SOLARIS2 >= 8
+# ifndef	USE_INET6
+#  define	USE_INET6
+# endif
+#endif
+
+#if defined(_KERNEL) || defined(KERNEL) || defined(__KERNEL__)
+# undef	KERNEL
+# undef	_KERNEL
+# undef 	__KERNEL__
+# define	KERNEL
+# define	_KERNEL
+# define 	__KERNEL__
+#endif
+
+#if defined(__SVR4) || defined(__svr4__) || defined(__sgi)
+#define index   strchr
+# if !defined(KERNEL)
+#  define	bzero(a,b)	memset(a,0,b)
+#  define	bcmp		memcmp
+#  define	bcopy(a,b,c)	memmove(b,a,c)
+# endif
+#endif
+
+#ifndef offsetof
+#define offsetof(t,m) (int)((&((t *)0L)->m))
+#endif
+
+#if defined(__sgi) || defined(bsdi)
+struct  ether_addr {
+        u_char  ether_addr_octet[6];
+};
+#endif
+
+#if defined(__sgi) && !defined(IPFILTER_LKM)
+# ifdef __STDC__
+#  define IPL_EXTERN(ep) ipfilter##ep
+# else
+#  define IPL_EXTERN(ep) ipfilter/**/ep
+# endif
+#else
+# ifdef __STDC__
+#  define IPL_EXTERN(ep) ipl##ep
+# else
+#  define IPL_EXTERN(ep) ipl/**/ep
+# endif
+#endif
+
+#ifdef	linux
+# include <sys/sysmacros.h>
+#endif
+#if	SOLARIS
+# define	MTYPE(m)	((m)->b_datap->db_type)
+# include	<sys/isa_defs.h>
+# include	<sys/ioccom.h>
+# include	<sys/sysmacros.h>
+# include	<sys/kmem.h>
+/*
+ * because Solaris 2 defines these in two places :-/
+ */
+# undef	IPOPT_EOL
+# undef	IPOPT_NOP
+# undef	IPOPT_LSRR
+# undef	IPOPT_RR
+# undef	IPOPT_SSRR
+# ifndef	KERNEL
+#  define	_KERNEL
+#  undef	RES_INIT
+#  if SOLARIS2 >= 8
+#   include <netinet/ip6.h>
+#  endif
+#  include <inet/common.h>
+#  include <inet/ip.h>
+#  include <inet/ip_ire.h>
+#  undef	_KERNEL
+# else /* _KERNEL */
+#  if SOLARIS2 >= 8
+#   include <netinet/ip6.h>
+#  endif
+#  include <inet/common.h>
+#  include <inet/ip.h>
+#  include <inet/ip_ire.h>
+# endif /* _KERNEL */
+# if SOLARIS2 >= 8
+#  include <inet/ip_if.h>
+#  include <netinet/ip6.h>
+#  define	ipif_local_addr	ipif_lcl_addr
+/* Only defined in private include file */
+#  ifndef	V4_PART_OF_V6
+#   define	V4_PART_OF_V6(v6)	v6.s6_addr32[3]
+#  endif
+# endif
+#else
+# if !defined(__sgi)
+typedef	 int	minor_t;
+#endif
+#endif /* SOLARIS */
+#define	IPMINLEN(i, h)	((i)->ip_len >= ((i)->ip_hl * 4 + sizeof(struct h)))
+
+#if defined(__FreeBSD__) && (__FreeBSD__ >= 5) && defined(_KERNEL)
+# include <machine/in_cksum.h>
+#endif
+
+#ifndef	IP_OFFMASK
+#define	IP_OFFMASK	0x1fff
+#endif
+
+#if	BSD > 199306
+# define	USE_QUAD_T
+# define	U_QUAD_T	u_quad_t
+# define	QUAD_T		quad_t
+#else /* BSD > 199306 */
+# define	U_QUAD_T	u_long
+# define	QUAD_T		long
+#endif /* BSD > 199306 */
+
+
+/*
+ * These operating systems already take care of the problem for us.
+ */
+#if defined(__NetBSD__) || defined(__OpenBSD__) || defined(__FreeBSD__) || \
+    defined(__sgi)
+typedef u_int32_t       u_32_t;
+# if defined(_KERNEL) && !defined(IPFILTER_LKM)
+#  if defined(__NetBSD_Version__) && (__NetBSD_Version__ >= 104110000)
+#   include "opt_inet.h"
+#  endif
+#  if defined(__FreeBSD_version) && (__FreeBSD_version >= 400000) && \
+      !defined(KLD_MODULE)
+#   include "opt_inet6.h"
+#  endif
+#  ifdef INET6
+#   define USE_INET6
+#  endif
+# endif
+#else
+/*
+ * Really, any arch where sizeof(long) != sizeof(int).
+ */
+# if defined(__alpha__) || defined(__alpha) || defined(_LP64)
+typedef unsigned int    u_32_t;
+# else
+#  if SOLARIS2 >= 6
+typedef	uint32_t	u_32_t;
+#  else
+typedef unsigned int	u_32_t;
+#  endif
+# endif
+#endif /* __NetBSD__ || __OpenBSD__ || __FreeBSD__ || __sgi */
+
+#ifdef	USE_INET6
+# if defined(__NetBSD__) || defined(__OpenBSD__) || defined(__FreeBSD__)
+#  include <netinet/ip6.h>
+#  ifdef	_KERNEL
+#   include <netinet6/ip6_var.h>
+#  endif
+typedef	struct ip6_hdr	ip6_t;
+# endif
+union	i6addr	{
+	u_32_t	i6[4];
+	struct	in_addr	in4;
+	struct	in6_addr in6;
+};
+#else
+union	i6addr	{
+	u_32_t	i6[4];
+	struct	in_addr	in4;
+};
+#endif
+
+#define	IP6CMP(a,b)	bcmp((char *)&(a), (char *)&(b), sizeof(a))
+#define	IP6EQ(a,b)	(bcmp((char *)&(a), (char *)&(b), sizeof(a)) == 0)
+#define	IP6NEQ(a,b)	(bcmp((char *)&(a), (char *)&(b), sizeof(a)) != 0)
+
+#ifndef	MAX
+#define	MAX(a,b)	(((a) > (b)) ? (a) : (b))
+#endif
+
+/*
+ * Security Options for Internet Protocol (IPSO) as defined in RFC 1108.
+ *
+ * Basic Option
+ *
+ * 00000001   -   (Reserved 4)
+ * 00111101   -   Top Secret
+ * 01011010   -   Secret
+ * 10010110   -   Confidential
+ * 01100110   -   (Reserved 3)
+ * 11001100   -   (Reserved 2)
+ * 10101011   -   Unclassified
+ * 11110001   -   (Reserved 1)
+ */
+#define	IPSO_CLASS_RES4		0x01
+#define	IPSO_CLASS_TOPS		0x3d
+#define	IPSO_CLASS_SECR		0x5a
+#define	IPSO_CLASS_CONF		0x96
+#define	IPSO_CLASS_RES3		0x66
+#define	IPSO_CLASS_RES2		0xcc
+#define	IPSO_CLASS_UNCL		0xab
+#define	IPSO_CLASS_RES1		0xf1
+
+#define	IPSO_AUTH_GENSER	0x80
+#define	IPSO_AUTH_ESI		0x40
+#define	IPSO_AUTH_SCI		0x20
+#define	IPSO_AUTH_NSA		0x10
+#define	IPSO_AUTH_DOE		0x08
+#define	IPSO_AUTH_UN		0x06
+#define	IPSO_AUTH_FTE		0x01
+
+/*
+ * IP option #defines
+ */
+/*#define	IPOPT_RR	7 */
+#define	IPOPT_ZSU	10	/* ZSU */
+#define	IPOPT_MTUP	11	/* MTUP */
+#define	IPOPT_MTUR	12	/* MTUR */
+#define	IPOPT_ENCODE	15	/* ENCODE */
+/*#define	IPOPT_TS	68 */
+#define	IPOPT_TR	82	/* TR */
+/*#define	IPOPT_SECURITY	130 */
+/*#define	IPOPT_LSRR	131 */
+#define	IPOPT_E_SEC	133	/* E-SEC */
+#define	IPOPT_CIPSO	134	/* CIPSO */
+/*#define	IPOPT_SATID	136 */
+#ifndef	IPOPT_SID
+# define	IPOPT_SID	IPOPT_SATID
+#endif
+/*#define	IPOPT_SSRR	137 */
+#define	IPOPT_ADDEXT	147	/* ADDEXT */
+#define	IPOPT_VISA	142	/* VISA */
+#define	IPOPT_IMITD	144	/* IMITD */
+#define	IPOPT_EIP	145	/* EIP */
+#define	IPOPT_FINN	205	/* FINN */
+
+
+#if defined(__FreeBSD__) && (defined(KERNEL) || defined(_KERNEL))
+# ifdef IPFILTER_LKM
+#  include <sys/param.h>
+#  define       ACTUALLY_LKM_NOT_KERNEL
+# else
+#  include <sys/param.h>
+# endif
+# if __FreeBSD__ < 3
+#  include <machine/spl.h>
+# else
+#  if __FreeBSD__ == 3
+#   if defined(IPFILTER_LKM) && !defined(ACTUALLY_LKM_NOT_KERNEL)
+#    define	ACTUALLY_LKM_NOT_KERNEL
+#   endif
+#  endif
+# endif
+#endif /* __FreeBSD__ && KERNEL */
+
+/*
+ * Build some macros and #defines to enable the same code to compile anywhere
+ * Well, that's the idea, anyway :-)
+ */
+#if !SOLARIS || (SOLARIS2 < 6) || !defined(KERNEL)
+# define	ATOMIC_INCL		ATOMIC_INC
+# define	ATOMIC_INC64		ATOMIC_INC
+# define	ATOMIC_INC32		ATOMIC_INC
+# define	ATOMIC_INC16		ATOMIC_INC
+# define	ATOMIC_DECL		ATOMIC_DEC
+# define	ATOMIC_DEC64		ATOMIC_DEC
+# define	ATOMIC_DEC32		ATOMIC_DEC
+# define	ATOMIC_DEC16		ATOMIC_DEC
+#endif
+#ifdef __sgi
+# define  hz HZ
+# include <sys/ksynch.h>
+# define	IPF_LOCK_PL	plhi
+# include <sys/sema.h>
+#undef kmutex_t
+/*
+ * Replacement kmutex_t for __sgi builds (the system name is #undef'd just
+ * above).  The __sgi MUTEX_ENTER/MUTEX_EXIT macros later in this file store
+ * the value returned by LOCK() in 'pl' and pass it back to UNLOCK().
+ */
+typedef struct {
+	lock_t *l;	/* lock handed to LOCK()/UNLOCK()/LOCK_DEALLOC() */
+	int pl;		/* value returned by LOCK(), consumed by UNLOCK() */
+} kmutex_t;
+# undef	MUTEX_INIT
+# undef	MUTEX_DESTROY
+#endif
+#ifdef KERNEL
+# if SOLARIS
+#  if SOLARIS2 >= 6
+#   include <sys/atomic.h>
+#   if SOLARIS2 == 6
+#    define	ATOMIC_INCL(x)		atomic_add_long((uint32_t*)&(x), 1)
+#    define	ATOMIC_DECL(x)		atomic_add_long((uint32_t*)&(x), -1)
+#   else
+#    define	ATOMIC_INCL(x)		atomic_add_long(&(x), 1)
+#    define	ATOMIC_DECL(x)		atomic_add_long(&(x), -1)
+#   endif
+#   define	ATOMIC_INC64(x)		atomic_add_64((uint64_t*)&(x), 1)
+#   define	ATOMIC_INC32(x)		atomic_add_32((uint32_t*)&(x), 1)
+#   define	ATOMIC_INC16(x)		atomic_add_16((uint16_t*)&(x), 1)
+#   define	ATOMIC_DEC64(x)		atomic_add_64((uint64_t*)&(x), -1)
+#   define	ATOMIC_DEC32(x)		atomic_add_32((uint32_t*)&(x), -1)
+#   define	ATOMIC_DEC16(x)		atomic_add_16((uint16_t*)&(x), -1)
+#  else
+#   define	ATOMIC_INC(x)		{ mutex_enter(&ipf_rw); (x)++; \
+					  mutex_exit(&ipf_rw); }
+#   define	ATOMIC_DEC(x)		{ mutex_enter(&ipf_rw); (x)--; \
+					  mutex_exit(&ipf_rw); }
+#  endif
+#  define	MUTEX_ENTER(x)		mutex_enter(x)
+#  if 1
+#   define	KRWLOCK_T		krwlock_t
+#   define	READ_ENTER(x)		rw_enter(x, RW_READER)
+#   define	WRITE_ENTER(x)		rw_enter(x, RW_WRITER)
+#   define	RW_UPGRADE(x)		{ if (rw_tryupgrade(x) == 0) { \
+					      rw_exit(x); \
+					      rw_enter(x, RW_WRITER); } \
+					}
+#   define	MUTEX_DOWNGRADE(x)	rw_downgrade(x)
+#   define	RWLOCK_INIT(x, y, z)	rw_init((x), (y), RW_DRIVER, (z))
+#   define	RWLOCK_EXIT(x)		rw_exit(x)
+#   define	RW_DESTROY(x)		rw_destroy(x)
+#  else
+#   define	KRWLOCK_T		kmutex_t
+#   define	READ_ENTER(x)		mutex_enter(x)
+#   define	WRITE_ENTER(x)		mutex_enter(x)
+#   define	MUTEX_DOWNGRADE(x)	;
+#   define	RWLOCK_INIT(x, y, z)	mutex_init((x), (y), MUTEX_DRIVER, (z))
+#   define	RWLOCK_EXIT(x)		mutex_exit(x)
+#   define	RW_DESTROY(x)		mutex_destroy(x)
+#  endif
+#  define	MUTEX_INIT(x, y, z)	mutex_init((x), (y), MUTEX_DRIVER, (z))
+#  define	MUTEX_DESTROY(x)	mutex_destroy(x)
+#  define	MUTEX_EXIT(x)	mutex_exit(x)
+#  define	MTOD(m,t)	(t)((m)->b_rptr)
+#  define	IRCOPY(a,b,c)	copyin((caddr_t)(a), (caddr_t)(b), (c))
+#  define	IWCOPY(a,b,c)	copyout((caddr_t)(a), (caddr_t)(b), (c))
+#  define	IRCOPYPTR	ircopyptr
+#  define	IWCOPYPTR	iwcopyptr
+#  define	FREE_MB_T(m)	freemsg(m)
+#  define	SPL_NET(x)	;
+#  define	SPL_IMP(x)	;
+#  undef	SPL_X
+#  define	SPL_X(x)	;
+#  ifdef sparc
+#   define	ntohs(x)	(x)
+#   define	ntohl(x)	(x)
+#   define	htons(x)	(x)
+#   define	htonl(x)	(x)
+#  endif /* sparc */
+#  define	KMALLOC(a,b)	(a) = (b)kmem_alloc(sizeof(*(a)), KM_NOSLEEP)
+#  define	KMALLOCS(a,b,c)	(a) = (b)kmem_alloc((c), KM_NOSLEEP)
+#  define	GET_MINOR(x)	getminor(x)
+/*
+ * Per-interface record used by the SOLARIS kernel build (this typedef sits
+ * inside "#ifdef KERNEL / # if SOLARIS").  Records are chained through
+ * qf_next and carry an ill_t pointer, a lock, the in/out queue_t pointers
+ * and qinit structures both by pointer and by value.
+ * NOTE(review): presumably the qinit pointers remember the original STREAMS
+ * entry points while the embedded copies are the interposed ones - confirm
+ * against the Solaris glue code, which is not visible here.
+ */
+typedef	struct	qif	{
+	struct	qif	*qf_next;
+	ill_t	*qf_ill;
+	kmutex_t	qf_lock;
+	void	*qf_iptr;
+	void	*qf_optr;
+	queue_t	*qf_in;
+	queue_t	*qf_out;
+	struct	qinit	*qf_wqinfo;
+	struct	qinit	*qf_rqinfo;
+	struct	qinit	qf_wqinit;
+	struct	qinit	qf_rqinit;
+	mblk_t	*qf_m;	/* These three fields are for passing data up from */
+	queue_t	*qf_q;	/* fr_qin and fr_qout to the packet processing. */
+	size_t	qf_off;
+	size_t	qf_len;	/* this field is used for in ipfr_fastroute */
+	char	qf_name[8];
+	/*
+	 * in case the ILL has disappeared...
+	 */
+	size_t	qf_hl;	/* header length */
+	int	qf_sap;
+} qif_t;
+extern	ill_t	*get_unit __P((char *, int));
+#  define	GETUNIT(n, v)	get_unit(n, v)
+#  define	IFNAME(x)	((ill_t *)x)->ill_name
+# else /* SOLARIS */
+#  if defined(__sgi)
+#   define	ATOMIC_INC(x)		{ MUTEX_ENTER(&ipf_rw); \
+					  (x)++; MUTEX_EXIT(&ipf_rw); }
+#   define	ATOMIC_DEC(x)		{ MUTEX_ENTER(&ipf_rw); \
+					  (x)--; MUTEX_EXIT(&ipf_rw); }
+#   define	MUTEX_ENTER(x)		(x)->pl = LOCK((x)->l, IPF_LOCK_PL);
+#   define	KRWLOCK_T		kmutex_t
+#   define	READ_ENTER(x)		MUTEX_ENTER(x)
+#   define	WRITE_ENTER(x)		MUTEX_ENTER(x)
+#   define	RW_UPGRADE(x)		;
+#   define	MUTEX_DOWNGRADE(x)	;
+#   define	RWLOCK_EXIT(x)		MUTEX_EXIT(x)
+#   define	MUTEX_EXIT(x)		UNLOCK((x)->l, (x)->pl);
+#   define	MUTEX_INIT(x,y,z)	(x)->l = LOCK_ALLOC((uchar_t)-1, IPF_LOCK_PL, (lkinfo_t *)-1, KM_NOSLEEP)
+#   define	MUTEX_DESTROY(x)	LOCK_DEALLOC((x)->l)
+#  else /* __sgi */
+#   define	ATOMIC_INC(x)		(x)++
+#   define	ATOMIC_DEC(x)		(x)--
+#   define	MUTEX_ENTER(x)		;
+#   define	READ_ENTER(x)		;
+#   define	WRITE_ENTER(x)		;
+#   define	RW_UPGRADE(x)		;
+#   define	MUTEX_DOWNGRADE(x)	;
+#   define	RWLOCK_EXIT(x)		;
+#   define	MUTEX_EXIT(x)		;
+#   define	MUTEX_INIT(x,y,z)	;
+#   define	MUTEX_DESTROY(x)	;
+#  endif /* __sgi */
+#  ifndef linux
+#   define	FREE_MB_T(m)	m_freem(m)
+#   define	MTOD(m,t)	mtod(m,t)
+#   define	IRCOPY(a,b,c)	(bcopy((a), (b), (c)), 0)
+#   define	IWCOPY(a,b,c)	(bcopy((a), (b), (c)), 0)
+#   define	IRCOPYPTR	ircopyptr
+#   define	IWCOPYPTR	iwcopyptr
+#  endif /* !linux */
+# endif /* SOLARIS */
+
+# ifdef sun
+#  if !SOLARIS
+#   include	<sys/kmem_alloc.h>
+#   define	GETUNIT(n, v)	ifunit(n, IFNAMSIZ)
+#   define	IFNAME(x)	((struct ifnet *)x)->if_name
+#  endif
+# else
+#  ifndef	linux
+#   define	GETUNIT(n, v)	ifunit(n)
+#   if (defined(NetBSD) && (NetBSD <= 1991011) && (NetBSD >= 199606)) || \
+        (defined(OpenBSD) && (OpenBSD >= 199603))
+#    define	IFNAME(x)	((struct ifnet *)x)->if_xname
+#   else
+#    define	IFNAME(x)	((struct ifnet *)x)->if_name
+#   endif
+#  endif
+# endif /* sun */
+
+# if defined(sun) && !defined(linux) || defined(__sgi)
+#  define	UIOMOVE(a,b,c,d)	uiomove((caddr_t)a,b,c,d)
+#  define	SLEEP(id, n)	sleep((id), PZERO+1)
+#  define	WAKEUP(id)	wakeup(id)
+#  define	KFREE(x)	kmem_free((char *)(x), sizeof(*(x)))
+#  define	KFREES(x,s)	kmem_free((char *)(x), (s))
+#  if !SOLARIS
+extern	void	m_copydata __P((struct mbuf *, int, int, caddr_t));
+extern	void	m_copyback __P((struct mbuf *, int, int, caddr_t));
+#  endif
+#  ifdef __sgi
+#   include <sys/kmem.h>
+#   include <sys/ddi.h>
+#   define	KMALLOC(a,b)	(a) = (b)kmem_alloc(sizeof(*(a)), KM_NOSLEEP)
+#   define	KMALLOCS(a,b,c)	(a) = (b)kmem_alloc((c), KM_NOSLEEP)
+#   define	GET_MINOR(x)	getminor(x)
+#  else
+#   if !SOLARIS
+#    define	KMALLOC(a,b)	(a) = (b)new_kmem_alloc(sizeof(*(a)), \
+							KMEM_NOSLEEP)
+#    define	KMALLOCS(a,b,c)	(a) = (b)new_kmem_alloc((c), KMEM_NOSLEEP)
+#   endif /* SOLARIS */
+#  endif /* __sgi */
+# endif /* sun && !linux */
+# ifndef	GET_MINOR
+#  define	GET_MINOR(x)	minor(x)
+# endif
+# if (BSD >= 199306) || defined(__FreeBSD__)
+#  include <vm/vm.h>
+#  if !defined(__FreeBSD__) || (defined (__FreeBSD__) && __FreeBSD__>=3)
+#   include <vm/vm_extern.h>
+#   include <sys/proc.h>
+extern	vm_map_t	kmem_map;
+#  else /* !__FreeBSD__ || (__FreeBSD__ && __FreeBSD__>=3) */
+#   include <vm/vm_kern.h>
+#  endif /* !__FreeBSD__ || (__FreeBSD__ && __FreeBSD__>=3) */
+#  ifdef	M_PFIL
+#   define	KMALLOC(a, b)	MALLOC((a), b, sizeof(*(a)), M_PFIL, M_NOWAIT)
+#   define	KMALLOCS(a, b, c)	MALLOC((a), b, (c), M_PFIL, M_NOWAIT)
+#   define	KFREE(x)	FREE((x), M_PFIL)
+#   define	KFREES(x,s)	FREE((x), M_PFIL)
+#  else
+#   define	KMALLOC(a, b)	MALLOC((a), b, sizeof(*(a)), M_TEMP, M_NOWAIT)
+#   define	KMALLOCS(a, b, c)	MALLOC((a), b, (c), M_TEMP, M_NOWAIT)
+#   define	KFREE(x)	FREE((x), M_TEMP)
+#   define	KFREES(x,s)	FREE((x), M_TEMP)
+#  endif /* M_PFIL */
+#  define	UIOMOVE(a,b,c,d)	uiomove(a,b,d)
+#  define	SLEEP(id, n)	tsleep((id), PPAUSE|PCATCH, n, 0)
+#  define	WAKEUP(id)	wakeup(id)
+# endif /* BSD */
+# if defined(NetBSD) && NetBSD <= 1991011 && NetBSD >= 199407
+#  define	SPL_NET(x)	x = splsoftnet()
+#  define	SPL_X(x)	(void) splx(x)
+# else
+#  if !SOLARIS && !defined(linux)
+#   define	SPL_IMP(x)	x = splimp()
+#   define	SPL_NET(x)	x = splnet()
+#   define	SPL_X(x)	(void) splx(x)
+#  endif
+# endif /* NetBSD && NetBSD <= 1991011 && NetBSD >= 199407 */
+# define	PANIC(x,y)	if (x) panic y
+#else /* KERNEL */
+# define	SLEEP(x,y)	;
+# define	WAKEUP(x)	;
+# define	PANIC(x,y)	;
+# define	ATOMIC_INC(x)	(x)++
+# define	ATOMIC_DEC(x)	(x)--
+# define	MUTEX_ENTER(x)	;
+# define	READ_ENTER(x)	;
+# define	MUTEX_INIT(x,y,z)	;
+# define	MUTEX_DESTROY(x)	;
+# define	WRITE_ENTER(x)	;
+# define	RW_UPGRADE(x)	;
+# define	MUTEX_DOWNGRADE(x)	;
+# define	RWLOCK_EXIT(x)	;
+# define	MUTEX_EXIT(x)	;
+# define	SPL_NET(x)	;
+# define	SPL_IMP(x)	;
+# undef		SPL_X
+# define	SPL_X(x)	;
+# define	KMALLOC(a,b)	(a) = (b)malloc(sizeof(*a))
+# define	KMALLOCS(a,b,c)	(a) = (b)malloc(c)
+# define	KFREE(x)	free(x)
+# define	KFREES(x,s)	free(x)
+# define	GETUNIT(x, v)	get_unit(x,v)
+# define	IRCOPY(a,b,c)	(bcopy((a), (b), (c)), 0)
+# define	IWCOPY(a,b,c)	(bcopy((a), (b), (c)), 0)
+# define	IRCOPYPTR	ircopyptr
+# define	IWCOPYPTR	iwcopyptr
+#endif /* KERNEL */
+
+#if SOLARIS
+typedef mblk_t mb_t;
+# if SOLARIS2 >= 7
+#  ifdef lint
+#   define ALIGN32(ptr)    (ptr ? 0L : 0L)
+#   define ALIGN16(ptr)    (ptr ? 0L : 0L)
+#  else
+#   define ALIGN32(ptr)    (ptr)
+#   define ALIGN16(ptr)    (ptr)
+#  endif
+# endif
+#else
+# ifdef	linux
+#  ifndef kernel
+/*
+ * Minimal packet buffer used on linux when 'kernel' is not defined: a
+ * singly linked chain of (data, len) segments.  The mtod/m_len/m_next
+ * macros defined further down in the linux section map the mbuf-style
+ * accessors onto these members.
+ */
+typedef struct mb {
+	struct mb *next;	/* next segment (aliased by m_next) */
+	u_int len;		/* aliased by m_len */
+	u_char *data;		/* what mtod() returns, cast to the wanted type */
+} mb_t;
+#  else
+typedef struct sk_buff mb_t;
+#  endif
+# else
+typedef struct mbuf mb_t;
+# endif
+#endif /* SOLARIS */
+
+#if defined(linux) || defined(__sgi)
+/*
+ * These #ifdef's are here mainly for linux, but who knows, they may
+ * not be in other places or maybe one day linux will grow up and some
+ * of these will turn up there too.
+ */
+#ifndef	ICMP_MINLEN
+# define	ICMP_MINLEN	8
+#endif
+#ifndef	ICMP_UNREACH
+# define	ICMP_UNREACH	ICMP_DEST_UNREACH
+#endif
+#ifndef	ICMP_SOURCEQUENCH
+# define	ICMP_SOURCEQUENCH	ICMP_SOURCE_QUENCH
+#endif
+#ifndef	ICMP_TIMXCEED
+# define	ICMP_TIMXCEED	ICMP_TIME_EXCEEDED
+#endif
+#ifndef	ICMP_PARAMPROB
+# define	ICMP_PARAMPROB	ICMP_PARAMETERPROB
+#endif
+#ifndef ICMP_TSTAMP
+# define	ICMP_TSTAMP	ICMP_TIMESTAMP
+#endif
+#ifndef ICMP_TSTAMPREPLY
+# define	ICMP_TSTAMPREPLY	ICMP_TIMESTAMPREPLY
+#endif
+#ifndef ICMP_IREQ
+# define	ICMP_IREQ	ICMP_INFO_REQUEST
+#endif
+#ifndef ICMP_IREQREPLY
+# define	ICMP_IREQREPLY	ICMP_INFO_REPLY
+#endif
+#ifndef	ICMP_MASKREQ
+# define	ICMP_MASKREQ	ICMP_ADDRESS
+#endif
+#ifndef ICMP_MASKREPLY
+# define	ICMP_MASKREPLY	ICMP_ADDRESSREPLY
+#endif
+#ifndef	IPVERSION
+# define	IPVERSION	4
+#endif
+#ifndef	IPOPT_MINOFF
+# define	IPOPT_MINOFF	4
+#endif
+#ifndef	IPOPT_COPIED
+# define	IPOPT_COPIED(x)	((x)&0x80)
+#endif
+#ifndef	IPOPT_EOL
+# define	IPOPT_EOL	0
+#endif
+#ifndef	IPOPT_NOP
+# define	IPOPT_NOP	1
+#endif
+#ifndef	IP_MF
+# define	IP_MF	((u_short)0x2000)
+#endif
+#ifndef	ETHERTYPE_IP
+# define	ETHERTYPE_IP	((u_short)0x0800)
+#endif
+#ifndef	TH_FIN
+# define	TH_FIN	0x01
+#endif
+#ifndef	TH_SYN
+# define	TH_SYN	0x02
+#endif
+#ifndef	TH_RST
+# define	TH_RST	0x04
+#endif
+#ifndef	TH_PUSH
+# define	TH_PUSH	0x08
+#endif
+#ifndef	TH_ACK
+# define	TH_ACK	0x10
+#endif
+#ifndef	TH_URG
+# define	TH_URG	0x20
+#endif
+#ifndef	IPOPT_EOL
+# define	IPOPT_EOL	0
+#endif
+#ifndef	IPOPT_NOP
+# define	IPOPT_NOP	1
+#endif
+#ifndef	IPOPT_RR
+# define	IPOPT_RR	7
+#endif
+#ifndef	IPOPT_TS
+# define	IPOPT_TS	68
+#endif
+#ifndef	IPOPT_SECURITY
+# define	IPOPT_SECURITY	130
+#endif
+#ifndef	IPOPT_LSRR
+# define	IPOPT_LSRR	131
+#endif
+#ifndef	IPOPT_SATID
+# define	IPOPT_SATID	136
+#endif
+#ifndef	IPOPT_SSRR
+# define	IPOPT_SSRR	137
+#endif
+#ifndef	IPOPT_SECUR_UNCLASS
+# define	IPOPT_SECUR_UNCLASS	((u_short)0x0000)
+#endif
+#ifndef	IPOPT_SECUR_CONFID
+# define	IPOPT_SECUR_CONFID	((u_short)0xf135)
+#endif
+#ifndef	IPOPT_SECUR_EFTO
+# define	IPOPT_SECUR_EFTO	((u_short)0x789a)
+#endif
+#ifndef	IPOPT_SECUR_MMMM
+# define	IPOPT_SECUR_MMMM	((u_short)0xbc4d)
+#endif
+#ifndef	IPOPT_SECUR_RESTR
+# define	IPOPT_SECUR_RESTR	((u_short)0xaf13)
+#endif
+#ifndef	IPOPT_SECUR_SECRET
+# define	IPOPT_SECUR_SECRET	((u_short)0xd788)
+#endif
+#ifndef IPOPT_SECUR_TOPSECRET
+# define	IPOPT_SECUR_TOPSECRET	((u_short)0x6bc5)
+#endif
+#ifndef IPOPT_OLEN
+# define	IPOPT_OLEN	1
+#endif
+#endif /* linux || __sgi */
+
+#ifdef	linux
+#include <linux/in_systm.h>
+/*
+ * TCP States
+ */
+#define	TCPS_CLOSED		0	/* closed */
+#define	TCPS_LISTEN		1	/* listening for connection */
+#define	TCPS_SYN_SENT		2	/* active, have sent syn */
+#define	TCPS_SYN_RECEIVED	3	/* have send and received syn */
+/* states < TCPS_ESTABLISHED are those where connections not established */
+#define	TCPS_ESTABLISHED	4	/* established */
+#define	TCPS_CLOSE_WAIT		5	/* rcvd fin, waiting for close */
+/* states > TCPS_CLOSE_WAIT are those where user has closed */
+#define	TCPS_FIN_WAIT_1		6	/* have closed, sent fin */
+#define	TCPS_CLOSING		7	/* closed xchd FIN; await FIN ACK */
+#define	TCPS_LAST_ACK		8	/* had fin and close; await FIN ACK */
+/* states > TCPS_CLOSE_WAIT && < TCPS_FIN_WAIT_2 await ACK of FIN */
+#define	TCPS_FIN_WAIT_2		9	/* have closed, fin is acked */
+#define	TCPS_TIME_WAIT		10	/* in 2*msl quiet wait after close */
+
+/*
+ * file flags.
+ */
+#ifdef WRITE
+#define	FWRITE	WRITE
+#define	FREAD	READ
+#else
+#define	FWRITE	_IOC_WRITE
+#define	FREAD	_IOC_READ
+#endif
+/*
+ * mbuf related problems.
+ */
+#define	mtod(m,t)	(t)((m)->data)
+#define	m_len		len
+#define	m_next		next
+
+#ifdef	IP_DF
+#undef	IP_DF
+#endif
+#define	IP_DF		0x4000
+
+/*
+ * TCP header declared locally for linux builds, using BSD-style th_*
+ * member names so the rest of the code can be shared with the BSD ports
+ * (which typedef struct tcphdr instead, see the #else of this section).
+ */
+typedef	struct	{
+	__u16	th_sport;
+	__u16	th_dport;
+	__u32	th_seq;
+	__u32	th_ack;
+	/*
+	 * th_off and th_res are 4-bit fields sharing one byte; their
+	 * declaration order is swapped for the architectures listed below.
+	 * NOTE(review): presumably these are the little-endian targets, so
+	 * that the in-memory nibble order is the same everywhere - confirm.
+	 */
+# if defined(__i386__) || defined(__MIPSEL__) || defined(__alpha__) ||\
+    defined(vax)
+	__u8	th_res:4;
+	__u8	th_off:4;
+#else
+	__u8	th_off:4;
+	__u8	th_res:4;
+#endif
+	__u8	th_flags;	/* tested against the TH_* bit masks */
+	__u16	th_win;
+	__u16	th_sum;
+	__u16	th_urp;
+} tcphdr_t;
+
+/*
+ * UDP header declared locally for linux builds with BSD-style uh_*
+ * member names: four 16-bit fields.
+ */
+typedef	struct	{
+	__u16	uh_sport;
+	__u16	uh_dport;
+	__u16	uh_ulen;
+	__u16	uh_sum;
+} udphdr_t;
+
+/*
+ * IPv4 header declared locally for linux builds with BSD-style ip_*
+ * member names (the non-linux branch typedefs struct ip instead).
+ */
+typedef	struct	{
+	/*
+	 * ip_v and ip_hl are 4-bit fields sharing the first byte; their
+	 * declaration order is swapped for the architectures listed below.
+	 * NOTE(review): presumably the little-endian targets - confirm.
+	 */
+# if defined(__i386__) || defined(__MIPSEL__) || defined(__alpha__) ||\
+    defined(vax)
+	__u8	ip_hl:4;
+	__u8	ip_v:4;
+# else
+	__u8	ip_v:4;
+	__u8	ip_hl:4;
+# endif
+	__u8	ip_tos;
+	__u16	ip_len;
+	__u16	ip_id;
+	__u16	ip_off;		/* IP_DF / IP_MF are masks for this field */
+	__u8	ip_ttl;
+	__u8	ip_p;
+	__u16	ip_sum;
+	struct	in_addr	ip_src;
+	struct	in_addr	ip_dst;
+} ip_t;
+
+/*
+ * Structure of an icmp header (linux builds only; elsewhere icmphdr_t is
+ * a typedef of the system's struct icmp).
+ *
+ * After the fixed type/code/checksum fields come two unions: icmp_hun
+ * holds the per-type header word and icmp_dun the per-type payload.  The
+ * icmp_* macros defined inside the structure give the usual BSD names to
+ * the individual union members.
+ */
+typedef struct icmp {
+	__u8	icmp_type;		/* type of message, see below */
+	__u8	icmp_code;		/* type sub code */
+	__u16	icmp_cksum;		/* ones complement cksum of struct */
+	union {
+		__u8	ih_pptr;		/* ICMP_PARAMPROB */
+		struct	in_addr	ih_gwaddr;	/* ICMP_REDIRECT */
+		struct	ih_idseq {
+			__u16	icd_id;
+			__u16	icd_seq;
+		} ih_idseq;
+		int ih_void;
+	} icmp_hun;
+# define	icmp_pptr	icmp_hun.ih_pptr
+# define	icmp_gwaddr	icmp_hun.ih_gwaddr
+# define	icmp_id		icmp_hun.ih_idseq.icd_id
+# define	icmp_seq	icmp_hun.ih_idseq.icd_seq
+# define	icmp_void	icmp_hun.ih_void
+	union {
+		struct id_ts {
+			n_time its_otime;
+			n_time its_rtime;
+			n_time its_ttime;
+		} id_ts;
+		struct id_ip  {
+			ip_t idi_ip;
+			/* options and then 64 bits of data */
+		} id_ip;
+		u_long	id_mask;
+		char	id_data[1];
+	} icmp_dun;
+# define	icmp_otime	icmp_dun.id_ts.its_otime
+# define	icmp_rtime	icmp_dun.id_ts.its_rtime
+# define	icmp_ttime	icmp_dun.id_ts.its_ttime
+# define	icmp_ip		icmp_dun.id_ip.idi_ip
+# define	icmp_mask	icmp_dun.id_mask
+# define	icmp_data	icmp_dun.id_data
+} icmphdr_t;
+
+# ifndef LINUX_IPOVLY
+#  define LINUX_IPOVLY
+/*
+ * Overlay structure with the BSD 'ipovly' member names, supplied here for
+ * linux builds unless LINUX_IPOVLY has already been defined.
+ */
+struct ipovly {
+	caddr_t	ih_next, ih_prev;	/* for protocol sequence q's */
+	u_char	ih_x1;			/* (unused) */
+	u_char	ih_pr;			/* protocol */
+	short	ih_len;			/* protocol length */
+	struct	in_addr ih_src;		/* source internet address */
+	struct	in_addr ih_dst;		/* destination internet address */
+};
+# endif
+
+/*
+ * Ethernet header declared locally for linux builds with BSD-style
+ * member names: two 6-byte host addresses followed by a 16-bit type.
+ */
+typedef struct  {
+	__u8	ether_dhost[6];
+	__u8	ether_shost[6];
+	__u16	ether_type;
+} ether_header_t;
+
+/*
+ * Cut-down 'struct uio' for linux builds: a residual count, a direction
+ * and a single buffer pointer.
+ * NOTE(review): uio_rw presumably takes the UIO_READ/UIO_WRITE values
+ * defined right after this structure - confirm against the uiomove()
+ * implementation used on linux.
+ */
+typedef	struct	uio	{
+	int	uio_resid;
+	int	uio_rw;
+	caddr_t	uio_buf;
+} uio_t;
+
+# define	UIO_READ	0
+# define	UIO_WRITE	1
+# define	UIOMOVE(a, b, c, d)	uiomove(a,b,c,d)
+
+/*
+ * For masking struct ifnet onto struct device
+ */
+# define	if_name	name
+
+# ifdef	KERNEL
+#  define	GETUNIT(x, v)	dev_get(x)
+#  define	FREE_MB_T(m)	kfree_skb(m, FREE_WRITE)
+#  define	uniqtime	do_gettimeofday
+#  undef INT_MAX
+#  undef UINT_MAX
+#  undef LONG_MAX
+#  undef ULONG_MAX
+#  include <linux/netdevice.h>
+#  define	SPL_X(x)
+#  define	SPL_NET(x)
+#  define	SPL_IMP(x)
+ 
+#  define	bcmp(a,b,c)	memcmp(a,b,c)
+#  define	bcopy(a,b,c)	memcpy(b,a,c)
+#  define	bzero(a,c)	memset(a,0,c)
+
+#  define	UNITNAME(n)	dev_get((n))
+
+#  define	KMALLOC(a,b)	(a) = (b)kmalloc(sizeof(*(a)), GFP_ATOMIC)
+#  define	KMALLOCS(a,b,c)	(a) = (b)kmalloc((c), GFP_ATOMIC)
+#  define	KFREE(x)	kfree_s((x), sizeof(*(x)))
+#  define	KFREES(x,s)	kfree_s((x), (s))
+/*
+ * Copy c bytes from address a into buffer b after checking the source
+ * range with verify_area(VERIFY_READ).
+ *
+ * Returns 0 on success, otherwise the error from verify_area() (in which
+ * case nothing is copied).
+ *
+ * This has to be a real function, like IWCOPY below: it declares a local
+ * and returns a value, neither of which a function-like macro can do.
+ */
+static inline int IRCOPY(const void *a, void *b, size_t c)
+{
+	int error;
+
+	error = verify_area(VERIFY_READ, a ,c);
+	if (!error)
+		memcpy_fromfs(b, a, c);
+	return error;
+}
+/*
+ * Copy c bytes from buffer a out to address b.  The destination range is
+ * validated with verify_area(VERIFY_WRITE) first; if that fails its error
+ * is returned and nothing is written.  Returns 0 on success.
+ */
+static inline int IWCOPY(const void *a, void *b, size_t c)
+{
+	int rv = verify_area(VERIFY_WRITE, b, c);
+
+	if (rv != 0)
+		return rv;
+	memcpy_tofs(b, a, c);
+	return 0;
+}
+/*
+ * Indirect copy-in: a is the address of a pointer; fetch that pointer and
+ * then copy c bytes from where it points into buffer b.
+ *
+ * Both the pointer slot and the range it designates are checked with
+ * verify_area(VERIFY_READ).  Returns 0 on success or the first
+ * verify_area() error; b is untouched on failure.
+ */
+static inline int IRCOPYPTR(const void *a, void *b, size_t c) {
+	caddr_t ca;
+	int	error;
+
+	error = verify_area(VERIFY_READ, a ,sizeof(ca));
+	if (!error) {
+		/*
+		 * Fetch the pointer value INTO ca.  Passing 'ca' itself as
+		 * the destination would write through an uninitialized
+		 * pointer and leave ca undefined for the second copy.
+		 */
+		memcpy_fromfs(&ca, a, sizeof(ca));
+		error = verify_area(VERIFY_READ, ca , c);
+		if (!error)
+			memcpy_fromfs(b, ca, c);
+	}
+	return error;
+}
+/*
+ * Indirect copy-out: b is the address of a pointer; fetch that pointer
+ * and then copy c bytes from buffer a to where it points.
+ *
+ * The pointer slot is checked with verify_area(VERIFY_READ) and the
+ * destination range with verify_area(VERIFY_WRITE).  Returns 0 on success
+ * or the first verify_area() error; nothing is written on failure.
+ */
+static inline int IWCOPYPTR(const void *a, void *b, size_t c) {
+	caddr_t ca;
+	int	error;
+
+	error = verify_area(VERIFY_READ, b ,sizeof(ca));
+	if (!error) {
+		/*
+		 * Fetch the destination pointer INTO ca.  Passing 'ca'
+		 * itself as the destination would write through an
+		 * uninitialized pointer.
+		 */
+		memcpy_fromfs(&ca, b, sizeof(ca));
+		error = verify_area(VERIFY_WRITE, ca, c);
+		if (!error)
+			memcpy_tofs(ca, a, c);
+	}
+	return error;
+}
+# else
+#  define	__KERNEL__
+#  undef INT_MAX
+#  undef UINT_MAX
+#  undef LONG_MAX
+#  undef ULONG_MAX
+#  define	s8 __s8
+#  define	u8 __u8
+#  define	s16 __s16
+#  define	u16 __u16
+#  define	s32 __s32
+#  define	u32 __u32
+#  include <linux/netdevice.h>
+#  undef	__KERNEL__
+# endif
+# define	ifnet	device
+#else
+typedef	struct	tcphdr	tcphdr_t;
+typedef	struct	udphdr	udphdr_t;
+typedef	struct	icmp	icmphdr_t;
+typedef	struct	ip	ip_t;
+typedef	struct	ether_header	ether_header_t;
+#endif /* linux */
+typedef	struct	tcpiphdr	tcpiphdr_t;
+
+#if defined(hpux) || defined(linux)
+/*
+ * Six-octet Ethernet address container, declared here only for hpux and
+ * linux builds (see the enclosing #if).
+ */
+struct	ether_addr	{
+	char	ether_addr_octet[6];
+};
+#endif
+
+/*
+ * XXX - This is one of those *awful* hacks which nobody likes
+ */
+#ifdef	ultrix
+#define	A_A
+#else
+#define	A_A	&
+#endif
+
+#ifndef	ICMP_ROUTERADVERT
+# define	ICMP_ROUTERADVERT	9
+#endif
+#ifndef	ICMP_ROUTERSOLICIT
+# define	ICMP_ROUTERSOLICIT	10
+#endif
+#undef	ICMP_MAX_UNREACH
+#define	ICMP_MAX_UNREACH	14
+#undef	ICMP_MAXTYPE
+#define	ICMP_MAXTYPE		18
+/*
+ * ICMP error replies have an IP header (20 bytes), 8 bytes of ICMP data,
+ * another IP header and then 64 bits of data, totalling 56.  Of course,
+ * the last 64 bits are dependent on that being available.
+ */
+#define	ICMPERR_ICMPHLEN	8
+#define	ICMPERR_IPICMPHLEN	(20 + 8)
+#define	ICMPERR_MINPKTLEN	(20 + 8 + 20)
+#define	ICMPERR_MAXPKTLEN	(20 + 8 + 20 + 8)
+#define	ICMP6ERR_MINPKTLEN	(40 + 8)
+#define	ICMP6ERR_IPICMPHLEN	(40 + 8 + 40)
+
+/*
+ * ECN is a new addition to TCP - RFC 2481
+ */
+#ifndef TH_ECN
+# define	TH_ECN	0x40
+#endif
+#ifndef TH_CWR
+# define	TH_CWR	0x80
+#endif
+#define	TH_ECNALL	(TH_ECN|TH_CWR)
+
+#define	TCPF_ALL (TH_FIN|TH_SYN|TH_RST|TH_PUSH|TH_ACK|TH_URG|TH_ECN|TH_CWR)
+
+#endif	/* __IP_COMPAT_H__ */
diff --git a/sys/netinet/ip_divert.c b/sys/netinet/ip_divert.c
new file mode 100644
index 0000000..df5ec22
--- /dev/null
+++ b/sys/netinet/ip_divert.c
@@ -0,0 +1,543 @@
+/*
+ * Copyright (c) 1982, 1986, 1988, 1993
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+
+#include "opt_inet.h"
+#include "opt_ipfw.h"
+#include "opt_ipdivert.h"
+#include "opt_ipsec.h"
+
+#ifndef INET
+#error "IPDIVERT requires INET."
+#endif
+
+#include <sys/param.h>
+#include <sys/kernel.h>
+#include <sys/malloc.h>
+#include <sys/mbuf.h>
+#include <sys/socket.h>
+#include <sys/protosw.h>
+#include <sys/socketvar.h>
+#include <sys/sysctl.h>
+#include <sys/systm.h>
+
+#include <vm/vm_zone.h>
+
+#include <net/if.h>
+#include <net/route.h>
+
+#include <netinet/in.h>
+#include <netinet/in_systm.h>
+#include <netinet/ip.h>
+#include <netinet/in_pcb.h>
+#include <netinet/in_var.h>
+#include <netinet/ip_var.h>
+
+/*
+ * Divert sockets
+ */
+
+/*
+ * Allocate enough space to hold a full IP packet
+ */
+#define	DIVSNDQ		(65536 + 100)
+#define	DIVRCVQ		(65536 + 100)
+
+/*
+ * A 16 bit cookie is passed to and from the user process.
+ * The user process can send it back to help the caller know
+ * something about where the packet originally came from.
+ *
+ * In the case of ipfw, then the cookie is the rule that sent
+ * us here. On reinjection it is the rule after which processing
+ * should continue. Leaving it the same will make processing start
+ * at the rule number after that which sent it here. Setting it to
+ * 0 will restart processing at the beginning. 
+ *
+ * For divert_packet(), ip_divert_cookie is an input value only.
+ * For div_output(), ip_divert_cookie is an output value only.
+ */
+u_int16_t ip_divert_cookie;
+
+/* Internal variables */
+static struct inpcbhead divcb;
+static struct inpcbinfo divcbinfo;
+
+static u_long	div_sendspace = DIVSNDQ;	/* XXX sysctl ? */
+static u_long	div_recvspace = DIVRCVQ;	/* XXX sysctl ? */
+
+/* Optimization: have this preinitialized */
+static struct sockaddr_in divsrc = { sizeof(divsrc), AF_INET };
+
+/* Internal functions */
+static int div_output(struct socket *so,
+		struct mbuf *m, struct sockaddr *addr, struct mbuf *control);
+
+/*
+ * One-time setup of the divert PCB bookkeeping: the PCB list, the two
+ * hash tables and the zone from which divert inpcbs are allocated.
+ */
+void
+div_init(void)
+{
+	struct inpcbinfo *const info = &divcbinfo;
+
+	LIST_INIT(&divcb);
+	info->listhead = &divcb;
+
+	/*
+	 * XXX Divert makes no use of the hash lists, but allocating
+	 * one-entry tables is simpler than checking for hashbase == NULL
+	 * all over the place.
+	 */
+	info->hashbase = hashinit(1, M_PCB, &info->hashmask);
+	info->porthashbase = hashinit(1, M_PCB, &info->porthashmask);
+	info->ipi_zone = zinit("divcb", sizeof(struct inpcb), maxsockets,
+	    ZONE_INTERRUPT, 0);
+}
+
+/*
+ * Protocol input hook for IPPROTO_DIVERT.  That number does not denote a
+ * real IP protocol, so any packet carrying it is discarded and accounted
+ * as "no protocol"; nothing from the outside may enter this way.
+ */
+void
+div_input(struct mbuf *m, int off, int proto)
+{
+	m_freem(m);
+	ipstat.ips_noproto++;
+}
+
+/*
+ * Divert a packet by queueing it on the divert socket bound to 'port'.
+ *
+ * The shared 'divsrc' sockaddr_in is filled in to describe the packet
+ * (cookie in sin_port, receive interface address in sin_addr, interface
+ * name in sin_zero) and passed up together with the mbuf chain.  If no
+ * socket is bound to the port, or the socket buffer refuses the packet,
+ * the chain is freed.
+ */
+void
+divert_packet(struct mbuf *m, int incoming, int port)
+{
+	struct socket *target = NULL;
+	struct inpcb *pcb;
+	struct ifnet *rcvif;
+	u_int16_t wanted;
+
+	/* Sanity check */
+	KASSERT(port != 0, ("%s: port=0", __FUNCTION__));
+
+	/* The cookie is consumed here: hand it to the user, then clear it */
+	divsrc.sin_port = ip_divert_cookie;
+	ip_divert_cookie = 0;
+
+	/* Make sure the IP header is contiguous in the first mbuf */
+	if (m->m_len < sizeof(struct ip)) {
+		m = m_pullup(m, sizeof(struct ip));
+		if (m == NULL)
+			return;
+	}
+
+	/*
+	 * Only incoming packets carry a receive interface address; for
+	 * everything else the address stays 0.
+	 */
+	divsrc.sin_addr.s_addr = 0;
+	if (incoming) {
+		struct ifaddr *ifa;
+
+		/* Sanity check */
+		KASSERT((m->m_flags & M_PKTHDR), ("%s: !PKTHDR", __FUNCTION__));
+
+		/* Take the first AF_INET address of the receive interface */
+		TAILQ_FOREACH(ifa, &m->m_pkthdr.rcvif->if_addrhead, ifa_link) {
+			if (ifa->ifa_addr != NULL &&
+			    ifa->ifa_addr->sa_family == AF_INET) {
+				divsrc.sin_addr = ((struct sockaddr_in *)
+				    ifa->ifa_addr)->sin_addr;
+				break;
+			}
+		}
+	}
+
+	/*
+	 * Whenever a receive interface is known, smuggle its name
+	 * ("<if_name><if_unit>") to the user in sin_zero.  XXX This really
+	 * wants a dedicated sockaddr type (a sockaddr_div, along the lines
+	 * of sockaddr_dl); sin_zero only leaves room for 7 characters,
+	 * which is enough for the usual short driver name plus unit.
+	 * The name disambiguates a P2P link and a LAN interface that share
+	 * an address, which could otherwise cause the wrong interface to be
+	 * picked when the packet is fed back in.  A daemon that reuses the
+	 * sockaddr_in it received, as the man page suggests, returns the
+	 * name automatically (div_output holds the other half of this).
+	 */
+	bzero(&divsrc.sin_zero, sizeof(divsrc.sin_zero));
+	rcvif = m->m_pkthdr.rcvif;
+	if (rcvif != NULL) {
+		snprintf(divsrc.sin_zero, sizeof(divsrc.sin_zero), "%s%d",
+		    rcvif->if_name, rcvif->if_unit);
+	}
+
+	/* Locate the socket bound to the port; the last match is used */
+	wanted = htons((u_int16_t)port);
+	LIST_FOREACH(pcb, &divcb, inp_list) {
+		if (pcb->inp_lport == wanted)
+			target = pcb->inp_socket;
+	}
+
+	if (target == NULL) {
+		/* Nobody is listening: drop and fix up the statistics */
+		m_freem(m);
+		ipstat.ips_noproto++;
+		ipstat.ips_delivered--;
+		return;
+	}
+
+	if (sbappendaddr(&target->so_rcv, (struct sockaddr *)&divsrc, m,
+	    (struct mbuf *)0) != 0)
+		sorwakeup(target);
+	else
+		m_freem(m);
+}
+
+/*
+ * Deliver packet back into the IP processing machinery.
+ *
+ * If no address specified, or address is 0.0.0.0, send to ip_output();
+ * otherwise, send to ip_input() and mark as having been received on
+ * the interface named in sin_zero or, failing that, the interface that
+ * owns the given address.
+ *
+ * so:      divert socket the packet was written to
+ * m:       packet, starting with an IP header; always consumed
+ * addr:    optional sockaddr_in (cookie in sin_port, interface name in
+ *          sin_zero, address in sin_addr); may be NULL
+ * control: optional control mbufs; always freed, never interpreted
+ *
+ * Returns 0, the error from ip_output(), EINVAL for a packet that fails
+ * the outbound sanity checks, or EADDRNOTAVAIL when no receive interface
+ * can be determined.  ip_divert_cookie is always 0 on return.
+ */
+static int
+div_output(so, m, addr, control)
+	struct socket *so;
+	register struct mbuf *m;
+	struct sockaddr *addr;
+	struct mbuf *control;
+{
+	register struct inpcb *const inp = sotoinpcb(so);
+	register struct ip *const ip = mtod(m, struct ip *);
+	struct sockaddr_in *sin = (struct sockaddr_in *)addr;
+	int error = 0;
+
+	if (control)
+		m_freem(control);		/* XXX */
+
+	/* Loopback avoidance and state recovery */
+	if (sin) {
+		int	len;
+
+		ip_divert_cookie = sin->sin_port;
+
+		/*
+		 * Find receive interface with the given name or IP address.
+		 * The name is user supplied data so don't trust its size or
+		 * that it is zero terminated. The name has priority.
+		 * We are presently assuming that the sockaddr_in
+		 * has not been replaced by a sockaddr_div, so we limit it
+		 * to 16 bytes in total. the name is stuffed (if it exists)
+		 * in the sin_zero[] field.
+		 *
+		 * The scan never looks beyond sin_zero[]: len ends up as
+		 * the index of the first NUL, or sizeof(sin_zero) when
+		 * there is none, and only a non-empty, terminated name is
+		 * handed to ifunit().
+		 */
+		for (len = 0; len < (int)sizeof(sin->sin_zero); len++) {
+			if (sin->sin_zero[len] == '\0')
+				break;
+		}
+		if ((len > 0) && (len < (int)sizeof(sin->sin_zero)))
+			m->m_pkthdr.rcvif = ifunit(sin->sin_zero);
+	} else {
+		ip_divert_cookie = 0;
+	}
+
+	/* Reinject packet into the system as incoming or outgoing */
+	if (!sin || sin->sin_addr.s_addr == 0) {
+		/*
+		 * Don't allow both user specified and setsockopt options,
+		 * and don't allow packet length sizes that will crash
+		 */
+		if (((ip->ip_hl != (sizeof (*ip) >> 2)) && inp->inp_options) ||
+		     ((u_short)ntohs(ip->ip_len) > m->m_pkthdr.len)) {
+			error = EINVAL;
+			goto cantsend;
+		}
+
+		/* Convert fields to host order for ip_output() */
+		NTOHS(ip->ip_len);
+		NTOHS(ip->ip_off);
+
+		/* Send packet to output processing */
+		ipstat.ips_rawout++;			/* XXX */
+		error = ip_output(m, inp->inp_options, &inp->inp_route,
+			(so->so_options & SO_DONTROUTE) |
+			IP_ALLOWBROADCAST | IP_RAWOUTPUT,
+			inp->inp_moptions);
+	} else {
+		struct	ifaddr *ifa;
+
+		/* If no luck with the name above. check by IP address.  */
+		if (m->m_pkthdr.rcvif == NULL) {
+			/*
+			 * Make sure there are no distractions
+			 * for ifa_ifwithaddr. Clear the port and the ifname.
+			 * Maybe zap all 8 bytes at once using a 64bit write?
+			 */
+			bzero(sin->sin_zero, sizeof(sin->sin_zero));
+			/* *((u_int64_t *)sin->sin_zero) = 0; */ /* XXX ?? */
+			sin->sin_port = 0;
+			if (!(ifa = ifa_ifwithaddr((struct sockaddr *) sin))) {
+				error = EADDRNOTAVAIL;
+				goto cantsend;
+			}
+			m->m_pkthdr.rcvif = ifa->ifa_ifp;
+		}
+
+		/* Send packet to input processing */
+		ip_input(m);
+	}
+
+	/* paranoid: Reset for next time (and other packets) */
+	/* almost definitely already done in the ipfw filter but.. */
+	ip_divert_cookie = 0;
+	return error;
+
+cantsend:
+	m_freem(m);
+	ip_divert_cookie = 0;
+	return error;
+}
+
+/*
+ * Attach a protocol control block to a new divert socket.
+ *
+ * Panics if the socket already has a PCB.  When a process is supplied it
+ * must pass suser().  Returns 0 on success, otherwise the error from
+ * suser(), soreserve(), in_pcballoc() or (with IPSEC) ipsec_init_policy().
+ */
+static int
+div_attach(struct socket *so, int proto, struct proc *p)
+{
+	struct inpcb *pcb;
+	int rv, ipl;
+
+	if (sotoinpcb(so) != NULL)
+		panic("div_attach");
+	if (p != NULL) {
+		rv = suser(p);
+		if (rv != 0)
+			return rv;
+	}
+
+	rv = soreserve(so, div_sendspace, div_recvspace);
+	if (rv != 0)
+		return rv;
+
+	ipl = splnet();
+	rv = in_pcballoc(so, &divcbinfo, p);
+	splx(ipl);
+	if (rv != 0)
+		return rv;
+
+	pcb = (struct inpcb *)so->so_pcb;
+	pcb->inp_ip_p = proto;
+	pcb->inp_vflag |= INP_IPV4;
+	pcb->inp_flags |= INP_HDRINCL;
+	/*
+	 * Divert sockets count as permanently "connected": the
+	 * destination of a packet is always known.
+	 */
+	so->so_state |= SS_ISCONNECTED;
+#ifdef IPSEC
+	rv = ipsec_init_policy(so, &pcb->inp_sp);
+	if (rv != 0) {
+		in_pcbdetach(pcb);
+		return rv;
+	}
+#endif /*IPSEC*/
+	return 0;
+}
+
+/*
+ * Release the PCB of a divert socket.  A socket without a PCB is a
+ * kernel bug and panics.  Always returns 0.
+ */
+static int
+div_detach(struct socket *so)
+{
+	struct inpcb *const pcb = sotoinpcb(so);
+
+	if (pcb == NULL)
+		panic("div_detach");
+	in_pcbdetach(pcb);
+	return 0;
+}
+
+/*
+ * Abort a divert socket: mark it disconnected, then detach its PCB.
+ * Returns whatever div_detach() returns.
+ */
+static int
+div_abort(struct socket *so)
+{
+	int rv;
+
+	soisdisconnected(so);
+	rv = div_detach(so);
+	return rv;
+}
+
+/*
+ * Disconnect request.  A socket still flagged SS_ISCONNECTED is torn
+ * down through div_abort(); otherwise ENOTCONN is returned.
+ */
+static int
+div_disconnect(struct socket *so)
+{
+	if (so->so_state & SS_ISCONNECTED)
+		return div_abort(so);
+	return ENOTCONN;
+}
+
+/*
+ * Bind a divert socket to the port given in 'nam'.
+ *
+ * Only AF_INET addresses are accepted (EAFNOSUPPORT otherwise).  The
+ * address part is overwritten with INADDR_ANY before in_pcbbind() is
+ * called, whose result is returned.  Runs at splnet.
+ */
+static int
+div_bind(struct socket *so, struct sockaddr *nam, struct proc *p)
+{
+	struct inpcb *pcb;
+	int ipl, rv;
+
+	ipl = splnet();
+	pcb = sotoinpcb(so);
+	/*
+	 * in_pcbbind treats the name as a sockaddr_in and insists on a
+	 * valid address, which divert sockets do not have, so fill one in
+	 * here.
+	 * XXX -- divert should not be abusing in_pcbbind and should
+	 * probably have its own family.
+	 */
+	if (nam->sa_family == AF_INET) {
+		((struct sockaddr_in *)nam)->sin_addr.s_addr = INADDR_ANY;
+		rv = in_pcbbind(pcb, nam, p);
+	} else {
+		rv = EAFNOSUPPORT;
+	}
+	splx(ipl);
+	return rv;
+}
+
+/*
+ * Shutdown request: flag the socket as unable to send any more data.
+ * Cannot fail.
+ */
+static int
+div_shutdown(struct socket *so)
+{
+	socantsendmore(so);
+
+	return (0);
+}
+
+/*
+ * pru_send handler for divert sockets.
+ *
+ * Makes sure the first mbuf holds a complete IP header and passes the
+ * packet on to div_output().  Both 'm' and 'control' are owned by this
+ * function once it is called: they are consumed on every path, including
+ * the error path.
+ *
+ * Returns EINVAL if the packet is too short to contain an IP header,
+ * otherwise the result of div_output().
+ */
+static int
+div_send(struct socket *so, int flags, struct mbuf *m, struct sockaddr *nam,
+	 struct mbuf *control, struct proc *p)
+{
+	/* Packet must have a header (but that's about it) */
+	if (m->m_len < sizeof (struct ip) &&
+	    (m = m_pullup(m, sizeof (struct ip))) == 0) {
+		/*
+		 * m_pullup() has already freed the chain, so there is
+		 * nothing left of the packet to release.  The control
+		 * mbufs are normally freed by div_output(); since we are
+		 * not getting that far, free them here.
+		 */
+		ipstat.ips_toosmall++;
+		if (control)
+			m_freem(control);
+		return EINVAL;
+	}
+
+	/* Send packet */
+	return div_output(so, m, nam, control);
+}
+
+/*
+ * Sysctl handler that exports the list of divert PCBs.
+ *
+ * The output is a struct xinpgen header, one struct xinpcb per exported
+ * PCB and a trailing struct xinpgen reflecting the state after the copy.
+ * When no output buffer is supplied (req->oldptr == 0) only a size
+ * estimate is stored in req->oldidx.  Attempts to write (req->newptr != 0)
+ * fail with EPERM.
+ *
+ * Returns 0 on success, ENOMEM if the scratch list cannot be allocated,
+ * or the first error reported by SYSCTL_OUT().
+ */
+static int
+div_pcblist(SYSCTL_HANDLER_ARGS)
+{
+	int error, i, n, s;
+	struct inpcb *inp, **inp_list;
+	inp_gen_t gencnt;
+	struct xinpgen xig;
+
+	/*
+	 * The process of preparing the TCB list is too time-consuming and
+	 * resource-intensive to repeat twice on every request.
+	 */
+	if (req->oldptr == 0) {
+		n = divcbinfo.ipi_count;
+		req->oldidx = 2 * (sizeof xig)
+			+ (n + n/8) * sizeof(struct xinpcb);
+		return 0;
+	}
+
+	if (req->newptr != 0)
+		return EPERM;
+
+	/*
+	 * OK, now we're committed to doing something.
+	 */
+	s = splnet();
+	gencnt = divcbinfo.ipi_gencnt;
+	n = divcbinfo.ipi_count;
+	splx(s);
+
+	xig.xig_len = sizeof xig;
+	xig.xig_count = n;
+	xig.xig_gen = gencnt;
+	xig.xig_sogen = so_gencnt;
+	error = SYSCTL_OUT(req, &xig, sizeof xig);
+	if (error)
+		return error;
+
+	inp_list = malloc(n * sizeof *inp_list, M_TEMP, M_WAITOK);
+	if (inp_list == 0)
+		return ENOMEM;
+	
+	/*
+	 * Snapshot, at splnet, the PCBs that are no newer than the
+	 * generation reported above and that prison_xinpcb() does not
+	 * filter out for the requesting process.
+	 */
+	s = splnet();
+	for (inp = LIST_FIRST(divcbinfo.listhead), i = 0; inp && i < n;
+	     inp = LIST_NEXT(inp, inp_list)) {
+		if (inp->inp_gencnt <= gencnt && !prison_xinpcb(req->p, inp))
+			inp_list[i++] = inp;
+	}
+	splx(s);
+	n = i;
+
+	/*
+	 * Copy the entries out, stopping at the first failure so that a
+	 * later successful SYSCTL_OUT() cannot overwrite the error and no
+	 * further copying is attempted once the request has failed.
+	 */
+	error = 0;
+	for (i = 0; !error && i < n; i++) {
+		inp = inp_list[i];
+		if (inp->inp_gencnt <= gencnt) {
+			struct xinpcb xi;
+			xi.xi_len = sizeof xi;
+			/* XXX should avoid extra copy */
+			bcopy(inp, &xi.xi_inp, sizeof *inp);
+			if (inp->inp_socket)
+				sotoxsocket(inp->inp_socket, &xi.xi_socket);
+			error = SYSCTL_OUT(req, &xi, sizeof xi);
+		}
+	}
+	if (!error) {
+		/*
+		 * Give the user an updated idea of our state.
+		 * If the generation differs from what we told
+		 * her before, she knows that something happened
+		 * while we were processing this request, and it
+		 * might be necessary to retry.
+		 */
+		s = splnet();
+		xig.xig_gen = divcbinfo.ipi_gencnt;
+		xig.xig_sogen = so_gencnt;
+		xig.xig_count = divcbinfo.ipi_count;
+		splx(s);
+		error = SYSCTL_OUT(req, &xig, sizeof xig);
+	}
+	free(inp_list, M_TEMP);
+	return error;
+}
+
+SYSCTL_DECL(_net_inet_divert);
+SYSCTL_PROC(_net_inet_divert, OID_AUTO, pcblist, CTLFLAG_RD, 0, 0,
+	    div_pcblist, "S,xinpcb", "List of active divert sockets");
+
+/*
+ * User-request dispatch table for divert sockets.  The initializer is
+ * positional, so the entries must stay in struct pr_usrreqs member order.
+ * Divert supplies its own abort, attach, bind, detach, disconnect, send
+ * and shutdown handlers; the remaining slots use the shared in_* helpers,
+ * the pru_*_notsupp / pru_sense_null stubs, and the generic
+ * sosend/soreceive/sopoll routines.
+ */
+struct pr_usrreqs div_usrreqs = {
+	div_abort, pru_accept_notsupp, div_attach, div_bind,
+	pru_connect_notsupp, pru_connect2_notsupp, in_control, div_detach,
+	div_disconnect, pru_listen_notsupp, in_setpeeraddr, pru_rcvd_notsupp,
+	pru_rcvoob_notsupp, div_send, pru_sense_null, div_shutdown,
+	in_setsockaddr, sosend, soreceive, sopoll
+};
diff --git a/sys/netinet/ip_dummynet.c b/sys/netinet/ip_dummynet.c
new file mode 100644
index 0000000..8f69866
--- /dev/null
+++ b/sys/netinet/ip_dummynet.c
@@ -0,0 +1,1904 @@
+/*
+ * Copyright (c) 1998-2001 Luigi Rizzo, Universita` di Pisa
+ * Portions Copyright (c) 2000 Akamba Corp.
+ * All rights reserved
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+
+#define DEB(x)		/* expands to nothing: wrapped code is compiled out */
+#define DDB(x)	x	/* expands to its argument: wrapped code is compiled in */
+
+/*
+ * This module implements IP dummynet, a bandwidth limiter/delay emulator
+ * used in conjunction with the ipfw package.
+ * Description of the data structures used is in ip_dummynet.h
+ * Here you mainly find the following blocks of code:
+ *  + variable declarations;
+ *  + heap management functions;
+ *  + scheduler and dummynet functions;
+ *  + configuration and initialization.
+ *
+ * NOTA BENE: critical sections are protected by splimp()/splx()
+ *    pairs. One would think that splnet() is enough as for most of
+ *    the netinet code, but it is not so because when used with
+ *    bridging, dummynet is invoked at splimp().
+ *
+ * Most important Changes:
+ *
+ * 010124: Fixed WF2Q behaviour
+ * 010122: Fixed spl protection.
+ * 000601: WF2Q support
+ * 000106: large rewrite, use heaps to handle very many pipes.
+ * 980513:	initial release
+ *
+ * include files marked with XXX are probably not needed
+ */
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/malloc.h>
+#include <sys/mbuf.h>
+#include <sys/queue.h>			/* XXX */
+#include <sys/kernel.h>
+#include <sys/module.h>
+#include <sys/socket.h>
+#include <sys/socketvar.h>
+#include <sys/time.h>
+#include <sys/sysctl.h>
+#include <net/if.h>
+#include <net/route.h>
+#include <netinet/in.h>
+#include <netinet/in_systm.h>
+#include <netinet/in_var.h>
+#include <netinet/ip.h>
+#include <netinet/ip_fw.h>
+#include <netinet/ip_dummynet.h>
+#include <netinet/ip_var.h>
+
+#include "opt_bdg.h"
+#ifdef BRIDGE
+#include <netinet/if_ether.h> /* for struct arpcom */
+#include <net/bridge.h>
+#endif
+
+/*
+ * Private simulation clock, advanced by one on each run of dummynet(); we
+ * could probably use an existing one ("softticks" in sys/kern/kern_timer.c)
+ */
+static dn_key curr_time = 0 ; /* current simulation time */
+
+static int dn_hash_size = 64 ;	/* default hash size */
+
+/* statistics on number of queue searches and search steps */
+static int searches, search_steps ;
+static int pipe_expire = 1 ;   /* expire queue if empty */
+static int dn_max_ratio = 16 ; /* max queues/buckets ratio */
+
+static int red_lookup_depth = 256;	/* RED - default lookup table depth */
+static int red_avg_pkt_size = 512;      /* RED - default average packet size */
+static int red_max_pkt_size = 1500;     /* RED - default max packet size */
+
+/*
+ * Three heaps contain queues and pipes that the scheduler handles:
+ *
+ * ready_heap contains all dn_flow_queue related to fixed-rate pipes.
+ *
+ * wfq_ready_heap contains the pipes associated with WF2Q flows
+ *
+ * extract_heap contains pipes associated with delay lines.
+ *
+ */
+static struct dn_heap ready_heap, extract_heap, wfq_ready_heap ;
+
+static int heap_init(struct dn_heap *h, int size) ;	/* 1 on error, 0 if ok */
+static int heap_insert (struct dn_heap *h, dn_key key1, void *p); /* ditto */
+static void heap_extract(struct dn_heap *h, void *obj);
+
+static void transmit_event(struct dn_pipe *pipe);
+static void ready_event(struct dn_flow_queue *q);
+
+static struct dn_pipe *all_pipes = NULL ;	/* list of all pipes */
+static struct dn_flow_set *all_flow_sets = NULL ;/* list of all flow_sets */
+
+#ifdef SYSCTL_NODE	/* export the tunables and counters defined above */
+SYSCTL_NODE(_net_inet_ip, OID_AUTO, dummynet,
+		CTLFLAG_RW, 0, "Dummynet");
+SYSCTL_INT(_net_inet_ip_dummynet, OID_AUTO, hash_size,
+	    CTLFLAG_RW, &dn_hash_size, 0, "Default hash table size");
+SYSCTL_INT(_net_inet_ip_dummynet, OID_AUTO, curr_time,	/* NOTE(review): */
+	    CTLFLAG_RD, &curr_time, 0, "Current tick");	/* dn_key, not int */
+SYSCTL_INT(_net_inet_ip_dummynet, OID_AUTO, ready_heap,
+	    CTLFLAG_RD, &ready_heap.size, 0, "Size of ready heap");
+SYSCTL_INT(_net_inet_ip_dummynet, OID_AUTO, extract_heap,
+	    CTLFLAG_RD, &extract_heap.size, 0, "Size of extract heap");
+SYSCTL_INT(_net_inet_ip_dummynet, OID_AUTO, searches,
+	    CTLFLAG_RD, &searches, 0, "Number of queue searches");
+SYSCTL_INT(_net_inet_ip_dummynet, OID_AUTO, search_steps,
+	    CTLFLAG_RD, &search_steps, 0, "Number of queue search steps");
+SYSCTL_INT(_net_inet_ip_dummynet, OID_AUTO, expire,
+	    CTLFLAG_RW, &pipe_expire, 0, "Expire queue if empty");
+SYSCTL_INT(_net_inet_ip_dummynet, OID_AUTO, max_chain_len,
+	    CTLFLAG_RW, &dn_max_ratio, 0, 
+	"Max ratio between dynamic queues and buckets");
+SYSCTL_INT(_net_inet_ip_dummynet, OID_AUTO, red_lookup_depth,
+	CTLFLAG_RD, &red_lookup_depth, 0, "Depth of RED lookup table");
+SYSCTL_INT(_net_inet_ip_dummynet, OID_AUTO, red_avg_pkt_size,
+	CTLFLAG_RD, &red_avg_pkt_size, 0, "RED Medium packet size");
+SYSCTL_INT(_net_inet_ip_dummynet, OID_AUTO, red_max_pkt_size,
+	CTLFLAG_RD, &red_max_pkt_size, 0, "RED Max packet size");
+#endif /* SYSCTL_NODE */
+
+static int config_pipe(struct dn_pipe *p);
+static int ip_dn_ctl(struct sockopt *sopt);
+
+static void rt_unref(struct rtentry *);
+static void dummynet(void *);	/* per-tick handler, re-armed via timeout() */
+static void dummynet_flush(void);
+void dummynet_drain(void);
+
+int if_tx_rdy(struct ifnet *ifp);	/* called by an interface on tx_rdy */
+
+/*
+ * ip_fw_chain is used when deleting a pipe, because ipfw rules can
+ * hold references to the pipe (cached in rule->rule->pipe_ptr).
+ */
+extern LIST_HEAD (ip_fw_head, ip_fw_chain) ip_fw_chain_head;
+
+static void
+rt_unref(struct rtentry *rt)
+{   /* Release rt via RTFREE (no-op for NULL); warn if refcnt is already <= 0. */
+    if (rt != NULL) {
+	if (rt->rt_refcnt <= 0)
+	    printf("-- warning, refcnt now %ld, decreasing\n", rt->rt_refcnt);
+	RTFREE(rt);
+    }
+}
+
+/*
+ * Heap management functions.
+ *
+ * In the heap, first node is element 0. Children of i are 2i+1 and 2i+2.
+ * Some macros help finding parent/children so we can optimize them.
+ *
+ * heap_init() is called to expand the heap when needed.
+ * Increment size in blocks of 16 entries.
+ * XXX failure to allocate a new element is a pretty bad failure
+ * as we basically stall a whole queue forever!!
+ * Returns 1 on error, 0 on success
+ */
+#define HEAP_FATHER(x) ( ( (x) - 1 ) / 2 )	/* index of the parent of x */
+#define HEAP_LEFT(x) ( 2*(x) + 1 )		/* index of the left child of x */
+#define HEAP_IS_LEFT(x) ( (x) & 1 )		/* odd indexes are left children */
+#define HEAP_RIGHT(x) ( 2*(x) + 2 )		/* index of the right child of x */
+#define	HEAP_SWAP(a, b, buffer) { buffer = a ; a = b ; b = buffer ; } /* bare block */
+#define HEAP_INCREMENT	15	/* used as a mask: sizes round up to 16*k */
+
+static int
+heap_init(struct dn_heap *h, int new_size)
+{   /* Grow h to hold at least new_size entries; 0 = done, 1 = no memory. */
+    struct dn_heap_entry *grown;
+
+    if (new_size <= h->size) {	/* nothing to grow: complain, report ok */
+	printf("heap_init, Bogus call, have %d want %d\n",
+		h->size, new_size);
+	return 0;
+    }
+    new_size = (new_size + HEAP_INCREMENT) & ~HEAP_INCREMENT; /* round up */
+    grown = malloc(new_size * sizeof(*grown), M_IPFW, M_DONTWAIT);
+    if (grown == NULL) {
+	printf(" heap_init, resize %d failed\n", new_size);
+	return 1;
+    }
+    if (h->size > 0) {		/* carry the old entries over */
+	bcopy(h->p, grown, h->size * sizeof(*grown));
+	free(h->p, M_IPFW);
+    }
+    h->p = grown;
+    h->size = new_size;
+    return 0;
+}
+
+/*
+ * Insert element in heap. Normally, p != NULL, we insert p in
+ * a new position and bubble up. If p == NULL, then the element is
+ * already in place, and key1 is the position where to start the
+ * bubble-up. Returns 1 on failure (cannot allocate new heap entry).
+ *
+ * If offset > 0 the position (index, int) of the element in the heap is
+ * also stored in the element itself at the given offset in bytes.
+ * NB: SET_OFFSET and RESET_OFFSET expand to a bare "if" statement.
+ */
+#define SET_OFFSET(heap, node) \
+    if (heap->offset > 0) \
+	    *((int *)((char *)(heap->p[node].object) + heap->offset)) = node ;
+/*
+ * RESET_OFFSET is used for sanity checks. It stores -1, an invalid index,
+ * in the object found at position node.
+ */
+#define RESET_OFFSET(heap, node) \
+    if (heap->offset > 0) \
+	*((int *)((char *)(heap->p[node].object) + heap->offset)) = -1 ;
+static int
+heap_insert(struct dn_heap *h, dn_key key1, void *p)
+{
+    struct dn_heap_entry scratch;
+    int pos, parent;
+
+    /* Returns 0 on success, 1 if the heap could not be grown. */
+    if (p != NULL) {
+	/* new element: append at the end, growing the array when full */
+	pos = h->elements;
+	if (pos == h->size && heap_init(h, h->elements + 1))
+	    return 1;
+	h->p[pos].key = key1;
+	h->p[pos].object = p;
+	h->elements++;
+    } else {
+	/* element already stored: key1 is the slot to bubble up from */
+	pos = key1;
+    }
+    /* bubble up until the parent's key is strictly smaller */
+    for (; pos > 0; pos = parent) {
+	parent = HEAP_FATHER(pos);
+	if (DN_KEY_LT(h->p[parent].key, h->p[pos].key))
+	    break;
+	HEAP_SWAP(h->p[pos], h->p[parent], scratch);
+	SET_OFFSET(h, pos);
+    }
+    SET_OFFSET(h, pos);
+    return 0;
+}
+
+/*
+ * remove top element from heap, or obj if obj != NULL (needs h->offset > 0)
+ */
+static void
+heap_extract(struct dn_heap *h, void *obj)
+{  
+    int child, father, max = h->elements - 1 ;	/* max: index of last entry */
+
+    if (max < 0) {
+	printf("warning, extract from empty heap 0x%p\n", h);
+	return ;
+    }
+    father = 0 ; /* default: move up smallest child */
+    if (obj != NULL) { /* extract specific element, index is at offset */
+	if (h->offset <= 0)
+	    panic("*** heap_extract from middle not supported on this heap!!!\n");
+	father = *((int *)((char *)obj + h->offset)) ;
+	if (father < 0 || father >= h->elements) {
+	    printf("dummynet: heap_extract, father %d out of bound 0..%d\n",
+		father, h->elements);
+	    panic("heap_extract");
+	}
+    }
+    RESET_OFFSET(h, father);		/* removed object gets index -1 */
+    child = HEAP_LEFT(father) ;		/* left child */
+    while (child <= max) {		/* valid entry */
+	if (child != max && DN_KEY_LT(h->p[child+1].key, h->p[child].key) )
+	    child = child+1 ;		/* take right child, otherwise left */
+	h->p[father] = h->p[child] ;	/* pull the chosen child into the hole */
+	SET_OFFSET(h, father);
+	father = child ;
+	child = HEAP_LEFT(child) ;   /* left child for next loop */
+    }   
+    h->elements-- ;
+    if (father != max) {
+	/*
+	 * Fill hole with last entry and bubble up, reusing the insert code
+	 */
+	h->p[father] = h->p[max] ;
+	heap_insert(h, father, NULL); /* this one cannot fail */
+    }
+}           
+
+#if 0
+/*
+ * change the key of object to new_key, move it up or down accordingly
+ * and update references. XXX this one is never used (compiled out)!
+ */
+static void
+heap_move(struct dn_heap *h, dn_key new_key, void *object)
+{
+    int temp;
+    int i ;
+    int max = h->elements-1 ;
+    struct dn_heap_entry buf ;
+
+    if (h->offset <= 0)
+	panic("cannot move items on this heap");
+
+    i = *((int *)((char *)object + h->offset));	/* current index of object */
+    if (DN_KEY_LT(new_key, h->p[i].key) ) { /* must move up */
+	h->p[i].key = new_key ;
+	for (; i>0 && DN_KEY_LT(new_key, h->p[(temp = HEAP_FATHER(i))].key) ;
+		 i = temp ) { /* bubble up */
+	    HEAP_SWAP(h->p[i], h->p[temp], buf) ;
+	    SET_OFFSET(h, i);
+	}
+    } else {		/* must move down */
+	h->p[i].key = new_key ;
+	while ( (temp = HEAP_LEFT(i)) <= max ) { /* found left child */
+	    if ((temp != max) && DN_KEY_GT(h->p[temp].key, h->p[temp+1].key))
+		temp++ ; /* select child with min key */
+	    if (DN_KEY_GT(new_key, h->p[temp].key)) { /* go down */
+		HEAP_SWAP(h->p[i], h->p[temp], buf) ;
+		SET_OFFSET(h, i);
+	    } else
+		break ;
+	    i = temp ;
+	}
+    }
+    SET_OFFSET(h, i);
+}
+#endif /* heap_move, unused */
+
+/*
+ * Restore the heap property over the whole array by bubbling up every
+ * slot in index order. It is needed when we delete a bunch of entries.
+ */
+static void
+heapify(struct dn_heap *h)
+{
+    int slot = 0;
+
+    while (slot < h->elements)
+	heap_insert(h, slot++, NULL);
+}
+
+/* Free the entry array, if any, and reset the heap to the empty state. */
+static void
+heap_free(struct dn_heap *h)
+{
+    struct dn_heap_entry *entries = h->p;
+    int had_storage = (h->size > 0);
+    bzero(h, sizeof(*h));
+    if (had_storage)
+	free(entries, M_IPFW);
+}
+
+/*
+ * --- end of heap management functions ---
+ */
+
+/*
+ * Scheduler functions:
+ *
+ * transmit_event() is called when the delay-line needs to enter
+ * the scheduler, either because of existing pkts getting ready,
+ * or new packets entering the queue. The event handled is the delivery
+ * time of the packet.
+ *
+ * ready_event() does something similar with fixed-rate queues, and the
+ * event handled is the finish time of the head pkt.
+ *
+ * ready_event_wfq() does something similar with WF2Q queues, and the
+ * event handled is the start time of the head pkt.
+ *
+ * In all cases, we make sure that the data structures are consistent
+ * before passing pkts out, because this might trigger recursive
+ * invocations of the procedures.
+ */
+static void
+transmit_event(struct dn_pipe *pipe)
+{	/* hand over every pkt of pipe whose output_time is <= curr_time */
+    struct dn_pkt *pkt ;
+
+    while ( (pkt = pipe->head) && DN_KEY_LEQ(pkt->output_time, curr_time) ) {
+	/*
+	 * first unlink, then call procedures, since ip_input() can invoke
+	 * ip_output() and vice versa, thus causing nested calls
+	 */
+	pipe->head = DN_NEXT(pkt) ;
+
+	/*
+	 * The actual mbuf is preceded by a struct dn_pkt, resembling an mbuf
+	 * (NOT A REAL one, just a small block of malloc'ed memory) with
+	 *     m_type = MT_DUMMYNET
+	 *     m_next = actual mbuf to be processed by ip_input/output
+	 *     m_data = the matching rule
+	 * and some other fields.
+	 * The block IS FREED HERE because it contains parameters passed
+	 * to the called routine.
+	 */
+	switch (pkt->dn_dir) {
+	case DN_TO_IP_OUT:
+	    (void)ip_output((struct mbuf *)pkt, NULL, NULL, 0, NULL);
+	    rt_unref (pkt->ro.ro_rt) ;	/* drop the route held by the pkt */
+	    break ;
+
+	case DN_TO_IP_IN :
+	    ip_input((struct mbuf *)pkt) ;
+	    break ;
+
+#ifdef BRIDGE
+	case DN_TO_BDG_FWD : {
+	    struct mbuf *m = (struct mbuf *)pkt;
+	    struct ether_header *eh;
+
+	    if (pkt->dn_m->m_len < ETHER_HDR_LEN
+	      && (pkt->dn_m = m_pullup(pkt->dn_m, ETHER_HDR_LEN)) == NULL) {
+		printf("dummynet/bridge: pullup fail, dropping pkt\n");
+		break;
+	    }
+	    /*
+	     * same as ether_input, make eh be a pointer into the mbuf
+	     */
+	    eh = mtod(pkt->dn_m, struct ether_header *);
+	    m_adj(pkt->dn_m, ETHER_HDR_LEN);
+	    /*
+	     * bdg_forward() wants a pointer to the pseudo-mbuf-header, but
+	     * on return it will supply the pointer to the actual packet
+	     * (originally pkt->dn_m, but could be something else now) if
+	     * it has not consumed it.
+	     */
+	    m = bdg_forward(m, eh, pkt->ifp);
+	    if (m)
+		m_freem(m);
+	    }
+	    break ;
+#endif
+
+	default:
+	    printf("dummynet: bad switch %d!\n", pkt->dn_dir);
+	    m_freem(pkt->dn_m);
+	    break ;
+	}
+	FREE(pkt, M_IPFW);	/* the dn_pkt header itself, see above */
+    }
+    /* if there are leftover packets, put into the heap for next event */
+    if ( (pkt = pipe->head) )
+         heap_insert(&extract_heap, pkt->output_time, pipe ) ;
+    /* XXX should check errors on heap_insert, by draining the
+     * whole pipe p and hoping in the future we are more successful
+     */
+}
+
+/*
+ * Ticks to wait before pkt can go out: the missing credit (pkt length
+ * scaled by 8*hz, minus (q)->numbytes) over (p)->bandwidth, rounded up.
+ * q is a pipe (WF2Q) or a flow_queue. Expands to a plain expression.
+ */
+#define SET_TICKS(pkt, q, p)	\
+    (((pkt)->dn_m->m_pkthdr.len*8*hz - (q)->numbytes + (p)->bandwidth - 1) / \
+	    (p)->bandwidth)
+
+/*
+ * Unlink pkt from flow queue q, stamp it with the time it may leave
+ * the delay line (now + p->delay) and append it to the tail of p.
+ */
+static void
+move_pkt(struct dn_pkt *pkt, struct dn_flow_queue *q,
+	struct dn_pipe *p, int len)
+{
+    /* take it off the flow queue and fix the queue accounting */
+    q->head = DN_NEXT(pkt);
+    q->len_bytes -= len;
+    q->len--;
+    /* append to the delay line, terminating the list at pkt */
+    pkt->output_time = curr_time + p->delay;
+    if (p->head != NULL)
+	DN_NEXT(p->tail) = pkt;
+    else
+	p->head = pkt;
+    DN_NEXT(pkt) = NULL;
+    p->tail = pkt;
+}
+
+/*
+ * ready_event() is invoked every time the queue must enter the
+ * scheduler, either because the first packet arrives, or because
+ * a previously scheduled event fired.
+ * On invocation, drain as many pkts as possible (could be 0) and then
+ * if there are leftover packets reinsert the queue in the scheduler.
+ */
+static void
+ready_event(struct dn_flow_queue *q)
+{
+    struct dn_pkt *pkt;
+    struct dn_pipe *p = q->fs->pipe ;
+    int p_was_empty ;
+
+    if (p == NULL) {
+	printf("ready_event- pipe is gone\n");
+	return ;
+    }
+    p_was_empty = (p->head == NULL) ;	/* sampled before we add packets */
+
+    /*
+     * schedule fixed-rate queues linked to this pipe:
+     * Account for the bw accumulated since last scheduling, then
+     * drain as many pkts as allowed by q->numbytes and move to
+     * the delay line (in p) computing output time.
+     * bandwidth==0 (no limit) means we can drain the whole queue,
+     * setting len_scaled = 0 does the job.
+     */
+    q->numbytes += ( curr_time - q->sched_time ) * p->bandwidth;
+    while ( (pkt = q->head) != NULL ) {
+	int len = pkt->dn_m->m_pkthdr.len;
+	int len_scaled = p->bandwidth ? len*8*hz : 0 ;
+	if (len_scaled > q->numbytes )	/* not enough credit for this pkt */
+	    break ;
+	q->numbytes -= len_scaled ;
+	move_pkt(pkt, q, p, len);
+    }
+    /*
+     * If we have more packets queued, schedule next ready event
+     * (can only occur when bandwidth != 0, otherwise we would have
+     * flushed the whole queue in the previous loop).
+     * To this purpose we record the current time and compute how many
+     * ticks to go for the finish time of the packet.
+     */
+    if ( (pkt = q->head) != NULL ) { /* this implies bandwidth != 0 */
+	dn_key t = SET_TICKS(pkt, q, p); /* ticks i have to wait */
+	q->sched_time = curr_time ;
+	heap_insert(&ready_heap, curr_time + t, (void *)q );
+	/* XXX should check errors on heap_insert, and drain the whole
+	 * queue on error hoping next time we are luckier.
+	 */
+    } else	/* RED needs to know when the queue becomes empty */
+	q->q_time = curr_time;
+    /*
+     * If the delay line was empty call transmit_event(p) now.
+     * Otherwise, the scheduler will take care of it.
+     */
+    if (p_was_empty)
+	transmit_event(p);
+}
+
+/*
+ * Called when we can transmit packets on WF2Q queues. Take pkts out of
+ * the queues at their start time, and enqueue into the delay line.
+ * Packets are drained until p->numbytes < 0. As long as
+ * len_scaled >= p->numbytes, the packet goes into the delay line
+ * with a deadline p->delay. For the last packet, if p->numbytes<0,
+ * there is an additional delay.
+ */
+static void
+ready_event_wfq(struct dn_pipe *p)
+{
+    int p_was_empty = (p->head == NULL) ;
+    struct dn_heap *sch = &(p->scheduler_heap);
+    struct dn_heap *neh = &(p->not_eligible_heap) ;
+
+    if (p->if_name[0] == 0) /* tx clock is simulated */
+	p->numbytes += ( curr_time - p->sched_time ) * p->bandwidth;
+    else { /* tx clock is for real, the ifq must be empty or this is a NOP */
+	if (p->ifp && p->ifp->if_snd.ifq_head != NULL)
+	    return ;
+	else {
+	    DEB(printf("pipe %d ready from %s --\n",
+		p->pipe_nr, p->if_name);)
+	}
+    }
+
+    /*
+     * While we have backlogged traffic AND credit, we need to do
+     * something on the queue.
+     */
+    while ( p->numbytes >=0 && (sch->elements>0 || neh->elements >0) ) {
+	if (sch->elements > 0) { /* have some eligible pkts to send out */
+	    struct dn_flow_queue *q = sch->p[0].object ;
+	    struct dn_pkt *pkt = q->head;  
+	    struct dn_flow_set *fs = q->fs;   
+	    u_int64_t len = pkt->dn_m->m_pkthdr.len;
+	    int len_scaled = p->bandwidth ? len*8*hz : 0 ;
+
+	    heap_extract(sch, NULL); /* remove queue from heap */
+	    p->numbytes -= len_scaled ;
+	    move_pkt(pkt, q, p, len);
+
+	    p->V += (len<<MY_M) / p->sum ; /* update V; NOTE(review): sum != 0? */
+	    q->S = q->F ; /* update start time */
+	    if (q->len == 0) { /* Flow not backlogged any more */
+		fs->backlogged-- ;
+		heap_insert(&(p->idle_heap), q->F, q);
+	    } else { /* still backlogged */
+		/*
+		 * update F and position in backlogged queue, then
+		 * put flow in not_eligible_heap (we will fix this later).
+		 */
+		len = (q->head)->dn_m->m_pkthdr.len;
+		q->F += (len<<MY_M)/(u_int64_t) fs->weight ;
+		if (DN_KEY_LEQ(q->S, p->V))
+		    heap_insert(neh, q->S, q);
+		else	/* NOTE(review): S > V goes straight to sch -- confirm */
+		    heap_insert(sch, q->F, q);
+	    }
+	}
+	/*
+	 * now compute V = max(V, min(S_i)). Remember that all elements in sch
+	 * have by definition S_i <= V so if sch is not empty, V is surely
+	 * the max and we must not update it. Conversely, if sch is empty
+	 * we only need to look at neh.
+	 */
+	if (sch->elements == 0 && neh->elements > 0)
+	    p->V = MAX64 ( p->V, neh->p[0].key );
+	/* move from neh to sch any packets that have become eligible */
+	while (neh->elements > 0 && DN_KEY_LEQ(neh->p[0].key, p->V) ) {
+	    struct dn_flow_queue *q = neh->p[0].object ;
+	    heap_extract(neh, NULL);
+	    heap_insert(sch, q->F, q);
+	}
+
+	if (p->if_name[0] != '\0') {/* tx clock is from a real thing */
+	    p->numbytes = -1 ; /* mark not ready for I/O */
+	    break ;
+	}
+    }
+    if (sch->elements == 0 && neh->elements == 0 && p->numbytes >= 0
+	    && p->idle_heap.elements > 0) {
+	/*
+	 * no traffic and no events scheduled. We can get rid of idle-heap.
+	 */
+	int i ;
+
+	for (i = 0 ; i < p->idle_heap.elements ; i++) {
+	    struct dn_flow_queue *q = p->idle_heap.p[i].object ;
+
+	    q->F = 0 ;
+	    q->S = q->F + 1 ;	/* S == F + 1 marks the timestamps invalid */
+	}
+	p->sum = 0 ;
+	p->V = 0 ;
+	p->idle_heap.elements = 0 ;
+    }
+    /*
+     * If we are getting clocks from dummynet (not a real interface) and
+     * If we are under credit, schedule the next ready event.
+     * Also fix the delivery time of the last packet.
+     */
+    if (p->if_name[0]==0 && p->numbytes < 0) { /* this implies bandwidth >0 */
+	dn_key t=0 ; /* number of ticks i have to wait */
+
+	if (p->bandwidth > 0)
+	    t = ( p->bandwidth -1 - p->numbytes) / p->bandwidth ;
+	p->tail->output_time += t ;
+	p->sched_time = curr_time ;
+	heap_insert(&wfq_ready_heap, curr_time + t, (void *)p);
+	/* XXX should check errors on heap_insert, and drain the whole
+	 * queue on error hoping next time we are luckier.
+	 */
+    }
+    /*
+     * If the delay line was empty call transmit_event(p) now.
+     * Otherwise, the scheduler will take care of it.
+     */
+    if (p_was_empty)
+	transmit_event(p);
+}
+
+/*
+ * This is called once per tick, or HZ times per second. It is used to
+ * increment the current tick counter and schedule expired events.
+ */
+static void
+dummynet(void * __unused unused)
+{
+    void *p ; /* generic parameter to handler */
+    struct dn_heap *h ;
+    int s ;
+    struct dn_heap *heaps[3];
+    int i;
+    struct dn_pipe *pe ;
+
+    heaps[0] = &ready_heap ;		/* fixed-rate queues */
+    heaps[1] = &wfq_ready_heap ;	/* wfq queues */
+    heaps[2] = &extract_heap ;		/* delay line */
+    s = splimp(); /* see note on top, splnet() is not enough */
+    curr_time++ ;
+    for (i=0; i < 3 ; i++) {	/* the handler to call depends on i, below */
+	h = heaps[i];
+	while (h->elements > 0 && DN_KEY_LEQ(h->p[0].key, curr_time) ) {
+	    DDB(if (h->p[0].key > curr_time)
+		printf("-- dummynet: warning, heap %d is %d ticks late\n",
+		    i, (int)(curr_time - h->p[0].key));)
+	    p = h->p[0].object ; /* store a copy before heap_extract */
+	    heap_extract(h, NULL); /* need to extract before processing */
+	    if (i == 0)
+		ready_event(p) ;
+	    else if (i == 1) {
+		struct dn_pipe *pipe = p;
+		if (pipe->if_name[0] != '\0')	/* pipe is clocked by an interface */
+		    printf("*** bad ready_event_wfq for pipe %s\n",
+			pipe->if_name);
+		else
+		    ready_event_wfq(p) ;
+	    } else
+		transmit_event(p);
+	}
+    }
+    /* sweep pipes trying to expire idle flow_queues */
+    for (pe = all_pipes; pe ; pe = pe->next )
+	if (pe->idle_heap.elements > 0 &&
+		DN_KEY_LT(pe->idle_heap.p[0].key, pe->V) ) {
+	    struct dn_flow_queue *q = pe->idle_heap.p[0].object ;
+
+	    heap_extract(&(pe->idle_heap), NULL);	/* at most one per tick */
+	    q->S = q->F + 1 ; /* mark timestamp as invalid */
+	    pe->sum -= q->fs->weight ;
+	}
+    splx(s);
+    timeout(dummynet, NULL, 1);	/* re-arm for the next tick */
+}
+ 
+/*
+ * Called by an interface when tx_rdy occurs; runs the pipe bound to ifp.
+ */
+int
+if_tx_rdy(struct ifnet *ifp)
+{
+    struct dn_pipe *p;
+
+    for (p = all_pipes; p ; p = p->next )
+	if (p->ifp == ifp)
+	    break ;
+    if (p == NULL) {	/* none bound yet: match pipes by "<name><unit>" */
+	char buf[32];	/* snprintf truncates instead of overflowing it */
+	snprintf(buf, sizeof(buf), "%s%d", ifp->if_name, ifp->if_unit);
+	for (p = all_pipes; p ; p = p->next )
+	    if (!strcmp(p->if_name, buf) ) {
+		p->ifp = ifp ;	/* remember the binding for next time */
+		DEB(printf("++ tx rdy from %s (now found)\n", buf);)
+		break ;
+	    }
+    }
+    if (p != NULL) {
+	DEB(printf("++ tx rdy from %s%d - qlen %d\n", ifp->if_name,
+		ifp->if_unit, ifp->if_snd.ifq_len);)
+	p->numbytes = 0 ; /* mark ready for I/O */
+	ready_event_wfq(p);
+    }
+    return 0;	/* always 0, whether or not a pipe was found */
+}
+
+/*
+ * Unconditionally expire empty queues in case of shortage: walk every
+ * hash slot of fs plus the overflow slot and free each queue that holds
+ * no packet and whose timestamps are marked invalid (S == F + 1).
+ * The sweep is skipped if one already ran at the current time_second.
+ * Returns the number of queues freed.
+ */
+static int
+expire_queues(struct dn_flow_set *fs)
+{
+    struct dn_flow_queue **link, *cur;
+    int slot, before = fs->rq_elements;
+
+    if (fs->last_expired == time_second)
+	return 0;
+    fs->last_expired = time_second;
+
+    for (slot = 0; slot <= fs->rq_size; slot++) {	/* last one is overflow */
+	link = &fs->rq[slot];	/* the pointer that leads to cur */
+	while ((cur = *link) != NULL) {
+	    if (cur->head == NULL && cur->S == cur->F + 1) {
+		*link = cur->next;	/* idle: unlink and release */
+		fs->rq_elements--;
+		free(cur, M_IPFW);
+	    } else
+		link = &cur->next;	/* in use: step over it */
+	}
+    }
+    return before - fs->rq_elements;
+}
+
+/*
+ * Allocate a zeroed flow queue and link it at the head of slot i of fs.
+ * When fs is over its queues/buckets ratio and nothing can be expired,
+ * slot fs->rq_size (the overflow queue) is used or created instead.
+ * Returns the queue, or NULL if the allocation failed.
+ */
+static struct dn_flow_queue *
+create_queue(struct dn_flow_set *fs, int i)
+{
+    struct dn_flow_queue *fresh;
+    int crowded = (fs->rq_elements > fs->rq_size * dn_max_ratio);
+
+    if (crowded && expire_queues(fs) == 0) {
+	/* no room and nothing expired: fall back to the overflow slot */
+	i = fs->rq_size;
+	if (fs->rq[i] != NULL)
+	    return fs->rq[i];
+    }
+    fresh = malloc(sizeof(*fresh), M_IPFW, M_DONTWAIT | M_ZERO); /* M_ZERO needed */
+    if (fresh == NULL) {
+	printf("sorry, cannot allocate queue for new flow\n");
+	return NULL;
+    }
+    fresh->hash_slot = i;
+    fresh->fs = fs;
+    fresh->S = fresh->F + 1;	/* hack - mark timestamp as invalid */
+    fresh->next = fs->rq[i];	/* push in front of the slot's chain */
+    fs->rq[i] = fresh;
+    fs->rq_elements++;
+    return fresh;
+}
+
+/*
+ * Given a flow_set and a pkt in last_pkt, find the matching queue after
+ * masking last_pkt in place (moved to the front of its slot to speed up
+ * later searches) or create one. Returns NULL if none can be allocated.
+ */
+static struct dn_flow_queue *
+find_queue(struct dn_flow_set *fs)
+{
+    int i = 0 ; /* we need i and q for new allocations */
+    struct dn_flow_queue *q, *prev;
+
+    if ( !(fs->flags_fs & DN_HAVE_FLOW_MASK) )
+	q = fs->rq[0] ;		/* no mask: slot 0 holds the only queue */
+    else {
+	/* first, do the masking */
+	last_pkt.dst_ip &= fs->flow_mask.dst_ip ;
+	last_pkt.src_ip &= fs->flow_mask.src_ip ;
+	last_pkt.dst_port &= fs->flow_mask.dst_port ;
+	last_pkt.src_port &= fs->flow_mask.src_port ;
+	last_pkt.proto &= fs->flow_mask.proto ;
+	last_pkt.flags = 0 ; /* we don't care about this one */
+	/* then, hash function */
+	i = ( (last_pkt.dst_ip) & 0xffff ) ^
+	    ( (last_pkt.dst_ip >> 15) & 0xffff ) ^
+	    ( (last_pkt.src_ip << 1) & 0xffff ) ^
+	    ( (last_pkt.src_ip >> 16 ) & 0xffff ) ^
+	    (last_pkt.dst_port << 1) ^ (last_pkt.src_port) ^
+	    (last_pkt.proto );
+	i = i % fs->rq_size ;
+	/* finally, scan the current list for a match */
+	searches++ ;
+	for (prev=NULL, q = fs->rq[i] ; q ; ) {
+	    search_steps++;
+	    if (bcmp(&last_pkt, &(q->id), sizeof(q->id) ) == 0)
+		break ; /* found */
+	    else if (pipe_expire && q->head == NULL && q->S == q->F+1 ) {
+		/* entry is idle and not in any heap, expire it */
+		struct dn_flow_queue *old_q = q ;
+
+		if (prev != NULL)
+		    prev->next = q = q->next ;
+		else
+		    fs->rq[i] = q = q->next ;
+		fs->rq_elements-- ;
+		free(old_q, M_IPFW);
+		continue ;	/* q already advanced, prev is unchanged */
+	    }
+	    prev = q ;
+	    q = q->next ;
+	}
+	if (q && prev != NULL) { /* found and not in front */
+	    prev->next = q->next ;
+	    q->next = fs->rq[i] ;
+	    fs->rq[i] = q ;
+	}
+    }
+    if (q == NULL) { /* no match, need to allocate a new entry */
+	q = create_queue(fs, i);
+	if (q != NULL)
+	q->id = last_pkt ;	/* guarded by the test on the line above */
+    }
+    return q ;
+}
+
+static int
+red_drops(struct dn_flow_set *fs, struct dn_flow_queue *q, int len)
+{
+    /*
+     * RED algorithm
+     * Returns 1 if the packet of length len must be dropped, 0 otherwise.
+     * RED calculates the average queue size (avg) using a low-pass filter
+     * with an exponential weighted (w_q) moving average:
+     * 	avg  <-  (1-w_q) * avg + w_q * q_size
+     * where q_size is the queue length (measured in bytes or packets).
+     * 
+     * If q_size == 0, we compute the idle time for the link, and set
+     *	avg = (1 - w_q)^(idle/s)
+     * where s is the time needed for transmitting a medium-sized packet.
+     * 
+     * Now, if avg < min_th the packet is enqueued.
+     * If avg > max_th the packet is dropped. Otherwise, the packet is
+     * dropped with probability P function of avg.
+     * 
+     */
+
+    int64_t p_b = 0;
+    /* queue in bytes or packets ? */
+    u_int q_size = (fs->flags_fs & DN_QSIZE_IS_BYTES) ? q->len_bytes : q->len;
+
+    DEB(printf("\n%d q: %2u ", (int) curr_time, q_size);)
+
+    /* average queue size estimation */
+    if (q_size != 0) {
+	/*
+	 * queue is not empty, avg <- avg + (q_size - avg) * w_q
+	 */
+	int diff = SCALE(q_size) - q->avg;
+	int64_t v = SCALE_MUL((int64_t) diff, (int64_t) fs->w_q);
+
+	q->avg += (int) v;
+    } else {
+	/*
+	 * queue is empty, find for how long the queue has been
+	 * empty and use a lookup table for computing
+	 * (1 - w_q)^(idle_time/s) where s is the time to send a
+	 * (small) packet.
+	 * XXX check wraps...
+	 */
+	if (q->avg) {
+	    u_int t = (curr_time - q->q_time) / fs->lookup_step;
+
+	    q->avg = (t < fs->lookup_depth) ?
+		    SCALE_MUL(q->avg, fs->w_q_lookup[t]) : 0;
+	}
+    }
+    DEB(printf("avg: %u ", SCALE_VAL(q->avg));)
+
+    /* should i drop ? */
+
+    if (q->avg < fs->min_th) {
+	q->count = -1;
+	return 0; /* accept packet ; */
+    }
+    if (q->avg >= fs->max_th) { /* average queue >=  max threshold */
+	if (fs->flags_fs & DN_IS_GENTLE_RED) {
+	    /*
+	     * According to Gentle-RED, if avg is greater than max_th the
+	     * packet is dropped with a probability
+	     *	p_b = c_3 * avg - c_4
+	     * where c_3 = (1 - max_p) / max_th, and c_4 = 1 - 2 * max_p
+	     */
+	    p_b = SCALE_MUL((int64_t) fs->c_3, (int64_t) q->avg) - fs->c_4;
+	} else {
+	    q->count = -1;
+	    DEB(printf("- drop");)	/* debug trace only, like "- red drop" */
+	    return 1 ;
+	}
+    } else if (q->avg > fs->min_th) {
+	/*
+	 * we compute p_b using the linear dropping function p_b = c_1 *
+	 * avg - c_2, where c_1 = max_p / (max_th - min_th), and c_2 =
+	 * max_p * min_th / (max_th - min_th)
+	 */
+	p_b = SCALE_MUL((int64_t) fs->c_1, (int64_t) q->avg) - fs->c_2;
+    }
+    if (fs->flags_fs & DN_QSIZE_IS_BYTES)
+	p_b = (p_b * len) / fs->max_pkt_size;
+    if (++q->count == 0)
+	q->random = random() & 0xffff;
+    else {
+	/*
+	 * q->count counts packets arrived since last drop, so a greater
+	 * value of q->count means a greater packet drop probability.
+	 */
+	if (SCALE_MUL(p_b, SCALE((int64_t) q->count)) > q->random) {
+	    q->count = 0;
+	    DEB(printf("- red drop");)
+	    /* after a drop we calculate a new random value */
+	    q->random = random() & 0xffff;
+	    return 1;    /* drop */
+	}
+    }
+    /* end of RED algorithm */
+    return 0 ; /* accept */
+}
+
+/*
+ * Find the flow_set that number pipe_nr refers to: a stand-alone
+ * flow_set from all_flow_sets when the rule is a "queue" rule, otherwise
+ * the flow_set embedded in the matching pipe from all_pipes.
+ * A successful lookup is cached in the rule's pipe_ptr.
+ * Returns NULL when nothing matches.
+ */
+static __inline
+struct dn_flow_set *
+locate_flowset(int pipe_nr, struct ip_fw_chain *rule)
+{
+    struct dn_flow_set *found = NULL ;
+
+    if ( (rule->rule->fw_flg & IP_FW_F_COMMAND) == IP_FW_F_QUEUE ) {
+	struct dn_flow_set *cur = all_flow_sets ;
+
+	while (cur != NULL && cur->fs_nr != pipe_nr)
+	    cur = cur->next ;
+	found = cur ;
+    } else {
+	struct dn_pipe *cur = all_pipes ;
+
+	while (cur != NULL && cur->pipe_nr != pipe_nr)
+	    cur = cur->next ;
+	if (cur != NULL)
+	    found = &(cur->fs) ;
+    }
+    if (found == NULL)
+	return NULL ;
+    rule->rule->pipe_ptr = found ;	/* remember it for later packets */
+    return found ;
+}
+
+/*
+ * dummynet hook for packets. Below 'pipe' is a pipe or a queue
+ * depending on whether WF2Q or fixed bw is used.
+ *
+ *  pipe_nr	pipe or flow_set number; only the low 16 bits are used
+ *  dir		direction, stored in the packet descriptor; for
+ *		DN_TO_IP_OUT the route, destination and flags are saved too
+ *  m		the packet; it is freed here if the packet is dropped
+ *  ifp		interface, stored in the packet descriptor
+ *  ro, dst, flags	only looked at when dir == DN_TO_IP_OUT
+ *  rule	matching firewall rule; its pipe_ptr caches the flow_set
+ *
+ * Returns 0 if the packet has been queued, ENOBUFS if it was dropped.
+ * The body runs at splimp().
+ */
+int
+dummynet_io(int pipe_nr, int dir,	/* pipe_nr can also be a fs_nr */
+	struct mbuf *m, struct ifnet *ifp, struct route *ro,
+	struct sockaddr_in *dst,
+	struct ip_fw_chain *rule, int flags)
+{
+    struct dn_pkt *pkt;
+    struct dn_flow_set *fs;
+    struct dn_pipe *pipe ;
+    u_int64_t len = m->m_pkthdr.len ;
+    struct dn_flow_queue *q = NULL ;
+    int s ;
+
+    s = splimp();
+
+    pipe_nr &= 0xffff ;
+
+    /* use the flow_set cached in the rule, or look it up by number */
+    if ( (fs = rule->rule->pipe_ptr) == NULL ) {
+	fs = locate_flowset(pipe_nr, rule);
+	if (fs == NULL)
+	    goto dropit ;	/* this queue/pipe does not exist! */
+    }
+    pipe = fs->pipe ;
+    if (pipe == NULL) { /* must be a queue, try find a matching pipe */
+	for (pipe = all_pipes; pipe && pipe->pipe_nr != fs->parent_nr;
+		 pipe = pipe->next)
+	    ;
+	if (pipe != NULL)
+	    fs->pipe = pipe ;
+	else {
+	    printf("No pipe %d for queue %d, drop pkt\n",
+		fs->parent_nr, fs->fs_nr);
+	    goto dropit ;
+	}
+    }
+    q = find_queue(fs);
+    if ( q == NULL )
+	goto dropit ;		/* cannot allocate queue		*/
+    /*
+     * update statistics, then check reasons to drop pkt
+     */
+    q->tot_bytes += len ;
+    q->tot_pkts++ ;
+    if ( fs->plr && random() < fs->plr )
+	goto dropit ;		/* random pkt drop			*/
+    if ( fs->flags_fs & DN_QSIZE_IS_BYTES) {
+    	if (q->len_bytes > fs->qsize)
+	    goto dropit ;	/* queue size overflow			*/
+    } else {
+	if (q->len >= fs->qsize)
+	    goto dropit ;	/* queue count overflow			*/
+    }
+    if ( fs->flags_fs & DN_IS_RED && red_drops(fs, q, len) )
+	goto dropit ;
+
+    /* XXX expensive to zero, see if we can remove it*/
+    pkt = (struct dn_pkt *)malloc(sizeof (*pkt), M_IPFW, M_NOWAIT | M_ZERO);
+    if ( pkt == NULL )
+	goto dropit ;		/* cannot allocate packet header	*/
+    /* ok, i can handle the pkt now... */
+    /* build and enqueue packet + parameters */
+    pkt->hdr.mh_type = MT_DUMMYNET ;
+    (struct ip_fw_chain *)pkt->hdr.mh_data = rule ;
+    DN_NEXT(pkt) = NULL;
+    pkt->dn_m = m;
+    pkt->dn_dir = dir ;
+
+    pkt->ifp = ifp;
+    if (dir == DN_TO_IP_OUT) {
+	/*
+	 * We need to copy *ro because for ICMP pkts (and maybe others)
+	 * the caller passed a pointer into the stack; dst might also be
+	 * a pointer into *ro so it needs to be updated.
+	 */
+	pkt->ro = *ro;
+	/* pkt->ro now points to the route as well, so count it */
+	if (ro->ro_rt)
+	    ro->ro_rt->rt_refcnt++ ;
+	if (dst == (struct sockaddr_in *)&ro->ro_dst) /* dst points into ro */
+	    dst = (struct sockaddr_in *)&(pkt->ro.ro_dst) ;
+
+	pkt->dn_dst = dst;
+	pkt->flags = flags ;
+    }
+    /* append the descriptor to the tail of the flow queue */
+    if (q->head == NULL)
+	q->head = pkt;
+    else
+	DN_NEXT(q->tail) = pkt;
+    q->tail = pkt;
+    q->len++;
+    q->len_bytes += len ;
+
+    if ( q->head != pkt )	/* flow was not idle, we are done */
+	goto done;
+    /*
+     * If we reach this point the flow was previously idle, so we need
+     * to schedule it. This involves different actions for fixed-rate or
+     * WF2Q queues.
+     */
+    if ( (rule->rule->fw_flg & IP_FW_F_COMMAND) == IP_FW_F_PIPE ) {
+	/*
+	 * Fixed-rate queue: just insert into the ready_heap.
+	 */
+	dn_key t = 0 ;
+	if (pipe->bandwidth) 
+	    t = SET_TICKS(pkt, q, pipe);
+	q->sched_time = curr_time ;
+	if (t == 0)	/* must process it now */
+	    ready_event( q );
+	else
+	    heap_insert(&ready_heap, curr_time + t , q );
+    } else {
+	/*
+	 * WF2Q. First, compute start time S: if the flow was idle (S=F+1)
+	 * set S to the virtual time V for the controlling pipe, and update
+	 * the sum of weights for the pipe; otherwise, remove flow from
+	 * idle_heap and set S to max(F,V).
+	 * Second, compute finish time F = S + len/weight.
+	 * Third, if pipe was idle, update V=max(S, V).
+	 * Fourth, count one more backlogged flow.
+	 */
+	if (DN_KEY_GT(q->S, q->F)) { /* means timestamps are invalid */
+	    q->S = pipe->V ;
+	    pipe->sum += fs->weight ; /* add weight of new queue */
+	} else {
+	    heap_extract(&(pipe->idle_heap), q);
+	    q->S = MAX64(q->F, pipe->V ) ;
+	}
+	q->F = q->S + ( len<<MY_M )/(u_int64_t) fs->weight;
+
+	if (pipe->not_eligible_heap.elements == 0 &&
+		pipe->scheduler_heap.elements == 0)
+	    pipe->V = MAX64 ( q->S, pipe->V );
+	fs->backlogged++ ;
+	/*
+	 * Look at eligibility. A flow is not eligibile if S>V (when
+	 * this happens, it means that there is some other flow already
+	 * scheduled for the same pipe, so the scheduler_heap cannot be
+	 * empty). If the flow is not eligible we just store it in the
+	 * not_eligible_heap. Otherwise, we store in the scheduler_heap
+	 * and possibly invoke ready_event_wfq() right now if there is
+	 * leftover credit.
+	 * Note that for all flows in scheduler_heap (SCH), S_i <= V,
+	 * and for all flows in not_eligible_heap (NEH), S_i > V .
+	 * So when we need to compute max( V, min(S_i) ) forall i in SCH+NEH,
+	 * we only need to look into NEH.
+	 */
+	if (DN_KEY_GT(q->S, pipe->V) ) { /* not eligible */
+	    if (pipe->scheduler_heap.elements == 0)
+		printf("++ ouch! not eligible but empty scheduler!\n");
+	    heap_insert(&(pipe->not_eligible_heap), q->S, q);
+	} else {
+	    heap_insert(&(pipe->scheduler_heap), q->F, q);
+	    if (pipe->numbytes >= 0) { /* pipe is idle */
+		if (pipe->scheduler_heap.elements != 1)
+		    printf("*** OUCH! pipe should have been idle!\n");
+		DEB(printf("Waking up pipe %d at %d\n",
+			pipe->pipe_nr, (int)(q->F >> MY_M)); )
+		pipe->sched_time = curr_time ;
+		ready_event_wfq(pipe);
+	    }
+	}
+    }
+done:
+    splx(s);
+    return 0;
+
+dropit:
+    splx(s);
+    /*
+     * q is non-NULL only once find_queue() has succeeded, so drops that
+     * happen before that point are not accounted on any queue.
+     */
+    if (q)
+	q->drops++ ;
+    m_freem(m);
+    return ENOBUFS ;
+}
+
+/*
+ * Below, the rt_unref is only needed when (pkt->dn_dir == DN_TO_IP_OUT)
+ * Doing this would probably save us the initial bzero of dn_pkt
+ */
+#define DN_FREE_PKT(pkt)	{		\
+	struct dn_pkt *n = pkt ;		\
+	rt_unref ( n->ro.ro_rt ) ;		\
+	m_freem(n->dn_m);			\
+	pkt = DN_NEXT(n) ;			\
+	free(n, M_IPFW) ;	}
+
+/*
+ * Dispose all packets and flow_queues on a flow_set.
+ * If all=1, also remove red lookup table and other storage,
+ * including the descriptor itself. The descriptor is NOT freed when it
+ * is the one embedded in a dn_pipe (fs == &fs->pipe->fs): in that case
+ * it goes away when the caller frees the pipe.
+ * For the one in dn_pipe MUST also cleanup ready_heap...
+ */
+static void
+purge_flow_set(struct dn_flow_set *fs, int all)
+{
+    struct dn_pkt *pkt ;
+    struct dn_flow_queue *q, *qn ;
+    int i ;
+
+    /* the hash table is checked for NULL below, so do not trust it here */
+    if (fs->rq != NULL) {
+	for (i = 0 ; i <= fs->rq_size ; i++ ) {
+	    for (q = fs->rq[i] ; q ; q = qn ) {
+		for (pkt = q->head ; pkt ; )
+		    DN_FREE_PKT(pkt) ;
+		qn = q->next ;
+		free(q, M_IPFW);
+	    }
+	    fs->rq[i] = NULL ;
+	}
+    }
+    fs->rq_elements = 0 ;
+    if (all) {
+	/* RED - free lookup table */
+	if (fs->w_q_lookup) {
+	    free(fs->w_q_lookup, M_IPFW);
+	    fs->w_q_lookup = NULL ;	/* an embedded fs outlives this call */
+	}
+	if (fs->rq) {
+	    free(fs->rq, M_IPFW);
+	    fs->rq = NULL ;
+	}
+	/*
+	 * If this fs is not part of a pipe, free it. An embedded fs always
+	 * has fs->pipe pointing to its own pipe, so a NULL fs->pipe (a
+	 * queue that never found its parent pipe, or whose parent pipe has
+	 * been deleted) means a stand-alone descriptor: it must be freed
+	 * too, otherwise it is leaked.
+	 */
+	if (fs->pipe == NULL || fs != &(fs->pipe->fs) )
+	    free(fs, M_IPFW);
+    }
+}
+
+/*
+ * Release everything a pipe (not a stand-alone flow_set) owns: the
+ * contents of its embedded flow_set, the packets on its head list and
+ * its three heaps. The pipe descriptor itself is left to the caller.
+ */
+static void
+purge_pipe(struct dn_pipe *pipe)
+{
+    struct dn_pkt *cur ;
+
+    /* queues, packets and RED storage of the embedded flow_set */
+    purge_flow_set( &(pipe->fs), 1 );
+
+    /* packets linked from pipe->head; the macro advances 'cur' */
+    cur = pipe->head ;
+    while (cur != NULL)
+	DN_FREE_PKT(cur) ;
+
+    /* per-pipe heaps */
+    heap_free( &(pipe->scheduler_heap) );
+    heap_free( &(pipe->not_eligible_heap) );
+    heap_free( &(pipe->idle_heap) );
+}
+
+/*
+ * Throw away the whole dummynet configuration: all pipes, all
+ * flow_sets, all queued packets and the global heaps. References held
+ * by ipfw rules are cleared first.
+ */
+static void
+dummynet_flush()
+{
+    struct dn_pipe *pipe_list, *doomed_pipe ;
+    struct dn_flow_set *fs_list, *doomed_fs ;
+    struct ip_fw_chain *chain ;
+    int s ;
+
+    s = splimp() ;
+
+    /* no rule may keep a pointer to what is about to be freed */
+    LIST_FOREACH(chain, &ip_fw_chain_head, next)
+	chain->rule->pipe_ptr = NULL ;
+    /* detach both lists, so nothing can be looked up any more ... */
+    pipe_list = all_pipes ;
+    all_pipes = NULL ;
+    fs_list = all_flow_sets ;
+    all_flow_sets = NULL ;
+    /* ... and drop the global heaps so we don't have unwanted events */
+    heap_free(&ready_heap);
+    heap_free(&wfq_ready_heap);
+    heap_free(&extract_heap);
+    splx(s) ;
+
+    /*
+     * The detached lists are now private to us: purge the flow_sets
+     * first, then purge and free the pipes.
+     */
+    while (fs_list != NULL) {
+	doomed_fs = fs_list ;
+	fs_list = fs_list->next ;	/* read before it is purged */
+	purge_flow_set(doomed_fs, 1);
+    }
+    while (pipe_list != NULL) {
+	doomed_pipe = pipe_list ;
+	purge_pipe(doomed_pipe);
+	pipe_list = doomed_pipe->next ;
+	free(doomed_pipe, M_IPFW);
+    }
+}
+
+
+extern struct ip_fw_chain *ip_fw_default_rule ;
+
+/*
+ * Walk every packet queued on flow_set fs (all hash slots, including the
+ * extra one at index rq_size) and make those that still point to rule r
+ * point to the default rule instead.
+ */
+static void
+dn_rule_delete_fs(struct dn_flow_set *fs, void *r)
+{
+    struct dn_flow_queue *q ;
+    struct dn_pkt *pkt ;
+    int slot = 0 ;
+
+    while (slot <= fs->rq_size) {	/* last one is ovflow */
+	for (q = fs->rq[slot] ; q != NULL ; q = q->next ) {
+	    for (pkt = q->head ; pkt != NULL ; pkt = DN_NEXT(pkt) ) {
+		if (pkt->hdr.mh_data != r)
+		    continue ;
+		pkt->hdr.mh_data = (void *)ip_fw_default_rule ;
+	    }
+	}
+	slot++ ;
+    }
+}
+/*
+ * Called when firewall rule r is deleted: every packet held by dummynet
+ * that still refers to r is made to refer to the default rule instead.
+ */
+void
+dn_rule_delete(void *r)
+{
+    struct dn_flow_set *fs ;
+    struct dn_pipe *pipe ;
+    struct dn_pkt *pkt ;
+
+    /*
+     * A rule references either a queue (dn_flow_set) or a pipe, so it
+     * would be enough to scan one of the two lists; scanning both is
+     * harmless.
+     */
+    fs = all_flow_sets ;
+    while (fs != NULL) {
+	dn_rule_delete_fs(fs, r);
+	fs = fs->next ;
+    }
+    for ( pipe = all_pipes ; pipe != NULL ; pipe = pipe->next ) {
+	/* packets in the queues of the pipe's own flow_set ... */
+	dn_rule_delete_fs(&(pipe->fs), r);
+	/* ... and packets linked from pipe->head */
+	pkt = pipe->head ;
+	while (pkt != NULL) {
+	    if (pkt->hdr.mh_data == r)
+		pkt->hdr.mh_data = (void *)ip_fw_default_rule ;
+	    pkt = DN_NEXT(pkt) ;
+	}
+    }
+}
+
+/*
+ * setup RED parameters
+ *
+ * Copies the RED configuration from p (the descriptor passed in by the
+ * config program) into x and rebuilds the lookup table of x.
+ *
+ * Returns 0 on success, EINVAL if the parameters cannot be used, ENOSPC
+ * if the lookup table cannot be allocated.
+ * On failure x itself is NOT freed: it may be embedded in a dn_pipe and
+ * the caller goes on using it. Instead RED is switched off on x
+ * (DN_IS_RED cleared, no lookup table, lookup_depth 0), so that x is
+ * still a consistent flow_set.
+ */
+static int
+config_red(struct dn_flow_set *p, struct dn_flow_set * x) 
+{
+    int i, error;
+
+    /*
+     * If the lookup table already exists, free it; it is created again
+     * below. Clear the pointer so that no exit path leaves it dangling.
+     */
+    if (x->w_q_lookup) {
+	free(x->w_q_lookup, M_IPFW);
+	x->w_q_lookup = NULL;
+    }
+    x->lookup_depth = 0;
+
+    /* the thresholds are used as divisors below */
+    if (p->max_th == p->min_th ||
+	    ((x->flags_fs & DN_IS_GENTLE_RED) && p->max_th == 0)) {
+	printf("dummynet: invalid RED thresholds\n");
+	error = EINVAL;
+	goto fail;
+    }
+    if (red_lookup_depth == 0) {
+	printf("\nnet.inet.ip.dummynet.red_lookup_depth must be > 0");
+	error = EINVAL;
+	goto fail;
+    }
+
+    x->w_q = p->w_q;
+    x->min_th = SCALE(p->min_th);
+    x->max_th = SCALE(p->max_th);
+    x->max_p = p->max_p;
+
+    /* coefficients of the linear dropping functions used by RED */
+    x->c_1 = p->max_p / (p->max_th - p->min_th);
+    x->c_2 = SCALE_MUL(x->c_1, SCALE(p->min_th));
+    if (x->flags_fs & DN_IS_GENTLE_RED) {
+	x->c_3 = (SCALE(1) - p->max_p) / p->max_th;
+	x->c_4 = (SCALE(1) - 2 * p->max_p);
+    }
+
+    x->lookup_depth = red_lookup_depth;
+    x->w_q_lookup = (u_int *) malloc(x->lookup_depth * sizeof(int),
+	    M_IPFW, M_DONTWAIT);
+    if (x->w_q_lookup == NULL) {
+	printf("sorry, cannot allocate red lookup table\n");
+	x->lookup_depth = 0;
+	error = ENOSPC;
+	goto fail;
+    }
+
+    /* fill the lookup table with (1 - w_q)^x */
+    x->lookup_step = p->lookup_step ;
+    x->lookup_weight = p->lookup_weight ;
+    x->w_q_lookup[0] = SCALE(1) - x->w_q;
+    for (i = 1; i < x->lookup_depth; i++)
+	x->w_q_lookup[i] = SCALE_MUL(x->w_q_lookup[i - 1], x->lookup_weight);
+    /* fall back to defaults if the sysctl variables hold nonsense */
+    if (red_avg_pkt_size < 1)
+	red_avg_pkt_size = 512 ;
+    x->avg_pkt_size = red_avg_pkt_size ;
+    if (red_max_pkt_size < 1)
+	red_max_pkt_size = 1500 ;
+    x->max_pkt_size = red_max_pkt_size ;
+    return 0 ;
+
+fail:
+    /* without a lookup table RED must not be run on this flow_set */
+    x->flags_fs &= ~DN_IS_RED;
+    return error;
+}
+
+/*
+ * Allocate the (zeroed) table of flow queue slots for flow_set x.
+ * With a flow mask the number of slots comes from pfs->rq_size, or from
+ * dn_hash_size when that is 0, and is clamped to the range 4..1024;
+ * without a mask a single slot is used. One more slot than rq_size is
+ * always allocated.
+ * Returns 0 on success, ENOSPC if the allocation fails.
+ */
+static int
+alloc_hash(struct dn_flow_set *x, struct dn_flow_set *pfs)
+{
+    int slots = 1 ;	/* one is enough for null mask */
+
+    if (x->flags_fs & DN_HAVE_FLOW_MASK) {	/* allocate some slots */
+	slots = (pfs->rq_size != 0) ? pfs->rq_size : dn_hash_size ;
+	if (slots < 4)
+	    slots = 4 ;
+	else if (slots > 1024)
+	    slots = 1024 ;
+    }
+    x->rq_size = slots ;
+    x->rq = malloc((1 + x->rq_size) * sizeof(struct dn_flow_queue *),
+	    M_IPFW, M_DONTWAIT | M_ZERO);
+    if (x->rq != NULL) {
+	x->rq_elements = 0 ;
+	return 0 ;
+    }
+    printf("sorry, cannot allocate queue\n");
+    return ENOSPC ;
+}
+
+/*
+ * Copy the user-settable parameters (flags, queue size, loss rate, flow
+ * mask) from src into x, sanitize the queue size and, for RED
+ * flow_sets, set up the RED state.
+ */
+static void
+set_fs_parms(struct dn_flow_set *x, struct dn_flow_set *src)
+{
+    x->flow_mask = src->flow_mask;
+    x->plr = src->plr;
+    x->qsize = src->qsize;
+    x->flags_fs = src->flags_fs;
+
+    if ( (x->flags_fs & DN_QSIZE_IS_BYTES) == 0 ) {
+	/* size in slots: both 0 and anything above 100 become 50 */
+	if (x->qsize == 0 || x->qsize > 100)
+	    x->qsize = 50 ;
+    } else if (x->qsize > 1024*1024) {
+	/* size in bytes: capped at 1MB */
+	x->qsize = 1024*1024 ;
+    }
+
+    if ( (x->flags_fs & DN_IS_RED) == 0 )
+	return ;
+    /* configuring RED */
+    config_red(src, x) ;    /* XXX should check errors */
+}
+
+/*
+ * setup pipe or queue parameters.
+ *
+ * p is the descriptor handed in by the config program. Exactly one of
+ * p->pipe_nr and p->fs.fs_nr must be non-zero: the former selects a
+ * pipe, the latter a queue (stand-alone dn_flow_set). An object with
+ * that number is updated if it exists, otherwise it is created and
+ * linked into all_pipes / all_flow_sets, which are kept sorted by
+ * number.
+ *
+ * Returns 0 on success, EINVAL for inconsistent requests, ENOSPC when
+ * memory cannot be allocated.
+ */
+
+static int 
+config_pipe(struct dn_pipe *p)
+{
+    int s ;
+    struct dn_flow_set *pfs = &(p->fs);
+
+    /*
+     * The config program passes parameters as follows:
+     * bw = bits/second (0 means no limits),
+     * delay = ms, must be translated into ticks.
+     * qsize = slots/bytes
+     */
+	p->delay = ( p->delay * hz ) / 1000 ;
+    /* We need either a pipe number or a flow_set number */
+    if (p->pipe_nr == 0 && pfs->fs_nr == 0)
+	return EINVAL ;
+    if (p->pipe_nr != 0 && pfs->fs_nr != 0)
+	return EINVAL ;
+    if (p->pipe_nr != 0) { /* this is a pipe */
+	struct dn_pipe *x, *a, *b;
+	/*
+	 * locate pipe: b stops at the first entry whose number is not
+	 * smaller than the requested one, a is its predecessor (or NULL)
+	 */
+	for (a = NULL , b = all_pipes ; b && b->pipe_nr < p->pipe_nr ;
+		 a = b , b = b->next) ;
+
+	if (b == NULL || b->pipe_nr != p->pipe_nr) { /* new pipe */
+	    x = malloc(sizeof(struct dn_pipe), M_IPFW, M_DONTWAIT | M_ZERO);
+	    if (x == NULL) {
+		printf("ip_dummynet.c: no memory for new pipe\n");
+		return ENOSPC;
+	    }
+	    x->pipe_nr = p->pipe_nr;
+	    /* the embedded flow_set points back to its own pipe */
+	    x->fs.pipe = x ;
+	    /* idle_heap is the only one from which we extract from the middle.
+	     */
+	    x->idle_heap.size = x->idle_heap.elements = 0 ;
+	    x->idle_heap.offset=OFFSET_OF(struct dn_flow_queue, heap_pos);
+	} else
+	    x = b;
+
+	    x->bandwidth = p->bandwidth ;
+	x->numbytes = 0; /* just in case... */
+	bcopy(p->if_name, x->if_name, sizeof(p->if_name) );
+	x->ifp = NULL ; /* reset interface ptr */
+	    x->delay = p->delay ;
+	set_fs_parms(&(x->fs), pfs);
+
+
+	/* a freshly allocated (zeroed) pipe has no hash table yet */
+	if ( x->fs.rq == NULL ) { /* a new pipe */
+	    s = alloc_hash(&(x->fs), pfs) ;
+	    if (s) {
+		/*
+		 * NOTE(review): a RED lookup table possibly set up by
+		 * set_fs_parms() above is not released on this path.
+		 */
+		free(x, M_IPFW);
+		return s ;
+	    }
+	    /* link the new pipe between a and b, keeping the list sorted */
+	    s = splimp() ;
+	    x->next = b ;
+	    if (a == NULL)
+		all_pipes = x ;
+	    else
+		a->next = x ;
+	    splx(s);
+	}
+    } else { /* config queue */
+	struct dn_flow_set *x, *a, *b ;
+
+	/* locate flow_set, same search as for pipes above */
+	for (a=NULL, b=all_flow_sets ; b && b->fs_nr < pfs->fs_nr ;
+		 a = b , b = b->next) ;
+
+	if (b == NULL || b->fs_nr != pfs->fs_nr) { /* new  */
+	    if (pfs->parent_nr == 0)	/* need link to a pipe */
+		return EINVAL ;
+	    x = malloc(sizeof(struct dn_flow_set), M_IPFW, M_DONTWAIT | M_ZERO);
+	    if (x == NULL) {
+		printf("ip_dummynet.c: no memory for new flow_set\n");
+		return ENOSPC;
+	    }
+	    x->fs_nr = pfs->fs_nr;
+	    x->parent_nr = pfs->parent_nr;
+	    /* weight is forced into the range 1..100 */
+	    x->weight = pfs->weight ;
+	    if (x->weight == 0)
+		x->weight = 1 ;
+	    else if (x->weight > 100)
+		x->weight = 100 ;
+	} else {
+	    /* Change parent pipe not allowed; must delete and recreate */
+	    if (pfs->parent_nr != 0 && b->parent_nr != pfs->parent_nr)
+		return EINVAL ;
+	    /* note that the weight of an existing flow_set is left alone */
+	    x = b;
+	}
+	set_fs_parms(x, pfs);
+
+	/* a freshly allocated (zeroed) flow_set has no hash table yet */
+	if ( x->rq == NULL ) { /* a new flow_set */
+	    s = alloc_hash(x, pfs) ;
+	    if (s) {
+		/*
+		 * NOTE(review): a RED lookup table possibly set up by
+		 * set_fs_parms() above is not released on this path.
+		 */
+		free(x, M_IPFW);
+		return s ;
+	    }
+	    /* link the new flow_set between a and b, keeping the order */
+	    s = splimp() ;
+	    x->next = b;
+	    if (a == NULL)
+		all_flow_sets = x;
+	    else
+		a->next = x;
+	    splx(s);
+	}
+    }
+    return 0 ;
+}
+
+/*
+ * Drop from heap h every entry whose object is a flow queue belonging
+ * to flow_set fs (used when fs is about to be deleted). The heap is
+ * rebuilt once at the end, and only if something was removed.
+ */
+static void
+fs_remove_from_heap(struct dn_heap *h, struct dn_flow_set *fs)
+{
+    int idx = 0 ;
+    int removed = 0 ;
+
+    while (idx < h->elements) {
+	struct dn_flow_queue *q = (struct dn_flow_queue *)h->p[idx].object ;
+
+	if (q->fs != fs) {
+	    idx++ ;
+	    continue ;
+	}
+	/* overwrite with the last entry, then look at this slot again */
+	h->elements-- ;
+	h->p[idx] = h->p[h->elements] ;
+	removed = 1 ;
+    }
+    if (removed)
+	heapify(h);
+}
+
+/*
+ * Remove pipe p from heap h. A pipe can be there at most once, so the
+ * search stops at the first hit; the heap is rebuilt after the removal.
+ */
+static void
+pipe_remove_from_heap(struct dn_heap *h, struct dn_pipe *p)
+{
+    int pos ;
+
+    for (pos = 0 ; pos < h->elements ; pos++ ) {
+	if (h->p[pos].object != p)
+	    continue ;
+	/* found it: move the last entry into its place */
+	h->elements-- ;
+	h->p[pos] = h->p[h->elements] ;
+	heapify(h);
+	return ;
+    }
+}
+
+/*
+ * drain all queues. Called in case of severe mbuf shortage.
+ * All queued packets and flow queues are released, the pipes and
+ * flow_sets themselves (i.e. the configuration) are kept.
+ *
+ * NOTE(review): the global heaps are emptied here, the heaps inside
+ * each pipe are not touched -- confirm they cannot still reference the
+ * flow queues freed by purge_flow_set().
+ */
+void
+dummynet_drain()
+{
+    struct dn_flow_set *fs ;
+    struct dn_pipe *pipe ;
+    struct dn_pkt *pkt ;
+
+    heap_free(&ready_heap);
+    heap_free(&wfq_ready_heap);
+    heap_free(&extract_heap);
+
+    /* empty the queues of all stand-alone flow_sets */
+    fs = all_flow_sets ;
+    while (fs != NULL) {
+	purge_flow_set(fs, 0);
+	fs = fs->next ;
+    }
+
+    /* then, for each pipe, its own queues and the pipe->head list */
+    pipe = all_pipes ;
+    while (pipe != NULL) {
+	purge_flow_set(&(pipe->fs), 0);
+	pkt = pipe->head ;
+	while (pkt != NULL)
+	    DN_FREE_PKT(pkt) ;
+	pipe->head = pipe->tail = NULL ;
+	pipe = pipe->next ;
+    }
+}
+
+/*
+ * Fully delete a pipe or a queue, cleaning up associated info.
+ *
+ * p is the descriptor handed in by the config program; only p->pipe_nr
+ * and p->fs.fs_nr are looked at, and exactly one of them must be
+ * non-zero.
+ * Returns 0 on success, EINVAL if the request is inconsistent or the
+ * pipe/queue does not exist.
+ */
+static int 
+delete_pipe(struct dn_pipe *p)
+{
+    int s ;
+    struct ip_fw_chain *chain ;
+
+    if (p->pipe_nr == 0 && p->fs.fs_nr == 0)
+	return EINVAL ;
+    if (p->pipe_nr != 0 && p->fs.fs_nr != 0)
+	return EINVAL ;
+    if (p->pipe_nr != 0) { /* this is an old-style pipe */
+	struct dn_pipe *a, *b;
+	struct dn_flow_set *fs;
+
+	/* locate pipe: b is the candidate, a its predecessor (or NULL) */
+	for (a = NULL , b = all_pipes ; b && b->pipe_nr < p->pipe_nr ;
+		 a = b , b = b->next) ;
+	if (b == NULL || (b->pipe_nr != p->pipe_nr) )
+	    return EINVAL ; /* not found */
+
+	s = splimp() ;
+
+	/* unlink from list of pipes */
+	if (a == NULL)
+	    all_pipes = b->next ;
+	else
+	    a->next = b->next ;
+	/* remove references to this pipe from the ip_fw rules. */
+	LIST_FOREACH(chain, &ip_fw_chain_head, next)
+	    if (chain->rule->pipe_ptr == &(b->fs))
+		chain->rule->pipe_ptr = NULL ;
+
+	/*
+	 * remove all references to this pipe from flow_sets. The
+	 * flow_sets are kept (purge with all=0), only their queues and
+	 * packets are released.
+	 */
+	for (fs = all_flow_sets; fs; fs= fs->next )
+	    if (fs->pipe == b) {
+		printf("++ ref to pipe %d from fs %d\n",
+			p->pipe_nr, fs->fs_nr);
+		fs->pipe = NULL ;
+		purge_flow_set(fs, 0);
+	    }
+	/* queues of the embedded flow_set may sit in the ready_heap */
+	fs_remove_from_heap(&ready_heap, &(b->fs));
+	purge_pipe(b);	/* remove all data associated to this pipe */
+	/* remove reference to here from extract_heap and wfq_ready_heap */
+	pipe_remove_from_heap(&extract_heap, b);
+	pipe_remove_from_heap(&wfq_ready_heap, b);
+	splx(s);
+	free(b, M_IPFW);
+    } else { /* this is a WF2Q queue (dn_flow_set) */
+	struct dn_flow_set *a, *b;
+
+	/* locate set: b is the candidate, a its predecessor (or NULL) */
+	for (a = NULL, b = all_flow_sets ; b && b->fs_nr < p->fs.fs_nr ;
+		 a = b , b = b->next) ;
+	if (b == NULL || (b->fs_nr != p->fs.fs_nr) )
+	    return EINVAL ; /* not found */
+
+	s = splimp() ;
+	/* unlink from list of flow_sets */
+	if (a == NULL)
+	    all_flow_sets = b->next ;
+	else
+	    a->next = b->next ;
+	/* remove references to this flow_set from the ip_fw rules. */
+	LIST_FOREACH(chain, &ip_fw_chain_head, next)
+	    if (chain->rule->pipe_ptr == b)
+		chain->rule->pipe_ptr = NULL ;
+
+	/* b->pipe is NULL if the flow_set is not bound to a pipe */
+	if (b->pipe != NULL) {
+	    /* Update total weight on parent pipe and cleanup parent heaps */
+	    b->pipe->sum -= b->weight * b->backlogged ;
+	    fs_remove_from_heap(&(b->pipe->not_eligible_heap), b);
+	    fs_remove_from_heap(&(b->pipe->scheduler_heap), b);
+#if 1	/* XXX should i remove from idle_heap as well ? */
+	    fs_remove_from_heap(&(b->pipe->idle_heap), b);
+#endif
+	}
+	/* release queues, packets and other storage of the flow_set */
+	purge_flow_set(b, 1);
+	splx(s);
+    }
+    return 0 ;
+}
+
+/*
+ * helper function used to copy data from kernel in DUMMYNET_GET
+ *
+ * Copies every flow queue descriptor of 'set' into consecutive
+ * struct dn_flow_queue slots starting at bp, with the pointers inside
+ * each copy cleared. A few consistency checks are logged on the way.
+ * Returns the address just past the last descriptor written.
+ */
+static char *
+dn_copy_set(struct dn_flow_set *set, char *bp)
+{
+    struct dn_flow_queue *src ;
+    struct dn_flow_queue *dst = (struct dn_flow_queue *)bp ;
+    int slot ;
+    int copied = 0 ;
+
+    for (slot = 0 ; slot <= set->rq_size ; slot++) {
+	src = set->rq[slot] ;
+	while (src != NULL) {
+	    if (src->hash_slot != slot)
+		printf("++ at %d: wrong slot (have %d, "
+		    "should be %d)\n", copied, src->hash_slot, slot);
+	    if (src->fs != set)
+		printf("++ at %d: wrong fs ptr (have %p, should be %p)\n",
+			slot, src->fs, set);
+	    copied++ ;
+	    bcopy(src, dst, sizeof( *src ) );
+	    /* the copy must not carry kernel list pointers */
+	    dst->fs = NULL ;
+	    dst->head = NULL ;
+	    dst->tail = NULL ;
+	    dst->next = NULL ;
+
+	    src = src->next ;
+	    dst++ ;
+	}
+    }
+    if (copied != set->rq_elements)
+	printf("++ wrong count, have %d should be %d\n",
+	    copied, set->rq_elements);
+    return (char *)dst ;
+}
+
+/*
+ * Handle IP_DUMMYNET_GET: build a snapshot of the whole configuration
+ * in a temporary buffer and copy it out through sopt.
+ *
+ * Buffer layout: for each pipe a struct dn_pipe followed by the
+ * struct dn_flow_queue descriptors of its flow_set; then, for each
+ * stand-alone flow_set, a struct dn_flow_set followed by its queue
+ * descriptors. In each record the 'next' field is overwritten with
+ * DN_IS_PIPE or DN_IS_QUEUE to tell the two kinds apart.
+ *
+ * Returns ENOBUFS if the buffer cannot be allocated, otherwise the
+ * result of sooptcopyout().
+ */
+static int
+dummynet_get(struct sockopt *sopt)
+{
+    char *buf, *bp ; /* bp is the "copy-pointer" */
+    size_t size ;
+    struct dn_flow_set *set ;
+    struct dn_pipe *p ;
+    int s, error=0 ;
+
+    /* sizing and copying happen under the same spl, so they agree */
+    s = splimp();
+    /*
+     * compute size of data structures: list of pipes and flow_sets.
+     */
+    for (p = all_pipes, size = 0 ; p ; p = p->next )
+	size += sizeof( *p ) +
+	    p->fs.rq_elements * sizeof(struct dn_flow_queue);
+    for (set = all_flow_sets ; set ; set = set->next )
+	size += sizeof ( *set ) +
+	    set->rq_elements * sizeof(struct dn_flow_queue);
+    buf = malloc(size, M_TEMP, M_DONTWAIT);
+    if (buf == 0) {
+	splx(s);
+	return ENOBUFS ;
+    }
+    for (p = all_pipes, bp = buf ; p ; p = p->next ) {
+	struct dn_pipe *pipe_bp = (struct dn_pipe *)bp ;
+
+	/*
+	 * copy pipe descriptor into *bp, convert delay back to ms,
+	 * then copy the flow_set descriptor(s) one at a time.
+	 * After each flow_set, copy the queue descriptor it owns.
+	 */
+	bcopy(p, bp, sizeof( *p ) );
+	pipe_bp->delay = (pipe_bp->delay * 1000) / hz ;
+	/*
+	 * XXX the following is a hack based on ->next being the
+	 * first field in dn_pipe and dn_flow_set. The correct
+	 * solution would be to move the dn_flow_set to the beginning
+	 * of struct dn_pipe.
+	 */
+	pipe_bp->next = (struct dn_pipe *)DN_IS_PIPE ;
+	/*
+	 * clean pointers
+	 * NOTE(review): only the fields below are cleared; any other
+	 * pointer in the structure (e.g. inside the pipe's heaps, or
+	 * fs.w_q_lookup) is copied out as is -- confirm this is intended.
+	 */
+	pipe_bp->head = pipe_bp->tail = NULL ;
+	pipe_bp->fs.next = NULL ;
+	pipe_bp->fs.pipe = NULL ;
+	pipe_bp->fs.rq = NULL ;
+
+	bp += sizeof( *p ) ;
+	bp = dn_copy_set( &(p->fs), bp );
+    }
+    for (set = all_flow_sets ; set ; set = set->next ) {
+	struct dn_flow_set *fs_bp = (struct dn_flow_set *)bp ;
+	bcopy(set, bp, sizeof( *set ) );
+	/* XXX same hack as above */
+	fs_bp->next = (struct dn_flow_set *)DN_IS_QUEUE ;
+	fs_bp->pipe = NULL ;
+	fs_bp->rq = NULL ;
+	bp += sizeof( *set ) ;
+	bp = dn_copy_set( set, bp );
+    }
+    splx(s);
+    /* the copy to the caller is done on the private buffer, outside spl */
+    error = sooptcopyout(sopt, buf, size);
+    FREE(buf, M_TEMP);
+    return error ;
+}
+
+/*
+ * Handler for the various dummynet socket options (get, flush, config,
+ * del). Returns 0 or an errno value.
+ */
+static int
+ip_dn_ctl(struct sockopt *sopt)
+{
+    struct dn_pipe tmp_pipe ;	/* request copied in from the caller */
+    int error ;
+
+    /* Disallow sets in really-really secure mode. */
+    if (sopt->sopt_dir == SOPT_SET && securelevel >= 3)
+	return (EPERM);
+
+    switch (sopt->sopt_name) {
+    case IP_DUMMYNET_GET :
+	return dummynet_get(sopt);
+
+    case IP_DUMMYNET_FLUSH :
+	dummynet_flush() ;
+	return 0 ;
+
+    case IP_DUMMYNET_CONFIGURE :
+    case IP_DUMMYNET_DEL :	/* remove a pipe or queue */
+	/* both take exactly one struct dn_pipe as argument */
+	error = sooptcopyin(sopt, &tmp_pipe, sizeof tmp_pipe,
+		sizeof tmp_pipe);
+	if (error)
+	    return error ;
+	if (sopt->sopt_name == IP_DUMMYNET_CONFIGURE)
+	    return config_pipe(&tmp_pipe);
+	return delete_pipe(&tmp_pipe);
+
+    default :
+	printf("ip_dn_ctl -- unknown option %d", sopt->sopt_name);
+	return EINVAL ;
+    }
+}
+
+/*
+ * Bring dummynet to its initial state: empty lists of pipes and
+ * flow_sets, empty global heaps, control hook installed and the first
+ * call of dummynet() scheduled one tick from now.
+ */
+static void
+ip_dn_init(void)
+{
+    printf("DUMMYNET initialized (010124)\n");
+
+    /* no pipes, no queues */
+    all_flow_sets = NULL ;
+    all_pipes = NULL ;
+
+    /* the three global heaps start empty */
+    ready_heap.offset = 0 ;
+    ready_heap.elements = 0 ;
+    ready_heap.size = 0 ;
+
+    wfq_ready_heap.offset = 0 ;
+    wfq_ready_heap.elements = 0 ;
+    wfq_ready_heap.size = 0 ;
+
+    extract_heap.offset = 0 ;
+    extract_heap.elements = 0 ;
+    extract_heap.size = 0 ;
+
+    /* socket options are served from now on; start the periodic work */
+    ip_dn_ctl_ptr = ip_dn_ctl;
+    timeout(dummynet, NULL, 1);
+}
+
+/* control hook found at load time, put back at unload time */
+static ip_dn_ctl_t *old_dn_ctl_ptr ;
+
+/*
+ * Module event handler. On load the current control hook is saved and
+ * dummynet is initialized; on unload the saved hook is restored and the
+ * whole configuration is flushed. Other events are ignored.
+ * Always returns 0.
+ *
+ * NOTE(review): nothing here cancels the timeout() armed by
+ * ip_dn_init(); confirm it is stopped elsewhere before unloading.
+ */
+static int
+dummynet_modevent(module_t mod, int type, void *data)
+{
+	int s ;
+
+	if (type == MOD_LOAD) {
+		s = splimp();
+		old_dn_ctl_ptr = ip_dn_ctl_ptr;
+		ip_dn_init();
+		splx(s);
+	} else if (type == MOD_UNLOAD) {
+		s = splimp();
+		ip_dn_ctl_ptr =  old_dn_ctl_ptr;
+		splx(s);
+		dummynet_flush();
+	}
+	return 0 ;
+}
+
+/*
+ * Module descriptor: name, event handler (dummynet_modevent) and a NULL
+ * third member, registered with DECLARE_MODULE below.
+ */
+static moduledata_t dummynet_mod = {
+	"dummynet",
+	dummynet_modevent,
+	NULL
+} ;
+DECLARE_MODULE(dummynet, dummynet_mod, SI_SUB_PSEUDO, SI_ORDER_ANY);
diff --git a/sys/netinet/ip_dummynet.h b/sys/netinet/ip_dummynet.h
new file mode 100644
index 0000000..fe80718
--- /dev/null
+++ b/sys/netinet/ip_dummynet.h
@@ -0,0 +1,356 @@
+/*
+ * Copyright (c) 1998-2000 Luigi Rizzo, Universita` di Pisa
+ * Portions Copyright (c) 2000 Akamba Corp.
+ * All rights reserved
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+
+#ifndef _IP_DUMMYNET_H
+#define _IP_DUMMYNET_H
+
+/*
+ * Definition of dummynet data structures. In the structures, I decided
+ * not to use the macros in <sys/queue.h> in the hope of making the code
+ * easier to port to other architectures. The type of lists and queue we
+ * use here is pretty simple anyways.
+ */
+
+/*
+ * We start with a heap, which is used in the scheduler to decide when
+ * to transmit packets etc.
+ *
+ * The key for the heap is used for two different values:
+ *
+ * 1. timer ticks- max 10K/second, so 32 bits are enough;
+ *
+ * 2. virtual times. These increase in steps of len/x, where len is the
+ *    packet length, and x is either the weight of the flow, or the
+ *    sum of all weights.
+ *    If we limit to max 1000 flows and a max weight of 100, then
+ *    x needs 17 bits. The packet size is 16 bits, so we can easily
+ *    overflow if we do not allow errors.
+ * So we use a key "dn_key" which is 64 bits. Some macros are used to
+ * compare key values and handle wraparounds.
+ * MAX64 returns the largest of two key values.
+ * MY_M is used as a shift count when doing fixed point arithmetic
+ * (a better name would be useful...).
+ */
+typedef u_int64_t dn_key ;      /* sorting key */
+/*
+ * Keys are compared through the sign of their 64-bit difference, so
+ * the ordering stays correct when a key wraps around.
+ */
+#define DN_KEY_LT(a,b)     ((int64_t)((a)-(b)) < 0)
+#define DN_KEY_LEQ(a,b)    ((int64_t)((a)-(b)) <= 0)
+#define DN_KEY_GT(a,b)     ((int64_t)((a)-(b)) > 0)
+#define DN_KEY_GEQ(a,b)    ((int64_t)((a)-(b)) >= 0)
+/*
+ * Larger of two keys.  The whole expansion is parenthesized so that
+ * it can be used as an operand (e.g. MAX64(a,b) + 1); note that each
+ * argument may be evaluated twice.
+ */
+#define MAX64(x,y)  ((( (int64_t) ( (y)-(x) )) > 0 ) ? (y) : (x))
+#define MY_M	16 /* number of left shift to obtain a larger precision */
+
+/*
+ * XXX With this scaling, max 1000 flows, max weight 100, 1Gbit/s, the
+ * virtual time wraps every 15 days.
+ */
+
+/*
+ * The OFFSET_OF macro is used to return the offset of a field within
+ * a structure. It is used by the heap management routines.
+ *
+ * The offset is obtained by taking the address of the field in a
+ * structure placed at address 0 and casting that address to int.
+ * NOTE(review): where pointers are wider than int the cast narrows
+ * the value; presumably harmless for structure offsets -- confirm.
+ */
+#define OFFSET_OF(type, field) ((int)&( ((type *)0)->field) )
+
+/*
+ * A heap entry is made of a key and a pointer to the actual
+ * object stored in the heap.
+ * The heap is an array of dn_heap_entry entries, dynamically allocated.
+ * Current size is "size", with "elements" actually in use.
+ * The heap normally supports only ordered insert and extract from the top.
+ * If we want to extract an object from the middle of the heap, we
+ * have to know where the object itself is located in the heap (or we
+ * need to scan the whole array). To this purpose, an object has a
+ * field (int) which contains the index of the object itself into the
+ * heap. When the object is moved, the field must also be updated.
+ * The offset of the index in the object is stored in the 'offset'
+ * field in the heap descriptor. The assumption is that this offset
+ * is non-zero if we want to support extract from the middle.
+ */
+/* One slot of the heap array: a sorting key and the object it ranks. */
+struct dn_heap_entry {
+    dn_key key ;	/* sorting key. Topmost element is smallest one */
+    void *object ;	/* object pointer */
+} ;
+
+/* Heap descriptor: bookkeeping plus the dynamically allocated array. */
+struct dn_heap {
+    int size ;		/* number of entries the array p can hold */
+    int elements ;	/* number of entries actually in use */
+    int offset ; /* XXX if > 0 this is the offset of direct ptr to obj */
+    struct dn_heap_entry *p ;	/* really an array of "size" entries */
+} ;
+
+/*
+ * MT_DUMMYNET is a new (fake) mbuf type that is prepended to the
+ * packet when it comes out of a pipe. The definition
+ * ought to go in /sys/sys/mbuf.h but here it is less intrusive.
+ * For now it is only an alias for MT_CONTROL, so it has no type
+ * value of its own.
+ */
+
+#define MT_DUMMYNET MT_CONTROL
+
+/*
+ * struct dn_pkt identifies a packet in the dummynet queue. The
+ * first part is really an m_hdr for implementation purposes, and some
+ * fields are saved there. When passing the packet back to the ip_input/
+ * ip_output()/bdg_forward, the struct is prepended to the mbuf chain with type
+ * MT_DUMMYNET, and contains the pointer to the matching rule.
+ *
+ * Note: there is no real need to make this structure contain an m_hdr,
+ * in the future this should be changed to a normal data structure.
+ */
+struct dn_pkt {
+	struct m_hdr hdr ;
+	/*
+	 * The m_hdr fields are reused under dummynet-specific names.
+	 * DN_NEXT() is fully parenthesized so that the cast applies to
+	 * the link itself even when the result is dereferenced, as in
+	 * DN_NEXT(p)->output_time.
+	 */
+#define dn_next	hdr.mh_nextpkt	/* next element in queue */
+#define DN_NEXT(x)	((struct dn_pkt *)(x)->dn_next)
+#define dn_m	hdr.mh_next	/* packet to be forwarded */
+#define dn_dir	hdr.mh_flags	/* action when pkt extracted from a queue */
+	/* values stored in dn_dir */
+#define DN_TO_IP_OUT	1
+#define DN_TO_IP_IN	2
+#define DN_TO_BDG_FWD	3
+
+	dn_key  output_time;    /* when the pkt is due for delivery */
+        struct ifnet *ifp;	/* interface, for ip_output		*/
+	struct sockaddr_in *dn_dst ;
+        struct route ro;	/* route, for ip_output. MUST COPY	*/
+	int flags ;		/* flags, for ip_output (IPv6 ?) */
+};
+
+/*
+ * Overall structure of dummynet (with WF2Q+):
+
+In dummynet, packets are selected with the firewall rules, and passed
+to two different objects: PIPE or QUEUE.
+
+A QUEUE is just a queue with configurable size and queue management
+policy. It is also associated with a mask (to discriminate among
+different flows), a weight (used to give different shares of the
+bandwidth to different flows) and a "pipe", which essentially
+supplies the transmit clock for all queues associated with that
+pipe.
+
+A PIPE emulates a fixed-bandwidth link, whose bandwidth is
+configurable.  The "clock" for a pipe can come from either an
+internal timer, or from the transmit interrupt of an interface.
+A pipe is also associated with one (or more, if masks are used)
+queue, where all packets for that pipe are stored.
+
+The bandwidth available on the pipe is shared by the queues
+associated with that pipe (only one in case the packet is sent
+to a PIPE) according to the WF2Q+ scheduling algorithm and the
+configured weights.
+
+In general, incoming packets are stored in the appropriate queue,
+which is then placed into one of a few heaps managed by a scheduler
+to decide when the packet should be extracted.
+The scheduler (a function called dummynet()) is run at every timer
+tick, and grabs queues from the head of the heaps when they are
+ready for processing.
+
+There are three data structures defining a pipe and associated queues:
+
+ + dn_pipe, which contains the main configuration parameters related
+   to delay and bandwidth;
+ + dn_flow_set, which contains WF2Q+ configuration, flow
+   masks, plr and RED configuration;
+ + dn_flow_queue, which is the per-flow queue (containing the packets)
+
+Multiple dn_flow_set can be linked to the same pipe, and multiple
+dn_flow_queue can be linked to the same dn_flow_set.
+All data structures are linked in a linear list which is used for
+housekeeping purposes.
+
+During configuration, we create and initialize the dn_flow_set
+and dn_pipe structures (a dn_pipe also contains a dn_flow_set).
+
+At runtime: packets are sent to the appropriate dn_flow_set (either
+WFQ ones, or the one embedded in the dn_pipe for fixed-rate flows),
+which in turn dispatches them to the appropriate dn_flow_queue
+(created dynamically according to the masks).
+
+The transmit clock for fixed rate flows (ready_event()) selects the
+dn_flow_queue to be used to transmit the next packet. For WF2Q,
+wfq_ready_event() extracts a pipe which in turn selects the right
+flow using a number of heaps defined into the pipe itself.
+
+ *
+ */
+
+/*
+ * per flow queue. This contains the flow identifier, the queue
+ * of packets, counters, and parameters used to support both RED and
+ * WF2Q+.
+ */
+struct dn_flow_queue {
+    struct dn_flow_queue *next ;	/* list link; presumably the next queue
+					 * in the same hash slot -- confirm */
+    struct ipfw_flow_id id ;		/* flow identifier */
+    struct dn_pkt *head, *tail ;	/* queue of packets */
+    u_int len ;			/* presumably queue length in packets -- confirm */
+    u_int len_bytes ;		/* presumably queue length in bytes -- confirm */
+    long numbytes ;		/* credit for transmission (dynamic queues) */
+
+    u_int64_t tot_pkts ;	/* statistics counters	*/
+    u_int64_t tot_bytes ;
+    u_int32_t drops ;
+    int hash_slot ;	/* debugging/diagnostic */
+
+    /* RED parameters */
+    int avg ;                   /* average queue length est. (scaled) */
+    int count ;                 /* arrivals since last RED drop */
+    int random ;                /* random value (scaled) */
+    u_int32_t q_time ;          /* start of queue idle time */
+
+    /* WF2Q+ support */
+    struct dn_flow_set *fs ; /* parent flow set */
+    int heap_pos ;	/* position (index) of struct in heap */
+    dn_key sched_time ; /* current time when queue enters ready_heap */
+
+    dn_key S,F ; /* start-time, finishing time */
+    /* setting F < S means the timestamp is invalid. We only need
+     * to test this when the queue is empty.
+     */
+} ;
+
+/*
+ * flow_set descriptor. Contains the "template" parameters for the
+ * queue configuration, and pointers to the hash table of dn_flow_queue's.
+ *
+ * The hash table is an array of lists -- we identify the slot by
+ * hashing the flow-id, then scan the list looking for a match.
+ * The size of the hash table (buckets) is configurable on a per-queue
+ * basis.
+ */
+struct dn_flow_set {
+    struct dn_flow_set *next; /* next flow set in all_flow_sets list */
+
+    u_short fs_nr ;             /* flow_set number       */
+    u_short flags_fs;		/* combination of the DN_* bits below */
+#define DN_HAVE_FLOW_MASK	0x0001
+#define DN_IS_PIPE		0x4000
+#define DN_IS_QUEUE		0x8000
+#define DN_IS_RED		0x0002
+#define DN_IS_GENTLE_RED	0x0004
+#define DN_QSIZE_IS_BYTES	0x0008	/* queue measured in bytes */
+
+    struct dn_pipe *pipe ;		/* pointer to parent pipe */
+    u_short parent_nr ;		/* parent pipe#, 0 if local to a pipe */
+
+    int weight ; /* WFQ queue weight */
+    int qsize ;		/* queue size in slots or bytes */
+    int plr ;           /* pkt loss rate (2^31-1 means 100%) */
+
+    struct ipfw_flow_id flow_mask ;
+    /* hash table of queues onto this flow_set */
+    int rq_size ;		/* number of slots */
+    int rq_elements ;		/* active elements */
+    struct dn_flow_queue **rq;	/* array of rq_size entries */
+    u_int32_t last_expired ;	/* do not expire too frequently */
+	/* XXX some RED parameters as well ? */
+    int backlogged ;		/* #active queues for this flowset */
+
+        /* RED parameters */
+	/*
+	 * RED values are kept as fixed point numbers with SCALE_RED
+	 * fractional bits: SCALE() converts to that form, SCALE_VAL()
+	 * converts back, and SCALE_MUL() multiplies two scaled values
+	 * and rescales the product.
+	 */
+#define SCALE_RED               16
+#define SCALE(x)                ( (x) << SCALE_RED )
+#define SCALE_VAL(x)            ( (x) >> SCALE_RED )
+#define SCALE_MUL(x,y)          ( ( (x) * (y) ) >> SCALE_RED )
+    int w_q ;               /* queue weight (scaled) */
+    int max_th ;            /* maximum threshold for queue (scaled) */
+    int min_th ;            /* minimum threshold for queue (scaled) */
+    int max_p ;             /* maximum value for p_b (scaled) */
+    u_int c_1 ;             /* max_p/(max_th-min_th) (scaled) */
+    u_int c_2 ;             /* max_p*min_th/(max_th-min_th) (scaled) */
+    u_int c_3 ;             /* for GRED, (1-max_p)/max_th (scaled) */
+    u_int c_4 ;             /* for GRED, 1 - 2*max_p (scaled) */
+    u_int * w_q_lookup ;    /* lookup table for computing (1-w_q)^t */
+    u_int lookup_depth ;    /* depth of lookup table */
+    int lookup_step ;       /* granularity inside the lookup table */
+    int lookup_weight ;     /* equal to (1-w_q)^t / (1-w_q)^(t+1) */
+    int avg_pkt_size ;      /* average packet size */
+    int max_pkt_size ;      /* max packet size */
+} ;
+
+/*
+ * Pipe descriptor. Contains global parameters, delay-line queue,
+ * and the flow_set used for fixed-rate queues.
+ * 
+ * For WF2Q support it also has heaps holding dn_flow_queue:
+ *   not_eligible_heap, for queues whose start time is higher
+ *	than the virtual time. Sorted by start time.
+ *   scheduler_heap, for queues eligible for scheduling. Sorted by
+ *	finish time.
+ *   backlogged_heap, all flows in the two heaps above, sorted by
+ *	start time. This is used to compute the virtual time.
+ *   idle_heap, all flows that are idle and can be removed. We
+ *	do that on each tick so we do not slow down too much
+ *	operations during forwarding.
+ *
+ * NOTE(review): only three of these heaps are declared in the
+ * structure below; there is no backlogged_heap field. Either the
+ * description or the structure is out of date -- confirm.
+ */
+struct dn_pipe {			/* a pipe */
+	struct dn_pipe *next ;
+
+    int	pipe_nr ;		/* number	*/
+	int	bandwidth;		/* really, bytes/tick.	*/
+	int	delay ;			/* really, ticks	*/
+
+    struct	dn_pkt *head, *tail ;	/* packets in delay line */
+
+    /* WF2Q+ */
+    struct dn_heap scheduler_heap ; /* top extract - key Finish time*/
+    struct dn_heap not_eligible_heap; /* top extract- key Start time */
+    struct dn_heap idle_heap ; /* random extract - key Start=Finish time */
+
+    dn_key V ; /* virtual time */
+    int sum;	/* sum of weights of all active sessions */
+    int numbytes;	/* bits I can transmit (more or less). */
+
+    dn_key sched_time ; /* first time pipe is scheduled in ready_heap */
+
+    /* the tx clock can come from an interface. In this case, the
+     * name is below, and the pointer is filled when the rule is
+     * configured. We identify this by setting the if_name to a
+     * non-empty string.
+     */
+    char if_name[16];
+    struct ifnet *ifp ;
+    int ready ; /* set if ifp != NULL and we got a signal from it */
+
+    struct dn_flow_set fs ; /* used with fixed-rate flows */
+};
+
+#ifdef _KERNEL
+
+MALLOC_DECLARE(M_IPFW);
+
+/*
+ * Socket-option handler type and the hook through which it is reached.
+ * ip_dummynet.c points the hook at its own handler when it initializes
+ * and restores the previous value on module unload.
+ */
+typedef int ip_dn_ctl_t __P((struct sockopt *)) ;
+extern ip_dn_ctl_t *ip_dn_ctl_ptr;
+
+void dn_rule_delete(void *r);		/* used in ip_fw.c */
+/*
+ * Entry point for handing a packet to dummynet; dir is presumably one
+ * of the DN_TO_* values defined with struct dn_pkt -- confirm against
+ * the callers.
+ */
+int dummynet_io(int pipe, int dir,
+	struct mbuf *m, struct ifnet *ifp, struct route *ro,
+	struct sockaddr_in * dst,
+	struct ip_fw_chain *rule, int flags);
+#endif
+
+#endif /* _IP_DUMMYNET_H */
diff --git a/sys/netinet/ip_ecn.c b/sys/netinet/ip_ecn.c
new file mode 100644
index 0000000..047f82e
--- /dev/null
+++ b/sys/netinet/ip_ecn.c
@@ -0,0 +1,149 @@
+/*	$FreeBSD$	*/
+/*	$KAME: ip_ecn.c,v 1.7 2000/05/05 11:00:56 sumikawa Exp $	*/
+
+/*
+ * Copyright (C) 1999 WIDE Project.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. Neither the name of the project nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ */
+/*
+ * ECN consideration on tunnel ingress/egress operation.
+ * http://www.aciri.org/floyd/papers/draft-ipsec-ecn-00.txt
+ */
+
+#include "opt_inet.h"
+#include "opt_inet6.h"
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/mbuf.h>
+#include <sys/errno.h>
+
+#ifdef INET
+#include <netinet/in.h>
+#include <netinet/in_systm.h>
+#include <netinet/ip.h>
+#endif
+
+#ifdef INET6
+#ifndef INET
+#include <netinet/in.h>
+#endif
+#include <netinet/ip6.h>
+#endif
+
+#include <netinet/ip_ecn.h>
+#ifdef INET6
+#include <netinet6/ip6_ecn.h>
+#endif
+
+/*
+ * Tunnel encapsulation: adjust the ECN bits of the outer TOS byte.
+ * The caller must already have initialized/copied the outer header.
+ *
+ *	ECN_ALLOWED	clear CE in the outer byte
+ *	ECN_FORBIDDEN	clear both ECT and CE in the outer byte
+ *	ECN_NOCARE	leave the outer byte untouched
+ *
+ * Any other mode value also leaves the outer byte untouched.  The
+ * inner byte is never read or written here; it only has to be non-NULL.
+ * Panics when either pointer is NULL.
+ */
+void
+ip_ecn_ingress(mode, outer, inner)
+	int mode;
+	u_int8_t *outer;
+	u_int8_t *inner;
+{
+	if (!(outer && inner))
+		panic("NULL pointer passed to ip_ecn_ingress");
+
+	if (mode == ECN_ALLOWED)
+		*outer &= ~IPTOS_CE;
+	else if (mode == ECN_FORBIDDEN)
+		*outer &= ~(IPTOS_ECT | IPTOS_CE);
+}
+
+/*
+ * Tunnel decapsulation: adjust the ECN bits of the inner TOS byte.
+ * The caller must already have initialized/copied the inner header.
+ *
+ * Only ECN_ALLOWED has an effect: when the outer byte carries CE, CE
+ * is set on the inner byte as well.  ECN_FORBIDDEN, ECN_NOCARE and any
+ * other mode leave the inner byte untouched.
+ * Panics when either pointer is NULL.
+ */
+void
+ip_ecn_egress(mode, outer, inner)
+	int mode;
+	u_int8_t *outer;
+	u_int8_t *inner;
+{
+	if (!(outer && inner))
+		panic("NULL pointer passed to ip_ecn_egress");
+
+	if (mode == ECN_ALLOWED && (*outer & IPTOS_CE) != 0)
+		*inner |= IPTOS_CE;
+}
+
+#ifdef INET6
+/*
+ * IPv6 flavour of ip_ecn_ingress().  outer and inner point at 32-bit
+ * words in network byte order; the 8 bits at positions 20..27 (host
+ * order) are extracted, run through ip_ecn_ingress(), and the outer
+ * result is stored back in place.  The inner word is not modified.
+ * Panics when either pointer is NULL.
+ */
+void
+ip6_ecn_ingress(mode, outer, inner)
+	int mode;
+	u_int32_t *outer;
+	u_int32_t *inner;
+{
+	u_int8_t otos, itos;
+
+	if (!(outer && inner))
+		panic("NULL pointer passed to ip6_ecn_ingress");
+
+	otos = (ntohl(*outer) >> 20) & 0xff;
+	itos = (ntohl(*inner) >> 20) & 0xff;
+	ip_ecn_ingress(mode, &otos, &itos);
+
+	/* replace the 8-bit field in the outer word, keep everything else */
+	*outer = (*outer & ~htonl(0xff << 20)) | htonl((u_int32_t)otos << 20);
+}
+
+/*
+ * IPv6 flavour of ip_ecn_egress().  outer and inner point at 32-bit
+ * words in network byte order; the 8 bits at positions 20..27 (host
+ * order) are extracted, run through ip_ecn_egress(), and the inner
+ * result is stored back in place.  The outer word is not modified.
+ * Panics when either pointer is NULL.
+ */
+void
+ip6_ecn_egress(mode, outer, inner)
+	int mode;
+	u_int32_t *outer;
+	u_int32_t *inner;
+{
+	u_int8_t otos, itos;
+
+	if (!(outer && inner))
+		panic("NULL pointer passed to ip6_ecn_egress");
+
+	otos = (ntohl(*outer) >> 20) & 0xff;
+	itos = (ntohl(*inner) >> 20) & 0xff;
+	ip_ecn_egress(mode, &otos, &itos);
+
+	/* replace the 8-bit field in the inner word, keep everything else */
+	*inner = (*inner & ~htonl(0xff << 20)) | htonl((u_int32_t)itos << 20);
+}
+#endif
diff --git a/sys/netinet/ip_ecn.h b/sys/netinet/ip_ecn.h
new file mode 100644
index 0000000..6445d0f
--- /dev/null
+++ b/sys/netinet/ip_ecn.h
@@ -0,0 +1,45 @@
+/*	$FreeBSD$	*/
+/*	$KAME: ip_ecn.h,v 1.5 2000/03/27 04:58:38 sumikawa Exp $	*/
+
+/*
+ * Copyright (C) 1999 WIDE Project.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. Neither the name of the project nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ */
+/*
+ * ECN consideration on tunnel ingress/egress operation.
+ * http://www.aciri.org/floyd/papers/draft-ipsec-ecn-00.txt
+ */
+
+/* values for the "mode" argument of the ingress/egress functions */
+#define ECN_ALLOWED	1	/* ECN allowed */
+#define ECN_FORBIDDEN	0	/* ECN forbidden */
+#define ECN_NOCARE	(-1)	/* no consideration to ECN */
+
+#ifdef _KERNEL
+/* arguments: mode, pointer to outer TOS byte, pointer to inner TOS byte */
+extern void ip_ecn_ingress __P((int, u_int8_t *, u_int8_t *));
+extern void ip_ecn_egress __P((int, u_int8_t *, u_int8_t *));
+#endif
diff --git a/sys/netinet/ip_encap.c b/sys/netinet/ip_encap.c
new file mode 100644
index 0000000..7d623ea
--- /dev/null
+++ b/sys/netinet/ip_encap.c
@@ -0,0 +1,534 @@
+/*	$FreeBSD$	*/
+/*	$KAME: ip_encap.c,v 1.36 2000/06/17 20:34:24 itojun Exp $	*/
+
+/*
+ * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. Neither the name of the project nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+/*
+ * My grandfather said that there's a devil inside tunnelling technology...
+ *
+ * We have surprisingly many protocols that want packets with IP protocol
+ * #4 or #41.  Here's a list of protocols that want protocol #41:
+ *	RFC1933 configured tunnel
+ *	RFC1933 automatic tunnel
+ *	RFC2401 IPsec tunnel
+ *	RFC2473 IPv6 generic packet tunnelling
+ *	RFC2529 6over4 tunnel
+ *	mobile-ip6 (uses RFC2473)
+ *	6to4 tunnel
+ * Here's a list of protocols that want protocol #4:
+ *	RFC1853 IPv4-in-IPv4 tunnelling
+ *	RFC2003 IPv4 encapsulation within IPv4
+ *	RFC2344 reverse tunnelling for mobile-ip4
+ *	RFC2401 IPsec tunnel
+ * Well, what can I say.  They impose different en/decapsulation mechanisms
+ * from each other, so they need separate protocol handlers.  The only one
+ * we can easily determine by protocol # is IPsec, which always has
+ * AH/ESP/IPComp header right after outer IP header.
+ *
+ * So, clearly good old protosw does not work for protocol #4 and #41.
+ * The code will let you match protocol via src/dst address pair.
+ */
+/* XXX is M_NETADDR correct? */
+
+#include "opt_mrouting.h"
+#include "opt_inet.h"
+#include "opt_inet6.h"
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/socket.h>
+#include <sys/sockio.h>
+#include <sys/mbuf.h>
+#include <sys/errno.h>
+#include <sys/protosw.h>
+
+#include <net/if.h>
+#include <net/route.h>
+
+#include <netinet/in.h>
+#include <netinet/in_systm.h>
+#include <netinet/ip.h>
+#include <netinet/ip_var.h>
+#include <netinet/ip_encap.h>
+#ifdef MROUTING
+#include <netinet/ip_mroute.h>
+#endif /* MROUTING */
+#include <netinet/ipprotosw.h>
+
+#ifdef INET6
+#include <netinet/ip6.h>
+#include <netinet6/ip6_var.h>
+#include <netinet6/ip6protosw.h>
+#endif
+
+#include <machine/stdarg.h>
+
+#include <net/net_osdep.h>
+
+#include <sys/kernel.h>
+#include <sys/malloc.h>
+/*
+ * Malloc type used for encaptab entries.
+ * NOTE(review): the description strings talk about "Export host";
+ * they look borrowed from elsewhere -- confirm they are intended.
+ */
+static MALLOC_DEFINE(M_NETADDR, "Export Host", "Export host address structure");
+
+static void encap_add __P((struct encaptab *));
+static int mask_match __P((const struct encaptab *, const struct sockaddr *,
+		const struct sockaddr *));
+static void encap_fillarg __P((struct mbuf *, const struct encaptab *));
+
+/*
+ * List of all attached encapsulation handlers.
+ */
+/* rely upon BSS initialization */
+LIST_HEAD(, encaptab) encaptab;
+
+/*
+ * Initialization hook.  Intentionally does nothing: the only candidate
+ * statement, LIST_INIT() on the handler list, is compiled out for the
+ * reason given inside.
+ */
+void
+encap_init()
+{
+#if 0
+	/*
+	 * we cannot use LIST_INIT() here, since drivers may want to call
+	 * encap_attach(), on driver attach.  encap_init() will be called
+	 * on AF_INET{,6} initialization, which happens after driver
+	 * initialization - using LIST_INIT() here can nuke encap_attach()
+	 * from drivers.
+	 */
+	LIST_INIT(&encaptab);
+#endif
+}
+
+/*
+ * IPv4 input routine for encapsulated packets.
+ *
+ * The variable arguments following m are, in order, (int off) and
+ * (int proto).  Every AF_INET entry of encaptab whose proto is either
+ * negative or equal to proto is asked for a match priority; the entry
+ * with the highest positive priority receives the packet through its
+ * protosw pr_input.  When no entry matches, the packet goes to
+ * ipip_input() (only in MROUTING kernels and only for IPPROTO_IPV4),
+ * otherwise to rip_input().
+ */
+void
+#if __STDC__
+encap4_input(struct mbuf *m, ...)
+#else
+encap4_input(m, va_alist)
+	struct mbuf *m;
+	va_dcl
+#endif
+{
+	int off, proto;
+	struct ip *ip;
+	struct sockaddr_in s, d;
+	const struct ipprotosw *psw;
+	struct encaptab *ep, *match;
+	va_list ap;
+	int prio, matchprio;
+
+	va_start(ap, m);
+	off = va_arg(ap, int);
+	proto = va_arg(ap, int);
+	va_end(ap);
+
+	ip = mtod(m, struct ip *);
+
+	/* build sockaddrs for the outer source and destination addresses */
+	bzero(&s, sizeof(s));
+	s.sin_family = AF_INET;
+	s.sin_len = sizeof(struct sockaddr_in);
+	s.sin_addr = ip->ip_src;
+	bzero(&d, sizeof(d));
+	d.sin_family = AF_INET;
+	d.sin_len = sizeof(struct sockaddr_in);
+	d.sin_addr = ip->ip_dst;
+
+	match = NULL;
+	matchprio = 0;
+	LIST_FOREACH(ep, &encaptab, chain) {
+		if (ep->af != AF_INET)
+			continue;
+		/* a negative ep->proto accepts any protocol number */
+		if (ep->proto >= 0 && ep->proto != proto)
+			continue;
+		if (ep->func)
+			prio = (*ep->func)(m, off, proto, ep->arg);
+		else {
+			/*
+			 * it's inbound traffic, we need to match in reverse
+			 * order
+			 */
+			prio = mask_match(ep, (struct sockaddr *)&d,
+			    (struct sockaddr *)&s);
+		}
+
+		/*
+		 * We prioritize the matches by using bit length of the
+		 * matches.  mask_match() and user-supplied matching function
+		 * should return the bit length of the matches (for example,
+		 * if both src/dst are matched for IPv4, 64 should be returned).
+		 * 0 or negative return value means "it did not match".
+		 *
+		 * The question is, since we have two "mask" portion, we
+		 * cannot really define total order between entries.
+		 * For example, which of these should be preferred?
+		 * mask_match() returns 48 (32 + 16) for both of them.
+		 *	src=3ffe::/16, dst=3ffe:501::/32
+		 *	src=3ffe:501::/32, dst=3ffe::/16
+		 *
+		 * We need to loop through all the possible candidates
+		 * to get the best match - the search takes O(n) for
+		 * n attachments (i.e. interfaces).
+		 */
+		if (prio <= 0)
+			continue;
+		/* strict comparison: on a tie the entry seen first is kept */
+		if (prio > matchprio) {
+			matchprio = prio;
+			match = ep;
+		}
+	}
+
+	if (match) {
+		/* found a match, "match" has the best one */
+		psw = (const struct ipprotosw *)match->psw;
+		if (psw && psw->pr_input) {
+			/* attach match->arg to the packet for the handler */
+			encap_fillarg(m, match);
+			(*psw->pr_input)(m, off, proto);
+		} else
+			m_freem(m);	/* matched, but nobody to deliver to */
+		return;
+	}
+
+	/* for backward compatibility */
+# ifdef MROUTING
+#  define COMPATFUNC	ipip_input
+# endif /*MROUTING*/
+
+#ifdef COMPATFUNC
+	if (proto == IPPROTO_IPV4) {
+		COMPATFUNC(m, off, proto);
+		return;
+	}
+#endif
+
+	/* last resort: inject to raw socket */
+	rip_input(m, off, proto);
+}
+
+#ifdef INET6
+/*
+ * IPv6 input routine for encapsulated packets.
+ *
+ * Every AF_INET6 entry of encaptab whose proto is negative or equal to
+ * proto is asked for a match priority (through its own function when
+ * it has one, through mask_match() otherwise).  The entry with the
+ * highest positive priority wins; on a tie the entry seen first is
+ * kept.  The winner's pr_input result is returned.  A winner without
+ * a pr_input gets the packet freed and IPPROTO_DONE returned.  With no
+ * winner at all the packet is handed to rip6_input().
+ */
+int
+encap6_input(mp, offp, proto)
+	struct mbuf **mp;
+	int *offp;
+	int proto;
+{
+	struct mbuf *m = *mp;
+	struct ip6_hdr *ip6;
+	struct sockaddr_in6 s, d;
+	const struct ip6protosw *psw;
+	struct encaptab *ep, *match;
+	int prio, matchprio;
+
+	ip6 = mtod(m, struct ip6_hdr *);
+
+	/* sockaddr for the outer source address */
+	bzero(&s, sizeof(s));
+	s.sin6_family = AF_INET6;
+	s.sin6_len = sizeof(struct sockaddr_in6);
+	s.sin6_addr = ip6->ip6_src;
+
+	/* sockaddr for the outer destination address */
+	bzero(&d, sizeof(d));
+	d.sin6_family = AF_INET6;
+	d.sin6_len = sizeof(struct sockaddr_in6);
+	d.sin6_addr = ip6->ip6_dst;
+
+	match = NULL;
+	matchprio = 0;
+	LIST_FOREACH(ep, &encaptab, chain) {
+		if (ep->af != AF_INET6 ||
+		    (ep->proto >= 0 && ep->proto != proto))
+			continue;
+
+		/*
+		 * it's inbound traffic, so mask_match() gets the packet's
+		 * destination as "my side" and its source as "remote side"
+		 */
+		prio = ep->func ?
+		    (*ep->func)(m, *offp, proto, ep->arg) :
+		    mask_match(ep, (struct sockaddr *)&d,
+			(struct sockaddr *)&s);
+
+		/*
+		 * see encap4_input() for issues here.  matchprio starts
+		 * at 0, so a non-positive prio can never be selected.
+		 */
+		if (prio > matchprio) {
+			matchprio = prio;
+			match = ep;
+		}
+	}
+
+	/* last resort: inject to raw socket */
+	if (match == NULL)
+		return rip6_input(mp, offp, proto);
+
+	/* found a match */
+	psw = (const struct ip6protosw *)match->psw;
+	if (psw == NULL || psw->pr_input == NULL) {
+		m_freem(m);
+		return IPPROTO_DONE;
+	}
+	encap_fillarg(m, match);
+	return (*psw->pr_input)(mp, offp, proto);
+}
+#endif
+
+/*
+ * Link ep at the head of the encaptab list.  No locking or spl change
+ * is done here; the callers in this file invoke it with splnet() raised.
+ */
+static void
+encap_add(ep)
+	struct encaptab *ep;
+{
+
+	LIST_INSERT_HEAD(&encaptab, ep, chain);
+}
+
+/*
+ * Register an address/mask based encapsulation handler.
+ *
+ * sp (src ptr) is always my side, and dp (dst ptr) is always remote side.
+ * length of mask (sm and dm) is assumed to be same as sp/dp.
+ *
+ * Returns the new entry, which is the cookie to pass to encap_detach(),
+ * or NULL on failure.  The reason for a failure is not reported to the
+ * caller; the cases are noted next to each check below.
+ * The whole operation runs with splnet() raised.
+ */
+const struct encaptab *
+encap_attach(af, proto, sp, sm, dp, dm, psw, arg)
+	int af;
+	int proto;
+	const struct sockaddr *sp, *sm;
+	const struct sockaddr *dp, *dm;
+	const struct protosw *psw;
+	void *arg;
+{
+	struct encaptab *ep;
+	int s;
+
+	s = splnet();
+
+	/*
+	 * sanity check on args (EINVAL): addresses must fit the storage,
+	 * have equal length, and belong to the requested family
+	 */
+	if (sp->sa_len > sizeof(ep->src) || dp->sa_len > sizeof(ep->dst) ||
+	    sp->sa_len != dp->sa_len ||
+	    af != sp->sa_family || af != dp->sa_family)
+		goto fail;
+
+	/* check if anyone has already attached with exactly same config (EEXIST) */
+	LIST_FOREACH(ep, &encaptab, chain) {
+		if (ep->af == af && ep->proto == proto &&
+		    ep->src.ss_len == sp->sa_len &&
+		    bcmp(&ep->src, sp, sp->sa_len) == 0 &&
+		    bcmp(&ep->srcmask, sm, sp->sa_len) == 0 &&
+		    ep->dst.ss_len == dp->sa_len &&
+		    bcmp(&ep->dst, dp, dp->sa_len) == 0 &&
+		    bcmp(&ep->dstmask, dm, dp->sa_len) == 0)
+			goto fail;
+	}
+
+	/* out of memory (ENOBUFS) */
+	ep = malloc(sizeof(*ep), M_NETADDR, M_NOWAIT);	/*XXX*/
+	if (ep == NULL)
+		goto fail;
+	bzero(ep, sizeof(*ep));
+
+	/* addresses first, then the scalar fields */
+	bcopy(sp, &ep->src, sp->sa_len);
+	bcopy(sm, &ep->srcmask, sp->sa_len);
+	bcopy(dp, &ep->dst, dp->sa_len);
+	bcopy(dm, &ep->dstmask, dp->sa_len);
+	ep->af = af;
+	ep->proto = proto;
+	ep->psw = psw;
+	ep->arg = arg;
+
+	encap_add(ep);
+
+	splx(s);
+	return ep;
+
+fail:
+	splx(s);
+	return NULL;
+}
+
+/*
+ * Register an encapsulation handler that decides matches through a
+ * caller-supplied function instead of address/mask pairs.
+ *
+ * Returns the new entry (the cookie for encap_detach()), or NULL when
+ * func is NULL or the allocation fails; the two cases are not
+ * distinguished for the caller.  Runs with splnet() raised.
+ */
+const struct encaptab *
+encap_attach_func(af, proto, func, psw, arg)
+	int af;
+	int proto;
+	int (*func) __P((const struct mbuf *, int, int, void *));
+	const struct protosw *psw;
+	void *arg;
+{
+	struct encaptab *ep;
+	int s;
+
+	s = splnet();
+
+	ep = NULL;
+	/* sanity check on args: a matching function is mandatory (EINVAL) */
+	if (func != NULL) {
+		/* a NULL result here corresponds to ENOBUFS */
+		ep = malloc(sizeof(*ep), M_NETADDR, M_NOWAIT);	/*XXX*/
+		if (ep != NULL) {
+			bzero(ep, sizeof(*ep));
+
+			ep->af = af;
+			ep->proto = proto;
+			ep->func = func;
+			ep->psw = psw;
+			ep->arg = arg;
+
+			encap_add(ep);
+		}
+	}
+
+	splx(s);
+	return ep;
+}
+
+/*
+ * Remove and free the entry identified by cookie (a value returned by
+ * encap_attach() or encap_attach_func()).  The list is searched so
+ * that a pointer which is not (or no longer) on the list is rejected
+ * instead of being freed.
+ *
+ * Returns 0 on success, EINVAL when cookie is not on the list.
+ *
+ * NOTE(review): unlike encap_attach(), this function does not raise
+ * splnet() around the list update; presumably callers are expected to
+ * do so -- confirm.
+ */
+int
+encap_detach(cookie)
+	const struct encaptab *cookie;
+{
+	struct encaptab *p;
+
+	LIST_FOREACH(p, &encaptab, chain) {
+		if (p != cookie)
+			continue;
+		LIST_REMOVE(p, chain);
+		free(p, M_NETADDR);	/*XXX*/
+		return 0;
+	}
+
+	return EINVAL;
+}
+
+/*
+ * Store (addr & mask) into out for len bytes and return a rough
+ * estimate of the mask length: 8 bits for every non-zero mask byte.
+ */
+static int
+mask_apply(out, addr, mask, len)
+	u_int8_t *out;
+	const u_int8_t *addr;
+	const u_int8_t *mask;
+	int len;
+{
+	int i, bits;
+
+	bits = 0;
+	for (i = 0; i < len; i++) {
+		out[i] = addr[i] & mask[i];
+		/* XXX rough estimate */
+		bits += (mask[i] ? 8 : 0);
+	}
+	return bits;
+}
+
+/*
+ * Check whether the address pair sp/dp matches entry ep once ep's
+ * source and destination masks are applied.
+ *
+ * Returns 0 when the pair does not match (or the sockaddrs are of the
+ * wrong family or length), otherwise the estimated number of mask bits
+ * of both masks together, which callers use as the match priority.
+ */
+static int
+mask_match(ep, sp, dp)
+	const struct encaptab *ep;
+	const struct sockaddr *sp;
+	const struct sockaddr *dp;
+{
+	struct sockaddr_storage s;
+	struct sockaddr_storage d;
+	int matchlen;
+
+	if (sp->sa_len > sizeof(s) || dp->sa_len > sizeof(d))
+		return 0;
+	if (sp->sa_family != ep->af || dp->sa_family != ep->af)
+		return 0;
+	if (sp->sa_len != ep->src.ss_len || dp->sa_len != ep->dst.ss_len)
+		return 0;
+
+	/* masked copies of both addresses; only sa_len bytes are written */
+	matchlen = mask_apply((u_int8_t *)&s, (const u_int8_t *)sp,
+	    (const u_int8_t *)&ep->srcmask, sp->sa_len);
+	matchlen += mask_apply((u_int8_t *)&d, (const u_int8_t *)dp,
+	    (const u_int8_t *)&ep->dstmask, dp->sa_len);
+
+	/* need to overwrite len/family portion as we don't compare them */
+	s.ss_len = sp->sa_len;
+	s.ss_family = sp->sa_family;
+	d.ss_len = dp->sa_len;
+	d.ss_family = dp->sa_family;
+
+	if (bcmp(&s, &ep->src, ep->src.ss_len) != 0 ||
+	    bcmp(&d, &ep->dst, ep->dst.ss_len) != 0)
+		return 0;
+	return matchlen;
+}
+
+/*
+ * Attach ep->arg to packet m so that the protocol handler can fetch it
+ * later with encap_getarg().  The pointer is stored in an auxiliary
+ * mbuf tagged (AF_INET, IPPROTO_IPV4); when that mbuf cannot be
+ * obtained the argument is silently not attached.
+ */
+static void
+encap_fillarg(m, ep)
+	struct mbuf *m;
+	const struct encaptab *ep;
+{
+#if 0
+	m->m_pkthdr.aux = ep->arg;
+#else
+	struct mbuf *n;
+
+	n = m_aux_add(m, AF_INET, IPPROTO_IPV4);
+	if (!n)
+		return;
+
+	n->m_len = sizeof(void *);
+	*mtod(n, void **) = ep->arg;
+#endif
+}
+
+/*
+ * Fetch the argument stored on packet m by encap_fillarg() and remove
+ * it from the packet.  Returns NULL when no auxiliary mbuf tagged
+ * (AF_INET, IPPROTO_IPV4) is found, or when the one found does not
+ * hold exactly one pointer; in the latter case it is still deleted.
+ */
+void *
+encap_getarg(m)
+	struct mbuf *m;
+{
+	void *p;
+#if 0
+	p = m->m_pkthdr.aux;
+	m->m_pkthdr.aux = NULL;
+	return p;
+#else
+	struct mbuf *n;
+
+	n = m_aux_find(m, AF_INET, IPPROTO_IPV4);
+	if (!n)
+		return NULL;
+
+	p = (n->m_len == sizeof(void *)) ? *mtod(n, void **) : NULL;
+	m_aux_delete(m, n);
+	return p;
+#endif
+}
diff --git a/sys/netinet/ip_encap.h b/sys/netinet/ip_encap.h
new file mode 100644
index 0000000..38df6f9
--- /dev/null
+++ b/sys/netinet/ip_encap.h
@@ -0,0 +1,64 @@
+/*	$FreeBSD$	*/
+/*	$KAME: ip_encap.h,v 1.7 2000/03/25 07:23:37 sumikawa Exp $	*/
+
+/*
+ * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. Neither the name of the project nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#ifndef _NETINET_IP_ENCAP_H_
+#define _NETINET_IP_ENCAP_H_
+
+#ifdef _KERNEL
+
+/*
+ * One entry in the encapsulation handler table.  An entry is registered
+ * either with an address/mask pair per direction (encap_attach) or with
+ * a match function (encap_attach_func); see the prototypes below.
+ */
+struct encaptab {
+	LIST_ENTRY(encaptab) chain;	/* list linkage */
+	int af;				/* address family the addresses must have */
+	int proto;			/* -1: don't care, I'll check myself */
+	struct sockaddr_storage src;	/* my addr */
+	struct sockaddr_storage srcmask; /* ANDed with the candidate source */
+	struct sockaddr_storage dst;	/* remote addr */
+	struct sockaddr_storage dstmask; /* ANDed with the candidate destination */
+	/* match callback -- NOTE(review): argument meaning not visible here */
+	int (*func) __P((const struct mbuf *, int, int, void *));
+	const struct protosw *psw;	/* only pr_input will be used */
+	void *arg;			/* attached to the mbuf; fetch with encap_getarg() */
+};
+
+void	encap_init __P((void));
+void	encap4_input __P((struct mbuf *, ...));
+int	encap6_input __P((struct mbuf **, int *, int));
+const struct encaptab *encap_attach __P((int, int, const struct sockaddr *,
+	const struct sockaddr *, const struct sockaddr *,
+	const struct sockaddr *, const struct protosw *, void *));
+const struct encaptab *encap_attach_func __P((int, int,
+	int (*) __P((const struct mbuf *, int, int, void *)),
+	const struct protosw *, void *));
+int	encap_detach __P((const struct encaptab *));
+void	*encap_getarg __P((struct mbuf *));
+#endif
+
+#endif /*_NETINET_IP_ENCAP_H_*/
diff --git a/sys/netinet/ip_fil.c b/sys/netinet/ip_fil.c
new file mode 100644
index 0000000..ff80058
--- /dev/null
+++ b/sys/netinet/ip_fil.c
@@ -0,0 +1,1763 @@
+/*
+ * Copyright (C) 1993-2000 by Darren Reed.
+ *
+ * Redistribution and use in source and binary forms are permitted
+ * provided that this notice is preserved and due credit is given
+ * to the original author and the contributors.
+ */
+#if !defined(lint)
+static const char sccsid[] = "@(#)ip_fil.c	2.41 6/5/96 (C) 1993-1995 Darren Reed";
+/*static const char rcsid[] = "@(#)$Id: ip_fil.c,v 2.42.2.14 2000/07/18 13:57:55 darrenr Exp $";*/
+static const char rcsid[] = "@(#)$FreeBSD$";
+#endif
+
+#ifndef	SOLARIS
+#define	SOLARIS	(defined(sun) && (defined(__svr4__) || defined(__SVR4)))
+#endif
+
+#if defined(KERNEL) && !defined(_KERNEL)
+# define	_KERNEL
+#endif
+#if defined(_KERNEL) && defined(__FreeBSD_version) && \
+    (__FreeBSD_version >= 400000) && !defined(KLD_MODULE)
+#include "opt_inet6.h"
+#endif
+#include <sys/param.h>
+#if defined(__NetBSD__) && (NetBSD >= 199905) && !defined(IPFILTER_LKM) && \
+    defined(_KERNEL)
+# include "opt_ipfilter_log.h"
+#endif
+#if defined(__FreeBSD__) && !defined(__FreeBSD_version)
+# if !defined(_KERNEL) || defined(IPFILTER_LKM)
+#  include <osreldate.h>
+# endif
+#endif
+#ifndef	_KERNEL
+# include <stdio.h>
+# include <string.h>
+# include <stdlib.h>
+# include <ctype.h>
+# include <fcntl.h>
+#endif
+#include <sys/errno.h>
+#include <sys/types.h>
+#include <sys/file.h>
+#if __FreeBSD_version >= 220000 && defined(_KERNEL)
+# include <sys/fcntl.h>
+# include <sys/filio.h>
+#else
+# include <sys/ioctl.h>
+#endif
+#include <sys/time.h>
+#ifdef	_KERNEL
+# include <sys/systm.h>
+#endif
+#include <sys/uio.h>
+#if !SOLARIS
+# if (NetBSD > 199609) || (OpenBSD > 199603) || (__FreeBSD_version >= 300000)
+#  include <sys/dirent.h>
+# else
+#  include <sys/dir.h>
+# endif
+# include <sys/mbuf.h>
+#else
+# include <sys/filio.h>
+#endif
+#include <sys/protosw.h>
+#include <sys/socket.h>
+
+#include <net/if.h>
+#ifdef sun
+# include <net/af.h>
+#endif
+#if __FreeBSD_version >= 300000
+# include <net/if_var.h>
+# if defined(_KERNEL) && !defined(IPFILTER_LKM)
+#  include "opt_ipfilter.h"
+# endif
+#endif
+#ifdef __sgi
+#include <sys/debug.h>
+# ifdef IFF_DRVRLOCK /* IRIX6 */
+#include <sys/hashing.h>
+# endif
+#endif
+#include <net/route.h>
+#include <netinet/in.h>
+#if !(defined(__sgi) && !defined(IFF_DRVRLOCK)) /* IRIX < 6 */
+# include <netinet/in_var.h>
+#endif
+#include <netinet/in_systm.h>
+#include <netinet/ip.h>
+#include <netinet/ip_var.h>
+#include <netinet/tcp.h>
+#include <netinet/udp.h>
+#include <netinet/tcpip.h>
+#include <netinet/ip_icmp.h>
+#ifndef	_KERNEL
+# include <unistd.h>
+# include <syslog.h>
+#endif
+#include "netinet/ip_compat.h"
+#ifdef USE_INET6
+# include <netinet/icmp6.h>
+#endif
+#include "netinet/ip_fil.h"
+#include "netinet/ip_proxy.h"
+#include "netinet/ip_nat.h"
+#include "netinet/ip_frag.h"
+#include "netinet/ip_state.h"
+#include "netinet/ip_auth.h"
+#if defined(__FreeBSD_version) && (__FreeBSD_version >= 300000)
+# include <sys/malloc.h>
+#endif
+#ifndef	MIN
+# define	MIN(a,b)	(((a)<(b))?(a):(b))
+#endif
+#if !SOLARIS && defined(_KERNEL) && !defined(__sgi)
+# include <sys/kernel.h>
+extern	int	ip_optcopy __P((struct ip *, struct ip *));
+#endif
+
+#include <machine/in_cksum.h>
+
+extern	struct	protosw	inetsw[];
+
+#ifndef	_KERNEL
+# include "ipt.h"
+static	struct	ifnet **ifneta = NULL;
+static	int	nifs = 0;
+#else
+# if	(BSD < 199306) || defined(__sgi)
+extern	int	tcp_ttl;
+# endif
+#endif
+
+int	ipl_unreach = ICMP_UNREACH_FILTER;
+u_long	ipl_frouteok[2] = {0, 0};
+
+static	int	frzerostats __P((caddr_t));
+#if defined(__NetBSD__) || defined(__OpenBSD__) || (__FreeBSD_version >= 300003)
+static	int	frrequest __P((int, u_long, caddr_t, int));
+#else
+static	int	frrequest __P((int, int, caddr_t, int));
+#endif
+#ifdef	_KERNEL
+static	int	(*fr_savep) __P((ip_t *, int, void *, int, struct mbuf **));
+static	int	send_ip __P((ip_t *, fr_info_t *, struct mbuf *));
+# ifdef	__sgi
+extern  kmutex_t        ipf_rw;
+extern	KRWLOCK_T	ipf_mutex;
+# endif
+#else
+int	ipllog __P((void));
+void	init_ifp __P((void));
+# ifdef __sgi
+static int 	no_output __P((struct ifnet *, struct mbuf *,
+			       struct sockaddr *));
+static int	write_output __P((struct ifnet *, struct mbuf *,
+				  struct sockaddr *));
+# else
+static int 	no_output __P((struct ifnet *, struct mbuf *,
+			       struct sockaddr *, struct rtentry *));
+static int	write_output __P((struct ifnet *, struct mbuf *,
+				  struct sockaddr *, struct rtentry *));
+# endif
+#endif
+int	fr_running = 0;
+
+#if (__FreeBSD_version >= 300000) && defined(_KERNEL)
+struct callout_handle ipfr_slowtimer_ch;
+#endif
+#if defined(__NetBSD__) && (__NetBSD_Version__ >= 104230000)
+# include <sys/callout.h>
+struct callout ipfr_slowtimer_ch;
+#endif
+#if defined(__sgi) && defined(_KERNEL)
+toid_t ipfr_slowtimer_ch;
+#endif
+
+#if (_BSDI_VERSION >= 199510) && defined(_KERNEL)
+# include <sys/device.h>
+# include <sys/conf.h>
+
+struct cfdriver iplcd = {
+	NULL, "ipl", NULL, NULL, DV_DULL, 0
+};
+
+struct devsw iplsw = {
+	&iplcd,
+	iplopen, iplclose, iplread, nowrite, iplioctl, noselect, nommap,
+	nostrat, nodump, nopsize, 0,
+	nostop
+};
+#endif /* _BSDI_VERSION >= 199510  && _KERNEL */
+
+#if defined(__NetBSD__) || defined(__OpenBSD__)  || \
+    (_BSDI_VERSION >= 199701) || (__FreeBSD_version >= 500011)
+# include <sys/conf.h>
+# if defined(NETBSD_PF)
+#  include <net/pfil.h>
+/*
+ * We provide the fr_checkp name just to minimize changes later.
+ */
+int (*fr_checkp) __P((ip_t *ip, int hlen, void *ifp, int out, mb_t **mp));
+# endif /* NETBSD_PF */
+#endif /* __NetBSD__ */
+
+#ifdef	_KERNEL
+# if	defined(IPFILTER_LKM) && !defined(__sgi)
+/*
+ * Return 1 when the given name is exactly "ipl", 0 otherwise.
+ */
+int iplidentify(s)
+char *s;
+{
+	return (strcmp(s, "ipl") == 0) ? 1 : 0;
+}
+# endif /* IPFILTER_LKM */
+
+
+/*
+ * Try to detect the case when compiling for NetBSD with pseudo-device
+ */
+# if defined(__NetBSD__) && defined(PFIL_HOOKS)
+/*
+ * Pseudo-device attach entry point: try to attach the filter and report
+ * on the console if that fails.  The count argument is not used.
+ */
+void
+ipfilterattach(count)
+int count;
+{
+	int failed;
+
+	failed = (iplattach() != 0);
+	if (failed)
+		printf("IP Filter failed to attach\n");
+}
+# endif
+
+
+/*
+ * Initialise the filter's subsystems, install fr_check as the packet
+ * check routine and start the slow timer.
+ *
+ * Returns EBUSY if the filter is already running, -1 if the NAT, state
+ * or proxy initialisation fails, the error from pfil_add_hook() or
+ * ipfilter_sgi_attach() where those are compiled in, and 0 on success.
+ * Every return path restores the priority level raised by SPL_NET().
+ */
+int iplattach()
+{
+	char *defpass;
+	int s;
+# if defined(__sgi) || (defined(NETBSD_PF) && \
+  ((__NetBSD_Version__ >= 104200000) || (__FreeBSD_version >= 500011)))
+	int error = 0;
+# endif
+
+	SPL_NET(s);
+	if (fr_running || (fr_checkp == fr_check)) {
+		printf("IP Filter: already initialized\n");
+		SPL_X(s);
+		return EBUSY;
+	}
+
+# ifdef	IPFILTER_LOG
+	ipflog_init();
+# endif
+	/* drop back to the saved priority level before bailing out */
+	if (nat_init() == -1) {
+		SPL_X(s);
+		return -1;
+	}
+	if (fr_stateinit() == -1) {
+		SPL_X(s);
+		return -1;
+	}
+	if (appr_init() == -1) {
+		SPL_X(s);
+		return -1;
+	}
+
+# ifdef NETBSD_PF
+#  if (__NetBSD_Version__ >= 104200000) || (__FreeBSD_version >= 500011)
+	error = pfil_add_hook((void *)fr_check, PFIL_IN|PFIL_OUT,
+			      &inetsw[ip_protox[IPPROTO_IP]].pr_pfh);
+	if (error) {
+#   ifdef USE_INET6
+		goto pfil_error;
+#   else
+		SPL_X(s);
+		appr_unload();
+		ip_natunload();
+		fr_stateunload();
+		return error;
+#   endif
+	}
+#  else
+	pfil_add_hook((void *)fr_check, PFIL_IN|PFIL_OUT);
+#  endif
+#  ifdef USE_INET6
+	error = pfil_add_hook((void *)fr_check, PFIL_IN|PFIL_OUT,
+			      &inetsw[ip_protox[IPPROTO_IPV6]].pr_pfh);
+	if (error) {
+		/* undo the IPv4 hook installed above */
+		pfil_remove_hook((void *)fr_check, PFIL_IN|PFIL_OUT,
+				 &inetsw[ip_protox[IPPROTO_IP]].pr_pfh);
+pfil_error:
+		SPL_X(s);
+		appr_unload();
+		ip_natunload();
+		fr_stateunload();
+		return error;
+	}
+#  endif
+# endif
+
+# ifdef __sgi
+	error = ipfilter_sgi_attach();
+	if (error) {
+		SPL_X(s);
+		appr_unload();
+		ip_natunload();
+		fr_stateunload();
+		return error;
+	}
+# endif
+
+	/* remember the previous check routine so ipldetach() can restore it */
+	bzero((char *)frcache, sizeof(frcache));
+	fr_savep = fr_checkp;
+	fr_checkp = fr_check;
+	fr_running = 1;
+
+	SPL_X(s);
+	if (fr_pass & FR_PASS)
+		defpass = "pass";
+	else if (fr_pass & FR_BLOCK)
+		defpass = "block";
+	else
+		defpass = "no-match -> block";
+
+	printf("%s initialized.  Default = %s all, Logging = %s\n",
+		ipfilter_version, defpass,
+# ifdef	IPFILTER_LOG
+		"enabled");
+# else
+		"disabled");
+# endif
+#ifdef  _KERNEL
+# if defined(__NetBSD__) && (__NetBSD_Version__ >= 104230000)
+	callout_init(&ipfr_slowtimer_ch);
+	callout_reset(&ipfr_slowtimer_ch, hz / 2, ipfr_slowtimer, NULL);
+# else
+#  if (__FreeBSD_version >= 300000) || defined(__sgi)
+	ipfr_slowtimer_ch = timeout(ipfr_slowtimer, NULL, hz/2);
+#  else
+	timeout(ipfr_slowtimer, NULL, hz/2);
+#  endif
+# endif
+#endif
+	return 0;
+}
+
+
+/*
+ * Disable the filter by removing the hooks from the IP input/output
+ * stream.
+ *
+ * Stops the slow timer, restores the check routine saved by iplattach(),
+ * flushes the rule lists and unloads the subsystems.  Returns 0 when the
+ * filter was not running or was detached, or the error from
+ * pfil_remove_hook() where that is compiled in.  Every return path
+ * restores the priority level raised by SPL_NET().
+ */
+int ipldetach()
+{
+	int s, i = FR_INQUE|FR_OUTQUE;
+#if defined(NETBSD_PF) && \
+    ((__NetBSD_Version__ >= 104200000) || (__FreeBSD_version >= 500011))
+	int error = 0;
+#endif
+
+	/* NOTE(review): the timer is cancelled even if the filter is not running */
+#ifdef  _KERNEL
+# if defined(__NetBSD__) && (__NetBSD_Version__ >= 104230000)
+	callout_stop(&ipfr_slowtimer_ch);
+# else
+#  if (__FreeBSD_version >= 300000)
+	untimeout(ipfr_slowtimer, NULL, ipfr_slowtimer_ch);
+#  else
+#  ifdef __sgi
+	untimeout(ipfr_slowtimer_ch);
+#   else
+	untimeout(ipfr_slowtimer, NULL);
+#   endif
+#  endif /* FreeBSD */
+# endif /* NetBSD */
+#endif
+	SPL_NET(s);
+	if (!fr_running)
+	{
+		printf("IP Filter: not initialized\n");
+		SPL_X(s);
+		return 0;
+	}
+
+	printf("%s unloaded\n", ipfilter_version);
+
+	fr_checkp = fr_savep;
+	i = frflush(IPL_LOGIPF, i);
+	fr_running = 0;
+
+# ifdef NETBSD_PF
+#  if ((__NetBSD_Version__ >= 104200000) || (__FreeBSD_version >= 500011))
+	error = pfil_remove_hook((void *)fr_check, PFIL_IN|PFIL_OUT,
+				 &inetsw[ip_protox[IPPROTO_IP]].pr_pfh);
+	if (error) {
+		SPL_X(s);
+		return error;
+	}
+#  else
+	pfil_remove_hook((void *)fr_check, PFIL_IN|PFIL_OUT);
+#  endif
+#  ifdef USE_INET6
+	error = pfil_remove_hook((void *)fr_check, PFIL_IN|PFIL_OUT,
+				 &inetsw[ip_protox[IPPROTO_IPV6]].pr_pfh);
+	if (error) {
+		SPL_X(s);
+		return error;
+	}
+#  endif
+# endif
+
+# ifdef __sgi
+	ipfilter_sgi_detach();
+# endif
+
+	appr_unload();
+	ipfr_unload();
+	ip_natunload();
+	fr_stateunload();
+	fr_authunload();
+
+	SPL_X(s);
+	return 0;
+}
+#endif /* _KERNEL */
+
+
+/*
+ * Copy the current filter statistics out to the caller and then zero
+ * both entries of frstats.  Returns EFAULT (leaving the counters
+ * untouched) if the copy-out fails, 0 otherwise.
+ */
+static	int	frzerostats(data)
+caddr_t	data;
+{
+	friostat_t snapshot;
+	int rc;
+
+	fr_getstat(&snapshot);
+	rc = IWCOPYPTR((caddr_t)&snapshot, data, sizeof(snapshot));
+	if (rc != 0)
+		return EFAULT;
+
+	bzero((char *)frstats, sizeof(*frstats) * 2);
+	return 0;
+}
+
+
+/*
+ * Filter ioctl interface.
+ *
+ * Requests on the NAT, state and auth minor devices are handed to
+ * nat_ioctl(), fr_state_ioctl() and fr_auth_ioctl() respectively (EIO if
+ * the filter is not running); everything else is handled by the switch
+ * below.  Requests that modify filter state return EPERM unless the
+ * device was opened with FWRITE.  Returns 0 or an errno value.  Every
+ * return path taken after SPL_NET() goes through SPL_X().
+ */
+#ifdef __sgi
+int IPL_EXTERN(ioctl)(dev_t dev, int cmd, caddr_t data, int mode
+# ifdef _KERNEL
+	, cred_t *cp, int *rp
+# endif
+)
+#else
+int IPL_EXTERN(ioctl)(dev, cmd, data, mode
+# if (defined(_KERNEL) && ((_BSDI_VERSION >= 199510) || (BSD >= 199506) || \
+       (NetBSD >= 199511) || (__FreeBSD_version >= 220000) || \
+       defined(__OpenBSD__)))
+, p)
+struct proc *p;
+# else
+)
+# endif
+dev_t dev;
+# if defined(__NetBSD__) || defined(__OpenBSD__) || \
+	(_BSDI_VERSION >= 199701) || (__FreeBSD_version >= 300000)
+u_long cmd;
+# else
+int cmd;
+# endif
+caddr_t data;
+int mode;
+#endif /* __sgi */
+{
+#if defined(_KERNEL) && !SOLARIS
+	int s;
+#endif
+	int error = 0, unit = 0, tmp;
+
+#if (BSD >= 199306) && defined(_KERNEL)
+	if ((securelevel >= 2) && (mode & FWRITE))
+		return EPERM;
+#endif
+#ifdef	_KERNEL
+	unit = GET_MINOR(dev);
+	if ((IPL_LOGMAX < unit) || (unit < 0))
+		return ENXIO;
+#else
+	unit = dev;
+#endif
+
+	SPL_NET(s);
+
+	if (unit == IPL_LOGNAT) {
+		if (fr_running)
+			error = nat_ioctl(data, cmd, mode);
+		else
+			error = EIO;
+		SPL_X(s);
+		return error;
+	}
+	if (unit == IPL_LOGSTATE) {
+		if (fr_running)
+			error = fr_state_ioctl(data, cmd, mode);
+		else
+			error = EIO;
+		SPL_X(s);
+		return error;
+	}
+	if (unit == IPL_LOGAUTH) {
+		if (fr_running)
+			error = fr_auth_ioctl(data, cmd, NULL, NULL);
+		else
+			error = EIO;
+		SPL_X(s);
+		return error;
+	}
+
+	switch (cmd) {
+	case FIONREAD :
+#ifdef IPFILTER_LOG
+		error = IWCOPY((caddr_t)&iplused[IPL_LOGIPF], (caddr_t)data,
+			       sizeof(iplused[IPL_LOGIPF]));
+#endif
+		break;
+#if !defined(IPFILTER_LKM) && defined(_KERNEL)
+	case SIOCFRENB :
+	{
+		u_int	enable;
+
+		if (!(mode & FWRITE))
+			error = EPERM;
+		else {
+			error = IRCOPY(data, (caddr_t)&enable, sizeof(enable));
+			if (error)
+				break;
+			if (enable)
+				error = iplattach();
+			else
+				error = ipldetach();
+		}
+		break;
+	}
+#endif
+	case SIOCSETFF :
+		if (!(mode & FWRITE))
+			error = EPERM;
+		else
+			error = IRCOPY(data, (caddr_t)&fr_flags,
+				       sizeof(fr_flags));
+		break;
+	case SIOCGETFF :
+		error = IWCOPY((caddr_t)&fr_flags, data, sizeof(fr_flags));
+		break;
+	case SIOCINAFR :
+	case SIOCRMAFR :
+	case SIOCADAFR :
+	case SIOCZRLST :
+		/* operate on the active rule set */
+		if (!(mode & FWRITE))
+			error = EPERM;
+		else
+			error = frrequest(unit, cmd, data, fr_active);
+		break;
+	case SIOCINIFR :
+	case SIOCRMIFR :
+	case SIOCADIFR :
+		/* operate on the inactive rule set */
+		if (!(mode & FWRITE))
+			error = EPERM;
+		else
+			error = frrequest(unit, cmd, data, 1 - fr_active);
+		break;
+	case SIOCSWAPA :
+		/* swap active/inactive sets; report the old active index */
+		if (!(mode & FWRITE))
+			error = EPERM;
+		else {
+			bzero((char *)frcache, sizeof(frcache[0]) * 2);
+			*(u_int *)data = fr_active;
+			fr_active = 1 - fr_active;
+		}
+		break;
+	case SIOCGETFS :
+	{
+		friostat_t	fio;
+
+		fr_getstat(&fio);
+		error = IWCOPYPTR((caddr_t)&fio, data, sizeof(fio));
+		if (error)
+			error = EFAULT;
+		break;
+	}
+	case	SIOCFRZST :
+		if (!(mode & FWRITE))
+			error = EPERM;
+		else
+			error = frzerostats(data);
+		break;
+	case	SIOCIPFFL :
+		if (!(mode & FWRITE))
+			error = EPERM;
+		else {
+			error = IRCOPY(data, (caddr_t)&tmp, sizeof(tmp));
+			if (!error) {
+				/* hand the frflush() result back to the caller */
+				tmp = frflush(unit, tmp);
+				error = IWCOPY((caddr_t)&tmp, data,
+					       sizeof(tmp));
+			}
+		}
+		break;
+	case SIOCSTLCK :
+		error = IRCOPY(data, (caddr_t)&tmp, sizeof(tmp));
+		if (!error) {
+			fr_state_lock = tmp;
+			fr_nat_lock = tmp;
+			fr_frag_lock = tmp;
+			fr_auth_lock = tmp;
+		} else
+			error = EFAULT;
+		break;
+#ifdef	IPFILTER_LOG
+	case	SIOCIPFFB :
+		if (!(mode & FWRITE))
+			error = EPERM;
+		else
+			*(int *)data = ipflog_clear(unit);
+		break;
+#endif /* IPFILTER_LOG */
+	case SIOCGFRST :
+		error = IWCOPYPTR((caddr_t)ipfr_fragstats(), data,
+				  sizeof(ipfrstat_t));
+		if (error)
+			error = EFAULT;
+		break;
+	case SIOCAUTHW :
+	case SIOCAUTHR :
+		if (!(mode & FWRITE)) {
+			error = EPERM;
+			break;
+		}
+		/* FALLTHROUGH -- NOTE(review): confirm this is intended */
+	case SIOCFRSYN :
+		if (!(mode & FWRITE))
+			error = EPERM;
+		else {
+#if defined(_KERNEL) && defined(__sgi)
+			ipfsync();
+#endif
+			frsync();
+		}
+		break;
+	default :
+		error = EINVAL;
+		break;
+	}
+	SPL_X(s);
+	return error;
+}
+
+
+/*
+ * Replace every reference to the interface pointer ifp in the active
+ * accounting and filter rule lists (IPv4 and, with USE_INET6, IPv6) by
+ * the marker value (void *)-1, then call ip_natsync() for it.  The rule
+ * lists are walked under the ipf_mutex write lock.
+ */
+void fr_forgetifp(ifp)
+void *ifp;
+{
+	frentry_t *heads[8];
+	register frentry_t *rule;
+	int nheads = 0, i;
+
+	WRITE_ENTER(&ipf_mutex);
+
+	/* collect the list heads in the order they are to be walked */
+	heads[nheads++] = ipacct[0][fr_active];
+	heads[nheads++] = ipacct[1][fr_active];
+	heads[nheads++] = ipfilter[0][fr_active];
+	heads[nheads++] = ipfilter[1][fr_active];
+#ifdef	USE_INET6
+	heads[nheads++] = ipacct6[0][fr_active];
+	heads[nheads++] = ipacct6[1][fr_active];
+	heads[nheads++] = ipfilter6[0][fr_active];
+	heads[nheads++] = ipfilter6[1][fr_active];
+#endif
+
+	for (i = 0; i < nheads; i++) {
+		rule = heads[i];
+		while (rule != NULL) {
+			if (rule->fr_ifa == ifp)
+				rule->fr_ifa = (void *)-1;
+			rule = rule->fr_next;
+		}
+	}
+
+	RWLOCK_EXIT(&ipf_mutex);
+	ip_natsync(ifp);
+}
+
+
+/*
+ * Add, insert, remove or zero the statistics of a single filter rule.
+ *
+ *	unit	minor device the request arrived on (IPL_LOGAUTH selects
+ *		the ipauth list)
+ *	req	the ioctl command (SIOCADAFR, SIOCINAFR, SIOCRMAFR,
+ *		SIOCZRLST and the ...IFR variants)
+ *	data	caller's buffer holding a frentry_t describing the rule
+ *	set	which of the two rule sets to operate on
+ *
+ * Returns 0 on success or EFAULT, EPERM, EEXIST, ESRCH, EBUSY or ENOMEM;
+ * requests on IPL_LOGAUTH return whatever fr_auth_ioctl() returns.
+ */
+static int frrequest(unit, req, data, set)
+int unit;
+#if defined(__NetBSD__) || defined(__OpenBSD__) || (__FreeBSD_version >= 300003)
+u_long req;
+#else
+int req;
+#endif
+int set;
+caddr_t data;
+{
+	register frentry_t *fp, *f, **fprev;
+	register frentry_t **ftail;
+	frentry_t frd;
+	frdest_t *fdp;
+	frgroup_t *fg = NULL;
+	u_int   *p, *pp;
+	int error = 0, in;
+	u_int group;
+
+	/* work on a local copy of the caller's rule */
+	fp = &frd;
+	error = IRCOPYPTR(data, (caddr_t)fp, sizeof(*fp));
+	if (error)
+		return EFAULT;
+	fp->fr_ref = 0;
+#if (BSD >= 199306) && defined(_KERNEL)
+	/* rules carrying a function pointer are refused at securelevel > 0 */
+	if ((securelevel > 0) && (fp->fr_func != NULL))
+		return EPERM;
+#endif
+
+	/*
+	 * Check that the group number does exist and that if a head group
+	 * has been specified, doesn't exist.
+	 */
+	if ((req != SIOCZRLST) && fp->fr_grhead &&
+	    fr_findgroup((u_int)fp->fr_grhead, fp->fr_flags, unit, set, NULL))
+		return EEXIST;
+	if ((req != SIOCZRLST) && fp->fr_group &&
+	    !fr_findgroup((u_int)fp->fr_group, fp->fr_flags, unit, set, NULL))
+		return ESRCH;
+
+	/* first list index: 0 when FR_INQUE is set, 1 otherwise */
+	in = (fp->fr_flags & FR_INQUE) ? 0 : 1;
+
+	/* pick the list head from the unit, rule flags and IP version */
+	if (unit == IPL_LOGAUTH)
+		ftail = fprev = &ipauth;
+	else if ((fp->fr_flags & FR_ACCOUNT) && (fp->fr_v == 4))
+		ftail = fprev = &ipacct[in][set];
+	else if ((fp->fr_flags & (FR_OUTQUE|FR_INQUE)) && (fp->fr_v == 4))
+		ftail = fprev = &ipfilter[in][set];
+#ifdef	USE_INET6
+	else if ((fp->fr_flags & FR_ACCOUNT) && (fp->fr_v == 6))
+		ftail = fprev = &ipacct6[in][set];
+	else if ((fp->fr_flags & (FR_OUTQUE|FR_INQUE)) && (fp->fr_v == 6))
+		ftail = fprev = &ipfilter6[in][set];
+#endif
+	else
+		return ESRCH;
+
+	/* a rule that belongs to a group lives on that group's own list */
+	if ((group = fp->fr_group)) {
+		if (!(fg = fr_findgroup(group, fp->fr_flags, unit, set, NULL)))
+			return ESRCH;
+		ftail = fprev = fg->fg_start;
+	}
+
+	/* clear the first two frcache entries before touching the lists */
+	bzero((char *)frcache, sizeof(frcache[0]) * 2);
+
+	/*
+	 * Resolve interface names to pointers; a name that GETUNIT cannot
+	 * resolve is recorded as (void *)-1.
+	 */
+	if (*fp->fr_ifname) {
+		fp->fr_ifa = GETUNIT(fp->fr_ifname, fp->fr_v);
+		if (!fp->fr_ifa)
+			fp->fr_ifa = (void *)-1;
+	}
+#if BSD >= 199306
+	if (*fp->fr_oifname) {
+		fp->fr_oifa = GETUNIT(fp->fr_oifname, fp->fr_v);
+		if (!fp->fr_oifa)
+			fp->fr_oifa = (void *)-1;
+	}
+#endif
+
+	/* FR_DUP is set only when the fr_dif interface name resolves */
+	fdp = &fp->fr_dif;
+	fp->fr_flags &= ~FR_DUP;
+	if (*fdp->fd_ifname) {
+		fdp->fd_ifp = GETUNIT(fdp->fd_ifname, fp->fr_v);
+		if (!fdp->fd_ifp)
+			fdp->fd_ifp = (struct ifnet *)-1;
+		else
+			fp->fr_flags |= FR_DUP;
+	}
+
+	fdp = &fp->fr_tif;
+	if (*fdp->fd_ifname) {
+		fdp->fd_ifp = GETUNIT(fdp->fd_ifname, fp->fr_v);
+		if (!fdp->fd_ifp)
+			fdp->fd_ifp = (struct ifnet *)-1;
+	}
+
+	/*
+	 * Look for a matching filter rule, but don't include the next or
+	 * interface pointer in the comparison (fr_next, fr_ifa).
+	 *
+	 * fr_cksum is the sum of the u_int words from fr_ip up to (not
+	 * including) fr_cksum itself; it is compared first, then FR_CMPSIZ
+	 * bytes starting at fr_ip are compared with bcmp().
+	 */
+	for (fp->fr_cksum = 0, p = (u_int *)&fp->fr_ip, pp = &fp->fr_cksum;
+	     p < pp; p++)
+		fp->fr_cksum += *p;
+
+	for (; (f = *ftail); ftail = &f->fr_next)
+		if ((fp->fr_cksum == f->fr_cksum) &&
+		    !bcmp((char *)&f->fr_ip, (char *)&fp->fr_ip, FR_CMPSIZ))
+			break;
+
+	/*
+	 * If zero'ing statistics, copy current to caller and zero.
+	 */
+	if (req == SIOCZRLST) {
+		if (!f)
+			return ESRCH;
+		error = IWCOPYPTR((caddr_t)f, data, sizeof(*f));
+		if (error)
+			return EFAULT;
+		f->fr_hits = 0;
+		f->fr_bytes = 0;
+		return 0;
+	}
+
+	/*
+	 * No matching rule: position ftail where a new rule would be
+	 * linked in.  For insert requests a non-zero fr_hits in the
+	 * caller's rule is used as a 1-based position from the list start;
+	 * otherwise ftail is left at / moved to the end of the list.
+	 */
+	if (!f) {
+		if (req != SIOCINAFR && req != SIOCINIFR)
+			while ((f = *ftail))
+				ftail = &f->fr_next;
+		else {
+			if (fp->fr_hits) {
+				ftail = fprev;
+				while (--fp->fr_hits && (f = *ftail))
+					ftail = &f->fr_next;
+			}
+			f = NULL;
+		}
+	}
+
+	if (req == SIOCRMAFR || req == SIOCRMIFR) {
+		/* removal: the rule must already exist */
+		if (!f)
+			error = ESRCH;
+		else {
+			/*
+			 * Only return EBUSY if there is a group list, else
+			 * it's probably just state information referencing
+			 * the rule.
+			 */
+			if ((f->fr_ref > 1) && f->fr_grp)
+				return EBUSY;
+			if (fg && fg->fg_head)
+				fg->fg_head->fr_ref--;
+			if (unit == IPL_LOGAUTH)
+				return fr_auth_ioctl(data, req, f, ftail);
+			if (f->fr_grhead)
+				fr_delgroup((u_int)f->fr_grhead, fp->fr_flags,
+					    unit, set);
+			/* unlink; the entry is freed only if unreferenced */
+			fixskip(fprev, f, -1);
+			*ftail = f->fr_next;
+			f->fr_next = NULL;
+			if (f->fr_ref == 0)
+				KFREE(f);
+		}
+	} else {
+		/* add/insert: the rule must not already exist */
+		if (f)
+			error = EEXIST;
+		else {
+			if (unit == IPL_LOGAUTH)
+				return fr_auth_ioctl(data, req, fp, ftail);
+			KMALLOC(f, frentry_t *);
+			if (f != NULL) {
+				if (fg && fg->fg_head)
+					fg->fg_head->fr_ref++;
+				/* link a heap copy of the rule in at ftail */
+				bcopy((char *)fp, (char *)f, sizeof(*f));
+				f->fr_ref = 1;
+				f->fr_hits = 0;
+				f->fr_next = *ftail;
+				*ftail = f;
+				if (req == SIOCINIFR || req == SIOCINAFR)
+					fixskip(fprev, f, 1);
+				f->fr_grp = NULL;
+				if ((group = f->fr_grhead))
+					fg = fr_addgroup(group, f, unit, set);
+			} else
+				error = ENOMEM;
+		}
+	}
+	return (error);
+}
+
+
+#ifdef	_KERNEL
+/*
+ * routines below for saving IP headers to buffer
+ */
+/*
+ * Device open entry point: returns ENXIO when the minor number is
+ * greater than IPL_LOGMAX and 0 otherwise.
+ */
+# ifdef __sgi
+#  ifdef _KERNEL
+int IPL_EXTERN(open)(dev_t *pdev, int flags, int devtype, cred_t *cp)
+#  else
+int IPL_EXTERN(open)(dev_t dev, int flags)
+#  endif
+# else
+int IPL_EXTERN(open)(dev, flags
+#  if ((_BSDI_VERSION >= 199510) || (BSD >= 199506) || (NetBSD >= 199511) || \
+     (__FreeBSD_version >= 220000) || defined(__OpenBSD__)) && defined(_KERNEL)
+, devtype, p)
+int devtype;
+struct proc *p;
+#  else
+)
+#  endif
+dev_t dev;
+int flags;
+# endif /* __sgi */
+{
+# if defined(__sgi) && defined(_KERNEL)
+	u_int unit = geteminor(*pdev);
+# else
+	u_int unit = GET_MINOR(dev);
+# endif
+
+	return (IPL_LOGMAX < unit) ? ENXIO : 0;
+}
+
+
+/*
+ * Device close entry point: returns ENXIO when the minor number is
+ * greater than IPL_LOGMAX and 0 otherwise.
+ */
+# ifdef __sgi
+int IPL_EXTERN(close)(dev_t dev, int flags, int devtype, cred_t *cp)
+#else
+int IPL_EXTERN(close)(dev, flags
+#  if ((_BSDI_VERSION >= 199510) || (BSD >= 199506) || (NetBSD >= 199511) || \
+     (__FreeBSD_version >= 220000) || defined(__OpenBSD__)) && defined(_KERNEL)
+, devtype, p)
+int devtype;
+struct proc *p;
+#  else
+)
+#  endif
+dev_t dev;
+int flags;
+# endif /* __sgi */
+{
+	u_int	unit = GET_MINOR(dev);
+
+	return (IPL_LOGMAX < unit) ? ENXIO : 0;
+}
+
+/*
+ * iplread/ipllog
+ * both of these must operate with at least splnet() lest they be
+ * called during packet processing and cause an inconsistency to appear in
+ * the filter lists.
+ */
+/*
+ * Device read entry point.  When IPFILTER_LOG is defined the request is
+ * passed to ipflog_read() together with the minor number of dev;
+ * otherwise ENXIO is returned.
+ */
+# ifdef __sgi
+int IPL_EXTERN(read)(dev_t dev, uio_t *uio, cred_t *crp)
+# else
+#  if BSD >= 199306
+int IPL_EXTERN(read)(dev, uio, ioflag)
+int ioflag;
+#  else
+int IPL_EXTERN(read)(dev, uio)
+#  endif
+dev_t dev;
+register struct uio *uio;
+# endif /* __sgi */
+{
+# ifdef IPFILTER_LOG
+	return ipflog_read(GET_MINOR(dev), uio);
+# else
+	/* logging is not compiled in: there is nothing to read */
+	return ENXIO;
+# endif
+}
+
+
+/*
+ * send_reset - this could conceivably be a call to tcp_respond(), but that
+ * requires a large amount of setting up and isn't any more efficient.
+ *
+ * Build a TCP RST in reply to the segment described by oip/fin and hand
+ * it to send_ip().  Ports and addresses are swapped relative to the
+ * original packet.  If the original carried an ACK, the reset's sequence
+ * number is that ACK value; otherwise the reset acknowledges the original
+ * sequence number plus its data length (+1 each for SYN and FIN).
+ *
+ * Returns -1 without sending anything if the original segment is itself
+ * a RST, ENOBUFS if no mbuf is available, otherwise the result of
+ * send_ip().
+ */
+int send_reset(oip, fin)
+struct ip *oip;
+fr_info_t *fin;
+{
+	struct tcphdr *tcp, *tcp2;
+	int tlen = 0, hlen;
+	struct mbuf *m;
+#ifdef	USE_INET6
+	ip6_t *ip6, *oip6 = (ip6_t *)oip;
+#endif
+	ip_t *ip;
+
+	tcp = (struct tcphdr *)fin->fin_dp;
+	if (tcp->th_flags & TH_RST)
+		return -1;		/* feedback loop */
+# if	(BSD < 199306) || defined(__sgi)
+	m = m_get(M_DONTWAIT, MT_HEADER);
+# else
+	m = m_gethdr(M_DONTWAIT, MT_HEADER);
+# endif
+	if (m == NULL)
+		return ENOBUFS;
+
+	/* sequence space consumed by the original segment */
+	tlen = oip->ip_len - fin->fin_hlen - (tcp->th_off << 2) +
+			((tcp->th_flags & TH_SYN) ? 1 : 0) +
+			((tcp->th_flags & TH_FIN) ? 1 : 0);
+
+#ifdef	USE_INET6
+	hlen = (fin->fin_v == 6) ? sizeof(ip6_t) : sizeof(ip_t);
+#else
+	hlen = sizeof(ip_t);
+#endif
+	m->m_len = sizeof(*tcp2) + hlen;
+# if	BSD >= 199306
+	m->m_data += max_linkhdr;
+	m->m_pkthdr.len = m->m_len;
+	m->m_pkthdr.rcvif = (struct ifnet *)0;
+# endif
+	ip = mtod(m, struct ip *);
+# ifdef	USE_INET6
+	ip6 = (ip6_t *)ip;
+# endif
+	bzero((char *)ip, sizeof(*tcp2) + hlen);
+	tcp2 = (struct tcphdr *)((char *)ip + hlen);
+
+	tcp2->th_sport = tcp->th_dport;
+	tcp2->th_dport = tcp->th_sport;
+	if (tcp->th_flags & TH_ACK) {
+		tcp2->th_seq = tcp->th_ack;
+		tcp2->th_flags = TH_RST;
+	} else {
+		tcp2->th_ack = ntohl(tcp->th_seq);
+		tcp2->th_ack += tlen;
+		tcp2->th_ack = htonl(tcp2->th_ack);
+		tcp2->th_flags = TH_RST|TH_ACK;
+	}
+	tcp2->th_off = sizeof(*tcp2) >> 2;
+# ifdef	USE_INET6
+	if (fin->fin_v == 6) {
+		ip6->ip6_plen = htons(sizeof(struct tcphdr));
+		ip6->ip6_nxt = IPPROTO_TCP;
+		ip6->ip6_src = oip6->ip6_dst;
+		ip6->ip6_dst = oip6->ip6_src;
+		tcp2->th_sum = in6_cksum(m, IPPROTO_TCP,
+					 sizeof(*ip6), sizeof(*tcp2));
+		return send_ip(oip, fin, m);
+	}
+# endif
+	/*
+	 * ip_len temporarily holds the TCP length while the checksum is
+	 * computed over the partly filled header plus the TCP header; it
+	 * is then set to the full packet length.
+	 */
+	ip->ip_p = IPPROTO_TCP;
+	ip->ip_len = htons(sizeof(struct tcphdr));
+	ip->ip_src.s_addr = oip->ip_dst.s_addr;
+	ip->ip_dst.s_addr = oip->ip_src.s_addr;
+	tcp2->th_sum = in_cksum(m, hlen + sizeof(*tcp2));
+	ip->ip_len = hlen + sizeof(*tcp2);
+	return send_ip(oip, fin, m);
+}
+
+
+/*
+ * Finish the IP header of a locally generated packet and send it.
+ *
+ * The version is taken from fin->fin_v.  IPv6 packets (USE_INET6 only)
+ * get a hop limit of 127 and go out through ip6_output().  For IPv4 the
+ * header length, TOS, id, offset, TTL and checksum fields are filled in
+ * (TOS and id copied from oip) and the packet is passed to
+ * ipfr_fastroute(), whose result is returned.
+ */
+static int send_ip(oip, fin, m)
+ip_t *oip;
+fr_info_t *fin;
+struct mbuf *m;
+{
+	ip_t *hdr = mtod(m, ip_t *);
+
+	hdr->ip_v = fin->fin_v;
+# ifdef	USE_INET6
+	if (hdr->ip_v == 6) {
+		ip6_t *hdr6 = (ip6_t *)hdr;
+
+		hdr6->ip6_hlim = 127;
+
+		return ip6_output(m, NULL, NULL, 0, NULL, NULL);
+	}
+# endif
+	if (hdr->ip_v == 4) {
+		hdr->ip_hl = (sizeof(*oip) >> 2);
+		hdr->ip_v = IPVERSION;
+		hdr->ip_tos = oip->ip_tos;
+		hdr->ip_id = oip->ip_id;
+		hdr->ip_off = 0;
+# if (BSD < 199306) || defined(__sgi)
+		hdr->ip_ttl = tcp_ttl;
+# else
+		hdr->ip_ttl = ip_defttl;
+# endif
+		hdr->ip_sum = 0;
+	}
+# ifdef	IPSEC
+	m->m_pkthdr.rcvif = NULL;
+# endif
+	return ipfr_fastroute(m, fin, NULL);
+}
+
+
+/*
+ * Build and send an ICMP (or, with USE_INET6, ICMPv6) error in response
+ * to the packet described by oip/fin.
+ *
+ *	oip	header of the offending packet
+ *	type	ICMP type to send; must be in 0..ICMP_MAXTYPE
+ *	fin	packet information; fin_icode supplies the ICMP code
+ *	dst	0: use the address fr_ifpaddr() returns for the receiving
+ *		interface as the source; non-zero: use the offending
+ *		packet's destination address
+ *
+ * Returns -1 for an invalid type/code, an unsupported IP version or when
+ * no source address can be found; ENOBUFS when no mbuf/cluster is
+ * available; 0 when the offending IPv4 packet is an ICMP message other
+ * than an echo/timestamp/info/mask request (nothing is sent); otherwise
+ * the result of send_ip().  The mbuf is released on every failure path
+ * taken after it has been allocated.
+ */
+int send_icmp_err(oip, type, fin, dst)
+ip_t *oip;
+int type;
+fr_info_t *fin;
+int dst;
+{
+	int err, hlen = 0, xtra = 0, iclen, ohlen = 0, avail, code;
+	struct in_addr dst4;
+	struct icmp *icmp;
+	struct mbuf *m;
+	void *ifp;
+#ifdef USE_INET6
+	ip6_t *ip6, *oip6 = (ip6_t *)oip;
+	struct in6_addr dst6;
+#endif
+	ip_t *ip;
+
+	if ((type < 0) || (type > ICMP_MAXTYPE))
+		return -1;
+
+	code = fin->fin_icode;
+#ifdef USE_INET6
+	/* code indexes icmptoicmp6unreach[] below: reject one-past-the-end */
+	if ((code < 0) ||
+	    ((size_t)code >= sizeof(icmptoicmp6unreach)/sizeof(int)))
+		return -1;
+#endif
+
+	avail = 0;
+	m = NULL;
+	ifp = fin->fin_ifp;
+	if (fin->fin_v == 4) {
+		/* only answer ICMP packets that are requests */
+		if ((oip->ip_p == IPPROTO_ICMP) &&
+		    !(fin->fin_fi.fi_fl & FI_SHORT))
+			switch (ntohs(fin->fin_data[0]) >> 8)
+			{
+			case ICMP_ECHO :
+			case ICMP_TSTAMP :
+			case ICMP_IREQ :
+			case ICMP_MASKREQ :
+				break;
+			default :
+				return 0;
+			}
+
+# if	(BSD < 199306) || defined(__sgi)
+		avail = MLEN;
+		m = m_get(M_DONTWAIT, MT_HEADER);
+# else
+		avail = MHLEN;
+		m = m_gethdr(M_DONTWAIT, MT_HEADER);
+# endif
+		if (m == NULL)
+			return ENOBUFS;
+
+		if (dst == 0) {
+			if (fr_ifpaddr(4, ifp, &dst4) == -1) {
+				m_freem(m);
+				return -1;
+			}
+		} else
+			dst4.s_addr = oip->ip_dst.s_addr;
+
+		hlen = sizeof(ip_t);
+		ohlen = oip->ip_hl << 2;
+		xtra = 8;
+	}
+
+#ifdef	USE_INET6
+	else if (fin->fin_v == 6) {
+		hlen = sizeof(ip6_t);
+		ohlen = sizeof(ip6_t);
+		type = icmptoicmp6types[type];
+		if (type == ICMP6_DST_UNREACH)
+			code = icmptoicmp6unreach[code];
+
+		MGETHDR(m, M_DONTWAIT, MT_HEADER);
+		if (!m)
+			return ENOBUFS;
+
+		MCLGET(m, M_DONTWAIT);
+		if ((m->m_flags & M_EXT) == 0) {
+			m_freem(m);
+			return ENOBUFS;
+		}
+# ifdef	M_TRAILINGSPACE
+		m->m_len = 0;
+		avail = M_TRAILINGSPACE(m);
+# else
+		avail = MCLBYTES;
+# endif
+		xtra = MIN(ntohs(oip6->ip6_plen) + sizeof(ip6_t),
+			   avail - hlen - sizeof(*icmp) - max_linkhdr);
+		if (dst == 0) {
+			if (fr_ifpaddr(6, ifp, (struct in_addr *)&dst6) == -1) {
+				m_freem(m);
+				return -1;
+			}
+		} else
+			dst6 = oip6->ip6_dst;
+	}
+#endif
+
+	/* neither branch ran: unsupported IP version, nothing was allocated */
+	if (m == NULL)
+		return -1;
+
+	iclen = hlen + sizeof(*icmp);
+# if	BSD >= 199306
+	avail -= (max_linkhdr + iclen);
+	m->m_data += max_linkhdr;
+	m->m_pkthdr.rcvif = (struct ifnet *)0;
+	if (xtra > avail)
+		xtra = avail;
+	iclen += xtra;
+	m->m_pkthdr.len = iclen;
+#else
+	avail -= (m->m_off + iclen);
+	if (xtra > avail)
+		xtra = avail;
+	iclen += xtra;
+#endif
+	m->m_len = iclen;
+	ip = mtod(m, ip_t *);
+	icmp = (struct icmp *)((char *)ip + hlen);
+	bzero((char *)ip, iclen);
+
+	icmp->icmp_type = type;
+	/*
+	 * NOTE(review): for IPv6 the translated value in `code` is computed
+	 * above but the untranslated fin_icode is what gets stored here --
+	 * confirm which is intended.
+	 */
+	icmp->icmp_code = fin->fin_icode;
+	icmp->icmp_cksum = 0;
+#ifdef	icmp_nextmtu
+	if (type == ICMP_UNREACH &&
+	    fin->fin_icode == ICMP_UNREACH_NEEDFRAG && ifp)
+		icmp->icmp_nextmtu = htons(((struct ifnet *) ifp)->if_mtu);
+#endif
+
+	/* copy the offending packet's header after the ICMP header */
+	if (avail) {
+		bcopy((char *)oip, (char *)&icmp->icmp_ip, MIN(ohlen, avail));
+		avail -= MIN(ohlen, avail);
+	}
+
+#ifdef	USE_INET6
+	ip6 = (ip6_t *)ip;
+	if (fin->fin_v == 6) {
+		ip6->ip6_flow = 0;
+		ip6->ip6_plen = htons(iclen - hlen);
+		ip6->ip6_nxt = IPPROTO_ICMPV6;
+		ip6->ip6_hlim = 0;
+		ip6->ip6_src = dst6;
+		ip6->ip6_dst = oip6->ip6_src;
+		if (avail)
+			bcopy((char *)oip + ohlen,
+			      (char *)&icmp->icmp_ip + ohlen, avail);
+		icmp->icmp_cksum = in6_cksum(m, IPPROTO_ICMPV6,
+					     sizeof(*ip6), iclen - hlen);
+	} else
+#endif
+	{
+		ip->ip_src.s_addr = dst4.s_addr;
+		ip->ip_dst.s_addr = oip->ip_src.s_addr;
+
+		/* IPv4: at most 8 bytes of the original payload follow */
+		if (avail > 8)
+			avail = 8;
+		if (avail)
+			bcopy((char *)oip + ohlen,
+			      (char *)&icmp->icmp_ip + ohlen, avail);
+		icmp->icmp_cksum = ipf_cksum((u_short *)icmp,
+					     sizeof(*icmp) + 8);
+		ip->ip_len = iclen;
+		ip->ip_p = IPPROTO_ICMP;
+	}
+	err = send_ip(oip, fin, m);
+	return err;
+}
+
+
+/*
+ * Boot-time initialisation, compiled only when the filter is not a
+ * loadable module, __FreeBSD_version < 300000 and not on SGI: attach the
+ * filter (reporting a failure on the console) and then call ip_init().
+ * The return type is int where BSD < 199306 and void otherwise.
+ */
+# if !defined(IPFILTER_LKM) && (__FreeBSD_version < 300000) && !defined(__sgi)
+#  if	(BSD < 199306)
+int iplinit __P((void));
+
+int
+#  else
+void iplinit __P((void));
+
+void
+#  endif
+iplinit()
+{
+	if (iplattach() != 0)
+		printf("IP Filter failed to attach\n");
+	ip_init();
+}
+# endif /* !IPFILTER_LKM && (__FreeBSD_version < 300000) && !__sgi */
+
+
+/*
+ * Return the sum of m_len over every mbuf reached from m0 by following
+ * m_next; 0 for a NULL chain.
+ */
+size_t mbufchainlen(m0)
+register struct mbuf *m0;
+{
+	register size_t total;
+
+	total = 0;
+	while (m0 != NULL) {
+		total += m0->m_len;
+		m0 = m0->m_next;
+	}
+	return total;
+}
+
+
+/*
+ * Kernel fast-route: send the packet in m0 out of an interface by calling
+ * that interface's if_output routine directly, fragmenting if necessary.
+ *
+ * m0  - mbuf chain holding the packet; the IP header is at its start.
+ * fin - filter state for the packet; fin_ifp, fin_out and fin_fr are
+ *       modified here.
+ * fdp - optional destination (interface and/or next hop).  May be NULL,
+ *       in which case fin->fin_ifp and the packet's own ip_dst are used.
+ *
+ * Returns -1, without consuming the packet, when a "to <if>" rule is hit
+ * by a reverse-direction state match; returns the ip6_output() result for
+ * IPv6 packets (USE_INET6 only); returns 0 otherwise.  On the IPv4 paths
+ * that return 0 the packet has either been handed to if_output or freed,
+ * and success/failure is only recorded in ipl_frouteok[0]/ipl_frouteok[1].
+ */
+int ipfr_fastroute(m0, fin, fdp)
+struct mbuf *m0;
+fr_info_t *fin;
+frdest_t *fdp;
+{
+	register struct ip *ip, *mhip;
+	register struct mbuf *m = m0;
+	register struct route *ro;
+	int len, off, error = 0, hlen, code;
+	struct ifnet *ifp, *sifp;
+	struct sockaddr_in *dst;
+	struct route iproute;
+	frentry_t *fr;
+
+	hlen = fin->fin_hlen;
+	ip = mtod(m0, struct ip *);
+
+#ifdef	USE_INET6
+	if (ip->ip_v == 6) {
+		/*
+		 * currently "to <if>" and "to <if>:ip#" are not supported
+		 * for IPv6
+		 */
+		return ip6_output(m0, NULL, NULL, 0, NULL, NULL);
+	}
+#endif
+	/*
+	 * Route packet.
+	 */
+	ro = &iproute;
+	bzero((caddr_t)ro, sizeof (*ro));
+	dst = (struct sockaddr_in *)&ro->ro_dst;
+	dst->sin_family = AF_INET;
+
+	fr = fin->fin_fr;
+	if (fdp)
+		ifp = fdp->fd_ifp;
+	else {
+		ifp = fin->fin_ifp;
+		dst->sin_addr = ip->ip_dst;
+	}
+
+	/*
+	 * In case we're here due to "to <if>" being used with "keep state",
+	 * check that we're going in the correct direction.
+	 */
+	if ((fr != NULL) && (fin->fin_rev != 0)) {
+		if ((ifp != NULL) && (fdp == &fr->fr_tif))
+			return -1;
+		dst->sin_addr = ip->ip_dst;
+	} else if (fdp)
+		dst->sin_addr = fdp->fd_ip.s_addr ? fdp->fd_ip : ip->ip_dst;
+
+# if BSD >= 199306
+	dst->sin_len = sizeof(*dst);
+# endif
+# if	(BSD >= 199306) && !defined(__NetBSD__) && !defined(__bsdi__) && \
+	!defined(__OpenBSD__)
+#  ifdef	RTF_CLONING
+	rtalloc_ign(ro, RTF_CLONING);
+#  else
+	rtalloc_ign(ro, RTF_PRCLONING);
+#  endif
+# else
+	rtalloc(ro);
+# endif
+	if (!ifp) {
+		/* No interface given: only a "fastroute" rule may pick one. */
+		if (!fr || !(fr->fr_flags & FR_FASTROUTE)) {
+			error = -2;
+			goto bad;
+		}
+		if (ro->ro_rt == 0 || (ifp = ro->ro_rt->rt_ifp) == 0) {
+			if (in_localaddr(ip->ip_dst))
+				error = EHOSTUNREACH;
+			else
+				error = ENETUNREACH;
+			goto bad;
+		}
+		if (ro->ro_rt->rt_flags & RTF_GATEWAY)
+			dst = (struct sockaddr_in *)&ro->ro_rt->rt_gateway;
+	}
+	if (ro->ro_rt)
+		ro->ro_rt->rt_use++;
+
+	/*
+	 * For input packets which are being "fastrouted", they won't
+	 * go back through output filtering and miss their chance to get
+	 * NAT'd and counted.
+	 */
+	fin->fin_ifp = ifp;
+	if (fin->fin_out == 0) {
+		fin->fin_out = 1;
+		if ((fin->fin_fr = ipacct[1][fr_active]) &&
+		    (fr_scanlist(FR_NOMATCH, ip, fin, m) & FR_ACCOUNT)) {
+			ATOMIC_INCL(frstats[1].fr_acct);
+		}
+		fin->fin_fr = NULL;
+		if (!fr || !(fr->fr_flags & FR_RETMASK))
+			(void) fr_checkstate(ip, fin);
+		(void) ip_natout(ip, fin);
+	} else
+		ip->ip_sum = 0;
+	/*
+	 * If small enough for interface, can just send directly.
+	 */
+	if (ip->ip_len <= ifp->if_mtu) {
+# if	BSD >= 199306
+		int i = 0;
+
+		/*
+		 * Remember whether the header lives in referenced external
+		 * storage; if so the byte-order changes made below are undone
+		 * after if_output returns.
+		 */
+		if ((m->m_flags & M_EXT) && MEXT_IS_REF(m))
+			i = 1;
+# endif
+# ifndef sparc
+#  ifndef __FreeBSD__
+		ip->ip_id = htons(ip->ip_id);
+#  endif
+		ip->ip_len = htons(ip->ip_len);
+		ip->ip_off = htons(ip->ip_off);
+# endif
+		if (!ip->ip_sum)
+			ip->ip_sum = in_cksum(m, hlen);
+# if	BSD >= 199306
+		error = (*ifp->if_output)(ifp, m, (struct sockaddr *)dst,
+					  ro->ro_rt);
+		if (i) {
+#  ifndef __FreeBSD__
+			ip->ip_id = ntohs(ip->ip_id);
+#  endif
+			ip->ip_len = ntohs(ip->ip_len);
+			ip->ip_off = ntohs(ip->ip_off);
+		}
+# else
+		error = (*ifp->if_output)(ifp, m, (struct sockaddr *)dst);
+# endif
+		goto done;
+	}
+	/*
+	 * Too large for interface; fragment if possible.
+	 * Must be able to put at least 8 bytes per fragment.
+	 */
+	if (ip->ip_off & IP_DF) {
+		error = EMSGSIZE;
+		goto bad;
+	}
+	len = (ifp->if_mtu - hlen) &~ 7;
+	if (len < 8) {
+		error = EMSGSIZE;
+		goto bad;
+	}
+
+    {
+	int mhlen, firstlen = len;
+	struct mbuf **mnext = &m->m_act;
+
+	/*
+	 * Loop through length of segment after first fragment,
+	 * make new header and copy data of each part and link onto chain.
+	 */
+	m0 = m;
+	mhlen = sizeof (struct ip);
+	for (off = hlen + len; off < ip->ip_len; off += len) {
+# ifdef	MGETHDR
+		MGETHDR(m, M_DONTWAIT, MT_HEADER);
+# else
+		MGET(m, M_DONTWAIT, MT_HEADER);
+# endif
+		if (m == 0) {
+			/*
+			 * m is NULL here, so the "bad" path would free
+			 * nothing.  Go through sendorfree instead so that
+			 * the original packet and every fragment already
+			 * linked onto the m_act chain are released.
+			 */
+			error = ENOBUFS;
+			goto sendorfree;
+		}
+# if BSD >= 199306
+		m->m_data += max_linkhdr;
+# else
+		m->m_off = MMAXOFF - hlen;
+# endif
+		mhip = mtod(m, struct ip *);
+		bcopy((char *)ip, (char *)mhip, sizeof(*ip));
+		if (hlen > sizeof (struct ip)) {
+			mhlen = ip_optcopy(ip, mhip) + sizeof (struct ip);
+			mhip->ip_hl = mhlen >> 2;
+		}
+		m->m_len = mhlen;
+		mhip->ip_off = ((off - hlen) >> 3) + (ip->ip_off & ~IP_MF);
+		if (ip->ip_off & IP_MF)
+			mhip->ip_off |= IP_MF;
+		if (off + len >= ip->ip_len)
+			len = ip->ip_len - off;
+		else
+			mhip->ip_off |= IP_MF;
+		mhip->ip_len = htons((u_short)(len + mhlen));
+		m->m_next = m_copy(m0, off, len);
+		if (m->m_next == 0) {
+			/*
+			 * The new header mbuf has not been linked onto the
+			 * m_act chain yet, so sendorfree cannot see it;
+			 * release it here.
+			 */
+			m_freem(m);
+			error = ENOBUFS;	/* ??? */
+			goto sendorfree;
+		}
+# if BSD >= 199306
+		m->m_pkthdr.len = mhlen + len;
+		m->m_pkthdr.rcvif = NULL;
+# endif
+# ifndef sparc
+		mhip->ip_off = htons((u_short)mhip->ip_off);
+# endif
+		mhip->ip_sum = 0;
+		mhip->ip_sum = in_cksum(m, mhlen);
+		*mnext = m;
+		mnext = &m->m_act;
+	}
+	/*
+	 * Update first fragment by trimming what's been copied out
+	 * and updating header, then send each fragment (in order).
+	 */
+	m_adj(m0, hlen + firstlen - ip->ip_len);
+	ip->ip_len = htons((u_short)(hlen + firstlen));
+	ip->ip_off = htons((u_short)(ip->ip_off | IP_MF));
+	ip->ip_sum = 0;
+	ip->ip_sum = in_cksum(m0, hlen);
+sendorfree:
+	/*
+	 * Walk the m_act chain: transmit while no error has occurred,
+	 * free every remaining packet once one has.
+	 */
+	for (m = m0; m; m = m0) {
+		m0 = m->m_act;
+		m->m_act = 0;
+		if (error == 0)
+# if BSD >= 199306
+			error = (*ifp->if_output)(ifp, m,
+			    (struct sockaddr *)dst, ro->ro_rt);
+# else
+			error = (*ifp->if_output)(ifp, m,
+			    (struct sockaddr *)dst);
+# endif
+		else
+			m_freem(m);
+	}
+    }
+done:
+	if (!error)
+		ipl_frouteok[0]++;
+	else
+		ipl_frouteok[1]++;
+
+	if (ro->ro_rt)
+		RTFREE(ro->ro_rt);
+	return 0;
+bad:
+	if (error == EMSGSIZE) {
+		/*
+		 * Report "fragmentation needed" against the outgoing
+		 * interface, then restore the caller's fin_ifp/fin_icode.
+		 */
+		sifp = fin->fin_ifp;
+		code = fin->fin_icode;
+		fin->fin_icode = ICMP_UNREACH_NEEDFRAG;
+		fin->fin_ifp = ifp;
+		(void) send_icmp_err(ip, ICMP_UNREACH, fin, 1);
+		fin->fin_ifp = sifp;
+		fin->fin_icode = code;
+	}
+	m_freem(m);
+	goto done;
+}
+
+
+/*
+ * Check whether the route back to address ipa leaves through interface
+ * ifp.
+ *
+ * ipa - address to look up in the routing table.
+ * ifp - interface to compare against the route's rt_ifp.
+ *
+ * Returns 1 when a route exists and its interface is ifp, 0 when there is
+ * no route or the interface differs.
+ */
+int fr_verifysrc(ipa, ifp)
+struct in_addr ipa;
+void *ifp;
+{
+	struct sockaddr_in *dst;
+	struct route iproute;
+	int match;
+
+	bzero((char *)&iproute, sizeof(iproute));
+	dst = (struct sockaddr_in *)&iproute.ro_dst;
+# if BSD >= 199306
+	/* same setup as the lookup in ipfr_fastroute() */
+	dst->sin_len = sizeof(*dst);
+# endif
+	dst->sin_family = AF_INET;
+	dst->sin_addr = ipa;
+# if	(BSD >= 199306) && !defined(__NetBSD__) && !defined(__bsdi__) && \
+	!defined(__OpenBSD__)
+#  ifdef	RTF_CLONING
+	rtalloc_ign(&iproute, RTF_CLONING);
+#  else
+	rtalloc_ign(&iproute, RTF_PRCLONING);
+#  endif
+# else
+	rtalloc(&iproute);
+# endif
+	if (iproute.ro_rt == NULL)
+		return 0;
+	match = (ifp == iproute.ro_rt->rt_ifp);
+	/* drop the reference the lookup took, as ipfr_fastroute() does */
+	RTFREE(iproute.ro_rt);
+	return match;
+}
+
+#else /* #ifdef _KERNEL */
+
+
+/*
+ * Userland if_output stand-in that ignores its arguments and returns 0.
+ * get_unit() installs it as the if_output routine of every interface it
+ * creates.  The __sgi variant takes no rtentry argument.
+ */
+# ifdef __sgi
+static int no_output __P((struct ifnet *ifp, struct mbuf *m,
+			   struct sockaddr *s))
+# else
+static int no_output __P((struct ifnet *ifp, struct mbuf *m,
+			   struct sockaddr *s, struct rtentry *rt))
+# endif
+{
+	return 0;
+}
+
+
+/*
+ * Userland if_output stand-in installed by init_ifp(): appends the raw
+ * packet to the file /tmp/<interface name>.
+ *
+ * ifp - interface whose name selects the output file.
+ * m   - really a pointer to the IP header (the userland ipfr_fastroute()
+ *       passes its ip_t pointer here); ip_len is expected in network
+ *       byte order and gives the number of bytes written.
+ *
+ * Returns 0 on success, -1 if the file name does not fit, the file cannot
+ * be opened, or the write fails.
+ */
+# ifdef __STDC__
+#  ifdef __sgi
+static int write_output __P((struct ifnet *ifp, struct mbuf *m,
+			     struct sockaddr *s))
+#  else
+static int write_output __P((struct ifnet *ifp, struct mbuf *m,
+			     struct sockaddr *s, struct rtentry *rt))
+#  endif
+{
+	ip_t *ip = (ip_t *)m;
+# else
+static int write_output(ifp, ip)
+struct ifnet *ifp;
+ip_t *ip;
+{
+# endif
+	char fname[32];
+	int fd, n, rv = 0;
+
+	/* Bound the formatted name; interface names are caller supplied. */
+# if (defined(NetBSD) && (NetBSD <= 1991011) && (NetBSD >= 199606)) || \
+	(defined(OpenBSD) && (OpenBSD >= 199603))
+	n = snprintf(fname, sizeof(fname), "/tmp/%s", ifp->if_xname);
+# else
+	n = snprintf(fname, sizeof(fname), "/tmp/%s%d", ifp->if_name,
+		     ifp->if_unit);
+# endif
+	if (n < 0 || n >= (int)sizeof(fname)) {
+		fprintf(stderr, "write_output: interface name too long\n");
+		return -1;
+	}
+	fd = open(fname, O_WRONLY|O_APPEND);
+	if (fd == -1) {
+		perror("open");
+		return -1;
+	}
+	if (write(fd, (char *)ip, ntohs(ip->ip_len)) == -1) {
+		perror("write");
+		rv = -1;
+	}
+	close(fd);
+	return rv;
+}
+
+
+/*
+ * Userland interface lookup: return the entry in the NULL-terminated
+ * ifneta[] table whose name is "name", creating and appending a new one
+ * (with if_output set to no_output) when none matches.
+ *
+ * name - interface name, e.g. "le0".  Where struct ifnet has no if_xname,
+ *        it is split at the first digit into if_name and if_unit.
+ * v    - unused.
+ *
+ * Returns the interface, or NULL if memory cannot be allocated; on
+ * failure ifneta[] and nifs are left exactly as they were.
+ *
+ * NOTE: a name without any digit is stored with if_unit == -1 and is
+ * matched as "<name>-1", so looking the same name up again creates another
+ * entry.  That behaviour is unchanged from the original code.
+ */
+struct ifnet *get_unit(name, v)
+char *name;
+int v;
+{
+	struct ifnet *ifp, **ifa, **newa;
+# if (defined(NetBSD) && (NetBSD <= 1991011) && (NetBSD >= 199606)) || \
+	(defined(OpenBSD) && (OpenBSD >= 199603))
+	for (ifa = ifneta; ifa && (ifp = *ifa); ifa++) {
+		if (!strcmp(name, ifp->if_xname))
+			return ifp;
+	}
+# else
+	char unit[24], *s;
+	size_t plen;
+
+	/*
+	 * Compare "name" against if_name followed by if_unit without
+	 * building the combined string in a fixed-size buffer.
+	 */
+	for (ifa = ifneta; ifa && (ifp = *ifa); ifa++) {
+		plen = strlen(ifp->if_name);
+		if (strncmp(name, ifp->if_name, plen) != 0)
+			continue;
+		(void) sprintf(unit, "%d", ifp->if_unit);
+		if (!strcmp(name + plen, unit))
+			return ifp;
+	}
+# endif
+
+	/*
+	 * Build the new interface completely before touching the table so
+	 * that a failure cannot leave a half-initialised entry behind.
+	 */
+	ifp = (struct ifnet *)calloc(1, sizeof(*ifp));
+	if (!ifp)
+		return NULL;
+
+# if (defined(NetBSD) && (NetBSD <= 1991011) && (NetBSD >= 199606)) || \
+	(defined(OpenBSD) && (OpenBSD >= 199603))
+	/* calloc() zeroed the buffer, so the last byte stays a terminator */
+	strncpy(ifp->if_xname, name, sizeof(ifp->if_xname) - 1);
+# else
+	for (s = name; *s && !isdigit((unsigned char)*s); s++)
+		;
+	if (*s) {
+		ifp->if_unit = atoi(s);
+		ifp->if_name = (char *)malloc(s - name + 1);
+		if (ifp->if_name) {
+			strncpy(ifp->if_name, name, s - name);
+			ifp->if_name[s - name] = '\0';
+		}
+	} else {
+		ifp->if_name = strdup(name);
+		ifp->if_unit = -1;
+	}
+	if (!ifp->if_name) {
+		free(ifp);
+		return NULL;
+	}
+# endif
+	ifp->if_output = no_output;
+
+	/*
+	 * Make room for one more entry plus the NULL terminator.  The result
+	 * goes into a temporary so the existing table survives a failure.
+	 */
+	if (!ifneta) {
+		newa = (struct ifnet **)malloc(sizeof(*newa) * 2);
+		if (newa)
+			nifs = 0;
+	} else
+		newa = (struct ifnet **)realloc(ifneta,
+						(nifs + 2) * sizeof(*newa));
+	if (!newa) {
+# if (defined(NetBSD) && (NetBSD <= 1991011) && (NetBSD >= 199606)) || \
+	(defined(OpenBSD) && (OpenBSD >= 199603))
+# else
+		free(ifp->if_name);
+# endif
+		free(ifp);
+		return NULL;
+	}
+	newa[nifs++] = ifp;
+	newa[nifs] = NULL;
+	ifneta = newa;
+	return ifp;
+}
+
+
+
+/*
+ * For every interface in ifneta[]: switch its if_output routine to
+ * write_output and create the (initially empty) file /tmp/<interface name>
+ * that write_output appends to.  The file is created with O_EXCL, so an
+ * already existing file is reported through perror() and left alone.
+ * An interface whose file name would not fit in the local buffer is
+ * reported on stderr and no file is created for it.
+ */
+void init_ifp()
+{
+	struct ifnet *ifp, **ifa;
+	char fname[32];
+	int fd, n;
+
+	for (ifa = ifneta; ifa && (ifp = *ifa); ifa++) {
+		ifp->if_output = write_output;
+# if (defined(NetBSD) && (NetBSD <= 1991011) && (NetBSD >= 199606)) || \
+	(defined(OpenBSD) && (OpenBSD >= 199603))
+		n = snprintf(fname, sizeof(fname), "/tmp/%s", ifp->if_xname);
+# else
+		n = snprintf(fname, sizeof(fname), "/tmp/%s%d", ifp->if_name,
+			     ifp->if_unit);
+# endif
+		if (n < 0 || n >= (int)sizeof(fname)) {
+			fprintf(stderr, "init_ifp: interface name too long\n");
+			continue;
+		}
+		fd = open(fname, O_WRONLY|O_CREAT|O_EXCL|O_TRUNC, 0600);
+		if (fd == -1)
+			perror("open");
+		else
+			close(fd);
+	}
+}
+
+
+/*
+ * Userland version of ipfr_fastroute(): there is no routing table here,
+ * so the packet is simply passed to the if_output routine of the interface
+ * named by fdp.  Before the call ip_len and ip_off (with IP_MF or'd in)
+ * are converted with htons() and ip_sum is cleared.
+ *
+ * Always returns 0; nothing is sent when fdp carries no interface.
+ */
+int ipfr_fastroute(ip, fin, fdp)
+ip_t *ip;
+fr_info_t *fin;
+frdest_t *fdp;
+{
+	struct ifnet *outif;
+
+	outif = fdp->fd_ifp;
+	if (outif != NULL) {
+		ip->ip_len = htons((u_short)ip->ip_len);
+		ip->ip_off = htons((u_short)(ip->ip_off | IP_MF));
+		ip->ip_sum = 0;
+#ifdef __sgi
+		(*outif->if_output)(outif, (void *)ip, NULL);
+#else
+		(*outif->if_output)(outif, (void *)ip, NULL, 0);
+#endif
+	}
+	/* no routing table out here, so there is nothing else to try */
+	return 0;
+}
+
+
+/*
+ * Userland stand-in for the logging routine: passes "l" to verbose()
+ * and returns 0.
+ */
+int ipllog __P((void))
+{
+	verbose("l");
+	return 0;
+}
+
+
+/*
+ * Userland stand-in for sending a TCP RST: nothing is transmitted, the
+ * event is only reported through verbose().  Both arguments are unused.
+ * Always returns 0.
+ */
+int send_reset(ip, ifp)
+ip_t *ip;
+struct ifnet *ifp;
+{
+	verbose("- TCP RST sent\n");
+	return 0;
+}
+
+
+/*
+ * Userland stand-in for returning an ICMP error: nothing is transmitted,
+ * the event is only reported through verbose().  Both arguments are
+ * unused.  Always returns 0.
+ */
+int icmp_error(ip, ifp)
+ip_t *ip;
+struct ifnet *ifp;
+{
+	/* was "- TCP RST sent", copied from send_reset() */
+	verbose("- ICMP unreachable sent\n");
+	return 0;
+}
+
+
+/*
+ * Userland stand-in for frsync(): there is nothing to do here.
+ */
+void frsync()
+{
+}
+#endif /* _KERNEL */
diff --git a/sys/netinet/ip_fil.h b/sys/netinet/ip_fil.h
new file mode 100644
index 0000000..a960349
--- /dev/null
+++ b/sys/netinet/ip_fil.h
@@ -0,0 +1,634 @@
+/*
+ * Copyright (C) 1993-2000 by Darren Reed.
+ *
+ * Redistribution and use in source and binary forms are permitted
+ * provided that this notice is preserved and due credit is given
+ * to the original author and the contributors.
+ *
+ * @(#)ip_fil.h	1.35 6/5/96
+ * $Id: ip_fil.h,v 2.29.2.4 2000/11/12 11:54:53 darrenr Exp $
+ * $FreeBSD$
+ */
+
+#ifndef	__IP_FIL_H__
+#define	__IP_FIL_H__
+
+/*
+ * Pathnames for various IP Filter control devices.  Used by LKM
+ * and userland, so defined here.
+ */
+#define	IPNAT_NAME	"/dev/ipnat"
+#define	IPSTATE_NAME	"/dev/ipstate"
+#define	IPAUTH_NAME	"/dev/ipauth"
+
+#ifndef	SOLARIS
+# define SOLARIS (defined(sun) && (defined(__svr4__) || defined(__SVR4)))
+#endif
+
+#if defined(KERNEL) && !defined(_KERNEL)
+# define	_KERNEL
+#endif
+
+#ifndef	__P
+# ifdef	__STDC__
+#  define	__P(x)	x
+# else
+#  define	__P(x)	()
+# endif
+#endif
+
+#if defined(__STDC__) || defined(__GNUC__)
+# define	SIOCADAFR	_IOW('r', 60, struct frentry *)
+# define	SIOCRMAFR	_IOW('r', 61, struct frentry *)
+# define	SIOCSETFF	_IOW('r', 62, u_int)
+# define	SIOCGETFF	_IOR('r', 63, u_int)
+# define	SIOCGETFS	_IOWR('r', 64, struct friostat *)
+# define	SIOCIPFFL	_IOWR('r', 65, int)
+# define	SIOCIPFFB	_IOR('r', 66, int)
+# define	SIOCADIFR	_IOW('r', 67, struct frentry *)
+# define	SIOCRMIFR	_IOW('r', 68, struct frentry *)
+# define	SIOCSWAPA	_IOR('r', 69, u_int)
+# define	SIOCINAFR	_IOW('r', 70, struct frentry *)
+# define	SIOCINIFR	_IOW('r', 71, struct frentry *)
+# define	SIOCFRENB	_IOW('r', 72, u_int)
+# define	SIOCFRSYN	_IOW('r', 73, u_int)
+# define	SIOCFRZST	_IOWR('r', 74, struct friostat *)
+# define	SIOCZRLST	_IOWR('r', 75, struct frentry *)
+# define	SIOCAUTHW	_IOWR('r', 76, struct fr_info *)
+# define	SIOCAUTHR	_IOWR('r', 77, struct fr_info *)
+# define	SIOCATHST	_IOWR('r', 78, struct fr_authstat *)
+# define	SIOCSTLCK	_IOWR('r', 79, u_int)
+# define	SIOCSTPUT	_IOWR('r', 80, struct ipstate_save *)
+# define	SIOCSTGET	_IOWR('r', 81, struct ipstate_save *)
+# define	SIOCSTGSZ	_IOWR('r', 82, struct natget *)
+# define	SIOCGFRST	_IOWR('r', 83, struct ipfrstat *)
+#else
+# define	SIOCADAFR	_IOW(r, 60, struct frentry *)
+# define	SIOCRMAFR	_IOW(r, 61, struct frentry *)
+# define	SIOCSETFF	_IOW(r, 62, u_int)
+# define	SIOCGETFF	_IOR(r, 63, u_int)
+# define	SIOCGETFS	_IOWR(r, 64, struct friostat *)
+# define	SIOCIPFFL	_IOWR(r, 65, int)
+# define	SIOCIPFFB	_IOR(r, 66, int)
+# define	SIOCADIFR	_IOW(r, 67, struct frentry *)
+# define	SIOCRMIFR	_IOW(r, 68, struct frentry *)
+# define	SIOCSWAPA	_IOR(r, 69, u_int)
+# define	SIOCINAFR	_IOW(r, 70, struct frentry *)
+# define	SIOCINIFR	_IOW(r, 71, struct frentry *)
+# define	SIOCFRENB	_IOW(r, 72, u_int)
+# define	SIOCFRSYN	_IOW(r, 73, u_int)
+# define	SIOCFRZST	_IOWR(r, 74, struct friostat *)
+# define	SIOCZRLST	_IOWR(r, 75, struct frentry *)
+# define	SIOCAUTHW	_IOWR(r, 76, struct fr_info *)
+# define	SIOCAUTHR	_IOWR(r, 77, struct fr_info *)
+# define	SIOCATHST	_IOWR(r, 78, struct fr_authstat *)
+# define	SIOCSTLCK	_IOWR(r, 79, u_int)
+# define	SIOCSTPUT	_IOWR(r, 80, struct ipstate_save *)
+# define	SIOCSTGET	_IOWR(r, 81, struct ipstate_save *)
+# define	SIOCSTGSZ	_IOWR(r, 82, struct natget *)
+# define	SIOCGFRST	_IOWR(r, 83, struct ipfrstat *)
+#endif
+#define	SIOCADDFR	SIOCADAFR
+#define	SIOCDELFR	SIOCRMAFR
+#define	SIOCINSFR	SIOCINAFR
+
+
+/*
+ * Summary of the IP header fields the filter matches on.  The same layout
+ * is used for a packet (fr_info.fin_fi) and, inside a rule, for both the
+ * values to match (frentry.fr_ip) and their mask (frentry.fr_mip).
+ */
+typedef	struct	fr_ip	{
+	u_32_t	fi_v:4;		/* IP version */
+	u_32_t	fi_fl:4;	/* packet flags */
+	u_32_t	fi_tos:8;	/* IP packet TOS */
+	u_32_t	fi_ttl:8;	/* IP packet TTL */
+	u_32_t	fi_p:8;		/* IP packet protocol */
+	union	i6addr fi_src;	/* source address from packet */
+	union	i6addr fi_dst;	/* destination address from packet */
+	u_32_t	fi_optmsk;	/* bitmask composed from IP options */
+	u_short	fi_secmsk;	/* bitmask composed from IP security options */
+	u_short	fi_auth;	/* authentication code from IP sec. options */
+} fr_ip_t;
+
+#define	FI_OPTIONS	(FF_OPTIONS >> 24)
+#define	FI_TCPUDP	(FF_TCPUDP >> 24)	/* TCP/UDP implied comparison */
+#define	FI_FRAG		(FF_FRAG >> 24)
+#define	FI_SHORT	(FF_SHORT >> 24)
+#define	FI_CMP		(FI_OPTIONS|FI_TCPUDP|FI_SHORT)
+
+#define	fi_saddr	fi_src.in4.s_addr
+#define	fi_daddr	fi_dst.in4.s_addr
+
+
+/*
+ * These are both used by the state and NAT code to indicate that one port or
+ * the other should be treated as a wildcard.
+ */
+#define	FI_W_SPORT	0x00000100
+#define	FI_W_DPORT	0x00000200
+#define	FI_WILDP	(FI_W_SPORT|FI_W_DPORT)
+#define	FI_W_SADDR	0x00000400
+#define	FI_W_DADDR	0x00000800
+#define	FI_WILDA	(FI_W_SADDR|FI_W_DADDR)
+#define	FI_NEWFR	0x00001000
+
+/*
+ * Per-packet information handed to the filter routines.  Field order
+ * matters: FI_CSIZE covers everything before fin_icode and FI_COPYSIZE
+ * covers everything before fin_dp.
+ */
+typedef	struct	fr_info	{
+	void	*fin_ifp;		/* interface packet is `on' */
+	struct	fr_ip	fin_fi;		/* IP Packet summary */
+	u_short	fin_data[2];		/* TCP/UDP ports, ICMP code/type */
+	u_char	fin_out;		/* in or out ? 1 == out, 0 == in */
+	u_char	fin_rev;		/* state only: 1 = reverse */
+	u_short	fin_hlen;		/* length of IP header in bytes */
+	u_char	fin_tcpf;		/* TCP header flags (SYN, ACK, etc) */
+	/* From here on is packet specific */
+	u_char	fin_icode;		/* ICMP error to return */
+	u_short	fin_rule;		/* rule # last matched */
+	u_32_t	fin_group;		/* group number, -1 for none */
+	struct	frentry *fin_fr;	/* last matching rule */
+	char	*fin_dp;		/* start of data past IP header */
+	u_short	fin_dlen;		/* length of data portion of packet */
+	u_short	fin_id;			/* IP packet id field */
+	void	*fin_mp;		/* pointer to pointer to mbuf */
+#if SOLARIS
+	void	*fin_qfm;		/* pointer to mblk where pkt starts */
+	void	*fin_qif;		/* NOTE(review): undocumented - presumably the qif_t for the packet; confirm */
+#endif
+	u_short	fin_plen;		/* NOTE(review): undocumented - presumably total packet length; confirm */
+	u_short	fin_off;		/* NOTE(review): undocumented - presumably the fragment offset; confirm */
+} fr_info_t;
+
+#define	fin_v	fin_fi.fi_v
+
+/*
+ * Size for compares on fr_info structures
+ */
+#define	FI_CSIZE	offsetof(fr_info_t, fin_icode)
+
+/*
+ * Size for copying cache fr_info structure
+ */
+#define	FI_COPYSIZE	offsetof(fr_info_t, fin_dp)
+
+/*
+ * Destination for a rule's "to" and "dup-to" actions (see fr_tif and
+ * fr_dif in struct frentry).
+ */
+typedef	struct	frdest	{
+	void	*fd_ifp;		/* interface to send through; ipfr_fastroute() uses it as a struct ifnet * */
+	struct	in_addr	fd_ip;		/* next hop; when zero ipfr_fastroute() uses the packet's own destination */
+	char	fd_ifname[IFNAMSIZ];	/* presumably the name fd_ifp is resolved from - confirm */
+} frdest_t;
+
+/*
+ * One port comparison (source or destination) of a rule; see the ftu_*
+ * accessor macros that follow struct frtuc.
+ */
+typedef	struct	frpcmp	{
+	int	frp_cmp;	/* data for port comparisons (presumably one of FR_NONE..FR_INRANGE - confirm) */
+	u_short	frp_port;	/* port to compare with (presumably the low port for <> and >< - confirm) */
+	u_short	frp_top;	/* top port for <> and >< */
+} frpcmp_t;
+
+/*
+ * TCP/UDP specific part of a rule: TCP flag match plus one port
+ * comparison each for the source and the destination port.
+ */
+typedef	struct	frtuc	{
+	u_char	ftu_tcpfm;	/* tcp flags mask */
+	u_char	ftu_tcpf;	/* tcp flags */
+	frpcmp_t	ftu_src;	/* source port comparison */
+	frpcmp_t	ftu_dst;	/* destination port comparison */
+} frtuc_t;
+
+#define	ftu_scmp	ftu_src.frp_cmp
+#define	ftu_dcmp	ftu_dst.frp_cmp
+#define	ftu_sport	ftu_src.frp_port
+#define	ftu_dport	ftu_dst.frp_port
+#define	ftu_stop	ftu_src.frp_top
+#define	ftu_dtop	ftu_dst.frp_top
+
+/*
+ * A single filter rule.  FR_CMPSIZ is defined as the size of everything
+ * from fr_ip to the end of the structure, so field order from fr_ip
+ * onwards is significant.
+ */
+typedef	struct	frentry {
+	struct	frentry	*fr_next;	/* next rule in the list */
+	u_32_t	fr_group;	/* group to which this rule belongs */
+	u_32_t	fr_grhead;	/* group # which this rule starts */
+	struct	frentry	*fr_grp;	/* presumably the rules of the group started here - confirm */
+	int	fr_ref;		/* reference count - for grouping */
+	void	*fr_ifa;	/* presumably the interface resolved from fr_ifname - confirm */
+#if BSD >= 199306
+	void	*fr_oifa;	/* presumably the interface resolved from fr_oifname - confirm */
+#endif
+	/*
+	 * These are only incremented when a packet matches this rule and
+	 * it is the last match
+	 */
+	U_QUAD_T	fr_hits;
+	U_QUAD_T	fr_bytes;
+	/*
+	 * Fields after this may not change whilst in the kernel.
+	 */
+	struct	fr_ip	fr_ip;
+	struct	fr_ip	fr_mip;	/* mask structure */
+
+
+	u_short	fr_icmpm;	/* data for ICMP packets (mask) */
+	u_short	fr_icmp;
+
+	frtuc_t	fr_tuc;
+	u_32_t	fr_flags;	/* per-rule flags && options (see below) */
+	u_int	fr_skip;	/* # of rules to skip */
+	u_int	fr_loglevel;	/* syslog log facility + priority */
+	int	(*fr_func) __P((int, ip_t *, fr_info_t *));	/* call this function */
+	int	fr_sap;		/* For solaris only */
+	u_char	fr_icode;	/* return ICMP code */
+	char	fr_ifname[IFNAMSIZ];
+#if BSD >= 199306
+	char	fr_oifname[IFNAMSIZ];
+#endif
+	struct	frdest	fr_tif;	/* "to" interface */
+	struct	frdest	fr_dif;	/* duplicate packet interfaces */
+	u_int	fr_cksum;	/* checksum on filter rules for performance */
+} frentry_t;
+
+#define	fr_v		fr_ip.fi_v
+#define	fr_proto	fr_ip.fi_p
+#define	fr_ttl		fr_ip.fi_ttl
+#define	fr_tos		fr_ip.fi_tos
+#define	fr_tcpfm	fr_tuc.ftu_tcpfm
+#define	fr_tcpf		fr_tuc.ftu_tcpf
+#define	fr_scmp		fr_tuc.ftu_scmp
+#define	fr_dcmp		fr_tuc.ftu_dcmp
+#define	fr_dport	fr_tuc.ftu_dport
+#define	fr_sport	fr_tuc.ftu_sport
+#define	fr_stop		fr_tuc.ftu_stop
+#define	fr_dtop		fr_tuc.ftu_dtop
+#define	fr_dst		fr_ip.fi_dst.in4
+#define	fr_src		fr_ip.fi_src.in4
+#define	fr_dmsk		fr_mip.fi_dst.in4
+#define	fr_smsk		fr_mip.fi_src.in4
+
+#ifndef	offsetof
+#define	offsetof(t,m)	(int)((&((t *)0L)->m))
+#endif
+#define	FR_CMPSIZ	(sizeof(struct frentry) - offsetof(frentry_t, fr_ip))
+
+/*
+ * fr_flags
+ */
+#define	FR_BLOCK	0x00001	/* do not allow packet to pass */
+#define	FR_PASS		0x00002	/* allow packet to pass */
+#define	FR_OUTQUE	0x00004	/* outgoing packets */
+#define	FR_INQUE	0x00008	/* ingoing packets */
+#define	FR_LOG		0x00010	/* Log */
+#define	FR_LOGB		0x00011	/* Log-fail */
+#define	FR_LOGP		0x00012	/* Log-pass */
+#define	FR_LOGBODY	0x00020	/* Log the body */
+#define	FR_LOGFIRST	0x00040	/* Log the first byte if state held */
+#define	FR_RETRST	0x00080	/* Return TCP RST packet - reset connection */
+#define	FR_RETICMP	0x00100	/* Return ICMP unreachable packet */
+#define	FR_FAKEICMP	0x00180	/* Return ICMP unreachable with fake source */
+#define	FR_NOMATCH	0x00200	/* no match occurred */
+#define	FR_ACCOUNT	0x00400	/* count packet bytes */
+#define	FR_KEEPFRAG	0x00800	/* keep fragment information */
+#define	FR_KEEPSTATE	0x01000	/* keep `connection' state information */
+#define	FR_INACTIVE	0x02000
+#define	FR_QUICK	0x04000	/* match & stop processing list */
+#define	FR_FASTROUTE	0x08000	/* bypass normal routing */
+#define	FR_CALLNOW	0x10000	/* call another function (fr_func) if matches */
+#define	FR_DUP		0x20000	/* duplicate packet */
+#define	FR_LOGORBLOCK	0x40000	/* block the packet if it can't be logged */
+#define	FR_NOTSRCIP	0x80000	/* not the src IP# */
+#define	FR_NOTDSTIP	0x100000	/* not the dst IP# */
+#define	FR_AUTH		0x200000	/* use authentication */
+#define	FR_PREAUTH	0x400000	/* require preauthentication */
+#define	FR_DONTCACHE	0x800000	/* don't cache the result */
+
+#define	FR_LOGMASK	(FR_LOG|FR_LOGP|FR_LOGB)
+#define	FR_RETMASK	(FR_RETICMP|FR_RETRST|FR_FAKEICMP)
+
+/*
+ * These correspond to #define's for FI_* and are stored in fr_flags
+ */
+#define	FF_OPTIONS	0x01000000
+#define	FF_TCPUDP	0x02000000
+#define	FF_FRAG		0x04000000
+#define	FF_SHORT	0x08000000
+/*
+ * recognized flags for SIOCGETFF and SIOCSETFF, and get put in fr_flags
+ */
+#define	FF_LOGPASS	0x10000000
+#define	FF_LOGBLOCK	0x20000000
+#define	FF_LOGNOMATCH	0x40000000
+#define	FF_LOGGING	(FF_LOGPASS|FF_LOGBLOCK|FF_LOGNOMATCH)
+#define	FF_BLOCKNONIP	0x80000000	/* Solaris2 Only */
+
+#define	FR_NONE 0
+#define	FR_EQUAL 1
+#define	FR_NEQUAL 2
+#define FR_LESST 3
+#define FR_GREATERT 4
+#define FR_LESSTE 5
+#define FR_GREATERTE 6
+#define	FR_OUTRANGE 7
+#define	FR_INRANGE 8
+
+/*
+ * Packet counters kept by the filter.  struct friostat carries two of
+ * these (f_st[2]) and the global frstats[] array is made of them.
+ */
+typedef	struct	filterstats {
+	u_long	fr_pass;	/* packets allowed */
+	u_long	fr_block;	/* packets denied */
+	u_long	fr_nom;		/* packets which don't match any rule */
+	u_long	fr_short;	/* packets which are short */
+	u_long	fr_ppkl;	/* packets allowed and logged */
+	u_long	fr_bpkl;	/* packets denied and logged */
+	u_long	fr_npkl;	/* packets unmatched and logged */
+	u_long	fr_pkl;		/* packets logged */
+	u_long	fr_skip;	/* packets to be logged but buffer full */
+	u_long	fr_ret;		/* packets for which a return is sent */
+	u_long	fr_acct;	/* packets for which counting was performed */
+	u_long	fr_bnfr;	/* bad attempts to allocate fragment state */
+	u_long	fr_nfr;		/* new fragment state kept */
+	u_long	fr_cfr;		/* add new fragment state but complete pkt */
+	u_long	fr_bads;	/* bad attempts to allocate packet state */
+	u_long	fr_ads;		/* new packet state kept */
+	u_long	fr_chit;	/* cached hit */
+	u_long	fr_tcpbad;	/* TCP checksum check failures */
+	u_long	fr_pull[2];	/* good and bad pullup attempts */
+	u_long	fr_badsrc;	/* source received doesn't match route */
+	u_long	fr_badttl;	/* TTL in packet doesn't reach minimum */
+#if SOLARIS
+	u_long	fr_notdata;	/* PROTO/PCPROTO that have no data */
+	u_long	fr_nodata;	/* mblks that have no data */
+	u_long	fr_bad;		/* bad IP packets to the filter */
+	u_long	fr_notip;	/* packets passed through no on ip queue */
+	u_long	fr_drop;	/* packets dropped - no info for them! */
+	u_long	fr_copy;	/* messages copied due to db_ref > 1 */
+#endif
+	u_long	fr_ipv6[2];	/* IPv6 packets in/out */
+} filterstats_t;
+
+/*
+ * For SIOCGETFS
+ */
+/*
+ * Snapshot of filter statistics and rule list heads; fr_getstat() takes
+ * a pointer to one of these.
+ * NOTE(review): the [2] dimension of the rule pointers presumably selects
+ * the active/inactive rule set (cf. f_active) - confirm.
+ */
+typedef	struct	friostat	{
+	struct	filterstats	f_st[2];
+	struct	frentry		*f_fin[2];
+	struct	frentry		*f_fout[2];
+	struct	frentry		*f_acctin[2];
+	struct	frentry		*f_acctout[2];
+	struct	frentry		*f_fin6[2];
+	struct	frentry		*f_fout6[2];
+	struct	frentry		*f_acctin6[2];
+	struct	frentry		*f_acctout6[2];
+	struct	frentry		*f_auth;
+	struct	frgroup		*f_groups[3][2];
+	u_long	f_froute[2];	/* presumably a copy of ipl_frouteok[] (fastroute ok/failed) - confirm */
+	int	f_defpass;	/* default pass - from fr_pass */
+	char	f_active;	/* 1 or 0 - active rule set */
+	char	f_running;	/* 1 if running, else 0 */
+	char	f_logging;	/* 1 if enabled, else 0 */
+	char	f_version[32];	/* version string */
+	int	f_locks[4];
+} friostat_t;
+
+/*
+ * Pairing of a value with a bit.
+ * NOTE(review): presumably maps an IP option value (ol_val) to the bit
+ * that represents it in masks such as fi_optmsk (ol_bit) - confirm
+ * against the tables that use it.
+ */
+typedef struct	optlist {
+	u_short ol_val;
+	int	ol_bit;
+} optlist_t;
+
+
+/*
+ * Group list structure.
+ */
+typedef	struct frgroup {
+	u_32_t	fg_num;			/* group number (cf. fr_group/fr_grhead in struct frentry) */
+	struct	frgroup	*fg_next;	/* next group in the list */
+	struct	frentry	*fg_head;	/* presumably the rule that starts this group - confirm */
+	struct	frentry	**fg_start;	/* presumably where the group's rule list is anchored - confirm */
+} frgroup_t;
+
+
+/*
+ * Log structure.  Each packet header logged is prepended by one of these.
+ * Following this in the log records read from the device will be an ipflog
+ * structure which is then followed by any packet data.
+ */
+typedef	struct	iplog	{
+	u_32_t	ipl_magic;	/* presumably set to IPL_MAGIC - confirm */
+	u_int	ipl_count;	/* NOTE(review): undocumented - presumably a repeat count for identical records; confirm */
+	u_long	ipl_sec;	/* presumably timestamp, seconds - confirm */
+	u_long	ipl_usec;	/* presumably timestamp, microseconds - confirm */
+	size_t	ipl_dsize;	/* presumably size of the record data - confirm what it includes */
+	struct	iplog	*ipl_next;	/* next record in the list */
+} iplog_t;
+
+#define IPL_MAGIC 0x49504c4d /* 'IPLM' */
+
+/*
+ * Per-packet log header.  The interface is recorded either as a full name
+ * or, where the #else branch applies, as a unit number plus a 4-byte
+ * name field.
+ */
+typedef	struct	ipflog	{
+#if (defined(NetBSD) && (NetBSD <= 1991011) && (NetBSD >= 199603)) || \
+        (defined(OpenBSD) && (OpenBSD >= 199603))
+	u_char	fl_ifname[IFNAMSIZ];
+#else
+	u_int	fl_unit;
+	u_char	fl_ifname[4];
+#endif
+	u_char	fl_plen;	/* extra data after hlen */
+	u_char	fl_hlen;	/* length of IP headers saved */
+	u_short	fl_loglevel;	/* syslog log level */
+	u_32_t	fl_rule;	/* presumably the number of the matching rule (cf. fin_rule) - confirm */
+	u_32_t	fl_group;	/* presumably the group of the matching rule (cf. fin_group) - confirm */
+	u_32_t	fl_flags;	/* NOTE(review): undocumented - presumably FR_/FF_ result flags; confirm */
+	u_32_t	fl_lflags;	/* NOTE(review): undocumented - confirm meaning */
+} ipflog_t;
+
+
+#ifndef	ICMP_UNREACH_FILTER
+# define	ICMP_UNREACH_FILTER	13
+#endif
+
+#ifndef	IPF_LOGGING
+# define	IPF_LOGGING	0
+#endif
+#ifndef	IPF_DEFAULT_PASS
+# define	IPF_DEFAULT_PASS	FR_PASS
+#endif
+
+#define	IPMINLEN(i, h)	((i)->ip_len >= ((i)->ip_hl * 4 + sizeof(struct h)))
+#define	IPLLOGSIZE	8192
+
+/*
+ * Device filenames for reading log information.  Use ipf on Solaris2 because
+ * ipl is already a name used by something else.
+ */
+#ifndef	IPL_NAME
+# if	SOLARIS
+#  define	IPL_NAME	"/dev/ipf"
+# else
+#  define	IPL_NAME	"/dev/ipl"
+# endif
+#endif
+#define	IPL_NAT		IPNAT_NAME
+#define	IPL_STATE	IPSTATE_NAME
+#define	IPL_AUTH	IPAUTH_NAME
+
+#define	IPL_LOGIPF	0	/* Minor device #'s for accessing logs */
+#define	IPL_LOGNAT	1
+#define	IPL_LOGSTATE	2
+#define	IPL_LOGAUTH	3
+#define	IPL_LOGMAX	3
+
+#if !defined(CDEV_MAJOR) && defined (__FreeBSD_version) && \
+    (__FreeBSD_version >= 220000)
+# define	CDEV_MAJOR	79
+#endif
+
+/*
+ * Post NetBSD 1.2 has the PFIL interface for packet filters.  This turns
+ * on those hooks.  We don't need any special mods in non-IP Filter code
+ * with this!
+ */
+#if (defined(NetBSD) && (NetBSD > 199609) && (NetBSD <= 1991011)) || \
+    (defined(NetBSD1_2) && NetBSD1_2 > 1) || (defined(__FreeBSD_version) && \
+     (__FreeBSD_version >= 500011))
+# if (NetBSD >= 199905)
+#  define PFIL_HOOKS
+# endif
+# ifdef PFIL_HOOKS
+#  define NETBSD_PF
+# endif
+#endif
+
+
+#ifndef	_KERNEL
+struct ifnet;
+extern	int	fr_check __P((ip_t *, int, void *, int, mb_t **));
+extern	int	(*fr_checkp) __P((ip_t *, int, void *, int, mb_t **));
+extern	int	send_reset __P((ip_t *, struct ifnet *));
+extern	int	icmp_error __P((ip_t *, struct ifnet *));
+extern	int	ipf_log __P((void));
+extern	int	ipfr_fastroute __P((ip_t *, fr_info_t *, frdest_t *));
+extern	struct	ifnet *get_unit __P((char *, int));
+# if defined(__NetBSD__) || defined(__OpenBSD__) || \
+	  (_BSDI_VERSION >= 199701) || (__FreeBSD_version >= 300000)
+extern	int	iplioctl __P((dev_t, u_long, caddr_t, int));
+# else
+extern	int	iplioctl __P((dev_t, int, caddr_t, int));
+# endif
+extern	int	iplopen __P((dev_t, int));
+extern	int	iplclose __P((dev_t, int));
+#else /* #ifndef _KERNEL */
+# if defined(__NetBSD__) && defined(PFIL_HOOKS)
+extern	void	ipfilterattach __P((int));
+# endif
+extern	int	iplattach __P((void));
+extern	int	ipl_enable __P((void));
+extern	int	ipl_disable __P((void));
+extern	void	ipflog_init __P((void));
+extern	int	ipflog_clear __P((minor_t));
+extern	int	ipflog_read __P((minor_t, struct uio *));
+extern	int	ipflog __P((u_int, ip_t *, fr_info_t *, mb_t *));
+extern	int	ipllog __P((int, fr_info_t *, void **, size_t *, int *, int));
+extern	int	send_icmp_err __P((ip_t *, int, fr_info_t *, int));
+extern	int	send_reset __P((ip_t *, fr_info_t *));
+# if	SOLARIS
+extern	int	fr_check __P((ip_t *, int, void *, int, qif_t *, mb_t **));
+extern	int	(*fr_checkp) __P((ip_t *, int, void *,
+				  int, qif_t *, mb_t **));
+#  if SOLARIS2 >= 7
+extern	int	iplioctl __P((dev_t, int, intptr_t, int, cred_t *, int *));
+#  else
+extern	int	iplioctl __P((dev_t, int, int *, int, cred_t *, int *));
+#  endif
+extern	int	iplopen __P((dev_t *, int, int, cred_t *));
+extern	int	iplclose __P((dev_t, int, int, cred_t *));
+extern	int	ipfsync __P((void));
+extern	int	ipfr_fastroute __P((ip_t *, mblk_t *, mblk_t **,
+				    fr_info_t *, frdest_t *));
+extern	void	copyin_mblk __P((mblk_t *, size_t, size_t, char *));
+extern	void	copyout_mblk __P((mblk_t *, size_t, size_t, char *));
+extern	int	fr_qin __P((queue_t *, mblk_t *));
+extern	int	fr_qout __P((queue_t *, mblk_t *));
+extern	int	iplread __P((dev_t, struct uio *, cred_t *));
+# else /* SOLARIS */
+extern	int	fr_check __P((ip_t *, int, void *, int, mb_t **));
+extern	int	(*fr_checkp) __P((ip_t *, int, void *, int, mb_t **));
+extern	int	ipfr_fastroute __P((mb_t *, fr_info_t *, frdest_t *));
+extern	size_t	mbufchainlen __P((mb_t *));
+#  ifdef	__sgi
+#   include <sys/cred.h>
+extern	int	iplioctl __P((dev_t, int, caddr_t, int, cred_t *, int *));
+extern	int	iplopen __P((dev_t *, int, int, cred_t *));
+extern	int	iplclose __P((dev_t, int, int, cred_t *));
+extern	int	iplread __P((dev_t, struct uio *, cred_t *));
+extern	int	ipfsync __P((void));
+extern	int	ipfilter_sgi_attach __P((void));
+extern	void	ipfilter_sgi_detach __P((void));
+extern	void	ipfilter_sgi_intfsync __P((void));
+#  else
+#   ifdef	IPFILTER_LKM
+extern	int	iplidentify __P((char *));
+#   endif
+#   if (_BSDI_VERSION >= 199510) || (__FreeBSD_version >= 220000) || \
+      (NetBSD >= 199511) || defined(__OpenBSD__)
+#    if defined(__NetBSD__) || (_BSDI_VERSION >= 199701) || \
+       defined(__OpenBSD__) || (__FreeBSD_version >= 300000)
+extern	int	iplioctl __P((dev_t, u_long, caddr_t, int, struct proc *));
+#    else
+extern	int	iplioctl __P((dev_t, int, caddr_t, int, struct proc *));
+#    endif
+extern	int	iplopen __P((dev_t, int, int, struct proc *));
+extern	int	iplclose __P((dev_t, int, int, struct proc *));
+#   else
+#    ifndef	linux
+extern	int	iplopen __P((dev_t, int));
+extern	int	iplclose __P((dev_t, int));
+extern	int	iplioctl __P((dev_t, int, caddr_t, int));
+#    else
+extern	int	iplioctl(struct inode *, struct file *, u_int, u_long);
+extern	int	iplopen __P((struct inode *, struct file *));
+extern	void	iplclose __P((struct inode *, struct file *));
+#    endif /* !linux */
+#   endif /* (_BSDI_VERSION >= 199510) */
+#   if	BSD >= 199306
+extern	int	iplread __P((dev_t, struct uio *, int));
+#   else
+#    ifndef linux
+extern	int	iplread __P((dev_t, struct uio *));
+#    else
+extern	int	iplread(struct inode *, struct file *, char *, int);
+#    endif /* !linux */
+#   endif /* BSD >= 199306 */
+#  endif /* __ sgi */
+# endif /* SOLARIS */
+#endif /* #ifndef _KERNEL */
+
+extern	char	*memstr __P((char *, char *, int, int));
+extern	void	fixskip __P((frentry_t **, frentry_t *, int));
+extern	int	countbits __P((u_32_t));
+extern	int	ipldetach __P((void));
+extern	u_short	ipf_cksum __P((u_short *, int));
+extern	int	ircopyptr __P((void *, void *, size_t));
+extern	int	iwcopyptr __P((void *, void *, size_t));
+
+extern	int	frflush __P((minor_t, int));
+extern	void	frsync __P((void));
+extern	frgroup_t *fr_addgroup __P((u_32_t, frentry_t *, minor_t, int));
+extern	void	fr_delgroup __P((u_32_t, u_32_t, minor_t, int));
+extern	frgroup_t *fr_findgroup __P((u_32_t, u_32_t, minor_t, int,
+				    frgroup_t ***));
+
+extern	int	fr_copytolog __P((int, char *, int));
+extern	void	fr_forgetifp __P((void *));
+extern	void	fr_getstat __P((struct friostat *));
+extern	int	fr_ifpaddr __P((int, void *, struct in_addr *));
+extern	int	fr_lock __P((caddr_t, int *));
+extern  void	fr_makefrip __P((int, ip_t *, fr_info_t *));
+extern	u_short	fr_tcpsum __P((mb_t *, ip_t *, tcphdr_t *));
+extern	int	fr_scanlist __P((u_32_t, ip_t *, fr_info_t *, void *));
+extern	int	fr_tcpudpchk __P((frtuc_t *, fr_info_t *));
+extern	int	fr_verifysrc __P((struct in_addr, void *));
+
+extern	int	ipl_unreach;
+extern	int	fr_running;
+extern	u_long	ipl_frouteok[2];
+extern	int	fr_pass;
+extern	int	fr_flags;
+extern	int	fr_active;
+extern	int	fr_chksrc;
+extern	int	fr_minttl;
+extern	int	fr_minttllog;
+extern	fr_info_t	frcache[2];
+extern	char	ipfilter_version[];
+extern	iplog_t	**iplh[IPL_LOGMAX+1], *iplt[IPL_LOGMAX+1];
+extern	size_t	iplused[IPL_LOGMAX + 1];
+extern	struct frentry *ipfilter[2][2], *ipacct[2][2];
+#ifdef	USE_INET6
+extern	struct frentry *ipfilter6[2][2], *ipacct6[2][2];
+extern	int	icmptoicmp6types[ICMP_MAXTYPE+1];
+extern	int	icmptoicmp6unreach[ICMP_MAX_UNREACH];
+#endif
+extern	struct frgroup *ipfgroups[3][2];
+extern	struct filterstats frstats[];
+
+#endif	/* __IP_FIL_H__ */
diff --git a/sys/netinet/ip_flow.c b/sys/netinet/ip_flow.c
new file mode 100644
index 0000000..6c9119b
--- /dev/null
+++ b/sys/netinet/ip_flow.c
@@ -0,0 +1,327 @@
+/*-
+ * Copyright (c) 1998 The NetBSD Foundation, Inc.
+ * All rights reserved.
+ *
+ * This code is derived from software contributed to The NetBSD Foundation
+ * by the 3am Software Foundry ("3am").  It was developed by Matt Thomas.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the NetBSD
+ *	Foundation, Inc. and its contributors.
+ * 4. Neither the name of The NetBSD Foundation nor the names of its
+ *    contributors may be used to endorse or promote products derived
+ *    from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+ * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
+ * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/malloc.h>
+#include <sys/mbuf.h>
+#include <sys/protosw.h>
+#include <sys/socket.h>
+#include <sys/kernel.h>
+
+#include <sys/sysctl.h>
+
+#include <net/if.h>
+#include <net/route.h>
+
+#include <netinet/in.h>
+#include <netinet/in_systm.h>
+#include <netinet/ip.h>
+#include <netinet/in_var.h>
+#include <netinet/ip_var.h>
+#include <netinet/ip_flow.h>
+
+#define	IPFLOW_TIMER		(5 * PR_SLOWHZ)	/* fresh ipf_timer value */
+#define IPFLOW_HASHBITS		6	/* should not be a multiple of 8 */
+#define	IPFLOW_HASHSIZE		(1 << IPFLOW_HASHBITS)	/* bucket count */
+static LIST_HEAD(ipflowhead, ipflow) ipflows[IPFLOW_HASHSIZE]; /* buckets */
+static int ipflow_inuse;		/* ipflow entries currently allocated */
+#define	IPFLOW_MAX		256	/* at this count, reap instead of malloc */
+
+static int ipflow_active = 0;		/* sysctl switch; 0 disables the cache */
+SYSCTL_INT(_net_inet_ip, IPCTL_FASTFORWARDING, fastforwarding, CTLFLAG_RW,
+    &ipflow_active, 0, "Enable flow-based IP forwarding");
+
+static MALLOC_DEFINE(M_IPFLOW, "ip_flow", "IP flow");
+
+/* Hash (dst, src, tos) into a bucket index in [0, IPFLOW_HASHSIZE). */
+static unsigned
+ipflow_hash(struct in_addr dst, struct in_addr src, unsigned tos)
+{
+	unsigned hash = tos;
+	int idx;
+	/* idx == 0 must not shift by 32 (undefined): fold in dst unshifted. */
+	for (idx = 0; idx < 32; idx += IPFLOW_HASHBITS)
+		hash += (idx ? dst.s_addr >> (32 - idx) : dst.s_addr)
+		    + (src.s_addr >> idx);
+	return hash & (IPFLOW_HASHSIZE-1);
+}
+
+/* Return the cached flow whose dst, src and tos match ip, or NULL. */
+static struct ipflow *
+ipflow_lookup(const struct ip *ip)
+{
+	struct ipflow *flow;
+	unsigned bucket = ipflow_hash(ip->ip_dst, ip->ip_src, ip->ip_tos);
+
+	for (flow = LIST_FIRST(&ipflows[bucket]); flow != NULL;
+	    flow = LIST_NEXT(flow, ipf_next)) {
+		if (ip->ip_dst.s_addr != flow->ipf_dst.s_addr)
+			continue;
+		if (ip->ip_src.s_addr != flow->ipf_src.s_addr)
+			continue;
+		if (ip->ip_tos != flow->ipf_tos)
+			continue;
+		return flow;
+	}
+	return NULL;
+}
+
+int	/* 1: m was passed to if_output; 0: not handled, m is untouched */
+ipflow_fastforward(
+	struct mbuf *m)
+{
+	struct ip *ip;
+	struct ipflow *ipf;
+	struct rtentry *rt;
+	int error;
+
+	/*
+	 * Are we forwarding packets?  Big enough for an IP packet?
+	 */
+	if (!ipforwarding || !ipflow_active || m->m_len < sizeof(struct ip))
+		return 0;
+	/*
+	 * IP header with no option and valid version and length
+	 */
+	ip = mtod(m, struct ip *);
+	if (ip->ip_v != IPVERSION || ip->ip_hl != (sizeof(struct ip) >> 2)
+	    || ntohs(ip->ip_len) > m->m_pkthdr.len)
+		return 0;
+	/*
+	 * Find a cached flow for this dst/src/tos; without one, give up.
+	 */
+	if ((ipf = ipflow_lookup(ip)) == NULL)
+		return 0;
+
+	/*
+	 * Route and interface still up?
+	 */
+	rt = ipf->ipf_ro.ro_rt;
+	if ((rt->rt_flags & RTF_UP) == 0 || (rt->rt_ifp->if_flags & IFF_UP) == 0)
+		return 0;
+
+	/*
+	 * Packet must fit the interface MTU and have TTL left to decrement.
+	 */
+	if (m->m_pkthdr.len > rt->rt_ifp->if_mtu || ip->ip_ttl <= IPTTLDEC)
+		return 0;
+
+	/*
+	 * Everything checks out and so we can forward this packet.
+	 * Modify the TTL and incrementally change the checksum.
+	 */
+	ip->ip_ttl -= IPTTLDEC;
+	if (ip->ip_sum >= htons(0xffff - (IPTTLDEC << 8))) {
+		ip->ip_sum += htons(IPTTLDEC << 8) + 1;
+	} else {
+		ip->ip_sum += htons(IPTTLDEC << 8);
+	}
+
+	/*
+	 * Send it.  ENOBUFS counts as a drop, other failures as errors.
+	 */
+	ipf->ipf_uses++;
+	ipf->ipf_timer = IPFLOW_TIMER;	/* each use restarts the lifetime */
+	if ((error = (*rt->rt_ifp->if_output)(rt->rt_ifp, m, &ipf->ipf_ro.ro_dst, rt)) != 0) {
+		if (error == ENOBUFS)
+			ipf->ipf_dropped++;
+		else
+			ipf->ipf_errors++;
+	}
+	return 1;	/* returned even when if_output reported an error */
+}
+
+static void
+ipflow_addstats(
+	struct ipflow *ipf)
+{
+	ipf->ipf_ro.ro_rt->rt_use += ipf->ipf_uses;
+	ipstat.ips_cantforward += ipf->ipf_errors + ipf->ipf_dropped;
+	ipstat.ips_forward += ipf->ipf_uses;
+	ipstat.ips_fastforward += ipf->ipf_uses;
+}
+
+/*
+ * Unlink a flow from its hash bucket, account its counters, release its
+ * route reference and free the entry.
+ */
+static void
+ipflow_free(struct ipflow *ipf)
+{
+	int s;
+
+	/* Only the list unlink runs at splimp; the rest follows splx. */
+	s = splimp();
+	LIST_REMOVE(ipf, ipf_next);
+	splx(s);
+	ipflow_addstats(ipf);
+	RTFREE(ipf->ipf_ro.ro_rt);
+	ipflow_inuse--;
+	FREE(ipf, M_IPFLOW);
+}
+
+static struct ipflow *	/* victim entry: unlinked, route released, not freed */
+ipflow_reap(
+	void)
+{
+	struct ipflow *ipf, *maybe_ipf = NULL;
+	int idx;
+	int s;
+
+	for (idx = 0; idx < IPFLOW_HASHSIZE; idx++) {
+		ipf = LIST_FIRST(&ipflows[idx]);
+		while (ipf != NULL) {
+			/*
+			 * If this no longer points to a valid route
+			 * reclaim it at once, skipping the rest of the scan.
+			 */
+			if ((ipf->ipf_ro.ro_rt->rt_flags & RTF_UP) == 0)
+				goto done;
+			/*
+			 * Otherwise prefer the entry with the smallest
+			 * remaining timer; ties go to the one with fewer
+			 * uses over the last and current periods.
+			 */
+			if (maybe_ipf == NULL
+			    || ipf->ipf_timer < maybe_ipf->ipf_timer
+			    || (ipf->ipf_timer == maybe_ipf->ipf_timer
+				&& ipf->ipf_last_uses + ipf->ipf_uses <
+				      maybe_ipf->ipf_last_uses +
+					maybe_ipf->ipf_uses))
+				maybe_ipf = ipf;
+			ipf = LIST_NEXT(ipf, ipf_next);
+		}
+	}
+	ipf = maybe_ipf;	/* NOTE(review): NULL if the table were empty */
+    done:
+	/*
+	 * Unlink the victim; it is returned to the caller, not freed.
+	 */
+	s = splimp();
+	LIST_REMOVE(ipf, ipf_next);
+	splx(s);
+	ipflow_addstats(ipf);
+	RTFREE(ipf->ipf_ro.ro_rt);
+	return ipf;
+}
+
+/* Timer tick: free flows whose timer hits 0; roll the others' use counts. */
+void
+ipflow_slowtimo(void)
+{
+	struct ipflow *flow, *following;
+	int bucket;
+
+	for (bucket = 0; bucket < IPFLOW_HASHSIZE; bucket++) {
+		for (flow = LIST_FIRST(&ipflows[bucket]); flow != NULL;
+		    flow = following) {
+			/* Fetch the successor first: flow may be freed below. */
+			following = LIST_NEXT(flow, ipf_next);
+			if (--flow->ipf_timer == 0) {
+				ipflow_free(flow);
+				continue;
+			}
+			flow->ipf_last_uses = flow->ipf_uses;
+			flow->ipf_ro.ro_rt->rt_use += flow->ipf_uses;
+			ipstat.ips_forward += flow->ipf_uses;
+			ipstat.ips_fastforward += flow->ipf_uses;
+			flow->ipf_uses = 0;
+		}
+	}
+}
+
+/*
+ * Create or refresh the flow cache entry for the packet in m, keyed by
+ * its dst/src/tos, and attach a copy of route ro to it.
+ */
+void
+ipflow_create(const struct route *ro, struct mbuf *m)
+{
+	const struct ip *const ip = mtod(m, struct ip *);
+	struct ipflow *ipf;
+	unsigned hash;
+	int s;
+
+	/* Don't create cache entries for ICMP messages. */
+	if (!ipflow_active || ip->ip_p == IPPROTO_ICMP)
+		return;
+
+	ipf = ipflow_lookup(ip);
+	if (ipf != NULL) {
+		/*
+		 * A flow already exists: remove it from its list, flush
+		 * its counters into the statistics and free the old route.
+		 */
+		s = splimp();
+		LIST_REMOVE(ipf, ipf_next);
+		splx(s);
+		ipflow_addstats(ipf);
+		RTFREE(ipf->ipf_ro.ro_rt);
+		ipf->ipf_uses = ipf->ipf_last_uses = 0;
+		ipf->ipf_errors = ipf->ipf_dropped = 0;
+	} else {
+		/*
+		 * No flow yet: recycle an entry if we are at our limit,
+		 * otherwise try to malloc a new one.
+		 */
+		if (ipflow_inuse == IPFLOW_MAX) {
+			ipf = ipflow_reap();
+		} else {
+			ipf = (struct ipflow *) malloc(sizeof(*ipf), M_IPFLOW,
+						       M_NOWAIT);
+			if (ipf == NULL)
+				return;
+			ipflow_inuse++;
+		}
+		bzero((caddr_t) ipf, sizeof(*ipf));
+	}
+
+	/* Fill in the updated information. */
+	ipf->ipf_ro = *ro;
+	ro->ro_rt->rt_refcnt++;
+	ipf->ipf_dst = ip->ip_dst;
+	ipf->ipf_src = ip->ip_src;
+	ipf->ipf_tos = ip->ip_tos;
+	ipf->ipf_timer = IPFLOW_TIMER;
+	/* Insert into the appropriate bucket of the flow table. */
+	hash = ipflow_hash(ip->ip_dst, ip->ip_src, ip->ip_tos);
+	s = splimp();
+	LIST_INSERT_HEAD(&ipflows[hash], ipf, ipf_next);
+	splx(s);
+}
diff --git a/sys/netinet/ip_flow.h b/sys/netinet/ip_flow.h
new file mode 100644
index 0000000..4675996
--- /dev/null
+++ b/sys/netinet/ip_flow.h
@@ -0,0 +1,57 @@
+/*-
+ * Copyright (c) 1998 The NetBSD Foundation, Inc.
+ * All rights reserved.
+ *
+ * This code is derived from software contributed to The NetBSD Foundation
+ * by the 3am Software Foundry ("3am").  It was developed by Matt Thomas.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the NetBSD
+ *	Foundation, Inc. and its contributors.
+ * 4. Neither the name of The NetBSD Foundation nor the names of its
+ *    contributors may be used to endorse or promote products derived
+ *    from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+ * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
+ * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+
+#ifndef _NETINET_IP_FLOW_H
+#define _NETINET_IP_FLOW_H
+
+struct ipflow {				/* one cached forwarding flow */
+	LIST_ENTRY(ipflow) ipf_next;	/* next ipflow in bucket */
+	struct in_addr ipf_dst;		/* lookup key: destination address */
+	struct in_addr ipf_src;		/* lookup key: source address */
+
+	u_int8_t ipf_tos;		/* lookup key: type-of-service */
+	struct route ipf_ro;		/* associated route entry */
+	u_long ipf_uses;		/* number of uses in this period */
+
+	int ipf_timer;			/* remaining lifetime, in timer ticks */
+	u_long ipf_dropped;		/* ENOBUFS returned by if_output */
+	u_long ipf_errors;		/* other errors returned by if_output */
+	u_long ipf_last_uses;		/* number of uses in last period */
+};
+
+#endif
diff --git a/sys/netinet/ip_frag.c b/sys/netinet/ip_frag.c
new file mode 100644
index 0000000..f5548fc
--- /dev/null
+++ b/sys/netinet/ip_frag.c
@@ -0,0 +1,576 @@
+/*
+ * Copyright (C) 1993-2000 by Darren Reed.
+ *
+ * Redistribution and use in source and binary forms are permitted
+ * provided that this notice is preserved and due credit is given
+ * to the original author and the contributors.
+ */
+#if !defined(lint)
+static const char sccsid[] = "@(#)ip_frag.c	1.11 3/24/96 (C) 1993-1995 Darren Reed";
+static const char rcsid[] = "@(#)$FreeBSD$";
+#endif
+
+#if defined(KERNEL) && !defined(_KERNEL)
+# define      _KERNEL
+#endif
+
+#include <sys/errno.h>
+#include <sys/types.h>
+#include <sys/param.h>
+#include <sys/time.h>
+#include <sys/file.h>
+#if !defined(_KERNEL) && !defined(KERNEL)
+# include <stdio.h>
+# include <string.h>
+# include <stdlib.h>
+#endif
+#if (defined(KERNEL) || defined(_KERNEL)) && (__FreeBSD_version >= 220000)
+# include <sys/filio.h>
+# include <sys/fcntl.h>
+#else
+# include <sys/ioctl.h>
+#endif
+#include <sys/uio.h>
+#ifndef linux
+# include <sys/protosw.h>
+#endif
+#include <sys/socket.h>
+#if defined(_KERNEL) && !defined(linux)
+# include <sys/systm.h>
+#endif
+#if !defined(__SVR4) && !defined(__svr4__)
+# if defined(_KERNEL) && !defined(__sgi)
+#  include <sys/kernel.h>
+# endif
+# ifndef linux
+#  include <sys/mbuf.h>
+# endif
+#else
+# include <sys/byteorder.h>
+# ifdef _KERNEL
+#  include <sys/dditypes.h>
+# endif
+# include <sys/stream.h>
+# include <sys/kmem.h>
+#endif
+#include <net/if.h>
+#ifdef sun
+# include <net/af.h>
+#endif
+#include <net/route.h>
+#include <netinet/in.h>
+#include <netinet/in_systm.h>
+#include <netinet/ip.h>
+#ifndef linux
+# include <netinet/ip_var.h>
+#endif
+#include <netinet/tcp.h>
+#include <netinet/udp.h>
+#include <netinet/ip_icmp.h>
+#include "netinet/ip_compat.h"
+#include <netinet/tcpip.h>
+#include "netinet/ip_fil.h"
+#include "netinet/ip_proxy.h"
+#include "netinet/ip_nat.h"
+#include "netinet/ip_frag.h"
+#include "netinet/ip_state.h"
+#include "netinet/ip_auth.h"
+#if (__FreeBSD_version >= 300000)
+# include <sys/malloc.h>
+# if (defined(KERNEL) || defined(_KERNEL))
+#  ifndef IPFILTER_LKM
+#   include <sys/libkern.h>
+#   include <sys/systm.h>
+# endif
+extern struct callout_handle ipfr_slowtimer_ch;
+# endif
+#endif
+#if defined(__NetBSD__) && (__NetBSD_Version__ >= 104230000)
+# include <sys/callout.h>
+extern struct callout ipfr_slowtimer_ch;
+#endif
+
+
+static ipfr_t	*ipfr_heads[IPFT_SIZE];		/* filter fragment hash table */
+static ipfr_t	*ipfr_nattab[IPFT_SIZE];	/* NAT fragment hash table */
+static ipfrstat_t ipfr_stats;			/* counters for ipfr_fragstats() */
+static int	ipfr_inuse = 0;			/* entries held, both tables */
+
+int	fr_ipfrttl = 120;	/* 120 half-second timer ticks = 60 seconds */
+int	fr_frag_lock = 0;	/* nonzero: add/lookup/expire do nothing */
+
+#ifdef _KERNEL
+# if SOLARIS2 >= 7
+extern	timeout_id_t	ipfr_timer_id;
+# else
+extern	int	ipfr_timer_id;
+# endif
+#endif
+#if	(SOLARIS || defined(__sgi)) && defined(_KERNEL)
+extern	KRWLOCK_T	ipf_frag, ipf_natfrag, ipf_nat, ipf_mutex;
+# if	SOLARIS
+extern	KRWLOCK_T	ipf_solaris;
+# else
+KRWLOCK_T	ipf_solaris;
+# endif
+extern	kmutex_t	ipf_rw;
+#endif
+
+
+static ipfr_t *ipfr_new __P((ip_t *, fr_info_t *, u_int, ipfr_t **));
+static ipfr_t *ipfr_lookup __P((ip_t *, fr_info_t *, ipfr_t **));
+static void ipfr_delete __P((ipfr_t *));
+
+
+ipfrstat_t *ipfr_fragstats()	/* refresh table pointers + in-use count */
+{
+	ipfr_stats.ifs_inuse = ipfr_inuse;
+	ipfr_stats.ifs_nattab = ipfr_nattab;
+	ipfr_stats.ifs_table = ipfr_heads;
+	return &ipfr_stats;
+}
+
+
+/*
+ * Add a fragment cache entry for ip to table.  Returns the new entry, or
+ * NULL if the cache is full, ip is not a fragment, it exists, or no memory.
+ */
+static ipfr_t *ipfr_new(ip, fin, pass, table)
+ip_t *ip;
+fr_info_t *fin;
+u_int pass;
+ipfr_t *table[];
+{
+	ipfr_t **fp, *fra, frag;
+	u_int idx, off;
+
+	if (ipfr_inuse >= IPFT_SIZE)
+		return NULL;
+
+	if (!(fin->fin_fi.fi_fl & FI_FRAG))
+		return NULL;
+
+	frag.ipfr_p = ip->ip_p;
+	idx = ip->ip_p;
+	frag.ipfr_id = ip->ip_id;
+	idx += ip->ip_id;
+	frag.ipfr_tos = ip->ip_tos;
+	frag.ipfr_src.s_addr = ip->ip_src.s_addr;
+	idx += ip->ip_src.s_addr;
+	frag.ipfr_dst.s_addr = ip->ip_dst.s_addr;
+	idx += ip->ip_dst.s_addr;
+	frag.ipfr_ifp = fin->fin_ifp;
+	idx *= 127;
+	idx %= IPFT_SIZE;
+
+	/*
+	 * first, make sure it isn't already there...
+	 */
+	for (fp = &table[idx]; (fra = *fp); fp = &fra->ipfr_next)
+		if (!bcmp((char *)&frag.ipfr_src, (char *)&fra->ipfr_src,
+			  IPFR_CMPSZ)) {
+			ATOMIC_INCL(ipfr_stats.ifs_exists);
+			return NULL;
+		}
+
+	/*
+	 * allocate some memory, if possible, if not, just record that we
+	 * failed to do so.
+	 */
+	KMALLOC(fra, ipfr_t *);
+	if (fra == NULL) {
+		ATOMIC_INCL(ipfr_stats.ifs_nomem);
+		return NULL;
+	}
+
+	if ((fra->ipfr_rule = fin->fin_fr) != NULL) {
+		ATOMIC_INC32(fin->fin_fr->fr_ref);
+	}
+
+
+	/*
+	 * Insert the fragment at the head of its hash chain and copy the
+	 * search key with bcopy rather than assigning each field.
+	 * Set the ttl to the default ("pass" is currently unused here).
+	 */
+	if ((fra->ipfr_next = table[idx]))
+		table[idx]->ipfr_prev = fra;
+	fra->ipfr_prev = NULL;
+	fra->ipfr_data = NULL;
+	table[idx] = fra;
+	bcopy((char *)&frag.ipfr_src, (char *)&fra->ipfr_src, IPFR_CMPSZ);
+	fra->ipfr_ttl = fr_ipfrttl;
+	/*
+	 * Compute the offset of the expected start of the next packet.
+	 */
+	off = ip->ip_off & IP_OFFMASK;
+	/* The entry is never zeroed, so set ipfr_seen0 on both paths. */
+	fra->ipfr_seen0 = (off == 0);
+	fra->ipfr_off = off + (fin->fin_dlen >> 3);
+	ATOMIC_INCL(ipfr_stats.ifs_new);
+	ATOMIC_INC32(ipfr_inuse);
+	return fra;
+}
+
+
+int ipfr_newfrag(ip, fin, pass)	/* 0 if an entry was added, else -1 */
+ip_t *ip;
+fr_info_t *fin;
+u_int pass;
+{
+	ipfr_t	*entry;
+
+	if (fr_frag_lock || (ip->ip_v != 4))	/* IPv4 only, not while locked */
+		return -1;
+	WRITE_ENTER(&ipf_frag);
+	entry = ipfr_new(ip, fin, pass, ipfr_heads);
+	RWLOCK_EXIT(&ipf_frag);
+	return (entry == NULL) ? -1 : 0;
+}
+
+
+int ipfr_nat_newfrag(ip, fin, pass, nat)	/* 0 if added, else -1 */
+ip_t *ip;
+fr_info_t *fin;
+u_int pass;
+nat_t *nat;
+{
+	ipfr_t	*entry;
+
+	if (fr_frag_lock || (ip->ip_v != 4))
+		return -1;
+	WRITE_ENTER(&ipf_natfrag);
+	entry = ipfr_new(ip, fin, pass, ipfr_nattab);
+	if (entry != NULL) {	/* link entry and NAT structure both ways */
+		nat->nat_data = entry;
+		entry->ipfr_data = nat;
+	}
+	RWLOCK_EXIT(&ipf_natfrag);
+	return (entry == NULL) ? -1 : 0;
+}
+
+
+/*
+ * Check the fragment cache in table for a record of this packet.  A hit is
+ * moved to the head of its chain, updated and returned; otherwise NULL.
+ */
+static ipfr_t *ipfr_lookup(ip, fin, table)
+ip_t *ip;
+fr_info_t *fin;
+ipfr_t *table[];
+{
+	ipfr_t	*f, frag;
+	u_int	idx;
+
+	if (!(fin->fin_fi.fi_fl & FI_FRAG))
+		return NULL;
+
+	/*
+	 * For fragments, we record protocol, packet id, TOS, both IP#'s and
+	 * the interface (the same for all fragments of a packet).
+	 *
+	 * build up a hash value to index the table with.
+	 */
+	frag.ipfr_p = ip->ip_p;
+	idx = ip->ip_p;
+	frag.ipfr_id = ip->ip_id;
+	idx += ip->ip_id;
+	frag.ipfr_tos = ip->ip_tos;
+	frag.ipfr_src.s_addr = ip->ip_src.s_addr;
+	idx += ip->ip_src.s_addr;
+	frag.ipfr_dst.s_addr = ip->ip_dst.s_addr;
+	idx += ip->ip_dst.s_addr;
+	frag.ipfr_ifp = fin->fin_ifp;
+	idx *= 127;
+	idx %= IPFT_SIZE;
+
+	/*
+	 * check the table, careful to only compare the right amount of data
+	 */
+	for (f = table[idx]; f; f = f->ipfr_next)
+		if (!bcmp((char *)&frag.ipfr_src, (char *)&f->ipfr_src,
+			  IPFR_CMPSZ)) {
+			u_short	atoff, off;
+
+			/*
+			 * XXX - We really need to be guarding against the
+			 * retransmission of (src,dst,id,offset-range) here
+			 * because a fragmented packet is never resent with
+			 * the same IP ID#.
+			 */
+			off = ip->ip_off & IP_OFFMASK;
+			if (f->ipfr_seen0) {
+				if (!off || (fin->fin_fi.fi_fl & FI_SHORT))
+					continue;	/* keep scanning the chain */
+			} else if (!off)
+				f->ipfr_seen0 = 1;
+
+			if (f != table[idx]) {
+				/*
+				 * move fragment info. to the top of the list
+				 * to speed up searches.
+				 */
+				if ((f->ipfr_prev->ipfr_next = f->ipfr_next))
+					f->ipfr_next->ipfr_prev = f->ipfr_prev;
+				f->ipfr_next = table[idx];
+				table[idx]->ipfr_prev = f;
+				f->ipfr_prev = NULL;
+				table[idx] = f;
+			}
+			atoff = off + (fin->fin_dlen >> 3);
+			/*
+			 * If we've followed the fragments, and this is the
+			 * last (in order), shrink expiration time.
+			 */
+			if (off == f->ipfr_off) {
+				if (!(ip->ip_off & IP_MF))
+					f->ipfr_ttl = 1;
+				else
+					f->ipfr_off = atoff;
+			}
+			ATOMIC_INCL(ipfr_stats.ifs_hits);
+			return f;
+		}
+	return NULL;
+}
+
+
+/*
+ * NAT fragment cache lookup: returns the nat_t stored on the matching
+ * entry, or NULL when nothing matches, ip is not IPv4 or fr_frag_lock is set.
+ */
+nat_t *ipfr_nat_knownfrag(ip, fin)
+ip_t *ip;
+fr_info_t *fin;
+{
+	nat_t	*session = NULL;
+	ipfr_t	*entry;
+
+	if (fr_frag_lock || (ip->ip_v != 4))
+		return NULL;
+	READ_ENTER(&ipf_natfrag);
+	entry = ipfr_lookup(ip, fin, ipfr_nattab);
+	if (entry != NULL) {
+		session = entry->ipfr_data;
+		/*
+		 * ipfr_lookup sets ttl to 1 on the last in-order fragment.
+		 */
+		if ((session != NULL) && (entry->ipfr_ttl == 1)) {
+			session->nat_data = NULL;
+			entry->ipfr_data = NULL;
+		}
+	}
+	RWLOCK_EXIT(&ipf_natfrag);
+	return session;
+}
+
+
+/*
+ * Filter fragment cache lookup: returns the rule saved on the matching
+ * entry, or NULL when nothing matches, ip is not IPv4 or fr_frag_lock is set.
+ */
+frentry_t *ipfr_knownfrag(ip, fin)
+ip_t *ip;
+fr_info_t *fin;
+{
+	frentry_t *rule;
+	ipfr_t	*entry;
+
+	if (fr_frag_lock || (ip->ip_v != 4))
+		return NULL;
+	READ_ENTER(&ipf_frag);
+	entry = ipfr_lookup(ip, fin, ipfr_heads);
+	rule = (entry != NULL) ? entry->ipfr_rule : NULL;
+	RWLOCK_EXIT(&ipf_frag);
+	return rule;
+}
+
+
+/*
+ * forget any references to this external object (a NAT entry).  Only
+ * ipfr_nattab entries ever have ipfr_data set, so that table is scanned.
+ */
+void ipfr_forget(nat)
+void *nat;
+{
+	ipfr_t	*fr;
+	int	idx;
+
+	WRITE_ENTER(&ipf_natfrag);
+	for (idx = IPFT_SIZE - 1; idx >= 0; idx--)
+		for (fr = ipfr_nattab[idx]; fr; fr = fr->ipfr_next)
+			if (fr->ipfr_data == nat)
+				fr->ipfr_data = NULL;
+	RWLOCK_EXIT(&ipf_natfrag);
+}
+
+
+static void ipfr_delete(fra)	/* drop rule ref, unlink from neighbours, free */
+ipfr_t *fra;
+{
+	frentry_t *rule = fra->ipfr_rule;
+
+	if (rule != NULL) {
+		ATOMIC_DEC32(rule->fr_ref);
+		if (rule->fr_ref == 0)
+			KFREE(rule);
+	}
+	/* A head entry has no prev: the caller repoints the table slot. */
+	if (fra->ipfr_prev != NULL)
+		fra->ipfr_prev->ipfr_next = fra->ipfr_next;
+	if (fra->ipfr_next != NULL)
+		fra->ipfr_next->ipfr_prev = fra->ipfr_prev;
+	KFREE(fra);
+}
+
+
+/*
+ * Free every fragment cache entry in both tables, keeping ipfr_inuse in step.
+ */
+void ipfr_unload()
+{
+	ipfr_t	**fp, *fra;
+	nat_t	*nat;
+	int	idx;
+
+	WRITE_ENTER(&ipf_frag);
+	for (idx = IPFT_SIZE - 1; idx >= 0; idx--)
+		for (fp = &ipfr_heads[idx]; (fra = *fp); ) {
+			*fp = fra->ipfr_next;
+			ipfr_delete(fra);
+			ATOMIC_DEC32(ipfr_inuse);
+		}
+	RWLOCK_EXIT(&ipf_frag);
+
+	WRITE_ENTER(&ipf_nat);
+	WRITE_ENTER(&ipf_natfrag);
+	for (idx = IPFT_SIZE - 1; idx >= 0; idx--)
+		for (fp = &ipfr_nattab[idx]; (fra = *fp); ) {
+			*fp = fra->ipfr_next;
+			nat = fra->ipfr_data;
+			if (nat != NULL && nat->nat_data == fra)
+				nat->nat_data = NULL;	/* drop back pointer */
+			ipfr_delete(fra);
+			ATOMIC_DEC32(ipfr_inuse);
+		}
+	RWLOCK_EXIT(&ipf_natfrag);
+	RWLOCK_EXIT(&ipf_nat);
+}
+
+
+#ifdef	_KERNEL
+void ipfr_fragexpire()	/* age both fragment tables by one tick */
+{
+	ipfr_t	**link, *entry;
+	nat_t	*session;
+	int	bucket;
+#if defined(_KERNEL)
+# if !SOLARIS
+	int	s;
+# endif
+#endif
+
+	/*
+	 * Nothing is aged while fr_frag_lock is set.
+	 */
+	if (fr_frag_lock)
+		return;
+
+	SPL_NET(s);
+	WRITE_ENTER(&ipf_frag);
+
+	/*
+	 * Walk the whole filter table, taking one tick off each entry's
+	 * ttl.  An entry whose ttl reaches 0 is unlinked from its chain
+	 * and freed; otherwise step on to the next link.
+	 */
+	for (bucket = IPFT_SIZE - 1; bucket >= 0; bucket--)
+		for (link = &ipfr_heads[bucket]; (entry = *link); ) {
+			if (--entry->ipfr_ttl != 0) {
+				link = &entry->ipfr_next;
+				continue;
+			}
+			*link = entry->ipfr_next;
+			ipfr_delete(entry);
+			ATOMIC_INCL(ipfr_stats.ifs_expire);
+			ATOMIC_DEC32(ipfr_inuse);
+		}
+	RWLOCK_EXIT(&ipf_frag);
+
+	/*
+	 * Same again for the NAT table, except that a NAT structure still
+	 * pointing back at the entry being freed has that reference NULLed.
+	 * NOTE: both locks are taken up front, and in this order, so as to
+	 * prevent a deadlock if both try to expire at the same time.
+	 */
+	WRITE_ENTER(&ipf_nat);
+	WRITE_ENTER(&ipf_natfrag);
+	for (bucket = IPFT_SIZE - 1; bucket >= 0; bucket--)
+		for (link = &ipfr_nattab[bucket]; (entry = *link); ) {
+			if (--entry->ipfr_ttl != 0) {
+				link = &entry->ipfr_next;
+				continue;
+			}
+			ATOMIC_INCL(ipfr_stats.ifs_expire);
+			ATOMIC_DEC32(ipfr_inuse);
+			session = entry->ipfr_data;
+			if ((session != NULL) && (session->nat_data == entry))
+				session->nat_data = NULL;
+			*link = entry->ipfr_next;
+			ipfr_delete(entry);
+		}
+	RWLOCK_EXIT(&ipf_natfrag);
+	RWLOCK_EXIT(&ipf_nat);
+	SPL_X(s);
+}
+
+
+/*
+ * Slowly expire held state for fragments.  Timeouts are set in expectation
+ * of this being called twice per second; the function re-arms its own timer.
+ */
+# if (BSD >= 199306) || SOLARIS || defined(__sgi)
+#  if defined(SOLARIS2) && (SOLARIS2 < 7)
+void ipfr_slowtimer()
+#  else
+void ipfr_slowtimer __P((void *ptr))
+#  endif
+# else
+int ipfr_slowtimer()
+# endif
+{
+#if defined(_KERNEL) && SOLARIS
+	extern	int	fr_running;
+
+	if (fr_running <= 0) /* returns before the timer is re-armed */
+		return;
+#endif
+
+	READ_ENTER(&ipf_solaris); /* NOTE(review): exited only under SOLARIS */
+#ifdef __sgi
+	ipfilter_sgi_intfsync();
+#endif
+
+	ipfr_fragexpire();	/* fragment caches */
+	fr_timeoutstate();
+	ip_natexpire();
+	fr_authexpire();
+# if    SOLARIS
+	ipfr_timer_id = timeout(ipfr_slowtimer, NULL, drv_usectohz(500000));
+	RWLOCK_EXIT(&ipf_solaris);
+# else
+#  if defined(__NetBSD__) && (__NetBSD_Version__ >= 104240000)
+	callout_reset(&ipfr_slowtimer_ch, hz / 2, ipfr_slowtimer, NULL);
+#  else
+#   if (__FreeBSD_version >= 300000)
+	ipfr_slowtimer_ch = timeout(ipfr_slowtimer, NULL, hz/2);
+#   else
+	timeout(ipfr_slowtimer, NULL, hz/2);
+#   endif
+#   if (BSD < 199306) && !defined(__sgi)
+	return 0;
+#   endif /* (BSD < 199306) && !__sgi */
+#  endif /* NetBSD */
+# endif /* SOLARIS */
+}
+#endif /* defined(_KERNEL) */
diff --git a/sys/netinet/ip_frag.h b/sys/netinet/ip_frag.h
new file mode 100644
index 0000000..2d0b9be
--- /dev/null
+++ b/sys/netinet/ip_frag.h
@@ -0,0 +1,69 @@
+/*
+ * Copyright (C) 1993-2000 by Darren Reed.
+ *
+ * Redistribution and use in source and binary forms are permitted
+ * provided that this notice is preserved and due credit is given
+ * to the original author and the contributors.
+ *
+ * @(#)ip_frag.h	1.5 3/24/96
+ * $Id: ip_frag.h,v 2.4.2.2 2000/11/10 13:10:54 darrenr Exp $
+ * $FreeBSD$
+ */
+
+#ifndef	__IP_FRAG_H__
+#define	__IP_FRAG_H__
+
+#define	IPFT_SIZE	257
+
+typedef	struct	ipfr	{	/* one fragment cache entry */
+	struct	ipfr	*ipfr_next, *ipfr_prev;	/* hash chain links */
+	void	*ipfr_data;	/* NAT entry (NAT table only), else NULL */
+	struct	in_addr	ipfr_src;	/* key starts here (IPFR_CMPSZ) */
+	struct	in_addr	ipfr_dst;	/* key: destination address */
+	void	*ipfr_ifp;	/* key: interface from fin_ifp */
+	u_short	ipfr_id;	/* key: IP id */
+	u_char	ipfr_p;		/* key: protocol */
+	u_char	ipfr_tos;	/* key: type of service */
+	u_short	ipfr_off;	/* expected start of next fragment; not in key */
+	u_char	ipfr_ttl;	/* expiry timer ticks remaining */
+	u_char	ipfr_seen0;	/* set once the offset-0 fragment was seen */
+	frentry_t *ipfr_rule;	/* filter rule referenced by this entry */
+} ipfr_t;
+
+
+typedef	struct	ipfrstat {
+	u_long	ifs_exists;	/* add & already exists */
+	u_long	ifs_nomem;	/* adds that failed to allocate memory */
+	u_long	ifs_new;	/* entries created */
+	u_long	ifs_hits;	/* successful lookups */
+	u_long	ifs_expire;	/* entries removed by the expiry timer */
+	u_long	ifs_inuse;	/* entries currently held, both tables */
+	struct	ipfr	**ifs_table;	/* filter fragment hash table */
+	struct	ipfr	**ifs_nattab;	/* NAT fragment hash table */
+} ipfrstat_t;
+
+#define	IPFR_CMPSZ	(offsetof(ipfr_t, ipfr_off) - \
+			 offsetof(ipfr_t, ipfr_src))
+
+extern	int	fr_ipfrttl;
+extern	int	fr_frag_lock;
+extern	ipfrstat_t	*ipfr_fragstats __P((void));
+extern	int	ipfr_newfrag __P((ip_t *, fr_info_t *, u_int));
+extern	int	ipfr_nat_newfrag __P((ip_t *, fr_info_t *, u_int, struct nat *));
+extern	nat_t	*ipfr_nat_knownfrag __P((ip_t *, fr_info_t *));
+extern	frentry_t *ipfr_knownfrag __P((ip_t *, fr_info_t *));
+extern	void	ipfr_forget __P((void *));
+extern	void	ipfr_unload __P((void));
+extern	void	ipfr_fragexpire __P((void));
+
+#if     (BSD >= 199306) || SOLARIS || defined(__sgi)
+# if defined(SOLARIS2) && (SOLARIS2 < 7)
+extern	void	ipfr_slowtimer __P((void));
+# else
+extern	void	ipfr_slowtimer __P((void *));
+# endif
+#else
+extern	int	ipfr_slowtimer __P((void));
+#endif /* (BSD >= 199306) || SOLARIS */
+
+#endif	/* __IP_FRAG_H__ */
diff --git a/sys/netinet/ip_ftp_pxy.c b/sys/netinet/ip_ftp_pxy.c
new file mode 100644
index 0000000..6e4fe53
--- /dev/null
+++ b/sys/netinet/ip_ftp_pxy.c
@@ -0,0 +1,786 @@
+/*
+ * Simple FTP transparent proxy for in-kernel use.  For use with the NAT
+ * code.
+ *
+ * $FreeBSD$
+ */
+#if SOLARIS && defined(_KERNEL)
+extern	kmutex_t	ipf_rw;
+#endif
+/* Local ASCII-only character tests; each macro may evaluate x more than once. */
+#define	isdigit(x)	((x) >= '0' && (x) <= '9')
+#define	isupper(x)	(((unsigned)(x) >= 'A') && ((unsigned)(x) <= 'Z'))
+#define	islower(x)	(((unsigned)(x) >= 'a') && ((unsigned)(x) <= 'z'))
+#define	isalpha(x)	(isupper(x) || islower(x))
+#define	toupper(x)	(isupper(x) ? (x) : (x) - 'a' + 'A')	/* only meaningful for letters */
+
+#define	IPF_FTP_PROXY
+
+#define	IPF_MINPORTLEN	18	/* = strlen("PORT 1,1,1,1,1,1\r\n") */
+#define	IPF_MAXPORTLEN	30	/* = strlen("PORT 255,255,255,255,255,255\r\n") */
+#define	IPF_MIN227LEN	39	/* = strlen("227 Entering Passive Mode 1,1,1,1,1,1\r\n") */
+#define	IPF_MAX227LEN	51	/* the same reply with six three-digit numbers */
+#define	IPF_FTPBUFSZ	96	/* This *MUST* be >= 53! */
+
+
+int ippr_ftp_client __P((fr_info_t *, ip_t *, nat_t *, ftpinfo_t *, int));
+int ippr_ftp_complete __P((char *, size_t));
+int ippr_ftp_in __P((fr_info_t *, ip_t *, ap_session_t *, nat_t *));
+int ippr_ftp_init __P((void));
+int ippr_ftp_new __P((fr_info_t *, ip_t *, ap_session_t *, nat_t *));
+int ippr_ftp_out __P((fr_info_t *, ip_t *, ap_session_t *, nat_t *));
+int ippr_ftp_pasv __P((fr_info_t *, ip_t *, nat_t *, ftpside_t *, int));
+int ippr_ftp_port __P((fr_info_t *, ip_t *, nat_t *, ftpside_t *, int));
+int ippr_ftp_process __P((fr_info_t *, ip_t *, nat_t *, ftpinfo_t *, int));
+int ippr_ftp_server __P((fr_info_t *, ip_t *, nat_t *, ftpinfo_t *, int));
+int ippr_ftp_valid __P((char *, size_t));
+u_short ippr_ftp_atoi __P((char **));
+
+static	frentry_t	natfr;	/* rule attached (fin_fr) to the skeleton data sessions */
+int	ippr_ftp_pasvonly = 0;	/* non-zero: client PORT commands are never acted on */
+int	ippr_ftp_insecure = 0;	/* non-zero: handle PORT/227 even before ftp_passok is 4 */
+
+
+/*
+ * Reset natfr, the rule used for FTP data sessions; always returns 0.
+ */
+int ippr_ftp_init()
+{
+	bzero((char *)&natfr, sizeof natfr);
+	natfr.fr_flags = FR_INQUE | FR_PASS | FR_QUICK | FR_KEEPSTATE;
+	natfr.fr_ref = 1;
+	return 0;
+}
+
+
+/* Allocate zeroed per-session FTP proxy state and reset both sides' buffer
+ * cursors; returns 0, or -1 if the allocation fails. */
+int ippr_ftp_new(fin, ip, aps, nat)
+fr_info_t *fin;
+ip_t *ip;
+ap_session_t *aps;
+nat_t *nat;
+{
+	ftpinfo_t *info;
+	int side;
+
+	KMALLOC(info, ftpinfo_t *);
+	if (info == NULL)
+		return -1;
+	bzero((char *)info, sizeof(*info));
+	aps->aps_psiz = sizeof(ftpinfo_t);
+	aps->aps_data = info;
+
+	for (side = 0; side < 2; side++) {
+		info->ftp_side[side].ftps_rptr = info->ftp_side[side].ftps_buf;
+		info->ftp_side[side].ftps_wptr = info->ftp_side[side].ftps_buf;
+	}
+	return 0;
+}
+
+
+/*
+ * Rewrite an outbound "PORT h1,h2,h3,h4,p1,p2" command to carry the
+ * packet's source address (ip->ip_src) and, unless a matching NAT entry
+ * exists, create a skeleton NAT session and state for the data connection.
+ * Returns APR_INC(change in packet length), or 0 if the command is left
+ * alone (too short, malformed, foreign address, or data port < 1024).
+ */
+int ippr_ftp_port(fin, ip, nat, f, dlen)
+fr_info_t *fin;
+ip_t *ip;
+nat_t *nat;
+ftpside_t *f;
+int dlen;
+{
+	tcphdr_t *tcp, tcph, *tcp2 = &tcph;
+	char newbuf[IPF_FTPBUFSZ], *s;
+	u_short a5, a6, sp, dp;
+	u_int a1, a2, a3, a4;
+	struct in_addr swip;
+	size_t nlen, olen;
+	fr_info_t fi;
+	int inc, off;
+	nat_t *ipn;
+	mb_t *m;
+#if	SOLARIS
+	mb_t *m1;
+#endif
+
+	tcp = (tcphdr_t *)fin->fin_dp;
+	/* Check for client sending out PORT message. */
+	if (dlen < IPF_MINPORTLEN)
+		return 0;
+	off = fin->fin_hlen + (tcp->th_off << 2);	/* IP + TCP header length */
+	/* Skip the PORT command + space */
+	s = f->ftps_rptr + 5;
+	/* Pick out the address components, two at a time. */
+	a1 = ippr_ftp_atoi(&s);
+	if (!s)
+		return 0;
+	a2 = ippr_ftp_atoi(&s);
+	if (!s)
+		return 0;
+	/*
+	 * check that IP address in the PORT/PASV reply is the same as the
+	 * sender of the command - prevents using PORT for port scanning.
+	 */
+	a1 <<= 16;
+	a1 |= a2;
+	if (a1 != ntohl(nat->nat_inip.s_addr))
+		return 0;
+
+	a5 = ippr_ftp_atoi(&s);
+	if (!s)
+		return 0;
+	if (*s == ')')
+		s++;
+
+	/*
+	 * check for CR-LF at the end; ippr_ftp_atoi() leaves s past the '\r'.
+	 */
+	if (*s == '\n')
+		s--;
+	if ((*s == '\r') && (*(s + 1) == '\n')) {
+		s += 2;
+		a6 = a5 & 0xff;
+	} else
+		return 0;
+	a5 >>= 8;
+	a5 &= 0xff;
+	/*
+	 * Don't allow the PORT command to specify a port < 1024 due to
+	 * security crap.  This is decided before anything is rewritten so
+	 * that a refused command leaves the packet and ip_len untouched.
+	 */
+	if ((a5 << 8 | a6) < 1024)
+		return 0;
+	/* Calculate new address parts for PORT command */
+	a1 = ntohl(ip->ip_src.s_addr);
+	a2 = (a1 >> 16) & 0xff;
+	a3 = (a1 >> 8) & 0xff;
+	a4 = a1 & 0xff;
+	a1 >>= 24;
+	olen = s - f->ftps_rptr;
+	/* Keep sprintf (old note presumably meant snprintf); <= 30 bytes + NUL */
+	(void) sprintf(newbuf, "%s %u,%u,%u,%u,%u,%u\r\n",
+		       "PORT", a1, a2, a3, a4, a5, a6);
+
+	nlen = strlen(newbuf);
+	inc = nlen - olen;
+	if ((inc + ip->ip_len) > 65535)
+		return 0;
+
+#if SOLARIS
+	m = fin->fin_qfm;
+	for (m1 = m; m1->b_cont; m1 = m1->b_cont)
+		;
+	if ((inc > 0) && (m1->b_datap->db_lim - m1->b_wptr < inc)) {
+		mblk_t *nm;
+
+		/* alloc enough to keep same trailer space for lower driver */
+		nm = allocb(nlen, BPRI_MED);
+		PANIC((!nm),("ippr_ftp_out: allocb failed"));
+
+		nm->b_band = m1->b_band;
+		nm->b_wptr += nlen;
+
+		m1->b_wptr -= olen;
+		PANIC((m1->b_wptr < m1->b_rptr),
+		      ("ippr_ftp_out: cannot handle fragmented data block"));
+
+		linkb(m1, nm);
+	} else {
+		if (m1->b_datap->db_struiolim == m1->b_wptr)
+			m1->b_datap->db_struiolim += inc;
+		m1->b_datap->db_struioflag &= ~STRUIO_IP;
+		m1->b_wptr += inc;
+	}
+	copyin_mblk(m, off, nlen, newbuf);
+#else
+	m = *((mb_t **)fin->fin_mp);
+	if (inc < 0)
+		m_adj(m, inc);
+	/* the mbuf chain will be extended if necessary by m_copyback() */
+	m_copyback(m, off, nlen, newbuf);
+# ifdef	M_PKTHDR
+	if (!(m->m_flags & M_PKTHDR))	/* NOTE(review): tests for M_PKTHDR *clear* - confirm */
+		m->m_pkthdr.len += inc;
+# endif
+#endif
+	if (inc != 0) {
+#if SOLARIS || defined(__sgi)
+		register u_32_t	sum1, sum2;
+
+		sum1 = ip->ip_len;
+		sum2 = ip->ip_len + inc;
+
+		/* Because ~1 == -2, We really need ~1 == -1 */
+		if (sum1 > sum2)
+			sum2--;
+		sum2 -= sum1;
+		sum2 = (sum2 & 0xffff) + (sum2 >> 16);
+
+		fix_outcksum(&ip->ip_sum, sum2);
+#endif
+		ip->ip_len += inc;
+	}
+
+	/*
+	 * Add skeleton NAT entry for connection which will come back the
+	 * other way.
+	 */
+	sp = htons(a5 << 8 | a6);
+	/*
+	 * The server may not make the connection back from port 20, but
+	 * it is the most likely so use it here to check for a conflicting
+	 * mapping.
+	 */
+	dp = htons(fin->fin_data[1] - 1);
+	ipn = nat_outlookup(fin->fin_ifp, IPN_TCP, nat->nat_p, nat->nat_inip,
+			    ip->ip_dst, (dp << 16) | sp, 0);
+	if (ipn == NULL) {
+		int slen;
+
+		slen = ip->ip_len;
+		ip->ip_len = fin->fin_hlen + sizeof(*tcp2);
+		bcopy((char *)fin, (char *)&fi, sizeof(fi));
+		bzero((char *)tcp2, sizeof(*tcp2));
+		tcp2->th_win = htons(8192);
+		tcp2->th_sport = sp;
+		tcp2->th_off = 5;
+		tcp2->th_dport = 0; /* XXX - don't specify remote port */
+		fi.fin_data[0] = ntohs(sp);
+		fi.fin_data[1] = 0;
+		fi.fin_dlen = sizeof(*tcp2);
+		fi.fin_dp = (char *)tcp2;
+		fi.fin_fr = &natfr;
+		swip = ip->ip_src;
+		fi.fin_fi.fi_saddr = nat->nat_inip.s_addr;
+		ip->ip_src = nat->nat_inip;
+		ipn = nat_new(nat->nat_ptr, ip, &fi, IPN_TCP|FI_W_DPORT,
+			      NAT_OUTBOUND);
+		if (ipn != NULL) {
+			ipn->nat_age = fr_defnatage;
+			(void) fr_addstate(ip, &fi, FI_W_DPORT);
+		}
+		ip->ip_len = slen;
+		ip->ip_src = swip;
+	}
+	return APR_INC(inc);
+}
+
+
+/*
+ * Handle one complete command line from the client side: follow the
+ * USER/PASS login sequence and hand PORT commands to ippr_ftp_port() once
+ * ftp_passok has reached 4 (or at any time if ippr_ftp_insecure is set),
+ * unless ippr_ftp_pasvonly is set.  The line is consumed from the side's
+ * buffer; the result is ippr_ftp_port()'s, or 0 if it was not called.
+ */
+int ippr_ftp_client(fin, ip, nat, ftp, dlen)
+fr_info_t *fin;
+nat_t *nat;
+ftpinfo_t *ftp;
+ip_t *ip;
+int dlen;
+{
+	ftpside_t *side = &ftp->ftp_side[0];
+	char *line = side->ftps_rptr;
+	char *end = side->ftps_wptr;
+	char verb[6], ch;
+	int n, ret = 0;
+
+	/* Upper-cased copy of (at most) the first five bytes of the line. */
+	for (n = 0; (n < dlen) && (n < 5); n++) {
+		ch = line[n];
+		verb[n] = isalpha(ch) ? toupper(ch) : ch;
+	}
+	verb[n] = '\0';
+
+	/* On this side ftp_passok moves 0 -> 1 (USER) and 2 -> 3 (PASS). */
+	if ((ftp->ftp_passok == 0) && !strncmp(verb, "USER ", 5))
+		ftp->ftp_passok = 1;
+	else if ((ftp->ftp_passok == 2) && !strncmp(verb, "PASS ", 5))
+		ftp->ftp_passok = 3;
+	else if (((ftp->ftp_passok == 4) || ippr_ftp_insecure) &&
+		 !ippr_ftp_pasvonly && !strncmp(verb, "PORT ", 5))
+		ret = ippr_ftp_port(fin, ip, nat, side, dlen);
+
+	/* Step past the terminating '\n', or stop at the write pointer. */
+	do {
+		ch = *line++;
+	} while ((ch != '\n') && (line < end));
+	side->ftps_rptr = line;
+	return ret;
+}
+
+
+/*
+ * Parse a "227 Entering Passive Mode" reply at f->ftps_rptr and, unless
+ * a matching NAT entry already exists, create a skeleton NAT session and
+ * state entry for the data connection to the advertised port.  The reply
+ * itself is not rewritten (that code is "#if 0"), so the result is 0.
+ */
+int ippr_ftp_pasv(fin, ip, nat, f, dlen)
+fr_info_t *fin;
+ip_t *ip;
+nat_t *nat;
+ftpside_t *f;
+int dlen;
+{
+	tcphdr_t *tcp, tcph, *tcp2 = &tcph;
+	struct in_addr swip, swip2;
+	u_short a5, a6, sp, dp;
+	u_int a1, a2, a3, a4;
+	fr_info_t fi;
+	nat_t *ipn;
+	int inc;
+	char *s;
+
+	/* Check for PASV reply message. */
+	if (dlen < IPF_MIN227LEN)
+		return 0;
+	else if (strncmp(f->ftps_rptr, "227 Entering Passive Mode", 25))
+		return 0;
+
+	tcp = (tcphdr_t *)fin->fin_dp;	/* not read again by the active code below */
+
+	/* Skip the 25-byte reply text and anything else before the first digit */
+	s = f->ftps_rptr + 25;
+	while (*s && !isdigit(*s))
+		s++;
+	/* Pick out the address components, two at a time. */
+	a1 = ippr_ftp_atoi(&s);
+	if (!s)
+		return 0;
+	a2 = ippr_ftp_atoi(&s);
+	if (!s)
+		return 0;
+
+	/*
+	 * check that IP address in the PORT/PASV reply is the same as the
+	 * sender of the command - prevents using PORT for port scanning.
+	 */
+	a1 <<= 16;
+	a1 |= a2;
+	if (a1 != ntohl(nat->nat_oip.s_addr))
+		return 0;
+
+	a5 = ippr_ftp_atoi(&s);
+	if (!s)
+		return 0;
+
+	if (*s == ')')
+		s++;
+	if (*s == '\n')	/* ippr_ftp_atoi() consumed the '\r': step back onto it */
+		s--;
+	/*
+	 * check for CR-LF at the end.
+	 */
+	if ((*s == '\r') && (*(s + 1) == '\n')) {
+		s += 2;
+		a6 = a5 & 0xff;	/* low byte of the advertised port */
+	} else
+		return 0;
+	a5 >>= 8;	/* high byte of the advertised port */
+	/*
+	 * Calculate new address parts for 227 reply
+	 */
+	a1 = ntohl(ip->ip_src.s_addr);
+	a2 = (a1 >> 16) & 0xff;
+	a3 = (a1 >> 8) & 0xff;
+	a4 = a1 & 0xff;
+	a1 >>= 24;
+	inc = 0;
+#if 0	/* disabled; uses olen, nlen, newbuf, m, m1 that this function never declares */
+	olen = s - f->ftps_rptr;
+	(void) sprintf(newbuf, "%s %u,%u,%u,%u,%u,%u\r\n",
+		       "227 Entering Passive Mode", a1, a2, a3, a4, a5, a6);
+	nlen = strlen(newbuf);
+	inc = nlen - olen;
+	if ((inc + ip->ip_len) > 65535)
+		return 0;
+
+#if SOLARIS
+	m = fin->fin_qfm;
+	for (m1 = m; m1->b_cont; m1 = m1->b_cont)
+		;
+	if ((inc > 0) && (m1->b_datap->db_lim - m1->b_wptr < inc)) {
+		mblk_t *nm;
+
+		/* alloc enough to keep same trailer space for lower driver */
+		nm = allocb(nlen, BPRI_MED);
+		PANIC((!nm),("ippr_ftp_out: allocb failed"));
+
+		nm->b_band = m1->b_band;
+		nm->b_wptr += nlen;
+
+		m1->b_wptr -= olen;
+		PANIC((m1->b_wptr < m1->b_rptr),
+		      ("ippr_ftp_out: cannot handle fragmented data block"));
+
+		linkb(m1, nm);
+	} else {
+		m1->b_wptr += inc;
+	}
+	/*copyin_mblk(m, off, nlen, newbuf);*/
+#else /* SOLARIS */
+	m = *((mb_t **)fin->fin_mp);
+	if (inc < 0)
+		m_adj(m, inc);
+	/* the mbuf chain will be extended if necessary by m_copyback() */
+	/*m_copyback(m, off, nlen, newbuf);*/
+#endif /* SOLARIS */
+	if (inc != 0) {
+#if SOLARIS || defined(__sgi)
+		register u_32_t	sum1, sum2;
+
+		sum1 = ip->ip_len;
+		sum2 = ip->ip_len + inc;
+
+		/* Because ~1 == -2, We really need ~1 == -1 */
+		if (sum1 > sum2)
+			sum2--;
+		sum2 -= sum1;
+		sum2 = (sum2 & 0xffff) + (sum2 >> 16);
+
+		fix_outcksum(&ip->ip_sum, sum2);
+#endif /* SOLARIS || defined(__sgi) */
+		ip->ip_len += inc;
+	}
+#endif /* 0 */
+
+	/*
+	 * Add skeleton NAT entry for connection which will come back the
+	 * other way.
+	 */
+	sp = 0;
+	dp = htons(fin->fin_data[1] - 1);
+	ipn = nat_outlookup(fin->fin_ifp, IPN_TCP, nat->nat_p, nat->nat_inip,
+			    ip->ip_dst, (dp << 16) | sp, 0);
+	if (ipn == NULL) {
+		int slen;
+
+		/* ip_len, ip_src and ip_dst are borrowed here and restored below */
+		slen = ip->ip_len;
+		ip->ip_len = fin->fin_hlen + sizeof(*tcp2);
+		bcopy((char *)fin, (char *)&fi, sizeof(fi));
+		bzero((char *)tcp2, sizeof(*tcp2));
+		tcp2->th_win = htons(8192);
+		tcp2->th_sport = 0;		/* XXX - fake it for nat_new */
+		tcp2->th_off = 5;
+		fi.fin_data[1] = a5 << 8 | a6;	/* advertised data port, host order */
+		fi.fin_dlen = sizeof(*tcp2);
+		tcp2->th_dport = htons(fi.fin_data[1]);
+		fi.fin_data[0] = 0;
+		fi.fin_dp = (char *)tcp2;
+		fi.fin_fr = &natfr;
+		swip = ip->ip_src;
+		swip2 = ip->ip_dst;
+		fi.fin_fi.fi_daddr = ip->ip_src.s_addr;
+		fi.fin_fi.fi_saddr = nat->nat_inip.s_addr;
+		ip->ip_dst = ip->ip_src;
+		ip->ip_src = nat->nat_inip;
+		ipn = nat_new(nat->nat_ptr, ip, &fi, IPN_TCP|FI_W_SPORT,
+			      NAT_OUTBOUND);
+		if (ipn != NULL) {
+			ipn->nat_age = fr_defnatage;
+			(void) fr_addstate(ip, &fi, FI_W_SPORT);
+		}
+		ip->ip_len = slen;
+		ip->ip_src = swip;
+		ip->ip_dst = swip2;
+	}
+	return inc;
+}
+
+
+/*
+ * Handle one server reply line: track login replies (331/230/530) in
+ * ftp_passok and pass "227 " replies to ippr_ftp_pasv() when allowed.
+ */
+int ippr_ftp_server(fin, ip, nat, ftp, dlen)
+fr_info_t *fin;
+ip_t *ip;
+nat_t *nat;
+ftpinfo_t *ftp;
+int dlen;
+{
+	ftpside_t *side = &ftp->ftp_side[1];
+	char *line = side->ftps_rptr, *end = side->ftps_wptr, ch;
+	int ret = 0;
+
+	if ((ftp->ftp_passok == 1) && !strncmp(line, "331", 3))
+		ftp->ftp_passok = 2;
+	else if ((ftp->ftp_passok == 3) && !strncmp(line, "230", 3))
+		ftp->ftp_passok = 4;
+	else if ((ftp->ftp_passok == 3) && !strncmp(line, "530", 3))
+		ftp->ftp_passok = 0;
+	else if (((ftp->ftp_passok == 4) || ippr_ftp_insecure) &&
+		 !strncmp(line, "227 ", 4))
+		ret = ippr_ftp_pasv(fin, ip, nat, side, dlen);
+
+	/* Consume the line: stop after '\n' or at the write pointer. */
+	do {
+		ch = *line++;
+	} while ((ch != '\n') && (line < end));
+	side->ftps_rptr = line;
+	return ret;
+}
+
+
+/*
+ * Decide whether the buffered data starts with a syntactically plausible
+ * FTP line.
+ *
+ * Accepted beginnings:
+ *   - a reply:   three digits followed by '-' or ' '
+ *   - a command: three or four letters followed by ' ' or '\r'
+ *
+ * Returns:
+ *   0 - the beginning is acceptable and a '\n' occurs within len bytes
+ *   1 - the beginning cannot be an FTP reply or command
+ *   2 - undecided: fewer than five bytes are available, or the beginning
+ *       is acceptable but no '\n' has been buffered yet
+ *
+ * isdigit()/isalpha() are this file's own ASCII-only macros.
+ */
+int ippr_ftp_valid(buf, len)
+char *buf;
+size_t len;
+{
+	/* index of the next byte to look at */
+	register size_t pos;
+
+	/* Too little data to classify yet. */
+	if (len < 5) {
+		return 2;
+	}
+
+	if (isdigit(buf[0])) {
+		/* Reply: "ddd-" or "ddd " */
+		if (!isdigit(buf[1]) || !isdigit(buf[2])) {
+			return 1;
+		}
+		if ((buf[3] != '-') && (buf[3] != ' ')) {
+			return 1;
+		}
+		pos = 4;
+	} else if (isalpha(buf[0])) {
+		/* Command: three or four letters, then ' ' or '\r' */
+		if (!isalpha(buf[1]) || !isalpha(buf[2])) {
+			return 1;
+		}
+		pos = 3;
+		if (isalpha(buf[3])) {
+			pos = 4;
+		}
+		if ((buf[pos] != ' ') && (buf[pos] != '\r')) {
+			return 1;
+		}
+		pos++;
+	} else {
+		return 1;
+	}
+
+	/* A complete line needs a '\n' somewhere in the remainder. */
+	for (; pos < len; pos++) {
+		if (buf[pos] == '\n')
+			return 0;
+	}
+	return 2;
+}
+
+/* Buffer and parse FTP control data; rv: 0 = ippr_ftp_out, 1 = ippr_ftp_in. */
+int ippr_ftp_process(fin, ip, nat, ftp, rv)
+fr_info_t *fin;
+ip_t *ip;
+nat_t *nat;
+ftpinfo_t *ftp;
+int rv;
+{
+	int mlen, len, off, inc, i, sel;
+	char *rptr, *wptr;
+	ftpside_t *f, *t;
+	tcphdr_t *tcp;
+	mb_t *m;
+
+	tcp = (tcphdr_t *)fin->fin_dp;
+	off = fin->fin_hlen + (tcp->th_off << 2);	/* IP + TCP header length */
+
+#if	SOLARIS
+	m = fin->fin_qfm;
+#else
+	m = *((mb_t **)fin->fin_mp);
+#endif
+
+#if	SOLARIS
+	mlen = msgdsize(m) - off;
+#else
+	mlen = mbufchainlen(m) - off;
+#endif
+	/* mlen: bytes beyond the headers; f: side ftp_side[rv]; t: the other side */
+	t = &ftp->ftp_side[1 - rv];
+	f = &ftp->ftp_side[rv];
+	if (!mlen) {	/* nothing beyond the headers */
+		if (!t->ftps_seq ||
+		    (int)ntohl(tcp->th_ack) - (int)t->ftps_seq > 0)
+			t->ftps_seq = ntohl(tcp->th_ack);	/* unset or ack is newer */
+		f->ftps_len = 0;
+		return 0;
+	}
+
+	inc = 0;
+	rptr = f->ftps_rptr;
+	wptr = f->ftps_wptr;
+	/* i: the session's recorded ack (rv != 0) or seq (rv == 0) offset */
+	sel = nat->nat_aps->aps_sel[1 - rv];
+	if (rv)
+		i = nat->nat_aps->aps_ackoff[sel];
+	else
+		i = nat->nat_aps->aps_seqoff[sel];
+	/*
+	 * XXX - Ideally, this packet should get dropped because we now know
+	 * that it is out of order (and there is no real danger in doing so
+	 * apart from causing packets to go through here ordered).
+	 */
+	if (f->ftps_len + f->ftps_seq == ntohl(tcp->th_seq))	/* directly follows the last one */
+		f->ftps_seq = ntohl(tcp->th_seq);
+	else if (ntohl(tcp->th_seq) + i != f->ftps_seq) {
+		return APR_ERR(-1);
+	}
+	f->ftps_len = mlen;
+
+	while (mlen > 0) {
+		/* NOTE(review): FTP_BUFSZ (not IPF_FTPBUFSZ) is defined outside this view */
+		len = MIN(mlen, FTP_BUFSZ / 2);
+
+#if	SOLARIS
+		copyout_mblk(m, off, len, wptr);
+#else
+		m_copydata(m, off, len, wptr);
+#endif
+		mlen -= len;
+		off += len;
+		wptr += len;
+		f->ftps_wptr = wptr;
+		if (f->ftps_junk == 2)	/* ippr_ftp_valid() wanted more data: ask again */
+			f->ftps_junk = ippr_ftp_valid(rptr, wptr - rptr);
+		/* ftps_junk == 0: hand each complete line to the per-side handler */
+		while ((f->ftps_junk == 0) && (wptr > rptr)) {
+			f->ftps_junk = ippr_ftp_valid(rptr, wptr - rptr);
+			if (f->ftps_junk == 0) {
+				len = wptr - rptr;
+				f->ftps_rptr = rptr;
+				if (rv)
+					inc += ippr_ftp_server(fin, ip, nat,
+							       ftp, len);
+				else
+					inc += ippr_ftp_client(fin, ip, nat,
+							       ftp, len);
+				rptr = f->ftps_rptr;	/* the handler consumed the line */
+			}
+		}
+		/* ftps_junk == 1: discard data up to and including the next CR-LF */
+		while ((f->ftps_junk == 1) && (rptr < wptr)) {
+			while ((rptr < wptr) && (*rptr != '\r'))
+				rptr++;
+
+			if (*rptr == '\r') {
+				if (rptr + 1 < wptr) {
+					if (*(rptr + 1) == '\n') {
+						rptr += 2;
+						f->ftps_junk = 0;
+					} else
+						rptr++;
+				} else
+					break;	/* '\r' is the last buffered byte so far */
+			}
+		}
+		f->ftps_rptr = rptr;
+		/* Everything consumed: rewind; otherwise make room once past the half-way mark */
+		if (rptr == wptr) {
+			rptr = wptr = f->ftps_buf;
+		} else {
+			if ((wptr > f->ftps_buf + FTP_BUFSZ / 2)) {
+				i = wptr - rptr;
+				if ((rptr == f->ftps_buf) ||
+				    (wptr - rptr > FTP_BUFSZ / 2)) {
+					f->ftps_junk = 1;	/* too much unparsed data: drop it */
+					rptr = wptr = f->ftps_buf;
+				} else {
+					bcopy(rptr, f->ftps_buf, i);	/* move the unread tail to the front */
+					wptr = f->ftps_buf + i;
+					rptr = f->ftps_buf;
+				}
+			}
+			f->ftps_rptr = rptr;
+			f->ftps_wptr = wptr;
+		}
+	}
+
+	t->ftps_seq = ntohl(tcp->th_ack);
+	f->ftps_rptr = rptr;
+	f->ftps_wptr = wptr;
+	return APR_INC(inc);
+}
+
+
+/* Outbound hook: run the packet through ippr_ftp_process() as side 0. */
+int ippr_ftp_out(fin, ip, aps, nat)
+fr_info_t *fin;
+ip_t *ip;
+ap_session_t *aps;
+nat_t *nat;
+{
+	ftpinfo_t *state = aps->aps_data;
+
+	if (state != NULL)
+		return ippr_ftp_process(fin, ip, nat, state, 0);
+	return 0;	/* no proxy state attached to this session */
+}
+
+
+/* Inbound hook: run the packet through ippr_ftp_process() as side 1. */
+int ippr_ftp_in(fin, ip, aps, nat)
+fr_info_t *fin;
+ip_t *ip;
+ap_session_t *aps;
+nat_t *nat;
+{
+	ftpinfo_t *state = aps->aps_data;
+
+	if (state != NULL)
+		return ippr_ftp_process(fin, ip, nat, state, 1);
+	return 0;	/* no proxy state attached to this session */
+}
+
+
+/*
+ * ippr_ftp_atoi - parse two comma separated decimal numbers ("hi,lo") at
+ * *ptr and return them packed as (hi << 8) | lo.  Each number is kept in
+ * eight bits, so values above 255 wrap rather than being rejected.
+ * On success *ptr is moved one past the character that ended the second
+ * number; if the first number is not followed by ',', *ptr is set to NULL
+ * and 0 is returned.
+ */
+u_short ippr_ftp_atoi(ptr)
+char **ptr;
+{
+	register u_char hi = 0, lo = 0;
+	register char *cp = *ptr;
+	register char ch;
+
+	for (ch = *cp++; ch && isdigit(ch); ch = *cp++)
+		hi = hi * 10 + (ch - '0');
+	/* The first number must be terminated by the separating comma. */
+	if (ch != ',') {
+		*ptr = NULL;
+		return 0;
+	}
+
+	/* Second number; whatever character ends it is consumed as well. */
+	for (ch = *cp++; ch && isdigit(ch); ch = *cp++)
+		lo = lo * 10 + (ch - '0');
+	*ptr = cp;
+	return (hi << 8) | lo;
+}
diff --git a/sys/netinet/ip_fw.c b/sys/netinet/ip_fw.c
new file mode 100644
index 0000000..78c79cf
--- /dev/null
+++ b/sys/netinet/ip_fw.c
@@ -0,0 +1,2082 @@
+/*
+ * Copyright (c) 1993 Daniel Boulet
+ * Copyright (c) 1994 Ugen J.S.Antsilevich
+ * Copyright (c) 1996 Alex Nash
+ * Copyright (c) 2000 Luigi Rizzo
+ *
+ * Redistribution and use in source forms, with and without modification,
+ * are permitted provided that this entire comment appears intact.
+ *
+ * Redistribution in binary form may occur without any restrictions.
+ * Obviously, it would be nice if you gave credit where credit is due
+ * but requiring it would be too onerous.
+ *
+ * This software is provided ``AS IS'' without any warranties of any kind.
+ *
+ * $FreeBSD$
+ */
+
+#define        DEB(x)
+#define        DDB(x) x
+
+/*
+ * Implement IP packet firewall
+ */
+
+#if !defined(KLD_MODULE)
+#include "opt_ipfw.h"
+#include "opt_ipdn.h"
+#include "opt_ipdivert.h"
+#include "opt_inet.h"
+#ifndef INET
+#error IPFIREWALL requires INET.
+#endif /* INET */
+#endif
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/malloc.h>
+#include <sys/mbuf.h>
+#include <sys/kernel.h>
+#include <sys/socket.h>
+#include <sys/socketvar.h>
+#include <sys/sysctl.h>
+#include <sys/syslog.h>
+#include <sys/ucred.h>
+#include <net/if.h>
+#include <net/route.h>
+#include <netinet/in.h>
+#include <netinet/in_systm.h>
+#include <netinet/in_var.h>
+#include <netinet/in_pcb.h>
+#include <netinet/ip.h>
+#include <netinet/ip_var.h>
+#include <netinet/ip_icmp.h>
+#include <netinet/ip_fw.h>
+#ifdef DUMMYNET
+#include <netinet/ip_dummynet.h>
+#endif
+#include <netinet/tcp.h>
+#include <netinet/tcp_timer.h>
+#include <netinet/tcp_var.h>
+#include <netinet/tcpip.h>
+#include <netinet/udp.h>
+#include <netinet/udp_var.h>
+
+#include <netinet/if_ether.h> /* XXX ethertype_ip */
+
+static int fw_debug = 1;	/* gates dprintf(); sysctl net.inet.ip.fw.debug */
+#ifdef IPFIREWALL_VERBOSE
+static int fw_verbose = 1;	/* sysctl net.inet.ip.fw.verbose */
+#else
+static int fw_verbose = 0;	/* sysctl net.inet.ip.fw.verbose */
+#endif
+int fw_one_pass = 1 ;	/* sysctl net.inet.ip.fw.one_pass */
+#ifdef IPFIREWALL_VERBOSE_LIMIT
+static int fw_verbose_limit = IPFIREWALL_VERBOSE_LIMIT;	/* sysctl ...fw.verbose_limit */
+#else
+static int fw_verbose_limit = 0;	/* 0: ipfw_report() applies no global limit */
+#endif
+static int fw_permanent_rules = 0;	/* sysctl net.inet.ip.fw.permanent_rules */
+
+/*
+ * Right now, two fields in the IP header are changed to host format
+ * by the IP layer before calling the firewall. Ideally, we would like
+ * to have them in network format so that the packet can be
+ * used as it comes from the device driver (and is thus readonly).
+ */
+
+static u_int64_t counter;	/* counter for ipfw_report(NULL...) */
+struct ipfw_flow_id last_pkt ;
+
+#define	IPFW_DEFAULT_RULE	((u_int)(u_short)~0)
+
+LIST_HEAD (ip_fw_head, ip_fw_chain) ip_fw_chain_head;
+
+MALLOC_DEFINE(M_IPFW, "IpFw/IpAcct", "IpFw/IpAcct chain's");
+
+#ifdef SYSCTL_NODE
+SYSCTL_NODE(_net_inet_ip, OID_AUTO, fw, CTLFLAG_RW, 0, "Firewall");
+SYSCTL_INT(_net_inet_ip_fw, OID_AUTO, enable, CTLFLAG_RW,
+    &fw_enable, 0, "Enable ipfw");
+SYSCTL_INT(_net_inet_ip_fw, OID_AUTO,one_pass,CTLFLAG_RW, 
+    &fw_one_pass, 0, 
+    "Only do a single pass through ipfw when using dummynet(4)");
+SYSCTL_INT(_net_inet_ip_fw, OID_AUTO, debug, CTLFLAG_RW, 
+    &fw_debug, 0, "Enable printing of debug ip_fw statements");
+SYSCTL_INT(_net_inet_ip_fw, OID_AUTO, verbose, CTLFLAG_RW, 
+    &fw_verbose, 0, "Log matches to ipfw rules");
+SYSCTL_INT(_net_inet_ip_fw, OID_AUTO, verbose_limit, CTLFLAG_RW, 
+    &fw_verbose_limit, 0, "Set upper limit of matches of ipfw rules logged");
+SYSCTL_INT(_net_inet_ip_fw, OID_AUTO, permanent_rules, CTLFLAG_RW, 
+    &fw_permanent_rules, 0, "Set rule number, below which rules are permanent");
+
+/*
+ * Extension for stateful ipfw.
+ *
+ * Dynamic rules are stored in lists accessed through a hash table
+ * (ipfw_dyn_v) whose size is curr_dyn_buckets. This value can
+ * be modified through the sysctl variable dyn_buckets which is
+ * updated when the table becomes empty.
+ *
+ * XXX currently there is only one list, ipfw_dyn.
+ *
+ * When a packet is received, it is first hashed, then matched
+ * against the entries in the corresponding list.
+ * Matching occurs according to the rule type. The default is to
+ * match the four fields and the protocol, and rules are bidirectional.
+ *
+ * For a busy proxy/web server we will have lots of connections to
+ * the server. We could decide for a rule type where we ignore
+ * ports (different hashing) and avoid special SYN/RST/FIN handling.
+ *
+ * XXX when we decide to support more than one rule type, we should
+ * repeat the hashing multiple times using only the useful fields.
+ * Or, we could run the various tests in parallel, because the
+ * 'move to front' technique should shorten the average search.
+ *
+ * The lifetime of dynamic rules is regulated by dyn_*_lifetime,
+ * measured in seconds and depending on the flags.
+ *
+ * The total number of dynamic rules is stored in dyn_count.
+ * The max number of dynamic rules is dyn_max. When we reach
+ * the maximum number of rules we do not create anymore. This is
+ * done to avoid consuming too much memory, but also too much
+ * time when searching on each packet (ideally, we should try instead
+ * to put a limit on the length of the list on each bucket...).
+ *
+ * Each dynamic rules holds a pointer to the parent ipfw rule so
+ * we know what action to perform. Dynamic rules are removed when
+ * the parent rule is deleted.
+ * There are some limitations with dynamic rules -- we do not
+ * obey the 'randomized match', and we do not do multiple
+ * passes through the firewall.
+ * XXX check the latter!!!
+ */
+static struct ipfw_dyn_rule **ipfw_dyn_v = NULL ;
+static u_int32_t dyn_buckets = 256 ; /* must be power of 2 */
+static u_int32_t curr_dyn_buckets = 256 ; /* must be power of 2 */
+static u_int32_t dyn_ack_lifetime = 300 ;
+static u_int32_t dyn_syn_lifetime = 20 ;
+static u_int32_t dyn_fin_lifetime = 20 ;
+static u_int32_t dyn_rst_lifetime = 5 ;
+static u_int32_t dyn_short_lifetime = 30 ;
+static u_int32_t dyn_count = 0 ;
+static u_int32_t dyn_max = 1000 ;
+SYSCTL_INT(_net_inet_ip_fw, OID_AUTO, dyn_buckets, CTLFLAG_RW,
+    &dyn_buckets, 0, "Number of dyn. buckets");
+SYSCTL_INT(_net_inet_ip_fw, OID_AUTO, curr_dyn_buckets, CTLFLAG_RD,
+    &curr_dyn_buckets, 0, "Current Number of dyn. buckets");
+SYSCTL_INT(_net_inet_ip_fw, OID_AUTO, dyn_count, CTLFLAG_RD,
+    &dyn_count, 0, "Number of dyn. rules");
+SYSCTL_INT(_net_inet_ip_fw, OID_AUTO, dyn_max, CTLFLAG_RW,
+    &dyn_max, 0, "Max number of dyn. rules");
+SYSCTL_INT(_net_inet_ip_fw, OID_AUTO, dyn_ack_lifetime, CTLFLAG_RW,
+    &dyn_ack_lifetime, 0, "Lifetime of dyn. rules for acks");
+SYSCTL_INT(_net_inet_ip_fw, OID_AUTO, dyn_syn_lifetime, CTLFLAG_RW,
+    &dyn_syn_lifetime, 0, "Lifetime of dyn. rules for syn");
+SYSCTL_INT(_net_inet_ip_fw, OID_AUTO, dyn_fin_lifetime, CTLFLAG_RW,
+    &dyn_fin_lifetime, 0, "Lifetime of dyn. rules for fin");
+SYSCTL_INT(_net_inet_ip_fw, OID_AUTO, dyn_rst_lifetime, CTLFLAG_RW,
+    &dyn_rst_lifetime, 0, "Lifetime of dyn. rules for rst");
+SYSCTL_INT(_net_inet_ip_fw, OID_AUTO, dyn_short_lifetime, CTLFLAG_RW,
+    &dyn_short_lifetime, 0, "Lifetime of dyn. rules for other situations");
+
+#endif
+
+/* dprintf((fmt, ...)): printf() only while fw_debug is non-zero. */
+#define dprintf(a)	do { if (fw_debug) printf a; } while (0)
+/* SNPARGS(buf, len): destination/size argument pair for snprintf() when
+ * appending at offset len of the array buf; size is 0 once buf is full. */
+#define SNPARGS(buf, len) (buf) + (len), sizeof(buf) > (len) ? sizeof(buf) - (len) : 0
+
+static int	add_entry __P((struct ip_fw_head *chainptr, struct ip_fw *frwl));
+static int	del_entry __P((struct ip_fw_head *chainptr, u_short number));
+static int	zero_entry __P((struct ip_fw *));
+static int	resetlog_entry __P((struct ip_fw *));
+static int	check_ipfw_struct __P((struct ip_fw *m));
+static __inline int
+		iface_match __P((struct ifnet *ifp, union ip_fw_if *ifu,
+				 int byname));
+static int	ipopts_match __P((struct ip *ip, struct ip_fw *f));
+static int	iptos_match __P((struct ip *ip, struct ip_fw *f));
+static __inline int
+		port_match __P((u_short *portptr, int nports, u_short port,
+				int range_flag, int mask));
+static int	tcpflg_match __P((struct tcphdr *tcp, struct ip_fw *f));
+static int	icmptype_match __P((struct icmp *  icmp, struct ip_fw * f));
+static void	ipfw_report __P((struct ip_fw *f, struct ip *ip, int offset,
+				struct ifnet *rif, struct ifnet *oif));
+
+static void flush_rule_ptrs(void);
+
+static int	ip_fw_chk __P((struct ip **pip, int hlen,
+			struct ifnet *oif, u_int16_t *cookie, struct mbuf **m,
+			struct ip_fw_chain **flow_id,
+			struct sockaddr_in **next_hop));
+static int	ip_fw_ctl __P((struct sockopt *sopt));
+
+static char err_prefix[] = "ip_fw_ctl:";
+
+/*
+ * Match port against a rule's port vector (an empty vector matches all).
+ * With mask set, the first two entries are a value/mask pair; with
+ * range_flag set, the next two are an inclusive range; any remaining
+ * entries are single ports.  Returns 1 on a match, 0 otherwise.
+ */
+static __inline int
+port_match(u_short *portptr, int nports, u_short port, int range_flag, int mask)
+{
+	if (nports == 0)
+		return 1;
+	if (mask) {
+		if (((portptr[0] ^ port) & portptr[1]) == 0)
+			return 1;
+		portptr += 2;
+		nports -= 2;
+	}
+	if (range_flag) {
+		if (port >= portptr[0] && port <= portptr[1])
+			return 1;
+		portptr += 2;
+		nports -= 2;
+	}
+	for (; nports > 0; nports--, portptr++)
+		if (*portptr == port)
+			return 1;
+	return 0;
+}
+
+/*
+ * Match the TCP flags of a segment against rule f: every flag in fw_tcpf
+ * must be set and no flag in fw_tcpnf may be set.  Rules marked
+ * IP_FW_IF_TCPEST additionally refuse a segment that has SYN set while
+ * RST and ACK are both clear.  Returns 1 on a match, 0 otherwise.
+ */
+static int
+tcpflg_match(struct tcphdr *tcp, struct ip_fw *f)
+{
+	u_char wanted, banned;
+
+	if ((f->fw_ipflg & IP_FW_IF_TCPEST) != 0) {
+		/* "established" rule: SYN without RST or ACK never matches */
+		if ((tcp->th_flags & (IP_FW_TCPF_SYN | IP_FW_TCPF_ACK |
+		    IP_FW_TCPF_RST)) == IP_FW_TCPF_SYN)
+			return 0;
+	}
+
+	wanted = tcp->th_flags & f->fw_tcpf;	/* required flags present */
+	banned = tcp->th_flags & f->fw_tcpnf;	/* forbidden flags present */
+	if (wanted != f->fw_tcpf || banned)
+		return 0;
+
+	return 1;
+}
+
+/*
+ * Match an ICMP type against rule f: rules without IP_FW_F_ICMPBIT accept
+ * any type, others need the type's bit set in fw_uar.fw_icmptypes[].
+ */
+static int
+icmptype_match(struct icmp *icmp, struct ip_fw *f)
+{
+	const int bits = sizeof(unsigned) * NBBY;	/* bits per bitmap word */
+	int type = icmp->icmp_type;
+
+	if ((f->fw_flg & IP_FW_F_ICMPBIT) == 0)
+		return(1);
+	if (type >= IP_FW_ICMPTYPES_MAX)
+		return(0);
+	if (f->fw_uar.fw_icmptypes[type / bits] & (1U << (type % bits)))
+		return(1);
+	return(0); /* no match */
+}
+
+/* Return 1 if the ICMP message following ip's header is a query type. */
+static int
+is_icmp_query(struct ip *ip)
+{
+	const struct icmp *icmp =
+	    (struct icmp *)((u_int32_t *)ip + ip->ip_hl);
+	switch (icmp->icmp_type) {
+	case ICMP_ECHO:
+	case ICMP_ROUTERSOLICIT:
+	case ICMP_TSTAMP:
+	case ICMP_IREQ:
+	case ICMP_MASKREQ:
+		return(1);
+	}
+	return(0);
+}
+
+/*
+ * Match the IP options carried by a packet against rule f.
+ * f->fw_ipopt names options that must be present and f->fw_ipnopt
+ * options that must be absent; only LSRR, SSRR, RR and TS are looked at.
+ * Returns 1 on a match.  Returns 0 on a mismatch, or when an option's
+ * length byte is zero or larger than what is left of the header.
+ */
+static int
+ipopts_match(struct ip *ip, struct ip_fw *f)
+{
+	register u_char *cp = (u_char *)(ip + 1);
+	int cnt = (ip->ip_hl << 2) - sizeof (struct ip);
+	int opt, optlen;
+	const u_char banned_all = f->fw_ipnopt;
+	u_char wanted = f->fw_ipopt, banned = banned_all;
+	u_int bit;
+
+	while (cnt > 0) {
+		opt = cp[IPOPT_OPTVAL];
+		if (opt == IPOPT_EOL)
+			break;
+		if (opt == IPOPT_NOP) {
+			optlen = 1;
+		} else {
+			optlen = cp[IPOPT_OLEN];
+			if (optlen <= 0 || optlen > cnt)
+				return 0; /*XXX*/
+		}
+
+		/* Map the option onto its IP_FW_IPOPT_* bit (0 if untracked). */
+		switch (opt) {
+		case IPOPT_LSRR:
+			bit = IP_FW_IPOPT_LSRR;
+			break;
+		case IPOPT_SSRR:
+			bit = IP_FW_IPOPT_SSRR;
+			break;
+		case IPOPT_RR:
+			bit = IP_FW_IPOPT_RR;
+			break;
+		case IPOPT_TS:
+			bit = IP_FW_IPOPT_TS;
+			break;
+		default:
+			bit = 0;
+			break;
+		}
+		wanted &= ~bit;		/* seen: no longer outstanding */
+		banned &= ~bit;		/* seen: a forbidden option showed up */
+		if (wanted == banned)
+			break;
+		cnt -= optlen;
+		cp += optlen;
+	}
+	return (wanted == 0 && banned == banned_all);
+}
+
+/*
+ * Match the packet's TOS against rule f using the low five bits of
+ * ip_tos: every bit of f->fw_iptos must be set among them and no bit of
+ * f->fw_ipntos may be.
+ * Returns 1 on a match, 0 otherwise.
+ */
+static int
+iptos_match(struct ip *ip, struct ip_fw *f)
+{
+	const u_int present = (ip->ip_tos & 0x1f);
+	u_char missing, forbidden;
+
+	/* Required bits the packet does not carry. */
+	missing = f->fw_iptos & ~present;
+
+	/* Forbidden bits the packet does carry. */
+	forbidden = f->fw_ipntos & present;
+
+	if (missing != 0)
+		return 0;
+	if (forbidden != 0)
+		return 0;
+
+	return 1;
+}
+
+
+/*
+ * Match the TCP options carried by a segment against rule f.
+ * f->fw_tcpopt names options that must be present, f->fw_tcpnopt options
+ * that must be absent (MSS, window scale, SACK, timestamp, T/TCP CC).
+ * Scanning stops at EOL or at an option whose length byte is missing,
+ * zero or larger than the space left, so a truncated option is neither
+ * read beyond the option area nor counted.  Returns 1 on a match, else 0.
+ */
+static int
+tcpopts_match(struct tcphdr *tcp, struct ip_fw *f)
+{
+	register u_char *cp;
+	int opt, optlen, cnt;
+	u_char	opts, nopts, nopts_sve;
+
+	cp = (u_char *)(tcp + 1);
+	cnt = (tcp->th_off << 2) - sizeof (struct tcphdr);
+	opts = f->fw_tcpopt;
+	nopts = nopts_sve = f->fw_tcpnopt;
+
+	for (; cnt > 0; cnt -= optlen, cp += optlen) {
+		opt = cp[0];
+		if (opt == TCPOPT_EOL)
+			break;
+		if (opt == TCPOPT_NOP)
+			optlen = 1;
+		else {
+			if (cnt < 2)	/* length byte would lie past the options */
+				break;
+			optlen = cp[1];
+			if (optlen <= 0 || optlen > cnt)
+				break;
+		}
+
+		switch (opt) {
+		case TCPOPT_MAXSEG:
+			opts &= ~IP_FW_TCPOPT_MSS;
+			nopts &= ~IP_FW_TCPOPT_MSS;
+			break;
+		case TCPOPT_WINDOW:
+			opts &= ~IP_FW_TCPOPT_WINDOW;
+			nopts &= ~IP_FW_TCPOPT_WINDOW;
+			break;
+		case TCPOPT_SACK_PERMITTED:
+		case TCPOPT_SACK:
+			opts &= ~IP_FW_TCPOPT_SACK;
+			nopts &= ~IP_FW_TCPOPT_SACK;
+			break;
+		case TCPOPT_TIMESTAMP:
+			opts &= ~IP_FW_TCPOPT_TS;
+			nopts &= ~IP_FW_TCPOPT_TS;
+			break;
+		case TCPOPT_CC:
+		case TCPOPT_CCNEW:
+		case TCPOPT_CCECHO:
+			opts &= ~IP_FW_TCPOPT_CC;
+			nopts &= ~IP_FW_TCPOPT_CC;
+			break;
+		default:
+			break;
+		}
+		if (opts == nopts)
+			break;
+	}
+	return (opts == 0 && nopts == nopts_sve);
+}
+
+/*
+ * Decide whether interface ifp satisfies the rule's interface spec ifu.
+ * byname != 0: compare unit (-1 matches any unit) and name.
+ * byname == 0: match if ifu's address is 0 (wildcard) or equals one of
+ * ifp's AF_INET addresses.  Returns 1 on a match, 0 otherwise.
+ */
+static __inline int
+iface_match(struct ifnet *ifp, union ip_fw_if *ifu, int byname)
+{
+	struct ifaddr *ia;
+
+	if (byname) {
+		if (ifu->fu_via_if.unit != -1 &&
+		    ifu->fu_via_if.unit != ifp->if_unit)
+			return(0);
+		return(strncmp(ifp->if_name, ifu->fu_via_if.name,
+		    FW_IFNLEN) == 0);
+	}
+
+	if (ifu->fu_via_ip.s_addr == 0)		/* Zero == wildcard */
+		return(1);
+	TAILQ_FOREACH(ia, &ifp->if_addrhead, ifa_link) {
+		if (ia->ifa_addr != NULL &&
+		    ia->ifa_addr->sa_family == AF_INET &&
+		    ((struct sockaddr_in *)(ia->ifa_addr))->sin_addr.s_addr ==
+		    ifu->fu_via_ip.s_addr)
+			return(1);
+	}
+	return(0);
+}
+/* Log one packet that hit rule f; f == NULL logs a "Refuse" for entry -1. */
+static void
+ipfw_report(struct ip_fw *f, struct ip *ip, int offset,
+	struct ifnet *rif, struct ifnet *oif)
+{
+    struct tcphdr *const tcp = (struct tcphdr *) ((u_int32_t *) ip+ ip->ip_hl);
+    struct udphdr *const udp = (struct udphdr *) ((u_int32_t *) ip+ ip->ip_hl);
+    struct icmp *const icmp = (struct icmp *) ((u_int32_t *) ip + ip->ip_hl);
+    u_int64_t count;
+    char *action;
+    char action2[32], proto[47], name[18], fragment[17];
+    int len;
+    /* tcp, udp and icmp all point ip_hl 32-bit words past the start of ip. */
+    count = f ? f->fw_pcnt : ++counter;	/* no rule: global counter */
+    if ((f == NULL && fw_verbose_limit != 0 && count > fw_verbose_limit) ||
+	(f && f->fw_logamount != 0 && count > f->fw_loghighest))
+	    return;			/* past the log limit: stay silent */
+
+    /* Message prefix: "ipfw: " and the rule number, -1 without a rule */
+    snprintf(SNPARGS(name, 0), "ipfw: %d", f ? f->fw_number : -1);
+
+    action = action2;		/* actions with a number are built here */
+    if (!f)
+	    action = "Refuse";
+    else {
+	    switch (f->fw_flg & IP_FW_F_COMMAND) {
+	    case IP_FW_F_DENY:
+		    action = "Deny";
+		    break;
+	    case IP_FW_F_REJECT:
+		    if (f->fw_reject_code == IP_FW_REJECT_RST)
+			    action = "Reset";
+		    else
+			    action = "Unreach";
+		    break;
+	    case IP_FW_F_ACCEPT:
+		    action = "Accept";
+		    break;
+	    case IP_FW_F_COUNT:
+		    action = "Count";
+		    break;
+#ifdef IPDIVERT
+	    case IP_FW_F_DIVERT:
+		    snprintf(SNPARGS(action2, 0), "Divert %d",
+			f->fw_divert_port);
+		    break;
+	    case IP_FW_F_TEE:
+		    snprintf(SNPARGS(action2, 0), "Tee %d",
+			f->fw_divert_port);
+		    break;
+#endif
+	    case IP_FW_F_SKIPTO:
+		    snprintf(SNPARGS(action2, 0), "SkipTo %d",
+			f->fw_skipto_rule);
+		    break;
+#ifdef DUMMYNET
+	    case IP_FW_F_PIPE:
+		    snprintf(SNPARGS(action2, 0), "Pipe %d",
+			f->fw_skipto_rule);	/* NOTE(review): not fw_pipe_nr */
+		    break;
+	    case IP_FW_F_QUEUE:
+		    snprintf(SNPARGS(action2, 0), "Queue %d",
+			f->fw_skipto_rule);	/* NOTE(review): not fw_pipe_nr */
+		    break;
+#endif
+#ifdef IPFIREWALL_FORWARD
+	    case IP_FW_F_FWD:
+		    if (f->fw_fwd_ip.sin_port)
+			    snprintf(SNPARGS(action2, 0),
+				"Forward to %s:%d",
+				inet_ntoa(f->fw_fwd_ip.sin_addr),
+				f->fw_fwd_ip.sin_port);
+		    else
+			    snprintf(SNPARGS(action2, 0), "Forward to %s",
+				inet_ntoa(f->fw_fwd_ip.sin_addr));
+		    break;
+#endif
+	    default:	
+		    action = "UNKNOWN";
+		    break;
+	    }
+    }
+    /* Describe the packet; ports (or ICMP type/code) only if offset is 0. */
+    switch (ip->ip_p) {
+    case IPPROTO_TCP:
+	    len = snprintf(SNPARGS(proto, 0), "TCP %s",
+		inet_ntoa(ip->ip_src));
+	    if (offset == 0)
+		    len += snprintf(SNPARGS(proto, len), ":%d ",
+			ntohs(tcp->th_sport));
+	    else
+		    len += snprintf(SNPARGS(proto, len), " ");
+	    len += snprintf(SNPARGS(proto, len), "%s",
+		inet_ntoa(ip->ip_dst));
+	    if (offset == 0)
+		    snprintf(SNPARGS(proto, len), ":%d",
+			ntohs(tcp->th_dport));
+	    break;
+    case IPPROTO_UDP:
+	    len = snprintf(SNPARGS(proto, 0), "UDP %s",
+		inet_ntoa(ip->ip_src));
+	    if (offset == 0)
+		    len += snprintf(SNPARGS(proto, len), ":%d ",
+			ntohs(udp->uh_sport));
+	    else
+		    len += snprintf(SNPARGS(proto, len), " ");
+	    len += snprintf(SNPARGS(proto, len), "%s",
+		inet_ntoa(ip->ip_dst));
+	    if (offset == 0)
+		    snprintf(SNPARGS(proto, len), ":%d",
+			ntohs(udp->uh_dport));
+	    break;
+    case IPPROTO_ICMP:
+	    if (offset == 0)
+		    len = snprintf(SNPARGS(proto, 0), "ICMP:%u.%u ",
+			icmp->icmp_type, icmp->icmp_code);
+	    else
+		    len = snprintf(SNPARGS(proto, 0), "ICMP ");
+	    len += snprintf(SNPARGS(proto, len), "%s",
+		inet_ntoa(ip->ip_src));
+	    snprintf(SNPARGS(proto, len), " %s", inet_ntoa(ip->ip_dst));
+	    break;
+    default:			/* any other protocol: print its number */
+	    len = snprintf(SNPARGS(proto, 0), "P:%d %s", ip->ip_p,
+		inet_ntoa(ip->ip_src));
+	    snprintf(SNPARGS(proto, len), " %s", inet_ntoa(ip->ip_dst));
+	    break;
+    }
+
+    if (offset != 0)		/* a non-zero offset is appended to the line */
+	    snprintf(SNPARGS(fragment, 0), " Fragment = %d",
+		offset);
+    else
+	    fragment[0] = '\0';
+    if (oif)			/* oif is preferred over rif when both are set */
+	    log(LOG_SECURITY | LOG_INFO, "%s %s %s out via %s%d%s\n",
+		name, action, proto, oif->if_name, oif->if_unit, fragment);
+    else if (rif)
+	    log(LOG_SECURITY | LOG_INFO, "%s %s %s in via %s%d%s\n", name,
+		action, proto, rif->if_name, rif->if_unit, fragment);
+    else
+	    log(LOG_SECURITY | LOG_INFO, "%s %s %s%s\n", name, action,
+		proto, fragment);
+    if ((f ? f->fw_logamount != 0 : 1) &&
+	count == (f ? f->fw_loghighest : fw_verbose_limit))
+	    log(LOG_SECURITY | LOG_NOTICE,	/* count just reached the limit */
+		"ipfw: limit %d reached on entry %d\n",
+		f ? f->fw_logamount : fw_verbose_limit,
+		f ? f->fw_number : -1);
+}
+
+static __inline int
+hash_packet(struct ipfw_flow_id *id)
+{
+    /* XOR both addresses and both ports, AND with curr_dyn_buckets - 1. */
+    u_int32_t bucket = id->dst_ip ^ id->src_ip ;
+    bucket ^= id->dst_port ^ id->src_port ;
+    bucket &= (curr_dyn_buckets - 1) ;
+    return bucket ;
+}
+
+#define TIME_LEQ(a,b)       ((int)((a)-(b)) <= 0)
+/*
+ * Free dynamic rules and unlink them from their hash buckets.
+ * chain selects the victims: NULL means rules of any chain, otherwise
+ * only rules whose chain pointer equals it.  With force non-zero the
+ * selected rules go unconditionally; with force zero only those whose
+ * expire time has been reached go, and the sweep runs at most once
+ * per second.
+ */
+static void
+remove_dyn_rule(struct ip_fw_chain *chain, int force)
+{
+    static u_int32_t last_remove = 0 ;
+    struct ipfw_dyn_rule **linkp, *victim ;
+    int bucket ;
+
+    if (ipfw_dyn_v == NULL || dyn_count == 0)
+	return ;
+    /* do not expire more than once per second, it is useless */
+    if (!force && last_remove == time_second)
+	return ;
+    last_remove = time_second ;
+
+    for (bucket = 0 ; bucket < curr_dyn_buckets ; bucket++) {
+	/* *linkp is the pointer through which the current rule is reached */
+	linkp = &ipfw_dyn_v[bucket] ;
+	while ((victim = *linkp) != NULL) {
+	    if ((chain != NULL && chain != victim->chain) ||
+		(!force && !TIME_LEQ( victim->expire , time_second ))) {
+		linkp = &victim->next ;	/* spared: step past it */
+		continue ;
+	    }
+	    DEB(printf("-- remove entry 0x%08x %d -> 0x%08x %d, %d left\n",
+		(victim->id.src_ip), (victim->id.src_port),
+		(victim->id.dst_ip), (victim->id.dst_port), dyn_count-1 ); )
+	    /* unlink first, then release the memory */
+	    *linkp = victim->next ;
+	    dyn_count-- ;
+	    free(victim, M_IPFW);
+	}
+    }
+}
+
+static struct ipfw_dyn_rule *
+lookup_dyn_rule(struct ipfw_flow_id *pkt, int *match_direction)
+{
+    /*
+     * Find the dynamic rule for flow pkt in its hash bucket, freeing
+     * expired entries met on the way; a hit also refreshes its expiry.
+     */
+    struct ipfw_dyn_rule *prev, *q, *old_q ;
+    int i, dir = 0;
+#define MATCH_FORWARD 1
+
+    if (ipfw_dyn_v == NULL)
+	return NULL ;
+    i = hash_packet( pkt );
+    for (prev=NULL, q = ipfw_dyn_v[i] ; q != NULL ; ) {
+       if (TIME_LEQ( q->expire , time_second ) ) { /* expire entry */
+           old_q = q ;
+           if (prev != NULL)
+               prev->next = q = q->next ;
+           else
+               ipfw_dyn_v[i] = q = q->next ;
+           dyn_count-- ;
+           free(old_q, M_IPFW);
+           continue ;
+	}
+	if ( pkt->proto == q->id.proto) {
+	    switch (q->type) {
+	    default:        /* bidirectional rule, no masks */
+		if (pkt->src_ip == q->id.src_ip &&
+			pkt->dst_ip == q->id.dst_ip &&
+			pkt->src_port == q->id.src_port &&
+			pkt->dst_port == q->id.dst_port ) {
+		    dir = MATCH_FORWARD ;
+		    goto found ;
+		}
+		if (pkt->src_ip == q->id.dst_ip &&
+			pkt->dst_ip == q->id.src_ip &&
+			pkt->src_port == q->id.dst_port &&
+			pkt->dst_port == q->id.src_port ) {
+		   dir = 0 ; /* reverse match */
+		   goto found ;
+		}
+		break ;
+	    }
+	}
+	prev = q ;
+	q = q->next ;
+    }
+    return NULL ; /* clearly not found */
+found:
+    if ( prev != NULL) { /* found and not in front: move it to the front */
+	prev->next = q->next ;
+	q->next = ipfw_dyn_v[i] ;
+	ipfw_dyn_v[i] = q ;
+    }
+    if (pkt->proto == IPPROTO_TCP) {
+	/* record flags: forward ones as is, reverse ones shifted up by 8 */
+	u_char flags = pkt->flags & (TH_FIN|TH_SYN|TH_RST);
+	q->state |= (dir == MATCH_FORWARD ) ? flags : (flags << 8);
+	switch (q->state) {
+	case TH_SYN :
+	    /* opening */
+	    q->expire = time_second + dyn_syn_lifetime ;
+	    break ;
+	case TH_SYN | (TH_SYN << 8) :
+	    /* move to established */
+	    q->expire = time_second + dyn_ack_lifetime ;
+	    break ;
+	case TH_SYN | (TH_SYN << 8) | TH_FIN :
+	case TH_SYN | (TH_SYN << 8) | (TH_FIN << 8) :
+	    /* one side tries to close */
+	    q->expire = time_second + dyn_ack_lifetime ;
+	    break ;
+	case TH_SYN | (TH_SYN << 8) | TH_FIN | (TH_FIN << 8) :
+	    /* both sides closed */
+	    q->expire = time_second + dyn_fin_lifetime ;
+	    break ;
+	default:
+#if 0
+	    /*
+	     * reset or some invalid combination, but can also
+	     * occur if we use keep-state the wrong way.
+	     */
+	    if ( (q->state & ((TH_RST << 8)|TH_RST)) == 0)
+		printf("invalid state: 0x%x\n", q->state);
+#endif
+	    q->expire = time_second + dyn_rst_lifetime ;
+	    break ;
+	}
+    } else {
+	/* should do something for UDP and others... */
+	q->expire = time_second + dyn_short_lifetime ;
+    }
+    if (match_direction)	/* optional output; callers may pass NULL */
+	*match_direction = dir ;
+    return q ;
+}
+
+/*
+ * Install state for a dynamic session: a new rule for flow id, owned by
+ * chain, goes to the head of its hash bucket.  The bucket array is set
+ * up first if missing, or resized if empty and dyn_buckets changed.
+ */
+static void
+add_dyn_rule(struct ipfw_flow_id *id, struct ipfw_flow_id *mask,
+       struct ip_fw_chain *chain)
+{
+    struct ipfw_dyn_rule *r ;
+    int i ;
+
+    if (ipfw_dyn_v == NULL ||
+       (dyn_count == 0 && dyn_buckets != curr_dyn_buckets)) {
+       /* try reallocation, make sure we have a power of 2 */
+       u_int32_t n = dyn_buckets ;
+       while ( n > 0 && (n & 1) == 0 )
+           n >>= 1 ;
+       if (n != 1) /* not a power of 2 */
+           dyn_buckets = curr_dyn_buckets ; /* reset */
+       else {
+           curr_dyn_buckets = dyn_buckets ; /* hash_packet() masks with it */
+           if (ipfw_dyn_v != NULL)
+		free(ipfw_dyn_v, M_IPFW);
+           ipfw_dyn_v = malloc(curr_dyn_buckets * sizeof r,
+                   M_IPFW, M_DONTWAIT | M_ZERO);
+       }
+    }
+    if (ipfw_dyn_v == NULL)
+	return ; /* allocation failed, or no usable size was given */
+
+    r = malloc(sizeof *r, M_IPFW, M_DONTWAIT | M_ZERO);
+    if (r == NULL) {
+       printf ("sorry cannot allocate state\n");
+       return ;
+    }
+    if (mask)
+	r->mask = *mask ;
+    r->id = *id ;
+    r->expire = time_second + dyn_syn_lifetime ;
+    r->chain = chain ;
+    r->type = ((struct ip_fw_ext *)chain->rule)->dyn_type ;
+    i = hash_packet(id);
+    r->bucket = i ;
+    r->next = ipfw_dyn_v[i] ;
+    ipfw_dyn_v[i] = r ;
+    dyn_count++ ;
+    DEB(printf("-- add entry 0x%08x %d -> 0x%08x %d, %d left\n",
+       (r->id.src_ip), (r->id.src_port),
+       (r->id.dst_ip), (r->id.dst_port),
+       dyn_count ); )
+}
+
+/*
+ * Install dynamic state for the packet described by last_pkt.
+ * The kind of state is read from the dyn_type field of the extended
+ * rule behind chain->rule; only one kind (bidirectional) is handled.
+ * Nothing is added if the flow already has state or if dyn_max rules
+ * exist even after expired ones were purged; those two cases print a
+ * message, at most once per second.
+ */
+static void
+install_state(struct ip_fw_chain *chain)
+{
+    static int last_log ;
+    u_long type = ((struct ip_fw_ext *)chain->rule)->dyn_type ;
+
+    DEB(printf("-- install state type %d 0x%08lx %u -> 0x%08lx %u\n",
+       type,
+       (last_pkt.src_ip), (last_pkt.src_port),
+       (last_pkt.dst_ip), (last_pkt.dst_port) );)
+
+    if (lookup_dyn_rule(&last_pkt, NULL) != NULL) {
+	if (last_log != time_second) {
+	    last_log = time_second ;
+	    printf(" entry already present, done\n");
+	}
+	return ;
+    }
+    if (dyn_count >= dyn_max) /* try remove old ones... */
+	remove_dyn_rule(NULL, 0 /* expire */);
+    if (dyn_count >= dyn_max) {
+	if (last_log != time_second) {
+	    last_log = time_second ;
+	    printf(" Too many dynamic rules, sorry\n");
+	}
+	return ;
+    }
+    switch (type) {
+    default: /* bidir rule */
+       add_dyn_rule(&last_pkt, NULL, chain);
+       break ;
+    }
+    /* XXX looked up again only so that the new entry gets its lifetime */
+    (void)lookup_dyn_rule(&last_pkt, NULL) ;
+}
+
+/*
+ * Return the chain entry to evaluate after `me'.  For a skipto rule
+ * this is the first later entry whose rule number reaches the jump
+ * target; the scan begins after `me', so jumps never go backwards.
+ * Otherwise, or with no such entry, it is the list successor of `me'.
+ */
+static struct ip_fw_chain * lookup_next_rule(struct ip_fw_chain *me);
+
+static struct ip_fw_chain *
+lookup_next_rule(struct ip_fw_chain *me)
+{
+    struct ip_fw_chain *successor = LIST_NEXT(me, next) ;
+    struct ip_fw_chain *scan ;
+    int target = me->rule->fw_skipto_rule ; /* only meaningful for skipto */
+
+    if ( (me->rule->fw_flg & IP_FW_F_COMMAND) != IP_FW_F_SKIPTO )
+	return successor ;
+    for (scan = successor ; scan != NULL ; scan = LIST_NEXT(scan, next))
+	if (scan->rule->fw_number >= target)
+	    return scan ;
+    return successor ; /* no later rule reaches the target */
+}
+
+/*
+ * Parameters:
+ *
+ *	pip	Pointer to packet header (struct ip **)
+ *	hlen	Packet header length
+ *	oif	Outgoing interface, or NULL if packet is incoming
+ *	*cookie Skip up to the first rule past this rule number;
+ *		upon return, number of the matching divert or tee rule,
+ *		otherwise 0. Special case: cookie == NULL on input for bridging.
+ *	*m	The packet; we set to NULL when/if we nuke it.
+ *	*flow_id pointer to the last matching rule (in/out)
+ *	*next_hop socket we are forwarding to (in/out).
+ *
+ * Return value:
+ *
+ *	IP_FW_PORT_DENY_FLAG	the packet must be dropped; *m is NULL
+ *		upon return if the packet was already consumed (reject
+ *		reply sent, or m_pullup() failed).
+ *	0	The packet is to be accepted and routed normally.
+ *	port	Divert the packet to port, with these caveats:
+ *
+ *		- If IP_FW_PORT_TEE_FLAG is set, tee the packet instead
+ *		  of diverting it (ie, 'ipfw tee').
+ *
+ *		- If IP_FW_PORT_DYNT_FLAG is set, interpret the lower
+ *		  16 bits as a dummynet pipe number instead of diverting
+ */
+
+static int 
+ip_fw_chk(struct ip **pip, int hlen,
+	struct ifnet *oif, u_int16_t *cookie, struct mbuf **m,
+	struct ip_fw_chain **flow_id,
+        struct sockaddr_in **next_hop)
+{
+	struct ip_fw_chain *chain;
+	struct ip_fw *f = NULL, *rule = NULL;
+	struct ip *ip = *pip;
+	struct ifnet *const rif = (*m)->m_pkthdr.rcvif;
+	struct ifnet *tif;
+	u_short offset = 0 ;
+	u_short src_port = 0, dst_port = 0;
+	struct in_addr src_ip, dst_ip; /* XXX */
+	u_int8_t proto= 0, flags = 0 ; /* XXX */
+	u_int16_t skipto, bridgeCookie;
+	u_int16_t ip_len;
+
+	int dyn_checked = 0 ; /* set after dyn.rules have been checked. */
+	int direction = MATCH_FORWARD ; /* dirty trick... */
+	struct ipfw_dyn_rule *q = NULL ;
+
+	/* Special hack for bridging (as usual): no cookie was passed in */
+	if (cookie == NULL) {
+		bridgeCookie = 0;
+		cookie = &bridgeCookie;
+#define BRIDGED	(cookie == &bridgeCookie)
+		hlen = ip->ip_hl << 2;
+	}
+
+	/* Grab and reset cookie */
+	skipto = *cookie;
+	*cookie = 0;
+
+#define PULLUP_TO(len)	do {						\
+			    if ((*m)->m_len < (len)) {			\
+				ip = NULL ;				\
+				if ((*m = m_pullup(*m, (len))) == 0)	\
+				    goto bogusfrag;			\
+				ip = mtod(*m, struct ip *);		\
+				*pip = ip;				\
+			    }						\
+			} while (0)
+
+	/*
+	 * Collect parameters into local variables for faster matching.
+	 */
+	proto = ip->ip_p;
+	src_ip = ip->ip_src;
+	dst_ip = ip->ip_dst;
+	if (0 && BRIDGED) { /* not yet... */
+	    offset = (ntohs(ip->ip_off) & IP_OFFMASK);
+	    ip_len = ntohs(ip->ip_len);
+	} else {
+	    offset = (ip->ip_off & IP_OFFMASK);
+	    ip_len = ip->ip_len;
+	}
+	if (offset == 0) {	/* ports, flags, ICMP type read only then */
+	    struct tcphdr *tcp;
+	    struct udphdr *udp;
+
+	    switch (proto) {
+	    case IPPROTO_TCP :
+		PULLUP_TO(hlen + sizeof(struct tcphdr));
+		tcp =(struct tcphdr *)((u_int32_t *)ip + ip->ip_hl);
+		dst_port = tcp->th_dport ;
+		src_port = tcp->th_sport ;
+		flags = tcp->th_flags ;
+		break ;
+
+	    case IPPROTO_UDP :
+		PULLUP_TO(hlen + sizeof(struct udphdr));
+		udp =(struct udphdr *)((u_int32_t *)ip + ip->ip_hl);
+		dst_port = udp->uh_dport ;
+		src_port = udp->uh_sport ;
+		break;
+
+	    case IPPROTO_ICMP:
+		PULLUP_TO(hlen + 4);	/* type, code and checksum. */
+		flags = ((struct icmp *)
+			((u_int32_t *)ip + ip->ip_hl))->icmp_type ;
+		break ;
+
+	    default :
+		break;
+	    }
+	}
+#undef PULLUP_TO
+	last_pkt.src_ip = ntohl(src_ip.s_addr);	/* flow id: ntohl/ntohs'd */
+	last_pkt.dst_ip = ntohl(dst_ip.s_addr);
+	last_pkt.proto = proto;
+	last_pkt.src_port = ntohs(src_port);
+	last_pkt.dst_port = ntohs(dst_port);
+	last_pkt.flags = flags;
+
+	if (*flow_id) {
+		/* Accept if passed first test */
+		if (fw_one_pass)
+			return 0;
+		/*
+		 * Packet has already been tagged. Look for the next rule
+		 * to restart processing.
+		 */
+		chain = LIST_NEXT(*flow_id, next); /* NOTE(review): dead store */
+
+		if ((chain = (*flow_id)->rule->next_rule_ptr) == NULL)
+			chain = (*flow_id)->rule->next_rule_ptr =
+			    lookup_next_rule(*flow_id);
+		if (chain == NULL)
+			goto dropit;
+	} else {
+		/*
+		 * Go down the chain, looking for enlightenment.
+		 * If we've been asked to start at a given rule, do so.
+		 */
+		chain = LIST_FIRST(&ip_fw_chain_head);
+		if (skipto != 0) {
+			if (skipto >= IPFW_DEFAULT_RULE)
+				goto dropit;
+			while (chain && chain->rule->fw_number <= skipto)
+				chain = LIST_NEXT(chain, next);
+			if (chain == NULL)
+				goto dropit;
+		}
+	}
+
+	/* Walk the rules in list order until one settles the packet. */
+	for (; chain; chain = LIST_NEXT(chain, next)) {
+again:
+		f = chain->rule;
+		if (f->fw_number == IPFW_DEFAULT_RULE)
+		    goto got_match ;	/* taken without any further test */
+
+		/*
+		 * dynamic rules are checked at the first keep-state or
+		 * check-state occurrence.
+		 */
+		if (f->fw_flg & (IP_FW_F_KEEP_S|IP_FW_F_CHECK_S) &&
+			 dyn_checked == 0 ) {
+		    dyn_checked = 1 ;
+		    q = lookup_dyn_rule(&last_pkt, &direction);
+		    if (q != NULL) {
+			DEB(printf("-- dynamic match 0x%08x %d %s 0x%08x %d\n",
+			    (q->id.src_ip), (q->id.src_port),
+			    (direction == MATCH_FORWARD ? "-->" : "<--"),
+			    (q->id.dst_ip), (q->id.dst_port) ); )
+			chain = q->chain ;
+			f = chain->rule ;
+			q->pcnt++ ;
+			q->bcnt += ip_len;
+			goto got_match ; /* random not allowed here */
+		    }
+		    /* if this was a check-only rule, continue with next */
+		    if (f->fw_flg & IP_FW_F_CHECK_S)
+			continue ;
+		}
+
+		/* Check if rule only valid for bridged packets */
+		if ((f->fw_flg & IP_FW_BRIDGED) != 0 && !(BRIDGED))
+			continue;
+
+		if (oif) {
+			/* Check direction outbound */
+			if (!(f->fw_flg & IP_FW_F_OUT))
+				continue;
+		} else {
+			/* Check direction inbound */
+			if (!(f->fw_flg & IP_FW_F_IN))
+				continue;
+		}
+
+		/* Rules flagged IP_FW_F_FRAG skip packets whose offset is 0 */
+		if ((f->fw_flg & IP_FW_F_FRAG) && offset == 0 )
+			continue;
+
+		if (f->fw_flg & IP_FW_F_SME) {
+			INADDR_TO_IFP(src_ip, tif);
+			if (tif == NULL)
+				continue;
+		}
+		if (f->fw_flg & IP_FW_F_DME) {
+			INADDR_TO_IFP(dst_ip, tif);
+			if (tif == NULL)
+				continue;
+		}
+		/* If src-addr doesn't match, not this rule. */
+		if (((f->fw_flg & IP_FW_F_INVSRC) != 0) ^ ((src_ip.s_addr
+		    & f->fw_smsk.s_addr) != f->fw_src.s_addr))
+			continue;
+
+		/* If dest-addr doesn't match, not this rule. */
+		if (((f->fw_flg & IP_FW_F_INVDST) != 0) ^ ((dst_ip.s_addr
+		    & f->fw_dmsk.s_addr) != f->fw_dst.s_addr))
+			continue;
+
+		/* Interface check */
+		if ((f->fw_flg & IF_FW_F_VIAHACK) == IF_FW_F_VIAHACK) {
+			struct ifnet *const iface = oif ? oif : rif;
+
+			/* Backwards compatibility hack for "via" */
+			if (!iface || !iface_match(iface,
+			    &f->fw_in_if, f->fw_flg & IP_FW_F_OIFNAME))
+				continue;
+		} else {
+			/* Check receive interface */
+			if ((f->fw_flg & IP_FW_F_IIFACE)
+			    && (!rif || !iface_match(rif,
+			      &f->fw_in_if, f->fw_flg & IP_FW_F_IIFNAME)))
+				continue;
+			/* Check outgoing interface */
+			if ((f->fw_flg & IP_FW_F_OIFACE)
+			    && (!oif || !iface_match(oif,
+			      &f->fw_out_if, f->fw_flg & IP_FW_F_OIFNAME)))
+				continue;
+		}
+
+		/* Check IP header values */
+		if (f->fw_ipflg & IP_FW_IF_IPOPT && !ipopts_match(ip, f))
+			continue;
+		if (f->fw_ipflg & IP_FW_IF_IPLEN && f->fw_iplen != ip_len)
+			continue;
+		if (f->fw_ipflg & IP_FW_IF_IPID && f->fw_ipid != ntohs(ip->ip_id))
+			continue;
+		if (f->fw_ipflg & IP_FW_IF_IPTOS && !iptos_match(ip, f))
+			continue;
+		if (f->fw_ipflg & IP_FW_IF_IPTTL && f->fw_ipttl != ip->ip_ttl)
+			continue;
+		if (f->fw_ipflg & IP_FW_IF_IPVER && f->fw_ipver != ip->ip_v)
+			continue;
+
+		/* Check protocol; if wildcard, and no [ug]id, match */
+		if (f->fw_prot == IPPROTO_IP) {
+			if (!(f->fw_flg & (IP_FW_F_UID|IP_FW_F_GID)))
+				goto rnd_then_got_match;
+		} else
+		    /* If different, don't match */
+		    if (proto != f->fw_prot) 
+			    continue;
+
+		/* Protocol specific checks for uid only */
+		if (f->fw_flg & (IP_FW_F_UID|IP_FW_F_GID)) {
+		    switch (proto) {
+		    case IPPROTO_TCP:
+			{
+			    struct inpcb *P;
+
+			    if (offset == 1)	/* cf. RFC 1858 */
+				    goto bogusfrag;
+			    if (offset != 0)
+				    continue;
+
+			    if (oif)
+				P = in_pcblookup_hash(&tcbinfo, dst_ip,
+				   dst_port, src_ip, src_port, 0,
+				   oif);
+			    else
+				P = in_pcblookup_hash(&tcbinfo, src_ip,
+				   src_port, dst_ip, dst_port, 0,
+				   NULL);
+
+			    if (P && P->inp_socket) {
+				if (f->fw_flg & IP_FW_F_UID) {
+					if (P->inp_socket->so_cred->cr_uid !=
+					    f->fw_uid)
+						continue;
+				} else if (!groupmember(f->fw_gid,
+					    P->inp_socket->so_cred))
+						continue;
+			    } else
+				continue;
+			    break;
+			}
+
+		    case IPPROTO_UDP:
+			{
+			    struct inpcb *P;
+
+			    if (offset != 0)
+				continue;
+
+			    if (oif)
+				P = in_pcblookup_hash(&udbinfo, dst_ip,
+				   dst_port, src_ip, src_port, 1,
+				   oif);
+			    else
+				P = in_pcblookup_hash(&udbinfo, src_ip,
+				   src_port, dst_ip, dst_port, 1,
+				   NULL);
+
+			    if (P && P->inp_socket) {
+				if (f->fw_flg & IP_FW_F_UID) {
+					if (P->inp_socket->so_cred->cr_uid !=
+					    f->fw_uid)
+						continue;
+				} else if (!groupmember(f->fw_gid,
+					    P->inp_socket->so_cred))
+						continue;
+			    } else
+				continue;
+			    break;
+			}
+
+		    default:
+			    continue;
+		    }
+		}
+		    
+		/* Protocol specific checks */
+		switch (proto) {
+		case IPPROTO_TCP:
+		    {
+			struct tcphdr *tcp;
+
+			if (offset == 1)	/* cf. RFC 1858 */
+				goto bogusfrag;
+			if (offset != 0) {
+				/*
+				 * TCP flags and ports aren't available in this
+				 * packet -- if this rule specified either one,
+				 * we consider the rule a non-match.
+				 */
+				if (f->fw_nports != 0 ||
+				    f->fw_ipflg & IP_FW_IF_TCPMSK)
+					continue;
+
+				break;
+			}
+			tcp = (struct tcphdr *) ((u_int32_t *)ip + ip->ip_hl);
+
+			if (f->fw_ipflg & IP_FW_IF_TCPOPT && !tcpopts_match(tcp, f))
+				continue;
+			if (((f->fw_ipflg & IP_FW_IF_TCPFLG) ||
+			    (f->fw_ipflg & IP_FW_IF_TCPEST)) &&
+			    !tcpflg_match(tcp, f))
+				continue;
+			if (f->fw_ipflg & IP_FW_IF_TCPSEQ && tcp->th_seq != f->fw_tcpseq)
+				continue;
+			if (f->fw_ipflg & IP_FW_IF_TCPACK && tcp->th_ack != f->fw_tcpack)
+				continue;
+			if (f->fw_ipflg & IP_FW_IF_TCPWIN && tcp->th_win != f->fw_tcpwin)
+				continue;
+			goto check_ports;
+		    }
+
+		case IPPROTO_UDP:
+			if (offset != 0) {
+				/*
+				 * Port specification is unavailable -- if this
+				 * rule specifies a port, we consider the rule
+				 * a non-match.
+				 */
+				if (f->fw_nports != 0)
+					continue;
+
+				break;
+			}
+check_ports:
+			if (!port_match(&f->fw_uar.fw_pts[0],
+			    IP_FW_GETNSRCP(f), ntohs(src_port),
+			    f->fw_flg & IP_FW_F_SRNG,
+			    f->fw_flg & IP_FW_F_SMSK))
+				continue;
+			if (!port_match(&f->fw_uar.fw_pts[IP_FW_GETNSRCP(f)],
+			    IP_FW_GETNDSTP(f), ntohs(dst_port),
+			    f->fw_flg & IP_FW_F_DRNG,
+			    f->fw_flg & IP_FW_F_DMSK)) 
+				continue;
+			break;
+
+		case IPPROTO_ICMP:
+		    {
+			struct icmp *icmp;
+
+			if (offset != 0)	/* Type isn't valid */
+				break;
+			icmp = (struct icmp *) ((u_int32_t *)ip + ip->ip_hl);
+			if (!icmptype_match(icmp, f))
+				continue;
+			break;
+		    }
+
+		default:
+			break;
+		/* entered only via goto: report (if verbose) and drop */
+bogusfrag:
+		if (fw_verbose && ip != NULL)
+			ipfw_report(NULL, ip, offset, rif, oif);
+		goto dropit;
+
+		}
+
+rnd_then_got_match:
+		if ( ((struct ip_fw_ext *)f)->dont_match_prob &&
+		    random() < ((struct ip_fw_ext *)f)->dont_match_prob )
+			continue ;
+got_match:
+		/*
+		 * If not a dynamic match (q == NULL) and keep-state, install
+		 * a new dynamic entry.
+		 */
+		if (q == NULL && f->fw_flg & IP_FW_F_KEEP_S)
+		    install_state(chain);
+		/* Update statistics */
+		f->fw_pcnt += 1;
+		f->fw_bcnt += ip_len;
+		f->timestamp = time_second;
+
+		/* Log to console if desired */
+		if ((f->fw_flg & IP_FW_F_PRN) && fw_verbose)
+			ipfw_report(f, ip, offset, rif, oif);
+
+		/* Take appropriate action */
+		switch (f->fw_flg & IP_FW_F_COMMAND) {
+		case IP_FW_F_ACCEPT:
+			return(0);
+		case IP_FW_F_COUNT:
+			continue;
+#ifdef IPDIVERT
+		case IP_FW_F_DIVERT:
+			*cookie = f->fw_number;
+			return(f->fw_divert_port);
+		case IP_FW_F_TEE:
+			*cookie = f->fw_number;
+			return(f->fw_divert_port | IP_FW_PORT_TEE_FLAG);
+#endif
+		case IP_FW_F_SKIPTO: /* XXX check */
+			if ( f->next_rule_ptr )
+			    chain = f->next_rule_ptr ;
+			else
+			    chain = lookup_next_rule(chain) ;
+			if (! chain) goto dropit;
+			goto again ;
+#ifdef DUMMYNET
+		case IP_FW_F_PIPE:
+		case IP_FW_F_QUEUE:
+			*flow_id = chain;
+			return(f->fw_pipe_nr | IP_FW_PORT_DYNT_FLAG);
+#endif
+#ifdef IPFIREWALL_FORWARD
+		case IP_FW_F_FWD:
+			/* Change the next-hop address for this packet.
+			 * Initially we'll only worry about directly
+			 * reachable next-hop's, but ultimately
+			 * we will work out for next-hops that aren't
+			 * direct the route we would take for it. We
+			 * [cs]ould leave this latter problem to
+			 * ip_output.c. We hope to high [name the abode of
+			 * your favourite deity] that ip_output doesn't modify
+			 * the new value of next_hop (which is dst there)
+			 */
+			if (next_hop != NULL /* Make sure, first... */
+			    && (q == NULL || direction == MATCH_FORWARD) )
+				*next_hop = &(f->fw_fwd_ip);
+			return(0); /* Allow the packet */
+#endif
+		}
+
+		/* Deny/reject this packet using this rule */
+		rule = f;
+		break;
+
+	}
+
+	/* Rule IPFW_DEFAULT_RULE should always be there and match */
+	KASSERT(chain != NULL, ("ip_fw: no chain"));
+	/* NOTE(review): rule is non-NULL here only after the break above. */
+	/*
+	 * At this point, we're going to drop the packet.
+	 * Send a reject notice if all of the following are true:
+	 *
+	 * - The packet matched a reject rule
+	 * - The packet is not an ICMP packet, or is an ICMP query packet
+	 * - The packet is not a multicast or broadcast packet
+	 */
+	if ((rule->fw_flg & IP_FW_F_COMMAND) == IP_FW_F_REJECT
+	    && (ip->ip_p != IPPROTO_ICMP || is_icmp_query(ip))
+	    && !((*m)->m_flags & (M_BCAST|M_MCAST))
+	    && !IN_MULTICAST(ntohl(ip->ip_dst.s_addr))) {
+		switch (rule->fw_reject_code) {
+		case IP_FW_REJECT_RST:
+		  {
+			/* XXX warning, this code writes into the mbuf */
+			struct tcphdr *const tcp =
+				(struct tcphdr *) ((u_int32_t *)ip + ip->ip_hl);
+			struct tcpiphdr ti, *const tip = (struct tcpiphdr *) ip;
+
+			if (offset != 0 || (tcp->th_flags & TH_RST))
+				break;
+			ti.ti_i = *((struct ipovly *) ip);
+			ti.ti_t = *tcp;
+			bcopy(&ti, ip, sizeof(ti));
+			NTOHL(tip->ti_seq);
+			NTOHL(tip->ti_ack);
+			tip->ti_len = ip_len - hlen - (tip->ti_off << 2);
+			if (tcp->th_flags & TH_ACK) {
+				tcp_respond(NULL, (void *)ip, tcp, *m,
+				    (tcp_seq)0, tcp->th_ack, TH_RST);
+			} else {
+				if (tcp->th_flags & TH_SYN)
+					tip->ti_len++;
+				tcp_respond(NULL, (void *)ip, tcp, *m, 
+				    tip->ti_seq + tip->ti_len,
+				    (tcp_seq)0, TH_RST|TH_ACK);
+			}
+			*m = NULL;
+			break;
+		  }
+		default:	/* Send an ICMP unreachable using code */
+			icmp_error(*m, ICMP_UNREACH,
+			    rule->fw_reject_code, 0L, 0);
+			*m = NULL;
+			break;
+		}
+	}
+
+dropit:
+	/*
+	 * Finally, drop the packet.
+	 */
+	return(IP_FW_PORT_DENY_FLAG);
+#undef BRIDGED
+}
+
+/*
+ * Clear the cached next_rule_ptr of every rule on ip_fw_chain_head.
+ * Used when a rule is added or deleted, since the cached targets
+ * may then be wrong; they are looked up again on the fly as packets
+ * are matched.
+ * Must be called at splnet().
+ */
+static void
+flush_rule_ptrs()
+{
+    struct ip_fw_chain *entry = LIST_FIRST(&ip_fw_chain_head) ;
+
+    for ( ; entry != NULL ; entry = LIST_NEXT(entry, next))
+	entry->rule->next_rule_ptr = NULL ;
+}
+
+/*
+ * Copy the rule *frwl and link the copy into chainptr, kept sorted by
+ * rule number.  A rule number of 0 asks for automatic numbering; the
+ * number chosen is also written back to frwl->fw_number.
+ * Returns 0 on success, ENOSPC if memory cannot be allocated.
+ */
+static int
+add_entry(struct ip_fw_head *chainptr, struct ip_fw *frwl)
+{
+	struct ip_fw *ftmp;
+	struct ip_fw_ext *ftmp_ext;
+	struct ip_fw_chain *fwc, *fcp, *fcpl = 0;
+	u_short nbr = 0;
+	int s;
+
+	fwc = malloc(sizeof *fwc, M_IPFW, M_DONTWAIT);
+	ftmp_ext = malloc(sizeof *ftmp_ext, M_IPFW, M_DONTWAIT | M_ZERO);
+	if (!fwc || !ftmp_ext) {
+		dprintf(("%s malloc said no\n", err_prefix));
+		if (fwc)  free(fwc, M_IPFW);
+		if (ftmp_ext) free(ftmp_ext, M_IPFW);
+		return (ENOSPC);
+	}
+	ftmp = &ftmp_ext->rule ;	/* safe: ftmp_ext is known non-NULL */
+	bcopy(frwl, ftmp, sizeof(*ftmp));
+	if (ftmp->fw_flg & IP_FW_F_RND_MATCH)
+		ftmp_ext->dont_match_prob = (intptr_t)ftmp->pipe_ptr;
+	if (ftmp->fw_flg & IP_FW_F_KEEP_S)
+		ftmp_ext->dyn_type = (u_long)(ftmp->next_rule_ptr) ;
+
+	ftmp->fw_in_if.fu_via_if.name[FW_IFNLEN - 1] = '\0';
+	ftmp->fw_pcnt = 0L;
+	ftmp->fw_bcnt = 0L;
+	ftmp->next_rule_ptr = NULL ;
+	ftmp->pipe_ptr = NULL ;
+	fwc->rule = ftmp;
+	s = splnet();
+
+	if (LIST_FIRST(chainptr) == 0) {
+		LIST_INSERT_HEAD(chainptr, fwc, next);
+		splx(s);
+		return(0);
+        }
+
+	/* If entry number is 0, find highest numbered rule and add 100 */
+	if (ftmp->fw_number == 0) {
+		LIST_FOREACH(fcp, chainptr, next) {
+			if (fcp->rule->fw_number == (u_short)-1)
+				break;
+			nbr = fcp->rule->fw_number;
+		}
+		if (nbr < IPFW_DEFAULT_RULE - 100)
+			nbr += 100;
+		ftmp->fw_number = frwl->fw_number = nbr;
+	}
+
+	/* Got a valid number; now insert it, keeping the list ordered */
+	LIST_FOREACH(fcp, chainptr, next) {
+		if (fcp->rule->fw_number > ftmp->fw_number)
+			break;
+		fcpl = fcp;
+	}
+	/* fcpl: last entry not numbered above the new rule (tail if none is) */
+	if (fcpl)
+		LIST_INSERT_AFTER(fcpl, fwc, next);
+	else
+		LIST_INSERT_HEAD(chainptr, fwc, next);
+	flush_rule_ptrs();
+	splx(s);
+	return (0);
+}
+
+/*
+ * Delete all rules numbered `number' from chainptr, along with the
+ * dynamic rules that point to each.  Number (u_short)-1 is never
+ * deleted.  Returns 0 if a rule was removed, EINVAL otherwise.
+ */
+static int
+del_entry(struct ip_fw_head *chainptr, u_short number)
+{
+	struct ip_fw_chain *fcp, *following;
+	int s;
+
+	if (number == (u_short)-1)
+		return (EINVAL);
+	for (fcp = LIST_FIRST(chainptr); fcp; fcp = LIST_NEXT(fcp, next))
+		if (fcp->rule->fw_number == number)
+			break;
+	if (fcp == NULL)
+		return (EINVAL);
+	/* prevent access to rules while removing them */
+	s = splnet();
+	do {
+		remove_dyn_rule(fcp, 1 /* delete */);
+		following = LIST_NEXT(fcp, next);
+		LIST_REMOVE(fcp, next);
+#ifdef DUMMYNET
+		dn_rule_delete(fcp) ;
+#endif
+		flush_rule_ptrs();
+		free(fcp->rule, M_IPFW);
+		free(fcp, M_IPFW);
+		fcp = following;
+	} while (fcp && fcp->rule->fw_number == number);
+	splx(s);
+	return 0;
+}
+
+/*
+ * Zero the counters and timestamp and set fw_loghighest back to
+ * fw_logamount for the rules numbered frwl->fw_number, or for every
+ * rule if frwl is NULL.  Returns 0, or EINVAL if no rule has the number.
+ */
+static int
+zero_entry(struct ip_fw *frwl)
+{
+	struct ip_fw_chain *fcp;
+	struct ip_fw *r;
+	int s;
+
+	if (frwl == 0) {
+		s = splnet();
+		for (fcp = LIST_FIRST(&ip_fw_chain_head); fcp != NULL;
+		    fcp = LIST_NEXT(fcp, next)) {
+			r = fcp->rule;
+			r->fw_bcnt = r->fw_pcnt = 0;
+			r->fw_loghighest = r->fw_logamount;
+			r->timestamp = 0;
+		}
+		splx(s);
+	} else {
+		/* skip ahead to the first rule with the wanted number */
+		fcp = LIST_FIRST(&ip_fw_chain_head);
+		while (fcp != NULL && fcp->rule->fw_number != frwl->fw_number)
+			fcp = LIST_NEXT(fcp, next);
+		if (fcp == NULL)	/* we didn't find any matching rules */
+			return (EINVAL);
+		/* Entries may share a number: clear the consecutive run. */
+		s = splnet();
+		do {
+			r = fcp->rule;
+			r->fw_bcnt = r->fw_pcnt = 0;
+			r->fw_loghighest = r->fw_logamount;
+			r->timestamp = 0;
+			fcp = LIST_NEXT(fcp, next);
+		} while (fcp && frwl->fw_number == fcp->rule->fw_number);
+		splx(s);
+	}
+
+	if (fw_verbose) {
+		if (frwl)
+			log(LOG_SECURITY | LOG_NOTICE,
+			    "ipfw: Entry %d cleared.\n", frwl->fw_number);
+		else
+			log(LOG_SECURITY | LOG_NOTICE,
+			    "ipfw: Accounting cleared.\n");
+	}
+
+	return (0);
+}
+
+/*
+ * Re-arm logging: set fw_loghighest to the current packet count plus
+ * fw_logamount for the rules numbered frwl->fw_number.  With frwl NULL,
+ * do so for every rule and also zero the global counter.
+ * Returns 0, or EINVAL if no rule carries the requested number.
+ */
+static int
+resetlog_entry(struct ip_fw *frwl)
+{
+	struct ip_fw_chain *fcp;
+	int s, cleared;
+
+	if (frwl == 0) {
+		s = splnet();
+		counter = 0;
+		LIST_FOREACH(fcp, &ip_fw_chain_head, next)
+			fcp->rule->fw_loghighest = fcp->rule->fw_pcnt +
+			    fcp->rule->fw_logamount;
+		splx(s);
+	} else {
+		cleared = 0;
+		/* Entries may share a number: reset the whole run of them. */
+		LIST_FOREACH(fcp, &ip_fw_chain_head, next)
+			if (frwl->fw_number == fcp->rule->fw_number) {
+				s = splnet();
+				while (fcp && frwl->fw_number == fcp->rule->fw_number) {
+					fcp->rule->fw_loghighest =
+					    fcp->rule->fw_pcnt +
+					    fcp->rule->fw_logamount;
+					fcp = LIST_NEXT(fcp, next);
+				}
+				splx(s);
+				cleared = 1;
+				break;
+			}
+		if (!cleared)	/* we didn't find any matching rules */
+			return (EINVAL);
+	}
+
+	if (fw_verbose) {
+		if (frwl)
+			log(LOG_SECURITY | LOG_NOTICE,
+			    "ipfw: Entry %d logging count reset.\n",
+			    frwl->fw_number);
+		else
+			log(LOG_SECURITY | LOG_NOTICE,
+			    "ipfw: All logging counts cleared.\n");
+	}
+
+	return (0);
+}
+
+/*
+ * Validate a rule before it is installed (IP_FW_ADD in ip_fw_ctl, default
+ * rule in ip_fw_init).  Returns 0, or EINVAL after a dprintf diagnostic.
+ */
+static int
+check_ipfw_struct(struct ip_fw *frwl)
+{
+	/* Check for invalid flag bits */
+	if ((frwl->fw_flg & ~IP_FW_F_MASK) != 0) {
+		dprintf(("%s undefined flag bits set (flags=%x)\n",
+		    err_prefix, frwl->fw_flg));
+		return (EINVAL);
+	}
+	if (frwl->fw_flg == IP_FW_F_CHECK_S) {
+		/* bare check-state rule: nothing else is validated */
+		return 0 ;
+	}
+	/* Must apply to incoming or outgoing (or both) */
+	if (!(frwl->fw_flg & (IP_FW_F_IN | IP_FW_F_OUT))) {
+		dprintf(("%s neither in nor out\n", err_prefix));
+		return (EINVAL);
+	}
+	/* Empty interface name is no good */
+	if (((frwl->fw_flg & IP_FW_F_IIFNAME)
+	      && !*frwl->fw_in_if.fu_via_if.name)
+	    || ((frwl->fw_flg & IP_FW_F_OIFNAME)
+	      && !*frwl->fw_out_if.fu_via_if.name)) {
+		dprintf(("%s empty interface name\n", err_prefix));
+		return (EINVAL);
+	}
+	/* Sanity check interface matching */
+	if ((frwl->fw_flg & IF_FW_F_VIAHACK) == IF_FW_F_VIAHACK) {
+		;		/* allow "via" backwards compatibility */
+	} else if ((frwl->fw_flg & IP_FW_F_IN)
+	    && (frwl->fw_flg & IP_FW_F_OIFACE)) {
+		dprintf(("%s outgoing interface check on incoming\n",
+		    err_prefix));
+		return (EINVAL);
+	}
+	/* Sanity check port ranges: a range needs at least two port entries */
+	if ((frwl->fw_flg & IP_FW_F_SRNG) && IP_FW_GETNSRCP(frwl) < 2) {
+		dprintf(("%s src range set but n_src_p=%d\n",
+		    err_prefix, IP_FW_GETNSRCP(frwl)));
+		return (EINVAL);
+	}
+	if ((frwl->fw_flg & IP_FW_F_DRNG) && IP_FW_GETNDSTP(frwl) < 2) {
+		dprintf(("%s dst range set but n_dst_p=%d\n",
+		    err_prefix, IP_FW_GETNDSTP(frwl)));
+		return (EINVAL);
+	}
+	if (IP_FW_GETNSRCP(frwl) + IP_FW_GETNDSTP(frwl) > IP_FW_MAX_PORTS) {
+		dprintf(("%s too many ports (%d+%d)\n",
+		    err_prefix, IP_FW_GETNSRCP(frwl), IP_FW_GETNDSTP(frwl)));
+		return (EINVAL);
+	}
+	/* Protocols other than TCP/UDP don't use port ranges */
+	if ((frwl->fw_prot != IPPROTO_TCP) &&
+	    (frwl->fw_prot != IPPROTO_UDP) &&
+	    (IP_FW_GETNSRCP(frwl) || IP_FW_GETNDSTP(frwl))) {
+		dprintf(("%s port(s) specified for non TCP/UDP rule\n",
+		    err_prefix));
+		return (EINVAL);
+	}
+	/*
+	 * Address bits outside the mask never match; reject such rules rather
+	 * than repair them, leaving that policy to the user-level utilities.
+	 */
+	if ((frwl->fw_src.s_addr & (~frwl->fw_smsk.s_addr)) || 
+		(frwl->fw_dst.s_addr & (~frwl->fw_dmsk.s_addr))) {
+		dprintf(("%s rule never matches\n", err_prefix));
+		return (EINVAL);
+	}
+
+	if ((frwl->fw_flg & IP_FW_F_FRAG) &&
+		(frwl->fw_prot == IPPROTO_UDP || frwl->fw_prot == IPPROTO_TCP)) {
+		if (frwl->fw_nports) {
+			dprintf(("%s cannot mix 'frag' and ports\n", err_prefix));
+			return (EINVAL);
+		}
+		if (frwl->fw_prot == IPPROTO_TCP &&
+			frwl->fw_tcpf != frwl->fw_tcpnf) {
+			dprintf(("%s cannot mix 'frag' and TCP flags\n", err_prefix));
+			return (EINVAL);
+		}
+	}
+
+	if (frwl->fw_flg & (IP_FW_F_UID | IP_FW_F_GID)) {
+		if ((frwl->fw_prot != IPPROTO_TCP) &&
+		   (frwl->fw_prot != IPPROTO_UDP) &&
+		   (frwl->fw_prot != IPPROTO_IP)) {
+			dprintf(("%s cannot use uid/gid logic on non-TCP/UDP\n", err_prefix));
+			return (EINVAL);
+		}
+	}
+
+	/* Check command specific stuff */
+	switch (frwl->fw_flg & IP_FW_F_COMMAND)
+	{
+	case IP_FW_F_REJECT:		/* code below 0x100, or RST for TCP */
+		if (frwl->fw_reject_code >= 0x100
+		    && !(frwl->fw_prot == IPPROTO_TCP
+		      && frwl->fw_reject_code == IP_FW_REJECT_RST)) {
+			dprintf(("%s unknown reject code\n", err_prefix));
+			return (EINVAL);
+		}
+		break;
+#if defined(IPDIVERT) || defined(DUMMYNET)
+#ifdef IPDIVERT
+	case IP_FW_F_DIVERT:		/* Diverting to port zero is invalid */
+	case IP_FW_F_TEE:
+#endif
+#ifdef DUMMYNET
+	case IP_FW_F_PIPE:              /* piping through 0 is invalid */
+	case IP_FW_F_QUEUE:             /* piping through 0 is invalid */
+#endif
+		if (frwl->fw_divert_port == 0) {	/* shares the fw_un slot with fw_pipe_nr */
+			dprintf(("%s can't divert to port 0\n", err_prefix));
+			return (EINVAL);
+		}
+		break;
+#endif /* IPDIVERT || DUMMYNET */
+	case IP_FW_F_DENY:
+	case IP_FW_F_ACCEPT:
+	case IP_FW_F_COUNT:
+	case IP_FW_F_SKIPTO:
+#ifdef IPFIREWALL_FORWARD
+	case IP_FW_F_FWD:
+#endif
+		break;
+	default:
+		dprintf(("%s invalid command\n", err_prefix));
+		return (EINVAL);
+	}
+
+	return 0;
+}
+
+/*
+ * Socket-option handler installed in ip_fw_ctl_ptr by ip_fw_init(): serves
+ * the IP_FW_GET/FLUSH/ZERO/ADD/DEL/RESETLOG requests.  Returns 0 or an errno.
+ */
+static int
+ip_fw_ctl(struct sockopt *sopt)
+{
+	int error = 0, s;
+	size_t size;
+	struct ip_fw_chain *fcp;
+	struct ip_fw frwl, *bp , *buf;
+
+	/*
+	 * Disallow modifications in really-really secure mode, but still allow
+	 * the logging counters to be reset.
+	 */
+	if (securelevel >= 3 && (sopt->sopt_name == IP_FW_ADD ||
+	    (sopt->sopt_dir == SOPT_SET && sopt->sopt_name != IP_FW_RESETLOG)))
+			return (EPERM);
+
+	switch (sopt->sopt_name) {
+	case IP_FW_GET:		/* copy out static rules, then dynamic rules */
+		size = 0 ;
+		LIST_FOREACH(fcp, &ip_fw_chain_head, next)
+		    size += sizeof(struct ip_fw) ;
+		if (ipfw_dyn_v) {
+		    int i ;
+		    struct ipfw_dyn_rule *p ;
+
+		    for (i = 0 ; i < curr_dyn_buckets ; i++ )
+			for ( p = ipfw_dyn_v[i] ; p != NULL ; p = p->next )
+			    size += sizeof(*p) ;
+		}
+		buf = malloc(size, M_TEMP, M_WAITOK);
+		if (buf == 0) {
+			error = ENOBUFS;
+			break;
+		}
+		bp = buf ;
+		LIST_FOREACH(fcp, &ip_fw_chain_head, next) {
+			bcopy(fcp->rule, bp, sizeof *fcp->rule);
+			/* the two pointer slots carry ip_fw_ext values out */
+			bp->pipe_ptr = (void *)(intptr_t)
+			    ((struct ip_fw_ext *)fcp->rule)->dont_match_prob;
+			bp->next_rule_ptr = (void *)(intptr_t)
+			    ((struct ip_fw_ext *)fcp->rule)->dyn_type;
+			bp++;
+		}
+		if (ipfw_dyn_v) {
+		    int i ;
+		    struct ipfw_dyn_rule *p, *dst, *last = NULL ;
+
+		    dst = (struct ipfw_dyn_rule *)bp ;
+		    for (i = 0 ; i < curr_dyn_buckets ; i++ )
+			for ( p = ipfw_dyn_v[i] ; p != NULL ; p = p->next, dst++ ) {
+			    bcopy(p, dst, sizeof *p);
+			    /* 'chain' carries the parent's rule number out */
+			    dst->chain = (void *)(intptr_t)p->chain->rule->fw_number ;
+			    dst->next = dst ; /* fake non-null pointer... */
+			    last = dst ;
+			    if (TIME_LEQ(dst->expire, time_second) )
+				dst->expire = 0 ;
+			    else
+				dst->expire -= time_second ;
+			    }
+		    if (last != NULL)
+			last->next = NULL ;
+		}
+		error = sooptcopyout(sopt, buf, size);
+		FREE(buf, M_TEMP);
+		break;
+
+	case IP_FW_FLUSH:
+		s = splnet();
+		remove_dyn_rule(NULL, 1 /* force delete */);
+		splx(s);
+		fcp = LIST_FIRST(&ip_fw_chain_head);
+		while (fcp) {
+			struct ip_fw_chain *next = LIST_NEXT(fcp, next);
+			if (fcp->rule->fw_number > fw_permanent_rules &&
+			     fcp->rule->fw_number != IPFW_DEFAULT_RULE ) {
+				s = splnet();
+				LIST_REMOVE(fcp, next);
+#ifdef DUMMYNET
+				dn_rule_delete(fcp);
+#endif
+				FREE(fcp->rule, M_IPFW);
+				FREE(fcp, M_IPFW);
+				splx(s);
+			}
+			fcp = next;
+		}
+		break;
+
+	case IP_FW_ZERO:
+		if (sopt->sopt_val == 0) {
+			error = zero_entry(NULL);
+			break;
+		}
+		error = sooptcopyin(sopt, &frwl, sizeof frwl, sizeof frwl);
+		if (!error)
+			error = zero_entry(&frwl);
+		break;
+
+	case IP_FW_ADD:
+		error = sooptcopyin(sopt, &frwl, sizeof frwl, sizeof frwl);
+		if (error || (error = check_ipfw_struct(&frwl)))
+			break;
+
+		if (frwl.fw_number == IPFW_DEFAULT_RULE) {
+			dprintf(("%s can't add rule %u\n", err_prefix,
+				 (unsigned)IPFW_DEFAULT_RULE));
+			error = EINVAL;
+		} else {
+			error = add_entry(&ip_fw_chain_head, &frwl);
+			if (!error && sopt->sopt_dir == SOPT_GET)
+				error = sooptcopyout(sopt, &frwl, sizeof frwl);
+		}
+		break;
+
+	case IP_FW_DEL:
+		error = sooptcopyin(sopt, &frwl, sizeof frwl, sizeof frwl);
+		if (error)
+			break;
+
+		if (frwl.fw_number == IPFW_DEFAULT_RULE) {
+			dprintf(("%s can't delete rule %u\n", err_prefix,
+				 (unsigned)IPFW_DEFAULT_RULE));
+			error = EINVAL;
+		} else
+			error = del_entry(&ip_fw_chain_head, frwl.fw_number);
+		break;
+
+	case IP_FW_RESETLOG:
+		if (sopt->sopt_val == 0) {
+			error = resetlog_entry(NULL);
+			break;
+		}
+		error = sooptcopyin(sopt, &frwl, sizeof frwl, sizeof frwl);
+		if (!error)
+			error = resetlog_entry(&frwl);
+		break;
+
+	default:
+		printf("ip_fw_ctl invalid option %d\n", sopt->sopt_name);
+		error = EINVAL ;
+	}
+
+	return (error);
+}
+
+struct ip_fw_chain *ip_fw_default_rule ;
+
+/* Install the firewall hooks and the default rule, then announce the setup. */
+void
+ip_fw_init(void)
+{
+	struct ip_fw dflt;
+
+	ip_fw_chk_ptr = ip_fw_chk;
+	ip_fw_ctl_ptr = ip_fw_ctl;
+	LIST_INIT(&ip_fw_chain_head);
+
+	/* default rule: IPPROTO_IP, both directions, action fixed at build time */
+	bzero(&dflt, sizeof dflt);
+	dflt.fw_number = IPFW_DEFAULT_RULE;
+	dflt.fw_prot = IPPROTO_IP;
+#ifdef IPFIREWALL_DEFAULT_TO_ACCEPT
+	dflt.fw_flg = IP_FW_F_ACCEPT | IP_FW_F_IN | IP_FW_F_OUT;
+#else
+	dflt.fw_flg = IP_FW_F_DENY | IP_FW_F_IN | IP_FW_F_OUT;
+#endif
+	if (check_ipfw_struct(&dflt) || add_entry(&ip_fw_chain_head, &dflt))
+		panic("ip_fw_init");
+
+	ip_fw_default_rule = LIST_FIRST(&ip_fw_chain_head) ;
+	printf("IP packet filtering initialized, "
+#ifdef IPDIVERT
+		"divert enabled, "
+#else
+		"divert disabled, "
+#endif
+#ifdef IPFIREWALL_FORWARD
+		"rule-based forwarding enabled, "
+#else
+		"rule-based forwarding disabled, "
+#endif
+#ifdef IPFIREWALL_DEFAULT_TO_ACCEPT
+		"default to accept, ");
+#else
+		"default to deny, " );
+#endif
+#ifndef IPFIREWALL_VERBOSE
+	printf("logging disabled\n");
+#else
+	if (fw_verbose_limit != 0)
+		printf("logging limited to %d packets/entry by default\n",
+		    fw_verbose_limit);
+	else
+		printf("unlimited logging\n");
+#endif
+}
+
+static ip_fw_chk_t *old_chk_ptr;
+static ip_fw_ctl_t *old_ctl_ptr;
+
+/*
+ * Module event handler: MOD_LOAD saves the current hook pointers and runs
+ * ip_fw_init(); MOD_UNLOAD restores them and frees every rule.  Returns 0.
+ */
+static int
+ipfw_modevent(module_t mod, int type, void *unused)
+{
+	struct ip_fw_chain *entry;
+	int spl;
+
+	if (type == MOD_LOAD) {
+		spl = splnet();
+		/* remembered so that MOD_UNLOAD can put the hooks back */
+		old_chk_ptr = ip_fw_chk_ptr;
+		old_ctl_ptr = ip_fw_ctl_ptr;
+		ip_fw_init();
+		splx(spl);
+	} else if (type == MOD_UNLOAD) {
+		spl = splnet();
+		ip_fw_chk_ptr = old_chk_ptr;
+		ip_fw_ctl_ptr = old_ctl_ptr;
+		remove_dyn_rule(NULL, 1 /* force delete */);
+		for (;;) {
+			entry = LIST_FIRST(&ip_fw_chain_head);
+			if (entry == NULL)
+				break;
+			LIST_REMOVE(entry, next);
+#ifdef DUMMYNET
+			dn_rule_delete(entry);
+#endif
+			free(entry->rule, M_IPFW);
+			free(entry, M_IPFW);
+		}
+		splx(spl);
+		printf("IP firewall unloaded\n");
+	}
+	return 0;
+}
+
+static moduledata_t ipfwmod = {
+	"ipfw",			/* presumably the module name -- confirm against moduledata_t */
+	ipfw_modevent,		/* load/unload event handler defined above */
+	0			/* presumably passed as the handler's third ("unused") argument */
+};
+DECLARE_MODULE(ipfw, ipfwmod, SI_SUB_PSEUDO, SI_ORDER_ANY);
diff --git a/sys/netinet/ip_fw.h b/sys/netinet/ip_fw.h
new file mode 100644
index 0000000..7abae15
--- /dev/null
+++ b/sys/netinet/ip_fw.h
@@ -0,0 +1,309 @@
+/*
+ * Copyright (c) 1993 Daniel Boulet
+ * Copyright (c) 1994 Ugen J.S.Antsilevich
+ *
+ * Redistribution and use in source forms, with and without modification,
+ * are permitted provided that this entire comment appears intact.
+ *
+ * Redistribution in binary form may occur without any restrictions.
+ * Obviously, it would be nice if you gave credit where credit is due
+ * but requiring it would be too onerous.
+ *
+ * This software is provided ``AS IS'' without any warranties of any kind.
+ *
+ * $FreeBSD$
+ */
+
+#ifndef _IP_FW_H
+#define _IP_FW_H
+
+#include <sys/queue.h>
+
+/*
+ * This union structure identifies an interface, either explicitly
+ * by name or implicitly by IP address. The flags IP_FW_F_IIFNAME
+ * and IP_FW_F_OIFNAME say how to interpret this structure. An
+ * interface unit number of -1 matches any unit number, while an
+ * IP address of 0.0.0.0 matches any interface.
+ *
+ * The receive and transmit interfaces are only compared against the
+ * packet if the corresponding bit (IP_FW_F_IIFACE or IP_FW_F_OIFACE)
+ * is set. Note some packets lack a receive or transmit interface
+ * (in which case the missing "interface" never matches).
+ */
+
+union ip_fw_if {
+    struct in_addr fu_via_ip;	/* Specified by IP address */
+    struct {			/* Specified by interface name */
+#define FW_IFNLEN     10 /* need room ! was IFNAMSIZ */
+	    char  name[FW_IFNLEN];
+	    short unit;		/* -1 means match any unit */
+    } fu_via_if;
+};
+
+/*
+ * Format of an IP firewall descriptor
+ *
+ * fw_src, fw_dst, fw_smsk, fw_dmsk are always stored in network byte order.
+ * fw_flg and fw_n*p are stored in host byte order (of course).
+ * Port numbers are stored in HOST byte order.
+ */
+
+struct ip_fw {
+    u_int64_t fw_pcnt,fw_bcnt;		/* Packet and byte counters */
+    struct in_addr fw_src, fw_dst;	/* Source and destination IP addr */
+    struct in_addr fw_smsk, fw_dmsk;	/* Mask for src and dest IP addr */
+    u_short fw_number;			/* Rule number */
+    u_int fw_flg;			/* Operational Flags word */
+#define IP_FW_MAX_PORTS	10		/* A reasonable maximum */
+	union {
+	u_short fw_pts[IP_FW_MAX_PORTS];	/* Array of port numbers to match */
+#define IP_FW_ICMPTYPES_MAX	128
+#define IP_FW_ICMPTYPES_DIM	(IP_FW_ICMPTYPES_MAX / (sizeof(unsigned) * 8))
+	unsigned fw_icmptypes[IP_FW_ICMPTYPES_DIM]; /* ICMP types bitmap */
+	} fw_uar;
+    u_int fw_ipflg;			/* IP flags word */
+    u_char fw_ipopt,fw_ipnopt;		/* IP options set/unset */
+    u_short fw_iplen, fw_ipid;		/* IP length, identification */
+    u_char fw_iptos, fw_ipntos;		/* IP type of service set/unset */
+    u_char fw_ipttl;			/* IP time to live */
+    u_int fw_ipver:4;			/* IP version */
+    u_char fw_tcpopt,fw_tcpnopt;	/* TCP options set/unset */
+    u_char fw_tcpf,fw_tcpnf;		/* TCP flags set/unset */
+    u_int32_t fw_tcpseq, fw_tcpack;	/* TCP sequence and acknowledgement */
+    u_short fw_tcpwin;			/* TCP window size */
+    long timestamp;			/* timestamp (tv_sec) of last match */
+    union ip_fw_if fw_in_if, fw_out_if;	/* Incoming and outgoing interfaces */
+    union {
+	u_short fu_divert_port;		/* Divert/tee port (options IPDIVERT) */
+	u_short fu_pipe_nr;		/* queue number (option DUMMYNET) */
+	u_short fu_skipto_rule;		/* SKIPTO command rule number */
+	u_short fu_reject_code;		/* REJECT response code */
+	struct sockaddr_in fu_fwd_ip;
+    } fw_un;
+    u_char fw_prot;			/* IP protocol */
+	/*
+	 * Number of src ports (low 4 bits) and of dst ports (high 4 bits)
+	 * in the ports array (dst ports follow src ports; max of 10 ports
+	 * in all; count of 0 means match all ports)
+	 */
+    u_char fw_nports;
+    void *pipe_ptr;                    /* flow_set ptr for dummynet pipe */
+    void *next_rule_ptr ;              /* next rule in case of match */
+    uid_t fw_uid;			/* uid to match */
+    gid_t fw_gid;			/* gid to match */
+    int fw_logamount;			/* amount to log */
+    u_int64_t fw_loghighest;		/* highest number packet to log */
+};
+
+/*
+ * extended ipfw structure... some fields in the original struct
+ * can be used to pass parameters up/down, namely pointers
+ *     void *pipe_ptr
+ *     void *next_rule_ptr 
+ * some others can be used to pass parameters down, namely counters etc.
+ *     u_int64_t fw_pcnt,fw_bcnt;
+ *     long timestamp;
+ */
+
+struct ip_fw_ext {             /* extended structure */
+    struct ip_fw rule;      /* must be at offset 0 */
+    long    dont_match_prob;        /* 0x7fffffff means 1.0, always fail */
+    u_int   dyn_type;  /* type for dynamic rule */
+};
+
+#define IP_FW_GETNSRCP(rule)		((rule)->fw_nports & 0x0f)
+#define IP_FW_SETNSRCP(rule, n)		do {				\
+					  (rule)->fw_nports &= ~0x0f;	\
+					  (rule)->fw_nports |= (n);	\
+					} while (0)
+#define IP_FW_GETNDSTP(rule)		((rule)->fw_nports >> 4)
+#define IP_FW_SETNDSTP(rule, n)		do {				\
+					  (rule)->fw_nports &= ~0xf0;	\
+					  (rule)->fw_nports |= (n) << 4;\
+					} while (0)
+
+#define fw_divert_port	fw_un.fu_divert_port
+#define fw_skipto_rule	fw_un.fu_skipto_rule
+#define fw_reject_code	fw_un.fu_reject_code
+#define fw_pipe_nr	fw_un.fu_pipe_nr
+#define fw_fwd_ip	fw_un.fu_fwd_ip
+
+struct ip_fw_chain {
+	LIST_ENTRY(ip_fw_chain) next;	/* linkage on the ip_fw_chain_head list */
+	struct ip_fw *rule;		/* rule data (ip_fw_ctl also reads it as struct ip_fw_ext) */
+};
+
+/*
+ * Flow mask/flow id for each queue.
+ */
+struct ipfw_flow_id {
+    u_int32_t dst_ip, src_ip ;
+    u_int16_t dst_port, src_port ; 
+    u_int8_t proto ;    
+    u_int8_t flags ;    /* protocol-specific flags */
+} ;
+
+/*
+ * dynamic ipfw rule
+ */
+struct ipfw_dyn_rule {
+    struct ipfw_dyn_rule *next ;	/* next rule in the same bucket	*/
+
+    struct ipfw_flow_id id ;		/* presumably the matched flow -- TODO confirm */
+    struct ipfw_flow_id mask ;		/* presumably the mask applied to id -- TODO confirm */
+    struct ip_fw_chain *chain ;		/* pointer to parent rule	*/
+    u_int32_t type ;			/* rule type			*/
+    u_int32_t expire ;			/* expire time (compared with time_second) */
+    u_int64_t pcnt, bcnt;		/* match counters		*/
+    u_int32_t bucket ;			/* which bucket in hash table	*/
+    u_int32_t state ;			/* state of this rule (typ. a   */
+					/* combination of TCP flags)	*/
+} ;
+
+/*
+ * Values for the "fw_flg" field of struct ip_fw.
+ */
+#define IP_FW_F_COMMAND 0x000000ff	/* Mask for type of chain entry:	*/
+#define IP_FW_F_DENY	0x00000000	/* This is a deny rule			*/
+#define IP_FW_F_REJECT	0x00000001	/* Deny and send a response packet	*/
+#define IP_FW_F_ACCEPT	0x00000002	/* This is an accept rule		*/
+#define IP_FW_F_COUNT	0x00000003	/* This is a count rule			*/
+#define IP_FW_F_DIVERT	0x00000004	/* This is a divert rule		*/
+#define IP_FW_F_TEE	0x00000005	/* This is a tee rule			*/
+#define IP_FW_F_SKIPTO	0x00000006	/* This is a skipto rule		*/
+#define IP_FW_F_FWD	0x00000007	/* This is a "change forwarding address" rule */
+#define IP_FW_F_PIPE	0x00000008	/* This is a dummynet rule */
+#define IP_FW_F_QUEUE	0x00000009	/* This is a dummynet queue */
+
+#define IP_FW_F_IN	0x00000100	/* Check inbound packets		*/
+#define IP_FW_F_OUT	0x00000200	/* Check outbound packets		*/
+#define IP_FW_F_IIFACE	0x00000400	/* Apply inbound interface test		*/
+#define IP_FW_F_OIFACE	0x00000800	/* Apply outbound interface test	*/
+
+#define IP_FW_F_PRN	0x00001000	/* Print if this rule matches		*/
+
+#define IP_FW_F_SRNG	0x00002000	/* The first two src ports are a min	*
+					 * and max range (stored in host byte	*
+					 * order).				*/
+
+#define IP_FW_F_DRNG	0x00004000	/* The first two dst ports are a min	*
+					 * and max range (stored in host byte	*
+					 * order).				*/
+
+#define IP_FW_F_FRAG	0x00008000	/* Fragment				*/
+
+#define IP_FW_F_IIFNAME	0x00010000	/* In interface by name/unit (not IP)	*/
+#define IP_FW_F_OIFNAME	0x00020000	/* Out interface by name/unit (not IP)	*/
+
+#define IP_FW_F_INVSRC	0x00040000	/* Invert sense of src check		*/
+#define IP_FW_F_INVDST	0x00080000	/* Invert sense of dst check		*/
+
+#define IP_FW_F_ICMPBIT 0x00100000	/* ICMP type bitmap is valid		*/
+
+#define IP_FW_F_UID	0x00200000	/* filter by uid			*/
+
+#define IP_FW_F_GID	0x00400000	/* filter by gid			*/
+
+#define IP_FW_F_RND_MATCH 0x00800000	/* probabilistic rule match		*/
+#define IP_FW_F_SMSK	0x01000000	/* src-port + mask 			*/
+#define IP_FW_F_DMSK	0x02000000	/* dst-port + mask 			*/
+#define	IP_FW_BRIDGED	0x04000000	/* only match bridged packets		*/
+#define IP_FW_F_KEEP_S	0x08000000	/* keep state	 			*/
+#define IP_FW_F_CHECK_S	0x10000000	/* check state	 			*/
+
+#define IP_FW_F_SME	0x20000000	/* source = me				*/
+#define IP_FW_F_DME	0x40000000	/* destination = me			*/
+
+#define IP_FW_F_MASK	0x7FFFFFFF	/* All possible flag bits mask		*/
+
+/* 
+ * Flags for the 'fw_ipflg' field, for comparing values of ip and its protocols.
+ */
+#define IP_FW_IF_TCPOPT	0x00000001      /* tcp options			*/
+#define IP_FW_IF_TCPFLG	0x00000002      /* tcp flags			*/
+#define IP_FW_IF_TCPSEQ	0x00000004      /* tcp sequence number		*/
+#define IP_FW_IF_TCPACK	0x00000008      /* tcp acknowledgement number	*/
+#define IP_FW_IF_TCPWIN	0x00000010      /* tcp window size		*/
+#define	IP_FW_IF_TCPEST 0x00000020	/* established TCP connection	*/
+#define	IP_FW_IF_TCPMSK	0x0000003f	/* mask of all tcp values	*/
+
+#define IP_FW_IF_IPOPT	0x00000100	/* ip options			*/
+#define IP_FW_IF_IPLEN	0x00000200	/* ip length			*/
+#define IP_FW_IF_IPID	0x00000400	/* ip identification		*/
+#define IP_FW_IF_IPTOS	0x00000800	/* ip type of service		*/
+#define IP_FW_IF_IPTTL	0x00001000	/* ip time to live		*/
+#define IP_FW_IF_IPVER	0x00002000	/* ip version			*/
+#define IP_FW_IF_IPMSK	0x00003f00	/* mask of all ip values	*/
+
+#define IP_FW_IF_MSK	0x0000ffff	/* All possible bits mask	*/
+
+/*
+ * For backwards compatibility with rules specifying "via iface" but
+ * not restricted to only "in" or "out" packets, we define this combination
+ * of bits to represent this configuration.
+ */
+
+#define IF_FW_F_VIAHACK	(IP_FW_F_IN|IP_FW_F_OUT|IP_FW_F_IIFACE|IP_FW_F_OIFACE)
+
+/*
+ * Definitions for REJECT response codes.
+ * Values less than 256 correspond to ICMP unreachable codes.
+ */
+#define IP_FW_REJECT_RST	0x0100		/* TCP packets: send RST */
+
+/*
+ * Definitions for IP option names.
+ */
+#define IP_FW_IPOPT_LSRR	0x01
+#define IP_FW_IPOPT_SSRR	0x02
+#define IP_FW_IPOPT_RR		0x04
+#define IP_FW_IPOPT_TS		0x08
+
+/*
+ * Definitions for TCP option names.
+ */
+#define IP_FW_TCPOPT_MSS	0x01
+#define IP_FW_TCPOPT_WINDOW	0x02
+#define IP_FW_TCPOPT_SACK	0x04
+#define IP_FW_TCPOPT_TS		0x08
+#define IP_FW_TCPOPT_CC		0x10
+
+/*
+ * Definitions for TCP flags.
+ */
+#define IP_FW_TCPF_FIN		TH_FIN
+#define IP_FW_TCPF_SYN		TH_SYN
+#define IP_FW_TCPF_RST		TH_RST
+#define IP_FW_TCPF_PSH		TH_PUSH
+#define IP_FW_TCPF_ACK		TH_ACK
+#define IP_FW_TCPF_URG		TH_URG
+
+/*
+ * Main firewall chains definitions and global var's definitions.
+ */
+#ifdef _KERNEL
+
+#define IP_FW_PORT_DYNT_FLAG	0x10000
+#define IP_FW_PORT_TEE_FLAG	0x20000
+#define IP_FW_PORT_DENY_FLAG	0x40000
+
+/*
+ * Function definitions.
+ */
+void ip_fw_init __P((void));
+
+/* Firewall hooks */
+struct ip;
+struct sockopt;
+typedef	int ip_fw_chk_t __P((struct ip **, int, struct ifnet *, u_int16_t *,
+	     struct mbuf **, struct ip_fw_chain **, struct sockaddr_in **));
+typedef	int ip_fw_ctl_t __P((struct sockopt *));
+extern	ip_fw_chk_t *ip_fw_chk_ptr;
+extern	ip_fw_ctl_t *ip_fw_ctl_ptr;
+extern int fw_one_pass;
+extern int fw_enable;
+extern struct ipfw_flow_id last_pkt ;
+#endif /* _KERNEL */
+
+#endif /* _IP_FW_H */
diff --git a/sys/netinet/ip_icmp.c b/sys/netinet/ip_icmp.c
new file mode 100644
index 0000000..ddb95f0
--- /dev/null
+++ b/sys/netinet/ip_icmp.c
@@ -0,0 +1,878 @@
+/*
+ * Copyright (c) 1982, 1986, 1988, 1993
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)ip_icmp.c	8.2 (Berkeley) 1/4/94
+ * $FreeBSD$
+ */
+
+#include "opt_ipsec.h"
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/mbuf.h>
+#include <sys/protosw.h>
+#include <sys/socket.h>
+#include <sys/time.h>
+#include <sys/kernel.h>
+#include <sys/sysctl.h>
+
+#include <net/if.h>
+#include <net/route.h>
+
+#define _IP_VHL
+#include <netinet/in.h>
+#include <netinet/in_systm.h>
+#include <netinet/in_var.h>
+#include <netinet/ip.h>
+#include <netinet/ip_icmp.h>
+#include <netinet/ip_var.h>
+#include <netinet/icmp_var.h>
+
+#ifdef IPSEC
+#include <netinet6/ipsec.h>
+#include <netkey/key.h>
+#endif
+
+#include "faith.h"
+#if defined(NFAITH) && NFAITH > 0
+#include <net/if_types.h>
+#endif
+
+#include <machine/in_cksum.h>
+
+/*
+ * ICMP routines: error generation, receive packet processing, and
+ * routines to turnaround packets back to the originator, and
+ * host table maintenance routines.
+ */
+
+static struct	icmpstat icmpstat;
+SYSCTL_STRUCT(_net_inet_icmp, ICMPCTL_STATS, stats, CTLFLAG_RD,
+	&icmpstat, icmpstat, "");
+
+static int	icmpmaskrepl = 0;
+SYSCTL_INT(_net_inet_icmp, ICMPCTL_MASKREPL, maskrepl, CTLFLAG_RW,
+	&icmpmaskrepl, 0, "");
+
+static int	drop_redirect = 0;
+SYSCTL_INT(_net_inet_icmp, OID_AUTO, drop_redirect, CTLFLAG_RW, 
+	&drop_redirect, 0, "");
+
+static int	log_redirect = 0;
+SYSCTL_INT(_net_inet_icmp, OID_AUTO, log_redirect, CTLFLAG_RW, 
+	&log_redirect, 0, "");
+
+static int      icmplim = 200;
+SYSCTL_INT(_net_inet_icmp, ICMPCTL_ICMPLIM, icmplim, CTLFLAG_RW,
+	&icmplim, 0, "");
+
+static int	icmplim_output = 1;
+SYSCTL_INT(_net_inet_icmp, OID_AUTO, icmplim_output, CTLFLAG_RW,
+	&icmplim_output, 0, "");
+
+/*
+ * ICMP broadcast echo sysctl
+ */
+
+static int	icmpbmcastecho = 0;
+SYSCTL_INT(_net_inet_icmp, OID_AUTO, bmcastecho, CTLFLAG_RW,
+	&icmpbmcastecho, 0, "");
+
+
+#ifdef ICMPPRINTFS
+int	icmpprintfs = 0;
+#endif
+
+static void	icmp_reflect __P((struct mbuf *));
+static void	icmp_send __P((struct mbuf *, struct mbuf *));
+static int	ip_next_mtu __P((int, int));
+
+extern	struct protosw inetsw[];
+
+/*
+ * Build an ICMP error of the given type and code about packet n and pass it
+ * to icmp_reflect().  The offending packet n is freed on every return path.
+ */
+void
+icmp_error(n, type, code, dest, destifp)
+	struct mbuf *n;			/* offending packet; freed here */
+	int type, code;			/* ICMP type and code to send */
+	n_long dest;			/* gateway address, used for ICMP_REDIRECT only */
+	struct ifnet *destifp;		/* if set, supplies the MTU for need-frag errors */
+{
+	register struct ip *oip = mtod(n, struct ip *), *nip;
+	register unsigned oiplen = IP_VHL_HL(oip->ip_vhl) << 2;
+	register struct icmp *icp;
+	register struct mbuf *m;
+	unsigned icmplen;
+
+#ifdef ICMPPRINTFS
+	if (icmpprintfs)
+		printf("icmp_error(%p, %x, %d)\n", oip, type, code);
+#endif
+	if (type != ICMP_REDIRECT)
+		icmpstat.icps_error++;
+	/*
+	 * Only answer the first fragment of a datagram.  Except for redirects,
+	 * don't answer an ICMP packet unless its type is a known informational
+	 * one, so that errors are never sent about error messages.
+	 */
+	if (oip->ip_off &~ (IP_MF|IP_DF))
+		goto freeit;
+	if (oip->ip_p == IPPROTO_ICMP && type != ICMP_REDIRECT &&
+	  n->m_len >= oiplen + ICMP_MINLEN &&
+	  !ICMP_INFOTYPE(((struct icmp *)((caddr_t)oip + oiplen))->icmp_type)) {
+		icmpstat.icps_oldicmp++;
+		goto freeit;
+	}
+	/* Don't send error in response to a multicast or broadcast packet */
+	if (n->m_flags & (M_BCAST|M_MCAST))
+		goto freeit;
+	/*
+	 * First, formulate the icmp message in a fresh header mbuf
+	 */
+	m = m_gethdr(M_DONTWAIT, MT_HEADER);
+	if (m == NULL)
+		goto freeit;
+	icmplen = min(oiplen + 8, oip->ip_len);	/* old header + 8, capped at ip_len */
+	if (icmplen < sizeof(struct ip))
+		panic("icmp_error: bad length");
+	m->m_len = icmplen + ICMP_MINLEN;
+	MH_ALIGN(m, m->m_len);
+	icp = mtod(m, struct icmp *);
+	if ((u_int)type > ICMP_MAXTYPE)
+		panic("icmp_error");
+	icmpstat.icps_outhist[type]++;
+	icp->icmp_type = type;
+	if (type == ICMP_REDIRECT)
+		icp->icmp_gwaddr.s_addr = dest;
+	else {
+		icp->icmp_void = 0;
+		/*
+		 * The following assignments assume an overlay with the
+		 * zeroed icmp_void field.  For PARAMPROB, code is the pointer.
+		 */
+		if (type == ICMP_PARAMPROB) {
+			icp->icmp_pptr = code;
+			code = 0;
+		} else if (type == ICMP_UNREACH &&
+			code == ICMP_UNREACH_NEEDFRAG && destifp) {
+			icp->icmp_nextmtu = htons(destifp->if_mtu);
+		}
+	}
+
+	icp->icmp_code = code;
+	m_copydata(n, 0, icmplen, (caddr_t)&icp->icmp_ip);
+	nip = &icp->icmp_ip;
+
+	/*
+	 * Convert the quoted header's fields to network representation.
+	 */
+	HTONS(nip->ip_len);
+	HTONS(nip->ip_off);
+
+	/*
+	 * Now, copy the old ip header (without options)
+	 * in front of the icmp message.
+	 */
+	if (m->m_data - sizeof(struct ip) < m->m_pktdat)
+		panic("icmp len");
+	m->m_data -= sizeof(struct ip);
+	m->m_len += sizeof(struct ip);
+	m->m_pkthdr.len = m->m_len;
+	m->m_pkthdr.rcvif = n->m_pkthdr.rcvif;
+	nip = mtod(m, struct ip *);
+	bcopy((caddr_t)oip, (caddr_t)nip, sizeof(struct ip));
+	nip->ip_len = m->m_len;
+	nip->ip_vhl = IP_VHL_BORING;
+	nip->ip_p = IPPROTO_ICMP;
+	nip->ip_tos = 0;
+	icmp_reflect(m);
+
+freeit:
+	m_freem(n);
+}
+
+static struct sockaddr_in icmpsrc = { sizeof (struct sockaddr_in), AF_INET };
+static struct sockaddr_in icmpdst = { sizeof (struct sockaddr_in), AF_INET };
+static struct sockaddr_in icmpgw = { sizeof (struct sockaddr_in), AF_INET };
+
+/*
+ * Process a received ICMP message.
+ */
+void
+icmp_input(m, off, proto)
+	register struct mbuf *m;
+	int off, proto;
+{
+	int hlen = off;
+	register struct icmp *icp;
+	register struct ip *ip = mtod(m, struct ip *);
+	int icmplen = ip->ip_len;
+	register int i;
+	struct in_ifaddr *ia;
+	void (*ctlfunc) __P((int, struct sockaddr *, void *));
+	int code;
+
+	/*
+	 * Locate icmp structure in mbuf, and check
+	 * that not corrupted and of at least minimum length.
+	 */
+#ifdef ICMPPRINTFS
+	if (icmpprintfs) {
+		char buf[4 * sizeof "123"];
+		strcpy(buf, inet_ntoa(ip->ip_src));
+		printf("icmp_input from %s to %s, len %d\n",
+		       buf, inet_ntoa(ip->ip_dst), icmplen);
+	}
+#endif
+	if (icmplen < ICMP_MINLEN) {
+		icmpstat.icps_tooshort++;
+		goto freeit;
+	}
+	i = hlen + min(icmplen, ICMP_ADVLENMIN);
+	if (m->m_len < i && (m = m_pullup(m, i)) == 0)  {
+		icmpstat.icps_tooshort++;
+		return;
+	}
+	ip = mtod(m, struct ip *);
+	m->m_len -= hlen;
+	m->m_data += hlen;
+	icp = mtod(m, struct icmp *);
+	if (in_cksum(m, icmplen)) {
+		icmpstat.icps_checksum++;
+		goto freeit;
+	}
+	m->m_len += hlen;
+	m->m_data -= hlen;
+
+#if defined(NFAITH) && 0 < NFAITH
+	if (m->m_pkthdr.rcvif && m->m_pkthdr.rcvif->if_type == IFT_FAITH) {
+		/*
+		 * Deliver very specific ICMP type only.
+		 */
+		switch (icp->icmp_type) {
+		case ICMP_UNREACH:
+		case ICMP_TIMXCEED:
+			break;
+		default:
+			goto freeit;
+		}
+	}
+#endif
+
+#ifdef ICMPPRINTFS
+	if (icmpprintfs)
+		printf("icmp_input, type %d code %d\n", icp->icmp_type,
+		    icp->icmp_code);
+#endif
+
+#ifdef IPSEC
+	/* drop it if it does not match the policy */
+	/* XXX Is there meaning of check in here ? */
+	if (ipsec4_in_reject(m, NULL)) {
+		ipsecstat.in_polvio++;
+		goto freeit;
+	}
+#endif
+
+	/*
+	 * Message type specific processing.
+	 */
+	if (icp->icmp_type > ICMP_MAXTYPE)
+		goto raw;
+	icmpstat.icps_inhist[icp->icmp_type]++;
+	code = icp->icmp_code;
+	switch (icp->icmp_type) {
+
+	case ICMP_UNREACH:
+		switch (code) {
+			case ICMP_UNREACH_NET:
+			case ICMP_UNREACH_HOST:
+			case ICMP_UNREACH_SRCFAIL:
+			case ICMP_UNREACH_NET_UNKNOWN:
+			case ICMP_UNREACH_HOST_UNKNOWN:
+			case ICMP_UNREACH_ISOLATED:
+			case ICMP_UNREACH_TOSNET:
+			case ICMP_UNREACH_TOSHOST:
+			case ICMP_UNREACH_HOST_PRECEDENCE:
+			case ICMP_UNREACH_PRECEDENCE_CUTOFF:
+				code = PRC_UNREACH_NET;
+				break;
+
+			case ICMP_UNREACH_NEEDFRAG:
+				code = PRC_MSGSIZE;
+				break;
+
+			/*
+			 * RFC 1122, Sections 3.2.2.1 and 4.2.3.9.
+			 * Treat subcodes 2,3 as immediate RST
+			 */
+			case ICMP_UNREACH_PROTOCOL:
+			case ICMP_UNREACH_PORT:
+				code = PRC_UNREACH_PORT;
+				break;
+
+			case ICMP_UNREACH_NET_PROHIB:
+			case ICMP_UNREACH_HOST_PROHIB:
+			case ICMP_UNREACH_FILTER_PROHIB:
+				code = PRC_UNREACH_ADMIN_PROHIB;
+				break;
+
+			default:
+				goto badcode;
+		}
+		goto deliver;
+
+	case ICMP_TIMXCEED:
+		if (code > 1)
+			goto badcode;
+		code += PRC_TIMXCEED_INTRANS;
+		goto deliver;
+
+	case ICMP_PARAMPROB:
+		if (code > 1)
+			goto badcode;
+		code = PRC_PARAMPROB;
+		goto deliver;
+
+	case ICMP_SOURCEQUENCH:
+		if (code)
+			goto badcode;
+		code = PRC_QUENCH;
+	deliver:
+		/*
+		 * Problem with datagram; advise higher level routines.
+		 */
+		if (icmplen < ICMP_ADVLENMIN || icmplen < ICMP_ADVLEN(icp) ||
+		    IP_VHL_HL(icp->icmp_ip.ip_vhl) < (sizeof(struct ip) >> 2)) {
+			icmpstat.icps_badlen++;
+			goto freeit;
+		}
+		NTOHS(icp->icmp_ip.ip_len);
+		/* Discard ICMP's in response to multicast packets */
+		if (IN_MULTICAST(ntohl(icp->icmp_ip.ip_dst.s_addr)))
+			goto badcode;
+#ifdef ICMPPRINTFS
+		if (icmpprintfs)
+			printf("deliver to protocol %d\n", icp->icmp_ip.ip_p);
+#endif
+		icmpsrc.sin_addr = icp->icmp_ip.ip_dst;
+#if 1
+		/*
+		 * MTU discovery:
+		 * If we got a needfrag and there is a host route to the
+		 * original destination, and the MTU is not locked, then
+		 * set the MTU in the route to the suggested new value
+		 * (if given) and then notify as usual.  The ULPs will
+		 * notice that the MTU has changed and adapt accordingly.
+		 * If no new MTU was suggested, then we guess a new one
+		 * less than the current value.  If the new MTU is 
+		 * unreasonably small (arbitrarily set at 296), then
+		 * we reset the MTU to the interface value and enable the
+		 * lock bit, indicating that we are no longer doing MTU
+		 * discovery.
+		 */
+		if (code == PRC_MSGSIZE) {
+			struct rtentry *rt;
+			int mtu;
+
+			rt = rtalloc1((struct sockaddr *)&icmpsrc, 0,
+				      RTF_CLONING | RTF_PRCLONING);
+			if (rt && (rt->rt_flags & RTF_HOST)
+			    && !(rt->rt_rmx.rmx_locks & RTV_MTU)) {
+				mtu = ntohs(icp->icmp_nextmtu);
+				if (!mtu)
+					mtu = ip_next_mtu(rt->rt_rmx.rmx_mtu,
+							  1);
+#ifdef DEBUG_MTUDISC
+				printf("MTU for %s reduced to %d\n",
+					inet_ntoa(icmpsrc.sin_addr), mtu);
+#endif
+				if (mtu < 296) {
+					/* rt->rt_rmx.rmx_mtu =
+						rt->rt_ifp->if_mtu; */
+					rt->rt_rmx.rmx_locks |= RTV_MTU;
+				} else if (rt->rt_rmx.rmx_mtu > mtu) {
+					rt->rt_rmx.rmx_mtu = mtu;
+				}
+			}
+			if (rt)
+				RTFREE(rt);
+		}
+
+#endif
+		/*
+		 * XXX if the packet contains [IPv4 AH TCP], we can't make a
+		 * notification to TCP layer.
+		 */
+		ctlfunc = inetsw[ip_protox[icp->icmp_ip.ip_p]].pr_ctlinput;
+		if (ctlfunc)
+			(*ctlfunc)(code, (struct sockaddr *)&icmpsrc,
+				   (void *)&icp->icmp_ip);
+		break;
+
+	badcode:
+		icmpstat.icps_badcode++;
+		break;
+
+	case ICMP_ECHO:
+		if (!icmpbmcastecho
+		    && (m->m_flags & (M_MCAST | M_BCAST)) != 0) {
+			icmpstat.icps_bmcastecho++;
+			break;
+		}
+		icp->icmp_type = ICMP_ECHOREPLY;
+		if (badport_bandlim(BANDLIM_ICMP_ECHO) < 0)
+			goto freeit;
+		else
+			goto reflect;
+
+	case ICMP_TSTAMP:
+		if (!icmpbmcastecho
+		    && (m->m_flags & (M_MCAST | M_BCAST)) != 0) {
+			icmpstat.icps_bmcasttstamp++;
+			break;
+		}
+		if (icmplen < ICMP_TSLEN) {
+			icmpstat.icps_badlen++;
+			break;
+		}
+		icp->icmp_type = ICMP_TSTAMPREPLY;
+		icp->icmp_rtime = iptime();
+		icp->icmp_ttime = icp->icmp_rtime;	/* bogus, do later! */
+		if (badport_bandlim(BANDLIM_ICMP_TSTAMP) < 0)
+			goto freeit;
+		else
+			goto reflect;
+
+	case ICMP_MASKREQ:
+#define	satosin(sa)	((struct sockaddr_in *)(sa))
+		if (icmpmaskrepl == 0)
+			break;
+		/*
+		 * We are not able to respond with all ones broadcast
+		 * unless we receive it over a point-to-point interface.
+		 */
+		if (icmplen < ICMP_MASKLEN)
+			break;
+		switch (ip->ip_dst.s_addr) {
+
+		case INADDR_BROADCAST:
+		case INADDR_ANY:
+			icmpdst.sin_addr = ip->ip_src;
+			break;
+
+		default:
+			icmpdst.sin_addr = ip->ip_dst;
+		}
+		ia = (struct in_ifaddr *)ifaof_ifpforaddr(
+			    (struct sockaddr *)&icmpdst, m->m_pkthdr.rcvif);
+		if (ia == 0)
+			break;
+		if (ia->ia_ifp == 0)
+			break;
+		icp->icmp_type = ICMP_MASKREPLY;
+		icp->icmp_mask = ia->ia_sockmask.sin_addr.s_addr;
+		if (ip->ip_src.s_addr == 0) {
+			if (ia->ia_ifp->if_flags & IFF_BROADCAST)
+			    ip->ip_src = satosin(&ia->ia_broadaddr)->sin_addr;
+			else if (ia->ia_ifp->if_flags & IFF_POINTOPOINT)
+			    ip->ip_src = satosin(&ia->ia_dstaddr)->sin_addr;
+		}
+reflect:
+		ip->ip_len += hlen;	/* since ip_input deducts this */
+		icmpstat.icps_reflect++;
+		icmpstat.icps_outhist[icp->icmp_type]++;
+		icmp_reflect(m);
+		return;
+
+	case ICMP_REDIRECT:
+		if (log_redirect) {
+			u_long src, dst, gw;
+
+			src = ntohl(ip->ip_src.s_addr);
+			dst = ntohl(icp->icmp_ip.ip_dst.s_addr);
+			gw = ntohl(icp->icmp_gwaddr.s_addr);
+			printf("icmp redirect from %d.%d.%d.%d: "
+			       "%d.%d.%d.%d => %d.%d.%d.%d\n",
+			       (int)(src >> 24), (int)((src >> 16) & 0xff),
+			       (int)((src >> 8) & 0xff), (int)(src & 0xff),
+			       (int)(dst >> 24), (int)((dst >> 16) & 0xff),
+			       (int)((dst >> 8) & 0xff), (int)(dst & 0xff),
+			       (int)(gw >> 24), (int)((gw >> 16) & 0xff),
+			       (int)((gw >> 8) & 0xff), (int)(gw & 0xff));
+		}
+		if (drop_redirect)
+			break;
+		if (code > 3)
+			goto badcode;
+		if (icmplen < ICMP_ADVLENMIN || icmplen < ICMP_ADVLEN(icp) ||
+		    IP_VHL_HL(icp->icmp_ip.ip_vhl) < (sizeof(struct ip) >> 2)) {
+			icmpstat.icps_badlen++;
+			break;
+		}
+		/*
+		 * Short circuit routing redirects to force
+		 * immediate change in the kernel's routing
+		 * tables.  The message is also handed to anyone
+		 * listening on a raw socket (e.g. the routing
+		 * daemon for use in updating its tables).
+		 */
+		icmpgw.sin_addr = ip->ip_src;
+		icmpdst.sin_addr = icp->icmp_gwaddr;
+#ifdef	ICMPPRINTFS
+		if (icmpprintfs) {
+			char buf[4 * sizeof "123"];
+			strcpy(buf, inet_ntoa(icp->icmp_ip.ip_dst));
+
+			printf("redirect dst %s to %s\n",
+			       buf, inet_ntoa(icp->icmp_gwaddr));
+		}
+#endif
+		icmpsrc.sin_addr = icp->icmp_ip.ip_dst;
+		rtredirect((struct sockaddr *)&icmpsrc,
+		  (struct sockaddr *)&icmpdst,
+		  (struct sockaddr *)0, RTF_GATEWAY | RTF_HOST,
+		  (struct sockaddr *)&icmpgw, (struct rtentry **)0);
+		pfctlinput(PRC_REDIRECT_HOST, (struct sockaddr *)&icmpsrc);
+#ifdef IPSEC
+		key_sa_routechange((struct sockaddr *)&icmpsrc);
+#endif
+		break;
+
+	/*
+	 * No kernel processing for the following;
+	 * just fall through to send to raw listener.
+	 */
+	case ICMP_ECHOREPLY:
+	case ICMP_ROUTERADVERT:
+	case ICMP_ROUTERSOLICIT:
+	case ICMP_TSTAMPREPLY:
+	case ICMP_IREQREPLY:
+	case ICMP_MASKREPLY:
+	default:
+		break;
+	}
+
+raw:
+	rip_input(m, off, proto);
+	return;
+
+freeit:
+	m_freem(m);
+}
+
+/*
+ * Reflect the ip packet back to the source (m is freed or passed on).
+ */
+static void
+icmp_reflect(m)
+	struct mbuf *m;
+{
+	register struct ip *ip = mtod(m, struct ip *);
+	register struct in_ifaddr *ia;
+	struct in_addr t;
+	struct mbuf *opts = 0;
+	int optlen = (IP_VHL_HL(ip->ip_vhl) << 2) - sizeof(struct ip);
+
+	if (!in_canforward(ip->ip_src) &&
+	    ((ntohl(ip->ip_src.s_addr) & IN_CLASSA_NET) !=
+	     (IN_LOOPBACKNET << IN_CLASSA_NSHIFT))) {
+		m_freem(m);	/* Bad return address */
+		goto done;	/* Ip_output() will check for broadcast */
+	}
+	t = ip->ip_dst;
+	ip->ip_dst = ip->ip_src;
+	/*
+	 * If the incoming packet was addressed directly to us, use dst as
+	 * the src for the reply.  Otherwise (broadcast or anonymous), use
+	 * the address which corresponds to the incoming interface.
+	 */
+	TAILQ_FOREACH(ia, &in_ifaddrhead, ia_link) {
+		if (t.s_addr == IA_SIN(ia)->sin_addr.s_addr)
+			break;
+		if (ia->ia_ifp && (ia->ia_ifp->if_flags & IFF_BROADCAST) &&
+		    t.s_addr == satosin(&ia->ia_broadaddr)->sin_addr.s_addr)
+			break;
+	}
+	icmpdst.sin_addr = t;
+	if ((ia == (struct in_ifaddr *)0) && m->m_pkthdr.rcvif)
+		ia = (struct in_ifaddr *)ifaof_ifpforaddr(
+			(struct sockaddr *)&icmpdst, m->m_pkthdr.rcvif);
+	/*
+	 * Not addressed to us and received on an interface with no IP
+	 * address: use the first address, or drop if there is none at all.
+	 */
+	if (ia == (struct in_ifaddr *)0 &&
+	    (ia = TAILQ_FIRST(&in_ifaddrhead)) == (struct in_ifaddr *)0) {
+		m_freem(m);	/* No address to reply from */
+		goto done;
+	}
+	t = IA_SIN(ia)->sin_addr;
+	ip->ip_src = t;
+	ip->ip_ttl = ip_defttl;
+
+	if (optlen > 0) {
+		register u_char *cp;
+		int opt, cnt;
+		u_int len;
+
+		/*
+		 * Retrieve any source routing from the incoming packet;
+		 * add on any record-route or timestamp options.
+		 */
+		cp = (u_char *) (ip + 1);
+		if ((opts = ip_srcroute()) == 0 &&
+		    (opts = m_gethdr(M_DONTWAIT, MT_HEADER))) {
+			opts->m_len = sizeof(struct in_addr);
+			mtod(opts, struct in_addr *)->s_addr = 0;
+		}
+		if (opts) {
+#ifdef ICMPPRINTFS
+		    if (icmpprintfs)
+			    printf("icmp_reflect optlen %d rt %d => ",
+				optlen, opts->m_len);
+#endif
+		    for (cnt = optlen; cnt > 0; cnt -= len, cp += len) {
+			    opt = cp[IPOPT_OPTVAL];
+			    if (opt == IPOPT_EOL)
+				    break;
+			    if (opt == IPOPT_NOP)
+				    len = 1;
+			    else {
+				    if (cnt < IPOPT_OLEN + sizeof(*cp))
+					    break;
+				    len = cp[IPOPT_OLEN];
+				    if (len < IPOPT_OLEN + sizeof(*cp) ||
+				        len > cnt)
+					    break;
+			    }
+			    /* Should check for overflow; it "can't happen" */
+			    if (opt == IPOPT_RR || opt == IPOPT_TS ||
+				opt == IPOPT_SECURITY) {
+				    bcopy((caddr_t)cp,
+					mtod(opts, caddr_t) + opts->m_len, len);
+				    opts->m_len += len;
+			    }
+		    }
+		    /* Terminate & pad, if necessary */
+		    cnt = opts->m_len % 4;
+		    if (cnt) {
+			    for (; cnt < 4; cnt++) {
+				    *(mtod(opts, caddr_t) + opts->m_len) =
+					IPOPT_EOL;
+				    opts->m_len++;
+			    }
+		    }
+#ifdef ICMPPRINTFS
+		    if (icmpprintfs)
+			    printf("%d\n", opts->m_len);
+#endif
+		}
+		/*
+		 * Now strip out original options by copying rest of first
+		 * mbuf's data back, and adjust the IP length.
+		 */
+		ip->ip_len -= optlen;
+		ip->ip_vhl = IP_VHL_BORING;
+		m->m_len -= optlen;
+		if (m->m_flags & M_PKTHDR)
+			m->m_pkthdr.len -= optlen;
+		optlen += sizeof(struct ip);
+		bcopy((caddr_t)ip + optlen, (caddr_t)(ip + 1),
+			 (unsigned)(m->m_len - sizeof(struct ip)));
+	}
+	m->m_flags &= ~(M_BCAST|M_MCAST);
+	icmp_send(m, opts);
+done:
+	if (opts)
+		(void)m_free(opts);
+}
+
+/*
+ * Fill in the ICMP checksum of the packet in m, then pass it (together
+ * with opts) to ip_output() using a route that lives only for this call.
+ */
+static void
+icmp_send(m, opts)
+	register struct mbuf *m;
+	struct mbuf *opts;
+{
+	struct ip *iph = mtod(m, struct ip *);
+	int iphlen = IP_VHL_HL(iph->ip_vhl) << 2;
+	struct icmp *icmph;
+	struct route sro;
+
+	/* Hide the IP header so that in_cksum() starts at the ICMP header. */
+	m->m_len -= iphlen;
+	m->m_data += iphlen;
+	icmph = mtod(m, struct icmp *);
+	icmph->icmp_cksum = 0;
+	icmph->icmp_cksum = in_cksum(m, iph->ip_len - iphlen);
+	m->m_len += iphlen;
+	m->m_data -= iphlen;
+	m->m_pkthdr.rcvif = (struct ifnet *)0;
+#ifdef ICMPPRINTFS
+	if (icmpprintfs) {
+		char dstbuf[4 * sizeof "123"];
+		strcpy(dstbuf, inet_ntoa(iph->ip_dst));
+		printf("icmp_send dst %s src %s\n",
+		       dstbuf, inet_ntoa(iph->ip_src));
+	}
+#endif
+	bzero(&sro, sizeof sro);
+	(void) ip_output(m, opts, &sro, 0, NULL);
+	if (sro.ro_rt != NULL)
+		RTFREE(sro.ro_rt);
+}
+
+n_time
+iptime()
+{
+	struct timeval now;
+	u_long msec;		/* ms into the current 24-hour period */
+
+	getmicrotime(&now);
+	msec = (now.tv_sec % (24*60*60)) * 1000 + now.tv_usec / 1000;
+	return (htonl(msec));	/* callers get network byte order */
+}
+
+#if 1
+/*
+ * Return the next larger (DIR < 0) or next smaller (otherwise) MTU
+ * plateau relative to MTU, using the table from RFC 1191.  Returns 0
+ * when there is no such plateau or when MTU is negative.
+ */
+static int
+ip_next_mtu(mtu, dir)
+	int mtu;
+	int dir;
+{
+	static const int mtutab[] = {
+		65535, 32000, 17914, 8166, 4352, 2002, 1492, 1006, 508, 296,
+		68, 0
+	};
+	int i, ntab = (sizeof mtutab) / (sizeof mtutab[0]);
+
+	for (i = 0; i < ntab; i++) {
+		if (mtu >= mtutab[i])
+			break;
+	}
+
+	if (dir < 0) {
+		if (i == 0) {
+			return 0;
+		} else {
+			return mtutab[i - 1];
+		}
+	} else {
+		if (i == ntab || mtutab[i] == 0) {	/* i == ntab: mtu < 0 */
+			return 0;
+		} else if(mtu > mtutab[i]) {
+			return mtutab[i];
+		} else {
+			return mtutab[i + 1];
+		}
+	}
+}
+#endif
+
+
+/*
+ * badport_bandlim() - rate-limit ICMP/RST responses of class 'which'
+ *
+ *	Returns 0 when the response may be sent, and -1 once the count for
+ *	the current window, including this call, exceeds icmplim.
+ *
+ *	The limiter is off (always 0) when icmplim is <= 0; a 'which'
+ *	outside 0..BANDLIM_MAX is also answered with 0.
+ *
+ *	Each 'which' value has its own window start and counter, so the
+ *	response classes are limited independently of one another.
+ *
+ *	The log message is printed only when a window is closed, because
+ *	only then is the number of responses attempted during that window
+ *	known (i.e. 22000/100 pps, etc...).  The 'final' message can
+ *	therefore show up late; that is accepted to keep the code
+ *	simple.
+ */
+
+int
+badport_bandlim(int which)
+{
+	static int win_start[BANDLIM_MAX + 1];	/* ticks at window start */
+	static int win_count[BANDLIM_MAX + 1];	/* attempts in window */
+	static const char *const limit_msg[] = {
+		"Limiting icmp unreach response",
+		"Limiting icmp ping response",
+		"Limiting icmp tstamp response",
+		"Limiting closed port RST response",
+		"Limiting open port RST response"
+		};
+	int elapsed;
+
+	/*
+	 * Never limit when the feature is disabled or the class is out
+	 * of range.
+	 */
+
+	if (icmplim <= 0 || which < 0 || which > BANDLIM_MAX)
+		return (0);
+	elapsed = ticks - win_start[which];
+
+	/*
+	 * Open a new window once more than hz ticks have gone by.
+	 */
+
+	if ((unsigned int)elapsed > hz) {
+		/*
+		 * Report how far the closed window overshot the limit.
+		 */
+		if (icmplim_output && win_count[which] > icmplim) {
+			printf("%s from %d to %d packets per second\n",
+			    limit_msg[which], win_count[which], icmplim);
+		}
+		win_start[which] = ticks;
+		win_count[which] = 0;
+	}
+
+	/*
+	 * Count this attempt, then compare against the limit.
+	 */
+
+	win_count[which]++;
+	if (win_count[which] > icmplim)
+		return (-1);
+	return (0);
+}
+
diff --git a/sys/netinet/ip_icmp.h b/sys/netinet/ip_icmp.h
new file mode 100644
index 0000000..b7d5400
--- /dev/null
+++ b/sys/netinet/ip_icmp.h
@@ -0,0 +1,192 @@
+/*
+ * Copyright (c) 1982, 1986, 1993
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)ip_icmp.h	8.1 (Berkeley) 6/10/93
+ * $FreeBSD$
+ */
+
+#ifndef _NETINET_IP_ICMP_H_
+#define _NETINET_IP_ICMP_H_
+
+/*
+ * Internet Control Message Protocol Definitions.
+ * Per RFC 792, September 1981.
+ */
+
+/*
+ * Address entry of an ICMP Router Advertisement (icmp_radv in struct icmp)
+ */
+struct icmp_ra_addr {
+	u_int32_t ira_addr;		/* presumably the router address */
+	u_int32_t ira_preference;	/* presumably its preference level */
+};
+
+/*
+ * Structure of an icmp header; icmp_hun and icmp_dun depend on icmp_type.
+ */
+struct icmp {
+	u_char	icmp_type;		/* type of message, see below */
+	u_char	icmp_code;		/* type sub code */
+	u_short	icmp_cksum;		/* ones complement cksum of struct */
+	union {
+		u_char ih_pptr;			/* ICMP_PARAMPROB */
+		struct in_addr ih_gwaddr;	/* ICMP_REDIRECT */
+		struct ih_idseq {
+			n_short	icd_id;
+			n_short	icd_seq;
+		} ih_idseq;			/* see icmp_id, icmp_seq */
+		int ih_void;			/* see icmp_void */
+
+		/* ICMP_UNREACH_NEEDFRAG -- Path MTU Discovery (RFC1191) */
+		struct ih_pmtu {
+			n_short ipm_void;
+			n_short ipm_nextmtu;	/* suggested MTU, net order */
+		} ih_pmtu;
+
+		struct ih_rtradv {
+			u_char irt_num_addrs;
+			u_char irt_wpa;
+			u_int16_t irt_lifetime;
+		} ih_rtradv;			/* presumably ICMP_ROUTERADVERT */
+	} icmp_hun;				/* type-specific header part */
+#define	icmp_pptr	icmp_hun.ih_pptr
+#define	icmp_gwaddr	icmp_hun.ih_gwaddr
+#define	icmp_id		icmp_hun.ih_idseq.icd_id
+#define	icmp_seq	icmp_hun.ih_idseq.icd_seq
+#define	icmp_void	icmp_hun.ih_void
+#define	icmp_pmvoid	icmp_hun.ih_pmtu.ipm_void
+#define	icmp_nextmtu	icmp_hun.ih_pmtu.ipm_nextmtu
+#define	icmp_num_addrs	icmp_hun.ih_rtradv.irt_num_addrs
+#define	icmp_wpa	icmp_hun.ih_rtradv.irt_wpa
+#define	icmp_lifetime	icmp_hun.ih_rtradv.irt_lifetime
+	union {
+		struct id_ts {
+			n_time its_otime;
+			n_time its_rtime;	/* icmp_input: set from iptime() */
+			n_time its_ttime;	/* icmp_input: copy of its_rtime */
+		} id_ts;			/* ICMP_TSTAMP, ICMP_TSTAMPREPLY */
+		struct id_ip  {
+			struct ip idi_ip;
+			/* options and then 64 bits of data */
+		} id_ip;			/* IP header returned in errors */
+		struct icmp_ra_addr id_radv;
+		u_int32_t id_mask;		/* ICMP_MASKREPLY: address mask */
+		char	id_data[1];
+	} icmp_dun;				/* type-specific data part */
+#define	icmp_otime	icmp_dun.id_ts.its_otime
+#define	icmp_rtime	icmp_dun.id_ts.its_rtime
+#define	icmp_ttime	icmp_dun.id_ts.its_ttime
+#define	icmp_ip		icmp_dun.id_ip.idi_ip
+#define	icmp_radv	icmp_dun.id_radv
+#define	icmp_mask	icmp_dun.id_mask
+#define	icmp_data	icmp_dun.id_data
+};
+
+/*
+ * Lower bounds on packet lengths for various types.
+ * For the error advice packets we must first ensure that the
+ * packet is large enough to contain the returned ip header.
+ * Only then can we do the check to see if 64 bits of packet
+ * data have been returned, since we need to check the returned
+ * ip header length.
+ */
+#define	ICMP_MINLEN	8				/* abs minimum */
+#define	ICMP_TSLEN	(8 + 3 * sizeof (n_time))	/* timestamp */
+#define	ICMP_MASKLEN	12				/* address mask */
+#define	ICMP_ADVLENMIN	(8 + sizeof (struct ip) + 8)	/* min */
+#ifndef _IP_VHL
+#define	ICMP_ADVLEN(p)	(8 + ((p)->icmp_ip.ip_hl << 2) + 8)
+	/* N.B.: must separately check that ip_hl >= 5 */
+#else
+#define	ICMP_ADVLEN(p)	(8 + (IP_VHL_HL((p)->icmp_ip.ip_vhl) << 2) + 8)
+	/* N.B.: must separately check that header length >= 5 */
+#endif
+
+/*
+ * Definition of type and code field values.
+ */
+#define	ICMP_ECHOREPLY		0		/* echo reply */
+#define	ICMP_UNREACH		3		/* dest unreachable, codes: */
+#define		ICMP_UNREACH_NET	0		/* bad net */
+#define		ICMP_UNREACH_HOST	1		/* bad host */
+#define		ICMP_UNREACH_PROTOCOL	2		/* bad protocol */
+#define		ICMP_UNREACH_PORT	3		/* bad port */
+#define		ICMP_UNREACH_NEEDFRAG	4		/* IP_DF caused drop */
+#define		ICMP_UNREACH_SRCFAIL	5		/* src route failed */
+#define		ICMP_UNREACH_NET_UNKNOWN 6		/* unknown net */
+#define		ICMP_UNREACH_HOST_UNKNOWN 7		/* unknown host */
+#define		ICMP_UNREACH_ISOLATED	8		/* src host isolated */
+#define		ICMP_UNREACH_NET_PROHIB	9		/* prohibited access */
+#define		ICMP_UNREACH_HOST_PROHIB 10		/* ditto */
+#define		ICMP_UNREACH_TOSNET	11		/* bad tos for net */
+#define		ICMP_UNREACH_TOSHOST	12		/* bad tos for host */
+#define		ICMP_UNREACH_FILTER_PROHIB 13		/* admin prohib */
+#define		ICMP_UNREACH_HOST_PRECEDENCE 14		/* host prec vio. */
+#define		ICMP_UNREACH_PRECEDENCE_CUTOFF 15	/* prec cutoff */
+#define	ICMP_SOURCEQUENCH	4		/* packet lost, slow down */
+#define	ICMP_REDIRECT		5		/* shorter route, codes: */
+#define		ICMP_REDIRECT_NET	0		/* for network */
+#define		ICMP_REDIRECT_HOST	1		/* for host */
+#define		ICMP_REDIRECT_TOSNET	2		/* for tos and net */
+#define		ICMP_REDIRECT_TOSHOST	3		/* for tos and host */
+#define	ICMP_ECHO		8		/* echo service */
+#define	ICMP_ROUTERADVERT	9		/* router advertisement */
+#define	ICMP_ROUTERSOLICIT	10		/* router solicitation */
+#define	ICMP_TIMXCEED		11		/* time exceeded, code: */
+#define		ICMP_TIMXCEED_INTRANS	0		/* ttl==0 in transit */
+#define		ICMP_TIMXCEED_REASS	1		/* ttl==0 in reass */
+#define	ICMP_PARAMPROB		12		/* ip header bad */
+#define		ICMP_PARAMPROB_ERRATPTR 0		/* error at param ptr */
+#define		ICMP_PARAMPROB_OPTABSENT 1		/* req. opt. absent */
+#define		ICMP_PARAMPROB_LENGTH 2			/* bad length */
+#define	ICMP_TSTAMP		13		/* timestamp request */
+#define	ICMP_TSTAMPREPLY	14		/* timestamp reply */
+#define	ICMP_IREQ		15		/* information request */
+#define	ICMP_IREQREPLY		16		/* information reply */
+#define	ICMP_MASKREQ		17		/* address mask request */
+#define	ICMP_MASKREPLY		18		/* address mask reply */
+
+#define	ICMP_MAXTYPE		18
+
+#define	ICMP_INFOTYPE(type) \
+	((type) == ICMP_ECHOREPLY || (type) == ICMP_ECHO || \
+	(type) == ICMP_ROUTERADVERT || (type) == ICMP_ROUTERSOLICIT || \
+	(type) == ICMP_TSTAMP || (type) == ICMP_TSTAMPREPLY || \
+	(type) == ICMP_IREQ || (type) == ICMP_IREQREPLY || \
+	(type) == ICMP_MASKREQ || (type) == ICMP_MASKREPLY)
+
+#ifdef _KERNEL
+void	icmp_error __P((struct mbuf *, int, int, n_long, struct ifnet *));
+void	icmp_input __P((struct mbuf *, int, int));
+#endif
+
+#endif
diff --git a/sys/netinet/ip_input.c b/sys/netinet/ip_input.c
new file mode 100644
index 0000000..0963a0a
--- /dev/null
+++ b/sys/netinet/ip_input.c
@@ -0,0 +1,1833 @@
+/*
+ * Copyright (c) 1982, 1986, 1988, 1993
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)ip_input.c	8.2 (Berkeley) 1/4/94
+ * $FreeBSD$
+ */
+
+#define	_IP_VHL
+
+#include "opt_bootp.h"
+#include "opt_ipfw.h"
+#include "opt_ipdn.h"
+#include "opt_ipdivert.h"
+#include "opt_ipfilter.h"
+#include "opt_ipstealth.h"
+#include "opt_ipsec.h"
+#include "opt_pfil_hooks.h"
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/mbuf.h>
+#include <sys/malloc.h>
+#include <sys/domain.h>
+#include <sys/protosw.h>
+#include <sys/socket.h>
+#include <sys/time.h>
+#include <sys/kernel.h>
+#include <sys/syslog.h>
+#include <sys/sysctl.h>
+
+#include <net/pfil.h>
+#include <net/if.h>
+#include <net/if_var.h>
+#include <net/if_dl.h>
+#include <net/route.h>
+#include <net/netisr.h>
+#include <net/intrq.h>
+
+#include <netinet/in.h>
+#include <netinet/in_systm.h>
+#include <netinet/in_var.h>
+#include <netinet/ip.h>
+#include <netinet/in_pcb.h>
+#include <netinet/ip_var.h>
+#include <netinet/ip_icmp.h>
+#include <machine/in_cksum.h>
+
+#include <netinet/ipprotosw.h>
+
+#include <sys/socketvar.h>
+
+#include <netinet/ip_fw.h>
+
+#ifdef IPSEC
+#include <netinet6/ipsec.h>
+#include <netkey/key.h>
+#endif
+
+#include "faith.h"
+#if defined(NFAITH) && NFAITH > 0
+#include <net/if_types.h>
+#endif
+
+#ifdef DUMMYNET
+#include <netinet/ip_dummynet.h>
+#endif
+
+int rsvp_on = 0;
+static int ip_rsvp_on;
+struct socket *ip_rsvpd;
+
+int	ipforwarding = 0;
+SYSCTL_INT(_net_inet_ip, IPCTL_FORWARDING, forwarding, CTLFLAG_RW,
+    &ipforwarding, 0, "Enable IP forwarding between interfaces");
+
+static int	ipsendredirects = 1; /* XXX */
+SYSCTL_INT(_net_inet_ip, IPCTL_SENDREDIRECTS, redirect, CTLFLAG_RW,
+    &ipsendredirects, 0, "Enable sending IP redirects");
+
+int	ip_defttl = IPDEFTTL;
+SYSCTL_INT(_net_inet_ip, IPCTL_DEFTTL, ttl, CTLFLAG_RW,
+    &ip_defttl, 0, "Maximum TTL on IP packets");
+
+static int	ip_dosourceroute = 0;
+SYSCTL_INT(_net_inet_ip, IPCTL_SOURCEROUTE, sourceroute, CTLFLAG_RW,
+    &ip_dosourceroute, 0, "Enable forwarding source routed IP packets");
+
+static int	ip_acceptsourceroute = 0;
+SYSCTL_INT(_net_inet_ip, IPCTL_ACCEPTSOURCEROUTE, accept_sourceroute, 
+    CTLFLAG_RW, &ip_acceptsourceroute, 0, 
+    "Enable accepting source routed IP packets");
+
+static int	ip_keepfaith = 0;
+SYSCTL_INT(_net_inet_ip, IPCTL_KEEPFAITH, keepfaith, CTLFLAG_RW,
+	&ip_keepfaith,	0,
+	"Enable packet capture for FAITH IPv4->IPv6 translater daemon");
+
+/*
+ * XXX - Setting ip_checkinterface mostly implements the receive side of
+ * the Strong ES model described in RFC 1122, but since the routing table
+ * and transmit implementation do not implement the Strong ES model,
+ * setting this to 1 results in an odd hybrid.
+ *
+ * XXX - ip_checkinterface currently must be disabled if you use ipnat
+ * to translate the destination address to another local interface.
+ *
+ * XXX - ip_checkinterface must be disabled if you add IP aliases
+ * to the loopback interface instead of the interface where the
+ * packets for those addresses are received.
+ */
+static int	ip_checkinterface = 1;
+SYSCTL_INT(_net_inet_ip, OID_AUTO, check_interface, CTLFLAG_RW,
+    &ip_checkinterface, 0, "Verify packet arrives on correct interface");
+
+#ifdef DIAGNOSTIC
+static int	ipprintfs = 0;
+#endif
+
+extern	struct domain inetdomain;
+extern	struct ipprotosw inetsw[];
+u_char	ip_protox[IPPROTO_MAX];
+static int	ipqmaxlen = IFQ_MAXLEN;
+struct	in_ifaddrhead in_ifaddrhead; /* first inet address */
+SYSCTL_INT(_net_inet_ip, IPCTL_INTRQMAXLEN, intr_queue_maxlen, CTLFLAG_RW,
+    &ipintrq.ifq_maxlen, 0, "Maximum size of the IP input queue");
+SYSCTL_INT(_net_inet_ip, IPCTL_INTRQDROPS, intr_queue_drops, CTLFLAG_RD,
+    &ipintrq.ifq_drops, 0, "Number of packets dropped from the IP input queue");
+
+struct ipstat ipstat;
+SYSCTL_STRUCT(_net_inet_ip, IPCTL_STATS, stats, CTLFLAG_RD,
+    &ipstat, ipstat, "IP statistics (struct ipstat, netinet/ip_var.h)");
+
+/* Packet reassembly stuff */
+#define IPREASS_NHASH_LOG2      6
+#define IPREASS_NHASH           (1 << IPREASS_NHASH_LOG2)
+#define IPREASS_HMASK           (IPREASS_NHASH - 1)
+#define IPREASS_HASH(x,y) \
+	(((((x) & 0xF) | ((((x) >> 8) & 0xF) << 4)) ^ (y)) & IPREASS_HMASK)
+
+static TAILQ_HEAD(ipqhead, ipq) ipq[IPREASS_NHASH];
+static int    nipq = 0;         /* total # of reass queues */
+static int    maxnipq;
+const  int    ipintrq_present = 1;
+
+#ifdef IPCTL_DEFMTU
+SYSCTL_INT(_net_inet_ip, IPCTL_DEFMTU, mtu, CTLFLAG_RW,
+    &ip_mtu, 0, "Default MTU");
+#endif
+
+#ifdef IPSTEALTH
+static int	ipstealth = 0;
+SYSCTL_INT(_net_inet_ip, OID_AUTO, stealth, CTLFLAG_RW,
+    &ipstealth, 0, "");
+#endif
+
+
+/* Firewall hooks */
+ip_fw_chk_t *ip_fw_chk_ptr;
+ip_fw_ctl_t *ip_fw_ctl_ptr;
+int fw_enable = 1 ;
+
+#ifdef DUMMYNET
+ip_dn_ctl_t *ip_dn_ctl_ptr;
+#endif
+
+
+/*
+ * We need to save the IP options in case a protocol wants to respond
+ * to an incoming packet over the same route if the packet got here
+ * using IP source routing.  This allows connection establishment and
+ * maintenance when the remote end is on a network that is not known
+ * to us.
+ */
+static int	ip_nhops = 0;
+static	struct ip_srcrt {
+	struct	in_addr dst;			/* final destination */
+	char	nop;				/* one NOP to align */
+	char	srcopt[IPOPT_OFFSET + 1];	/* OPTVAL, OLEN and OFFSET */
+	struct	in_addr route[MAX_IPOPTLEN/sizeof(struct in_addr)];
+} ip_srcrt;
+
+struct sockaddr_in *ip_fw_fwd_addr;
+
+static void	save_rte __P((u_char *, struct in_addr));
+static int	ip_dooptions __P((struct mbuf *));
+static void	ip_forward __P((struct mbuf *, int));
+static void	ip_freef __P((struct ipqhead *, struct ipq *));
+#ifdef IPDIVERT
+static struct	mbuf *ip_reass __P((struct mbuf *, struct ipqhead *, struct ipq *, u_int32_t *, u_int16_t *));
+#else
+static struct	mbuf *ip_reass __P((struct mbuf *, struct ipqhead *, struct ipq *));
+#endif
+static struct	in_ifaddr *ip_rtaddr __P((struct in_addr));
+static void	ipintr __P((void));
+
+/*
+ * IP initialization: fill in ip_protox[] (raw IP handles every protocol
+ * not implemented in kernel), set up reassembly queues and the input queue.
+ */
+void
+ip_init()
+{
+	register struct ipprotosw *pr;
+	register int i;
+
+	TAILQ_INIT(&in_ifaddrhead);
+	pr = (struct ipprotosw *)pffindproto(PF_INET, IPPROTO_RAW, SOCK_RAW);
+	if (pr == 0)
+		panic("ip_init");
+	for (i = 0; i < IPPROTO_MAX; i++)
+		ip_protox[i] = pr - inetsw;
+	for (pr = (struct ipprotosw *)inetdomain.dom_protosw;
+	    pr < (struct ipprotosw *)inetdomain.dom_protoswNPROTOSW; pr++)
+		if (pr->pr_domain->dom_family == PF_INET &&
+		    pr->pr_protocol && pr->pr_protocol != IPPROTO_RAW &&
+		    pr->pr_protocol < IPPROTO_MAX)	/* fits ip_protox[] */
+			ip_protox[pr->pr_protocol] = pr - inetsw;
+
+	for (i = 0; i < IPREASS_NHASH; i++)
+	    TAILQ_INIT(&ipq[i]);
+
+	maxnipq = nmbclusters/4;
+	ip_id = time_second & 0xffff;
+	ipintrq.ifq_maxlen = ipqmaxlen;
+	mtx_init(&ipintrq.ifq_mtx, "ip_inq", MTX_DEF);
+
+	/* Have the IP software interrupt run ipintr(). */
+	register_netisr(NETISR_IP, ipintr);
+}
+
+static struct	sockaddr_in ipaddr = { sizeof(ipaddr), AF_INET };
+struct	route ipforward_rt;
+
+/*
+ * IP input routine.  Validate, checksum and byte swap the header, run the
+ * packet filters, process options, then forward or reassemble and deliver.
+ */
+void
+ip_input(struct mbuf *m)
+{
+	struct ip *ip;
+	struct ipq *fp;
+	struct in_ifaddr *ia = NULL;	/* matching local address, if any */
+	int    i, hlen, checkif;
+	u_short sum;			/* checksum result, later hash index */
+	u_int16_t divert_cookie;		/* firewall cookie */
+	struct in_addr pkt_dst;
+#ifdef IPDIVERT
+	u_int32_t divert_info = 0;		/* packet divert/tee info */
+#endif
+	struct ip_fw_chain *rule = NULL;
+#ifdef PFIL_HOOKS
+	struct packet_filter_hook *pfh;
+	struct mbuf *m0;
+	int rv;
+#endif /* PFIL_HOOKS */
+
+#ifdef IPDIVERT
+	/* Get and reset firewall cookie */
+	divert_cookie = ip_divert_cookie;
+	ip_divert_cookie = 0;
+#else
+	divert_cookie = 0;
+#endif
+
+#if defined(IPFIREWALL) && defined(DUMMYNET)
+        /*
+         * dummynet packets are prepended with a vestigial mbuf that has
+         * m_type = MT_DUMMYNET and m_data pointing to the matching
+         * rule.  Such packets skip the header checks below.
+         */
+        if (m->m_type == MT_DUMMYNET) {
+            rule = (struct ip_fw_chain *)(m->m_data) ;
+            m = m->m_next ;
+            ip = mtod(m, struct ip *);
+            hlen = IP_VHL_HL(ip->ip_vhl) << 2;
+            goto iphack ;
+        } else
+            rule = NULL ;
+#endif
+
+#ifdef	DIAGNOSTIC
+	if (m == NULL || (m->m_flags & M_PKTHDR) == 0)
+		panic("ip_input no HDR");
+#endif
+	ipstat.ips_total++;
+
+	if (m->m_pkthdr.len < sizeof(struct ip))
+		goto tooshort;
+
+	if (m->m_len < sizeof (struct ip) &&
+	    (m = m_pullup(m, sizeof (struct ip))) == 0) {
+		ipstat.ips_toosmall++;
+		return;
+	}
+	ip = mtod(m, struct ip *);
+
+	if (IP_VHL_V(ip->ip_vhl) != IPVERSION) {
+		ipstat.ips_badvers++;
+		goto bad;
+	}
+
+	hlen = IP_VHL_HL(ip->ip_vhl) << 2;
+	if (hlen < sizeof(struct ip)) {	/* minimum header length */
+		ipstat.ips_badhlen++;
+		goto bad;
+	}
+	if (hlen > m->m_len) {
+		if ((m = m_pullup(m, hlen)) == 0) {
+			ipstat.ips_badhlen++;
+			return;
+		}
+		ip = mtod(m, struct ip *);
+	}
+	if (m->m_pkthdr.csum_flags & CSUM_IP_CHECKED) {
+		sum = !(m->m_pkthdr.csum_flags & CSUM_IP_VALID);
+	} else {
+		if (hlen == sizeof(struct ip)) {
+			sum = in_cksum_hdr(ip);
+		} else {
+			sum = in_cksum(m, hlen);
+		}
+	}
+	if (sum) {
+		ipstat.ips_badsum++;
+		goto bad;
+	}
+
+	/*
+	 * Convert fields to host representation.
+	 */
+	NTOHS(ip->ip_len);
+	if (ip->ip_len < hlen) {
+		ipstat.ips_badlen++;
+		goto bad;
+	}
+	NTOHS(ip->ip_off);
+
+	/*
+	 * Check that the amount of data in the buffers
+	 * is at least as much as the IP header would have us expect.
+	 * Trim mbufs if longer than we expect.
+	 * Drop packet if shorter than we expect.
+	 */
+	if (m->m_pkthdr.len < ip->ip_len) {
+tooshort:
+		ipstat.ips_tooshort++;
+		goto bad;
+	}
+	if (m->m_pkthdr.len > ip->ip_len) {
+		if (m->m_len == m->m_pkthdr.len) {
+			m->m_len = ip->ip_len;
+			m->m_pkthdr.len = ip->ip_len;
+		} else
+			m_adj(m, ip->ip_len - m->m_pkthdr.len);
+	}
+
+	/*
+	 * Don't accept packets with a loopback destination address
+	 * unless they arrived via the loopback interface.
+	 */
+	if ((ntohl(ip->ip_dst.s_addr) & IN_CLASSA_NET) ==
+	    (IN_LOOPBACKNET << IN_CLASSA_NSHIFT) && 
+	    (m->m_pkthdr.rcvif->if_flags & IFF_LOOPBACK) == 0) {
+		goto bad;
+	}
+
+	/*
+	 * IpHack's section.
+	 * Right now, while no processing has been done on the packet
+	 * and it is still fresh off the network, we do our black
+	 * deals with it.
+	 * - Firewall: deny/allow/divert
+	 * - Xlate: translate packet's addr/port (NAT).
+	 * - Pipe: pass pkt through dummynet.
+	 * - Wrap: fake packet's addr/port <unimpl.>
+	 * - Encapsulate: put it in another IP and send out. <unimp.>
+ 	 */
+
+#if defined(IPFIREWALL) && defined(DUMMYNET)
+iphack:
+#endif
+
+#ifdef PFIL_HOOKS
+	/*
+	 * Run through list of hooks for input packets.  If there are any
+	 * filters which require that additional packets in the flow are
+	 * not fast-forwarded, they must clear the M_CANFASTFWD flag.
+	 * Note that filters must _never_ set this flag, as another filter
+	 * in the list may have previously cleared it.
+	 */
+	m0 = m;
+	pfh = pfil_hook_get(PFIL_IN, &inetsw[ip_protox[IPPROTO_IP]].pr_pfh);
+	for (; pfh; pfh = TAILQ_NEXT(pfh, pfil_link))
+		if (pfh->pfil_func) {
+			rv = pfh->pfil_func(ip, hlen,
+					    m->m_pkthdr.rcvif, 0, &m0);
+			if (rv)
+				return;
+			m = m0;
+			if (m == NULL)
+				return;
+			ip = mtod(m, struct ip *);
+		}
+#endif /* PFIL_HOOKS */
+
+	if (fw_enable && ip_fw_chk_ptr) {
+#ifdef IPFIREWALL_FORWARD
+		/*
+		 * If we've been forwarded from the output side, then
+		 * don't run the firewall a second time
+		 */
+		if (ip_fw_fwd_addr)
+			goto ours;
+#endif	/* IPFIREWALL_FORWARD */
+		/*
+		 * See the comment in ip_output for the return values
+		 * produced by the firewall.
+		 */
+		i = (*ip_fw_chk_ptr)(&ip,
+		    hlen, NULL, &divert_cookie, &m, &rule, &ip_fw_fwd_addr);
+		if (i & IP_FW_PORT_DENY_FLAG) { /* XXX new interface-denied */
+		    if (m)
+			m_freem(m);
+		    return ;
+		}
+		if (m == NULL) {	/* Packet discarded by firewall */
+		    static int __debug=10;
+		    if (__debug >0) {
+			printf("firewall returns NULL, please update!\n");	
+			__debug-- ;
+		    }
+		    return;
+		}
+		if (i == 0 && ip_fw_fwd_addr == NULL)	/* common case */
+			goto pass;
+#ifdef DUMMYNET
+                if ((i & IP_FW_PORT_DYNT_FLAG) != 0) {
+                        /* Send packet to the appropriate pipe */
+                        dummynet_io(i&0xffff,DN_TO_IP_IN,m,NULL,NULL,0, rule,
+				    0);
+			return;
+		}
+#endif
+#ifdef IPDIVERT
+		if (i != 0 && (i & IP_FW_PORT_DYNT_FLAG) == 0) {
+			/* Divert or tee packet */
+			divert_info = i;
+			goto ours;
+		}
+#endif
+#ifdef IPFIREWALL_FORWARD
+		if (i == 0 && ip_fw_fwd_addr != NULL)
+			goto pass;
+#endif
+		/*
+		 * if we get here, the packet must be dropped
+		 */
+		m_freem(m);
+		return;
+	}
+pass:
+
+	/*
+	 * Process options and, if not destined for us,
+	 * ship it on.  ip_dooptions returns 1 when an
+	 * error was detected (causing an icmp message
+	 * to be sent and the original packet to be freed).
+	 */
+	ip_nhops = 0;		/* for source routed packets */
+	if (hlen > sizeof (struct ip) && ip_dooptions(m)) {
+#ifdef IPFIREWALL_FORWARD
+		ip_fw_fwd_addr = NULL;
+#endif
+		return;
+	}
+
+	/*
+	 * Greedy RSVP: snatch any packet of the RSVP protocol, no matter
+	 * whether it is destined for another node or is a multicast one,
+	 * and keep it from being forwarded anywhere else.  Only done when
+	 * rsvp_on is set (the rsvp daemon is running).
+	 */
+	if (rsvp_on && ip->ip_p==IPPROTO_RSVP) 
+		goto ours;
+
+	/*
+	 * Check our list of addresses, to see if the packet is for us.
+	 * If we don't have any addresses, assume any unicast packet
+	 * we receive might be for us (and let the upper layers deal
+	 * with it).
+	 */
+	if (TAILQ_EMPTY(&in_ifaddrhead) &&
+	    (m->m_flags & (M_MCAST|M_BCAST)) == 0)
+		goto ours;
+
+	/*
+	 * Cache the destination address of the packet; this may be
+	 * changed by use of 'ipfw fwd'.
+	 */
+	pkt_dst = ip_fw_fwd_addr == NULL ?
+	    ip->ip_dst : ip_fw_fwd_addr->sin_addr;
+
+	/*
+	 * Enable a consistency check between the destination address
+	 * and the arrival interface for a unicast packet (the RFC 1122
+	 * strong ES model) if IP forwarding is disabled and the packet
+	 * is not locally generated and the packet is not subject to
+	 * 'ipfw fwd'.
+	 *
+	 * XXX - Checking also should be disabled if the destination
+	 * address is ipnat'ed to a different interface.
+	 *
+	 * XXX - Checking is incompatible with IP aliases added
+	 * to the loopback interface instead of the interface where
+	 * the packets are received.
+	 */
+	checkif = ip_checkinterface && (ipforwarding == 0) && 
+	    ((m->m_pkthdr.rcvif->if_flags & IFF_LOOPBACK) == 0) &&
+	    (ip_fw_fwd_addr == NULL);
+
+	TAILQ_FOREACH(ia, &in_ifaddrhead, ia_link) {
+#define	satosin(sa)	((struct sockaddr_in *)(sa))
+
+#ifdef BOOTP_COMPAT
+		if (IA_SIN(ia)->sin_addr.s_addr == INADDR_ANY)
+			goto ours;
+#endif
+		/*
+		 * If the address matches, verify that the packet
+		 * arrived via the correct interface if checking is
+		 * enabled.
+		 */
+		if (IA_SIN(ia)->sin_addr.s_addr == pkt_dst.s_addr && 
+		    (!checkif || ia->ia_ifp == m->m_pkthdr.rcvif))
+			goto ours;
+		/*
+		 * Only accept broadcast packets that arrive via the
+		 * matching interface.  Reception of forwarded directed
+		 * broadcasts would be handled via ip_forward() and
+		 * ether_output() with the loopback into the stack for
+		 * SIMPLEX interfaces handled by ether_output().
+		 */
+		if (ia->ia_ifp == m->m_pkthdr.rcvif &&
+		    ia->ia_ifp && ia->ia_ifp->if_flags & IFF_BROADCAST) {
+			if (satosin(&ia->ia_broadaddr)->sin_addr.s_addr ==
+			    pkt_dst.s_addr)
+				goto ours;
+			if (ia->ia_netbroadcast.s_addr == pkt_dst.s_addr)
+				goto ours;
+		}
+	}
+	if (IN_MULTICAST(ntohl(ip->ip_dst.s_addr))) {
+		struct in_multi *inm;
+		if (ip_mrouter) {
+			/*
+			 * If we are acting as a multicast router, all
+			 * incoming multicast packets are passed to the
+			 * kernel-level multicast forwarding function.
+			 * The packet is returned (relatively) intact; if
+			 * ip_mforward() returns a non-zero value, the packet
+			 * must be discarded, else it may be accepted below.
+			 */
+			if (ip_mforward(ip, m->m_pkthdr.rcvif, m, 0) != 0) {
+				ipstat.ips_cantforward++;
+				m_freem(m);
+				return;
+			}
+
+			/*
+			 * The process-level routing daemon needs to receive
+			 * all multicast IGMP packets, whether or not this
+			 * host belongs to their destination groups.
+			 */
+			if (ip->ip_p == IPPROTO_IGMP)
+				goto ours;
+			ipstat.ips_forward++;
+		}
+		/*
+		 * See if we belong to the destination multicast group on the
+		 * arrival interface.
+		 */
+		IN_LOOKUP_MULTI(ip->ip_dst, m->m_pkthdr.rcvif, inm);
+		if (inm == NULL) {
+			ipstat.ips_notmember++;
+			m_freem(m);
+			return;
+		}
+		goto ours;
+	}
+	if (ip->ip_dst.s_addr == (u_long)INADDR_BROADCAST)
+		goto ours;
+	if (ip->ip_dst.s_addr == INADDR_ANY)
+		goto ours;
+
+#if defined(NFAITH) && 0 < NFAITH
+	/*
+	 * FAITH(Firewall Aided Internet Translator)
+	 */
+	if (m->m_pkthdr.rcvif && m->m_pkthdr.rcvif->if_type == IFT_FAITH) {
+		if (ip_keepfaith) {
+			if (ip->ip_p == IPPROTO_TCP || ip->ip_p == IPPROTO_ICMP) 
+				goto ours;
+		}
+		m_freem(m);
+		return;
+	}
+#endif
+	/*
+	 * Not for us; forward if possible and desirable.
+	 */
+	if (ipforwarding == 0) {
+		ipstat.ips_cantforward++;
+		m_freem(m);
+	} else
+		ip_forward(m, 0);
+#ifdef IPFIREWALL_FORWARD
+	ip_fw_fwd_addr = NULL;
+#endif
+	return;
+
+ours:
+	/* Count the packet in the ip address stats */
+	if (ia != NULL) {
+		ia->ia_ifa.if_ipackets++;
+		ia->ia_ifa.if_ibytes += m->m_pkthdr.len;
+	}
+
+	/*
+	 * If offset or IP_MF are set, must reassemble.
+	 * Otherwise, nothing need be done.
+	 * (We could look in the reassembly queue to see
+	 * if the packet was previously fragmented,
+	 * but it's not worth the time; just let them time out.)
+	 */
+	if (ip->ip_off & (IP_MF | IP_OFFMASK)) {
+
+		sum = IPREASS_HASH(ip->ip_src.s_addr, ip->ip_id);
+		/*
+		 * Look for queue of fragments
+		 * of this datagram.
+		 */
+		TAILQ_FOREACH(fp, &ipq[sum], ipq_list)
+			if (ip->ip_id == fp->ipq_id &&
+			    ip->ip_src.s_addr == fp->ipq_src.s_addr &&
+			    ip->ip_dst.s_addr == fp->ipq_dst.s_addr &&
+			    ip->ip_p == fp->ipq_p)
+				goto found;
+
+		fp = 0;
+
+		/* check if there's a place for the new queue */
+		if (nipq > maxnipq) {
+		    /*
+		     * drop something from the tail of the current queue
+		     * before proceeding further
+		     */
+		    struct ipq *q = TAILQ_LAST(&ipq[sum], ipqhead);
+		    if (q == NULL) {   /* gak: bucket empty, try the others */
+			for (i = 0; i < IPREASS_NHASH; i++) {
+			    struct ipq *r = TAILQ_LAST(&ipq[i], ipqhead);
+			    if (r) {
+				ip_freef(&ipq[i], r);
+				break;
+			    }
+			}
+		    } else
+			ip_freef(&ipq[sum], q);
+		}
+found:
+		/*
+		 * Adjust ip_len to not reflect header,
+		 * convert offset of this to bytes.
+		 */
+		ip->ip_len -= hlen;
+		if (ip->ip_off & IP_MF) {
+			/*
+			 * Make sure that fragments have a data length
+			 * that's a non-zero multiple of 8 bytes.
+			 */
+			if (ip->ip_len == 0 || (ip->ip_len & 0x7) != 0) {
+				ipstat.ips_toosmall++; /* XXX */
+				goto bad;
+			}
+			m->m_flags |= M_FRAG;
+		}
+		ip->ip_off <<= 3;
+
+		/*
+		 * Attempt reassembly; if it succeeds, proceed.
+		 */
+		ipstat.ips_fragments++;
+		m->m_pkthdr.header = ip;
+#ifdef IPDIVERT
+		m = ip_reass(m,
+		    &ipq[sum], fp, &divert_info, &divert_cookie);
+#else
+		m = ip_reass(m, &ipq[sum], fp);
+#endif
+		if (m == 0) {
+#ifdef IPFIREWALL_FORWARD
+			ip_fw_fwd_addr = NULL;
+#endif
+			return;
+		}
+		ipstat.ips_reassembled++;
+		ip = mtod(m, struct ip *);
+		/* Get the header length of the reassembled packet */
+		hlen = IP_VHL_HL(ip->ip_vhl) << 2;
+#ifdef IPDIVERT
+		/* Recompute the header checksum before diverting packet */
+		if (divert_info != 0) {
+			ip->ip_len += hlen;
+			HTONS(ip->ip_len);
+			HTONS(ip->ip_off);
+			ip->ip_sum = 0;
+			if (hlen == sizeof(struct ip))
+				ip->ip_sum = in_cksum_hdr(ip);
+			else
+				ip->ip_sum = in_cksum(m, hlen);
+			NTOHS(ip->ip_off);
+			NTOHS(ip->ip_len);
+			ip->ip_len -= hlen;
+		}
+#endif
+	} else
+		ip->ip_len -= hlen;
+
+#ifdef IPDIVERT
+	/*
+	 * Divert or tee packet to the divert protocol if required.
+	 *
+	 * If divert_info is zero then cookie should be too, so we shouldn't
+	 * need to clear them here.  Assume divert_packet() does so also.
+	 */
+	if (divert_info != 0) {
+		struct mbuf *clone = NULL;
+
+		/* Clone packet if we're doing a 'tee' */
+		if ((divert_info & IP_FW_PORT_TEE_FLAG) != 0)
+			clone = m_dup(m, M_DONTWAIT);
+
+		/* Restore packet header fields to original values */
+		ip->ip_len += hlen;
+		HTONS(ip->ip_len);
+		HTONS(ip->ip_off);
+
+		/* Deliver packet to divert input routine */
+		ip_divert_cookie = divert_cookie;
+		divert_packet(m, 1, divert_info & 0xffff);
+		ipstat.ips_delivered++;
+
+		/* If 'tee', carry on with the clone; otherwise we are done */
+		if (clone == NULL)
+			return;
+		m = clone;
+		ip = mtod(m, struct ip *);
+	}
+#endif
+
+	/*
+	 * Switch out to protocol's input routine.
+	 */
+	ipstat.ips_delivered++;
+    {
+	int off = hlen, nh = ip->ip_p;
+
+	(*inetsw[ip_protox[ip->ip_p]].pr_input)(m, off, nh);
+#ifdef	IPFIREWALL_FORWARD
+	ip_fw_fwd_addr = NULL;	/* tcp needed it */
+#endif
+	return;
+    }
+bad:
+#ifdef	IPFIREWALL_FORWARD
+	ip_fw_fwd_addr = NULL;
+#endif
+	m_freem(m);
+}
+
+/*
+ * IP software interrupt: drain ipintrq into ip_input() (to go away soon).
+ */
+static void
+ipintr(void)
+{
+	struct mbuf *pkt;
+
+	for (;;) {
+		IF_DEQUEUE(&ipintrq, pkt);
+		if (pkt == 0)
+			break;
+		ip_input(pkt);
+	}
+}
+
+/*
+ * Take an incoming datagram fragment and try to reassemble it into a
+ * whole datagram.  If a chain for reassembly of this datagram already
+ * exists, then it is given as fp; otherwise we have to make a chain.
+ * Returns the whole datagram once complete; otherwise returns 0.
+ *
+ * When IPDIVERT is enabled, keep additional state with each packet that
+ * tells us if we need to divert or tee the packet we're building.
+ */
+static struct mbuf *
+#ifdef IPDIVERT
+ip_reass(m, head, fp, divinfo, divcookie)
+#else
+ip_reass(m, head, fp)
+#endif
+	struct mbuf *m;
+	struct ipqhead *head;
+	struct ipq *fp;
+#ifdef IPDIVERT
+	u_int32_t *divinfo;
+	u_int16_t *divcookie;
+#endif
+{
+	struct ip *ip = mtod(m, struct ip *);
+	register struct mbuf *p, *q, *nq;
+	struct mbuf *t;
+	int hlen = IP_VHL_HL(ip->ip_vhl) << 2;
+	int i, next;
+
+	/*
+	 * Hide the IP header: the code below expects the
+	 * mbuf data to cover the fragment payload only.
+	 */
+	m->m_data += hlen;
+	m->m_len -= hlen;
+
+	/*
+	 * If first fragment to arrive, create a reassembly queue.
+	 */
+	if (fp == 0) {
+		if ((t = m_get(M_DONTWAIT, MT_FTABLE)) == NULL)
+			goto dropfrag;
+		fp = mtod(t, struct ipq *);
+		TAILQ_INSERT_HEAD(head, fp, ipq_list);
+		nipq++;
+		fp->ipq_ttl = IPFRAGTTL;
+		fp->ipq_p = ip->ip_p;
+		fp->ipq_id = ip->ip_id;
+		fp->ipq_src = ip->ip_src;
+		fp->ipq_dst = ip->ip_dst;
+		fp->ipq_frags = m;
+		m->m_nextpkt = NULL;
+#ifdef IPDIVERT
+		fp->ipq_div_info = 0;
+		fp->ipq_div_cookie = 0;
+#endif
+		goto inserted;
+	}
+
+#define GETIP(m)	((struct ip*)((m)->m_pkthdr.header))
+
+	/*
+	 * Find a segment which begins after this one does.
+	 */
+	for (p = NULL, q = fp->ipq_frags; q; p = q, q = q->m_nextpkt)
+		if (GETIP(q)->ip_off > ip->ip_off)
+			break;
+
+	/*
+	 * If there is a preceding segment, it may provide some of
+	 * our data already.  If so, drop the data from the incoming
+	 * segment.  If it provides all of our data, drop us, otherwise
+	 * stick new segment in the proper place.
+	 *
+	 * If some of the data is dropped from the incoming
+	 * segment, then its checksum is invalidated.
+	 */
+	if (p) {
+		i = GETIP(p)->ip_off + GETIP(p)->ip_len - ip->ip_off;
+		if (i > 0) {
+			if (i >= ip->ip_len)
+				goto dropfrag;
+			m_adj(m, i);
+			m->m_pkthdr.csum_flags = 0;
+			ip->ip_off += i;
+			ip->ip_len -= i;
+		}
+		m->m_nextpkt = p->m_nextpkt;
+		p->m_nextpkt = m;
+	} else {
+		m->m_nextpkt = fp->ipq_frags;
+		fp->ipq_frags = m;
+	}
+
+	/*
+	 * While we overlap succeeding segments trim them or,
+	 * if they are completely covered, dequeue them.
+	 */
+	for (; q != NULL && ip->ip_off + ip->ip_len > GETIP(q)->ip_off;
+	     q = nq) {
+		i = (ip->ip_off + ip->ip_len) -
+		    GETIP(q)->ip_off;
+		if (i < GETIP(q)->ip_len) {
+			GETIP(q)->ip_len -= i;
+			GETIP(q)->ip_off += i;
+			m_adj(q, i);
+			q->m_pkthdr.csum_flags = 0;
+			break;
+		}
+		nq = q->m_nextpkt;
+		m->m_nextpkt = nq;
+		m_freem(q);
+	}
+
+inserted:
+
+#ifdef IPDIVERT
+	/*
+	 * Transfer firewall instructions to the fragment structure.
+	 * NOTE(review): unconditional, so the latest fragment's info wins.
+	 */
+	fp->ipq_div_info = *divinfo;
+	fp->ipq_div_cookie = *divcookie;
+	*divinfo = 0;
+	*divcookie = 0;
+#endif
+
+	/*
+	 * Check for complete reassembly.
+	 */
+	next = 0;
+	for (p = NULL, q = fp->ipq_frags; q; p = q, q = q->m_nextpkt) {
+		if (GETIP(q)->ip_off != next)
+			return (0);
+		next += GETIP(q)->ip_len;
+	}
+	/* Make sure the last packet didn't have the IP_MF flag */
+	if (p->m_flags & M_FRAG)
+		return (0);
+
+	/*
+	 * Reassembly is complete.  Make sure the packet is a sane size.
+	 */
+	q = fp->ipq_frags;
+	ip = GETIP(q);
+	if (next + (IP_VHL_HL(ip->ip_vhl) << 2) > IP_MAXPACKET) {
+		ipstat.ips_toolong++;
+		ip_freef(head, fp);
+		return (0);
+	}
+
+	/*
+	 * Concatenate fragments.
+	 */
+	m = q;
+	t = m->m_next;
+	m->m_next = 0;
+	m_cat(m, t);
+	nq = q->m_nextpkt;
+	q->m_nextpkt = 0;
+	for (q = nq; q != NULL; q = nq) {
+		nq = q->m_nextpkt;
+		q->m_nextpkt = NULL;
+		m->m_pkthdr.csum_flags &= q->m_pkthdr.csum_flags;
+		m->m_pkthdr.csum_data += q->m_pkthdr.csum_data;
+		m_cat(m, q);
+	}
+
+#ifdef IPDIVERT
+	/*
+	 * Extract firewall instructions from the fragment structure.
+	 */
+	*divinfo = fp->ipq_div_info;
+	*divcookie = fp->ipq_div_cookie;
+#endif
+
+	/*
+	 * Create header for new ip packet by
+	 * modifying header of first packet;
+	 * dequeue and discard fragment reassembly header.
+	 * Make header visible.
+	 */
+	ip->ip_len = next;
+	ip->ip_src = fp->ipq_src;
+	ip->ip_dst = fp->ipq_dst;
+	TAILQ_REMOVE(head, fp, ipq_list);
+	nipq--;
+	(void) m_free(dtom(fp));
+	m->m_len += (IP_VHL_HL(ip->ip_vhl) << 2);
+	m->m_data -= (IP_VHL_HL(ip->ip_vhl) << 2);
+	/* some debugging cruft by sklower, below, will go away soon */
+	if (m->m_flags & M_PKTHDR) { /* XXX this should be done elsewhere */
+		register int plen = 0;
+		for (t = m; t; t = t->m_next)
+			plen += t->m_len;
+		m->m_pkthdr.len = plen;
+	}
+	return (m);
+
+dropfrag:
+#ifdef IPDIVERT
+	*divinfo = 0;
+	*divcookie = 0;
+#endif
+	ipstat.ips_fragdropped++;
+	m_freem(m);
+	return (0);
+
+#undef GETIP
+}
+
+/*
+ * Release a reassembly queue: free each fragment chained on it, unlink
+ * the queue from its hash bucket, and free the mbuf holding the header.
+ */
+static void
+ip_freef(fhp, fp)
+	struct ipqhead *fhp;
+	struct ipq *fp;
+{
+	struct mbuf *frag, *after;
+
+	for (frag = fp->ipq_frags; frag != NULL; frag = after) {
+		after = frag->m_nextpkt;
+		fp->ipq_frags = after;
+		m_freem(frag);
+	}
+	TAILQ_REMOVE(fhp, fp, ipq_list);
+	(void) m_free(dtom(fp));
+	nipq--;
+}
+
+/*
+ * IP timer processing: age every reassembly queue by one tick and
+ * discard the queues whose time-to-live has run out, then run the
+ * ipflow timer.  The whole pass runs between splnet() and splx().
+ */
+void
+ip_slowtimo()
+{
+	struct ipq *cur, *succ;
+	int h;
+	int s;
+
+	s = splnet();
+	for (h = 0; h < IPREASS_NHASH; h++) {
+		/* Fetch the successor first: ip_freef() unlinks cur. */
+		for (cur = TAILQ_FIRST(&ipq[h]); cur != NULL; cur = succ) {
+			succ = TAILQ_NEXT(cur, ipq_list);
+			if (--cur->ipq_ttl != 0)
+				continue;
+			ipstat.ips_fragtimeout++;
+			ip_freef(&ipq[h], cur);
+		}
+	}
+	ipflow_slowtimo();
+	splx(s);
+}
+
+/*
+ * Throw away every pending reassembly queue, then call in_rtqdrain().
+ */
+void
+ip_drain()
+{
+	struct ipq *victim;
+	int h;
+
+	for (h = 0; h < IPREASS_NHASH; h++)
+		while ((victim = TAILQ_FIRST(&ipq[h])) != NULL) {
+			ipstat.ips_fragdropped++;
+			ip_freef(&ipq[h], victim);
+		}
+	in_rtqdrain();
+}
+
+/*
+ * Do option processing on a datagram, possibly discarding it if bad
+ * options are encountered, or forwarding it if source-routed.
+ * Bad options are reported to the sender through icmp_error().
+ * The length of every option, and the pointer byte of the source
+ * route, record route and timestamp options, is checked before use.
+ * Returns 1 if packet has been forwarded/freed,
+ * 0 if the packet should be processed further.
+ */
+static int
+ip_dooptions(m)
+	struct mbuf *m;
+{
+	register struct ip *ip = mtod(m, struct ip *);
+	register u_char *cp;
+	register struct ip_timestamp *ipt;
+	register struct in_ifaddr *ia;
+	int opt, optlen, cnt, off, code, type = ICMP_PARAMPROB, forward = 0;
+	struct in_addr *sin, dst;
+	n_time ntime;
+
+	dst = ip->ip_dst;
+	cp = (u_char *)(ip + 1);
+	cnt = (IP_VHL_HL(ip->ip_vhl) << 2) - sizeof (struct ip);
+	for (; cnt > 0; cnt -= optlen, cp += optlen) {
+		opt = cp[IPOPT_OPTVAL];
+		if (opt == IPOPT_EOL)
+			break;
+		if (opt == IPOPT_NOP)
+			optlen = 1;
+		else {
+			if (cnt < IPOPT_OLEN + sizeof(*cp)) {
+				code = &cp[IPOPT_OLEN] - (u_char *)ip;
+				goto bad;
+			}
+			optlen = cp[IPOPT_OLEN];
+			if (optlen < IPOPT_OLEN + sizeof(*cp) || optlen > cnt) {
+				code = &cp[IPOPT_OLEN] - (u_char *)ip;
+				goto bad;
+			}
+		}
+		switch (opt) {
+		default:
+			break;
+
+		/*
+		 * Source routing with record.
+		 * Find interface with current destination address.
+		 * If none on this machine then drop if strictly routed,
+		 * or do nothing if loosely routed.
+		 * Record interface address and bring up next address
+		 * component.  If strictly routed make sure next
+		 * address is on directly accessible net.
+		 */
+		case IPOPT_LSRR:
+		case IPOPT_SSRR:
+			/* The pointer byte must lie within the option. */
+			if (optlen < IPOPT_OFFSET + sizeof(*cp)) {
+				code = &cp[IPOPT_OLEN] - (u_char *)ip;
+				goto bad;
+			}
+			if ((off = cp[IPOPT_OFFSET]) < IPOPT_MINOFF) {
+				code = &cp[IPOPT_OFFSET] - (u_char *)ip;
+				goto bad;
+			}
+			ipaddr.sin_addr = ip->ip_dst;
+			ia = (struct in_ifaddr *)
+				ifa_ifwithaddr((struct sockaddr *)&ipaddr);
+			if (ia == 0) {
+				if (opt == IPOPT_SSRR) {
+					type = ICMP_UNREACH;
+					code = ICMP_UNREACH_SRCFAIL;
+					goto bad;
+				}
+				if (!ip_dosourceroute)
+					goto nosourcerouting;
+				/*
+				 * Loose routing, and not at next destination
+				 * yet; nothing to do except forward.
+				 */
+				break;
+			}
+			off--;			/* 0 origin */
+			if (off > optlen - (int)sizeof(struct in_addr)) {
+				/*
+				 * End of source route.  Should be for us.
+				 */
+				if (!ip_acceptsourceroute)
+					goto nosourcerouting;
+				save_rte(cp, ip->ip_src);
+				break;
+			}
+
+			if (!ip_dosourceroute) {
+				if (ipforwarding) {
+					char buf[16]; /* aaa.bbb.ccc.ddd\0 */
+					/* Acting as a router: generate ICMP */
+nosourcerouting:
+					strcpy(buf, inet_ntoa(ip->ip_dst));
+					log(LOG_WARNING,
+					    "attempted source route from %s to %s\n",
+					    inet_ntoa(ip->ip_src), buf);
+					type = ICMP_UNREACH;
+					code = ICMP_UNREACH_SRCFAIL;
+					goto bad;
+				} else {
+					/*
+					 * Not acting as a router, so silently drop.
+					 */
+					ipstat.ips_cantforward++;
+					m_freem(m);
+					return (1);
+				}
+			}
+
+			/* locate outgoing interface */
+			(void)memcpy(&ipaddr.sin_addr, cp + off,
+			    sizeof(ipaddr.sin_addr));
+
+			if (opt == IPOPT_SSRR) {
+#define	INA	struct in_ifaddr *
+#define	SA	struct sockaddr *
+			    if ((ia = (INA)ifa_ifwithdstaddr((SA)&ipaddr)) == 0)
+				ia = (INA)ifa_ifwithnet((SA)&ipaddr);
+			} else
+				ia = ip_rtaddr(ipaddr.sin_addr);
+			if (ia == 0) {
+				type = ICMP_UNREACH;
+				code = ICMP_UNREACH_SRCFAIL;
+				goto bad;
+			}
+			ip->ip_dst = ipaddr.sin_addr;
+			(void)memcpy(cp + off, &(IA_SIN(ia)->sin_addr),
+			    sizeof(struct in_addr));
+			cp[IPOPT_OFFSET] += sizeof(struct in_addr);
+			/*
+			 * Let ip_intr's mcast routing check handle mcast pkts
+			 */
+			forward = !IN_MULTICAST(ntohl(ip->ip_dst.s_addr));
+			break;
+
+		case IPOPT_RR:
+			if (optlen < IPOPT_OFFSET + sizeof(*cp)) {
+				code = &cp[IPOPT_OFFSET] - (u_char *)ip;
+				goto bad;
+			}
+			if ((off = cp[IPOPT_OFFSET]) < IPOPT_MINOFF) {
+				code = &cp[IPOPT_OFFSET] - (u_char *)ip;
+				goto bad;
+			}
+			/* If no space remains, ignore. */
+			off--;			/* 0 origin */
+			if (off > optlen - (int)sizeof(struct in_addr))
+				break;
+			(void)memcpy(&ipaddr.sin_addr, &ip->ip_dst,
+			    sizeof(ipaddr.sin_addr));
+			/*
+			 * locate outgoing interface; if we're the destination,
+			 * use the incoming interface (should be same).
+			 */
+			if ((ia = (INA)ifa_ifwithaddr((SA)&ipaddr)) == 0 &&
+			    (ia = ip_rtaddr(ipaddr.sin_addr)) == 0) {
+				type = ICMP_UNREACH;
+				code = ICMP_UNREACH_HOST;
+				goto bad;
+			}
+			(void)memcpy(cp + off, &(IA_SIN(ia)->sin_addr),
+			    sizeof(struct in_addr));
+			cp[IPOPT_OFFSET] += sizeof(struct in_addr);
+			break;
+
+		case IPOPT_TS:
+			code = cp - (u_char *)ip;
+			ipt = (struct ip_timestamp *)cp;
+			/* RFC 791: the smallest legal pointer value is 5. */
+			if (ipt->ipt_len < 5 || ipt->ipt_ptr < 5)
+				goto bad;
+			if (ipt->ipt_ptr >
+			    ipt->ipt_len - (int)sizeof(int32_t)) {
+				if (++ipt->ipt_oflw == 0)
+					goto bad;
+				break;
+			}
+			sin = (struct in_addr *)(cp + ipt->ipt_ptr - 1);
+			switch (ipt->ipt_flg) {
+			case IPOPT_TS_TSONLY:
+				break;
+
+			case IPOPT_TS_TSANDADDR:
+				if (ipt->ipt_ptr - 1 + sizeof(n_time) +
+				    sizeof(struct in_addr) > ipt->ipt_len)
+					goto bad;
+				ipaddr.sin_addr = dst;
+				ia = (INA)ifaof_ifpforaddr((SA)&ipaddr,
+							    m->m_pkthdr.rcvif);
+				if (ia == 0)
+					continue;
+				(void)memcpy(sin, &IA_SIN(ia)->sin_addr,
+				    sizeof(struct in_addr));
+				ipt->ipt_ptr += sizeof(struct in_addr);
+				break;
+
+			case IPOPT_TS_PRESPEC:
+				if (ipt->ipt_ptr - 1 + sizeof(n_time) +
+				    sizeof(struct in_addr) > ipt->ipt_len)
+					goto bad;
+				(void)memcpy(&ipaddr.sin_addr, sin,
+				    sizeof(struct in_addr));
+				if (ifa_ifwithaddr((SA)&ipaddr) == 0)
+					continue;
+				ipt->ipt_ptr += sizeof(struct in_addr);
+				break;
+
+			default:
+				goto bad;
+			}
+			ntime = iptime();
+			(void)memcpy(cp + ipt->ipt_ptr - 1, &ntime,
+			    sizeof(n_time));
+			ipt->ipt_ptr += sizeof(n_time);
+		}
+	}
+	if (forward && ipforwarding) {
+		ip_forward(m, 1);
+		return (1);
+	}
+	return (0);
+bad:
+	icmp_error(m, type, code, 0, 0);
+	ipstat.ips_badoptions++;
+	return (1);
+}
+
+/*
+ * Given the address of the next destination (final or next hop),
+ * return the internet address info of the interface used to get there.
+ * The lookup goes through ipforward_rt, which is re-resolved whenever
+ * it holds no route, a route that is not up, or another destination.
+ */
+static struct in_ifaddr *
+ip_rtaddr(dst)
+	 struct in_addr dst;
+{
+	struct route *ro = &ipforward_rt;
+	struct sockaddr_in *key = (struct sockaddr_in *)&ro->ro_dst;
+
+	if (ro->ro_rt == 0 || (ro->ro_rt->rt_flags & RTF_UP) == 0 ||
+	    key->sin_addr.s_addr != dst.s_addr) {
+		if (ro->ro_rt != 0) {
+			RTFREE(ro->ro_rt);
+			ro->ro_rt = 0;
+		}
+		key->sin_family = AF_INET;
+		key->sin_len = sizeof(*key);
+		key->sin_addr = dst;
+
+		rtalloc_ign(ro, RTF_PRCLONING);
+	}
+	if (ro->ro_rt == 0)
+		return ((struct in_ifaddr *)0);
+	return ((struct in_ifaddr *)ro->ro_rt->rt_ifa);
+}
+
+/*
+ * Save incoming source route for use in replies,
+ * to be picked up later by ip_srcroute if the receiver is interested.
+ */
+void
+save_rte(u_char *option, struct in_addr dst)
+{
+	unsigned olen;
+
+	olen = option[IPOPT_OLEN];
+#ifdef DIAGNOSTIC
+	if (ipprintfs)
+		printf("save_rte: olen %d\n", olen);
+#endif
+	/* Too short would wrap the hop count; too long overflows ip_srcrt. */
+	if (olen < IPOPT_OFFSET + 1 ||
+	    olen > sizeof(ip_srcrt) - (1 + sizeof(dst)))
+		return;
+	bcopy(option, ip_srcrt.srcopt, olen);
+	ip_nhops = (olen - IPOPT_OFFSET - 1) / sizeof(struct in_addr);
+	ip_srcrt.dst = dst;
+}
+
+/*
+ * Retrieve the saved incoming source route for use in replies, in the
+ * same form used by setsockopt; returns 0 if none is saved or no mbuf.
+ * The first hop is placed before the options, will be removed later.
+ */
+struct mbuf *
+ip_srcroute()
+{
+	register struct in_addr *p, *q;
+	register struct mbuf *m;
+
+	if (ip_nhops == 0)
+		return ((struct mbuf *)0);
+	m = m_get(M_DONTWAIT, MT_HEADER);
+	if (m == 0)
+		return ((struct mbuf *)0);
+
+#define OPTSIZ	(sizeof(ip_srcrt.nop) + sizeof(ip_srcrt.srcopt))
+
+	/* length is (nhops+1)*sizeof(addr) + sizeof(nop + srcrt header) */
+	m->m_len = ip_nhops * sizeof(struct in_addr) + sizeof(struct in_addr) +
+	    OPTSIZ;
+#ifdef DIAGNOSTIC
+	if (ipprintfs)
+		printf("ip_srcroute: nhops %d mlen %d", ip_nhops, m->m_len);
+#endif
+
+	/*
+	 * First save first hop for return route
+	 */
+	p = &ip_srcrt.route[ip_nhops - 1];
+	*(mtod(m, struct in_addr *)) = *p--;
+#ifdef DIAGNOSTIC
+	if (ipprintfs)
+		printf(" hops %lx", (u_long)ntohl(mtod(m, struct in_addr *)->s_addr));
+#endif
+
+	/*
+	 * Copy option fields and padding (nop) to mbuf.
+	 */
+	ip_srcrt.nop = IPOPT_NOP;
+	ip_srcrt.srcopt[IPOPT_OFFSET] = IPOPT_MINOFF;
+	(void)memcpy(mtod(m, caddr_t) + sizeof(struct in_addr),
+	    &ip_srcrt.nop, OPTSIZ);
+	q = (struct in_addr *)(mtod(m, caddr_t) +
+	    sizeof(struct in_addr) + OPTSIZ);
+#undef OPTSIZ
+	/*
+	 * Record return path as an IP source route, reversing the path
+	 * (pointers are now aligned).  Log *p: *q is not written yet.
+	 */
+	while (p >= ip_srcrt.route) {
+#ifdef DIAGNOSTIC
+		if (ipprintfs)
+			printf(" %lx", (u_long)ntohl(p->s_addr));
+#endif
+		*q++ = *p--;
+	}
+	/*
+	 * Last hop goes to final destination.
+	 */
+	*q = ip_srcrt.dst;
+#ifdef DIAGNOSTIC
+	if (ipprintfs)
+		printf(" %lx\n", (u_long)ntohl(q->s_addr));
+#endif
+	return (m);
+}
+
+/*
+ * Strip the IP options out of the packet held in m by sliding the
+ * data that follows them up against the fixed header, then shrink
+ * the mbuf (and packet header) lengths and reset the header length
+ * to that of an option-less header.
+ * XXX should be deleted; mopt is currently ignored.
+ */
+void
+ip_stripoptions(m, mopt)
+	register struct mbuf *m;
+	struct mbuf *mopt;
+{
+	struct ip *iph = mtod(m, struct ip *);
+	caddr_t optbase = (caddr_t)(iph + 1);
+	int optbytes, tail;
+
+	/* Bytes of options, then bytes of data that follow them in m. */
+	optbytes = (IP_VHL_HL(iph->ip_vhl) << 2) - sizeof (struct ip);
+	tail = m->m_len - (sizeof (struct ip) + optbytes);
+	bcopy(optbase + optbytes, optbase, (unsigned)tail);
+
+	m->m_len -= optbytes;
+	if (m->m_flags & M_PKTHDR)
+		m->m_pkthdr.len -= optbytes;
+	iph->ip_vhl = IP_MAKE_VHL(IPVERSION, sizeof(struct ip) >> 2);
+}
+
+u_char inetctlerrmap[PRC_NCMDS] = {	/* errno by PRC_* command */
+	0,		0,		0,		0,
+	0,		EMSGSIZE,	EHOSTDOWN,	EHOSTUNREACH,
+	EHOSTUNREACH,	EHOSTUNREACH,	ECONNREFUSED,	ECONNREFUSED,
+	EMSGSIZE,	EHOSTUNREACH,	0,		0,
+	0,		0,		0,		0,
+	ENOPROTOOPT,	ENETRESET
+};
+
+/*
+ * Forward a packet.  If some error occurs return the sender
+ * an icmp packet.  Note we can't always generate a meaningful
+ * icmp message because icmp doesn't have a large enough repertoire
+ * of codes and types.
+ *
+ * If not forwarding, just drop the packet.  This could be confusing
+ * if ipforwarding was zero but some routing protocol was advancing
+ * us as a gateway to somewhere.  However, we must let the routing
+ * protocol deal with that.
+ *
+ * The srcrt parameter indicates whether the packet is being forwarded
+ * via a source route.
+ */
+static void
+ip_forward(m, srcrt)
+	struct mbuf *m;
+	int srcrt;
+{
+	register struct ip *ip = mtod(m, struct ip *);
+	register struct rtentry *rt;
+	int error, type = 0, code = 0;
+	struct mbuf *mcopy;
+	n_long dest;
+	struct ifnet *destifp;
+#ifdef IPSEC
+	struct ifnet dummyifp;
+#endif
+
+	dest = 0;
+#ifdef DIAGNOSTIC
+	if (ipprintfs)
+		printf("forward: src %lx dst %lx ttl %x\n",
+		    (u_long)ip->ip_src.s_addr, (u_long)ip->ip_dst.s_addr,
+		    ip->ip_ttl);
+#endif
+
+
+	if (m->m_flags & (M_BCAST|M_MCAST) || in_canforward(ip->ip_dst) == 0) {
+		ipstat.ips_cantforward++;
+		m_freem(m);
+		return;
+	}
+#ifdef IPSTEALTH
+	if (!ipstealth) {
+#endif
+		if (ip->ip_ttl <= IPTTLDEC) {
+			icmp_error(m, ICMP_TIMXCEED, ICMP_TIMXCEED_INTRANS,
+			    dest, 0);
+			return;
+		}
+#ifdef IPSTEALTH
+	}
+#endif
+
+	if (ip_rtaddr(ip->ip_dst) == 0) {
+		icmp_error(m, ICMP_UNREACH, ICMP_UNREACH_HOST, dest, 0);
+		return;
+	} else
+		rt = ipforward_rt.ro_rt;
+
+	/*
+	 * Save the IP header and at most 8 bytes of the payload,
+	 * in case we need to generate an ICMP message to the src.
+	 *
+	 * We don't use m_copy() because it might return a reference
+	 * to a shared cluster. Both this function and ip_output()
+	 * assume exclusive access to the IP header in `m', so any
+	 * data in a cluster may change before we reach icmp_error().
+	 */
+	MGET(mcopy, M_DONTWAIT, m->m_type);
+	if (mcopy != NULL) {
+		M_COPY_PKTHDR(mcopy, m);
+		mcopy->m_len = imin((IP_VHL_HL(ip->ip_vhl) << 2) + 8,
+		    (int)ip->ip_len);
+		m_copydata(m, 0, mcopy->m_len, mtod(mcopy, caddr_t));
+	}
+
+#ifdef IPSTEALTH
+	if (!ipstealth) {
+#endif
+		ip->ip_ttl -= IPTTLDEC;
+#ifdef IPSTEALTH
+	}
+#endif
+
+	/*
+	 * If forwarding packet using same interface that it came in on,
+	 * perhaps should send a redirect to sender to shortcut a hop.
+	 * Only send redirect if source is sending directly to us,
+	 * and if packet was not source routed (or has any options).
+	 * Also, don't send redirect if forwarding using a default route
+	 * or a route modified by a redirect.
+	 */
+#define	satosin(sa)	((struct sockaddr_in *)(sa))
+	if (rt->rt_ifp == m->m_pkthdr.rcvif &&
+	    (rt->rt_flags & (RTF_DYNAMIC|RTF_MODIFIED)) == 0 &&
+	    satosin(rt_key(rt))->sin_addr.s_addr != 0 &&
+	    ipsendredirects && !srcrt) {
+#define	RTA(rt)	((struct in_ifaddr *)(rt->rt_ifa))
+		u_long src = ntohl(ip->ip_src.s_addr);
+
+		if (RTA(rt) &&
+		    (src & RTA(rt)->ia_subnetmask) == RTA(rt)->ia_subnet) {
+		    if (rt->rt_flags & RTF_GATEWAY)
+			dest = satosin(rt->rt_gateway)->sin_addr.s_addr;
+		    else
+			dest = ip->ip_dst.s_addr;
+		    /* Router requirements says to only send host redirects */
+		    type = ICMP_REDIRECT;
+		    code = ICMP_REDIRECT_HOST;
+#ifdef DIAGNOSTIC
+		    if (ipprintfs)
+		        printf("redirect (%d) to %lx\n", code, (u_long)dest);
+#endif
+		}
+	}
+
+	error = ip_output(m, (struct mbuf *)0, &ipforward_rt, 
+			  IP_FORWARDING, 0);
+	if (error)
+		ipstat.ips_cantforward++;
+	else {
+		ipstat.ips_forward++;
+		if (type)
+			ipstat.ips_redirectsent++;
+		else {
+			if (mcopy) {
+				ipflow_create(&ipforward_rt, mcopy);
+				m_freem(mcopy);
+			}
+			return;
+		}
+	}
+	if (mcopy == NULL)
+		return;
+	destifp = NULL;
+
+	switch (error) {
+
+	case 0:				/* forwarded, but need redirect */
+		/* type, code set above */
+		break;
+
+	case ENETUNREACH:		/* shouldn't happen, checked above */
+	case EHOSTUNREACH:
+	case ENETDOWN:
+	case EHOSTDOWN:
+	default:
+		type = ICMP_UNREACH;
+		code = ICMP_UNREACH_HOST;
+		break;
+
+	case EMSGSIZE:
+		type = ICMP_UNREACH;
+		code = ICMP_UNREACH_NEEDFRAG;
+#ifndef IPSEC
+		if (ipforward_rt.ro_rt)
+			destifp = ipforward_rt.ro_rt->rt_ifp;
+#else
+		/*
+		 * If the packet is routed over IPsec tunnel, tell the
+		 * originator the tunnel MTU.
+		 *	tunnel MTU = if MTU - sizeof(IP) - ESP/AH hdrsiz
+		 * XXX quickhack!!!
+		 */
+		if (ipforward_rt.ro_rt) {
+			struct secpolicy *sp = NULL;
+			int ipsecerror;
+			int ipsechdr;
+			struct route *ro;
+
+			sp = ipsec4_getpolicybyaddr(mcopy,
+						    IPSEC_DIR_OUTBOUND,
+			                            IP_FORWARDING,
+			                            &ipsecerror);
+
+			if (sp == NULL)
+				destifp = ipforward_rt.ro_rt->rt_ifp;
+			else {
+				/* count IPsec header size */
+				ipsechdr = ipsec4_hdrsiz(mcopy,
+							 IPSEC_DIR_OUTBOUND,
+							 NULL);
+
+				/*
+				 * find the correct route for outer IPv4
+				 * header, compute tunnel MTU.
+				 *
+				 * XXX BUG ALERT
+				 * The "dummyifp" code relies upon the fact
+				 * that icmp_error() touches only ifp->if_mtu.
+				 */
+				/*XXX*/
+				destifp = NULL;
+				if (sp->req != NULL
+				 && sp->req->sav != NULL
+				 && sp->req->sav->sah != NULL) {
+					ro = &sp->req->sav->sah->sa_route;
+					if (ro->ro_rt && ro->ro_rt->rt_ifp) {
+						dummyifp.if_mtu =
+						    ro->ro_rt->rt_ifp->if_mtu;
+						dummyifp.if_mtu -= ipsechdr;
+						destifp = &dummyifp;
+					}
+				}
+
+				key_freesp(sp);
+			}
+		}
+#endif /*IPSEC*/
+		ipstat.ips_cantfrag++;
+		break;
+
+	case ENOBUFS:
+		type = ICMP_SOURCEQUENCH;
+		code = 0;
+		break;
+
+	case EACCES:			/* ipfw denied packet */
+		m_freem(mcopy);
+		return;
+	}
+	icmp_error(mcopy, type, code, dest, destifp);
+}
+
+/*
+ * Build the chain of control-message mbufs selected by the socket options
+ * and PCB flags of `inp' for a received datagram.
+ *
+ * Each record is created with sbcreatecontrol() and stored through `mp'.
+ * After a successful allocation `mp' is advanced to the new mbuf's m_next,
+ * so the next record is appended behind it.  When sbcreatecontrol() returns
+ * NULL the record is omitted and `mp' is left where it was.
+ *
+ *	inp	PCB whose so_options / inp_flags select the records
+ *	mp	where the first control mbuf pointer is stored
+ *	ip	IP header of the datagram (supplies the destination address)
+ *	m	the datagram; only m_pkthdr.rcvif is consulted here
+ */
+void
+ip_savecontrol(inp, mp, ip, m)
+	register struct inpcb *inp;
+	register struct mbuf **mp;
+	register struct ip *ip;
+	register struct mbuf *m;
+{
+	/* SO_TIMESTAMP: attach the current time as read by microtime(). */
+	if (inp->inp_socket->so_options & SO_TIMESTAMP) {
+		struct timeval tv;
+
+		microtime(&tv);
+		*mp = sbcreatecontrol((caddr_t) &tv, sizeof(tv),
+			SCM_TIMESTAMP, SOL_SOCKET);
+		if (*mp)
+			mp = &(*mp)->m_next;
+	}
+	/* INP_RECVDSTADDR: attach the datagram's destination address. */
+	if (inp->inp_flags & INP_RECVDSTADDR) {
+		*mp = sbcreatecontrol((caddr_t) &ip->ip_dst,
+		    sizeof(struct in_addr), IP_RECVDSTADDR, IPPROTO_IP);
+		if (*mp)
+			mp = &(*mp)->m_next;
+	}
+#ifdef notyet
+	/* XXX
+	 * Moving these out of udp_input() made them even more broken
+	 * than they already were.
+	 */
+	/* options were tossed already */
+	if (inp->inp_flags & INP_RECVOPTS) {
+		*mp = sbcreatecontrol((caddr_t) opts_deleted_above,
+		    sizeof(struct in_addr), IP_RECVOPTS, IPPROTO_IP);
+		if (*mp)
+			mp = &(*mp)->m_next;
+	}
+	/* ip_srcroute doesn't do what we want here, need to fix */
+	if (inp->inp_flags & INP_RECVRETOPTS) {
+		*mp = sbcreatecontrol((caddr_t) ip_srcroute(),
+		    sizeof(struct in_addr), IP_RECVRETOPTS, IPPROTO_IP);
+		if (*mp)
+			mp = &(*mp)->m_next;
+	}
+#endif
+	/*
+	 * INP_RECVIF: attach a sockaddr_dl describing the receiving
+	 * interface, or a dummy one (index 0, all lengths 0) when no usable
+	 * link-level address can be copied.
+	 */
+	if (inp->inp_flags & INP_RECVIF) {
+		struct ifnet *ifp;
+		/* sockaddr_dl followed by 32 spare bytes of room */
+		struct sdlbuf {
+			struct sockaddr_dl sdl;
+			u_char	pad[32];
+		} sdlbuf;
+		struct sockaddr_dl *sdp;
+		struct sockaddr_dl *sdl2 = &sdlbuf.sdl;
+
+		/* Need a receive interface whose index is in 1..if_index. */
+		if (((ifp = m->m_pkthdr.rcvif)) 
+		&& ( ifp->if_index && (ifp->if_index <= if_index))) {
+			sdp = (struct sockaddr_dl *)(ifnet_addrs
+					[ifp->if_index - 1]->ifa_addr);
+			/*
+			 * Change our mind and don't try copy.
+			 * (Not a link-level address, or too long for sdlbuf.)
+			 */
+			if ((sdp->sdl_family != AF_LINK)
+			|| (sdp->sdl_len > sizeof(sdlbuf))) {
+				goto makedummy;
+			}
+			bcopy(sdp, sdl2, sdp->sdl_len);
+		} else {
+			/* Also entered by the goto above. */
+makedummy:	
+			sdl2->sdl_len
+				= offsetof(struct sockaddr_dl, sdl_data[0]);
+			sdl2->sdl_family = AF_LINK;
+			sdl2->sdl_index = 0;
+			sdl2->sdl_nlen = sdl2->sdl_alen = sdl2->sdl_slen = 0;
+		}
+		*mp = sbcreatecontrol((caddr_t) sdl2, sdl2->sdl_len,
+			IP_RECVIF, IPPROTO_IP);
+		if (*mp)
+			mp = &(*mp)->m_next;
+	}
+}
+
+/*
+ * Register `so' as the RSVP daemon socket.
+ *
+ * Returns EOPNOTSUPP unless `so' is a raw socket for IPPROTO_RSVP,
+ * EADDRINUSE when a daemon socket is already registered, and 0 on success.
+ */
+int
+ip_rsvp_init(struct socket *so)
+{
+	if (so->so_type != SOCK_RAW)
+		return (EOPNOTSUPP);
+	if (so->so_proto->pr_protocol != IPPROTO_RSVP)
+		return (EOPNOTSUPP);
+
+	if (ip_rsvpd != NULL)
+		return (EADDRINUSE);
+
+	ip_rsvpd = so;
+
+	/*
+	 * ip_rsvp_on records whether this code has already contributed to
+	 * rsvp_on, so the shared counter is bumped at most once.
+	 */
+	if (ip_rsvp_on == 0) {
+		rsvp_on++;
+		ip_rsvp_on = 1;
+	}
+
+	return (0);
+}
+
+/*
+ * Forget the RSVP daemon socket.  Always returns 0.
+ */
+int
+ip_rsvp_done(void)
+{
+	ip_rsvpd = NULL;
+
+	/*
+	 * Only give back the rsvp_on reference if ip_rsvp_on says one was
+	 * taken, so the shared counter is never decremented twice.
+	 */
+	if (ip_rsvp_on != 0) {
+		rsvp_on--;
+		ip_rsvp_on = 0;
+	}
+
+	return (0);
+}
diff --git a/sys/netinet/ip_log.c b/sys/netinet/ip_log.c
new file mode 100644
index 0000000..082a0a3
--- /dev/null
+++ b/sys/netinet/ip_log.c
@@ -0,0 +1,507 @@
+/*
+ * Copyright (C) 1997-2000 by Darren Reed.
+ *
+ * Redistribution and use in source and binary forms are permitted
+ * provided that this notice is preserved and due credit is given
+ * to the original author and the contributors.
+ *
+ * $Id: ip_log.c,v 2.5.2.1 2000/07/19 13:11:47 darrenr Exp $
+ * $FreeBSD$
+ */
+#include <sys/param.h>
+#if defined(KERNEL) && !defined(_KERNEL)
+# define       _KERNEL
+#endif
+#if defined(__NetBSD__) && (NetBSD >= 199905) && !defined(IPFILTER_LKM)
+# include "opt_ipfilter_log.h"
+#endif
+#ifdef  __FreeBSD__
+# if defined(IPFILTER_LKM) || defined(_KERNEL)
+#  if !defined(__FreeBSD_version) 
+#   include <sys/osreldate.h>
+#  endif
+#  if !defined(IPFILTER_LKM)
+#   if defined(__FreeBSD_version) && (__FreeBSD_version >= 300000)
+#    include "opt_ipfilter.h"
+#   endif
+#  endif
+# else
+#  ifdef KLD_MODULE
+#   include <osreldate.h>
+#  endif
+# endif
+#endif
+#ifdef  IPFILTER_LOG
+# ifndef SOLARIS
+#  define SOLARIS (defined(sun) && (defined(__svr4__) || defined(__SVR4)))
+# endif
+# ifndef _KERNEL
+#  include <stdio.h>
+#  include <string.h>
+#  include <stdlib.h>
+#  include <ctype.h>
+# endif
+# include <sys/errno.h>
+# include <sys/types.h>
+# include <sys/file.h>
+# if __FreeBSD_version >= 220000 && defined(_KERNEL)
+#  include <sys/fcntl.h>
+#  include <sys/filio.h>
+# else
+#  include <sys/ioctl.h>
+# endif
+# include <sys/time.h>
+# if defined(_KERNEL) && !defined(linux)
+#  include <sys/systm.h>
+# endif
+# include <sys/uio.h>
+# if !SOLARIS
+#  if (NetBSD > 199609) || (OpenBSD > 199603) || (__FreeBSD_version >= 300000)
+#   include <sys/dirent.h>
+#  else
+#   include <sys/dir.h>
+#  endif
+#  ifndef linux
+#   include <sys/mbuf.h>
+#  endif
+# else
+#  include <sys/filio.h>
+#  include <sys/cred.h>
+#  include <sys/ddi.h>
+#  include <sys/sunddi.h>
+#  include <sys/ksynch.h>
+#  include <sys/kmem.h>
+#  include <sys/mkdev.h>
+#  include <sys/dditypes.h>
+#  include <sys/cmn_err.h>
+# endif
+# ifndef linux
+#  include <sys/protosw.h>
+# endif
+# include <sys/socket.h>
+
+# include <net/if.h>
+# ifdef sun
+#  include <net/af.h>
+# endif
+# if __FreeBSD_version >= 300000
+#  include <net/if_var.h>
+# endif
+# include <net/route.h>
+# include <netinet/in.h>
+# ifdef __sgi
+#  include <sys/ddi.h>
+#  ifdef IFF_DRVRLOCK /* IRIX6 */
+#   include <sys/hashing.h>
+#  endif
+# endif
+# if !defined(linux) && !(defined(__sgi) && !defined(IFF_DRVRLOCK)) /*IRIX<6*/
+#  include <netinet/in_var.h>
+# endif
+# include <netinet/in_systm.h>
+# include <netinet/ip.h>
+# include <netinet/tcp.h>
+# include <netinet/udp.h>
+# include <netinet/ip_icmp.h>
+# ifndef linux
+#  include <netinet/ip_var.h>
+# endif
+# ifndef _KERNEL
+#  include <syslog.h>
+# endif
+# include "netinet/ip_compat.h"
+# include <netinet/tcpip.h>
+# include "netinet/ip_fil.h"
+# include "netinet/ip_proxy.h"
+# include "netinet/ip_nat.h"
+# include "netinet/ip_frag.h"
+# include "netinet/ip_state.h"
+# include "netinet/ip_auth.h"
+# if (__FreeBSD_version >= 300000)
+#  include <sys/malloc.h>
+# endif
+
+# ifndef MIN
+#  define	MIN(a,b)	(((a)<(b))?(a):(b))
+# endif
+
+
+# if SOLARIS || defined(__sgi)
+extern	kmutex_t	ipl_mutex;
+#  if SOLARIS
+extern	kcondvar_t	iplwait;
+#  endif
+# endif
+
+iplog_t	**iplh[IPL_LOGMAX+1], *iplt[IPL_LOGMAX+1], *ipll[IPL_LOGMAX+1];
+size_t	iplused[IPL_LOGMAX+1];
+static fr_info_t	iplcrc[IPL_LOGMAX+1];
+# ifdef	linux
+static struct wait_queue *iplwait[IPL_LOGMAX+1];
+# endif
+
+
+/*
+ * Initialise the log buffers and pointers for every log device: empty
+ * record list, append pointer aimed at the list head, zero bytes in use,
+ * and a zeroed copy of the "last packet logged" information.
+ */
+void ipflog_init()
+{
+	int	dev;
+
+	for (dev = 0; dev <= IPL_LOGMAX; dev++) {
+		bzero((char *)&iplcrc[dev], sizeof(iplcrc[dev]));
+		iplused[dev] = 0;
+		iplt[dev] = NULL;
+		iplh[dev] = &iplt[dev];
+		ipll[dev] = NULL;
+	}
+}
+
+
+/*
+ * ipflog
+ * Create a log record for a packet given that it has been triggered by a
+ * rule (or the default setting).  Calculate the transport protocol header
+ * size using predetermined size of a couple of popular protocols and thus
+ * how much data to copy into the log, including part of the data body if
+ * requested.
+ *
+ *	flags	rule flags; FR_LOGBODY requests up to 128 bytes of body
+ *	ip	IP header (only consulted on Solaris, to pick the copy method)
+ *	fin	packet information gathered by the filter
+ *	m	the packet itself
+ *
+ * Returns the result of ipllog() for the assembled record.
+ */
+int ipflog(flags, ip, fin, m)
+u_int flags;
+ip_t *ip;
+fr_info_t *fin;
+mb_t *m;
+{
+	ipflog_t ipfl;
+	register size_t mlen, hlen;
+	size_t sizes[2];
+	void *ptrs[2];
+	int types[2];
+	u_char p;
+# if SOLARIS
+	ill_t *ifp = fin->fin_ifp;
+# else
+	struct ifnet *ifp = fin->fin_ifp;
+# endif
+
+	/*
+	 * Start from an all-zero record.  The whole structure is copied into
+	 * the log buffer by ipllog() and later handed to the reader, but the
+	 * code below only fills in part of it (the interface name copy stops
+	 * at the first NUL, for instance), so anything left untouched would
+	 * otherwise carry stale stack contents.
+	 */
+	bzero((char *)&ipfl, sizeof(ipfl));
+
+	/*
+	 * calculate header size.
+	 */
+	hlen = fin->fin_hlen;
+	if (fin->fin_off == 0) {
+		p = fin->fin_fi.fi_p;
+		if (p == IPPROTO_TCP)
+			hlen += MIN(sizeof(tcphdr_t), fin->fin_dlen);
+		else if (p == IPPROTO_UDP)
+			hlen += MIN(sizeof(udphdr_t), fin->fin_dlen);
+		else if (p == IPPROTO_ICMP) {
+			struct icmp *icmp;
+
+			icmp = (struct icmp *)fin->fin_dp;
+	 
+			/*
+			 * For ICMP, if the packet is an error packet, also
+			 * include the information about the packet which
+			 * caused the error.
+			 */
+			switch (icmp->icmp_type)
+			{
+			case ICMP_UNREACH :
+			case ICMP_SOURCEQUENCH :
+			case ICMP_REDIRECT :
+			case ICMP_TIMXCEED :
+			case ICMP_PARAMPROB :
+				hlen += MIN(sizeof(struct icmp) + 8,
+					    fin->fin_dlen);
+				break;
+			default :
+				hlen += MIN(sizeof(struct icmp),
+					    fin->fin_dlen);
+				break;
+			}
+		}
+	}
+	/*
+	 * Get the interface number and name to which this packet is
+	 * currently associated.
+	 */
+# if SOLARIS
+	ipfl.fl_unit = (u_char)ifp->ill_ppa;
+	bcopy(ifp->ill_name, ipfl.fl_ifname, MIN(ifp->ill_name_length, 4));
+	mlen = (flags & FR_LOGBODY) ? MIN(msgdsize(m) - hlen, 128) : 0;
+# else
+#  if (defined(NetBSD) && (NetBSD <= 1991011) && (NetBSD >= 199603)) || \
+	(defined(OpenBSD) && (OpenBSD >= 199603))
+	strncpy(ipfl.fl_ifname, ifp->if_xname, IFNAMSIZ);
+#  else
+#   ifndef linux
+	ipfl.fl_unit = (u_char)ifp->if_unit;
+#   endif
+	/* Copy at most four name characters, stopping after a NUL. */
+	if ((ipfl.fl_ifname[0] = ifp->if_name[0]))
+		if ((ipfl.fl_ifname[1] = ifp->if_name[1]))
+			if ((ipfl.fl_ifname[2] = ifp->if_name[2]))
+				ipfl.fl_ifname[3] = ifp->if_name[3];
+#  endif
+	mlen = (flags & FR_LOGBODY) ? MIN(fin->fin_plen - hlen, 128) : 0;
+# endif
+	ipfl.fl_plen = (u_char)mlen;
+	ipfl.fl_hlen = (u_char)hlen;
+	ipfl.fl_rule = fin->fin_rule;
+	ipfl.fl_group = fin->fin_group;
+	if (fin->fin_fr != NULL)
+		ipfl.fl_loglevel = fin->fin_fr->fr_loglevel;
+	else
+		ipfl.fl_loglevel = 0xffff;
+	ipfl.fl_flags = flags;
+	/* Item 0: the fixed log header, copied with bcopy (type 0). */
+	ptrs[0] = (void *)&ipfl;
+	sizes[0] = sizeof(ipfl);
+	types[0] = 0;
+	/* Item 1: hlen + mlen bytes of the packet itself. */
+# if SOLARIS
+	/*
+	 * Are we copied from the mblk or an aligned array ?
+	 */
+	if (ip == (ip_t *)m->b_rptr) {
+		ptrs[1] = m;
+		sizes[1] = hlen + mlen;
+		types[1] = 1;
+	} else {
+		ptrs[1] = ip;
+		sizes[1] = hlen + mlen;
+		types[1] = 0;
+	}
+# else
+	ptrs[1] = m;
+	sizes[1] = hlen + mlen;
+	types[1] = 1;
+# endif
+	return ipllog(IPL_LOGIPF, fin, ptrs, sizes, types, 2);
+}
+
+
+/*
+ * ipllog
+ * Queue one log record on log device `dev'.
+ *
+ *	dev	index of the log device (used to index the ipl* arrays)
+ *	fin	packet information used for duplicate detection, or NULL
+ *	items	array of `cnt' pointers to the pieces of the record
+ *	itemsz	size in bytes of each piece
+ *	types	how to copy each piece: 0 = bcopy from memory,
+ *		1 = copy out of a packet buffer (mbuf / mblk)
+ *	cnt	number of pieces
+ *
+ * Returns 1 when the record was queued or merged into the previous record
+ * by bumping its count, 0 when memory could not be allocated or the record
+ * would push the device over IPLLOGSIZE.
+ */
+int ipllog(dev, fin, items, itemsz, types, cnt)
+int dev;
+fr_info_t *fin;
+void **items;
+size_t *itemsz;
+int *types, cnt;
+{
+	caddr_t buf, s;
+	iplog_t *ipl;
+	size_t len;
+	int i;
+ 
+	/*
+	 * Check to see if this log record has a CRC which matches the last
+	 * record logged.  If it does, just up the count on the previous one
+	 * rather than create a new one.
+	 *
+	 * NOTE(review): iplcrc[dev] is overwritten with this packet's info
+	 * before the record is known to be queued.  If KMALLOCS fails or the
+	 * IPLLOGSIZE check below rejects it, a following identical packet
+	 * will match iplcrc[dev] and bump ipl_count on ipll[dev], which is
+	 * the previously queued (different) record -- confirm this is
+	 * intended.
+	 */
+	MUTEX_ENTER(&ipl_mutex);
+	if (fin != NULL) {
+		if ((ipll[dev] != NULL) &&
+		    bcmp((char *)fin, (char *)&iplcrc[dev], FI_CSIZE) == 0) {
+			ipll[dev]->ipl_count++;
+			MUTEX_EXIT(&ipl_mutex);
+			return 1;
+		}
+		bcopy((char *)fin, (char *)&iplcrc[dev], FI_CSIZE);
+	} else
+		bzero((char *)&iplcrc[dev], FI_CSIZE);
+	MUTEX_EXIT(&ipl_mutex);
+
+	/*
+	 * Get the total amount of data to be logged.
+	 */
+	for (i = 0, len = sizeof(iplog_t); i < cnt; i++)
+		len += itemsz[i];
+
+	/*
+	 * check that we have space to record this information and can
+	 * allocate that much.
+	 */
+	KMALLOCS(buf, caddr_t, len);
+	if (!buf)
+		return 0;
+	MUTEX_ENTER(&ipl_mutex);
+	if ((iplused[dev] + len) > IPLLOGSIZE) {
+		MUTEX_EXIT(&ipl_mutex);
+		KFREES(buf, len);
+		return 0;
+	}
+	iplused[dev] += len;
+	MUTEX_EXIT(&ipl_mutex);
+
+	/*
+	 * advance the log pointer to the next empty record and deduct the
+	 * amount of space we're going to use.
+	 */
+	ipl = (iplog_t *)buf;
+	ipl->ipl_magic = IPL_MAGIC;
+	ipl->ipl_count = 1;
+	ipl->ipl_next = NULL;
+	ipl->ipl_dsize = len;
+	/* Timestamp the record, written starting at ipl_sec. */
+# if SOLARIS || defined(sun) || defined(linux)
+	uniqtime((struct timeval *)&ipl->ipl_sec);
+# else
+#  if BSD >= 199306 || defined(__FreeBSD__) || defined(__sgi)
+	microtime((struct timeval *)&ipl->ipl_sec);
+#  endif
+# endif
+
+	/*
+	 * Loop through all the items to be logged, copying each one to the
+	 * buffer.  Use bcopy for normal data or the mb_t copyout routine.
+	 * An item with any other type value is not copied, but its space in
+	 * the buffer is still skipped.
+	 */
+	for (i = 0, s = buf + sizeof(*ipl); i < cnt; i++) {
+		if (types[i] == 0)
+			bcopy(items[i], s, itemsz[i]);
+		else if (types[i] == 1) {
+# if SOLARIS
+			copyout_mblk(items[i], 0, itemsz[i], s);
+# else
+			m_copydata(items[i], 0, itemsz[i], s);
+# endif
+		}
+		s += itemsz[i];
+	}
+	/*
+	 * Append the record: it becomes the "last logged" record, is linked
+	 * through the append pointer, and the append pointer moves to its
+	 * ipl_next field.  Then wake up any reader.
+	 */
+	MUTEX_ENTER(&ipl_mutex);
+	ipll[dev] = ipl;
+	*iplh[dev] = ipl;
+	iplh[dev] = &ipl->ipl_next;
+# if SOLARIS
+	cv_signal(&iplwait);
+	mutex_exit(&ipl_mutex);
+# else
+	MUTEX_EXIT(&ipl_mutex);
+#  ifdef linux
+	wake_up_interruptible(&iplwait[dev]);
+#  else
+	wakeup(&iplh[dev]);
+#  endif
+# endif
+	return 1;
+}
+
+
+/*
+ * ipflog_read
+ * Copy queued log records for log device `unit' out to the caller,
+ * sleeping until at least one record is available.
+ *
+ *	unit	log device minor number; must not exceed IPL_LOGMAX
+ *	uio	describes the destination; its residual count must be 0
+ *		(nothing to do) or between sizeof(iplog_t) and IPLLOGSIZE
+ *
+ * Only whole records are copied; copying stops at the first record that
+ * does not fit in the remaining space.  Records copied successfully are
+ * freed; a record whose copy fails is put back at the head of the queue.
+ *
+ * Returns 0 or an errno value (ENXIO, EINVAL, the error from the sleep or
+ * from the copy).  On linux the number of bytes copied is returned on
+ * success and errors are negated.
+ */
+int ipflog_read(unit, uio)
+minor_t unit;
+struct uio *uio;
+{
+	size_t dlen, copied;
+	int error = 0;
+	iplog_t *ipl;
+# if defined(_KERNEL) && !SOLARIS
+	int s;
+# endif
+
+	/*
+	 * Sanity checks.  Make sure the minor # is valid and we're copying
+	 * a valid chunk of data.
+	 */
+	if (IPL_LOGMAX < unit)
+		return ENXIO;
+	if (!uio->uio_resid)
+		return 0;
+	if ((uio->uio_resid < sizeof(iplog_t)) ||
+	    (uio->uio_resid > IPLLOGSIZE))
+		return EINVAL;
+ 
+	/*
+	 * Lock the log so we can snapshot the variables.  Wait for a signal
+	 * if the log is empty.
+	 */
+	SPL_NET(s);
+	MUTEX_ENTER(&ipl_mutex);
+
+	while (!iplused[unit] || !iplt[unit]) {
+# if SOLARIS && defined(_KERNEL)
+		if (!cv_wait_sig(&iplwait, &ipl_mutex)) {
+			MUTEX_EXIT(&ipl_mutex);
+			return EINTR;
+		}
+# else
+#  ifdef linux
+		interruptible_sleep_on(&iplwait[unit]);
+		if (current->signal & ~current->blocked)
+			return -EINTR;
+#  else
+		MUTEX_EXIT(&ipl_mutex);
+		SPL_X(s);
+		error = SLEEP(&iplh[unit], "ipl sleep");
+		if (error)
+			return error;
+		SPL_NET(s);
+		MUTEX_ENTER(&ipl_mutex);
+#  endif /* linux */
+# endif /* SOLARIS */
+	}
+
+# if BSD >= 199306 || defined(__FreeBSD__)
+	uio->uio_rw = UIO_READ;
+# endif
+
+	for (copied = 0; (ipl = iplt[unit]); copied += dlen) {
+		dlen = ipl->ipl_dsize;
+		if (dlen > uio->uio_resid)
+			break;
+		/*
+		 * Don't hold the mutex over the uiomove call.
+		 */
+		iplt[unit] = ipl->ipl_next;
+		iplused[unit] -= dlen;
+		/*
+		 * If that was the last record, the append pointer and the
+		 * "last logged" pointer still refer to it.  Reset them
+		 * before the lock is dropped, otherwise ipllog() could link
+		 * a new record through, or bump the count of, a record that
+		 * is being copied out and is about to be freed.
+		 */
+		if (iplt[unit] == NULL) {
+			iplh[unit] = &iplt[unit];
+			ipll[unit] = NULL;
+		}
+		MUTEX_EXIT(&ipl_mutex);
+		SPL_X(s);
+		error = UIOMOVE((caddr_t)ipl, dlen, UIO_READ, uio);
+		if (error) {
+			/* Copy failed: put the record back at the head. */
+			SPL_NET(s);
+			MUTEX_ENTER(&ipl_mutex);
+			ipl->ipl_next = iplt[unit];
+			iplt[unit] = ipl;
+			/*
+			 * If the queue was empty, this record is also the
+			 * tail again, so later appends must go after it.
+			 */
+			if (ipl->ipl_next == NULL)
+				iplh[unit] = &ipl->ipl_next;
+			iplused[unit] += dlen;
+			break;
+		}
+		KFREES((caddr_t)ipl, dlen);
+		SPL_NET(s);
+		MUTEX_ENTER(&ipl_mutex);
+	}
+	if (!iplt[unit]) {
+		iplused[unit] = 0;
+		iplh[unit] = &iplt[unit];
+		ipll[unit] = NULL;
+	}
+
+	MUTEX_EXIT(&ipl_mutex);
+	SPL_X(s);
+# ifdef 	linux
+	if (!error)
+		return (int)copied;
+	return -error;
+# else
+	return error;
+# endif
+}
+
+
+/*
+ * ipflog_clear
+ * Throw away every record queued on log device `unit' and reset the
+ * device's bookkeeping.  Returns the number of bytes that were accounted
+ * as in use before the flush.
+ */
+int ipflog_clear(unit)
+minor_t unit;
+{
+	iplog_t *rec;
+	int freed;
+
+	MUTEX_ENTER(&ipl_mutex);
+	for (;;) {
+		rec = iplt[unit];
+		if (rec == NULL)
+			break;
+		/* Unlink from the head before releasing the record. */
+		iplt[unit] = rec->ipl_next;
+		KFREES((caddr_t)rec, rec->ipl_dsize);
+	}
+	iplh[unit] = &iplt[unit];
+	ipll[unit] = NULL;
+	freed = iplused[unit];
+	iplused[unit] = 0;
+	bzero((char *)&iplcrc[unit], FI_CSIZE);
+	MUTEX_EXIT(&ipl_mutex);
+
+	return freed;
+}
+#endif /* IPFILTER_LOG */
diff --git a/sys/netinet/ip_mroute.c b/sys/netinet/ip_mroute.c
new file mode 100644
index 0000000..b6a9fca
--- /dev/null
+++ b/sys/netinet/ip_mroute.c
@@ -0,0 +1,2263 @@
+/*
+ * IP multicast forwarding procedures
+ *
+ * Written by David Waitzman, BBN Labs, August 1988.
+ * Modified by Steve Deering, Stanford, February 1989.
+ * Modified by Mark J. Steiglitz, Stanford, May, 1991
+ * Modified by Van Jacobson, LBL, January 1993
+ * Modified by Ajit Thyagarajan, PARC, August 1993
+ * Modified by Bill Fenner, PARC, April 1995
+ *
+ * MROUTING Revision: 3.5
+ * $FreeBSD$
+ */
+
+#include "opt_mrouting.h"
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/malloc.h>
+#include <sys/mbuf.h>
+#include <sys/socket.h>
+#include <sys/socketvar.h>
+#include <sys/protosw.h>
+#include <sys/time.h>
+#include <sys/kernel.h>
+#include <sys/sockio.h>
+#include <sys/syslog.h>
+#include <net/if.h>
+#include <net/route.h>
+#include <netinet/in.h>
+#include <netinet/in_systm.h>
+#include <netinet/ip.h>
+#include <netinet/ip_var.h>
+#include <netinet/in_var.h>
+#include <netinet/igmp.h>
+#include <netinet/ip_mroute.h>
+#include <netinet/udp.h>
+#include <machine/in_cksum.h>
+
+#ifndef NTOHL
+#if BYTE_ORDER != BIG_ENDIAN
+#define NTOHL(d) ((d) = ntohl((d)))
+#define NTOHS(d) ((d) = ntohs((u_short)(d)))
+#define HTONL(d) ((d) = htonl((d)))
+#define HTONS(d) ((d) = htons((u_short)(d)))
+#else
+#define NTOHL(d)
+#define NTOHS(d)
+#define HTONL(d)
+#define HTONS(d)
+#endif
+#endif
+
+#ifndef MROUTING
+extern u_long	_ip_mcast_src __P((int vifi));
+extern int	_ip_mforward __P((struct ip *ip, struct ifnet *ifp,
+				  struct mbuf *m, struct ip_moptions *imo));
+extern int	_ip_mrouter_done __P((void));
+extern int	_ip_mrouter_get __P((struct socket *so, struct sockopt *sopt));
+extern int	_ip_mrouter_set __P((struct socket *so, struct sockopt *sopt));
+extern int	_mrt_ioctl __P((int req, caddr_t data, struct proc *p));
+
+/*
+ * Dummy routines and globals used when multicast routing is not compiled in.
+ */
+
+struct socket  *ip_mrouter  = NULL;
+u_int		rsvpdebug = 0;
+
+/*
+ * Stub used when MROUTING is not compiled in: every multicast-router
+ * setsockopt request is refused with EOPNOTSUPP.
+ */
+int
+_ip_mrouter_set(struct socket *so, struct sockopt *sopt)
+{
+	return (EOPNOTSUPP);
+}
+
+int (*ip_mrouter_set)(struct socket *, struct sockopt *) = _ip_mrouter_set;
+
+
+/*
+ * Stub used when MROUTING is not compiled in: every multicast-router
+ * getsockopt request is refused with EOPNOTSUPP.
+ */
+int
+_ip_mrouter_get(struct socket *so, struct sockopt *sopt)
+{
+	return (EOPNOTSUPP);
+}
+
+int (*ip_mrouter_get)(struct socket *, struct sockopt *) = _ip_mrouter_get;
+
+/*
+ * Stub used when MROUTING is not compiled in: there is nothing to tear
+ * down, so report success.
+ */
+int
+_ip_mrouter_done(void)
+{
+	return (0);
+}
+
+int (*ip_mrouter_done)(void) = _ip_mrouter_done;
+
+/*
+ * Stub used when MROUTING is not compiled in: does nothing with the
+ * packet and returns 0.
+ */
+int
+_ip_mforward(struct ip *ip, struct ifnet *ifp, struct mbuf *m,
+	     struct ip_moptions *imo)
+{
+	return (0);
+}
+
+int (*ip_mforward)(struct ip *, struct ifnet *, struct mbuf *,
+		   struct ip_moptions *) = _ip_mforward;
+
+/*
+ * Stub used when MROUTING is not compiled in: every multicast-routing
+ * ioctl is refused with EOPNOTSUPP.
+ */
+int
+_mrt_ioctl(int req, caddr_t data, struct proc *p)
+{
+	return (EOPNOTSUPP);
+}
+
+int (*mrt_ioctl)(int, caddr_t, struct proc *) = _mrt_ioctl;
+
+/*
+ * RSVP input when MROUTING is not compiled in.
+ *
+ * Packets can still arrive with rsvp_on = 0 if there is a local member of
+ * the group the RSVP packet is addressed to; those are thrown away.  With
+ * RSVP on, the packet is passed to rip_input() when an RSVP daemon socket
+ * is registered, and dropped otherwise.
+ */
+void
+rsvp_input(m, off, proto)		/* XXX must fixup manually */
+	struct mbuf *m;
+	int off;
+	int proto;
+{
+    if (rsvp_on && ip_rsvpd != NULL) {
+	if (rsvpdebug)
+	    printf("rsvp_input: Sending packet up old-style socket\n");
+	rip_input(m, off, proto);
+	return;
+    }
+
+    /* RSVP is off, or nobody is listening: drop the packet. */
+    m_freem(m);
+}
+
+/*
+ * IP-in-IP input when MROUTING is not compiled in: hand the packet
+ * straight to rip_input().
+ */
+void
+ipip_input(struct mbuf *m, int off, int proto)	/* XXX must fixup manually */
+{
+	rip_input(m, off, proto);
+}
+
+int (*legal_vif_num)(int) = 0;
+
+/*
+ * Stub used when MROUTING is not compiled in; always answers INADDR_ANY.
+ *
+ * This should never be called, since IP_MULTICAST_VIF should fail, but
+ * just in case it does get called, the code a little lower in ip_output
+ * will assign the packet a local address.
+ */
+u_long
+_ip_mcast_src(int vifi)
+{
+	return (INADDR_ANY);
+}
+u_long (*ip_mcast_src)(int) = _ip_mcast_src;
+
+/*
+ * Stub used when MROUTING is not compiled in: always fails with EINVAL.
+ */
+int
+ip_rsvp_vif_init(struct socket *so, struct sockopt *sopt)
+{
+	return (EINVAL);
+}
+
+/*
+ * Stub used when MROUTING is not compiled in: always fails with EINVAL.
+ */
+int
+ip_rsvp_vif_done(struct socket *so, struct sockopt *sopt)
+{
+	return (EINVAL);
+}
+
+/*
+ * Stub used when MROUTING is not compiled in: nothing to undo.
+ */
+void
+ip_rsvp_force_done(struct socket *so)
+{
+}
+
+#else /* MROUTING */
+
+#define M_HASCL(m)	((m)->m_flags & M_EXT)
+
+#define INSIZ		sizeof(struct in_addr)
+#define	same(a1, a2) \
+	(bcmp((caddr_t)(a1), (caddr_t)(a2), INSIZ) == 0)
+
+static MALLOC_DEFINE(M_MRTABLE, "mroutetbl", "multicast routing tables");
+
+/*
+ * Globals.  All but ip_mrouter and ip_mrtproto could be static,
+ * except for netstat or debugging purposes.
+ */
+#ifndef MROUTE_LKM
+struct socket  *ip_mrouter  = NULL;
+static struct mrtstat	mrtstat;
+#else /* MROUTE_LKM */
+extern void	X_ipip_input __P((struct mbuf *m, int iphlen));
+extern struct mrtstat mrtstat;
+static int ip_mrtproto;
+#endif
+
+#define NO_RTE_FOUND 	0x1
+#define RTE_FOUND	0x2
+
+static struct mfc	*mfctable[MFCTBLSIZ];
+static u_char		nexpire[MFCTBLSIZ];
+static struct vif	viftable[MAXVIFS];
+static u_int	mrtdebug = 0;	  /* debug level 	*/
+#define		DEBUG_MFC	0x02
+#define		DEBUG_FORWARD	0x04
+#define		DEBUG_EXPIRE	0x08
+#define		DEBUG_XMIT	0x10
+static u_int  	tbfdebug = 0;     /* tbf debug level 	*/
+static u_int	rsvpdebug = 0;	  /* rsvp debug level   */
+
+static struct callout_handle expire_upcalls_ch;
+
+#define		EXPIRE_TIMEOUT	(hz / 4)	/* 4x / second		*/
+#define		UPCALL_EXPIRE	6		/* number of timeouts	*/
+
+/*
+ * Define the token bucket filter structures
+ * tbftable -> each vif has one of these for storing info 
+ */
+
+static struct tbf tbftable[MAXVIFS];
+#define		TBF_REPROCESS	(hz / 100)	/* 100x / second */
+
+/*
+ * 'Interfaces' associated with decapsulator (so we can tell
+ * packets that went through it from ones that get reflected
+ * by a broken gateway).  These interfaces are never linked into
+ * the system ifnet list & no routes point to them.  I.e., packets
+ * can't be sent this way.  They only exist as a placeholder for
+ * multicast source verification.
+ */
+static struct ifnet multicast_decap_if[MAXVIFS];
+
+#define ENCAP_TTL 64
+#define ENCAP_PROTO IPPROTO_IPIP	/* 4 */
+
+/* prototype IP hdr for encapsulated packets */
+static struct ip multicast_encap_iphdr = {
+#if BYTE_ORDER == LITTLE_ENDIAN
+	sizeof(struct ip) >> 2, IPVERSION,
+#else
+	IPVERSION, sizeof(struct ip) >> 2,
+#endif
+	0,				/* tos */
+	sizeof(struct ip),		/* total length */
+	0,				/* id */
+	0,				/* frag offset */
+	ENCAP_TTL, ENCAP_PROTO,	
+	0,				/* checksum */
+};
+
+/*
+ * Private variables.
+ */
+static vifi_t	   numvifs = 0;
+static int have_encap_tunnel = 0;
+
+/*
+ * one-back cache used by ipip_input to locate a tunnel's vif
+ * given a datagram's src ip address.
+ */
+static u_long last_encap_src;
+static struct vif *last_encap_vif;
+
+static u_long	X_ip_mcast_src __P((int vifi));
+static int	X_ip_mforward __P((struct ip *ip, struct ifnet *ifp, struct mbuf *m, struct ip_moptions *imo));
+static int	X_ip_mrouter_done __P((void));
+static int	X_ip_mrouter_get __P((struct socket *so, struct sockopt *m));
+static int	X_ip_mrouter_set __P((struct socket *so, struct sockopt *m));
+static int	X_legal_vif_num __P((int vif));
+static int	X_mrt_ioctl __P((int cmd, caddr_t data));
+
+static int get_sg_cnt(struct sioc_sg_req *);
+static int get_vif_cnt(struct sioc_vif_req *);
+static int ip_mrouter_init(struct socket *, int);
+static int add_vif(struct vifctl *);
+static int del_vif(vifi_t);
+static int add_mfc(struct mfcctl *);
+static int del_mfc(struct mfcctl *);
+static int socket_send(struct socket *, struct mbuf *, struct sockaddr_in *);
+static int set_assert(int);
+static void expire_upcalls(void *);
+static int ip_mdq(struct mbuf *, struct ifnet *, struct mfc *,
+		  vifi_t);
+static void phyint_send(struct ip *, struct vif *, struct mbuf *);
+static void encap_send(struct ip *, struct vif *, struct mbuf *);
+static void tbf_control(struct vif *, struct mbuf *, struct ip *, u_long);
+static void tbf_queue(struct vif *, struct mbuf *);
+static void tbf_process_q(struct vif *);
+static void tbf_reprocess_q(void *);
+static int tbf_dq_sel(struct vif *, struct ip *);
+static void tbf_send_packet(struct vif *, struct mbuf *);
+static void tbf_update_tokens(struct vif *);
+static int priority(struct vif *, struct ip *);
+void multiencap_decap(struct mbuf *);
+
+/*
+ * whether or not special PIM assert processing is enabled.
+ */
+static int pim_assert;
+/*
+ * Rate limit for assert notification messages, in usec
+ */
+#define ASSERT_MSG_TIME		3000000
+
+/*
+ * Hash function for a source, group entry
+ */
+#define MFCHASH(a, g) MFCHASHMOD(((a) >> 20) ^ ((a) >> 10) ^ (a) ^ \
+			((g) >> 20) ^ ((g) >> 10) ^ (g))
+
+/*
+ * Find a route for a given origin IP address and Multicast group address
+ * Type of service parameter to be added in the future!!!
+ *
+ * MFCFIND(o, g, rt) walks the mfctable bucket selected by MFCHASH(o, g)
+ * and stores in `rt' the first entry whose origin is `o', whose group is
+ * `g' and which has no stalled packets queued (mfc_stall == NULL); `rt'
+ * is set to NULL when there is no such entry.  Every use counts one
+ * lookup in mrtstat, and one miss when nothing is found.
+ *
+ * `o' and `g' are expanded several times and without parentheses, so pass
+ * simple, side-effect-free expressions.  `rt' must be an assignable
+ * struct mfc pointer.
+ */
+
+#define MFCFIND(o, g, rt) { \
+	register struct mfc *_rt = mfctable[MFCHASH(o,g)]; \
+	rt = NULL; \
+	++mrtstat.mrts_mfc_lookups; \
+	while (_rt) { \
+		if ((_rt->mfc_origin.s_addr == o) && \
+		    (_rt->mfc_mcastgrp.s_addr == g) && \
+		    (_rt->mfc_stall == NULL)) { \
+			rt = _rt; \
+			break; \
+		} \
+		_rt = _rt->mfc_next; \
+	} \
+	if (rt == NULL) { \
+		++mrtstat.mrts_mfc_misses; \
+	} \
+}
+
+
+/*
+ * Macros to compute elapsed time efficiently
+ * Borrowed from Van Jacobson's scheduling code
+ *
+ * TV_DELTA(a, b, delta) stores in `delta' the difference a - b in
+ * microseconds: the tv_usec difference plus 1000000 for every second of
+ * tv_sec difference.  Differences of one and two seconds are handled by
+ * additions; any other non-zero difference uses a multiplication.
+ * `delta' must be an assignable lvalue; its width bounds the interval that
+ * can be represented.
+ *
+ * TV_LT(a, b) is true when time `a' is earlier than time `b'.
+ */
+#define TV_DELTA(a, b, delta) { \
+	    register int xxs; \
+		\
+	    delta = (a).tv_usec - (b).tv_usec; \
+	    if ((xxs = (a).tv_sec - (b).tv_sec)) { \
+	       switch (xxs) { \
+		      case 2: \
+			  delta += 1000000; \
+			      /* fall through */ \
+		      case 1: \
+			  delta += 1000000; \
+			  break; \
+		      default: \
+			  delta += (1000000 * xxs); \
+	       } \
+	    } \
+}
+
+#define TV_LT(a, b) (((a).tv_usec < (b).tv_usec && \
+	      (a).tv_sec <= (b).tv_sec) || (a).tv_sec < (b).tv_sec)
+
+#ifdef UPCALL_TIMING
+u_long upcall_data[51];
+static void collate(struct timeval *);
+#endif /* UPCALL_TIMING */
+
+
+/*
+ * Handle MRT setsockopt commands to modify the multicast routing tables.
+ *
+ * Only MRT_INIT is accepted from a socket other than the current multicast
+ * router socket; anything else from such a socket fails with EPERM.  For
+ * each command the argument is copied in with sooptcopyin() and, if that
+ * succeeds, handed to the matching worker.  Unknown commands return
+ * EOPNOTSUPP.
+ */
+static int
+X_ip_mrouter_set(so, sopt)
+	struct socket *so;
+	struct sockopt *sopt;
+{
+	struct	mfcctl mfc;
+	struct	vifctl vifc;
+	vifi_t	vifi;
+	int	optval;
+	int	error = 0;
+
+	if (sopt->sopt_name != MRT_INIT && so != ip_mrouter)
+		return (EPERM);
+
+	switch (sopt->sopt_name) {
+	case MRT_INIT:
+		error = sooptcopyin(sopt, &optval, sizeof optval,
+				    sizeof optval);
+		if (error == 0)
+			error = ip_mrouter_init(so, optval);
+		break;
+
+	case MRT_DONE:
+		error = ip_mrouter_done();
+		break;
+
+	case MRT_ADD_VIF:
+		error = sooptcopyin(sopt, &vifc, sizeof vifc, sizeof vifc);
+		if (error == 0)
+			error = add_vif(&vifc);
+		break;
+
+	case MRT_DEL_VIF:
+		error = sooptcopyin(sopt, &vifi, sizeof vifi, sizeof vifi);
+		if (error == 0)
+			error = del_vif(vifi);
+		break;
+
+	case MRT_ADD_MFC:
+		error = sooptcopyin(sopt, &mfc, sizeof mfc, sizeof mfc);
+		if (error == 0)
+			error = add_mfc(&mfc);
+		break;
+
+	case MRT_DEL_MFC:
+		error = sooptcopyin(sopt, &mfc, sizeof mfc, sizeof mfc);
+		if (error == 0)
+			error = del_mfc(&mfc);
+		break;
+
+	case MRT_ASSERT:
+		error = sooptcopyin(sopt, &optval, sizeof optval,
+				    sizeof optval);
+		/* The value returned by set_assert() is not propagated. */
+		if (error == 0)
+			set_assert(optval);
+		break;
+
+	default:
+		error = EOPNOTSUPP;
+		break;
+	}
+
+	return (error);
+}
+
+#ifndef MROUTE_LKM
+int (*ip_mrouter_set)(struct socket *, struct sockopt *) = X_ip_mrouter_set;
+#endif
+
+/*
+ * Handle MRT getsockopt commands.
+ *
+ * MRT_VERSION copies out the version number 0x0305 and MRT_ASSERT the
+ * current pim_assert setting; anything else returns EOPNOTSUPP.
+ */
+static int
+X_ip_mrouter_get(so, sopt)
+	struct socket *so;
+	struct sockopt *sopt;
+{
+	static int version = 0x0305; /* !!! why is this here? XXX */
+
+	switch (sopt->sopt_name) {
+	case MRT_VERSION:
+		return (sooptcopyout(sopt, &version, sizeof version));
+
+	case MRT_ASSERT:
+		return (sooptcopyout(sopt, &pim_assert, sizeof pim_assert));
+
+	default:
+		return (EOPNOTSUPP);
+	}
+}
+
+#ifndef MROUTE_LKM
+int (*ip_mrouter_get)(struct socket *, struct sockopt *) = X_ip_mrouter_get;
+#endif
+
+/*
+ * Handle ioctl commands to obtain information from the cache.
+ *
+ * SIOCGETVIFCNT and SIOCGETSGCNT are passed to get_vif_cnt() and
+ * get_sg_cnt() with `data' cast to the matching request structure; any
+ * other command returns EINVAL.
+ */
+static int
+X_mrt_ioctl(cmd, data)
+    int cmd;
+    caddr_t data;
+{
+    int error;
+
+    switch (cmd) {
+	case (SIOCGETVIFCNT):
+	    error = get_vif_cnt((struct sioc_vif_req *)data);
+	    break;
+	case (SIOCGETSGCNT):
+	    error = get_sg_cnt((struct sioc_sg_req *)data);
+	    break;
+	default:
+	    error = EINVAL;
+	    break;
+    }
+    return (error);
+}
+
+#ifndef MROUTE_LKM
+int (*mrt_ioctl)(int, caddr_t) = X_mrt_ioctl;
+#endif
+
+/*
+ * returns the packet, byte, rpf-failure count for the source group provided
+ *
+ * The (source, group) pair is taken from `req'.  When a matching cache
+ * entry is found its counters are copied into `req'; otherwise all three
+ * counters are set to 0xffffffff.  Always returns 0.
+ */
+static int
+get_sg_cnt(req)
+    register struct sioc_sg_req *req;
+{
+    register struct mfc *rt;
+    int s;
+
+    /*
+     * Stay at splnet until the counters have been copied, so the entry
+     * returned by the lookup is still protected while it is being read.
+     */
+    s = splnet();
+    MFCFIND(req->src.s_addr, req->grp.s_addr, rt);
+    if (rt != NULL) {
+	req->pktcnt = rt->mfc_pkt_cnt;
+	req->bytecnt = rt->mfc_byte_cnt;
+	req->wrong_if = rt->mfc_wrong_if;
+    } else
+	req->pktcnt = req->bytecnt = req->wrong_if = 0xffffffff;
+    splx(s);
+
+    return 0;
+}
+
+/*
+ * returns the input and output packet and byte counts on the vif provided
+ *
+ * The vif index is taken from `req'.  Returns EINVAL when it is not below
+ * numvifs, otherwise fills in the four counters and returns 0.
+ */
+static int
+get_vif_cnt(req)
+    register struct sioc_vif_req *req;
+{
+    register vifi_t idx = req->vifi;
+    register struct vif *vifp;
+
+    if (idx >= numvifs)
+	return (EINVAL);
+
+    vifp = &viftable[idx];
+    req->icount = vifp->v_pkt_in;
+    req->ocount = vifp->v_pkt_out;
+    req->ibytes = vifp->v_bytes_in;
+    req->obytes = vifp->v_bytes_out;
+
+    return (0);
+}
+
+/*
+ * Enable multicast routing
+ *
+ * `so' must be a raw IGMP socket (else EOPNOTSUPP), `version' must be 1
+ * (else ENOPROTOOPT) and no router socket may be registered yet (else
+ * EADDRINUSE).  On success `so' becomes the router socket, the forwarding
+ * cache tables are cleared, PIM assert processing is switched off and the
+ * expire_upcalls timer is started.  Returns 0 on success.
+ */
+static int
+ip_mrouter_init(so, version)
+	struct socket *so;
+	int version;
+{
+    if (mrtdebug) {
+	log(LOG_DEBUG,"ip_mrouter_init: so_type = %d, pr_protocol = %d\n",
+		so->so_type, so->so_proto->pr_protocol);
+    }
+
+    if (so->so_type != SOCK_RAW)
+	return (EOPNOTSUPP);
+    if (so->so_proto->pr_protocol != IPPROTO_IGMP)
+	return (EOPNOTSUPP);
+
+    if (version != 1)
+	return (ENOPROTOOPT);
+
+    if (ip_mrouter != NULL)
+	return (EADDRINUSE);
+
+    ip_mrouter = so;
+
+    /* Start with an empty forwarding cache and no pending expiries. */
+    bzero((caddr_t)nexpire, sizeof(nexpire));
+    bzero((caddr_t)mfctable, sizeof(mfctable));
+
+    pim_assert = 0;
+
+    expire_upcalls_ch = timeout(expire_upcalls, (caddr_t)NULL, EXPIRE_TIMEOUT);
+
+    if (mrtdebug) {
+	log(LOG_DEBUG, "ip_mrouter_init\n");
+    }
+
+    return (0);
+}
+
+/*
+ * Disable multicast routing
+ *
+ * With splnet held: turns off all-multicast reception on every
+ * non-tunnel vif in use, releases any packets still sitting on the
+ * per-vif token-bucket queues, clears the vif and tbf tables, stops
+ * the upcall expiry timer, frees every forwarding cache entry together
+ * with its stalled packets, resets the de-encapsulation cache and
+ * finally clears ip_mrouter.  Always returns 0.
+ */
+static int
+X_ip_mrouter_done()
+{
+    vifi_t vifi;
+    int i;
+    struct mfc *rt;
+    struct rtdetq *rte;
+    struct mbuf *m;
+    int s;
+
+    s = splnet();
+
+    for (vifi = 0; vifi < numvifs; vifi++) {
+	struct vif *vifp = &viftable[vifi];
+
+	/*
+	 * For each phyint in use, disable promiscuous reception of all IP
+	 * multicasts.
+	 */
+	if (vifp->v_lcl_addr.s_addr != 0 &&
+	    !(vifp->v_flags & VIFF_TUNNEL))
+	    if_allmulti(vifp->v_ifp, 0);
+
+	/*
+	 * Free packets queued at the interface; zeroing tbftable below
+	 * would otherwise drop the only references to them.
+	 */
+	if (vifp->v_tbf != NULL) {
+	    while ((m = vifp->v_tbf->tbf_q) != NULL) {
+		vifp->v_tbf->tbf_q = m->m_act;
+		m_freem(m);
+	    }
+	}
+    }
+    bzero((caddr_t)tbftable, sizeof(tbftable));
+    bzero((caddr_t)viftable, sizeof(viftable));
+    numvifs = 0;
+    pim_assert = 0;
+
+    untimeout(expire_upcalls, (caddr_t)NULL, expire_upcalls_ch);
+
+    /*
+     * Free all multicast forwarding cache entries.
+     */
+    for (i = 0; i < MFCTBLSIZ; i++) {
+	for (rt = mfctable[i]; rt != NULL; ) {
+	    struct mfc *nr = rt->mfc_next;
+
+	    /* Stalled packets hang off the entry; release them first. */
+	    for (rte = rt->mfc_stall; rte != NULL; ) {
+		struct rtdetq *n = rte->next;
+
+		m_freem(rte->m);
+		free(rte, M_MRTABLE);
+		rte = n;
+	    }
+	    free(rt, M_MRTABLE);
+	    rt = nr;
+	}
+    }
+
+    bzero((caddr_t)mfctable, sizeof(mfctable));
+
+    /*
+     * Reset de-encapsulation cache
+     */
+    last_encap_src = 0;
+    last_encap_vif = NULL;
+    have_encap_tunnel = 0;
+
+    ip_mrouter = NULL;
+
+    splx(s);
+
+    if (mrtdebug)
+	log(LOG_DEBUG, "ip_mrouter_done\n");
+
+    return 0;
+}
+
+#ifndef MROUTE_LKM
+int (*ip_mrouter_done)(void) = X_ip_mrouter_done;
+#endif
+
+/*
+ * Set the global that switches PIM assert processing on (1) or off (0).
+ * Any other value is rejected with EINVAL and leaves the global alone.
+ */
+static int
+set_assert(i)
+	int i;
+{
+    if (i == 0 || i == 1) {
+	pim_assert = i;
+	return 0;
+    }
+
+    return EINVAL;
+}
+
+/*
+ * Add a vif to the vif table
+ *
+ * vifcp names the slot (vifc_vifi), the local address, the remote
+ * address, flags, threshold and rate limit.
+ *
+ * Returns EINVAL for a slot index >= MAXVIFS, EADDRINUSE when the slot
+ * already has a local address, EADDRNOTAVAIL when no interface owns the
+ * local address, EOPNOTSUPP for source-routed tunnels or for a physical
+ * interface without IFF_MULTICAST, the error from if_allmulti() if that
+ * fails, and 0 on success.
+ */
+static int
+add_vif(vifcp)
+    register struct vifctl *vifcp;
+{
+    register struct vif *vifp;
+    static struct sockaddr_in sin = {sizeof sin, AF_INET};
+    struct ifaddr *ifa;
+    struct ifnet *ifp;
+    int error, s;
+    struct tbf *v_tbf;
+
+    if (vifcp->vifc_vifi >= MAXVIFS)  return EINVAL;
+
+    /*
+     * Only derive the table slots once the index is known to be in
+     * range, so no out-of-bounds pointer is ever formed.
+     */
+    vifp = viftable + vifcp->vifc_vifi;
+    v_tbf = tbftable + vifcp->vifc_vifi;
+
+    if (vifp->v_lcl_addr.s_addr != 0) return EADDRINUSE;
+
+    /* Find the interface with an address in AF_INET family */
+    sin.sin_addr = vifcp->vifc_lcl_addr;
+    ifa = ifa_ifwithaddr((struct sockaddr *)&sin);
+    if (ifa == 0) return EADDRNOTAVAIL;
+    ifp = ifa->ifa_ifp;
+
+    if (vifcp->vifc_flags & VIFF_TUNNEL) {
+	if ((vifcp->vifc_flags & VIFF_SRCRT) == 0) {
+		/*
+		 * An encapsulating tunnel is wanted.  Tell ipip_input() to
+		 * start paying attention to encapsulated packets.
+		 */
+		if (have_encap_tunnel == 0) {
+			have_encap_tunnel = 1;
+			/* s doubles as the loop index here */
+			for (s = 0; s < MAXVIFS; ++s) {
+				multicast_decap_if[s].if_name = "mdecap";
+				multicast_decap_if[s].if_unit = s;
+			}
+		}
+		/*
+		 * Set interface to fake encapsulator interface
+		 */
+		ifp = &multicast_decap_if[vifcp->vifc_vifi];
+		/*
+		 * Prepare cached route entry
+		 */
+		bzero(&vifp->v_route, sizeof(vifp->v_route));
+	} else {
+	    log(LOG_ERR, "source routed tunnels not supported\n");
+	    return EOPNOTSUPP;
+	}
+    } else {
+	/* Make sure the interface supports multicast */
+	if ((ifp->if_flags & IFF_MULTICAST) == 0)
+	    return EOPNOTSUPP;
+
+	/* Enable promiscuous reception of all IP multicasts from the if */
+	s = splnet();
+	error = if_allmulti(ifp, 1);
+	splx(s);
+	if (error)
+	    return error;
+    }
+
+    s = splnet();
+    /* define parameters for the tbf structure */
+    vifp->v_tbf = v_tbf;
+    GET_TIME(vifp->v_tbf->tbf_last_pkt_t);
+    vifp->v_tbf->tbf_n_tok = 0;
+    vifp->v_tbf->tbf_q_len = 0;
+    vifp->v_tbf->tbf_max_q_len = MAXQSIZE;
+    vifp->v_tbf->tbf_q = vifp->v_tbf->tbf_t = NULL;
+
+    vifp->v_flags     = vifcp->vifc_flags;
+    vifp->v_threshold = vifcp->vifc_threshold;
+    vifp->v_lcl_addr  = vifcp->vifc_lcl_addr;
+    vifp->v_rmt_addr  = vifcp->vifc_rmt_addr;
+    vifp->v_ifp       = ifp;
+    /* scaling up here allows division by 1024 in critical code */
+    vifp->v_rate_limit= vifcp->vifc_rate_limit * 1024 / 1000;
+    vifp->v_rsvp_on   = 0;
+    vifp->v_rsvpd     = NULL;
+    /* initialize per vif pkt counters */
+    vifp->v_pkt_in    = 0;
+    vifp->v_pkt_out   = 0;
+    vifp->v_bytes_in  = 0;
+    vifp->v_bytes_out = 0;
+    splx(s);
+
+    /* Adjust numvifs up if the vifi is higher than numvifs */
+    if (numvifs <= vifcp->vifc_vifi) numvifs = vifcp->vifc_vifi + 1;
+
+    if (mrtdebug)
+	log(LOG_DEBUG, "add_vif #%d, lcladdr %lx, %s %lx, thresh %x, rate %d\n",
+	    vifcp->vifc_vifi, 
+	    (u_long)ntohl(vifcp->vifc_lcl_addr.s_addr),
+	    (vifcp->vifc_flags & VIFF_TUNNEL) ? "rmtaddr" : "mask",
+	    (u_long)ntohl(vifcp->vifc_rmt_addr.s_addr),
+	    vifcp->vifc_threshold,
+	    vifcp->vifc_rate_limit);    
+
+    return 0;
+}
+
+/*
+ * Delete a vif from the vif table
+ *
+ * Returns EINVAL when vifi is not below numvifs, EADDRNOTAVAIL when the
+ * slot has no local address (i.e. is not in use), and 0 on success.
+ * On success all-multicast reception is turned off for a non-tunnel
+ * vif, the de-encapsulation cache is reset if it pointed at this vif,
+ * queued packets are freed, the vif and its tbf are zeroed and numvifs
+ * is lowered to one past the highest slot still in use.
+ */
+static int
+del_vif(vifi)
+	vifi_t vifi;
+{
+    register struct vif *vifp;
+    register struct mbuf *m;
+    int s;
+
+    if (vifi >= numvifs) return EINVAL;
+
+    /* Index is in range; only now form the pointer into viftable. */
+    vifp = &viftable[vifi];
+    if (vifp->v_lcl_addr.s_addr == 0) return EADDRNOTAVAIL;
+
+    s = splnet();
+
+    /* Physical interfaces had all-multicast enabled by add_vif(). */
+    if (!(vifp->v_flags & VIFF_TUNNEL))
+	if_allmulti(vifp->v_ifp, 0);
+
+    /* Don't leave the decap cache pointing at a vif that is going away. */
+    if (vifp == last_encap_vif) {
+	last_encap_vif = 0;
+	last_encap_src = 0;
+    }
+
+    /*
+     * Free packets queued at the interface
+     */
+    while (vifp->v_tbf->tbf_q) {
+	m = vifp->v_tbf->tbf_q;
+	vifp->v_tbf->tbf_q = m->m_act;
+	m_freem(m);
+    }
+
+    bzero((caddr_t)vifp->v_tbf, sizeof(*(vifp->v_tbf)));
+    bzero((caddr_t)vifp, sizeof (*vifp));
+
+    if (mrtdebug)
+      log(LOG_DEBUG, "del_vif %d, numvifs %d\n", vifi, numvifs);
+
+    /* Adjust numvifs down */
+    for (vifi = numvifs; vifi > 0; vifi--)
+	if (viftable[vifi-1].v_lcl_addr.s_addr != 0) break;
+    numvifs = vifi;
+
+    splx(s);
+
+    return 0;
+}
+
+/*
+ * Add an mfc entry
+ *
+ * Three cases, tried in order:
+ *   1. MFCFIND locates an entry for (origin, group): only its parent
+ *      and ttls are updated.
+ *   2. One or more entries for (origin, group) have stalled packets
+ *      (an upcall is outstanding): each is filled in, taken off the
+ *      expiry accounting, and its stalled packets are forwarded via
+ *      ip_mdq() and then freed.
+ *   3. Otherwise an entry with matching addresses in the hash chain is
+ *      refreshed, or, failing that, a new entry is allocated and linked
+ *      at the head of the chain.
+ *
+ * Returns 0 on success, ENOBUFS if a new entry cannot be allocated.
+ */
+static int
+add_mfc(mfccp)
+    struct mfcctl *mfccp;
+{
+    struct mfc *rt;
+    u_long hash;
+    struct rtdetq *rte;
+    register u_short nstl;
+    int s;
+    int i;
+
+    MFCFIND(mfccp->mfcc_origin.s_addr, mfccp->mfcc_mcastgrp.s_addr, rt);
+
+    /* If an entry already exists, just update the fields */
+    if (rt) {
+	if (mrtdebug & DEBUG_MFC)
+	    log(LOG_DEBUG,"add_mfc update o %lx g %lx p %x\n",
+		(u_long)ntohl(mfccp->mfcc_origin.s_addr),
+		(u_long)ntohl(mfccp->mfcc_mcastgrp.s_addr),
+		mfccp->mfcc_parent);
+
+	s = splnet();
+	rt->mfc_parent = mfccp->mfcc_parent;
+	for (i = 0; i < numvifs; i++)
+	    rt->mfc_ttls[i] = mfccp->mfcc_ttls[i];
+	splx(s);
+	return 0;
+    }
+
+    /* 
+     * Find the entry for which the upcall was made and update
+     */
+    s = splnet();
+    hash = MFCHASH(mfccp->mfcc_origin.s_addr, mfccp->mfcc_mcastgrp.s_addr);
+    for (rt = mfctable[hash], nstl = 0; rt; rt = rt->mfc_next) {
+
+	if ((rt->mfc_origin.s_addr == mfccp->mfcc_origin.s_addr) &&
+	    (rt->mfc_mcastgrp.s_addr == mfccp->mfcc_mcastgrp.s_addr) &&
+	    (rt->mfc_stall != NULL)) {
+  
+	    /* More than one stalled entry for the same pair is an error. */
+	    if (nstl++)
+		log(LOG_ERR, "add_mfc %s o %lx g %lx p %x dbx %p\n",
+		    "multiple kernel entries",
+		    (u_long)ntohl(mfccp->mfcc_origin.s_addr),
+		    (u_long)ntohl(mfccp->mfcc_mcastgrp.s_addr),
+		    mfccp->mfcc_parent, (void *)rt->mfc_stall);
+
+	    if (mrtdebug & DEBUG_MFC)
+		log(LOG_DEBUG,"add_mfc o %lx g %lx p %x dbg %p\n",
+		    (u_long)ntohl(mfccp->mfcc_origin.s_addr),
+		    (u_long)ntohl(mfccp->mfcc_mcastgrp.s_addr),
+		    mfccp->mfcc_parent, (void *)rt->mfc_stall);
+
+	    rt->mfc_origin     = mfccp->mfcc_origin;
+	    rt->mfc_mcastgrp   = mfccp->mfcc_mcastgrp;
+	    rt->mfc_parent     = mfccp->mfcc_parent;
+	    for (i = 0; i < numvifs; i++)
+		rt->mfc_ttls[i] = mfccp->mfcc_ttls[i];
+	    /* initialize pkt counters per src-grp */
+	    rt->mfc_pkt_cnt    = 0;
+	    rt->mfc_byte_cnt   = 0;
+	    rt->mfc_wrong_if   = 0;
+	    rt->mfc_last_assert.tv_sec = rt->mfc_last_assert.tv_usec = 0;
+
+	    rt->mfc_expire = 0;	/* Don't clean this guy up */
+	    nexpire[hash]--;
+
+	    /* free packets Qed at the end of this entry */
+	    for (rte = rt->mfc_stall; rte != NULL; ) {
+		struct rtdetq *n = rte->next;
+
+		ip_mdq(rte->m, rte->ifp, rt, -1);
+		m_freem(rte->m);
+#ifdef UPCALL_TIMING
+		collate(&(rte->t));
+#endif /* UPCALL_TIMING */
+		free(rte, M_MRTABLE);
+		rte = n;
+	    }
+	    rt->mfc_stall = NULL;
+	}
+    }
+
+    /*
+     * It is possible that an entry is being inserted without an upcall
+     */
+    if (nstl == 0) {
+	if (mrtdebug & DEBUG_MFC)
+	    log(LOG_DEBUG,"add_mfc no upcall h %lu o %lx g %lx p %x\n",
+		hash, (u_long)ntohl(mfccp->mfcc_origin.s_addr),
+		(u_long)ntohl(mfccp->mfcc_mcastgrp.s_addr),
+		mfccp->mfcc_parent);
+	
+	for (rt = mfctable[hash]; rt != NULL; rt = rt->mfc_next) {
+	    
+	    if ((rt->mfc_origin.s_addr == mfccp->mfcc_origin.s_addr) &&
+		(rt->mfc_mcastgrp.s_addr == mfccp->mfcc_mcastgrp.s_addr)) {
+
+		rt->mfc_origin     = mfccp->mfcc_origin;
+		rt->mfc_mcastgrp   = mfccp->mfcc_mcastgrp;
+		rt->mfc_parent     = mfccp->mfcc_parent;
+		for (i = 0; i < numvifs; i++)
+		    rt->mfc_ttls[i] = mfccp->mfcc_ttls[i];
+		/* initialize pkt counters per src-grp */
+		rt->mfc_pkt_cnt    = 0;
+		rt->mfc_byte_cnt   = 0;
+		rt->mfc_wrong_if   = 0;
+		rt->mfc_last_assert.tv_sec = rt->mfc_last_assert.tv_usec = 0;
+		if (rt->mfc_expire)
+		    nexpire[hash]--;
+		rt->mfc_expire	   = 0;
+		/*
+		 * Stop at the entry just refreshed.  Running the loop to
+		 * its end would leave rt == NULL and make the code below
+		 * allocate a duplicate entry for the same pair.
+		 */
+		break;
+	    }
+	}
+	if (rt == NULL) {
+	    /* no upcall, so make a new entry */
+	    rt = (struct mfc *)malloc(sizeof(*rt), M_MRTABLE, M_NOWAIT);
+	    if (rt == NULL) {
+		splx(s);
+		return ENOBUFS;
+	    }
+	    
+	    /* insert new entry at head of hash chain */
+	    rt->mfc_origin     = mfccp->mfcc_origin;
+	    rt->mfc_mcastgrp   = mfccp->mfcc_mcastgrp;
+	    rt->mfc_parent     = mfccp->mfcc_parent;
+	    for (i = 0; i < numvifs; i++)
+		    rt->mfc_ttls[i] = mfccp->mfcc_ttls[i];
+	    /* initialize pkt counters per src-grp */
+	    rt->mfc_pkt_cnt    = 0;
+	    rt->mfc_byte_cnt   = 0;
+	    rt->mfc_wrong_if   = 0;
+	    rt->mfc_last_assert.tv_sec = rt->mfc_last_assert.tv_usec = 0;
+	    rt->mfc_expire     = 0;
+	    rt->mfc_stall      = NULL;
+	    
+	    /* link into table */
+	    rt->mfc_next = mfctable[hash];
+	    mfctable[hash] = rt;
+	}
+    }
+    splx(s);
+    return 0;
+}
+
+#ifdef UPCALL_TIMING
+/*
+ * Collect delay statistics on the upcalls.
+ *
+ * t is the time recorded when the packet was stalled.  If that time is
+ * earlier than now, the elapsed delta is shifted right by 10 bits,
+ * clamped to 50, and the matching upcall_data[] bucket is incremented.
+ */
+static void collate(t)
+register struct timeval *t;
+{
+    register u_long bucket;
+    register struct timeval tp;
+    register u_long delta;
+
+    GET_TIME(tp);
+
+    /* Nothing to record unless the stamp lies before the current time. */
+    if (!(TV_LT(*t, tp)))
+	return;
+
+    TV_DELTA(tp, *t, delta);
+
+    bucket = delta >> 10;
+    if (bucket > 50)
+	bucket = 50;
+
+    ++upcall_data[bucket];
+}
+#endif /* UPCALL_TIMING */
+
+/*
+ * Delete an mfc entry
+ *
+ * Unlinks and frees the first entry in the hash chain whose origin and
+ * group match mfccp and which has no stalled packets.
+ * Returns EADDRNOTAVAIL when no such entry exists, else 0.
+ */
+static int
+del_mfc(mfccp)
+    struct mfcctl *mfccp;
+{
+    struct in_addr 	origin = mfccp->mfcc_origin;
+    struct in_addr 	mcastgrp = mfccp->mfcc_mcastgrp;
+    struct mfc 		*rt;
+    struct mfc	 	**link;
+    u_long 		hash;
+    int ipl;
+
+    hash = MFCHASH(origin.s_addr, mcastgrp.s_addr);
+
+    if (mrtdebug & DEBUG_MFC) {
+	log(LOG_DEBUG,"del_mfc orig %lx mcastgrp %lx\n",
+	    (u_long)ntohl(origin.s_addr), (u_long)ntohl(mcastgrp.s_addr));
+    }
+
+    ipl = splnet();
+
+    /*
+     * Walk the chain through the address of each link so the match can
+     * be spliced out without tracking a separate predecessor.
+     */
+    for (link = &mfctable[hash]; (rt = *link) != NULL; link = &rt->mfc_next) {
+	if (origin.s_addr == rt->mfc_origin.s_addr &&
+	    mcastgrp.s_addr == rt->mfc_mcastgrp.s_addr &&
+	    rt->mfc_stall == NULL)
+	    break;
+    }
+
+    if (rt == NULL) {
+	splx(ipl);
+	return EADDRNOTAVAIL;
+    }
+
+    *link = rt->mfc_next;
+    free(rt, M_MRTABLE);
+
+    splx(ipl);
+
+    return 0;
+}
+
+/*
+ * Send a message to mrouted on the multicast routing socket
+ *
+ * Appends mm, tagged with address src, to the receive buffer of s and
+ * wakes up readers.  Returns 0 on success.  If s is NULL or the append
+ * fails, mm is freed and -1 is returned.
+ */
+static int
+socket_send(s, mm, src)
+	struct socket *s;
+	struct mbuf *mm;
+	struct sockaddr_in *src;
+{
+	if (s != NULL &&
+	    sbappendaddr(&s->so_rcv, (struct sockaddr *)src,
+			 mm, (struct mbuf *)0) != 0) {
+		sorwakeup(s);
+		return 0;
+	}
+
+	/* No socket, or sbappendaddr() reported failure: drop the message. */
+	m_freem(mm);
+	return -1;
+}
+
+/*
+ * IP multicast forwarding function. This function assumes that the packet
+ * pointed to by "ip" has arrived on (or is about to be sent to) the interface
+ * pointed to by "ifp", and the packet is to be relayed to other networks
+ * that have members of the packet's destination IP multicast group.
+ *
+ * The packet is returned unscathed to the caller, unless it is
+ * erroneous, in which case a non-zero return value tells the caller to
+ * discard it.
+ *
+ * Return values as produced below: 1 for a source-routed tunnel packet,
+ * the result of ip_mdq() when a vif is forced through imo or a cache
+ * entry exists, ENOBUFS when memory for queueing/upcall is unavailable
+ * or the routing socket is full, and 0 otherwise.
+ */
+
+#define IP_HDR_LEN  20	/* # bytes of fixed IP header (excluding options) */
+#define TUNNEL_LEN  12  /* # bytes of IP option for tunnel encapsulation  */
+
+static int
+X_ip_mforward(ip, ifp, m, imo)
+    register struct ip *ip;
+    struct ifnet *ifp;
+    struct mbuf *m;
+    struct ip_moptions *imo;
+{
+    register struct mfc *rt;
+    register u_char *ipoptions;
+    static struct sockaddr_in 	k_igmpsrc	= { sizeof k_igmpsrc, AF_INET };
+    static int srctun = 0;
+    register struct mbuf *mm;
+    int s;
+    vifi_t vifi;
+    struct vif *vifp;
+
+    if (mrtdebug & DEBUG_FORWARD)
+	log(LOG_DEBUG, "ip_mforward: src %lx, dst %lx, ifp %p\n",
+	    (u_long)ntohl(ip->ip_src.s_addr), (u_long)ntohl(ip->ip_dst.s_addr),
+	    (void *)ifp);
+
+    if (ip->ip_hl < (IP_HDR_LEN + TUNNEL_LEN) >> 2 ||
+	(ipoptions = (u_char *)(ip + 1))[1] != IPOPT_LSRR ) {
+	/*
+	 * Packet arrived via a physical interface or
+	 * an encapsulated tunnel.
+	 */
+    } else {
+	/*
+	 * Packet arrived through a source-route tunnel.
+	 * Source-route tunnels are no longer supported.
+	 */
+	/* Rate-limit the complaint to one message per 1000 packets. */
+	if ((srctun++ % 1000) == 0)
+	    log(LOG_ERR,
+		"ip_mforward: received source-routed packet from %lx\n",
+		(u_long)ntohl(ip->ip_src.s_addr));
+
+	return 1;
+    }
+
+    /* The sender pinned the packet to one vif: bypass the cache. */
+    if ((imo) && ((vifi = imo->imo_multicast_vif) < numvifs)) {
+	if (ip->ip_ttl < 255)
+		ip->ip_ttl++;	/* compensate for -1 in *_send routines */
+	if (rsvpdebug && ip->ip_p == IPPROTO_RSVP) {
+	    vifp = viftable + vifi;
+	    /* %lx needs a u_long argument; cast as the log() calls do */
+	    printf("Sending IPPROTO_RSVP from %lx to %lx on vif %d (%s%s%d)\n",
+		(u_long)ntohl(ip->ip_src.s_addr),
+		(u_long)ntohl(ip->ip_dst.s_addr), vifi,
+		(vifp->v_flags & VIFF_TUNNEL) ? "tunnel on " : "",
+		vifp->v_ifp->if_name, vifp->v_ifp->if_unit);
+	}
+	return (ip_mdq(m, ifp, NULL, vifi));
+    }
+    if (rsvpdebug && ip->ip_p == IPPROTO_RSVP) {
+	printf("Warning: IPPROTO_RSVP from %lx to %lx without vif option\n",
+	    (u_long)ntohl(ip->ip_src.s_addr),
+	    (u_long)ntohl(ip->ip_dst.s_addr));
+	if(!imo)
+		printf("In fact, no options were specified at all\n");
+    }
+
+    /*
+     * Don't forward a packet with time-to-live of zero or one,
+     * or a packet destined to a local-only group.
+     */
+    if (ip->ip_ttl <= 1 ||
+	ntohl(ip->ip_dst.s_addr) <= INADDR_MAX_LOCAL_GROUP)
+	return 0;
+
+    /*
+     * Determine forwarding vifs from the forwarding cache table
+     */
+    s = splnet();
+    MFCFIND(ip->ip_src.s_addr, ip->ip_dst.s_addr, rt);
+
+    /* Entry exists, so forward if necessary */
+    if (rt != NULL) {
+	splx(s);
+	return (ip_mdq(m, ifp, rt, -1));
+    } else {
+	/*
+	 * If we don't have a route for packet's origin,
+	 * Make a copy of the packet &
+	 * send message to routing daemon
+	 */
+
+	register struct mbuf *mb0;
+	register struct rtdetq *rte;
+	register u_long hash;
+	int hlen = ip->ip_hl << 2;
+#ifdef UPCALL_TIMING
+	struct timeval tp;
+
+	GET_TIME(tp);
+#endif
+
+	mrtstat.mrts_no_route++;
+	if (mrtdebug & (DEBUG_FORWARD | DEBUG_MFC))
+	    log(LOG_DEBUG, "ip_mforward: no rte s %lx g %lx\n",
+		(u_long)ntohl(ip->ip_src.s_addr),
+		(u_long)ntohl(ip->ip_dst.s_addr));
+
+	/*
+	 * Allocate mbufs early so that we don't do extra work if we are
+	 * just going to fail anyway.  Make sure to pullup the header so
+	 * that other people can't step on it.
+	 */
+	rte = (struct rtdetq *)malloc((sizeof *rte), M_MRTABLE, M_NOWAIT);
+	if (rte == NULL) {
+	    splx(s);
+	    return ENOBUFS;
+	}
+	mb0 = m_copy(m, 0, M_COPYALL);
+	if (mb0 && (M_HASCL(mb0) || mb0->m_len < hlen))
+	    mb0 = m_pullup(mb0, hlen);
+	if (mb0 == NULL) {
+	    free(rte, M_MRTABLE);
+	    splx(s);
+	    return ENOBUFS;
+	}
+
+	/* is there an upcall waiting for this packet? */
+	hash = MFCHASH(ip->ip_src.s_addr, ip->ip_dst.s_addr);
+	for (rt = mfctable[hash]; rt; rt = rt->mfc_next) {
+	    if ((ip->ip_src.s_addr == rt->mfc_origin.s_addr) &&
+		(ip->ip_dst.s_addr == rt->mfc_mcastgrp.s_addr) &&
+		(rt->mfc_stall != NULL))
+		break;
+	}
+
+	if (rt == NULL) {
+	    int i;
+	    struct igmpmsg *im;
+
+	    /* no upcall, so make a new entry */
+	    rt = (struct mfc *)malloc(sizeof(*rt), M_MRTABLE, M_NOWAIT);
+	    if (rt == NULL) {
+		free(rte, M_MRTABLE);
+		m_freem(mb0);
+		splx(s);
+		return ENOBUFS;
+	    }
+	    /* Make a copy of the header to send to the user level process */
+	    mm = m_copy(mb0, 0, hlen);
+	    if (mm == NULL) {
+		free(rte, M_MRTABLE);
+		m_freem(mb0);
+		free(rt, M_MRTABLE);
+		splx(s);
+		return ENOBUFS;
+	    }
+
+	    /* 
+	     * Send message to routing daemon to install 
+	     * a route into the kernel table
+	     */
+	    k_igmpsrc.sin_addr = ip->ip_src;
+	    
+	    im = mtod(mm, struct igmpmsg *);
+	    im->im_msgtype	= IGMPMSG_NOCACHE;
+	    im->im_mbz		= 0;
+
+	    mrtstat.mrts_upcalls++;
+
+	    /* socket_send() frees mm itself when it fails. */
+	    if (socket_send(ip_mrouter, mm, &k_igmpsrc) < 0) {
+		log(LOG_WARNING, "ip_mforward: ip_mrouter socket queue full\n");
+		++mrtstat.mrts_upq_sockfull;
+		free(rte, M_MRTABLE);
+		m_freem(mb0);
+		free(rt, M_MRTABLE);
+		splx(s);
+		return ENOBUFS;
+	    }
+
+	    /* insert new entry at head of hash chain */
+	    rt->mfc_origin.s_addr     = ip->ip_src.s_addr;
+	    rt->mfc_mcastgrp.s_addr   = ip->ip_dst.s_addr;
+	    rt->mfc_expire	      = UPCALL_EXPIRE;
+	    nexpire[hash]++;
+	    for (i = 0; i < numvifs; i++)
+		rt->mfc_ttls[i] = 0;
+	    rt->mfc_parent = -1;
+
+	    /* link into table */
+	    rt->mfc_next   = mfctable[hash];
+	    mfctable[hash] = rt;
+	    rt->mfc_stall = rte;
+
+	} else {
+	    /* determine if q has overflowed */
+	    int npkts = 0;
+	    struct rtdetq **p;
+
+	    /* p ends up addressing the NULL link at the tail of the queue */
+	    for (p = &rt->mfc_stall; *p != NULL; p = &(*p)->next)
+		npkts++;
+
+	    if (npkts > MAX_UPQ) {
+		mrtstat.mrts_upq_ovflw++;
+		free(rte, M_MRTABLE);
+		m_freem(mb0);
+		splx(s);
+		return 0;
+	    }
+
+	    /* Add this entry to the end of the queue */
+	    *p = rte;
+	}
+
+	/* rte is already linked in above; fill in its contents now. */
+	rte->m 			= mb0;
+	rte->ifp 		= ifp;
+#ifdef UPCALL_TIMING
+	rte->t			= tp;
+#endif
+	rte->next		= NULL;
+
+	splx(s);
+
+	return 0;
+    }		
+}
+
+#ifndef MROUTE_LKM
+int (*ip_mforward)(struct ip *, struct ifnet *, struct mbuf *,
+		   struct ip_moptions *) = X_ip_mforward;
+#endif
+
+/*
+ * Clean up the cache entry if upcall is not serviced
+ *
+ * Timer routine.  In every hash bucket that has entries pending expiry,
+ * each entry with stalled packets and a non-zero expiry count has that
+ * count decremented; when it reaches zero the stalled packets are
+ * dropped and the entry is unlinked and freed.  The routine re-arms
+ * itself before returning.
+ */
+static void
+expire_upcalls(void *unused)
+{
+    struct rtdetq *pkt;
+    struct mfc *ent, **link;
+    int bucket;
+    int ipl;
+
+    ipl = splnet();
+    for (bucket = 0; bucket < MFCTBLSIZ; bucket++) {
+	if (nexpire[bucket] == 0)
+	    continue;
+
+	link = &mfctable[bucket];
+	while ((ent = *link) != NULL) {
+	    /*
+	     * Step over real cache entries (nothing stalled), entries
+	     * marked to never expire (shouldn't happen), and entries
+	     * whose countdown has not yet run out.
+	     */
+	    if (ent->mfc_stall == NULL ||
+		ent->mfc_expire == 0 ||
+		--ent->mfc_expire != 0) {
+		link = &ent->mfc_next;
+		continue;
+	    }
+
+	    if (mrtdebug & DEBUG_EXPIRE)
+		log(LOG_DEBUG, "expire_upcalls: expiring (%lx %lx)\n",
+		    (u_long)ntohl(ent->mfc_origin.s_addr),
+		    (u_long)ntohl(ent->mfc_mcastgrp.s_addr));
+
+	    /*
+	     * drop all the packets
+	     * free the mbuf with the pkt, if, timing info
+	     */
+	    pkt = ent->mfc_stall;
+	    while (pkt != NULL) {
+		struct rtdetq *following = pkt->next;
+
+		m_freem(pkt->m);
+		free(pkt, M_MRTABLE);
+		pkt = following;
+	    }
+	    ++mrtstat.mrts_cache_cleanups;
+	    nexpire[bucket]--;
+
+	    /* Splice the entry out; link now addresses its successor. */
+	    *link = ent->mfc_next;
+	    free(ent, M_MRTABLE);
+	}
+    }
+    splx(ipl);
+    expire_upcalls_ch = timeout(expire_upcalls, (caddr_t)NULL, EXPIRE_TIMEOUT);
+}
+
+/*
+ * Packet forwarding routine once entry in the cache is made
+ *
+ * m        packet to forward (its IP header is read through mtod()).
+ * ifp      interface the packet arrived on.
+ * rt       forwarding cache entry; not used when xmt_vif selects a vif.
+ * xmt_vif  vif to send on, or -1 to consult rt.
+ *
+ * Returns 1 after sending on an explicitly requested vif, ENOBUFS when
+ * the copy for a wrong-interface upcall cannot be made, and 0 in all
+ * other cases (including a packet dropped for arriving on the wrong
+ * interface).
+ */
+static int
+ip_mdq(m, ifp, rt, xmt_vif)
+    register struct mbuf *m;
+    register struct ifnet *ifp;
+    register struct mfc *rt;
+    register vifi_t xmt_vif;
+{
+    register struct ip  *ip = mtod(m, struct ip *);
+    register vifi_t vifi;
+    register struct vif *vifp;
+    register int plen = ip->ip_len;
+
+/*
+ * Macro to send packet on vif.  Since RSVP packets don't get counted on
+ * input, they shouldn't get counted on output, so statistics keeping is
+ * separate.
+ */
+#define MC_SEND(ip,vifp,m) {                             \
+                if ((vifp)->v_flags & VIFF_TUNNEL)  	 \
+                    encap_send((ip), (vifp), (m));       \
+                else                                     \
+                    phyint_send((ip), (vifp), (m));      \
+}
+
+    /*
+     * If xmt_vif is not -1, send on only the requested vif.
+     *
+     * (since vifi_t is u_short, -1 becomes MAXUSHORT, which > numvifs.)
+     */
+    if (xmt_vif < numvifs) {
+	MC_SEND(ip, viftable + xmt_vif, m);
+	return 1;
+    }
+
+    /*
+     * Don't forward if it didn't arrive from the parent vif for its origin.
+     */
+    vifi = rt->mfc_parent;
+    if ((vifi >= numvifs) || (viftable[vifi].v_ifp != ifp)) {
+	/* came in the wrong interface */
+	/*
+	 * vifi may be out of range here (first half of the test above),
+	 * so every use of it as an index below is guarded.
+	 */
+	if (mrtdebug & DEBUG_FORWARD)
+	    log(LOG_DEBUG, "wrong if: ifp %p vifi %d vififp %p\n",
+		(void *)ifp, vifi,
+		(void *)((vifi < numvifs) ? viftable[vifi].v_ifp : NULL));
+	++mrtstat.mrts_wrong_if;
+	++rt->mfc_wrong_if;
+	/*
+	 * If we are doing PIM assert processing, and we are forwarding
+	 * packets on this interface, and it is a broadcast medium
+	 * interface (and not a tunnel), send a message to the routing daemon.
+	 */
+	if (pim_assert && (vifi < numvifs) && rt->mfc_ttls[vifi] &&
+		(ifp->if_flags & IFF_BROADCAST) &&
+		!(viftable[vifi].v_flags & VIFF_TUNNEL)) {
+	    /*
+	     * Length and family must be set: the address is handed to
+	     * sbappendaddr() through socket_send().
+	     */
+	    struct sockaddr_in k_igmpsrc = { sizeof k_igmpsrc, AF_INET };
+	    struct mbuf *mm;
+	    struct igmpmsg *im;
+	    int hlen = ip->ip_hl << 2;
+	    struct timeval now;
+	    register u_long delta;
+
+	    GET_TIME(now);
+
+	    TV_DELTA(rt->mfc_last_assert, now, delta);
+
+	    /* Send at most one assert upcall per ASSERT_MSG_TIME. */
+	    if (delta > ASSERT_MSG_TIME) {
+		mm = m_copy(m, 0, hlen);
+		if (mm && (M_HASCL(mm) || mm->m_len < hlen))
+		    mm = m_pullup(mm, hlen);
+		if (mm == NULL) {
+		    return ENOBUFS;
+		}
+
+		rt->mfc_last_assert = now;
+
+		im = mtod(mm, struct igmpmsg *);
+		im->im_msgtype	= IGMPMSG_WRONGVIF;
+		im->im_mbz		= 0;
+		im->im_vif		= vifi;
+
+		k_igmpsrc.sin_addr = im->im_src;
+
+		socket_send(ip_mrouter, mm, &k_igmpsrc);
+	    }
+	}
+	return 0;
+    }
+
+    /* If I sourced this packet, it counts as output, else it was input. */
+    if (ip->ip_src.s_addr == viftable[vifi].v_lcl_addr.s_addr) {
+	viftable[vifi].v_pkt_out++;
+	viftable[vifi].v_bytes_out += plen;
+    } else {
+	viftable[vifi].v_pkt_in++;
+	viftable[vifi].v_bytes_in += plen;
+    }
+    rt->mfc_pkt_cnt++;
+    rt->mfc_byte_cnt += plen;
+
+    /*
+     * For each vif, decide if a copy of the packet should be forwarded.
+     * Forward if:
+     *		- the ttl exceeds the vif's threshold
+     *		- there are group members downstream on interface
+     */
+    for (vifp = viftable, vifi = 0; vifi < numvifs; vifp++, vifi++)
+	if ((rt->mfc_ttls[vifi] > 0) &&
+	    (ip->ip_ttl > rt->mfc_ttls[vifi])) {
+	    vifp->v_pkt_out++;
+	    vifp->v_bytes_out += plen;
+	    MC_SEND(ip, vifp, m);
+	}
+
+    return 0;
+}
+
+/*
+ * Check whether a vif number is legal, i.e. lies in [0, numvifs).
+ * This is used by ip_output, to export numvifs there.
+ * Returns 1 when legal, 0 otherwise.
+ */
+static int
+X_legal_vif_num(vif)
+    int vif;
+{
+    if (vif < 0 || vif >= numvifs)
+       return(0);
+
+    return(1);
+}
+
+#ifndef MROUTE_LKM
+int (*legal_vif_num)(int) = X_legal_vif_num;
+#endif
+
+/*
+ * Return the local address used by this vif, or INADDR_ANY when the
+ * index does not lie in [0, numvifs).
+ */
+static u_long
+X_ip_mcast_src(vifi)
+    int vifi;
+{
+    if (vifi < 0 || vifi >= numvifs)
+	return INADDR_ANY;
+
+    return viftable[vifi].v_lcl_addr.s_addr;
+}
+
+#ifndef MROUTE_LKM
+u_long (*ip_mcast_src)(int) = X_ip_mcast_src;
+#endif
+
+/*
+ * Send a copy of packet m out of the physical-interface vif vifp.
+ * ip is the packet's IP header; its header length and ip_len are used.
+ * Gives up silently if the copy cannot be made.
+ */
+static void
+phyint_send(ip, vifp, m)
+    struct ip *ip;
+    struct vif *vifp;
+    struct mbuf *m;
+{
+    register struct mbuf *pkt;
+    register int iphlen = ip->ip_hl << 2;
+
+    /*
+     * Make a new reference to the packet; make sure that
+     * the IP header is actually copied, not just referenced,
+     * so that ip_output() only scribbles on the copy.
+     */
+    pkt = m_copy(m, 0, M_COPYALL);
+    if (pkt != NULL && (M_HASCL(pkt) || pkt->m_len < iphlen))
+	pkt = m_pullup(pkt, iphlen);
+    if (pkt == NULL)
+	return;
+
+    /* A non-zero rate limit routes the copy through the token bucket. */
+    if (vifp->v_rate_limit != 0) {
+	tbf_control(vifp, pkt, mtod(pkt, struct ip *), ip->ip_len);
+	return;
+    }
+
+    tbf_send_packet(vifp, pkt);
+}
+
+/*
+ * Send a copy of packet m through the tunnel vif vifp, prefixed with an
+ * outer IP header taken from multicast_encap_iphdr whose source is the
+ * vif's local address and whose destination is its remote address.
+ * ip is the packet's own IP header on entry; ip->ip_len is used as the
+ * inner packet length.  Gives up silently if an mbuf cannot be
+ * obtained.
+ */
+static void
+encap_send(ip, vifp, m)
+    register struct ip *ip;
+    register struct vif *vifp;
+    register struct mbuf *m;
+{
+    register struct mbuf *mb_copy;
+    register struct ip *ip_copy;
+    register int i, len = ip->ip_len;
+
+    /*
+     * copy the old packet & pullup its IP header into the
+     * new mbuf so we can modify it.  Try to fill the new
+     * mbuf since if we don't the ethernet driver will.
+     */
+    MGETHDR(mb_copy, M_DONTWAIT, MT_HEADER);
+    if (mb_copy == NULL)
+	return;
+    /* leave max_linkhdr bytes free in front of the outer header */
+    mb_copy->m_data += max_linkhdr;
+    mb_copy->m_len = sizeof(multicast_encap_iphdr);
+
+    /* chain a copy of the whole original packet behind the new header mbuf */
+    if ((mb_copy->m_next = m_copy(m, 0, M_COPYALL)) == NULL) {
+	m_freem(mb_copy);
+	return;
+    }
+    /* pull up as much as fits in the header mbuf, but at most len bytes */
+    i = MHLEN - M_LEADINGSPACE(mb_copy);
+    if (i > len)
+	i = len;
+    mb_copy = m_pullup(mb_copy, i);
+    if (mb_copy == NULL)
+	return;
+    mb_copy->m_pkthdr.len = len + sizeof(multicast_encap_iphdr);
+
+    /*
+     * fill in the encapsulating IP header.
+     */
+    ip_copy = mtod(mb_copy, struct ip *);
+    *ip_copy = multicast_encap_iphdr;
+    ip_copy->ip_id = htons(ip_id++);
+    ip_copy->ip_len += len;
+    ip_copy->ip_src = vifp->v_lcl_addr;
+    ip_copy->ip_dst = vifp->v_rmt_addr;
+
+    /*
+     * turn the encapsulated IP header back into a valid one.
+     */
+    /* from here on ip refers to the inner header inside the copy */
+    ip = (struct ip *)((caddr_t)ip_copy + sizeof(multicast_encap_iphdr));
+    --ip->ip_ttl;
+    HTONS(ip->ip_len);
+    HTONS(ip->ip_off);
+    ip->ip_sum = 0;
+    /* step m_data past the outer header so in_cksum() covers the inner one */
+    mb_copy->m_data += sizeof(multicast_encap_iphdr);
+    ip->ip_sum = in_cksum(mb_copy, ip->ip_hl << 2);
+    mb_copy->m_data -= sizeof(multicast_encap_iphdr);
+
+    /* a zero rate limit bypasses the token bucket filter */
+    if (vifp->v_rate_limit == 0)
+	tbf_send_packet(vifp, mb_copy);
+    else
+	tbf_control(vifp, mb_copy, ip, ip_copy->ip_len);
+}
+
+/*
+ * De-encapsulate a packet and feed it back through ip input (this
+ * routine is called whenever IP gets a packet with proto type
+ * ENCAP_PROTO and a local destination address).
+ *
+ * When no encapsulating tunnel has been configured the packet is
+ * passed on to rip_input() unchanged.  Packets whose inner destination
+ * is not multicast, or whose outer source matches no tunnel vif, are
+ * counted and freed.
+ */
+void
+#ifdef MROUTE_LKM
+X_ipip_input(m, off, proto)
+#else
+ipip_input(m, off, proto)
+#endif
+	register struct mbuf *m;
+	int off;
+	int proto;
+{
+    struct ifnet *ifp = m->m_pkthdr.rcvif;
+    register struct ip *ip = mtod(m, struct ip *);
+    register int hlen = ip->ip_hl << 2;
+    register struct vif *vifp;
+
+    if (!have_encap_tunnel) {
+	    rip_input(m, off, proto);
+	    return;
+    }
+    /*
+     * dump the packet if it's not to a multicast destination or if
+     * we don't have an encapsulating tunnel with the source.
+     * Note:  This code assumes that the remote site IP address
+     * uniquely identifies the tunnel (i.e., that this site has
+     * at most one tunnel with the remote site).
+     */
+    /* the inner IP header starts hlen bytes after the outer one */
+    if (! IN_MULTICAST(ntohl(((struct ip *)((char *)ip + hlen))->ip_dst.s_addr))) {
+	++mrtstat.mrts_bad_tunnel;
+	m_freem(m);
+	return;
+    }
+    /*
+     * last_encap_src/last_encap_vif form a one-entry cache of the most
+     * recent outer source; the vif table is only searched on a miss.
+     */
+    if (ip->ip_src.s_addr != last_encap_src) {
+	register struct vif *vife;
+	
+	vifp = viftable;
+	vife = vifp + numvifs;
+	last_encap_src = ip->ip_src.s_addr;
+	last_encap_vif = 0;
+	for ( ; vifp < vife; ++vifp)
+	    if (vifp->v_rmt_addr.s_addr == ip->ip_src.s_addr) {
+		/* accept only an encapsulating (not source-routed) tunnel */
+		if ((vifp->v_flags & (VIFF_TUNNEL|VIFF_SRCRT))
+		    == VIFF_TUNNEL)
+		    last_encap_vif = vifp;
+		break;
+	    }
+    }
+    if ((vifp = last_encap_vif) == 0) {
+	/* forget the source so the next packet from it searches again */
+	last_encap_src = 0;
+	mrtstat.mrts_cant_tunnel++; /*XXX*/
+	m_freem(m);
+	if (mrtdebug)
+	  log(LOG_DEBUG, "ip_mforward: no tunnel with %lx\n",
+		(u_long)ntohl(ip->ip_src.s_addr));
+	return;
+    }
+    ifp = vifp->v_ifp;
+
+    /* drop any outer IP options, then step over the fixed outer header */
+    if (hlen > IP_HDR_LEN)
+      ip_stripoptions(m, (struct mbuf *) 0);
+    m->m_data += IP_HDR_LEN;
+    m->m_len -= IP_HDR_LEN;
+    m->m_pkthdr.len -= IP_HDR_LEN;
+    /* the inner packet is marked as received on the tunnel's interface */
+    m->m_pkthdr.rcvif = ifp;
+
+    (void) IF_HANDOFF(&ipintrq, m, NULL);
+	/*
+	 * normally we would need a "schednetisr(NETISR_IP)"
+	 * here but we were called by ip_input and it is going
+	 * to loop back & try to dequeue the packet we just
+	 * queued as soon as we return so we avoid the
+	 * unnecessary software interrupt.
+	 */
+}
+
+/*
+ * Token bucket filter module
+ */
+
+/*
+ * Decide what to do with packet m (length p_len, IP header ip) on vif
+ * vifp under the vif's token bucket: send it now, queue it, or drop it.
+ */
+static void
+tbf_control(vifp, m, ip, p_len)
+	register struct vif *vifp;
+	register struct mbuf *m;
+	register struct ip *ip;
+	register u_long p_len;
+{
+    register struct tbf *t = vifp->v_tbf;
+
+    /* drop if packet is too large */
+    if (p_len > MAX_BKT_SIZE) {
+	mrtstat.mrts_pkt2large++;
+	m_freem(m);
+	return;
+    }
+
+    tbf_update_tokens(vifp);
+
+    if (t->tbf_q_len == 0) {
+	/* queue empty, send packet if enough tokens */
+	if (p_len <= t->tbf_n_tok) {
+	    t->tbf_n_tok -= p_len;
+	    tbf_send_packet(vifp, m);
+	    return;
+	}
+
+	/* queue packet and timeout till later */
+	tbf_queue(vifp, m);
+	timeout(tbf_reprocess_q, (caddr_t)vifp, TBF_REPROCESS);
+	return;
+    }
+
+    /*
+     * Queue is not empty.  If it is already at its maximum length, a
+     * queued packet must first be selected for discard; when none can
+     * be, the new packet is the one that gets dropped.
+     */
+    if (t->tbf_q_len >= t->tbf_max_q_len && !tbf_dq_sel(vifp, ip)) {
+	mrtstat.mrts_q_overflow++;
+	m_freem(m);
+	return;
+    }
+
+    /* There is room now: queue the packet and process the queue. */
+    tbf_queue(vifp, m);
+    tbf_process_q(vifp);
+}
+
+/*
+ * Append packet m to the tail of the queue at the interface and bump
+ * the queue length.  Done between splnet() and splx().
+ */
+static void
+tbf_queue(vifp, m) 
+	register struct vif *vifp;
+	register struct mbuf *m;
+{
+    register int ipl = splnet();
+    register struct tbf *t = vifp->v_tbf;
+
+    if (t->tbf_t != NULL)
+	t->tbf_t->m_act = m;	/* link behind the current tail */
+    else
+	t->tbf_q = m;		/* queue was empty: m becomes the head */
+
+    /* Either way m is the new tail. */
+    t->tbf_t = m;
+
+#ifdef DIAGNOSTIC
+    /* Make sure we didn't get fed a bogus mbuf */
+    if (m->m_act)
+	panic("tbf_queue: m_act");
+#endif
+    m->m_act = NULL;
+
+    t->tbf_q_len++;
+
+    splx(ipl);
+}
+
+
+/*
+ * Process the queue at the interface: starting from the head, send
+ * packets for as long as the bucket holds enough tokens for the next
+ * one.  Stops at the first packet that cannot be paid for.
+ */
+static void
+tbf_process_q(vifp)
+    register struct vif *vifp;
+{
+    register struct mbuf *head;
+    register int pktlen;
+    register int ipl = splnet();
+    register struct tbf *t = vifp->v_tbf;
+
+    while (t->tbf_q_len > 0) {
+	head = t->tbf_q;
+	pktlen = mtod(head, struct ip *)->ip_len;
+
+	/* Not enough tokens for the head packet: leave the rest queued. */
+	if (pktlen > t->tbf_n_tok)
+	    break;
+
+	/* Pay for the packet, unlink it from the queue, and send it. */
+	t->tbf_n_tok -= pktlen;
+
+	t->tbf_q = head->m_act;
+	if (--t->tbf_q_len == 0)
+	    t->tbf_t = NULL;
+
+	head->m_act = NULL;
+	tbf_send_packet(vifp, head);
+    }
+    splx(ipl);
+}
+
+/*
+ * Timer callback scheduled by tbf_control() and by itself: refill the
+ * token bucket of the vif passed as xvifp, send what can now be sent,
+ * and re-arm the timer while packets remain queued.
+ *
+ * Does nothing when multicast routing is off, or when the vif no longer
+ * has a token bucket attached.
+ */
+static void
+tbf_reprocess_q(xvifp)
+	void *xvifp;
+{
+    register struct vif *vifp = xvifp;
+    if (ip_mrouter == NULL) 
+	return;
+
+    /*
+     * del_vif() zeroes the vif (and with it v_tbf) without cancelling
+     * a pending callout, so the bucket may be gone by the time we run.
+     */
+    if (vifp->v_tbf == NULL)
+	return;
+
+    tbf_update_tokens(vifp);
+
+    tbf_process_q(vifp);
+
+    if (vifp->v_tbf->tbf_q_len)
+	timeout(tbf_reprocess_q, (caddr_t)vifp, TBF_REPROCESS);
+}
+
+/*
+ * Selectively discard one queued packet to make room for a new one.
+ *
+ * The queue of vifp is scanned from the head; the first packet whose
+ * priority() is lower than that of the new packet (header ip) is unlinked
+ * and freed.  Returns 1 if a packet was dropped, 0 otherwise.
+ */
+static int
+tbf_dq_sel(vifp, ip)
+    register struct vif *vifp;
+    register struct ip *ip;
+{
+    int ipl = splnet();
+    int dropped = 0;
+    u_int newprio;
+    struct mbuf *cur, *prev;
+    struct mbuf **linkp;
+    struct tbf *tb = vifp->v_tbf;
+
+    newprio = priority(vifp, ip);
+
+    prev = NULL;
+    for (linkp = &tb->tbf_q; (cur = *linkp) != NULL;
+	 prev = cur, linkp = &cur->m_act) {
+	/* Keep anything at least as important as the newcomer. */
+	if (newprio <= priority(vifp, mtod(cur, struct ip *)))
+	    continue;
+
+	/* Unlink the victim; if it was the tail, the tail moves back. */
+	*linkp = cur->m_act;
+	if (cur == tb->tbf_t)
+	    tb->tbf_t = prev;
+	m_freem(cur);
+
+	/* Defensive: an emptied queue has no tail. */
+	if (--tb->tbf_q_len == 0)
+	    tb->tbf_t = NULL;
+
+	dropped = 1;
+	break;
+    }
+
+    splx(ipl);
+    if (dropped)
+	mrtstat.mrts_drop_sel++;
+    return(dropped);
+}
+
+/*
+ * Hand packet m to ip_output() for transmission on vif vifp; the call is
+ * made between splnet() and splx().
+ *
+ * Tunnel vifs (VIFF_TUNNEL) send through the vif's cached v_route with no
+ * multicast options.  All other vifs send with multicast options naming
+ * the vif's interface, a TTL one less than the packet's, loopback enabled
+ * and no multicast vif (-1).  The result of ip_output() is only reported
+ * through the DEBUG_XMIT log message; it is not returned to the caller.
+ */
+static void
+tbf_send_packet(vifp, m)
+    register struct vif *vifp;
+    register struct mbuf *m;
+{
+    struct ip_moptions imo;
+    int error;
+    static struct route ro;
+    int s = splnet();
+
+    if (vifp->v_flags & VIFF_TUNNEL) {
+	/* If tunnel options */
+	ip_output(m, (struct mbuf *)0, &vifp->v_route,
+		  IP_FORWARDING, (struct ip_moptions *)0);
+    } else {
+	imo.imo_multicast_ifp  = vifp->v_ifp;
+	imo.imo_multicast_ttl  = mtod(m, struct ip *)->ip_ttl - 1;
+	imo.imo_multicast_loop = 1;
+	imo.imo_multicast_vif  = -1;
+
+	/*
+	 * Re-entrancy should not be a problem here, because
+	 * the packets that we send out and are looped back at us
+	 * should get rejected because they appear to come from
+	 * the loopback interface, thus preventing looping.
+	 */
+	error = ip_output(m, (struct mbuf *)0, &ro,
+			  IP_FORWARDING, &imo);
+
+	/*
+	 * The pointer difference has type ptrdiff_t; cast it so the
+	 * argument really matches the %d conversion.
+	 */
+	if (mrtdebug & DEBUG_XMIT)
+	    log(LOG_DEBUG, "phyint_send on vif %d err %d\n", 
+		(int)(vifp - viftable), error);
+    }
+    splx(s);
+}
+
+/*
+ * Refill the token bucket of vifp: take the current time, compute the
+ * time elapsed since tbf_last_pkt_t with TV_DELTA, credit the matching
+ * number of tokens and clamp the bucket at MAX_BKT_SIZE.
+ * NOTE(review): the formula below divides the delta by 1000000, so it is
+ * presumably in microseconds -- confirm against TV_DELTA.
+ */
+static void
+tbf_update_tokens(vifp)
+    register struct vif *vifp;
+{
+    struct timeval now;
+    u_long elapsed;
+    int ipl = splnet();
+    struct tbf *tb = vifp->v_tbf;
+
+    GET_TIME(now);
+
+    TV_DELTA(now, tb->tbf_last_pkt_t, elapsed);
+
+    /* Remember when the bucket was last topped up. */
+    tb->tbf_last_pkt_t = now;
+
+    /*
+     * Tokens earned are "time in seconds" * "bytes/second":
+     *
+     * (elapsed / 1000000) * (v_rate_limit * 1000 * (1000/1024) / 8)
+     *
+     * The (1000/1024) factor was introduced in add_vif so that the
+     * divide can be done as a shift.
+     */
+    tb->tbf_n_tok += elapsed * vifp->v_rate_limit / 1024 / 8;
+
+    /* The bucket never holds more than MAX_BKT_SIZE tokens. */
+    if (tb->tbf_n_tok > MAX_BKT_SIZE)
+	tb->tbf_n_tok = MAX_BKT_SIZE;
+
+    splx(ipl);
+}
+
+/*
+ * Classify a packet for selective discard and return its priority
+ * (a higher value is more important).  vifp is not used.
+ *
+ * Temporary hack; may add a general packet classifier some day.
+ */
+static int
+priority(vifp, ip)
+    register struct vif *vifp;
+    register struct ip *ip;
+{
+    /*
+     * The UDP port space is divided up into four priority ranges,
+     * selected by the top two bits of the destination port:
+     * [0, 16384)     : unclassified - lowest priority
+     * [16384, 32768) : audio - highest priority
+     * [32768, 49152) : whiteboard - medium priority
+     * [49152, 65536) : video - low priority
+     */
+    static const int range_prio[4] = { 50, 70, 60, 55 };
+    struct udphdr *udp;
+    u_short dport;
+    int prio;
+
+    /* Everything that is not UDP is unclassified. */
+    if (ip->ip_p != IPPROTO_UDP)
+	return 50;
+
+    /* The UDP header follows the IP header, which is ip_hl words long. */
+    udp = (struct udphdr *)(((char *)ip) + (ip->ip_hl << 2));
+    dport = ntohs(udp->uh_dport);
+    prio = range_prio[(dport >> 14) & 0x3];
+
+    if (tbfdebug > 1)
+	log(LOG_DEBUG, "port %x prio%d\n", dport, prio);
+
+    return prio;
+}
+
+/*
+ * End of token bucket filter modifications 
+ */
+
+/*
+ * Attach the RSVP socket so to the vif whose number is read from sopt.
+ *
+ * Returns EOPNOTSUPP if so is not a raw RSVP socket, the sooptcopyin()
+ * error if the vif number cannot be read, EADDRNOTAVAIL if the vif number
+ * is not legal, EADDRINUSE if the vif already has an RSVP socket, and 0
+ * on success.
+ */
+int
+ip_rsvp_vif_init(so, sopt)
+	struct socket *so;
+	struct sockopt *sopt;
+{
+    int error, vifnum, ipl;
+
+    if (rsvpdebug)
+	printf("ip_rsvp_vif_init: so_type = %d, pr_protocol = %d\n",
+	       so->so_type, so->so_proto->pr_protocol);
+
+    /* Only a raw RSVP socket may claim a vif. */
+    if (!(so->so_type == SOCK_RAW && so->so_proto->pr_protocol == IPPROTO_RSVP))
+	return EOPNOTSUPP;
+
+    /* Fetch the vif number supplied by the caller. */
+    error = sooptcopyin(sopt, &vifnum, sizeof vifnum, sizeof vifnum);
+    if (error)
+	    return (error);
+
+    if (rsvpdebug)
+	printf("ip_rsvp_vif_init: vif = %d rsvp_on = %d\n", vifnum, rsvp_on);
+
+    ipl = splnet();
+
+    if (!legal_vif_num(vifnum)) {
+	/* No such vif. */
+	error = EADDRNOTAVAIL;
+    } else if (viftable[vifnum].v_rsvpd != NULL) {
+	/* Another socket already owns this vif. */
+	error = EADDRINUSE;
+    } else {
+	struct vif *vp = &viftable[vifnum];
+
+	vp->v_rsvpd = so;
+	/*
+	 * Count the vif only once, so the RSVP counter cannot be
+	 * over-incremented if something slips up.
+	 */
+	if (!vp->v_rsvp_on) {
+	    vp->v_rsvp_on = 1;
+	    rsvp_on++;
+	}
+    }
+
+    splx(ipl);
+    return error;
+}
+
+/*
+ * Detach the RSVP socket from the vif whose number is read from sopt.
+ *
+ * Returns EOPNOTSUPP if so is not a raw RSVP socket, the sooptcopyin()
+ * error if the vif number cannot be read, EADDRNOTAVAIL if the vif number
+ * is not legal, and 0 on success.
+ */
+int
+ip_rsvp_vif_done(so, sopt)
+	struct socket *so;
+	struct sockopt *sopt;
+{
+	int error, vifnum, ipl;
+	struct vif *vp;
+
+	if (rsvpdebug)
+		printf("ip_rsvp_vif_done: so_type = %d, pr_protocol = %d\n",
+		       so->so_type, so->so_proto->pr_protocol);
+
+	/* Only a raw RSVP socket may release a vif. */
+	if (!(so->so_type == SOCK_RAW &&
+	    so->so_proto->pr_protocol == IPPROTO_RSVP))
+		return EOPNOTSUPP;
+
+	error = sooptcopyin(sopt, &vifnum, sizeof vifnum, sizeof vifnum);
+	if (error)
+		return (error);
+
+	ipl = splnet();
+
+	if (legal_vif_num(vifnum)) {
+		vp = &viftable[vifnum];
+
+		if (rsvpdebug)
+			printf("ip_rsvp_vif_done: v_rsvpd = %p so = %p\n",
+			       vp->v_rsvpd, so);
+
+		vp->v_rsvpd = NULL;
+		/*
+		 * Uncount the vif only if it was counted, so the RSVP
+		 * counter cannot be over-decremented if something slips up.
+		 */
+		if (vp->v_rsvp_on) {
+			vp->v_rsvp_on = 0;
+			rsvp_on--;
+		}
+	} else {
+		/* No such vif. */
+		error = EADDRNOTAVAIL;
+	}
+
+	splx(ipl);
+	return error;
+}
+
+/*
+ * Detach socket so from every vif it is attached to.  Sockets that are
+ * not raw RSVP sockets are ignored.
+ */
+void
+ip_rsvp_force_done(so)
+    struct socket *so;
+{
+    int vifi;
+    int ipl;
+
+    /* Don't bother if it is not the right type of socket. */
+    if (!(so->so_type == SOCK_RAW && so->so_proto->pr_protocol == IPPROTO_RSVP))
+	return;
+
+    ipl = splnet();
+
+    /*
+     * A socket may legally be attached to more than one vif, so every
+     * entry has to be examined.
+     */
+    for (vifi = 0; vifi < numvifs; vifi++) {
+	if (viftable[vifi].v_rsvpd != so)
+	    continue;
+
+	viftable[vifi].v_rsvpd = NULL;
+	/*
+	 * Uncount the vif only if it was counted, so the RSVP counter
+	 * cannot be over-decremented if something slips up.
+	 */
+	if (viftable[vifi].v_rsvp_on) {
+	    viftable[vifi].v_rsvp_on = 0;
+	    rsvp_on--;
+	}
+    }
+
+    splx(ipl);
+}
+
+/*
+ * Input routine for RSVP packets.
+ *
+ * The packet is delivered to the RSVP socket attached to the vif whose
+ * interface it arrived on.  If there is no such vif or socket, it is passed
+ * to rip_input() when the old-style ip_rsvpd socket is set and dropped
+ * otherwise.  Packets are also dropped while rsvp_on is zero.
+ */
+void
+rsvp_input(m, off, proto)
+	struct mbuf *m;
+	int off;
+	int proto;
+{
+    static struct sockaddr_in rsvp_src = { sizeof rsvp_src, AF_INET };
+    struct ip *iph = mtod(m, struct ip *);
+    struct ifnet *rcvif;
+    struct socket *rsvpso;
+    int vifi, ipl, failed;
+
+    if (rsvpdebug)
+	printf("rsvp_input: rsvp_on %d\n",rsvp_on);
+
+    /*
+     * Packets can still arrive with rsvp_on = 0 if there is a local member
+     * of the group the RSVP packet is addressed to; those are thrown away.
+     */
+    if (!rsvp_on) {
+	m_freem(m);
+	return;
+    }
+
+    ipl = splnet();
+
+    if (rsvpdebug)
+	printf("rsvp_input: check vifs\n");
+
+#ifdef DIAGNOSTIC
+    if (!(m->m_flags & M_PKTHDR))
+	    panic("rsvp_input no hdr");
+#endif
+
+    /* Find which vif the packet arrived on. */
+    rcvif = m->m_pkthdr.rcvif;
+    vifi = 0;
+    while (vifi < numvifs && viftable[vifi].v_ifp != rcvif)
+	vifi++;
+
+    /* The per-vif RSVP socket, if the vif exists and has one. */
+    rsvpso = (vifi < numvifs) ? viftable[vifi].v_rsvpd : NULL;
+
+    if (rsvpso == NULL) {
+	/*
+	 * If the old-style non-vif-associated socket is set,
+	 * then use it.  Otherwise, drop the packet since there
+	 * is no specific socket for this vif.
+	 */
+	if (ip_rsvpd != NULL) {
+	    if (rsvpdebug)
+		printf("rsvp_input: Sending packet up old-style socket\n");
+	    rip_input(m, off, proto);  /* xxx */
+	} else {
+	    if (rsvpdebug) {
+		if (vifi == numvifs)
+		    printf("rsvp_input: Can't find vif for packet.\n");
+		else
+		    printf("rsvp_input: No socket defined for vif %d\n",vifi);
+	    }
+	    m_freem(m);
+	}
+	splx(ipl);
+	return;
+    }
+
+    rsvp_src.sin_addr = iph->ip_src;
+
+    if (rsvpdebug && m)
+	printf("rsvp_input: m->m_len = %d, sbspace() = %ld\n",
+	       m->m_len,sbspace(&(rsvpso->so_rcv)));
+
+    failed = socket_send(rsvpso, m, &rsvp_src) < 0;
+    if (rsvpdebug) {
+	if (failed)
+	    printf("rsvp_input: Failed to append to socket\n");
+	else
+	    printf("rsvp_input: send packet up\n");
+    }
+
+    splx(ipl);
+}
+
+#ifdef MROUTE_LKM
+#include <sys/conf.h>
+#include <sys/exec.h>
+#include <sys/sysent.h>
+#include <sys/lkm.h>
+
+MOD_MISC("ip_mroute_mod")
+
+/*
+ * Load/unload handler for the multicast routing LKM.
+ *
+ * LKM_E_LOAD saves the current multicast hook pointers and the ENCAP_PROTO
+ * input routine, then installs the X_* implementations and sets
+ * ip_mrtproto to IGMP_DVMRP.  It returns EEXIST if the module already
+ * exists or ip_mrtproto is already set.
+ * LKM_E_UNLOAD puts the saved pointers back and clears ip_mrtproto; it
+ * returns EINVAL while ip_mrouter is set.
+ * Any other command yields EINVAL.
+ */
+static int
+ip_mroute_mod_handle(struct lkm_table *lkmtp, int cmd)
+{
+	int i;
+	struct lkm_misc	*args = lkmtp->private.lkm_misc;
+	int err = 0;
+
+	switch(cmd) {
+		/*
+		 * Saved copies of the hooks replaced at load time; static so
+		 * they are still available when the unload command arrives.
+		 */
+		static int (*old_ip_mrouter_cmd)();
+		static int (*old_ip_mrouter_done)();
+		static int (*old_ip_mforward)();
+		static int (*old_mrt_ioctl)();
+		static void (*old_proto4_input)();
+		static int (*old_legal_vif_num)();
+		extern struct protosw inetsw[];
+
+	case LKM_E_LOAD:
+		if(lkmexists(lkmtp) || ip_mrtproto)
+		  return(EEXIST);
+		/* Save each hook, then point it at this module's version. */
+		old_ip_mrouter_cmd = ip_mrouter_cmd;
+		ip_mrouter_cmd = X_ip_mrouter_cmd;
+		old_ip_mrouter_done = ip_mrouter_done;
+		ip_mrouter_done = X_ip_mrouter_done;
+		old_ip_mforward = ip_mforward;
+		ip_mforward = X_ip_mforward;
+		old_mrt_ioctl = mrt_ioctl;
+		mrt_ioctl = X_mrt_ioctl;
+              old_proto4_input = inetsw[ip_protox[ENCAP_PROTO]].pr_input;
+              inetsw[ip_protox[ENCAP_PROTO]].pr_input = X_ipip_input;
+		old_legal_vif_num = legal_vif_num;
+		legal_vif_num = X_legal_vif_num;
+		ip_mrtproto = IGMP_DVMRP;
+
+		printf("\nIP multicast routing loaded\n");
+		break;
+
+	case LKM_E_UNLOAD:
+		/* Refuse to unload while ip_mrouter is still set. */
+		if (ip_mrouter)
+		  return EINVAL;
+
+		/* Put back everything saved at load time. */
+		ip_mrouter_cmd = old_ip_mrouter_cmd;
+		ip_mrouter_done = old_ip_mrouter_done;
+		ip_mforward = old_ip_mforward;
+		mrt_ioctl = old_mrt_ioctl;
+              inetsw[ip_protox[ENCAP_PROTO]].pr_input = old_proto4_input;
+		legal_vif_num = old_legal_vif_num;
+		ip_mrtproto = 0;
+		break;
+
+	default:
+		err = EINVAL;
+		break;
+	}
+
+	return(err);
+}
+
+/*
+ * LKM entry point for the multicast routing module.  The body is the
+ * DISPATCH macro, which is given ip_mroute_mod_handle twice and nosys.
+ * NOTE(review): presumably these are the load, unload and stat handlers,
+ * and the macro supplies the return value -- confirm against <sys/lkm.h>.
+ */
+int
+ip_mroute_mod(struct lkm_table *lkmtp, int cmd, int ver) {
+	DISPATCH(lkmtp, cmd, ver, ip_mroute_mod_handle, ip_mroute_mod_handle,
+		 nosys);
+}
+
+#endif /* MROUTE_LKM */
+#endif /* MROUTING */
diff --git a/sys/netinet/ip_mroute.h b/sys/netinet/ip_mroute.h
new file mode 100644
index 0000000..8c990c8
--- /dev/null
+++ b/sys/netinet/ip_mroute.h
@@ -0,0 +1,267 @@
+/*
+ * Copyright (c) 1989 Stephen Deering.
+ * Copyright (c) 1992, 1993
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * Stephen Deering of Stanford University.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)ip_mroute.h	8.1 (Berkeley) 6/10/93
+ * $FreeBSD$
+ */
+
+#ifndef _NETINET_IP_MROUTE_H_
+#define _NETINET_IP_MROUTE_H_
+
+/*
+ * Definitions for IP multicast forwarding.
+ *
+ * Written by David Waitzman, BBN Labs, August 1988.
+ * Modified by Steve Deering, Stanford, February 1989.
+ * Modified by Ajit Thyagarajan, PARC, August 1993.
+ * Modified by Ajit Thyagarajan, PARC, August 1994.
+ *
+ * MROUTING Revision: 3.3.1.3
+ */
+
+
+/*
+ * Multicast Routing set/getsockopt commands.
+ */
+#define	MRT_INIT	100	/* initialize forwarder */
+#define	MRT_DONE	101	/* shut down forwarder */
+#define	MRT_ADD_VIF	102	/* create virtual interface */
+#define	MRT_DEL_VIF	103	/* delete virtual interface */
+#define MRT_ADD_MFC	104	/* insert forwarding cache entry */
+#define MRT_DEL_MFC	105	/* delete forwarding cache entry */
+#define MRT_VERSION	106	/* get kernel version number */
+#define MRT_ASSERT      107     /* enable PIM assert processing */
+
+
+#define GET_TIME(t)	microtime(&t)
+
+/*
+ * Types and macros for handling bitmaps with one bit per virtual interface.
+ */
+#define	MAXVIFS 32
+typedef u_long vifbitmap_t;
+typedef u_short vifi_t;		/* type of a vif index */
+#define ALL_VIFS (vifi_t)-1
+
+/*
+ * n is a vif index (0 .. MAXVIFS-1) and m a vifbitmap_t lvalue.
+ * The constant is widened to vifbitmap_t before shifting: a plain
+ * "1 << 31" overflows a signed int, and where vifbitmap_t is wider than
+ * int the sign-extended result would also touch the bits above bit 31.
+ */
+#define	VIFM_SET(n, m)		((m) |= ((vifbitmap_t)1 << (n)))
+#define	VIFM_CLR(n, m)		((m) &= ~((vifbitmap_t)1 << (n)))
+#define	VIFM_ISSET(n, m)	((m) & ((vifbitmap_t)1 << (n)))
+#define	VIFM_CLRALL(m)		((m) = 0x00000000)
+#define	VIFM_COPY(mfrom, mto)	((mto) = (mfrom))
+#define	VIFM_SAME(m1, m2)	((m1) == (m2))
+
+
+/*
+ * Argument structure for MRT_ADD_VIF.
+ * (MRT_DEL_VIF takes a single vifi_t argument.)
+ */
+struct vifctl {
+	vifi_t	vifc_vifi;	    	/* the index of the vif to be added */
+	u_char	vifc_flags;     	/* VIFF_ flags defined below */
+	u_char	vifc_threshold; 	/* min ttl required to forward on vif */
+	u_int	vifc_rate_limit;	/* max rate */
+	struct	in_addr vifc_lcl_addr;	/* local interface address */
+	struct	in_addr vifc_rmt_addr;	/* remote address (tunnels only) */
+};
+
+#define	VIFF_TUNNEL	0x1		/* vif represents a tunnel end-point */
+#define VIFF_SRCRT	0x2		/* tunnel uses IP source routing */
+
+/*
+ * Argument structure for MRT_ADD_MFC and MRT_DEL_MFC
+ * (mfcc_tos to be added at a future point)
+ */
+struct mfcctl {
+    struct in_addr  mfcc_origin;		/* ip origin of mcasts       */
+    struct in_addr  mfcc_mcastgrp; 		/* multicast group associated*/
+    vifi_t	    mfcc_parent;   		/* incoming vif              */
+    u_char	    mfcc_ttls[MAXVIFS]; 	/* forwarding ttls on vifs   */
+};
+
+/*
+ * The kernel's multicast routing statistics.
+ */
+struct mrtstat {
+    u_long	mrts_mfc_lookups;	/* # forw. cache hash table hits   */
+    u_long	mrts_mfc_misses;	/* # forw. cache hash table misses */
+    u_long	mrts_upcalls;		/* # calls to mrouted              */
+    u_long	mrts_no_route;		/* no route for packet's origin    */
+    u_long	mrts_bad_tunnel;	/* malformed tunnel options        */
+    u_long	mrts_cant_tunnel;	/* no room for tunnel options      */
+    u_long	mrts_wrong_if;		/* arrived on wrong interface	   */
+    u_long	mrts_upq_ovflw;		/* upcall Q overflow		   */
+    u_long	mrts_cache_cleanups;	/* # entries with no upcalls 	   */
+    u_long  	mrts_drop_sel;     	/* pkts dropped selectively        */
+    u_long  	mrts_q_overflow;    	/* pkts dropped - Q overflow       */
+    u_long  	mrts_pkt2large;     	/* pkts dropped - size > BKT SIZE  */
+    u_long	mrts_upq_sockfull;	/* upcalls dropped - socket full */
+};
+
+/*
+ * Argument structure used by mrouted to get src-grp pkt counts
+ *
+ * NOTE(review): the field meanings below are inferred from the names and
+ * from the matching mfc_pkt_cnt / mfc_byte_cnt / mfc_wrong_if counters in
+ * struct mfc -- confirm against the ioctl handler.
+ */
+struct sioc_sg_req {
+    struct in_addr src;		/* source of the (src, grp) pair */
+    struct in_addr grp;		/* multicast group of the pair */
+    u_long pktcnt;		/* presumably pkt count for src-grp */
+    u_long bytecnt;		/* presumably byte count for src-grp */
+    u_long wrong_if;		/* presumably wrong-interface count */
+};
+
+/*
+ * Argument structure used by mrouted to get vif pkt counts
+ */
+struct sioc_vif_req {
+    vifi_t vifi;		/* vif number				*/
+    u_long icount;		/* Input packet count on vif		*/
+    u_long ocount;		/* Output packet count on vif		*/
+    u_long ibytes;		/* Input byte count on vif		*/
+    u_long obytes;		/* Output byte count on vif		*/
+};
+    
+
+/*
+ * The kernel's virtual-interface structure.
+ */
+struct vif {
+    u_char   		v_flags;     	/* VIFF_ flags defined above         */
+    u_char   		v_threshold;	/* min ttl required to forward on vif*/
+    u_int      		v_rate_limit; 	/* max rate			     */
+    struct tbf 	       *v_tbf;       	/* token bucket structure at intf.   */
+    struct in_addr 	v_lcl_addr;   	/* local interface address           */
+    struct in_addr 	v_rmt_addr;   	/* remote address (tunnels only)     */
+    struct ifnet       *v_ifp;	     	/* pointer to interface              */
+    u_long		v_pkt_in;	/* # pkts in on interface            */
+    u_long		v_pkt_out;	/* # pkts out on interface           */
+    u_long		v_bytes_in;	/* # bytes in on interface	     */
+    u_long		v_bytes_out;	/* # bytes out on interface	     */
+    struct route	v_route;	/* cached route if this is a tunnel */
+    u_int		v_rsvp_on;	/* RSVP listening on this vif */
+    struct socket      *v_rsvpd;	/* RSVP daemon socket */
+};
+
+/*
+ * The kernel's multicast forwarding cache entry structure 
+ * (A field for the type of service (mfc_tos) is to be added 
+ * at a future point)
+ */
+struct mfc {
+    struct in_addr  mfc_origin;	 		/* IP origin of mcasts   */
+    struct in_addr  mfc_mcastgrp;  		/* multicast group associated*/
+    vifi_t	    mfc_parent; 		/* incoming vif              */
+    u_char	    mfc_ttls[MAXVIFS]; 		/* forwarding ttls on vifs   */
+    u_long	    mfc_pkt_cnt;		/* pkt count for src-grp     */
+    u_long	    mfc_byte_cnt;		/* byte count for src-grp    */
+    u_long	    mfc_wrong_if;		/* wrong if for src-grp	     */
+    int		    mfc_expire;			/* time to clean entry up    */
+    struct timeval  mfc_last_assert;		/* last time I sent an assert*/
+    struct rtdetq  *mfc_stall;			/* q of packets awaiting mfc */
+    struct mfc     *mfc_next;			/* next mfc entry            */
+};
+
+/*
+ * Struct used to communicate from kernel to multicast router
+ * note the convenient similarity to an IP packet
+ *
+ * The IGMPMSG_* constants defined below are the values of im_msgtype.
+ */
+struct igmpmsg {
+    u_long	    unused1;			/* not used */
+    u_long	    unused2;			/* not used */
+    u_char	    im_msgtype;			/* what type of message	    */
+#define IGMPMSG_NOCACHE		1
+#define IGMPMSG_WRONGVIF	2
+    u_char	    im_mbz;			/* must be zero		    */
+    u_char	    im_vif;			/* vif rec'd on		    */
+    u_char	    unused3;			/* not used */
+    /* NOTE(review): presumably src/dst of the triggering packet -- confirm */
+    struct in_addr  im_src, im_dst;
+};
+
+/*
+ * Argument structure used for pkt info. while upcall is made
+ */
+struct rtdetq {
+    struct mbuf 	*m;		/* A copy of the packet		    */
+    struct ifnet	*ifp;		/* Interface pkt came in on	    */
+    vifi_t		xmt_vif;	/* Saved copy of imo_multicast_vif  */
+#ifdef UPCALL_TIMING
+    struct timeval	t;		/* Timestamp */
+#endif /* UPCALL_TIMING */
+    struct rtdetq	*next;		/* Next in list of packets          */
+};
+
+#define MFCTBLSIZ	256
+#if (MFCTBLSIZ & (MFCTBLSIZ - 1)) == 0	  /* from sys:route.h */
+#define MFCHASHMOD(h)	((h) & (MFCTBLSIZ - 1))
+#else
+#define MFCHASHMOD(h)	((h) % MFCTBLSIZ)
+#endif
+
+#define MAX_UPQ	4		/* max. no of pkts in upcall Q */
+
+/*
+ * Token Bucket filter code 
+ */
+#define MAX_BKT_SIZE    10000             /* 10K bytes size 		*/
+#define MAXQSIZE        10                /* max # of pkts in queue 	*/
+
+/*
+ * the token bucket filter at each vif
+ */
+struct tbf
+{
+    struct timeval tbf_last_pkt_t; /* arr. time of last pkt 	*/
+    u_long tbf_n_tok;      	/* no of tokens in bucket 	*/
+    u_long tbf_q_len;    	/* length of queue at this vif	*/
+    u_long tbf_max_q_len;	/* max. queue length		*/
+    struct mbuf *tbf_q;		/* Packet queue			*/
+    struct mbuf *tbf_t;		/* tail-insertion pointer	*/
+};
+
+#ifdef _KERNEL
+
+struct sockopt;
+
+extern int	(*ip_mrouter_set) __P((struct socket *, struct sockopt *));
+extern int	(*ip_mrouter_get) __P((struct socket *, struct sockopt *));
+extern int	(*ip_mrouter_done) __P((void));
+#ifdef MROUTING
+extern int	(*mrt_ioctl) __P((int, caddr_t));
+#else
+extern int	(*mrt_ioctl) __P((int, caddr_t, struct proc *));
+#endif
+
+#endif /* _KERNEL */
+
+#endif /* _NETINET_IP_MROUTE_H_ */
diff --git a/sys/netinet/ip_nat.c b/sys/netinet/ip_nat.c
new file mode 100644
index 0000000..816d8e7
--- /dev/null
+++ b/sys/netinet/ip_nat.c
@@ -0,0 +1,2739 @@
+/*
+ * Copyright (C) 1995-2000 by Darren Reed.
+ *
+ * Redistribution and use in source and binary forms are permitted
+ * provided that this notice is preserved and due credit is given
+ * to the original author and the contributors.
+ *
+ * Added redirect stuff and a LOT of bug fixes. (mcn@EnGarde.com)
+ */
+#if !defined(lint)
+/*static const char rcsid[] = "@(#)$Id: ip_nat.c,v 2.37.2.16 2000/07/18 13:57:40 darrenr Exp $";*/
+static const char rcsid[] = "@(#)$FreeBSD$";
+#endif
+
+#if defined(__FreeBSD__) && defined(KERNEL) && !defined(_KERNEL)
+#define _KERNEL
+#endif
+
+#include <sys/errno.h>
+#include <sys/types.h>
+#include <sys/param.h>
+#include <sys/time.h>
+#include <sys/file.h>
+#if defined(__NetBSD__) && (NetBSD >= 199905) && !defined(IPFILTER_LKM) && \
+    defined(_KERNEL)
+# include "opt_ipfilter_log.h"
+#endif
+#if !defined(_KERNEL) && !defined(KERNEL)
+# include <stdio.h>
+# include <string.h>
+# include <stdlib.h>
+#endif
+#if (defined(KERNEL) || defined(_KERNEL)) && (__FreeBSD_version >= 220000)
+# include <sys/filio.h>
+# include <sys/fcntl.h>
+#else
+# include <sys/ioctl.h>
+#endif
+#include <sys/fcntl.h>
+#include <sys/uio.h>
+#ifndef linux
+# include <sys/protosw.h>
+#endif
+#include <sys/socket.h>
+#if defined(_KERNEL) && !defined(linux)
+# include <sys/systm.h>
+#endif
+#if !defined(__SVR4) && !defined(__svr4__)
+# ifndef linux
+#  include <sys/mbuf.h>
+# endif
+#else
+# include <sys/filio.h>
+# include <sys/byteorder.h>
+# ifdef _KERNEL
+#  include <sys/dditypes.h>
+# endif
+# include <sys/stream.h>
+# include <sys/kmem.h>
+#endif
+#if __FreeBSD_version >= 300000
+# include <sys/queue.h>
+#endif
+#include <net/if.h>
+#if __FreeBSD_version >= 300000
+# include <net/if_var.h>
+# if defined(_KERNEL) && !defined(IPFILTER_LKM)
+#  include "opt_ipfilter.h"
+# endif
+#endif
+#ifdef sun
+# include <net/af.h>
+#endif
+#include <net/route.h>
+#include <netinet/in.h>
+#include <netinet/in_systm.h>
+#include <netinet/ip.h>
+
+#ifdef __sgi
+# ifdef IFF_DRVRLOCK /* IRIX6 */
+#include <sys/hashing.h>
+#include <netinet/in_var.h>
+# endif
+#endif
+
+#ifdef RFC1825
+# include <vpn/md5.h>
+# include <vpn/ipsec.h>
+extern struct ifnet vpnif;
+#endif
+
+#ifndef linux
+# include <netinet/ip_var.h>
+#endif
+#include <netinet/tcp.h>
+#include <netinet/udp.h>
+#include <netinet/ip_icmp.h>
+#include "netinet/ip_compat.h"
+#include <netinet/tcpip.h>
+#include "netinet/ip_fil.h"
+#include "netinet/ip_proxy.h"
+#include "netinet/ip_nat.h"
+#include "netinet/ip_frag.h"
+#include "netinet/ip_state.h"
+#if (__FreeBSD_version >= 300000)
+# include <sys/malloc.h>
+#endif
+#ifndef	MIN
+# define	MIN(a,b)	(((a)<(b))?(a):(b))
+#endif
+#undef	SOCKADDR_IN
+#define	SOCKADDR_IN	struct sockaddr_in
+
+nat_t	**nat_table[2] = { NULL, NULL },
+	*nat_instances = NULL;
+ipnat_t	*nat_list = NULL;
+u_int	ipf_nattable_sz = NAT_TABLE_SZ;
+u_int	ipf_natrules_sz = NAT_SIZE;
+u_int	ipf_rdrrules_sz = RDR_SIZE;
+u_int	ipf_hostmap_sz = HOSTMAP_SIZE;
+u_32_t	nat_masks = 0;
+u_32_t	rdr_masks = 0;
+ipnat_t	**nat_rules = NULL;
+ipnat_t	**rdr_rules = NULL;
+hostmap_t	**maptable  = NULL;
+
+u_long	fr_defnatage = DEF_NAT_AGE,
+	fr_defnaticmpage = 6;		/* 3 seconds */
+natstat_t nat_stats;
+int	fr_nat_lock = 0;
+#if	(SOLARIS || defined(__sgi)) && defined(_KERNEL)
+extern	kmutex_t	ipf_rw;
+extern	KRWLOCK_T	ipf_nat;
+#endif
+
+static	int	nat_flushtable __P((void));
+static	int	nat_clearlist __P((void));
+static	void	nat_addnat __P((struct ipnat *));
+static	void	nat_addrdr __P((struct ipnat *));
+static	void	nat_delete __P((struct nat *));
+static	void	nat_delrdr __P((struct ipnat *));
+static	void	nat_delnat __P((struct ipnat *));
+static	int	fr_natgetent __P((caddr_t));
+static	int	fr_natgetsz __P((caddr_t));
+static	int	fr_natputent __P((caddr_t));
+static	void	nat_tabmove __P((nat_t *, u_32_t));
+static	int	nat_match __P((fr_info_t *, ipnat_t *, ip_t *));
+static	hostmap_t *nat_hostmap __P((ipnat_t *, struct in_addr,
+				    struct in_addr));
+static	void	nat_hostmapdel __P((struct hostmap *));
+
+
+/*
+ * Allocate and zero the NAT tables: both halves of nat_table, the
+ * nat_rules and rdr_rules hash tables and the host map table, each sized
+ * by its ipf_*_sz variable.
+ *
+ * Returns 0 on success and -1 as soon as one allocation fails.
+ * NOTE(review): tables allocated before the failing one are not released
+ * here -- confirm that the caller cleans up.
+ */
+int nat_init()
+{
+	KMALLOCS(nat_table[0], nat_t **, sizeof(nat_t *) * ipf_nattable_sz);
+	if (nat_table[0] == NULL)
+		return -1;
+	bzero((char *)nat_table[0], ipf_nattable_sz * sizeof(nat_t *));
+
+	KMALLOCS(nat_table[1], nat_t **, sizeof(nat_t *) * ipf_nattable_sz);
+	if (nat_table[1] == NULL)
+		return -1;
+	bzero((char *)nat_table[1], ipf_nattable_sz * sizeof(nat_t *));
+
+	KMALLOCS(nat_rules, ipnat_t **, sizeof(ipnat_t *) * ipf_natrules_sz);
+	if (nat_rules == NULL)
+		return -1;
+	bzero((char *)nat_rules, ipf_natrules_sz * sizeof(ipnat_t *));
+
+	KMALLOCS(rdr_rules, ipnat_t **, sizeof(ipnat_t *) * ipf_rdrrules_sz);
+	if (rdr_rules == NULL)
+		return -1;
+	bzero((char *)rdr_rules, ipf_rdrrules_sz * sizeof(ipnat_t *));
+
+	KMALLOCS(maptable, hostmap_t **, sizeof(hostmap_t *) * ipf_hostmap_sz);
+	if (maptable == NULL)
+		return -1;
+	bzero((char *)maptable, sizeof(hostmap_t *) * ipf_hostmap_sz);
+
+	return 0;
+}
+
+
+/*
+ * Add rule n to the rdr_rules hash table.
+ *
+ * The value countbits() returns for the rule's in_outmsk is used as a bit
+ * index into rdr_masks (skipped when it is negative or 32).  The rule is
+ * hashed on in_outip & in_outmsk and appended to the end of its bucket's
+ * chain, with in_prnext pointing at the link that refers to it.
+ */
+static void nat_addrdr(n)
+ipnat_t *n;
+{
+	ipnat_t **np;
+	u_32_t j;
+	u_int hv;
+	int k;
+
+	k = countbits(n->in_outmsk);
+	/*
+	 * Shift an unsigned one: k can be 31, and 1 << 31 overflows a
+	 * signed int.
+	 */
+	if ((k >= 0) && (k != 32))
+		rdr_masks |= (u_32_t)1 << k;
+	j = (n->in_outip & n->in_outmsk);
+	hv = NAT_HASH_FN(j, 0, ipf_rdrrules_sz);
+	/* Walk to the end of the bucket's chain and link the rule there. */
+	np = rdr_rules + hv;
+	while (*np != NULL)
+		np = &(*np)->in_rnext;
+	n->in_rnext = NULL;
+	n->in_prnext = np;
+	*np = n;
+}
+
+
+/*
+ * Add rule n to the nat_rules hash table.
+ *
+ * The value countbits() returns for the rule's in_inmsk is used as a bit
+ * index into nat_masks (skipped when it is negative or 32).  The rule is
+ * hashed on in_inip & in_inmsk and appended to the end of its bucket's
+ * chain, with in_pmnext pointing at the link that refers to it.
+ */
+static void nat_addnat(n)
+ipnat_t *n;
+{
+	ipnat_t **np;
+	u_32_t j;
+	u_int hv;
+	int k;
+
+	k = countbits(n->in_inmsk);
+	/*
+	 * Shift an unsigned one: k can be 31, and 1 << 31 overflows a
+	 * signed int.
+	 */
+	if ((k >= 0) && (k != 32))
+		nat_masks |= (u_32_t)1 << k;
+	j = (n->in_inip & n->in_inmsk);
+	hv = NAT_HASH_FN(j, 0, ipf_natrules_sz);
+	/* Walk to the end of the bucket's chain and link the rule there. */
+	np = nat_rules + hv;
+	while (*np != NULL)
+		np = &(*np)->in_mnext;
+	n->in_mnext = NULL;
+	n->in_pmnext = np;
+	*np = n;
+}
+
+
+/*
+ * Unlink rule n from its rdr_rules chain: the link that pointed at n is
+ * made to point at n's successor, and the successor's back pointer is
+ * updated to that link.
+ */
+static void nat_delrdr(n)
+ipnat_t *n;
+{
+	ipnat_t *succ = n->in_rnext;
+	ipnat_t **backp = n->in_prnext;
+
+	if (succ != NULL)
+		succ->in_prnext = backp;
+	*backp = succ;
+}
+
+
+/*
+ * Unlink rule n from its nat_rules chain: the link that pointed at n is
+ * made to point at n's successor, and the successor's back pointer is
+ * updated to that link.
+ */
+static void nat_delnat(n)
+ipnat_t *n;
+{
+	ipnat_t *succ = n->in_mnext;
+	ipnat_t **backp = n->in_pmnext;
+
+	if (succ != NULL)
+		succ->in_pmnext = backp;
+	*backp = succ;
+}
+
+
+/*
+ * check if an ip address has already been allocated for a given mapping that
+ * is not doing port based translation.
+ *
+ * Looks for a host map entry with real address `real' that belongs to rule
+ * np.  If one exists its reference count is incremented and it is returned;
+ * otherwise a new entry mapping `real' to `map' is allocated, put at the
+ * head of its bucket with a reference count of 1, and returned.  Returns
+ * NULL if the allocation fails.
+ *
+ * Must be called with ipf_nat held as a write lock.
+ */
+static struct hostmap *nat_hostmap(np, real, map)
+ipnat_t *np;
+struct in_addr real;
+struct in_addr map;
+{
+	hostmap_t *hm;
+	u_int hv;
+
+	/*
+	 * maptable is allocated with ipf_hostmap_sz buckets in nat_init(),
+	 * so the bucket index must be reduced by that same size rather
+	 * than by the compile-time HOSTMAP_SIZE.
+	 */
+	hv = real.s_addr % ipf_hostmap_sz;
+	for (hm = maptable[hv]; hm; hm = hm->hm_next)
+		if ((hm->hm_realip.s_addr == real.s_addr) &&
+		    (np == hm->hm_ipnat)) {
+			hm->hm_ref++;
+			return hm;
+		}
+
+	KMALLOC(hm, hostmap_t *);
+	if (hm) {
+		/* Insert at the head of the bucket. */
+		hm->hm_next = maptable[hv];
+		hm->hm_pnext = maptable + hv;
+		if (maptable[hv])
+			maptable[hv]->hm_pnext = &hm->hm_next;
+		maptable[hv] = hm;
+		hm->hm_ipnat = np;
+		hm->hm_realip = real;
+		hm->hm_mapip = map;
+		hm->hm_ref = 1;
+	}
+	return hm;
+}
+
+
+/*
+ * Drop one reference to host map entry hm; when the count reaches zero
+ * the entry is unlinked from its bucket and freed.
+ *
+ * Must be called with ipf_nat held as a write lock.
+ */
+static void nat_hostmapdel(hm)
+struct hostmap *hm;
+{
+	ATOMIC_DEC32(hm->hm_ref);
+	if (hm->hm_ref != 0)
+		return;
+
+	/* Last reference gone: take the entry out of its chain. */
+	if (hm->hm_next != NULL)
+		hm->hm_next->hm_pnext = hm->hm_pnext;
+	*hm->hm_pnext = hm->hm_next;
+	KFREE(hm);
+}
+
+
+/*
+ * Adjust the 16-bit checksum at *sp by n.  Nothing is done when n is 0.
+ *
+ * The stored checksum is converted with ntohs() and complemented, n is
+ * added, the carries are folded back into 16 bits, and the complement of
+ * the result is stored back with htons().  On SOLARIS2 >= 6, an n with
+ * NAT_HW_CKSUM set instead stores the low 16 bits of n directly.
+ */
+void fix_outcksum(sp, n)
+u_short *sp;
+u_32_t n;
+{
+	u_32_t acc;
+	u_short folded;
+
+	if (n == 0)
+		return;
+#if SOLARIS2 >= 6
+	if (n & NAT_HW_CKSUM) {
+		*sp = n & 0xffff;
+		return;
+	}
+#endif
+	acc = (~ntohs(*sp)) & 0xffff;
+	acc += n;
+	/* Fold twice: the first fold can itself produce a carry. */
+	acc = (acc & 0xffff) + (acc >> 16);
+	acc = (acc & 0xffff) + (acc >> 16);
+	folded = ~(u_short)acc;
+	*sp = htons(folded);
+}
+
+
+/*
+ * Adjust the 16-bit checksum at *sp by the complement of n.  Nothing is
+ * done when n is 0.
+ *
+ * The stored checksum is complemented (after ntohs(), except on sparc),
+ * the low 16 bits of ~n are added, the carries are folded back into
+ * 16 bits, and the complement of the result is stored back with htons().
+ * On SOLARIS2 >= 6, an n with NAT_HW_CKSUM set instead stores the low
+ * 16 bits of n directly.
+ */
+void fix_incksum(sp, n)
+u_short *sp;
+u_32_t n;
+{
+	u_32_t acc;
+	u_short folded;
+
+	if (n == 0)
+		return;
+#if SOLARIS2 >= 6
+	if (n & NAT_HW_CKSUM) {
+		*sp = n & 0xffff;
+		return;
+	}
+#endif
+#ifdef sparc
+	acc = (~(*sp)) & 0xffff;
+#else
+	acc = (~ntohs(*sp)) & 0xffff;
+#endif
+	acc += ~(n) & 0xffff;
+	/* Fold twice: the first fold can itself produce a carry. */
+	acc = (acc & 0xffff) + (acc >> 16);
+	acc = (acc & 0xffff) + (acc >> 16);
+	folded = ~(u_short)acc;
+	*sp = htons(folded);
+}
+
+
+/*
+ * fix_datacksum is used *only* to adjust checksums that live in the data
+ * section of an IP packet.
+ *
+ * The only situation in which this is needed is when NAT'ing an ICMP
+ * error message.  Such a message contains, in its body, the IP header of
+ * the original IP packet that caused the error.
+ *
+ * fix_incksum and fix_outcksum cannot be used in that case, because for
+ * the kernel the data section of the ICMP error is just data, and no
+ * special processing such as hardware cksum or ntohs processing has been
+ * done by the kernel on the data section.
+ */
+void fix_datacksum(sp, n)
+u_short *sp;
+u_32_t n;
+{
+	u_32_t acc;
+	u_short folded;
+
+	if (n == 0)
+		return;
+
+	acc = (~ntohs(*sp)) & 0xffff;
+	acc += n;
+	/* Fold twice: the first fold can itself produce a carry. */
+	acc = (acc & 0xffff) + (acc >> 16);
+	acc = (acc & 0xffff) + (acc >> 16);
+	folded = ~(u_short)acc;
+	*sp = htons(folded);
+}
+
+/*
+ * How the NAT is organised and works.
+ *
+ * Inside (interface y) NAT       Outside (interface x)
+ * -------------------- -+- -------------------------------------
+ * Packet going          |   out, processed by ip_natout() for x
+ * ------------>         |   ------------>
+ * src=10.1.1.1          |   src=192.1.1.1
+ *                       |
+ *                       |   in, processed by ip_natin() for x
+ * <------------         |   <------------
+ * dst=10.1.1.1          |   dst=192.1.1.1
+ * -------------------- -+- -------------------------------------
+ * ip_natout() - changes ip_src and if required, sport
+ *             - creates a new mapping, if required.
+ * ip_natin()  - changes ip_dst and if required, dport
+ *
+ * In the NAT table, internal source is recorded as "in" and externally
+ * seen as "out".
+ */
+
+/*
+ * Handle ioctls which manipulate the NAT.
+ *
+ * data - ioctl argument supplied by the caller; read and/or written
+ *        through the IRCOPY/IWCOPY family of macros
+ * cmd  - ioctl command code
+ * mode - open mode of the caller; commands which modify NAT state are
+ *        refused with EPERM unless FWRITE is set
+ *
+ * Returns 0 on success, otherwise an errno value.
+ *
+ * A spare ipnat_t (nt) is allocated before ipf_nat is write-locked.  It is
+ * either consumed by SIOCADNAT or released at "done".  Commands which only
+ * read NAT state downgrade the lock before copying results out.
+ */
+int nat_ioctl(data, cmd, mode)
+#if defined(__NetBSD__) || defined(__OpenBSD__) || (__FreeBSD_version >= 300003)
+u_long cmd;
+#else
+int cmd;
+#endif
+caddr_t data;
+int mode;
+{
+	register ipnat_t *nat, *nt, *n = NULL, **np = NULL;
+	int error = 0, ret, arg;
+	ipnat_t natd;
+	u_32_t i, j;
+
+#if (BSD >= 199306) && defined(_KERNEL)
+	if ((securelevel >= 2) && (mode & FWRITE))
+		return EPERM;
+#endif
+
+	nat = NULL;     /* XXX gcc -Wuninitialized */
+	KMALLOC(nt, ipnat_t *);
+	if ((cmd == SIOCADNAT) || (cmd == SIOCRMNAT))
+		error = IRCOPYPTR(data, (char *)&natd, sizeof(natd));
+	else if (cmd == SIOCIPFFL) {	/* SIOCFLNAT & SIOCCNATL */
+		error = IRCOPY(data, (char *)&arg, sizeof(arg));
+		if (error)
+			error = EFAULT;
+	}
+
+	if (error)
+		goto done;
+
+	/*
+	 * For add/delete, look to see if the NAT entry is already present.
+	 * On exit from the loop, n is the matching rule (or NULL) and np
+	 * points at the link which refers to it (or at the list tail).
+	 */
+	WRITE_ENTER(&ipf_nat);
+	if ((cmd == SIOCADNAT) || (cmd == SIOCRMNAT)) {
+		nat = &natd;
+		nat->in_flags &= IPN_USERFLAGS;
+		if ((nat->in_redir & NAT_MAPBLK) == 0) {
+			if ((nat->in_flags & IPN_SPLIT) == 0)
+				nat->in_inip &= nat->in_inmsk;
+			if ((nat->in_flags & IPN_IPRANGE) == 0)
+				nat->in_outip &= nat->in_outmsk;
+		}
+		for (np = &nat_list; (n = *np); np = &n->in_next)
+			if (!bcmp((char *)&nat->in_flags, (char *)&n->in_flags,
+					IPN_CMPSIZ))
+				break;
+	}
+
+	switch (cmd)
+	{
+#ifdef  IPFILTER_LOG
+	case SIOCIPFFB :
+	{
+		int tmp;
+
+		if (!(mode & FWRITE))
+			error = EPERM;
+		else {
+			tmp = ipflog_clear(IPL_LOGNAT);
+			IWCOPY((char *)&tmp, (char *)data, sizeof(tmp));
+		}
+		break;
+	}
+#endif
+	case SIOCADNAT :
+		if (!(mode & FWRITE)) {
+			error = EPERM;
+			break;
+		}
+		if (n) {
+			error = EEXIST;
+			break;
+		}
+		if (nt == NULL) {
+			error = ENOMEM;
+			break;
+		}
+		/* The spare allocation becomes the new rule. */
+		n = nt;
+		nt = NULL;
+		bcopy((char *)nat, (char *)n, sizeof(*n));
+		n->in_ifp = (void *)GETUNIT(n->in_ifname, 4);
+		if (!n->in_ifp)
+			n->in_ifp = (void *)-1;
+		if (n->in_plabel[0] != '\0') {
+			n->in_apr = appr_match(n->in_p, n->in_plabel);
+			if (!n->in_apr) {
+				/*
+				 * The rule has not been linked into nat_list
+				 * and nt no longer refers to it, so it must
+				 * be released here or it is leaked.
+				 */
+				KFREE(n);
+				n = NULL;
+				error = ENOENT;
+				break;
+			}
+		}
+		n->in_next = NULL;
+		*np = n;
+
+		if (n->in_redir & NAT_REDIRECT) {
+			n->in_flags &= ~IPN_NOTDST;
+			nat_addrdr(n);
+		}
+		if (n->in_redir & (NAT_MAP|NAT_MAPBLK)) {
+			n->in_flags &= ~IPN_NOTSRC;
+			nat_addnat(n);
+		}
+
+		n->in_use = 0;
+		if (n->in_redir & NAT_MAPBLK)
+			n->in_space = USABLE_PORTS * ~ntohl(n->in_outmsk);
+		else if (n->in_flags & IPN_AUTOPORTMAP)
+			n->in_space = USABLE_PORTS * ~ntohl(n->in_inmsk);
+		else if (n->in_flags & IPN_IPRANGE)
+			n->in_space = ntohl(n->in_outmsk) - ntohl(n->in_outip);
+		else if (n->in_flags & IPN_SPLIT)
+			n->in_space = 2;
+		else
+			n->in_space = ~ntohl(n->in_outmsk);
+		/*
+		 * Calculate the number of valid IP addresses in the output
+		 * mapping range.  In all cases, the range is inclusive of
+		 * the start and ending IP addresses.
+		 * If to a CIDR address, lose 2: broadcast + network address
+		 *			         (so subtract 1)
+		 * If to a range, add one.
+		 * If to a single IP address, set to 1.
+		 */
+		if (n->in_space) {
+			if ((n->in_flags & IPN_IPRANGE) != 0)
+				n->in_space += 1;
+			else
+				n->in_space -= 1;
+		} else
+			n->in_space = 1;
+		if ((n->in_outmsk != 0xffffffff) && (n->in_outmsk != 0) &&
+		    ((n->in_flags & (IPN_IPRANGE|IPN_SPLIT)) == 0))
+			n->in_nip = ntohl(n->in_outip) + 1;
+		else if ((n->in_flags & IPN_SPLIT) &&
+			 (n->in_redir & NAT_REDIRECT))
+			n->in_nip = ntohl(n->in_inip);
+		else
+			n->in_nip = ntohl(n->in_outip);
+		if (n->in_redir & NAT_MAP) {
+			n->in_pnext = ntohs(n->in_pmin);
+			/*
+			 * Multiply by the number of ports made available.
+			 */
+			if (ntohs(n->in_pmax) >= ntohs(n->in_pmin)) {
+				n->in_space *= (ntohs(n->in_pmax) -
+						ntohs(n->in_pmin) + 1);
+				/*
+				 * Because two different sources can map to
+				 * different destinations but use the same
+				 * local IP#/port #.
+				 * If the result is smaller than in_space, then
+				 * we may have wrapped around 32bits.
+				 */
+				i = n->in_inmsk;
+				if ((i != 0) && (i != 0xffffffff)) {
+					j = n->in_space * (~ntohl(i) + 1);
+					if (j >= n->in_space)
+						n->in_space = j;
+					else
+						n->in_space = 0xffffffff;
+				}
+			}
+			/*
+			 * If no protocol is specified, multiply by 256.
+			 */
+			if ((n->in_flags & IPN_TCPUDP) == 0) {
+					j = n->in_space * 256;
+					if (j >= n->in_space)
+						n->in_space = j;
+					else
+						n->in_space = 0xffffffff;
+			}
+		}
+		/* Otherwise, these fields are preset */
+		n = NULL;
+		nat_stats.ns_rules++;
+		break;
+	case SIOCRMNAT :
+		if (!(mode & FWRITE)) {
+			error = EPERM;
+			n = NULL;
+			break;
+		}
+		if (!n) {
+			error = ESRCH;
+			break;
+		}
+		if (n->in_redir & NAT_REDIRECT)
+			nat_delrdr(n);
+		if (n->in_redir & (NAT_MAPBLK|NAT_MAP))
+			nat_delnat(n);
+		/*
+		 * Unlink the rule before testing for an empty list, otherwise
+		 * the masks are never reset when the last rule is removed.
+		 */
+		*np = n->in_next;
+		if (nat_list == NULL) {
+			nat_masks = 0;
+			rdr_masks = 0;
+		}
+		if (!n->in_use) {
+			if (n->in_apr)
+				appr_free(n->in_apr);
+			KFREE(n);
+			nat_stats.ns_rules--;
+		} else {
+			/*
+			 * Still referenced by NAT sessions: mark it so that
+			 * nat_delete() frees it when the last one goes away.
+			 */
+			n->in_flags |= IPN_DELETE;
+			n->in_next = NULL;
+		}
+		n = NULL;
+		break;
+	case SIOCGNATS :
+		MUTEX_DOWNGRADE(&ipf_nat);
+		nat_stats.ns_table[0] = nat_table[0];
+		nat_stats.ns_table[1] = nat_table[1];
+		nat_stats.ns_list = nat_list;
+		nat_stats.ns_nattab_sz = ipf_nattable_sz;
+		nat_stats.ns_rultab_sz = ipf_natrules_sz;
+		nat_stats.ns_rdrtab_sz = ipf_rdrrules_sz;
+		nat_stats.ns_instances = nat_instances;
+		nat_stats.ns_apslist = ap_sess_list;
+		error = IWCOPYPTR((char *)&nat_stats, (char *)data,
+				  sizeof(nat_stats));
+		break;
+	case SIOCGNATL :
+	    {
+		natlookup_t nl;
+
+		MUTEX_DOWNGRADE(&ipf_nat);
+		error = IRCOPYPTR((char *)data, (char *)&nl, sizeof(nl));
+		if (error)
+			break;
+
+		if (nat_lookupredir(&nl)) {
+			error = IWCOPYPTR((char *)&nl, (char *)data,
+					  sizeof(nl));
+		} else
+			error = ESRCH;
+		break;
+	    }
+	case SIOCIPFFL :	/* old SIOCFLNAT & SIOCCNATL */
+		if (!(mode & FWRITE)) {
+			error = EPERM;
+			break;
+		}
+		error = 0;
+		/* arg was read from the caller before the lock was taken */
+		if (arg == 0)
+			ret = nat_flushtable();
+		else if (arg == 1)
+			ret = nat_clearlist();
+		else
+			error = EINVAL;
+		MUTEX_DOWNGRADE(&ipf_nat);
+		if (!error) {
+			error = IWCOPY((caddr_t)&ret, data, sizeof(ret));
+			if (error)
+				error = EFAULT;
+		}
+		break;
+	case SIOCSTLCK :
+		/*
+		 * Swap the lock state: hand the old value back to the caller
+		 * and only install the new one if that copy succeeded.
+		 */
+		error = IRCOPY(data, (caddr_t)&arg, sizeof(arg));
+		if (!error) {
+			error = IWCOPY((caddr_t)&fr_nat_lock, data,
+					sizeof(fr_nat_lock));
+			if (!error)
+				fr_nat_lock = arg;
+			else
+				error = EFAULT;
+		} else
+			error = EFAULT;
+		break;
+	case SIOCSTPUT :
+		if (fr_nat_lock)
+			error = fr_natputent(data);
+		else
+			error = EACCES;
+		break;
+	case SIOCSTGSZ :
+		if (fr_nat_lock)
+			error = fr_natgetsz(data);
+		else
+			error = EACCES;
+		break;
+	case SIOCSTGET :
+		if (fr_nat_lock)
+			error = fr_natgetent(data);
+		else
+			error = EACCES;
+		break;
+	case FIONREAD :
+#ifdef	IPFILTER_LOG
+		arg = (int)iplused[IPL_LOGNAT];
+		MUTEX_DOWNGRADE(&ipf_nat);
+		error = IWCOPY((caddr_t)&arg, (caddr_t)data, sizeof(arg));
+		if (error)
+			error = EFAULT;
+#endif
+		break;
+	default :
+		error = EINVAL;
+		break;
+	}
+	RWLOCK_EXIT(&ipf_nat);			/* READ/WRITE */
+done:
+	if (nt)
+		KFREE(nt);
+	return error;
+}
+
+
+/*
+ * Report how many bytes are needed to save one NAT session.
+ *
+ * data - address of the caller's natget_t.  ng_ptr selects the session
+ *        (NULL means the head of nat_instances); ng_sz receives the size.
+ *
+ * The size is that of a nat_save_t plus, when the session has a proxy
+ * session with private data attached, an ap_session_t and that data.
+ * If no session is selected because the list is empty, ng_sz is 0.
+ *
+ * Returns 0 on success, EFAULT if the argument cannot be copied in or out,
+ * or ESRCH if ng_ptr is not a member of nat_instances.
+ */
+static int fr_natgetsz(data)
+caddr_t data;
+{
+	ap_session_t *sess;
+	nat_t *target, *walk;
+	natget_t ng;
+
+	if (IRCOPY(data, (caddr_t)&ng, sizeof(ng)))
+		return EFAULT;
+
+	target = ng.ng_ptr;
+	if (target != NULL) {
+		/*
+		 * Only accept a pointer which is really on the list of
+		 * sessions, so that arbitrary kernel memory is never read.
+		 */
+		walk = nat_instances;
+		while ((walk != NULL) && (walk != target))
+			walk = walk->nat_next;
+		if (walk == NULL)
+			return ESRCH;
+	} else {
+		target = nat_instances;
+		ng.ng_sz = 0;
+	}
+
+	if (target != NULL) {
+		ng.ng_sz = sizeof(nat_save_t);
+		sess = target->nat_aps;
+		if ((sess != NULL) && (sess->aps_data != 0)) {
+			ng.ng_sz += sizeof(ap_session_t);
+			ng.ng_sz += sess->aps_psiz;
+		}
+	}
+
+	if (IWCOPY((caddr_t)&ng, data, sizeof(ng)))
+		return EFAULT;
+	return 0;
+}
+
+
+/*
+ * Copy one NAT session out to the caller so that it can be saved.
+ *
+ * data - address of a pointer to the caller's nat_save_t buffer.  On entry
+ *        the buffer's ipn_next selects the session (NULL means the head of
+ *        nat_instances).  On return it holds a copy of the session, its
+ *        rule and, if present, its proxy session followed by the proxy's
+ *        private data; ipn_next is advanced to the following session and
+ *        ipn_dsize is the number of bytes stored in ipn_data.
+ *
+ * Returns 0 on success, EFAULT on a failed copy in/out, ENOENT if there are
+ * no sessions, ESRCH if ipn_next is not on nat_instances, or ENOMEM.
+ */
+static int fr_natgetent(data)
+caddr_t data;
+{
+	nat_save_t ipn, *ipnp, *ipnn = NULL;
+	register nat_t *n, *nat;
+	ap_session_t *aps;
+	int error;
+
+	error = IRCOPY(data, (caddr_t)&ipnp, sizeof(ipnp));
+	if (error)
+		return EFAULT;
+	error = IRCOPY((caddr_t)ipnp, (caddr_t)&ipn, sizeof(ipn));
+	if (error)
+		return EFAULT;
+
+	nat = ipn.ipn_next;
+	if (!nat) {
+		nat = nat_instances;
+		if (nat == NULL)
+			return ENOENT;
+	} else {
+		/*
+		 * Make sure the pointer we're copying from exists in the
+		 * current list of entries.  Security precaution to prevent
+		 * copying of random kernel data.
+		 */
+		for (n = nat_instances; n; n = n->nat_next)
+			if (n == nat)
+				break;
+		if (!n)
+			return ESRCH;
+	}
+
+	ipn.ipn_next = nat->nat_next;
+	ipn.ipn_dsize = 0;
+	bcopy((char *)nat, (char *)&ipn.ipn_nat, sizeof(ipn.ipn_nat));
+	ipn.ipn_nat.nat_data = NULL;
+
+	if (nat->nat_ptr) {
+		bcopy((char *)nat->nat_ptr, (char *)&ipn.ipn_ipnat,
+		      sizeof(ipn.ipn_ipnat));
+	}
+
+	/*
+	 * NOTE(review): fr_natputent() restores the filter rule from
+	 * ipn.ipn_fr whereas this stores it through ipn.ipn_rule - confirm
+	 * that both names refer to the same storage in nat_save_t.
+	 */
+	if (nat->nat_fr)
+		bcopy((char *)nat->nat_fr, (char *)&ipn.ipn_rule,
+		      sizeof(ipn.ipn_rule));
+
+	if ((aps = nat->nat_aps)) {
+		/*
+		 * ipn_dsize is settled here, before the header is copied
+		 * into the larger buffer, so it must not be increased again
+		 * when the private data is appended below.
+		 */
+		ipn.ipn_dsize = sizeof(*aps);
+		if (aps->aps_data)
+			ipn.ipn_dsize += aps->aps_psiz;
+		KMALLOCS(ipnn, nat_save_t *, sizeof(*ipnn) + ipn.ipn_dsize);
+		if (ipnn == NULL)
+			return ENOMEM;
+		bcopy((char *)&ipn, (char *)ipnn, sizeof(ipn));
+
+		bcopy((char *)aps, ipnn->ipn_data, sizeof(*aps));
+		if (aps->aps_data) {
+			bcopy(aps->aps_data, ipnn->ipn_data + sizeof(*aps),
+			      aps->aps_psiz);
+		}
+		error = IWCOPY((caddr_t)ipnn, ipnp,
+			       sizeof(ipn) + ipn.ipn_dsize);
+		if (error)
+			error = EFAULT;
+		KFREES(ipnn, sizeof(*ipnn) + ipn.ipn_dsize);
+	} else {
+		error = IWCOPY((caddr_t)&ipn, ipnp, sizeof(ipn));
+		if (error)
+			error = EFAULT;
+	}
+	return error;
+}
+
+
+/*
+ * Restore one NAT session from a nat_save_t supplied by the caller.
+ *
+ * data - address of a pointer to the caller's nat_save_t.  The record is
+ *        followed by ipn_dsize bytes of proxy data (an ap_session_t and
+ *        its private data) starting at ipn_data.
+ *
+ * A new nat_t is built from the record together with private copies of
+ * its rule, proxy session and (for FI_NEWFR) filter rule, and is then
+ * handed to nat_insert().  Pointers carried in the saved image are never
+ * trusted: they are either replaced with fresh allocations or cleared.
+ *
+ * On failure everything allocated here is released directly; the entry
+ * has not been inserted, so nat_delete() must not be used on it (it would
+ * dereference the cleared nat_phnext[] pointers and adjust counters that
+ * were never incremented).
+ *
+ * Returns 0 on success or EFAULT, ENOMEM, EINVAL or ESRCH.
+ *
+ * NOTE(review): ipn_dsize comes from the caller and is not range checked
+ * beyond being non-zero; confirm its type and whether an upper bound
+ * should be enforced.
+ */
+static int fr_natputent(data)
+caddr_t data;
+{
+	nat_save_t ipn, *ipnp, *ipnn = NULL;
+	register nat_t *n, *nat;
+	ap_session_t *aps;
+	frentry_t *fr;
+	ipnat_t *in;
+
+	int error;
+
+	error = IRCOPY(data, (caddr_t)&ipnp, sizeof(ipnp));
+	if (error)
+		return EFAULT;
+	error = IRCOPY((caddr_t)ipnp, (caddr_t)&ipn, sizeof(ipn));
+	if (error)
+		return EFAULT;
+	nat = NULL;
+	if (ipn.ipn_dsize) {
+		KMALLOCS(ipnn, nat_save_t *, sizeof(ipn) + ipn.ipn_dsize);
+		if (ipnn == NULL)
+			return ENOMEM;
+		bcopy((char *)&ipn, (char *)ipnn, sizeof(ipn));
+		/*
+		 * Fetch the trailing proxy data from the caller's ipn_data
+		 * area into the heap copy.  It must not land in the on-stack
+		 * ipn, whose ipn_data has no room for it.
+		 */
+		error = IRCOPY((caddr_t)ipnp->ipn_data,
+			       (caddr_t)ipnn->ipn_data, ipn.ipn_dsize);
+		if (error) {
+			error = EFAULT;
+			goto junkput;
+		}
+	}
+
+	KMALLOC(nat, nat_t *);
+	if (nat == NULL) {
+		error = ENOMEM;
+		goto junkput;
+	}
+
+	bcopy((char *)&ipn.ipn_nat, (char *)nat, sizeof(*nat));
+	/*
+	 * Remember which attachments the saved session had, then clear
+	 * every pointer inherited from the image so that nothing stale is
+	 * ever followed or freed.
+	 */
+	nat->nat_phnext[0] = NULL;
+	nat->nat_phnext[1] = NULL;
+	fr = nat->nat_fr;
+	nat->nat_fr = NULL;
+	aps = nat->nat_aps;
+	nat->nat_aps = NULL;
+	in = nat->nat_ptr;
+	nat->nat_ptr = NULL;
+	nat->nat_data = NULL;
+	nat->nat_hm = NULL;
+
+	/*
+	 * Restore the rule associated with this nat session
+	 */
+	if (in) {
+		KMALLOC(in, ipnat_t *);
+		if (in == NULL) {
+			error = ENOMEM;
+			goto junkput;
+		}
+		nat->nat_ptr = in;
+		bcopy((char *)&ipn.ipn_ipnat, (char *)in, sizeof(*in));
+		/*
+		 * The rule is private to this session: one reference, and
+		 * flagged so that it is freed along with the session.
+		 */
+		in->in_use = 1;
+		in->in_flags |= IPN_DELETE;
+		in->in_next = NULL;
+		in->in_rnext = NULL;
+		in->in_prnext = NULL;
+		in->in_mnext = NULL;
+		in->in_pmnext = NULL;
+		in->in_apr = NULL;
+		in->in_ifp = GETUNIT(in->in_ifname, 4);
+		if (in->in_plabel[0] != '\0') {
+			in->in_apr = appr_match(in->in_p, in->in_plabel);
+		}
+	}
+
+	/*
+	 * Restore ap_session_t structure.  Include the private data allocated
+	 * if it was there.
+	 */
+	if (aps) {
+		/* The image must really contain a proxy session. */
+		if ((ipnn == NULL) ||
+		    ((size_t)ipn.ipn_dsize < sizeof(*aps))) {
+			error = EINVAL;
+			goto junkput;
+		}
+		KMALLOC(aps, ap_session_t *);
+		if (aps == NULL) {
+			error = ENOMEM;
+			goto junkput;
+		}
+		bcopy(ipnn->ipn_data, (char *)aps, sizeof(*aps));
+		/*
+		 * NOTE(review): when there is no rule, aps_apr keeps the
+		 * value from the saved image - confirm whether it should be
+		 * cleared in that case.
+		 */
+		if (in)
+			aps->aps_apr = in->in_apr;
+		if (aps->aps_psiz) {
+			/* The private data must fit in what was copied in. */
+			if ((size_t)aps->aps_psiz >
+			    (size_t)ipn.ipn_dsize - sizeof(*aps)) {
+				KFREE(aps);
+				error = EINVAL;
+				goto junkput;
+			}
+			KMALLOCS(aps->aps_data, void *, aps->aps_psiz);
+			if (aps->aps_data == NULL) {
+				KFREE(aps);
+				error = ENOMEM;
+				goto junkput;
+			}
+			bcopy(ipnn->ipn_data + sizeof(*aps), aps->aps_data,
+			      aps->aps_psiz);
+		} else {
+			aps->aps_psiz = 0;
+			aps->aps_data = NULL;
+		}
+		/*
+		 * Link the session in only once it is complete; doing so
+		 * before the bcopy() above would let the image overwrite
+		 * aps_next.
+		 */
+		aps->aps_next = ap_sess_list;
+		ap_sess_list = aps;
+		nat->nat_aps = aps;
+	}
+
+	/*
+	 * If there was a filtering rule associated with this entry then
+	 * build up a new one.
+	 */
+	if (fr != NULL) {
+		if (nat->nat_flags & FI_NEWFR) {
+			KMALLOC(fr, frentry_t *);
+			nat->nat_fr = fr;
+			if (fr == NULL) {
+				error = ENOMEM;
+				goto junkput;
+			}
+			bcopy((char *)&ipn.ipn_fr, (char *)fr, sizeof(*fr));
+			/* Tell the caller where the new rule lives. */
+			ipn.ipn_nat.nat_fr = fr;
+			error = IWCOPY((caddr_t)&ipn, ipnp, sizeof(ipn));
+			if (error) {
+				error = EFAULT;
+				goto junkput;
+			}
+		} else {
+			/*
+			 * The rule pointer is only acceptable if a current
+			 * session already refers to the same rule.
+			 */
+			for (n = nat_instances; n; n = n->nat_next)
+				if (n->nat_fr == fr)
+					break;
+			if (!n) {
+				error = ESRCH;
+				goto junkput;
+			}
+		}
+	}
+
+	if (ipnn)
+		KFREES(ipnn, sizeof(ipn) + ipn.ipn_dsize);
+	nat_insert(nat);
+	return 0;
+junkput:
+	if (ipnn)
+		KFREES(ipnn, sizeof(ipn) + ipn.ipn_dsize);
+	if (nat != NULL) {
+		/* nat_fr is only ever set to a rule allocated above */
+		if (nat->nat_fr != NULL)
+			KFREE(nat->nat_fr);
+		if (nat->nat_aps != NULL)
+			aps_free(nat->nat_aps);
+		in = nat->nat_ptr;
+		if (in != NULL) {
+			if (in->in_apr)
+				appr_free(in->in_apr);
+			KFREE(in);
+		}
+		KFREE(nat);
+	}
+	return error;
+}
+
+
+/*
+ * Remove a NAT session from both hash chains, drop the references it
+ * holds (filter rule, host map, parent NAT rule, fragment cache, proxy
+ * session) and free it.  The parent rule is freed too once it is unused
+ * and has been flagged IPN_DELETE.
+ *
+ * The caller is responsible for unlinking the session from nat_instances.
+ */
+static void nat_delete(natd)
+struct nat *natd;
+{
+	struct ipnat *rule;
+	int side;
+
+	if (natd->nat_flags & FI_WILDP)
+		nat_stats.ns_wilds--;
+
+	/* side 0 and side 1 are the two hash chains the session is on */
+	for (side = 0; side < 2; side++) {
+		if (natd->nat_hnext[side] != NULL)
+			natd->nat_hnext[side]->nat_phnext[side] =
+				natd->nat_phnext[side];
+		*natd->nat_phnext[side] = natd->nat_hnext[side];
+	}
+
+	if (natd->nat_fr != NULL) {
+		ATOMIC_DEC32(natd->nat_fr->fr_ref);
+	}
+
+	if (natd->nat_hm != NULL)
+		nat_hostmapdel(natd->nat_hm);
+
+	/*
+	 * Give the mapping back to the parent rule, and dispose of the
+	 * rule itself if this was its last user and it is pending deletion.
+	 */
+	rule = natd->nat_ptr;
+	if (rule != NULL) {
+		rule->in_space++;
+		rule->in_use--;
+		if ((rule->in_use == 0) && (rule->in_flags & IPN_DELETE)) {
+			if (rule->in_apr)
+				appr_free(rule->in_apr);
+			KFREE(rule);
+			nat_stats.ns_rules--;
+		}
+	}
+
+	MUTEX_DESTROY(&natd->nat_lock);
+
+	/*
+	 * Forget any fragment table entry for this session, then release
+	 * its proxy session and the session itself.
+	 */
+	ipfr_forget((void *)natd);
+	aps_free(natd->nat_aps);
+	nat_stats.ns_inuse--;
+	KFREE(natd);
+}
+
+
+/*
+ * nat_flushtable - clear the NAT table of all mapping entries.
+ *
+ * Both hash tables are zeroed in one go, then every session on
+ * nat_instances is unlinked and passed to nat_delete().
+ *
+ * Returns the number of sessions removed.
+ */
+static int nat_flushtable()
+{
+	register nat_t *nat, **natp;
+	register int j = 0;
+
+	/*
+	 * ALL NAT mappings deleted, so lets just make the deletions
+	 * quicker.  The size is that of one bucket (an element of the
+	 * table) times the number of buckets.
+	 */
+	if (nat_table[0] != NULL)
+		bzero((char *)nat_table[0],
+		      sizeof(*nat_table[0]) * ipf_nattable_sz);
+	if (nat_table[1] != NULL)
+		bzero((char *)nat_table[1],
+		      sizeof(*nat_table[1]) * ipf_nattable_sz);
+
+	/* natp always points at the list head; each pass pops one entry */
+	for (natp = &nat_instances; (nat = *natp); ) {
+		*natp = nat->nat_next;
+#ifdef	IPFILTER_LOG
+		nat_log(nat, NL_FLUSH);
+#endif
+		nat_delete(nat);
+		j++;
+	}
+	nat_stats.ns_inuse = 0;
+	return j;
+}
+
+
+/*
+ * nat_clearlist - empty the list of active NAT rules.
+ *
+ * The rule hash tables are zeroed and every rule is taken off nat_list.
+ * A rule with no sessions using it is freed at once; one still in use is
+ * flagged IPN_DELETE and left for nat_delete() to free.
+ *
+ * Returns the number of rules taken off the list.
+ */
+static int nat_clearlist()
+{
+	register ipnat_t *rule;
+	int removed;
+
+	if (nat_rules != NULL)
+		bzero((char *)nat_rules, sizeof(*nat_rules) * ipf_natrules_sz);
+	if (rdr_rules != NULL)
+		bzero((char *)rdr_rules, sizeof(*rdr_rules) * ipf_rdrrules_sz);
+
+	for (removed = 0; (rule = nat_list) != NULL; removed++) {
+		nat_list = rule->in_next;
+		if (rule->in_use) {
+			rule->in_flags |= IPN_DELETE;
+			rule->in_next = NULL;
+		} else {
+			if (rule->in_apr)
+				appr_free(rule->in_apr);
+			KFREE(rule);
+			nat_stats.ns_rules--;
+		}
+	}
+
+	nat_masks = 0;
+	rdr_masks = 0;
+	return removed;
+}
+
+
+/*
+ * Create a new NAT table entry.
+ * NOTE: assumes write lock on ipf_nat has been obtained already.
+ *
+ * np        - NAT rule the new session is created from; its allocation
+ *             state (in_nip, in_pnext, in_space, in_use) is updated here
+ * ip        - IP header of the packet being translated
+ * fin       - packet information; fin_dp is taken to be the TCP/UDP header
+ *             when flags has an IPN_TCPUDP bit set
+ * flags     - IPN_ and FI_ flags for the session
+ * direction - NAT_OUTBOUND selects a new source address/port; anything
+ *             else is treated as inbound and rewrites the destination
+ *
+ * Returns the new session after it has been passed to nat_insert(), or
+ * NULL if memory could not be allocated or no usable mapping was found.
+ * On success the port in the packet's TCP/UDP header is rewritten.
+ */
+nat_t *nat_new(np, ip, fin, flags, direction)
+ipnat_t *np;
+ip_t *ip;
+fr_info_t *fin;
+u_int flags;
+int direction;
+{
+	register u_32_t sum1, sum2, sumd, l;
+	u_short port = 0, sport = 0, dport = 0, nport = 0;
+	struct in_addr in, inb;
+	tcphdr_t *tcp = NULL;
+	hostmap_t *hm = NULL;
+	nat_t *nat, *natl;
+	u_short nflags;
+#if SOLARIS && defined(_KERNEL) && (SOLARIS2 >= 6)
+	qif_t *qf = fin->fin_qif;
+#endif
+
+	nflags = flags & np->in_flags;
+	if (flags & IPN_TCPUDP) {
+		tcp = (tcphdr_t *)fin->fin_dp;
+		sport = tcp->th_sport;
+		dport = tcp->th_dport;
+	}
+
+	/* Give me a new nat */
+	KMALLOC(nat, nat_t *);
+	if (nat == NULL) {
+		nat_stats.ns_memfail++;
+		return NULL;
+	}
+
+	bzero((char *)nat, sizeof(*nat));
+	nat->nat_flags = flags;
+	if (flags & FI_WILDP)
+		nat_stats.ns_wilds++;
+	/*
+	 * Search the current table for a match.
+	 */
+	if (direction == NAT_OUTBOUND) {
+		/*
+		 * Values at which the search for a free resource starts.
+		 */
+		u_32_t st_ip;
+		u_short st_port;
+
+		/*
+		 * If it's an outbound packet which doesn't match any existing
+		 * record, then create a new port
+		 */
+		l = 0;
+		st_ip = np->in_nip;
+		st_port = np->in_pnext;
+
+		/*
+		 * Each pass proposes an address/port (l counts the attempts)
+		 * and the loop repeats while that proposal is already in use.
+		 */
+		do {
+			port = 0;
+			in.s_addr = htonl(np->in_nip);
+			if (l == 0) {
+				/*
+				 * Check to see if there is an existing NAT
+				 * setup for this IP address pair.
+				 */
+				hm = nat_hostmap(np, ip->ip_src, in);
+				if (hm != NULL)
+					in.s_addr = hm->hm_mapip.s_addr;
+			} else if ((l == 1) && (hm != NULL)) {
+				nat_hostmapdel(hm);
+				hm = NULL;
+			}
+			in.s_addr = ntohl(in.s_addr);
+
+			nat->nat_hm = hm;
+
+			if ((np->in_outmsk == 0xffffffff) &&
+			    (np->in_pnext == 0)) {
+				if (l > 0)
+					goto badnat;
+			}
+
+			if (np->in_redir & NAT_MAPBLK) {
+				if ((l >= np->in_ppip) || ((l > 0) &&
+				     !(flags & IPN_TCPUDP)))
+					goto badnat;
+				/*
+				 * map-block - Calculate destination address.
+				 *
+				 * NOTE(review): in_ippip is used as a divisor
+				 * below without being tested for zero in this
+				 * function - confirm it is validated when the
+				 * rule is loaded.
+				 */
+				in.s_addr = ntohl(ip->ip_src.s_addr);
+				in.s_addr &= ntohl(~np->in_inmsk);
+				inb.s_addr = in.s_addr;
+				in.s_addr /= np->in_ippip;
+				in.s_addr &= ntohl(~np->in_outmsk);
+				in.s_addr += ntohl(np->in_outip);
+				/*
+				 * Calculate destination port.
+				 */
+				if ((flags & IPN_TCPUDP) &&
+				    (np->in_ppip != 0)) {
+					port = ntohs(sport) + l;
+					port %= np->in_ppip;
+					port += np->in_ppip *
+						(inb.s_addr % np->in_ippip);
+					port += MAPBLK_MINPORT;
+					port = htons(port);
+				}
+			} else if (!np->in_outip &&
+				   (np->in_outmsk == 0xffffffff)) {
+				/*
+				 * 0/32 - use the interface's IP address.
+				 */
+				if ((l > 0) ||
+				    fr_ifpaddr(4, fin->fin_ifp, &in) == -1)
+					goto badnat;
+				in.s_addr = ntohl(in.s_addr);
+			} else if (!np->in_outip && !np->in_outmsk) {
+				/*
+				 * 0/0 - use the original source address/port.
+				 */
+				if (l > 0)
+					goto badnat;
+				in.s_addr = ntohl(ip->ip_src.s_addr);
+			} else if ((np->in_outmsk != 0xffffffff) &&
+				   (np->in_pnext == 0) &&
+				   ((l > 0) || (hm == NULL)))
+				np->in_nip++;
+			natl = NULL;
+
+			/*
+			 * NOTE(review): in the auto port map case below,
+			 * l % np->in_ppip is evaluated before in_ppip is
+			 * compared with zero, and in_ippip is again used as
+			 * a divisor - confirm neither can be zero here.
+			 */
+			if ((nflags & IPN_TCPUDP) &&
+			    ((np->in_redir & NAT_MAPBLK) == 0) &&
+			    (np->in_flags & IPN_AUTOPORTMAP)) {
+				if ((l > 0) && (l % np->in_ppip == 0)) {
+					if (l > np->in_space) {
+						goto badnat;
+					} else if ((l > np->in_ppip) &&
+						   np->in_outmsk != 0xffffffff)
+						np->in_nip++;
+				}
+				if (np->in_ppip != 0) {
+					port = ntohs(sport);
+					port += (l % np->in_ppip);
+					port %= np->in_ppip;
+					port += np->in_ppip *
+						(ntohl(ip->ip_src.s_addr) %
+						 np->in_ippip);
+					port += MAPBLK_MINPORT;
+					port = htons(port);
+				}
+			} else if (((np->in_redir & NAT_MAPBLK) == 0) &&
+				   (nflags & IPN_TCPUDP) &&
+				   (np->in_pnext != 0)) {
+				/*
+				 * Take the next port from the rule's range,
+				 * wrapping to in_pmin (and moving on to the
+				 * next address) once in_pmax is passed.
+				 */
+				port = htons(np->in_pnext++);
+				if (np->in_pnext > ntohs(np->in_pmax)) {
+					np->in_pnext = ntohs(np->in_pmin);
+					if (np->in_outmsk != 0xffffffff)
+						np->in_nip++;
+				}
+			}
+
+			/* Keep the next address to hand out within bounds. */
+			if (np->in_flags & IPN_IPRANGE) {
+				if (np->in_nip > ntohl(np->in_outmsk))
+					np->in_nip = ntohl(np->in_outip);
+			} else {
+				if ((np->in_outmsk != 0xffffffff) &&
+				    ((np->in_nip + 1) & ntohl(np->in_outmsk)) >
+				    ntohl(np->in_outip))
+					np->in_nip = ntohl(np->in_outip) + 1;
+			}
+
+			if (!port && (flags & IPN_TCPUDP))
+				port = sport;
+
+			/*
+			 * Here we do a lookup of the connection as seen from
+			 * the outside.  If an IP# pair already exists, try
+			 * again.  So if you have A->B becomes C->B, you can
+			 * also have D->E become C->E but not D->B causing
+			 * another C->B.  Also take protocol and ports into
+			 * account when determining whether a pre-existing
+			 * NAT setup will cause an external conflict where
+			 * this is appropriate.
+			 */
+			inb.s_addr = htonl(in.s_addr);
+			natl = nat_inlookup(fin->fin_ifp, flags & ~FI_WILDP,
+					    (u_int)ip->ip_p, ip->ip_dst, inb,
+					    (port << 16) | dport, 1);
+
+			/*
+			 * Has the search wrapped around and come back to the
+			 * start ?
+			 */
+			if ((natl != NULL) &&
+			    (np->in_pnext != 0) && (st_port == np->in_pnext) &&
+			    (np->in_nip != 0) && (st_ip == np->in_nip))
+				goto badnat;
+			l++;
+		} while (natl != NULL);
+
+		if (np->in_space > 0)
+			np->in_space--;
+
+		/* Setup the NAT table */
+		nat->nat_inip = ip->ip_src;
+		nat->nat_outip.s_addr = htonl(in.s_addr);
+		nat->nat_oip = ip->ip_dst;
+		if (nat->nat_hm == NULL)
+			nat->nat_hm = nat_hostmap(np, ip->ip_src,
+						  nat->nat_outip);
+
+		sum1 = LONG_SUM(ntohl(ip->ip_src.s_addr)) + ntohs(sport);
+		sum2 = LONG_SUM(in.s_addr) + ntohs(port);
+
+		if (flags & IPN_TCPUDP) {
+			nat->nat_inport = sport;
+			nat->nat_outport = port;	/* sport */
+			nat->nat_oport = dport;
+		}
+	} else {
+		/*
+		 * Otherwise, it's an inbound packet. Most likely, we don't
+		 * want to rewrite source ports and source addresses. Instead,
+		 * we want to rewrite to a fixed internal address and fixed
+		 * internal port.
+		 */
+		if (np->in_flags & IPN_SPLIT) {
+			/*
+			 * Alternate between the two targets, which are kept
+			 * in in_inip and in_inmsk for a split rule.
+			 */
+			in.s_addr = np->in_nip;
+			if (np->in_inip == htonl(in.s_addr))
+				np->in_nip = ntohl(np->in_inmsk);
+			else {
+				np->in_nip = ntohl(np->in_inip);
+				if (np->in_flags & IPN_ROUNDR) {
+					nat_delrdr(np);
+					nat_addrdr(np);
+				}
+			}
+		} else {
+			in.s_addr = ntohl(np->in_inip);
+			if (np->in_flags & IPN_ROUNDR) {
+				nat_delrdr(np);
+				nat_addrdr(np);
+			}
+		}
+		if (!np->in_pnext)
+			nport = dport;
+		else {
+			/*
+			 * Whilst not optimized for the case where
+			 * pmin == pmax, the gain is not significant.
+			 */
+			nport = ntohs(dport) - ntohs(np->in_pmin) +
+				ntohs(np->in_pnext);
+			nport = htons(nport);
+		}
+
+		/*
+		 * When the redirect-to address is set to 0.0.0.0, just
+		 * assume a blank `forwarding' of the packet.  We don't
+		 * setup any translation for this either.
+		 */
+		if (in.s_addr == 0) {
+			if (nport == dport)
+				goto badnat;
+			in.s_addr = ntohl(ip->ip_dst.s_addr);
+		}
+
+		nat->nat_inip.s_addr = htonl(in.s_addr);
+		nat->nat_outip = ip->ip_dst;
+		nat->nat_oip = ip->ip_src;
+
+		sum1 = LONG_SUM(ntohl(ip->ip_dst.s_addr)) + ntohs(dport);
+		sum2 = LONG_SUM(in.s_addr) + ntohs(nport);
+
+		if (flags & IPN_TCPUDP) {
+			nat->nat_inport = nport;
+			nat->nat_outport = dport;
+			nat->nat_oport = sport;
+		}
+	}
+
+	/*
+	 * nat_sumd[] holds the checksum adjustment for the address and port
+	 * change, folded to 16 bits.
+	 */
+	CALC_SUMD(sum1, sum2, sumd);
+	nat->nat_sumd[0] = (sumd & 0xffff) + (sumd >> 16);
+#if SOLARIS && defined(_KERNEL) && (SOLARIS2 >= 6)
+	if ((flags == IPN_TCP) && dohwcksum &&
+	    (qf->qf_ill->ill_ick.ick_magic == ICK_M_CTL_MAGIC)) {
+		if (direction == NAT_OUTBOUND)
+			sum1 = LONG_SUM(ntohl(in.s_addr));
+		else
+			sum1 = LONG_SUM(ntohl(ip->ip_src.s_addr));
+		sum1 += LONG_SUM(ntohl(ip->ip_dst.s_addr));
+		sum1 += 30;
+		sum1 = (sum1 & 0xffff) + (sum1 >> 16);
+		nat->nat_sumd[1] = NAT_HW_CKSUM|(sum1 & 0xffff);
+	} else
+#endif
+		nat->nat_sumd[1] = nat->nat_sumd[0];
+
+	/*
+	 * nat_ipsumd is the adjustment for the address change alone; it is
+	 * recomputed without the ports when the ports differ.
+	 */
+	if ((flags & IPN_TCPUDP) && ((sport != port) || (dport != nport))) {
+		if (direction == NAT_OUTBOUND)
+			sum1 = LONG_SUM(ntohl(ip->ip_src.s_addr));
+		else
+			sum1 = LONG_SUM(ntohl(ip->ip_dst.s_addr));
+
+		sum2 = LONG_SUM(in.s_addr);
+
+		CALC_SUMD(sum1, sum2, sumd);
+		nat->nat_ipsumd = (sumd & 0xffff) + (sumd >> 16);
+	} else
+		nat->nat_ipsumd = nat->nat_sumd[0];
+
+	in.s_addr = htonl(in.s_addr);
+
+#ifdef  _KERNEL
+	strncpy(nat->nat_ifname, IFNAME(fin->fin_ifp), IFNAMSIZ);
+#endif
+	nat_insert(nat);
+
+	nat->nat_dir = direction;
+	nat->nat_ifp = fin->fin_ifp;
+	nat->nat_ptr = np;
+	nat->nat_p = ip->ip_p;
+	nat->nat_bytes = 0;
+	nat->nat_pkts = 0;
+	nat->nat_fr = fin->fin_fr;
+	if (nat->nat_fr != NULL) {
+		ATOMIC_INC32(nat->nat_fr->fr_ref);
+	}
+	/* Apply the chosen port to the packet that triggered the session. */
+	if (direction == NAT_OUTBOUND) {
+		if (flags & IPN_TCPUDP)
+			tcp->th_sport = port;
+	} else {
+		if (flags & IPN_TCPUDP)
+			tcp->th_dport = nport;
+	}
+	np->in_use++;
+#ifdef	IPFILTER_LOG
+	nat_log(nat, (u_int)np->in_redir);
+#endif
+	return nat;
+badnat:
+	/*
+	 * No usable mapping: release the host map reference and the entry.
+	 * NOTE(review): ns_wilds was incremented above for FI_WILDP and is
+	 * not decremented on this path - confirm whether that is intended.
+	 */
+	nat_stats.ns_badnat++;
+	if ((hm = nat->nat_hm) != NULL)
+		nat_hostmapdel(hm);
+	KFREE(nat);
+	return NULL;
+}
+
+
+/*
+ * Make a NAT session live: initialise its lock and age, resolve its
+ * interface from nat_ifname (when one is set), push it onto the front of
+ * nat_instances and onto the front of a chain in each hash table - table 0
+ * keyed on the inside address/port, table 1 on the outside address/port.
+ */
+void	nat_insert(nat)
+nat_t	*nat;
+{
+	nat_t **bucket;
+	u_int slot;
+
+	MUTEX_INIT(&nat->nat_lock, "nat entry lock", NULL);
+
+	nat->nat_age = fr_defnatage;
+
+	/* the name may have arrived without a terminator */
+	nat->nat_ifname[sizeof(nat->nat_ifname) - 1] = '\0';
+	if (nat->nat_ifname[0] != '\0') {
+		nat->nat_ifp = GETUNIT(nat->nat_ifname, 4);
+	}
+
+	nat->nat_next = nat_instances;
+	nat_instances = nat;
+
+	/* inside address/port chain */
+	slot = NAT_HASH_FN(nat->nat_inip.s_addr, nat->nat_inport,
+			   ipf_nattable_sz);
+	bucket = &nat_table[0][slot];
+	if (*bucket != NULL)
+		(*bucket)->nat_phnext[0] = &nat->nat_hnext[0];
+	nat->nat_hnext[0] = *bucket;
+	nat->nat_phnext[0] = bucket;
+	*bucket = nat;
+
+	/* outside address/port chain */
+	slot = NAT_HASH_FN(nat->nat_outip.s_addr, nat->nat_outport,
+			   ipf_nattable_sz);
+	bucket = &nat_table[1][slot];
+	if (*bucket != NULL)
+		(*bucket)->nat_phnext[1] = &nat->nat_hnext[1];
+	nat->nat_hnext[1] = *bucket;
+	nat->nat_phnext[1] = bucket;
+	*bucket = nat;
+
+	nat_stats.ns_added++;
+	nat_stats.ns_inuse++;
+}
+
+
+/*
+ * Find the NAT session that the packet quoted inside an ICMP error
+ * message belongs to.
+ *
+ * ip  - IP header of the ICMP packet
+ * fin - packet information; fin_dp addresses the ICMP header
+ * dir - NAT_INBOUND to search with nat_inlookup(), otherwise
+ *       nat_outlookup() is used
+ *
+ * Returns the matching session, or NULL if the packet is not an ICMP
+ * error, is too short to hold the quoted headers, or nothing matches.
+ */
+nat_t *nat_icmplookup(ip, fin, dir)
+ip_t *ip;
+fr_info_t *fin;
+int dir;
+{
+	icmphdr_t *icmphdr;
+	tcphdr_t *ports = NULL;
+	ip_t *inner;
+	int pflags = 0, icmptype, need;
+
+	icmphdr = (icmphdr_t *)fin->fin_dp;
+	/*
+	 * An ICMP error should be carried by a plain IP header (no options)
+	 * and must be long enough to hold the returned IP header.
+	 */
+	if ((ip->ip_hl != 5) || (ip->ip_len < ICMPERR_MINPKTLEN))
+		return NULL;
+
+	/*
+	 * Only error types quote an offending packet.
+	 */
+	icmptype = icmphdr->icmp_type;
+	switch (icmptype)
+	{
+	case ICMP_UNREACH :
+	case ICMP_SOURCEQUENCH :
+	case ICMP_REDIRECT :
+	case ICMP_TIMXCEED :
+	case ICMP_PARAMPROB :
+		break;
+	default :
+		return NULL;
+	}
+
+	/* the quoted IP header follows the 8 byte ICMP header */
+	inner = (ip_t *)((char *)fin->fin_dp + 8);
+	need = (inner->ip_hl << 2);
+	if (need < sizeof(ip_t))
+		return NULL;
+	if (ip->ip_len < ICMPERR_IPICMPHLEN + need)
+		return NULL;
+	/*
+	 * Is the buffer big enough for all of it ?  It's the size of the IP
+	 * header claimed in the encapsulated part which is of concern.  It
+	 * may be too big to be in this buffer but not so big that it's
+	 * outside the ICMP packet, leading to TCP deref's causing problems.
+	 * This is possible because we don't know how big oip_hl is when we
+	 * do the pullup early in fr_check() and thus can't guarantee it is
+	 * all here now.
+	 */
+#ifdef  _KERNEL
+	{
+	mb_t *mbuf;
+
+# if SOLARIS
+	mbuf = fin->fin_qfm;
+	if ((char *)inner + fin->fin_dlen - ICMPERR_ICMPHLEN >
+	    (char *)mbuf->b_wptr)
+		return NULL;
+# else
+	mbuf = *(mb_t **)fin->fin_mp;
+	if ((char *)inner + fin->fin_dlen - ICMPERR_ICMPHLEN >
+	    (char *)ip + mbuf->m_len)
+		return NULL;
+# endif
+	}
+#endif
+
+	switch (inner->ip_p)
+	{
+	case IPPROTO_TCP :
+		pflags = IPN_TCP;
+		break;
+	case IPPROTO_UDP :
+		pflags = IPN_UDP;
+		break;
+	default :
+		break;
+	}
+
+	if (pflags & IPN_TCPUDP) {
+		need += 8;		/* + 64bits of data to get ports */
+		if (ip->ip_len < ICMPERR_IPICMPHLEN + need)
+			return NULL;
+		ports = (tcphdr_t *)((char *)inner + (inner->ip_hl << 2));
+		if (dir == NAT_INBOUND)
+			return nat_inlookup(fin->fin_ifp, pflags,
+				(u_int)inner->ip_p, inner->ip_dst,
+				inner->ip_src,
+				(ports->th_sport << 16) | ports->th_dport, 0);
+		return nat_outlookup(fin->fin_ifp, pflags,
+			(u_int)inner->ip_p, inner->ip_dst, inner->ip_src,
+			(ports->th_sport << 16) | ports->th_dport, 0);
+	}
+
+	/* no ports to match on for any other protocol */
+	if (dir == NAT_INBOUND)
+		return nat_inlookup(fin->fin_ifp, 0, (u_int)inner->ip_p,
+			inner->ip_dst, inner->ip_src, 0, 0);
+	return nat_outlookup(fin->fin_ifp, 0, (u_int)inner->ip_p,
+		inner->ip_dst, inner->ip_src, 0, 0);
+}
+
+
+/*
+ * This should *ONLY* be used for incoming packets to make sure a NAT'd ICMP
+ * packet gets correctly recognised.
+ */
+nat_t *nat_icmp(ip, fin, nflags, dir)
+ip_t *ip;
+fr_info_t *fin;
+u_int *nflags;
+int dir;
+{
+	u_32_t sum1, sum2, sumd, sumd2 = 0;
+	struct in_addr in;
+	icmphdr_t *icmp;
+	udphdr_t *udp;
+	nat_t *nat;
+	ip_t *oip;
+	int flags = 0;
+
+	if ((fin->fin_fi.fi_fl & FI_SHORT) || (ip->ip_off & IP_OFFMASK))
+		return NULL;
+	/*
+	 * nat_icmplookup() will return NULL for `defective' packets.
+	 */
+	if ((ip->ip_v != 4) || !(nat = nat_icmplookup(ip, fin, dir)))
+		return NULL;
+	*nflags = IPN_ICMPERR;
+	icmp = (icmphdr_t *)fin->fin_dp;
+	oip = (ip_t *)&icmp->icmp_ip;
+	if (oip->ip_p == IPPROTO_TCP)
+		flags = IPN_TCP;
+	else if (oip->ip_p == IPPROTO_UDP)
+		flags = IPN_UDP;
+	udp = (udphdr_t *)((((char *)oip) + (oip->ip_hl << 2)));
+	/*
+	 * Need to adjust ICMP header to include the real IP#'s and
+	 * port #'s.  Only apply a checksum change relative to the
+	 * IP address change as it will be modified again in ip_natout
+	 * for both address and port.  Two checksum changes are
+	 * necessary for the two header address changes.  Be careful
+	 * to only modify the checksum once for the port # and twice
+	 * for the IP#.
+	 */
+
+	/*
+	 * Step 1
+	 * Fix the IP addresses in the offending IP packet. You also need
+	 * to adjust the IP header checksum of that offending IP packet
+	 * and the ICMP checksum of the ICMP error message itself.
+	 *
+	 * Unfortunately, for UDP and TCP, the IP addresses are also contained
+	 * in the pseudo header that is used to compute the UDP resp. TCP
+	 * checksum. So, we must compensate that as well. Even worse, the
+	 * change in the UDP and TCP checksums require yet another
+	 * adjustment of the ICMP checksum of the ICMP error message.
+	 *
+	 * For the moment we forget about TCP, because that checksum is not
+	 * in the first 8 bytes, so it will not be available in most cases.
+	 */
+
+	if (oip->ip_dst.s_addr == nat->nat_oip.s_addr) {
+		sum1 = LONG_SUM(ntohl(oip->ip_src.s_addr));
+		in = nat->nat_inip;
+		oip->ip_src = in;
+	} else {
+		sum1 = LONG_SUM(ntohl(oip->ip_dst.s_addr));
+		in = nat->nat_outip;
+		oip->ip_dst = in;
+	}
+
+	sum2 = LONG_SUM(ntohl(in.s_addr));
+
+	CALC_SUMD(sum1, sum2, sumd);
+
+	if (nat->nat_dir == NAT_OUTBOUND) {
+		/*
+		 * Fix IP checksum of the offending IP packet to adjust for
+		 * the change in the IP address.
+		 *
+		 * Normally, you would expect that the ICMP checksum of the 
+		 * ICMP error message needs to be adjusted as well for the
+		 * IP address change in oip.
+		 * However, this is a NOP, because the ICMP checksum is 
+		 * calculated over the complete ICMP packet, which includes the
+		 * changed oip IP addresses and oip->ip_sum. However, these 
+		 * two changes cancel each other out (if the delta for
+		 * the IP address is x, then the delta for ip_sum is minus x), 
+		 * so no change in the icmp_cksum is necessary.
+		 *
+		 * Be careful that nat_dir refers to the direction of the
+		 * offending IP packet (oip), not to its ICMP response (icmp)
+		 */
+		fix_datacksum(&oip->ip_sum, sumd);
+
+		/*
+		 * Fix UDP pseudo header checksum to compensate for the
+		 * IP address change.
+		 */
+		if (oip->ip_p == IPPROTO_UDP && udp->uh_sum) {
+			/*
+			 * The UDP checksum is optional, only adjust it 
+			 * if it has been set.
+			 */
+			sum1 = ntohs(udp->uh_sum);
+			fix_datacksum(&udp->uh_sum, sumd);
+			sum2 = ntohs(udp->uh_sum);
+
+			/*
+			 * Fix ICMP checksum to compensate the UDP 
+			 * checksum adjustment.
+			 */
+			CALC_SUMD(sum1, sum2, sumd);
+			sumd2 = sumd;
+		}
+
+#if 0
+		/*
+		 * Fix TCP pseudo header checksum to compensate for the 
+		 * IP address change. Before we can do the change, we
+		 * must make sure that oip is sufficient large to hold
+		 * the TCP checksum (normally it does not!).
+		 */
+		if (oip->ip_p == IPPROTO_TCP) {
+		
+		}
+#endif
+	} else {
+
+		/*
+		 * Fix IP checksum of the offending IP packet to adjust for
+		 * the change in the IP address.
+		 *
+		 * Normally, you would expect that the ICMP checksum of the 
+		 * ICMP error message needs to be adjusted as well for the
+		 * IP address change in oip.
+		 * However, this is a NOP, because the ICMP checksum is 
+		 * calculated over the complete ICMP packet, which includes the
+		 * changed oip IP addresses and oip->ip_sum. However, these 
+		 * two changes cancel each other out (if the delta for
+		 * the IP address is x, then the delta for ip_sum is minus x), 
+		 * so no change in the icmp_cksum is necessary.
+		 *
+		 * Be careful that nat_dir refers to the direction of the
+		 * offending IP packet (oip), not to its ICMP response (icmp)
+		 */
+		fix_datacksum(&oip->ip_sum, sumd);
+
+/* XXX FV : without having looked at Solaris source code, it seems unlikely
+ * that SOLARIS would compensate this in the kernel (a body of an IP packet 
+ * in the data section of an ICMP packet). I have the feeling that this should
+ * be unconditional, but I'm not in a position to check.
+ */
+#if !SOLARIS && !defined(__sgi)
+		/*
+		 * Fix UDP pseudo header checksum to compensate for the
+		 * IP address change.
+		 */
+		if (oip->ip_p == IPPROTO_UDP && udp->uh_sum) {
+			/*
+			 * The UDP checksum is optional, only adjust it 
+			 * if it has been set 
+			 */
+			sum1 = ntohs(udp->uh_sum);
+			fix_datacksum(&udp->uh_sum, sumd);
+			sum2 = ntohs(udp->uh_sum);
+
+			/*
+			 * Fix ICMP checksum to compensate the UDP 
+			 * checksum adjustment.
+			 */
+			CALC_SUMD(sum1, sum2, sumd);
+			sumd2 = sumd;
+		}
+		
+#if 0
+		/* 
+		 * Fix TCP pseudo header checksum to compensate for the 
+		 * IP address change. Before we can do the change, we
+		 * must make sure that oip is sufficient large to hold
+		 * the TCP checksum (normally it does not!).
+		 */
+		if (oip->ip_p == IPPROTO_TCP) {
+		
+		};
+#endif
+		
+#endif
+	}
+
+	if ((flags & IPN_TCPUDP) != 0) {
+		tcphdr_t *tcp;
+
+		/*
+		 * XXX - what if this is bogus hl and we go off the end ?
+		 * In this case, nat_icmpinlookup() will have returned NULL.
+		 */
+		tcp = (tcphdr_t *)udp;
+
+		/*
+		 * Step 2 :
+		 * For offending TCP/UDP IP packets, translate the ports as
+		 * well, based on the NAT specification. Of course such
+		 * a change must be reflected in the ICMP checksum as well.
+		 *
+		 * Advance notice : Now it becomes complicated :-)
+		 *
+		 * Since the port fields are part of the TCP/UDP checksum
+		 * of the offending IP packet, you need to adjust that checksum
+		 * as well... but, if you change, you must change the icmp
+		 * checksum *again*, to reflect that change.
+		 *
+		 * To further complicate: the TCP checksum is not in the first
+		 * 8 bytes of the offending ip packet, so it most likely is not
+		 * available (we might have to fix that if the encounter a
+		 * device that returns more than 8 data bytes on icmp error)
+		 */
+
+		if (nat->nat_oport == tcp->th_dport) {
+			if (tcp->th_sport != nat->nat_inport) {
+				/*
+				 * Fix ICMP checksum to compensate port
+				 * adjustment.
+				 */
+				sum1 = ntohs(tcp->th_sport);
+				sum2 = ntohs(nat->nat_inport);
+				CALC_SUMD(sum1, sum2, sumd);
+				sumd2 += sumd;
+				tcp->th_sport = nat->nat_inport;
+
+				/*
+				 * Fix udp checksum to compensate port
+				 * adjustment.  NOTE : the offending IP packet
+				 * flows the other direction compared to the
+				 * ICMP message.
+				 *
+				 * The UDP checksum is optional, only adjust
+				 * it if it has been set.
+				 */
+				if (oip->ip_p == IPPROTO_UDP && udp->uh_sum) {
+
+					sum1 = ntohs(udp->uh_sum);
+					fix_datacksum(&udp->uh_sum, sumd);
+					sum2 = ntohs(udp->uh_sum);
+
+					/*
+					 * Fix ICMP checksum to 
+					 * compensate UDP checksum 
+					 * adjustment.
+					 */
+					CALC_SUMD(sum1, sum2, sumd);
+					sumd2 += sumd;
+				}
+			}
+		} else {
+			if (tcp->th_dport != nat->nat_outport) {
+				/*
+				 * Fix ICMP checksum to compensate port
+				 * adjustment.
+				 */
+				sum1 = ntohs(tcp->th_dport);
+				sum2 = ntohs(nat->nat_outport);
+				CALC_SUMD(sum1, sum2, sumd);
+				sumd2 += sumd;
+				tcp->th_dport = nat->nat_outport;
+
+				/*
+				 * Fix udp checksum to compensate port
+				 * adjustment.   NOTE : the offending IP
+				 * packet flows the other direction compared
+				 * to the ICMP message.
+				 *
+				 * The UDP checksum is optional, only adjust
+				 * it if it has been set.
+				 */
+				if (oip->ip_p == IPPROTO_UDP && udp->uh_sum) {
+
+					sum1 = ntohs(udp->uh_sum);
+					fix_datacksum(&udp->uh_sum, sumd);
+					sum2 = ntohs(udp->uh_sum);
+
+					/*
+					 * Fix ICMP checksum to compensate
+					 * UDP checksum adjustment.
+					 */
+					CALC_SUMD(sum1, sum2, sumd);
+					sumd2 += sumd;
+				}
+			}
+		}
+		if (sumd2) {
+			sumd2 = (sumd2 & 0xffff) + (sumd2 >> 16);
+			sumd2 = (sumd2 & 0xffff) + (sumd2 >> 16);
+			if (nat->nat_dir == NAT_OUTBOUND) {
+				fix_outcksum(&icmp->icmp_cksum, sumd2);
+			} else {
+				fix_incksum(&icmp->icmp_cksum, sumd2);
+			}
+		}
+	}
+	nat->nat_age = fr_defnaticmpage;
+	return nat;
+}
+
+
+/*
+ * NB: these lookups don't lock access to the list; it is assumed that this
+ * has already been done!
+ */
+/*
+ * Inbound lookup: find the entry whose mapped side (nat_outip/nat_outport)
+ * is the packet's destination and whose far side (nat_oip/nat_oport) is src.
+ * ports holds dport in the high 16 bits and sport in the low 16; a NULL ifp
+ * matches any interface.  NOTE: THE PACKET (IF FOUND) HAS A MAPPING ALREADY.
+ */
+nat_t *nat_inlookup(ifp, flags, p, src, mapdst, ports, rw)
+void *ifp;
+register u_int flags, p;
+struct in_addr src , mapdst;
+u_32_t ports;
+int rw;
+{
+	register u_short sport, dport;
+	register nat_t *nat;
+	register int nflags;
+	register u_32_t dst;
+	u_int hv;
+
+	dst = mapdst.s_addr;
+	dport = ports >> 16;		/* high half of ports */
+	sport = ports & 0xffff;		/* low half of ports */
+	flags &= IPN_TCPUDP;		/* only the TCP/UDP bits are compared */
+
+	hv = NAT_HASH_FN(dst, dport, ipf_nattable_sz);
+	nat = nat_table[1][hv];
+	for (; nat; nat = nat->nat_hnext[1]) {
+		nflags = nat->nat_flags;
+		if ((!ifp || ifp == nat->nat_ifp) &&
+		    nat->nat_oip.s_addr == src.s_addr &&
+		    nat->nat_outip.s_addr == dst &&
+		    (((p == 0) && (flags == (nat->nat_flags & IPN_TCPUDP)))
+		     || (p == nat->nat_p)) && (!flags ||
+		     (((nat->nat_oport == sport) || (nflags & FI_W_DPORT)) &&
+		      ((nat->nat_outport == dport) || (nflags & FI_W_SPORT)))))
+			return nat;
+	}
+	if (!nat_stats.ns_wilds || !(flags & IPN_TCPUDP))
+		return NULL;	/* no wildcard entries, or not TCP/UDP */
+	if (!rw) {
+		RWLOCK_EXIT(&ipf_nat);	/* rw == 0: release the lock... */
+	}
+	hv = NAT_HASH_FN(dst, 0, ipf_nattable_sz);	/* bucket without port */
+	if (!rw) {
+		WRITE_ENTER(&ipf_nat);	/* ...and retake it for writing */
+	}
+	nat = nat_table[1][hv];
+	for (; nat; nat = nat->nat_hnext[1]) {
+		nflags = nat->nat_flags;
+		if (ifp && ifp != nat->nat_ifp)
+			continue;
+		if (!(nflags & IPN_TCPUDP))
+			continue;
+		if (!(nflags & FI_WILDP))
+			continue;
+		if (nat->nat_oip.s_addr != src.s_addr ||
+		    nat->nat_outip.s_addr != dst)
+			continue;
+		if (((nat->nat_oport == sport) || (nflags & FI_W_DPORT)) &&
+		    ((nat->nat_outport == dport) || (nflags & FI_W_SPORT))) {
+			nat_tabmove(nat, ports);	/* rehash including ports */
+			break;
+		}
+	}
+	if (!rw) {
+		MUTEX_DOWNGRADE(&ipf_nat);	/* undo the WRITE_ENTER above */
+	}
+	return nat;	/* NULL when the wildcard chain had no match */
+}
+
+
+/*
+ * Rehash a TCP/UDP NAT entry that was first placed in the tables without
+ * its ports (wildcard lookup), now that the ports are known: unlink it from
+ * both hash chains and reinsert it at the head of the port-based buckets.
+ */
+static void nat_tabmove(nat, ports)
+nat_t *nat;
+u_32_t ports;
+{
+	register u_short sport, dport;
+	nat_t **natp;
+	u_int hv;
+
+	dport = ports >> 16;		/* high half of ports */
+	sport = ports & 0xffff;		/* low half of ports */
+
+	if (nat->nat_oport == dport) {
+		nat->nat_inport = sport;
+		nat->nat_outport = sport;
+	}
+
+	/*
+	 * Remove the NAT entry from the old location
+	 */
+	if (nat->nat_hnext[0])
+		nat->nat_hnext[0]->nat_phnext[0] = nat->nat_phnext[0];
+	*nat->nat_phnext[0] = nat->nat_hnext[0];
+
+	if (nat->nat_hnext[1])
+		nat->nat_hnext[1]->nat_phnext[1] = nat->nat_phnext[1];
+	*nat->nat_phnext[1] = nat->nat_hnext[1];
+
+	/*
+	 * Add into the NAT table in the new position
+	 */
+	hv = NAT_HASH_FN(nat->nat_inip.s_addr, sport, ipf_nattable_sz);
+	natp = &nat_table[0][hv];
+	if (*natp)
+		(*natp)->nat_phnext[0] = &nat->nat_hnext[0];
+	nat->nat_phnext[0] = natp;
+	nat->nat_hnext[0] = *natp;
+	*natp = nat;
+	/* NOTE(review): keyed on sport, but nat_inlookup() hashes on dport. */
+	hv = NAT_HASH_FN(nat->nat_outip.s_addr, sport, ipf_nattable_sz);
+	natp = &nat_table[1][hv];
+	if (*natp)
+		(*natp)->nat_phnext[1] = &nat->nat_hnext[1];
+	nat->nat_phnext[1] = natp;
+	nat->nat_hnext[1] = *natp;
+	*natp = nat;
+}
+
+
+/*
+ * Outbound lookup: find the entry whose inside end (nat_inip/nat_inport) is
+ * the packet's source and whose far end (nat_oip/nat_oport) is dst.  ports
+ * holds dport in the high 16 bits and sport in the low 16; a NULL ifp
+ * matches any interface.  NOTE: THE PACKET (IF FOUND) HAS A MAPPING ALREADY.
+ */
+nat_t *nat_outlookup(ifp, flags, p, src, dst, ports, rw)
+void *ifp;
+register u_int flags, p;
+struct in_addr src , dst;
+u_32_t ports;
+int rw;
+{
+	register u_short sport, dport;
+	register nat_t *nat;
+	register int nflags;
+	u_32_t srcip;
+	u_int hv;
+
+	sport = ports & 0xffff;		/* low half of ports */
+	dport = ports >> 16;		/* high half of ports */
+	flags &= IPN_TCPUDP;		/* only the TCP/UDP bits are compared */
+	srcip = src.s_addr;
+
+	hv = NAT_HASH_FN(srcip, sport, ipf_nattable_sz);
+	nat = nat_table[0][hv];
+	for (; nat; nat = nat->nat_hnext[0]) {
+		nflags = nat->nat_flags;
+
+		if ((!ifp || ifp == nat->nat_ifp) &&
+		    nat->nat_inip.s_addr == srcip &&
+		    nat->nat_oip.s_addr == dst.s_addr &&
+		    (((p == 0) && (flags == (nflags & IPN_TCPUDP)))
+		     || (p == nat->nat_p)) && (!flags ||
+		     ((nat->nat_inport == sport || nflags & FI_W_SPORT) &&
+		      (nat->nat_oport == dport || nflags & FI_W_DPORT))))
+			return nat;
+	}
+	if (!nat_stats.ns_wilds || !(flags & IPN_TCPUDP))
+		return NULL;	/* no wildcard entries, or not TCP/UDP */
+	if (!rw) {
+		RWLOCK_EXIT(&ipf_nat);	/* rw == 0: release the lock... */
+	}
+	hv = NAT_HASH_FN(srcip, 0, ipf_nattable_sz);	/* bucket without port */
+	if (!rw) {
+		WRITE_ENTER(&ipf_nat);	/* ...and retake it for writing */
+	}
+	nat = nat_table[0][hv];
+	for (; nat; nat = nat->nat_hnext[0]) {
+		nflags = nat->nat_flags;
+		if (ifp && ifp != nat->nat_ifp)
+			continue;
+		if (!(nflags & IPN_TCPUDP))
+			continue;
+		if (!(nflags & FI_WILDP))
+			continue;
+		if ((nat->nat_inip.s_addr != srcip) ||
+		    (nat->nat_oip.s_addr != dst.s_addr))
+			continue;
+		if (((nat->nat_inport == sport) || (nflags & FI_W_SPORT)) &&
+		    ((nat->nat_oport == dport) || (nflags & FI_W_DPORT))) {
+			nat_tabmove(nat, ports);	/* rehash including ports */
+			break;
+		}
+	}
+	if (!rw) {
+		MUTEX_DOWNGRADE(&ipf_nat);	/* undo the WRITE_ENTER above */
+	}
+	return nat;	/* NULL when the wildcard chain had no match */
+}
+
+
+/*
+ * Look up the NAT entry described by np; returns the entry, or NULL if none.
+ */
+nat_t *nat_lookupredir(np)
+register natlookup_t *np;
+{
+	u_32_t ports;
+	nat_t *nat;
+
+	ports = ((u_32_t)np->nl_outport << 16) | np->nl_inport;
+	/*
+	 * Only the outbound table is searched (nl_inip/nl_inport as source);
+	 * on a hit the mapped address and port are stored back into *np.
+	 */
+	if ((nat = nat_outlookup(NULL, np->nl_flags, 0, np->nl_inip,
+				 np->nl_outip, ports, 0))) {
+		np->nl_realip = nat->nat_outip;
+		np->nl_realport = nat->nat_outport;
+	}
+	return nat;
+}
+
+
+/* Return non-zero if the packet described by fin/ip satisfies the protocol,
+ * address and (when present) port criteria of NAT rule np, otherwise 0. */
+static int nat_match(fin, np, ip)
+fr_info_t *fin;
+ipnat_t *np;
+ip_t *ip;
+{
+	int srcmiss, dstmiss, noports;
+
+	/* IPv4 only, and the protocol must agree when the rule names one. */
+	if ((ip->ip_v != 4) || (np->in_p && (ip->ip_p != np->in_p)))
+		return 0;
+
+	if (fin->fin_out) {
+		if ((np->in_redir & (NAT_MAP|NAT_MAPBLK)) == 0)
+			return 0;
+		srcmiss = ((fin->fin_fi.fi_saddr & np->in_inmsk) !=
+			   np->in_inip);
+		dstmiss = ((fin->fin_fi.fi_daddr & np->in_srcmsk) !=
+			   np->in_srcip);
+	} else {
+		if ((np->in_redir & NAT_REDIRECT) == 0)
+			return 0;
+		srcmiss = ((fin->fin_fi.fi_saddr & np->in_srcmsk) !=
+			   np->in_srcip);
+		dstmiss = ((fin->fin_fi.fi_daddr & np->in_outmsk) !=
+			   np->in_outip);
+	}
+	/* IPN_NOTSRC and IPN_NOTDST invert the sense of each address test. */
+	if (srcmiss != ((np->in_flags & IPN_NOTSRC) != 0))
+		return 0;
+	if (dstmiss != ((np->in_flags & IPN_NOTDST) != 0))
+		return 0;
+
+	/* No usable port data: match only if the rule compares no ports. */
+	noports = !(fin->fin_fi.fi_fl & FI_TCPUDP) ||
+		  (fin->fin_fi.fi_fl & FI_SHORT) || (ip->ip_off & IP_OFFMASK);
+	if (noports)
+		return (!np->in_tuc.ftu_scmp && !np->in_tuc.ftu_dcmp);
+
+	return fr_tcpudpchk(&np->in_tuc, fin);
+}
+
+
+/*
+ * Outbound NAT: rewrite the source address/port of a packet being sent.
+ * Returns 0 if not translated, else 1 or appr_check()'s non-zero result.
+ */
+int ip_natout(ip, fin)
+ip_t *ip;
+fr_info_t *fin;
+{
+	register ipnat_t *np = NULL;
+	register u_32_t ipa;
+	tcphdr_t *tcp = NULL;
+	u_short sport = 0, dport = 0, *csump = NULL;
+	struct ifnet *ifp;
+	int natadd = 1;
+	frentry_t *fr;
+	u_int nflags = 0, hv, msk;
+	u_32_t iph;
+	nat_t *nat;
+	int i;
+
+	if (nat_list == NULL || (fr_nat_lock))
+		return 0;
+
+	if ((fr = fin->fin_fr) && !(fr->fr_flags & FR_DUP) &&
+	    fr->fr_tif.fd_ifp && fr->fr_tif.fd_ifp != (void *)-1)
+		ifp = fr->fr_tif.fd_ifp;
+	else
+		ifp = fin->fin_ifp;
+
+	if (!(ip->ip_off & IP_OFFMASK) && !(fin->fin_fi.fi_fl & FI_SHORT)) {
+		if (ip->ip_p == IPPROTO_TCP)
+			nflags = IPN_TCP;
+		else if (ip->ip_p == IPPROTO_UDP)
+			nflags = IPN_UDP;
+		if ((nflags & IPN_TCPUDP)) {
+			tcp = (tcphdr_t *)fin->fin_dp;
+			sport = tcp->th_sport;
+			dport = tcp->th_dport;
+		}
+	}
+
+	ipa = ip->ip_src.s_addr;
+
+	READ_ENTER(&ipf_nat);
+
+	if ((ip->ip_p == IPPROTO_ICMP) &&
+	    (nat = nat_icmp(ip, fin, &nflags, NAT_OUTBOUND)))
+		;
+	else if ((ip->ip_off & (IP_OFFMASK|IP_MF)) &&
+	    (nat = ipfr_nat_knownfrag(ip, fin)))
+		natadd = 0;
+	else if ((nat = nat_outlookup(ifp, nflags, (u_int)ip->ip_p,
+				      ip->ip_src, ip->ip_dst,
+				      ((u_32_t)dport << 16) | sport, 0))) {
+		nflags = nat->nat_flags;
+		if ((nflags & (FI_W_SPORT|FI_W_DPORT)) != 0) {
+			if ((nflags & FI_W_SPORT) &&
+			    (nat->nat_inport != sport))
+				nat->nat_inport = sport;
+			else if ((nflags & FI_W_DPORT) &&
+				 (nat->nat_oport != dport))
+				nat->nat_oport = dport;
+			if (nat->nat_outport == 0)
+				nat->nat_outport = sport;
+			nat->nat_flags &= ~(FI_W_DPORT|FI_W_SPORT);
+			nflags = nat->nat_flags;
+			nat_stats.ns_wilds--;
+		}
+	} else {
+		RWLOCK_EXIT(&ipf_nat);
+		WRITE_ENTER(&ipf_nat);
+		/*
+		 * If there is no current entry in the nat table for this IP#,
+		 * create one for it (if there is a matching rule).
+		 */
+		msk = 0xffffffff;
+		i = 32;
+maskloop:
+		iph = ipa & htonl(msk);
+		hv = NAT_HASH_FN(iph, 0, ipf_natrules_sz);
+		for (np = nat_rules[hv]; np; np = np->in_mnext)
+		{
+			if ((np->in_ifp && (np->in_ifp != ifp)) ||
+			    !np->in_space)
+				continue;
+			if ((np->in_flags & IPN_RF) &&
+			    !(np->in_flags & nflags))
+				continue;
+			if (np->in_flags & IPN_FILTER) {
+				if (!nat_match(fin, np, ip))
+					continue;
+			} else if ((ipa & np->in_inmsk) != np->in_inip)
+				continue;
+			if (np->in_redir & (NAT_MAP|NAT_MAPBLK)) {
+				if (*np->in_plabel && !appr_ok(ip, tcp, np))
+					continue;
+				/*
+				 * If it's a redirection, then we don't want to
+				 * create new outgoing port stuff.
+				 * Redirections are only for incoming
+				 * connections.
+				 */
+				if (!(np->in_redir & (NAT_MAP|NAT_MAPBLK)))
+					continue;
+				if ((nat = nat_new(np, ip, fin, (u_int)nflags,
+						    NAT_OUTBOUND))) {
+					np->in_hits++;
+					break;
+				}
+			}
+		}
+		if ((np == NULL) && (i > 0)) {
+			do {
+				i--;
+				msk <<= 1;
+			} while ((i >= 0) && ((nat_masks & (1 << i)) == 0));
+			if (i >= 0)
+				goto maskloop;
+		}
+		MUTEX_DOWNGRADE(&ipf_nat);
+	}
+
+	/*
+	 * NOTE: ipf_nat must now only be held as a read lock
+	 */
+	if (nat) {
+		np = nat->nat_ptr;
+		if (natadd && (fin->fin_fi.fi_fl & FI_FRAG) &&
+		    np && (np->in_flags & IPN_FRAG))
+			ipfr_nat_newfrag(ip, fin, 0, nat);
+		MUTEX_ENTER(&nat->nat_lock);
+		nat->nat_age = fr_defnatage;
+		nat->nat_bytes += ip->ip_len;
+		nat->nat_pkts++;
+		MUTEX_EXIT(&nat->nat_lock);
+
+		/*
+		 * Fix up checksums, not by recalculating them, but
+		 * simply computing adjustments.
+		 */
+		if (nflags == IPN_ICMPERR) {
+			u_32_t s1, s2, sumd;
+
+			s1 = LONG_SUM(ntohl(ip->ip_src.s_addr));
+			s2 = LONG_SUM(ntohl(nat->nat_outip.s_addr));
+			CALC_SUMD(s1, s2, sumd);
+
+			if (nat->nat_dir == NAT_OUTBOUND)
+				fix_incksum(&ip->ip_sum, sumd);
+			else
+				fix_outcksum(&ip->ip_sum, sumd);
+		}
+#if SOLARIS || defined(__sgi)
+		else {
+			if (nat->nat_dir == NAT_OUTBOUND)
+				fix_outcksum(&ip->ip_sum, nat->nat_ipsumd);
+			else
+				fix_incksum(&ip->ip_sum, nat->nat_ipsumd);
+		}
+#endif
+		ip->ip_src = nat->nat_outip;
+
+		if (!(ip->ip_off & IP_OFFMASK) &&
+		    !(fin->fin_fi.fi_fl & FI_SHORT)) {
+
+			if ((nat->nat_outport != 0) && (nflags & IPN_TCPUDP)) {
+				tcp->th_sport = nat->nat_outport;
+				fin->fin_data[0] = ntohs(tcp->th_sport);
+			}
+
+			if (ip->ip_p == IPPROTO_TCP) {
+				csump = &tcp->th_sum;
+				MUTEX_ENTER(&nat->nat_lock);
+				fr_tcp_age(&nat->nat_age,
+					   nat->nat_tcpstate, fin, 1);
+				if (nat->nat_age < fr_defnaticmpage)
+					nat->nat_age = fr_defnaticmpage;
+#ifdef LARGE_NAT
+				else if (nat->nat_age > fr_defnatage)
+					nat->nat_age = fr_defnatage;
+#endif
+				/*
+				 * Increase this because we may have
+				 * "keep state" following this too and
+				 * packet storms can occur if this is
+				 * removed too quickly.
+				 */
+				if (nat->nat_age == fr_tcpclosed)
+					nat->nat_age = fr_tcplastack;
+				MUTEX_EXIT(&nat->nat_lock);
+			} else if (ip->ip_p == IPPROTO_UDP) {
+				udphdr_t *udp = (udphdr_t *)tcp;
+
+				if (udp->uh_sum)
+					csump = &udp->uh_sum;
+			} else if (ip->ip_p == IPPROTO_ICMP) {
+				nat->nat_age = fr_defnaticmpage;
+			}
+
+			if (csump) {
+				if (nat->nat_dir == NAT_OUTBOUND)
+					fix_outcksum(csump, nat->nat_sumd[1]);
+				else
+					fix_incksum(csump, nat->nat_sumd[1]);
+			}
+		}
+		/* nat_ptr may be NULL (it is tested above): guard the proxy check. */
+		if (np && (np->in_apr != NULL) && (np->in_dport == 0 ||
+		     (tcp != NULL && dport == np->in_dport))) {
+			i = appr_check(ip, fin, nat);
+			if (i == 0)
+				i = 1;
+		} else
+			i = 1;
+		ATOMIC_INCL(nat_stats.ns_mapped[1]);
+		RWLOCK_EXIT(&ipf_nat);	/* READ */
+		return i;
+	}
+	RWLOCK_EXIT(&ipf_nat);			/* READ/WRITE */
+	return 0;
+}
+
+
+/*
+ * Inbound NAT: rewrite the destination address/port of an arriving packet.
+ * Returns 1 if translated, 0 if not, or -1 when appr_check() returns -1.
+ */
+int ip_natin(ip, fin)
+ip_t *ip;
+fr_info_t *fin;
+{
+	register struct in_addr src;
+	register struct in_addr in;
+	register ipnat_t *np;
+	u_int nflags = 0, natadd = 1, hv, msk;
+	struct ifnet *ifp = fin->fin_ifp;
+	tcphdr_t *tcp = NULL;
+	u_short sport = 0, dport = 0, *csump = NULL;
+	nat_t *nat;
+	u_32_t iph;
+	int i;
+
+	if ((nat_list == NULL) || (ip->ip_v != 4) || (fr_nat_lock))
+		return 0;
+
+	if (!(ip->ip_off & IP_OFFMASK) && !(fin->fin_fi.fi_fl & FI_SHORT)) {
+		if (ip->ip_p == IPPROTO_TCP)
+			nflags = IPN_TCP;
+		else if (ip->ip_p == IPPROTO_UDP)
+			nflags = IPN_UDP;
+		if ((nflags & IPN_TCPUDP)) {
+			tcp = (tcphdr_t *)fin->fin_dp;
+			dport = tcp->th_dport;
+			sport = tcp->th_sport;
+		}
+	}
+
+	in = ip->ip_dst;
+	/* make sure the source address is to be redirected */
+	src = ip->ip_src;
+
+	READ_ENTER(&ipf_nat);
+
+	if ((ip->ip_p == IPPROTO_ICMP) &&
+	    (nat = nat_icmp(ip, fin, &nflags, NAT_INBOUND)))
+		;
+	else if ((ip->ip_off & (IP_OFFMASK|IP_MF)) &&
+		 (nat = ipfr_nat_knownfrag(ip, fin)))
+		natadd = 0;
+	else if ((nat = nat_inlookup(fin->fin_ifp, nflags, (u_int)ip->ip_p,
+				     ip->ip_src, in,
+				     ((u_32_t)dport << 16) | sport, 0))) {
+		nflags = nat->nat_flags;
+		if ((nflags & (FI_W_SPORT|FI_W_DPORT)) != 0) {
+			if ((nat->nat_oport != sport) && (nflags & FI_W_DPORT))
+				nat->nat_oport = sport;
+			else if ((nat->nat_outport != dport) &&
+				 (nflags & FI_W_SPORT))
+				nat->nat_outport = dport;
+			nat->nat_flags &= ~(FI_W_SPORT|FI_W_DPORT);
+			nflags = nat->nat_flags;
+			nat_stats.ns_wilds--;
+		}
+	} else {
+		RWLOCK_EXIT(&ipf_nat);
+		WRITE_ENTER(&ipf_nat);
+		/*
+		 * If there is no current entry in the nat table for this IP#,
+		 * create one for it (if there is a matching rule).
+		 */
+		msk = 0xffffffff;
+		i = 32;
+maskloop:
+		iph = in.s_addr & htonl(msk);
+		hv = NAT_HASH_FN(iph, 0, ipf_rdrrules_sz);
+		for (np = rdr_rules[hv]; np; np = np->in_rnext) {
+			if ((np->in_ifp && (np->in_ifp != ifp)) ||
+			    (np->in_p && (np->in_p != ip->ip_p)) ||
+			    (np->in_flags && !(nflags & np->in_flags)))
+				continue;
+			if (np->in_flags & IPN_FILTER) {
+				if (!nat_match(fin, np, ip))
+					continue;
+			} else if ((in.s_addr & np->in_outmsk) != np->in_outip)
+				continue;
+			if ((np->in_redir & NAT_REDIRECT) &&
+			    (!np->in_pmin || (np->in_flags & IPN_FILTER) ||
+			     ((ntohs(np->in_pmax) >= ntohs(dport)) &&
+			      (ntohs(dport) >= ntohs(np->in_pmin)))))
+				if ((nat = nat_new(np, ip, fin, nflags,
+						    NAT_INBOUND))) {
+					np->in_hits++;
+					break;
+				}
+		}
+
+		if ((np == NULL) && (i > 0)) {
+			do {
+				i--;
+				msk <<= 1;
+			} while ((i >= 0) && ((rdr_masks & (1 << i)) == 0));
+			if (i >= 0)
+				goto maskloop;
+		}
+		MUTEX_DOWNGRADE(&ipf_nat);
+	}
+
+	/*
+	 * NOTE: ipf_nat must now only be held as a read lock
+	 */
+	if (nat) {
+		np = nat->nat_ptr;
+		fin->fin_fr = nat->nat_fr;
+		if (natadd && (fin->fin_fi.fi_fl & FI_FRAG) &&
+		    np && (np->in_flags & IPN_FRAG))
+			ipfr_nat_newfrag(ip, fin, 0, nat);
+		if (np != NULL && np->in_apr != NULL &&	/* np may be NULL */
+		    (!np->in_dport || (tcp != NULL && sport == np->in_dport))) {
+			i = appr_check(ip, fin, nat);
+			if (i == -1) {
+				RWLOCK_EXIT(&ipf_nat);
+				return i;
+			}
+		}
+
+		MUTEX_ENTER(&nat->nat_lock);
+		if (nflags != IPN_ICMPERR)
+			nat->nat_age = fr_defnatage;
+
+		nat->nat_bytes += ip->ip_len;
+		nat->nat_pkts++;
+		MUTEX_EXIT(&nat->nat_lock);
+		ip->ip_dst = nat->nat_inip;
+		fin->fin_fi.fi_daddr = nat->nat_inip.s_addr;
+
+		/*
+		 * Fix up checksums, not by recalculating them, but
+		 * simply computing adjustments.
+		 */
+#if SOLARIS || defined(__sgi)
+		if (nat->nat_dir == NAT_OUTBOUND)
+			fix_incksum(&ip->ip_sum, nat->nat_ipsumd);
+		else
+			fix_outcksum(&ip->ip_sum, nat->nat_ipsumd);
+#endif
+		if (!(ip->ip_off & IP_OFFMASK) &&
+		    !(fin->fin_fi.fi_fl & FI_SHORT)) {
+
+			if ((nat->nat_inport != 0) && (nflags & IPN_TCPUDP)) {
+				tcp->th_dport = nat->nat_inport;
+				fin->fin_data[1] = ntohs(tcp->th_dport);
+			}
+
+			if (ip->ip_p == IPPROTO_TCP) {
+				csump = &tcp->th_sum;
+				MUTEX_ENTER(&nat->nat_lock);
+				fr_tcp_age(&nat->nat_age,
+					   nat->nat_tcpstate, fin, 0);
+				if (nat->nat_age < fr_defnaticmpage)
+					nat->nat_age = fr_defnaticmpage;
+#ifdef LARGE_NAT
+				else if (nat->nat_age > fr_defnatage)
+					nat->nat_age = fr_defnatage;
+#endif
+				/*
+				 * Increase this because we may have
+				 * "keep state" following this too and
+				 * packet storms can occur if this is
+				 * removed too quickly.
+				 */
+				if (nat->nat_age == fr_tcpclosed)
+					nat->nat_age = fr_tcplastack;
+				MUTEX_EXIT(&nat->nat_lock);
+			} else if (ip->ip_p == IPPROTO_UDP) {
+				udphdr_t *udp = (udphdr_t *)tcp;
+
+				if (udp->uh_sum)
+					csump = &udp->uh_sum;
+			} else if (ip->ip_p == IPPROTO_ICMP) {
+				nat->nat_age = fr_defnaticmpage;
+			}
+
+			if (csump) {
+				if (nat->nat_dir == NAT_OUTBOUND)
+					fix_incksum(csump, nat->nat_sumd[0]);
+				else
+					fix_outcksum(csump, nat->nat_sumd[0]);
+			}
+		}
+		ATOMIC_INCL(nat_stats.ns_mapped[0]);
+		RWLOCK_EXIT(&ipf_nat);			/* READ */
+		return 1;
+	}
+	RWLOCK_EXIT(&ipf_nat);			/* READ/WRITE */
+	return 0;
+}
+
+
+/*
+ * Free all memory used by NAT structures allocated at runtime.
+ */
+void ip_natunload()
+{
+	WRITE_ENTER(&ipf_nat);
+	(void) nat_clearlist();
+	(void) nat_flushtable();
+	RWLOCK_EXIT(&ipf_nat);	/* the tables below are freed unlocked */
+
+	if (nat_table[0] != NULL) {	/* hash searched by nat_outlookup() */
+		KFREES(nat_table[0], sizeof(nat_t *) * ipf_nattable_sz);
+		nat_table[0] = NULL;
+	}
+	if (nat_table[1] != NULL) {	/* hash searched by nat_inlookup() */
+		KFREES(nat_table[1], sizeof(nat_t *) * ipf_nattable_sz);
+		nat_table[1] = NULL;
+	}
+	if (nat_rules != NULL) {	/* rule hash walked by ip_natout() */
+		KFREES(nat_rules, sizeof(ipnat_t *) * ipf_natrules_sz);
+		nat_rules = NULL;
+	}
+	if (rdr_rules != NULL) {	/* rule hash walked by ip_natin() */
+		KFREES(rdr_rules, sizeof(ipnat_t *) * ipf_rdrrules_sz);
+		rdr_rules = NULL;
+	}
+	if (maptable != NULL) {
+		KFREES(maptable, sizeof(hostmap_t *) * ipf_hostmap_sz);
+		maptable = NULL;
+	}
+}
+
+
+/*
+ * Age every NAT entry by one tick and delete those that reach zero.
+ * Timeouts are set in expectation of this being called twice per second.
+ */
+void ip_natexpire()
+{
+	register struct nat *nat, **natp;
+#if defined(_KERNEL) && !SOLARIS
+	int s;
+#endif
+
+	SPL_NET(s);
+	WRITE_ENTER(&ipf_nat);
+	for (natp = &nat_instances; (nat = *natp); ) {
+		nat->nat_age--;
+		if (nat->nat_age) {
+			natp = &nat->nat_next;	/* still alive: step past it */
+			continue;
+		}
+		*natp = nat->nat_next;	/* unlink; natp now names the successor */
+#ifdef	IPFILTER_LOG
+		nat_log(nat, NL_EXPIRE);
+#endif
+		nat_delete(nat);
+		nat_stats.ns_expire++;
+	}
+	RWLOCK_EXIT(&ipf_nat);
+	SPL_X(s);
+}
+
+
+/* Refresh the mapped address of eligible non-TCP sessions on ifp (all if
+ * NULL) and re-resolve, by name, the interface of rules bound to ifp. */
+void ip_natsync(ifp)
+void *ifp;
+{
+	register ipnat_t *n;
+	register nat_t *nat;
+	register u_32_t sum1, sum2, sumd;
+	struct in_addr in;
+	ipnat_t *np;
+	void *ifp2;
+#if defined(_KERNEL) && !SOLARIS
+	int s;
+#endif
+
+	/*
+	 * Change IP addresses for NAT sessions for any protocol except TCP
+	 * since it will break the TCP connection anyway.
+	 */
+	SPL_NET(s);
+	WRITE_ENTER(&ipf_nat);
+	for (nat = nat_instances; nat; nat = nat->nat_next)
+		if (((ifp == NULL) || (ifp == nat->nat_ifp)) &&
+		    !(nat->nat_flags & IPN_TCP) && (np = nat->nat_ptr) &&
+		    (np->in_outmsk == 0xffffffff) && !np->in_nip) {
+			ifp2 = nat->nat_ifp;
+			/*
+			 * Change the map-to address to be the same as the
+			 * new one.
+			 */
+			sum1 = nat->nat_outip.s_addr;
+			if (fr_ifpaddr(4, ifp2, &in) != -1)
+				nat->nat_outip = in;
+			sum2 = nat->nat_outip.s_addr;
+
+			if (sum1 == sum2)
+				continue;	/* address did not change */
+			/*
+			 * Readjust the checksum adjustment to take into
+			 * account the new IP#.
+			 */
+			CALC_SUMD(sum1, sum2, sumd);
+			/* XXX - dont change for TCP when solaris does
+			 * hardware checksumming.
+			 */
+			sumd += nat->nat_sumd[0];
+			nat->nat_sumd[0] = (sumd & 0xffff) + (sumd >> 16);
+			nat->nat_sumd[1] = nat->nat_sumd[0];
+		}
+
+	for (n = nat_list; (n != NULL); n = n->in_next)
+		if (n->in_ifp == ifp) {
+			n->in_ifp = (void *)GETUNIT(n->in_ifname, 4);
+			if (!n->in_ifp)
+				n->in_ifp = (void *)-1;	/* name did not resolve */
+		}
+	RWLOCK_EXIT(&ipf_nat);
+	SPL_X(s);
+}
+
+/* Emit a natlog record of the given type for nat via ipllog(IPL_LOGNAT). */
+#ifdef	IPFILTER_LOG
+void nat_log(nat, type)
+struct nat *nat;
+u_int type;
+{
+	struct ipnat *np;
+	struct natlog natl;	/* NOTE(review): not zeroed before logging */
+	void *items[1];
+	size_t sizes[1];
+	int rulen, types[1];
+
+	natl.nl_inip = nat->nat_inip;
+	natl.nl_outip = nat->nat_outip;
+	natl.nl_origip = nat->nat_oip;
+	natl.nl_bytes = nat->nat_bytes;
+	natl.nl_pkts = nat->nat_pkts;
+	natl.nl_origport = nat->nat_oport;
+	natl.nl_inport = nat->nat_inport;
+	natl.nl_outport = nat->nat_outport;
+	natl.nl_p = nat->nat_p;
+	natl.nl_type = type;
+	natl.nl_rule = -1;	/* stays -1 unless the rule is found below */
+#ifndef LARGE_NAT
+	if (nat->nat_ptr != NULL) {
+		for (rulen = 0, np = nat_list; np; np = np->in_next, rulen++)
+			if (np == nat->nat_ptr) {
+				natl.nl_rule = rulen;	/* position in nat_list */
+				break;
+			}
+	}
+#endif
+	items[0] = &natl;
+	sizes[0] = sizeof(natl);
+	types[0] = 0;
+
+	(void) ipllog(IPL_LOGNAT, NULL, items, sizes, types, 1);
+}
+#endif
diff --git a/sys/netinet/ip_nat.h b/sys/netinet/ip_nat.h
new file mode 100644
index 0000000..afa7e14
--- /dev/null
+++ b/sys/netinet/ip_nat.h
@@ -0,0 +1,309 @@
+/*
+ * Copyright (C) 1995-2000 by Darren Reed.
+ *
+ * Redistribution and use in source and binary forms are permitted
+ * provided that this notice is preserved and due credit is given
+ * to the original author and the contributors.
+ *
+ * @(#)ip_nat.h	1.5 2/4/96
+ * $Id: ip_nat.h,v 2.17.2.14 2000/11/18 03:58:04 darrenr Exp $
+ * $FreeBSD$
+ */
+
+#ifndef	__IP_NAT_H__
+#define	__IP_NAT_H__
+
+#ifndef SOLARIS
+#define SOLARIS (defined(sun) && (defined(__svr4__) || defined(__SVR4)))
+#endif
+
+#if defined(__STDC__) || defined(__GNUC__)
+#define	SIOCADNAT	_IOW('r', 60, struct ipnat *)
+#define	SIOCRMNAT	_IOW('r', 61, struct ipnat *)
+#define	SIOCGNATS	_IOWR('r', 62, struct natstat *)
+#define	SIOCGNATL	_IOWR('r', 63, struct natlookup *)
+#else
+#define	SIOCADNAT	_IOW(r, 60, struct ipnat *)
+#define	SIOCRMNAT	_IOW(r, 61, struct ipnat *)
+#define	SIOCGNATS	_IOWR(r, 62, struct natstat *)
+#define	SIOCGNATL	_IOWR(r, 63, struct natlookup *)
+#endif
+
+#undef	LARGE_NAT	/* define this if you're setting up a system to NAT
+			 * LARGE numbers of networks/hosts - i.e. in the
+			 * hundreds or thousands.  In such a case, you should
+			 * also change the RDR_SIZE and NAT_SIZE below to more
+			 * appropriate sizes.  The figures below were used for
+			 * a setup with 1000-2000 networks to NAT.
+			 */
+#define	NAT_SIZE	127
+#define	RDR_SIZE	127
+#define	HOSTMAP_SIZE	127
+#define	NAT_TABLE_SZ	127
+#ifdef	LARGE_NAT	/* replace the defaults above with larger tables */
+#undef	NAT_SIZE
+#undef	RDR_SIZE
+#undef	NAT_TABLE_SZ
+#undef	HOSTMAP_SIZE
+#define	NAT_SIZE	2047
+#define	RDR_SIZE	2047
+#define	NAT_TABLE_SZ	16383
+#define	HOSTMAP_SIZE	8191
+#endif
+#ifndef	APR_LABELLEN
+#define	APR_LABELLEN	16
+#endif
+#define	NAT_HW_CKSUM	0x80000000
+
+#define	DEF_NAT_AGE	1200     /* 10 minutes (600 seconds) at 2 ticks/second */
+
+struct ap_session;
+
+typedef	struct	nat	{
+	u_long	nat_age;		/* ticks left; see ip_natexpire() */
+	int	nat_flags;		/* IPN_* and FI_W_* bits */
+	u_32_t	nat_sumd[2];		/* cksum delta: [0] ip_natin, [1] ip_natout */
+	u_32_t	nat_ipsumd;		/* ip_sum delta (SOLARIS/sgi paths) */
+	void	*nat_data;
+	struct	ap_session	*nat_aps;		/* proxy session */
+	struct	frentry	*nat_fr;	/* filter rule ptr if appropriate */
+	struct	in_addr	nat_inip;	/* address written to ip_dst inbound */
+	struct	in_addr	nat_outip;	/* address written to ip_src outbound */
+	struct	in_addr	nat_oip;	/* other ip */
+	U_QUAD_T	nat_pkts;	/* packets translated */
+	U_QUAD_T	nat_bytes;	/* sum of ip_len translated */
+	u_short	nat_oport;		/* other port */
+	u_short	nat_inport;		/* port written to th_dport inbound */
+	u_short	nat_outport;		/* port written to th_sport outbound */
+	u_short	nat_use;
+	u_char	nat_tcpstate[2];
+	u_char	nat_p;			/* protocol for NAT */
+	struct	ipnat	*nat_ptr;	/* pointer back to the rule; may be NULL */
+	struct	hostmap	*nat_hm;
+	struct	nat	*nat_next;	/* link on nat_instances */
+	struct	nat	*nat_hnext[2];	/* chains in nat_table[0] and [1] */
+	struct	nat	**nat_phnext[2];	/* back pointers used to unlink */
+	void	*nat_ifp;
+	int	nat_dir;		/* NAT_OUTBOUND or NAT_INBOUND */
+	char	nat_ifname[IFNAMSIZ];
+#if SOLARIS || defined(__sgi)
+	kmutex_t	nat_lock;
+#endif
+} nat_t;
+
+typedef	struct	ipnat	{
+	struct	ipnat	*in_next;	/* link on nat_list */
+	struct	ipnat	*in_rnext;	/* chain in rdr_rules[] */
+	struct	ipnat	**in_prnext;
+	struct	ipnat	*in_mnext;	/* chain in nat_rules[] */
+	struct	ipnat	**in_pmnext;
+	void	*in_ifp;		/* (void *)-1 if in_ifname is unresolved */
+	void	*in_apr;		/* non-NULL enables appr_check() */
+	u_long	in_space;		/* rule is skipped by ip_natout() when 0 */
+	u_int	in_use;
+	u_int	in_hits;		/* sessions created from this rule */
+	struct	in_addr	in_nextip;
+	u_short	in_pnext;
+	u_short	in_ippip;	/* IP #'s per IP# */
+	u_32_t	in_flags;	/* From here to in_dport must be reflected */
+	u_short	in_spare;
+	u_short	in_ppip;	/* ports per IP */
+	u_short	in_port[2];	/* correctly in IPN_CMPSIZ */
+	struct	in_addr	in_in[2];	/* [0] address, [1] mask */
+	struct	in_addr	in_out[2];	/* [0] address, [1] mask */
+	struct	in_addr	in_src[2];	/* [0] address, [1] mask */
+	struct	frtuc	in_tuc;
+	int	in_redir; /* NAT_MAP, NAT_REDIRECT and/or NAT_MAPBLK bits */
+	char	in_ifname[IFNAMSIZ];
+	char	in_plabel[APR_LABELLEN];	/* proxy label */
+	char	in_p;	/* protocol */
+} ipnat_t;
+
+#define	in_pmin		in_port[0]	/* Also holds static redir port */
+#define	in_pmax		in_port[1]
+#define	in_nip		in_nextip.s_addr
+#define	in_inip		in_in[0].s_addr
+#define	in_inmsk	in_in[1].s_addr
+#define	in_outip	in_out[0].s_addr
+#define	in_outmsk	in_out[1].s_addr
+#define	in_srcip	in_src[0].s_addr
+#define	in_srcmsk	in_src[1].s_addr
+#define	in_scmp		in_tuc.ftu_scmp
+#define	in_dcmp		in_tuc.ftu_dcmp
+#define	in_stop		in_tuc.ftu_stop
+#define	in_dtop		in_tuc.ftu_dtop
+#define	in_sport	in_tuc.ftu_sport
+#define	in_dport	in_tuc.ftu_dport
+
+#define	NAT_OUTBOUND	0
+#define	NAT_INBOUND	1
+
+#define	NAT_MAP		0x01
+#define	NAT_REDIRECT	0x02
+#define	NAT_BIMAP	(NAT_MAP|NAT_REDIRECT)
+#define	NAT_MAPBLK	0x04
+/* 0x100 reserved for FI_W_SPORT */
+/* 0x200 reserved for FI_W_DPORT */
+/* 0x400 reserved for FI_W_SADDR */
+/* 0x800 reserved for FI_W_DADDR */
+/* 0x1000 reserved for FI_W_NEWFR */
+
+#define	MAPBLK_MINPORT	1024	/* don't use reserved ports for src port */
+#define	USABLE_PORTS	(65536 - MAPBLK_MINPORT)
+
+#define	IPN_CMPSIZ	(sizeof(ipnat_t) - offsetof(ipnat_t, in_flags))
+
+typedef	struct	natlookup {
+	struct	in_addr	nl_inip;	/* in: source for nat_outlookup() */
+	struct	in_addr	nl_outip;	/* in: destination for nat_outlookup() */
+	struct	in_addr	nl_realip;	/* out: nat_outip of the entry found */
+	int	nl_flags;
+	u_short	nl_inport;		/* in: low 16 bits of the port pair */
+	u_short	nl_outport;		/* in: high 16 bits of the port pair */
+	u_short	nl_realport;		/* out: nat_outport of the entry found */
+} natlookup_t;
+
+
+typedef struct  nat_save    {
+	void	*ipn_next;
+	struct	nat	ipn_nat;
+	struct	ipnat	ipn_ipnat;
+	struct	frentry ipn_fr;
+	int	ipn_dsize;	/* presumably the length of ipn_data - confirm */
+	char	ipn_data[4];
+} nat_save_t;
+
+#define	ipn_rule	ipn_nat.nat_fr
+
+typedef	struct	natget	{
+	void	*ng_ptr;
+	int	ng_sz;		/* presumably the size of *ng_ptr - confirm */
+} natget_t;
+
+
+typedef	struct	hostmap	{
+	struct	hostmap	*hm_next;
+	struct	hostmap	**hm_pnext;
+	struct	ipnat	*hm_ipnat;
+	struct	in_addr	hm_realip;
+	struct	in_addr	hm_mapip;
+	int	hm_ref;		/* presumably a reference count - confirm */
+} hostmap_t;
+
+
+typedef	struct	natstat	{
+	u_long	ns_mapped[2];	/* [0] bumped by ip_natin, [1] by ip_natout */
+	u_long	ns_rules;
+	u_long	ns_added;
+	u_long	ns_expire;	/* entries removed by ip_natexpire() */
+	u_long	ns_inuse;
+	u_long	ns_logged;
+	u_long	ns_logfail;
+	u_long	ns_memfail;
+	u_long	ns_badnat;
+	nat_t	**ns_table[2];
+	ipnat_t	*ns_list;
+	void	*ns_apslist;
+	u_int	ns_nattab_sz;
+	u_int	ns_rultab_sz;
+	u_int	ns_rdrtab_sz;
+	nat_t	*ns_instances;
+	u_int	ns_wilds;	/* non-zero enables the wildcard-port lookups */
+} natstat_t;
+
+#define	IPN_ANY		0x000
+#define	IPN_TCP		0x001	/* packet/session is TCP */
+#define	IPN_UDP		0x002	/* packet/session is UDP */
+#define	IPN_TCPUDP	(IPN_TCP|IPN_UDP)
+#define	IPN_DELETE	0x004
+#define	IPN_ICMPERR	0x008	/* tested by ip_natout/ip_natin after nat_icmp() */
+#define	IPN_RF		(IPN_TCPUDP|IPN_DELETE|IPN_ICMPERR)
+#define	IPN_AUTOPORTMAP	0x010
+#define	IPN_IPRANGE	0x020
+#define	IPN_USERFLAGS	(IPN_TCPUDP|IPN_AUTOPORTMAP|IPN_IPRANGE|IPN_SPLIT|\
+			 IPN_ROUNDR|IPN_FILTER|IPN_NOTSRC|IPN_NOTDST)
+#define	IPN_FILTER	0x040	/* rule is matched with nat_match() */
+#define	IPN_SPLIT	0x080
+#define	IPN_ROUNDR	0x100
+#define	IPN_NOTSRC	0x080000	/* invert the source address test */
+#define	IPN_NOTDST	0x100000	/* invert the destination address test */
+#define	IPN_FRAG	0x200000	/* record fragments via ipfr_nat_newfrag() */
+
+
+typedef	struct	natlog {
+	struct	in_addr	nl_origip;	/* copied from nat_oip */
+	struct	in_addr	nl_outip;	/* copied from nat_outip */
+	struct	in_addr	nl_inip;	/* copied from nat_inip */
+	u_short	nl_origport;		/* copied from nat_oport */
+	u_short	nl_outport;		/* copied from nat_outport */
+	u_short	nl_inport;		/* copied from nat_inport */
+	u_short	nl_type;		/* type argument of nat_log() */
+	int	nl_rule;		/* rule's position in nat_list, or -1 */
+	U_QUAD_T	nl_pkts;
+	U_QUAD_T	nl_bytes;
+	u_char	nl_p;			/* copied from nat_p */
+} natlog_t;
+
+
+#define	NL_NEWMAP	NAT_MAP
+#define	NL_NEWRDR	NAT_REDIRECT
+#define	NL_NEWBIMAP	NAT_BIMAP
+#define	NL_NEWBLOCK	NAT_MAPBLK
+#define	NL_FLUSH	0xfffe
+#define	NL_EXPIRE	0xffff
+
+#define	NAT_HASH_FN(k,l,m)	(((k) + ((k) >> 12) + (l)) % (m))
+/* Fold a 32-bit value into the sum of its two 16-bit halves. */
+#define	LONG_SUM(in)	(((in) & 0xffff) + ((in) >> 16))
+/* sd = s2 - s1 with end-around borrow, folded; s1 and s2 are modified. */
+#define	CALC_SUMD(s1, s2, sd) { \
+			    (s1) = ((s1) & 0xffff) + ((s1) >> 16); \
+			    (s2) = ((s2) & 0xffff) + ((s2) >> 16); \
+			    /* Do it twice */ \
+			    (s1) = ((s1) & 0xffff) + ((s1) >> 16); \
+			    (s2) = ((s2) & 0xffff) + ((s2) >> 16); \
+			    /* Because ~1 == -2, We really need ~1 == -1 */ \
+			    if ((s1) > (s2)) (s2)--; \
+			    (sd) = (s2) - (s1); \
+			    (sd) = ((sd) & 0xffff) + ((sd) >> 16); }
+
+
+extern	u_int	ipf_nattable_sz;
+extern	u_int	ipf_natrules_sz;
+extern	u_int	ipf_rdrrules_sz;
+extern	int	fr_nat_lock;
+extern	void	ip_natsync __P((void *));
+extern	u_long	fr_defnatage;
+extern	u_long	fr_defnaticmpage;
+extern	nat_t	**nat_table[2];
+extern	nat_t	*nat_instances;
+extern	ipnat_t	**nat_rules;
+extern	ipnat_t	**rdr_rules;
+extern	natstat_t	nat_stats;
+#if defined(__NetBSD__) || defined(__OpenBSD__) || (__FreeBSD_version >= 300003)
+extern	int	nat_ioctl __P((caddr_t, u_long, int));
+#else
+extern	int	nat_ioctl __P((caddr_t, int, int));
+#endif
+extern	int	nat_init __P((void));
+extern	nat_t	*nat_new __P((ipnat_t *, ip_t *, fr_info_t *, u_int, int));
+extern	nat_t	*nat_outlookup __P((void *, u_int, u_int, struct in_addr,
+				 struct in_addr, u_32_t, int));
+extern	nat_t	*nat_inlookup __P((void *, u_int, u_int, struct in_addr,
+				struct in_addr, u_32_t, int));
+extern	nat_t	*nat_maplookup __P((void *, u_int, struct in_addr,
+				struct in_addr));
+extern	nat_t	*nat_lookupredir __P((natlookup_t *));
+extern	nat_t	*nat_icmplookup __P((ip_t *, fr_info_t *, int));
+extern	nat_t	*nat_icmp __P((ip_t *, fr_info_t *, u_int *, int));
+extern	void	nat_insert __P((nat_t *));
+
+extern	int	ip_natout __P((ip_t *, fr_info_t *));
+extern	int	ip_natin __P((ip_t *, fr_info_t *));
+extern	void	ip_natunload __P((void)), ip_natexpire __P((void));
+extern	void	nat_log __P((struct nat *, u_int));
+extern	void	fix_incksum __P((u_short *, u_32_t));
+extern	void	fix_outcksum __P((u_short *, u_32_t));
+extern	void	fix_datacksum __P((u_short *, u_32_t));
+
+#endif /* __IP_NAT_H__ */
diff --git a/sys/netinet/ip_output.c b/sys/netinet/ip_output.c
new file mode 100644
index 0000000..ed20168
--- /dev/null
+++ b/sys/netinet/ip_output.c
@@ -0,0 +1,1915 @@
+/*
+ * Copyright (c) 1982, 1986, 1988, 1990, 1993
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)ip_output.c	8.3 (Berkeley) 1/21/94
+ * $FreeBSD$
+ */
+
+#define _IP_VHL
+
+#include "opt_ipfw.h"
+#include "opt_ipdn.h"
+#include "opt_ipdivert.h"
+#include "opt_ipfilter.h"
+#include "opt_ipsec.h"
+#include "opt_pfil_hooks.h"
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/kernel.h>
+#include <sys/malloc.h>
+#include <sys/mbuf.h>
+#include <sys/protosw.h>
+#include <sys/socket.h>
+#include <sys/socketvar.h>
+
+#include <net/if.h>
+#include <net/route.h>
+
+#include <netinet/in.h>
+#include <netinet/in_systm.h>
+#include <netinet/ip.h>
+#include <netinet/in_pcb.h>
+#include <netinet/in_var.h>
+#include <netinet/ip_var.h>
+
+#include "faith.h"
+
+#include <machine/in_cksum.h>
+
+static MALLOC_DEFINE(M_IPMOPTS, "ip_moptions", "internet multicast options");
+
+#ifdef IPSEC
+#include <netinet6/ipsec.h>
+#include <netkey/key.h>
+#ifdef IPSEC_DEBUG
+#include <netkey/key_debug.h>
+#else
+#define	KEYDEBUG(lev,arg)
+#endif
+#endif /*IPSEC*/
+
+#include <netinet/ip_fw.h>
+
+#ifdef DUMMYNET
+#include <netinet/ip_dummynet.h>
+#endif
+
+#ifdef IPFIREWALL_FORWARD_DEBUG
+/*
+ * Debug helper: print a struct in_addr in dotted-quad form.
+ * Note that the argument is evaluated four times and that the expansion
+ * already ends in a semicolon.
+ */
+#define print_ip(a)	 printf("%ld.%ld.%ld.%ld",(ntohl(a.s_addr)>>24)&0xFF,\
+				 		  (ntohl(a.s_addr)>>16)&0xFF,\
+						  (ntohl(a.s_addr)>>8)&0xFF,\
+						  (ntohl(a.s_addr))&0xFF);
+#endif
+
+/*
+ * Source of IP identification values: ip_output() stores htons(ip_id++)
+ * into every header it fills in itself.
+ */
+u_short ip_id;
+
+/* Forward declarations of the helpers private to this file. */
+static struct mbuf *ip_insertoptions __P((struct mbuf *, struct mbuf *, int *));
+static void	ip_mloopback
+	__P((struct ifnet *, struct mbuf *, struct sockaddr_in *, int));
+static int	ip_getmoptions
+	__P((struct sockopt *, struct ip_moptions *));
+static int	ip_pcbopts __P((int, struct mbuf **, struct mbuf *));
+static int	ip_setmoptions
+	__P((struct sockopt *, struct ip_moptions **));
+
+/* Not static: copies the options that must be replicated into fragments. */
+int	ip_optcopy __P((struct ip *, struct ip *));
+
+
+/* Protocol switch table; ip_output() uses it to find the IP pfil hook list. */
+extern	struct protosw inetsw[];
+
+/*
+ * IP output.  The packet in mbuf chain m contains a skeletal IP
+ * header (with len, off, ttl, proto, tos, src, dst).
+ * The mbuf chain containing the packet will be freed.
+ * The mbuf opt, if present, will not be freed.
+ *
+ * Arguments:
+ *	m0	packet to send; ip_len and ip_off are handled in host byte
+ *		order here and are converted with HTONS() just before the
+ *		packet is handed on.
+ *	opt	optional IP options to insert (see ip_insertoptions()),
+ *		or NULL.
+ *	ro	route to use; a cached ro_rt is reused only while it is
+ *		RTF_UP and still matches ip_dst.  Must not be NULL
+ *		(DIAGNOSTIC kernels panic otherwise).
+ *	flags	IP_FORWARDING, IP_RAWOUTPUT, IP_ROUTETOIF and
+ *		IP_ALLOWBROADCAST are examined below.
+ *	imo	multicast options, or NULL.
+ *
+ * Returns 0 on success or an errno value.
+ *
+ * Processing order: (dummynet re-entry shortcut) -> option insertion and
+ * header fill-in -> route/interface selection -> multicast handling ->
+ * source address and broadcast checks -> packet filters and firewall
+ * ("sendit") -> IPsec ("pass") -> checksum offload decision -> direct
+ * transmit, or fragmentation followed by transmit ("sendorfree").
+ */
+int
+ip_output(m0, opt, ro, flags, imo)
+	struct mbuf *m0;
+	struct mbuf *opt;
+	struct route *ro;
+	int flags;
+	struct ip_moptions *imo;
+{
+	struct ip *ip, *mhip;
+	struct ifnet *ifp;
+	struct mbuf *m = m0;
+	int hlen = sizeof (struct ip);
+	int len, off, error = 0;
+	struct sockaddr_in *dst;
+	struct in_ifaddr *ia;
+	int isbroadcast, sw_csum;
+#ifdef IPSEC
+	struct route iproute;
+	struct socket *so = NULL;
+	struct secpolicy *sp = NULL;
+#endif
+	u_int16_t divert_cookie;		/* firewall cookie */
+#ifdef PFIL_HOOKS
+	struct packet_filter_hook *pfh;
+	struct mbuf *m1;
+	int rv;
+#endif /* PFIL_HOOKS */
+#ifdef IPFIREWALL_FORWARD
+	int fwd_rewrite_src = 0;
+#endif
+	struct ip_fw_chain *rule = NULL;
+  
+#ifdef IPDIVERT
+	/* Get and reset firewall cookie */
+	divert_cookie = ip_divert_cookie;
+	ip_divert_cookie = 0;
+#else
+	divert_cookie = 0;
+#endif
+
+#if defined(IPFIREWALL) && defined(DUMMYNET)
+        /*  
+         * dummynet packets are prepended with a vestigial mbuf with
+         * m_type = MT_DUMMYNET and m_data pointing to the matching
+         * rule.
+         */ 
+        if (m->m_type == MT_DUMMYNET) {
+            /*
+             * the packet was already tagged, so part of the
+             * processing was already done, and we need to go down.
+             * Get parameters from the header.
+             */
+            rule = (struct ip_fw_chain *)(m->m_data) ;
+	    opt = NULL ;
+	    ro = & ( ((struct dn_pkt *)m)->ro ) ;
+	    imo = NULL ;
+	    dst = ((struct dn_pkt *)m)->dn_dst ;
+	    ifp = ((struct dn_pkt *)m)->ifp ;
+	    flags = ((struct dn_pkt *)m)->flags ;
+
+	    /* step over the tag mbuf; the real packet follows it */
+            m0 = m = m->m_next ;
+#ifdef IPSEC
+	    so = ipsec_getsocket(m);
+	    ipsec_setsocket(m, NULL);
+#endif
+            ip = mtod(m, struct ip *);
+            hlen = IP_VHL_HL(ip->ip_vhl) << 2 ;
+            ia = (struct in_ifaddr *)ro->ro_rt->rt_ifa;
+            goto sendit;
+        } else
+            rule = NULL ;
+#endif
+#ifdef IPSEC
+	so = ipsec_getsocket(m);
+	ipsec_setsocket(m, NULL);
+#endif
+
+#ifdef	DIAGNOSTIC
+	if ((m->m_flags & M_PKTHDR) == 0)
+		panic("ip_output no HDR");
+	if (!ro)
+		panic("ip_output no route, proto = %d",
+		      mtod(m, struct ip *)->ip_p);
+#endif
+	if (opt) {
+		/*
+		 * hlen is taken from len unconditionally, so this relies on
+		 * ip_insertoptions() storing through &len on every return
+		 * path.  m may come back as a new head mbuf; m0 is not
+		 * updated here.
+		 */
+		m = ip_insertoptions(m, opt, &len);
+		hlen = len;
+	}
+	ip = mtod(m, struct ip *);
+	/*
+	 * Fill in IP header.
+	 */
+	if ((flags & (IP_FORWARDING|IP_RAWOUTPUT)) == 0) {
+		ip->ip_vhl = IP_MAKE_VHL(IPVERSION, hlen >> 2);
+		ip->ip_off &= IP_DF;
+		ip->ip_id = htons(ip_id++);
+		ipstat.ips_localout++;
+	} else {
+		/* header was built by someone else: trust its length field */
+		hlen = IP_VHL_HL(ip->ip_vhl) << 2;
+	}
+
+	dst = (struct sockaddr_in *)&ro->ro_dst;
+	/*
+	 * If there is a cached route,
+	 * check that it is to the same destination
+	 * and is still up.  If not, free it and try again.
+	 */
+	if (ro->ro_rt && ((ro->ro_rt->rt_flags & RTF_UP) == 0 ||
+	   dst->sin_addr.s_addr != ip->ip_dst.s_addr)) {
+		RTFREE(ro->ro_rt);
+		ro->ro_rt = (struct rtentry *)0;
+	}
+	if (ro->ro_rt == 0) {
+		dst->sin_family = AF_INET;
+		dst->sin_len = sizeof(*dst);
+		dst->sin_addr = ip->ip_dst;
+	}
+	/*
+	 * If routing to interface only,
+	 * short circuit routing lookup.
+	 */
+#define ifatoia(ifa)	((struct in_ifaddr *)(ifa))
+#define sintosa(sin)	((struct sockaddr *)(sin))
+	if (flags & IP_ROUTETOIF) {
+		if ((ia = ifatoia(ifa_ifwithdstaddr(sintosa(dst)))) == 0 &&
+		    (ia = ifatoia(ifa_ifwithnet(sintosa(dst)))) == 0) {
+			ipstat.ips_noroute++;
+			error = ENETUNREACH;
+			goto bad;
+		}
+		ifp = ia->ia_ifp;
+		ip->ip_ttl = 1;
+		isbroadcast = in_broadcast(dst->sin_addr, ifp);
+	} else {
+		/*
+		 * If this is the case, we probably don't want to allocate
+		 * a protocol-cloned route since we didn't get one from the
+		 * ULP.  This lets TCP do its thing, while not burdening
+		 * forwarding or ICMP with the overhead of cloning a route.
+		 * Of course, we still want to do any cloning requested by
+		 * the link layer, as this is probably required in all cases
+		 * for correct operation (as it is for ARP).
+		 */
+		if (ro->ro_rt == 0)
+			rtalloc_ign(ro, RTF_PRCLONING);
+		if (ro->ro_rt == 0) {
+			ipstat.ips_noroute++;
+			error = EHOSTUNREACH;
+			goto bad;
+		}
+		ia = ifatoia(ro->ro_rt->rt_ifa);
+		ifp = ro->ro_rt->rt_ifp;
+		ro->ro_rt->rt_use++;
+		/* from here on dst is the next hop, not the final destination */
+		if (ro->ro_rt->rt_flags & RTF_GATEWAY)
+			dst = (struct sockaddr_in *)ro->ro_rt->rt_gateway;
+		if (ro->ro_rt->rt_flags & RTF_HOST)
+			isbroadcast = (ro->ro_rt->rt_flags & RTF_BROADCAST);
+		else
+			isbroadcast = in_broadcast(dst->sin_addr, ifp);
+	}
+	if (IN_MULTICAST(ntohl(ip->ip_dst.s_addr))) {
+		struct in_multi *inm;
+
+		m->m_flags |= M_MCAST;
+		/*
+		 * IP destination address is multicast.  Make sure "dst"
+		 * still points to the address in "ro".  (It may have been
+		 * changed to point to a gateway address, above.)
+		 */
+		dst = (struct sockaddr_in *)&ro->ro_dst;
+		/*
+		 * See if the caller provided any multicast options
+		 */
+		if (imo != NULL) {
+			ip->ip_ttl = imo->imo_multicast_ttl;
+			if (imo->imo_multicast_ifp != NULL)
+				ifp = imo->imo_multicast_ifp;
+			if (imo->imo_multicast_vif != -1)
+				ip->ip_src.s_addr =
+				    ip_mcast_src(imo->imo_multicast_vif);
+		} else
+			ip->ip_ttl = IP_DEFAULT_MULTICAST_TTL;
+		/*
+		 * Confirm that the outgoing interface supports multicast.
+		 */
+		if ((imo == NULL) || (imo->imo_multicast_vif == -1)) {
+			if ((ifp->if_flags & IFF_MULTICAST) == 0) {
+				ipstat.ips_noroute++;
+				error = ENETUNREACH;
+				goto bad;
+			}
+		}
+		/*
+		 * If source address not specified yet, use address
+		 * of outgoing interface.
+		 */
+		if (ip->ip_src.s_addr == INADDR_ANY) {
+			register struct in_ifaddr *ia1;
+
+			TAILQ_FOREACH(ia1, &in_ifaddrhead, ia_link)
+				if (ia1->ia_ifp == ifp) {
+					ip->ip_src = IA_SIN(ia1)->sin_addr;
+					break;
+				}
+		}
+
+		IN_LOOKUP_MULTI(ip->ip_dst, ifp, inm);
+		if (inm != NULL &&
+		   (imo == NULL || imo->imo_multicast_loop)) {
+			/*
+			 * If we belong to the destination multicast group
+			 * on the outgoing interface, and the caller did not
+			 * forbid loopback, loop back a copy.
+			 */
+			ip_mloopback(ifp, m, dst, hlen);
+		}
+		else {
+			/*
+			 * If we are acting as a multicast router, perform
+			 * multicast forwarding as if the packet had just
+			 * arrived on the interface to which we are about
+			 * to send.  The multicast forwarding function
+			 * recursively calls this function, using the
+			 * IP_FORWARDING flag to prevent infinite recursion.
+			 *
+			 * Multicasts that are looped back by ip_mloopback(),
+			 * above, will be forwarded by the ip_input() routine,
+			 * if necessary.
+			 */
+			if (ip_mrouter && (flags & IP_FORWARDING) == 0) {
+				/*
+				 * Check if rsvp daemon is running. If not, don't
+				 * set ip_moptions. This ensures that the packet
+				 * is multicast and not just sent down one link
+				 * as prescribed by rsvpd.
+				 */
+				if (!rsvp_on)
+				  imo = NULL;
+				if (ip_mforward(ip, ifp, m, imo) != 0) {
+					m_freem(m);
+					goto done;
+				}
+			}
+		}
+
+		/*
+		 * Multicasts with a time-to-live of zero may be looped-
+		 * back, above, but must not be transmitted on a network.
+		 * Also, multicasts addressed to the loopback interface
+		 * are not sent -- the above call to ip_mloopback() will
+		 * loop back a copy if this host actually belongs to the
+		 * destination group on the loopback interface.
+		 */
+		if (ip->ip_ttl == 0 || ifp->if_flags & IFF_LOOPBACK) {
+			m_freem(m);
+			goto done;
+		}
+
+		goto sendit;
+	}
+#ifndef notdef
+	/*
+	 * If source address not specified yet, use address
+	 * of outgoing interface.
+	 */
+	if (ip->ip_src.s_addr == INADDR_ANY) {
+		ip->ip_src = IA_SIN(ia)->sin_addr;
+#ifdef IPFIREWALL_FORWARD
+		/* Keep note that we did this - if the firewall changes
+		 * the next-hop, our interface may change, changing the
+		 * default source IP. It's a shame so much effort happens
+		 * twice. Oh well. 
+		 */
+		fwd_rewrite_src++;
+#endif /* IPFIREWALL_FORWARD */
+	}
+#endif /* notdef */
+	/*
+	 * Verify that we have any chance at all of being able to queue
+	 *      the packet or packet fragments
+	 */
+	if ((ifp->if_snd.ifq_len + ip->ip_len / ifp->if_mtu + 1) >=
+		ifp->if_snd.ifq_maxlen) {
+			error = ENOBUFS;
+			goto bad;
+	}
+
+	/*
+	 * Look for broadcast address and
+	 * verify user is allowed to send
+	 * such a packet.
+	 */
+	if (isbroadcast) {
+		if ((ifp->if_flags & IFF_BROADCAST) == 0) {
+			error = EADDRNOTAVAIL;
+			goto bad;
+		}
+		if ((flags & IP_ALLOWBROADCAST) == 0) {
+			error = EACCES;
+			goto bad;
+		}
+		/* don't allow broadcast messages to be fragmented */
+		if ((u_short)ip->ip_len > ifp->if_mtu) {
+			error = EMSGSIZE;
+			goto bad;
+		}
+		m->m_flags |= M_BCAST;
+	} else {
+		m->m_flags &= ~M_BCAST;
+	}
+
+sendit:
+	/*
+	 * IpHack's section.
+	 * - Xlate: translate packet's addr/port (NAT).
+	 * - Firewall: deny/allow/etc.
+	 * - Wrap: fake packet's addr/port <unimpl.>
+	 * - Encapsulate: put it in another IP and send out. <unimp.>
+	 */ 
+#ifdef PFIL_HOOKS
+	/*
+	 * Run through list of hooks for output packets.
+	 * A hook may replace the mbuf chain through m1, so m and ip are
+	 * reloaded after every call.
+	 */
+	m1 = m;
+	pfh = pfil_hook_get(PFIL_OUT, &inetsw[ip_protox[IPPROTO_IP]].pr_pfh);
+	for (; pfh; pfh = TAILQ_NEXT(pfh, pfil_link))
+		if (pfh->pfil_func) {
+			rv = pfh->pfil_func(ip, hlen, ifp, 1, &m1);
+			if (rv) {
+				error = EHOSTUNREACH;
+				goto done;
+			}
+			m = m1;
+			if (m == NULL)
+				goto done;
+			ip = mtod(m, struct ip *);
+		}
+#endif /* PFIL_HOOKS */
+
+	/*
+	 * Check with the firewall...
+	 */
+	if (fw_enable && ip_fw_chk_ptr) {
+		struct sockaddr_in *old = dst;
+
+		off = (*ip_fw_chk_ptr)(&ip,
+		    hlen, ifp, &divert_cookie, &m, &rule, &dst);
+                /*
+                 * On return we must do the following:
+                 * m == NULL         -> drop the pkt (old interface, deprecated)
+                 * (off & 0x40000)   -> drop the pkt (new interface)
+                 * 1<=off<= 0xffff   -> DIVERT
+                 * (off & 0x10000)   -> send to a DUMMYNET pipe
+                 * (off & 0x20000)   -> TEE the packet
+                 * dst != old        -> IPFIREWALL_FORWARD
+                 * off==0, dst==old  -> accept
+                 * If some of the above modules is not compiled in, then
+                 * we shouldn't have to check the corresponding condition
+                 * (because the ipfw control socket should not accept
+                 * unsupported rules), but better play safe and drop
+                 * packets in case of doubt.
+                 */
+		if (off & IP_FW_PORT_DENY_FLAG) { /* XXX new interface-denied */
+		    if (m)
+			m_freem(m);
+		    error = EACCES ;
+		    goto done;
+		}
+		if (!m) { /* firewall said to reject */
+		    /* rate-limit: the warning is printed at most 10 times */
+		    static int __debug=10;
+		    if (__debug >0) {
+			printf("firewall returns NULL, please update!\n");	
+			__debug-- ;
+		    }
+		    error = EACCES;
+		    goto done;
+		}
+		if (off == 0 && dst == old) /* common case */
+			goto pass ;
+#ifdef DUMMYNET
+                if ((off & IP_FW_PORT_DYNT_FLAG) != 0) {
+                    /*
+                     * pass the pkt to dummynet. Need to include
+                     * pipe number, m, ifp, ro, dst because these are
+                     * not recomputed in the next pass.
+                     * All other parameters have been already used and
+                     * so they are not needed anymore. 
+                     * XXX note: if the ifp or ro entry are deleted
+                     * while a pkt is in dummynet, we are in trouble!
+                     */ 
+		    error = dummynet_io(off & 0xffff, DN_TO_IP_OUT, m,
+				ifp,ro,dst,rule, flags);
+		    goto done;
+		}
+#endif   
+#ifdef IPDIVERT
+		if (off != 0 && (off & IP_FW_PORT_DYNT_FLAG) == 0) {
+			struct mbuf *clone = NULL;
+
+			/* Clone packet if we're doing a 'tee' */
+			if ((off & IP_FW_PORT_TEE_FLAG) != 0)
+				clone = m_dup(m, M_DONTWAIT);
+
+			/*
+			 * XXX
+			 * delayed checksums are not currently compatible
+			 * with divert sockets.
+			 */
+			if (m->m_pkthdr.csum_flags & CSUM_DELAY_DATA) {
+				in_delayed_cksum(m);
+				m->m_pkthdr.csum_flags &= ~CSUM_DELAY_DATA;
+			}
+
+			/* Restore packet header fields to original values */
+			HTONS(ip->ip_len);
+			HTONS(ip->ip_off);
+
+			/* Deliver packet to divert input routine */
+			ip_divert_cookie = divert_cookie;
+			divert_packet(m, 0, off & 0xffff);
+
+			/*
+			 * If 'tee', continue with original packet.
+			 * If m_dup() failed, clone is NULL and the packet is
+			 * only diverted.
+			 */
+			if (clone != NULL) {
+				m = clone;
+				ip = mtod(m, struct ip *);
+				goto pass;
+			}
+			goto done;
+		}
+#endif
+
+#ifdef IPFIREWALL_FORWARD
+		/* Here we check dst to make sure it's directly reachable on the
+		 * interface we previously thought it was.
+		 * If it isn't (which may be likely in some situations) we have
+		 * to re-route it (ie, find a route for the next-hop and the
+		 * associated interface) and set them here. This is nested
+		 * forwarding which in most cases is undesirable, except where
+		 * such control is nigh impossible. So we do it here.
+		 * And I'm babbling.
+		 */
+		if (off == 0 && old != dst) {
+			struct in_ifaddr *ia;
+
+			/* It's changed... */
+			/* There must be a better way to do this next line... */
+			static struct route sro_fwd, *ro_fwd = &sro_fwd;
+#ifdef IPFIREWALL_FORWARD_DEBUG
+			printf("IPFIREWALL_FORWARD: New dst ip: ");
+			print_ip(dst->sin_addr);
+			printf("\n");
+#endif
+			/*
+			 * We need to figure out if we have been forwarded
+			 * to a local socket. If so then we should somehow 
+			 * "loop back" to ip_input, and get directed to the
+			 * PCB as if we had received this packet. This is
+			 * because it may be difficult to identify the packets
+			 * you want to forward until they are being output
+			 * and have selected an interface. (e.g. locally
+			 * initiated packets) If we used the loopback interface,
+			 * we would not be able to control what happens 
+			 * as the packet runs through ip_input() as
+			 * it is done through a ISR.
+			 */
+			TAILQ_FOREACH(ia, &in_ifaddrhead, ia_link) {
+				/*
+				 * If the addr to forward to is one
+				 * of ours, we pretend to
+				 * be the destination for this packet.
+				 */
+				if (IA_SIN(ia)->sin_addr.s_addr ==
+						 dst->sin_addr.s_addr)
+					break;
+			}
+			if (ia) {
+				/* tell ip_input "dont filter" */
+				ip_fw_fwd_addr = dst;
+				if (m->m_pkthdr.rcvif == NULL)
+					m->m_pkthdr.rcvif = ifunit("lo0");
+				/*
+				 * NOTE(review): the flags are set on m but
+				 * csum_data is stored through m0 -- confirm
+				 * that m0 == m on this path.
+				 */
+				if (m->m_pkthdr.csum_flags & CSUM_DELAY_DATA) {
+					m->m_pkthdr.csum_flags |=
+					    CSUM_DATA_VALID | CSUM_PSEUDO_HDR;
+					m0->m_pkthdr.csum_data = 0xffff;
+				}
+				m->m_pkthdr.csum_flags |=
+				    CSUM_IP_CHECKED | CSUM_IP_VALID;
+				HTONS(ip->ip_len);
+				HTONS(ip->ip_off);
+				ip_input(m);
+				goto done;
+			}
+			/* Some of the logic for this was
+			 * nicked from above.
+			 *
+			 * This rewrites the cached route in a local PCB.
+			 * Is this what we want to do?
+			 */
+			bcopy(dst, &ro_fwd->ro_dst, sizeof(*dst));
+
+			ro_fwd->ro_rt = 0;
+			rtalloc_ign(ro_fwd, RTF_PRCLONING);
+
+			if (ro_fwd->ro_rt == 0) {
+				ipstat.ips_noroute++;
+				error = EHOSTUNREACH;
+				goto bad;
+			}
+
+			ia = ifatoia(ro_fwd->ro_rt->rt_ifa);
+			ifp = ro_fwd->ro_rt->rt_ifp;
+			ro_fwd->ro_rt->rt_use++;
+			if (ro_fwd->ro_rt->rt_flags & RTF_GATEWAY)
+				dst = (struct sockaddr_in *)ro_fwd->ro_rt->rt_gateway;
+			if (ro_fwd->ro_rt->rt_flags & RTF_HOST)
+				isbroadcast =
+				    (ro_fwd->ro_rt->rt_flags & RTF_BROADCAST);
+			else
+				isbroadcast = in_broadcast(dst->sin_addr, ifp);
+			/* hand the new route over to the caller's struct route */
+			RTFREE(ro->ro_rt);
+			ro->ro_rt = ro_fwd->ro_rt;
+			dst = (struct sockaddr_in *)&ro_fwd->ro_dst;
+
+			/*
+			 * If we added a default src ip earlier,
+			 * which would have been gotten from the-then
+			 * interface, do it again, from the new one.
+			 */
+			if (fwd_rewrite_src)
+				ip->ip_src = IA_SIN(ia)->sin_addr;
+			goto pass ;
+		}
+#endif /* IPFIREWALL_FORWARD */
+                /*
+                 * if we get here, none of the above matches, and 
+                 * we have to drop the pkt
+                 */
+		m_freem(m);
+                error = EACCES; /* not sure this is the right error msg */
+                goto done;
+	}
+
+pass:
+#ifdef IPSEC
+	/* get SP for this packet */
+	if (so == NULL)
+		sp = ipsec4_getpolicybyaddr(m, IPSEC_DIR_OUTBOUND, flags, &error);
+	else
+		sp = ipsec4_getpolicybysock(m, IPSEC_DIR_OUTBOUND, so, &error);
+
+	if (sp == NULL) {
+		ipsecstat.out_inval++;
+		goto bad;
+	}
+
+	error = 0;
+
+	/* check policy */
+	switch (sp->policy) {
+	case IPSEC_POLICY_DISCARD:
+		/*
+		 * This packet is just discarded.
+		 */
+		ipsecstat.out_polvio++;
+		goto bad;
+
+	case IPSEC_POLICY_BYPASS:
+	case IPSEC_POLICY_NONE:
+		/* no need to do IPsec. */
+		goto skip_ipsec;
+	
+	case IPSEC_POLICY_IPSEC:
+		if (sp->req == NULL) {
+			/* XXX should be panic ? */
+			printf("ip_output: No IPsec request specified.\n");
+			error = EINVAL;
+			goto bad;
+		}
+		break;
+
+	case IPSEC_POLICY_ENTRUST:
+	default:
+		/* only logged; processing continues into ipsec4_output() */
+		printf("ip_output: Invalid policy found. %d\n", sp->policy);
+	}
+    {
+	struct ipsec_output_state state;
+	bzero(&state, sizeof(state));
+	state.m = m;
+	if (flags & IP_ROUTETOIF) {
+		/* use a scratch route; it is released again at "done" */
+		state.ro = &iproute;
+		bzero(&iproute, sizeof(iproute));
+	} else
+		state.ro = ro;
+	state.dst = (struct sockaddr *)dst;
+
+	ip->ip_sum = 0;
+
+	/*
+	 * XXX
+	 * delayed checksums are not currently compatible with IPsec
+	 */
+	if (m->m_pkthdr.csum_flags & CSUM_DELAY_DATA) {
+		in_delayed_cksum(m);
+		m->m_pkthdr.csum_flags &= ~CSUM_DELAY_DATA;
+	}
+
+	HTONS(ip->ip_len);
+	HTONS(ip->ip_off);
+
+	error = ipsec4_output(&state, sp, flags);
+
+	m = state.m;
+	if (flags & IP_ROUTETOIF) {
+		/*
+		 * if we have tunnel mode SA, we may need to ignore
+		 * IP_ROUTETOIF.
+		 */
+		if (state.ro != &iproute || state.ro->ro_rt != NULL) {
+			flags &= ~IP_ROUTETOIF;
+			ro = state.ro;
+		}
+	} else
+		ro = state.ro;
+	dst = (struct sockaddr_in *)state.dst;
+	if (error) {
+		/* mbuf is already reclaimed in ipsec4_output. */
+		m0 = NULL;
+		switch (error) {
+		case EHOSTUNREACH:
+		case ENETUNREACH:
+		case EMSGSIZE:
+		case ENOBUFS:
+		case ENOMEM:
+			break;
+		default:
+			printf("ip4_output (ipsec): error code %d\n", error);
+			/*fall through*/
+		case ENOENT:
+			/* don't show these error codes to the user */
+			error = 0;
+			break;
+		}
+		goto bad;
+	}
+    }
+
+	/* be sure to update variables that are affected by ipsec4_output() */
+	ip = mtod(m, struct ip *);
+#ifdef _IP_VHL
+	hlen = IP_VHL_HL(ip->ip_vhl) << 2;
+#else
+	hlen = ip->ip_hl << 2;
+#endif
+	if (ro->ro_rt == NULL) {
+		if ((flags & IP_ROUTETOIF) == 0) {
+			printf("ip_output: "
+				"can't update route after IPsec processing\n");
+			error = EHOSTUNREACH;	/*XXX*/
+			goto bad;
+		}
+	} else {
+		ia = ifatoia(ro->ro_rt->rt_ifa);
+		ifp = ro->ro_rt->rt_ifp;
+	}
+
+	/* make it flipped, again. */
+	NTOHS(ip->ip_len);
+	NTOHS(ip->ip_off);
+skip_ipsec:
+#endif /*IPSEC*/
+
+	/*
+	 * sw_csum collects the checksum work the interface does not offer
+	 * in if_hwassist and that therefore has to be done in software
+	 * here; csum_flags keeps only what is left to the interface.
+	 */
+	m->m_pkthdr.csum_flags |= CSUM_IP;
+	sw_csum = m->m_pkthdr.csum_flags & ~ifp->if_hwassist;
+	if (sw_csum & CSUM_DELAY_DATA) {
+		in_delayed_cksum(m);
+		sw_csum &= ~CSUM_DELAY_DATA;
+	}
+	m->m_pkthdr.csum_flags &= ifp->if_hwassist;
+
+	/*
+	 * If small enough for interface, or the interface will take
+	 * care of the fragmentation for us, can just send directly.
+	 */
+	if ((u_short)ip->ip_len <= ifp->if_mtu ||
+	    ifp->if_hwassist & CSUM_FRAGMENT) {
+		HTONS(ip->ip_len);
+		HTONS(ip->ip_off);
+		ip->ip_sum = 0;
+		if (sw_csum & CSUM_DELAY_IP) {
+			/* option-less headers take the cheaper header-only sum */
+			if (ip->ip_vhl == IP_VHL_BORING) {
+				ip->ip_sum = in_cksum_hdr(ip);
+			} else {
+				ip->ip_sum = in_cksum(m, hlen);
+			}
+		}
+
+		/* Record statistics for this interface address. */
+		if (!(flags & IP_FORWARDING)) {
+			ia->ia_ifa.if_opackets++;
+			ia->ia_ifa.if_obytes += m->m_pkthdr.len;
+		}
+
+		error = (*ifp->if_output)(ifp, m,
+				(struct sockaddr *)dst, ro->ro_rt);
+		goto done;
+	}
+	/*
+	 * Too large for interface; fragment if possible.
+	 * Must be able to put at least 8 bytes per fragment.
+	 */
+	if (ip->ip_off & IP_DF) {
+		error = EMSGSIZE;
+		/*
+		 * This case can happen if the user changed the MTU
+		 * of an interface after enabling IP on it.  Because
+		 * most netifs don't keep track of routes pointing to
+		 * them, there is no way for one to update all its
+		 * routes when the MTU is changed.
+		 */
+		if ((ro->ro_rt->rt_flags & (RTF_UP | RTF_HOST))
+		    && !(ro->ro_rt->rt_rmx.rmx_locks & RTV_MTU)
+		    && (ro->ro_rt->rt_rmx.rmx_mtu > ifp->if_mtu)) {
+			ro->ro_rt->rt_rmx.rmx_mtu = ifp->if_mtu;
+		}
+		ipstat.ips_cantfrag++;
+		goto bad;
+	}
+	/* payload bytes per fragment, rounded down to a multiple of 8 */
+	len = (ifp->if_mtu - hlen) &~ 7;
+	if (len < 8) {
+		error = EMSGSIZE;
+		goto bad;
+	}
+
+	/*
+	 * if the interface will not calculate checksums on
+	 * fragmented packets, then do it here.
+	 */
+	if (m->m_pkthdr.csum_flags & CSUM_DELAY_DATA &&
+	    (ifp->if_hwassist & CSUM_IP_FRAGS) == 0) {
+		in_delayed_cksum(m);
+		m->m_pkthdr.csum_flags &= ~CSUM_DELAY_DATA;
+	}
+
+    {
+	int mhlen, firstlen = len;
+	struct mbuf **mnext = &m->m_nextpkt;
+	int nfrags = 1;
+
+	/*
+	 * Loop through length of segment after first fragment,
+	 * make new header and copy data of each part and link onto chain.
+	 * The fragments are linked through m_nextpkt, starting at m0.
+	 */
+	m0 = m;
+	mhlen = sizeof (struct ip);
+	for (off = hlen + len; off < (u_short)ip->ip_len; off += len) {
+		MGETHDR(m, M_DONTWAIT, MT_HEADER);
+		if (m == 0) {
+			error = ENOBUFS;
+			ipstat.ips_odropped++;
+			goto sendorfree;
+		}
+		m->m_flags |= (m0->m_flags & M_MCAST) | M_FRAG;
+		m->m_data += max_linkhdr;
+		mhip = mtod(m, struct ip *);
+		*mhip = *ip;
+		if (hlen > sizeof (struct ip)) {
+			/* carry over only the options marked as copied */
+			mhlen = ip_optcopy(ip, mhip) + sizeof (struct ip);
+			mhip->ip_vhl = IP_MAKE_VHL(IPVERSION, mhlen >> 2);
+		}
+		m->m_len = mhlen;
+		/* fragment offset is expressed in units of 8 bytes */
+		mhip->ip_off = ((off - hlen) >> 3) + ip->ip_off;
+		if (off + len >= (u_short)ip->ip_len)
+			len = (u_short)ip->ip_len - off;
+		else
+			mhip->ip_off |= IP_MF;
+		mhip->ip_len = htons((u_short)(len + mhlen));
+		m->m_next = m_copy(m0, off, len);
+		if (m->m_next == 0) {
+			(void) m_free(m);
+			error = ENOBUFS;	/* ??? */
+			ipstat.ips_odropped++;
+			goto sendorfree;
+		}
+		m->m_pkthdr.len = mhlen + len;
+		m->m_pkthdr.rcvif = (struct ifnet *)0;
+		m->m_pkthdr.csum_flags = m0->m_pkthdr.csum_flags;
+		HTONS(mhip->ip_off);
+		mhip->ip_sum = 0;
+		if (sw_csum & CSUM_DELAY_IP) {
+			if (mhip->ip_vhl == IP_VHL_BORING) {
+				mhip->ip_sum = in_cksum_hdr(mhip);
+			} else {
+				mhip->ip_sum = in_cksum(m, mhlen);
+			}
+		}
+		*mnext = m;
+		mnext = &m->m_nextpkt;
+		nfrags++;
+	}
+	ipstat.ips_ofragments += nfrags;
+
+	/* set first/last markers for fragment chain */
+	m->m_flags |= M_LASTFRAG;
+	m0->m_flags |= M_FIRSTFRAG | M_FRAG;
+	m0->m_pkthdr.csum_data = nfrags;
+
+	/*
+	 * Update first fragment by trimming what's been copied out
+	 * and updating header, then send each fragment (in order).
+	 */
+	m = m0;
+	m_adj(m, hlen + firstlen - (u_short)ip->ip_len);
+	m->m_pkthdr.len = hlen + firstlen;
+	ip->ip_len = htons((u_short)m->m_pkthdr.len);
+	ip->ip_off |= IP_MF;
+	HTONS(ip->ip_off);
+	ip->ip_sum = 0;
+	if (sw_csum & CSUM_DELAY_IP) {
+		if (ip->ip_vhl == IP_VHL_BORING) {
+			ip->ip_sum = in_cksum_hdr(ip);
+		} else {
+			ip->ip_sum = in_cksum(m, hlen);
+		}
+	}
+sendorfree:
+	/*
+	 * Walk the m_nextpkt list: transmit while no error has occurred,
+	 * free every remaining fragment once one has.
+	 */
+	for (m = m0; m; m = m0) {
+		m0 = m->m_nextpkt;
+		m->m_nextpkt = 0;
+		if (error == 0) {
+			/* Record statistics for this interface address. */
+			ia->ia_ifa.if_opackets++;
+			ia->ia_ifa.if_obytes += m->m_pkthdr.len;
+			
+			error = (*ifp->if_output)(ifp, m,
+			    (struct sockaddr *)dst, ro->ro_rt);
+		} else
+			m_freem(m);
+	}
+
+	if (error == 0)
+		ipstat.ips_fragmented++;
+    }
+done:
+#ifdef IPSEC
+	/* drop the scratch route set up for IP_ROUTETOIF and the policy ref */
+	if (ro == &iproute && ro->ro_rt) {
+		RTFREE(ro->ro_rt);
+		ro->ro_rt = NULL;
+	}
+	if (sp != NULL) {
+		KEYDEBUG(KEYDEBUG_IPSEC_STAMP,
+			printf("DP ip_output call free SP:%p\n", sp));
+		key_freesp(sp);
+	}
+#endif /* IPSEC */
+	return (error);
+bad:
+	/*
+	 * NOTE(review): this frees m0, not m.  Several paths above replace
+	 * m without updating m0 (ip_insertoptions(), the PFIL_HOOKS loop,
+	 * the divert 'tee' clone) -- confirm that m0 still heads the chain
+	 * on every path that jumps here.
+	 */
+	m_freem(m0);
+	goto done;
+}
+
+/*
+ * Finish a transport checksum whose computation was postponed
+ * (CSUM_DELAY_DATA) and store it into the packet.
+ *
+ * m must start with the IP header.  ip_len is used as-is as the byte
+ * count for the sum, so this has to be called while ip_len is still in
+ * host byte order.  m_pkthdr.csum_data holds the offset of the checksum
+ * field, counted from the end of the IP header.
+ *
+ * The mbuf chain is never rearranged or freed here: the caller only
+ * passes m by value and keeps using its own pointer afterwards.
+ */
+void
+in_delayed_cksum(struct mbuf *m)
+{
+	struct ip *ip;
+	u_short csum, offset;
+
+	ip = mtod(m, struct ip *);
+	offset = IP_VHL_HL(ip->ip_vhl) << 2 ;
+	csum = in_cksum_skip(m, ip->ip_len, offset);
+	/* for UDP a computed sum of zero is sent as all ones */
+	if (m->m_pkthdr.csum_flags & CSUM_UDP && csum == 0)
+		csum = 0xffff;
+	offset += m->m_pkthdr.csum_data;	/* checksum offset */
+
+	if (offset + sizeof(u_short) > m->m_len) {
+		/*
+		 * The checksum field is not (entirely) in the first mbuf.
+		 * Store it in place further down the chain.  m_pullup()
+		 * must not be used for this: it can return NULL after
+		 * freeing the chain, or hand back a different head mbuf,
+		 * and either would leave the caller with a stale pointer.
+		 */
+		m_copyback(m, offset, sizeof(csum), (caddr_t)&csum);
+		return;
+	}
+	*(u_short *)(m->m_data + offset) = csum;
+}
+
+/*
+ * Insert IP options into preformed packet.
+ * Adjust IP destination as required for IP source routing,
+ * as indicated by a non-zero in_addr at the start of the options.
+ *
+ * m is the packet, opt holds a struct ipoption (first-hop address
+ * followed by the option bytes).  Returns the (possibly new) head of
+ * the packet chain.  *phlen is always set to the resulting IP header
+ * length: sizeof(struct ip) plus the option length on success, or plain
+ * sizeof(struct ip) when the packet is returned unmodified because it
+ * would become too large or no mbuf could be allocated.
+ *
+ * XXX This routine assumes that the packet has no options in place.
+ */
+static struct mbuf *
+ip_insertoptions(m, opt, phlen)
+	register struct mbuf *m;
+	struct mbuf *opt;
+	int *phlen;
+{
+	register struct ipoption *p = mtod(opt, struct ipoption *);
+	struct mbuf *n;
+	register struct ip *ip = mtod(m, struct ip *);
+	unsigned optlen;
+
+	/*
+	 * Callers use *phlen unconditionally, so give it a valid value
+	 * before any of the early returns below.
+	 */
+	*phlen = sizeof(struct ip);
+
+	optlen = opt->m_len - sizeof(p->ipopt_dst);
+	if (optlen + (u_short)ip->ip_len > IP_MAXPACKET)
+		return (m);		/* XXX should fail */
+	if (p->ipopt_dst.s_addr)
+		ip->ip_dst = p->ipopt_dst;
+	if (m->m_flags & M_EXT || m->m_data - optlen < m->m_pktdat) {
+		/*
+		 * No room in front of the header: put header and options
+		 * into a fresh mbuf and chain the old data behind it.
+		 */
+		MGETHDR(n, M_DONTWAIT, MT_HEADER);
+		if (n == 0)
+			return (m);
+		n->m_pkthdr.rcvif = (struct ifnet *)0;
+		n->m_pkthdr.len = m->m_pkthdr.len + optlen;
+		m->m_len -= sizeof(struct ip);
+		m->m_data += sizeof(struct ip);
+		n->m_next = m;
+		m = n;
+		m->m_len = optlen + sizeof(struct ip);
+		m->m_data += max_linkhdr;
+		(void)memcpy(mtod(m, void *), ip, sizeof(struct ip));
+	} else {
+		/* slide the header forward to open a gap for the options */
+		m->m_data -= optlen;
+		m->m_len += optlen;
+		m->m_pkthdr.len += optlen;
+		ovbcopy((caddr_t)ip, mtod(m, caddr_t), sizeof(struct ip));
+	}
+	ip = mtod(m, struct ip *);
+	bcopy(p->ipopt_list, ip + 1, optlen);
+	*phlen = sizeof(struct ip) + optlen;
+	ip->ip_vhl = IP_MAKE_VHL(IPVERSION, *phlen >> 2);
+	ip->ip_len += optlen;
+	return (m);
+}
+
+/*
+ * Copy options from ip to jp,
+ * omitting those not copied during fragmentation.
+ *
+ * NOPs are always kept, other options only when IPOPT_COPIED() says so.
+ * The result is padded with IPOPT_EOL to a multiple of 4 bytes.
+ * Returns the padded length of the options written behind jp.
+ *
+ * A malformed option (length byte missing or smaller than the two-byte
+ * type/length prefix) ends the copy; DIAGNOSTIC kernels panic instead.
+ */
+int
+ip_optcopy(ip, jp)
+	struct ip *ip, *jp;
+{
+	register u_char *cp, *dp;
+	int opt, optlen, cnt;
+
+	cp = (u_char *)(ip + 1);
+	dp = (u_char *)(jp + 1);
+	cnt = (IP_VHL_HL(ip->ip_vhl) << 2) - sizeof (struct ip);
+	for (; cnt > 0; cnt -= optlen, cp += optlen) {
+		opt = cp[0];
+		if (opt == IPOPT_EOL)
+			break;
+		if (opt == IPOPT_NOP) {
+			/* Preserve for IP mcast tunnel's LSRR alignment. */
+			*dp++ = IPOPT_NOP;
+			optlen = 1;
+			continue;
+		}
+#ifdef DIAGNOSTIC
+		if (cnt < IPOPT_OLEN + sizeof(*cp))
+			panic("malformed IPv4 option passed to ip_optcopy");
+#endif
+		/* no room left for a length byte: do not read past the header */
+		if (cnt < (int)(IPOPT_OLEN + sizeof(*cp)))
+			break;
+		optlen = cp[IPOPT_OLEN];
+#ifdef DIAGNOSTIC
+		if (optlen < IPOPT_OLEN + sizeof(*cp) || optlen > cnt)
+			panic("malformed IPv4 option passed to ip_optcopy");
+#endif
+		/*
+		 * bogus lengths should have been caught by ip_dooptions,
+		 * but a length below the type/length prefix (zero in
+		 * particular) would keep this loop from advancing.
+		 */
+		if (optlen < (int)(IPOPT_OLEN + sizeof(*cp)))
+			break;
+		if (optlen > cnt)
+			optlen = cnt;
+		if (IPOPT_COPIED(opt)) {
+			bcopy(cp, dp, optlen);
+			dp += optlen;
+		}
+	}
+	/* pad with EOL up to the next 32-bit boundary */
+	for (optlen = dp - (u_char *)(jp+1); optlen & 0x3; optlen++)
+		*dp++ = IPOPT_EOL;
+	return (optlen);
+}
+
+/*
+ * IP socket option processing.
+ *
+ * Handles setsockopt()/getsockopt() requests at level IPPROTO_IP for
+ * the socket so; sopt describes the request (direction, option name,
+ * value buffer).  Requests for any other level are rejected with
+ * EINVAL.  Returns 0 on success or an errno value; unknown option
+ * names yield ENOPROTOOPT.
+ */
+int
+ip_ctloutput(so, sopt)
+	struct socket *so;
+	struct sockopt *sopt;
+{
+	struct	inpcb *inp = sotoinpcb(so);
+	int	error, optval;
+
+	error = optval = 0;
+	if (sopt->sopt_level != IPPROTO_IP) {
+		return (EINVAL);
+	}
+
+	switch (sopt->sopt_dir) {
+	case SOPT_SET:
+		switch (sopt->sopt_name) {
+		case IP_OPTIONS:
+#ifdef notyet
+		case IP_RETOPTS:
+#endif
+		{
+			struct mbuf *m;
+			/* The options must fit in a single plain mbuf. */
+			if (sopt->sopt_valsize > MLEN) {
+				error = EMSGSIZE;
+				break;
+			}
+			MGET(m, sopt->sopt_p ? M_TRYWAIT : M_DONTWAIT, MT_HEADER);
+			if (m == 0) {
+				error = ENOBUFS;
+				break;
+			}
+			m->m_len = sopt->sopt_valsize;
+			error = sooptcopyin(sopt, mtod(m, char *), m->m_len,
+					    m->m_len);
+			if (error) {
+				/*
+				 * The copy failed, so the mbuf holds no
+				 * valid option data: release it and leave
+				 * the currently installed options alone.
+				 */
+				(void)m_free(m);
+				break;
+			}
+
+			/* ip_pcbopts() takes over ownership of m. */
+			return (ip_pcbopts(sopt->sopt_name, &inp->inp_options,
+					   m));
+		}
+
+		case IP_TOS:
+		case IP_TTL:
+		case IP_RECVOPTS:
+		case IP_RECVRETOPTS:
+		case IP_RECVDSTADDR:
+		case IP_RECVIF:
+#if defined(NFAITH) && NFAITH > 0
+		case IP_FAITH:
+#endif
+			/* All of these take a single int argument. */
+			error = sooptcopyin(sopt, &optval, sizeof optval,
+					    sizeof optval);
+			if (error)
+				break;
+
+			switch (sopt->sopt_name) {
+			case IP_TOS:
+				inp->inp_ip_tos = optval;
+				break;
+
+			case IP_TTL:
+				inp->inp_ip_ttl = optval;
+				break;
+/* Set or clear a flag bit in the pcb according to optval. */
+#define	OPTSET(bit) \
+	if (optval) \
+		inp->inp_flags |= bit; \
+	else \
+		inp->inp_flags &= ~bit;
+
+			case IP_RECVOPTS:
+				OPTSET(INP_RECVOPTS);
+				break;
+
+			case IP_RECVRETOPTS:
+				OPTSET(INP_RECVRETOPTS);
+				break;
+
+			case IP_RECVDSTADDR:
+				OPTSET(INP_RECVDSTADDR);
+				break;
+
+			case IP_RECVIF:
+				OPTSET(INP_RECVIF);
+				break;
+
+#if defined(NFAITH) && NFAITH > 0
+			case IP_FAITH:
+				OPTSET(INP_FAITH);
+				break;
+#endif
+			}
+			break;
+#undef OPTSET
+
+		case IP_MULTICAST_IF:
+		case IP_MULTICAST_VIF:
+		case IP_MULTICAST_TTL:
+		case IP_MULTICAST_LOOP:
+		case IP_ADD_MEMBERSHIP:
+		case IP_DROP_MEMBERSHIP:
+			error = ip_setmoptions(sopt, &inp->inp_moptions);
+			break;
+
+		case IP_PORTRANGE:
+			error = sooptcopyin(sopt, &optval, sizeof optval,
+					    sizeof optval);
+			if (error)
+				break;
+
+			/* INP_LOWPORT and INP_HIGHPORT are mutually exclusive. */
+			switch (optval) {
+			case IP_PORTRANGE_DEFAULT:
+				inp->inp_flags &= ~(INP_LOWPORT);
+				inp->inp_flags &= ~(INP_HIGHPORT);
+				break;
+
+			case IP_PORTRANGE_HIGH:
+				inp->inp_flags &= ~(INP_LOWPORT);
+				inp->inp_flags |= INP_HIGHPORT;
+				break;
+
+			case IP_PORTRANGE_LOW:
+				inp->inp_flags &= ~(INP_HIGHPORT);
+				inp->inp_flags |= INP_LOWPORT;
+				break;
+
+			default:
+				error = EINVAL;
+				break;
+			}
+			break;
+
+#ifdef IPSEC
+		case IP_IPSEC_POLICY:
+		{
+			caddr_t req;
+			size_t len = 0;
+			int priv;
+			struct mbuf *m;
+			int optname;
+
+			if ((error = soopt_getm(sopt, &m)) != 0) /* XXX */
+				break;
+			if ((error = soopt_mcopyin(sopt, m)) != 0) /* XXX */
+				break;
+			/* priv is 1 for kernel requests or when suser() passes */
+			priv = (sopt->sopt_p != NULL &&
+				suser(sopt->sopt_p) != 0) ? 0 : 1;
+			req = mtod(m, caddr_t);
+			len = m->m_len;
+			optname = sopt->sopt_name;
+			error = ipsec4_set_policy(inp, optname, req, len, priv);
+			m_freem(m);
+			break;
+		}
+#endif /*IPSEC*/
+
+		default:
+			error = ENOPROTOOPT;
+			break;
+		}
+		break;
+
+	case SOPT_GET:
+		switch (sopt->sopt_name) {
+		case IP_OPTIONS:
+		case IP_RETOPTS:
+			/* No stored options is reported as a zero-length value. */
+			if (inp->inp_options)
+				error = sooptcopyout(sopt, 
+						     mtod(inp->inp_options,
+							  char *),
+						     inp->inp_options->m_len);
+			else
+				sopt->sopt_valsize = 0;
+			break;
+
+		case IP_TOS:
+		case IP_TTL:
+		case IP_RECVOPTS:
+		case IP_RECVRETOPTS:
+		case IP_RECVDSTADDR:
+		case IP_RECVIF:
+		case IP_PORTRANGE:
+#if defined(NFAITH) && NFAITH > 0
+		case IP_FAITH:
+#endif
+			switch (sopt->sopt_name) {
+
+			case IP_TOS:
+				optval = inp->inp_ip_tos;
+				break;
+
+			case IP_TTL:
+				optval = inp->inp_ip_ttl;
+				break;
+
+/* Report a pcb flag bit as 0 or 1. */
+#define	OPTBIT(bit)	(inp->inp_flags & bit ? 1 : 0)
+
+			case IP_RECVOPTS:
+				optval = OPTBIT(INP_RECVOPTS);
+				break;
+
+			case IP_RECVRETOPTS:
+				optval = OPTBIT(INP_RECVRETOPTS);
+				break;
+
+			case IP_RECVDSTADDR:
+				optval = OPTBIT(INP_RECVDSTADDR);
+				break;
+
+			case IP_RECVIF:
+				optval = OPTBIT(INP_RECVIF);
+				break;
+
+			case IP_PORTRANGE:
+				if (inp->inp_flags & INP_HIGHPORT)
+					optval = IP_PORTRANGE_HIGH;
+				else if (inp->inp_flags & INP_LOWPORT)
+					optval = IP_PORTRANGE_LOW;
+				else
+					optval = 0;
+				break;
+
+#if defined(NFAITH) && NFAITH > 0
+			case IP_FAITH:
+				optval = OPTBIT(INP_FAITH);
+				break;
+#endif
+			}
+			error = sooptcopyout(sopt, &optval, sizeof optval);
+			break;
+
+		case IP_MULTICAST_IF:
+		case IP_MULTICAST_VIF:
+		case IP_MULTICAST_TTL:
+		case IP_MULTICAST_LOOP:
+		case IP_ADD_MEMBERSHIP:
+		case IP_DROP_MEMBERSHIP:
+			error = ip_getmoptions(sopt, inp->inp_moptions);
+			break;
+
+#ifdef IPSEC
+		case IP_IPSEC_POLICY:
+		{
+			struct mbuf *m = NULL;
+			caddr_t req = NULL;
+			size_t len = 0;
+
+			/* m is always NULL here, so req/len stay NULL/0. */
+			if (m != 0) {
+				req = mtod(m, caddr_t);
+				len = m->m_len;
+			}
+			error = ipsec4_get_policy(sotoinpcb(so), req, len, &m);
+			if (error == 0)
+				error = soopt_mcopyout(sopt, m); /* XXX */
+			if (error == 0)
+				m_freem(m);
+			break;
+		}
+#endif /*IPSEC*/
+
+		default:
+			error = ENOPROTOOPT;
+			break;
+		}
+		break;
+	}
+	return (error);
+}
+
+/*
+ * Set up IP options in pcb for insertion in output packets.
+ * Store in mbuf with pointer in pcbopt, adding pseudo-option
+ * with destination address if source routed.
+ *
+ * Any options previously stored in *pcbopt are released first.  The
+ * mbuf m is always consumed: on success it is stored in *pcbopt, and
+ * on failure (or when it is empty) it is freed.  A NULL or zero-length
+ * m just clears the old options.  optname is not used by this routine.
+ *
+ * Returns 0 on success, or EINVAL if the option list is malformed,
+ * not a multiple of four bytes long, or too large.
+ */
+static int
+ip_pcbopts(optname, pcbopt, m)
+	int optname;
+	struct mbuf **pcbopt;
+	register struct mbuf *m;
+{
+	register int cnt, optlen;
+	register u_char *cp;
+	u_char opt;
+
+	/* turn off any old options */
+	if (*pcbopt)
+		(void)m_free(*pcbopt);
+	*pcbopt = 0;
+	if (m == (struct mbuf *)0 || m->m_len == 0) {
+		/*
+		 * Only turning off any previous options.
+		 */
+		if (m)
+			(void)m_free(m);
+		return (0);
+	}
+
+	if (m->m_len % sizeof(int32_t))
+		goto bad;
+	/*
+	 * IP first-hop destination address will be stored before
+	 * actual options; move other options back
+	 * and clear it when none present.
+	 */
+	if (m->m_data + m->m_len + sizeof(struct in_addr) >= &m->m_dat[MLEN])
+		goto bad;
+	cnt = m->m_len;
+	m->m_len += sizeof(struct in_addr);
+	cp = mtod(m, u_char *) + sizeof(struct in_addr);
+	ovbcopy(mtod(m, caddr_t), (caddr_t)cp, (unsigned)cnt);
+	bzero(mtod(m, caddr_t), sizeof(struct in_addr));
+
+	/* Walk the options; cnt is the number of bytes left from cp. */
+	for (; cnt > 0; cnt -= optlen, cp += optlen) {
+		opt = cp[IPOPT_OPTVAL];
+		if (opt == IPOPT_EOL)
+			break;
+		if (opt == IPOPT_NOP)
+			optlen = 1;
+		else {
+			if (cnt < IPOPT_OLEN + sizeof(*cp))
+				goto bad;
+			optlen = cp[IPOPT_OLEN];
+			if (optlen < IPOPT_OLEN + sizeof(*cp) || optlen > cnt)
+				goto bad;
+		}
+		switch (opt) {
+
+		default:
+			break;
+
+		case IPOPT_LSRR:
+		case IPOPT_SSRR:
+			/*
+			 * user process specifies route as:
+			 *	->A->B->C->D
+			 * D must be our final destination (but we can't
+			 * check that since we may not have connected yet).
+			 * A is first hop destination, which doesn't appear in
+			 * actual IP option, but is stored before the options.
+			 */
+			if (optlen < IPOPT_MINOFF - 1 + sizeof(struct in_addr))
+				goto bad;
+			m->m_len -= sizeof(struct in_addr);
+			cnt -= sizeof(struct in_addr);
+			optlen -= sizeof(struct in_addr);
+			cp[IPOPT_OLEN] = optlen;
+			/*
+			 * Move first hop before start of options.
+			 */
+			bcopy((caddr_t)&cp[IPOPT_OFFSET+1], mtod(m, caddr_t),
+			    sizeof(struct in_addr));
+			/*
+			 * Then copy rest of options back
+			 * to close up the deleted entry.  cnt already
+			 * excludes the removed address, so what follows
+			 * the option's type/length/pointer bytes is
+			 * cnt - (IPOPT_MINOFF - 1) bytes; copying any
+			 * more would run past the end of the option data.
+			 */
+			ovbcopy((caddr_t)(&cp[IPOPT_OFFSET+1] +
+			    sizeof(struct in_addr)),
+			    (caddr_t)&cp[IPOPT_OFFSET+1],
+			    (unsigned)cnt - (IPOPT_MINOFF - 1));
+			break;
+		}
+	}
+	if (m->m_len > MAX_IPOPTLEN + sizeof(struct in_addr))
+		goto bad;
+	*pcbopt = m;
+	return (0);
+
+bad:
+	(void)m_free(m);
+	return (EINVAL);
+}
+
+/*
+ * XXX
+ * The whole multicast option thing needs to be re-thought.
+ * Several of these options are equally applicable to non-multicast
+ * transmission, and one (IP_MULTICAST_TTL) totally duplicates a
+ * standard option (IP_TTL).
+ */
+/*
+ * Set the IP multicast options in response to user setsockopt().
+ *
+ * sopt describes the request; *imop is the multicast option structure
+ * hanging off the pcb (NULL if none yet).  A structure is allocated on
+ * demand and initialized to defaults; if, after the request has been
+ * processed, every field again holds its default value, the structure
+ * is freed and *imop is reset to NULL.
+ *
+ * Returns 0 on success or an errno value.
+ */
+static int
+ip_setmoptions(sopt, imop)
+	struct sockopt *sopt;
+	struct ip_moptions **imop;
+{
+	int error = 0;
+	int i;
+	struct in_addr addr;
+	struct ip_mreq mreq;
+	struct ifnet *ifp;
+	struct ip_moptions *imo = *imop;
+	struct route ro;
+	struct sockaddr_in *dst;
+	int s;
+
+	if (imo == NULL) {
+		/*
+		 * No multicast option buffer attached to the pcb;
+		 * allocate one and initialize to default values.
+		 */
+		imo = (struct ip_moptions*)malloc(sizeof(*imo), M_IPMOPTS,
+		    M_WAITOK);
+
+		if (imo == NULL)
+			return (ENOBUFS);
+		*imop = imo;
+		imo->imo_multicast_ifp = NULL;
+		imo->imo_multicast_vif = -1;
+		imo->imo_multicast_ttl = IP_DEFAULT_MULTICAST_TTL;
+		imo->imo_multicast_loop = IP_DEFAULT_MULTICAST_LOOP;
+		imo->imo_num_memberships = 0;
+	}
+
+	switch (sopt->sopt_name) {
+	/* store an index number for the vif you wanna use in the send */
+	case IP_MULTICAST_VIF:
+		/* legal_vif_num is a hook; unset means no vif support. */
+		if (legal_vif_num == 0) {
+			error = EOPNOTSUPP;
+			break;
+		}
+		error = sooptcopyin(sopt, &i, sizeof i, sizeof i);
+		if (error)
+			break;
+		/* -1 is accepted and restores the default (no vif). */
+		if (!legal_vif_num(i) && (i != -1)) {
+			error = EINVAL;
+			break;
+		}
+		imo->imo_multicast_vif = i;
+		break;
+
+	case IP_MULTICAST_IF:
+		/*
+		 * Select the interface for outgoing multicast packets.
+		 */
+		error = sooptcopyin(sopt, &addr, sizeof addr, sizeof addr);
+		if (error)
+			break;
+		/*
+		 * INADDR_ANY is used to remove a previous selection.
+		 * When no interface is selected, a default one is
+		 * chosen every time a multicast packet is sent.
+		 */
+		if (addr.s_addr == INADDR_ANY) {
+			imo->imo_multicast_ifp = NULL;
+			break;
+		}
+		/*
+		 * The selected interface is identified by its local
+		 * IP address.  Find the interface and confirm that
+		 * it supports multicasting.
+		 */
+		s = splimp();
+		INADDR_TO_IFP(addr, ifp);
+		if (ifp == NULL || (ifp->if_flags & IFF_MULTICAST) == 0) {
+			splx(s);
+			error = EADDRNOTAVAIL;
+			break;
+		}
+		imo->imo_multicast_ifp = ifp;
+		splx(s);
+		break;
+
+	case IP_MULTICAST_TTL:
+		/*
+		 * Set the IP time-to-live for outgoing multicast packets.
+		 * The original multicast API required a char argument,
+		 * which is inconsistent with the rest of the socket API.
+		 * We allow either a char or an int.
+		 */
+		if (sopt->sopt_valsize == 1) {
+			u_char ttl;
+			error = sooptcopyin(sopt, &ttl, 1, 1);
+			if (error)
+				break;
+			imo->imo_multicast_ttl = ttl;
+		} else {
+			u_int ttl;
+			error = sooptcopyin(sopt, &ttl, sizeof ttl, 
+					    sizeof ttl);
+			if (error)
+				break;
+			/* An int-sized TTL must still fit in one byte. */
+			if (ttl > 255)
+				error = EINVAL;
+			else
+				imo->imo_multicast_ttl = ttl;
+		}
+		break;
+
+	case IP_MULTICAST_LOOP:
+		/*
+		 * Set the loopback flag for outgoing multicast packets.
+		 * Must be zero or one.  The original multicast API required a
+		 * char argument, which is inconsistent with the rest
+		 * of the socket API.  We allow either a char or an int.
+		 */
+		if (sopt->sopt_valsize == 1) {
+			u_char loop;
+			error = sooptcopyin(sopt, &loop, 1, 1);
+			if (error)
+				break;
+			/* Any non-zero value is normalized to 1. */
+			imo->imo_multicast_loop = !!loop;
+		} else {
+			u_int loop;
+			error = sooptcopyin(sopt, &loop, sizeof loop,
+					    sizeof loop);
+			if (error)
+				break;
+			imo->imo_multicast_loop = !!loop;
+		}
+		break;
+
+	case IP_ADD_MEMBERSHIP:
+		/*
+		 * Add a multicast group membership.
+		 * Group must be a valid IP multicast address.
+		 */
+		error = sooptcopyin(sopt, &mreq, sizeof mreq, sizeof mreq);
+		if (error)
+			break;
+
+		if (!IN_MULTICAST(ntohl(mreq.imr_multiaddr.s_addr))) {
+			error = EINVAL;
+			break;
+		}
+		s = splimp();
+		/*
+		 * If no interface address was provided, use the interface of
+		 * the route to the given multicast address.
+		 */
+		if (mreq.imr_interface.s_addr == INADDR_ANY) {
+			bzero((caddr_t)&ro, sizeof(ro));
+			dst = (struct sockaddr_in *)&ro.ro_dst;
+			dst->sin_len = sizeof(*dst);
+			dst->sin_family = AF_INET;
+			dst->sin_addr = mreq.imr_multiaddr;
+			rtalloc(&ro);
+			if (ro.ro_rt == NULL) {
+				error = EADDRNOTAVAIL;
+				splx(s);
+				break;
+			}
+			/* Only the interface is needed; drop the route. */
+			ifp = ro.ro_rt->rt_ifp;
+			rtfree(ro.ro_rt);
+		}
+		else {
+			INADDR_TO_IFP(mreq.imr_interface, ifp);
+		}
+
+		/*
+		 * See if we found an interface, and confirm that it
+		 * supports multicast.
+		 */
+		if (ifp == NULL || (ifp->if_flags & IFF_MULTICAST) == 0) {
+			error = EADDRNOTAVAIL;
+			splx(s);
+			break;
+		}
+		/*
+		 * See if the membership already exists or if all the
+		 * membership slots are full.
+		 */
+		for (i = 0; i < imo->imo_num_memberships; ++i) {
+			if (imo->imo_membership[i]->inm_ifp == ifp &&
+			    imo->imo_membership[i]->inm_addr.s_addr
+						== mreq.imr_multiaddr.s_addr)
+				break;
+		}
+		if (i < imo->imo_num_memberships) {
+			error = EADDRINUSE;
+			splx(s);
+			break;
+		}
+		/* Here i == imo_num_memberships: the first free slot. */
+		if (i == IP_MAX_MEMBERSHIPS) {
+			error = ETOOMANYREFS;
+			splx(s);
+			break;
+		}
+		/*
+		 * Everything looks good; add a new record to the multicast
+		 * address list for the given interface.
+		 */
+		if ((imo->imo_membership[i] =
+		    in_addmulti(&mreq.imr_multiaddr, ifp)) == NULL) {
+			error = ENOBUFS;
+			splx(s);
+			break;
+		}
+		++imo->imo_num_memberships;
+		splx(s);
+		break;
+
+	case IP_DROP_MEMBERSHIP:
+		/*
+		 * Drop a multicast group membership.
+		 * Group must be a valid IP multicast address.
+		 */
+		error = sooptcopyin(sopt, &mreq, sizeof mreq, sizeof mreq);
+		if (error)
+			break;
+
+		if (!IN_MULTICAST(ntohl(mreq.imr_multiaddr.s_addr))) {
+			error = EINVAL;
+			break;
+		}
+
+		s = splimp();
+		/*
+		 * If an interface address was specified, get a pointer
+		 * to its ifnet structure.
+		 */
+		if (mreq.imr_interface.s_addr == INADDR_ANY)
+			ifp = NULL;
+		else {
+			INADDR_TO_IFP(mreq.imr_interface, ifp);
+			if (ifp == NULL) {
+				error = EADDRNOTAVAIL;
+				splx(s);
+				break;
+			}
+		}
+		/*
+		 * Find the membership in the membership array.
+		 * A NULL ifp matches the group on any interface.
+		 */
+		for (i = 0; i < imo->imo_num_memberships; ++i) {
+			if ((ifp == NULL ||
+			     imo->imo_membership[i]->inm_ifp == ifp) &&
+			     imo->imo_membership[i]->inm_addr.s_addr ==
+			     mreq.imr_multiaddr.s_addr)
+				break;
+		}
+		if (i == imo->imo_num_memberships) {
+			error = EADDRNOTAVAIL;
+			splx(s);
+			break;
+		}
+		/*
+		 * Give up the multicast address record to which the
+		 * membership points.
+		 */
+		in_delmulti(imo->imo_membership[i]);
+		/*
+		 * Remove the gap in the membership array.
+		 */
+		for (++i; i < imo->imo_num_memberships; ++i)
+			imo->imo_membership[i-1] = imo->imo_membership[i];
+		--imo->imo_num_memberships;
+		splx(s);
+		break;
+
+	default:
+		error = EOPNOTSUPP;
+		break;
+	}
+
+	/*
+	 * If all options have default values, no need to keep the
+	 * option structure.  This also releases a structure that was
+	 * allocated above for a request that then failed.
+	 */
+	if (imo->imo_multicast_ifp == NULL &&
+	    imo->imo_multicast_vif == -1 &&
+	    imo->imo_multicast_ttl == IP_DEFAULT_MULTICAST_TTL &&
+	    imo->imo_multicast_loop == IP_DEFAULT_MULTICAST_LOOP &&
+	    imo->imo_num_memberships == 0) {
+		free(*imop, M_IPMOPTS);
+		*imop = NULL;
+	}
+
+	return (error);
+}
+
+/*
+ * Return the IP multicast options in response to user getsockopt().
+ *
+ * imo may be NULL, in which case the default value of the requested
+ * option is reported.  TTL and loopback are returned as a single byte
+ * when the caller supplied a one-byte buffer, otherwise as an int.
+ * Returns the sooptcopyout() result, or ENOPROTOOPT for option names
+ * this routine does not handle.
+ */
+static int
+ip_getmoptions(sopt, imo)
+	struct sockopt *sopt;
+	register struct ip_moptions *imo;
+{
+	struct in_addr ifaddr;
+	struct in_ifaddr *ia;
+	u_char byteval;
+	int intval;
+
+	switch (sopt->sopt_name) {
+	case IP_MULTICAST_VIF:
+		intval = (imo != NULL) ? imo->imo_multicast_vif : -1;
+		return (sooptcopyout(sopt, &intval, sizeof intval));
+
+	case IP_MULTICAST_IF:
+		/* INADDR_ANY unless an interface with an address is set. */
+		ifaddr.s_addr = INADDR_ANY;
+		if (imo != NULL && imo->imo_multicast_ifp != NULL) {
+			IFP_TO_IA(imo->imo_multicast_ifp, ia);
+			if (ia != NULL)
+				ifaddr.s_addr = IA_SIN(ia)->sin_addr.s_addr;
+		}
+		return (sooptcopyout(sopt, &ifaddr, sizeof ifaddr));
+
+	case IP_MULTICAST_TTL:
+	case IP_MULTICAST_LOOP:
+		if (sopt->sopt_name == IP_MULTICAST_TTL) {
+			if (imo == NULL)
+				byteval = IP_DEFAULT_MULTICAST_TTL;
+			else
+				byteval = imo->imo_multicast_ttl;
+		} else {
+			if (imo == NULL)
+				byteval = IP_DEFAULT_MULTICAST_LOOP;
+			else
+				byteval = imo->imo_multicast_loop;
+		}
+		intval = byteval;
+		if (sopt->sopt_valsize == 1)
+			return (sooptcopyout(sopt, &byteval, 1));
+		return (sooptcopyout(sopt, &intval, sizeof intval));
+
+	default:
+		break;
+	}
+	return (ENOPROTOOPT);
+}
+
+/*
+ * Discard the IP multicast options: drop every group membership
+ * recorded in imo and then free the structure itself.  A NULL imo
+ * is a no-op.
+ */
+void
+ip_freemoptions(imo)
+	register struct ip_moptions *imo;
+{
+	int idx;
+
+	if (imo == NULL)
+		return;
+
+	idx = 0;
+	while (idx < imo->imo_num_memberships) {
+		in_delmulti(imo->imo_membership[idx]);
+		idx++;
+	}
+	free(imo, M_IPMOPTS);
+}
+
+/*
+ * Routine called from ip_output() to loop back a copy of an IP multicast
+ * packet to the input queue of a specified interface.  Note that this
+ * calls the output routine of the loopback "driver", but with an interface
+ * pointer that might NOT be a loopback interface -- evil, but easier than
+ * replicating that code here.
+ *
+ * The packet m itself is left untouched; a copy is made, its header
+ * (hlen bytes) is converted to network byte order and checksummed, and
+ * the copy is handed to if_simloop().  If the copy cannot be obtained
+ * the routine silently does nothing.
+ */
+static void
+ip_mloopback(ifp, m, dst, hlen)
+	struct ifnet *ifp;
+	register struct mbuf *m;
+	register struct sockaddr_in *dst;
+	int hlen;
+{
+	struct mbuf *dup;
+	struct ip *iph;
+
+	dup = m_copy(m, 0, M_COPYALL);
+	if (dup != NULL && ((dup->m_flags & M_EXT) || dup->m_len < hlen))
+		dup = m_pullup(dup, hlen);
+	if (dup == NULL)
+		return;
+
+	/*
+	 * We don't bother to fragment if the IP length is greater
+	 * than the interface's MTU.  Can this possibly matter?
+	 */
+	iph = mtod(dup, struct ip *);
+	HTONS(iph->ip_len);
+	HTONS(iph->ip_off);
+	iph->ip_sum = 0;
+	if (iph->ip_vhl == IP_VHL_BORING) {
+		iph->ip_sum = in_cksum_hdr(iph);
+	} else {
+		iph->ip_sum = in_cksum(dup, hlen);
+	}
+	/*
+	 * NB:
+	 * It's not clear whether there are any lingering
+	 * reentrancy problems in other areas which might
+	 * be exposed by using ip_input directly (in
+	 * particular, everything which modifies the packet
+	 * in-place).  Yet another option is using the
+	 * protosw directly to deliver the looped back
+	 * packet.  For the moment, we'll err on the side
+	 * of safety by using if_simloop().
+	 */
+#if 1 /* XXX */
+	if (dst->sin_family != AF_INET) {
+		printf("ip_mloopback: bad address family %d\n",
+					dst->sin_family);
+		dst->sin_family = AF_INET;
+	}
+#endif
+
+#ifdef notdef
+	dup->m_pkthdr.rcvif = ifp;
+	ip_input(dup);
+#else
+	/* if the checksum hasn't been computed, mark it as valid */
+	if (dup->m_pkthdr.csum_flags & CSUM_DELAY_DATA) {
+		dup->m_pkthdr.csum_flags |=
+		    CSUM_DATA_VALID | CSUM_PSEUDO_HDR;
+		dup->m_pkthdr.csum_data = 0xffff;
+	}
+	if_simloop(ifp, dup, dst->sin_family, 0);
+#endif
+}
diff --git a/sys/netinet/ip_proxy.c b/sys/netinet/ip_proxy.c
new file mode 100644
index 0000000..47d0e5e
--- /dev/null
+++ b/sys/netinet/ip_proxy.c
@@ -0,0 +1,452 @@
+/*
+ * Copyright (C) 1997-2000 by Darren Reed.
+ *
+ * Redistribution and use in source and binary forms are permitted
+ * provided that this notice is preserved and due credit is given
+ * to the original author and the contributors.
+ */
+#if !defined(lint)
+/*static const char rcsid[] = "@(#)$Id: ip_proxy.c,v 2.2.2.1 1999/09/19 12:18:19 darrenr Exp $";*/
+static const char rcsid[] = "@(#)$FreeBSD$";
+#endif
+
+#if defined(__FreeBSD__) && defined(KERNEL) && !defined(_KERNEL)
+# define	_KERNEL
+#endif
+
+#include <sys/errno.h>
+#include <sys/types.h>
+#include <sys/param.h>
+#include <sys/time.h>
+#include <sys/file.h>
+#if !defined(__FreeBSD_version)  
+# include <sys/ioctl.h>      
+#endif
+#include <sys/fcntl.h>
+#include <sys/uio.h>
+#if !defined(_KERNEL) && !defined(KERNEL)
+# include <stdio.h>
+# include <string.h>
+# include <stdlib.h>
+#endif
+#ifndef	linux
+# include <sys/protosw.h>
+#endif
+#include <sys/socket.h>
+#if defined(_KERNEL)
+# if !defined(linux)
+#  include <sys/systm.h>
+# else
+#  include <linux/string.h>
+# endif
+#endif
+#if !defined(__SVR4) && !defined(__svr4__)
+# ifndef linux
+#  include <sys/mbuf.h>
+# endif
+#else
+# include <sys/byteorder.h>
+# ifdef _KERNEL
+#  include <sys/dditypes.h>
+# endif
+# include <sys/stream.h>
+# include <sys/kmem.h>
+#endif
+#if __FreeBSD__ > 2
+# include <sys/queue.h>
+#endif
+#include <net/if.h>
+#ifdef sun
+# include <net/af.h>
+#endif
+#include <net/route.h>
+#include <netinet/in.h>
+#include <netinet/in_systm.h>
+#include <netinet/ip.h>
+#ifndef linux
+# include <netinet/ip_var.h>
+#endif
+#include <netinet/tcp.h>
+#include <netinet/udp.h>
+#include <netinet/ip_icmp.h>
+#include "netinet/ip_compat.h"
+#include <netinet/tcpip.h>
+#include "netinet/ip_fil.h"
+#include "netinet/ip_proxy.h"
+#include "netinet/ip_nat.h"
+#include "netinet/ip_state.h"
+#if (__FreeBSD_version >= 300000)
+# include <sys/malloc.h>
+#endif
+
+
+/* Smaller of two values.  Note: each argument may be evaluated twice. */
+#ifndef MIN
+#define MIN(a,b)        (((a)<(b))?(a):(b))
+#endif
+
+/* Forward declarations of the file-local helpers defined below. */
+static ap_session_t *appr_new_session __P((aproxy_t *, ip_t *,
+					   fr_info_t *, nat_t *));
+static int appr_fixseqack __P((fr_info_t *, ip_t *, ap_session_t *, int ));
+
+
+/* Number of slots in ap_sess_tab[]. */
+#define	AP_SESS_SIZE	53
+
+/*
+ * In non-Linux kernel builds the individual proxy implementations are
+ * compiled as part of this file by including their sources directly.
+ */
+#if defined(_KERNEL) && !defined(linux)
+#include "netinet/ip_ftp_pxy.c"
+#include "netinet/ip_rcmd_pxy.c"
+#include "netinet/ip_raudio_pxy.c"
+#endif
+
+ap_session_t	*ap_sess_tab[AP_SESS_SIZE];
+ap_session_t	*ap_sess_list = NULL;	/* head of the list of active sessions */
+aproxy_t	*ap_proxylist = NULL;	/* proxies registered via appr_add() */
+/*
+ * Table of built-in proxies.  Which entries are present depends on the
+ * IPF_*_PROXY macros; the table is terminated by an entry whose
+ * protocol (apr_p) is zero, which is what the loops over it test for.
+ */
+aproxy_t	ap_proxies[] = {
+#ifdef	IPF_FTP_PROXY
+	{ NULL, "ftp", (char)IPPROTO_TCP, 0, 0, ippr_ftp_init, NULL,
+	  ippr_ftp_new, ippr_ftp_in, ippr_ftp_out },
+#endif
+#ifdef	IPF_RCMD_PROXY
+	{ NULL, "rcmd", (char)IPPROTO_TCP, 0, 0, ippr_rcmd_init, NULL,
+	  ippr_rcmd_new, NULL, ippr_rcmd_out },
+#endif
+#ifdef	IPF_RAUDIO_PROXY
+	{ NULL, "raudio", (char)IPPROTO_TCP, 0, 0, ippr_raudio_init, NULL,
+	  ippr_raudio_new, ippr_raudio_in, ippr_raudio_out },
+#endif
+	{ NULL, "", '\0', 0, 0, NULL, NULL }
+};
+
+
+/*
+ * Register an additional proxy.
+ *
+ * ap is rejected with -1 if a proxy with the same protocol and label
+ * already exists, either in the built-in table (ap_proxies) or on the
+ * list of previously added proxies (ap_proxylist).  Otherwise ap is
+ * pushed on the front of ap_proxylist and the result of its init
+ * routine is returned.
+ */
+int appr_add(ap)
+aproxy_t *ap;
+{
+	aproxy_t *a;
+
+	for (a = ap_proxies; a->apr_p; a++)
+		if ((a->apr_p == ap->apr_p) &&
+		    !strncmp(a->apr_label, ap->apr_label,
+			     sizeof(ap->apr_label)))
+			return -1;
+
+	/*
+	 * ap_proxylist is a NULL-terminated linked list and starts out
+	 * empty, so the pointer itself must be tested before looking
+	 * at the entry it refers to.
+	 */
+	for (a = ap_proxylist; a && a->apr_p; a = a->apr_next)
+		if ((a->apr_p == ap->apr_p) &&
+		    !strncmp(a->apr_label, ap->apr_label,
+			     sizeof(ap->apr_label)))
+			return -1;
+	ap->apr_next = ap_proxylist;
+	ap_proxylist = ap;
+	return (*ap->apr_init)();
+}
+
+
+/*
+ * Unlink a proxy from ap_proxylist.
+ * Returns 0 when removed, 1 when the proxy is still referenced
+ * (apr_ref != 0) and so left in place, and -1 when it is not on the list.
+ */
+int appr_del(ap)
+aproxy_t *ap;
+{
+	aproxy_t **link = &ap_proxylist;
+
+	while (*link != NULL) {
+		if (*link == ap) {
+			if (ap->apr_ref != 0)
+				return 1;
+			*link = ap->apr_next;
+			return 0;
+		}
+		link = &(*link)->apr_next;
+	}
+	return -1;
+}
+
+
+/*
+ * Decide whether the proxy attached to NAT rule nat applies to a packet.
+ * Returns 1 when the rule has a proxy that is not marked APR_DELETE,
+ * the packet's protocol matches the proxy's, and the destination port
+ * agrees with the rule (for a packet without a TCP header the rule's
+ * port must be zero).  Returns 0 otherwise.
+ */
+int appr_ok(ip, tcp, nat)
+ip_t *ip;
+tcphdr_t *tcp;
+ipnat_t *nat;
+{
+	aproxy_t *proxy = nat->in_apr;
+	u_short want = nat->in_dport;
+
+	if (proxy == NULL)
+		return 0;
+	if (proxy->apr_flags & APR_DELETE)
+		return 0;
+	if (ip->ip_p != proxy->apr_p)
+		return 0;
+
+	if (tcp != NULL) {
+		if (tcp->th_dport != want)
+			return 0;
+	} else if (want != 0) {
+		return 0;
+	}
+	return 1;
+}
+
+
+/*
+ * Allocate a new proxy session structure for proxy apr and fill it in
+ * with the relevant details.  If the proxy supplies an apr_new hook it
+ * is called before the session is linked in; a return of -1 from the
+ * hook aborts the setup.  On success the session is pushed on the front
+ * of ap_sess_list and returned.  NULL is returned when there is no
+ * usable proxy (none, marked APR_DELETE, or protocol mismatch), when
+ * allocation fails, or when the hook refuses.
+ */
+static ap_session_t *appr_new_session(apr, ip, fin, nat)
+aproxy_t *apr;
+ip_t *ip;
+fr_info_t *fin;
+nat_t *nat;
+{
+	ap_session_t *sess;
+
+	if (apr == NULL)
+		return NULL;
+	if ((apr->apr_flags & APR_DELETE) || (ip->ip_p != apr->apr_p))
+		return NULL;
+
+	KMALLOC(sess, ap_session_t *);
+	if (sess == NULL)
+		return NULL;
+
+	bzero((char *)sess, sizeof(*sess));
+	sess->aps_apr = apr;
+	sess->aps_p = ip->ip_p;
+	sess->aps_data = NULL;
+	sess->aps_psiz = 0;
+
+	if ((apr->apr_new != NULL) &&
+	    ((*apr->apr_new)(fin, ip, sess, nat) == -1)) {
+		KFREE(sess);
+		return NULL;
+	}
+
+	sess->aps_nat = nat;
+	sess->aps_next = ap_sess_list;
+	ap_sess_list = sess;
+	return sess;
+}
+
+
+/*
+ * check to see if a packet should be passed through an active proxy routine
+ * if one has been setup for it.
+ *
+ * A session is created on first use.  Returns 0 when no session applies
+ * to this packet, -1 when the TCP checksum is bad or the proxy asks for
+ * the packet to be rejected, and 1 once the proxy has processed it.
+ */
+int appr_check(ip, fin, nat)
+ip_t *ip;
+fr_info_t *fin;
+nat_t *nat;
+{
+	ap_session_t *sess;
+	aproxy_t *proxy;
+	tcphdr_t *th = NULL;
+	u_32_t cksum;
+	short status;
+	int result = 0;
+
+	if (nat->nat_aps == NULL)
+		nat->nat_aps = appr_new_session(nat->nat_ptr->in_apr, ip,
+						fin, nat);
+	sess = nat->nat_aps;
+	if ((sess == NULL) || (sess->aps_p != ip->ip_p))
+		return 0;
+
+	if (ip->ip_p == IPPROTO_TCP) {
+		th = (tcphdr_t *)fin->fin_dp;
+		/*
+		 * verify that the checksum is correct.  If not, then
+		 * don't do anything with this packet.
+		 */
+#if SOLARIS && defined(_KERNEL)
+		cksum = fr_tcpsum(fin->fin_qfm, ip, th);
+#else
+		cksum = fr_tcpsum(*(mb_t **)fin->fin_mp, ip, th);
+#endif
+		if (cksum != th->th_sum) {
+			frstats[fin->fin_out].fr_tcpbad++;
+			return -1;
+		}
+	}
+
+	/* Hand the packet to the hook for its direction, if any. */
+	proxy = sess->aps_apr;
+	if (fin->fin_out != 0) {
+		if (proxy->apr_outpkt != NULL)
+			result = (*proxy->apr_outpkt)(fin, ip, sess, nat);
+	} else {
+		if (proxy->apr_inpkt != NULL)
+			result = (*proxy->apr_inpkt)(fin, ip, sess, nat);
+	}
+
+	/* Upper half of the hook's result is its exit status. */
+	status = APR_EXIT(result);
+	if (status == -1)
+		return status;
+
+	if (th != NULL) {
+		/* Lower half is the change in payload length. */
+		(void)appr_fixseqack(fin, ip, sess, APR_INC(result));
+#if SOLARIS && defined(_KERNEL)
+		th->th_sum = fr_tcpsum(fin->fin_qfm, ip, th);
+#else
+		th->th_sum = fr_tcpsum(*(mb_t **)fin->fin_mp, ip, th);
+#endif
+	}
+	sess->aps_bytes += ip->ip_len;
+	sess->aps_pkts++;
+	return 1;
+}
+
+
+/*
+ * Look up a proxy by protocol number and label, searching the built-in
+ * table first and then the list of added proxies.  On a match the
+ * proxy's reference count is bumped and the proxy returned; otherwise
+ * NULL is returned.
+ */
+aproxy_t *appr_match(pr, name)
+u_int pr;
+char *name;
+{
+	aproxy_t *scan, *hit = NULL;
+
+	for (scan = ap_proxies; (hit == NULL) && scan->apr_p; scan++)
+		if ((scan->apr_p == pr) &&
+		    !strncmp(name, scan->apr_label, sizeof(scan->apr_label)))
+			hit = scan;
+
+	for (scan = ap_proxylist; (hit == NULL) && (scan != NULL);
+	     scan = scan->apr_next)
+		if ((scan->apr_p == pr) &&
+		    !strncmp(name, scan->apr_label, sizeof(scan->apr_label)))
+			hit = scan;
+
+	if (hit != NULL)
+		hit->apr_ref++;
+	return hit;
+}
+
+
+/*
+ * Release one reference on a proxy (the counterpart of the increment
+ * done by appr_match()).  Nothing is freed here.
+ */
+void appr_free(ap)
+aproxy_t *ap;
+{
+	ap->apr_ref -= 1;
+}
+
+
+/*
+ * Destroy a proxy session: unlink it from ap_sess_list if it is there,
+ * release its private data (when both the pointer and the recorded size
+ * are non-zero) and free the session itself.  NULL is ignored.
+ */
+void aps_free(aps)
+ap_session_t *aps;
+{
+	ap_session_t **link;
+
+	if (aps == NULL)
+		return;
+
+	link = &ap_sess_list;
+	while ((*link != NULL) && (*link != aps))
+		link = &(*link)->aps_next;
+	if (*link != NULL)
+		*link = aps->aps_next;
+
+	if ((aps->aps_data != NULL) && (aps->aps_psiz != 0))
+		KFREES(aps->aps_data, aps->aps_psiz);
+	KFREE(aps);
+}
+
+
+/*
+ * Adjust the TCP sequence and acknowledgement numbers of a proxied
+ * packet to account for payload-length changes made by the proxy.
+ *
+ * The session keeps two sets of {minimum, offset} pairs for each of
+ * "seq" and "ack"; aps_sel[] records which set is currently in use for
+ * each direction, and the code switches to the other set once the
+ * packet's number has passed that set's minimum.  For outbound packets
+ * (fin_out != 0) the seq offset is added to th_seq and the ack offset
+ * subtracted from th_ack; for inbound packets the roles of the ack and
+ * seq tables are swapped.  A non-zero inc (the length change made to
+ * this packet) is recorded in the set not currently selected.
+ *
+ * Returns 2 if the TCP header was modified, 0 otherwise.
+ */
+static int appr_fixseqack(fin, ip, aps, inc)
+fr_info_t *fin;
+ip_t *ip;
+ap_session_t *aps;
+int inc;
+{
+	int sel, ch = 0, out, nlen;
+	u_32_t seq1, seq2;
+	tcphdr_t *tcp;
+
+	tcp = (tcphdr_t *)fin->fin_dp;
+	out = fin->fin_out;
+	/* nlen: TCP payload length (IP length less IP and TCP headers) */
+	nlen = ip->ip_len;
+	nlen -= (ip->ip_hl << 2) + (tcp->th_off << 2);
+
+	if (out != 0) {
+		seq1 = (u_32_t)ntohl(tcp->th_seq);
+		sel = aps->aps_sel[out];
+
+		/* switch to other set ? */
+		if ((aps->aps_seqmin[!sel] > aps->aps_seqmin[sel]) &&
+		    (seq1 > aps->aps_seqmin[!sel]))
+			sel = aps->aps_sel[out] = !sel;
+
+		/* shift the sequence number if past the recorded point */
+		if (aps->aps_seqoff[sel]) {
+			seq2 = aps->aps_seqmin[sel] - aps->aps_seqoff[sel];
+			if (seq1 > seq2) {
+				seq2 = aps->aps_seqoff[sel];
+				seq1 += seq2;
+				tcp->th_seq = htonl(seq1);
+				ch = 1;
+			}
+		}
+
+		/* remember the new cumulative offset in the spare set */
+		if (inc && (seq1 > aps->aps_seqmin[!sel])) {
+			aps->aps_seqmin[!sel] = seq1 + nlen - 1;
+			aps->aps_seqoff[!sel] = aps->aps_seqoff[sel] + inc;
+		}
+
+		/***/
+
+		/* now the acknowledgement number, using the ack tables */
+		seq1 = ntohl(tcp->th_ack);
+		sel = aps->aps_sel[1 - out];
+
+		/* switch to other set ? */
+		if ((aps->aps_ackmin[!sel] > aps->aps_ackmin[sel]) &&
+		    (seq1 > aps->aps_ackmin[!sel]))
+			sel = aps->aps_sel[1 - out] = !sel;
+
+		if (aps->aps_ackoff[sel] && (seq1 > aps->aps_ackmin[sel])) {
+			seq2 = aps->aps_ackoff[sel];
+			tcp->th_ack = htonl(seq1 - seq2);
+			ch = 1;
+		}
+	} else {
+		/* inbound: same steps with the ack and seq tables swapped */
+		seq1 = ntohl(tcp->th_seq);
+		sel = aps->aps_sel[out];
+
+		/* switch to other set ? */
+		if ((aps->aps_ackmin[!sel] > aps->aps_ackmin[sel]) &&
+		    (seq1 > aps->aps_ackmin[!sel]))
+			sel = aps->aps_sel[out] = !sel;
+
+		if (aps->aps_ackoff[sel]) {
+			seq2 = aps->aps_ackmin[sel] -
+			       aps->aps_ackoff[sel];
+			if (seq1 > seq2) {
+				seq2 = aps->aps_ackoff[sel];
+				seq1 += seq2;
+				tcp->th_seq = htonl(seq1);
+				ch = 1;
+			}
+		}
+
+		if (inc && (seq1 > aps->aps_ackmin[!sel])) {
+			aps->aps_ackmin[!sel] = seq1 + nlen - 1;
+			aps->aps_ackoff[!sel] = aps->aps_ackoff[sel] + inc;
+		}
+
+		/***/
+
+		seq1 = ntohl(tcp->th_ack);
+		sel = aps->aps_sel[1 - out];
+
+		/* switch to other set ? */
+		if ((aps->aps_seqmin[!sel] > aps->aps_seqmin[sel]) &&
+		    (seq1 > aps->aps_seqmin[!sel]))
+			sel = aps->aps_sel[1 - out] = !sel;
+
+		if (aps->aps_seqoff[sel] && (seq1 > aps->aps_seqmin[sel])) {
+			seq2 = aps->aps_seqoff[sel];
+			tcp->th_ack = htonl(seq1 - seq2);
+			ch = 1;
+		}
+	}
+	return ch ? 2 : 0;
+}
+
+
+/*
+ * Run the init routine of every built-in proxy, in table order.
+ * Stops at, and returns, the first non-zero result; returns 0 when
+ * all of them succeed.
+ */
+int appr_init()
+{
+	aproxy_t *proxy;
+	int rc;
+
+	for (proxy = ap_proxies; proxy->apr_p != 0; proxy++) {
+		rc = (*proxy->apr_init)();
+		if (rc != 0)
+			return rc;
+	}
+	return 0;
+}
+
+
+/*
+ * Call the fini routine, where one is provided, of every built-in
+ * proxy and then of every proxy on ap_proxylist.
+ */
+void appr_unload()
+{
+	aproxy_t *proxy;
+
+	proxy = ap_proxies;
+	while (proxy->apr_p) {
+		if (proxy->apr_fini != NULL)
+			(*proxy->apr_fini)();
+		proxy++;
+	}
+
+	proxy = ap_proxylist;
+	while (proxy != NULL) {
+		if (proxy->apr_fini != NULL)
+			(*proxy->apr_fini)();
+		proxy = proxy->apr_next;
+	}
+}
diff --git a/sys/netinet/ip_proxy.h b/sys/netinet/ip_proxy.h
new file mode 100644
index 0000000..f22c709
--- /dev/null
+++ b/sys/netinet/ip_proxy.h
@@ -0,0 +1,157 @@
+/*
+ * Copyright (C) 1997-2000 by Darren Reed.
+ *
+ * Redistribution and use in source and binary forms are permitted
+ * provided that this notice is preserved and due credit is given
+ * to the original author and the contributors.
+ *
+ * $Id: ip_proxy.h,v 2.8.2.4 2000/12/02 00:15:03 darrenr Exp $
+ * $FreeBSD$
+ */
+
+#ifndef	__IP_PROXY_H__
+#define	__IP_PROXY_H__
+
+#ifndef SOLARIS
+#define SOLARIS (defined(sun) && (defined(__svr4__) || defined(__SVR4)))
+#endif
+
+#ifndef	APR_LABELLEN
+#define	APR_LABELLEN	16
+#endif
+#define	AP_SESS_SIZE	53
+
+struct	nat;
+struct	ipnat;
+
+/*
+ * Per-session TCP state kept by the proxy code.  The seq/ack fields
+ * come in pairs of two alternative sets; apt_sel[] says which set is
+ * in use (see appr_fixseqack() in ip_proxy.c).
+ */
+typedef	struct	ap_tcp {
+	u_short	apt_sport;	/* source port */
+	u_short	apt_dport;	/* destination port */
+	short	apt_sel[2];	/* {seq,ack}{off,min} set selector */
+	short	apt_seqoff[2];	/* sequence # difference */
+	tcp_seq	apt_seqmin[2];	/* don't change seq-off until after this */
+	short	apt_ackoff[2];	/* acknowledgement # difference */
+	tcp_seq	apt_ackmin[2];	/* don't change ack-off until after this */
+	u_char	apt_state[2];	/* connection state */
+} ap_tcp_t;
+
+/* Per-session UDP data: just the port pair. */
+typedef	struct	ap_udp {
+	u_short	apu_sport;	/* source port */
+	u_short	apu_dport;	/* destination port */
+} ap_udp_t;
+
+/*
+ * One active proxy session.  Sessions are chained through aps_next on
+ * the global ap_sess_list.
+ */
+typedef	struct ap_session {
+	struct	aproxy	*aps_apr;	/* proxy handling this session */
+	union {
+		struct	ap_tcp	apu_tcp;
+		struct	ap_udp	apu_udp;
+	} aps_un;		/* protocol-specific state */
+	u_int	aps_flags;
+	U_QUAD_T aps_bytes;	/* bytes sent */
+	U_QUAD_T aps_pkts;	/* packets sent */
+	void	*aps_nat;	/* pointer back to nat struct */
+	void	*aps_data;	/* private data */
+	int	aps_p;		/* protocol */
+	int	aps_psiz;	/* size of private data */
+	struct	ap_session	*aps_hnext;
+	struct	ap_session	*aps_next;	/* next on ap_sess_list */
+} ap_session_t;
+
+/* Shorthand for the TCP members of the aps_un union. */
+#define	aps_sport	aps_un.apu_tcp.apt_sport
+#define	aps_dport	aps_un.apu_tcp.apt_dport
+#define	aps_sel		aps_un.apu_tcp.apt_sel
+#define	aps_seqoff	aps_un.apu_tcp.apt_seqoff
+#define	aps_seqmin	aps_un.apu_tcp.apt_seqmin
+#define	aps_state	aps_un.apu_tcp.apt_state
+#define	aps_ackoff	aps_un.apu_tcp.apt_ackoff
+#define	aps_ackmin	aps_un.apu_tcp.apt_ackmin
+
+
+/*
+ * Description of one application proxy: its identity (label and
+ * protocol) and the hooks the proxy framework calls.  Hooks other than
+ * apr_init are checked for NULL before being called by ip_proxy.c.
+ */
+typedef	struct	aproxy	{
+	struct	aproxy	*apr_next;	/* next on the list of added proxies */
+	char	apr_label[APR_LABELLEN];	/* Proxy label # */
+	u_char	apr_p;		/* protocol */
+	int	apr_ref;	/* +1 per rule referencing it */
+	int	apr_flags;	/* APR_* flags, below */
+	int	(* apr_init) __P((void));	/* one-time setup */
+	void	(* apr_fini) __P((void));	/* called at unload */
+	int	(* apr_new) __P((fr_info_t *, ip_t *,
+				 ap_session_t *, struct nat *));
+				/* new session; -1 refuses it */
+	int	(* apr_inpkt) __P((fr_info_t *, ip_t *,
+				   ap_session_t *, struct nat *));
+				/* inbound packet hook */
+	int	(* apr_outpkt) __P((fr_info_t *, ip_t *,
+				    ap_session_t *, struct nat *));
+				/* outbound packet hook */
+} aproxy_t;
+
+/* apr_flags: proxy is being removed; do not use it for new work */
+#define	APR_DELETE	1
+
+/*
+ * Packet hooks return a single int carrying two 16-bit fields:
+ * the upper half is an exit status (APR_ERR() builds it, APR_EXIT()
+ * extracts it) and the lower half a length increment (APR_INC()).
+ */
+#define	APR_ERR(x)	(((x) & 0xffff) << 16)
+#define	APR_EXIT(x)	(((x) >> 16) & 0xffff)
+#define	APR_INC(x)	((x) & 0xffff)
+
+/* Size in bytes of the per-side buffer ftps_buf[]. */
+#define	FTP_BUFSZ	160
+/*
+ * For the ftp proxy.
+ * NOTE(review): the meaning of the individual members is defined by
+ * ip_ftp_pxy.c, which is not part of this header; the names suggest a
+ * read/write pointer pair into ftps_buf -- confirm there.
+ */
+typedef struct  ftpside {
+	char	*ftps_rptr;
+	char	*ftps_wptr;
+	u_32_t	ftps_seq;
+	u_32_t	ftps_len;
+	int	ftps_junk;
+	char	ftps_buf[FTP_BUFSZ];
+} ftpside_t;
+
+/* FTP proxy session data: a flag word plus two ftpside_t records. */
+typedef struct  ftpinfo {
+	u_int   	ftp_passok;
+	ftpside_t	ftp_side[2];	/* presumably one per direction -- TODO confirm */
+} ftpinfo_t;
+
+/*
+ * Real audio proxy structure and #defines
+ *
+ * One raudio_t is allocated per session by ippr_raudio_new() and hung
+ * off the session's private data pointer.
+ */
+typedef	struct	{
+	int	rap_seenpna;
+	int	rap_seenver;
+	int	rap_version;
+	int	rap_eos;	/* End Of Startup */
+	int	rap_gotid;
+	int	rap_gotlen;
+	int	rap_mode;	/* RAP_M_* bits; starts as RAP_M_TCP */
+	int	rap_sdone;
+	u_short	rap_plport;
+	u_short	rap_prport;
+	u_short	rap_srport;
+	char	rap_svr[19];
+	u_32_t	rap_sbf;	/* flag to indicate which of the 19 bytes have
+				 * been filled
+				 */
+	tcp_seq	rap_sseq;
+} raudio_t;
+
+#define	RA_ID_END	0
+#define	RA_ID_UDP	1
+#define	RA_ID_ROBUST	7
+
+#define	RAP_M_UDP	1
+#define	RAP_M_ROBUST	2
+#define	RAP_M_TCP	4
+#define	RAP_M_UDP_ROBUST	(RAP_M_UDP|RAP_M_ROBUST)
+
+
+extern	ap_session_t	*ap_sess_tab[AP_SESS_SIZE];
+extern	ap_session_t	*ap_sess_list;
+extern	aproxy_t	ap_proxies[];
+extern	int		ippr_ftp_pasvonly;
+
+extern	int	appr_add __P((aproxy_t *));
+extern	int	appr_del __P((aproxy_t *));
+extern	int	appr_init __P((void));
+extern	void	appr_unload __P((void));
+extern	int	appr_ok __P((ip_t *, tcphdr_t *, struct ipnat *));
+extern	void	appr_free __P((aproxy_t *));
+extern	void	aps_free __P((ap_session_t *));
+extern	int	appr_check __P((ip_t *, fr_info_t *, struct nat *));
+extern	aproxy_t	*appr_match __P((u_int, char *));
+
+#endif /* __IP_PROXY_H__ */
diff --git a/sys/netinet/ip_raudio_pxy.c b/sys/netinet/ip_raudio_pxy.c
new file mode 100644
index 0000000..8b2c231
--- /dev/null
+++ b/sys/netinet/ip_raudio_pxy.c
@@ -0,0 +1,308 @@
+/*
+ * $FreeBSD$
+ */
+#if SOLARIS && defined(_KERNEL)
+extern	kmutex_t	ipf_rw;
+#endif
+
+#define	IPF_RAUDIO_PROXY
+
+
+int ippr_raudio_init __P((void));
+int ippr_raudio_new __P((fr_info_t *, ip_t *, ap_session_t *, nat_t *));
+int ippr_raudio_in __P((fr_info_t *, ip_t *, ap_session_t *, nat_t *));
+int ippr_raudio_out __P((fr_info_t *, ip_t *, ap_session_t *, nat_t *));
+
+static	frentry_t	raudiofr;
+
+
+/*
+ * Real Audio application proxy initialization: reset the private filter
+ * rule (raudiofr) that ippr_raudio_in() attaches to the state entries it
+ * creates.  Always returns 0.
+ */
+int ippr_raudio_init()
+{
+	frentry_t *fr = &raudiofr;
+
+	bzero((char *)fr, sizeof(*fr));
+	fr->fr_flags = FR_INQUE|FR_PASS|FR_QUICK|FR_KEEPSTATE;
+	fr->fr_ref = 1;
+	return 0;
+}
+
+
+/*
+ * Attach a zeroed raudio_t to a new proxy session.  The session starts in
+ * TCP mode; ippr_raudio_out() changes that if the startup messages ask for
+ * UDP.  Returns 0 on success, -1 if no memory is available.
+ */
+int ippr_raudio_new(fin, ip, aps, nat)
+fr_info_t *fin;
+ip_t *ip;
+ap_session_t *aps;
+nat_t *nat;
+{
+	raudio_t *rap;
+
+	KMALLOCS(aps->aps_data, void *, sizeof(raudio_t));
+	rap = aps->aps_data;
+	if (rap == NULL)
+		return -1;
+
+	bzero((char *)rap, sizeof(*rap));
+	rap->rap_mode = RAP_M_TCP;	/* default is for TCP */
+	aps->aps_psiz = sizeof(*rap);
+	return 0;
+}
+
+
+
+/*
+ * Inspect outbound Real Audio control data.  Looks for the "PNA" marker and
+ * the 16 bit version that follows it, then walks the startup messages
+ * (16 bit id, 16 bit length, payload) until the RA_ID_END message is seen,
+ * recording the requested mode and ports in the session's raudio_t.
+ * Only the first sizeof(membuf) bytes of payload are examined.
+ * Always returns 0.
+ */
+int ippr_raudio_out(fin, ip, aps, nat)
+fr_info_t *fin;
+ip_t *ip;
+ap_session_t *aps;
+nat_t *nat;
+{
+	raudio_t *rap = aps->aps_data;
+	unsigned char membuf[512 + 1], *s;
+	u_short id = 0;
+	tcphdr_t *tcp;
+	int off, dlen;
+	int len = 0;
+	mb_t *m;
+#if	SOLARIS
+	mb_t *m1;
+#endif
+
+	/*
+	 * If we've already processed the start messages, then nothing left
+	 * for the proxy to do.
+	 */
+	if (rap->rap_eos == 1)
+		return 0;
+
+	tcp = (tcphdr_t *)fin->fin_dp;
+	off = (ip->ip_hl << 2) + (tcp->th_off << 2);
+	bzero(membuf, sizeof(membuf));
+#if	SOLARIS
+	m = fin->fin_qfm;
+
+	dlen = msgdsize(m) - off;
+	if (dlen <= 0)
+		return 0;
+	/*
+	 * Everything below indexes membuf using dlen, so it must never
+	 * exceed the number of bytes actually copied into the buffer.
+	 */
+	if (dlen > (int)sizeof(membuf))
+		dlen = sizeof(membuf);
+	copyout_mblk(m, off, dlen, (char *)membuf);
+#else
+	m = *(mb_t **)fin->fin_mp;
+
+	dlen = mbufchainlen(m) - off;
+	if (dlen <= 0)
+		return 0;
+	/*
+	 * Everything below indexes membuf using dlen, so it must never
+	 * exceed the number of bytes actually copied into the buffer.
+	 */
+	if (dlen > (int)sizeof(membuf))
+		dlen = sizeof(membuf);
+	m_copydata(m, off, dlen, (char *)membuf);
+#endif
+	/*
+	 * In all the startup parsing, ensure that we don't go outside
+	 * the packet buffer boundary.
+	 */
+	/*
+	 * Look for the start of connection "PNA" string if not seen yet.
+	 */
+	if (rap->rap_seenpna == 0) {
+		s = (u_char *)memstr("PNA", (char *)membuf, 3, dlen);
+		if (s == NULL)
+			return 0;
+		s += 3;
+		rap->rap_seenpna = 1;
+	} else
+		s = membuf;
+
+	/*
+	 * Directly after the PNA will be the version number of this
+	 * connection.
+	 */
+	if (rap->rap_seenpna == 1 && rap->rap_seenver == 0) {
+		if ((s + 1) - membuf < dlen) {
+			rap->rap_version = (*s << 8) | *(s + 1);
+			s += 2;
+			rap->rap_seenver = 1;
+		} else
+			return 0;
+	}
+
+	/*
+	 * Now that we've been past the PNA and version number, we're into the
+	 * startup messages block.  This ends when a message with an ID of 0
+	 * (RA_ID_END) is seen.
+	 */
+	while ((rap->rap_eos == 0) && ((s + 1) - membuf < dlen)) {
+		if (rap->rap_gotid == 0) {
+			id = (*s << 8) | *(s + 1);
+			s += 2;
+			rap->rap_gotid = 1;
+			if (id == RA_ID_END) {
+				rap->rap_eos = 1;
+				break;
+			}
+		} else if (rap->rap_gotlen == 0) {
+			len = (*s << 8) | *(s + 1);
+			s += 2;
+			rap->rap_gotlen = 1;
+		}
+
+		if (rap->rap_gotid == 1 && rap->rap_gotlen == 1) {
+			/*
+			 * The port is the first two payload bytes; only
+			 * read it if both of them are inside the buffer.
+			 */
+			int haveport = ((s + 1) - membuf < dlen);
+
+			if (id == RA_ID_UDP) {
+				rap->rap_mode &= ~RAP_M_TCP;
+				rap->rap_mode |= RAP_M_UDP;
+				if (haveport)
+					rap->rap_plport = (*s << 8) | *(s + 1);
+			} else if (id == RA_ID_ROBUST) {
+				rap->rap_mode |= RAP_M_ROBUST;
+				if (haveport)
+					rap->rap_prport = (*s << 8) | *(s + 1);
+			}
+			rap->rap_gotlen = 0;
+			rap->rap_gotid = 0;
+			/*
+			 * A payload reaching to or beyond the end of the
+			 * buffered data ends parsing of this packet; stop
+			 * here rather than moving s outside of membuf.
+			 */
+			if (len >= (membuf + dlen) - s)
+				break;
+			s += len;
+		}
+	}
+	return 0;
+}
+
+
+/*
+ * Inspect inbound Real Audio control data.  Collects the 19 bytes that
+ * start at the "PNA" marker of the reply (they may arrive spread over
+ * several packets) into rap_svr.  Once all 19 bytes are present and the
+ * outbound startup block has ended, NAT and state entries are created for
+ * the UDP side channels described by rap_mode and the recorded ports.
+ * The packet's IP header is modified temporarily while doing so and is
+ * restored before returning.  Always returns 0.
+ */
+int ippr_raudio_in(fin, ip, aps, nat)
+fr_info_t *fin;
+ip_t *ip;
+ap_session_t *aps;
+nat_t *nat;
+{
+	unsigned char membuf[IPF_MAXPORTLEN + 1], *s;
+	tcphdr_t *tcp, tcph, *tcp2 = &tcph;
+	raudio_t *rap = aps->aps_data;
+	struct in_addr swa, swb;
+	int off, dlen, slen;
+	int a1, a2, a3, a4;
+	u_short sp, dp;
+	fr_info_t fi;
+	tcp_seq seq;
+	nat_t *ipn;
+	u_char swp;
+	mb_t *m;
+#if	SOLARIS
+	mb_t *m1;
+#endif
+
+	/*
+	 * Once the reply has been handled (rap_sdone set below) there is
+	 * nothing left for the proxy to do on inbound packets.
+	 */
+	if (rap->rap_sdone != 0)
+		return 0;
+
+	tcp = (tcphdr_t *)fin->fin_dp;
+	off = (ip->ip_hl << 2) + (tcp->th_off << 2);
+	m = *(mb_t **)fin->fin_mp;
+
+#if	SOLARIS
+	m = fin->fin_qfm;
+
+	dlen = msgdsize(m) - off;
+	if (dlen <= 0)
+		return 0;
+	/*
+	 * dlen bounds every access to membuf below, so limit it to what
+	 * is actually copied into the buffer.
+	 */
+	if (dlen > (int)sizeof(membuf))
+		dlen = sizeof(membuf);
+	bzero(membuf, sizeof(membuf));
+	copyout_mblk(m, off, dlen, (char *)membuf);
+#else
+	dlen = mbufchainlen(m) - off;
+	if (dlen <= 0)
+		return 0;
+	/*
+	 * dlen bounds every access to membuf below, so limit it to what
+	 * is actually copied into the buffer.
+	 */
+	if (dlen > (int)sizeof(membuf))
+		dlen = sizeof(membuf);
+	bzero(membuf, sizeof(membuf));
+	m_copydata(m, off, dlen, (char *)membuf);
+#endif
+
+	seq = ntohl(tcp->th_seq);
+	/*
+	 * Check to see if the data in this packet is of interest to us.
+	 * We only care for the first 19 bytes coming back from the server.
+	 */
+	if (rap->rap_sseq == 0) {
+		s = (u_char *)memstr("PNA", (char *)membuf, 3, dlen);
+		if (s == NULL)
+			return 0;
+		/* skip whatever precedes "PNA"; it becomes offset 0 */
+		a1 = s - membuf;
+		dlen -= a1;
+		a1 = 0;
+		rap->rap_sseq = seq;
+		a2 = MIN(dlen, sizeof(rap->rap_svr));
+	} else if (seq <= rap->rap_sseq + sizeof(rap->rap_svr)) {
+		/*
+		 * seq # which is the start of data and from that the offset
+		 * into the buffer array.
+		 */
+		a1 = seq - rap->rap_sseq;
+		a2 = MIN(dlen, sizeof(rap->rap_svr));
+		a2 -= a1;
+		s = membuf;
+	} else
+		return 0;
+
+	/*
+	 * Copy a2 bytes into rap_svr starting at offset a1, flagging each
+	 * filled position in rap_sbf.
+	 */
+	for (a3 = a1, a4 = a2; (a4 > 0) && (a3 < 19) && (a3 >= 0); a4--,a3++) {
+		rap->rap_sbf |= (1 << a3);
+		rap->rap_svr[a3] = *s++;
+	}
+
+	if ((rap->rap_sbf != 0x7ffff) || (!rap->rap_eos))	/* 19 bits */
+		return 0;
+	rap->rap_sdone = 1;
+
+	/*
+	 * Bytes 11-12 of the reply hold a message id; for RA_ID_ROBUST the
+	 * following two bytes are a port number.
+	 */
+	s = (u_char *)rap->rap_svr + 11;
+	if (((*s << 8) | *(s + 1)) == RA_ID_ROBUST) {
+		s += 2;
+		rap->rap_srport = (*s << 8) | *(s + 1);
+	}
+
+	/*
+	 * Temporarily turn the IP header into that of a UDP packet between
+	 * the NAT'd inside address and the outside address; the original
+	 * values are restored before returning.
+	 */
+	swp = ip->ip_p;
+	swa = ip->ip_src;
+	swb = ip->ip_dst;
+
+	ip->ip_p = IPPROTO_UDP;
+	ip->ip_src = nat->nat_inip;
+	ip->ip_dst = nat->nat_oip;
+
+	/*
+	 * Build a private copy of the packet info that points at a dummy
+	 * header carrying the ports and uses the proxy's own filter rule.
+	 */
+	bcopy((char *)fin, (char *)&fi, sizeof(fi));
+	bzero((char *)tcp2, sizeof(*tcp2));
+	tcp2->th_off = 5;
+	fi.fin_dp = (char *)tcp2;
+	fi.fin_fr = &raudiofr;
+	fi.fin_dlen = sizeof(*tcp2);
+	tcp2->th_win = htons(8192);
+	slen = ip->ip_len;
+	ip->ip_len = fin->fin_hlen + sizeof(*tcp);
+
+	if (((rap->rap_mode & RAP_M_UDP_ROBUST) == RAP_M_UDP_ROBUST) &&
+	    (rap->rap_srport != 0)) {
+		dp = rap->rap_srport;
+		sp = rap->rap_prport;
+		tcp2->th_sport = htons(sp);
+		tcp2->th_dport = htons(dp);
+		fi.fin_data[0] = dp;
+		fi.fin_data[1] = sp;
+		ipn = nat_new(nat->nat_ptr, ip, &fi, 
+			      IPN_UDP | (sp ? 0 : FI_W_SPORT), NAT_OUTBOUND);
+		if (ipn != NULL) {
+			ipn->nat_age = fr_defnatage;
+			(void) fr_addstate(ip, &fi, sp ? 0 : FI_W_SPORT);
+		}
+	}
+
+	if ((rap->rap_mode & RAP_M_UDP) == RAP_M_UDP) {
+		sp = rap->rap_plport;
+		tcp2->th_sport = htons(sp);
+		tcp2->th_dport = 0; /* XXX - don't specify remote port */
+		fi.fin_data[0] = sp;
+		fi.fin_data[1] = 0;
+		ipn = nat_new(nat->nat_ptr, ip, &fi, IPN_UDP|FI_W_DPORT,
+			      NAT_OUTBOUND);
+		if (ipn != NULL) {
+			ipn->nat_age = fr_defnatage;
+			(void) fr_addstate(ip, &fi, FI_W_DPORT);
+		}
+	}
+
+	ip->ip_p = swp;
+	ip->ip_len = slen;
+	ip->ip_src = swa;
+	ip->ip_dst = swb;
+	return 0;
+}
diff --git a/sys/netinet/ip_rcmd_pxy.c b/sys/netinet/ip_rcmd_pxy.c
new file mode 100644
index 0000000..0ae0210
--- /dev/null
+++ b/sys/netinet/ip_rcmd_pxy.c
@@ -0,0 +1,174 @@
+/*
+ * $Id: ip_rcmd_pxy.c,v 1.4.2.4 2000/11/01 14:34:20 darrenr Exp $
+ */
+/*
+ * Simple RCMD transparent proxy for in-kernel use.  For use with the NAT
+ * code.
+ * $FreeBSD$
+ */
+#if SOLARIS && defined(_KERNEL)
+extern	kmutex_t	ipf_rw;
+#endif
+
+#define	isdigit(x)	((x) >= '0' && (x) <= '9')
+
+#define	IPF_RCMD_PROXY
+
+
+int ippr_rcmd_init __P((void));
+int ippr_rcmd_new __P((fr_info_t *, ip_t *, ap_session_t *, nat_t *));
+int ippr_rcmd_out __P((fr_info_t *, ip_t *, ap_session_t *, nat_t *));
+u_short ipf_rcmd_atoi __P((char *));
+int ippr_rcmd_portmsg __P((fr_info_t *, ip_t *, ap_session_t *, nat_t *));
+
+static	frentry_t	rcmdfr;
+
+
+/*
+ * RCMD application proxy initialization: reset the private filter rule
+ * (rcmdfr) that ippr_rcmd_portmsg() attaches to the state entries it
+ * creates.  Always returns 0.
+ */
+int ippr_rcmd_init()
+{
+	frentry_t *fr = &rcmdfr;
+
+	bzero((char *)fr, sizeof(*fr));
+	fr->fr_flags = FR_INQUE|FR_PASS|FR_QUICK|FR_KEEPSTATE;
+	fr->fr_ref = 1;
+	return 0;
+}
+
+
+/*
+ * Setup for a new RCMD proxy.  The session data is a single u_32_t which
+ * ippr_rcmd_portmsg() uses to remember the sequence number of the first
+ * data byte; the ports of the initiating packet are saved in the session.
+ * Returns 0 on success, -1 if no memory is available.
+ */
+int ippr_rcmd_new(fin, ip, aps, nat)
+fr_info_t *fin;
+ip_t *ip;
+ap_session_t *aps;
+nat_t *nat;
+{
+	tcphdr_t *tcp = (tcphdr_t *)fin->fin_dp;
+
+	KMALLOCS(aps->aps_data, u_32_t *, sizeof(u_32_t));
+	if (aps->aps_data == NULL)
+		return -1;
+	/*
+	 * Only record a size once the allocation has succeeded so that
+	 * aps_psiz never describes a buffer which does not exist.
+	 */
+	aps->aps_psiz = sizeof(u_32_t);
+	*(u_32_t *)aps->aps_data = 0;
+	aps->aps_sport = tcp->th_sport;
+	aps->aps_dport = tcp->th_dport;
+	return 0;
+}
+
+
+/*
+ * ipf_rcmd_atoi - implement a simple version of atoi
+ *
+ * Converts the leading run of decimal digits in ptr to a number and stops
+ * at the first non-digit or the end of the string.  Returns 0 if there
+ * are no leading digits or if the value does not fit in 16 bits; such a
+ * value cannot be a port number and must not wrap around into one.
+ */
+u_short ipf_rcmd_atoi(ptr)
+char *ptr;
+{
+	register char *s = ptr, c;
+	register unsigned int i = 0;
+
+	while ((c = *s++) && isdigit(c)) {
+		i *= 10;
+		i += c - '0';
+		if (i > 0xffff)
+			return 0;
+	}
+	return (u_short)i;
+}
+
+
+/*
+ * Look for the port number message at the very start of the data stream.
+ * The SYN packet is used to remember the sequence number of the first data
+ * byte; the packet carrying that sequence number is then parsed for an
+ * ASCII decimal port number.  If one is found and no matching NAT entry
+ * exists yet, a skeleton NAT entry and a state entry are created for the
+ * connection that will come back to that port.  The packet's IP header is
+ * modified temporarily while doing so and restored afterwards.
+ * Always returns 0.
+ */
+int ippr_rcmd_portmsg(fin, ip, aps, nat)
+fr_info_t *fin;
+ip_t *ip;
+ap_session_t *aps;
+nat_t *nat;
+{
+	char portbuf[8], *s;
+	struct in_addr swip;
+	u_short sp, dp;
+	int off, dlen;
+	tcphdr_t *tcp, tcph, *tcp2 = &tcph;
+	fr_info_t fi;
+	nat_t *ipn;
+	mb_t *m;
+#if	SOLARIS
+	mb_t *m1;
+#endif
+
+	tcp = (tcphdr_t *)fin->fin_dp;
+
+	/* first data byte follows the SYN; kept in network byte order */
+	if (tcp->th_flags & TH_SYN) {
+		*(u_32_t *)aps->aps_data = htonl(ntohl(tcp->th_seq) + 1);
+		return 0;
+	}
+
+	/* only the packet starting at that sequence number is of interest */
+	if ((*(u_32_t *)aps->aps_data != 0) &&
+	    (tcp->th_seq != *(u_32_t *)aps->aps_data))
+		return 0;
+
+	off = (ip->ip_hl << 2) + (tcp->th_off << 2);
+
+#if	SOLARIS
+	m = fin->fin_qfm;
+
+	dlen = msgdsize(m) - off;
+	/*
+	 * No payload, nothing to parse.  A negative dlen must not reach
+	 * MIN() below, where it would be compared as a huge unsigned value.
+	 */
+	if (dlen <= 0)
+		return 0;
+	bzero(portbuf, sizeof(portbuf));
+	copyout_mblk(m, off, MIN(sizeof(portbuf), dlen), portbuf);
+#else
+	m = *(mb_t **)fin->fin_mp;
+	dlen = mbufchainlen(m) - off;
+	/*
+	 * No payload, nothing to parse.  A negative dlen must not reach
+	 * MIN() below, where it would be compared as a huge unsigned value.
+	 */
+	if (dlen <= 0)
+		return 0;
+	bzero(portbuf, sizeof(portbuf));
+	m_copydata(m, off, MIN(sizeof(portbuf), dlen), portbuf);
+#endif
+
+	portbuf[sizeof(portbuf) - 1] = '\0';
+	s = portbuf;
+	sp = ipf_rcmd_atoi(s);
+	if (!sp)
+		return 0;
+
+	/*
+	 * Add skeleton NAT entry for connection which will come back the
+	 * other way.
+	 */
+	sp = htons(sp);
+	dp = htons(fin->fin_data[1]);
+	ipn = nat_outlookup(fin->fin_ifp, IPN_TCP, nat->nat_p, nat->nat_inip,
+			    ip->ip_dst, (dp << 16) | sp, 0);
+	if (ipn == NULL) {
+		int slen;
+
+		slen = ip->ip_len;
+		ip->ip_len = fin->fin_hlen + sizeof(*tcp);
+		bcopy((char *)fin, (char *)&fi, sizeof(fi));
+		bzero((char *)tcp2, sizeof(*tcp2));
+		tcp2->th_win = htons(8192);
+		tcp2->th_sport = sp;
+		tcp2->th_dport = 0; /* XXX - don't specify remote port */
+		tcp2->th_off = 5;
+		fi.fin_data[0] = ntohs(sp);
+		fi.fin_data[1] = 0;
+		fi.fin_dp = (char *)tcp2;
+		fi.fin_dlen = sizeof(*tcp2);
+		swip = ip->ip_src;
+		ip->ip_src = nat->nat_inip;
+		ipn = nat_new(nat->nat_ptr, ip, &fi, IPN_TCP|FI_W_DPORT,
+			      NAT_OUTBOUND);
+		if (ipn != NULL) {
+			ipn->nat_age = fr_defnatage;
+			fi.fin_fr = &rcmdfr;
+			(void) fr_addstate(ip, &fi, FI_W_DPORT);
+		}
+		ip->ip_len = slen;
+		ip->ip_src = swip;
+	}
+	return 0;
+}
+
+
+/*
+ * Outbound packet hook of the RCMD proxy.  All of the work is done by
+ * ippr_rcmd_portmsg(), whose result is handed straight back.
+ */
+int ippr_rcmd_out(fin, ip, aps, nat)
+fr_info_t *fin;
+ip_t *ip;
+ap_session_t *aps;
+nat_t *nat;
+{
+	int rv;
+
+	rv = ippr_rcmd_portmsg(fin, ip, aps, nat);
+	return rv;
+}
diff --git a/sys/netinet/ip_state.c b/sys/netinet/ip_state.c
new file mode 100644
index 0000000..8cfe62d
--- /dev/null
+++ b/sys/netinet/ip_state.c
@@ -0,0 +1,1901 @@
+/*
+ * Copyright (C) 1995-2000 by Darren Reed.
+ *
+ * Redistribution and use in source and binary forms are permitted
+ * provided that this notice is preserved and due credit is given
+ * to the original author and the contributors.
+ */
+#if !defined(lint)
+static const char sccsid[] = "@(#)ip_state.c	1.8 6/5/96 (C) 1993-1995 Darren Reed";
+static const char rcsid[] = "@(#)$FreeBSD$";
+#endif
+
+#include <sys/errno.h>
+#include <sys/types.h>
+#include <sys/param.h>
+#include <sys/file.h>
+#if defined(__NetBSD__) && (NetBSD >= 199905) && !defined(IPFILTER_LKM) && \
+    defined(_KERNEL)
+# include "opt_ipfilter_log.h"
+#endif
+#if defined(_KERNEL) && defined(__FreeBSD_version) && \
+    (__FreeBSD_version >= 400000) && !defined(KLD_MODULE)
+#include "opt_inet6.h"
+#endif
+#if !defined(_KERNEL) && !defined(KERNEL) && !defined(__KERNEL__)
+# include <stdio.h>
+# include <stdlib.h>
+# include <string.h>
+#else
+# ifdef linux
+#  include <linux/kernel.h>
+#  include <linux/module.h>
+# endif
+#endif
+#if (defined(KERNEL) || defined(_KERNEL)) && (__FreeBSD_version >= 220000)
+# include <sys/filio.h>
+# include <sys/fcntl.h>
+# if (__FreeBSD_version >= 300000) && !defined(IPFILTER_LKM)
+#  include "opt_ipfilter.h"
+# endif
+#else
+# include <sys/ioctl.h>
+#endif
+#include <sys/time.h>
+#include <sys/uio.h>
+#ifndef linux
+# include <sys/protosw.h>
+#endif
+#include <sys/socket.h>
+#if (defined(_KERNEL) || defined(KERNEL)) && !defined(linux)
+# include <sys/systm.h>
+#endif
+#if !defined(__SVR4) && !defined(__svr4__)
+# ifndef linux
+#  include <sys/mbuf.h>
+# endif
+#else
+# include <sys/filio.h>
+# include <sys/byteorder.h>
+# ifdef _KERNEL
+#  include <sys/dditypes.h>
+# endif
+# include <sys/stream.h>
+# include <sys/kmem.h>
+#endif
+
+#include <net/if.h>
+#ifdef sun
+# include <net/af.h>
+#endif
+#include <net/route.h>
+#include <netinet/in.h>
+#include <netinet/in_systm.h>
+#include <netinet/ip.h>
+#include <netinet/tcp.h>
+#ifndef linux
+# include <netinet/ip_var.h>
+# include <netinet/tcp_fsm.h>
+#endif
+#include <netinet/udp.h>
+#include <netinet/ip_icmp.h>
+#include "netinet/ip_compat.h"
+#include <netinet/tcpip.h>
+#include "netinet/ip_fil.h"
+#include "netinet/ip_nat.h"
+#include "netinet/ip_frag.h"
+#include "netinet/ip_proxy.h"
+#include "netinet/ip_state.h"
+#ifdef	USE_INET6
+#include <netinet/icmp6.h>
+#endif
+#if (__FreeBSD_version >= 300000)
+# include <sys/malloc.h>
+# if (defined(_KERNEL) || defined(KERNEL)) && !defined(IPFILTER_LKM)
+#  include <sys/libkern.h>
+#  include <sys/systm.h>
+# endif
+#endif
+
+#ifndef	MIN
+# define	MIN(a,b)	(((a)<(b))?(a):(b))
+#endif
+
+#define	TCP_CLOSE	(TH_FIN|TH_RST)
+
+static ipstate_t **ips_table = NULL;
+static ipstate_t *ips_list = NULL;
+static int	ips_num = 0;
+static int	ips_wild = 0;
+static ips_stat_t ips_stats;
+#if	(SOLARIS || defined(__sgi)) && defined(_KERNEL)
+extern	KRWLOCK_T	ipf_state, ipf_mutex;
+extern	kmutex_t	ipf_rw;
+#endif
+
+#ifdef	USE_INET6
+static frentry_t *fr_checkicmp6matchingstate __P((ip6_t *, fr_info_t *));
+#endif
+static int fr_matchsrcdst __P((ipstate_t *, union i6addr, union i6addr,
+			       fr_info_t *, tcphdr_t *));
+static frentry_t *fr_checkicmpmatchingstate __P((ip_t *, fr_info_t *));
+static int fr_matchicmpqueryreply __P((int, ipstate_t *, icmphdr_t *));
+static int fr_state_flush __P((int));
+static ips_stat_t *fr_statetstats __P((void));
+static void fr_delstate __P((ipstate_t *));
+static int fr_state_remove __P((caddr_t));
+static void fr_ipsmove __P((ipstate_t **, ipstate_t *, u_int));
+int fr_stputent __P((caddr_t));
+int fr_stgetent __P((caddr_t));
+void fr_stinsert __P((ipstate_t *));
+
+
+/*
+ * State timeouts and table limits (non-static, so visible to other files).
+ * The constants only agree with their comments if one unit is half a
+ * second (2 * 5 * 86400 = 5 days, 240 = 2 minutes).
+ * NOTE(review): confirm the unit against the timer that ages state
+ * entries; it is not visible here.
+ */
+#define	FIVE_DAYS	(2 * 5 * 86400)	/* 5 days: half closed session */
+
+#define	TCP_MSL	240			/* 2 minutes */
+u_long	fr_tcpidletimeout = FIVE_DAYS,
+	fr_tcpclosewait = 2 * TCP_MSL,
+	fr_tcplastack = 2 * TCP_MSL,
+	fr_tcptimeout = 2 * TCP_MSL,
+	fr_tcpclosed = 120,
+	fr_tcphalfclosed = 2 * 2 * 3600,    /* 2 hours */
+	fr_udptimeout = 240,
+	fr_icmptimeout = 120;
+/* maximum number of state entries and number of hash buckets */
+int	fr_statemax = IPSTATE_MAX,
+	fr_statesize = IPSTATE_SIZE;
+/*
+ * fr_state_doflush is set by fr_addstate() when the table is full;
+ * fr_state_lock gates fr_addstate() and the SIOCSTPUT/SIOCSTGET ioctls.
+ */
+int	fr_state_doflush = 0,
+	fr_state_lock = 0;
+
+static 	int icmpreplytype4[ICMP_MAXTYPE + 1];
+
+/*
+ * Allocate and clear the state hash table (fr_statesize buckets) and fill
+ * in the ICMP query -> reply type lookup table.  Returns 0 on success and
+ * -1 if the hash table could not be allocated.
+ */
+int fr_stateinit()
+{
+	int type;
+
+	KMALLOCS(ips_table, ipstate_t **, fr_statesize * sizeof(ipstate_t *));
+	if (ips_table == NULL)
+		return -1;
+	bzero((char *)ips_table, fr_statesize * sizeof(ipstate_t *));
+
+	/* -1 marks ICMP types which have no matching reply type */
+	for (type = ICMP_MAXTYPE; type >= 0; type--)
+		icmpreplytype4[type] = -1;
+	icmpreplytype4[ICMP_MASKREQ] = ICMP_MASKREPLY;
+	icmpreplytype4[ICMP_IREQ] = ICMP_IREQREPLY;
+	icmpreplytype4[ICMP_TSTAMP] = ICMP_TSTAMPREPLY;
+	icmpreplytype4[ICMP_ECHO] = ICMP_ECHOREPLY;
+
+	return 0;
+}
+
+
+/*
+ * Refresh the fields of ips_stats which mirror file-local variables
+ * (entry count, hash table and list head) and return a pointer to it.
+ */
+static ips_stat_t *fr_statetstats()
+{
+	ips_stat_t *st = &ips_stats;
+
+	st->iss_list = ips_list;
+	st->iss_table = ips_table;
+	st->iss_active = ips_num;
+	return st;
+}
+
+
+/*
+ * Flush entries from the state list and return how many were removed.
+ * which == 0 : remove every entry
+ * which == 1 : remove TCP entries for which either side is no longer in
+ *	        the ESTABLISHED state
+ * Any other value removes nothing.
+ */
+static int fr_state_flush(which)
+int which;
+{
+	register ipstate_t *is, **isp;
+#if defined(_KERNEL) && !SOLARIS
+	int s;
+#endif
+	int delete, removed = 0;
+
+	SPL_NET(s);
+	isp = &ips_list;
+	while ((is = *isp) != NULL) {
+		if (which == 0)
+			delete = 1;
+		else if ((which == 1) && (is->is_p == IPPROTO_TCP) &&
+			 ((is->is_state[0] != TCPS_ESTABLISHED) ||
+			  (is->is_state[1] != TCPS_ESTABLISHED)))
+			delete = 1;
+		else
+			delete = 0;
+
+		if (!delete) {
+			isp = &is->is_next;
+			continue;
+		}
+
+		/*
+		 * isp is not advanced after a removal; this relies on
+		 * fr_delstate() unlinking the entry so that *isp then
+		 * refers to its successor.
+		 */
+		if (is->is_p != IPPROTO_TCP)
+			ips_stats.iss_expire++;
+		else
+			ips_stats.iss_fin++;
+#ifdef	IPFILTER_LOG
+		ipstate_log(is, ISL_FLUSH);
+#endif
+		fr_delstate(is);
+		removed++;
+	}
+	SPL_X(s);
+	return removed;
+}
+
+
+/*
+ * Remove the first state entry matching the ipstate_t supplied by the
+ * caller.  An entry matches when protocol, IP version, source address,
+ * destination address and the protocol specific data (is_ps) are all
+ * equal.  Returns 0 if an entry was removed, ESRCH if none matched and
+ * EFAULT if the description could not be copied in.
+ */
+static int fr_state_remove(data)
+caddr_t data;
+{
+	ipstate_t *sp, st;
+	int error;
+
+	error = IRCOPYPTR(data, (caddr_t)&st, sizeof(st));
+	if (error)
+		return EFAULT;
+
+	/*
+	 * Hold the state lock for the search as well as the removal so the
+	 * list cannot change underneath the walk.
+	 */
+	WRITE_ENTER(&ipf_state);
+	for (sp = ips_list; sp; sp = sp->is_next)
+		if ((sp->is_p == st.is_p) && (sp->is_v == st.is_v) &&
+		    !bcmp((char *)&sp->is_src, (char *)&st.is_src,
+			  sizeof(st.is_src)) &&
+		    !bcmp((char *)&sp->is_dst, (char *)&st.is_dst,
+			  sizeof(st.is_dst)) &&
+		    !bcmp((char *)&sp->is_ps, (char *)&st.is_ps,
+			  sizeof(st.is_ps))) {
+#ifdef	IPFILTER_LOG
+			ipstate_log(sp, ISL_REMOVE);
+#endif
+			fr_delstate(sp);
+			RWLOCK_EXIT(&ipf_state);
+			return 0;
+		}
+	RWLOCK_EXIT(&ipf_state);
+	return ESRCH;
+}
+
+
+/*
+ * ioctl handler for the state table.
+ *   SIOCDELST  remove the entry described by the caller
+ *   SIOCIPFFL  flush entries (argument 0 or 1, see fr_state_flush());
+ *              the number removed is copied back
+ *   SIOCIPFFB  clear the state log (needs FWRITE); the result of
+ *              ipflog_clear() is copied back
+ *   SIOCGETFS  copy out the state statistics
+ *   FIONREAD   copy out iplused[IPL_LOGSTATE] (only with IPFILTER_LOG)
+ *   SIOCSTLCK  set or clear fr_state_lock
+ *   SIOCSTPUT  load a state entry, only while fr_state_lock is set
+ *   SIOCSTGET  fetch a state entry, only while fr_state_lock is set
+ * Returns 0 or an errno value; unknown commands give EINVAL.
+ */
+int fr_state_ioctl(data, cmd, mode)
+caddr_t data;
+#if defined(__NetBSD__) || defined(__OpenBSD__)
+u_long cmd;
+#else
+int cmd;
+#endif
+int mode;
+{
+	int arg, ret, error = 0;
+
+	switch (cmd)
+	{
+	case SIOCDELST :
+		error = fr_state_remove(data);
+		break;
+	case SIOCIPFFL :
+		error = IRCOPY(data, (caddr_t)&arg, sizeof(arg));
+		if (error)
+			break;
+		if (arg == 0 || arg == 1) {
+			WRITE_ENTER(&ipf_state);
+			ret = fr_state_flush(arg);
+			RWLOCK_EXIT(&ipf_state);
+			error = IWCOPY((caddr_t)&ret, data, sizeof(ret));
+		} else
+			error = EINVAL;
+		break;
+#ifdef	IPFILTER_LOG
+	case SIOCIPFFB :
+		if (!(mode & FWRITE))
+			error = EPERM;
+		else {
+			int tmp;
+
+			tmp = ipflog_clear(IPL_LOGSTATE);
+			/* report a failed copy-out, as SIOCIPFFL does */
+			error = IWCOPY((char *)&tmp, data, sizeof(tmp));
+		}
+		break;
+#endif
+	case SIOCGETFS :
+		error = IWCOPYPTR((caddr_t)fr_statetstats(), data,
+				  sizeof(ips_stat_t));
+		break;
+	case FIONREAD :
+#ifdef	IPFILTER_LOG
+		arg = (int)iplused[IPL_LOGSTATE];
+		error = IWCOPY((caddr_t)&arg, (caddr_t)data, sizeof(arg));
+#endif
+		break;
+	case SIOCSTLCK :
+		error = fr_lock(data, &fr_state_lock);
+		break;
+	case SIOCSTPUT :
+		if (!fr_state_lock) {
+			error = EACCES;
+			break;
+		}
+		error = fr_stputent(data);
+		break;
+	case SIOCSTGET :
+		if (!fr_state_lock) {
+			error = EACCES;
+			break;
+		}
+		error = fr_stgetent(data);
+		break;
+	default :
+		error = EINVAL;
+		break;
+	}
+	return error;
+}
+
+
+/*
+ * Copy one state entry, and the rule it refers to if any, out to the
+ * ipstate_save_t that data points at.  The caller's ips_next selects the
+ * entry: NULL means the head of the list, otherwise it must be a pointer
+ * to an entry currently on the list.  On return ips_next has been
+ * advanced to the following entry.
+ * Returns 0 on success, ENOENT if the list is empty, ESRCH if ips_next is
+ * not on the list and EFAULT if a copy to or from the caller fails.
+ */
+int fr_stgetent(data)
+caddr_t data;
+{
+	register ipstate_t *is, *isn;
+	ipstate_save_t ips, *ipsp;
+	int error;
+
+	error = IRCOPY(data, (caddr_t)&ipsp, sizeof(ipsp));
+	if (error != 0)
+		return EFAULT;
+	error = IRCOPY((caddr_t)ipsp, (caddr_t)&ips, sizeof(ips));
+	if (error != 0)
+		return EFAULT;
+
+	isn = ips.ips_next;
+	if (isn == NULL) {
+		/* no position supplied: start from the head of the list */
+		isn = ips_list;
+		if (isn == NULL)
+			return ENOENT;
+	} else {
+		/*
+		 * Make sure the pointer we're copying from exists in the
+		 * current list of entries.  Security precaution to prevent
+		 * copying of random kernel data.
+		 */
+		is = ips_list;
+		while ((is != NULL) && (is != isn))
+			is = is->is_next;
+		if (is == NULL)
+			return ESRCH;
+	}
+
+	bcopy((char *)isn, (char *)&ips.ips_is, sizeof(ips.ips_is));
+	if (isn->is_rule != NULL)
+		bcopy((char *)isn->is_rule, (char *)&ips.ips_fr,
+		      sizeof(ips.ips_fr));
+	ips.ips_next = isn->is_next;
+
+	error = IWCOPY((caddr_t)&ips, ipsp, sizeof(ips));
+	return error ? EFAULT : 0;
+}
+
+
+/*
+ * Create a state entry from the ipstate_save_t that data points at and
+ * insert it into the state table.
+ * If the saved entry refers to a rule and has FI_NEWFR set, a new rule is
+ * allocated from the saved copy, its interface is looked up by name and
+ * the resulting structure is copied back to the caller for verification.
+ * Without FI_NEWFR the rule pointer must match the rule of an entry that
+ * is already on the state list.
+ * Returns 0 on success, EFAULT if a copy to or from the caller fails,
+ * ENOMEM if memory runs out and ESRCH if the rule pointer is unknown.
+ */
+int fr_stputent(data)
+caddr_t data;
+{
+	register ipstate_t *is, *isn;
+	ipstate_save_t ips, *ipsp;
+	int error, out;
+	frentry_t *fr;
+
+	error = IRCOPY(data, (caddr_t)&ipsp, sizeof(ipsp));
+	if (error)
+		return EFAULT;
+	error = IRCOPY((caddr_t)ipsp, (caddr_t)&ips, sizeof(ips));
+	if (error)
+		return EFAULT;
+
+	KMALLOC(isn, ipstate_t *);
+	if (isn == NULL)
+		return ENOMEM;
+
+	bcopy((char *)&ips.ips_is, (char *)isn, sizeof(*isn));
+	fr = isn->is_rule;
+	if (fr != NULL) {
+		if (isn->is_flags & FI_NEWFR) {
+			KMALLOC(fr, frentry_t *);
+			if (fr == NULL) {
+				KFREE(isn);
+				return ENOMEM;
+			}
+			bcopy((char *)&ips.ips_fr, (char *)fr, sizeof(*fr));
+			out = fr->fr_flags & FR_OUTQUE ? 1 : 0;
+			isn->is_rule = fr;
+			ips.ips_is.is_rule = fr;
+			if (*fr->fr_ifname) {
+				fr->fr_ifa = GETUNIT(fr->fr_ifname, fr->fr_v);
+				if (fr->fr_ifa == NULL)
+					fr->fr_ifa = (void *)-1;
+#ifdef	_KERNEL
+				else {
+					strncpy(isn->is_ifname[out],
+						IFNAME(fr->fr_ifa), IFNAMSIZ);
+					isn->is_ifp[out] = fr->fr_ifa;
+				}
+#endif
+			} else
+				fr->fr_ifa = NULL;
+			/*
+			 * send a copy back to userland of what we ended up
+			 * to allow for verification.
+			 */
+			error = IWCOPY((caddr_t)&ips, ipsp, sizeof(ips));
+			if (error) {
+				KFREE(isn);
+				KFREE(fr);
+				return EFAULT;
+			}
+		} else {
+			/*
+			 * The rule pointer came from userland; only accept
+			 * it if an existing entry already uses that rule.
+			 * The list is shared, so search it under the lock.
+			 */
+			WRITE_ENTER(&ipf_state);
+			for (is = ips_list; is; is = is->is_next)
+				if (is->is_rule == fr)
+					break;
+			RWLOCK_EXIT(&ipf_state);
+			if (!is) {
+				KFREE(isn);
+				return ESRCH;
+			}
+		}
+	}
+	/*
+	 * fr_stinsert() modifies the list and the hash table; take the
+	 * state lock around it just as fr_addstate() does.
+	 */
+	WRITE_ENTER(&ipf_state);
+	fr_stinsert(isn);
+	RWLOCK_EXIT(&ipf_state);
+	return 0;
+}
+
+
+/*
+ * Link a fully populated state entry into the global list (ips_list) and
+ * into the hash bucket chosen by is_hv, initialise its mutex, resolve the
+ * interface pointers from the interface names stored in the entry and
+ * bump the entry count.  Entries go on the front of both chains.
+ */
+void fr_stinsert(is)
+register ipstate_t *is;
+{
+	/*
+	 * is_hv may have been supplied from outside the kernel (see
+	 * fr_stputent()), so force it into the range of the hash table
+	 * before it is used as an index.  Values computed by fr_addstate()
+	 * are already reduced and are left unchanged by this.
+	 */
+	register u_int hv = is->is_hv % fr_statesize;
+
+	is->is_hv = hv;
+
+	MUTEX_INIT(&is->is_lock, "ipf state entry", NULL);
+
+	/* make sure the names are terminated before looking them up */
+	is->is_ifname[0][sizeof(is->is_ifname[0]) - 1] = '\0';
+	if (is->is_ifname[0][0] != '\0') {
+		is->is_ifp[0] = GETUNIT(is->is_ifname[0], is->is_v);
+	}
+	is->is_ifname[1][sizeof(is->is_ifname[0]) - 1] = '\0';
+	if (is->is_ifname[1][0] != '\0') {
+		is->is_ifp[1] = GETUNIT(is->is_ifname[1], is->is_v);
+	}
+
+	/*
+	 * add into list table.
+	 */
+	if (ips_list)
+		ips_list->is_pnext = &is->is_next;
+	is->is_pnext = &ips_list;
+	is->is_next = ips_list;
+	ips_list = is;
+	/* iss_inuse counts the hash buckets which are not empty */
+	if (ips_table[hv])
+		ips_table[hv]->is_phnext = &is->is_hnext;
+	else
+		ips_stats.iss_inuse++;
+	is->is_phnext = ips_table + hv;
+	is->is_hnext = ips_table[hv];
+	ips_table[hv] = is;
+	ips_num++;
+}
+
+
+/*
+ * Create a new ipstate structure and hang it off the hash table.
+ *
+ * ip    - IP header of the packet, passed on to ipfr_newfrag()
+ * fin   - packet information; fin_fr becomes the entry's rule
+ * flags - FI_W_* wildcard flags; with a port wildcard the ports are left
+ *         out of the hash value
+ *
+ * Returns the new entry, or NULL if no state was created: state is
+ * locked, the packet is a non-first fragment or short, the table is full,
+ * the protocol or ICMP type is not handled, the packet is a TCP RST or
+ * no memory is available.
+ */
+ipstate_t *fr_addstate(ip, fin, flags)
+ip_t *ip;
+fr_info_t *fin;
+u_int flags;
+{
+	register tcphdr_t *tcp = NULL;
+	register ipstate_t *is;
+	register u_int hv;
+	ipstate_t ips;
+	u_int pass;
+	int out;
+
+	if (fr_state_lock || (fin->fin_off & IP_OFFMASK) ||
+	    (fin->fin_fi.fi_fl & FI_SHORT))
+		return NULL;
+	/* table full: count it and request a flush via fr_state_doflush */
+	if (ips_num == fr_statemax) {
+		ips_stats.iss_max++;
+		fr_state_doflush = 1;
+		return NULL;
+	}
+	out = fin->fin_out;
+	/*
+	 * The entry is first assembled in a local copy; memory is only
+	 * allocated once the packet is known to be of a supported kind.
+	 */
+	is = &ips;
+	bzero((char *)is, sizeof(*is));
+	ips.is_age = 1;
+	ips.is_state[0] = 0;
+	ips.is_state[1] = 0;
+	/*
+	 * Copy and calculate...
+	 */
+	hv = (is->is_p = fin->fin_fi.fi_p);
+	is->is_src = fin->fin_fi.fi_src;
+	hv += is->is_saddr;
+	is->is_dst = fin->fin_fi.fi_dst;
+	hv += is->is_daddr;
+#ifdef	USE_INET6
+	if (fin->fin_v == 6) {
+		if (is->is_p == IPPROTO_ICMPV6) {
+			if (IN6_IS_ADDR_MULTICAST(&is->is_dst.in6))
+				flags |= FI_W_DADDR;
+			/* take one of the addresses back out of the hash */
+			if (out)
+				hv -= is->is_daddr;
+			else
+				hv -= is->is_saddr;
+		}
+	}
+#endif
+
+	switch (is->is_p)
+	{
+#ifdef	USE_INET6
+	case IPPROTO_ICMPV6 :
+#endif
+	case IPPROTO_ICMP :
+	    {
+		struct icmp *ic = (struct icmp *)fin->fin_dp;
+
+#ifdef	USE_INET6
+		if ((is->is_p == IPPROTO_ICMPV6) &&
+		    ((ic->icmp_type & ICMP6_INFOMSG_MASK) == 0))
+			return NULL;
+#endif
+		/* state is only kept for the query types listed here */
+		switch (ic->icmp_type)
+		{
+#ifdef	USE_INET6
+		case ICMP6_ECHO_REQUEST :
+			is->is_icmp.ics_type = ICMP6_ECHO_REPLY;
+			hv += (is->is_icmp.ics_id = ic->icmp_id);
+			hv += (is->is_icmp.ics_seq = ic->icmp_seq);
+			break;
+		case ICMP6_MEMBERSHIP_QUERY :
+		case ND_ROUTER_SOLICIT :
+		case ND_NEIGHBOR_SOLICIT :
+			is->is_icmp.ics_type = ic->icmp_type + 1;
+			break;
+#endif
+		case ICMP_ECHO :
+		case ICMP_TSTAMP :
+		case ICMP_IREQ :
+		case ICMP_MASKREQ :
+			is->is_icmp.ics_type = ic->icmp_type;
+			hv += (is->is_icmp.ics_id = ic->icmp_id);
+			hv += (is->is_icmp.ics_seq = ic->icmp_seq);
+			break;
+		default :
+			return NULL;
+		}
+		ATOMIC_INCL(ips_stats.iss_icmp);
+		is->is_age = fr_icmptimeout;
+		break;
+	    }
+	case IPPROTO_TCP :
+	    {
+		tcp = (tcphdr_t *)fin->fin_dp;
+
+		if (tcp->th_flags & TH_RST)
+			return NULL;
+		/*
+		 * The endian of the ports doesn't matter, but the ack and
+		 * sequence numbers do as we do mathematics on them later.
+		 */
+		is->is_dport = tcp->th_dport;
+		is->is_sport = tcp->th_sport;
+		if ((flags & (FI_W_DPORT|FI_W_SPORT)) == 0) {
+			hv += tcp->th_dport;
+			hv += tcp->th_sport;
+		}
+		/*
+		 * Sequence number following this segment: payload length
+		 * plus one each for SYN and FIN.
+		 */
+		is->is_send = ntohl(tcp->th_seq) + fin->fin_dlen -
+			      (tcp->th_off << 2) +
+			      ((tcp->th_flags & TH_SYN) ? 1 : 0) +
+			      ((tcp->th_flags & TH_FIN) ? 1 : 0);
+		is->is_maxsend = is->is_send;
+		is->is_dend = 0;
+		is->is_maxdwin = 1;
+		is->is_maxswin = ntohs(tcp->th_win);
+		if (is->is_maxswin == 0)
+			is->is_maxswin = 1;
+		/*
+		 * If we're creating state for a starting connection, start the
+		 * timer on it as we'll never see an error if it fails to
+		 * connect.
+		 */
+		ATOMIC_INCL(ips_stats.iss_tcp);
+		break;
+	    }
+	case IPPROTO_UDP :
+	    {
+		/* only the port fields of the header are used for UDP */
+		tcp = (tcphdr_t *)fin->fin_dp;
+
+		is->is_dport = tcp->th_dport;
+		is->is_sport = tcp->th_sport;
+		if ((flags & (FI_W_DPORT|FI_W_SPORT)) == 0) {
+			hv += tcp->th_dport;
+			hv += tcp->th_sport;
+		}
+		ATOMIC_INCL(ips_stats.iss_udp);
+		is->is_age = fr_udptimeout;
+		break;
+	    }
+	default :
+		return NULL;
+	}
+
+	KMALLOC(is, ipstate_t *);
+	if (is == NULL) {
+		ATOMIC_INCL(ips_stats.iss_nomem);
+		return NULL;
+	}
+	bcopy((char *)&ips, (char *)is, sizeof(*is));
+	hv %= fr_statesize;
+	is->is_hv = hv;
+	is->is_rule = fin->fin_fr;
+	/* the entry holds a reference on its rule */
+	if (is->is_rule != NULL) {
+		ATOMIC_INC32(is->is_rule->fr_ref);
+		pass = is->is_rule->fr_flags;
+	} else
+		pass = fr_flags;
+	WRITE_ENTER(&ipf_state);
+
+	is->is_pass = pass;
+	is->is_pkts = 1;
+	is->is_bytes = fin->fin_dlen + fin->fin_hlen;
+	/*
+	 * We want to check everything that is a property of this packet,
+	 * but we don't (automatically) care about it's fragment status as
+	 * this may change.
+	 */
+	is->is_v = fin->fin_fi.fi_v;
+	is->is_opt = fin->fin_fi.fi_optmsk;
+	is->is_optmsk = 0xffffffff;
+	is->is_sec = fin->fin_fi.fi_secmsk;
+	is->is_secmsk = 0xffff;
+	is->is_auth = fin->fin_fi.fi_auth;
+	is->is_authmsk = 0xffff;
+	is->is_flags = fin->fin_fi.fi_fl & FI_CMP;
+	is->is_flags |= FI_CMP << 4;
+	is->is_flags |= flags & (FI_WILDP|FI_WILDA);
+	if (flags & (FI_WILDP|FI_WILDA))
+		ips_wild++;
+	/* only the interface this packet was seen on is known so far */
+	is->is_ifp[1 - out] = NULL;
+	is->is_ifp[out] = fin->fin_ifp;
+#ifdef	_KERNEL
+	strncpy(is->is_ifname[out], IFNAME(fin->fin_ifp), IFNAMSIZ);
+#endif
+	is->is_ifname[1 - out][0] = '\0';
+	if (pass & FR_LOGFIRST)
+		is->is_pass &= ~(FR_LOGFIRST|FR_LOG);
+	fr_stinsert(is);
+	if (is->is_p == IPPROTO_TCP) {
+		MUTEX_ENTER(&is->is_lock);
+		fr_tcp_age(&is->is_age, is->is_state, fin,
+			   0); /* 0 = packet from the source */
+		MUTEX_EXIT(&is->is_lock);
+	}
+#ifdef	IPFILTER_LOG
+	ipstate_log(is, ISL_NEW);
+#endif
+	RWLOCK_EXIT(&ipf_state);
+	fin->fin_rev = IP6NEQ(is->is_dst, fin->fin_fi.fi_dst);
+	if ((fin->fin_fi.fi_fl & FI_FRAG) && (pass & FR_KEEPFRAG))
+		ipfr_newfrag(ip, fin, pass ^ FR_KEEPSTATE);
+	return is;
+}
+
+
+
+/*
+ * check to see if a packet with TCP headers fits within the TCP window.
+ * change timeout depending on whether new packet is a SYN-ACK returning for a
+ * SYN or a RST or FIN which indicate time to close up shop.
+ *
+ * is  - state entry the packet has been matched against
+ * fin - packet information
+ * ip  - IP header (not used in this function)
+ * tcp - TCP header of the packet
+ *
+ * Returns 1 if the packet is within the window, in which case the window
+ * tracking data and the entry's age have been updated, otherwise 0.
+ */
+int fr_tcpstate(is, fin, ip, tcp)
+register ipstate_t *is;
+fr_info_t *fin;
+ip_t *ip;
+tcphdr_t *tcp;
+{
+	register tcp_seq seq, ack, end;
+	register int ackskew;
+	tcpdata_t  *fdata, *tdata;
+	u_short	win, maxwin;
+	int ret = 0;
+	int source;
+
+	/*
+	 * Find difference between last checked packet and this packet.
+	 */
+	/*
+	 * source is non-zero when the packet's source address equals the
+	 * source address stored in the entry; fdata is the tracking data
+	 * for the sending side of this packet, tdata for the receiving side.
+	 */
+	source = IP6EQ(fin->fin_fi.fi_src, is->is_src);
+	fdata = &is->is_tcp.ts_data[!source];
+	tdata = &is->is_tcp.ts_data[source];
+	seq = ntohl(tcp->th_seq);
+	ack = ntohl(tcp->th_ack);
+	win = ntohs(tcp->th_win);
+	/* sequence number following this segment; SYN and FIN count as 1 */
+	end = seq + fin->fin_dlen - (tcp->th_off << 2) +
+	       ((tcp->th_flags & TH_SYN) ? 1 : 0) +
+	       ((tcp->th_flags & TH_FIN) ? 1 : 0);
+
+	MUTEX_ENTER(&is->is_lock);
+	if (fdata->td_end == 0) {
+		/*
+		 * Must be a (outgoing) SYN-ACK in reply to a SYN.
+		 */
+		fdata->td_end = end;
+		fdata->td_maxwin = 1;
+		fdata->td_maxend = end + 1;
+	}
+
+	if (!(tcp->th_flags & TH_ACK)) {  /* Pretend an ack was sent */
+		ack = tdata->td_end;
+	} else if (((tcp->th_flags & (TH_ACK|TH_RST)) == (TH_ACK|TH_RST)) &&
+		   (ack == 0)) {
+		/* gross hack to get around certain broken tcp stacks */
+		ack = tdata->td_end;
+	}
+
+	/* a segment without data or SYN/FIN is placed at the current end */
+	if (seq == end)
+		seq = end = fdata->td_end;
+
+	maxwin = tdata->td_maxwin;
+	ackskew = tdata->td_end - ack;
+
+/* comparisons of sequence numbers which allow for wrap around */
+#define	SEQ_GE(a,b)	((int)((a) - (b)) >= 0)
+#define	SEQ_GT(a,b)	((int)((a) - (b)) > 0)
+	if ((SEQ_GE(fdata->td_maxend, end)) &&
+	    (SEQ_GE(seq, fdata->td_end - maxwin)) &&
+/* XXX what about big packets */
+#define MAXACKWINDOW 66000
+	    (ackskew >= -MAXACKWINDOW) &&
+	    (ackskew <= MAXACKWINDOW)) {
+		/* if ackskew < 0 then this should be due to fragmented
+		 * packets. There is no way to know the length of the
+		 * total packet in advance.
+		 * We do know the total length from the fragment cache though.
+		 * Note however that there might be more sessions with
+		 * exactly the same source and destination parameters in the
+		 * state cache (and source and destination is the only stuff
+		 * that is saved in the fragment cache). Note further that
+		 * some TCP connections in the state cache are hashed with
+		 * sport and dport as well which makes it not worthwhile to
+		 * look for them.
+		 * Thus, when ackskew is negative but still seems to belong
+		 * to this session, we bump up the destinations end value.
+		 */
+		if (ackskew < 0)
+			tdata->td_end = ack;
+
+		/* update max window seen */
+		if (fdata->td_maxwin < win)
+			fdata->td_maxwin = win;
+		if (SEQ_GT(end, fdata->td_end))
+			fdata->td_end = end;
+		/*
+		 * ack + win is the highest sequence number the other side
+		 * may now send up to.
+		 */
+		if (SEQ_GE(ack + win, tdata->td_maxend)) {
+			tdata->td_maxend = ack + win;
+			if (win == 0)
+				tdata->td_maxend++;
+		}
+
+		ATOMIC_INCL(ips_stats.iss_hits);
+		/*
+		 * Nearing end of connection, start timeout.
+		 */
+		/* source ? 0 : 1 -> !source */
+		fr_tcp_age(&is->is_age, is->is_state, fin, !source);
+		ret = 1;
+	}
+	MUTEX_EXIT(&is->is_lock);
+	return ret;
+}
+
+
+/*
+ * Decide whether a packet belongs to the state entry 'is'.
+ *
+ *   is       - candidate state entry
+ *   src, dst - source and destination address taken from the packet
+ *   fin      - packet information (interface, direction, flag/option bits)
+ *   tcp      - TCP/UDP header of the packet, or NULL when ports are not
+ *              to be compared (the ICMP callers pass NULL)
+ *
+ * Returns 1 when the packet matches and 0 otherwise.
+ *
+ * Side effects:
+ *   - fin->fin_rev is always written, even when there is no match;
+ *   - on a match, wildcard ports in the entry are filled in from the
+ *     packet, the wildcard-port flags are cleared and ips_wild is
+ *     decremented;
+ *   - on a match, an interface slot that is still NULL is set to the
+ *     packet's interface.
+ */
+static int fr_matchsrcdst(is, src, dst, fin, tcp)
+ipstate_t *is;
+union i6addr src, dst;
+fr_info_t *fin;
+tcphdr_t *tcp;
+{
+	int ret = 0, rev, out, flags;
+	u_short sp, dp;
+	void *ifp;
+
+	/*
+	 * The packet is treated as travelling in the reverse direction
+	 * whenever its destination differs from the one stored in the entry.
+	 */
+	rev = fin->fin_rev = IP6NEQ(is->is_dst, dst);
+	ifp = fin->fin_ifp;
+	out = fin->fin_out;
+
+	if (tcp != NULL) {
+		flags = is->is_flags;
+		sp = tcp->th_sport;
+		dp = tcp->th_dport;
+	} else {
+		flags = is->is_flags & FI_WILDA;
+		sp = 0;
+		dp = 0;
+	}
+
+	/*
+	 * Interface check: a forward packet must use the entry's "in"
+	 * interface when inbound and its "out" interface when outbound; for a
+	 * reverse packet the roles are swapped.  A NULL slot matches any
+	 * interface.
+	 */
+	if (rev == 0) {
+		if (!out) {
+			if (is->is_ifpin == NULL || is->is_ifpin == ifp)
+				ret = 1;
+		} else {
+			if (is->is_ifpout == NULL || is->is_ifpout == ifp)
+				ret = 1;
+		}
+	} else {
+		if (out) {
+			if (is->is_ifpin == NULL || is->is_ifpin == ifp)
+				ret = 1;
+		} else {
+			if (is->is_ifpout == NULL || is->is_ifpout == ifp)
+				ret = 1;
+		}
+	}
+	if (ret == 0)
+		return 0;
+	ret = 0;
+
+	/*
+	 * Address and (when tcp != NULL) port comparison, honouring the
+	 * wildcard flags.  For a reverse packet the packet's source is
+	 * compared against the entry's destination and vice versa.
+	 */
+	if (rev == 0) {
+		if (
+		    (IP6EQ(is->is_dst, dst) || (flags & FI_W_DADDR)) &&
+		    (IP6EQ(is->is_src, src) || (flags & FI_W_SADDR)) &&
+		    (!tcp || ((sp == is->is_sport || flags & FI_W_SPORT) &&
+		     (dp == is->is_dport || flags & FI_W_DPORT)))) {
+			ret = 1;
+		}
+	} else {
+		if (
+		    (IP6EQ(is->is_dst, src) || (flags & FI_W_DADDR)) &&
+		    (IP6EQ(is->is_src, dst) || (flags & FI_W_SADDR)) &&
+		    (!tcp || ((sp == is->is_dport || flags & FI_W_DPORT) &&
+		     (dp == is->is_sport || flags & FI_W_SPORT)))) {
+			ret = 1;
+		}
+	}
+	if (ret == 0)
+		return 0;
+
+	/*
+	 * Whether or not this should be here, is questionable, but the aim
+	 * is to get this out of the main line.
+	 */
+	if (tcp == NULL)
+		flags = is->is_flags & (FI_CMP|(FI_CMP<<4));
+
+	/*
+	 * Compare the packet's flag bits, IP options, security options and
+	 * authentication options with what was recorded in the entry.
+	 */
+	if (((fin->fin_fi.fi_fl & (flags >> 4)) != (flags & FI_CMP)) ||
+	    ((fin->fin_fi.fi_optmsk & is->is_optmsk) != is->is_opt) ||
+	    ((fin->fin_fi.fi_secmsk & is->is_secmsk) != is->is_sec) ||
+	    ((fin->fin_fi.fi_auth & is->is_authmsk) != is->is_auth))
+		return 0;
+
+	/*
+	 * First packet to hit a wildcard-port entry: pin the port and seed
+	 * the sequence tracking from this packet, then drop the wildcard.
+	 *
+	 * NOTE(review): this block dereferences tcp.  With tcp == NULL it is
+	 * only skipped if FI_W_SPORT/FI_W_DPORT lie outside
+	 * FI_CMP|(FI_CMP<<4) -- confirm against the flag definitions.
+	 */
+	if ((flags & (FI_W_SPORT|FI_W_DPORT))) {
+		if ((flags & FI_W_SPORT) != 0) {
+			if (rev == 0) {
+				is->is_sport = sp;
+				is->is_send = htonl(tcp->th_seq);
+			} else {
+				is->is_sport = dp;
+				is->is_send = htonl(tcp->th_ack);
+			}
+			is->is_maxsend = is->is_send + 1;
+		} else if ((flags & FI_W_DPORT) != 0) {
+			if (rev == 0) {
+				is->is_dport = dp;
+				is->is_dend = htonl(tcp->th_ack);
+			} else {
+				is->is_dport = sp;
+				is->is_dend = htonl(tcp->th_seq);
+			}
+			is->is_maxdend = is->is_dend + 1;
+		}
+		is->is_flags &= ~(FI_W_SPORT|FI_W_DPORT);
+		ips_wild--;
+	}
+
+	/*
+	 * Work out which interface slot (0 = in, 1 = out) this packet
+	 * corresponds to and whether that slot is still empty; ret stays -1
+	 * when the slot is already filled.
+	 */
+	ret = -1;
+
+	if (!rev) {
+		if (out) {
+			if (!is->is_ifpout)
+				ret = 1;
+		} else {
+			if (!is->is_ifpin)
+				ret = 0;
+		}
+	} else {
+		if (out) {
+			if (!is->is_ifpin)
+				ret = 0;
+		} else {
+			if (!is->is_ifpout)
+				ret = 1;
+		}
+	}
+
+	if (ret >= 0) {
+		is->is_ifp[ret] = ifp;
+#ifdef	_KERNEL
+		/*
+		 * Remember the interface name as well.  The name was
+		 * previously copied twice in a row with identical arguments;
+		 * one copy is enough.
+		 *
+		 * NOTE(review): the name is stored under index 'out' while
+		 * the pointer is stored under index 'ret'; the two differ for
+		 * reverse-direction packets, yet ip_statesync() pairs
+		 * is_ifp[n] with is_ifname[n] -- confirm which index is
+		 * intended.  strncpy() also leaves the name unterminated when
+		 * it fills the whole buffer.
+		 */
+		strncpy(is->is_ifname[out], IFNAME(fin->fin_ifp),
+			sizeof(is->is_ifname[1]));
+#endif
+	}
+	return 1;
+}
+
+/*
+ * Check whether an ICMP header belongs to the ICMP state entry 'is'.
+ * 'v' is the IP version of the packet.  Returns 1 on a match, else 0.
+ */
+static int fr_matchicmpqueryreply(v, is, icmp)
+int v;
+ipstate_t *is;
+icmphdr_t *icmp;
+{
+	if (v == 4) {
+		/*
+		 * Accept either the type stored in the entry (same type on
+		 * the way in and out) or the type icmpreplytype4[] maps it
+		 * to; id and sequence number must be identical.
+		 */
+		if ((icmp->icmp_type != is->is_type) &&
+		    (icmpreplytype4[is->is_type] != icmp->icmp_type))
+			return 0;
+		if (icmp->icmp_id != is->is_icmp.ics_id)
+			return 0;
+		if (icmp->icmp_seq != is->is_icmp.ics_seq)
+			return 0;
+		return 1;
+	}
+#ifdef	USE_INET6
+	if (is->is_v == 6) {
+		/*
+		 * Only an echo request is matched, against an entry whose
+		 * stored type is echo reply, with equal id and sequence.
+		 */
+		if (is->is_type != ICMP6_ECHO_REPLY)
+			return 0;
+		if (icmp->icmp_type != ICMP6_ECHO_REQUEST)
+			return 0;
+		if (icmp->icmp_id != is->is_icmp.ics_id)
+			return 0;
+		if (icmp->icmp_seq != is->is_icmp.ics_seq)
+			return 0;
+		return 1;
+	}
+#endif
+	return 0;
+}
+
+/*
+ * Handle an IPv4 ICMP error message: look at the packet embedded in the
+ * error and search the state table for an entry that the embedded packet
+ * (ICMP query, TCP or UDP) matches.
+ *
+ *   ip  - the IPv4 header of the ICMP packet itself
+ *   fin - packet information for the ICMP packet
+ *
+ * Returns the rule (is_rule) of the matching state entry, or NULL when the
+ * packet is not a well-formed ICMP error or nothing matches.  On a match the
+ * entry's packet and byte counters and ips_stats.iss_hits are incremented;
+ * the entry's timeout is not touched.
+ */
+static frentry_t *fr_checkicmpmatchingstate(ip, fin)
+ip_t *ip;
+fr_info_t *fin;
+{
+	register ipstate_t *is, **isp;
+	register u_short sport, dport;
+	register u_char	pr;
+	union i6addr dst, src;
+	struct icmp *ic;
+	u_short savelen;
+	icmphdr_t *icmp;
+	fr_info_t ofin;
+	int type, len;
+	tcphdr_t *tcp;
+	frentry_t *fr;
+	ip_t *oip;
+	u_int hv;
+
+	/*
+	 * Does it at least have the return (basic) IP header ?
+	 * Only a basic IP header (no options) should be with
+	 * an ICMP error header.
+	 */
+	if (((ip->ip_v != 4) || (ip->ip_hl != 5)) ||
+	    (fin->fin_plen < ICMPERR_MINPKTLEN))
+		return NULL;
+	ic = (struct icmp *)fin->fin_dp;
+	type = ic->icmp_type;
+	/*
+	 * If it's not an error type, then return
+	 */
+	if ((type != ICMP_UNREACH) && (type != ICMP_SOURCEQUENCH) &&
+    	    (type != ICMP_REDIRECT) && (type != ICMP_TIMXCEED) &&
+    	    (type != ICMP_PARAMPROB))
+		return NULL;
+
+	/* oip: the IP header of the packet quoted inside the ICMP error */
+	oip = (ip_t *)((char *)ic + ICMPERR_ICMPHLEN);
+	if (fin->fin_plen < ICMPERR_MAXPKTLEN + ((oip->ip_hl - 5) << 2))
+		return NULL;
+
+	/*
+	 * Sanity checks.
+	 */
+	/* len: number of bytes that follow the ICMP error header */
+	len = fin->fin_dlen - ICMPERR_ICMPHLEN;
+	if ((len <= 0) || ((oip->ip_hl << 2) > len))
+		return NULL;
+
+	/*
+	 * Is the buffer big enough for all of it ?  It's the size of the IP
+	 * header claimed in the encapsulated part which is of concern.  It
+	 * may be too big to be in this buffer but not so big that it's
+	 * outside the ICMP packet, leading to TCP deref's causing problems.
+	 * This is possible because we don't know how big oip_hl is when we
+	 * do the pullup early in fr_check() and thus can't guarantee it is
+	 * all here now.
+	 */
+#ifdef  _KERNEL
+	{
+	mb_t *m;
+
+# if SOLARIS
+	m = fin->fin_qfm;
+	if ((char *)oip + len > (char *)m->b_wptr)
+		return NULL;
+# else
+	m = *(mb_t **)fin->fin_mp;
+	if ((char *)oip + len > (char *)ip + m->m_len)
+		return NULL;
+# endif
+	}
+#endif
+
+	/*
+	 * in the IPv4 case we must zero the i6addr union otherwise
+	 * the IP6EQ and IP6NEQ macros produce the wrong results because
+	 * of the 'junk' in the unused part of the union
+	 */
+	bzero((char *)&src, sizeof(src));
+	bzero((char *)&dst, sizeof(dst));
+
+	if (oip->ip_p == IPPROTO_ICMP) {
+		icmp = (icmphdr_t *)((char *)oip + (oip->ip_hl << 2));
+
+		/*
+		 * a ICMP error can only be generated as a result of an
+		 * ICMP query, not as the response on an ICMP error
+		 *
+		 * XXX theoretically ICMP_ECHOREP and the other reply's are
+		 * ICMP query's as well, but adding them here seems strange XXX
+		 */
+		 if ((icmp->icmp_type != ICMP_ECHO) &&
+		     (icmp->icmp_type != ICMP_TSTAMP) &&
+		     (icmp->icmp_type != ICMP_IREQ) &&
+		     (icmp->icmp_type != ICMP_MASKREQ))
+		    	return NULL;
+
+		/*
+		 * perform a lookup of the ICMP packet in the state table
+		 */
+		/* hash: protocol + addresses + ICMP id + ICMP sequence */
+		hv = (pr = oip->ip_p);
+		src.in4 = oip->ip_src;
+		hv += src.in4.s_addr;
+		dst.in4 = oip->ip_dst;
+		hv += dst.in4.s_addr;
+		hv += icmp->icmp_id;
+		hv += icmp->icmp_seq;
+		hv %= fr_statesize;
+
+		/*
+		 * Build a packet-info structure for the embedded packet.
+		 * ip_len is overwritten with len only for the duration of
+		 * the fr_makefrip() call and restored right after.
+		 */
+		savelen = oip->ip_len;
+		oip->ip_len = len;
+		ofin.fin_v = 4;
+		fr_makefrip(oip->ip_hl << 2, oip, &ofin);
+		oip->ip_len = savelen;
+		ofin.fin_ifp = fin->fin_ifp;
+		/* the embedded packet travelled the opposite way */
+		ofin.fin_out = !fin->fin_out;
+		ofin.fin_mp = NULL; /* if dereferenced, panic XXX */
+
+		READ_ENTER(&ipf_state);
+		for (isp = &ips_table[hv]; (is = *isp); isp = &is->is_hnext)
+			if ((is->is_p == pr) && (is->is_v == 4) &&
+			    fr_matchsrcdst(is, src, dst, &ofin, NULL) &&
+			    fr_matchicmpqueryreply(is->is_v, is, icmp)) {
+				ips_stats.iss_hits++;
+				is->is_pkts++;
+				/*
+				 * NOTE(review): counts ip->ip_len here while
+				 * the TCP/UDP path below counts
+				 * fin->fin_plen -- confirm they agree.
+				 */
+				is->is_bytes += ip->ip_len;
+				fr = is->is_rule;
+				RWLOCK_EXIT(&ipf_state);
+				return fr;
+			}
+		RWLOCK_EXIT(&ipf_state);
+		return NULL;
+	};
+
+	if ((oip->ip_p != IPPROTO_TCP) && (oip->ip_p != IPPROTO_UDP))
+		return NULL;
+
+	/* only the port fields are read, so this serves UDP as well */
+	tcp = (tcphdr_t *)((char *)oip + (oip->ip_hl << 2));
+	dport = tcp->th_dport;
+	sport = tcp->th_sport;
+
+	/* hash: protocol + addresses + both ports */
+	hv = (pr = oip->ip_p);
+	src.in4 = oip->ip_src;
+	hv += src.in4.s_addr;
+	dst.in4 = oip->ip_dst;
+	hv += dst.in4.s_addr;
+	hv += dport;
+	hv += sport;
+	hv %= fr_statesize;
+	/*
+	 * we make an fin entry to be able to feed it to
+	 * matchsrcdst note that not all fields are necessary
+	 * but this is the cleanest way. Note further we fill
+	 * in fin_mp such that if someone uses it we'll get
+	 * a kernel panic. fr_matchsrcdst does not use this.
+	 *
+	 * watch out here, as ip is in host order and oip in network
+	 * order. Any change we make must be undone afterwards.
+	 */
+	savelen = oip->ip_len;
+	oip->ip_len = len;
+	ofin.fin_v = 4;
+	fr_makefrip(oip->ip_hl << 2, oip, &ofin);
+	oip->ip_len = savelen;
+	ofin.fin_ifp = fin->fin_ifp;
+	ofin.fin_out = !fin->fin_out;
+	ofin.fin_mp = NULL; /* if dereferenced, panic XXX */
+	READ_ENTER(&ipf_state);
+	for (isp = &ips_table[hv]; (is = *isp); isp = &is->is_hnext) {
+		/*
+		 * Only allow this icmp through if the
+		 * encapsulated packet was allowed through the
+		 * other way around. Note that the minimal amount
+		 * of info present does not allow for checking against
+		 * tcp internals such as seq and ack numbers.
+		 */
+		if ((is->is_p == pr) && (is->is_v == 4) &&
+		    fr_matchsrcdst(is, src, dst, &ofin, tcp)) {
+			fr = is->is_rule;
+			ips_stats.iss_hits++;
+			is->is_pkts++;
+			is->is_bytes += fin->fin_plen;
+			/*
+			 * we deliberately do not touch the timeouts
+			 * for the accompanying state table entry.
+			 * It remains to be seen if that is correct. XXX
+			 */
+			RWLOCK_EXIT(&ipf_state);
+			return fr;
+		}
+	}
+	RWLOCK_EXIT(&ipf_state);
+	return NULL;
+}
+
+
+/*
+ * Re-hash a state entry: unlink 'is' from the hash chain it is on and put
+ * it at the head of the chain selected by 'hv' (reduced modulo
+ * fr_statesize).  'isp' is the address of the pointer that currently points
+ * at 'is'.  ips_stats.iss_inuse is adjusted as buckets become empty or
+ * occupied.
+ */
+static void fr_ipsmove(isp, is, hv)
+ipstate_t **isp, *is;
+u_int hv;
+{
+	ipstate_t *follower, **bucket;
+	u_int oldhv, newhv;
+
+	/*
+	 * Remove the entry from its current chain...
+	 */
+	oldhv = is->is_hv;
+	follower = is->is_hnext;
+	if (follower != NULL)
+		follower->is_phnext = isp;
+	*isp = follower;
+	if (ips_table[oldhv] == NULL)
+		ips_stats.iss_inuse--;
+
+	/*
+	 * ...and insert it at the front of the new one.
+	 */
+	newhv = hv % fr_statesize;
+	is->is_hv = newhv;
+	bucket = &ips_table[newhv];
+	if (*bucket == NULL)
+		ips_stats.iss_inuse++;
+	else
+		(*bucket)->is_phnext = &is->is_hnext;
+	is->is_phnext = bucket;
+	is->is_hnext = *bucket;
+	*bucket = is;
+}
+
+
+/*
+ * Check if a packet has a registered state.
+ *
+ *   ip  - IP header of the packet
+ *   fin - packet information
+ *
+ * Returns the rule attached to the matching state entry, or NULL when state
+ * processing is locked out (fr_state_lock), the packet is a non-first
+ * fragment or flagged short, or no entry matches.  On a match through the
+ * main path the entry's counters are updated, fin->fin_fr is set to the
+ * rule and, for fragments of entries with FR_KEEPFRAG, a fragment cache
+ * entry is created.
+ *
+ * Locking: when the switch below is left with is != NULL, ipf_state is
+ * still held and is released near the end of the function.
+ */
+frentry_t *fr_checkstate(ip, fin)
+ip_t *ip;
+fr_info_t *fin;
+{
+	union i6addr dst, src;
+	register ipstate_t *is, **isp;
+	register u_char pr;
+	u_int hv, hvm, hlen, tryagain, pass, v;
+	struct icmp *ic;
+	frentry_t *fr;
+	tcphdr_t *tcp;
+
+	if (fr_state_lock || (fin->fin_off & IP_OFFMASK) ||
+	    (fin->fin_fi.fi_fl & FI_SHORT))
+		return NULL;
+
+	is = NULL;
+	hlen = fin->fin_hlen;
+	/* tcp and ic both point at the transport header after the IP header */
+	tcp = (tcphdr_t *)((char *)ip + hlen);
+	ic = (struct icmp *)tcp;
+	/* start of the hash: protocol + first word of each address */
+	hv = (pr = fin->fin_fi.fi_p);
+	src = fin->fin_fi.fi_src;
+	dst = fin->fin_fi.fi_dst;
+	hv += src.in4.s_addr;
+	hv += dst.in4.s_addr;
+
+	/*
+	 * Search the hash table for matching packet header info.
+	 */
+	v = fin->fin_fi.fi_v;
+	switch (fin->fin_fi.fi_p)
+	{
+#ifdef	USE_INET6
+	case IPPROTO_ICMPV6 :
+		if (v == 6) {
+			/*
+			 * Take one address back out of the hash: the
+			 * destination for outbound packets, the source for
+			 * inbound ones.
+			 */
+			if (fin->fin_out)
+				hv -= dst.in4.s_addr;
+			else
+				hv -= src.in4.s_addr;
+			if ((ic->icmp_type == ICMP6_ECHO_REQUEST) ||
+			    (ic->icmp_type == ICMP6_ECHO_REPLY)) {
+				hv += ic->icmp_id;
+				hv += ic->icmp_seq;
+			}
+		}
+		/* falls through into the ICMP case */
+#endif
+	case IPPROTO_ICMP :
+		if (v == 4) {
+			hv += ic->icmp_id;
+			hv += ic->icmp_seq;
+		}
+		hv %= fr_statesize;
+		READ_ENTER(&ipf_state);
+		for (isp = &ips_table[hv]; (is = *isp); isp = &is->is_hnext) {
+			if ((is->is_p == pr) && (is->is_v == v) &&
+			    fr_matchsrcdst(is, src, dst, fin, NULL) &&
+			    fr_matchicmpqueryreply(v, is, ic)) {
+				/* refresh the entry's timeout */
+				is->is_age = fr_icmptimeout;
+				break;
+			}
+		}
+		/* found: leave the switch with the read lock still held */
+		if (is != NULL)
+			break;
+		RWLOCK_EXIT(&ipf_state);
+		/*
+		 * No matching icmp state entry. Perhaps this is a
+		 * response to another state entry.
+		 */
+#ifdef	USE_INET6
+		if (v == 6)
+			fr = fr_checkicmp6matchingstate((ip6_t *)ip, fin);
+		else
+#endif
+			fr = fr_checkicmpmatchingstate(ip, fin);
+		if (fr)
+			return fr;
+		/* is is NULL here, so this ends up counted as a miss below */
+		break;
+	case IPPROTO_TCP :
+	    {
+		register u_short dport, sport;
+		register int i;
+
+		i = tcp->th_flags;
+		/*
+		 * Just plain ignore RST flag set with either FIN or SYN.
+		 */
+		if ((i & TH_RST) &&
+		    ((i & (TH_FIN|TH_SYN|TH_RST)) != TH_RST))
+			break;
+		/*
+		 * TCP continues into the UDP code; the UDP label sits inside
+		 * this brace block so both share dport/sport.
+		 */
+	case IPPROTO_UDP :
+		dport = tcp->th_dport;
+		sport = tcp->th_sport;
+		tryagain = 0;
+		hv += dport;
+		hv += sport;
+		READ_ENTER(&ipf_state);
+retry_tcpudp:
+		hvm = hv % fr_statesize;
+		for (isp = &ips_table[hvm]; (is = *isp); isp = &is->is_hnext)
+			if ((is->is_p == pr) && (is->is_v == v) &&
+			    fr_matchsrcdst(is, src, dst, fin, tcp)) {
+				if ((pr == IPPROTO_TCP)) {
+					/*
+					 * addresses/ports match but the TCP
+					 * state check rejected the packet:
+					 * keep looking down the chain
+					 */
+					if (!fr_tcpstate(is, fin, ip, tcp)) {
+						continue;
+					}
+				}
+				break;
+			}
+		if (is != NULL) {
+			/*
+			 * Found on the second (port-less hash) pass and the
+			 * entry is no longer a wildcard: move it to the
+			 * bucket given by the full hash, then downgrade the
+			 * write lock taken for the retry.
+			 */
+			if (tryagain &&
+			    !(is->is_flags & (FI_WILDP|FI_WILDA))) {
+				hv += dport;
+				hv += sport;
+				fr_ipsmove(isp, is, hv);
+				MUTEX_DOWNGRADE(&ipf_state);
+			}
+			break;
+		}
+		RWLOCK_EXIT(&ipf_state);
+		/*
+		 * Nothing found with the full hash.  If wildcard entries
+		 * exist, retry once with the ports removed from the hash,
+		 * this time holding the write lock.
+		 */
+		if (!tryagain && ips_wild) {
+			hv -= dport;
+			hv -= sport;
+			tryagain = 1;
+			WRITE_ENTER(&ipf_state);
+			goto retry_tcpudp;
+		}
+		break;
+	    }
+	default :
+		break;
+	}
+	if (is == NULL) {
+		ATOMIC_INCL(ips_stats.iss_miss);
+		return NULL;
+	}
+	/* matched: account for the packet under the entry's own lock */
+	MUTEX_ENTER(&is->is_lock);
+	is->is_bytes += fin->fin_plen;
+	ips_stats.iss_hits++;
+	is->is_pkts++;
+	MUTEX_EXIT(&is->is_lock);
+	fr = is->is_rule;
+	fin->fin_fr = fr;
+	pass = is->is_pass;
+#ifndef	_KERNEL
+	/* non-kernel builds only; 'is' must not be used after this */
+	if (tcp->th_flags & TCP_CLOSE)
+		fr_delstate(is);
+#endif
+	RWLOCK_EXIT(&ipf_state);
+	if ((fin->fin_fi.fi_fl & FI_FRAG) && (pass & FR_KEEPFRAG))
+		ipfr_newfrag(ip, fin, pass ^ FR_KEEPSTATE);
+	return fr;
+}
+
+
+/*
+ * Re-resolve interface pointers after an interface change: every state
+ * entry whose in or out interface pointer equals 'ifp' has that pointer
+ * looked up again from the saved interface name; when the lookup yields
+ * nothing the pointer is set to (void *)-1.
+ */
+void ip_statesync(ifp)
+void *ifp;
+{
+	register ipstate_t *is;
+	int slot;
+
+	WRITE_ENTER(&ipf_state);
+	for (is = ips_list; is != NULL; is = is->is_next) {
+		/* slot 0 is the "in" interface, slot 1 the "out" interface */
+		for (slot = 0; slot < 2; slot++) {
+			if (is->is_ifp[slot] != ifp)
+				continue;
+			is->is_ifp[slot] = GETUNIT(is->is_ifname[slot],
+						   is->is_v);
+			if (is->is_ifp[slot] == NULL)
+				is->is_ifp[slot] = (void *)-1;
+		}
+	}
+	RWLOCK_EXIT(&ipf_state);
+}
+
+
+/*
+ * Unlink a state entry from the global list and from its hash chain, drop
+ * its reference on the rule (freeing the rule when the count reaches zero)
+ * and free the entry.
+ *
+ * Must always be called with ipf_state held as a write lock.
+ */
+static void fr_delstate(is)
+ipstate_t *is;
+{
+	frentry_t *rule;
+	ipstate_t *succ;
+
+	if ((is->is_flags & (FI_WILDP|FI_WILDA)) != 0)
+		ips_wild--;
+
+	/* take it off the list of all entries */
+	succ = is->is_next;
+	if (succ != NULL)
+		succ->is_pnext = is->is_pnext;
+	*is->is_pnext = succ;
+
+	/* take it off its hash chain */
+	succ = is->is_hnext;
+	if (succ != NULL)
+		succ->is_phnext = is->is_phnext;
+	*is->is_phnext = succ;
+	if (ips_table[is->is_hv] == NULL)
+		ips_stats.iss_inuse--;
+
+	rule = is->is_rule;
+	if (rule != NULL) {
+		rule->fr_ref--;
+		if (rule->fr_ref == 0) {
+			KFREE(rule);
+		}
+	}
+#ifdef	_KERNEL
+	MUTEX_DESTROY(&is->is_lock);
+#endif
+	KFREE(is);
+	ips_num--;
+}
+
+
+/*
+ * Free memory in use by all state info. kept.
+ *
+ * Every state entry is deleted under the ipf_state write lock and the
+ * counters are reset; afterwards the hash table itself is released.  The
+ * table is only freed when it was actually allocated, so the function is
+ * safe to call after a failed initialisation or more than once.
+ */
+void fr_stateunload()
+{
+	register ipstate_t *is;
+
+	WRITE_ENTER(&ipf_state);
+	/* fr_delstate() unlinks the head, so ips_list advances by itself */
+	while ((is = ips_list))
+		fr_delstate(is);
+	ips_stats.iss_inuse = 0;
+	ips_num = 0;
+	RWLOCK_EXIT(&ipf_state);
+	if (ips_table != NULL) {
+		KFREES(ips_table, fr_statesize * sizeof(ipstate_t *));
+		ips_table = NULL;
+	}
+}
+
+
+/*
+ * Age all state entries by one tick and delete those whose timer runs out.
+ * Timeouts are set in expectation of this being called twice per second.
+ * Also performs a flush requested through fr_state_doflush.
+ */
+void fr_timeoutstate()
+{
+	register ipstate_t *is, **isp;
+#if defined(_KERNEL) && !SOLARIS
+	int s;
+#endif
+
+	SPL_NET(s);
+	WRITE_ENTER(&ipf_state);
+	isp = &ips_list;
+	while ((is = *isp) != NULL) {
+		/*
+		 * An age of zero is left alone; otherwise count down and
+		 * move on unless the counter has just reached zero.
+		 */
+		if ((is->is_age == 0) || (--is->is_age != 0)) {
+			isp = &is->is_next;
+			continue;
+		}
+		if (is->is_p != IPPROTO_TCP)
+			ips_stats.iss_expire++;
+		else
+			ips_stats.iss_fin++;
+#ifdef	IPFILTER_LOG
+		ipstate_log(is, ISL_EXPIRE);
+#endif
+		/* unlinking makes *isp refer to the following entry */
+		fr_delstate(is);
+	}
+	if (fr_state_doflush != 0) {
+		(void) fr_state_flush(1);
+		fr_state_doflush = 0;
+	}
+	RWLOCK_EXIT(&ipf_state);
+	SPL_X(s);
+}
+
+
+/*
+ * Original idea from Pradeep Krishnan for use primarily with NAT code.
+ * (pkrishna@netcom.com)
+ *
+ * Rewritten by Arjan de Vet <Arjan.deVet@adv.iae.nl>, 2000-07-29:
+ *
+ * - (try to) base state transitions on real evidence only,
+ *   i.e. packets that are sent and have been received by ipfilter;
+ *   diagram 18.12 of TCP/IP volume 1 by W. Richard Stevens was used.
+ *
+ * - deal with half-closed connections correctly;
+ *
+ * - store the state of the source in state[0] such that ipfstat
+ *   displays the state as source/dest instead of dest/source; the calls
+ *   to fr_tcp_age have been changed accordingly.
+ *
+ * Parameters:
+ *
+ *    age      = receives the new timeout value for the connection
+ *    state[0] = state of source (host that initiated connection)
+ *    state[1] = state of dest   (host that accepted the connection)
+ *    fin      = packet information; fin_dp must point at the TCP header
+ *
+ *    dir == 0 : a packet from source to dest
+ *    dir == 1 : a packet from dest to source
+ *
+ * Only state[dir] (the sender's state) is ever changed; the other side's
+ * state is merely consulted.
+ */
+void fr_tcp_age(age, state, fin, dir)
+u_long *age;
+u_char *state;
+fr_info_t *fin;
+int dir;
+{
+	tcphdr_t *tcp = (tcphdr_t *)fin->fin_dp;
+	u_char flags = tcp->th_flags;
+	int dlen, ostate;
+
+	/* state of the other end, i.e. the receiver of this packet */
+	ostate = state[1 - dir];
+
+	/* payload length: packet length minus IP and TCP header lengths */
+	dlen = fin->fin_plen - fin->fin_hlen - (tcp->th_off << 2);
+
+	/*
+	 * A reset ends processing here: a bare RST (no PUSH, no data) marks
+	 * the sender CLOSED, anything else puts it in CLOSE_WAIT.
+	 */
+	if (flags & TH_RST) {
+		if (!(tcp->th_flags & TH_PUSH) && !dlen) {
+			*age = fr_tcpclosed;
+			state[dir] = TCPS_CLOSED;
+		} else {
+			*age = fr_tcpclosewait;
+			state[dir] = TCPS_CLOSE_WAIT;
+		}
+		return;
+	}
+
+	*age = fr_tcptimeout; /* default 4 mins */
+
+	switch(state[dir])
+	{
+	case TCPS_CLOSED: /* 0 */
+		if ((flags & TH_OPENING) == TH_OPENING) {
+			/*
+			 * 'dir' received an S and sends SA in response,
+			 * CLOSED -> SYN_RECEIVED
+			 */
+			state[dir] = TCPS_SYN_RECEIVED;
+			*age = fr_tcptimeout;
+		} else if ((flags & (TH_SYN|TH_ACK)) == TH_SYN) {
+			/* 'dir' sent S, CLOSED -> SYN_SENT */
+			state[dir] = TCPS_SYN_SENT;
+			*age = fr_tcptimeout;
+		}
+		/*
+		 * The next piece of code makes it possible to get
+		 * already established connections into the state table
+		 * after a restart or reload of the filter rules; this
+		 * does not work when a strict 'flags S keep state' is
+		 * used for tcp connections of course
+		 */
+		if ((flags & (TH_FIN|TH_SYN|TH_RST|TH_ACK)) == TH_ACK) {
+			/* we saw an A, guess 'dir' is in ESTABLISHED mode */
+			state[dir] = TCPS_ESTABLISHED;
+			*age = fr_tcpidletimeout;
+		}
+		/*
+		 * TODO: besides regular ACK packets we can have other
+		 * packets as well; it is yet to be determined how we
+		 * should initialize the states in those cases
+		 */
+		break;
+
+	case TCPS_LISTEN: /* 1 */
+		/* NOT USED */
+		break;
+
+	case TCPS_SYN_SENT: /* 2 */
+		if ((flags & (TH_SYN|TH_FIN|TH_ACK)) == TH_ACK) {
+			/*
+			 * We see an A from 'dir' which is in SYN_SENT
+			 * state: 'dir' sent an A in response to an SA
+			 * which it received, SYN_SENT -> ESTABLISHED
+			 */
+			state[dir] = TCPS_ESTABLISHED;
+			*age = fr_tcpidletimeout;
+		} else if (flags & TH_FIN) {
+			/*
+			 * We see an F from 'dir' which is in SYN_SENT
+			 * state and wants to close its side of the
+			 * connection; SYN_SENT -> FIN_WAIT_1
+			 */
+			state[dir] = TCPS_FIN_WAIT_1;
+			*age = fr_tcpidletimeout; /* or fr_tcptimeout? */
+		} else if ((flags & TH_OPENING) == TH_OPENING) {
+			/*
+			 * We see an SA from 'dir' which is already in
+			 * SYN_SENT state, this means we have a
+			 * simultaneous open; SYN_SENT -> SYN_RECEIVED
+			 */
+			state[dir] = TCPS_SYN_RECEIVED;
+			*age = fr_tcptimeout;
+		}
+		break;
+
+	case TCPS_SYN_RECEIVED: /* 3 */
+		if ((flags & (TH_SYN|TH_FIN|TH_ACK)) == TH_ACK) {
+			/*
+			 * We see an A from 'dir' which was in SYN_RECEIVED
+			 * state so it must now be in established state,
+			 * SYN_RECEIVED -> ESTABLISHED
+			 */
+			state[dir] = TCPS_ESTABLISHED;
+			*age = fr_tcpidletimeout;
+		} else if (flags & TH_FIN) {
+			/*
+			 * We see an F from 'dir' which is in SYN_RECEIVED
+			 * state and wants to close its side of the connection;
+			 * SYN_RECEIVED -> FIN_WAIT_1
+			 */
+			state[dir] = TCPS_FIN_WAIT_1;
+			*age = fr_tcpidletimeout;
+		}
+		break;
+
+	case TCPS_ESTABLISHED: /* 4 */
+		if (flags & TH_FIN) {
+			/*
+			 * 'dir' closed its side of the connection; this
+			 * gives us a half-closed connection;
+			 * ESTABLISHED -> FIN_WAIT_1
+			 */
+			state[dir] = TCPS_FIN_WAIT_1;
+			*age = fr_tcphalfclosed;
+		} else if (flags & TH_ACK) {
+			/* an ACK, should we exclude other flags here? */
+			if (ostate == TCPS_FIN_WAIT_1) {
+				/*
+				 * We know the other side did an active close,
+				 * so we are ACKing the recvd FIN packet (does
+				 * the window matching code guarantee this?)
+				 * and go into CLOSE_WAIT state; this gives us
+				 * a half-closed connection
+				 */
+				state[dir] = TCPS_CLOSE_WAIT;
+				*age = fr_tcphalfclosed;
+			} else if (ostate < TCPS_CLOSE_WAIT)
+				/*
+				 * Still a fully established connection,
+				 * reset timeout
+				 */
+				*age = fr_tcpidletimeout;
+		}
+		break;
+
+	case TCPS_CLOSE_WAIT: /* 5 */
+		if (flags & TH_FIN) {
+			/*
+			 * Application closed and 'dir' sent a FIN, we're now
+			 * going into LAST_ACK state
+			 */
+			*age  = fr_tcplastack;
+			state[dir] = TCPS_LAST_ACK;
+		} else {
+			/*
+			 * We remain in CLOSE_WAIT because the other side has
+			 * closed already and we did not close our side yet;
+			 * reset timeout
+			 */
+			*age  = fr_tcphalfclosed;
+		}
+		break;
+
+	case TCPS_FIN_WAIT_1: /* 6 */
+		if ((flags & TH_ACK) && ostate > TCPS_CLOSE_WAIT) {
+			/*
+			 * If the other side is not active anymore it has sent
+			 * us a FIN packet that we are ack'ing now with an ACK;
+			 * this means both sides have now closed the connection
+			 * and we go into TIME_WAIT
+			 */
+			/*
+			 * XXX: how do we know we really are ACKing the FIN
+			 * packet here? does the window code guarantee that?
+			 */
+			state[dir] = TCPS_TIME_WAIT;
+			*age = fr_tcptimeout;
+		} else
+			/*
+			 * We closed our side of the connection already but the
+			 * other side is still active (ESTABLISHED/CLOSE_WAIT);
+			 * continue with this half-closed connection
+			 */
+			*age = fr_tcphalfclosed;
+		break;
+
+	case TCPS_CLOSING: /* 7 */
+		/* NOT USED */
+		break;
+
+	case TCPS_LAST_ACK: /* 8 */
+		/* without data the default timeout set above stays in force */
+		if (flags & TH_ACK) {
+			if ((flags & TH_PUSH) || dlen)
+				/*
+				 * There is still data to be delivered, reset
+				 * timeout
+				 */
+				*age  = fr_tcplastack;
+		}
+		/*
+		 * We cannot detect when we go out of LAST_ACK state to CLOSED
+		 * because that is based on the reception of ACK packets;
+		 * ipfilter can only detect that a packet has been sent by a
+		 * host
+		 */
+		break;
+
+	case TCPS_FIN_WAIT_2: /* 9 */
+		/* NOT USED */
+		break;
+
+	case TCPS_TIME_WAIT: /* 10 */
+		/* we're in 2MSL timeout now */
+		break;
+	}
+}
+
+
+#ifdef	IPFILTER_LOG
+/*
+ * Write a log record describing state entry 'is' to the state log.
+ *
+ *   is   - the state entry being reported
+ *   type - record type stored in isl_type (e.g. ISL_EXPIRE)
+ *
+ * The result of ipllog() is deliberately ignored.
+ */
+void ipstate_log(is, type)
+struct ipstate *is;
+u_int type;
+{
+	struct	ipslog	ipsl;
+	void *items[1];
+	size_t sizes[1];
+	int types[1];
+
+	/*
+	 * Clear the whole record first: not every field is assigned for
+	 * every protocol (isl_state is only set for TCP, isl_itype covers
+	 * only part of the union) and the structure may contain padding.
+	 * Without this, leftover stack contents would be copied into the
+	 * log.
+	 */
+	bzero((char *)&ipsl, sizeof(ipsl));
+
+	ipsl.isl_type = type;
+	ipsl.isl_pkts = is->is_pkts;
+	ipsl.isl_bytes = is->is_bytes;
+	ipsl.isl_src = is->is_src;
+	ipsl.isl_dst = is->is_dst;
+	ipsl.isl_p = is->is_p;
+	ipsl.isl_v = is->is_v;
+	/* NOTE(review): isl_flags is a u_char, is_flags is 32 bits wide */
+	ipsl.isl_flags = is->is_flags;
+	if (ipsl.isl_p == IPPROTO_TCP || ipsl.isl_p == IPPROTO_UDP) {
+		ipsl.isl_sport = is->is_sport;
+		ipsl.isl_dport = is->is_dport;
+		if (ipsl.isl_p == IPPROTO_TCP) {
+			ipsl.isl_state[0] = is->is_state[0];
+			ipsl.isl_state[1] = is->is_state[1];
+		}
+	} else if (ipsl.isl_p == IPPROTO_ICMP)
+		ipsl.isl_itype = is->is_icmp.ics_type;
+	else {
+		ipsl.isl_ps.isl_filler[0] = 0;
+		ipsl.isl_ps.isl_filler[1] = 0;
+	}
+	/* hand the record to the logger as a single item */
+	items[0] = &ipsl;
+	sizes[0] = sizeof(ipsl);
+	types[0] = 0;
+
+	(void) ipllog(IPL_LOGSTATE, NULL, items, sizes, types, 1);
+}
+#endif
+
+
+#ifdef	USE_INET6
+/*
+ * IPv6 counterpart of the ICMP error handling: look at the packet embedded
+ * in an ICMPv6 error message and search the state table for an entry the
+ * embedded packet (ICMPv6 query, TCP or UDP) matches.
+ *
+ *   ip  - IPv6 header of the ICMPv6 packet itself
+ *   fin - packet information for the ICMPv6 packet
+ *
+ * Returns the rule (is_rule) of the matching entry, or NULL when the packet
+ * is not an ICMPv6 error or nothing matches.  On a match the entry's packet
+ * and byte counters and ips_stats.iss_hits are incremented.  The ipf_state
+ * lock is never held on return.
+ */
+frentry_t *fr_checkicmp6matchingstate(ip, fin)
+ip6_t *ip;
+fr_info_t *fin;
+{
+	register ipstate_t *is, **isp;
+	register u_short sport, dport;
+	register u_char	pr;
+	struct icmp6_hdr *ic, *oic;
+	union i6addr dst, src;
+	u_short savelen;
+	fr_info_t ofin;
+	tcphdr_t *tcp;
+	frentry_t *fr;
+	ip6_t *oip;
+	int type;
+	u_int hv;
+
+	/*
+	 * Does it at least have the return (basic) IP header ?
+	 * Only a basic IP header (no options) should be with
+	 * an ICMP error header.
+	 */
+	if ((fin->fin_v != 6) || (fin->fin_plen < ICMP6ERR_MINPKTLEN))
+		return NULL;
+	ic = (struct icmp6_hdr *)fin->fin_dp;
+	type = ic->icmp6_type;
+	/*
+	 * If it's not an error type, then return
+	 */
+	if ((type != ICMP6_DST_UNREACH) && (type != ICMP6_PACKET_TOO_BIG) &&
+	    (type != ICMP6_TIME_EXCEEDED) && (type != ICMP6_PARAM_PROB))
+		return NULL;
+
+	/* oip: the IPv6 header of the packet quoted inside the error */
+	oip = (ip6_t *)((char *)ic + ICMPERR_ICMPHLEN);
+	if (fin->fin_plen < sizeof(*oip))
+		return NULL;
+
+	if (oip->ip6_nxt == IPPROTO_ICMPV6) {
+		oic = (struct icmp6_hdr *)(oip + 1);
+		/*
+		 * a ICMP error can only be generated as a result of an
+		 * ICMP query, not as the response on an ICMP error
+		 *
+		 * XXX theoretically ICMP_ECHOREP and the other reply's are
+		 * ICMP query's as well, but adding them here seems strange XXX
+		 */
+		if (!(oic->icmp6_type & ICMP6_INFOMSG_MASK))
+			return NULL;
+
+		/*
+		 * perform a lookup of the ICMP packet in the state table
+		 */
+		hv = (pr = oip->ip6_nxt);
+		src.in6 = oip->ip6_src;
+		hv += src.in4.s_addr;
+		dst.in6 = oip->ip6_dst;
+		hv += dst.in4.s_addr;
+		hv += oic->icmp6_id;
+		hv += oic->icmp6_seq;
+		hv %= fr_statesize;
+
+		/*
+		 * Build packet info for the embedded packet; the payload
+		 * length is byte-swapped only for the fr_makefrip() call.
+		 */
+		oip->ip6_plen = ntohs(oip->ip6_plen);
+		ofin.fin_v = 6;
+		fr_makefrip(sizeof(*oip), (ip_t *)oip, &ofin);
+		oip->ip6_plen = htons(oip->ip6_plen);
+		ofin.fin_ifp = fin->fin_ifp;
+		/* the embedded packet travelled the opposite way */
+		ofin.fin_out = !fin->fin_out;
+		ofin.fin_mp = NULL; /* if dereferenced, panic XXX */
+
+		READ_ENTER(&ipf_state);
+		for (isp = &ips_table[hv]; (is = *isp); isp = &is->is_hnext)
+			if ((is->is_p == pr) &&
+			    (oic->icmp6_id == is->is_icmp.ics_id) &&
+			    (oic->icmp6_seq == is->is_icmp.ics_seq) &&
+			    fr_matchsrcdst(is, src, dst, &ofin, NULL)) {
+				/*
+				 * in the state table ICMP query's are stored
+				 * with the type of the corresponding ICMP
+				 * response. Correct here
+				 */
+				if (((is->is_type == ICMP6_ECHO_REPLY) &&
+				     (oic->icmp6_type == ICMP6_ECHO_REQUEST)) ||
+				     (is->is_type - 1 == oic->icmp6_type )) {
+					ips_stats.iss_hits++;
+					is->is_pkts++;
+					is->is_bytes += fin->fin_plen;
+					/*
+					 * Fetch the rule and drop the read
+					 * lock before returning; returning
+					 * with the lock held would leave
+					 * ipf_state locked forever.
+					 */
+					fr = is->is_rule;
+					RWLOCK_EXIT(&ipf_state);
+					return fr;
+				}
+			}
+		RWLOCK_EXIT(&ipf_state);
+
+		return NULL;
+	};
+
+	if ((oip->ip6_nxt != IPPROTO_TCP) && (oip->ip6_nxt != IPPROTO_UDP))
+		return NULL;
+	/* only the port fields are read, so this serves UDP as well */
+	tcp = (tcphdr_t *)(oip + 1);
+	dport = tcp->th_dport;
+	sport = tcp->th_sport;
+
+	/* hash: protocol + first word of each address + both ports */
+	hv = (pr = oip->ip6_nxt);
+	src.in6 = oip->ip6_src;
+	hv += src.in4.s_addr;
+	dst.in6 = oip->ip6_dst;
+	hv += dst.in4.s_addr;
+	hv += dport;
+	hv += sport;
+	hv %= fr_statesize;
+	/*
+	 * we make an fin entry to be able to feed it to
+	 * matchsrcdst note that not all fields are necessary
+	 * but this is the cleanest way. Note further we fill
+	 * in fin_mp such that if someone uses it we'll get
+	 * a kernel panic. fr_matchsrcdst does not use this.
+	 *
+	 * watch out here, as ip is in host order and oip in network
+	 * order. Any change we make must be undone afterwards.
+	 */
+	savelen = oip->ip6_plen;
+	oip->ip6_plen = ip->ip6_plen - sizeof(*ip) - ICMPERR_ICMPHLEN;
+	ofin.fin_v = 6;
+	fr_makefrip(sizeof(*oip), (ip_t *)oip, &ofin);
+	oip->ip6_plen = savelen;
+	ofin.fin_ifp = fin->fin_ifp;
+	ofin.fin_out = !fin->fin_out;
+	ofin.fin_mp = NULL; /* if dereferenced, panic XXX */
+	READ_ENTER(&ipf_state);
+	for (isp = &ips_table[hv]; (is = *isp); isp = &is->is_hnext) {
+		/*
+		 * Only allow this icmp through if the
+		 * encapsulated packet was allowed through the
+		 * other way around. Note that the minimal amount
+		 * of info present does not allow for checking against
+		 * tcp internals such as seq and ack numbers.
+		 */
+		if ((is->is_p == pr) && (is->is_v == 6) &&
+		    fr_matchsrcdst(is, src, dst, &ofin, tcp)) {
+			fr = is->is_rule;
+			ips_stats.iss_hits++;
+			/*
+			 * we must swap src and dst here because the icmp
+			 * comes the other way around
+			 */
+			is->is_pkts++;
+			is->is_bytes += fin->fin_plen;
+			/*
+			 * we deliberately do not touch the timeouts
+			 * for the accompanying state table entry.
+			 * It remains to be seen if that is correct. XXX
+			 */
+			RWLOCK_EXIT(&ipf_state);
+			return fr;
+		}
+	}
+	RWLOCK_EXIT(&ipf_state);
+	return NULL;
+}
+#endif
diff --git a/sys/netinet/ip_state.h b/sys/netinet/ip_state.h
new file mode 100644
index 0000000..765709c
--- /dev/null
+++ b/sys/netinet/ip_state.h
@@ -0,0 +1,196 @@
+/*
+ * Copyright (C) 1995-2000 by Darren Reed.
+ *
+ * Redistribution and use in source and binary forms are permitted
+ * provided that this notice is preserved and due credit is given
+ * to the original author and the contributors.
+ *
+ * @(#)ip_state.h	1.3 1/12/96 (C) 1995 Darren Reed
+ * $Id: ip_state.h,v 2.13.2.1 2000/07/08 02:15:35 darrenr Exp $
+ * $FreeBSD$
+ */
+#ifndef	__IP_STATE_H__
+#define	__IP_STATE_H__
+
+/*
+ * ioctl command taking a pointer to a struct ipstate.  The second variant
+ * passes the group letter unquoted -- presumably for pre-ANSI preprocessors
+ * whose _IOW() quotes it itself.
+ */
+#if defined(__STDC__) || defined(__GNUC__)
+# define	SIOCDELST	_IOW('r', 61, struct ipstate *)
+#else
+# define	SIOCDELST	_IOW(r, 61, struct ipstate *)
+#endif
+
+/* presumably the default number of hash buckets -- TODO confirm */
+#define	IPSTATE_SIZE	5737
+#define	IPSTATE_MAX	4013	/* Maximum number of states held */
+
+/*
+ * PAIRS is true when the pair (s1,d1) equals the pair (s2,d2) either in the
+ * same or in swapped order.  Every argument is evaluated more than once.
+ */
+#define	PAIRS(s1,d1,s2,d2)	((((s1) == (s2)) && ((d1) == (d2))) ||\
+				 (((s1) == (d2)) && ((d1) == (s2))))
+/* PAIRS applied to the s_addr members of four address structures */
+#define	IPPAIR(s1,d1,s2,d2)	PAIRS((s1).s_addr, (d1).s_addr, \
+				      (s2).s_addr, (d2).s_addr)
+
+
+/*
+ * Per-connection UDP data.  The two ports sit at the same offsets as
+ * ts_sport/ts_dport in struct tcpstate, which shares a union with this
+ * structure in struct ipstate.
+ */
+typedef struct udpstate {
+	u_short	us_sport;	/* source port */
+	u_short	us_dport;	/* destination port */
+} udpstate_t;
+
+/* Per-entry ICMP data, compared against the ICMP header of later packets. */
+typedef struct icmpstate {
+	u_short	ics_id;		/* ICMP id to match */
+	u_short	ics_seq;	/* ICMP sequence number to match */
+	u_char	ics_type;	/* ICMP type recorded for the entry */
+} icmpstate_t;
+
+/* Sequence-space tracking for one direction of a TCP connection. */
+typedef	struct	tcpdata	{
+	u_32_t	td_end;		/* highest end of data (seq + length) seen */
+	u_32_t	td_maxend;	/* upper limit derived from ack + window */
+	u_short	td_maxwin;	/* largest window seen */
+} tcpdata_t;
+
+/*
+ * Per-connection TCP data.  Index 0 of the arrays refers to the source
+ * (connection initiator), index 1 to the destination.
+ */
+typedef	struct tcpstate {
+	u_short	ts_sport;	/* source port */
+	u_short	ts_dport;	/* destination port */
+	tcpdata_t ts_data[2];	/* sequence tracking per direction */
+	u_char	ts_state[2];	/* TCPS_* state of each end */
+} tcpstate_t;
+
+/*
+ * One entry of the state table.  Each entry is linked both into the list of
+ * all entries and into one hash chain.
+ */
+typedef struct ipstate {
+	struct	ipstate	*is_next;	/* next entry in the global list */
+	struct	ipstate	**is_pnext;	/* pointer that points at this entry */
+	struct	ipstate	*is_hnext;	/* next entry in the hash chain */
+	struct	ipstate	**is_phnext;	/* hash pointer pointing at this entry */
+	u_long	is_age;			/* remaining lifetime in timer ticks */
+	u_int	is_pass;		/* FR_* flags, e.g. FR_KEEPFRAG */
+	U_QUAD_T	is_pkts;	/* packets matched by this entry */
+	U_QUAD_T	is_bytes;	/* bytes matched by this entry */
+	void	*is_ifp[2];		/* [0] = in, [1] = out interface */
+	frentry_t	*is_rule;	/* rule this entry belongs to */
+	union	i6addr	is_src;		/* source address */
+	union	i6addr	is_dst;		/* destination address */
+	u_char	is_p;			/* Protocol */
+	u_char	is_v;			/* IP version (4 or 6) */
+	u_int	is_hv;			/* index of the hash bucket in use */
+	u_32_t	is_flags;		/* see the is_flags comment below */
+	u_32_t	is_opt;			/* packet options set */
+	u_32_t	is_optmsk;		/*    "      "    mask */
+	u_short	is_sec;			/* security options set */
+	u_short	is_secmsk;		/*    "        "    mask */
+	u_short	is_auth;		/* authentication options set */
+	u_short	is_authmsk;		/*    "              "    mask */
+	union {				/* protocol specific part */
+		icmpstate_t	is_ics;
+		tcpstate_t	is_ts;
+		udpstate_t	is_us;
+	} is_ps;
+	char	is_ifname[2][IFNAMSIZ];	/* interface names, for re-lookup */
+#if SOLARIS || defined(__sgi)
+	kmutex_t	is_lock;	/* per-entry lock */
+#endif
+} ipstate_t;
+
+/*
+ * Shorthand names for members nested inside struct ipstate.
+ *
+ * NOTE(review): is_code expands to is_icmp.ics_code, but struct icmpstate
+ * as declared in this header has no ics_code member; any use of is_code
+ * would fail to compile.
+ */
+#define	is_saddr	is_src.in4.s_addr
+#define	is_daddr	is_dst.in4.s_addr
+#define	is_icmp		is_ps.is_ics
+#define	is_type		is_icmp.ics_type
+#define	is_code		is_icmp.ics_code
+#define	is_tcp		is_ps.is_ts
+#define	is_udp		is_ps.is_us
+#define is_send		is_tcp.ts_data[0].td_end
+#define is_dend		is_tcp.ts_data[1].td_end
+#define is_maxswin	is_tcp.ts_data[0].td_maxwin
+#define is_maxdwin	is_tcp.ts_data[1].td_maxwin
+#define is_maxsend	is_tcp.ts_data[0].td_maxend
+#define is_maxdend	is_tcp.ts_data[1].td_maxend
+#define	is_sport	is_tcp.ts_sport
+#define	is_dport	is_tcp.ts_dport
+#define	is_state	is_tcp.ts_state
+#define	is_ifpin	is_ifp[0]
+#define	is_ifpout	is_ifp[1]
+
+/* SYN and ACK together, as sent in reply to a SYN */
+#define	TH_OPENING	(TH_SYN|TH_ACK)
+/*
+ * is_flags:
+ * Bits 0 - 3 are used as a mask with the current packet's bits to check for
+ * whether it is short, tcp/udp, a fragment or the presence of IP options.
+ * Bits 4 - 7 are set from the initial packet and contain what the packet
+ * anded with bits 0-3 must match.
+ * Bits 8,9 are used to indicate wildcard source/destination port matching.
+ */
+
+/*
+ * A state entry bundled with a copy of a rule structure.
+ * NOTE(review): presumably used to save/restore state through the ioctl
+ * interface -- confirm against the users of this type.
+ */
+typedef	struct	ipstate_save	{
+	void	*ips_next;
+	struct	ipstate	ips_is;		/* the state entry itself */
+	struct	frentry	ips_fr;		/* rule stored alongside the entry */
+} ipstate_save_t;
+
+/* the rule pointer inside the embedded state entry */
+#define	ips_rule	ips_is.is_rule
+
+
+/* Log record describing a state entry, as filled in by ipstate_log(). */
+typedef	struct	ipslog	{
+	U_QUAD_T	isl_pkts;	/* copy of is_pkts */
+	U_QUAD_T	isl_bytes;	/* copy of is_bytes */
+	union	i6addr	isl_src;	/* copy of is_src */
+	union	i6addr	isl_dst;	/* copy of is_dst */
+	u_short	isl_type;		/* record type, one of ISL_* */
+	union {				/* protocol specific part */
+		u_short	isl_filler[2];
+		u_short	isl_ports[2];	/* TCP/UDP: source, destination */
+		u_short	isl_icmp;	/* ICMP: type */
+	} isl_ps;
+	u_char	isl_v;			/* copy of is_v */
+	u_char	isl_p;			/* copy of is_p */
+	u_char	isl_flags;		/* is_flags, narrowed to 8 bits */
+	u_char	isl_state[2];		/* TCP only: copy of is_state */
+} ipslog_t;
+
+#define	isl_sport	isl_ps.isl_ports[0]
+#define	isl_dport	isl_ps.isl_ports[1]
+#define	isl_itype	isl_ps.isl_icmp
+
+/* values for isl_type */
+#define	ISL_NEW		0
+#define	ISL_EXPIRE	0xffff
+#define	ISL_FLUSH	0xfffe
+#define	ISL_REMOVE	0xfffd
+
+
+/*
+ * State table statistics.  Only the members whose use is visible in the
+ * state code near this header are described; the others are left
+ * uncommented on purpose.
+ */
+typedef	struct	ips_stat {
+	u_long	iss_hits;	/* packets that matched a state entry */
+	u_long	iss_miss;	/* lookups that found no entry */
+	u_long	iss_max;
+	u_long	iss_tcp;
+	u_long	iss_udp;
+	u_long	iss_icmp;
+	u_long	iss_nomem;
+	u_long	iss_expire;	/* non-TCP entries removed by timeout */
+	u_long	iss_fin;	/* TCP entries removed by timeout */
+	u_long	iss_active;
+	u_long	iss_logged;
+	u_long	iss_logfail;
+	u_long	iss_inuse;	/* number of non-empty hash buckets */
+	ipstate_t **iss_table;	/* presumably the hash table -- confirm */
+	ipstate_t *iss_list;	/* presumably the entry list -- confirm */
+} ips_stat_t;
+
+
+/* Timeout values assigned to state entries by the state code. */
+extern	u_long	fr_tcpidletimeout;
+extern	u_long	fr_tcpclosewait;
+extern	u_long	fr_tcplastack;
+extern	u_long	fr_tcptimeout;
+extern	u_long	fr_tcpclosed;
+extern	u_long	fr_tcphalfclosed;
+extern	u_long	fr_udptimeout;
+extern	u_long	fr_icmptimeout;
+/* when non-zero, fr_checkstate() returns without looking anything up */
+extern	int	fr_state_lock;
+extern	int	fr_stateinit __P((void));
+extern	int	fr_tcpstate __P((ipstate_t *, fr_info_t *, ip_t *, tcphdr_t *));
+extern	ipstate_t	*fr_addstate __P((ip_t *, fr_info_t *, u_int));
+extern	frentry_t	*fr_checkstate __P((ip_t *, fr_info_t *));
+extern	void	ip_statesync __P((void *));
+extern	void	fr_timeoutstate __P((void));
+extern	void	fr_tcp_age __P((u_long *, u_char *, fr_info_t *, int));
+extern	void	fr_stateunload __P((void));
+extern	void	ipstate_log __P((struct ipstate *, u_int));
+/* the type of the second argument differs between platforms */
+#if defined(__NetBSD__) || defined(__OpenBSD__)
+extern	int	fr_state_ioctl __P((caddr_t, u_long, int));
+#else
+extern	int	fr_state_ioctl __P((caddr_t, int, int));
+#endif
+
+#endif /* __IP_STATE_H__ */
diff --git a/sys/netinet/ip_var.h b/sys/netinet/ip_var.h
new file mode 100644
index 0000000..bc8b797
--- /dev/null
+++ b/sys/netinet/ip_var.h
@@ -0,0 +1,194 @@
+/*
+ * Copyright (c) 1982, 1986, 1993
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)ip_var.h	8.2 (Berkeley) 1/9/95
+ * $FreeBSD$
+ */
+
+#ifndef _NETINET_IP_VAR_H_
+#define	_NETINET_IP_VAR_H_
+
+/*
+ * Overlay for ip header used by other protocols (tcp, udp).
+ */
+struct ipovly {
+	u_char	ih_x1[9];		/* (unused) */
+	u_char	ih_pr;			/* protocol */
+	u_short	ih_len;			/* protocol length */
+	struct	in_addr ih_src;		/* source internet address */
+	struct	in_addr ih_dst;		/* destination internet address */
+};
+
+/*
+ * Ip reassembly queue structure.  Each fragment
+ * being reassembled is attached to one of these structures.
+ * They are timed out after ipq_ttl drops to 0, and may also
+ * be reclaimed if memory becomes tight.
+ */
+struct ipq {
+	TAILQ_ENTRY(ipq) ipq_list;	/* to other reass headers */
+	u_char	ipq_ttl;		/* time for reass q to live */
+	u_char	ipq_p;			/* protocol of this fragment */
+	u_short	ipq_id;			/* sequence id for reassembly */
+	struct mbuf *ipq_frags;		/* to ip headers of fragments */
+	struct	in_addr ipq_src,ipq_dst; /* source and destination addresses */
+#ifdef IPDIVERT				/* fields below exist only with IPDIVERT */
+	u_int32_t ipq_div_info;		/* ipfw divert port & flags */
+	u_int16_t ipq_div_cookie;	/* ipfw divert cookie */
+#endif /* IPDIVERT */
+};
+
+/*
+ * Structure stored in mbuf in inpcb.ip_options
+ * and passed to ip_output when ip options are in use.
+ * The actual length of the options (including ipopt_dst)
+ * is in m_len.
+ */
+#define MAX_IPOPTLEN	40
+
+struct ipoption {
+	struct	in_addr ipopt_dst;	/* first-hop dst if source routed */
+	char	ipopt_list[MAX_IPOPTLEN];	/* options proper */
+};
+
+/*
+ * Structure attached to inpcb.ip_moptions and
+ * passed to ip_output when IP multicast options are in use.
+ */
+struct ip_moptions {
+	struct	ifnet *imo_multicast_ifp; /* ifp for outgoing multicasts */
+	u_char	imo_multicast_ttl;	/* TTL for outgoing multicasts */
+	u_char	imo_multicast_loop;	/* 1 => hear sends if a member */
+	u_short	imo_num_memberships;	/* no. memberships this socket */
+	struct	in_multi *imo_membership[IP_MAX_MEMBERSHIPS]; /* membership entries, capacity IP_MAX_MEMBERSHIPS */
+	u_long	imo_multicast_vif;	/* vif num outgoing multicasts */
+};
+
+struct	ipstat {
+	u_long	ips_total;		/* total packets received */
+	u_long	ips_badsum;		/* checksum bad */
+	u_long	ips_tooshort;		/* packet too short */
+	u_long	ips_toosmall;		/* not enough data */
+	u_long	ips_badhlen;		/* ip header length < data size */
+	u_long	ips_badlen;		/* ip length < ip header length */
+	u_long	ips_fragments;		/* fragments received */
+	u_long	ips_fragdropped;	/* frags dropped (dups, out of space) */
+	u_long	ips_fragtimeout;	/* fragments timed out */
+	u_long	ips_forward;		/* packets forwarded */
+	u_long	ips_fastforward;	/* packets fast forwarded */
+	u_long	ips_cantforward;	/* packets rcvd for unreachable dest */
+	u_long	ips_redirectsent;	/* packets forwarded on same net */
+	u_long	ips_noproto;		/* unknown or unsupported protocol */
+	u_long	ips_delivered;		/* datagrams delivered to upper level*/
+	u_long	ips_localout;		/* total ip packets generated here */
+	u_long	ips_odropped;		/* lost packets due to nobufs, etc. */
+	u_long	ips_reassembled;	/* total packets reassembled ok */
+	u_long	ips_fragmented;		/* datagrams successfully fragmented */
+	u_long	ips_ofragments;		/* output fragments created */
+	u_long	ips_cantfrag;		/* don't fragment flag was set, etc. */
+	u_long	ips_badoptions;		/* error in option processing */
+	u_long	ips_noroute;		/* packets discarded due to no route */
+	u_long	ips_badvers;		/* ip version != 4 */
+	u_long	ips_rawout;		/* total raw ip packets generated */
+	u_long	ips_toolong;		/* ip length > max ip packet size */
+	u_long	ips_notmember;		/* multicasts for unregistered grps */
+	u_long	ips_nogif;		/* no match gif found */
+};
+
+#ifdef _KERNEL
+
+/* flags passed to ip_output as last parameter */
+#define	IP_FORWARDING		0x1		/* most of ip header exists */
+#define	IP_RAWOUTPUT		0x2		/* raw ip header exists */
+#define	IP_ROUTETOIF		SO_DONTROUTE	/* bypass routing tables */
+#define	IP_ALLOWBROADCAST	SO_BROADCAST	/* can send broadcast packets */
+
+struct ip;
+struct inpcb;
+struct route;
+struct sockopt;
+
+extern struct	ipstat	ipstat;
+extern u_short	ip_id;				/* ip packet ctr, for ids */
+extern int	ip_defttl;			/* default IP ttl */
+extern int	ipforwarding;			/* ip forwarding */
+extern struct route ipforward_rt;		/* ip forwarding cached route */
+extern u_char	ip_protox[];
+extern struct socket *ip_rsvpd;	/* reservation protocol daemon */
+extern struct socket *ip_mrouter; /* multicast routing daemon */
+extern int	(*legal_vif_num) __P((int));
+extern u_long	(*ip_mcast_src) __P((int));
+extern int rsvp_on;
+extern struct	pr_usrreqs rip_usrreqs;
+
+int	 ip_ctloutput __P((struct socket *, struct sockopt *sopt));
+void	 ip_drain __P((void));
+void	 ip_freemoptions __P((struct ip_moptions *));
+void	 ip_init __P((void));
+extern int	 (*ip_mforward) __P((struct ip *, struct ifnet *, struct mbuf *,
+			  struct ip_moptions *));
+int	 ip_output __P((struct mbuf *,
+	    struct mbuf *, struct route *, int, struct ip_moptions *));
+void	 ip_savecontrol __P((struct inpcb *, struct mbuf **, struct ip *,
+		struct mbuf *));
+void	 ip_slowtimo __P((void));
+struct mbuf *
+	 ip_srcroute __P((void));
+void	 ip_stripoptions __P((struct mbuf *, struct mbuf *));
+int	 rip_ctloutput __P((struct socket *, struct sockopt *));
+void	 rip_ctlinput __P((int, struct sockaddr *, void *));
+void	 rip_init __P((void));
+void	 rip_input __P((struct mbuf *, int, int));
+int	 rip_output __P((struct mbuf *, struct socket *, u_long));
+void	ipip_input __P((struct mbuf *, int, int));
+void	rsvp_input __P((struct mbuf *, int, int));
+int	ip_rsvp_init __P((struct socket *));
+int	ip_rsvp_done __P((void));
+int	ip_rsvp_vif_init __P((struct socket *, struct sockopt *));
+int	ip_rsvp_vif_done __P((struct socket *, struct sockopt *));
+void	ip_rsvp_force_done __P((struct socket *));
+
+#ifdef IPDIVERT
+void	div_init __P((void));
+void	div_input __P((struct mbuf *, int, int));
+void	divert_packet __P((struct mbuf *, int, int));
+extern struct pr_usrreqs div_usrreqs;
+extern u_int16_t ip_divert_cookie;
+#endif
+
+extern struct sockaddr_in *ip_fw_fwd_addr;
+
+void	in_delayed_cksum(struct mbuf *m);
+
+#endif /* _KERNEL */
+
+#endif /* !_NETINET_IP_VAR_H_ */
diff --git a/sys/netinet/ipl.h b/sys/netinet/ipl.h
new file mode 100644
index 0000000..79d0bd3
--- /dev/null
+++ b/sys/netinet/ipl.h
@@ -0,0 +1,17 @@
+/*
+ * Copyright (C) 1993-2000 by Darren Reed.
+ *
+ * Redistribution and use in source and binary forms are permitted
+ * provided that this notice is preserved and due credit is given
+ * to the original author and the contributors.
+ *
+ * @(#)ipl.h	1.21 6/5/96
+ * $FreeBSD$
+ */
+
+#ifndef	__IPL_H__
+#define	__IPL_H__
+
+#define	IPL_VERSION	"IP Filter: v3.4.16"
+
+#endif
diff --git a/sys/netinet/ipprotosw.h b/sys/netinet/ipprotosw.h
new file mode 100644
index 0000000..1d65f0c
--- /dev/null
+++ b/sys/netinet/ipprotosw.h
@@ -0,0 +1,109 @@
+/*
+ * Copyright (C) 1995, 1996, 1997, 1998, and 1999 WIDE Project.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. Neither the name of the project nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+/*-
+ * Copyright (c) 1982, 1986, 1993
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)protosw.h	8.1 (Berkeley) 6/2/93
+ * $FreeBSD$
+ */
+
+#ifndef _NETINET_IPPROTOSW_H_
+#define _NETINET_IPPROTOSW_H_
+
+/*
+ * For pfil_head structure.
+ */
+#include <net/pfil.h>
+
+/* Forward declare these structures referenced from prototypes below. */
+struct mbuf;
+struct proc;
+struct sockaddr;
+struct socket;
+struct sockopt;
+
+struct ipprotosw {
+	short	pr_type;		/* socket type used for */
+	struct	domain *pr_domain;	/* domain protocol a member of */
+	short	pr_protocol;		/* protocol number */
+	short	pr_flags;		/* see below */
+/* protocol-protocol hooks */
+	void	(*pr_input) __P((struct mbuf *, int off, int proto));
+					/* input to protocol (from below) */
+	int	(*pr_output)	__P((struct mbuf *m, struct socket *so));
+					/* output to protocol (from above) */
+	void	(*pr_ctlinput)__P((int, struct sockaddr *, void *));
+					/* control input (from below) */
+	int	(*pr_ctloutput)__P((struct socket *, struct sockopt *));
+					/* control output (from above) */
+/* user-protocol hook */
+	void	*pr_ousrreq;		/* NOTE(review): untyped; presumably a slot kept for the old pr_usrreq hook — confirm */
+/* utility hooks */
+	void	(*pr_init) __P((void));	/* initialization hook */
+	void	(*pr_fasttimo) __P((void));
+					/* fast timeout (200ms) */
+	void	(*pr_slowtimo) __P((void));
+					/* slow timeout (500ms) */
+	void	(*pr_drain) __P((void));
+					/* flush any excess space possible */
+	struct	pr_usrreqs *pr_usrreqs;	/* supersedes pr_usrreq() */
+	struct	pfil_head	pr_pfh;	/* pfil_head, from <net/pfil.h> included above */
+};
+
+#endif /* !_NETINET_IPPROTOSW_H_ */
diff --git a/sys/netinet/libalias/HISTORY b/sys/netinet/libalias/HISTORY
new file mode 100644
index 0000000..c5bca59
--- /dev/null
+++ b/sys/netinet/libalias/HISTORY
@@ -0,0 +1,145 @@
+$FreeBSD$
+
+Version 1.0: August 11, 1996 (cjm)
+
+Version 1.1:  August 20, 1996  (cjm)
+    - Host accepts incoming connections for ports 0 to 1023.
+
+Version 1.2:  September 7, 1996 (cjm)
+    - Fragment handling error in alias_db.c corrected.
+
+Version 1.3:  September 15, 1996 (cjm)
+    - Generalized mechanism for handling incoming
+      connections (no more 0 to 1023 restriction).
+
+    - Increased ICMP support (will handle traceroute now).
+
+    - Improved TCP close connection logic.
+
+Version 1.4: September 16, 1996 (cjm)
+
+Version 1.5: September 17, 1996 (cjm)
+    - Corrected error in handling incoming UDP packets
+      with zero checksum.
+
+Version 1.6: September 18, 1996
+    - Simplified ICMP data storage.  Will now handle
+      tracert from Win95 and NT as well as FreeBSD
+      traceroute, which uses UDP packets to non-existent
+      ports.
+
+Version 1.7: January 9, 1997 (cjm)
+    - Reduced malloc() activity for ICMP echo and
+      timestamp requests.
+
+    - Added handling for out-of-order IP fragments.
+
+    - Switched to differential checksum computation
+      for IP headers (TCP, UDP and ICMP checksums
+      were already differential).
+
+    - Accepts FTP data connections from other than
+      port 20.  This allows FTP connections
+      from two hosts which are both running packet
+      aliasing.
+
+    - Checksum error on FTP transfers.  Problem
+      in code located by Martin Renters and
+      Brian Somers.
+
+Version 1.8: January 14, 1997 (cjm)
+    - Fixed data type error in function StartPoint()
+      in alias_db.c (this bug did not exist before v1.7)
+      Problem in code located by Ari Suutari.
+
+Version 1.9: February 1, 1997 (Eivind Eklund <perhaps@yes.no>)
+    - Added support for IRC DCC (ee)
+
+    - Changed the aliasing routines to use ANSI style
+      throughout (ee)
+
+    - Minor API changes for integration with
+      programs other than PPP (ee)
+
+    - Fixed minor security hole in alias_ftp.c for
+      other applications of the aliasing software.
+      Hole could _not_ manifest in ppp+pktAlias, but
+      could potentially manifest in other applications
+      of the aliasing. (ee)
+
+    - Connections initiated from packet aliasing
+      host machine will not have their port number
+      aliased unless it conflicts with an aliasing
+      port already being used. (There is an option
+      to disable this for debugging) (cjm)
+
+    - Sockets will be allocated in cases where
+      there might be port interference with the
+      host machine.  This can be disabled in cases
+      where the ppp host will be acting purely as a
+      masquerading router and not generate any
+      traffic of its own.
+      (cjm)
+
+Version 2.0: March, 1997 (cjm)
+    - Aliasing links are cleared only when a host interface address
+      changes.
+
+    - PacketAliasPermanentLink() API added.
+
+    - Option for only aliasing private, unregistered
+      IP addresses added.
+
+    - Substantial rework to the aliasing lookup engine.
+
+Version 2.1: May, 1997 (cjm)
+    - Continuing rework to the aliasing lookup engine
+      to support multiple incoming addresses and static
+      NAT.  PacketAliasRedirectPort() and
+      PacketAliasRedirectAddr() added to API.
+
+    - Now supports outgoing as well as incoming ICMP
+      error messages.
+
+Version 2.2: July, 1997 (cjm)
+    - Rationalized API function names to all begin with
+      "PacketAlias..."  Old function names are retained
+      for backwards compatibility.
+
+    - Packet aliasing engine will now free memory of
+      fragments which are never resolved after a timeout
+      period.  Once a fragment is resolved, it becomes
+      the user's responsibility to free the memory.
+
+Version 2.3: August 11, 1997 (cjm)
+    - Problem associated with socket file descriptor
+      accumulation in alias_db.c corrected.  The sockets
+      had to be closed when a binding failed.  Problem 
+      in code located by Gordon Burditt.
+
+Version 2.4: September 1, 1997 (cjm)
+    - PKT_ALIAS_UNREGISTERED_ONLY option repaired.
+      This part of the code was incorrectly re-implemented
+      in version 2.1.
+
+Version 2.5: December, 1997 (ee)
+    - Added PKT_ALIAS_PUNCH_FW mode for firewall
+      bypass of FTP/IRC DCC data connections.  Also added
+      improved TCP connection monitoring.
+
+Version 2.6: May, 1998 (amurai)
+    - Added supporting routine for NetBios over TCP/IP.
+
+Version 3.0: January 1, 1999
+    - Transparent proxying support added.
+    - PPTP redirecting support added based on patches
+      contributed by Dru Nelson <dnelson@redwoodsoft.com>.
+
+Version 3.1: May, 2000 (Erik Salander, erik@whistle.com)
+    - Added support to alias 227 replies, allows aliasing for
+      FTP servers in passive mode.
+    - Added support for PPTP aliasing.
+
+Version 3.2: July, 2000 (Erik Salander, erik@whistle.com and 
+      Junichi Satoh, junichi@junichi.org)
+    - Added support for streaming media (RTSP and PNA) aliasing.
diff --git a/sys/netinet/libalias/Makefile b/sys/netinet/libalias/Makefile
new file mode 100644
index 0000000..a6f577d
--- /dev/null
+++ b/sys/netinet/libalias/Makefile
@@ -0,0 +1,13 @@
+# $FreeBSD$
+
+LIB=		alias
+SHLIB_MAJOR=	4
+SHLIB_MINOR=	0
+CFLAGS+=	-Wall -Wmissing-prototypes
+SRCS=		alias.c alias_cuseeme.c alias_db.c alias_ftp.c alias_irc.c \
+		alias_nbt.c alias_pptp.c alias_proxy.c alias_smedia.c \
+		alias_util.c
+INCS=		alias.h
+MAN=		libalias.3
+
+.include <bsd.lib.mk>
diff --git a/sys/netinet/libalias/alias.c b/sys/netinet/libalias/alias.c
new file mode 100644
index 0000000..bf5fd47
--- /dev/null
+++ b/sys/netinet/libalias/alias.c
@@ -0,0 +1,1538 @@
+/* -*- mode: c; tab-width: 8; c-basic-indent: 4; -*- */
+/*
+    Alias.c provides supervisory control for the functions of the
+    packet aliasing software.  It consists of routines to monitor
+    TCP connection state, protocol-specific aliasing routines,
+    fragment handling and the following outside world functional
+    interfaces: SaveFragmentPtr, GetFragmentPtr, FragmentAliasIn,
+    PacketAliasIn and PacketAliasOut.
+
+    The other C program files are briefly described. The data
+    structure framework which holds information needed to translate
+    packets is encapsulated in alias_db.c.  Data is accessed by
+    function calls, so other segments of the program need not know
+    about the underlying data structures.  Alias_ftp.c contains
+    special code for modifying the ftp PORT command used to establish
+    data connections, while alias_irc.c does the same for IRC
+    DCC. Alias_util.c contains a few utility routines.
+
+    This software is placed into the public domain with no restrictions
+    on its distribution.
+
+    Version 1.0 August, 1996  (cjm)
+
+    Version 1.1 August 20, 1996  (cjm)
+        PPP host accepts incoming connections for ports 0 to 1023.
+        (Gary Roberts pointed out the need to handle incoming
+         connections.)
+
+    Version 1.2 September 7, 1996 (cjm)
+        Fragment handling error in alias_db.c corrected.
+        (Tom Torrance helped fix this problem.)
+
+    Version 1.4 September 16, 1996 (cjm)
+        - A more generalized method for handling incoming
+          connections, without the 0-1023 restriction, is
+          implemented in alias_db.c
+        - Improved ICMP support in alias.c.  Traceroute
+          packet streams can now be correctly aliased.
+        - TCP connection closing logic simplified in
+          alias.c and now allows for additional 1 minute
+          "grace period" after FIN or RST is observed.
+
+    Version 1.5 September 17, 1996 (cjm)
+        Corrected error in handling incoming UDP packets with 0 checksum.
+        (Tom Torrance helped fix this problem.)
+
+    Version 1.6 September 18, 1996 (cjm)
+        Simplified ICMP aliasing scheme.  Should now support
+        traceroute from Win95 as well as FreeBSD.
+
+    Version 1.7 January 9, 1997 (cjm)
+        - Out-of-order fragment handling.
+        - IP checksum error fixed for ftp transfers
+          from aliasing host.
+        - Integer return codes added to all
+          aliasing/de-aliasing functions.
+        - Some obsolete comments cleaned up.
+        - Differential checksum computations for
+          IP header (TCP, UDP and ICMP were already
+          differential).
+
+    Version 2.1 May 1997 (cjm)
+        - Added support for outgoing ICMP error
+          messages.
+        - Added two functions PacketAliasIn2()
+          and PacketAliasOut2() for dynamic address
+          control (e.g. round-robin allocation of
+          incoming packets).
+
+    Version 2.2 July 1997 (cjm)
+        - Rationalized API function names to begin
+          with "PacketAlias..."
+        - Eliminated PacketAliasIn2() and
+          PacketAliasOut2() as poorly conceived.
+
+    Version 2.3 Dec 1998 (dillon)
+	- Major bounds checking additions, see FreeBSD/CVS
+
+    Version 3.1 May, 2000 (salander) 
+	- Added hooks to handle PPTP.
+
+    Version 3.2 July, 2000 (salander and satoh)
+	- Added PacketUnaliasOut routine.
+	- Added hooks to handle RTSP/RTP.
+
+    See HISTORY file for additional revisions.
+
+    $FreeBSD$
+*/
+
+#include <sys/types.h>
+
+#include <netinet/in_systm.h>
+#include <netinet/in.h>
+#include <netinet/ip.h>
+#include <netinet/ip_icmp.h>
+#include <netinet/tcp.h>
+#include <netinet/udp.h>
+
+#include <stdio.h>
+
+#include "alias_local.h"
+#include "alias.h"
+
+#define NETBIOS_NS_PORT_NUMBER 137
+#define NETBIOS_DGM_PORT_NUMBER 138
+#define FTP_CONTROL_PORT_NUMBER 21
+#define IRC_CONTROL_PORT_NUMBER_1 6667
+#define IRC_CONTROL_PORT_NUMBER_2 6668
+#define CUSEEME_PORT_NUMBER 7648
+#define RTSP_CONTROL_PORT_NUMBER_1 554
+#define RTSP_CONTROL_PORT_NUMBER_2 7070
+#define PPTP_CONTROL_PORT_NUMBER 1723
+
+
+
+
+/* TCP Handling Routines
+
+    TcpMonitorIn()  -- These routines monitor TCP connections, and
+    TcpMonitorOut()    delete a link when a connection is closed.
+
+These routines look for SYN, FIN and RST flags to determine when TCP
+connections open and close.  When a TCP connection closes, the data
+structure containing packet aliasing information is deleted after
+a timeout period.
+*/
+
+/* Local prototypes */
+static void TcpMonitorIn(struct ip *, struct alias_link *);
+
+static void TcpMonitorOut(struct ip *, struct alias_link *);
+
+
+static void
+TcpMonitorIn(struct ip *pip, struct alias_link *link)
+{
+/* Advance the inbound TCP state of the link from this segment's flags */
+    struct tcphdr *tcp_hdr;
+    tcp_hdr = (struct tcphdr *) ((char *) pip + (pip->ip_hl << 2));
+
+    switch (GetStateIn(link))
+    {
+        case ALIAS_TCP_STATE_CONNECTED:
+            if ((tcp_hdr->th_flags & (TH_FIN | TH_RST)) != 0)
+                SetStateIn(link, ALIAS_TCP_STATE_DISCONNECTED);
+            break;
+        case ALIAS_TCP_STATE_NOT_CONNECTED:
+/* RST is tested before SYN, so a segment carrying both disconnects */
+            if ((tcp_hdr->th_flags & TH_RST) != 0)
+                SetStateIn(link, ALIAS_TCP_STATE_DISCONNECTED);
+            else if ((tcp_hdr->th_flags & TH_SYN) != 0)
+                SetStateIn(link, ALIAS_TCP_STATE_CONNECTED);
+            break;
+    }
+}
+
+static void
+TcpMonitorOut(struct ip *pip, struct alias_link *link)
+{
+/* Advance the outbound TCP state of the link from this segment's flags */
+    struct tcphdr *tcp_hdr;
+    tcp_hdr = (struct tcphdr *) ((char *) pip + (pip->ip_hl << 2));
+
+    switch (GetStateOut(link))
+    {
+        case ALIAS_TCP_STATE_CONNECTED:
+            if ((tcp_hdr->th_flags & (TH_FIN | TH_RST)) != 0)
+                SetStateOut(link, ALIAS_TCP_STATE_DISCONNECTED);
+            break;
+        case ALIAS_TCP_STATE_NOT_CONNECTED:
+/* RST is tested before SYN, so a segment carrying both disconnects */
+            if ((tcp_hdr->th_flags & TH_RST) != 0)
+                SetStateOut(link, ALIAS_TCP_STATE_DISCONNECTED);
+            else if ((tcp_hdr->th_flags & TH_SYN) != 0)
+                SetStateOut(link, ALIAS_TCP_STATE_CONNECTED);
+            break;
+    }
+}
+
+
+
+
+
+/* Protocol Specific Packet Aliasing Routines 
+
+    IcmpAliasIn(), IcmpAliasIn1(), IcmpAliasIn2()
+    IcmpAliasOut(), IcmpAliasOut1(), IcmpAliasOut2()
+    ProtoAliasIn(), ProtoAliasOut()
+    UdpAliasIn(), UdpAliasOut()
+    TcpAliasIn(), TcpAliasOut()
+
+These routines handle protocol specific details of packet aliasing.
+One may observe a certain amount of repetitive arithmetic in these
+functions, the purpose of which is to compute a revised checksum
+without actually summing over the entire data packet, which could be
+unnecessarily time consuming.
+
+The purpose of the packet aliasing routines is to replace the source
+address of the outgoing packet and then correctly put it back for
+any incoming packets.  For TCP and UDP, ports are also re-mapped.
+
+For ICMP echo/timestamp requests and replies, the following scheme
+is used: the ID number is replaced by an alias for the outgoing
+packet.
+
+ICMP error messages are handled by looking at the IP fragment
+in the data section of the message.
+
+For TCP and UDP protocols, a port number is chosen for an outgoing
+packet, and then incoming packets are identified by IP address and
+port numbers.  For TCP packets, there is additional logic in the event
+that sequence and ACK numbers have been altered (as in the case for
+FTP data port commands).
+
+The port numbers used by the packet aliasing module are not true
+ports in the Unix sense.  No sockets are actually bound to ports.
+They are more correctly thought of as placeholders.
+
+All packets go through the aliasing mechanism, whether they come from
+the gateway machine or other machines on a local area network.
+*/
+
+
+/* Local prototypes */
+static int IcmpAliasIn1(struct ip *);
+static int IcmpAliasIn2(struct ip *);
+static int IcmpAliasIn (struct ip *);
+
+static int IcmpAliasOut1(struct ip *);
+static int IcmpAliasOut2(struct ip *);
+static int IcmpAliasOut (struct ip *);
+
+static int ProtoAliasIn(struct ip *);
+static int ProtoAliasOut(struct ip *);
+
+static int UdpAliasOut(struct ip *);
+static int UdpAliasIn (struct ip *);
+
+static int TcpAliasOut(struct ip *, int);
+static int TcpAliasIn (struct ip *);
+
+
+static int
+IcmpAliasIn1(struct ip *pip)
+{
+/*
+    Handle incoming ICMP echo and timestamp messages (requests and
+    replies).  When FindIcmpIn() yields a link, the original ICMP id
+    and destination address are restored and both checksums patched.
+*/
+    struct alias_link *link;
+    struct icmp *icmp_hdr;
+    struct in_addr orig_addr;
+    u_short orig_id;
+    int delta;
+
+/* The ICMP header starts right after the (variable length) IP header */
+    icmp_hdr = (struct icmp *) ((char *) pip + (pip->ip_hl << 2));
+
+/* Look the link up by source, destination and the ICMP id carried */
+    link = FindIcmpIn(pip->ip_src, pip->ip_dst, icmp_hdr->icmp_id, 1);
+    if (link == NULL)
+        return(PKT_ALIAS_IGNORED);
+
+    orig_id = GetOriginalPort(link);
+
+/*
+    Fold the id change into the ICMP checksum while the packet still
+    holds the aliased id, then write the original id back.
+*/
+    delta = icmp_hdr->icmp_id - orig_id;
+    ADJUST_CHECKSUM(delta, icmp_hdr->icmp_cksum);
+    icmp_hdr->icmp_id = orig_id;
+
+/*
+    Same for the IP header: patch ip_sum for the address change
+    before ip_dst is overwritten with the original address.
+*/
+    orig_addr = GetOriginalAddress(link);
+    DifferentialChecksum(&pip->ip_sum,
+                         (u_short *) &orig_addr,
+                         (u_short *) &pip->ip_dst,
+                         2);
+    pip->ip_dst = orig_addr;
+
+    return(PKT_ALIAS_OK);
+}
+
+static int
+IcmpAliasIn2(struct ip *pip)
+{
+/*
+    De-alias incoming ICMP error messages.  Their payload holds the IP
+    header and first 64 bits of the datagram the error refers to.
+*/
+    struct ip *ip;
+    struct icmp *ic, *ic2;
+    struct udphdr *ud;
+    struct tcphdr *tc;
+    struct alias_link *link;
+
+    ic = (struct icmp *) ((char *) pip + (pip->ip_hl << 2));
+    ip = &ic->icmp_ip;                  /* IP header embedded in the ICMP data */
+
+    ud = (struct udphdr *) ((char *) ip + (ip->ip_hl <<2));
+    tc = (struct tcphdr *) ud;          /* same bytes viewed as a TCP header */
+    ic2 = (struct icmp *) ud;           /* same bytes viewed as an ICMP header */
+
+    if (ip->ip_p == IPPROTO_UDP)        /* lookups pass the embedded dst first */
+        link = FindUdpTcpIn(ip->ip_dst, ip->ip_src,
+                            ud->uh_dport, ud->uh_sport,
+                            IPPROTO_UDP, 0);
+    else if (ip->ip_p == IPPROTO_TCP)
+        link = FindUdpTcpIn(ip->ip_dst, ip->ip_src,
+                            tc->th_dport, tc->th_sport,
+                            IPPROTO_TCP, 0);
+    else if (ip->ip_p == IPPROTO_ICMP) {
+        if (ic2->icmp_type == ICMP_ECHO || ic2->icmp_type == ICMP_TSTAMP)
+            link = FindIcmpIn(ip->ip_dst, ip->ip_src, ic2->icmp_id, 0);
+        else                            /* other embedded ICMP types: no link */
+            link = NULL;
+    } else                              /* other embedded protocols: no link */
+        link = NULL;
+
+    if (link != NULL)
+    {
+        if (ip->ip_p == IPPROTO_UDP || ip->ip_p == IPPROTO_TCP)
+        {
+            u_short *sptr;
+            int accumulate;
+            struct in_addr original_address;
+            u_short original_port;
+
+            original_address = GetOriginalAddress(link);
+            original_port = GetOriginalPort(link);
+    
+/* Adjust ICMP checksum for the embedded address and port rewritten below */
+            sptr = (u_short *) &(ip->ip_src);
+            accumulate  = *sptr++;
+            accumulate += *sptr;
+            sptr = (u_short *) &original_address;
+            accumulate -= *sptr++;
+            accumulate -= *sptr;
+            accumulate += ud->uh_sport;     /* ud is used for TCP as well here */
+            accumulate -= original_port;
+            ADJUST_CHECKSUM(accumulate, ic->icmp_cksum);
+
+/* Un-alias destination address in the outer IP header (checksum first) */
+            DifferentialChecksum(&pip->ip_sum,
+                                 (u_short *) &original_address,
+                                 (u_short *) &pip->ip_dst,
+                                 2);
+            pip->ip_dst = original_address;
+
+/* Un-alias source address and source port of the original IP packet
+fragment contained in the ICMP data section */
+            ip->ip_src = original_address;
+            ud->uh_sport = original_port; 
+        }
+        else if (ip->ip_p == IPPROTO_ICMP)
+        {
+            u_short *sptr;
+            int accumulate;
+            struct in_addr original_address;
+            u_short original_id;
+
+            original_address = GetOriginalAddress(link);
+            original_id = GetOriginalPort(link);
+
+/* Adjust ICMP checksum for the embedded address and id rewritten below */
+            sptr = (u_short *) &(ip->ip_src);
+            accumulate  = *sptr++;
+            accumulate += *sptr;
+            sptr = (u_short *) &original_address;
+            accumulate -= *sptr++;
+            accumulate -= *sptr;
+            accumulate += ic2->icmp_id;
+            accumulate -= original_id;
+            ADJUST_CHECKSUM(accumulate, ic->icmp_cksum);
+
+/* Un-alias destination address in the outer IP header (checksum first) */
+            DifferentialChecksum(&pip->ip_sum,
+                                 (u_short *) &original_address,
+                                 (u_short *) &pip->ip_dst,
+                                 2);
+            pip->ip_dst = original_address;
+
+/* Un-alias source address of the original IP packet and the id of the
+   embedded ICMP datagram */
+            ip->ip_src = original_address;
+            ic2->icmp_id = original_id;
+        }
+        return(PKT_ALIAS_OK);
+    }
+    return(PKT_ALIAS_IGNORED);
+}
+
+
+static int
+IcmpAliasIn(struct ip *pip)
+{   /* Dispatch an incoming ICMP packet to IcmpAliasIn1/IcmpAliasIn2 by ICMP type. */
+    int iresult;
+    struct icmp *ic;
+
+/* Proxy-only mode: leave the packet untouched and report PKT_ALIAS_OK */
+    if (packetAliasMode & PKT_ALIAS_PROXY_ONLY)
+        return PKT_ALIAS_OK;
+
+    ic = (struct icmp *) ((char *) pip + (pip->ip_hl << 2));  /* skip IP header (ip_hl * 4 bytes) */
+
+    iresult = PKT_ALIAS_IGNORED;  /* result for any ICMP type not listed below */
+    switch (ic->icmp_type)
+    {
+        case ICMP_ECHOREPLY:
+        case ICMP_TSTAMPREPLY:
+            if (ic->icmp_code == 0)  /* replies with a non-zero code stay IGNORED */
+            {
+                iresult = IcmpAliasIn1(pip);
+            }
+            break;
+        case ICMP_UNREACH:
+        case ICMP_SOURCEQUENCH:
+        case ICMP_TIMXCEED:
+        case ICMP_PARAMPROB:
+            iresult = IcmpAliasIn2(pip);
+            break;
+        case ICMP_ECHO:  /* requests are passed on without the icmp_code check */
+        case ICMP_TSTAMP:
+            iresult = IcmpAliasIn1(pip);
+            break;
+    }
+    return(iresult);
+}
+
+
+static int
+IcmpAliasOut1(struct ip *pip)
+{
+/*
+    Alias outgoing echo/timestamp requests and de-alias outgoing replies.
+    Returns PKT_ALIAS_OK if a link was found, else PKT_ALIAS_IGNORED.
+*/
+    struct alias_link *link;
+    struct icmp *ic;
+
+    ic = (struct icmp *) ((char *) pip + (pip->ip_hl << 2));  /* skip IP header (ip_hl * 4 bytes) */
+
+/* Save overwritten data for when echo packet returns */
+    link = FindIcmpOut(pip->ip_src, pip->ip_dst, ic->icmp_id, 1);  /* final 1: presumably "create if missing" -- TODO confirm */
+    if (link != NULL)
+    {
+        u_short alias_id;
+        int accumulate;
+
+        alias_id = GetAliasPort(link);  /* the link's alias port is used as the ICMP id */
+
+/* Since data field is being modified, adjust ICMP checksum */
+        accumulate  = ic->icmp_id;  /* old id in, new id out, before the field is overwritten */
+        accumulate -= alias_id;
+        ADJUST_CHECKSUM(accumulate, ic->icmp_cksum);
+
+/* Alias the ICMP id field */
+        ic->icmp_id = alias_id;
+
+/* Change source address */
+        {
+            struct in_addr alias_address;
+
+            alias_address = GetAliasAddress(link);
+            DifferentialChecksum(&pip->ip_sum,  /* IP header checksum is fixed up before ip_src changes */
+                                 (u_short *) &alias_address,
+                                 (u_short *) &pip->ip_src,
+                                 2);
+            pip->ip_src = alias_address;
+        }
+
+        return(PKT_ALIAS_OK);
+    }
+    return(PKT_ALIAS_IGNORED);
+}
+
+
+static int
+IcmpAliasOut2(struct ip *pip)
+{
+/*
+    Alias outgoing ICMP error messages containing IP header and first 64
+    bits of datagram.  Returns PKT_ALIAS_OK on a link hit, else PKT_ALIAS_IGNORED.
+*/
+    struct ip *ip;
+    struct icmp *ic, *ic2;
+    struct udphdr *ud;
+    struct tcphdr *tc;
+    struct alias_link *link;
+
+    ic = (struct icmp *) ((char *) pip + (pip->ip_hl << 2));  /* outer ICMP header */
+    ip = &ic->icmp_ip;  /* IP header embedded in the ICMP message */
+
+    ud = (struct udphdr *) ((char *) ip + (ip->ip_hl <<2));  /* header following the embedded IP header */
+    tc = (struct tcphdr *) ud;  /* tc and ic2 are alternative views of the same bytes as ud */
+    ic2 = (struct icmp *) ud;
+
+    if (ip->ip_p == IPPROTO_UDP)  /* lookups pass dst/src (and dport/sport) swapped; final 0 presumably means "do not create" -- TODO confirm */
+        link = FindUdpTcpOut(ip->ip_dst, ip->ip_src,
+                            ud->uh_dport, ud->uh_sport,
+                            IPPROTO_UDP, 0);
+    else if (ip->ip_p == IPPROTO_TCP)
+        link = FindUdpTcpOut(ip->ip_dst, ip->ip_src,
+                            tc->th_dport, tc->th_sport,
+                            IPPROTO_TCP, 0);
+    else if (ip->ip_p == IPPROTO_ICMP) {
+        if (ic2->icmp_type == ICMP_ECHO || ic2->icmp_type == ICMP_TSTAMP)  /* only embedded requests are looked up */
+            link = FindIcmpOut(ip->ip_dst, ip->ip_src, ic2->icmp_id, 0);
+        else
+            link = NULL;
+    } else
+        link = NULL;
+
+    if (link != NULL)
+    {
+        if (ip->ip_p == IPPROTO_UDP || ip->ip_p == IPPROTO_TCP)
+        {
+            u_short *sptr;
+            int accumulate;
+            struct in_addr alias_address;
+            u_short alias_port;
+
+            alias_address = GetAliasAddress(link);
+            alias_port = GetAliasPort(link);
+    
+/* Adjust ICMP checksum for the embedded dst address and dst port rewritten below */
+            sptr = (u_short *) &(ip->ip_dst);  /* address handled as two 16-bit words */
+            accumulate  = *sptr++;
+            accumulate += *sptr;
+            sptr = (u_short *) &alias_address;
+            accumulate -= *sptr++;
+            accumulate -= *sptr;
+            accumulate += ud->uh_dport;  /* ud is used for TCP as well (tc points at the same bytes) */
+            accumulate -= alias_port;
+            ADJUST_CHECKSUM(accumulate, ic->icmp_cksum);
+
+/*
+ * Alias address in IP header if it comes from the host
+ * the original TCP/UDP packet was destined for.
+ */
+	    if (pip->ip_src.s_addr == ip->ip_dst.s_addr) {  /* must be tested before ip->ip_dst is overwritten below */
+		DifferentialChecksum(&pip->ip_sum,
+				     (u_short *) &alias_address,
+				     (u_short *) &pip->ip_src,
+				     2);
+		pip->ip_src = alias_address;
+	    }
+
+/* Alias address and port number of original IP packet
+fragment contained in ICMP data section */
+            ip->ip_dst = alias_address;
+            ud->uh_dport = alias_port; 
+        }
+        else if (ip->ip_p == IPPROTO_ICMP)
+        {
+            u_short *sptr;
+            int accumulate;
+            struct in_addr alias_address;
+            u_short alias_id;
+
+            alias_address = GetAliasAddress(link);
+            alias_id = GetAliasPort(link);  /* the link's alias port is used as the ICMP id */
+
+/* Adjust ICMP checksum for the embedded dst address and ICMP id rewritten below */
+            sptr = (u_short *) &(ip->ip_dst);
+            accumulate  = *sptr++;
+            accumulate += *sptr;
+            sptr = (u_short *) &alias_address;
+            accumulate -= *sptr++;
+            accumulate -= *sptr;
+            accumulate += ic2->icmp_id;
+            accumulate -= alias_id;
+            ADJUST_CHECKSUM(accumulate, ic->icmp_cksum);
+
+/*
+ * Alias address in IP header if it comes from the host
+ * the original ICMP message was destined for.
+ */
+	    if (pip->ip_src.s_addr == ip->ip_dst.s_addr) {  /* must be tested before ip->ip_dst is overwritten below */
+		DifferentialChecksum(&pip->ip_sum,
+				     (u_short *) &alias_address,
+				     (u_short *) &pip->ip_src,
+				     2);
+		pip->ip_src = alias_address;
+	    }
+
+/* Alias address of original IP packet and id field of the
+   embedded ICMP datagram */
+            ip->ip_dst = alias_address;
+            ic2->icmp_id = alias_id;
+        }
+        return(PKT_ALIAS_OK);
+    }
+    return(PKT_ALIAS_IGNORED);
+}
+
+
+static int
+IcmpAliasOut(struct ip *pip)
+{   /* Dispatch an outgoing ICMP packet to IcmpAliasOut1/IcmpAliasOut2 by ICMP type. */
+    int iresult;
+    struct icmp *ic;
+
+/* Proxy-only mode: leave the packet untouched and report PKT_ALIAS_OK */
+    if (packetAliasMode & PKT_ALIAS_PROXY_ONLY)
+        return PKT_ALIAS_OK;
+
+    ic = (struct icmp *) ((char *) pip + (pip->ip_hl << 2));  /* skip IP header (ip_hl * 4 bytes) */
+
+    iresult = PKT_ALIAS_IGNORED;  /* result for any ICMP type not listed below */
+    switch (ic->icmp_type)
+    {
+        case ICMP_ECHO:
+        case ICMP_TSTAMP:
+            if (ic->icmp_code == 0)  /* requests with a non-zero code stay IGNORED */
+            {
+                iresult = IcmpAliasOut1(pip);
+            }
+            break;
+        case ICMP_UNREACH:
+        case ICMP_SOURCEQUENCH:
+        case ICMP_TIMXCEED:
+        case ICMP_PARAMPROB:
+            iresult = IcmpAliasOut2(pip);
+            break;
+        case ICMP_ECHOREPLY:  /* replies are passed on without the icmp_code check */
+        case ICMP_TSTAMPREPLY:
+            iresult = IcmpAliasOut1(pip);  /* no break: this is the last case of the switch */
+    }
+    return(iresult);
+}
+
+
+
+static int
+ProtoAliasIn(struct ip *pip)
+{
+/*
+  Handle incoming IP packets of protocols with no dedicated handler.
+  The only thing which is done in this case is to rewrite the dest IP
+  address of the packet to that of our inside machine.  Returns
+  PKT_ALIAS_OK if a link was found (or proxy-only), else PKT_ALIAS_IGNORED.
+*/
+    struct alias_link *link;
+
+/* Proxy-only mode: leave the packet untouched and report PKT_ALIAS_OK */
+    if (packetAliasMode & PKT_ALIAS_PROXY_ONLY)
+        return PKT_ALIAS_OK;
+
+    link = FindProtoIn(pip->ip_src, pip->ip_dst, pip->ip_p);  /* keyed on addresses and protocol number only */
+    if (link != NULL)
+    {
+        struct in_addr original_address;
+
+        original_address = GetOriginalAddress(link);
+
+/* Restore original IP address; the checksum is fixed up before ip_dst changes */
+        DifferentialChecksum(&pip->ip_sum,
+                             (u_short *) &original_address,
+                             (u_short *) &pip->ip_dst,
+                             2);
+        pip->ip_dst = original_address;
+
+	return(PKT_ALIAS_OK);
+    }
+    return(PKT_ALIAS_IGNORED);
+}
+
+
+static int
+ProtoAliasOut(struct ip *pip)
+{
+/*
+  Handle outgoing IP packets of protocols with no dedicated handler.
+  The only thing which is done in this case is to alias the source IP
+  address.  Returns PKT_ALIAS_OK on a link hit, else PKT_ALIAS_IGNORED.
+*/
+    struct alias_link *link;
+
+/* Proxy-only mode: leave the packet untouched and report PKT_ALIAS_OK */
+    if (packetAliasMode & PKT_ALIAS_PROXY_ONLY)
+        return PKT_ALIAS_OK;
+
+    link = FindProtoOut(pip->ip_src, pip->ip_dst, pip->ip_p);  /* keyed on addresses and protocol number only */
+    if (link != NULL)
+    {
+        struct in_addr alias_address;
+
+        alias_address = GetAliasAddress(link);
+
+/* Change source address; the checksum is fixed up before ip_src changes */
+        DifferentialChecksum(&pip->ip_sum,
+                             (u_short *) &alias_address,
+                             (u_short *) &pip->ip_src,
+                             2);
+        pip->ip_src = alias_address;
+
+        return(PKT_ALIAS_OK);
+    }
+    return(PKT_ALIAS_IGNORED);
+}
+
+
+static int
+UdpAliasIn(struct ip *pip)
+{   /* De-alias an incoming UDP packet: restore destination port and address from its link. */
+    struct udphdr *ud;
+    struct alias_link *link;
+
+/* Proxy-only mode: leave the packet untouched and report PKT_ALIAS_OK */
+    if (packetAliasMode & PKT_ALIAS_PROXY_ONLY)
+        return PKT_ALIAS_OK;
+
+    ud = (struct udphdr *) ((char *) pip + (pip->ip_hl << 2));  /* skip IP header (ip_hl * 4 bytes) */
+
+    link = FindUdpTcpIn(pip->ip_src, pip->ip_dst,
+                        ud->uh_sport, ud->uh_dport,
+                        IPPROTO_UDP, 1);  /* final 1: presumably "create if missing" -- TODO confirm */
+    if (link != NULL)
+    {
+        struct in_addr alias_address;
+        struct in_addr original_address;
+        u_short alias_port;
+        int accumulate;
+        u_short *sptr;
+	int r = 0;  /* result of the NetBIOS handlers; negative makes us return IGNORED */
+
+        alias_address = GetAliasAddress(link);
+        original_address = GetOriginalAddress(link);
+        alias_port = ud->uh_dport;  /* keep the aliased port for the checksum delta */
+        ud->uh_dport = GetOriginalPort(link);  /* port is restored before the handlers below inspect it */
+
+/* Special processing for IP encoding protocols */
+	if (ntohs(ud->uh_dport) == CUSEEME_PORT_NUMBER)
+	    AliasHandleCUSeeMeIn(pip, original_address);
+/* NetBIOS datagram service: addresses inside the UDP data are handled by AliasHandleUdpNbt */
+	else if (ntohs(ud->uh_dport) == NETBIOS_DGM_PORT_NUMBER
+	      || ntohs(ud->uh_sport) == NETBIOS_DGM_PORT_NUMBER)
+	    r = AliasHandleUdpNbt(pip, link, &original_address, ud->uh_dport);
+	else if (ntohs(ud->uh_dport) == NETBIOS_NS_PORT_NUMBER
+	      || ntohs(ud->uh_sport) == NETBIOS_NS_PORT_NUMBER)
+	    r = AliasHandleUdpNbtNS(pip, link, &alias_address, &alias_port,
+				    &original_address, &ud->uh_dport);
+
+/* If UDP checksum is not zero, then adjust since destination port */
+/* is being unaliased and destination address is being altered.    */
+        if (ud->uh_sum != 0)  /* a zero checksum is left as zero */
+        {
+            accumulate  = alias_port;
+            accumulate -= ud->uh_dport;
+            sptr = (u_short *) &alias_address;  /* address handled as two 16-bit words */
+            accumulate += *sptr++;
+            accumulate += *sptr;
+            sptr = (u_short *) &original_address;
+            accumulate -= *sptr++;
+            accumulate -= *sptr;
+            ADJUST_CHECKSUM(accumulate, ud->uh_sum);
+        }
+
+/* Restore original IP address; the checksum is fixed up before ip_dst changes */
+        DifferentialChecksum(&pip->ip_sum,
+                             (u_short *) &original_address,
+                             (u_short *) &pip->ip_dst,
+                             2);
+        pip->ip_dst = original_address;
+
+	/*
+	 * If we cannot figure out the packet, ignore it (it has already been rewritten above).
+	 */
+	if (r < 0)
+	    return(PKT_ALIAS_IGNORED);
+	else
+	    return(PKT_ALIAS_OK);
+    }
+    return(PKT_ALIAS_IGNORED);
+}
+
+static int
+UdpAliasOut(struct ip *pip)
+{   /* Alias an outgoing UDP packet: replace source port and address with the link's alias values. */
+    struct udphdr *ud;
+    struct alias_link *link;
+
+/* Proxy-only mode: leave the packet untouched and report PKT_ALIAS_OK */
+    if (packetAliasMode & PKT_ALIAS_PROXY_ONLY)
+        return PKT_ALIAS_OK;
+
+    ud = (struct udphdr *) ((char *) pip + (pip->ip_hl << 2));  /* skip IP header (ip_hl * 4 bytes) */
+
+    link = FindUdpTcpOut(pip->ip_src, pip->ip_dst,
+                         ud->uh_sport, ud->uh_dport,
+                         IPPROTO_UDP, 1);  /* final 1: presumably "create if missing" -- TODO confirm */
+    if (link != NULL)
+    {
+        u_short alias_port;
+        struct in_addr alias_address;
+
+        alias_address = GetAliasAddress(link);
+        alias_port = GetAliasPort(link);
+
+/* Special processing for IP encoding protocols (handler return values are not checked here) */
+	if (ntohs(ud->uh_dport) == CUSEEME_PORT_NUMBER)
+	    AliasHandleCUSeeMeOut(pip, link);
+/* NetBIOS datagram service: addresses inside the UDP data are handled by AliasHandleUdpNbt */
+	else if (ntohs(ud->uh_dport) == NETBIOS_DGM_PORT_NUMBER
+	      || ntohs(ud->uh_sport) == NETBIOS_DGM_PORT_NUMBER)
+	    AliasHandleUdpNbt(pip, link, &alias_address, alias_port);
+	else if (ntohs(ud->uh_dport) == NETBIOS_NS_PORT_NUMBER
+	      || ntohs(ud->uh_sport) == NETBIOS_NS_PORT_NUMBER)
+	    AliasHandleUdpNbtNS(pip, link, &pip->ip_src, &ud->uh_sport,
+				&alias_address, &alias_port);
+
+/* If UDP checksum is not zero, adjust since source port is */
+/* being aliased and source address is being altered        */
+        if (ud->uh_sum != 0)  /* a zero checksum is left as zero */
+        {
+            int accumulate;
+            u_short *sptr;
+
+            accumulate  = ud->uh_sport;  /* old values are read before the fields are overwritten below */
+            accumulate -= alias_port;
+            sptr = (u_short *) &(pip->ip_src);  /* address handled as two 16-bit words */
+            accumulate += *sptr++;
+            accumulate += *sptr;
+            sptr = (u_short *) &alias_address;
+            accumulate -= *sptr++;
+            accumulate -= *sptr;
+            ADJUST_CHECKSUM(accumulate, ud->uh_sum);
+        }
+
+/* Put alias port in UDP header */
+        ud->uh_sport = alias_port;
+
+/* Change source address; the checksum is fixed up before ip_src changes */
+        DifferentialChecksum(&pip->ip_sum,
+                             (u_short *) &alias_address,
+                             (u_short *) &pip->ip_src,
+                             2);
+        pip->ip_src = alias_address;
+
+        return(PKT_ALIAS_OK);
+    }
+    return(PKT_ALIAS_IGNORED);
+}
+
+
+
+static int
+TcpAliasIn(struct ip *pip)
+{   /* De-alias an incoming TCP packet: destination, optional proxy source, optional ACK delta. */
+    struct tcphdr *tc;
+    struct alias_link *link;
+
+    tc = (struct tcphdr *) ((char *) pip + (pip->ip_hl << 2));  /* skip IP header (ip_hl * 4 bytes) */
+
+    link = FindUdpTcpIn(pip->ip_src, pip->ip_dst,
+                        tc->th_sport, tc->th_dport,
+                        IPPROTO_TCP,
+                        !(packetAliasMode & PKT_ALIAS_PROXY_ONLY));  /* flag is 0 in proxy-only mode; no early return here */
+    if (link != NULL)
+    {
+        struct in_addr alias_address;
+        struct in_addr original_address;
+        struct in_addr proxy_address;
+        u_short alias_port;
+        u_short proxy_port;
+        int accumulate;
+        u_short *sptr;
+
+/* Special processing for IP encoding protocols */
+        if (ntohs(tc->th_dport) == PPTP_CONTROL_PORT_NUMBER
+         || ntohs(tc->th_sport) == PPTP_CONTROL_PORT_NUMBER)
+            AliasHandlePptpIn(pip, link);
+
+        alias_address = GetAliasAddress(link);
+        original_address = GetOriginalAddress(link);
+        proxy_address = GetProxyAddress(link);
+        alias_port = tc->th_dport;  /* keep the aliased port for the checksum delta */
+        tc->th_dport = GetOriginalPort(link);
+        proxy_port = GetProxyPort(link);
+
+/* Adjust TCP checksum since destination port is being unaliased */
+/* and destination address is being altered.                     */
+        accumulate  = alias_port;
+        accumulate -= tc->th_dport;
+        sptr = (u_short *) &alias_address;  /* address handled as two 16-bit words */
+        accumulate += *sptr++;
+        accumulate += *sptr;
+        sptr = (u_short *) &original_address;
+        accumulate -= *sptr++;
+        accumulate -= *sptr;
+
+/* If this is a proxy, then modify the TCP source port and
+   checksum accumulation */
+        if (proxy_port != 0)
+        {
+            accumulate += tc->th_sport;  /* old port in ... */
+            tc->th_sport = proxy_port;
+            accumulate -= tc->th_sport;  /* ... new port out */
+
+            sptr = (u_short *) &pip->ip_src;  /* ip_src itself is only replaced further down */
+            accumulate += *sptr++;
+            accumulate += *sptr;
+            sptr = (u_short *) &proxy_address;
+            accumulate -= *sptr++;
+            accumulate -= *sptr;
+        }
+
+/* See if ACK number needs to be modified */
+        if (GetAckModified(link) == 1)
+        {
+            int delta;
+
+            delta = GetDeltaAckIn(pip, link);
+            if (delta != 0)
+            {
+                sptr = (u_short *) &tc->th_ack;  /* old ACK words in */
+                accumulate += *sptr++;
+                accumulate += *sptr;
+                tc->th_ack = htonl(ntohl(tc->th_ack) - delta);
+                sptr = (u_short *) &tc->th_ack;  /* new ACK words out */
+                accumulate -= *sptr++;
+                accumulate -= *sptr;
+            }
+        }
+
+        ADJUST_CHECKSUM(accumulate, tc->th_sum);  /* one TCP checksum update covers all changes above */
+
+/* Restore original IP address; accumulate is restarted for the IP header checksum */
+        sptr = (u_short *) &pip->ip_dst;
+        accumulate  = *sptr++;
+        accumulate += *sptr;
+        pip->ip_dst = original_address;
+        sptr = (u_short *) &pip->ip_dst;
+        accumulate -= *sptr++;
+        accumulate -= *sptr;
+
+/* If this is a transparent proxy packet, then modify the source
+   address */
+        if (proxy_address.s_addr != 0)
+        {
+            sptr = (u_short *) &pip->ip_src;
+            accumulate += *sptr++;
+            accumulate += *sptr;
+            pip->ip_src = proxy_address;
+            sptr = (u_short *) &pip->ip_src;
+            accumulate -= *sptr++;
+            accumulate -= *sptr;
+        }
+
+        ADJUST_CHECKSUM(accumulate, pip->ip_sum);
+
+/* Monitor TCP connection state */
+        TcpMonitorIn(pip, link);
+
+        return(PKT_ALIAS_OK);
+    }
+    return(PKT_ALIAS_IGNORED);
+}
+
+static int
+TcpAliasOut(struct ip *pip, int maxpacketsize)
+{   /* Alias an outgoing TCP packet; maxpacketsize is handed on to handlers that may grow the packet. */
+    int proxy_type;
+    u_short dest_port;
+    u_short proxy_server_port;
+    struct in_addr dest_address;
+    struct in_addr proxy_server_address;
+    struct tcphdr *tc;
+    struct alias_link *link;
+
+    tc = (struct tcphdr *) ((char *) pip + (pip->ip_hl << 2));  /* skip IP header (ip_hl * 4 bytes) */
+
+    proxy_type = ProxyCheck(pip, &proxy_server_address, &proxy_server_port);  /* non-zero is treated as "proxy this packet" */
+
+    if (proxy_type == 0 && (packetAliasMode & PKT_ALIAS_PROXY_ONLY))  /* proxy-only mode and nothing to proxy */
+        return PKT_ALIAS_OK;
+
+/* If this is a transparent proxy, save original destination,
+   then alter the destination and adjust checksums */
+    dest_port = tc->th_dport;
+    dest_address = pip->ip_dst;
+    if (proxy_type != 0)
+    {
+        int accumulate;
+        u_short *sptr;
+
+        accumulate = tc->th_dport;  /* old port in ... */
+        tc->th_dport = proxy_server_port;
+        accumulate -= tc->th_dport;  /* ... new port out */
+
+        sptr = (u_short *) &(pip->ip_dst);  /* address handled as two 16-bit words */
+        accumulate += *sptr++;
+        accumulate += *sptr;
+        sptr = (u_short *) &proxy_server_address;
+        accumulate -= *sptr++;
+        accumulate -= *sptr;
+
+        ADJUST_CHECKSUM(accumulate, tc->th_sum);
+
+        sptr = (u_short *) &(pip->ip_dst);  /* accumulate is restarted for the IP header checksum */
+        accumulate  = *sptr++;
+        accumulate += *sptr;
+        pip->ip_dst = proxy_server_address;
+        sptr = (u_short *) &(pip->ip_dst);
+        accumulate -= *sptr++;
+        accumulate -= *sptr;
+
+        ADJUST_CHECKSUM(accumulate, pip->ip_sum);
+    }
+
+    link = FindUdpTcpOut(pip->ip_src, pip->ip_dst,  /* lookup uses the (possibly proxied) destination */
+                         tc->th_sport, tc->th_dport,
+                         IPPROTO_TCP, 1);
+    if (link !=NULL)
+    {
+        u_short alias_port;
+        struct in_addr alias_address;
+        int accumulate;
+        u_short *sptr;
+
+/* Save original destination address, if this is a proxy packet.
+   Also modify packet to include destination encoding. */
+        if (proxy_type != 0)
+        {
+            SetProxyPort(link, dest_port);
+            SetProxyAddress(link, dest_address);
+            ProxyModify(link, pip, maxpacketsize, proxy_type);
+        }
+
+/* Get alias address and port */
+        alias_port = GetAliasPort(link);
+        alias_address = GetAliasAddress(link);
+
+/* Monitor TCP connection state */
+        TcpMonitorOut(pip, link);
+
+/* Special processing for IP encoding protocols (at most one handler runs per packet) */
+        if (ntohs(tc->th_dport) == FTP_CONTROL_PORT_NUMBER
+         || ntohs(tc->th_sport) == FTP_CONTROL_PORT_NUMBER)
+            AliasHandleFtpOut(pip, link, maxpacketsize);
+        else if (ntohs(tc->th_dport) == IRC_CONTROL_PORT_NUMBER_1  /* IRC is matched on destination port only */
+         || ntohs(tc->th_dport) == IRC_CONTROL_PORT_NUMBER_2)
+            AliasHandleIrcOut(pip, link, maxpacketsize);
+        else if (ntohs(tc->th_dport) == RTSP_CONTROL_PORT_NUMBER_1
+         || ntohs(tc->th_sport) == RTSP_CONTROL_PORT_NUMBER_1
+         || ntohs(tc->th_dport) == RTSP_CONTROL_PORT_NUMBER_2
+         || ntohs(tc->th_sport) == RTSP_CONTROL_PORT_NUMBER_2) 
+            AliasHandleRtspOut(pip, link, maxpacketsize);
+        else if (ntohs(tc->th_dport) == PPTP_CONTROL_PORT_NUMBER
+         || ntohs(tc->th_sport) == PPTP_CONTROL_PORT_NUMBER)
+            AliasHandlePptpOut(pip, link);
+
+/* Adjust TCP checksum since source port is being aliased */
+/* and source address is being altered                    */
+        accumulate  = tc->th_sport;  /* old port in ... */
+        tc->th_sport = alias_port;
+        accumulate -= tc->th_sport;  /* ... new port out */
+
+        sptr = (u_short *) &(pip->ip_src);  /* ip_src itself is only replaced further down */
+        accumulate += *sptr++;
+        accumulate += *sptr;
+        sptr = (u_short *) &alias_address;
+        accumulate -= *sptr++;
+        accumulate -= *sptr;
+
+/* Modify sequence number if necessary */
+        if (GetAckModified(link) == 1)
+        {
+            int delta;
+
+            delta = GetDeltaSeqOut(pip, link);
+            if (delta != 0)
+            {
+                sptr = (u_short *) &tc->th_seq;  /* old SEQ words in */
+                accumulate += *sptr++;
+                accumulate += *sptr;
+                tc->th_seq = htonl(ntohl(tc->th_seq) + delta);
+                sptr = (u_short *) &tc->th_seq;  /* new SEQ words out */
+                accumulate -= *sptr++;
+                accumulate -= *sptr;
+            }
+        }
+
+        ADJUST_CHECKSUM(accumulate, tc->th_sum);  /* one TCP checksum update covers all changes above */
+
+/* Change source address; accumulate is restarted for the IP header checksum */
+        sptr = (u_short *) &(pip->ip_src);
+        accumulate  = *sptr++;
+        accumulate += *sptr;
+        pip->ip_src = alias_address;
+        sptr = (u_short *) &(pip->ip_src);
+        accumulate -= *sptr++;
+        accumulate -= *sptr;
+
+        ADJUST_CHECKSUM(accumulate, pip->ip_sum);
+
+        return(PKT_ALIAS_OK);
+    }
+    return(PKT_ALIAS_IGNORED);
+}
+
+
+
+
+/* Fragment Handling
+
+    FragmentIn()
+    FragmentOut()
+
+The packet aliasing module has a limited ability for handling IP
+fragments.  If the ICMP, TCP or UDP header is in the first fragment
+received, then the ID number of the IP packet is saved, and other
+fragments are identified according to their ID number and IP address
+they were sent from.  Pointers to unresolved fragments can also be
+saved and recalled when a header fragment is seen.
+*/
+
+/* Local prototypes */
+static int FragmentIn(struct ip *);   /* de-alias destination of an incoming non-header fragment */
+static int FragmentOut(struct ip *);  /* alias source of an outgoing non-header fragment */
+
+
+static int
+FragmentIn(struct ip *pip)
+{   /* De-alias the destination of an incoming fragment using the address saved for its IP id. */
+    struct alias_link *link;
+
+    link = FindFragmentIn2(pip->ip_src, pip->ip_dst, pip->ip_id);  /* keyed on src, dst and IP id */
+    if (link != NULL)
+    {
+        struct in_addr original_address;
+
+        GetFragmentAddr(link, &original_address);
+        DifferentialChecksum(&pip->ip_sum,  /* IP header checksum is fixed up before ip_dst changes */
+                             (u_short *) &original_address,
+                             (u_short *) &pip->ip_dst,
+                             2);
+        pip->ip_dst = original_address; 
+   
+        return(PKT_ALIAS_OK);
+    }
+    return(PKT_ALIAS_UNRESOLVED_FRAGMENT);  /* no link found for this fragment */
+}
+
+
+static int
+FragmentOut(struct ip *pip)
+{   /* Alias the source address of an outgoing fragment; always returns PKT_ALIAS_OK. */
+    struct in_addr alias_address;
+
+    alias_address = FindAliasAddress(pip->ip_src);  /* looked up by source address only */
+    DifferentialChecksum(&pip->ip_sum,  /* IP header checksum is fixed up before ip_src changes */
+                         (u_short *) &alias_address,
+                         (u_short *) &pip->ip_src,
+                          2);
+    pip->ip_src = alias_address;
+
+    return(PKT_ALIAS_OK);
+}
+
+
+
+
+
+
+/* Outside World Access
+
+        PacketAliasSaveFragment()
+        PacketAliasGetFragment()
+        PacketAliasFragmentIn()
+        PacketAliasIn()
+        PacketAliasOut()
+        PacketUnaliasOut()
+
+(prototypes in alias.h)
+*/
+
+
+int
+PacketAliasSaveFragment(char *ptr)
+{   /* Attach ptr to a fragment link keyed on its IP source and id; PKT_ALIAS_ERROR if no link. */
+    int iresult;
+    struct alias_link *link;
+    struct ip *pip;
+
+    pip = (struct ip *) ptr;  /* ptr is interpreted as an IP header */
+    link = AddFragmentPtrLink(pip->ip_src, pip->ip_id);
+    iresult = PKT_ALIAS_ERROR;  /* stays ERROR when AddFragmentPtrLink returns NULL */
+    if (link != NULL)
+    {
+        SetFragmentPtr(link, ptr);  /* ptr is handed to the link as given */
+        iresult = PKT_ALIAS_OK;
+    }
+    return(iresult);
+}
+
+
+char *
+PacketAliasGetFragment(char *ptr)
+{   /* Return the fragment pointer saved for ptr's IP source and id, or NULL if there is none. */
+    struct alias_link *link;
+    char *fptr;
+    struct ip *pip;
+
+    pip = (struct ip *) ptr;  /* ptr is interpreted as an IP header */
+    link = FindFragmentPtr(pip->ip_src, pip->ip_id);
+    if (link != NULL)
+    {
+        GetFragmentPtr(link, &fptr);  /* fetch the saved pointer ... */
+        SetFragmentPtr(link, NULL);   /* ... and clear it from the link */
+        SetExpire(link, 0); /* Deletes link */
+
+        return(fptr);
+    }
+    else
+    {
+        return(NULL);
+    }
+}
+
+
+void
+PacketAliasFragmentIn(char *ptr,          /* Points to correctly de-aliased
+                                             header fragment */
+                      char *ptr_fragment  /* Points to fragment which must
+                                             be de-aliased   */
+                     )
+{   /* Copy the destination address of ptr into ptr_fragment and fix the latter's IP checksum. */
+    struct ip *pip;
+    struct ip *fpip;
+
+    pip = (struct ip *) ptr;
+    fpip = (struct ip *) ptr_fragment;
+
+    DifferentialChecksum(&fpip->ip_sum,  /* checksum is fixed up before fpip->ip_dst changes */
+                         (u_short *) &pip->ip_dst,
+                         (u_short *) &fpip->ip_dst,
+                         2);
+    fpip->ip_dst = pip->ip_dst;
+}
+
+
+int
+PacketAliasIn(char *ptr, int maxpacketsize)
+{   /* Entry point for incoming packets: de-alias the IP packet at ptr in place, return a PKT_ALIAS_* code. */
+    struct in_addr alias_addr;
+    struct ip *pip;
+    int iresult;
+
+    if (packetAliasMode & PKT_ALIAS_REVERSE) {  /* reverse mode: treat as outgoing; flag is cleared around the call */
+        packetAliasMode &= ~PKT_ALIAS_REVERSE;
+        iresult = PacketAliasOut(ptr, maxpacketsize);
+        packetAliasMode |= PKT_ALIAS_REVERSE;
+        return iresult;
+    }
+
+    HouseKeeping();
+    ClearCheckNewLink();
+    pip = (struct ip *) ptr;
+    alias_addr = pip->ip_dst;  /* destination before de-aliasing, needed for the fragment lookup below */
+        
+    /* Defense against mangled packets: total length and header length must fit in maxpacketsize */
+    if (ntohs(pip->ip_len) > maxpacketsize
+     || (pip->ip_hl<<2) > maxpacketsize)
+        return PKT_ALIAS_IGNORED;
+        
+    iresult = PKT_ALIAS_IGNORED;
+    if ( (ntohs(pip->ip_off) & IP_OFFMASK) == 0 )  /* fragment offset 0: the protocol header is present */
+    {
+        switch (pip->ip_p)
+        {
+            case IPPROTO_ICMP:
+                iresult = IcmpAliasIn(pip);
+                break;
+            case IPPROTO_UDP:
+                iresult = UdpAliasIn(pip);
+                break;
+            case IPPROTO_TCP:
+                iresult = TcpAliasIn(pip);
+                break;
+            case IPPROTO_GRE:
+		if (packetAliasMode & PKT_ALIAS_PROXY_ONLY ||  /* GRE handler is not called in proxy-only mode */
+		    AliasHandlePptpGreIn(pip) == 0)
+		    iresult = PKT_ALIAS_OK;
+		else
+		    iresult = ProtoAliasIn(pip);  /* otherwise fall back to the generic handler */
+		break;
+	    default:
+		iresult = ProtoAliasIn(pip);
+                break;
+        }
+
+        if (ntohs(pip->ip_off) & IP_MF)  /* header fragment with more to follow: overrides the handler's result */
+        {
+            struct alias_link *link;
+
+            link = FindFragmentIn1(pip->ip_src, alias_addr, pip->ip_id);
+            if (link != NULL)
+            {
+                iresult = PKT_ALIAS_FOUND_HEADER_FRAGMENT;
+                SetFragmentAddr(link, pip->ip_dst);  /* record the (possibly de-aliased) destination for FragmentIn */
+            }
+            else
+            {
+                iresult = PKT_ALIAS_ERROR;
+            }
+        }
+    }
+    else
+    {
+        iresult = FragmentIn(pip);  /* non-zero offset: only the address can be rewritten */
+    }
+
+    return(iresult);
+}
+
+
+
+/* Unregistered address ranges (host byte order; compared with ntohl(ip_src) in PacketAliasOut) */
+
+/* 10.0.0.0   ->   10.255.255.255 */
+#define UNREG_ADDR_A_LOWER 0x0a000000
+#define UNREG_ADDR_A_UPPER 0x0affffff
+
+/* 172.16.0.0  ->  172.31.255.255 */
+#define UNREG_ADDR_B_LOWER 0xac100000
+#define UNREG_ADDR_B_UPPER 0xac1fffff
+
+/* 192.168.0.0 -> 192.168.255.255 */
+#define UNREG_ADDR_C_LOWER 0xc0a80000
+#define UNREG_ADDR_C_UPPER 0xc0a8ffff
+
+int
+PacketAliasOut(char *ptr,           /* valid IP packet */
+               int  maxpacketsize   /* How much the packet data may grow
+                                       (FTP and IRC inline changes) */
+              )
+{   /* Entry point for outgoing packets: alias the IP packet at ptr in place, return a PKT_ALIAS_* code. */
+    int iresult;
+    struct in_addr addr_save;
+    struct ip *pip;
+
+    if (packetAliasMode & PKT_ALIAS_REVERSE) {  /* reverse mode: treat as incoming; flag is cleared around the call */
+        packetAliasMode &= ~PKT_ALIAS_REVERSE;
+        iresult = PacketAliasIn(ptr, maxpacketsize);
+        packetAliasMode |= PKT_ALIAS_REVERSE;
+        return iresult;
+    }
+
+    HouseKeeping();
+    ClearCheckNewLink();
+    pip = (struct ip *) ptr;
+
+    /* Defense against mangled packets: total length and header length must fit in maxpacketsize */
+    if (ntohs(pip->ip_len) > maxpacketsize
+     || (pip->ip_hl<<2) > maxpacketsize)
+        return PKT_ALIAS_IGNORED;
+
+    addr_save = GetDefaultAliasAddress();  /* restored before returning, see end of function */
+    if (packetAliasMode & PKT_ALIAS_UNREGISTERED_ONLY)
+    {
+        u_long addr;
+        int iclass;  /* 0 = source is outside all three unregistered ranges */
+
+        iclass = 0;
+        addr = ntohl(pip->ip_src.s_addr);
+        if      (addr >= UNREG_ADDR_C_LOWER && addr <= UNREG_ADDR_C_UPPER)
+            iclass = 3;
+        else if (addr >= UNREG_ADDR_B_LOWER && addr <= UNREG_ADDR_B_UPPER)
+            iclass = 2;
+        else if (addr >= UNREG_ADDR_A_LOWER && addr <= UNREG_ADDR_A_UPPER)
+            iclass = 1;
+
+        if (iclass == 0)
+        {
+            SetDefaultAliasAddress(pip->ip_src);  /* temporarily make the packet's own source the default alias */
+        }
+    }
+
+    iresult = PKT_ALIAS_IGNORED;
+    if ((ntohs(pip->ip_off) & IP_OFFMASK) == 0)  /* fragment offset 0: the protocol header is present */
+    {
+        switch (pip->ip_p)
+        {
+            case IPPROTO_ICMP:
+                iresult = IcmpAliasOut(pip);
+                break;
+            case IPPROTO_UDP:
+                iresult = UdpAliasOut(pip);
+                break;
+            case IPPROTO_TCP:
+                iresult = TcpAliasOut(pip, maxpacketsize);
+                break;
+	    case IPPROTO_GRE:
+		if (AliasHandlePptpGreOut(pip) == 0)
+		    iresult = PKT_ALIAS_OK;
+		else
+		    iresult = ProtoAliasOut(pip);  /* otherwise fall back to the generic handler */
+		break;
+	    default:
+		iresult = ProtoAliasOut(pip);
+                break;
+        }
+    }
+    else
+    {
+        iresult = FragmentOut(pip);  /* non-zero offset: only the address can be rewritten */
+    }
+
+    SetDefaultAliasAddress(addr_save);  /* undo any temporary change made above */
+    return(iresult);
+}
+
+int
+PacketUnaliasOut(char *ptr,           /* valid IP packet */
+                 int  maxpacketsize   /* for error checking */
+                )
+{   /* Turn an already-aliased outgoing packet back into its unaliased form, in place. */
+    struct ip		*pip;
+    struct icmp 	*ic;
+    struct udphdr	*ud;
+    struct tcphdr 	*tc;
+    struct alias_link 	*link;
+    int 		iresult = PKT_ALIAS_IGNORED;  /* returned unless a link is found and applied */
+
+    pip = (struct ip *) ptr;
+
+    /* Defense against mangled packets: total length and header length must fit in maxpacketsize */
+    if (ntohs(pip->ip_len) > maxpacketsize
+     || (pip->ip_hl<<2) > maxpacketsize)
+        return(iresult);
+
+    ud = (struct udphdr *) ((char *) pip + (pip->ip_hl << 2));  /* skip IP header (ip_hl * 4 bytes) */
+    tc = (struct tcphdr *) ud;  /* tc and ic are alternative views of the same bytes as ud */
+    ic = (struct icmp *) ud;
+
+    /* Find a link; dst/src are passed swapped and the final 0 presumably means "do not create" -- TODO confirm */
+    if (pip->ip_p == IPPROTO_UDP)
+        link = FindUdpTcpIn(pip->ip_dst, pip->ip_src,
+                            ud->uh_dport, ud->uh_sport,
+                            IPPROTO_UDP, 0);
+    else if (pip->ip_p == IPPROTO_TCP)
+        link = FindUdpTcpIn(pip->ip_dst, pip->ip_src,
+                            tc->th_dport, tc->th_sport,
+                            IPPROTO_TCP, 0);
+    else if (pip->ip_p == IPPROTO_ICMP) 
+        link = FindIcmpIn(pip->ip_dst, pip->ip_src, ic->icmp_id, 0);
+    else
+        link = NULL;
+
+    /* Change it from an aliased packet to an unaliased packet */
+    if (link != NULL)
+    {
+        if (pip->ip_p == IPPROTO_UDP || pip->ip_p == IPPROTO_TCP)
+        {
+            u_short        *sptr;
+            int 	   accumulate;
+            struct in_addr original_address;
+            u_short        original_port;
+
+            original_address = GetOriginalAddress(link);
+            original_port = GetOriginalPort(link);
+    
+            /* Adjust TCP/UDP checksum: old source address in, original address out */
+            sptr = (u_short *) &(pip->ip_src);
+            accumulate  = *sptr++;
+            accumulate += *sptr;
+            sptr = (u_short *) &original_address;
+            accumulate -= *sptr++;
+            accumulate -= *sptr;
+
+            if (pip->ip_p == IPPROTO_UDP) {
+                accumulate += ud->uh_sport - original_port;
+                if (ud->uh_sum != 0)  /* zero means "no UDP checksum": keep it zero, as UdpAliasIn/Out do */
+                    ADJUST_CHECKSUM(accumulate, ud->uh_sum);
+	    } else { 
+                accumulate += tc->th_sport;
+                accumulate -= original_port;
+                ADJUST_CHECKSUM(accumulate, tc->th_sum);
+	    }
+
+            /* Adjust IP checksum (must happen before ip_src is overwritten) */
+            DifferentialChecksum(&pip->ip_sum,
+                                 (u_short *) &original_address,
+                                 (u_short *) &pip->ip_src,
+                                 2);
+
+            /* Un-alias source address and port number */ 
+            pip->ip_src = original_address;
+            if (pip->ip_p == IPPROTO_UDP) 
+                ud->uh_sport = original_port; 
+	    else   
+                tc->th_sport = original_port; 
+            
+	    iresult = PKT_ALIAS_OK;
+
+        } else if (pip->ip_p == IPPROTO_ICMP) {
+
+            u_short        *sptr;
+            int            accumulate;
+            struct in_addr original_address;
+            u_short        original_id;
+
+            original_address = GetOriginalAddress(link);
+            original_id = GetOriginalPort(link);  /* the link's original port is used as the ICMP id */
+
+            /* Adjust ICMP checksum, same accumulation as the TCP/UDP branch plus the id change */
+            sptr = (u_short *) &(pip->ip_src);
+            accumulate  = *sptr++;
+            accumulate += *sptr;
+            sptr = (u_short *) &original_address;
+            accumulate -= *sptr++;
+            accumulate -= *sptr;
+            accumulate += ic->icmp_id;
+            accumulate -= original_id;
+            ADJUST_CHECKSUM(accumulate, ic->icmp_cksum);
+
+            /* Adjust IP checksum (must happen before ip_src is overwritten) */
+            DifferentialChecksum(&pip->ip_sum,
+                                 (u_short *) &original_address,
+                                 (u_short *) &pip->ip_src,
+                                 2);
+
+            /* Un-alias source address and ICMP id */
+            pip->ip_src = original_address;
+            ic->icmp_id = original_id;
+
+	    iresult = PKT_ALIAS_OK;
+        }
+    }
+    return(iresult);
+
+}
diff --git a/sys/netinet/libalias/alias.h b/sys/netinet/libalias/alias.h
new file mode 100644
index 0000000..607021f
--- /dev/null
+++ b/sys/netinet/libalias/alias.h
@@ -0,0 +1,156 @@
+/* lint -save -library Flexelint comment for external headers */
+
+/*-
+ * Alias.h defines the outside world interfaces for the packet aliasing
+ * software.
+ * 
+ * This software is placed into the public domain with no restrictions on its
+ * distribution.
+ * 
+ * $FreeBSD$
+ */
+
+#ifndef _ALIAS_H_
+#define	_ALIAS_H_
+
+/* The external interface to libalias, the packet aliasing engine. */
+
+/* Initialization and control functions. */
+void	 PacketAliasInit(void);
+void	 PacketAliasSetAddress(struct in_addr _addr);
+void	 PacketAliasSetFWBase(unsigned int _base, unsigned int _num);
+unsigned int
+	 PacketAliasSetMode(unsigned int _flags, unsigned int _mask);
+void	 PacketAliasUninit(void);
+
+/* Packet Handling functions. */
+int	 PacketAliasIn(char *_ptr, int _maxpacketsize);
+int	 PacketAliasOut(char *_ptr, int _maxpacketsize);
+int	 PacketUnaliasOut(char *_ptr, int _maxpacketsize);
+
+/* Port and address redirection functions. */
+
+/*
+ * An anonymous structure, a pointer to which is returned from
+ * PacketAliasRedirectAddr(), PacketAliasRedirectPort() or
+ * PacketAliasRedirectProto(), passed to PacketAliasAddServer(),
+ * and freed by PacketAliasRedirectDelete().
+ */
+struct	alias_link;
+
+int	 PacketAliasAddServer(struct alias_link *_link,
+	    struct in_addr _addr, unsigned short _port);
+struct alias_link *
+	 PacketAliasRedirectAddr(struct in_addr _src_addr,
+	    struct in_addr _alias_addr);
+void	 PacketAliasRedirectDelete(struct alias_link *_link);
+struct alias_link *
+	 PacketAliasRedirectPort(struct in_addr _src_addr,
+	    unsigned short _src_port, struct in_addr _dst_addr,
+	    unsigned short _dst_port, struct in_addr _alias_addr,
+	    unsigned short _alias_port, unsigned char _proto);
+struct alias_link *
+	 PacketAliasRedirectProto(struct in_addr _src_addr,
+	    struct in_addr _dst_addr, struct in_addr _alias_addr,
+	    unsigned char _proto);
+
+/* Fragment Handling functions. */
+void	 PacketAliasFragmentIn(char *_ptr, char *_ptr_fragment);
+char	*PacketAliasGetFragment(char *_ptr);
+int	 PacketAliasSaveFragment(char *_ptr);
+
+/* Miscellaneous functions. */
+int	 PacketAliasCheckNewLink(void);
+unsigned short
+	 PacketAliasInternetChecksum(unsigned short *_ptr, int _nbytes);
+void	 PacketAliasSetTarget(struct in_addr _target_addr);
+
+/* Transparent proxying routines. */
+int	 PacketAliasProxyRule(const char *_cmd);
+
+/* Mode flags, set using PacketAliasSetMode() */
+
+/*
+ * If PKT_ALIAS_LOG is set, a message will be printed to /var/log/alias.log
+ * every time a link is created or deleted.  This is useful for debugging.
+ */
+#define	PKT_ALIAS_LOG			0x01
+
+/*
+ * If PKT_ALIAS_DENY_INCOMING is set, then incoming connections (e.g. to ftp,
+ * telnet or web servers) will be prevented by the aliasing mechanism.
+ */
+#define	PKT_ALIAS_DENY_INCOMING		0x02
+
+/*
+ * If PKT_ALIAS_SAME_PORTS is set, an attempt is made to send packets from the
+ * same port as they originated on.  This allows e.g. rsh to work *99% of the
+ * time*, but _not_ 100% (it will be slightly flakey instead of not working
+ * at all).  This mode bit is set by PacketAliasInit(), so it is a default
+ * mode of operation.
+ */
+#define	PKT_ALIAS_SAME_PORTS		0x04
+
+/*
+ * If PKT_ALIAS_USE_SOCKETS is set, then for partially specified links (e.g.
+ * destination port and/or address is zero), the packet aliasing engine will
+ * attempt to allocate a socket for the aliasing port it chooses.  This will
+ * avoid interference with the host machine.  Fully specified links do not
+ * require this.  This bit is set after a call to PacketAliasInit(), so it is
+ * a default mode of operation.
+ */
+#define	PKT_ALIAS_USE_SOCKETS		0x08
+
+/*-
+ * If PKT_ALIAS_UNREGISTERED_ONLY is set, then only packets with
+ * unregistered source addresses will be aliased.  Unregistered (private)
+ * addresses are those in the following ranges:
+ *
+ *		10.0.0.0     ->   10.255.255.255
+ *		172.16.0.0   ->   172.31.255.255
+ *		192.168.0.0  ->   192.168.255.255
+ */
+#define	PKT_ALIAS_UNREGISTERED_ONLY	0x10
+
+/*
+ * If PKT_ALIAS_RESET_ON_ADDR_CHANGE is set, then the table of dynamic
+ * aliasing links will be reset whenever PacketAliasSetAddress() changes the
+ * default aliasing address.  If the default aliasing address is left
+ * unchanged by this function call, then the table of dynamic aliasing links
+ * will be left intact.  This bit is set after a call to PacketAliasInit().
+ */
+#define	PKT_ALIAS_RESET_ON_ADDR_CHANGE	0x20
+
+#ifndef NO_FW_PUNCH
+/*
+ * If PKT_ALIAS_PUNCH_FW is set, active FTP and IRC DCC connections will
+ * create a 'hole' in the firewall to allow the transfers to work.  The
+ * ipfw rule number that the hole is created with is controlled by
+ * PacketAliasSetFWBase().  The hole will be attached to that
+ * particular alias_link, so when the link goes away the hole is deleted.
+ */
+#define	PKT_ALIAS_PUNCH_FW		0x100
+#endif
+
+/*
+ * If PKT_ALIAS_PROXY_ONLY is set, then NAT will be disabled and only
+ * transparent proxying is performed.
+ */
+#define	PKT_ALIAS_PROXY_ONLY		0x40
+
+/*
+ * If PKT_ALIAS_REVERSE is set, the actions of PacketAliasIn() and
+ * PacketAliasOut() are reversed.
+ */
+#define	PKT_ALIAS_REVERSE		0x80
+
+/* Function return codes. */
+#define	PKT_ALIAS_ERROR			-1
+#define	PKT_ALIAS_OK			1
+#define	PKT_ALIAS_IGNORED		2
+#define	PKT_ALIAS_UNRESOLVED_FRAGMENT	3
+#define	PKT_ALIAS_FOUND_HEADER_FRAGMENT	4
+
+#endif /* !_ALIAS_H_ */
+
+/* lint -restore */
diff --git a/sys/netinet/libalias/alias_cuseeme.c b/sys/netinet/libalias/alias_cuseeme.c
new file mode 100644
index 0000000..1a7fbf1
--- /dev/null
+++ b/sys/netinet/libalias/alias_cuseeme.c
@@ -0,0 +1,120 @@
+/*-
+ * Copyright (c) 1998 Brian Somers <brian@Awfulhak.org>
+ *                    with the aid of code written by
+ *                    Junichi SATOH <junichi@astec.co.jp> 1996, 1997.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+
+#include <sys/types.h>
+#include <netinet/in_systm.h>
+#include <netinet/in.h>
+#include <netinet/ip.h>
+#include <netinet/udp.h>
+
+#include "alias_local.h"
+
+/* CU-SeeMe Data Header, overlaid on the payload right after the UDP header */
+struct cu_header {
+    u_int16_t dest_family;
+    u_int16_t dest_port;
+    u_int32_t  dest_addr;   /* if non-zero, AliasHandleCUSeeMeIn() sets it to the original address */
+    int16_t family;
+    u_int16_t port;
+    u_int32_t addr;         /* if non-zero, AliasHandleCUSeeMeOut() sets it to the alias address */
+    u_int32_t seq;
+    u_int16_t msg;
+    u_int16_t data_type;    /* read through ntohs(); the value 101 selects the client-list scan */
+    u_int16_t packet_len;
+};
+
+/* Open Continue Header; AliasHandleCUSeeMeIn() expects it right after cu_header */
+struct oc_header {
+    u_int16_t client_count;    /* Number of client info structs; NOTE(review): compared without ntohs() -- confirm byte order */
+    u_int32_t seq_no;          /* NOTE(review): struct is not packed, padding may precede this member -- confirm wire layout */
+    char user_name[20];
+    char reserved[4];        /* flags, version stuff, etc */
+};
+
+/* Client info entries; AliasHandleCUSeeMeIn() walks them right after the oc_header */
+struct client_info {
+    u_int32_t address;          /* Client address; compared against the packet's ip_dst.s_addr */
+    char reserved[8];        /* Flags, pruning bitfield, packet counts etc */
+};
+/* Outgoing CU-SeeMe hook: alias the sender address carried in the payload. */
+void
+AliasHandleCUSeeMeOut(struct ip *pip, struct alias_link *link)
+{
+  struct udphdr *ud;
+  /* pip is edited in place; link supplies the alias and destination address. */
+  ud = (struct udphdr *)((char *)pip + (pip->ip_hl << 2));
+  if (ntohs(ud->uh_ulen) >= sizeof(struct udphdr) + sizeof(struct cu_header)) {
+    struct cu_header *cu;
+    struct alias_link *cu_link;
+    /* Sizes are added, not subtracted, so a too-small uh_ulen cannot wrap. */
+    cu = (struct cu_header *)(ud + 1);
+    if (cu->addr)
+      cu->addr = (u_int32_t)GetAliasAddress(link).s_addr;
+    /* Get a UDP link via FindUdpTcpOut() for the firewall hole below. */
+    cu_link = FindUdpTcpOut(pip->ip_src, GetDestAddress(link),
+                            ud->uh_dport, 0, IPPROTO_UDP, 1);
+    /* Firewall punching is compiled out when NO_FW_PUNCH is defined. */
+#ifndef NO_FW_PUNCH
+    if (cu_link)
+        PunchFWHole(cu_link);
+#endif
+  }
+}
+/* Incoming CU-SeeMe hook: restore original_addr inside the UDP payload. */
+void
+AliasHandleCUSeeMeIn(struct ip *pip, struct in_addr original_addr)
+{
+  struct udphdr *udp_hdr = (struct udphdr *)((char *)pip + (pip->ip_hl << 2));
+  struct cu_header *cu_hdr = (struct cu_header *)(udp_hdr + 1);
+  struct oc_header *oc_hdr = (struct oc_header *)(cu_hdr + 1);
+  struct client_info *client = (struct client_info *)(oc_hdr + 1);
+  char *payload_end = (char *)udp_hdr + ntohs(udp_hdr->uh_ulen);
+  u_int32_t aliased = (u_int32_t)pip->ip_dst.s_addr;
+  u_int32_t restored = (u_int32_t)original_addr.s_addr;
+  int n;
+
+  /* The whole cu_header must fit inside the UDP datagram. */
+  if ((char *)oc_hdr > payload_end)
+    return;
+  /* A non-zero destination address is replaced by the original one. */
+  if (cu_hdr->dest_addr)
+    cu_hdr->dest_addr = restored;
+
+  /* The client list is only scanned for data type 101. */
+  if (ntohs(cu_hdr->data_type) != 101)
+    return;
+  /* Rewrite the first entry equal to the alias address, within bounds. */
+  for (n = 0; (char *)(client + 1) <= payload_end
+              && n < oc_hdr->client_count; n++, client++)
+    if (client->address == aliased) {
+      client->address = restored;
+      break;
+    }
+}
diff --git a/sys/netinet/libalias/alias_db.c b/sys/netinet/libalias/alias_db.c
new file mode 100644
index 0000000..60d425f
--- /dev/null
+++ b/sys/netinet/libalias/alias_db.c
@@ -0,0 +1,2788 @@
+/*  -*- mode: c; tab-width: 8; c-basic-indent: 4; -*-
+    Alias_db.c encapsulates all data structures used for storing
+    packet aliasing data.  Other parts of the aliasing software
+    access data through functions provided in this file.
+
+    Data storage is based on the notion of a "link", which is
+    established for ICMP echo/reply packets, UDP datagrams and
+    TCP stream connections.  A link stores the original source
+    and destination addresses.  For UDP and TCP, it also stores
+    source and destination port numbers, as well as an alias
+    port number.  Links are also used to store information about
+    fragments.
+
+    There is a facility for sweeping through and deleting old
+    links as new packets are sent through.  A simple timeout is
+    used for ICMP and UDP links.  TCP links are left alone unless
+    there is an incomplete connection, in which case the link
+    can be deleted after a certain amount of time.
+
+
+    This software is placed into the public domain with no restrictions
+    on its distribution.
+
+    Initial version: August, 1996  (cjm)
+
+    Version 1.4: September 16, 1996 (cjm)
+        Facility for handling incoming links added.
+
+    Version 1.6: September 18, 1996 (cjm)
+        ICMP data handling simplified.
+
+    Version 1.7: January 9, 1997 (cjm)
+        Fragment handling simplified.
+        Saves pointers for unresolved fragments.
+        Permits links for unspecified remote ports
+          or unspecified remote addresses.
+        Fixed bug which did not properly zero port
+          table entries after a link was deleted.
+        Cleaned up some obsolete comments.
+
+    Version 1.8: January 14, 1997 (cjm)
+        Fixed data type error in StartPoint().
+        (This error did not exist prior to v1.7
+        and was discovered and fixed by Ari Suutari)
+
+    Version 1.9: February 1, 1997
+        Optionally, connections initiated from packet aliasing host
+        machine will not have their port number aliased unless it
+        conflicts with an aliasing port already being used. (cjm)
+
+        All options earlier being #ifdef'ed are now available through
+        a new interface, SetPacketAliasMode().  This allows run time
+        control (which is now available in PPP+pktAlias through the
+        'alias' keyword). (ee)
+
+        Added ability to create an alias port without
+        either destination address or port specified.
+        port type = ALIAS_PORT_UNKNOWN_DEST_ALL (ee)
+
+        Removed K&R style function headers
+        and general cleanup. (ee)
+
+        Added packetAliasMode to replace compiler #define's (ee)
+
+        Allocates sockets for partially specified
+        ports if ALIAS_USE_SOCKETS defined. (cjm)
+
+    Version 2.0: March, 1997
+        SetAliasAddress() will now clean up alias links
+        if the aliasing address is changed. (cjm)
+
+        PacketAliasPermanentLink() function added to support permanent
+        links.  (J. Fortes suggested the need for this.)
+        Examples:
+
+        (192.168.0.1, port 23)  <-> alias port 6002, unknown dest addr/port
+
+        (192.168.0.2, port 21)  <-> alias port 3604, known dest addr
+                                                     unknown dest port
+
+        These permanent links allow for incoming connections to
+        machines on the local network.  They can be given with a
+        user-chosen amount of specificity, with increasing specificity
+        meaning more security. (cjm)
+
+        Quite a bit of rework to the basic engine.  The portTable[]
+        array, which kept track of which ports were in use was replaced
+        by a table/linked list structure. (cjm)
+
+        SetExpire() function added. (cjm)
+
+        DeleteLink() no longer frees memory association with a pointer
+        to a fragment (this bug was first recognized by E. Eklund in
+        v1.9).
+
+    Version 2.1: May, 1997 (cjm)
+        Packet aliasing engine reworked so that it can handle
+        multiple external addresses rather than just a single
+        host address.
+
+        PacketAliasRedirectPort() and PacketAliasRedirectAddr()
+        added to the API.  The first function is a more generalized
+        version of PacketAliasPermanentLink().  The second function
+        implements static network address translation.
+
+    Version 3.2: July, 2000 (salander and satoh)
+        Added FindNewPortGroup to get contiguous range of port values.  
+
+        Added QueryUdpTcpIn and QueryUdpTcpOut to look for an aliasing
+	link but not actually add one.
+
+        Added FindRtspOut, which is closely derived from FindUdpTcpOut, 
+	except that the alias port (from FindNewPortGroup) is provided
+	as input.
+
+    See HISTORY file for additional revisions.
+
+    $FreeBSD$
+*/
+
+
+/* System include files */
+#include <errno.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <unistd.h>
+
+#include <sys/queue.h>
+#include <sys/socket.h>
+#include <sys/time.h>
+#include <sys/types.h>
+
+/* BSD network include files */
+#include <netinet/in_systm.h>
+#include <netinet/in.h>
+#include <netinet/ip.h>
+#include <netinet/tcp.h>
+#include <arpa/inet.h>
+
+#include "alias.h"
+#include "alias_local.h"
+
+
+
+/*
+   Constants (note: constants are also defined
+              near relevant functions or structs)
+*/
+
+/* Sizes of input and output link tables */
+#define LINK_TABLE_OUT_SIZE         101
+#define LINK_TABLE_IN_SIZE         4001
+
+/* Parameters used for cleanup of expired links */
+#define ALIAS_CLEANUP_INTERVAL_SECS  60
+#define ALIAS_CLEANUP_MAX_SPOKES     30
+
+/* Timeouts (in seconds) for different link types */
+#define ICMP_EXPIRE_TIME             60
+#define UDP_EXPIRE_TIME              60
+#define PROTO_EXPIRE_TIME            60
+#define FRAGMENT_ID_EXPIRE_TIME      10
+#define FRAGMENT_PTR_EXPIRE_TIME     30
+
+/* TCP link expire time for different cases */
+/* When the link has been used and closed - minimal grace time to
+   allow ACKs and potential re-connect in FTP (XXX - is this allowed?)  */
+#ifndef TCP_EXPIRE_DEAD
+#   define TCP_EXPIRE_DEAD           10
+#endif
+
+/* When the link has been used and closed on one side - the other side
+   is allowed to still send data */
+#ifndef TCP_EXPIRE_SINGLEDEAD
+#   define TCP_EXPIRE_SINGLEDEAD     90
+#endif
+
+/* When the link isn't yet up */
+#ifndef TCP_EXPIRE_INITIAL
+#   define TCP_EXPIRE_INITIAL       300
+#endif
+
+/* When the link is up */
+#ifndef TCP_EXPIRE_CONNECTED
+#   define TCP_EXPIRE_CONNECTED   86400
+#endif
+
+
+/* Dummy port number codes used for FindLinkIn/Out() and AddLink().
+   These constants can be anything except zero, which indicates an
+   unknown port number. */
+
+#define NO_DEST_PORT     1
+#define NO_SRC_PORT      1
+
+
+
+/* Data Structures
+
+    The fundamental data structure used in this program is
+    "struct alias_link".  Whenever a TCP connection is made,
+    a UDP datagram is sent out, or an ICMP echo request is made,
+    a link record is made (if it has not already been created).
+    The link record is identified by the source address/port
+    and the destination address/port. In the case of an ICMP
+    echo request, the source port is treated as being equivalent
+    with the 16-bit ID number of the ICMP packet.
+
+    The link record also can store some auxiliary data.  For
+    TCP connections that have had sequence and acknowledgment
+    modifications, data space is available to track these changes.
+    A state field is used to keep track in changes to the TCP
+    connection state.  ID numbers of fragments can also be
+    stored in the auxiliary space.  Pointers to unresolved
+    fragments can also be stored.
+
+    The link records support two independent chainings.  Lookup
+    tables for input and output hold the initial pointers to
+    the link chains.  On input, the lookup table indexes on alias
+    address, alias port and link type.  On output, the lookup table
+    indexes on source address, destination address, source port,
+    destination port and link type.
+*/
+
+struct ack_data_record     /* used to save changes to ACK/sequence numbers */
+{
+    u_long ack_old;        /* presumably the ACK value before modification -- verify */
+    u_long ack_new;        /* presumably the ACK value after modification -- verify */
+    int delta;             /* presumably the size change applied to the stream -- verify */
+    int active;            /* presumably non-zero while this record is in use -- verify */
+};
+
+struct tcp_state           /* Information about TCP connection        */
+{
+    int in;                /* State for outside -> inside             */
+    int out;               /* State for inside  -> outside            */
+    int index;             /* Index to ACK data array (ack[] in struct tcp_dat) */
+    int ack_modified;      /* Indicates whether ACK and sequence numbers */
+                           /* have been modified                      */
+};
+
+#define N_LINK_TCP_DATA   3 /* Number of distinct ACK number changes
+                               saved for a modified TCP stream */
+struct tcp_dat              /* TCP auxiliary data, pointed to by alias_link.data.tcp */
+{
+    struct tcp_state state; /* connection state for both directions */
+    struct ack_data_record ack[N_LINK_TCP_DATA]; /* saved ACK/sequence number changes */
+    int fwhole;             /* Which firewall record is used for this hole? */
+};
+
+struct server              /* LSNAT server pool (circular list) */
+{
+    struct in_addr addr;   /* server address */
+    u_short port;          /* server port; NOTE(review): byte order not visible here -- confirm */
+    struct server *next;   /* next pool member */
+};
+
+struct alias_link                /* Main data structure */
+{
+    struct in_addr src_addr;     /* Address and port information        */
+    struct in_addr dst_addr;
+    struct in_addr alias_addr;
+    struct in_addr proxy_addr;
+    u_short src_port;            /* network byte order: GetNewPort() reads it via ntohs() */
+    u_short dst_port;
+    u_short alias_port;          /* assigned by GetNewPort()            */
+    u_short proxy_port;
+    struct server *server;       /* see struct server (LSNAT server pool) */
+
+    int link_type;               /* Type of link: TCP, UDP, ICMP, proto, frag */
+
+/* values for link_type: IP protocol numbers, or IPPROTO_MAX + n for the other kinds */
+#define LINK_ICMP                     IPPROTO_ICMP
+#define LINK_UDP                      IPPROTO_UDP
+#define LINK_TCP                      IPPROTO_TCP
+#define LINK_FRAGMENT_ID              (IPPROTO_MAX + 1)
+#define LINK_FRAGMENT_PTR             (IPPROTO_MAX + 2)
+#define LINK_ADDR                     (IPPROTO_MAX + 3)
+#define LINK_PPTP                     (IPPROTO_MAX + 4)
+
+    int flags;                   /* indicates special characteristics   */
+
+/* flag bits */
+#define LINK_UNKNOWN_DEST_PORT     0x01
+#define LINK_UNKNOWN_DEST_ADDR     0x02
+#define LINK_PERMANENT             0x04
+#define LINK_PARTIALLY_SPECIFIED   0x03 /* logical-or of first two bits */
+#define LINK_UNFIREWALLED          0x08
+#define LINK_LAST_LINE_CRLF_TERMED 0x10
+
+    int timestamp;               /* Time link was last accessed         */
+    int expire_time;             /* Expire time for link                */
+
+    int sockfd;                  /* socket descriptor, filled in through GetSocket() */
+
+    LIST_ENTRY(alias_link) list_out; /* Chain linkage for the output    */
+    LIST_ENTRY(alias_link) list_in;  /* and input lookup tables         */
+
+    union                        /* Auxiliary data                      */
+    {
+        char *frag_ptr;          /* presumably for LINK_FRAGMENT_PTR links -- verify */
+        struct in_addr frag_addr; /* presumably for LINK_FRAGMENT_ID links -- verify */
+        struct tcp_dat *tcp;     /* presumably for LINK_TCP links -- verify */
+    } data;
+};
+
+
+
+
+
+/* Global Variables 
+
+    Except for packetAliasMode, which has external linkage, the
+    global variables listed here are only accessed from within
+    alias_db.c and so are declared static.
+*/
+
+int packetAliasMode;                 /* Mode flags                      */ 
+                                     /*        - documented in alias.h  */
+
+static struct in_addr aliasAddress;  /* Address written onto source     */
+                                     /*   field of IP packet.           */
+
+static struct in_addr targetAddress; /* IP address incoming packets     */
+                                     /*   are sent to if no aliasing    */
+                                     /*   link already exists           */
+
+static struct in_addr nullAddress;   /* Used as a dummy parameter for   */
+                                     /*   some function calls           */
+static LIST_HEAD(, alias_link)
+linkTableOut[LINK_TABLE_OUT_SIZE];   /* Lookup table of pointers to     */
+                                     /*   chains of link records. Each  */
+static LIST_HEAD(, alias_link)       /*   link record is doubly indexed */
+linkTableIn[LINK_TABLE_IN_SIZE];     /*   into input and output lookup  */
+                                     /*   tables.                       */
+
+static int icmpLinkCount;            /* Link statistics                 */
+static int udpLinkCount;
+static int tcpLinkCount;
+static int pptpLinkCount;
+static int protoLinkCount;
+static int fragmentIdLinkCount;
+static int fragmentPtrLinkCount;
+static int sockCount;
+
+static int cleanupIndex;             /* Index to chain of link table    */
+                                     /* being inspected for old links   */
+
+static int timeStamp;                /* System time in seconds for      */
+                                     /* current packet                  */
+
+static int lastCleanupTime;          /* Last time IncrementalCleanup()  */
+                                     /* was called                      */
+
+static int houseKeepingResidual;     /* used by HouseKeeping()          */
+
+static int deleteAllLinks;           /* If equal to zero, DeleteLink()  */
+                                     /* will not remove permanent links */
+
+static FILE *monitorFile;            /* stdio stream for the link       */
+                                     /* statistics monitoring file      */
+
+static int newDefaultLink;           /* Indicates if a new aliasing     */
+                                     /* link has been created after a   */
+                                     /* call to PacketAliasIn/Out().    */
+             
+#ifndef NO_FW_PUNCH
+static int fireWallFD = -1;          /* File descriptor to be able to   */
+                                     /* control firewall.  Opened by    */
+                                     /* PacketAliasSetMode on first     */
+                                     /* setting the PKT_ALIAS_PUNCH_FW  */
+                                     /* flag.                           */
+#endif
+
+
+
+
+
+
+
+/* Internal utility routines (used only in alias_db.c)
+
+Lookup table starting points:
+    StartPointIn()           -- link table initial search point for
+                                incoming packets
+    StartPointOut()          -- link table initial search point for
+                                outgoing packets
+    
+Miscellaneous:
+    SeqDiff()                -- difference between two TCP sequences
+    ShowAliasStats()         -- send alias statistics to a monitor file
+*/
+
+
+/* Local prototypes */
+static u_int StartPointIn(struct in_addr, u_short, int);
+
+static u_int StartPointOut(struct in_addr, struct in_addr,
+                           u_short, u_short, int);
+
+static int SeqDiff(u_long, u_long);
+
+static void ShowAliasStats(void);
+
+#ifndef NO_FW_PUNCH
+/* Firewall control */
+static void InitPunchFW(void);
+static void UninitPunchFW(void);
+static void ClearFWHole(struct alias_link *link);
+#endif
+
+/* Log file control */
+static void InitPacketAliasLog(void);
+static void UninitPacketAliasLog(void);
+
+static u_int
+StartPointIn(struct in_addr alias_addr,
+             u_short alias_port,
+             int link_type)
+{
+    /* Bucket index for the input table: sum of the key fields, modulo size. */
+    u_int hash = alias_addr.s_addr;
+    hash += link_type;
+    /* PPTP links are hashed without the port. */
+    if (link_type != LINK_PPTP)
+        hash += alias_port;
+    return (hash % LINK_TABLE_IN_SIZE);
+}
+
+
+static u_int
+StartPointOut(struct in_addr src_addr, struct in_addr dst_addr,
+              u_short src_port, u_short dst_port, int link_type)
+{
+    /* Bucket index for the output table: sum of the key fields, modulo size. */
+    u_int hash = src_addr.s_addr;
+
+    hash += dst_addr.s_addr;
+    hash += link_type;
+    /* PPTP links are hashed on addresses and type only. */
+    if (link_type != LINK_PPTP) {
+        hash += src_port;
+        hash += dst_port;
+    }
+    return (hash % LINK_TABLE_OUT_SIZE);
+}
+
+
+static int
+SeqDiff(u_long x, u_long y)
+{
+/* Return y - x for two TCP sequence numbers; both are passed through ntohl() */
+
+/*
+    This function is encapsulated in case there are any unusual
+    arithmetic conditions that need to be considered.
+*/
+
+    return (ntohl(y) - ntohl(x));
+}
+
+
+static void
+ShowAliasStats(void)
+{
+/* Debugging aid: write one line of link counters to the monitor file. */
+   int total;
+
+   /* Nothing to do unless a monitor file is open. */
+   if (!monitorFile)
+      return;
+
+   total = icmpLinkCount + udpLinkCount
+                         + tcpLinkCount
+                         + pptpLinkCount
+                         + protoLinkCount
+                         + fragmentIdLinkCount
+                         + fragmentPtrLinkCount;
+
+   fprintf(monitorFile, "icmp=%d, udp=%d, tcp=%d, pptp=%d, proto=%d, frag_id=%d frag_ptr=%d",
+           icmpLinkCount,
+           udpLinkCount,
+           tcpLinkCount,
+           pptpLinkCount,
+           protoLinkCount,
+           fragmentIdLinkCount,
+           fragmentPtrLinkCount);
+   fprintf(monitorFile, " / tot=%d  (sock=%d)\n", total, sockCount);
+   fflush(monitorFile);
+}
+
+
+
+
+
+/* Internal routines for finding, deleting and adding links
+
+Port Allocation:
+    GetNewPort()             -- find and reserve new alias port number
+    GetSocket()              -- try to allocate a socket for a given port
+
+Link creation and deletion:
+    CleanupAliasData()      - remove all link chains from lookup table
+    IncrementalCleanup()    - look for stale links in a single chain
+    DeleteLink()            - remove link
+    AddLink()               - add link 
+    ReLink()                - change link 
+
+Link search:
+    FindLinkOut()           - find link for outgoing packets
+    FindLinkIn()            - find link for incoming packets
+
+Port search:
+    FindNewPortGroup()      - find an available group of ports 
+*/
+
+/* Local prototypes */
+static int GetNewPort(struct alias_link *, int);
+
+static u_short GetSocket(u_short, int *, int);
+
+static void CleanupAliasData(void);
+
+static void IncrementalCleanup(void);
+
+static void DeleteLink(struct alias_link *);
+
+static struct alias_link *
+AddLink(struct in_addr, struct in_addr, struct in_addr,
+        u_short, u_short, int, int);
+
+static struct alias_link *
+ReLink(struct alias_link *,
+       struct in_addr, struct in_addr, struct in_addr,
+        u_short, u_short, int, int);
+
+static struct alias_link *
+FindLinkOut(struct in_addr, struct in_addr, u_short, u_short, int, int);
+
+static struct alias_link *
+FindLinkIn(struct in_addr, struct in_addr, u_short, u_short, int, int);
+
+
+#define ALIAS_PORT_BASE            0x08000
+#define ALIAS_PORT_MASK            0x07fff
+#define ALIAS_PORT_MASK_EVEN       0x07ffe
+#define GET_NEW_PORT_MAX_ATTEMPTS       20
+
+#define GET_ALIAS_PORT                  -1
+#define GET_ALIAS_ID        GET_ALIAS_PORT
+
+#define FIND_EVEN_ALIAS_BASE             1
+
+/* GetNewPort() allocates port numbers.  Note that if a port number
+   is already in use, that does not mean that it cannot be used by
+   another link concurrently.  This is because GetNewPort() looks for
+   unused triplets: (dest addr, dest port, alias port). */
+/* Set link->alias_port; returns 0 on success, -1 on a bad parameter or if no free port is found. */
+static int
+GetNewPort(struct alias_link *link, int alias_port_param)
+{
+    int i;
+    int max_trials;
+    u_short port_sys;            /* candidate port, host byte order     */
+    u_short port_net;            /* same candidate, network byte order  */
+
+/*
+   Description of alias_port_param for GetNewPort().  When
+   this parameter lies in 0..0xffff, it precisely specifies
+   the port number.  GetNewPort() stores it in link->alias_port
+   as given, without checking whether it is in use, and returns 0.
+
+   When this parameter is GET_ALIAS_PORT, a port is selected
+   automatically as described below.  Any other value returns -1.
+*/
+ 
+    if (alias_port_param == GET_ALIAS_PORT)
+    {
+        /*
+         * The aliasing port is automatically selected
+         * by one of two methods below:
+         */
+        max_trials = GET_NEW_PORT_MAX_ATTEMPTS;
+
+        if (packetAliasMode & PKT_ALIAS_SAME_PORTS)
+        {
+            /*
+             * When the PKT_ALIAS_SAME_PORTS option is
+             * chosen, the first try will be the
+             * actual source port. If this is already
+             * in use, the remainder of the trials
+             * will be random.
+             */
+            port_net = link->src_port;
+            port_sys = ntohs(port_net);
+        }
+        else
+        {
+            /* First trial and all subsequent are random. */
+            port_sys = random() & ALIAS_PORT_MASK;
+            port_sys += ALIAS_PORT_BASE;     /* candidates lie in 0x8000..0xffff */
+            port_net = htons(port_sys);
+        }
+    }
+    else if (alias_port_param >= 0 && alias_port_param < 0x10000)
+    {
+        link->alias_port = (u_short) alias_port_param; /* stored as passed, no htons() */
+        return(0);
+    }
+    else
+    {
+#ifdef DEBUG
+        fprintf(stderr, "PacketAlias/GetNewPort(): ");
+        fprintf(stderr, "input parameter error\n");
+#endif
+        return(-1);
+    }
+
+
+/* Port number search: at most max_trials candidates are examined */
+    for (i=0; i<max_trials; i++)
+    {
+        int go_ahead;
+        struct alias_link *search_result;
+
+        search_result = FindLinkIn(link->dst_addr, link->alias_addr,
+                                   link->dst_port, port_net,
+                                   link->link_type, 0);
+        /* Usable if no link was found, or only a partial one while ours is full. */
+        if (search_result == NULL)
+            go_ahead = 1;
+        else if (!(link->flags          & LINK_PARTIALLY_SPECIFIED)
+               && (search_result->flags & LINK_PARTIALLY_SPECIFIED))
+            go_ahead = 1;
+        else
+            go_ahead = 0;
+
+        if (go_ahead)
+        {
+            if ((packetAliasMode & PKT_ALIAS_USE_SOCKETS)
+             && (link->flags & LINK_PARTIALLY_SPECIFIED)
+	     && ((link->link_type == LINK_TCP) || 
+		 (link->link_type == LINK_UDP)))
+            {
+                if (GetSocket(port_net, &link->sockfd, link->link_type))
+                {
+                    link->alias_port = port_net;
+                    return(0);
+                }
+            }
+            else
+            {
+                link->alias_port = port_net;
+                return(0);
+            }
+        }
+        /* Candidate rejected, or GetSocket() failed: draw a random one. */
+        port_sys = random() & ALIAS_PORT_MASK;
+        port_sys += ALIAS_PORT_BASE;
+        port_net = htons(port_sys);
+    }
+
+#ifdef DEBUG
+    fprintf(stderr, "PacketAlias/GetnewPort(): ");
+    fprintf(stderr, "could not find free port\n");
+#endif
+
+    return(-1);
+}
+
+
+/*
+ * GetSocket() reserves an alias port by binding a real socket to it.
+ *
+ *   port_net   port to bind, stored into sin_port unchanged
+ *   sockfd     receives the bound descriptor on success; left untouched
+ *              on failure
+ *   link_type  LINK_TCP selects a stream socket, LINK_UDP a datagram
+ *              socket; any other value is rejected
+ *
+ * Returns 1 when the socket was created and bound (sockCount is then
+ * incremented), 0 otherwise.  A socket that cannot be bound is closed
+ * before returning.
+ */
+static u_short
+GetSocket(u_short port_net, int *sockfd, int link_type)
+{
+    int err;
+    int sock;
+    /* Zero every member so that no stack garbage reaches bind() */
+    struct sockaddr_in sock_addr = { 0 };
+
+    if (link_type == LINK_TCP)
+        sock = socket(AF_INET, SOCK_STREAM, 0);
+    else if (link_type == LINK_UDP)
+        sock = socket(AF_INET, SOCK_DGRAM, 0);
+    else
+    {
+#ifdef DEBUG
+        fprintf(stderr, "PacketAlias/GetSocket(): ");
+        fprintf(stderr, "incorrect link type\n");
+#endif
+        return(0);
+    }
+
+    if (sock < 0)
+    {
+#ifdef DEBUG
+        /* Report the failed socket() result, not the caller's
+           not-yet-written output descriptor. */
+        fprintf(stderr, "PacketAlias/GetSocket(): ");
+        fprintf(stderr, "socket() error %d\n", sock);
+#endif
+        return(0);
+    }
+
+    sock_addr.sin_family = AF_INET;
+    sock_addr.sin_addr.s_addr = htonl(INADDR_ANY);
+    sock_addr.sin_port = port_net;
+
+    err = bind(sock,
+               (struct sockaddr *) &sock_addr,
+               sizeof(sock_addr));
+    if (err == 0)
+    {
+        sockCount++;
+        *sockfd = sock;
+        return(1);
+    }
+    else
+    {
+        close(sock);
+        return(0);
+    }
+}
+
+
+/* FindNewPortGroup() returns a base port number for an available
+   range of contiguous port numbers. Note that if a port number
+   is already in use, that does not mean that it cannot be used by
+   another link concurrently.  This is because FindNewPortGroup()
+   looks for unused triplets: (dest addr, dest port, alias port).
+
+   src_port is converted with ntohs() before use and the result is
+   produced with htons().  proto must be IPPROTO_UDP or IPPROTO_TCP.
+   When align is FIND_EVEN_ALIAS_BASE, random candidates are drawn
+   with ALIAS_PORT_MASK_EVEN instead of ALIAS_PORT_MASK; the first
+   candidate in PKT_ALIAS_SAME_PORTS mode is the caller's source port
+   and is not masked.
+
+   Returns the base port of a free range, or 0 when the protocol is
+   not supported or no range was found after
+   GET_NEW_PORT_MAX_ATTEMPTS candidates. */
+
+int
+FindNewPortGroup(struct in_addr  dst_addr,
+                 struct in_addr  alias_addr,
+                 u_short         src_port,
+                 u_short         dst_port,
+                 u_short         port_count,
+                 u_char          proto,
+                 u_char          align)
+{
+    int     i, j;
+    int     max_trials;
+    u_short port_sys;
+    int     link_type;
+
+    /*
+     * Get link_type from protocol
+     */
+
+    switch (proto)
+    {
+    case IPPROTO_UDP:
+        link_type = LINK_UDP;
+        break;
+    case IPPROTO_TCP:
+        link_type = LINK_TCP;
+        break;
+    default:
+        return (0);
+        break;
+    }
+
+    /*
+     * The aliasing port is automatically selected
+     * by one of two methods below:
+     */
+    max_trials = GET_NEW_PORT_MAX_ATTEMPTS;
+
+    if (packetAliasMode & PKT_ALIAS_SAME_PORTS) {
+      /*
+       * When the ALIAS_SAME_PORTS option is
+       * chosen, the first try will be the
+       * actual source port. If this is already
+       * in use, the remainder of the trials
+       * will be random.
+       */
+      port_sys = ntohs(src_port);
+
+    } else {
+
+      /* First trial and all subsequent are random. */
+      if (align == FIND_EVEN_ALIAS_BASE)
+        port_sys = random() & ALIAS_PORT_MASK_EVEN;
+      else
+        port_sys = random() & ALIAS_PORT_MASK;
+
+      port_sys += ALIAS_PORT_BASE;
+    }
+
+/* Port number search */
+    for (i = 0; i < max_trials; i++) {
+
+      /* A group that would run past port 65535 cannot be used:
+         htons() would truncate the sum and the range would wrap
+         around to port 0.  Such a base is skipped. */
+      if ((int)port_sys + (int)port_count <= 0x10000) {
+
+        for (j = 0; j < port_count; j++)
+          if (FindLinkIn(dst_addr, alias_addr,
+                         dst_port, htons(port_sys + j),
+                         link_type, 0) != NULL)
+            break;
+
+        /* Found a good range, return base */
+        if (j == port_count)
+          return (htons(port_sys));
+      }
+
+      /* Find a new base to try */
+      if (align == FIND_EVEN_ALIAS_BASE)
+        port_sys = random() & ALIAS_PORT_MASK_EVEN;
+      else
+        port_sys = random() & ALIAS_PORT_MASK;
+
+      port_sys += ALIAS_PORT_BASE;
+    }
+
+#ifdef DEBUG
+    fprintf(stderr, "PacketAlias/FindNewPortGroup(): ");
+    fprintf(stderr, "could not find free port(s)\n");
+#endif
+
+    return(0);
+}
+
+/*
+ * CleanupAliasData() walks every bucket of the outgoing link table,
+ * hands each link to DeleteLink(), and then restarts the incremental
+ * cleanup scan at bucket 0.  Whether a link is really removed is
+ * decided by DeleteLink().
+ */
+static void
+CleanupAliasData(void)
+{
+    struct alias_link *link;
+    struct alias_link *link_next;
+    int i;
+
+    for (i = 0; i < LINK_TABLE_OUT_SIZE; i++)
+    {
+        for (link = LIST_FIRST(&linkTableOut[i]); link != NULL;
+             link = link_next)
+        {
+            /* Fetch the successor first: DeleteLink() may free link */
+            link_next = LIST_NEXT(link, list_out);
+            DeleteLink(link);
+        }
+    }
+
+    cleanupIndex = 0;
+}
+
+
+/*
+ * IncrementalCleanup() examines one bucket of the outgoing link table
+ * per call and advances cleanupIndex, wrapping to 0 after the last
+ * bucket.
+ *
+ * A link is handed to DeleteLink() when the time since its timestamp
+ * exceeds its expire_time.  TCP links are exempt from this as long as
+ * both the inbound and the outbound state are
+ * ALIAS_TCP_STATE_CONNECTED.
+ */
+static void
+IncrementalCleanup(void)
+{
+    struct alias_link *link;
+    struct alias_link *link_next;
+
+    for (link = LIST_FIRST(&linkTableOut[cleanupIndex++]); link != NULL;
+         link = link_next)
+    {
+        int idelta;
+
+        /* Fetch the successor first: DeleteLink() may free link */
+        link_next = LIST_NEXT(link, list_out);
+
+        idelta = timeStamp - link->timestamp;
+        if (idelta <= link->expire_time)
+            continue;
+
+        if (link->link_type == LINK_TCP)
+        {
+            struct tcp_dat *tcp_aux;
+
+            /* Fully connected TCP links never time out here */
+            tcp_aux = link->data.tcp;
+            if (tcp_aux->state.in  == ALIAS_TCP_STATE_CONNECTED
+             && tcp_aux->state.out == ALIAS_TCP_STATE_CONNECTED)
+                continue;
+        }
+
+        DeleteLink(link);
+    }
+
+    if (cleanupIndex == LINK_TABLE_OUT_SIZE)
+        cleanupIndex = 0;
+}
+
+/*
+ * DeleteLink() unhooks a link from both lookup tables and releases
+ * everything it owns: the firewall hole (unless NO_FW_PUNCH is
+ * defined), the LSNAT server ring, the bound socket and the
+ * type-specific auxiliary data.  The per-type link counter is
+ * decremented and statistics are written when PKT_ALIAS_LOG is set.
+ *
+ * Links flagged LINK_PERMANENT are left alone unless deleteAllLinks
+ * is non-zero.
+ */
+static void
+DeleteLink(struct alias_link *link)
+{
+    struct server *ring;
+
+/* Permanent links survive unless everything is being torn down */
+    if ((link->flags & LINK_PERMANENT) && deleteAllLinks == 0)
+        return;
+
+#ifndef NO_FW_PUNCH
+/* Close the firewall hole belonging to this link, if any */
+    ClearFWHole(link);
+#endif
+
+/* Release the circular list of LSNAT servers */
+    ring = link->server;
+    if (ring != NULL)
+    {
+        struct server *node, *following;
+
+        node = ring;
+        do
+        {
+            following = node->next;
+            free(node);
+            node = following;
+        } while (node != ring);
+    }
+
+/* Take the link out of the output and the input table */
+    LIST_REMOVE(link, list_out);
+    LIST_REMOVE(link, list_in);
+
+/* Give back the socket that reserved the alias port */
+    if (link->sockfd != -1)
+    {
+        sockCount--;
+        close(link->sockfd);
+    }
+
+/* Per-type bookkeeping and auxiliary storage */
+    switch (link->link_type)
+    {
+    case LINK_ICMP:
+        icmpLinkCount--;
+        break;
+    case LINK_UDP:
+        udpLinkCount--;
+        break;
+    case LINK_TCP:
+        tcpLinkCount--;
+        free(link->data.tcp);
+        break;
+    case LINK_PPTP:
+        pptpLinkCount--;
+        break;
+    case LINK_FRAGMENT_ID:
+        fragmentIdLinkCount--;
+        break;
+    case LINK_FRAGMENT_PTR:
+        fragmentPtrLinkCount--;
+        /* free() accepts a null pointer */
+        free(link->data.frag_ptr);
+        break;
+    case LINK_ADDR:
+        break;
+    default:
+        protoLinkCount--;
+        break;
+    }
+
+/* Finally the link structure itself */
+    free(link);
+
+/* Statistics are written only when logging is enabled */
+    if (packetAliasMode & PKT_ALIAS_LOG)
+        ShowAliasStats();
+}
+
+
+/*
+ * AddLink() allocates a new link, fills it in and inserts it into both
+ * the outgoing and the incoming lookup table.
+ *
+ *   src_addr, dst_addr, alias_addr   addresses stored in the link
+ *   src_port, dst_port               ports stored in the link
+ *   alias_port_param                 handed to GetNewPort(): a value in
+ *                                    0..0xffff is used as the alias
+ *                                    port as is; per the original note
+ *                                    a negative value asks for the
+ *                                    port to be chosen automatically
+ *   link_type                        LINK_* constant; any other value
+ *                                    is counted as a generic protocol
+ *                                    link
+ *
+ * A zero dst_addr or dst_port marks the link as partially specified.
+ * LINK_PPTP links are flagged LINK_PERMANENT.
+ *
+ * Returns the new link, or NULL when memory is exhausted or no alias
+ * port could be obtained.  Statistics are written when PKT_ALIAS_LOG
+ * is set, except on the early NULL returns after a failed GetNewPort()
+ * or a failed TCP auxiliary allocation.
+ */
+static struct alias_link *
+AddLink(struct in_addr  src_addr,
+        struct in_addr  dst_addr,
+        struct in_addr  alias_addr,
+        u_short         src_port,
+        u_short         dst_port,
+        int             alias_port_param,
+        int             link_type)
+{
+    u_int start_point;
+    struct alias_link *link;
+
+    link = malloc(sizeof(struct alias_link));
+    if (link != NULL)
+    {
+        struct tcp_dat  *aux_tcp;
+
+    /* Basic initialization */
+        link->src_addr          = src_addr;
+        link->dst_addr          = dst_addr;
+        link->alias_addr        = alias_addr;
+        link->proxy_addr.s_addr = INADDR_ANY;
+        link->src_port          = src_port;
+        link->dst_port          = dst_port;
+        link->proxy_port        = 0;
+        link->server            = NULL;
+        link->link_type         = link_type;
+        link->sockfd            = -1;
+        link->flags             = 0;
+        link->timestamp         = timeStamp;
+    /* Link types without a timeout below (LINK_PPTP, LINK_ADDR) must
+       not carry an indeterminate value: IncrementalCleanup() reads
+       expire_time for every link. */
+        link->expire_time       = 0;
+
+    /* Expiration time */
+        switch (link_type)
+        {
+        case LINK_ICMP:
+            link->expire_time = ICMP_EXPIRE_TIME;
+            break;
+        case LINK_UDP:
+            link->expire_time = UDP_EXPIRE_TIME;
+            break;
+        case LINK_TCP:
+            link->expire_time = TCP_EXPIRE_INITIAL;
+            break;
+        case LINK_PPTP:
+            link->flags |= LINK_PERMANENT;	/* no timeout. */
+            break;
+        case LINK_FRAGMENT_ID:
+            link->expire_time = FRAGMENT_ID_EXPIRE_TIME;
+            break;
+        case LINK_FRAGMENT_PTR:
+            link->expire_time = FRAGMENT_PTR_EXPIRE_TIME;
+            break;
+        case LINK_ADDR:
+            break;
+        default:
+            link->expire_time = PROTO_EXPIRE_TIME;
+            break;
+        }
+
+    /* Determine alias flags */
+        if (dst_addr.s_addr == INADDR_ANY)
+            link->flags |= LINK_UNKNOWN_DEST_ADDR;
+        if (dst_port == 0)
+            link->flags |= LINK_UNKNOWN_DEST_PORT;
+
+    /* Determine alias port */
+        if (GetNewPort(link, alias_port_param) != 0)
+        {
+            free(link);
+            return(NULL);
+        }
+
+    /* Link-type dependent initialization */
+        switch(link_type)
+        {
+            case LINK_ICMP:
+                icmpLinkCount++;
+                break;
+            case LINK_UDP:
+                udpLinkCount++;
+                break;
+            case LINK_TCP:
+                aux_tcp = malloc(sizeof(struct tcp_dat));
+                if (aux_tcp != NULL)
+                {
+                    int i;
+
+                    tcpLinkCount++;
+                    aux_tcp->state.in = ALIAS_TCP_STATE_NOT_CONNECTED;
+                    aux_tcp->state.out = ALIAS_TCP_STATE_NOT_CONNECTED;
+                    aux_tcp->state.index = 0;
+                    aux_tcp->state.ack_modified = 0;
+                    for (i=0; i<N_LINK_TCP_DATA; i++)
+                        aux_tcp->ack[i].active = 0;
+                    aux_tcp->fwhole = -1;
+                    link->data.tcp = aux_tcp;
+                }
+                else
+                {
+#ifdef DEBUG
+                    fprintf(stderr, "PacketAlias/AddLink: ");
+                    fprintf(stderr, " cannot allocate auxiliary TCP data\n");
+#endif
+                    /* GetNewPort() may have bound a socket for this
+                       link; give it back so neither the descriptor
+                       nor sockCount leaks. */
+                    if (link->sockfd != -1)
+                    {
+                        sockCount--;
+                        close(link->sockfd);
+                    }
+                    free(link);
+                    return (NULL);
+                }
+                break;
+            case LINK_PPTP:
+                pptpLinkCount++;
+                break;
+            case LINK_FRAGMENT_ID:
+                fragmentIdLinkCount++;
+                break;
+            case LINK_FRAGMENT_PTR:
+                fragmentPtrLinkCount++;
+                break;
+            case LINK_ADDR:
+                break;
+            default:
+                protoLinkCount++;
+                break;
+        }
+
+    /* Set up pointers for output lookup table */
+        start_point = StartPointOut(src_addr, dst_addr,
+                                    src_port, dst_port, link_type);
+        LIST_INSERT_HEAD(&linkTableOut[start_point], link, list_out);
+
+    /* Set up pointers for input lookup table */
+        start_point = StartPointIn(alias_addr, link->alias_port, link_type);
+        LIST_INSERT_HEAD(&linkTableIn[start_point], link, list_in);
+    }
+    else
+    {
+#ifdef DEBUG
+        fprintf(stderr, "PacketAlias/AddLink(): ");
+        fprintf(stderr, "malloc() call failed.\n");
+#endif
+    }
+
+    if (packetAliasMode & PKT_ALIAS_LOG)
+    {
+        ShowAliasStats();
+    }
+
+    return(link);
+}
+
+/*
+ * ReLink() creates a replacement for old_link from the given
+ * addresses, ports and type (same meaning as for AddLink()), then
+ * hands old_link to DeleteLink().
+ *
+ * Unless NO_FW_PUNCH is defined, a firewall hole is punched for the
+ * replacement when the old link is a TCP link whose fwhole is greater
+ * than zero.
+ *
+ * Returns the replacement, or NULL if AddLink() failed; old_link is
+ * passed to DeleteLink() in either case.
+ */
+static struct alias_link *
+ReLink(struct alias_link *old_link,
+       struct in_addr  src_addr,
+       struct in_addr  dst_addr,
+       struct in_addr  alias_addr,
+       u_short         src_port,
+       u_short         dst_port,
+       int             alias_port_param,
+       int             link_type)
+{
+    struct alias_link *replacement;
+
+    replacement = AddLink(src_addr, dst_addr, alias_addr,
+                          src_port, dst_port, alias_port_param,
+                          link_type);
+
+#ifndef NO_FW_PUNCH
+    /* Carry an existing firewall hole over to the new link */
+    if (replacement != NULL && old_link->link_type == LINK_TCP)
+    {
+        if (old_link->data.tcp->fwhole > 0)
+            PunchFWHole(replacement);
+    }
+#endif
+
+    DeleteLink(old_link);
+
+    return (replacement);
+}
+
+/*
+ * _FindLinkOut() looks up a link in the outgoing table.
+ *
+ * An exact match requires equal source and destination address and
+ * port, equal link type and no LSNAT server list; its timestamp is
+ * refreshed.
+ *
+ * When nothing matches and replace_partial_links is non-zero, links
+ * stored with an unknown destination port and/or address are tried
+ * (port unknown, then address unknown, then both unknown).  A hit is
+ * replaced through ReLink() by a link carrying the given destination
+ * and the old alias address and port.
+ *
+ * Returns the link found or created, or NULL.
+ */
+static struct alias_link *
+_FindLinkOut(struct in_addr src_addr,
+            struct in_addr dst_addr,
+            u_short src_port,
+            u_short dst_port,
+            int link_type,
+            int replace_partial_links)
+{
+    u_int bucket;
+    int port_known, addr_known;
+    struct alias_link *link;
+
+    bucket = StartPointOut(src_addr, dst_addr, src_port, dst_port, link_type);
+    LIST_FOREACH(link, &linkTableOut[bucket], list_out)
+    {
+        if (link->src_addr.s_addr != src_addr.s_addr)
+            continue;
+        if (link->server != NULL)
+            continue;
+        if (link->dst_addr.s_addr != dst_addr.s_addr)
+            continue;
+        if (link->dst_port != dst_port || link->src_port != src_port)
+            continue;
+        if (link->link_type != link_type)
+            continue;
+
+        link->timestamp = timeStamp;
+        break;
+    }
+
+    if (link != NULL || !replace_partial_links)
+        return (link);
+
+/* Search for partially specified links. */
+    port_known = (dst_port != 0);
+    addr_known = (dst_addr.s_addr != INADDR_ANY);
+
+    if (port_known && addr_known)
+    {
+        link = _FindLinkOut(src_addr, dst_addr, src_port, 0,
+                            link_type, 0);
+        if (link == NULL)
+            link = _FindLinkOut(src_addr, nullAddress, src_port,
+                                dst_port, link_type, 0);
+    }
+
+    if (link == NULL && (port_known || addr_known))
+        link = _FindLinkOut(src_addr, nullAddress, src_port, 0,
+                            link_type, 0);
+
+    if (link == NULL)
+        return (NULL);
+
+    return (ReLink(link,
+                   src_addr, dst_addr, link->alias_addr,
+                   src_port, dst_port, link->alias_port,
+                   link_type));
+}
+
+/*
+ * FindLinkOut() is the outgoing lookup used by the rest of the file.
+ * It first searches with the given source address.  If that fails and
+ * the source address equals the (non-zero) aliasing address, the
+ * search is repeated with a null source address, so that permanent
+ * links can be specified against the default address before that
+ * address is known.
+ *
+ * Returns the link or NULL.
+ */
+static struct alias_link *
+FindLinkOut(struct in_addr src_addr,
+            struct in_addr dst_addr,
+            u_short src_port,
+            u_short dst_port,
+            int link_type,
+            int replace_partial_links)
+{
+    struct alias_link *hit;
+
+    hit = _FindLinkOut(src_addr, dst_addr, src_port, dst_port,
+                       link_type, replace_partial_links);
+    if (hit != NULL)
+        return (hit);
+
+    /* The fallback applies only to the default aliasing address */
+    if (aliasAddress.s_addr == 0)
+        return (NULL);
+    if (src_addr.s_addr != aliasAddress.s_addr)
+        return (NULL);
+
+    return (_FindLinkOut(nullAddress, dst_addr, src_port, dst_port,
+                         link_type, replace_partial_links));
+}
+
+
+/*
+ * _FindLinkIn() looks up a link in the incoming table by alias
+ * address, alias port and link type, refined by destination address
+ * and port where those are known.
+ *
+ * Each link in the bucket is classified by the combination of the
+ * "unknown destination" flags of the query and of the link itself.
+ * A fully specified match ends the scan at once and has its timestamp
+ * refreshed; otherwise the first candidate of each partial category
+ * is remembered and the result is chosen in the order: unknown
+ * destination port, unknown destination address, both unknown.
+ *
+ * When replace_partial_links is non-zero and the chosen link is
+ * partially specified or carries an LSNAT server list, it is replaced
+ * through ReLink() by a link with the given destination.  For an
+ * LSNAT link the source is taken from the current server entry and
+ * the server pointer is advanced to the next entry.
+ *
+ * Returns the link found or created, or NULL.
+ */
+static struct alias_link *
+_FindLinkIn(struct in_addr dst_addr,
+           struct in_addr  alias_addr,
+           u_short         dst_port,
+           u_short         alias_port,
+           int             link_type,
+           int             replace_partial_links)
+{
+    int flags_in;
+    u_int start_point;
+    struct alias_link *link;
+    struct alias_link *link_fully_specified;
+    struct alias_link *link_unknown_all;
+    struct alias_link *link_unknown_dst_addr;
+    struct alias_link *link_unknown_dst_port;
+
+/* Initialize pointers */
+    link_fully_specified  = NULL;
+    link_unknown_all      = NULL;
+    link_unknown_dst_addr = NULL;
+    link_unknown_dst_port = NULL;
+
+/* If either the dest addr or port is unknown, the search
+   loop will have to know about this. */
+
+    flags_in = 0;
+    if (dst_addr.s_addr == INADDR_ANY)
+        flags_in |= LINK_UNKNOWN_DEST_ADDR;
+    if (dst_port == 0)
+        flags_in |= LINK_UNKNOWN_DEST_PORT;
+
+/* Search loop */
+    start_point = StartPointIn(alias_addr, alias_port, link_type);
+    LIST_FOREACH(link, &linkTableIn[start_point], list_in)
+    {
+        int flags;
+
+        /* Union of what the query and the stored link leave unknown */
+        flags = flags_in | link->flags;
+        if (!(flags & LINK_PARTIALLY_SPECIFIED))
+        {
+            /* Everything is known on both sides: compare all fields */
+            if (link->alias_addr.s_addr == alias_addr.s_addr
+             && link->alias_port        == alias_port 
+             && link->dst_addr.s_addr   == dst_addr.s_addr
+             && link->dst_port          == dst_port
+             && link->link_type         == link_type)
+            {
+                link_fully_specified = link;
+                break;
+            }
+        }
+        else if ((flags & LINK_UNKNOWN_DEST_ADDR)
+              && (flags & LINK_UNKNOWN_DEST_PORT))
+        {
+            /* Neither destination field can be compared */
+            if (link->alias_addr.s_addr == alias_addr.s_addr
+             && link->alias_port        == alias_port
+             && link->link_type         == link_type)
+            {
+                if (link_unknown_all == NULL)
+                    link_unknown_all = link;
+            }
+        }
+        else if (flags & LINK_UNKNOWN_DEST_ADDR)
+        {
+            /* Only the destination port can be compared */
+            if (link->alias_addr.s_addr == alias_addr.s_addr
+             && link->alias_port        == alias_port
+             && link->link_type         == link_type
+             && link->dst_port          == dst_port)
+            {
+                if (link_unknown_dst_addr == NULL)
+                    link_unknown_dst_addr = link;
+            }
+        }
+        else if (flags & LINK_UNKNOWN_DEST_PORT)
+        {
+            /* Only the destination address can be compared */
+            if (link->alias_addr.s_addr == alias_addr.s_addr
+             && link->alias_port        == alias_port
+             && link->link_type         == link_type
+             && link->dst_addr.s_addr   == dst_addr.s_addr)
+            {
+                if (link_unknown_dst_port == NULL)
+                    link_unknown_dst_port = link;
+            }
+        }
+    }
+
+
+
+/* Pick the best candidate; only a full match refreshes the timestamp */
+    if (link_fully_specified != NULL)
+    {
+        link_fully_specified->timestamp = timeStamp;
+        link = link_fully_specified;
+    }
+    else if (link_unknown_dst_port != NULL)
+	link = link_unknown_dst_port;
+    else if (link_unknown_dst_addr != NULL)
+	link = link_unknown_dst_addr;
+    else if (link_unknown_all != NULL)
+	link = link_unknown_all;
+    else
+        return (NULL);
+
+/* Replace a partial or LSNAT link by one bound to this destination */
+    if (replace_partial_links &&
+	(link->flags & LINK_PARTIALLY_SPECIFIED || link->server != NULL))
+    {
+	struct in_addr src_addr;
+	u_short src_port;
+
+	if (link->server != NULL) {		/* LSNAT link */
+	    src_addr = link->server->addr;
+	    src_port = link->server->port;
+	    /* Advance to the next server entry for the following lookup */
+	    link->server = link->server->next;
+	} else {
+	    src_addr = link->src_addr;
+	    src_port = link->src_port;
+	}
+
+	link = ReLink(link,
+		      src_addr, dst_addr, alias_addr,
+		      src_port, dst_port, alias_port,
+		      link_type);
+    }
+
+    return (link);
+}
+
+/*
+ * FindLinkIn() is the incoming lookup used by the rest of the file.
+ * It first searches with the given alias address.  If that fails and
+ * the alias address equals the (non-zero) aliasing address, the
+ * search is repeated with a null alias address, so that permanent
+ * links can be specified against the default address before that
+ * address is known.
+ *
+ * Returns the link or NULL.
+ */
+static struct alias_link *
+FindLinkIn(struct in_addr dst_addr,
+           struct in_addr alias_addr,
+           u_short dst_port,
+           u_short alias_port,
+           int link_type,
+           int replace_partial_links)
+{
+    struct alias_link *hit;
+
+    hit = _FindLinkIn(dst_addr, alias_addr, dst_port, alias_port,
+                      link_type, replace_partial_links);
+    if (hit != NULL)
+        return (hit);
+
+    /* The fallback applies only to the default aliasing address */
+    if (aliasAddress.s_addr == 0)
+        return (NULL);
+    if (alias_addr.s_addr != aliasAddress.s_addr)
+        return (NULL);
+
+    return (_FindLinkIn(dst_addr, nullAddress, dst_port, alias_port,
+                        link_type, replace_partial_links));
+}
+
+
+
+
+/* External routines for finding/adding links
+
+-- "external" means outside alias_db.c, but within alias*.c --
+
+    FindIcmpIn(), FindIcmpOut()
+    FindFragmentIn1(), FindFragmentIn2()
+    AddFragmentPtrLink(), FindFragmentPtr()
+    FindProtoIn(), FindProtoOut()
+    FindUdpTcpIn(), FindUdpTcpOut(), FindRtspOut()
+    AddPptp(), FindPptpOutByCallId(), FindPptpInByCallId(),
+    FindPptpOutByPeerCallId(), FindPptpInByPeerCallId()
+    FindOriginalAddress(), FindAliasAddress()
+
+(prototypes in alias_local.h)
+*/
+
+
+/*
+ * FindIcmpIn() returns the ICMP link matching an incoming packet,
+ * keyed by destination address, alias address and aliased id.
+ *
+ * If none exists, one is created only when create is non-zero and
+ * PKT_ALIAS_DENY_INCOMING is not set; the original address then comes
+ * from FindOriginalAddress() and id_alias serves as both source and
+ * alias id.
+ *
+ * Returns the link or NULL.
+ */
+struct alias_link *
+FindIcmpIn(struct in_addr dst_addr,
+           struct in_addr alias_addr,
+           u_short id_alias,
+           int create)
+{
+    struct alias_link *found;
+    struct in_addr target_addr;
+
+    found = FindLinkIn(dst_addr, alias_addr,
+                       NO_DEST_PORT, id_alias,
+                       LINK_ICMP, 0);
+    if (found != NULL)
+        return (found);
+
+    if (!create || (packetAliasMode & PKT_ALIAS_DENY_INCOMING))
+        return (NULL);
+
+    target_addr = FindOriginalAddress(alias_addr);
+    return (AddLink(target_addr, dst_addr, alias_addr,
+                    id_alias, NO_DEST_PORT, id_alias,
+                    LINK_ICMP));
+}
+
+
+/*
+ * FindIcmpOut() returns the ICMP link matching an outgoing packet,
+ * keyed by source address, destination address and id.
+ *
+ * If none exists and create is non-zero, a link is added with the
+ * alias address from FindAliasAddress() and GET_ALIAS_ID as the alias
+ * port parameter.
+ *
+ * Returns the link or NULL.
+ */
+struct alias_link *
+FindIcmpOut(struct in_addr src_addr,
+            struct in_addr dst_addr,
+            u_short id,
+            int create)
+{
+    struct alias_link *found;
+    struct in_addr alias_addr;
+
+    found = FindLinkOut(src_addr, dst_addr,
+                        id, NO_DEST_PORT,
+                        LINK_ICMP, 0);
+    if (found != NULL || !create)
+        return (found);
+
+    alias_addr = FindAliasAddress(src_addr);
+    return (AddLink(src_addr, dst_addr, alias_addr,
+                    id, NO_DEST_PORT, GET_ALIAS_ID,
+                    LINK_ICMP));
+}
+
+
+/*
+ * FindFragmentIn1() returns the LINK_FRAGMENT_ID link for the given
+ * destination address, alias address and IP id, adding one with a
+ * null source address when none exists.
+ *
+ * Returns the link, or NULL if a new link could not be added.
+ */
+struct alias_link *
+FindFragmentIn1(struct in_addr dst_addr,
+                struct in_addr alias_addr,
+                u_short ip_id)
+{
+    struct alias_link *found;
+
+    found = FindLinkIn(dst_addr, alias_addr,
+                       NO_DEST_PORT, ip_id,
+                       LINK_FRAGMENT_ID, 0);
+    if (found != NULL)
+        return (found);
+
+    return (AddLink(nullAddress, dst_addr, alias_addr,
+                    NO_SRC_PORT, NO_DEST_PORT, ip_id,
+                    LINK_FRAGMENT_ID));
+}
+
+
+/*
+ * FindFragmentIn2() performs the same lookup as FindFragmentIn1()
+ * but never adds a link: the result is NULL when nothing matches.
+ */
+struct alias_link *
+FindFragmentIn2(struct in_addr dst_addr,
+                struct in_addr alias_addr,
+                u_short ip_id)
+{
+    struct alias_link *found;
+
+    found = FindLinkIn(dst_addr, alias_addr,
+                       NO_DEST_PORT, ip_id,
+                       LINK_FRAGMENT_ID, 0);
+
+    return (found);
+}
+
+
+/*
+ * AddFragmentPtrLink() adds a LINK_FRAGMENT_PTR link for the given
+ * destination address and IP id; source and alias address are null.
+ *
+ * Returns the new link, or NULL if AddLink() failed.
+ */
+struct alias_link *
+AddFragmentPtrLink(struct in_addr dst_addr,
+                   u_short ip_id)
+{
+    struct alias_link *created;
+
+    created = AddLink(nullAddress, dst_addr, nullAddress,
+                      NO_SRC_PORT, NO_DEST_PORT, ip_id,
+                      LINK_FRAGMENT_PTR);
+
+    return (created);
+}
+
+
+/*
+ * FindFragmentPtr() looks up the LINK_FRAGMENT_PTR link for the given
+ * destination address and IP id, using a null alias address.
+ *
+ * Returns the link or NULL; no link is created.
+ */
+struct alias_link *
+FindFragmentPtr(struct in_addr dst_addr,
+                u_short ip_id)
+{
+    struct alias_link *found;
+
+    found = FindLinkIn(dst_addr, nullAddress,
+                       NO_DEST_PORT, ip_id,
+                       LINK_FRAGMENT_PTR, 0);
+
+    return (found);
+}
+
+
+/*
+ * FindProtoIn() returns the link for an incoming packet of a protocol
+ * that is tracked by protocol number only; proto itself is used as
+ * the link type and all ports are zero.  Partial links may be
+ * replaced during the lookup.
+ *
+ * If nothing is found, a link is added unless PKT_ALIAS_DENY_INCOMING
+ * is set; the original address comes from FindOriginalAddress().
+ *
+ * Returns the link or NULL.
+ */
+struct alias_link *
+FindProtoIn(struct in_addr dst_addr,
+            struct in_addr alias_addr,
+	    u_char proto)
+{
+    struct alias_link *found;
+    struct in_addr target_addr;
+
+    found = FindLinkIn(dst_addr, alias_addr,
+                       NO_DEST_PORT, 0,
+                       proto, 1);
+    if (found != NULL)
+        return (found);
+
+    if (packetAliasMode & PKT_ALIAS_DENY_INCOMING)
+        return (NULL);
+
+    target_addr = FindOriginalAddress(alias_addr);
+    return (AddLink(target_addr, dst_addr, alias_addr,
+                    NO_SRC_PORT, NO_DEST_PORT, 0,
+                    proto));
+}
+
+
+/*
+ * FindProtoOut() returns the link for an outgoing packet of a
+ * protocol that is tracked by protocol number only; proto itself is
+ * used as the link type and all ports are zero.  Partial links may be
+ * replaced during the lookup.
+ *
+ * If nothing is found, a link is added with the alias address from
+ * FindAliasAddress().
+ *
+ * Returns the link, or NULL if a new link could not be added.
+ */
+struct alias_link *
+FindProtoOut(struct in_addr src_addr,
+             struct in_addr dst_addr,
+             u_char proto)
+{
+    struct alias_link *found;
+    struct in_addr alias_addr;
+
+    found = FindLinkOut(src_addr, dst_addr,
+                        NO_SRC_PORT, NO_DEST_PORT,
+                        proto, 1);
+    if (found != NULL)
+        return (found);
+
+    alias_addr = FindAliasAddress(src_addr);
+    return (AddLink(src_addr, dst_addr, alias_addr,
+                    NO_SRC_PORT, NO_DEST_PORT, 0,
+                    proto));
+}
+
+
+/*
+ * FindUdpTcpIn() returns the UDP or TCP link matching an incoming
+ * packet.  proto must be IPPROTO_UDP or IPPROTO_TCP; anything else
+ * yields NULL.
+ *
+ * create is passed on as the "replace partial links" switch of the
+ * lookup.  If nothing is found, a link is added only when create is
+ * non-zero and PKT_ALIAS_DENY_INCOMING is not set; the original
+ * address comes from FindOriginalAddress() and alias_port is used as
+ * both source and alias port.
+ *
+ * Returns the link or NULL.
+ */
+struct alias_link *
+FindUdpTcpIn(struct in_addr dst_addr,
+             struct in_addr alias_addr,
+             u_short        dst_port,
+             u_short        alias_port,
+             u_char         proto,
+             int            create)
+{
+    int link_type;
+    struct alias_link *found;
+    struct in_addr target_addr;
+
+    if (proto == IPPROTO_UDP)
+        link_type = LINK_UDP;
+    else if (proto == IPPROTO_TCP)
+        link_type = LINK_TCP;
+    else
+        return NULL;
+
+    found = FindLinkIn(dst_addr, alias_addr,
+                       dst_port, alias_port,
+                       link_type, create);
+    if (found != NULL)
+        return (found);
+
+    if (!create || (packetAliasMode & PKT_ALIAS_DENY_INCOMING))
+        return (NULL);
+
+    target_addr = FindOriginalAddress(alias_addr);
+    return (AddLink(target_addr, dst_addr, alias_addr,
+                    alias_port, dst_port, alias_port,
+                    link_type));
+}
+
+
+/*
+ * FindUdpTcpOut() returns the UDP or TCP link matching an outgoing
+ * packet.  proto must be IPPROTO_UDP or IPPROTO_TCP; anything else
+ * yields NULL.
+ *
+ * create is passed on as the "replace partial links" switch of the
+ * lookup.  If nothing is found and create is non-zero, a link is
+ * added with the alias address from FindAliasAddress() and
+ * GET_ALIAS_PORT as the alias port parameter.
+ *
+ * Returns the link or NULL.
+ */
+struct alias_link *
+FindUdpTcpOut(struct in_addr  src_addr,
+              struct in_addr  dst_addr,
+              u_short         src_port,
+              u_short         dst_port,
+              u_char          proto,
+              int             create)
+{
+    int link_type;
+    struct alias_link *found;
+    struct in_addr alias_addr;
+
+    if (proto == IPPROTO_UDP)
+        link_type = LINK_UDP;
+    else if (proto == IPPROTO_TCP)
+        link_type = LINK_TCP;
+    else
+        return NULL;
+
+    found = FindLinkOut(src_addr, dst_addr, src_port, dst_port,
+                        link_type, create);
+    if (found != NULL || !create)
+        return (found);
+
+    alias_addr = FindAliasAddress(src_addr);
+    return (AddLink(src_addr, dst_addr, alias_addr,
+                    src_port, dst_port, GET_ALIAS_PORT,
+                    link_type));
+}
+
+
+/*
+ * AddPptp() adds a LINK_PPTP link.  The source call id is stored in
+ * the source port field, the destination port is left at zero and
+ * GET_ALIAS_PORT is passed as the alias port parameter.
+ *
+ * Returns the new link, or NULL if AddLink() failed.
+ */
+struct alias_link *
+AddPptp(struct in_addr  src_addr,
+	struct in_addr  dst_addr,
+	struct in_addr  alias_addr,
+	u_int16_t       src_call_id)
+{
+    return (AddLink(src_addr, dst_addr, alias_addr,
+                    src_call_id, 0, GET_ALIAS_PORT,
+                    LINK_PPTP));
+}
+
+
+/*
+ * FindPptpOutByCallId() scans one bucket of the outgoing table for a
+ * LINK_PPTP link with the given source and destination address whose
+ * source port field equals src_call_id.
+ *
+ * The bucket is computed with both ports given as zero.
+ * NOTE(review): this presumably relies on StartPointOut() ignoring
+ * the ports for LINK_PPTP - confirm against its definition.
+ *
+ * Returns the link or NULL.
+ */
+struct alias_link *
+FindPptpOutByCallId(struct in_addr src_addr,
+		    struct in_addr dst_addr,
+		    u_int16_t      src_call_id)
+{
+    u_int bucket;
+    struct alias_link *cand;
+
+    bucket = StartPointOut(src_addr, dst_addr, 0, 0, LINK_PPTP);
+    for (cand = LIST_FIRST(&linkTableOut[bucket]); cand != NULL;
+         cand = LIST_NEXT(cand, list_out))
+    {
+        if (cand->link_type != LINK_PPTP)
+            continue;
+        if (cand->src_addr.s_addr != src_addr.s_addr ||
+            cand->dst_addr.s_addr != dst_addr.s_addr)
+            continue;
+        if (cand->src_port == src_call_id)
+            return (cand);
+    }
+
+    return (NULL);
+}
+
+
+/*
+ * FindPptpOutByPeerCallId() scans one bucket of the outgoing table
+ * for a LINK_PPTP link with the given source and destination address
+ * whose destination port field equals dst_call_id.
+ *
+ * The bucket is computed with both ports given as zero.
+ * NOTE(review): this presumably relies on StartPointOut() ignoring
+ * the ports for LINK_PPTP - confirm against its definition.
+ *
+ * Returns the link or NULL.
+ */
+struct alias_link *
+FindPptpOutByPeerCallId(struct in_addr src_addr,
+			struct in_addr dst_addr,
+			u_int16_t      dst_call_id)
+{
+    u_int bucket;
+    struct alias_link *cand;
+
+    bucket = StartPointOut(src_addr, dst_addr, 0, 0, LINK_PPTP);
+    for (cand = LIST_FIRST(&linkTableOut[bucket]); cand != NULL;
+         cand = LIST_NEXT(cand, list_out))
+    {
+        if (cand->link_type != LINK_PPTP)
+            continue;
+        if (cand->src_addr.s_addr != src_addr.s_addr ||
+            cand->dst_addr.s_addr != dst_addr.s_addr)
+            continue;
+        if (cand->dst_port == dst_call_id)
+            return (cand);
+    }
+
+    return (NULL);
+}
+
+
+/*
+ * FindPptpInByCallId() scans one bucket of the incoming table for a
+ * LINK_PPTP link with the given destination and alias address whose
+ * destination port field equals dst_call_id.
+ *
+ * The bucket is computed with the alias port given as zero.
+ * NOTE(review): this presumably relies on StartPointIn() ignoring
+ * the port for LINK_PPTP - confirm against its definition.
+ *
+ * Returns the link or NULL.
+ */
+struct alias_link *
+FindPptpInByCallId(struct in_addr dst_addr,
+		   struct in_addr alias_addr,
+		   u_int16_t      dst_call_id)
+{
+    u_int bucket;
+    struct alias_link *cand;
+
+    bucket = StartPointIn(alias_addr, 0, LINK_PPTP);
+    for (cand = LIST_FIRST(&linkTableIn[bucket]); cand != NULL;
+         cand = LIST_NEXT(cand, list_in))
+    {
+        if (cand->link_type != LINK_PPTP)
+            continue;
+        if (cand->dst_addr.s_addr != dst_addr.s_addr ||
+            cand->alias_addr.s_addr != alias_addr.s_addr)
+            continue;
+        if (cand->dst_port == dst_call_id)
+            return (cand);
+    }
+
+    return (NULL);
+}
+
+
+/*
+ * FindPptpInByPeerCallId() looks up a LINK_PPTP link through the
+ * general incoming lookup, with the alias call id in the alias port
+ * position and zero ("any") as destination port.
+ *
+ * Returns the link or NULL; partial links are not replaced.
+ */
+struct alias_link *
+FindPptpInByPeerCallId(struct in_addr dst_addr,
+		       struct in_addr alias_addr,
+		       u_int16_t      alias_call_id)
+{
+    return (FindLinkIn(dst_addr, alias_addr,
+                       0/* any */, alias_call_id,
+                       LINK_PPTP, 0));
+}
+
+
+/*
+ * FindRtspOut() returns the outgoing UDP or TCP link for the given
+ * source with an unspecified (zero) destination port, replacing
+ * partial links during the lookup.  proto must be IPPROTO_UDP or
+ * IPPROTO_TCP; anything else yields NULL.
+ *
+ * If nothing is found, a link is added with the alias address from
+ * FindAliasAddress() and alias_port as the alias port parameter.
+ *
+ * Returns the link, or NULL if a new link could not be added.
+ */
+struct alias_link *
+FindRtspOut(struct in_addr  src_addr,
+            struct in_addr  dst_addr,
+            u_short         src_port,
+            u_short         alias_port,
+            u_char          proto)
+{
+    int link_type;
+    struct alias_link *found;
+    struct in_addr alias_addr;
+
+    if (proto == IPPROTO_UDP)
+        link_type = LINK_UDP;
+    else if (proto == IPPROTO_TCP)
+        link_type = LINK_TCP;
+    else
+        return NULL;
+
+    found = FindLinkOut(src_addr, dst_addr, src_port, 0, link_type, 1);
+    if (found != NULL)
+        return (found);
+
+    alias_addr = FindAliasAddress(src_addr);
+    return (AddLink(src_addr, dst_addr, alias_addr,
+                    src_port, 0, alias_port,
+                    link_type));
+}
+
+
+/*
+ * FindOriginalAddress() maps an alias address back to the address it
+ * stands for, using the LINK_ADDR links.
+ *
+ * Without a matching link, newDefaultLink is set to 1 and the result
+ * depends on targetAddress: INADDR_ANY gives alias_addr itself,
+ * INADDR_NONE gives aliasAddress, anything else gives targetAddress.
+ *
+ * With a matching link, an LSNAT link yields the address of its
+ * current server entry and advances to the next entry; otherwise the
+ * link's source address is returned, or aliasAddress if that is
+ * INADDR_ANY.
+ */
+struct in_addr
+FindOriginalAddress(struct in_addr alias_addr)
+{
+    struct alias_link *map;
+
+    map = FindLinkIn(nullAddress, alias_addr,
+                     0, 0, LINK_ADDR, 0);
+
+    if (map == NULL)
+    {
+        /* No static mapping: fall back to the configured target */
+        newDefaultLink = 1;
+        if (targetAddress.s_addr == INADDR_ANY)
+            return alias_addr;
+        if (targetAddress.s_addr == INADDR_NONE)
+            return aliasAddress;
+        return targetAddress;
+    }
+
+    if (map->server != NULL)
+    {
+        /* LSNAT link: hand out the current server, then rotate */
+        struct in_addr picked;
+
+        picked = map->server->addr;
+        map->server = map->server->next;
+        return (picked);
+    }
+
+    if (map->src_addr.s_addr == INADDR_ANY)
+        return aliasAddress;
+
+    return map->src_addr;
+}
+
+
+/*
+ * FindAliasAddress() maps an original address to the alias address
+ * to use for it, using the LINK_ADDR links.
+ *
+ * Returns the alias address of the matching link, or aliasAddress
+ * when there is no such link or its alias address is INADDR_ANY.
+ */
+struct in_addr
+FindAliasAddress(struct in_addr original_addr)
+{
+    struct alias_link *map;
+
+    map = FindLinkOut(original_addr, nullAddress,
+                      0, 0, LINK_ADDR, 0);
+
+    if (map != NULL && map->alias_addr.s_addr != INADDR_ANY)
+        return map->alias_addr;
+
+    return aliasAddress;
+}
+
+
+/* External routines for getting or changing link data
+   (external to alias_db.c, but internal to alias*.c)
+
+    SetFragmentAddr(), GetFragmentAddr()
+    SetFragmentPtr(), GetFragmentPtr()
+    SetStateIn(), SetStateOut(), GetStateIn(), GetStateOut()
+    GetOriginalAddress(), GetDestAddress(), GetAliasAddress()
+    GetOriginalPort(), GetAliasPort()
+    SetAckModified(), GetAckModified()
+    GetDeltaAckIn(), GetDeltaSeqOut(), AddSeq()
+    SetLastLineCrlfTermed(), GetLastLineCrlfTermed()
+    SetDestCallId()
+*/
+
+
+/* SetFragmentAddr() stores src_addr in the link's data.frag_addr
+   member. */
+void
+SetFragmentAddr(struct alias_link *link, struct in_addr src_addr)
+{
+    link->data.frag_addr = src_addr;
+}
+
+
+/* GetFragmentAddr() copies the link's data.frag_addr member into
+   *src_addr. */
+void
+GetFragmentAddr(struct alias_link *link, struct in_addr *src_addr)
+{
+    *src_addr = link->data.frag_addr;
+}
+
+
+/* SetFragmentPtr() stores fptr in the link's data.frag_ptr member.
+   For a LINK_FRAGMENT_PTR link, DeleteLink() later passes that
+   pointer to free(). */
+void
+SetFragmentPtr(struct alias_link *link, char *fptr)
+{
+    link->data.frag_ptr = fptr;
+}
+
+
+/* Hand back, through *fptr, the fragment pointer kept in the link's data. */
+void
+GetFragmentPtr(struct alias_link *link, char **fptr) {
+    *fptr = link->data.frag_ptr;
+}
+
+
+/*
+ * Set the TCP input-side state and refresh the link's expiry time from
+ * the combined in/out state (dead, single-dead or connected).  A state
+ * other than the two ALIAS_TCP_STATE_* values handled here aborts.
+ */
+void
+SetStateIn(struct alias_link *link, int state) {
+    if (state != ALIAS_TCP_STATE_DISCONNECTED &&
+        state != ALIAS_TCP_STATE_CONNECTED)
+        abort();
+
+    if (link->data.tcp->state.out != ALIAS_TCP_STATE_CONNECTED) {
+        if (state == ALIAS_TCP_STATE_DISCONNECTED)
+            link->expire_time = TCP_EXPIRE_DEAD;
+    } else {
+        link->expire_time = (state == ALIAS_TCP_STATE_DISCONNECTED) ?
+            TCP_EXPIRE_SINGLEDEAD : TCP_EXPIRE_CONNECTED;
+    }
+    link->data.tcp->state.in = state;
+}
+
+
+/*
+ * Set the TCP output-side state and refresh the link's expiry time from
+ * the combined in/out state (dead, single-dead or connected).  A state
+ * other than the two ALIAS_TCP_STATE_* values handled here aborts.
+ */
+void
+SetStateOut(struct alias_link *link, int state) {
+    if (state != ALIAS_TCP_STATE_DISCONNECTED &&
+        state != ALIAS_TCP_STATE_CONNECTED)
+        abort();
+
+    if (link->data.tcp->state.in != ALIAS_TCP_STATE_CONNECTED) {
+        if (state == ALIAS_TCP_STATE_DISCONNECTED)
+            link->expire_time = TCP_EXPIRE_DEAD;
+    } else {
+        link->expire_time = (state == ALIAS_TCP_STATE_DISCONNECTED) ?
+            TCP_EXPIRE_SINGLEDEAD : TCP_EXPIRE_CONNECTED;
+    }
+    link->data.tcp->state.out = state;
+}
+
+
+/* Report the TCP state recorded for the input direction of this link. */
+int
+GetStateIn(struct alias_link *link) {
+    int in_state = link->data.tcp->state.in;
+    return in_state;
+}
+
+
+/* Report the TCP state recorded for the output direction of this link. */
+int
+GetStateOut(struct alias_link *link) {
+    int out_state = link->data.tcp->state.out;
+    return out_state;
+}
+
+
+/*
+ * Source (original) address of a link; a link whose src_addr is
+ * INADDR_ANY yields the default aliasAddress instead.
+ */
+struct in_addr
+GetOriginalAddress(struct alias_link *link) {
+    return (link->src_addr.s_addr != INADDR_ANY) ? link->src_addr : aliasAddress;
+}
+
+
+/* Destination address recorded in the link. */
+struct in_addr
+GetDestAddress(struct alias_link *link) {
+    return link->dst_addr;
+}
+
+
+/*
+ * Alias address of a link; a link whose alias_addr is INADDR_ANY
+ * yields the default aliasAddress instead.
+ */
+struct in_addr
+GetAliasAddress(struct alias_link *link) {
+    return (link->alias_addr.s_addr != INADDR_ANY) ? link->alias_addr : aliasAddress;
+}
+
+
+/* Return the default alias address (the global aliasAddress). */
+struct in_addr
+GetDefaultAliasAddress(void) {
+    return aliasAddress;
+}
+
+
+/* Replace the default alias address (the global aliasAddress). */
+void
+SetDefaultAliasAddress(struct in_addr alias_addr) {
+    aliasAddress = alias_addr;
+}
+
+
+/* Source port stored in the link, returned exactly as stored. */
+u_short
+GetOriginalPort(struct alias_link *link) {
+    return link->src_port;
+}
+
+
+/* Alias port stored in the link, returned exactly as stored. */
+u_short
+GetAliasPort(struct alias_link *link) {
+    return link->alias_port;
+}
+
+#ifndef NO_FW_PUNCH
+/* Destination port stored in the link; compiled only without NO_FW_PUNCH. */
+static u_short
+GetDestPort(struct alias_link *link) {
+    return link->dst_port;
+}
+#endif
+
+/* Flag the link's TCP state to show that ACK numbers have been modified. */
+void
+SetAckModified(struct alias_link *link)
+{
+    link->data.tcp->state.ack_modified = 1;
+}
+
+
+/* Proxy address stored in the link. */
+struct in_addr
+GetProxyAddress(struct alias_link *link) {
+    return link->proxy_addr;
+}
+
+
+/* Store addr as the proxy address of the link. */
+void
+SetProxyAddress(struct alias_link *link, struct in_addr addr) {
+    link->proxy_addr = addr;
+}
+
+
+/* Proxy port stored in the link, returned exactly as stored. */
+u_short
+GetProxyPort(struct alias_link *link) {
+    return link->proxy_port;
+}
+
+
+/* Store port as the proxy port of the link. */
+void
+SetProxyPort(struct alias_link *link, u_short port) {
+    link->proxy_port = port;
+}
+
+
+/* Tell whether ACK numbers on this TCP link have been flagged as modified. */
+int
+GetAckModified(struct alias_link *link)
+{
+    return link->data.tcp->state.ack_modified;
+}
+
+
+/*
+ * Find out how much the ACK number has been altered for an incoming
+ * TCP packet.
+ *
+ * The link keeps a circular list of records, one per TCP packet whose
+ * size was altered.  Among the active records for which
+ * SeqDiff(record ack_new, packet ack) is non-negative, the one with the
+ * smallest difference supplies the result (the earliest slot wins a tie).
+ *
+ * pip:  the packet; its TCP header is located ip_hl 32-bit words in.
+ * link: the link whose records are searched.
+ *
+ * Returns the delta of the selected record, or 0 if none qualifies.
+ */
+int
+GetDeltaAckIn(struct ip *pip, struct alias_link *link)
+{
+    struct tcphdr *tcp_hdr;
+    u_long pkt_ack;
+    int best_delta, best_diff;
+    int slot;
+
+    tcp_hdr = (struct tcphdr *) ((char *) pip + (pip->ip_hl << 2));
+    pkt_ack = tcp_hdr->th_ack;
+
+    best_delta = 0;
+    best_diff = -1;             /* negative: no candidate yet */
+
+    for (slot = 0; slot < N_LINK_TCP_DATA; slot++) {
+        struct ack_data_record rec;
+        int diff;
+
+        rec = link->data.tcp->ack[slot];
+        if (rec.active != 1)
+            continue;
+
+        diff = SeqDiff(rec.ack_new, pkt_ack);
+        if (diff < 0)
+            continue;
+
+        /* Keep the first candidate, then only strictly closer ones. */
+        if (best_diff < 0 || diff < best_diff) {
+            best_delta = rec.delta;
+            best_diff = diff;
+        }
+    }
+
+    return (best_delta);
+}
+
+
+/*
+ * Find out how much the sequence number has been altered for an
+ * outgoing TCP packet.
+ *
+ * The link keeps a circular list of records, one per TCP packet whose
+ * size was altered.  Among the active records for which
+ * SeqDiff(record ack_old, packet seq) is non-negative, the one with the
+ * smallest difference supplies the result (the earliest slot wins a tie).
+ *
+ * pip:  the packet; its TCP header is located ip_hl 32-bit words in.
+ * link: the link whose records are searched.
+ *
+ * Returns the delta of the selected record, or 0 if none qualifies.
+ */
+int
+GetDeltaSeqOut(struct ip *pip, struct alias_link *link)
+{
+    struct tcphdr *tcp_hdr;
+    u_long pkt_seq;
+    int best_delta, best_diff;
+    int slot;
+
+    tcp_hdr = (struct tcphdr *) ((char *) pip + (pip->ip_hl << 2));
+    pkt_seq = tcp_hdr->th_seq;
+
+    best_delta = 0;
+    best_diff = -1;             /* negative: no candidate yet */
+
+    for (slot = 0; slot < N_LINK_TCP_DATA; slot++) {
+        struct ack_data_record rec;
+        int diff;
+
+        rec = link->data.tcp->ack[slot];
+        if (rec.active != 1)
+            continue;
+
+        diff = SeqDiff(rec.ack_old, pkt_seq);
+        if (diff < 0)
+            continue;
+
+        /* Keep the first candidate, then only strictly closer ones. */
+        if (best_diff < 0 || diff < best_diff) {
+            best_delta = rec.delta;
+            best_diff = diff;
+        }
+    }
+
+    return (best_delta);
+}
+
+
+/*
+ * When a TCP packet has been altered in length, save this information
+ * in the link's circular list of records.  If enough packets have been
+ * altered, the list will begin to overwrite itself.
+ *
+ * The record holds the sequence number just past this packet's data as
+ * it was (ack_old) and with delta added (ack_new), both converted with
+ * htonl(), plus delta itself.
+ */
+void
+AddSeq(struct ip *pip, struct alias_link *link, int delta)
+{
+    struct ack_data_record rec;
+    struct tcphdr *tcp_hdr;
+    int hdr_len, total_len, data_len;
+    int slot;
+
+    tcp_hdr = (struct tcphdr *) ((char *) pip + (pip->ip_hl << 2));
+
+    /* Payload size = total IP length minus IP and TCP header lengths. */
+    hdr_len = (pip->ip_hl + tcp_hdr->th_off) << 2;
+    total_len = ntohs(pip->ip_len);
+    data_len = total_len - hdr_len;
+
+    rec.ack_old = htonl(ntohl(tcp_hdr->th_seq) + data_len);
+    rec.ack_new = htonl(ntohl(tcp_hdr->th_seq) + data_len + delta);
+    rec.delta = delta;
+    rec.active = 1;
+
+    /* Store at the current index, then advance it with wrap-around. */
+    slot = link->data.tcp->state.index;
+    link->data.tcp->ack[slot] = rec;
+    link->data.tcp->state.index = (slot + 1 == N_LINK_TCP_DATA) ? 0 : slot + 1;
+}
+
+/*
+ * Adjust the lifetime of a link.
+ *   expire == 0   clear LINK_PERMANENT and call DeleteLink() on it
+ *   expire == -1  mark the link LINK_PERMANENT
+ *   expire > 0    store the value as the link's expire_time
+ * Any other value is ignored (reported on stderr in DEBUG builds).
+ */
+void
+SetExpire(struct alias_link *link, int expire)
+{
+    if (expire > 0) {
+        link->expire_time = expire;
+    } else if (expire == 0) {
+        link->flags &= ~LINK_PERMANENT;
+        DeleteLink(link);
+    } else if (expire == -1) {
+        link->flags |= LINK_PERMANENT;
+    } else {
+#ifdef DEBUG
+        fprintf(stderr, "PacketAlias/SetExpire(): ");
+        fprintf(stderr, "error in expire parameter\n");
+#endif
+    }
+}
+
+/* Reset the newDefaultLink flag (read by PacketAliasCheckNewLink()). */
+void
+ClearCheckNewLink(void) {
+    newDefaultLink = 0;
+}
+
+/* Set (yes != 0) or clear (yes == 0) the link's LINK_LAST_LINE_CRLF_TERMED flag. */
+void
+SetLastLineCrlfTermed(struct alias_link *link, int yes)
+{
+    if (!yes)
+        link->flags &= ~LINK_LAST_LINE_CRLF_TERMED;
+    else
+        link->flags |= LINK_LAST_LINE_CRLF_TERMED;
+}
+
+/* Non-zero when the link's LINK_LAST_LINE_CRLF_TERMED flag is set. */
+int
+GetLastLineCrlfTermed(struct alias_link *link)
+{
+    return (link->flags & LINK_LAST_LINE_CRLF_TERMED);
+}
+
+/* Relink this entry through ReLink() with cid in the argument slot that
+   follows src_port; deleteAllLinks is set for the duration of the call. */
+void
+SetDestCallId(struct alias_link *link, u_int16_t cid) {
+    deleteAllLinks = 1;
+    (void) ReLink(link, link->src_addr, link->dst_addr, link->alias_addr,
+                  link->src_port, cid, link->alias_port, link->link_type);
+    deleteAllLinks = 0;
+}
+
+
+/* Miscellaneous Functions
+
+    HouseKeeping()
+    InitPacketAliasLog()
+    UninitPacketAliasLog()
+*/
+
+/*
+    Whenever an outgoing or incoming packet is handled, HouseKeeping()
+    is called to find and remove timed-out aliasing links.  Logic exists
+    to sweep through the entire table and linked list structure
+    every 60 seconds.
+
+    (prototype in alias_local.h)
+*/
+
+/*
+ * Refresh the cached clock and expire a time-proportional share of links.
+ *
+ * timeStamp is set to the current second so that other functions need
+ * not make their own system calls.  The number of output-table spokes
+ * to sweep is scaled by the time since lastCleanupTime, so the whole
+ * table is covered once per ALIAS_CLEANUP_INTERVAL_SECS; the fractional
+ * remainder (in hundredths of a spoke) is carried over in
+ * houseKeepingResidual.  At most ALIAS_CLEANUP_MAX_SPOKES spokes are
+ * swept per call, in which case the residual is discarded.
+ */
+void
+HouseKeeping(void)
+{
+    struct timeval now;
+    struct timezone zone;
+    int spokes, spokes_x100, k;
+
+    gettimeofday(&now, &zone);
+    timeStamp = now.tv_sec;
+
+    /* Spokes to cover, kept in hundredths to preserve the fraction. */
+    spokes_x100  = LINK_TABLE_OUT_SIZE * 100 + houseKeepingResidual;
+    spokes_x100 *= timeStamp - lastCleanupTime;
+    spokes_x100 /= ALIAS_CLEANUP_INTERVAL_SECS;
+    spokes = spokes_x100 / 100;
+
+    if (spokes == 0)
+        return;                 /* nothing due yet: bookkeeping untouched */
+
+    lastCleanupTime = timeStamp;
+    if (spokes < 0) {
+#ifdef DEBUG
+        fprintf(stderr, "PacketAlias/HouseKeeping(): ");
+        fprintf(stderr, "something unexpected in time values\n");
+#endif
+        houseKeepingResidual = 0;
+        return;
+    }
+
+    if (spokes > ALIAS_CLEANUP_MAX_SPOKES) {
+        spokes = ALIAS_CLEANUP_MAX_SPOKES;
+        houseKeepingResidual = 0;
+    } else {
+        houseKeepingResidual = spokes_x100 - 100 * spokes;
+    }
+
+    for (k = 0; k < spokes; k++)
+        IncrementalCleanup();
+}
+
+
+/* Open /var/log/alias.log for writing and turn on PKT_ALIAS_LOG.  Nothing
+   is enabled if logging is already on or the file cannot be opened. */
+static void
+InitPacketAliasLog(void) {
+    if (packetAliasMode & PKT_ALIAS_LOG)
+        return;
+    if ((monitorFile = fopen("/var/log/alias.log", "w")) == NULL)
+        return;
+    packetAliasMode |= PKT_ALIAS_LOG;
+    fprintf(monitorFile,
+        "PacketAlias/InitPacketAliasLog: Packet alias logging enabled.\n");
+}
+
+
+/* Disable logging: clear PKT_ALIAS_LOG and close the log file if open. */
+static void
+UninitPacketAliasLog(void)
+{
+    packetAliasMode &= ~PKT_ALIAS_LOG;
+    if (monitorFile == NULL)
+        return;
+    fclose(monitorFile);
+    monitorFile = NULL;
+}
+
+
+
+
+
+
+/* Outside world interfaces
+
+-- "outside world" means other than alias*.c routines --
+
+    PacketAliasRedirectPort()
+    PacketAliasAddServer()
+    PacketAliasRedirectProto()
+    PacketAliasRedirectAddr()
+    PacketAliasRedirectDelete()
+    PacketAliasSetAddress()
+    PacketAliasInit()
+    PacketAliasUninit()
+    PacketAliasSetMode()
+
+(prototypes in alias.h)
+*/
+
+/*
+ * Redirection from a specific public addr:port to a private addr:port.
+ *
+ * proto selects the link type: IPPROTO_UDP gives LINK_UDP, IPPROTO_TCP
+ * gives LINK_TCP; anything else is rejected.  The link is created with
+ * AddLink() from the addresses and ports as given and is flagged
+ * LINK_PERMANENT.
+ *
+ * Returns the new link, or NULL if the protocol is not allowed or
+ * AddLink() fails.
+ */
+struct alias_link *
+PacketAliasRedirectPort(struct in_addr src_addr,   u_short src_port,
+                        struct in_addr dst_addr,   u_short dst_port,
+                        struct in_addr alias_addr, u_short alias_port,
+                        u_char proto)
+{
+    struct alias_link *redirect;
+    int type;
+
+    if (proto == IPPROTO_UDP) {
+        type = LINK_UDP;
+    } else if (proto == IPPROTO_TCP) {
+        type = LINK_TCP;
+    } else {
+#ifdef DEBUG
+        fprintf(stderr, "PacketAliasRedirectPort(): ");
+        fprintf(stderr, "only TCP and UDP protocols allowed\n");
+#endif
+        return NULL;
+    }
+
+    redirect = AddLink(src_addr, dst_addr, alias_addr,
+                       src_port, dst_port, alias_port, type);
+    if (redirect == NULL) {
+#ifdef DEBUG
+        fprintf(stderr, "PacketAliasRedirectPort(): "
+                        "call to AddLink() failed\n");
+#endif
+        return NULL;
+    }
+
+    redirect->flags |= LINK_PERMANENT;
+    return redirect;
+}
+
+/*
+ * Add server to the pool of servers.
+ * The pool is a circular list: the new node is linked in after the node
+ * that currently closes the ring, and link->server is pointed at it.
+ * Returns 0 on success, -1 if the node cannot be allocated.
+ */
+int
+PacketAliasAddServer(struct alias_link *link, struct in_addr addr, u_short port)
+{
+    struct server *node, *first, *last;
+
+    node = malloc(sizeof *node);
+    if (node == NULL)
+        return (-1);
+    node->addr = addr;
+    node->port = port;
+
+    first = link->server;
+    if (first == NULL) {
+        node->next = node;              /* sole member points at itself */
+    } else {
+        for (last = first; last->next != first; last = last->next)
+            ;
+        last->next = node;
+        node->next = first;
+    }
+    link->server = node;
+    return (0);
+}
+
+/*
+ * Redirect packets of a given IP protocol from a specific public
+ * address to a private address.
+ *
+ * The link is created by AddLink() with NO_SRC_PORT / NO_DEST_PORT, an
+ * alias port of 0 and proto passed as the link type, and is flagged
+ * LINK_PERMANENT.  Returns the link, or NULL if AddLink() fails.
+ */
+struct alias_link *
+PacketAliasRedirectProto(struct in_addr src_addr,
+                         struct in_addr dst_addr,
+                         struct in_addr alias_addr,
+                         u_char proto)
+{
+    struct alias_link *redirect;
+
+    redirect = AddLink(src_addr, dst_addr, alias_addr,
+                       NO_SRC_PORT, NO_DEST_PORT, 0, proto);
+    if (redirect == NULL) {
+#ifdef DEBUG
+        fprintf(stderr, "PacketAliasRedirectProto(): "
+                        "call to AddLink() failed\n");
+#endif
+        return NULL;
+    }
+    redirect->flags |= LINK_PERMANENT;
+    return redirect;
+}
+
+/*
+ * Static address translation between src_addr and alias_addr.
+ *
+ * Creates a LINK_ADDR link with AddLink() (nullAddress as destination,
+ * all ports 0) and flags it LINK_PERMANENT.  Returns the link, or NULL
+ * if AddLink() fails.
+ */
+struct alias_link *
+PacketAliasRedirectAddr(struct in_addr src_addr,
+                        struct in_addr alias_addr)
+{
+    struct alias_link *redirect;
+
+    redirect = AddLink(src_addr, nullAddress, alias_addr, 0, 0, 0, LINK_ADDR);
+    if (redirect == NULL) {
+#ifdef DEBUG
+        fprintf(stderr, "PacketAliasRedirectAddr(): "
+                        "call to AddLink() failed\n");
+#endif
+        return NULL;
+    }
+
+    redirect->flags |= LINK_PERMANENT;
+    return redirect;
+}
+
+
+/* Delete a redirection link (deleteAllLinks is set around DeleteLink()).
+ * This is a dangerous function to put in the API, because an invalid
+ * pointer can crash the program. */
+void
+PacketAliasRedirectDelete(struct alias_link *link)
+{
+    deleteAllLinks = 1;
+    DeleteLink(link);
+    deleteAllLinks = 0;
+}
+
+
+/* Set the default alias address.  When it actually changes and
+   PKT_ALIAS_RESET_ON_ADDR_CHANGE is on, CleanupAliasData() runs first. */
+void
+PacketAliasSetAddress(struct in_addr addr) {
+    int changed = (aliasAddress.s_addr != addr.s_addr);
+    if (changed && (packetAliasMode & PKT_ALIAS_RESET_ON_ADDR_CHANGE))
+        CleanupAliasData();
+    aliasAddress = addr;
+}
+
+
+/* Store target_addr in the global targetAddress. */
+void
+PacketAliasSetTarget(struct in_addr target_addr) {
+    targetAddress = target_addr;
+}
+
+
+/*
+ * (Re)initialise the packet aliasing engine.
+ *
+ * First call: sample the clock, reset the housekeeping state, initialise
+ * both link tables and register PacketAliasUninit() with atexit().
+ * Later calls: run CleanupAliasData() with deleteAllLinks set.  Either
+ * way the addresses, counters, cleanupIndex and mode are then reset.
+ */
+void
+PacketAliasInit(void)
+{
+    static int initialised = 0;
+    struct timeval now;
+    struct timezone zone;
+    int bucket;
+
+    if (initialised) {
+        deleteAllLinks = 1;
+        CleanupAliasData();
+        deleteAllLinks = 0;
+    } else {
+        gettimeofday(&now, &zone);
+        timeStamp = now.tv_sec;
+        lastCleanupTime = now.tv_sec;
+        houseKeepingResidual = 0;
+
+        for (bucket = 0; bucket < LINK_TABLE_OUT_SIZE; bucket++)
+            LIST_INIT(&linkTableOut[bucket]);
+        for (bucket = 0; bucket < LINK_TABLE_IN_SIZE; bucket++)
+            LIST_INIT(&linkTableIn[bucket]);
+
+        atexit(PacketAliasUninit);
+        initialised = 1;
+    }
+
+    aliasAddress.s_addr = INADDR_ANY;
+    targetAddress.s_addr = INADDR_ANY;
+
+    icmpLinkCount = udpLinkCount = tcpLinkCount = 0;
+    pptpLinkCount = protoLinkCount = 0;
+    fragmentIdLinkCount = fragmentPtrLinkCount = 0;
+    sockCount = 0;
+    cleanupIndex = 0;
+
+    packetAliasMode = PKT_ALIAS_SAME_PORTS
+                    | PKT_ALIAS_USE_SOCKETS
+                    | PKT_ALIAS_RESET_ON_ADDR_CHANGE;
+}
+
+/* Flush all links, stop logging and (unless NO_FW_PUNCH) undo firewall punching. */
+void PacketAliasUninit(void) {
+    deleteAllLinks = 1;
+    CleanupAliasData();
+    deleteAllLinks = 0;
+    UninitPacketAliasLog();
+#ifndef NO_FW_PUNCH
+    UninitPunchFW();
+#endif
+}
+
+
+/*
+ * Change mode for some operations.
+ *
+ * flags: which state to bring the affected flags to.
+ * mask:  which flags to affect (use 0 to just probe the current mode).
+ *
+ * Logging and firewall punching need set-up or tear-down, which is
+ * done first; then the masked bits of packetAliasMode are replaced by
+ * those of flags.
+ * Returns the resulting packetAliasMode.
+ */
+unsigned int
+PacketAliasSetMode(unsigned int flags, unsigned int mask)
+{
+    unsigned int turn_on  = flags & mask;
+    unsigned int turn_off = ~flags & mask;
+
+    if (turn_on & PKT_ALIAS_LOG)
+        InitPacketAliasLog();
+    else if (turn_off & PKT_ALIAS_LOG)
+        UninitPacketAliasLog();
+
+#ifndef NO_FW_PUNCH
+    if (turn_on & PKT_ALIAS_PUNCH_FW)
+        InitPunchFW();
+    else if (turn_off & PKT_ALIAS_PUNCH_FW)
+        UninitPunchFW();
+#endif
+
+    /* Other flags can be set/cleared without special action. */
+    packetAliasMode = turn_on | (packetAliasMode & ~mask);
+    return packetAliasMode;
+}
+
+
+/* Report the newDefaultLink flag (cleared by ClearCheckNewLink()). */
+int
+PacketAliasCheckNewLink(void) {
+    return newDefaultLink;
+}
+
+
+#ifndef NO_FW_PUNCH
+
+/*****************
+  Code to support firewall punching.  This shouldn't really be in this
+  file, but making variables global is evil too.
+  ****************/
+
+/* Firewall include files */
+#include <net/if.h>
+#include <netinet/ip_fw.h>
+#include <string.h>
+#include <err.h>
+
+static void ClearAllFWHoles(void);
+
+static int fireWallBaseNum;     /* First firewall rule number free for our use (see PacketAliasSetFWBase) */
+static int fireWallNumNums;     /* How many rule numbers, counted from the base, can we use? */
+static int fireWallActiveNum;   /* Rule number at which the next search for a free entry starts */
+static char *fireWallField;     /* One char per entry, non-zero when in use; index is rule number - base */
+
+#define fw_setfield(field, num)                         \
+do {                                                    \
+    (field)[(num) - fireWallBaseNum] = 1;               \
+} /*lint -save -e717 */ while(0) /*lint -restore */
+#define fw_clrfield(field, num)                         \
+do {                                                    \
+    (field)[(num) - fireWallBaseNum] = 0;               \
+} /*lint -save -e717 */ while(0) /*lint -restore */
+#define fw_tstfield(field, num) ((field)[(num) - fireWallBaseNum])
+
+/* Allocate a zeroed hole map, open the firewall socket if needed, clear the range. */
+static void
+InitPunchFW(void) {
+    free(fireWallField);        /* drop any earlier map: re-enabling must not leak */
+    fireWallField = calloc(fireWallNumNums, 1);
+    if (fireWallField == NULL)
+        return;
+    if (fireWallFD < 0)
+        fireWallFD = socket(AF_INET, SOCK_RAW, IPPROTO_RAW);
+    ClearAllFWHoles();
+    fireWallActiveNum = fireWallBaseNum;
+}
+
+/* Call ClearAllFWHoles(), close the socket, free the map, clear PKT_ALIAS_PUNCH_FW. */
+static void
+UninitPunchFW(void) {
+    ClearAllFWHoles();
+    if (fireWallFD >= 0)
+        close(fireWallFD);
+    fireWallFD = -1;
+    free(fireWallField);        /* free(NULL) is harmless */
+    fireWallField = NULL;
+    packetAliasMode &= ~PKT_ALIAS_PUNCH_FW;
+}
+
+/*
+ * Make a certain link go through the firewall.  Requires
+ * PKT_ALIAS_PUNCH_FW, an open firewall socket and a TCP link.  A free
+ * rule number is claimed and, if both ports are non-zero, two accept
+ * rules (original -> destination and the reverse) are added under it.
+ */
+void
+PunchFWHole(struct alias_link *link) {
+    struct ip_fw rule;          /* On-the-fly built rule */
+    int range_end;              /* One past the last usable rule number */
+    int slot;                   /* Where to punch hole */
+    int rc;                     /* Result code */
+
+    /* Don't do anything unless we are asked to */
+    if (!(packetAliasMode & PKT_ALIAS_PUNCH_FW))
+        return;
+    if (fireWallFD < 0 || link->link_type != LINK_TCP)
+        return;
+
+    memset(&rule, 0, sizeof rule);
+
+    /* Find empty slot: first from the last position to the range end... */
+    range_end = fireWallBaseNum + fireWallNumNums;
+    slot = fireWallActiveNum;
+    while (slot < range_end && fw_tstfield(fireWallField, slot))
+        slot++;
+    if (slot == range_end) {
+        /* ...then from the start of the range up to the last position. */
+        slot = fireWallBaseNum;
+        while (slot < fireWallActiveNum && fw_tstfield(fireWallField, slot))
+            slot++;
+        if (slot == fireWallActiveNum) {
+            /* No rule point empty - we can't punch more holes. */
+            fireWallActiveNum = fireWallBaseNum;
+#ifdef DEBUG
+            fprintf(stderr, "libalias: Unable to create firewall hole!\n");
+#endif
+            return;
+        }
+    }
+    /* Start next search at next position */
+    fireWallActiveNum = slot + 1;
+
+    /* Build generic part of the two rules */
+    rule.fw_number = slot;
+    IP_FW_SETNSRCP(&rule, 1);   /* Number of source ports. */
+    IP_FW_SETNDSTP(&rule, 1);   /* Number of destination ports. */
+    rule.fw_flg = IP_FW_F_ACCEPT | IP_FW_F_IN | IP_FW_F_OUT;
+    rule.fw_prot = IPPROTO_TCP;
+    rule.fw_smsk.s_addr = INADDR_BROADCAST;
+    rule.fw_dmsk.s_addr = INADDR_BROADCAST;
+
+    /* Build and apply specific part of the rules */
+    rule.fw_src = GetOriginalAddress(link);
+    rule.fw_dst = GetDestAddress(link);
+    rule.fw_uar.fw_pts[0] = ntohs(GetOriginalPort(link));
+    rule.fw_uar.fw_pts[1] = ntohs(GetDestPort(link));
+
+    /* Skip non-bound links - XXX should not be strictly necessary, but
+       seems to leave hole if not done.  Keep this even if that is fixed. */
+    if (rule.fw_uar.fw_pts[0] != 0 && rule.fw_uar.fw_pts[1] != 0) {
+        rc = setsockopt(fireWallFD, IPPROTO_IP, IP_FW_ADD, &rule, sizeof rule);
+#ifdef DEBUG
+        if (rc)
+            err(1, "alias punch inbound(1) setsockopt(IP_FW_ADD)");
+#endif
+        rule.fw_src = GetDestAddress(link);
+        rule.fw_dst = GetOriginalAddress(link);
+        rule.fw_uar.fw_pts[0] = ntohs(GetDestPort(link));
+        rule.fw_uar.fw_pts[1] = ntohs(GetOriginalPort(link));
+        rc = setsockopt(fireWallFD, IPPROTO_IP, IP_FW_ADD, &rule, sizeof rule);
+#ifdef DEBUG
+        if (rc)
+            err(1, "alias punch inbound(2) setsockopt(IP_FW_ADD)");
+#endif
+    }
+    /* Indicate hole applied */
+    link->data.tcp->fwhole = slot;
+    fw_setfield(fireWallField, slot);
+}
+
+/* Remove a hole in a firewall associated with a particular alias link.
+   Calling this too often is harmless, and so is calling it after
+   UninitPunchFW() has closed the socket and freed the hole map. */
+static void
+ClearFWHole(struct alias_link *link) {
+    struct ip_fw rule;
+    int fwhole = (link->link_type == LINK_TCP) ? link->data.tcp->fwhole : -1;
+
+    if (fwhole < 0)             /* not a TCP link, or no hole recorded */
+        return;
+    memset(&rule, 0, sizeof rule);
+    rule.fw_number = fwhole;
+    while (fireWallFD >= 0 &&
+           !setsockopt(fireWallFD, IPPROTO_IP, IP_FW_DEL, &rule, sizeof rule))
+        ;
+    if (fireWallField != NULL)  /* map is gone once punching is shut down */
+        fw_clrfield(fireWallField, fwhole);
+    link->data.tcp->fwhole = -1;
+}
+
+/* Clear out the entire range dedicated to firewall holes: delete every
+   rule number in it and zero the hole map.  No-op without a socket. */
+static void
+ClearAllFWHoles(void) {
+    struct ip_fw rule;          /* On-the-fly built rule */
+    int i;
+    if (fireWallFD < 0)
+        return;
+    memset(&rule, 0, sizeof rule);
+    for (i = fireWallBaseNum; i < fireWallBaseNum + fireWallNumNums; i++) {
+        rule.fw_number = i;
+        while (!setsockopt(fireWallFD, IPPROTO_IP, IP_FW_DEL, &rule, sizeof rule))
+            ;
+    }
+    if (fireWallField != NULL)  /* map may be missing if its allocation failed */
+        memset(fireWallField, 0, fireWallNumNums);
+}
+#endif
+
+/* Set the first rule number and the count of the range usable for firewall holes. */
+void PacketAliasSetFWBase(unsigned int base, unsigned int num) {
+#ifndef NO_FW_PUNCH
+    fireWallBaseNum = base;
+    fireWallNumNums = num;
+#endif
+}
diff --git a/sys/netinet/libalias/alias_ftp.c b/sys/netinet/libalias/alias_ftp.c
new file mode 100644
index 0000000..d5978f9
--- /dev/null
+++ b/sys/netinet/libalias/alias_ftp.c
@@ -0,0 +1,554 @@
+/*
+    Alias_ftp.c performs special processing for FTP sessions under
+    TCP.  Specifically, when a PORT/EPRT command from the client
+    side or 227/229 reply from the server is sent, it is intercepted
+    and modified.  The address is changed to the gateway machine
+    and an aliasing port is used.
+
+    For this routine to work, the message must fit entirely into a
+    single TCP packet.  This is typically the case, but exceptions
+    can easily be envisioned under the actual specifications.
+
+    Probably the most troubling aspect of the approach taken here is
+    that the new message will typically be a different length, and
+    this causes a certain amount of bookkeeping to keep track of the
+    changes of sequence and acknowledgment numbers, since the client
+    machine is totally unaware of the modification to the TCP stream.
+
+
+    This software is placed into the public domain with no restrictions
+    on its distribution.
+
+    References: RFC 959, RFC 2428.
+
+    Initial version:  August, 1996  (cjm)
+
+    Version 1.6
+         Brian Somers and Martin Renters identified an IP checksum
+         error for modified IP packets.
+
+    Version 1.7:  January 9, 1996 (cjm)
+         Differential checksum computation for change
+         in IP packet length.
+
+    Version 2.1:  May, 1997 (cjm)
+         Very minor changes to conform with
+         local/global/function naming conventions
+         within the packet aliasing module.
+
+    Version 3.1:  May, 2000 (eds)
+	 Add support for passive mode, alias the 227 replies.
+
+    See HISTORY file for record of revisions.
+
+    $FreeBSD$
+*/
+
+/* Includes */
+#include <ctype.h>
+#include <stdio.h>
+#include <string.h>
+#include <sys/types.h>
+#include <netinet/in_systm.h>
+#include <netinet/in.h>
+#include <netinet/ip.h>
+#include <netinet/tcp.h>
+
+#include "alias_local.h"
+
+#define FTP_CONTROL_PORT_NUMBER 21
+#define MAX_MESSAGE_SIZE	128
+
+enum ftp_message_type {
+    FTP_PORT_COMMAND,       /* "PORT" command (ParseFtpPortCommand matched) */
+    FTP_EPRT_COMMAND,       /* "EPRT" command (ParseFtpEprtCommand matched) */
+    FTP_227_REPLY,          /* 227 reply (ParseFtp227Reply matched) */
+    FTP_229_REPLY,          /* 229 reply (ParseFtp229Reply matched) */
+    FTP_UNKNOWN_MESSAGE     /* none of the parsers matched */
+};
+
+static int ParseFtpPortCommand(char *, int);	/* (data, length); 1 on match, else 0 */
+static int ParseFtpEprtCommand(char *, int);	/* (data, length); 1 on match, else 0 */
+static int ParseFtp227Reply(char *, int);	/* (data, length); 1 on match, else 0 */
+static int ParseFtp229Reply(char *, int);	/* (data, length); 1 on match, else 0 */
+static void NewFtpMessage(struct ip *, struct alias_link *, int, int);
+
+static struct in_addr true_addr;	/* address set by the parsers; in network byte order. */
+static u_short true_port;		/* port set by the parsers; in host byte order. */
+
+/*
+ * Inspect an outgoing TCP segment of an FTP control connection.
+ *
+ * When the payload is at most MAX_MESSAGE_SIZE bytes and the previous
+ * message on the link ended in CRLF, it is parsed as a PORT/EPRT
+ * command (destination port 21) or, otherwise, as a 227/229 reply; a
+ * match is handed to NewFtpMessage().  Afterwards the link records
+ * whether the data of this segment ends in CRLF.
+ */
+void
+AliasHandleFtpOut(
+struct ip *pip,           /* IP packet to examine/patch */
+struct alias_link *link,  /* The link to go through (aliased port) */
+int maxpacketsize         /* The maximum size this packet can grow to (including headers) */)
+{
+    struct tcphdr *tc;
+    char *sptr;
+    int hlen, tlen, dlen;
+    int ftp_message_type = FTP_UNKNOWN_MESSAGE;
+
+    /* Calculate data length of TCP packet */
+    tc = (struct tcphdr *) ((char *) pip + (pip->ip_hl << 2));
+    hlen = (pip->ip_hl + tc->th_off) << 2;
+    tlen = ntohs(pip->ip_len);
+    dlen = tlen - hlen;
+
+    /* Place string pointer at beginning of data */
+    sptr = (char *) pip + hlen;
+
+    /* Check that data length is not too long and previous message was
+       properly terminated with CRLF. */
+    if (dlen <= MAX_MESSAGE_SIZE && GetLastLineCrlfTermed(link)) {
+        if (ntohs(tc->th_dport) == FTP_CONTROL_PORT_NUMBER) {
+            /* When aliasing a client, check for the PORT/EPRT command. */
+            if (ParseFtpPortCommand(sptr, dlen))
+                ftp_message_type = FTP_PORT_COMMAND;
+            else if (ParseFtpEprtCommand(sptr, dlen))
+                ftp_message_type = FTP_EPRT_COMMAND;
+        } else {
+            /* When aliasing a server, check for the 227/229 reply. */
+            if (ParseFtp227Reply(sptr, dlen))
+                ftp_message_type = FTP_227_REPLY;
+            else if (ParseFtp229Reply(sptr, dlen))
+                ftp_message_type = FTP_229_REPLY;
+        }
+
+        if (ftp_message_type != FTP_UNKNOWN_MESSAGE)
+            NewFtpMessage(pip, link, maxpacketsize, ftp_message_type);
+    }
+
+    /* Track the msgs which are CRLF term'd for PORT/PASV FW breach.  Only
+       with real data: a negative dlen would index before the packet. */
+    if (dlen > 0) {
+        sptr = (char *) pip;            /* start over at beginning */
+        tlen = ntohs(pip->ip_len);      /* recalc tlen, pkt may have grown */
+        SetLastLineCrlfTermed(link,
+            (sptr[tlen-2] == '\r') && (sptr[tlen-1] == '\n'));
+    }
+}
+
+/* Recognise "PORT A,D,D,R,PO,RT" in the dlen bytes at sptr.  On a match
+   store the address in true_addr and the port in true_port and return 1;
+   otherwise return 0.  Bytes go to <ctype.h> as unsigned char. */
+static int
+ParseFtpPortCommand(char *sptr, int dlen)
+{
+    char ch;
+    int i, state;
+    u_int32_t addr;
+    u_short port;
+    u_int8_t octet;
+
+    /* Return if data length is too short. */
+    if (dlen < 18)
+        return 0;
+
+    addr = port = octet = 0;
+    state = -4;
+    for (i = 0; i < dlen; i++) {
+        ch = sptr[i];
+        switch (state) {
+        case -4: if (ch == 'P') state++; else return 0; break;
+        case -3: if (ch == 'O') state++; else return 0; break;
+        case -2: if (ch == 'R') state++; else return 0; break;
+        case -1: if (ch == 'T') state++; else return 0; break;
+        case 0:
+            if (isspace((unsigned char)ch))
+                break;
+            else
+                state++;
+            /* FALLTHROUGH: first non-space must start the first number */
+        case 1: case 3: case 5: case 7: case 9: case 11:
+            if (isdigit((unsigned char)ch)) {
+                octet = ch - '0';
+                state++;
+            } else
+                return 0;
+            break;
+        case 2: case 4: case 6: case 8:
+            if (isdigit((unsigned char)ch))
+                octet = 10 * octet + ch - '0';
+            else if (ch == ',') {
+                addr = (addr << 8) + octet;
+                state++;
+            } else
+                return 0;
+            break;
+        case 10: case 12:
+            if (isdigit((unsigned char)ch))
+                octet = 10 * octet + ch - '0';
+            else if (ch == ',' || state == 12) {
+                port = (port << 8) + octet;
+                state++;
+            } else
+                return 0;
+            break;
+        }
+    }
+
+    if (state != 13)
+        return 0;
+    true_addr.s_addr = htonl(addr);
+    true_port = port;
+    return 1;
+}
+
+/*
+ * Recognise "EPRT |1|A.D.D.R|PORT|" (any delimiter, IPv4 only) in the
+ * dlen bytes at sptr.  On a match store the address in true_addr and the
+ * port in true_port and return 1; otherwise return 0.
+ */
+static int
+ParseFtpEprtCommand(char *sptr, int dlen)
+{
+    char ch, delim;
+    int i, state;
+    u_int32_t addr;
+    u_short port;
+    u_int8_t octet;
+
+    /* Return if data length is too short. */
+    if (dlen < 18)
+        return 0;
+
+    addr = port = octet = 0;
+    delim = '|';                        /* XXX gcc -Wuninitialized */
+    state = -4;
+    for (i = 0; i < dlen; i++) {
+        ch = sptr[i];
+        switch (state) {
+        case -4: if (ch == 'E') state++; else return 0; break;
+        case -3: if (ch == 'P') state++; else return 0; break;
+        case -2: if (ch == 'R') state++; else return 0; break;
+        case -1: if (ch == 'T') state++; else return 0; break;
+        case 0:
+            if (!isspace((unsigned char)ch)) {
+                delim = ch;
+                state++;
+            }
+            break;
+        case 1:
+            if (ch == '1')      /* IPv4 address */
+                state++;
+            else
+                return 0;
+            break;
+        case 2:
+            if (ch == delim)
+                state++;
+            else
+                return 0;
+            break;
+        case 3: case 5: case 7: case 9:
+            if (isdigit((unsigned char)ch)) {
+                octet = ch - '0';
+                state++;
+            } else
+                return 0;
+            break;
+        case 4: case 6: case 8: case 10:
+            if (isdigit((unsigned char)ch))
+                octet = 10 * octet + ch - '0';
+            else if (ch == '.' || state == 10) {
+                addr = (addr << 8) + octet;
+                state++;
+            } else
+                return 0;
+            break;
+        case 11:
+            if (isdigit((unsigned char)ch)) {
+                port = ch - '0';
+                state++;
+            } else
+                return 0;
+            break;
+        case 12:
+            if (isdigit((unsigned char)ch))
+                port = 10 * port + ch - '0';
+            else if (ch == delim)
+                state++;
+            else
+                return 0;
+            break;
+        }
+    }
+
+    if (state != 13)
+        return 0;
+    true_addr.s_addr = htonl(addr);
+    true_port = port;
+    return 1;
+}
+
+/*
+ * Recognise "227 Entering Passive Mode (A,D,D,R,PO,RT)" in the dlen
+ * bytes at sptr; everything between "227" and the first '(' is skipped.
+ * On a match store the address in true_addr and the port in true_port
+ * and return 1; otherwise return 0.
+ */
+static int
+ParseFtp227Reply(char *sptr, int dlen)
+{
+    char ch;
+    int i, state;
+    u_int32_t addr;
+    u_short port;
+    u_int8_t octet;
+
+    /* Return if data length is too short. */
+    if (dlen < 17)
+        return 0;
+
+    addr = port = octet = 0;
+    state = -3;
+    for (i = 0; i < dlen; i++) {
+        ch = sptr[i];
+        switch (state) {
+        case -3: if (ch == '2') state++; else return 0; break;
+        case -2: if (ch == '2') state++; else return 0; break;
+        case -1: if (ch == '7') state++; else return 0; break;
+        case 0:
+            if (ch == '(')
+                state++;
+            break;
+        case 1: case 3: case 5: case 7: case 9: case 11:
+            if (isdigit((unsigned char)ch)) {
+                octet = ch - '0';
+                state++;
+            } else
+                return 0;
+            break;
+        case 2: case 4: case 6: case 8:
+            if (isdigit((unsigned char)ch))
+                octet = 10 * octet + ch - '0';
+            else if (ch == ',') {
+                addr = (addr << 8) + octet;
+                state++;
+            } else
+                return 0;
+            break;
+        case 10: case 12:
+            if (isdigit((unsigned char)ch))
+                octet = 10 * octet + ch - '0';
+            else if (ch == ',' || (state == 12 && ch == ')')) {
+                port = (port << 8) + octet;
+                state++;
+            } else
+                return 0;
+            break;
+        }
+    }
+
+    if (state != 13)
+        return 0;
+    true_port = port;
+    true_addr.s_addr = htonl(addr);
+    return 1;
+}
+
+/*
+ * Recognise "229 Entering Extended Passive Mode (|||PORT|)" in the dlen
+ * bytes at sptr; the text before the first '(' is skipped and the
+ * character after it is taken as the delimiter.  On a match store the
+ * port in true_port (true_addr is left untouched) and return 1; else 0.
+ */
+static int
+ParseFtp229Reply(char *sptr, int dlen)
+{
+    char ch, delim;
+    int i, state;
+    u_short port;
+
+    /* Return if data length is too short. */
+    if (dlen < 11)
+        return 0;
+
+    port = 0;
+    delim = '|';                        /* XXX gcc -Wuninitialized */
+    state = -3;
+    for (i = 0; i < dlen; i++) {
+        ch = sptr[i];
+        switch (state) {
+        case -3: if (ch == '2') state++; else return 0; break;
+        case -2: if (ch == '2') state++; else return 0; break;
+        case -1: if (ch == '9') state++; else return 0; break;
+        case 0:
+            if (ch == '(')
+                state++;
+            break;
+        case 1:
+            delim = ch;
+            state++;
+            break;
+        case 2: case 3:
+            if (ch == delim)
+                state++;
+            else
+                return 0;
+            break;
+        case 4:
+            if (isdigit((unsigned char)ch)) {
+                port = ch - '0';
+                state++;
+            } else
+                return 0;
+            break;
+        case 5:
+            if (isdigit((unsigned char)ch))
+                port = 10 * port + ch - '0';
+            else if (ch == delim)
+                state++;
+            else
+                return 0;
+            break;
+        case 6:
+            if (ch == ')')
+                state++;
+            else
+                return 0;
+            break;
+        }
+    }
+
+    if (state != 7)
+        return 0;
+    true_port = port;
+    return 1;
+}
+
+static void
+NewFtpMessage(struct ip *pip,
+              struct alias_link *link,
+              int maxpacketsize,
+              int ftp_message_type)
+{
+/*
+ * Replace the FTP control message in pip with one that advertises the alias
+ * address of link and the alias port of the data link returned by
+ * FindUdpTcpOut(), then update the sequence delta, IP length and checksums.
+ */
+    struct alias_link *ftp_link;
+
+/* Security checks. */
+    if (ftp_message_type != FTP_229_REPLY &&
+	pip->ip_src.s_addr != true_addr.s_addr)
+	return;
+    if (true_port < IPPORT_RESERVED)
+	return;
+
+/* Establish link to address and port found in FTP control message. */
+    ftp_link = FindUdpTcpOut(true_addr, GetDestAddress(link),
+                             htons(true_port), 0, IPPROTO_TCP, 1);
+    if (ftp_link != NULL)
+    {
+        int slen, hlen, tlen, dlen;
+        struct tcphdr *tc;
+
+#ifndef NO_FW_PUNCH
+	if (ftp_message_type == FTP_PORT_COMMAND ||
+	    ftp_message_type == FTP_EPRT_COMMAND) {
+	    /* Punch hole in firewall */
+	    PunchFWHole(ftp_link);
+	}
+#endif
+
+/* Calculate data length of TCP packet */
+        tc = (struct tcphdr *) ((char *) pip + (pip->ip_hl << 2));
+        hlen = (pip->ip_hl + tc->th_off) << 2;
+        tlen = ntohs(pip->ip_len);
+        dlen = tlen - hlen;
+
+/* Create new FTP message. */
+        {
+            char stemp[MAX_MESSAGE_SIZE + 1];
+            char *sptr;
+            u_short alias_port;
+            u_char *ptr;
+            int a1, a2, a3, a4, p1, p2;
+            struct in_addr alias_address;
+
+/* Decompose alias address into quad format (bytes in memory order) */
+            alias_address = GetAliasAddress(link);
+            ptr = (u_char *) &alias_address.s_addr;
+            a1 = *ptr++; a2=*ptr++; a3=*ptr++; a4=*ptr;
+	    alias_port = GetAliasPort(ftp_link);
+	    switch (ftp_message_type)
+	    {
+	    case FTP_PORT_COMMAND:
+	    case FTP_227_REPLY:
+		/* Decompose alias port into pair format (bytes in memory order). */
+		ptr = (u_char *) &alias_port;
+		p1 = *ptr++; p2=*ptr;
+		if (ftp_message_type == FTP_PORT_COMMAND) {
+		    /* Generate PORT command string. */
+		    snprintf(stemp, sizeof(stemp), "PORT %d,%d,%d,%d,%d,%d\r\n",
+			     a1,a2,a3,a4,p1,p2);
+		} else {
+		    /* Generate 227 reply string. */
+		    snprintf(stemp, sizeof(stemp),
+			     "227 Entering Passive Mode (%d,%d,%d,%d,%d,%d)\r\n",
+			     a1,a2,a3,a4,p1,p2);
+		}
+		break;
+	    case FTP_EPRT_COMMAND:
+		/* Generate EPRT command string. */
+		snprintf(stemp, sizeof(stemp), "EPRT |1|%d.%d.%d.%d|%d|\r\n",
+			 a1,a2,a3,a4,ntohs(alias_port));
+		break;
+	    case FTP_229_REPLY:
+		/* Generate 229 reply string. */
+		snprintf(stemp, sizeof(stemp),
+		    "229 Entering Extended Passive Mode (|||%d|)\r\n", ntohs(alias_port));
+		break;
+	    }
+
+/* Save string length for IP header modification */
+            slen = strlen(stemp);
+
+/* Copy modified buffer into IP packet (at most the room left after headers). */
+            sptr = (char *) pip; sptr += hlen;
+            strncpy(sptr, stemp, maxpacketsize-hlen);
+        }
+
+/* Save information regarding modified seq and ack numbers */
+        {
+            int delta;
+
+            SetAckModified(link);
+            delta = GetDeltaSeqOut(pip, link);
+            AddSeq(pip, link, delta+slen-dlen);
+        }
+
+/* Revise IP header: new total length, checksum adjusted for that change */
+        {
+            u_short new_len;
+
+            new_len = htons(hlen + slen);
+            DifferentialChecksum(&pip->ip_sum,
+                                 &new_len,
+                                 &pip->ip_len,
+                                 1);
+            pip->ip_len = new_len;
+        }
+
+/* Compute TCP checksum for revised packet */
+        tc->th_sum = 0;
+        tc->th_sum = TcpChecksum(pip);
+    }
+    else
+    {
+#ifdef DEBUG
+        fprintf(stderr,
+        "PacketAlias/HandleFtpOut: Cannot allocate FTP data port\n");
+#endif
+    }
+}
diff --git a/sys/netinet/libalias/alias_irc.c b/sys/netinet/libalias/alias_irc.c
new file mode 100644
index 0000000..afd032d
--- /dev/null
+++ b/sys/netinet/libalias/alias_irc.c
@@ -0,0 +1,317 @@
+/* Alias_irc.c intercepts packets containing IRC CTCP commands, and
+	changes DCC commands to export a port on the aliasing host instead
+	of an aliased host.
+
+    For this routine to work, the DCC command must fit entirely into a
+    single TCP packet.  This will usually happen, but is not
+    guaranteed.
+
+	 The interception is likely to change the length of the packet.
+	 The handling of this is copied more-or-less verbatim from
+	 alias_ftp.c
+
+    This software is placed into the public domain with no restrictions
+    on its distribution.
+
+	 Initial version: Eivind Eklund <perhaps@yes.no> (ee) 97-01-29
+
+         Version 2.1:  May, 1997 (cjm)
+             Very minor changes to conform with
+             local/global/function naming conventions
+             within the packet aliasing module.
+
+    $FreeBSD$
+*/
+
+/* Includes */
+#include <ctype.h>
+#include <stdio.h> 
+#include <string.h>
+#include <sys/types.h>
+#include <netinet/in_systm.h>
+#include <netinet/in.h>
+#include <netinet/ip.h>
+#include <netinet/tcp.h>
+#include <limits.h>
+
+#include "alias_local.h"
+
+/* Local defines */
+#define DBprintf(a)
+
+
+void
+AliasHandleIrcOut(struct ip *pip, /* IP packet to examine */
+				 struct alias_link *link,		  /* Which link are we on? */
+				 int maxsize		  /* Maximum size of IP packet including headers */
+				 )
+{       
+    int hlen, tlen, dlen;
+    struct in_addr true_addr;
+    u_short true_port;
+    char *sptr;
+    struct tcphdr *tc;
+	 int i;							  /* Iterator through the source */
+/* Rewrites the address and port of DCC commands in the payload to the alias values. */
+/* Calculate data length of TCP packet */
+    tc = (struct tcphdr *) ((char *) pip + (pip->ip_hl << 2));
+    hlen = (pip->ip_hl + tc->th_off) << 2;
+    tlen = ntohs(pip->ip_len);
+    dlen = tlen - hlen;
+
+	 /* Return if data length is too short - assume an entire PRIVMSG in each packet. */
+    if (dlen<sizeof(":A!a@n.n PRIVMSG A :aDCC 1 1a")-1) /* NOTE(review): int vs size_t, so the comparison is unsigned */
+        return;
+
+/* Place string pointer at beginning of data */
+    sptr = (char *) pip;  
+    sptr += hlen;
+	 maxsize -= hlen;				  /* We're interested in maximum size of data, not packet */
+
+	 /* Search for a CTCP command [Note 1] */
+	 for(	i=0; i<dlen; i++ ) {
+		 if(sptr[i]=='\001')
+			 goto lFOUND_CTCP;
+	 }
+	 return;					  /* No CTCP commands in this packet */
+	 /* Handle CTCP commands - the buffer may have to be copied */
+lFOUND_CTCP:
+	 {
+		 char newpacket[65536];	  /* Estimate of maximum packet size :) */
+		 int  copyat = i;			  /* Offset of the first CTCP marker; newpacket is copied back from here */
+		 int  iCopy = 0;			  /* How much data have we written to copy-back string? */
+		 unsigned long org_addr;  /* Original IP address */
+		 unsigned short org_port; /* Original source port address */
+	 lCTCP_START:
+		 if( i >= dlen || iCopy >= sizeof(newpacket) )
+			 goto lPACKET_DONE;
+		 newpacket[iCopy++] = sptr[i++];	/* Copy the CTCP start character */
+		 /* Start of a CTCP */
+		 if( i+4 >= dlen )		  /* Too short for DCC */
+			 goto lBAD_CTCP;
+		 if( sptr[i+0] != 'D' )
+			 goto lBAD_CTCP;
+		 if( sptr[i+1] != 'C' )
+			 goto lBAD_CTCP;
+		 if( sptr[i+2] != 'C' )
+			 goto lBAD_CTCP;
+		 if( sptr[i+3] != ' ' )
+			 goto lBAD_CTCP;
+		 /* We have a DCC command - handle it! */
+		 i+= 4;						  /* Skip "DCC " */
+		 if( iCopy+4 > sizeof(newpacket) )
+			 goto lPACKET_DONE;
+		 newpacket[iCopy++] = 'D';
+		 newpacket[iCopy++] = 'C';
+		 newpacket[iCopy++] = 'C';
+		 newpacket[iCopy++] = ' ';
+
+		 DBprintf(("Found DCC\n"));
+		 /* Skip any extra spaces (should not occur according to
+          protocol, but DCC breaks CTCP protocol anyway */
+		 while(sptr[i] == ' ') {
+			 if( ++i >= dlen) {
+				 DBprintf(("DCC packet terminated in just spaces\n"));
+				 goto lPACKET_DONE;
+			 }
+		 }
+
+		 DBprintf(("Transferring command...\n"));
+		 while(sptr[i] != ' ') {
+			 newpacket[iCopy++] = sptr[i];
+			 if( ++i >= dlen || iCopy >= sizeof(newpacket) ) {
+				 DBprintf(("DCC packet terminated during command\n"));
+				 goto lPACKET_DONE;
+			 }
+		 }
+		 /* Copy _one_ space */
+		 if( i+1 < dlen && iCopy < sizeof(newpacket) )
+			 newpacket[iCopy++] = sptr[i++];
+
+		 DBprintf(("Done command - removing spaces\n"));
+		 /* Skip any extra spaces (should not occur according to
+          protocol, but DCC breaks CTCP protocol anyway */
+		 while(sptr[i] == ' ') {
+			 if( ++i >= dlen ) {
+				 DBprintf(("DCC packet terminated in just spaces (post-command)\n"));
+				 goto lPACKET_DONE;
+			 }
+		 }
+
+		 DBprintf(("Transferring filename...\n"));
+		 while(sptr[i] != ' ') {
+			 newpacket[iCopy++] = sptr[i];
+			 if( ++i >= dlen || iCopy >= sizeof(newpacket) ) {
+				 DBprintf(("DCC packet terminated during filename\n"));
+				 goto lPACKET_DONE;
+			 }
+		 }
+		 /* Copy _one_ space */
+		 if( i+1 < dlen && iCopy < sizeof(newpacket) )
+			 newpacket[iCopy++] = sptr[i++];
+
+		 DBprintf(("Done filename - removing spaces\n"));
+		 /* Skip any extra spaces (should not occur according to
+          protocol, but DCC breaks CTCP protocol anyway */
+		 while(sptr[i] == ' ') {
+			 if( ++i >= dlen ) {
+				 DBprintf(("DCC packet terminated in just spaces (post-filename)\n"));
+				 goto lPACKET_DONE;
+			 }
+		 }
+
+		 DBprintf(("Fetching IP address\n"));
+		 /* Fetch IP address (a decimal number; it is not copied to newpacket) */
+		 org_addr = 0;
+		 while(i<dlen && isdigit(sptr[i])) {
+			 if( org_addr > ULONG_MAX/10UL )	{ /* Terminate on overflow */
+				 DBprintf(("DCC Address overflow (org_addr == 0x%08lx, next char %c\n", org_addr, sptr[i]));
+				 goto lBAD_CTCP;
+			 }
+			 org_addr *= 10;
+			 org_addr += sptr[i++]-'0';
+		 }
+		 DBprintf(("Skipping space\n"));
+		 if( i+1 >= dlen || sptr[i] != ' ' ) {
+			 DBprintf(("Overflow (%d >= %d) or bad character (%02x) terminating IP address\n", i+1, dlen, sptr[i]));
+			 goto lBAD_CTCP;
+		 }
+		 /* Skip any extra spaces (should not occur according to
+          protocol, but DCC breaks CTCP protocol anyway, so we might
+          as well play it safe */
+		 while(sptr[i] == ' ') {
+			 if( ++i >= dlen ) {
+				 DBprintf(("Packet failure - space overflow.\n"));
+				 goto lPACKET_DONE;
+			 }
+		 }
+		 DBprintf(("Fetching port number\n"));
+		 /* Fetch source port (a decimal number; it is not copied to newpacket) */
+		 org_port = 0;
+		 while(i<dlen && isdigit(sptr[i])) {
+			 if( org_port > 6554 )	{ /* Terminate on overflow (65536/10 rounded up) */
+				 DBprintf(("DCC: port number overflow\n"));
+				 goto lBAD_CTCP;
+			 }
+			 org_port *= 10;
+			 org_port += sptr[i++]-'0';
+		 }
+		 /* Skip illegal addresses (or early termination) */
+		 if( i >= dlen || (sptr[i] != '\001' && sptr[i] != ' ') ) {
+			 DBprintf(("Bad port termination\n"));
+			 goto lBAD_CTCP;
+		 }
+		 DBprintf(("Got IP %lu and port %u\n", org_addr, (unsigned)org_port));
+
+		 /* We've got the address and port - now alias it */
+		 {
+			 struct alias_link *dcc_link;
+			 struct in_addr destaddr;
+			 
+
+			 true_port = htons(org_port);
+			 true_addr.s_addr = htonl(org_addr);
+			 destaddr.s_addr = 0;
+
+			 /* Steal the FTP_DATA_PORT - it doesn't really matter, and this
+				 would probably allow it through at least _some_
+				 firewalls. */
+			 dcc_link = FindUdpTcpOut(true_addr, destaddr,
+						  true_port, 0,
+						  IPPROTO_TCP, 1);
+			 DBprintf(("Got a DCC link\n"));
+			 if ( dcc_link ) {
+				 struct in_addr alias_address;	/* Address from aliasing */
+				 u_short alias_port;	/* Port given by aliasing */
+
+#ifndef NO_FW_PUNCH
+				 /* Generate firewall hole as appropriate */
+				 PunchFWHole(dcc_link);
+#endif
+
+				 alias_address = GetAliasAddress(link);
+				 iCopy += snprintf(&newpacket[iCopy],
+										 sizeof(newpacket)-iCopy, 
+										 "%lu ", (u_long)htonl(alias_address.s_addr));
+				 if( iCopy >= sizeof(newpacket) ) { /* Truncated/fit exactly - bad news */
+					 DBprintf(("DCC constructed packet overflow.\n"));
+					 goto lBAD_CTCP;
+				 }
+				 alias_port = GetAliasPort(dcc_link);
+				 iCopy += snprintf(&newpacket[iCopy],
+										 sizeof(newpacket)-iCopy, 
+										 "%u", htons(alias_port) );
+				 /* Done - truncated cases will be taken care of by lBAD_CTCP */
+				 DBprintf(("Aliased IP %lu and port %u\n", alias_address.s_addr, (unsigned)alias_port));
+			 }
+		 }
+		 /* An uninteresting CTCP - state entered right after '\001' has
+          been pushed.  Also used to copy the rest of a DCC, after IP
+          address and port has been handled */
+	 lBAD_CTCP:
+		 for(; i<dlen && iCopy<sizeof(newpacket); i++,iCopy++) {
+			 newpacket[iCopy] = sptr[i]; /* Copy CTCP unchanged */
+			 if(sptr[i] == '\001') {
+				 goto lNORMAL_TEXT; /* leaves the loop before i and iCopy are advanced */
+			 }
+		 }
+		 goto lPACKET_DONE;
+		 /* Normal text */
+	 lNORMAL_TEXT:
+		 for(; i<dlen && iCopy<sizeof(newpacket); i++,iCopy++) {
+			 newpacket[iCopy] = sptr[i]; /* Copy normal text unchanged */
+			 if(sptr[i] == '\001') {
+				 goto lCTCP_START; /* lCTCP_START copies this marker and advances past it */
+			 }
+		 }
+		 /* Handle the end of a packet */
+	 lPACKET_DONE:
+		 iCopy = iCopy > maxsize-copyat ? maxsize-copyat : iCopy; /* clamp to the room left after copyat */
+		 memcpy(sptr+copyat, newpacket, iCopy);
+
+/* Save information regarding modified seq and ack numbers */
+        {
+            int delta;
+
+            SetAckModified(link);
+            delta = GetDeltaSeqOut(pip, link);
+            AddSeq(pip, link, delta+copyat+iCopy-dlen);
+        }
+
+		  /* Revise IP header */
+        {
+			  u_short new_len;
+			  
+			  new_len = htons(hlen + iCopy + copyat);
+			  DifferentialChecksum(&pip->ip_sum,
+										  &new_len,
+										  &pip->ip_len,
+										  1);
+			  pip->ip_len = new_len;
+        }
+
+		  /* Compute TCP checksum for revised packet */
+        tc->th_sum = 0;
+        tc->th_sum = TcpChecksum(pip);
+		  return;
+	 }
+}
+
+/* Notes:
+	[Note 1]
+	The initial search will most often fail; it could be replaced with a 32-bit specific search.
+	Such a search would be done for 32-bit unsigned value V:
+	V ^= 0x01010101;				  (Search is for null bytes)
+	if( ((V-0x01010101)^V) & 0x80808080 ) {
+     (found a null byte, which was originally a 01 byte)
+	}
+   To assert that the processor is 32-bits, do
+   extern int ircdccar[32];        (32 bits)
+   extern int ircdccar[CHAR_BIT*sizeof(unsigned int)];
+   which will generate a type-error on all but 32-bit machines.
+
+	[Note 2] This routine really ought to be replaced with one that
+	creates a transparent proxy on the aliasing host, to allow arbitrary
+	changes in the TCP stream.  This should not be too difficult given
+	this base;  I (ee) will try to do this some time later.
+	*/
diff --git a/sys/netinet/libalias/alias_local.h b/sys/netinet/libalias/alias_local.h
new file mode 100644
index 0000000..152406d
--- /dev/null
+++ b/sys/netinet/libalias/alias_local.h
@@ -0,0 +1,203 @@
+/*
+ * Alias_local.h contains the function prototypes for alias.c,
+ * alias_db.c, alias_util.c and alias_ftp.c, alias_irc.c (as well
+ * as any future add-ons).  It also includes macros, globals and
+ * struct definitions shared by more than one alias*.c file.
+ *
+ * This include file is intended to be used only within the aliasing
+ * software.  Outside world interfaces are defined in alias.h
+ *
+ * This software is placed into the public domain with no restrictions
+ * on its distribution.
+ *
+ * Initial version:  August, 1996  (cjm)    
+ *
+ * <updated several times by original author and Eivind Eklund>
+ *
+ * $FreeBSD$
+ */
+
+#ifndef _ALIAS_LOCAL_H_
+#define	_ALIAS_LOCAL_H_
+
+/* Macros */
+
+/*
+ * The following macro is used to update an internet checksum.
+ * "acc" is an accumulation of all the changes to the checksum
+ * (adding in new 16-bit words and subtracting out old words),
+ * and "cksum" is the checksum value to be updated.  Both
+ * arguments are evaluated several times and "acc" itself is
+ * modified, so pass plain variables / lvalues only.
+ */
+#define	ADJUST_CHECKSUM(acc, cksum) \
+	do { \
+		acc += cksum; \
+		if (acc < 0) { \
+			acc = -acc; \
+			acc = (acc >> 16) + (acc & 0xffff); \
+			acc += acc >> 16; \
+			cksum = (u_short) ~acc; \
+		} else { \
+			acc = (acc >> 16) + (acc & 0xffff); \
+			acc += acc >> 16; \
+			cksum = (u_short) acc; \
+		} \
+	} while (0)
+
+/* Globals */
+
+extern int packetAliasMode;
+
+/* Prototypes */
+
+/* General utilities */
+u_short	 IpChecksum(struct ip *_pip);
+u_short	 TcpChecksum(struct ip *_pip);
+void	 DifferentialChecksum(u_short *_cksum, u_short *_new, u_short *_old,
+	    int _n);
+
+/* Internal data access */
+struct alias_link *
+	 FindIcmpIn(struct in_addr _dst_addr, struct in_addr _alias_addr,
+	    u_short _id_alias, int _create);
+struct alias_link *
+	 FindIcmpOut(struct in_addr _src_addr, struct in_addr _dst_addr,
+	    u_short _id, int _create);
+struct alias_link *
+	 FindFragmentIn1(struct in_addr _dst_addr, struct in_addr _alias_addr,
+	    u_short _ip_id);
+struct alias_link *
+	 FindFragmentIn2(struct in_addr _dst_addr, struct in_addr _alias_addr,
+	    u_short _ip_id);
+struct alias_link *
+	 AddFragmentPtrLink(struct in_addr _dst_addr, u_short _ip_id);
+struct alias_link *
+	 FindFragmentPtr(struct in_addr _dst_addr, u_short _ip_id);
+struct alias_link *
+	 FindProtoIn(struct in_addr _dst_addr, struct in_addr _alias_addr,
+	    u_char _proto);
+struct alias_link *
+	 FindProtoOut(struct in_addr _src_addr, struct in_addr _dst_addr,
+	    u_char _proto);
+struct alias_link *
+	 FindUdpTcpIn(struct in_addr _dst_addr, struct in_addr _alias_addr,
+	    u_short _dst_port, u_short _alias_port, u_char _proto, int _create);
+struct alias_link *
+	 FindUdpTcpOut(struct in_addr _src_addr, struct in_addr _dst_addr,
+	    u_short _src_port, u_short _dst_port, u_char _proto, int _create);
+struct alias_link *
+	 AddPptp(struct in_addr _src_addr, struct in_addr _dst_addr,
+	    struct in_addr _alias_addr, u_int16_t _src_call_id);
+struct alias_link *
+	 FindPptpOutByCallId(struct in_addr _src_addr,
+	    struct in_addr _dst_addr, u_int16_t _src_call_id);
+struct alias_link *
+	 FindPptpInByCallId(struct in_addr _dst_addr,
+	    struct in_addr _alias_addr, u_int16_t _dst_call_id);
+struct alias_link *
+	 FindPptpOutByPeerCallId(struct in_addr _src_addr,
+	    struct in_addr _dst_addr, u_int16_t _dst_call_id);
+struct alias_link *
+	 FindPptpInByPeerCallId(struct in_addr _dst_addr,
+	    struct in_addr _alias_addr, u_int16_t _alias_call_id);
+struct alias_link *
+	 FindRtspOut(struct in_addr _src_addr, struct in_addr _dst_addr,
+	    u_short _src_port, u_short _alias_port, u_char _proto);
+struct in_addr
+	 FindOriginalAddress(struct in_addr _alias_addr);
+struct in_addr
+	 FindAliasAddress(struct in_addr _original_addr);
+
+/* External data access/modification */
+int	 FindNewPortGroup(struct in_addr _dst_addr, struct in_addr _alias_addr,
+                     u_short _src_port, u_short _dst_port, u_short _port_count,
+		     u_char _proto, u_char _align);
+void	 GetFragmentAddr(struct alias_link *_link, struct in_addr *_src_addr);
+void	 SetFragmentAddr(struct alias_link *_link, struct in_addr _src_addr);
+void	 GetFragmentPtr(struct alias_link *_link, char **_fptr);
+void	 SetFragmentPtr(struct alias_link *_link, char *fptr);
+void	 SetStateIn(struct alias_link *_link, int _state);
+void	 SetStateOut(struct alias_link *_link, int _state);
+int	 GetStateIn(struct alias_link *_link);
+int	 GetStateOut(struct alias_link *_link);
+struct in_addr
+	 GetOriginalAddress(struct alias_link *_link);
+struct in_addr
+	 GetDestAddress(struct alias_link *_link);
+struct in_addr
+	 GetAliasAddress(struct alias_link *_link);
+struct in_addr
+	 GetDefaultAliasAddress(void);
+void	 SetDefaultAliasAddress(struct in_addr _alias_addr);
+u_short	 GetOriginalPort(struct alias_link *_link);
+u_short	 GetAliasPort(struct alias_link *_link);
+struct in_addr
+	 GetProxyAddress(struct alias_link *_link);
+void	 SetProxyAddress(struct alias_link *_link, struct in_addr _addr);
+u_short	 GetProxyPort(struct alias_link *_link);
+void	 SetProxyPort(struct alias_link *_link, u_short _port);
+void	 SetAckModified(struct alias_link *_link);
+int	 GetAckModified(struct alias_link *_link);
+int	 GetDeltaAckIn(struct ip *_pip, struct alias_link *_link);
+int	 GetDeltaSeqOut(struct ip *_pip, struct alias_link *_link);
+void	 AddSeq(struct ip *_pip, struct alias_link *_link, int _delta);
+void	 SetExpire(struct alias_link *_link, int _expire);
+void	 ClearCheckNewLink(void);
+void	 SetLastLineCrlfTermed(struct alias_link *_link, int _yes);
+int	 GetLastLineCrlfTermed(struct alias_link *_link);
+void	 SetDestCallId(struct alias_link *_link, u_int16_t _cid);
+#ifndef NO_FW_PUNCH
+void	 PunchFWHole(struct alias_link *_link);
+#endif
+
+/* Housekeeping function */
+void	 HouseKeeping(void);
+
+/* Tcp specific routines */
+/* lint -save -library Suppress flexelint warnings */
+
+/* FTP routines */
+void	 AliasHandleFtpOut(struct ip *_pip, struct alias_link *_link,
+	    int _maxpacketsize);
+
+/* IRC routines */
+void	 AliasHandleIrcOut(struct ip *_pip, struct alias_link *_link,
+	    int _maxsize);
+
+/* RTSP routines */
+void	 AliasHandleRtspOut(struct ip *_pip, struct alias_link *_link,
+	    int _maxpacketsize);
+
+/* PPTP routines */
+void	 AliasHandlePptpOut(struct ip *_pip, struct alias_link *_link);
+void	 AliasHandlePptpIn(struct ip *_pip, struct alias_link *_link);
+int	 AliasHandlePptpGreOut(struct ip *_pip);
+int	 AliasHandlePptpGreIn(struct ip *_pip);
+
+/* NetBIOS routines */
+int	 AliasHandleUdpNbt(struct ip *_pip, struct alias_link *_link,
+	    struct in_addr *_alias_address, u_short _alias_port);
+int	 AliasHandleUdpNbtNS(struct ip *_pip, struct alias_link *_link,
+	    struct in_addr *_alias_address, u_short *_alias_port,
+	    struct in_addr *_original_address, u_short *_original_port);
+
+/* CUSeeMe routines */
+void	 AliasHandleCUSeeMeOut(struct ip *_pip, struct alias_link *_link);
+void	 AliasHandleCUSeeMeIn(struct ip *_pip, struct in_addr _original_addr);
+
+/* Transparent proxy routines */
+int	 ProxyCheck(struct ip *_pip, struct in_addr *_proxy_server_addr,
+	    u_short *_proxy_server_port);
+void	 ProxyModify(struct alias_link *_link, struct ip *_pip,
+	    int _maxpacketsize, int _proxy_type);
+
+enum alias_tcp_state {
+	ALIAS_TCP_STATE_NOT_CONNECTED,
+	ALIAS_TCP_STATE_CONNECTED,
+	ALIAS_TCP_STATE_DISCONNECTED
+};
+
+/*lint -restore */
+
+#endif /* !_ALIAS_LOCAL_H_ */
diff --git a/sys/netinet/libalias/alias_nbt.c b/sys/netinet/libalias/alias_nbt.c
new file mode 100644
index 0000000..74fe56e
--- /dev/null
+++ b/sys/netinet/libalias/alias_nbt.c
@@ -0,0 +1,694 @@
+/*
+ * Written by Atsushi Murai <amurai@spec.co.jp>
+ *
+ * Copyright (C) 1998, System Planning and Engineering Co. All rights reserverd.
+ *
+ * Redistribution and use in source and binary forms are permitted
+ * provided that the above copyright notice and this paragraph are
+ * duplicated in all such forms and that any documentation,
+ * advertising materials, and other materials related to such
+ * distribution and use acknowledge that the software was developed
+ * by the System Planning and Engineering Co.  The name of the
+ * SPEC may not be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED
+ * WARRANTIES OF MERCHANTIBILITY AND FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * $FreeBSD$
+ *
+ *  TODO:
+ *       oClean up. 
+ *       oConsidering for word alignment for other platform.
+ */
+/*
+    alias_nbt.c performs special processing for NetBios over TCP/IP
+    sessions by UDP.
+
+    Initial version:  May, 1998  (Atsushi Murai <amurai@spec.co.jp>)
+
+    See HISTORY file for record of revisions.
+*/
+
+/* Includes */
+#include <ctype.h>
+#include <stdio.h> 
+#include <string.h>
+#include <sys/types.h>
+#include <netinet/in_systm.h>
+#include <netinet/in.h>
+#include <arpa/inet.h>
+#include <netinet/ip.h>
+#include <netinet/udp.h>
+#include <netinet/tcp.h>
+
+#include "alias_local.h"
+
+typedef struct {
+	struct in_addr		oldaddr;	/* address looked for in resource records */
+	u_short 			oldport;	/* NOTE(review): not referenced by the handlers visible here */
+	struct in_addr		newaddr;	/* address written over each match of oldaddr */
+	u_short 			newport;	/* NOTE(review): not referenced by the handlers visible here */
+	u_short 			*uh_sum;	/* checksum adjusted on each replacement unless it is 0 */
+} NBTArguments;
+
+typedef struct {
+	unsigned char   type;		/* one of the DGM_* values; selects how the rest is walked */
+	unsigned char   flags;
+	u_short  		id;
+	struct in_addr  source_ip;	/* overwritten with the alias address by AliasHandleUdpNbt */
+	u_short			source_port;	/* overwritten with the alias port by AliasHandleUdpNbt */
+	u_short			len;
+	u_short			offset;
+} NbtDataHeader;
+
+#define OpQuery		0
+#define OpUnknown	4
+#define OpRegist	5
+#define OpRelease	6
+#define OpWACK		7
+#define OpRefresh	8
+typedef struct {
+	u_short			nametrid;
+	u_short 		dir:1, opcode:4, nmflags:7, rcode:4;
+	u_short			qdcount;
+	u_short			ancount;
+	u_short			nscount;
+	u_short			arcount;
+} NbtNSHeader;
+
+#define FMT_ERR		0x1
+#define SRV_ERR		0x2
+#define IMP_ERR		0x4
+#define RFS_ERR		0x5
+#define ACT_ERR		0x6
+#define CFT_ERR		0x7
+
+
+#ifdef DEBUG
+static void PrintRcode( u_char rcode )  {
+	/* Print the message for one rcode value; unknown values print in hex. */
+	switch (rcode) {
+		case FMT_ERR:
+			printf("\nFormat Error."); break;
+		case SRV_ERR:
+			printf("\nSever failure."); break;
+		case IMP_ERR:
+			printf("\nUnsupported request error.\n"); break;
+		case RFS_ERR:
+			printf("\nRefused error.\n"); break;
+		case ACT_ERR:
+			printf("\nActive error.\n"); break;
+		case CFT_ERR:
+			printf("\nName in conflict error.\n"); break;
+		default:
+			printf("\n???=%0x\n", rcode );
+			break;
+	}	
+}
+#endif
+
+
+/*
+ * Handling Name field: skip over one encoded name starting at p, where pmax
+ * is the end of the data.  Returns the position following the name, or NULL
+ * if p is NULL or a bounds check against pmax fails.
+ */
+static u_char *AliasHandleName ( u_char *p, char *pmax ) {
+	u_char *s;
+	u_char c;
+	int		compress;
+
+	if (p == NULL || (char *)p >= pmax)
+		return(NULL);
+	/* A first byte with a top bit set is stepped over as a 2-byte field */
+	if (*p & 0xc0 ) {
+		p = p + 2;
+		if ((char *)p > pmax)
+			return(NULL);
+		return ((u_char *)p);
+	}
+	/* Walk the labels; the length byte is only read while p is below pmax */
+	while ( (char *)p < pmax && ( *p & 0x3f) != 0x00 ) {
+		s = p + 1;
+		if ( *p == 0x20 )
+			compress = 1;
+		else
+			compress = 0;
+	 	/* Get next length field */
+		p = (u_char *)(p + (*p & 0x3f) + 1);
+		if ((char *)p > pmax) {
+			p = NULL;
+			break;
+		}
+#ifdef DEBUG
+		printf(":");
+#endif
+		while (s < p) {
+			if ( compress == 1 ) {
+				c = (u_char )(((((*s & 0x0f) << 4) | (*(s+1) & 0x0f)) - 0x11));
+#ifdef DEBUG
+				if (isprint( c ) )
+					printf("%c", c );
+				else
+					printf("<0x%02x>", c );
+#endif
+				s +=2;
+			} else {
+#ifdef DEBUG
+				printf("%c", *s);
+#endif
+				s++;
+			}
+		}
+#ifdef DEBUG
+		printf(":");
+#endif
+		fflush(stdout);
+    }
+	/* Set up to out of Name field; a name that reaches pmax is rejected */
+	if (p == NULL || (char *)p >= pmax)
+	    p = NULL;
+	else
+	    p++;
+	return ((u_char *)p);
+}
+
+/* 
+ * NetBios Datagram Handler (IP/UDP)
+ */
+#define DGM_DIRECT_UNIQ		0x10
+#define DGM_DIRECT_GROUP	0x11
+#define DGM_BROADCAST		0x12
+#define DGM_ERROR			0x13
+#define DGM_QUERY			0x14
+#define DGM_POSITIVE_RES	0x15
+#define DGM_NEGATIVE_RES	0x16
+
+int AliasHandleUdpNbt(
+	struct ip 		  	*pip,	 /* IP packet to examine/patch */
+	struct alias_link 	*link,	/* not referenced in this function */
+	struct in_addr		*alias_address,	/* written into the header's source_ip */
+    u_short 		alias_port	/* written into the header's source_port as is */
+) {
+    struct udphdr *	uh;
+    NbtDataHeader 	*ndh;
+    u_char		*p = NULL;
+    char		*pmax;
+/* Returns 0 if the datagram could be walked to its end, -1 otherwise. */
+    /* Locate the UDP header; pmax is the end of the datagram per uh_ulen */
+    uh =  (struct udphdr *) ((char *) pip + (pip->ip_hl << 2));
+    pmax = (char *)uh + ntohs( uh->uh_ulen );
+
+	ndh = (NbtDataHeader *)((char *)uh + (sizeof (struct udphdr)));
+    if ((char *)(ndh + 1) > pmax)
+	    return(-1);
+#ifdef DEBUG
+	printf("\nType=%02x,", ndh->type );
+#endif
+	switch ( ndh->type ) {
+		case DGM_DIRECT_UNIQ:
+		case DGM_DIRECT_GROUP:
+		case DGM_BROADCAST:
+			p = (u_char *)ndh + 14;
+		    p = AliasHandleName ( p, pmax ); /* Source Name */
+		    p = AliasHandleName ( p, pmax ); /* Destination Name */
+			break;
+		case DGM_ERROR:
+			p = (u_char *)ndh + 11;
+			break;
+		case DGM_QUERY:
+		case DGM_POSITIVE_RES:
+		case DGM_NEGATIVE_RES:
+			p = (u_char *)ndh + 10;
+		    p = AliasHandleName ( p, pmax ); /* Destination Name */
+			break;
+	}
+    if (p == NULL || (char *)p > pmax)	/* also true for any other type: p is still NULL */
+	    p = NULL;
+#ifdef DEBUG
+	printf("%s:%d-->", inet_ntoa(ndh->source_ip), ntohs(ndh->source_port) );
+#endif
+	/* Doing a IP address and Port number Translation */
+	if ( uh->uh_sum != 0 ) {	/* a zero checksum is left untouched */
+		int				acc;
+		u_short			*sptr;
+		acc  = ndh->source_port;	/* old values are added ... */
+		acc -= alias_port;	/* ... and new values subtracted */
+		sptr = (u_short *) &(ndh->source_ip);
+		acc += *sptr++;
+		acc += *sptr;
+		sptr = (u_short *) alias_address;
+		acc -= *sptr++;
+		acc -= *sptr;
+		ADJUST_CHECKSUM(acc, uh->uh_sum);
+	}
+    ndh->source_ip = *alias_address;	/* rewritten even when -1 is returned below */
+    ndh->source_port = alias_port;
+#ifdef DEBUG
+	printf("%s:%d\n", inet_ntoa(ndh->source_ip), ntohs(ndh->source_port) );
+	fflush(stdout);
+#endif
+    return((p == NULL) ? -1 : 0);
+}
+/* Question Section */
+#define QS_TYPE_NB		0x0020
+#define QS_TYPE_NBSTAT	0x0021
+#define QS_CLAS_IN		0x0001
+typedef struct {
+	u_short	type;	/* The type of Request */
+	u_short	class;	/* The class of Request */
+} NBTNsQuestion;
+
+static u_char *
+AliasHandleQuestion(
+    u_short count,
+    NBTNsQuestion *q,
+    char *pmax,
+    NBTArguments *nbtarg)
+{
+	u_short qtype;
+
+	/*
+	 * Step over `count' question entries, each a name followed by a
+	 * type/class pair.  Returns the position reached after the last
+	 * entry, or NULL as soon as an entry does not fit below pmax.
+	 * nbtarg is not used.
+	 */
+	for (; count != 0; count--) {
+		/* Name field */
+		q = (NBTNsQuestion *)AliasHandleName((u_char *)q, pmax);
+		if (q == NULL || (char *)(q + 1) > pmax)
+			return (NULL);
+
+		/* The type/class pair is stepped over for known types only */
+		qtype = ntohs(q->type);
+		if (qtype == QS_TYPE_NB || qtype == QS_TYPE_NBSTAT) {
+			q++;
+		} else {
+#ifdef DEBUG
+			printf("\nUnknown Type on Question %0x\n", ntohs(q->type) );
+#endif
+		}
+	}
+
+	/* Position just past the Question Section */
+	return ((u_char *)q);
+}
+
+/* Resource Record */
+#define RR_TYPE_A		0x0001
+#define RR_TYPE_NS		0x0002
+#define RR_TYPE_NULL	0x000a
+#define RR_TYPE_NB		0x0020
+#define RR_TYPE_NBSTAT	0x0021
+#define RR_CLAS_IN		0x0001
+#define SizeOfNsResource	8
+typedef struct {
+ 	u_short type;
+ 	u_short class;
+ 	unsigned int ttl;
+ 	u_short rdlen;
+} NBTNsResource;
+
+#define SizeOfNsRNB			6
+typedef struct {
+	u_short g:1, ont:2, resv:13;
+	struct	in_addr	addr;
+} NBTNsRNB;
+
+static u_char *
+AliasHandleResourceNB( 
+    NBTNsResource *q,
+    char *pmax, 
+							   NBTArguments  *nbtarg)
+{
+	NBTNsRNB	*nb;
+	u_short bcount;
+	/* Replaces oldaddr by newaddr in each NB entry; returns the end position or NULL. */
+	if (q == NULL || (char *)(q + 1) > pmax)
+		return(NULL);
+	/* Check out a length */
+	bcount = ntohs(q->rdlen);
+
+	/* Forward to Resource NB position */
+	nb = (NBTNsRNB *)((u_char *)q + SizeOfNsResource);
+
+	/* Processing all in_addr array */
+#ifdef DEBUG
+	printf("NB rec[%s", inet_ntoa(nbtarg->oldaddr));
+            printf("->%s, %dbytes] ",inet_ntoa(nbtarg->newaddr ), bcount);
+#endif
+	while ( nb != NULL && bcount != 0 )  {
+		if ((char *)(nb + 1) > pmax) {	/* bound uses sizeof(NBTNsRNB), not SizeOfNsRNB */
+			nb = NULL;
+			break;
+		}
+#ifdef DEBUG
+		printf("<%s>", inet_ntoa(nb->addr) );
+#endif
+		if (!bcmp(&nbtarg->oldaddr,&nb->addr, sizeof(struct in_addr) ) ) {
+			if ( *nbtarg->uh_sum != 0 ) {	/* a zero checksum is left untouched */
+            	int acc;
+            	u_short *sptr;
+
+            	sptr = (u_short *) &(nb->addr);	/* add the old address words */
+            	acc = *sptr++;
+            	acc += *sptr;
+            	sptr = (u_short *) &(nbtarg->newaddr);	/* subtract the new ones */
+            	acc -= *sptr++;
+            	acc -= *sptr;
+            	ADJUST_CHECKSUM(acc, *nbtarg->uh_sum);
+			}
+
+			nb->addr = nbtarg->newaddr;
+#ifdef DEBUG
+			printf("O");
+#endif
+		}
+#ifdef DEBUG
+		 else {
+			printf(".");
+		}
+#endif
+		nb=(NBTNsRNB *)((u_char *)nb + SizeOfNsRNB);
+	 	bcount -= SizeOfNsRNB;	/* NOTE(review): u_short wraps if rdlen is not a multiple of SizeOfNsRNB */
+	}
+	if (nb == NULL || (char *)(nb + 1) > pmax) {
+		nb = NULL;
+	}
+
+	return ((u_char *)nb);
+}
+
+#define SizeOfResourceA		6
+typedef struct {
+	struct	in_addr	addr;
+} NBTNsResourceA;
+
+static u_char *
+AliasHandleResourceA( 
+    NBTNsResource *q,
+    char *pmax,
+						 	  NBTArguments  *nbtarg)
+{
+	NBTNsResourceA	*a;
+	u_short bcount;
+	/* Replaces oldaddr by newaddr in each A entry; returns the end position or NULL. */
+	if (q == NULL || (char *)(q + 1) > pmax)
+		return(NULL);
+
+	/* Forward to Resource A position */
+	a = (NBTNsResourceA *)( (u_char *)q + sizeof(NBTNsResource) );
+
+	/* Check out of length */
+	bcount = ntohs(q->rdlen);
+
+	/* Processing all in_addr array */
+#ifdef DEBUG
+	printf("Arec [%s", inet_ntoa(nbtarg->oldaddr));
+        printf("->%s]",inet_ntoa(nbtarg->newaddr ));
+#endif
+	while ( bcount != 0 )  {
+		if (a == NULL || (char *)(a + 1) > pmax)
+			return(NULL);
+#ifdef DEBUG
+		printf("..%s", inet_ntoa(a->addr) );
+#endif
+		if ( !bcmp(&nbtarg->oldaddr, &a->addr, sizeof(struct in_addr) ) ) {
+			if ( *nbtarg->uh_sum != 0 ) {	/* a zero checksum is left untouched */
+            	int acc;
+            	u_short *sptr;
+
+            	sptr = (u_short *) &(a->addr);		 /* Old */
+            	acc = *sptr++;
+            	acc += *sptr;
+            	sptr = (u_short *) &nbtarg->newaddr; /* New */
+            	acc -= *sptr++;
+            	acc -= *sptr;
+            	ADJUST_CHECKSUM(acc, *nbtarg->uh_sum);
+			}
+
+			a->addr = nbtarg->newaddr;
+		}
+		a++;	/*XXXX NOTE(review): steps sizeof(NBTNsResourceA) while bcount drops by SizeOfResourceA - confirm */
+		bcount -= SizeOfResourceA;	/* u_short: wraps when fewer than SizeOfResourceA bytes remain */
+	}
+	if (a == NULL || (char *)(a + 1) > pmax)
+		a =  NULL;
+	return ((u_char *)a);
+}
+
+typedef struct {
+	u_short opcode:4, flags:8, resv:4;
+} NBTNsResourceNULL;
+
+/*
+ * Step over the data of a NULL resource record.  Returns the position
+ * behind the data, or NULL when an entry would cross pmax.
+ */
+static u_char *
+AliasHandleResourceNULL(
+    NBTNsResource *q,
+    char *pmax,
+    NBTArguments *nbtarg)
+{
+	NBTNsResourceNULL *entry;
+	u_short remaining;
+
+	if (q == NULL || (char *)(q + 1) > pmax)
+		return (NULL);
+
+	/* The record data starts right behind the fixed header. */
+	entry = (NBTNsResourceNULL *)((u_char *)q + sizeof(NBTNsResource));
+
+	/* Advance one entry at a time until rdlen bytes are used up. */
+	for (remaining = ntohs(q->rdlen); remaining != 0;
+	    remaining -= sizeof(NBTNsResourceNULL)) {
+		if ((char *)(entry + 1) > pmax)
+			return (NULL);
+		entry++;
+	}
+
+	/* One more entry must still fit behind the final position. */
+	if ((char *)(entry + 1) > pmax)
+		return (NULL);
+	return ((u_char *)entry);
+}
+
+/* Validate an NS resource record and return the position behind its data. */
+static u_char *
+AliasHandleResourceNS(
+    NBTNsResource *q,
+    char *pmax,
+    NBTArguments *nbtarg)
+{
+	u_char *rdata, *rdata_end;
+
+	if (q == NULL || (char *)(q + 1) > pmax)
+		return (NULL);
+
+	/* The record data follows the fixed header and spans rdlen bytes. */
+	rdata = (u_char *)q + sizeof(NBTNsResource);
+	rdata_end = rdata + ntohs(q->rdlen);
+
+	/* The data starts with a name, which has to parse successfully. */
+	if (AliasHandleName(rdata, pmax) == NULL)
+		return (NULL);
+
+	/* Reject a record whose declared length runs past pmax. */
+	if ((char *)rdata_end > pmax)
+		return (NULL);
+
+	return (rdata_end);
+}
+
+typedef struct {
+	u_short	numnames;
+} NBTNsResourceNBSTAT;
+
+/* Step over an NBSTAT resource record without modifying it. */
+static u_char *
+AliasHandleResourceNBSTAT(
+    NBTNsResource *q,
+    char *pmax,
+    NBTArguments *nbtarg)
+{
+	u_char *rdata_end;
+
+	/* The fixed resource header has to lie below pmax. */
+	if (q == NULL || (char *)(q + 1) > pmax)
+		return (NULL);
+
+	/*
+	 * The record data begins behind the fixed header and is rdlen bytes
+	 * long; q was checked above, so only the end needs testing here.
+	 */
+	rdata_end = (u_char *)q + sizeof(NBTNsResource) + ntohs(q->rdlen);
+	if ((char *)rdata_end > pmax)
+		return (NULL);
+
+	return (rdata_end);
+}
+
+/*
+ * Walk `count' resource records starting at q.  The name of each record
+ * is stepped over with AliasHandleName() and the record is handed to the
+ * handler for its type; whatever the handler returns is taken as the
+ * start of the next record.  A record of unknown type is not stepped
+ * over.  The walk stops early once q is NULL or the fixed header at q
+ * would cross pmax.  Returns q as it stands when the walk stops.
+ */
+static u_char *
+AliasHandleResource(
+    u_short count,
+    NBTNsResource *q,
+    char *pmax,
+    NBTArguments *nbtarg)
+{
+	u_short rrtype;
+
+	for (; count != 0; count--) {
+		/* Resource record name field */
+		q = (NBTNsResource *)AliasHandleName((u_char *)q, pmax);
+		if (q == NULL || (char *)(q + 1) > pmax)
+			break;
+
+		rrtype = ntohs(q->type);
+#ifdef DEBUG
+		printf("type=%02x, count=%d\n", rrtype, count );
+#endif
+
+		/* Type field: pick the handler for this kind of record */
+		switch (rrtype) {
+		case RR_TYPE_NB:
+			/* Address entries, rewritten by the handler */
+			q = (NBTNsResource *)
+			    AliasHandleResourceNB(q, pmax, nbtarg);
+			break;
+		case RR_TYPE_A:
+			/* Address entries, rewritten by the handler */
+			q = (NBTNsResource *)
+			    AliasHandleResourceA(q, pmax, nbtarg);
+			break;
+		case RR_TYPE_NS:
+			/* Data is validated and stepped over */
+			q = (NBTNsResource *)
+			    AliasHandleResourceNS(q, pmax, nbtarg);
+			break;
+		case RR_TYPE_NULL:
+			/* Data is stepped over */
+			q = (NBTNsResource *)
+			    AliasHandleResourceNULL(q, pmax, nbtarg);
+			break;
+		case RR_TYPE_NBSTAT:
+			/* Data is stepped over */
+			q = (NBTNsResource *)
+			    AliasHandleResourceNBSTAT(q, pmax, nbtarg);
+			break;
+		default:
+#ifdef DEBUG
+			printf(
+			    "\nUnknown Type of Resource %0x\n",
+			    rrtype
+			);
+#endif
+			break;
+		}
+	}
+
+	fflush(stdout);
+	return ((u_char *)q);
+}
+
+/*
+ * Patch a NetBIOS name service datagram carried in pip.
+ *
+ * The question entries and the answer, authority and additional resource
+ * records are walked in that order, each section only when its count in
+ * the header is not zero.  The resource handlers replace *alias_address
+ * by *original_address in the records they rewrite and adjust the UDP
+ * checksum through the uh_sum pointer handed to them.
+ *
+ * Returns -1 when the name service header does not fit into the UDP
+ * length or when the walk ended with a NULL position, otherwise 0.
+ * The link argument is not used here.
+ */
+int AliasHandleUdpNbtNS(
+	struct ip 		  	*pip,	 /* IP packet to examine/patch */
+	struct alias_link 	*link,
+	struct in_addr		*alias_address,
+	u_short 			*alias_port,
+	struct in_addr		*original_address,
+	u_short 			*original_port )
+{
+	struct udphdr	*udp;
+	NbtNSHeader	*hdr;
+	u_char		*cursor;	/* current parse position */
+	char		*limit;		/* end of the UDP datagram */
+	NBTArguments	 args;
+	u_short		 nrec;
+
+	/* Set up the parameters shared by all handlers */
+	args.oldaddr = *alias_address;
+	args.oldport = *alias_port;
+	args.newaddr = *original_address;
+	args.newport = *original_port;
+
+	/* Locate the UDP header, the name service header and the data end */
+	udp = (struct udphdr *)((char *)pip + (pip->ip_hl << 2));
+	args.uh_sum = &(udp->uh_sum);
+	hdr = (NbtNSHeader *)((char *)udp + sizeof(struct udphdr));
+	cursor = (u_char *)(hdr + 1);
+	limit = (char *)udp + ntohs(udp->uh_ulen);
+
+	/* The whole name service header has to be present */
+	if ((char *)cursor > limit)
+		return (-1);
+
+#ifdef DEBUG
+	printf(" [%s] ID=%02x, op=%01x, flag=%02x, rcode=%01x, qd=%04x"
+	    ", an=%04x, ns=%04x, ar=%04x, [%d]-->",
+	    hdr->dir ? "Response": "Request",
+	    hdr->nametrid,
+	    hdr->opcode,
+	    hdr->nmflags,
+	    hdr->rcode,
+	    ntohs(hdr->qdcount),
+	    ntohs(hdr->ancount),
+	    ntohs(hdr->nscount),
+	    ntohs(hdr->arcount),
+	    (u_char *)cursor - (u_char *)hdr
+	);
+#endif
+
+	/* Question entries */
+	nrec = ntohs(hdr->qdcount);
+	if (nrec != 0)
+		cursor = AliasHandleQuestion(nrec, (NBTNsQuestion *)cursor,
+		    limit, &args);
+
+	/* Answer resource records */
+	nrec = ntohs(hdr->ancount);
+	if (nrec != 0)
+		cursor = AliasHandleResource(nrec, (NBTNsResource *)cursor,
+		    limit, &args);
+
+	/* Authority resource records */
+	nrec = ntohs(hdr->nscount);
+	if (nrec != 0)
+		cursor = AliasHandleResource(nrec, (NBTNsResource *)cursor,
+		    limit, &args);
+
+	/* Additional resource records */
+	nrec = ntohs(hdr->arcount);
+	if (nrec != 0)
+		cursor = AliasHandleResource(nrec, (NBTNsResource *)cursor,
+		    limit, &args);
+
+#ifdef DEBUG
+	PrintRcode(hdr->rcode);
+#endif
+
+	return ((cursor == NULL) ? -1 : 0);
+}
diff --git a/sys/netinet/libalias/alias_pptp.c b/sys/netinet/libalias/alias_pptp.c
new file mode 100644
index 0000000..dbab7a9
--- /dev/null
+++ b/sys/netinet/libalias/alias_pptp.c
@@ -0,0 +1,368 @@
+/*
+ * alias_pptp.c
+ *
+ * Copyright (c) 2000 Whistle Communications, Inc.
+ * All rights reserved.
+ *
+ * Subject to the following obligations and disclaimer of warranty, use and
+ * redistribution of this software, in source or object code forms, with or
+ * without modifications are expressly permitted by Whistle Communications;
+ * provided, however, that:
+ * 1. Any and all reproductions of the source or object code must include the
+ *    copyright notice above and the following disclaimer of warranties; and
+ * 2. No rights are granted, in any manner or form, to use Whistle
+ *    Communications, Inc. trademarks, including the mark "WHISTLE
+ *    COMMUNICATIONS" on advertising, endorsements, or otherwise except as
+ *    such appears in the above copyright notice or in the software.
+ *
+ * THIS SOFTWARE IS BEING PROVIDED BY WHISTLE COMMUNICATIONS "AS IS", AND
+ * TO THE MAXIMUM EXTENT PERMITTED BY LAW, WHISTLE COMMUNICATIONS MAKES NO
+ * REPRESENTATIONS OR WARRANTIES, EXPRESS OR IMPLIED, REGARDING THIS SOFTWARE,
+ * INCLUDING WITHOUT LIMITATION, ANY AND ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, OR NON-INFRINGEMENT.
+ * WHISTLE COMMUNICATIONS DOES NOT WARRANT, GUARANTEE, OR MAKE ANY
+ * REPRESENTATIONS REGARDING THE USE OF, OR THE RESULTS OF THE USE OF THIS
+ * SOFTWARE IN TERMS OF ITS CORRECTNESS, ACCURACY, RELIABILITY OR OTHERWISE.
+ * IN NO EVENT SHALL WHISTLE COMMUNICATIONS BE LIABLE FOR ANY DAMAGES
+ * RESULTING FROM OR ARISING OUT OF ANY USE OF THIS SOFTWARE, INCLUDING
+ * WITHOUT LIMITATION, ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY,
+ * PUNITIVE, OR CONSEQUENTIAL DAMAGES, PROCUREMENT OF SUBSTITUTE GOODS OR
+ * SERVICES, LOSS OF USE, DATA OR PROFITS, HOWEVER CAUSED AND UNDER ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF WHISTLE COMMUNICATIONS IS ADVISED OF THE POSSIBILITY
+ * OF SUCH DAMAGE.
+ *
+ * Author: Erik Salander <erik@whistle.com>
+ *
+ * $FreeBSD$
+ */
+
+/*
+   Alias_pptp.c performs special processing for PPTP sessions under TCP.
+   Specifically, watch PPTP control messages and alias the Call ID or the
+   Peer's Call ID in the appropriate messages.  Note that PPTP requires
+   "de-aliasing" of incoming packets; this differs from the other TCP
+   applications that are currently aliased (i.e. FTP, IRC and RTSP).
+
+   For Call IDs encountered for the first time, a PPTP alias link is created.
+   The PPTP alias link uses the Call ID in place of the original port number.
+   An alias Call ID is created.
+
+   For this routine to work, the PPTP control messages must fit entirely
+   into a single TCP packet.  This is typically the case, but is not
+   required by the spec.
+
+   Unlike some of the other TCP applications that are aliased (ie. FTP,
+   IRC and RTSP), the PPTP control messages that need to be aliased are
+   guaranteed to remain the same length.  The aliased Call ID is a fixed
+   length field.
+
+   Reference: RFC 2637
+
+   Initial version:  May, 2000 (eds)
+
+*/
+
+/* Includes */
+#include <sys/types.h>
+#include <netinet/in_systm.h>
+#include <netinet/in.h>
+#include <netinet/ip.h>
+#include <netinet/tcp.h>
+
+#include <stdio.h>
+
+#include "alias_local.h"
+
+/*
+ * PPTP definitions
+ */
+
+struct grehdr			/* Enhanced GRE header. */
+{
+    u_int16_t gh_flags;		/* Flags. */
+    u_int16_t gh_protocol;	/* Protocol type. */
+    u_int16_t gh_length;	/* Payload length. */
+    u_int16_t gh_call_id;	/* Call ID. */
+    u_int32_t gh_seq_no;	/* Sequence number (optional). */
+    u_int32_t gh_ack_no;	/* Acknowledgment number (optional). */
+};
+typedef struct grehdr		GreHdr;
+
+/* The PPTP protocol ID used in the GRE 'proto' field. */
+#define PPTP_GRE_PROTO          0x880b
+
+/* Bits that must be set a certain way in all PPTP/GRE packets. */
+#define PPTP_INIT_VALUE		((0x2001 << 16) | PPTP_GRE_PROTO)
+#define PPTP_INIT_MASK		0xef7fffff
+
+#define PPTP_MAGIC		0x1a2b3c4d
+#define PPTP_CTRL_MSG_TYPE	1
+
+enum {				/* values of the control message type field */
+  PPTP_StartCtrlConnRequest = 1,
+  PPTP_StartCtrlConnReply = 2,
+  PPTP_StopCtrlConnRequest = 3,
+  PPTP_StopCtrlConnReply = 4,
+  PPTP_EchoRequest = 5,
+  PPTP_EchoReply = 6,
+  PPTP_OutCallRequest = 7,
+  PPTP_OutCallReply = 8,
+  PPTP_InCallRequest = 9,
+  PPTP_InCallReply = 10,
+  PPTP_InCallConn = 11,
+  PPTP_CallClearRequest = 12,
+  PPTP_CallDiscNotify = 13,
+  PPTP_WanErrorNotify = 14,
+  PPTP_SetLinkInfo = 15
+};
+
+  /* Message structures */
+  struct pptpMsgHead {
+    u_int16_t   length;         /* total length */
+    u_int16_t   msgType;        /* PPTP message type */
+    u_int32_t   magic;          /* magic cookie */
+    u_int16_t   type;           /* control message type */
+    u_int16_t   resv0;          /* reserved */
+  };
+  typedef struct pptpMsgHead    *PptpMsgHead;
+
+  struct pptpCodes {
+    u_int8_t    resCode;        /* Result Code */
+    u_int8_t    errCode;        /* Error Code */
+  };
+  typedef struct pptpCodes      *PptpCode;
+
+  struct pptpCallIds {
+    u_int16_t   cid1;           /* Call ID field #1 */
+    u_int16_t   cid2;           /* Call ID field #2 */
+  };
+  typedef struct pptpCallIds    *PptpCallId;
+
+static PptpCallId AliasVerifyPptp(struct ip *, u_int16_t *);
+
+
+/*
+ * Process an outgoing PPTP control message on the control link.
+ * Call setup messages create the per-call PPTP link, call teardown
+ * messages look it up.  When such a link exists, the first Call ID field
+ * is replaced by the link's alias port and the TCP checksum is adjusted.
+ */
+void
+AliasHandlePptpOut(struct ip *pip,	    /* IP packet to examine/patch */
+                   struct alias_link *link) /* The PPTP control link */
+{
+    struct alias_link *call_link;
+    struct tcphdr *tcp;
+    PptpCallId ids;
+    u_int16_t msg_type;			/* control message type */
+    int is_reply, acc;
+
+    /* Verify valid PPTP control message */
+    ids = AliasVerifyPptp(pip, &msg_type);
+    if (ids == NULL)
+	return;
+
+    /* Replies carry result codes that are examined further down. */
+    is_reply = (msg_type == PPTP_OutCallReply ||
+		msg_type == PPTP_InCallReply);
+
+    /* Establish or find the PPTP link for the Call ID in the message. */
+    if (msg_type == PPTP_OutCallRequest || msg_type == PPTP_InCallRequest ||
+	is_reply)
+	call_link = AddPptp(GetOriginalAddress(link), GetDestAddress(link),
+			    GetAliasAddress(link), ids->cid1);
+    else if (msg_type == PPTP_CallClearRequest ||
+	     msg_type == PPTP_CallDiscNotify)
+	call_link = FindPptpOutByCallId(GetOriginalAddress(link),
+					GetDestAddress(link), ids->cid1);
+    else
+	return;
+
+    if (call_link == NULL)
+	return;
+
+    /* Alias the Call ID, keeping old minus new for the checksum. */
+    acc = ids->cid1;
+    ids->cid1 = GetAliasPort(call_link);
+    acc -= ids->cid1;
+
+    /* Compute TCP checksum for revised packet */
+    tcp = (struct tcphdr *) ((char *) pip + (pip->ip_hl << 2));
+    ADJUST_CHECKSUM(acc, tcp->th_sum);
+
+    /* Update the state of the per-call link. */
+    if (is_reply) {
+	PptpCode result = (PptpCode)(ids + 1);
+
+	if (result->resCode == 1)	/* Connection established, */
+	    SetDestCallId(call_link,	/* note the Peer's Call ID. */
+			  ids->cid2);
+	else
+	    SetExpire(call_link, 0);	/* Connection refused. */
+    } else if (msg_type == PPTP_CallDiscNotify) {
+	SetExpire(call_link, 0);	/* Connection closed. */
+    }
+}
+
+/*
+ * Process an incoming PPTP control message: de-alias the Call ID field
+ * selected by the message type, or expire the link on a disconnect.
+ */
+void
+AliasHandlePptpIn(struct ip *pip,	   /* IP packet to examine/patch */
+                  struct alias_link *link) /* The PPTP control link */
+{
+    struct alias_link *call_link;
+    struct tcphdr *tcp;
+    PptpCallId ids;
+    u_int16_t *peer_id;			/* the Call ID field to de-alias */
+    u_int16_t msg_type;			/* control message type */
+    int is_reply, acc;
+
+    /* Verify valid PPTP control message */
+    ids = AliasVerifyPptp(pip, &msg_type);
+    if (ids == NULL)
+	return;
+
+    /* A disconnect only expires the matching link; nothing is rewritten. */
+    if (msg_type == PPTP_CallDiscNotify) {
+	call_link = FindPptpInByCallId(GetDestAddress(link),
+				       GetAliasAddress(link), ids->cid1);
+	if (call_link != NULL)
+	    SetExpire(call_link, 0);
+	return;
+    }
+
+    /* Select the Call ID field to rewrite; other messages are ignored. */
+    is_reply = (msg_type == PPTP_OutCallReply ||
+		msg_type == PPTP_InCallReply);
+    if (is_reply)
+	peer_id = &ids->cid2;
+    else if (msg_type == PPTP_InCallConn ||
+	     msg_type == PPTP_WanErrorNotify ||
+	     msg_type == PPTP_SetLinkInfo)
+	peer_id = &ids->cid1;
+    else
+	return;
+
+    /* Find PPTP link for address and Call ID found in PPTP Control Msg */
+    call_link = FindPptpInByPeerCallId(GetDestAddress(link),
+				       GetAliasAddress(link), *peer_id);
+    if (call_link == NULL)
+	return;
+
+    /* De-alias the Peer's Call Id. */
+    acc = *peer_id;
+    *peer_id = GetOriginalPort(call_link);
+    acc -= *peer_id;
+    /* Compute TCP checksum for modified packet */
+    tcp = (struct tcphdr *) ((char *) pip + (pip->ip_hl << 2));
+    ADJUST_CHECKSUM(acc, tcp->th_sum);
+
+    if (is_reply) {
+	PptpCode result = (PptpCode)(ids + 1);
+	if (result->resCode == 1)	/* Connection established, */
+	    SetDestCallId(call_link,	/* note the Call ID. */
+			  ids->cid1);
+	else
+	    SetExpire(call_link, 0);	/* Connection refused. */
+    }
+}
+
+/*
+ * Return the Call ID fields of the PPTP control message in pip and store
+ * its control message type in *ptype; NULL if pip is not such a message.
+ */
+static PptpCallId
+AliasVerifyPptp(struct ip *pip, u_int16_t *ptype) /* IP packet to examine/patch */
+{
+    int           	hlen, dlen;
+    PptpMsgHead   	hptr;
+    struct tcphdr 	*tc;
+
+    /* Calculate some lengths */
+    tc = (struct tcphdr *) ((char *) pip + (pip->ip_hl << 2));
+    hlen = (pip->ip_hl + tc->th_off) << 2;
+    dlen = ntohs(pip->ip_len) - hlen;
+
+    /* Verify data length; compare as int, since dlen may be negative */
+    if (dlen < (int)(sizeof(struct pptpMsgHead) + sizeof(struct pptpCallIds)))
+      return(NULL);
+
+    /* Move up to PPTP message header, return the control message type */
+    hptr = (PptpMsgHead)(((char *) pip) + hlen);
+    *ptype = ntohs(hptr->type);
+
+    /* Verify PPTP Control Message */
+    if ((ntohs(hptr->msgType) != PPTP_CTRL_MSG_TYPE) ||
+        (ntohl(hptr->magic) != PPTP_MAGIC))
+      return(NULL);
+
+    /* Verify data length: replies also carry result codes */
+    if ((*ptype == PPTP_OutCallReply || *ptype == PPTP_InCallReply) &&
+	(dlen < (int)(sizeof(struct pptpMsgHead) + sizeof(struct pptpCallIds) +
+		sizeof(struct pptpCodes))))
+	return (NULL);
+    return (PptpCallId)(hptr + 1);
+}
+
+
+/* Alias the source address of an outgoing PPTP/GRE packet. */
+int
+AliasHandlePptpGreOut(struct ip *pip)
+{
+    struct alias_link *call_link;
+    struct in_addr alias_addr;
+    GreHdr *gre;
+
+    /* Check GRE header bits. */
+    gre = (GreHdr *)((char *)pip + (pip->ip_hl << 2));
+    if ((ntohl(*((u_int32_t *)gre)) & PPTP_INIT_MASK) != PPTP_INIT_VALUE)
+	return (-1);
+
+    /* Without a matching PPTP link the packet is left unchanged. */
+    call_link = FindPptpOutByPeerCallId(pip->ip_src, pip->ip_dst,
+					gre->gh_call_id);
+    if (call_link == NULL)
+	return (0);
+
+    /* Change source IP address. */
+    alias_addr = GetAliasAddress(call_link);
+    DifferentialChecksum(&pip->ip_sum, (u_short *)&alias_addr,
+			 (u_short *)&pip->ip_src, 2);
+    pip->ip_src = alias_addr;
+    return (0);
+}
+
+
+/* De-alias the Call ID and destination of an incoming PPTP/GRE packet. */
+int
+AliasHandlePptpGreIn(struct ip *pip)
+{
+    struct alias_link *call_link;
+    struct in_addr orig_addr;
+    GreHdr *gre;
+
+    /* Check GRE header bits. */
+    gre = (GreHdr *)((char *)pip + (pip->ip_hl << 2));
+    if ((ntohl(*((u_int32_t *)gre)) & PPTP_INIT_MASK) != PPTP_INIT_VALUE)
+	return (-1);
+
+    /* Without a matching PPTP link the packet is left unchanged. */
+    call_link = FindPptpInByPeerCallId(pip->ip_src, pip->ip_dst,
+				       gre->gh_call_id);
+    if (call_link == NULL)
+	return (0);
+    orig_addr = GetOriginalAddress(call_link);
+
+    /* De-alias the Peer's Call Id. */
+    gre->gh_call_id = GetOriginalPort(call_link);
+
+    /* Restore original IP address. */
+    DifferentialChecksum(&pip->ip_sum, (u_short *)&orig_addr,
+			 (u_short *)&pip->ip_dst, 2);
+    pip->ip_dst = orig_addr;
+    return (0);
+}
diff --git a/sys/netinet/libalias/alias_proxy.c b/sys/netinet/libalias/alias_proxy.c
new file mode 100644
index 0000000..75ffad7
--- /dev/null
+++ b/sys/netinet/libalias/alias_proxy.c
@@ -0,0 +1,810 @@
+/* file: alias_proxy.c
+
+    This file encapsulates special operations related to transparent
+    proxy redirection.  This is where packets with a particular destination,
+    usually tcp port 80, are redirected to a proxy server.
+
+    When packets are proxied, the destination address and port are
+    modified.  In certain cases, it is necessary to somehow encode
+    the original address/port info into the packet.  Two methods are
+    presently supported: addition of a [DEST addr port] string at the
+    beginning of a tcp stream, or inclusion of an optional field
+    in the IP header.
+    
+    There is one public API function:
+
+        PacketAliasProxyRule()    -- Adds and deletes proxy
+                                     rules.
+
+    Rules are stored in a linear linked list, so lookup efficiency
+    won't be too good for large lists.
+
+
+    Initial development: April, 1998 (cjm)
+
+    $FreeBSD$
+*/
+
+
+/* System includes */
+#include <ctype.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <netdb.h>
+
+#include <sys/types.h>
+#include <sys/socket.h>
+
+/* BSD IPV4 includes */
+#include <netinet/in_systm.h>
+#include <netinet/in.h>
+#include <netinet/ip.h>
+#include <netinet/tcp.h>
+
+#include <arpa/inet.h>
+
+#include "alias_local.h"  /* Functions used by alias*.c */
+#include "alias.h"        /* Public API functions for libalias */
+
+
+
+/*
+    Data structures
+ */
+
+/*
+ * A linked list of arbitrary length, based on struct proxy_entry is
+ * used to store proxy rules.
+ */
+struct proxy_entry
+{
+#define PROXY_TYPE_ENCODE_NONE      1
+#define PROXY_TYPE_ENCODE_TCPSTREAM 2
+#define PROXY_TYPE_ENCODE_IPHDR     3
+    int rule_index;             /* sort key of the rule list */
+    int proxy_type;             /* value ProxyCheck() returns on a match */
+    u_char proto;               /* compared with the packet's ip_p */
+    u_short proxy_port;         /* compared with th_dport; 0 matches any */
+    u_short server_port;        /* port to redirect to; 0 keeps th_dport */
+
+    struct in_addr server_addr; /* address to redirect to */
+
+    struct in_addr src_addr;    /* masked packet source must equal this */
+    struct in_addr src_mask;
+
+    struct in_addr dst_addr;    /* masked packet destination must equal this */
+    struct in_addr dst_mask;
+
+    struct proxy_entry *next;   /* following rule, NULL at the tail */
+    struct proxy_entry *last;   /* preceding rule, NULL at the head */
+};
+
+
+
+/*
+    File scope variables
+*/
+
+static struct proxy_entry *proxyList;
+
+
+
+/* Local (static) functions:
+
+    IpMask()                 -- Utility function for creating IP
+                                masks from integer (1-32) specification.
+    IpAddr()                 -- Utility function for converting string
+                                to IP address
+    IpPort()                 -- Utility function for converting string
+                                to port number
+    RuleAdd()                -- Adds an element to the rule list.
+    RuleDelete()             -- Removes an element from the rule list.
+    RuleNumberDelete()       -- Removes all elements from the rule list
+                                having a certain rule number.
+    ProxyEncodeTcpStream()   -- Adds [DEST x.x.x.x xxxx] to the beginning
+                                of a TCP stream.
+    ProxyEncodeIpHeader()    -- Adds an IP option indicating the true
+                                destination of a proxied IP packet
+*/
+
+static int IpMask(int, struct in_addr *);
+static int IpAddr(char *, struct in_addr *);
+static int IpPort(char *, int, int *);
+static void RuleAdd(struct proxy_entry *);
+static void RuleDelete(struct proxy_entry *);
+static int RuleNumberDelete(int);
+static void ProxyEncodeTcpStream(struct alias_link *, struct ip *, int);
+static void ProxyEncodeIpHeader(struct ip *, int);
+
+/* Store a mask with nbits leading one bits; -1 if nbits is out of range. */
+static int
+IpMask(int nbits, struct in_addr *mask)
+{
+    u_int bits_left, imask;
+
+    if (nbits < 0 || nbits > 32)
+        return -1;
+
+    /* Shift a one in from the top once per requested bit. */
+    imask = 0;
+    for (bits_left = nbits; bits_left > 0; bits_left--)
+        imask = 0x80000000 | (imask >> 1);
+    mask->s_addr = htonl(imask);
+    return 0;
+}
+
+/* Convert s with inet_aton(); returns 0 on success and -1 on failure. */
+static int
+IpAddr(char *s, struct in_addr *addr)
+{
+    int parsed = inet_aton(s, addr);
+
+    return (parsed != 0) ? 0 : -1;
+}
+
+/*
+ * Convert s to a port number: a leading decimal number is taken as is,
+ * anything else is looked up as a service of proto.  Returns 0 or -1.
+ */
+static int
+IpPort(char *s, int proto, int *port)
+{
+    const char *proto_name;
+    struct servent *se;
+
+    if (sscanf(s, "%d", port) == 1)
+        return 0;
+
+    if (proto == IPPROTO_TCP)
+        proto_name = "tcp";
+    else if (proto == IPPROTO_UDP)
+        proto_name = "udp";
+    else
+        return -1;
+    se = getservbyname(s, proto_name);
+    if (se == NULL)
+        return -1;
+    *port = (u_int) ntohs(se->s_port);
+    return 0;
+}
+
+/*
+ * Insert entry into proxyList, a doubly linked list that is kept sorted
+ * by ascending rule_index.  The entry goes in front of the first rule
+ * whose index is greater than or equal to its own, or to the tail when
+ * there is no such rule; an empty list simply receives the entry.
+ *
+ * The back link of the entry has to name its predecessor, because
+ * RuleDelete() follows entry->last when it unlinks a rule.  An entry
+ * whose back link named the entry itself could not be unlinked from its
+ * predecessor and would stay reachable after being freed.
+ *
+ * entry: rule to insert; its next and last members are overwritten.
+ * The function is file-local, matching its prototype above.
+ */
+static void
+RuleAdd(struct proxy_entry *entry)
+{
+    int rule_index;
+    struct proxy_entry *ptr;         /* first rule sorting behind entry */
+    struct proxy_entry *ptr_last;    /* last rule sorting before entry */
+
+    /* Find the insertion point */
+    rule_index = entry->rule_index;
+    ptr_last = NULL;
+    for (ptr = proxyList; ptr != NULL; ptr = ptr->next)
+    {
+        if (ptr->rule_index >= rule_index)
+            break;
+        ptr_last = ptr;
+    }
+
+    /* Point the entry at its two neighbours (NULL at either list end) */
+    entry->last = ptr_last;
+    entry->next = ptr;
+
+    /* Point the predecessor, or else the list head, at the entry */
+    if (ptr_last != NULL)
+        ptr_last->next = entry;
+    else
+        proxyList = entry;
+
+    /* Point the successor, if there is one, back at the entry */
+    if (ptr != NULL)
+        ptr->last = entry;
+}
+
+static void
+RuleDelete(struct proxy_entry *entry)
+{
+    struct proxy_entry *before = entry->last, *after = entry->next;
+    /* Bypass the entry in both directions, then release its memory. */
+    if (before == NULL)
+        proxyList = after;
+    else
+        before->next = after;
+    if (after != NULL)
+        after->last = before;
+    free(entry);
+}
+
+/*
+ * Delete every rule whose rule_index equals the argument.
+ * Returns 0 if at least one rule was deleted, -1 if none matched.
+ */
+static int
+RuleNumberDelete(int rule_index)
+{
+    struct proxy_entry *cur, *following;
+    int result;
+
+    result = -1;
+    for (cur = proxyList; cur != NULL; cur = following)
+    {
+        /* Fetch the successor first: RuleDelete() frees cur. */
+        following = cur->next;
+        if (cur->rule_index != rule_index)
+            continue;
+
+        RuleDelete(cur);
+        result = 0;
+    }
+
+    return result;
+}
+
+/*
+ * Insert a "[DEST addr port]" line in front of the TCP payload of pip.
+ * The address and port are those returned by GetProxyAddress() and
+ * GetProxyPort() for the link; the line is padded to an even length and
+ * ends in a newline.
+ *
+ * Only the first packet that carries data is changed (the link's
+ * ack-modified flag records that it was done).  The sequence number
+ * delta, the IP length, the IP checksum and the TCP checksum are updated
+ * to match.  The packet is left alone when the result would not fit
+ * into maxpacketsize bytes.
+ */
+static void
+ProxyEncodeTcpStream(struct alias_link *link,
+                     struct ip *pip,
+                     int maxpacketsize)
+{
+    int slen;
+    int hlen;
+    int dlen;
+    int delta;
+    int accumulate;
+    char buffer[40];
+    u_char *p;
+    struct tcphdr *tc;
+
+/* Compute pointer to tcp header */
+    tc = (struct tcphdr *) ((char *) pip + (pip->ip_hl << 2));
+
+/* Don't modify if once already modified */
+    if (GetAckModified (link))
+	return;
+
+/* Translate destination address and port to string form; two bytes
+   of the buffer are held back for the padding appended below */
+    snprintf(buffer, sizeof(buffer) - 2, "[DEST %s %d]",
+        inet_ntoa(GetProxyAddress (link)), (u_int) ntohs(GetProxyPort (link)));
+
+/* Pad string out to a multiple of two in length */
+    slen = strlen(buffer);
+    if (slen % 2 == 0)
+    {
+        strcat(buffer, " \n");
+        slen += 2;
+    }
+    else
+    {
+        strcat(buffer, "\n");
+        slen += 1;
+    }
+
+/* Check for packet overflow.  The sum is compared as an int, so that a
+   negative maxpacketsize is not converted to a huge unsigned limit */
+    if ((int) ntohs(pip->ip_len) + slen > maxpacketsize)
+        return;
+
+/* Modify first packet that has data in it.  A negative data length means
+   the headers claim more than ip_len; such a packet is left untouched
+   instead of handing a negative count to memmove() */
+    hlen = (pip->ip_hl + tc->th_off) << 2;
+    dlen = ntohs (pip->ip_len) - hlen;
+    if (dlen <= 0)
+        return;
+
+/* Shift existing TCP data and insert destination string */
+    p = (u_char *) pip + hlen;
+    memmove(p + slen, p, dlen);
+    memcpy(p, buffer, slen);
+
+/* Save information about modified sequence number */
+    SetAckModified(link);
+    delta = GetDeltaSeqOut(pip, link);
+    AddSeq(pip, link, delta+slen);
+
+/* Update IP header packet length and checksum */
+    accumulate  = pip->ip_len;
+    pip->ip_len = htons(ntohs(pip->ip_len) + slen);
+    accumulate -= pip->ip_len;
+    ADJUST_CHECKSUM(accumulate, pip->ip_sum);
+
+/* Update TCP checksum, Use TcpChecksum since so many things have
+   already changed. */
+    tc->th_sum = 0;
+    tc->th_sum = TcpChecksum (pip);
+}
+
+/*
+ * Insert an 8-byte IP option right behind the fixed 20-byte IP header.
+ * The option holds the packet's destination address and the th_sport
+ * field of its TCP header.  Header length, total length and IP checksum
+ * are updated.  The packet is left alone when the header has no room for
+ * the option or when the result would exceed maxpacketsize bytes.
+ */
+static void
+ProxyEncodeIpHeader(struct ip *pip,
+                    int maxpacketsize)
+{
+#define OPTION_LEN_BYTES  8
+#define OPTION_LEN_INT16  4
+#define OPTION_LEN_INT32  2
+    u_char option[OPTION_LEN_BYTES];
+    u_char *ptr;
+    u_short *sptr;
+    struct tcphdr *tc;
+    int i, accumulate;
+
+#ifdef DEBUG
+    fprintf(stdout, " ip cksum 1 = %x\n", (u_int) IpChecksum(pip));
+    fprintf(stdout, "tcp cksum 1 = %x\n", (u_int) TcpChecksum(pip));
+#endif
+
+/* Check to see that there is room to add an IP option */
+    if (pip->ip_hl > (0x0f - OPTION_LEN_INT32))
+        return;
+
+/* Check for packet overflow, as ProxyEncodeTcpStream() does */
+    if ((int) ntohs(pip->ip_len) + OPTION_LEN_BYTES > maxpacketsize)
+        return;
+
+/* Make room behind the fixed header.  Source and destination overlap,
+   so memmove() is needed: memcpy() is undefined for overlapping areas */
+    ptr = (u_char *) pip + 20;
+    memmove(ptr + OPTION_LEN_BYTES, ptr, ntohs(pip->ip_len) - 20);
+
+/* Build option and copy into packet */
+    option[0] = 0x64; /* class: 3 (reserved), option 4 */
+    option[1] = OPTION_LEN_BYTES;
+    memcpy(&option[2], (u_char *) &pip->ip_dst, 4);
+    tc = (struct tcphdr *) ((char *) pip + (pip->ip_hl << 2));
+    memcpy(&option[6], (u_char *) &tc->th_sport, 2);
+    memcpy(ptr, option, 8);
+
+/* Update checksum, header length and packet length */
+    sptr = (u_short *) option;
+    accumulate = 0;
+    for (i=0; i<OPTION_LEN_INT16; i++)
+        accumulate -= *(sptr++);
+    sptr = (u_short *) pip;
+    accumulate += *sptr;
+    pip->ip_hl += OPTION_LEN_INT32;
+    accumulate -= *sptr;
+
+    accumulate += pip->ip_len;
+    pip->ip_len = htons(ntohs(pip->ip_len) + OPTION_LEN_BYTES);
+    accumulate -= pip->ip_len;
+    ADJUST_CHECKSUM(accumulate, pip->ip_sum);
+#undef OPTION_LEN_BYTES
+#undef OPTION_LEN_INT16
+#undef OPTION_LEN_INT32
+#ifdef DEBUG
+    fprintf(stdout, " ip cksum 2 = %x\n", (u_int) IpChecksum(pip));
+    fprintf(stdout, "tcp cksum 2 = %x\n", (u_int) TcpChecksum(pip));
+#endif
+}
+
+
+/* Functions used by other packet alias source files
+
+    ProxyCheck()         -- Checks whether an outgoing packet should
+                            be proxied.
+    ProxyModify()        -- Encodes the original destination address/port
+                            for a packet which is to be redirected to
+                            a proxy server.
+*/
+
+/*
+ * Look for a proxy rule matching the packet.  On a match the server
+ * address and port to redirect to are stored through the two pointers
+ * and the rule's proxy type is returned; 0 means no rule matched.
+ */
+int
+ProxyCheck(struct ip *pip,
+           struct in_addr *proxy_server_addr,
+           u_short *proxy_server_port)
+{
+    struct proxy_entry *rule;
+    struct tcphdr *tc;
+    u_short dst_port;
+
+    tc = (struct tcphdr *) ((char *) pip + (pip->ip_hl << 2));
+    dst_port = tc->th_dport;
+
+    for (rule = proxyList; rule != NULL; rule = rule->next)
+    {
+        /* A rule port of 0 matches any destination port. */
+        if (rule->proxy_port != 0 && rule->proxy_port != dst_port)
+            continue;
+        if (rule->proto != pip->ip_p)
+            continue;
+        /* Packets sent by the server address itself are not matched. */
+        if (pip->ip_src.s_addr == rule->server_addr.s_addr)
+            continue;
+
+        /* Masked source and destination must equal the rule's values. */
+        if ((pip->ip_src.s_addr & rule->src_mask.s_addr)
+          != rule->src_addr.s_addr)
+            continue;
+        if ((pip->ip_dst.s_addr & rule->dst_mask.s_addr)
+          != rule->dst_addr.s_addr)
+            continue;
+
+        /* A server port of 0 keeps the packet's destination port. */
+        *proxy_server_port = rule->server_port;
+        if (*proxy_server_port == 0)
+            *proxy_server_port = dst_port;
+        *proxy_server_addr = rule->server_addr;
+        return rule->proxy_type;
+    }
+    return 0;
+}
+
+/* Apply the encoding selected by proxy_type; other types change nothing. */
+void
+ProxyModify(struct alias_link *link,
+            struct ip *pip,
+            int maxpacketsize,
+            int proxy_type)
+{
+    if (proxy_type == PROXY_TYPE_ENCODE_TCPSTREAM)
+    {
+        ProxyEncodeTcpStream(link, pip, maxpacketsize);
+        return;
+    }
+    if (proxy_type == PROXY_TYPE_ENCODE_IPHDR)
+    {
+        ProxyEncodeIpHeader(pip, maxpacketsize);
+    }
+}
+
+
+/*
+    Public API functions
+*/
+
+int
+PacketAliasProxyRule(const char *cmd)
+{
+/*
+ * This function takes command strings of the form:
+ *
+ *   server <addr>[:<port>]
+ *   [port <port>]
+ *   [rule n]
+ *   [proto tcp|udp]
+ *   [src <addr>[/n]]
+ *   [dst <addr>[/n]]
+ *   [type encode_tcp_stream|encode_ip_hdr|no_encode]
+ *
+ *   delete <rule number>
+ *
+ * Subfields can be in arbitrary order.  Port numbers and addresses
+ * may be numeric or symbolic.  An optional rule number controls the
+ * order in which rules are searched; rules sharing a number have no
+ * guaranteed order and should be disjoint.  Without a rule number, 0
+ * is used, and group 0 rules are always checked before any others.
+ * Returns 0 on success; -1 if the command is too long or malformed, a
+ * helper reports an error, no server address was given, or malloc fails.
+ */
+    int i, n, len;
+    int cmd_len;
+    int token_count;
+    int state;
+    char *token;
+    char buffer[256];
+    char str_port[sizeof(buffer)];
+    char str_server_port[sizeof(buffer)];
+    char *res = buffer;
+
+    int rule_index;
+    int proto;
+    int proxy_type;
+    int proxy_port;
+    int server_port;
+    struct in_addr server_addr;
+    struct in_addr src_addr, src_mask;
+    struct in_addr dst_addr, dst_mask;
+    struct proxy_entry *proxy_entry;
+
+/* Skip leading blanks and copy the command into a local buffer */
+    cmd += strspn(cmd, " \t");
+    cmd_len = strlen(cmd);
+    if (cmd_len > (sizeof(buffer) - 1))
+        return -1;
+    strcpy(buffer, cmd);
+
+/* Convert to lower case */
+    len = strlen(buffer);
+    for (i=0; i<len; i++)
+	buffer[i] = tolower((unsigned char)buffer[i]);
+
+/* Set up default values: rule 0, no encoding, TCP, port 0, and all-zero
+   addresses for server, src and dst; both masks come from IpMask(0, ...).
+   The port strings start out empty, meaning "not given". */
+    rule_index = 0;
+    proxy_type = PROXY_TYPE_ENCODE_NONE;
+    proto = IPPROTO_TCP;
+    proxy_port = 0;
+    server_addr.s_addr = 0;
+    server_port = 0;
+    src_addr.s_addr = 0;
+    IpMask(0, &src_mask);
+    dst_addr.s_addr = 0;
+    IpMask(0, &dst_mask);
+
+    str_port[0] = 0;
+    str_server_port[0] = 0;
+
+/* Parse with a state machine: a keyword picks the state for the next token */
+#define STATE_READ_KEYWORD    0
+#define STATE_READ_TYPE       1
+#define STATE_READ_PORT       2
+#define STATE_READ_SERVER     3
+#define STATE_READ_RULE       4
+#define STATE_READ_DELETE     5
+#define STATE_READ_PROTO      6
+#define STATE_READ_SRC        7
+#define STATE_READ_DST        8
+    state = STATE_READ_KEYWORD;
+    token = strsep(&res, " \t");
+    token_count = 0;
+    while (token != NULL)
+    {
+        token_count++;
+        switch (state)
+        {
+        case STATE_READ_KEYWORD:
+            if (strcmp(token, "type") == 0)
+                state = STATE_READ_TYPE;
+            else if (strcmp(token, "port") == 0)
+                state = STATE_READ_PORT;
+            else if (strcmp(token, "server") == 0)
+                state = STATE_READ_SERVER;
+            else if (strcmp(token, "rule") == 0)
+                state = STATE_READ_RULE;
+            else if (strcmp(token, "delete") == 0)
+                state = STATE_READ_DELETE;
+            else if (strcmp(token, "proto") == 0)
+                state = STATE_READ_PROTO;
+            else if (strcmp(token, "src") == 0)
+                state = STATE_READ_SRC;
+            else if (strcmp(token, "dst") == 0)
+                state = STATE_READ_DST;
+            else
+                return -1;
+            break;
+
+        case STATE_READ_TYPE:
+            if (strcmp(token, "encode_ip_hdr") == 0)
+                proxy_type = PROXY_TYPE_ENCODE_IPHDR;
+            else if (strcmp(token, "encode_tcp_stream") == 0)
+                proxy_type = PROXY_TYPE_ENCODE_TCPSTREAM;
+            else if (strcmp(token, "no_encode") == 0)
+                proxy_type = PROXY_TYPE_ENCODE_NONE;
+            else
+                return -1;
+            state = STATE_READ_KEYWORD;
+            break;
+
+        case STATE_READ_PORT:
+            strcpy(str_port, token);  /* converted after parsing, see below */
+            state = STATE_READ_KEYWORD;
+            break;
+
+        case STATE_READ_SERVER:
+            {
+                int err;
+                char *p;
+                char s[sizeof(buffer)];
+
+                p = token;
+                while (*p != ':' && *p != 0)
+                    p++;
+
+                if (*p != ':')
+                {
+                    err = IpAddr(token, &server_addr);
+                    if (err)
+                        return -1;
+                }
+                else
+                {
+                    *p = ' ';  /* split "addr:port" into two sscanf fields */
+                
+                    n = sscanf(token, "%s %s", s, str_server_port);
+                    if (n != 2)
+                        return -1;
+
+                    err = IpAddr(s, &server_addr);
+                    if (err)
+                        return -1;
+                }
+            }
+            state = STATE_READ_KEYWORD;
+            break;
+
+        case STATE_READ_RULE:
+            n = sscanf(token, "%d", &rule_index);
+            if (n != 1 || rule_index < 0)
+                return -1;
+            state = STATE_READ_KEYWORD;
+            break;
+
+        case STATE_READ_DELETE:
+            {
+                int err;
+                int rule_to_delete;
+
+                if (token_count != 2)  /* "delete" must be the first token */
+                    return -1;
+
+                n = sscanf(token, "%d", &rule_to_delete);
+                if (n != 1)
+                    return -1;
+                err = RuleNumberDelete(rule_to_delete);
+                if (err)
+                    return -1;
+                return 0;
+            }
+
+        case STATE_READ_PROTO:
+            if (strcmp(token, "tcp") == 0)
+                proto = IPPROTO_TCP;
+            else if (strcmp(token, "udp") == 0)
+                proto = IPPROTO_UDP;
+            else
+                return -1;
+            state = STATE_READ_KEYWORD;
+            break;
+
+        case STATE_READ_SRC:
+        case STATE_READ_DST:
+            {
+                int err;
+                char *p;
+                struct in_addr mask;
+                struct in_addr addr;
+
+                p = token;
+                while (*p != '/' && *p != 0)
+                    p++;
+
+                if (*p != '/')
+                {
+                     IpMask(32, &mask);  /* no "/n" suffix was given */
+                     err = IpAddr(token, &addr);
+                     if (err)
+                         return -1;
+                }
+                else
+                {
+                    int nbits;
+                    char s[sizeof(buffer)];
+
+                    *p = ' ';  /* split "addr/n" into two sscanf fields */
+                    n = sscanf(token, "%s %d", s, &nbits);
+                    if (n != 2)
+                        return -1;
+
+                    err = IpAddr(s, &addr);
+                    if (err)
+                        return -1;
+
+                    err = IpMask(nbits, &mask);
+                    if (err)
+                        return -1;
+                }
+
+                if (state == STATE_READ_SRC)
+                {
+                    src_addr = addr;
+                    src_mask = mask;
+                }
+                else 
+                {
+                    dst_addr = addr;
+                    dst_mask = mask;
+                }
+            }
+            state = STATE_READ_KEYWORD;
+            break;
+
+        default:
+            return -1;
+            break;
+        }
+
+	do {  /* skip the empty tokens produced by repeated separators */
+		token = strsep(&res, " \t");
+	} while (token != NULL && !*token);
+    }
+#undef STATE_READ_KEYWORD
+#undef STATE_READ_TYPE
+#undef STATE_READ_PORT
+#undef STATE_READ_SERVER
+#undef STATE_READ_RULE
+#undef STATE_READ_DELETE
+#undef STATE_READ_PROTO
+#undef STATE_READ_SRC
+#undef STATE_READ_DST
+/* NOTE(review): a trailing keyword with no value is not rejected above. */
+/* Convert port strings to numbers.  This needs to be done after
+   the string is parsed, because the prototype might not be designated
+   before the ports (which might be symbolic entries in /etc/services) */
+
+    if (strlen(str_port) != 0)
+    {
+        int err;
+
+        err = IpPort(str_port, proto, &proxy_port);
+        if (err)
+            return -1;
+    }
+    else
+    {
+        proxy_port = 0;
+    }
+
+    if (strlen(str_server_port) != 0)
+    { 
+        int err;
+
+        err = IpPort(str_server_port, proto, &server_port);
+        if (err)
+            return -1;
+    }
+    else
+    {
+        server_port = 0;
+    }
+
+/* Check that at least the server address has been defined */
+    if (server_addr.s_addr == 0)
+        return -1;
+
+/* Add to linked list */
+    proxy_entry = malloc(sizeof(struct proxy_entry));
+    if (proxy_entry == NULL)
+        return -1;
+
+    proxy_entry->proxy_type = proxy_type;
+    proxy_entry->rule_index = rule_index;
+    proxy_entry->proto = proto;
+    proxy_entry->proxy_port = htons(proxy_port);
+    proxy_entry->server_port = htons(server_port);
+    proxy_entry->server_addr = server_addr;
+    proxy_entry->src_addr.s_addr = src_addr.s_addr & src_mask.s_addr;
+    proxy_entry->dst_addr.s_addr = dst_addr.s_addr & dst_mask.s_addr;
+    proxy_entry->src_mask = src_mask;
+    proxy_entry->dst_mask = dst_mask;
+
+    RuleAdd(proxy_entry);
+
+    return 0;
+}
diff --git a/sys/netinet/libalias/alias_smedia.c b/sys/netinet/libalias/alias_smedia.c
new file mode 100644
index 0000000..f4cf332
--- /dev/null
+++ b/sys/netinet/libalias/alias_smedia.c
@@ -0,0 +1,432 @@
+/*
+ * alias_smedia.c
+ *
+ * Copyright (c) 2000 Whistle Communications, Inc.
+ * All rights reserved.
+ *
+ * Subject to the following obligations and disclaimer of warranty, use and
+ * redistribution of this software, in source or object code forms, with or
+ * without modifications are expressly permitted by Whistle Communications;
+ * provided, however, that:
+ * 1. Any and all reproductions of the source or object code must include the
+ *    copyright notice above and the following disclaimer of warranties; and
+ * 2. No rights are granted, in any manner or form, to use Whistle
+ *    Communications, Inc. trademarks, including the mark "WHISTLE
+ *    COMMUNICATIONS" on advertising, endorsements, or otherwise except as
+ *    such appears in the above copyright notice or in the software.
+ *
+ * THIS SOFTWARE IS BEING PROVIDED BY WHISTLE COMMUNICATIONS "AS IS", AND
+ * TO THE MAXIMUM EXTENT PERMITTED BY LAW, WHISTLE COMMUNICATIONS MAKES NO
+ * REPRESENTATIONS OR WARRANTIES, EXPRESS OR IMPLIED, REGARDING THIS SOFTWARE,
+ * INCLUDING WITHOUT LIMITATION, ANY AND ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, OR NON-INFRINGEMENT.
+ * WHISTLE COMMUNICATIONS DOES NOT WARRANT, GUARANTEE, OR MAKE ANY
+ * REPRESENTATIONS REGARDING THE USE OF, OR THE RESULTS OF THE USE OF THIS
+ * SOFTWARE IN TERMS OF ITS CORRECTNESS, ACCURACY, RELIABILITY OR OTHERWISE.
+ * IN NO EVENT SHALL WHISTLE COMMUNICATIONS BE LIABLE FOR ANY DAMAGES
+ * RESULTING FROM OR ARISING OUT OF ANY USE OF THIS SOFTWARE, INCLUDING
+ * WITHOUT LIMITATION, ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY,
+ * PUNITIVE, OR CONSEQUENTIAL DAMAGES, PROCUREMENT OF SUBSTITUTE GOODS OR
+ * SERVICES, LOSS OF USE, DATA OR PROFITS, HOWEVER CAUSED AND UNDER ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF WHISTLE COMMUNICATIONS IS ADVISED OF THE POSSIBILITY
+ * OF SUCH DAMAGE.
+ *
+ * Copyright (c) 2000  Junichi SATOH <junichi@astec.co.jp>
+ *                                   <junichi@junichi.org>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * Authors: Erik Salander <erik@whistle.com>
+ *          Junichi SATOH <junichi@astec.co.jp>
+ *                        <junichi@junichi.org>
+ *
+ * $FreeBSD$
+ */
+
+/*
+   Alias_smedia.c is meant to contain the aliasing code for streaming media
+   protocols.  It performs special processing for RTSP sessions under TCP.
+   Specifically, when a SETUP request is sent by a client, or a 200 reply
+   is sent by a server, it is intercepted and modified.  The address is  
+   changed to the gateway machine and an aliasing port is used.
+
+   More specifically, the "client_port" configuration parameter is 
+   parsed for SETUP requests.  The "server_port" configuration parameter is 
+   parsed for 200 replies emanating from a server.  This is intended to handle
+   the unicast case.
+
+   RTSP also allows a redirection of a stream to another client by using the
+   "destination" configuration parameter.  The destination config parm would
+   indicate a different IP address.  This function is NOT supported by the 
+   RTSP translation code below.
+
+   The RTSP multicast functions without any address translation intervention.
+
+   For this routine to work, the SETUP/200 must fit entirely
+   into a single TCP packet.  This is typically the case, but exceptions
+   can easily be envisioned under the actual specifications.
+
+   Probably the most troubling aspect of the approach taken here is
+   that the new SETUP/200 will typically be a different length, and
+   this causes a certain amount of bookkeeping to keep track of the
+   changes of sequence and acknowledgment numbers, since the client
+   machine is totally unaware of the modification to the TCP stream.
+
+   Initial version:  May, 2000 (eds)  
+*/
+
+#include <stdio.h>
+#include <string.h>
+#include <sys/types.h>
+#include <netinet/in_systm.h>
+#include <netinet/in.h>
+#include <netinet/ip.h>
+#include <netinet/tcp.h>
+#include <netinet/udp.h>
+
+#include "alias_local.h"
+
+#define RTSP_CONTROL_PORT_NUMBER_1 554 
+#define RTSP_CONTROL_PORT_NUMBER_2 7070 
+#define RTSP_PORT_GROUP            2
+
+#define ISDIGIT(a) (((a) >= '0') && ((a) <= '9'))
+
+static int
+search_string(char *data, int dlen, const char *search_str)
+{
+    /* Case-insensitive search of data[0..dlen) for the lower-case search_str.
+       Returns the index just past the match, or -1 if there is none.  A match
+       must be followed by at least one byte, so data[result] is in bounds. */
+    int i, k, len = strlen(search_str);
+
+    for (i = 0; i < dlen - len; i++) {
+	for (k = 0; k < len; k++) {
+	    char c = search_str[k];
+	    if (data[i + k] != c &&
+		!(c >= 'a' && c <= 'z' && data[i + k] == c - ('a' - 'A')))
+		break;
+	}
+	if (k == len && len > 0)
+	    return i + len;
+    }
+    return -1;
+}
+
+static int
+alias_rtsp_out(struct ip *pip,
+		   struct alias_link *link,
+		   char *data,
+		   const char *port_str)
+{
+    /* Rewrite each "<port_str>=<p0>[-<p1>];" that follows the "transport:"
+       keyword in the TCP payload so it names aliased UDP ports, creating
+       the links the first time.  The payload is rebuilt in newdata[] and
+       every write into it is bounds checked.  Returns 0 once the packet has
+       been rewritten, or -1 with the packet left untouched. */
+    int     hlen, tlen, dlen;
+    struct tcphdr *tc;
+    int     i, j, pos, state, port_dlen, new_dlen, delta;
+    u_short p[2], new_len, sport, eport, base_port;
+    u_short salias = 0, ealias = 0, base_alias = 0;
+    const char *transport_str = "transport:";
+    char    newdata[2048], *port_data, *port_newdata, stemp[80];
+    char    *newdata_end = newdata + sizeof(newdata);
+    int     links_created = 0, pkt_updated = 0;
+    struct alias_link *rtsp_link = NULL;
+    struct in_addr null_addr;
+
+    /* Calculate data length of TCP packet */
+    tc = (struct tcphdr *) ((char *) pip + (pip->ip_hl << 2));
+    hlen = (pip->ip_hl + tc->th_off) << 2;
+    tlen = ntohs(pip->ip_len);
+    dlen = tlen - hlen;
+
+    /* Find keyword, "Transport: "; what precedes it must fit in newdata */
+    pos = search_string(data, dlen, transport_str);
+    if (pos < 0 || pos >= (int)sizeof(newdata)) {
+	return -1;
+    }
+    port_data = data + pos;
+    port_dlen = dlen - pos;
+
+    memcpy(newdata, data, pos);
+    port_newdata = newdata + pos;
+
+    while (port_dlen > (int)strlen(port_str)) {
+	/* Find keyword, appropriate port string */
+	pos = search_string(port_data, port_dlen, port_str);
+	if (pos < 0) {
+	    break;
+	}
+	if (pos + 1 > newdata_end - port_newdata)
+	    return -1;
+	memcpy (port_newdata, port_data, pos + 1);
+	port_newdata += (pos + 1);
+
+	p[0] = p[1] = 0;
+	sport = eport = 0;
+	state = 0;
+	for (i = pos; i < port_dlen; i++) {
+	    switch(state) {
+	    case 0:
+		if (port_data[i] == '=') {
+		    state++;
+		}
+		break;
+	    case 1:
+		if (ISDIGIT(port_data[i])) {
+		    p[0] = p[0] * 10 + port_data[i] - '0';
+		} else {
+		    if (port_data[i] == ';') {
+			state = 3;
+		    }
+		    if (port_data[i] == '-') {
+			state++;
+		    }
+		}
+		break;
+	    case 2:
+		if (ISDIGIT(port_data[i])) {
+		    p[1] = p[1] * 10 + port_data[i] - '0';
+		} else {
+		    state++;
+		}
+		break;
+	    case 3:
+		base_port = p[0];
+		sport = htons(p[0]);
+		eport = htons(p[1]);
+		if (!links_created) {
+		  links_created = 1;
+		  /* Find an even numbered port number base that
+		     satisfies the contiguous number of ports we need  */
+		  null_addr.s_addr = 0;
+		  if (0 == (salias = FindNewPortGroup(null_addr,
+				    FindAliasAddress(pip->ip_src),
+				    sport, 0,
+				    RTSP_PORT_GROUP,
+				    IPPROTO_UDP, 1))) {
+#ifdef DEBUG
+		    fprintf(stderr,
+		    "PacketAlias/RTSP: Cannot find contiguous RTSP data ports\n");
+#endif
+		  } else {
+		    base_alias = ntohs(salias);
+		    for (j = 0; j < RTSP_PORT_GROUP; j++) {
+		      /* Establish link to port found in RTSP packet */
+		      rtsp_link = FindRtspOut(GetOriginalAddress(link), null_addr,
+                                htons(base_port + j), htons(base_alias + j),
+                                IPPROTO_UDP);
+		      if (rtsp_link != NULL) {
+#ifndef NO_FW_PUNCH
+		        /* Punch hole in firewall */
+		        PunchFWHole(rtsp_link);
+#endif
+		      } else {
+#ifdef DEBUG
+		        fprintf(stderr,
+		        "PacketAlias/RTSP: Cannot allocate RTSP data ports\n");
+#endif
+		        break;
+		      }
+		    }
+		  }
+                  ealias = htons(base_alias + (RTSP_PORT_GROUP - 1));
+		}
+		if (salias && rtsp_link) {
+		  /* The replacement "sssss-eeeee;" needs at most 12 bytes */
+		  if (newdata_end - port_newdata < 12)
+		    return -1;
+		  pkt_updated = 1;
+		  sprintf(stemp, "%d", ntohs(salias));
+		  memcpy(port_newdata, stemp, strlen(stemp));
+		  port_newdata += strlen(stemp);
+		  if (eport != 0) {
+		    *port_newdata = '-';
+		    port_newdata++;
+		    sprintf(stemp, "%d", ntohs(ealias));
+		    memcpy(port_newdata, stemp, strlen(stemp));
+		    port_newdata += strlen(stemp);
+		  }
+		  *port_newdata = ';';
+		  port_newdata++;
+		}
+		state++;
+		break;
+	    }
+	    if (state > 3) {
+		break;
+	    }
+	}
+	port_data += i;
+	port_dlen -= i;
+    }
+
+    /* The unparsed tail plus the terminating NUL must fit as well */
+    if (!pkt_updated || port_dlen >= newdata_end - port_newdata)
+      return -1;
+
+    memcpy (port_newdata, port_data, port_dlen);
+    port_newdata += port_dlen;
+    *port_newdata = '\0';
+
+    /* Create new packet.  NOTE(review): new_dlen can exceed dlen, and this
+       function has no maxpacketsize with which to bound the copy below. */
+    new_dlen = port_newdata - newdata;
+    memcpy (data, newdata, new_dlen);
+
+    SetAckModified(link);
+    delta = GetDeltaSeqOut(pip, link);
+    AddSeq(pip, link, delta + new_dlen - dlen);
+
+    new_len = htons(hlen + new_dlen);
+    DifferentialChecksum(&pip->ip_sum,
+			 &new_len,
+			 &pip->ip_len,
+			 1);
+    pip->ip_len = new_len;
+
+    tc->th_sum = 0;
+    tc->th_sum = TcpChecksum(pip);
+
+    return 0;
+}
+
+/* Support the protocol used by early versions of RealPlayer */
+
+static int
+alias_pna_out(struct ip *pip,
+		  struct alias_link *link,
+		  char *data,
+		  int dlen)
+{
+    /* Walk the PNA options after the first 5 payload bytes: a 2-byte id and
+       2-byte length (network order), then the value.  For ids 1 and 7 the
+       value starts with a UDP port, rewritten to its alias.  Returns 0. */
+    struct alias_link *pna_links;
+    u_short msg_id, msg_len, alias_port, port;
+    char    *work, *end;
+    struct  tcphdr *tc;
+
+    work = data + 5;
+    end = data + dlen;
+    while (work + 4 < end) {
+	memcpy(&msg_id, work, 2);
+	work += 2;
+	memcpy(&msg_len, work, 2);
+	work += 2;
+	if (ntohs(msg_id) == 0)
+	    return 0;			/* end of options */
+	if ((ntohs(msg_id) == 1) || (ntohs(msg_id) == 7)) {
+	    if (end - work < 2)
+		break;			/* port field is truncated */
+	    memcpy(&port, work, 2);
+	    pna_links = FindUdpTcpOut(pip->ip_src, GetDestAddress(link),
+				      port, 0, IPPROTO_UDP, 1);
+	    if (pna_links != NULL) {
+#ifndef NO_FW_PUNCH
+		/* Punch hole in firewall */
+		PunchFWHole(pna_links);
+#endif
+		tc = (struct tcphdr *) ((char *) pip + (pip->ip_hl << 2));
+		alias_port = GetAliasPort(pna_links);
+		memcpy(work, &alias_port, 2);
+		/* Compute TCP checksum for revised packet */
+		tc->th_sum = 0;
+		tc->th_sum = TcpChecksum(pip);
+	    }
+	}
+	work += ntohs(msg_len);
+    }
+    return 0;
+}
+
+void
+AliasHandleRtspOut(struct ip *pip, struct alias_link *link, int maxpacketsize)
+{
+    /* Outgoing TCP payload hook for RTSP/PNA.  Packets to an RTSP control
+       port are checked for a leading "SETUP" (client_port is rewritten) or
+       "PNA"; all other packets are checked for a "200 OK" reply
+       (server_port is rewritten).  maxpacketsize is currently unused. */
+    int    hlen, tlen, dlen;
+    struct tcphdr *tc;
+    char   *data;
+    const  char *setup = "SETUP", *pna = "PNA", *str200 = "200";
+    const  char *okstr = "OK", *client_port_str = "client_port";
+    const  char *server_port_str = "server_port";
+    int    i, parseOk;
+
+    tc = (struct tcphdr *)((char *)pip + (pip->ip_hl << 2));
+    hlen = (pip->ip_hl + tc->th_off) << 2;
+    tlen = ntohs(pip->ip_len);
+    dlen = tlen - hlen;
+
+    data = (char*)pip;
+    data += hlen;
+
+    /* When aliasing a client, check for the SETUP request.  Lengths are
+       compared as ints so that a negative dlen can never pass a check. */
+    if ((ntohs(tc->th_dport) == RTSP_CONTROL_PORT_NUMBER_1) ||
+      (ntohs(tc->th_dport) == RTSP_CONTROL_PORT_NUMBER_2)) {
+      if (dlen >= (int)strlen(setup)) {
+        if (memcmp(data, setup, strlen(setup)) == 0) {
+	    alias_rtsp_out(pip, link, data, client_port_str);
+	    return;
+	}
+      }
+      if (dlen >= (int)strlen(pna)) {
+	if (memcmp(data, pna, strlen(pna)) == 0) {
+	    alias_pna_out(pip, link, data, dlen);
+	}
+      }
+
+    } else {
+
+      /* When aliasing a server, check for the 200 reply
+         Accommodate varying number of blanks between 200 & OK */
+
+      if (dlen >= (int)strlen(str200)) {
+        for (parseOk = 0, i = 0;
+             i <= dlen - (int)strlen(str200);
+             i++) {
+          if (memcmp(&data[i], str200, strlen(str200)) == 0) {
+            parseOk = 1;
+            break;
+          }
+        }
+        if (parseOk) {
+          i += strlen(str200);        /* skip string found */
+          while (i < dlen && data[i] == ' ')  /* skip blank(s), in bounds */
+	    i++;
+
+          if ((dlen - i) >= (int)strlen(okstr)) {
+            if (memcmp(&data[i], okstr, strlen(okstr)) == 0)
+              alias_rtsp_out(pip, link, data, server_port_str);
+          }
+        }
+      }
+    }
+}
diff --git a/sys/netinet/libalias/alias_util.c b/sys/netinet/libalias/alias_util.c
new file mode 100644
index 0000000..b939428
--- /dev/null
+++ b/sys/netinet/libalias/alias_util.c
@@ -0,0 +1,141 @@
+/*
+    Alias_util.c contains general utilities used by other functions
+    in the packet aliasing module.  At the moment, there are functions
+    for computing IP header and TCP packet checksums.
+
+    The checksum routines are based upon example code in a Unix networking
+    text written by Stevens (sorry, I can't remember the title -- but
+    at least this is a good author).
+
+    Initial Version:  August, 1996  (cjm)
+
+    Version 1.7:  January 9, 1997
+         Added differential checksum update function.
+
+    $FreeBSD$
+*/
+
+/*
+Note: the checksum routines assume that the actual checksum word has
+been zeroed out.  If the checksum word is filled with the proper value,
+then these routines will give a result of zero (useful for testing
+purposes);
+*/
+    
+#include <sys/types.h>
+#include <netinet/in_systm.h>
+#include <netinet/in.h>
+#include <netinet/ip.h>
+#include <netinet/tcp.h>
+
+#include "alias.h"
+#include "alias_local.h"
+
+u_short
+PacketAliasInternetChecksum(u_short *ptr, int nbytes)
+{
+/* One's-complement sum of the nbytes bytes at ptr, returned complemented.
+   The sum is unsigned and a trailing odd byte is padded in a 16-bit word. */
+    unsigned int sum = 0;
+    u_short oddbyte = 0;
+
+    while (nbytes > 1)
+    {
+        sum += *ptr++;
+        nbytes -= 2;
+    }
+    if (nbytes == 1)
+    {
+        *(u_char *) &oddbyte = *(u_char *) ptr;
+        sum += oddbyte;
+    }
+    sum = (sum >> 16) + (sum & 0xffff);
+    sum += (sum >> 16);
+    return((u_short) ~sum);
+}
+
+u_short
+IpChecksum(struct ip *pip)
+{
+    int hdr_bytes = pip->ip_hl << 2;	/* header length passed as bytes */
+
+    return PacketAliasInternetChecksum((u_short *) pip, hdr_bytes);
+}
+
+u_short
+TcpChecksum(struct ip *pip)
+{
+/* Checksum of the TCP header and data following the IP header, plus the
+   pseudo-header.  The sum is unsigned so a large segment cannot overflow. */
+    u_short *ptr;
+    struct tcphdr *tc;
+    int nhdr, ntcp, nbytes;
+    unsigned int sum;
+    u_short oddbyte;
+
+    nhdr = pip->ip_hl << 2;
+    ntcp = ntohs(pip->ip_len) - nhdr;
+
+    tc = (struct tcphdr *) ((char *) pip + nhdr);
+    ptr = (u_short *) tc;
+
+/* Add up TCP header and data */
+    nbytes = ntcp;
+    sum = 0;
+    while (nbytes > 1)
+    {
+        sum += *ptr++;
+        nbytes -= 2;
+    }
+    if (nbytes == 1)
+    {
+        oddbyte = 0;
+        *(u_char *) &oddbyte = *(u_char *) ptr;
+        sum += oddbyte;
+    }
+
+/* "Pseudo-header" data */
+    ptr = (u_short *) &(pip->ip_dst);
+    sum += *ptr++;
+    sum += *ptr;
+    ptr = (u_short *) &(pip->ip_src);
+    sum += *ptr++;
+    sum += *ptr;
+    sum += htons((u_short) ntcp);
+    sum += htons((u_short) pip->ip_p);
+
+/* Roll over carry bits and return the complement */
+    sum = (sum >> 16) + (sum & 0xffff);
+    sum += (sum >> 16);
+    return((u_short) ~sum);
+}
+
+
+void
+DifferentialChecksum(u_short *cksum, u_short *new, u_short *old, int n)
+{
+/* Adjust *cksum in place for n 16-bit words whose value changed from
+   old[] to new[]. */
+    int k, negative, acc = *cksum;
+
+    for (k = 0; k < n; k++)
+    {
+        acc -= new[k];
+        acc += old[k];
+    }
+
+    negative = (acc < 0);
+    if (negative)
+        acc = -acc;
+
+    /* Fold the carries back into the low 16 bits. */
+    acc = (acc & 0xffff) + (acc >> 16);
+    acc += acc >> 16;
+
+    /* A negative running total is stored complemented. */
+    if (negative)
+        *cksum = (u_short) ~acc;
+    else
+        *cksum = (u_short) acc;
+}
+
diff --git a/sys/netinet/libalias/libalias.3 b/sys/netinet/libalias/libalias.3
new file mode 100644
index 0000000..587e3ca
--- /dev/null
+++ b/sys/netinet/libalias/libalias.3
@@ -0,0 +1,958 @@
+.\" $FreeBSD$
+.\"
+.Dd April 13, 2000
+.Dt LIBALIAS 3
+.Os FreeBSD
+.Sh NAME
+.Nm libalias
+.Nd packet aliasing library for masquerading and network address translation
+.Sh SYNOPSIS
+.Fd #include <sys/types.h>
+.Fd #include <netinet/in.h>
+.Fd #include <alias.h>
+.Pp
+Function prototypes are given in the main body of the text.
+.Sh DESCRIPTION
+The
+.Nm
+library is a collection of functions for aliasing and de-aliasing of IP
+packets, intended for masquerading and network address translation (NAT).
+.Sh INTRODUCTION
+This library is a moderately portable set of functions designed to assist
+in the process of IP masquerading and network address translation.
+Outgoing packets from a local network with unregistered IP addresses can
+be aliased to appear as if they came from an accessible IP address.
+Incoming packets are then de-aliased so that they are sent to the correct
+machine on the local network.
+.Pp
+A certain amount of flexibility is built into the packet aliasing engine.
+In the simplest mode of operation, a many-to-one address mapping takes
+place between local network and the packet aliasing host.
+This is known as IP masquerading.
+In addition, one-to-one mappings between local and public addresses can
+also be implemented, which is known as static NAT.
+In between these extremes, different groups of private addresses can be
+linked to different public addresses, comprising several distinct
+many-to-one mappings.
+Also, a given public address and port can be statically redirected to a
+private address/port.
+.Pp
+The packet aliasing engine was designed to operate in user space outside
+of the kernel, without any access to private kernel data structures, but
+the source code can also be ported to a kernel environment.
+.Sh INITIALIZATION AND CONTROL
+Two special functions,
+.Fn PacketAliasInit
+and
+.Fn PacketAliasSetAddress ,
+must always be called before any packet handling may be performed.
+In addition, the operating mode of the packet aliasing engine can be
+customized by calling
+.Fn PacketAliasSetMode .
+.Pp
+.Ft void
+.Fn PacketAliasInit void
+.Bd -ragged -offset indent
+This function has no arguments or return value and is used to initialize
+internal data structures.
+The following mode bits are always set after calling
+.Fn PacketAliasInit .
+See the description of
+.Fn PacketAliasSetMode
+below for the meaning of these mode bits.
+.Pp
+.Bl -item -offset indent -compact
+.It
+.Dv PKT_ALIAS_SAME_PORTS
+.It
+.Dv PKT_ALIAS_USE_SOCKETS
+.It
+.Dv PKT_ALIAS_RESET_ON_ADDR_CHANGE
+.El
+.Pp
+This function will always return the packet aliasing engine to the same
+initial state.
+.Fn PacketAliasSetAddress
+must be called afterwards, and any desired changes from the default mode
+bits listed above require a call to
+.Fn PacketAliasSetMode .
+.Pp
+It is mandatory that this function be called at the beginning of a program
+prior to any packet handling.
+.Ed
+.Pp
+.Ft void
+.Fn PacketAliasUninit void
+.Bd -ragged -offset indent
+This function has no arguments or return value and is used to clear any
+resources attached to internal data structures.
+.Pp
+This function should be called when a program stops using the aliasing
+engine; it does, amongst other things, clear out any firewall holes.
+To provide backwards compatibility and extra security, it is added to
+the
+.Xr atexit 3
+chain by
+.Fn PacketAliasInit .
+Calling it multiple times is harmless.
+.Ed
+.Pp
+.Ft void
+.Fn PacketAliasSetAddress "struct in_addr addr"
+.Bd -ragged -offset indent
+This function sets the source address to which outgoing packets from the
+local area network are aliased.
+All outgoing packets are re-mapped to this address unless overridden by a
+static address mapping established by
+.Fn PacketAliasRedirectAddr .
+.Pp
+If the
+.Dv PKT_ALIAS_RESET_ON_ADDR_CHANGE
+mode bit is set (the default mode of operation), then the internal aliasing
+link tables will be reset any time the aliasing address changes.
+This is useful for interfaces such as
+.Xr ppp 8 ,
+where the IP
+address may or may not change on successive dial-up attempts.
+.Pp
+If the
+.Dv PKT_ALIAS_RESET_ON_ADDR_CHANGE
+mode bit is set to zero, this function can also be used to dynamically change
+the aliasing address on a packet to packet basis (it is a low overhead call).
+.Pp
+It is mandatory that this function be called prior to any packet handling.
+.Ed
+.Pp
+.Ft unsigned int
+.Fn PacketAliasSetMode "unsigned int flags" "unsigned int mask"
+.Bd -ragged -offset indent
+This function sets or clears mode bits
+according to the value of
+.Fa flags .
+Only bits marked in
+.Fa mask
+are affected.
+The following mode bits are defined in
+.Aq Pa alias.h :
+.Bl -tag -width indent
+.It Dv PKT_ALIAS_LOG
+Enables logging into
+.Pa /var/log/alias.log .
+Each time an aliasing link is created or deleted, the log file is appended
+with the current number of ICMP, TCP and UDP links.
+Mainly useful for debugging when the log file is viewed continuously with
+.Xr tail 1 .
+.It Dv PKT_ALIAS_DENY_INCOMING
+If this mode bit is set, all incoming packets associated with new TCP
+connections or new UDP transactions will be marked for being ignored
+.Po
+.Fn PacketAliasIn
+returns
+.Dv PKT_ALIAS_IGNORED
+code
+.Pc
+by the calling program.
+Response packets to connections or transactions initiated from the packet
+aliasing host or local network will be unaffected.
+This mode bit is useful for implementing a one-way firewall.
+.It Dv PKT_ALIAS_SAME_PORTS
+If this mode bit is set, the packet aliasing engine will attempt to leave
+the alias port numbers unchanged from the actual local port numbers.
+This can be done as long as the quintuple (proto, alias addr, alias port,
+remote addr, remote port) is unique.
+If a conflict exists, a new aliasing port number is chosen even if this
+mode bit is set.
+.It Dv PKT_ALIAS_USE_SOCKETS
+This bit should be set when the packet aliasing host originates network
+traffic as well as forwards it.
+When the packet aliasing host is waiting for a connection from an unknown
+host address or unknown port number (e.g. an FTP data connection), this
+mode bit specifies that a socket be allocated as a place holder to prevent
+port conflicts.
+Once a connection is established, usually within a minute or so, the socket
+is closed.
+.It Dv PKT_ALIAS_UNREGISTERED_ONLY
+If this mode bit is set, traffic on the local network which does not
+originate from unregistered address spaces will be ignored.
+Standard Class A, B and C unregistered addresses are:
+.Bd -literal -offset indent
+10.0.0.0     ->  10.255.255.255   (Class A subnet)
+172.16.0.0   ->  172.31.255.255   (Class B subnets)
+192.168.0.0  ->  192.168.255.255  (Class C subnets)
+.Ed
+.Pp
+This option is useful in the case that the packet aliasing host has both
+registered and unregistered subnets on different interfaces.
+The registered subnet is fully accessible to the outside world, so traffic
+from it does not need to be passed through the packet aliasing engine.
+.It Dv PKT_ALIAS_RESET_ON_ADDR_CHANGE
+When this mode bit is set and
+.Fn PacketAliasSetAddress
+is called to change the aliasing address, the internal link table of the
+packet aliasing engine will be cleared.
+This operating mode is useful for
+.Xr ppp 8
+links where the interface address can sometimes change or remain the same
+between dial-up attempts.
+If this mode bit is not set, the link table will never be reset in the event
+of an address change.
+.It Dv PKT_ALIAS_PUNCH_FW
+This option makes
+.Nm
+`punch holes' in an
+.Xr ipfirewall 4
+based firewall for FTP/IRC DCC connections.
+The holes punched are bound by from/to IP address and port; it will not be
+possible to use a hole for another connection.
+A hole is removed when the connection that uses it dies.
+To cater to unexpected death of a program using
+.Nm
+(e.g. kill -9),
+changing the state of the flag will clear the entire firewall range
+allocated for holes.
+This will also happen on the initial call to
+.Fn PacketAliasSetFWBase .
+This call must happen prior to setting this flag.
+.It Dv PKT_ALIAS_REVERSE
+This option makes
+.Nm
+reverse the way it handles incoming and outgoing packets, allowing it
+to be fed with data that passes through the internal interface rather
+than the external one.
+.It Dv PKT_ALIAS_PROXY_ONLY
+This option tells
+.Nm
+to obey transparent proxy rules only.
+Normal packet aliasing is not performed.
+See
+.Fn PacketAliasProxyRule
+below for details.
+.El
+.Ed
+.Pp
+.Ft void
+.Fn PacketAliasSetFWBase "unsigned int base" "unsigned int num"
+.Bd -ragged -offset indent
+Set firewall range allocated for punching firewall holes (with the
+.Dv PKT_ALIAS_PUNCH_FW
+flag).
+The range will be cleared for all rules on initialization.
+.Ed
+.Sh PACKET HANDLING
+The packet handling functions are used to modify incoming (remote to local)
+and outgoing (local to remote) packets.
+The calling program is responsible for receiving and sending packets via
+network interfaces.
+.Pp
+Along with
+.Fn PacketAliasInit
+and
+.Fn PacketAliasSetAddress ,
+the two packet handling functions,
+.Fn PacketAliasIn
+and
+.Fn PacketAliasOut ,
+comprise the minimal set of functions needed for a basic IP masquerading
+implementation.
+.Pp
+.Ft int
+.Fn PacketAliasIn "char *buffer" "int maxpacketsize"
+.Bd -ragged -offset indent
+An incoming packet coming from a remote machine to the local network is
+de-aliased by this function.
+The IP packet is pointed to by
+.Fa buffer ,
+and
+.Fa maxpacketsize
+indicates the size of the data structure containing the packet and should
+be at least as large as the actual packet size.
+.Pp
+Return codes:
+.Bl -tag -width indent
+.It Dv PKT_ALIAS_OK
+The packet aliasing process was successful.
+.It Dv PKT_ALIAS_IGNORED
+The packet was ignored and not de-aliased.
+This can happen if the protocol is unrecognized, if an ICMP message
+type is not handled, or if incoming packets for new connections are being
+ignored (if
+.Dv PKT_ALIAS_DENY_INCOMING
+mode bit was set by
+.Fn PacketAliasSetMode ) .
+.It Dv PKT_ALIAS_UNRESOLVED_FRAGMENT
+This is returned when a fragment cannot be resolved because the header
+fragment has not been sent yet.
+In this situation, fragments must be saved with
+.Fn PacketAliasSaveFragment
+until a header fragment is found.
+.It Dv PKT_ALIAS_FOUND_HEADER_FRAGMENT
+The packet aliasing process was successful, and a header fragment was found.
+This is a signal to retrieve any unresolved fragments with
+.Fn PacketAliasGetFragment
+and de-alias them with
+.Fn PacketAliasFragmentIn .
+.It Dv PKT_ALIAS_ERROR
+An internal error within the packet aliasing engine occurred.
+.El
+.Ed
+.Pp
+.Ft int
+.Fn PacketAliasOut "char *buffer" "int maxpacketsize"
+.Bd -ragged -offset indent
+An outgoing packet coming from the local network to a remote machine is
+aliased by this function.
+The IP packet is pointed to by
+.Fa buffer ,
+and
+.Fa maxpacketsize
+indicates the maximum packet size permissible should the packet length be
+changed.
+IP encoding protocols place address and port information in the encapsulated
+data stream which has to be modified and can account for changes in packet
+length.
+Well known examples of such protocols are FTP and IRC DCC.
+.Pp
+Return codes:
+.Bl -tag -width indent
+.It Dv PKT_ALIAS_OK
+The packet aliasing process was successful.
+.It Dv PKT_ALIAS_IGNORED
+The packet was ignored and not aliased.
+This can happen if the protocol is unrecognized, or if an ICMP message
+type is not handled.
+.It Dv PKT_ALIAS_ERROR
+An internal error within the packet aliasing engine occurred.
+.El
+.Ed
+.Sh PORT AND ADDRESS REDIRECTION
+The functions described in this section allow machines on the local network
+to be accessible in some degree to new incoming connections from the external
+network.
+Individual ports can be re-mapped or static network address translations can
+be designated.
+.Pp
+.Ft struct alias_link *
+.Fo PacketAliasRedirectPort
+.Fa "struct in_addr local_addr"
+.Fa "u_short local_port"
+.Fa "struct in_addr remote_addr"
+.Fa "u_short remote_port"
+.Fa "struct in_addr alias_addr"
+.Fa "u_short alias_port"
+.Fa "u_char proto"
+.Fc
+.Bd -ragged -offset indent
+This function specifies that traffic from a given remote address/port to
+an alias address/port be redirected to a specified local address/port.
+The parameter
+.Fa proto
+can be either
+.Dv IPPROTO_TCP
+or
+.Dv IPPROTO_UDP ,
+as defined in
+.Aq Pa netinet/in.h .
+.Pp
+If
+.Fa local_addr
+or
+.Fa alias_addr
+is zero, this indicates that the packet aliasing address as established
+by
+.Fn PacketAliasSetAddress
+is to be used.
+Even if
+.Fn PacketAliasSetAddress
+is called to change the address after
+.Fn PacketAliasRedirectPort
+is called, a zero reference will track this change.
+.Pp
+If the link is further set up to operate for load sharing, then
+.Fa local_addr
+and
+.Fa local_port
+are ignored, and are selected dynamically from the server pool, as described in
+.Fn PacketAliasAddServer
+below.
+.Pp
+If
+.Fa remote_addr
+is zero, this indicates to redirect packets from any remote address.
+Likewise, if
+.Fa remote_port
+is zero, this indicates to redirect packets originating from any remote
+port number.
+Almost always, the remote port specification will be zero, but non-zero
+remote addresses can sometimes be useful for firewalling.
+If two calls to
+.Fn PacketAliasRedirectPort
+overlap in their address/port specifications, then the most recent call
+will have precedence.
+.Pp
+This function returns a pointer which can subsequently be used by
+.Fn PacketAliasRedirectDelete .
+If
+.Dv NULL
+is returned, then the function call did not complete successfully.
+.Pp
+All port numbers should be in network byte order, so it is necessary
+to use
+.Xr htons 3
+to convert these parameters from internally readable numbers to network byte
+order.
+Addresses are also in network byte order, which is implicit in the use of the
+.Fa struct in_addr
+data type.
+.Ed
+.Pp
+.Ft struct alias_link *
+.Fo PacketAliasRedirectAddr
+.Fa "struct in_addr local_addr"
+.Fa "struct in_addr alias_addr"
+.Fc
+.Bd -ragged -offset indent
+This function designates that all incoming traffic to
+.Fa alias_addr
+be redirected to
+.Fa local_addr .
+Similarly, all outgoing traffic from
+.Fa local_addr
+is aliased to
+.Fa alias_addr .
+.Pp
+If
+.Fa local_addr
+or
+.Fa alias_addr
+is zero, this indicates that the packet aliasing address as established by
+.Fn PacketAliasSetAddress
+is to be used.
+Even if
+.Fn PacketAliasSetAddress
+is called to change the address after
+.Fn PacketAliasRedirectAddr
+is called, a zero reference will track this change.
+.Pp
+If the link is further set up to operate for load sharing, then
+.Fa local_addr
+is ignored, and is selected dynamically from the server pool, as described in
+.Fn PacketAliasAddServer
+below.
+.Pp
+If subsequent calls to
+.Fn PacketAliasRedirectAddr
+use the same aliasing address, all new incoming traffic to this aliasing
+address will be redirected to the local address made in the last function
+call.
+New traffic generated by any of the local machines, designated in the
+several function calls, will be aliased to the same address.
+Consider the following example:
+.Bd -literal -offset indent
+PacketAliasRedirectAddr(inet_aton("192.168.0.2"),
+                        inet_aton("141.221.254.101"));
+PacketAliasRedirectAddr(inet_aton("192.168.0.3"),
+                        inet_aton("141.221.254.101"));
+PacketAliasRedirectAddr(inet_aton("192.168.0.4"),
+                        inet_aton("141.221.254.101"));
+.Ed
+.Pp
+Any outgoing connections such as
+.Xr telnet 1
+or
+.Xr ftp 1
+from 192.168.0.2, 192.168.0.3 and 192.168.0.4 will appear to come from
+141.221.254.101.
+Any incoming connections to 141.221.254.101 will be directed to 192.168.0.4.
+.Pp
+Any calls to
+.Fn PacketAliasRedirectPort
+will have precedence over address mappings designated by
+.Fn PacketAliasRedirectAddr .
+.Pp
+This function returns a pointer which can subsequently be used by
+.Fn PacketAliasRedirectDelete .
+If
+.Dv NULL
+is returned, then the function call did not complete successfully.
+.Ed
+.Pp
+.Ft int
+.Fo PacketAliasAddServer
+.Fa "struct alias_link *link"
+.Fa "struct in_addr addr"
+.Fa "u_short port"
+.Fc
+.Bd -ragged -offset indent
+This function sets the
+.Fa link
+up for Load Sharing using IP Network Address Translation (RFC 2391, LSNAT).
+LSNAT operates as follows.
+A client attempts to access a server by using the server virtual address.
+The LSNAT router transparently redirects the request to one of the hosts
+in the server pool, selected using a real-time load sharing algorithm.
+Multiple sessions may be initiated from the same client, and each session
+could be directed to a different host based on load balance across server
+pool hosts at the time.
+If load share is desired for just a few specific services, the configuration
+on LSNAT could be defined to restrict load share for just the services
+desired.
+.Pp
+Currently, only the simplest selection algorithm is implemented, where a
+host is selected on a round-robin basis only, without regard to load on
+the host.
+.Pp
+First, the
+.Fa link
+is created by either
+.Fn PacketAliasRedirectPort
+or
+.Fn PacketAliasRedirectAddr .
+Then,
+.Fn PacketAliasAddServer
+is called multiple times to add entries to the
+.Fa link Ns 's
+server pool.
+.Pp
+For links created with
+.Fn PacketAliasRedirectAddr ,
+the
+.Fa port
+argument is ignored and could have any value, e.g. htons(~0).
+.Pp
+This function returns 0 on success, -1 otherwise.
+.Ed
+.Pp
+.Ft void
+.Fn PacketAliasRedirectDelete "struct alias_link *link"
+.Bd -ragged -offset indent
+This function will delete a specific static redirect rule entered by
+.Fn PacketAliasRedirectPort
+or
+.Fn PacketAliasRedirectAddr .
+The parameter
+.Fa link
+is the pointer returned by either of the redirection functions.
+If an invalid pointer is passed to
+.Fn PacketAliasRedirectDelete ,
+then a program crash or unpredictable operation could result, so it is
+necessary to be careful using this function.
+.Ed
+.Pp
+.Ft int
+.Fn PacketAliasProxyRule "const char *cmd"
+.Bd -ragged -offset indent
+The passed
+.Fa cmd
+string consists of one or more pairs of words.
+The first word in each pair is a token and the second is the value that
+should be applied for that token.
+Tokens and their argument types are as follows:
+.Bl -tag -width indent
+.It Cm type encode_ip_hdr | encode_tcp_stream | no_encode
+In order to support transparent proxying, it is necessary to somehow
+pass the original address and port information into the new destination
+server.
+If
+.Cm encode_ip_hdr
+is specified, the original address and port is passed as an extra IP
+option.
+If
+.Cm encode_tcp_stream
+is specified, the original address and port is passed as the first
+piece of data in the TCP stream in the format
+.Dq DEST Ar IP port .
+.It Cm port Ar portnum
+Only packets with the destination port
+.Ar portnum
+are proxied.
+.It Cm server Ar host Ns Xo
+.Op : Ns Ar portnum
+.Xc
+This specifies the
+.Ar host
+and
+.Ar portnum
+that the data is to be redirected to.
+.Ar host
+must be an IP address rather than a DNS host name.
+If
+.Ar portnum
+is not specified, the destination port number is not changed.
+.Pp
+The
+.Ar server
+specification is mandatory unless the
+.Cm delete
+command is being used.
+.It Cm rule Ar index
+Normally, each call to
+.Fn PacketAliasProxyRule
+inserts the next rule at the start of a linear list of rules.
+If an
+.Ar index
+is specified, the new rule will be checked after all rules with lower
+indices.
+Calls to
+.Fn PacketAliasProxyRule
+that do not specify a rule are assigned rule 0.
+.It Cm delete Ar index
+This token and its argument MUST NOT be used with any other tokens.
+When used, all existing rules with the given
+.Ar index
+are deleted.
+.It Cm proto tcp | udp
+If specified, only packets of the given protocol type are matched.
+.It Cm src Ar IP Ns Xo
+.Op / Ns Ar bits
+.Xc
+If specified, only packets with a source address matching the given
+.Ar IP
+are matched.
+If
+.Ar bits
+is also specified, then the first
+.Ar bits
+bits of
+.Ar IP
+are taken as a network specification, and all IP addresses from that
+network will be matched.
+.It Cm dst Ar IP Ns Xo
+.Op / Ns Ar bits
+.Xc
+If specified, only packets with a destination address matching the given
+.Ar IP
+are matched.
+If
+.Ar bits
+is also specified, then the first
+.Ar bits
+bits of
+.Ar IP
+are taken as a network specification, and all IP addresses from that
+network will be matched.
+.El
+.Pp
+This function is usually used to redirect outgoing connections for
+internal machines that are not permitted certain types of internet
+access, or to restrict access to certain external machines.
+.Ed
+.Pp
+.Ft struct alias_link *
+.Fo PacketAliasRedirectProto
+.Fa "struct in_addr local_addr"
+.Fa "struct in_addr remote_addr"
+.Fa "struct in_addr alias_addr"
+.Fa "u_char proto"
+.Fc
+.Bd -ragged -offset indent
+This function specifies that any IP packet with protocol number of
+.Fa proto
+from a given remote address to an alias address be
+redirected to a specified local address.
+.Pp
+If
+.Fa local_addr
+or
+.Fa alias_addr
+is zero, this indicates that the packet aliasing address as established
+by
+.Fn PacketAliasSetAddress
+is to be used.
+Even if
+.Fn PacketAliasSetAddress
+is called to change the address after
+.Fn PacketAliasRedirectProto
+is called, a zero reference will track this change.
+.Pp
+If
+.Fa remote_addr
+is zero, this indicates to redirect packets from any remote address.
+Non-zero remote addresses can sometimes be useful for firewalling.
+.Pp
+If two calls to
+.Fn PacketAliasRedirectProto
+overlap in their address specifications, then the most recent call
+will have precedence.
+.Pp
+This function returns a pointer which can subsequently be used by
+.Fn PacketAliasRedirectDelete .
+If
+.Dv NULL
+is returned, then the function call did not complete successfully.
+.Ed
+.Sh FRAGMENT HANDLING
+The functions in this section are used to deal with incoming fragments.
+.Pp
+Outgoing fragments are handled within
+.Fn PacketAliasOut
+by changing the address according to any applicable mapping set by
+.Fn PacketAliasRedirectAddr ,
+or the default aliasing address set by
+.Fn PacketAliasSetAddress .
+.Pp
+Incoming fragments are handled in one of two ways.
+If the header of a fragmented IP packet has already been seen, then all
+subsequent fragments will be re-mapped in the same manner the header
+fragment was.
+Fragments which arrive before the header are saved and then retrieved
+once the header fragment has been resolved.
+.Pp
+.Ft int
+.Fn PacketAliasSaveFragment "char *ptr"
+.Bd -ragged -offset indent
+When
+.Fn PacketAliasIn
+returns
+.Dv PKT_ALIAS_UNRESOLVED_FRAGMENT ,
+this function can be used to save the pointer to the unresolved fragment.
+.Pp
+It is implicitly assumed that
+.Fa ptr
+points to a block of memory allocated by
+.Xr malloc 3 .
+If the fragment is never resolved, the packet aliasing engine will
+automatically free the memory after a timeout period.
+[Eventually this function should be modified so that a callback function
+for freeing memory is passed as an argument.]
+.Pp
+This function returns
+.Dv PKT_ALIAS_OK
+if it was successful and
+.Dv PKT_ALIAS_ERROR
+if there was an error.
+.Ed
+.Pp
+.Ft char *
+.Fn PacketAliasGetFragment "char *buffer"
+.Bd -ragged -offset indent
+This function can be used to retrieve fragment pointers saved by
+.Fn PacketAliasSaveFragment .
+The IP header fragment pointed to by
+.Fa buffer
+is the header fragment indicated when
+.Fn PacketAliasIn
+returns
+.Dv PKT_ALIAS_FOUND_HEADER_FRAGMENT .
+Once a fragment pointer is retrieved, it becomes the calling program's
+responsibility to free the dynamically allocated memory for the fragment.
+.Pp
+.Fn PacketAliasGetFragment
+can be called sequentially until there are no more fragments available,
+at which time it returns
+.Dv NULL .
+.Ed
+.Pp
+.Ft void
+.Fn PacketAliasFragmentIn "char *header" "char *fragment"
+.Bd -ragged -offset indent
+When a fragment is retrieved with
+.Fn PacketAliasGetFragment ,
+it can then be de-aliased with a call to
+.Fn PacketAliasFragmentIn .
+The
+.Fa header
+argument is the pointer to a header fragment used as a template, and
+.Fa fragment
+is the pointer to the packet to be de-aliased.
+.Ed
+.Sh MISCELLANEOUS FUNCTIONS
+.Ft void
+.Fn PacketAliasSetTarget "struct in_addr addr"
+.Bd -ragged -offset indent
+When an incoming packet not associated with any pre-existing aliasing link
+arrives at the host machine, it will be sent to the address indicated by a
+call to
+.Fn PacketAliasSetTarget .
+.Pp
+If this function is called with an
+.Dv INADDR_NONE
+address argument, then all new incoming packets go to the address set by
+.Fn PacketAliasSetAddress .
+.Pp
+If this function is not called, or is called with an
+.Dv INADDR_ANY
+address argument, then all new incoming packets go to the address specified
+in the packet.
+This allows external machines to talk directly to internal machines if they
+can route packets to the machine in question.
+.Ed
+.Pp
+.Ft int
+.Fn PacketAliasCheckNewLink void
+.Bd -ragged -offset indent
+This function returns a non-zero value when a new aliasing link is created.
+In circumstances where incoming traffic is being sequentially sent to
+different local servers, this function can be used to trigger when
+.Fn PacketAliasSetTarget
+is called to change the default target address.
+.Ed
+.Pp
+.Ft u_short
+.Fn PacketAliasInternetChecksum "u_short *buffer" "int nbytes"
+.Bd -ragged -offset indent
+This is a utility function that does not seem to be available elsewhere and
+is included as a convenience.
+It computes the internet checksum, which is used in both IP and
+protocol-specific headers (TCP, UDP, ICMP).
+.Pp
+The
+.Fa buffer
+argument points to the data block to be checksummed, and
+.Fa nbytes
+is the number of bytes.
+The 16-bit checksum field should be zeroed before computing the checksum.
+.Pp
+Checksums can also be verified by operating on a block of data including
+its checksum.
+If the checksum is valid,
+.Fn PacketAliasInternetChecksum
+will return zero.
+.Ed
+.Pp
+.Ft int
+.Fn PacketUnaliasOut "char *buffer" "int maxpacketsize"
+.Bd -ragged -offset indent
+An outgoing packet, which has already been aliased,
+has its private address/port information restored by this function.
+The IP packet is pointed to by
+.Fa buffer ,
+and
+.Fa maxpacketsize
+is provided for error checking purposes.
+This function can be used if an already-aliased packet needs to have its
+original IP header restored for further processing (e.g. logging).
+.Ed
+.Sh BUGS
+PPTP aliasing does not work when more than one internal client
+connects to the same external server at the same time, because
+PPTP requires a single TCP control connection to be established
+between any two IP addresses.
+.Sh AUTHORS
+.An Charles Mott Aq cmott@scientech.com ,
+versions 1.0 - 1.8, 2.0 - 2.4.
+.An Eivind Eklund Aq eivind@FreeBSD.org ,
+versions 1.8b, 1.9 and 2.5.
+Added IRC DCC support as well as contributing a number of architectural
+improvements; added the firewall bypass for FTP/IRC DCC.
+.An Erik Salander Aq erik@whistle.com
+added support for PPTP and RTSP.
+.An Junichi Satoh Aq junichi@junichi.org
+added support for RTSP/PNA.
+.Sh ACKNOWLEDGMENTS
+Listed below, in approximate chronological order, are individuals who
+have provided valuable comments and/or debugging assistance.
+.Pp
+.Bd -ragged -offset indent
+.An -split
+.An Gary Roberts
+.An Tom Torrance
+.An Reto Burkhalter
+.An Martin Renters
+.An Brian Somers
+.An Paul Traina
+.An Ari Suutari
+.An Dave Remien
+.An J. Fortes
+.An Andrzej Bialecki
+.An Gordon Burditt
+.Ed
+.Sh CONCEPTUAL BACKGROUND
+This section is intended for those who are planning to modify the source
+code or want to create somewhat esoteric applications using the packet
+aliasing functions.
+.Pp
+The conceptual framework under which the packet aliasing engine operates
+is described here.
+Central to the discussion is the idea of an
+.Em aliasing link
+which describes the relationship for a given packet transaction between
+the local machine, aliased identity and remote machine.
+It is discussed how such links come into existence and are destroyed.
+.Ss ALIASING LINKS
+There is a notion of an
+.Em aliasing link ,
+which is a 7-tuple describing a specific translation:
+.Bd -literal -offset indent
+(local addr, local port, alias addr, alias port,
+ remote addr, remote port, protocol)
+.Ed
+.Pp
+Outgoing packets have the local address and port number replaced with the
+alias address and port number.
+Incoming packets undergo the reverse process.
+The packet aliasing engine attempts to match packets against an internal
+table of aliasing links to determine how to modify a given IP packet.
+Both the IP header and protocol dependent headers are modified as necessary.
+Aliasing links are created and deleted as necessary according to network
+traffic.
+.Pp
+Protocols can be TCP, UDP or even ICMP in certain circumstances.
+(Some types of ICMP packets can be aliased according to sequence or ID
+number which acts as an equivalent port number for identifying how
+individual packets should be handled.)
+.Pp
+Each aliasing link must have a unique combination of the following five
+quantities: alias address/port, remote address/port and protocol.
+This ensures that several machines on a local network can share the
+same aliasing IP address.
+In cases where conflicts might arise, the aliasing port is chosen so that
+uniqueness is maintained.
+.Ss STATIC AND DYNAMIC LINKS
+Aliasing links can either be static or dynamic.
+Static links persist indefinitely and represent fixed rules for translating
+IP packets.
+Dynamic links come into existence for a specific TCP connection or UDP
+transaction or ICMP ECHO sequence.
+For the case of TCP, the connection can be monitored to see when the
+associated aliasing link should be deleted.
+Aliasing links for UDP transactions (and ICMP ECHO and TIMESTAMP requests)
+work on a simple timeout rule.
+When no activity is observed on a dynamic link for a certain amount of time
+it is automatically deleted.
+Timeout rules also apply to TCP connections which do not open or close
+properly.
+.Ss PARTIALLY SPECIFIED ALIASING LINKS
+Aliasing links can be partially specified, meaning that the remote address
+and/or remote port are unknown.
+In this case, when a packet matching the incomplete specification is found,
+a fully specified dynamic link is created.
+If the original partially specified link is dynamic, it will be deleted
+after the fully specified link is created, otherwise it will persist.
+.Pp
+For instance, a partially specified link might be
+.Bd -literal -offset indent
+(192.168.0.4, 23, 204.228.203.215, 8066, 0, 0, tcp)
+.Ed
+.Pp
+The zeros denote unspecified components for the remote address and port.
+If this link were static it would have the effect of redirecting all
+incoming traffic arriving at port 8066 of 204.228.203.215 to port 23 (telnet)
+of machine 192.168.0.4 on the local network.
+Each individual telnet connection would initiate the creation of a distinct
+dynamic link.
+.Ss DYNAMIC LINK CREATION
+In addition to aliasing links, there are also address mappings that can be
+stored within the internal data table of the packet aliasing mechanism.
+.Bd -literal -offset indent
+(local addr, alias addr)
+.Ed
+.Pp
+Address mappings are searched when creating new dynamic links.
+.Pp
+All outgoing packets from the local network automatically create a dynamic
+link if they do not match an already existing fully specified link.
+If an address mapping exists for the outgoing packet, this determines
+the alias address to be used.
+If no mapping exists, then a default address, usually the address of the
+packet aliasing host, is used.
+If necessary, this default address can be changed as often as each individual
+packet arrives.
+.Pp
+The aliasing port number is determined such that the new dynamic link does
+not conflict with any existing links.
+In the default operating mode, the packet aliasing engine attempts to set
+the aliasing port equal to the local port number.
+If this results in a conflict, then port numbers are randomly chosen until
+a unique aliasing link can be established.
+In an alternate operating mode, the first choice of an aliasing port is also
+random and unrelated to the local port number.
diff --git a/sys/netinet/mlfk_ipl.c b/sys/netinet/mlfk_ipl.c
new file mode 100644
index 0000000..2a51d7d
--- /dev/null
+++ b/sys/netinet/mlfk_ipl.c
@@ -0,0 +1,196 @@
+/*
+ * Copyright 1999 Guido van Rooij.  All rights reserved.
+ * 
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met:
+ *  1. Redistributions of source code must retain the above copyright
+ *     notice, this list of conditions and the following disclaimer.
+ *  2. Redistributions in binary form must reproduce the above copyright notice,
+ *     this list of conditions and the following disclaimer in the documentation
+ *     and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER ``AS IS'' AND ANY EXPRESS
+ * OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED.  IN NO EVENT SHALL THE HOLDER OR CONTRIBUTORS BE LIABLE FOR
+ * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+ * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+ * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/kernel.h>
+#include <sys/module.h>
+#include <sys/conf.h>
+#include <sys/socket.h>
+#include <sys/sysctl.h>
+#include <net/if.h>
+#include <netinet/in_systm.h>
+#include <netinet/in.h>
+#include <netinet/ip.h>
+#if (__FreeBSD_version >= 199511)
+# include <net/route.h>
+# include <netinet/ip_var.h>
+# include <netinet/tcp.h>
+# include <netinet/tcpip.h>
+#endif
+
+
+#include <netinet/ipl.h>
+#include <netinet/ip_compat.h>
+#include <netinet/ip_fil.h>
+#include <netinet/ip_state.h>
+#include <netinet/ip_nat.h>
+#include <netinet/ip_auth.h>
+#include <netinet/ip_frag.h>
+#include <netinet/ip_proxy.h>
+
+static dev_t ipf_devs[IPL_LOGMAX + 1];	/* device nodes made in ipfilter_modevent(), indexed by IPL_LOG* unit */
+
+SYSCTL_DECL(_net_inet);
+SYSCTL_NODE(_net_inet, OID_AUTO, ipf, CTLFLAG_RW, 0, "IPF");	/* parent node for the integer knobs below */
+SYSCTL_INT(_net_inet_ipf, OID_AUTO, fr_flags, CTLFLAG_RW, &fr_flags, 0, "");
+SYSCTL_INT(_net_inet_ipf, OID_AUTO, fr_pass, CTLFLAG_RW, &fr_pass, 0, "");
+SYSCTL_INT(_net_inet_ipf, OID_AUTO, fr_active, CTLFLAG_RD, &fr_active, 0, "");
+SYSCTL_INT(_net_inet_ipf, OID_AUTO, fr_tcpidletimeout, CTLFLAG_RW,
+	   &fr_tcpidletimeout, 0, "");
+SYSCTL_INT(_net_inet_ipf, OID_AUTO, fr_tcpclosewait, CTLFLAG_RW,
+	   &fr_tcpclosewait, 0, "");
+SYSCTL_INT(_net_inet_ipf, OID_AUTO, fr_tcplastack, CTLFLAG_RW,
+	   &fr_tcplastack, 0, "");
+SYSCTL_INT(_net_inet_ipf, OID_AUTO, fr_tcptimeout, CTLFLAG_RW,
+	   &fr_tcptimeout, 0, "");
+SYSCTL_INT(_net_inet_ipf, OID_AUTO, fr_tcpclosed, CTLFLAG_RW,
+	   &fr_tcpclosed, 0, "");
+SYSCTL_INT(_net_inet_ipf, OID_AUTO, fr_tcphalfclosed, CTLFLAG_RW,
+	   &fr_tcphalfclosed, 0, "");
+SYSCTL_INT(_net_inet_ipf, OID_AUTO, fr_udptimeout, CTLFLAG_RW,
+	   &fr_udptimeout, 0, "");
+SYSCTL_INT(_net_inet_ipf, OID_AUTO, fr_icmptimeout, CTLFLAG_RW,
+	   &fr_icmptimeout, 0, "");
+SYSCTL_INT(_net_inet_ipf, OID_AUTO, fr_defnatage, CTLFLAG_RW,
+	   &fr_defnatage, 0, "");
+SYSCTL_INT(_net_inet_ipf, OID_AUTO, fr_ipfrttl, CTLFLAG_RW,
+	   &fr_ipfrttl, 0, "");
+SYSCTL_INT(_net_inet_ipf, OID_AUTO, ipl_unreach, CTLFLAG_RW,
+	   &ipl_unreach, 0, "");
+SYSCTL_INT(_net_inet_ipf, OID_AUTO, fr_running, CTLFLAG_RD,
+	   &fr_running, 0, "");
+SYSCTL_INT(_net_inet_ipf, OID_AUTO, fr_authsize, CTLFLAG_RD,
+	   &fr_authsize, 0, "");
+SYSCTL_INT(_net_inet_ipf, OID_AUTO, fr_authused, CTLFLAG_RD,
+	   &fr_authused, 0, "");
+SYSCTL_INT(_net_inet_ipf, OID_AUTO, fr_defaultauthage, CTLFLAG_RW,
+	   &fr_defaultauthage, 0, "");
+SYSCTL_INT(_net_inet_ipf, OID_AUTO, fr_chksrc, CTLFLAG_RW, &fr_chksrc, 0, "");
+SYSCTL_INT(_net_inet_ipf, OID_AUTO, ippr_ftp_pasvonly, CTLFLAG_RW,
+	   &ippr_ftp_pasvonly, 0, "");
+SYSCTL_INT(_net_inet_ipf, OID_AUTO, fr_minttl, CTLFLAG_RW, &fr_minttl, 0, "");
+SYSCTL_INT(_net_inet_ipf, OID_AUTO, fr_minttllog, CTLFLAG_RW,
+	   &fr_minttllog, 0, "");
+
+#define CDEV_MAJOR 79	/* major number stored in ipl_cdevsw below */
+static struct cdevsw ipl_cdevsw = {	/* entry points shared by every device node in ipf_devs[] */
+	/* open */	iplopen,
+	/* close */	iplclose,
+	/* read */	iplread,
+	/* write */	nowrite,
+	/* ioctl */	iplioctl,
+	/* poll */	nopoll,
+	/* mmap */	nommap,
+	/* strategy */	nostrategy,
+	/* name */	"ipl",
+	/* maj */	CDEV_MAJOR,
+	/* dump */	nodump,
+	/* psize */	nopsize,
+	/* flags */	0,
+};
+
+/*
+ * Return the final component of a device path such as IPL_NAME, i.e. the
+ * text that follows the last '/'.  A path containing no '/' is returned
+ * unchanged.
+ */
+static char *
+ipf_devname(char *path)
+{
+	char	*p, *base;
+
+	base = path;
+	for (p = path; *p != '\0'; p++)
+		if (*p == '/')
+			base = p + 1;
+	return base;
+}
+
+/*
+ * Module event handler for the IP Filter KLD.
+ *
+ * MOD_LOAD:   attach the filter with iplattach() and, on success, create
+ *             one device node per log unit (ipf, nat, state, auth),
+ *             remembering each in ipf_devs[].
+ * MOD_UNLOAD: detach the filter with ipldetach() and, only if that
+ *             succeeds, destroy the device nodes.  Destroying them first
+ *             would leave a still-loaded module without its control
+ *             devices whenever the detach is refused.
+ * Any other event type yields EINVAL.
+ *
+ * Returns 0 on success or an errno value.
+ */
+static int
+ipfilter_modevent(module_t mod, int type, void *unused)
+{
+	int	error = 0;
+
+	switch (type) {
+	case MOD_LOAD :
+		error = iplattach();
+		if (error)
+			break;
+
+		/*
+		 * The node name is handed over as a "%s" argument so that it
+		 * is never interpreted as a format string itself.
+		 */
+		ipf_devs[IPL_LOGIPF] = make_dev(&ipl_cdevsw, IPL_LOGIPF,
+		    0, 0, 0600, "%s", ipf_devname(IPL_NAME));
+		ipf_devs[IPL_LOGNAT] = make_dev(&ipl_cdevsw, IPL_LOGNAT,
+		    0, 0, 0600, "%s", ipf_devname(IPL_NAT));
+		ipf_devs[IPL_LOGSTATE] = make_dev(&ipl_cdevsw, IPL_LOGSTATE,
+		    0, 0, 0600, "%s", ipf_devname(IPL_STATE));
+		ipf_devs[IPL_LOGAUTH] = make_dev(&ipl_cdevsw, IPL_LOGAUTH,
+		    0, 0, 0600, "%s", ipf_devname(IPL_AUTH));
+		break;
+	case MOD_UNLOAD :
+		/* Keep the devices if the filter cannot be detached. */
+		error = ipldetach();
+		if (error)
+			break;
+		destroy_dev(ipf_devs[IPL_LOGIPF]);
+		destroy_dev(ipf_devs[IPL_LOGNAT]);
+		destroy_dev(ipf_devs[IPL_LOGSTATE]);
+		destroy_dev(ipf_devs[IPL_LOGAUTH]);
+		break;
+	default:
+		error = EINVAL;
+		break;
+	}
+	return error;
+}
+
+static moduledata_t ipfiltermod = {
+	IPL_VERSION,
+	ipfilter_modevent,	/* handles MOD_LOAD and MOD_UNLOAD, see above */
+        0
+};
+DECLARE_MODULE(ipfilter, ipfiltermod, SI_SUB_PROTO_DOMAIN, SI_ORDER_ANY);
diff --git a/sys/netinet/raw_ip.c b/sys/netinet/raw_ip.c
new file mode 100644
index 0000000..50bbf32
--- /dev/null
+++ b/sys/netinet/raw_ip.c
@@ -0,0 +1,660 @@
+/*
+ * Copyright (c) 1982, 1986, 1988, 1993
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)raw_ip.c	8.7 (Berkeley) 5/15/95
+ * $FreeBSD$
+ */
+
+#include "opt_inet6.h"
+#include "opt_ipsec.h"
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/kernel.h>
+#include <sys/malloc.h>
+#include <sys/mbuf.h>
+#include <sys/protosw.h>
+#include <sys/socket.h>
+#include <sys/socketvar.h>
+#include <sys/sysctl.h>
+
+#include <vm/vm_zone.h>
+
+#include <net/if.h>
+#include <net/route.h>
+
+#define _IP_VHL
+#include <netinet/in.h>
+#include <netinet/in_systm.h>
+#include <netinet/ip.h>
+#include <netinet/in_pcb.h>
+#include <netinet/in_var.h>
+#include <netinet/ip_var.h>
+#include <netinet/ip_mroute.h>
+
+#include <netinet/ip_fw.h>
+
+#ifdef IPSEC
+#include <netinet6/ipsec.h>
+#endif /*IPSEC*/
+
+#include "opt_ipdn.h"
+#ifdef DUMMYNET
+#include <netinet/ip_dummynet.h>
+#endif
+
+struct	inpcbhead ripcb;	/* list of raw IP PCBs, walked by rip_input() */
+struct	inpcbinfo ripcbinfo;	/* PCB bookkeeping, filled in by rip_init() */
+
+/*
+ * Nominal space allocated to a raw ip socket.
+ * These are the initial values of rip_sendspace and rip_recvspace.
+ */
+#define	RIPSNDQ		8192
+#define	RIPRCVQ		8192
+
+/*
+ * Raw interface to IP protocol.
+ */
+
+/*
+ * Prepare the raw IP PCB bookkeeping: the PCB list, the two hash tables
+ * and the zone raw inpcbs are allocated from.
+ */
+void
+rip_init()
+{
+	struct inpcbinfo *pcbinfo = &ripcbinfo;
+
+	LIST_INIT(&ripcb);
+	pcbinfo->listhead = &ripcb;
+
+	/*
+	 * XXX Raw IP does not use the hash lists, but handing out
+	 * one-entry tables is easier than checking everywhere for
+	 * hashbase == NULL.
+	 */
+	pcbinfo->hashbase = hashinit(1, M_PCB, &pcbinfo->hashmask);
+	pcbinfo->porthashbase = hashinit(1, M_PCB, &pcbinfo->porthashmask);
+
+	pcbinfo->ipi_zone = zinit("ripcb", sizeof(struct inpcb), maxsockets,
+	    ZONE_INTERRUPT, 0);
+}
+
+static struct	sockaddr_in ripsrc = { sizeof(ripsrc), AF_INET };
+/*
+ * Setup generic address and protocol structures
+ * for raw_input routine, then pass them along with
+ * mbuf chain.
+ *
+ * Every PCB on ripcb whose protocol, local address and foreign address
+ * (each treated as a wildcard when zero) match the packet receives it.
+ * Delivery runs one match behind: each matching socket except the last
+ * gets a copy made with m_copy(), and the last one is handed the
+ * original chain m.  When no PCB matches, m is freed, ips_noproto is
+ * incremented and ips_delivered is decremented.
+ */
+void
+rip_input(m, off, proto)
+	struct mbuf *m;
+	int off, proto;
+{
+	register struct ip *ip = mtod(m, struct ip *);
+	register struct inpcb *inp;
+	struct inpcb *last = 0;
+	struct mbuf *opts = 0;
+
+	ripsrc.sin_addr = ip->ip_src;	/* source address passed to sbappendaddr() below */
+	LIST_FOREACH(inp, &ripcb, inp_list) {
+#ifdef INET6
+		if ((inp->inp_vflag & INP_IPV4) == 0)
+			continue;
+#endif
+		if (inp->inp_ip_p && inp->inp_ip_p != proto)
+			continue;
+		if (inp->inp_laddr.s_addr &&
+                  inp->inp_laddr.s_addr != ip->ip_dst.s_addr)
+			continue;
+		if (inp->inp_faddr.s_addr &&
+                  inp->inp_faddr.s_addr != ip->ip_src.s_addr)
+			continue;
+		if (last) {
+			struct mbuf *n = m_copy(m, 0, (int)M_COPYALL);
+			if (n) {
+				if (last->inp_flags & INP_CONTROLOPTS ||
+				    last->inp_socket->so_options & SO_TIMESTAMP)
+				    ip_savecontrol(last, &opts, ip, n);
+				if (sbappendaddr(&last->inp_socket->so_rcv,
+				    (struct sockaddr *)&ripsrc, n,
+				    opts) == 0) {
+					/* should notify about lost packet */
+					m_freem(n);
+					if (opts)
+					    m_freem(opts);
+				} else
+					sorwakeup(last->inp_socket);
+				opts = 0;	/* start clean for the next receiver */
+			}
+		}
+		last = inp;	/* this PCB is served on the next match or after the loop */
+	}
+	if (last) {
+		if (last->inp_flags & INP_CONTROLOPTS ||
+		    last->inp_socket->so_options & SO_TIMESTAMP)
+			ip_savecontrol(last, &opts, ip, m);
+		if (sbappendaddr(&last->inp_socket->so_rcv,
+		    (struct sockaddr *)&ripsrc, m, opts) == 0) {
+			m_freem(m);
+			if (opts)
+			    m_freem(opts);
+		} else
+			sorwakeup(last->inp_socket);
+	} else {
+		m_freem(m);
+              ipstat.ips_noproto++;
+              ipstat.ips_delivered--;
+      }
+}
+
+/*
+ * Generate IP header and pass packet to ip_output.
+ * Tack on options user may have setup with control call.
+ *
+ * m   - packet to transmit.  It is freed on every error path taken in
+ *       this function; otherwise it is handed on to ip_output().
+ * so  - the sending raw socket.
+ * dst - destination address stored in the header built here; it is not
+ *       used when the socket has INP_HDRINCL set and supplies its own
+ *       header.
+ *
+ * Returns EMSGSIZE when the datagram would exceed IP_MAXPACKET, ENOBUFS
+ * when no mbuf is available for the header, EINVAL when a user supplied
+ * header is inconsistent, and otherwise whatever ip_output() returns.
+ */
+int
+rip_output(m, so, dst)
+	struct mbuf *m;
+	struct socket *so;
+	u_long dst;
+{
+	register struct ip *ip;
+	register struct inpcb *inp = sotoinpcb(so);
+	int flags = (so->so_options & SO_DONTROUTE) | IP_ALLOWBROADCAST;
+
+	/*
+	 * If the user handed us a complete IP packet, use it.
+	 * Otherwise, allocate an mbuf for a header and fill it in.
+	 */
+	if ((inp->inp_flags & INP_HDRINCL) == 0) {
+		if (m->m_pkthdr.len + sizeof(struct ip) > IP_MAXPACKET) {
+			m_freem(m);
+			return(EMSGSIZE);
+		}
+		M_PREPEND(m, sizeof(struct ip), M_TRYWAIT);
+		/*
+		 * M_PREPEND leaves m NULL when it could not obtain space
+		 * for the header; there is nothing left to free then.
+		 */
+		if (m == NULL)
+			return(ENOBUFS);
+		ip = mtod(m, struct ip *);
+		ip->ip_tos = inp->inp_ip_tos;
+		ip->ip_off = 0;
+		ip->ip_p = inp->inp_ip_p;
+		ip->ip_len = m->m_pkthdr.len;
+		ip->ip_src = inp->inp_laddr;
+		ip->ip_dst.s_addr = dst;
+		ip->ip_ttl = inp->inp_ip_ttl;
+	} else {
+		if (m->m_pkthdr.len > IP_MAXPACKET) {
+			m_freem(m);
+			return(EMSGSIZE);
+		}
+		ip = mtod(m, struct ip *);
+		/* don't allow both user specified and setsockopt options,
+		   and don't allow packet length sizes that will crash */
+		if (((IP_VHL_HL(ip->ip_vhl) != (sizeof (*ip) >> 2))
+		     && inp->inp_options)
+		    || (ip->ip_len > m->m_pkthdr.len)
+		    || (ip->ip_len < (IP_VHL_HL(ip->ip_vhl) << 2))) {
+			m_freem(m);
+			return EINVAL;
+		}
+		/* An id of zero asks us to pick one. */
+		if (ip->ip_id == 0)
+			ip->ip_id = htons(ip_id++);
+		/* XXX prevent ip_output from overwriting header fields */
+		flags |= IP_RAWOUTPUT;
+		ipstat.ips_rawout++;
+	}
+
+#ifdef IPSEC
+	ipsec_setsocket(m, so);
+#endif /*IPSEC*/
+
+	return (ip_output(m, inp->inp_options, &inp->inp_route, flags,
+			  inp->inp_moptions));
+}
+
+/*
+ * Raw IP socket option processing.
+ *
+ * Only IPPROTO_IP level options are accepted; any other level yields
+ * EINVAL.  IP_HDRINCL is handled here by reading or updating the
+ * INP_HDRINCL flag of the PCB.  Firewall (IP_FW_*) and, when compiled
+ * in, dummynet (IP_DUMMYNET_*) requests go through ip_fw_ctl_ptr and
+ * ip_dn_ctl_ptr and fail with ENOPROTOOPT while that hook is unset.
+ * RSVP and multicast routing (MRT_*) requests are passed to their own
+ * handlers, and every other option falls through to ip_ctloutput().
+ */
+int
+rip_ctloutput(so, sopt)
+	struct socket *so;
+	struct sockopt *sopt;
+{
+	struct	inpcb *inp = sotoinpcb(so);
+	int	error, optval;
+
+	if (sopt->sopt_level != IPPROTO_IP)
+		return (EINVAL);
+
+	error = 0;
+
+	switch (sopt->sopt_dir) {
+	case SOPT_GET:
+		switch (sopt->sopt_name) {
+		case IP_HDRINCL:
+			optval = inp->inp_flags & INP_HDRINCL;
+			error = sooptcopyout(sopt, &optval, sizeof optval);
+			break;
+
+		case IP_FW_ADD:	/* NOTE(review): ADD is also accepted on the GET path - confirm intended */
+		case IP_FW_GET:
+			if (ip_fw_ctl_ptr == 0)
+				error = ENOPROTOOPT;
+			else
+				error = ip_fw_ctl_ptr(sopt);
+			break;
+
+#ifdef DUMMYNET
+		case IP_DUMMYNET_GET:
+			if (ip_dn_ctl_ptr == NULL)
+				error = ENOPROTOOPT ;
+			else
+				error = ip_dn_ctl_ptr(sopt);
+			break ;
+#endif /* DUMMYNET */
+
+		case MRT_INIT:
+		case MRT_DONE:
+		case MRT_ADD_VIF:
+		case MRT_DEL_VIF:
+		case MRT_ADD_MFC:
+		case MRT_DEL_MFC:
+		case MRT_VERSION:
+		case MRT_ASSERT:
+			error = ip_mrouter_get(so, sopt);
+			break;
+
+		default:
+			error = ip_ctloutput(so, sopt);
+			break;
+		}
+		break;
+
+	case SOPT_SET:
+		switch (sopt->sopt_name) {
+		case IP_HDRINCL:
+			error = sooptcopyin(sopt, &optval, sizeof optval,
+					    sizeof optval);
+			if (error)
+				break;
+			if (optval)
+				inp->inp_flags |= INP_HDRINCL;
+			else
+				inp->inp_flags &= ~INP_HDRINCL;
+			break;
+
+		case IP_FW_ADD:
+		case IP_FW_DEL:
+		case IP_FW_FLUSH:
+		case IP_FW_ZERO:
+		case IP_FW_RESETLOG:
+			if (ip_fw_ctl_ptr == 0)
+				error = ENOPROTOOPT;
+			else
+				error = ip_fw_ctl_ptr(sopt);
+			break;
+
+#ifdef DUMMYNET
+		case IP_DUMMYNET_CONFIGURE:
+		case IP_DUMMYNET_DEL:
+		case IP_DUMMYNET_FLUSH:
+			if (ip_dn_ctl_ptr == NULL)
+				error = ENOPROTOOPT ;
+			else
+				error = ip_dn_ctl_ptr(sopt);
+			break ;
+#endif
+
+		case IP_RSVP_ON:
+			error = ip_rsvp_init(so);
+			break;
+
+		case IP_RSVP_OFF:
+			error = ip_rsvp_done();
+			break;
+
+			/* XXX - should be combined */
+		case IP_RSVP_VIF_ON:
+			error = ip_rsvp_vif_init(so, sopt);
+			break;
+			
+		case IP_RSVP_VIF_OFF:
+			error = ip_rsvp_vif_done(so, sopt);
+			break;
+
+		case MRT_INIT:
+		case MRT_DONE:
+		case MRT_ADD_VIF:
+		case MRT_DEL_VIF:
+		case MRT_ADD_MFC:
+		case MRT_DEL_MFC:
+		case MRT_VERSION:
+		case MRT_ASSERT:
+			error = ip_mrouter_set(so, sopt);
+			break;
+
+		default:
+			error = ip_ctloutput(so, sopt);
+			break;
+		}
+		break;
+	}
+
+	return (error);
+}
+
+/*
+ * Control input for raw IP.  Only two commands are acted upon:
+ *
+ * PRC_IFDOWN (sent by if_down()): find the interface address whose
+ * ifa_addr is sa and remove the routes that belong to it.
+ *
+ * PRC_IFUP (sent by if_up()): find the interface address whose ifa_addr
+ * is sa and put its interface route back.
+ *
+ * Every other command is ignored.
+ */
+void
+rip_ctlinput(cmd, sa, vip)
+	int cmd;
+	struct sockaddr *sa;
+	void *vip;
+{
+	struct in_ifaddr *ia;
+	struct ifnet *ifp;
+	int rtflags;
+
+	if (cmd == PRC_IFDOWN) {
+		TAILQ_FOREACH(ia, &in_ifaddrhead, ia_link) {
+			if (ia->ia_ifa.ifa_addr != sa ||
+			    (ia->ia_flags & IFA_ROUTE) == 0)
+				continue;
+			/* in_ifscrub takes out the interface route ... */
+			in_ifscrub(ia->ia_ifp, ia);
+			/*
+			 * ... and in_ifadown drops all remaining routes.
+			 * Not quite the right thing to do, but with a
+			 * routing process running they will come back.
+			 */
+			in_ifadown(&ia->ia_ifa, 0);
+			break;
+		}
+		return;
+	}
+
+	if (cmd != PRC_IFUP)
+		return;
+
+	TAILQ_FOREACH(ia, &in_ifaddrhead, ia_link) {
+		if (ia->ia_ifa.ifa_addr == sa)
+			break;
+	}
+	/* Unknown address, or its route is already installed. */
+	if (ia == NULL || (ia->ia_flags & IFA_ROUTE))
+		return;
+
+	ifp = ia->ia_ifa.ifa_ifp;
+	rtflags = RTF_UP;
+	if (ifp->if_flags & (IFF_LOOPBACK | IFF_POINTOPOINT))
+		rtflags |= RTF_HOST;
+
+	if (rtinit(&ia->ia_ifa, RTM_ADD, rtflags) == 0)
+		ia->ia_flags |= IFA_ROUTE;
+}
+
+u_long	rip_sendspace = RIPSNDQ;	/* send space reserved by rip_attach() */
+u_long	rip_recvspace = RIPRCVQ;	/* receive space reserved by rip_attach() */
+
+SYSCTL_INT(_net_inet_raw, OID_AUTO, maxdgram, CTLFLAG_RW,
+    &rip_sendspace, 0, "Maximum outgoing raw IP datagram size");	/* NOTE(review): u_long exported through SYSCTL_INT - confirm the sizes agree on LP64 */
+SYSCTL_INT(_net_inet_raw, OID_AUTO, recvspace, CTLFLAG_RW,
+    &rip_recvspace, 0, "Maximum incoming raw IP datagram size");	/* NOTE(review): same u_long/SYSCTL_INT pairing as above */
+
+/*
+ * Give a new raw socket its PCB.  When a process is supplied it must
+ * pass suser().  Buffer space is reserved from rip_sendspace and
+ * rip_recvspace, then the PCB is allocated and initialised for IPv4
+ * with the requested protocol and the default TTL.
+ * Returns 0 or an errno value; panics if the socket already has a PCB.
+ */
+static int
+rip_attach(struct socket *so, int proto, struct proc *p)
+{
+	struct inpcb *inp;
+	int error, s;
+
+	if (sotoinpcb(so) != NULL)
+		panic("rip_attach");
+	if (p != NULL) {
+		error = suser(p);
+		if (error != 0)
+			return error;
+	}
+
+	error = soreserve(so, rip_sendspace, rip_recvspace);
+	if (error)
+		return error;
+
+	s = splnet();
+	error = in_pcballoc(so, &ripcbinfo, p);
+	splx(s);
+	if (error)
+		return error;
+
+	inp = (struct inpcb *)so->so_pcb;
+	inp->inp_vflag |= INP_IPV4;
+	inp->inp_ip_p = proto;
+	inp->inp_ip_ttl = ip_defttl;
+#ifdef IPSEC
+	error = ipsec_init_policy(so, &inp->inp_sp);
+	if (error != 0) {
+		/* Undo the allocation made above. */
+		in_pcbdetach(inp);
+		return error;
+	}
+#endif /*IPSEC*/
+	return 0;
+}
+
+/*
+ * Release the PCB of a raw socket, first shutting down multicast
+ * routing and RSVP if this socket is the one running them.
+ * Panics if the socket has no PCB.  Always returns 0.
+ */
+static int
+rip_detach(struct socket *so)
+{
+	struct inpcb *inp = sotoinpcb(so);
+
+	if (inp == NULL)
+		panic("rip_detach");
+
+	if (so == ip_mrouter)
+		ip_mrouter_done();
+
+	ip_rsvp_force_done(so);
+	if (so == ip_rsvpd)
+		ip_rsvp_done();
+
+	in_pcbdetach(inp);
+	return 0;
+}
+
+/*
+ * Mark the socket disconnected, then detach its PCB.
+ * Returns the result of rip_detach().
+ */
+static int
+rip_abort(struct socket *so)
+{
+	int error;
+
+	soisdisconnected(so);
+	error = rip_detach(so);
+	return error;
+}
+
+/*
+ * Disconnect a connected raw socket by aborting it.
+ * Returns ENOTCONN when the socket is not connected.
+ */
+static int
+rip_disconnect(struct socket *so)
+{
+	if (so->so_state & SS_ISCONNECTED)
+		return rip_abort(so);
+	return ENOTCONN;
+}
+
+/*
+ * Record the local address of a raw socket.
+ * Returns EINVAL for a sockaddr of the wrong length and EADDRNOTAVAIL
+ * when there are no interfaces, the family is neither AF_INET nor
+ * AF_IMPLINK, or a non-zero address is not found by ifa_ifwithaddr().
+ */
+static int
+rip_bind(struct socket *so, struct sockaddr *nam, struct proc *p)
+{
+	struct sockaddr_in *sin = (struct sockaddr_in *)nam;
+	struct inpcb *inp = sotoinpcb(so);
+
+	if (nam->sa_len != sizeof(*sin))
+		return EINVAL;
+
+	if (TAILQ_EMPTY(&ifnet))
+		return EADDRNOTAVAIL;
+	if (sin->sin_family != AF_INET && sin->sin_family != AF_IMPLINK)
+		return EADDRNOTAVAIL;
+	/* A zero address is accepted without an interface lookup. */
+	if (sin->sin_addr.s_addr != 0 &&
+	    ifa_ifwithaddr((struct sockaddr *)sin) == 0)
+		return EADDRNOTAVAIL;
+
+	inp->inp_laddr = sin->sin_addr;
+	return 0;
+}
+
+/*
+ * Record the foreign address of a raw socket and mark it connected.
+ * Returns EINVAL for a sockaddr of the wrong length, EADDRNOTAVAIL when
+ * there are no interfaces, and EAFNOSUPPORT for a family other than
+ * AF_INET or AF_IMPLINK.
+ */
+static int
+rip_connect(struct socket *so, struct sockaddr *nam, struct proc *p)
+{
+	struct sockaddr_in *sin = (struct sockaddr_in *)nam;
+	struct inpcb *inp = sotoinpcb(so);
+
+	if (nam->sa_len != sizeof(*sin))
+		return EINVAL;
+	if (TAILQ_EMPTY(&ifnet))
+		return EADDRNOTAVAIL;
+
+	switch (sin->sin_family) {
+	case AF_INET:
+	case AF_IMPLINK:
+		break;
+	default:
+		return EAFNOSUPPORT;
+	}
+
+	inp->inp_faddr = sin->sin_addr;
+	soisconnected(so);
+	return 0;
+}
+
+/*
+ * Shut down the sending side of a raw socket.  Always returns 0.
+ */
+static int
+rip_shutdown(struct socket *so)
+{
+
+	socantsendmore(so);
+	return (0);
+}
+
+/*
+ * Send one datagram on a raw IP socket.
+ *
+ * A connected socket sends to its recorded foreign address and must not
+ * be given nam (EISCONN); an unconnected socket must be given nam
+ * (ENOTCONN) and sends to the address it holds.  m is freed on those
+ * two error paths and is otherwise passed to rip_output(), whose result
+ * is returned.
+ *
+ * control is not used by raw IP output and is released here.
+ * NOTE(review): this relies on the pru_send convention that the
+ * protocol owns both m and control once called - confirm against
+ * sosend().
+ */
+static int
+rip_send(struct socket *so, int flags, struct mbuf *m, struct sockaddr *nam,
+	 struct mbuf *control, struct proc *p)
+{
+	struct inpcb *inp = sotoinpcb(so);
+	register u_long dst;
+
+	/* Nothing below looks at the ancillary data; do not leak it. */
+	if (control != NULL)
+		m_freem(control);
+
+	if (so->so_state & SS_ISCONNECTED) {
+		if (nam) {
+			m_freem(m);
+			return EISCONN;
+		}
+		dst = inp->inp_faddr.s_addr;
+	} else {
+		if (nam == NULL) {
+			m_freem(m);
+			return ENOTCONN;
+		}
+		dst = ((struct sockaddr_in *)nam)->sin_addr.s_addr;
+	}
+	return rip_output(m, so, dst);
+}
+
+/*
+ * Sysctl handler behind the "pcblist" node registered below: export the
+ * list of raw IP PCBs.
+ *
+ * The output is a struct xinpgen header, one struct xinpcb for every
+ * PCB whose generation count is not newer than the snapshot taken at
+ * the start, and a trailing struct xinpgen carrying the then-current
+ * counts so the reader can tell whether the list changed meanwhile.
+ *
+ * When called without an output buffer only a size estimate is stored
+ * in req->oldidx.  The node is read only: supplying new data yields
+ * EPERM.  Returns 0 or an errno value; copying stops at the first
+ * SYSCTL_OUT() failure and that error is returned.
+ */
+static int
+rip_pcblist(SYSCTL_HANDLER_ARGS)
+{
+	int error, i, n, s;
+	struct inpcb *inp, **inp_list;
+	inp_gen_t gencnt;
+	struct xinpgen xig;
+
+	/*
+	 * The process of preparing the PCB list is too time-consuming and
+	 * resource-intensive to repeat twice on every request.
+	 */
+	if (req->oldptr == 0) {
+		n = ripcbinfo.ipi_count;
+		req->oldidx = 2 * (sizeof xig)
+			+ (n + n/8) * sizeof(struct xinpcb);
+		return 0;
+	}
+
+	if (req->newptr != 0)
+		return EPERM;
+
+	/*
+	 * OK, now we're committed to doing something.
+	 */
+	s = splnet();
+	gencnt = ripcbinfo.ipi_gencnt;
+	n = ripcbinfo.ipi_count;
+	splx(s);
+
+	xig.xig_len = sizeof xig;
+	xig.xig_count = n;
+	xig.xig_gen = gencnt;
+	xig.xig_sogen = so_gencnt;
+	error = SYSCTL_OUT(req, &xig, sizeof xig);
+	if (error)
+		return error;
+
+	inp_list = malloc(n * sizeof *inp_list, M_TEMP, M_WAITOK);
+	if (inp_list == 0)
+		return ENOMEM;
+	
+	/* Collect at most n PCBs that already existed at snapshot time. */
+	s = splnet();
+	for (inp = LIST_FIRST(ripcbinfo.listhead), i = 0; inp && i < n;
+	     inp = LIST_NEXT(inp, inp_list)) {
+		if (inp->inp_gencnt <= gencnt)
+			inp_list[i++] = inp;
+	}
+	splx(s);
+	n = i;
+
+	/*
+	 * Stop at the first failure so that it is not overwritten by the
+	 * result of a later SYSCTL_OUT().
+	 */
+	error = 0;
+	for (i = 0; error == 0 && i < n; i++) {
+		inp = inp_list[i];
+		if (inp->inp_gencnt <= gencnt) {
+			struct xinpcb xi;
+
+			/*
+			 * Clear the record first: xi_socket is only filled
+			 * in when the PCB has a socket, and no stale stack
+			 * contents may be copied out.
+			 */
+			bzero(&xi, sizeof xi);
+			xi.xi_len = sizeof xi;
+			/* XXX should avoid extra copy */
+			bcopy(inp, &xi.xi_inp, sizeof *inp);
+			if (inp->inp_socket)
+				sotoxsocket(inp->inp_socket, &xi.xi_socket);
+			error = SYSCTL_OUT(req, &xi, sizeof xi);
+		}
+	}
+	if (!error) {
+		/*
+		 * Give the user an updated idea of our state.
+		 * If the generation differs from what we told
+		 * her before, she knows that something happened
+		 * while we were processing this request, and it
+		 * might be necessary to retry.
+		 */
+		s = splnet();
+		xig.xig_gen = ripcbinfo.ipi_gencnt;
+		xig.xig_sogen = so_gencnt;
+		xig.xig_count = ripcbinfo.ipi_count;
+		splx(s);
+		error = SYSCTL_OUT(req, &xig, sizeof xig);
+	}
+	free(inp_list, M_TEMP);
+	return error;
+}
+
+SYSCTL_PROC(_net_inet_raw, OID_AUTO/*XXX*/, pcblist, CTLFLAG_RD, 0, 0,
+	    rip_pcblist, "S,xinpcb", "List of active raw IP sockets");
+
+struct pr_usrreqs rip_usrreqs = {	/* user-request handlers for raw IP sockets; positional, order matters */
+	rip_abort, pru_accept_notsupp, rip_attach, rip_bind, rip_connect,
+	pru_connect2_notsupp, in_control, rip_detach, rip_disconnect,
+	pru_listen_notsupp, in_setpeeraddr, pru_rcvd_notsupp,
+	pru_rcvoob_notsupp, rip_send, pru_sense_null, rip_shutdown,
+	in_setsockaddr, sosend, soreceive, sopoll
+};
diff --git a/sys/netinet/tcp.h b/sys/netinet/tcp.h
new file mode 100644
index 0000000..fee449f
--- /dev/null
+++ b/sys/netinet/tcp.h
@@ -0,0 +1,137 @@
+/*
+ * Copyright (c) 1982, 1986, 1993
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)tcp.h	8.1 (Berkeley) 6/10/93
+ * $FreeBSD$
+ */
+
+#ifndef _NETINET_TCP_H_
+#define _NETINET_TCP_H_
+
+typedef	u_int32_t tcp_seq;		/* type of th_seq and th_ack in struct tcphdr */
+typedef u_int32_t tcp_cc;		/* connection count per rfc1644 */
+
+#define tcp6_seq	tcp_seq	/* for KAME src sync over BSD*'s */
+#define tcp6hdr		tcphdr	/* for KAME src sync over BSD*'s */
+
+/*
+ * TCP header.
+ * Per RFC 793, September, 1981.
+ *
+ * The order of the th_x2/th_off bit-fields is selected by BYTE_ORDER.
+ * The TH_* values defined inside the structure are the bit masks kept
+ * in th_flags.
+ * NOTE(review): TH_FLAGS leaves out TH_PUSH - confirm that is intended.
+ */
+struct tcphdr {
+	u_short	th_sport;		/* source port */
+	u_short	th_dport;		/* destination port */
+	tcp_seq	th_seq;			/* sequence number */
+	tcp_seq	th_ack;			/* acknowledgement number */
+#if BYTE_ORDER == LITTLE_ENDIAN
+	u_int	th_x2:4,		/* (unused) */
+		th_off:4;		/* data offset */
+#endif
+#if BYTE_ORDER == BIG_ENDIAN
+	u_int	th_off:4,		/* data offset */
+		th_x2:4;		/* (unused) */
+#endif
+	u_char	th_flags;		/* combination of the TH_* bits below */
+#define	TH_FIN	0x01
+#define	TH_SYN	0x02
+#define	TH_RST	0x04
+#define	TH_PUSH	0x08
+#define	TH_ACK	0x10
+#define	TH_URG	0x20
+#define	TH_ECE	0x40
+#define	TH_CWR	0x80
+#define	TH_FLAGS	(TH_FIN|TH_SYN|TH_RST|TH_ACK|TH_URG|TH_ECE|TH_CWR)
+
+	u_short	th_win;			/* window */
+	u_short	th_sum;			/* checksum */
+	u_short	th_urp;			/* urgent pointer */
+};
+
+#define	TCPOPT_EOL		0
+#define	TCPOPT_NOP		1
+#define	TCPOPT_MAXSEG		2
+#define    TCPOLEN_MAXSEG		4
+#define TCPOPT_WINDOW		3
+#define    TCPOLEN_WINDOW		3
+#define TCPOPT_SACK_PERMITTED	4		/* Experimental */
+#define    TCPOLEN_SACK_PERMITTED	2
+#define TCPOPT_SACK		5		/* Experimental */
+#define TCPOPT_TIMESTAMP	8
+#define    TCPOLEN_TIMESTAMP		10
+#define    TCPOLEN_TSTAMP_APPA		(TCPOLEN_TIMESTAMP+2) /* appendix A */
+#define    TCPOPT_TSTAMP_HDR		\
+    (TCPOPT_NOP<<24|TCPOPT_NOP<<16|TCPOPT_TIMESTAMP<<8|TCPOLEN_TIMESTAMP)	/* one 32-bit word: NOP, NOP, TIMESTAMP, TCPOLEN_TIMESTAMP, high byte first */
+
+#define	TCPOPT_CC		11		/* CC options: RFC-1644 */
+#define TCPOPT_CCNEW		12
+#define TCPOPT_CCECHO		13
+#define	   TCPOLEN_CC			6
+#define	   TCPOLEN_CC_APPA		(TCPOLEN_CC+2)
+#define	   TCPOPT_CC_HDR(ccopt)		\
+    (TCPOPT_NOP<<24|TCPOPT_NOP<<16|(ccopt)<<8|TCPOLEN_CC)	/* one 32-bit word: NOP, NOP, ccopt, TCPOLEN_CC, high byte first */
+
+/*
+ * Default maximum segment size for TCP.
+ * With an IP MSS of 576, this is 536,
+ * but 512 is probably more convenient.
+ * This should be defined as MIN(512, IP_MSS - sizeof (struct tcpiphdr)).
+ */
+#define	TCP_MSS	512
+
+/*
+ * Default maximum segment size for TCP6.
+ * With an IP6 MSS of 1280, this is 1220,
+ * but 1024 is probably more convenient. (xxx kazu in doubt)
+ * This should be defined as MIN(1024, IP6_MSS - sizeof (struct tcpip6hdr))
+ */
+#define	TCP6_MSS	1024
+
+#define	TCP_MAXWIN	65535	/* largest value for (unscaled) window */
+#define	TTCP_CLIENT_SND_WND	4096	/* dflt send window for T/TCP client */
+
+#define TCP_MAX_WINSHIFT	14	/* maximum window shift */
+
+#define TCP_MAXBURST		4 	/* maximum segments in a burst */
+
+#define TCP_MAXHLEN	(0xf<<2)	/* max length of header in bytes: the largest 4-bit value times 4 */
+#define TCP_MAXOLEN	(TCP_MAXHLEN - sizeof(struct tcphdr))
+					/* max space left for options */
+
+/*
+ * User-settable options (used with setsockopt).
+ */
+#define	TCP_NODELAY	0x01	/* don't delay send to coalesce packets */
+#define	TCP_MAXSEG	0x02	/* set maximum segment size */
+#define TCP_NOPUSH	0x04	/* don't push last block of write */
+#define TCP_NOOPT	0x08	/* don't use TCP options */
+
+#endif
diff --git a/sys/netinet/tcp_debug.c b/sys/netinet/tcp_debug.c
new file mode 100644
index 0000000..89e9d7c
--- /dev/null
+++ b/sys/netinet/tcp_debug.c
@@ -0,0 +1,231 @@
+/*
+ * Copyright (c) 1982, 1986, 1993
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)tcp_debug.c	8.1 (Berkeley) 6/10/93
+ * $FreeBSD$
+ */
+
+#include "opt_inet.h"
+#include "opt_inet6.h"
+#include "opt_tcpdebug.h"
+
+#ifndef INET
+#error The option TCPDEBUG requires option INET.
+#endif
+
+#ifdef TCPDEBUG
+/* load symbolic names */
+#define PRUREQUESTS
+#define TCPSTATES
+#define	TCPTIMERS
+#define	TANAMES
+#endif
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/protosw.h>
+#include <sys/socket.h>
+
+#include <netinet/in.h>
+#include <netinet/in_systm.h>
+#include <netinet/ip.h>
+#ifdef INET6
+#include <netinet/ip6.h>
+#endif
+#include <netinet/ip_var.h>
+#include <netinet/tcp.h>
+#include <netinet/tcp_fsm.h>
+#include <netinet/tcp_timer.h>
+#include <netinet/tcp_var.h>
+#include <netinet/tcpip.h>
+#include <netinet/tcp_debug.h>
+
+#ifdef TCPDEBUG
+static int	tcpconsdebug = 0;	/* nonzero: tcp_trace() also prints each record */
+#endif
+
+static struct tcp_debug tcp_debug[TCP_NDEBUG];	/* trace records, reused circularly by tcp_trace() */
+static int	tcp_debx;	/* index of the next record tcp_trace() fills */
+
+/*
+ * Tcp debug routines
+ *
+ * tcp_trace() fills the next slot of the tcp_debug[] array (wrapping at
+ * TCP_NDEBUG) with the action, the previous state, a copy of *tp and
+ * copies of the IP and TCP headers when those pointers are non-NULL;
+ * pieces that are absent are zeroed.  With INET6 compiled in, the
+ * record is tagged AF_INET6 when the version field read from ipgen is
+ * 6, otherwise AF_INET.  When built with TCPDEBUG and tcpconsdebug is
+ * nonzero, the record is also printed.
+ */
+void
+tcp_trace(act, ostate, tp, ipgen, th, req)
+	short act, ostate;
+	struct tcpcb *tp;
+	void *ipgen;
+	struct tcphdr *th;
+	int req;
+{
+#ifdef INET6
+	int isipv6;
+#endif /* INET6 */
+	tcp_seq seq, ack;
+	int len, flags;
+	struct tcp_debug *td = &tcp_debug[tcp_debx++];
+
+#ifdef INET6
+	isipv6 = (ipgen != NULL && ((struct ip *)ipgen)->ip_v == 6) ? 1 : 0;
+#endif /* INET6 */
+	td->td_family =
+#ifdef INET6
+		(isipv6 != 0) ? AF_INET6 :
+#endif
+		AF_INET;
+	if (tcp_debx == TCP_NDEBUG)	/* wrap to the first record */
+		tcp_debx = 0;
+	td->td_time = iptime();
+	td->td_act = act;
+	td->td_ostate = ostate;
+	td->td_tcb = (caddr_t)tp;
+	if (tp)
+		td->td_cb = *tp;
+	else
+		bzero((caddr_t)&td->td_cb, sizeof (*tp));
+	if (ipgen) {
+		switch (td->td_family) {
+		case AF_INET:
+			bcopy((caddr_t)ipgen, (caddr_t)&td->td_ti.ti_i,
+			      sizeof(td->td_ti.ti_i));
+			bzero((caddr_t)td->td_ip6buf, sizeof(td->td_ip6buf));
+			break;
+#ifdef INET6
+		case AF_INET6:
+			bcopy((caddr_t)ipgen, (caddr_t)td->td_ip6buf,
+			      sizeof(td->td_ip6buf));
+			bzero((caddr_t)&td->td_ti.ti_i,
+			      sizeof(td->td_ti.ti_i));
+			break;
+#endif
+		default:
+			bzero((caddr_t)td->td_ip6buf, sizeof(td->td_ip6buf));
+			bzero((caddr_t)&td->td_ti.ti_i,
+			      sizeof(td->td_ti.ti_i));
+			break;
+		}
+	} else {
+		bzero((caddr_t)&td->td_ti.ti_i, sizeof(td->td_ti.ti_i));
+		bzero((caddr_t)td->td_ip6buf, sizeof(td->td_ip6buf));
+	}
+	if (th) {
+		switch (td->td_family) {
+		case AF_INET:
+			td->td_ti.ti_t = *th;
+			bzero((caddr_t)&td->td_ti6.th, sizeof(td->td_ti6.th));
+			break;
+#ifdef INET6
+		case AF_INET6:
+			td->td_ti6.th = *th;
+			bzero((caddr_t)&td->td_ti.ti_t,
+			      sizeof(td->td_ti.ti_t));
+			break;
+#endif
+		default:
+			bzero((caddr_t)&td->td_ti.ti_t,
+			      sizeof(td->td_ti.ti_t));
+			bzero((caddr_t)&td->td_ti6.th, sizeof(td->td_ti6.th));
+			break;
+		}
+	} else {
+		bzero((caddr_t)&td->td_ti.ti_t, sizeof(td->td_ti.ti_t));
+		bzero((caddr_t)&td->td_ti6.th, sizeof(td->td_ti6.th));
+	}
+	td->td_req = req;
+#ifdef TCPDEBUG
+	if (tcpconsdebug == 0)	/* record stored; printing is disabled */
+		return;
+	if (tp)
+		printf("%p %s:", tp, tcpstates[ostate]);
+	else
+		printf("???????? ");
+	printf("%s ", tanames[act]);
+	switch (act) {
+
+	case TA_INPUT:
+	case TA_OUTPUT:
+	case TA_DROP:
+		if (ipgen == NULL || th == NULL)
+			break;
+		seq = th->th_seq;
+		ack = th->th_ack;
+		len =
+#ifdef INET6
+			isipv6 ? ((struct ip6_hdr *)ipgen)->ip6_plen :
+#endif
+			((struct ip *)ipgen)->ip_len;
+		if (act == TA_OUTPUT) {	/* only the output case is byte-swapped here */
+			seq = ntohl(seq);
+			ack = ntohl(ack);
+			len = ntohs((u_short)len);
+		}
+		if (act == TA_OUTPUT)
+			len -= sizeof (struct tcphdr);
+		if (len)
+			printf("[%x..%x)", seq, seq+len);
+		else
+			printf("%x", seq);
+		printf("@%x, urp=%x", ack, th->th_urp);
+		flags = th->th_flags;
+		if (flags) {
+			char *cp = "<";
+#define pf(f) {					\
+	if (th->th_flags & TH_##f) {		\
+		printf("%s%s", cp, #f);		\
+		cp = ",";			\
+	}					\
+}
+			pf(SYN); pf(ACK); pf(FIN); pf(RST); pf(PUSH); pf(URG);
+			printf(">");
+		}
+		break;
+
+	case TA_USER:
+		printf("%s", prurequests[req&0xff]);
+		if ((req & 0xff) == PRU_SLOWTIMO)
+			printf("<%s>", tcptimers[req>>8]);
+		break;
+	}
+	if (tp)
+		printf(" -> %s", tcpstates[tp->t_state]);
+	/* print out internal state of tp !?! */
+	printf("\n");
+	if (tp == 0)
+		return;
+	printf(
+	"\trcv_(nxt,wnd,up) (%lx,%lx,%lx) snd_(una,nxt,max) (%lx,%lx,%lx)\n",
+	    (u_long)tp->rcv_nxt, tp->rcv_wnd, (u_long)tp->rcv_up,
+	    (u_long)tp->snd_una, (u_long)tp->snd_nxt, (u_long)tp->snd_max);
+	printf("\tsnd_(wl1,wl2,wnd) (%lx,%lx,%lx)\n",
+	    (u_long)tp->snd_wl1, (u_long)tp->snd_wl2, tp->snd_wnd);
+#endif /* TCPDEBUG */
+}
diff --git a/sys/netinet/tcp_debug.h b/sys/netinet/tcp_debug.h
new file mode 100644
index 0000000..773d3e4
--- /dev/null
+++ b/sys/netinet/tcp_debug.h
@@ -0,0 +1,83 @@
+/*
+ * Copyright (c) 1982, 1986, 1993
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)tcp_debug.h	8.1 (Berkeley) 6/10/93
+ * $FreeBSD$
+ */
+
+#ifndef _NETINET_TCP_DEBUG_H_
+#define	_NETINET_TCP_DEBUG_H_
+
+struct	tcp_debug {
+	n_time	td_time;
+	short	td_act;		/* presumably one of the TA_* codes below */
+	short	td_ostate;	/* presumably the TCP state before the action */
+	caddr_t	td_tcb;
+	int	td_family;
+	/*
+	 * Co-existence of td_ti and td_ti6 below is ugly, but it is necessary
+	 * to achieve backward compatibility to some extent.
+	 */
+	struct	tcpiphdr td_ti;
+	struct {
+#if !defined(_KERNEL) && defined(INET6)
+		struct	ip6_hdr ip6;
+#else
+		u_char	ip6buf[40]; /* sizeof(struct ip6_hdr) */
+#endif
+		struct	tcphdr th;
+	} td_ti6;
+#define	td_ip6buf	td_ti6.ip6buf	/* usable only when ip6buf is the member compiled in */
+	short	td_req;		/* req argument stored by the trace routine */
+	struct	tcpcb td_cb;
+};
+
+#define	TA_INPUT 	0
+#define	TA_OUTPUT	1
+#define	TA_USER		2	/* user request; td_req carries a PRU_* code */
+#define	TA_RESPOND	3
+#define	TA_DROP		4
+
+#ifdef TANAMES
+static char	*tanames[] =	/* printable names, indexed by TA_* code */
+    { "input", "output", "user", "respond", "drop" };
+#endif
+
+#define	TCP_NDEBUG 100	/* number of entries in tcp_debug[] */
+
+#ifndef _KERNEL
+/* XXX common variables for broken applications. */
+struct	tcp_debug tcp_debug[TCP_NDEBUG];
+int	tcp_debx;	/* presumably the current index into tcp_debug[] -- confirm */
+#endif
+
+#endif /* !_NETINET_TCP_DEBUG_H_ */
diff --git a/sys/netinet/tcp_fsm.h b/sys/netinet/tcp_fsm.h
new file mode 100644
index 0000000..37752c4
--- /dev/null
+++ b/sys/netinet/tcp_fsm.h
@@ -0,0 +1,114 @@
+/*
+ * Copyright (c) 1982, 1986, 1993
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)tcp_fsm.h	8.1 (Berkeley) 6/10/93
+ * $FreeBSD$
+ */
+
+#ifndef _NETINET_TCP_FSM_H_
+#define _NETINET_TCP_FSM_H_
+
+/*
+ * TCP FSM state definitions.
+ * Per RFC793, September, 1981.
+ */
+
+#define	TCP_NSTATES	11	/* number of TCPS_* states below */
+
+#define	TCPS_CLOSED		0	/* closed */
+#define	TCPS_LISTEN		1	/* listening for connection */
+#define	TCPS_SYN_SENT		2	/* active, have sent syn */
+#define	TCPS_SYN_RECEIVED	3	/* have sent and received syn */
+/* states < TCPS_ESTABLISHED are those where connections not established */
+#define	TCPS_ESTABLISHED	4	/* established */
+#define	TCPS_CLOSE_WAIT		5	/* rcvd fin, waiting for close */
+/* states > TCPS_CLOSE_WAIT are those where user has closed */
+#define	TCPS_FIN_WAIT_1		6	/* have closed, sent fin */
+#define	TCPS_CLOSING		7	/* closed xchd FIN; await FIN ACK */
+#define	TCPS_LAST_ACK		8	/* had fin and close; await FIN ACK */
+/* states > TCPS_CLOSE_WAIT && < TCPS_FIN_WAIT_2 await ACK of FIN */
+#define	TCPS_FIN_WAIT_2		9	/* have closed, fin is acked */
+#define	TCPS_TIME_WAIT		10	/* in 2*msl quiet wait after close */
+
+/* for KAME src sync over BSD*'s */
+#define	TCP6_NSTATES		TCP_NSTATES
+#define	TCP6S_CLOSED		TCPS_CLOSED
+#define	TCP6S_LISTEN		TCPS_LISTEN
+#define	TCP6S_SYN_SENT		TCPS_SYN_SENT
+#define	TCP6S_SYN_RECEIVED	TCPS_SYN_RECEIVED
+#define	TCP6S_ESTABLISHED	TCPS_ESTABLISHED
+#define	TCP6S_CLOSE_WAIT	TCPS_CLOSE_WAIT
+#define	TCP6S_FIN_WAIT_1	TCPS_FIN_WAIT_1
+#define	TCP6S_CLOSING		TCPS_CLOSING
+#define	TCP6S_LAST_ACK		TCPS_LAST_ACK
+#define	TCP6S_FIN_WAIT_2	TCPS_FIN_WAIT_2
+#define	TCP6S_TIME_WAIT		TCPS_TIME_WAIT
+
+#define	TCPS_HAVERCVDSYN(s)	((s) >= TCPS_SYN_RECEIVED)	/* SYN_RECEIVED or any later state */
+#define	TCPS_HAVEESTABLISHED(s)	((s) >= TCPS_ESTABLISHED)	/* ESTABLISHED or any later state */
+#define	TCPS_HAVERCVDFIN(s)	((s) >= TCPS_TIME_WAIT)	/* NOTE(review): matches TIME_WAIT only; CLOSE_WAIT, CLOSING and LAST_ACK have also seen a FIN -- confirm callers */
+
+#ifdef	TCPOUTFLAGS
+/*
+ * Flags used when sending segments in tcp_output, indexed by TCPS_* state.
+ * Basic flags (TH_RST,TH_ACK,TH_SYN,TH_FIN) are totally
+ * determined by state, with the proviso that TH_FIN is sent only
+ * if all data queued for output is included in the segment.
+ */
+static u_char	tcp_outflags[TCP_NSTATES] = {
+	TH_RST|TH_ACK,		/* 0, CLOSED */
+	0,			/* 1, LISTEN */
+	TH_SYN,			/* 2, SYN_SENT */
+	TH_SYN|TH_ACK,		/* 3, SYN_RECEIVED */
+	TH_ACK,			/* 4, ESTABLISHED */
+	TH_ACK,			/* 5, CLOSE_WAIT */
+	TH_FIN|TH_ACK,		/* 6, FIN_WAIT_1 */
+	TH_FIN|TH_ACK,		/* 7, CLOSING */
+	TH_FIN|TH_ACK,		/* 8, LAST_ACK */
+	TH_ACK,			/* 9, FIN_WAIT_2 */
+	TH_ACK,			/* 10, TIME_WAIT */
+};	
+#endif
+
+#ifdef KPROF
+int	tcp_acounts[TCP_NSTATES][PRU_NREQ];	/* presumably counters per (state, request) -- confirm */
+#endif
+
+#ifdef	TCPSTATES
+char *tcpstates[] = {	/* printable state names, indexed by TCPS_* value */
+	"CLOSED",	"LISTEN",	"SYN_SENT",	"SYN_RCVD",
+	"ESTABLISHED",	"CLOSE_WAIT",	"FIN_WAIT_1",	"CLOSING",
+	"LAST_ACK",	"FIN_WAIT_2",	"TIME_WAIT",
+};
+#endif
+
+#endif
diff --git a/sys/netinet/tcp_input.c b/sys/netinet/tcp_input.c
new file mode 100644
index 0000000..bf578b7
--- /dev/null
+++ b/sys/netinet/tcp_input.c
@@ -0,0 +1,2885 @@
+/*
+ * Copyright (c) 1982, 1986, 1988, 1990, 1993, 1994, 1995
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)tcp_input.c	8.12 (Berkeley) 5/24/95
+ * $FreeBSD$
+ */
+
+#include "opt_ipfw.h"		/* for ipfw_fwd		*/
+#include "opt_inet6.h"
+#include "opt_ipsec.h"
+#include "opt_tcpdebug.h"
+#include "opt_tcp_input.h"
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/kernel.h>
+#include <sys/sysctl.h>
+#include <sys/malloc.h>
+#include <sys/mbuf.h>
+#include <sys/proc.h>		/* for proc0 declaration */
+#include <sys/protosw.h>
+#include <sys/socket.h>
+#include <sys/socketvar.h>
+#include <sys/syslog.h>
+
+#include <machine/cpu.h>	/* before tcp_seq.h, for tcp_random18() */
+
+#include <net/if.h>
+#include <net/route.h>
+
+#include <netinet/in.h>
+#include <netinet/in_systm.h>
+#include <netinet/ip.h>
+#include <netinet/ip_icmp.h>	/* for ICMP_BANDLIM		*/
+#include <netinet/in_var.h>
+#include <netinet/icmp_var.h>	/* for ICMP_BANDLIM		*/
+#include <netinet/in_pcb.h>
+#include <netinet/ip_var.h>
+#ifdef INET6
+#include <netinet/ip6.h>
+#include <netinet/icmp6.h>
+#include <netinet6/nd6.h>
+#include <netinet6/ip6_var.h>
+#include <netinet6/in6_pcb.h>
+#endif
+#include <netinet/tcp.h>
+#include <netinet/tcp_fsm.h>
+#include <netinet/tcp_seq.h>
+#include <netinet/tcp_timer.h>
+#include <netinet/tcp_var.h>
+#ifdef INET6
+#include <netinet6/tcp6_var.h>
+#endif
+#include <netinet/tcpip.h>
+#ifdef TCPDEBUG
+#include <netinet/tcp_debug.h>
+
+u_char tcp_saveipgen[40]; /* the size must be of max ip header, now IPv6 */
+struct tcphdr tcp_savetcp;
+#endif /* TCPDEBUG */
+
+#ifdef IPSEC
+#include <netinet6/ipsec.h>
+#ifdef INET6
+#include <netinet6/ipsec6.h>
+#endif
+#include <netkey/key.h>
+#endif /*IPSEC*/
+
+#include <machine/in_cksum.h>
+
+MALLOC_DEFINE(M_TSEGQ, "tseg_qent", "TCP segment queue entry");
+
+static int	tcprexmtthresh = 3;	/* duplicate-ACK threshold compared against t_dupacks */
+tcp_seq	tcp_iss;
+tcp_cc	tcp_ccgen;
+
+struct	tcpstat tcpstat;
+SYSCTL_STRUCT(_net_inet_tcp, TCPCTL_STATS, stats, CTLFLAG_RD, 
+    &tcpstat , tcpstat, "TCP statistics (struct tcpstat, netinet/tcp_var.h)");
+
+static int log_in_vain = 0;	/* 1: log SYNs that match no pcb; 2: log every such segment */
+SYSCTL_INT(_net_inet_tcp, OID_AUTO, log_in_vain, CTLFLAG_RW, 
+    &log_in_vain, 0, "Log all incoming TCP connections");
+
+static int blackhole = 0;	/* 1: drop SYNs that match no pcb without RST; else nonzero: drop all such */
+SYSCTL_INT(_net_inet_tcp, OID_AUTO, blackhole, CTLFLAG_RW,
+	&blackhole, 0, "Do not send RST when dropping refused connections");
+
+int tcp_delack_enabled = 1;	/* tested by DELAY_ACK() */
+SYSCTL_INT(_net_inet_tcp, OID_AUTO, delayed_ack, CTLFLAG_RW, 
+    &tcp_delack_enabled, 0, 
+    "Delay ACK to try and piggyback it onto a data packet");
+
+int tcp_lq_overflow = 1;	/* if set, a connection dropped for listen-queue room gets TF_LQ_OVERFLOW */
+SYSCTL_INT(_net_inet_tcp, OID_AUTO, tcp_lq_overflow, CTLFLAG_RW,
+    &tcp_lq_overflow, 0, 
+    "Listen Queue Overflow");
+
+#ifdef TCP_DROP_SYNFIN
+static int drop_synfin = 0;	/* checked in tcp_input() before the pcb lookup */
+SYSCTL_INT(_net_inet_tcp, OID_AUTO, drop_synfin, CTLFLAG_RW,
+    &drop_synfin, 0, "Drop TCP packets with SYN+FIN set");
+#endif
+
+struct inpcbhead tcb;
+#define	tcb6	tcb  /* for KAME src sync over BSD*'s */
+struct inpcbinfo tcbinfo;	/* passed to in_pcblookup_hash()/in6_pcblookup_hash() */
+
+static void	 tcp_dooptions __P((struct tcpcb *,
+	    u_char *, int, struct tcphdr *, struct tcpopt *));
+static void	 tcp_pulloutofband __P((struct socket *,
+	    struct tcphdr *, struct mbuf *, int));
+static int	 tcp_reass __P((struct tcpcb *, struct tcphdr *, int *,
+				struct mbuf *));
+static void	 tcp_xmit_timer __P((struct tcpcb *, int));
+static int	 tcp_newreno __P((struct tcpcb *, struct tcphdr *));
+
+/* ND6_HINT(tp): upper-layer hint for Neighbor Unreachability Detection. */
+#ifdef INET6
+#define ND6_HINT(tp) \
+do { \
+	if ((tp) && (tp)->t_inpcb && \
+	    ((tp)->t_inpcb->inp_vflag & INP_IPV6) != 0 && \
+	    (tp)->t_inpcb->in6p_route.ro_rt) \
+		nd6_nud_hint((tp)->t_inpcb->in6p_route.ro_rt, NULL, 0); \
+} while (0)
+#else
+#define ND6_HINT(tp)	/* expands to nothing without INET6 */
+#endif
+
+/*
+ * Indicate whether this ack should be delayed (no delack already pending).
+ */
+#define DELAY_ACK(tp) \
+	(tcp_delack_enabled && !callout_pending((tp)->tt_delack))
+
+/*
+ * Insert segment which includes th into reassembly queue of tcp with
+ * control block tp.  Sets flags to TH_FIN if reassembly now includes a
+ * segment with FIN.  The macro form does the common case inline (segment
+ * is the next to be received on an established connection, and the queue
+ * is empty), avoiding linkage into and removal from the queue and
+ * repetition of various conversions.  Set DELACK for segments received in
+ * order, but ack immediately when segments are out of order (so fast
+ * retransmit can work).  Arguments are evaluated more than once.
+ */
+#define	TCP_REASS(tp, th, tlenp, m, so, flags) { \
+	if ((th)->th_seq == (tp)->rcv_nxt && \
+	    LIST_EMPTY(&(tp)->t_segq) && \
+	    TCPS_HAVEESTABLISHED((tp)->t_state)) { \
+		if (DELAY_ACK(tp)) \
+			callout_reset((tp)->tt_delack, tcp_delacktime, \
+			    tcp_timer_delack, (tp)); \
+		else \
+			(tp)->t_flags |= TF_ACKNOW; \
+		(tp)->rcv_nxt += *(tlenp); \
+		(flags) = (th)->th_flags & TH_FIN; \
+		tcpstat.tcps_rcvpack++;\
+		tcpstat.tcps_rcvbyte += *(tlenp);\
+		ND6_HINT(tp); \
+		sbappend(&(so)->so_rcv, (m)); \
+		sorwakeup(so); \
+	} else { \
+		(flags) = tcp_reass((tp), (th), (tlenp), (m)); \
+		(tp)->t_flags |= TF_ACKNOW; \
+	} \
+}
+
+static int
+tcp_reass(tp, th, tlenp, m)
+	register struct tcpcb *tp;
+	register struct tcphdr *th;
+	int *tlenp;
+	struct mbuf *m;
+{
+	struct socket *so = tp->t_inpcb->inp_socket;
+	struct tseg_qent *prev = NULL;	/* last segment not after th */
+	struct tseg_qent *cur;		/* queue cursor */
+	struct tseg_qent *after;	/* successor saved across removal */
+	struct tseg_qent *ent;		/* entry for the new segment */
+	int overlap, flags;
+
+	/*
+	 * th == NULL: no new segment, the caller just wants data queued
+	 * before the connection got established pushed up to the socket.
+	 */
+	if (th == NULL)
+		goto present;
+
+	/* Get a queue entry first; without one the packet is dropped. XXX */
+	MALLOC(ent, struct tseg_qent *, sizeof (struct tseg_qent), M_TSEGQ,
+	       M_NOWAIT);
+	if (ent == NULL) {
+		m_freem(m);
+		tcpstat.tcps_rcvmemdrop++;
+		return (0);
+	}
+
+	/*
+	 * Stop at the first queued segment that starts after th; prev trails.
+	 */
+	cur = LIST_FIRST(&tp->t_segq);
+	while (cur != NULL && !SEQ_GT(cur->tqe_th->th_seq, th->th_seq)) {
+		prev = cur;
+		cur = LIST_NEXT(cur, tqe_q);
+	}
+
+	/*
+	 * The segment in front of us may already hold the start of our data.
+	 * Trim whatever it covers off the incoming segment; if it covers
+	 * everything, the incoming segment is a duplicate and is discarded.
+	 */
+	if (prev != NULL) {
+		/* storing the difference in an int handles seq wraparound */
+		overlap = prev->tqe_th->th_seq + prev->tqe_len - th->th_seq;
+		if (overlap > 0 && overlap >= *tlenp) {
+			/* Fully covered: count it as a duplicate, free it. */
+			tcpstat.tcps_rcvduppack++;
+			tcpstat.tcps_rcvdupbyte += *tlenp;
+			m_freem(m);
+			FREE(ent, M_TSEGQ);
+			/*
+			 * Still try to hand queued data at the left window
+			 * edge to the user; this is needed once the 3-WHS
+			 * has completed.
+			 */
+			goto present;	/* ??? */
+		}
+		if (overlap > 0) {
+			/* Partially covered: drop the leading overlap. */
+			m_adj(m, overlap);
+			*tlenp -= overlap;
+			th->th_seq += overlap;
+		}
+	}
+	tcpstat.tcps_rcvoopack++;
+	tcpstat.tcps_rcvoobyte += *tlenp;
+
+	/*
+	 * Now look at the segments behind us: shave the front off one we
+	 * partly overlap, and unlink and free any we cover completely.
+	 */
+	for (; cur != NULL; cur = after) {
+		overlap = (th->th_seq + *tlenp) - cur->tqe_th->th_seq;
+		if (overlap <= 0)
+			break;
+		if (overlap < cur->tqe_len) {
+			cur->tqe_th->th_seq += overlap;
+			cur->tqe_len -= overlap;
+			m_adj(cur->tqe_m, overlap);
+			break;
+		}
+
+		/* cur lies wholly inside the new segment: discard it. */
+		after = LIST_NEXT(cur, tqe_q);
+		LIST_REMOVE(cur, tqe_q);
+		m_freem(cur->tqe_m);
+		FREE(cur, M_TSEGQ);
+	}
+
+	/* Fill in the new entry and link it in right behind prev. */
+	ent->tqe_len = *tlenp;
+	ent->tqe_th = th;
+	ent->tqe_m = m;
+
+	if (prev != NULL) {
+		LIST_INSERT_AFTER(prev, ent, tqe_q);
+	} else {
+		LIST_INSERT_HEAD(&tp->t_segq, ent, tqe_q);
+	}
+
+present:
+	/*
+	 * Hand in-sequence data to the socket, moving rcv_nxt forward over
+	 * every queued segment that is now contiguous.
+	 */
+	if (!TCPS_HAVEESTABLISHED(tp->t_state))
+		return (0);
+	cur = LIST_FIRST(&tp->t_segq);
+	if (cur == NULL || cur->tqe_th->th_seq != tp->rcv_nxt)
+		return (0);
+	do {
+		after = LIST_NEXT(cur, tqe_q);
+		flags = cur->tqe_th->th_flags & TH_FIN;
+		tp->rcv_nxt += cur->tqe_len;
+		LIST_REMOVE(cur, tqe_q);
+		if ((so->so_state & SS_CANTRCVMORE) == 0)
+			sbappend(&so->so_rcv, cur->tqe_m);
+		else
+			m_freem(cur->tqe_m);
+		FREE(cur, M_TSEGQ);
+		cur = after;
+	} while (cur != NULL && cur->tqe_th->th_seq == tp->rcv_nxt);
+	ND6_HINT(tp);
+	sorwakeup(so);
+	return (flags);
+}
+
+/*
+ * TCP input routine, follows pages 65-76 of the
+ * protocol specification dated September, 1981 very closely.
+ */
+#ifdef INET6
+int
+tcp6_input(mp, offp, proto)
+	struct mbuf **mp;
+	int *offp, proto;
+{
+	register struct mbuf *m;
+	struct ip6_hdr *hdr;
+
+	m = *mp;
+	IP6_EXTHDR_CHECK(m, *offp, sizeof(struct tcphdr), IPPROTO_DONE);
+
+	if ((m->m_flags & M_ANYCAST6) == 0) {
+		tcp_input(m, *offp, proto);
+	} else {
+		/*
+		 * draft-itojun-ipv6-tcp-to-anycast: a packet flagged
+		 * M_ANYCAST6 gets an ICMPv6 address-unreachable error
+		 * pointing at the destination address field instead.
+		 */
+		hdr = mtod(m, struct ip6_hdr *);
+		icmp6_error(m, ICMP6_DST_UNREACH, ICMP6_DST_UNREACH_ADDR,
+			    (caddr_t)&hdr->ip6_dst - (caddr_t)hdr);
+	}
+	return (IPPROTO_DONE);
+}
+#endif
+
+void
+tcp_input(m, off0, proto)
+	register struct mbuf *m;
+	int off0, proto;
+{
+	register struct tcphdr *th;
+	register struct ip *ip = NULL;
+	register struct ipovly *ipov;
+	register struct inpcb *inp;
+	u_char *optp = NULL;
+	int optlen = 0;
+	int len, tlen, off;
+	int drop_hdrlen;
+	register struct tcpcb *tp = 0;
+	register int thflags;
+	struct socket *so = 0;
+	int todrop, acked, ourfinisacked, needoutput = 0;
+	struct in_addr laddr;
+#ifdef INET6
+	struct in6_addr laddr6;
+#endif
+	int dropsocket = 0;
+	int iss = 0;
+	u_long tiwin;
+	struct tcpopt to;		/* options in this segment */
+	struct rmxp_tao *taop;		/* pointer to our TAO cache entry */
+	struct rmxp_tao	tao_noncached;	/* in case there's no cached entry */
+#ifdef TCPDEBUG
+	short ostate = 0;
+#endif
+#ifdef INET6
+	struct ip6_hdr *ip6 = NULL;
+	int isipv6;
+#endif /* INET6 */
+	int rstreason; /* For badport_bandlim accounting purposes */
+
+#ifdef INET6
+	isipv6 = (mtod(m, struct ip *)->ip_v == 6) ? 1 : 0;
+#endif
+	bzero((char *)&to, sizeof(to));
+
+	tcpstat.tcps_rcvtotal++;
+
+#ifdef INET6
+	if (isipv6) {
+		/* IP6_EXTHDR_CHECK() is already done at tcp6_input() */
+		ip6 = mtod(m, struct ip6_hdr *);
+		tlen = sizeof(*ip6) + ntohs(ip6->ip6_plen) - off0;
+		if (in6_cksum(m, IPPROTO_TCP, off0, tlen)) {
+			tcpstat.tcps_rcvbadsum++;
+			goto drop;
+		}
+		th = (struct tcphdr *)((caddr_t)ip6 + off0);
+	} else
+#endif /* INET6 */
+      {
+	/*
+	 * Get IP and TCP header together in first mbuf.
+	 * Note: IP leaves IP header in first mbuf.
+	 */
+	if (off0 > sizeof (struct ip)) {
+		ip_stripoptions(m, (struct mbuf *)0);
+		off0 = sizeof(struct ip);
+	}
+	if (m->m_len < sizeof (struct tcpiphdr)) {
+		if ((m = m_pullup(m, sizeof (struct tcpiphdr))) == 0) {
+			tcpstat.tcps_rcvshort++;
+			return;
+		}
+	}
+	ip = mtod(m, struct ip *);
+	ipov = (struct ipovly *)ip;
+	th = (struct tcphdr *)((caddr_t)ip + off0);
+	tlen = ip->ip_len;
+
+	if (m->m_pkthdr.csum_flags & CSUM_DATA_VALID) {
+		if (m->m_pkthdr.csum_flags & CSUM_PSEUDO_HDR)
+                	th->th_sum = m->m_pkthdr.csum_data;
+		else
+	                th->th_sum = in_pseudo(ip->ip_src.s_addr,
+			    ip->ip_dst.s_addr, htonl(m->m_pkthdr.csum_data +
+			    ip->ip_len + IPPROTO_TCP));
+		th->th_sum ^= 0xffff;
+	} else {
+		/*
+		 * Checksum extended TCP header and data.
+		 */
+		len = sizeof (struct ip) + tlen;
+		bzero(ipov->ih_x1, sizeof(ipov->ih_x1));
+		ipov->ih_len = (u_short)tlen;
+		HTONS(ipov->ih_len);
+		th->th_sum = in_cksum(m, len);
+	}
+	if (th->th_sum) {
+		tcpstat.tcps_rcvbadsum++;
+		goto drop;
+	}
+#ifdef INET6
+	/* Re-initialization for later version check */
+	ip->ip_v = IPVERSION;
+#endif
+      }
+
+	/*
+	 * Check that TCP offset makes sense,
+	 * pull out TCP options and adjust length.		XXX
+	 */
+	off = th->th_off << 2;
+	if (off < sizeof (struct tcphdr) || off > tlen) {
+		tcpstat.tcps_rcvbadoff++;
+		goto drop;
+	}
+	tlen -= off;	/* tlen is used instead of ti->ti_len */
+	if (off > sizeof (struct tcphdr)) {
+#ifdef INET6
+		if (isipv6) {
+			IP6_EXTHDR_CHECK(m, off0, off, );
+			ip6 = mtod(m, struct ip6_hdr *);
+			th = (struct tcphdr *)((caddr_t)ip6 + off0);
+		} else
+#endif /* INET6 */
+	      {
+		if (m->m_len < sizeof(struct ip) + off) {
+			if ((m = m_pullup(m, sizeof (struct ip) + off)) == 0) {
+				tcpstat.tcps_rcvshort++;
+				return;
+			}
+			ip = mtod(m, struct ip *);
+			ipov = (struct ipovly *)ip;
+			th = (struct tcphdr *)((caddr_t)ip + off0);
+		}
+	      }
+		optlen = off - sizeof (struct tcphdr);
+		optp = (u_char *)(th + 1);
+	}
+	thflags = th->th_flags;
+
+#ifdef TCP_DROP_SYNFIN
+	/*
+	 * If the drop_synfin option is enabled, drop all packets with
+	 * both the SYN and FIN bits set. This prevents e.g. nmap from
+	 * identifying the TCP/IP stack.
+	 *
+	 * This is a violation of the TCP specification.
+	 */
+	if (drop_synfin && (thflags & (TH_SYN|TH_FIN)) == (TH_SYN|TH_FIN))
+		goto drop;
+#endif
+
+	/*
+	 * Convert TCP protocol specific fields to host format.
+	 */
+	NTOHL(th->th_seq);
+	NTOHL(th->th_ack);
+	NTOHS(th->th_win);
+	NTOHS(th->th_urp);
+
+	/*
+	 * Delay dropping TCP, IP headers, IPv6 ext headers, and TCP options,
+	 * until after ip6_savecontrol() is called and before other functions
+	 * which don't want those proto headers.
+	 * Because ip6_savecontrol() is going to parse the mbuf to
+	 * search for data to be passed up to user-land, it wants mbuf
+	 * parameters to be unchanged.
+	 */
+	drop_hdrlen = off0 + off;
+
+	/*
+	 * Locate pcb for segment.
+	 */
+findpcb:
+#ifdef IPFIREWALL_FORWARD
+	if (ip_fw_fwd_addr != NULL
+#ifdef INET6
+	    && isipv6 == NULL /* IPv6 support is not yet */
+#endif /* INET6 */
+	    ) {
+		/*
+		 * Diverted. Pretend to be the destination.
+		 * already got one like this? 
+		 */
+		inp = in_pcblookup_hash(&tcbinfo, ip->ip_src, th->th_sport,
+			ip->ip_dst, th->th_dport, 0, m->m_pkthdr.rcvif);
+		if (!inp) {
+			/* 
+			 * No, then it's new. Try find the ambushing socket
+			 */
+			if (!ip_fw_fwd_addr->sin_port) {
+				inp = in_pcblookup_hash(&tcbinfo, ip->ip_src,
+				    th->th_sport, ip_fw_fwd_addr->sin_addr,
+				    th->th_dport, 1, m->m_pkthdr.rcvif);
+			} else {
+				inp = in_pcblookup_hash(&tcbinfo,
+				    ip->ip_src, th->th_sport,
+	    			    ip_fw_fwd_addr->sin_addr,
+				    ntohs(ip_fw_fwd_addr->sin_port), 1,
+				    m->m_pkthdr.rcvif);
+			}
+		}
+		ip_fw_fwd_addr = NULL;
+	} else
+#endif	/* IPFIREWALL_FORWARD */
+      {
+#ifdef INET6
+	if (isipv6)
+		inp = in6_pcblookup_hash(&tcbinfo, &ip6->ip6_src, th->th_sport,
+					 &ip6->ip6_dst, th->th_dport, 1,
+					 m->m_pkthdr.rcvif);
+	else
+#endif /* INET6 */
+	inp = in_pcblookup_hash(&tcbinfo, ip->ip_src, th->th_sport,
+	    ip->ip_dst, th->th_dport, 1, m->m_pkthdr.rcvif);
+      }
+
+#ifdef IPSEC
+#ifdef INET6
+	if (isipv6) {
+		if (inp != NULL && ipsec6_in_reject_so(m, inp->inp_socket)) {
+			ipsec6stat.in_polvio++;
+			goto drop;
+		}
+	} else
+#endif /* INET6 */
+	if (inp != NULL && ipsec4_in_reject_so(m, inp->inp_socket)) {
+		ipsecstat.in_polvio++;
+		goto drop;
+	}
+#endif /*IPSEC*/
+
+	/*
+	 * If the state is CLOSED (i.e., TCB does not exist) then
+	 * all data in the incoming segment is discarded.
+	 * If the TCB exists but is in CLOSED state, it is embryonic,
+	 * but should either do a listen or a connect soon.
+	 */
+	if (inp == NULL) {
+		if (log_in_vain) {
+#ifdef INET6
+			char dbuf[INET6_ADDRSTRLEN], sbuf[INET6_ADDRSTRLEN];
+#else /* INET6 */
+			char dbuf[4*sizeof "123"], sbuf[4*sizeof "123"];
+#endif /* INET6 */
+
+#ifdef INET6
+			if (isipv6) {
+				strcpy(dbuf, ip6_sprintf(&ip6->ip6_dst));
+				strcpy(sbuf, ip6_sprintf(&ip6->ip6_src));
+			} else
+#endif
+		      {
+			strcpy(dbuf, inet_ntoa(ip->ip_dst));
+			strcpy(sbuf, inet_ntoa(ip->ip_src));
+		      }
+			switch (log_in_vain) {
+			case 1:
+				if(thflags & TH_SYN)
+					log(LOG_INFO,
+			    		"Connection attempt to TCP %s:%d from %s:%d\n",
+			    		dbuf, ntohs(th->th_dport),
+					sbuf,
+					ntohs(th->th_sport));
+				break;
+			case 2:
+				log(LOG_INFO,
+			    	"Connection attempt to TCP %s:%d from %s:%d flags:0x%x\n",
+			    	dbuf, ntohs(th->th_dport), sbuf,
+			    	ntohs(th->th_sport), thflags);
+				break;
+			default:
+				break;
+			}
+		}
+		if (blackhole) { 
+			switch (blackhole) {
+			case 1:
+				if (thflags & TH_SYN)
+					goto drop;
+				break;
+			case 2:
+				goto drop;
+			default:
+				goto drop;
+			}
+		}
+		rstreason = BANDLIM_RST_CLOSEDPORT;
+		goto dropwithreset;
+	}
+	tp = intotcpcb(inp);
+	if (tp == 0) {
+		rstreason = BANDLIM_RST_CLOSEDPORT;
+		goto dropwithreset;
+	}
+	if (tp->t_state == TCPS_CLOSED)
+		goto drop;
+
+	/* Unscale the window into a 32-bit value. */
+	if ((thflags & TH_SYN) == 0)
+		tiwin = th->th_win << tp->snd_scale;
+	else
+		tiwin = th->th_win;
+
+#ifdef INET6
+	/* save packet options if user wanted */
+	if (isipv6 && inp->in6p_flags & INP_CONTROLOPTS) {
+		if (inp->in6p_options) {
+			m_freem(inp->in6p_options);
+			inp->in6p_options = 0;
+		}
+		ip6_savecontrol(inp, &inp->in6p_options, ip6, m);
+	}
+        /* else, should also do ip_srcroute() here? */
+#endif /* INET6 */
+
+	so = inp->inp_socket;
+	if (so->so_options & (SO_DEBUG|SO_ACCEPTCONN)) {
+#ifdef TCPDEBUG
+		if (so->so_options & SO_DEBUG) {
+			ostate = tp->t_state;
+#ifdef INET6
+			if (isipv6)
+				bcopy((char *)ip6, (char *)tcp_saveipgen,
+				      sizeof(*ip6));
+			else
+#endif /* INET6 */
+			bcopy((char *)ip, (char *)tcp_saveipgen, sizeof(*ip));
+			tcp_savetcp = *th;
+		}
+#endif
+		if (so->so_options & SO_ACCEPTCONN) {
+			register struct tcpcb *tp0 = tp;
+			struct socket *so2;
+#ifdef IPSEC
+			struct socket *oso;
+#endif
+#ifdef INET6
+			struct inpcb *oinp = sotoinpcb(so);
+#endif /* INET6 */
+
+#ifndef IPSEC
+			/*
+			 * Current IPsec implementation makes incorrect IPsec
+			 * cache if this check is done here.
+			 * So delay this until duplicated socket is created.
+			 */
+			if ((thflags & (TH_RST|TH_ACK|TH_SYN)) != TH_SYN) {
+				/*
+				 * Note: dropwithreset makes sure we don't
+				 * send a RST in response to a RST.
+				 */
+				if (thflags & TH_ACK) {
+					tcpstat.tcps_badsyn++;
+					rstreason = BANDLIM_RST_OPENPORT;
+					goto dropwithreset;
+				}
+				goto drop;
+			}
+#endif
+			so2 = sonewconn(so, 0);
+			if (so2 == 0) {
+				tcpstat.tcps_listendrop++;
+				so2 = sodropablereq(so);
+				if (so2) {
+					if (tcp_lq_overflow)
+						sototcpcb(so2)->t_flags |= 
+						    TF_LQ_OVERFLOW;
+					tcp_drop(sototcpcb(so2), ETIMEDOUT);
+					so2 = sonewconn(so, 0);
+				}
+				if (!so2)
+					goto drop;
+			}
+#ifdef IPSEC
+			oso = so;
+#endif
+			so = so2;
+			/*
+			 * This is ugly, but ....
+			 *
+			 * Mark socket as temporary until we're
+			 * committed to keeping it.  The code at
+			 * ``drop'' and ``dropwithreset'' check the
+			 * flag dropsocket to see if the temporary
+			 * socket created here should be discarded.
+			 * We mark the socket as discardable until
+			 * we're committed to it below in TCPS_LISTEN.
+			 */
+			dropsocket++;
+			inp = (struct inpcb *)so->so_pcb;
+#ifdef INET6
+			if (isipv6)
+				inp->in6p_laddr = ip6->ip6_dst;
+			else {
+				if ((inp->inp_flags & IN6P_BINDV6ONLY) == 0) {
+					inp->inp_vflag &= ~INP_IPV6;
+					inp->inp_vflag |= INP_IPV4;
+				}
+#endif /* INET6 */
+			inp->inp_laddr = ip->ip_dst;
+#ifdef INET6
+			}
+#endif /* INET6 */
+			inp->inp_lport = th->th_dport;
+			if (in_pcbinshash(inp) != 0) {
+				/*
+				 * Undo the assignments above if we failed to
+				 * put the PCB on the hash lists.
+				 */
+#ifdef INET6
+				if (isipv6)
+					inp->in6p_laddr = in6addr_any;
+				else
+#endif /* INET6 */
+				inp->inp_laddr.s_addr = INADDR_ANY;
+				inp->inp_lport = 0;
+				goto drop;
+			}
+#ifdef IPSEC
+			/*
+			 * To avoid creating incorrectly cached IPsec
+			 * association, this needs to be done here.
+			 *
+			 * Subject: (KAME-snap 748)
+			 * From: Wayne Knowles <w.knowles@niwa.cri.nz>
+			 * ftp://ftp.kame.net/pub/mail-list/snap-users/748
+			 */
+			if ((thflags & (TH_RST|TH_ACK|TH_SYN)) != TH_SYN) {
+				/*
+				 * Note: dropwithreset makes sure we don't
+				 * send a RST in response to a RST.
+				 */
+				if (thflags & TH_ACK) {
+					tcpstat.tcps_badsyn++;
+					rstreason = BANDLIM_RST_OPENPORT;
+					goto dropwithreset;
+				}
+				goto drop;
+			}
+#endif
+#ifdef INET6
+			if (isipv6) {
+				/*
+				 * inherit socket options from the listening
+				 * socket.
+				 */
+				inp->inp_flags |=
+					oinp->inp_flags & INP_CONTROLOPTS;
+				if (inp->inp_flags & INP_CONTROLOPTS) {
+					if (inp->in6p_options) {
+						m_freem(inp->in6p_options);
+						inp->in6p_options = 0;
+					}
+					ip6_savecontrol(inp,
+							&inp->in6p_options,
+							ip6, m);
+				}
+			} else
+#endif /* INET6 */
+			inp->inp_options = ip_srcroute();
+#ifdef IPSEC
+			/* copy old policy into new socket's */
+			if (ipsec_copy_policy(sotoinpcb(oso)->inp_sp,
+			                      inp->inp_sp))
+				printf("tcp_input: could not copy policy\n");
+#endif
+			tp = intotcpcb(inp);
+			tp->t_state = TCPS_LISTEN;
+			tp->t_flags |= tp0->t_flags & (TF_NOPUSH|TF_NOOPT);
+
+			/* Compute proper scaling value from buffer space */
+			while (tp->request_r_scale < TCP_MAX_WINSHIFT &&
+			   TCP_MAXWIN << tp->request_r_scale <
+			   so->so_rcv.sb_hiwat)
+				tp->request_r_scale++;
+		}
+	}
+
+	/*
+	 * Segment received on connection.
+	 * Reset idle time and keep-alive timer.
+	 */
+	tp->t_rcvtime = ticks;
+	if (TCPS_HAVEESTABLISHED(tp->t_state))
+		callout_reset(tp->tt_keep, tcp_keepidle, tcp_timer_keep, tp);
+
+	/*
+	 * Process options if not in LISTEN state,
+	 * else do it below (after getting remote address).
+	 */
+	if (tp->t_state != TCPS_LISTEN)
+		tcp_dooptions(tp, optp, optlen, th, &to);
+
+	/*
+	 * Header prediction: check for the two common cases
+	 * of a uni-directional data xfer.  If the packet has
+	 * no control flags, is in-sequence, the window didn't
+	 * change and we're not retransmitting, it's a
+	 * candidate.  If the length is zero and the ack moved
+	 * forward, we're the sender side of the xfer.  Just
+	 * free the data acked & wake any higher level process
+	 * that was blocked waiting for space.  If the length
+	 * is non-zero and the ack didn't move, we're the
+	 * receiver side.  If we're getting packets in-order
+	 * (the reassembly queue is empty), add the data to
+	 * the socket buffer and note that we need a delayed ack.
+	 * Make sure that the hidden state-flags are also off.
+	 * Since we check for TCPS_ESTABLISHED above, it can only
+	 * be TF_NEEDSYN.
+	 */
+	if (tp->t_state == TCPS_ESTABLISHED &&
+	    (thflags & (TH_SYN|TH_FIN|TH_RST|TH_URG|TH_ACK)) == TH_ACK &&
+	    ((tp->t_flags & (TF_NEEDSYN|TF_NEEDFIN)) == 0) &&
+	    ((to.to_flag & TOF_TS) == 0 ||
+	     TSTMP_GEQ(to.to_tsval, tp->ts_recent)) &&
+	    /*
+	     * Using the CC option is compulsory if once started:
+	     *   the segment is OK if no T/TCP was negotiated or
+	     *   if the segment has a CC option equal to CCrecv
+	     */
+	    ((tp->t_flags & (TF_REQ_CC|TF_RCVD_CC)) != (TF_REQ_CC|TF_RCVD_CC) ||
+	     ((to.to_flag & TOF_CC) != 0 && to.to_cc == tp->cc_recv)) &&
+	    th->th_seq == tp->rcv_nxt &&
+	    tiwin && tiwin == tp->snd_wnd &&
+	    tp->snd_nxt == tp->snd_max) {
+
+		/*
+		 * If last ACK falls within this segment's sequence numbers,
+		 * record the timestamp.
+		 * NOTE that the test is modified according to the latest
+		 * proposal of the tcplw@cray.com list (Braden 1993/04/26).
+		 */
+		if ((to.to_flag & TOF_TS) != 0 &&
+		   SEQ_LEQ(th->th_seq, tp->last_ack_sent)) {
+			tp->ts_recent_age = ticks;
+			tp->ts_recent = to.to_tsval;
+		}
+
+		if (tlen == 0) {
+			if (SEQ_GT(th->th_ack, tp->snd_una) &&
+			    SEQ_LEQ(th->th_ack, tp->snd_max) &&
+			    tp->snd_cwnd >= tp->snd_wnd &&
+			    tp->t_dupacks < tcprexmtthresh) {
+				/*
+				 * this is a pure ack for outstanding data.
+				 */
+				++tcpstat.tcps_predack;
+				/*
+				 * "bad retransmit" recovery
+				 */
+				if (tp->t_rxtshift == 1 &&
+				    ticks < tp->t_badrxtwin) {
+					tp->snd_cwnd = tp->snd_cwnd_prev;
+					tp->snd_ssthresh =
+					    tp->snd_ssthresh_prev;
+					tp->snd_nxt = tp->snd_max;
+					tp->t_badrxtwin = 0;
+				}
+				if ((to.to_flag & TOF_TS) != 0)
+					tcp_xmit_timer(tp,
+					    ticks - to.to_tsecr + 1);
+				else if (tp->t_rtttime &&
+					    SEQ_GT(th->th_ack, tp->t_rtseq))
+					tcp_xmit_timer(tp, ticks - tp->t_rtttime);
+				acked = th->th_ack - tp->snd_una;
+				tcpstat.tcps_rcvackpack++;
+				tcpstat.tcps_rcvackbyte += acked;
+				sbdrop(&so->so_snd, acked);
+				tp->snd_una = th->th_ack;
+				m_freem(m);
+				ND6_HINT(tp); /* some progress has been done */
+
+				/*
+				 * If all outstanding data are acked, stop
+				 * retransmit timer, otherwise restart timer
+				 * using current (possibly backed-off) value.
+				 * If process is waiting for space,
+				 * wakeup/selwakeup/signal.  If data
+				 * are ready to send, let tcp_output
+				 * decide between more output or persist.
+				 */
+				if (tp->snd_una == tp->snd_max)
+					callout_stop(tp->tt_rexmt);
+				else if (!callout_active(tp->tt_persist))
+					callout_reset(tp->tt_rexmt, 
+						      tp->t_rxtcur,
+						      tcp_timer_rexmt, tp);
+
+				sowwakeup(so);
+				if (so->so_snd.sb_cc)
+					(void) tcp_output(tp);
+				return;
+			}
+		} else if (th->th_ack == tp->snd_una &&
+		    LIST_EMPTY(&tp->t_segq) &&
+		    tlen <= sbspace(&so->so_rcv)) {
+			/*
+			 * this is a pure, in-sequence data packet
+			 * with nothing on the reassembly queue and
+			 * we have enough buffer space to take it.
+			 */
+			++tcpstat.tcps_preddat;
+			tp->rcv_nxt += tlen;
+			tcpstat.tcps_rcvpack++;
+			tcpstat.tcps_rcvbyte += tlen;
+			ND6_HINT(tp);	/* some progress has been done */
+			/*
+			 * Add data to socket buffer.
+			 */
+			m_adj(m, drop_hdrlen);	/* delayed header drop */
+			sbappend(&so->so_rcv, m);
+			sorwakeup(so);
+			if (DELAY_ACK(tp)) {
+	                        callout_reset(tp->tt_delack, tcp_delacktime,
+	                            tcp_timer_delack, tp);
+			} else {
+				tp->t_flags |= TF_ACKNOW;
+				tcp_output(tp);
+			}
+			return;
+		}
+	}
+
+	/*
+	 * Calculate amount of space in receive window,
+	 * and then do TCP input processing.
+	 * Receive window is amount of space in rcv queue,
+	 * but not less than advertised window.
+	 */
+	{ int win;
+
+	win = sbspace(&so->so_rcv);
+	if (win < 0)
+		win = 0;
+	tp->rcv_wnd = imax(win, (int)(tp->rcv_adv - tp->rcv_nxt));
+	}
+
+	switch (tp->t_state) {
+
+	/*
+	 * If the state is LISTEN then ignore segment if it contains an RST.
+	 * If the segment contains an ACK then it is bad and send a RST.
+	 * If it does not contain a SYN then it is not interesting; drop it.
+	 * If it is from this socket, drop it, it must be forged.
+	 * Don't bother responding if the destination was a broadcast.
+	 * Otherwise initialize tp->rcv_nxt, and tp->irs, select an initial
+	 * tp->iss, and send a segment:
+	 *     <SEQ=ISS><ACK=RCV_NXT><CTL=SYN,ACK>
+	 * Also initialize tp->snd_nxt to tp->iss+1 and tp->snd_una to tp->iss.
+	 * Fill in remote peer address fields if not previously specified.
+	 * Enter SYN_RECEIVED state, and process any other fields of this
+	 * segment in this state.
+	 */
+	case TCPS_LISTEN: {
+		register struct sockaddr_in *sin;
+#ifdef INET6
+		register struct sockaddr_in6 *sin6;
+#endif
+
+		if (thflags & TH_RST)
+			goto drop;
+		if (thflags & TH_ACK) {
+			rstreason = BANDLIM_RST_OPENPORT;
+			goto dropwithreset;
+		}
+		if ((thflags & TH_SYN) == 0)
+			goto drop;
+		if (th->th_dport == th->th_sport) {
+#ifdef INET6
+			if (isipv6) {
+				if (IN6_ARE_ADDR_EQUAL(&ip6->ip6_dst,
+						       &ip6->ip6_src))
+					goto drop;
+			} else
+#endif /* INET6 */
+			if (ip->ip_dst.s_addr == ip->ip_src.s_addr)
+				goto drop;
+		}
+		/*
+		 * RFC1122 4.2.3.10, p. 104: discard bcast/mcast SYN
+		 * in_broadcast() should never return true on a received
+		 * packet with M_BCAST not set.
+ 		 *
+ 		 * Packets with a multicast source address should also
+ 		 * be discarded.
+		 */
+		if (m->m_flags & (M_BCAST|M_MCAST))
+			goto drop;
+#ifdef INET6
+		if (isipv6) {
+			if (IN6_IS_ADDR_MULTICAST(&ip6->ip6_dst) ||
+			    IN6_IS_ADDR_MULTICAST(&ip6->ip6_src))
+				goto drop;
+		} else
+#endif
+		if (IN_MULTICAST(ntohl(ip->ip_dst.s_addr)) ||
+		    IN_MULTICAST(ntohl(ip->ip_src.s_addr)) ||
+		    ip->ip_src.s_addr == htonl(INADDR_BROADCAST))
+			goto drop;
+#ifdef INET6
+		if (isipv6) {
+			MALLOC(sin6, struct sockaddr_in6 *, sizeof *sin6,
+			       M_SONAME, M_NOWAIT | M_ZERO);
+			if (sin6 == NULL)
+				goto drop;
+			sin6->sin6_family = AF_INET6;
+			sin6->sin6_len = sizeof(*sin6);
+			sin6->sin6_addr = ip6->ip6_src;
+			sin6->sin6_port = th->th_sport;
+			laddr6 = inp->in6p_laddr;
+			if (IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_laddr))
+				inp->in6p_laddr = ip6->ip6_dst;
+			if (in6_pcbconnect(inp, (struct sockaddr *)sin6,
+					   &proc0)) {
+				inp->in6p_laddr = laddr6;
+				FREE(sin6, M_SONAME);
+				goto drop;
+			}
+			FREE(sin6, M_SONAME);
+		} else
+#endif
+	      {
+		MALLOC(sin, struct sockaddr_in *, sizeof *sin, M_SONAME,
+		       M_NOWAIT);
+		if (sin == NULL)
+			goto drop;
+		sin->sin_family = AF_INET;
+		sin->sin_len = sizeof(*sin);
+		sin->sin_addr = ip->ip_src;
+		sin->sin_port = th->th_sport;
+		bzero((caddr_t)sin->sin_zero, sizeof(sin->sin_zero));
+		laddr = inp->inp_laddr;
+		if (inp->inp_laddr.s_addr == INADDR_ANY)
+			inp->inp_laddr = ip->ip_dst;
+		if (in_pcbconnect(inp, (struct sockaddr *)sin, &proc0)) {
+			inp->inp_laddr = laddr;
+			FREE(sin, M_SONAME);
+			goto drop;
+		}
+		FREE(sin, M_SONAME);
+	      }
+		tp->t_template = tcp_template(tp);
+		if (tp->t_template == 0) {
+			tp = tcp_drop(tp, ENOBUFS);
+			dropsocket = 0;		/* socket is already gone */
+			goto drop;
+		}
+		if ((taop = tcp_gettaocache(inp)) == NULL) {
+			taop = &tao_noncached;
+			bzero(taop, sizeof(*taop));
+		}
+		tcp_dooptions(tp, optp, optlen, th, &to);
+		if (iss)
+			tp->iss = iss;
+		else {
+			tp->iss = tcp_rndiss_next();
+ 		}
+		tp->irs = th->th_seq;
+		tcp_sendseqinit(tp);
+		tcp_rcvseqinit(tp);
+		tp->snd_recover = tp->snd_una;
+		/*
+		 * Initialization of the tcpcb for transaction;
+		 *   set SND.WND = SEG.WND,
+		 *   initialize CCsend and CCrecv.
+		 */
+		tp->snd_wnd = tiwin;	/* initial send-window */
+		tp->cc_send = CC_INC(tcp_ccgen);
+		tp->cc_recv = to.to_cc;
+		/*
+		 * Perform TAO test on incoming CC (SEG.CC) option, if any.
+		 * - compare SEG.CC against cached CC from the same host,
+		 *	if any.
+		 * - if SEG.CC > cached value, SYN must be new and is accepted
+		 *	immediately: save new CC in the cache, mark the socket
+		 *	connected, enter ESTABLISHED state, turn on flag to
+		 *	send a SYN in the next segment.
+		 *	A virtual advertised window is set in rcv_adv to
+		 *	initialize SWS prevention.  Then enter normal segment
+		 *	processing: drop SYN, process data and FIN.
+		 * - otherwise do a normal 3-way handshake.
+		 */
+		if ((to.to_flag & TOF_CC) != 0) {
+		    if (((tp->t_flags & TF_NOPUSH) != 0) &&
+			taop->tao_cc != 0 && CC_GT(to.to_cc, taop->tao_cc)) {
+
+			taop->tao_cc = to.to_cc;
+			tp->t_starttime = ticks;
+			tp->t_state = TCPS_ESTABLISHED;
+
+			/*
+			 * If there is a FIN, or if there is data and the
+			 * connection is local, then delay SYN,ACK(SYN) in
+			 * the hope of piggy-backing it on a response
+			 * segment.  Otherwise must send ACK now in case
+			 * the other side is slow starting.
+			 */
+			if (DELAY_ACK(tp) && ((thflags & TH_FIN) ||
+			    (tlen != 0 &&
+#ifdef INET6
+			      ((isipv6 && in6_localaddr(&inp->in6p_faddr))
+			      ||
+			      (!isipv6 &&
+#endif
+			    in_localaddr(inp->inp_faddr)
+#ifdef INET6
+			       ))
+#endif
+			     ))) {
+                                callout_reset(tp->tt_delack, tcp_delacktime,  
+                                    tcp_timer_delack, tp);  
+				tp->t_flags |= TF_NEEDSYN;
+			} else 
+				tp->t_flags |= (TF_ACKNOW | TF_NEEDSYN);
+
+			/*
+			 * Limit the `virtual advertised window' to TCP_MAXWIN
+			 * here.  Even if we requested window scaling, it will
+			 * become effective only later when our SYN is acked.
+			 */
+			tp->rcv_adv += min(tp->rcv_wnd, TCP_MAXWIN);
+			tcpstat.tcps_connects++;
+			soisconnected(so);
+			callout_reset(tp->tt_keep, tcp_keepinit,
+				      tcp_timer_keep, tp);
+			dropsocket = 0;		/* committed to socket */
+			tcpstat.tcps_accepts++;
+			goto trimthenstep6;
+		    }
+		/* else do standard 3-way handshake */
+		} else {
+		    /*
+		     * No CC option, but maybe CC.NEW:
+		     *   invalidate cached value.
+		     */
+		     taop->tao_cc = 0;
+		}
+		/*
+		 * TAO test failed or there was no CC option,
+		 *    do a standard 3-way handshake.
+		 */
+		tp->t_flags |= TF_ACKNOW;
+		tp->t_state = TCPS_SYN_RECEIVED;
+		callout_reset(tp->tt_keep, tcp_keepinit, tcp_timer_keep, tp);
+		dropsocket = 0;		/* committed to socket */
+		tcpstat.tcps_accepts++;
+		goto trimthenstep6;
+		}
+
+	/*
+	 * If the state is SYN_RECEIVED:
+	 *	if seg contains an ACK, but not for our SYN/ACK, send a RST.
+	 */
+	case TCPS_SYN_RECEIVED:
+		if ((thflags & TH_ACK) &&
+		    (SEQ_LEQ(th->th_ack, tp->snd_una) ||
+		     SEQ_GT(th->th_ack, tp->snd_max))) {
+				rstreason = BANDLIM_RST_OPENPORT;
+				goto dropwithreset;
+		}
+		break;
+
+	/*
+	 * If the state is SYN_SENT:
+	 *	if seg contains an ACK, but not for our SYN, drop the input.
+	 *	if seg contains a RST, then drop the connection.
+	 *	if seg does not contain SYN, then drop it.
+	 * Otherwise this is an acceptable SYN segment
+	 *	initialize tp->rcv_nxt and tp->irs
+	 *	if seg contains ack then advance tp->snd_una
+	 *	if SYN has been acked change to ESTABLISHED else SYN_RCVD state
+	 *	arrange for segment to be acked (eventually)
+	 *	continue processing rest of data/controls, beginning with URG
+	 */
+	case TCPS_SYN_SENT:
+		if ((taop = tcp_gettaocache(inp)) == NULL) {
+			taop = &tao_noncached;
+			bzero(taop, sizeof(*taop));
+		}
+
+		if ((thflags & TH_ACK) &&
+		    (SEQ_LEQ(th->th_ack, tp->iss) ||
+		     SEQ_GT(th->th_ack, tp->snd_max))) {
+			/*
+			 * If we have a cached CCsent for the remote host,
+			 * hence we haven't just crashed and restarted,
+			 * do not send a RST.  This may be a retransmission
+			 * from the other side after our earlier ACK was lost.
+			 * Our new SYN, when it arrives, will serve as the
+			 * needed ACK.
+			 */
+			if (taop->tao_ccsent != 0)
+				goto drop;
+			else {
+				rstreason = BANDLIM_UNLIMITED;
+				goto dropwithreset;
+			}
+		}
+		if (thflags & TH_RST) {
+			if (thflags & TH_ACK)
+				tp = tcp_drop(tp, ECONNREFUSED);
+			goto drop;
+		}
+		if ((thflags & TH_SYN) == 0)
+			goto drop;
+		tp->snd_wnd = th->th_win;	/* initial send window */
+		tp->cc_recv = to.to_cc;		/* foreign CC */
+
+		tp->irs = th->th_seq;
+		tcp_rcvseqinit(tp);
+		if (thflags & TH_ACK) {
+			/*
+			 * Our SYN was acked.  If segment contains CC.ECHO
+			 * option, check it to make sure this segment really
+			 * matches our SYN.  If not, just drop it as old
+			 * duplicate, but send an RST if we're still playing
+			 * by the old rules.  If no CC.ECHO option, make sure
+			 * we don't get fooled into using T/TCP.
+			 */
+			if (to.to_flag & TOF_CCECHO) {
+				if (tp->cc_send != to.to_ccecho) {
+					if (taop->tao_ccsent != 0)
+						goto drop;
+					else {
+						rstreason = BANDLIM_UNLIMITED;
+						goto dropwithreset;
+					}
+				}
+			} else
+				tp->t_flags &= ~TF_RCVD_CC;
+			tcpstat.tcps_connects++;
+			soisconnected(so);
+			/* Do window scaling on this connection? */
+			if ((tp->t_flags & (TF_RCVD_SCALE|TF_REQ_SCALE)) ==
+				(TF_RCVD_SCALE|TF_REQ_SCALE)) {
+				tp->snd_scale = tp->requested_s_scale;
+				tp->rcv_scale = tp->request_r_scale;
+			}
+			/* Segment is acceptable, update cache if undefined. */
+			if (taop->tao_ccsent == 0)
+				taop->tao_ccsent = to.to_ccecho;
+
+			tp->rcv_adv += tp->rcv_wnd;
+			tp->snd_una++;		/* SYN is acked */
+			/*
+			 * If there's data, delay ACK; if there's also a FIN
+			 * ACKNOW will be turned on later.
+			 */
+			if (DELAY_ACK(tp) && tlen != 0)
+                                callout_reset(tp->tt_delack, tcp_delacktime,  
+                                    tcp_timer_delack, tp);  
+			else
+				tp->t_flags |= TF_ACKNOW;
+			/*
+			 * Received <SYN,ACK> in SYN_SENT[*] state.
+			 * Transitions:
+			 *	SYN_SENT  --> ESTABLISHED
+			 *	SYN_SENT* --> FIN_WAIT_1
+			 */
+			tp->t_starttime = ticks;
+			if (tp->t_flags & TF_NEEDFIN) {
+				tp->t_state = TCPS_FIN_WAIT_1;
+				tp->t_flags &= ~TF_NEEDFIN;
+				thflags &= ~TH_SYN;
+			} else {
+				tp->t_state = TCPS_ESTABLISHED;
+				callout_reset(tp->tt_keep, tcp_keepidle,
+					      tcp_timer_keep, tp);
+			}
+		} else {
+		/*
+		 *  Received initial SYN in SYN-SENT[*] state => simul-
+		 *  taneous open.  If segment contains CC option and there is
+		 *  a cached CC, apply TAO test; if it succeeds, connection is
+		 *  half-synchronized.  Otherwise, do 3-way handshake:
+		 *        SYN-SENT -> SYN-RECEIVED
+		 *        SYN-SENT* -> SYN-RECEIVED*
+		 *  If there was no CC option, clear cached CC value.
+		 */
+			tp->t_flags |= TF_ACKNOW;
+			callout_stop(tp->tt_rexmt);
+			if (to.to_flag & TOF_CC) {
+				if (taop->tao_cc != 0 &&
+				    CC_GT(to.to_cc, taop->tao_cc)) {
+					/*
+					 * update cache and make transition:
+					 *        SYN-SENT -> ESTABLISHED*
+					 *        SYN-SENT* -> FIN-WAIT-1*
+					 */
+					taop->tao_cc = to.to_cc;
+					tp->t_starttime = ticks;
+					if (tp->t_flags & TF_NEEDFIN) {
+						tp->t_state = TCPS_FIN_WAIT_1;
+						tp->t_flags &= ~TF_NEEDFIN;
+					} else {
+						tp->t_state = TCPS_ESTABLISHED;
+						callout_reset(tp->tt_keep,
+							      tcp_keepidle,
+							      tcp_timer_keep,
+							      tp);
+					}
+					tp->t_flags |= TF_NEEDSYN;
+				} else
+					tp->t_state = TCPS_SYN_RECEIVED;
+			} else {
+				/* CC.NEW or no option => invalidate cache */
+				taop->tao_cc = 0;
+				tp->t_state = TCPS_SYN_RECEIVED;
+			}
+		}
+
+trimthenstep6:
+		/*
+		 * Advance th->th_seq to correspond to first data byte.
+		 * If data, trim to stay within window,
+		 * dropping FIN if necessary.
+		 */
+		th->th_seq++;
+		if (tlen > tp->rcv_wnd) {
+			todrop = tlen - tp->rcv_wnd;
+			m_adj(m, -todrop);
+			tlen = tp->rcv_wnd;
+			thflags &= ~TH_FIN;
+			tcpstat.tcps_rcvpackafterwin++;
+			tcpstat.tcps_rcvbyteafterwin += todrop;
+		}
+		tp->snd_wl1 = th->th_seq - 1;
+		tp->rcv_up = th->th_seq;
+		/*
+		 *  Client side of transaction: already sent SYN and data.
+		 *  If the remote host used T/TCP to validate the SYN,
+		 *  our data will be ACK'd; if so, enter normal data segment
+		 *  processing in the middle of step 5, ack processing.
+		 *  Otherwise, goto step 6.
+		 */
+ 		if (thflags & TH_ACK)
+			goto process_ACK;
+		goto step6;
+	/*
+	 * If the state is LAST_ACK or CLOSING or TIME_WAIT:
+	 *	if segment contains a SYN and CC [not CC.NEW] option:
+	 *              if state == TIME_WAIT and connection duration > MSL,
+	 *                  drop packet and send RST;
+	 *
+	 *		if SEG.CC > CCrecv then is new SYN, and can implicitly
+	 *		    ack the FIN (and data) in retransmission queue.
+	 *                  Complete close and delete TCPCB.  Then reprocess
+	 *                  segment, hoping to find new TCPCB in LISTEN state;
+	 *
+	 *		else must be old SYN; drop it.
+	 *      else do normal processing.
+	 */
+	case TCPS_LAST_ACK:
+	case TCPS_CLOSING:
+	case TCPS_TIME_WAIT:
+		if ((thflags & TH_SYN) &&
+		    (to.to_flag & TOF_CC) && tp->cc_recv != 0) {
+			if (tp->t_state == TCPS_TIME_WAIT &&
+					(ticks - tp->t_starttime) > tcp_msl) {
+				rstreason = BANDLIM_UNLIMITED;
+				goto dropwithreset;
+			}
+			if (CC_GT(to.to_cc, tp->cc_recv)) {
+				tp = tcp_close(tp);
+				goto findpcb;
+			}
+			else
+				goto drop;
+		}
+ 		break;  /* continue normal processing */
+	}
+
+	/*
+	 * States other than LISTEN or SYN_SENT.
+	 * First check the RST flag and sequence number since reset segments
+	 * are exempt from the timestamp and connection count tests.  This
+	 * fixes a bug introduced by the Stevens, vol. 2, p. 960 bugfix
+	 * below which allowed reset segments in half the sequence space
+	 * to fall through and be processed (which gives forged reset
+	 * segments with a random sequence number a 50 percent chance of
+	 * killing a connection).
+	 * Then check timestamp, if present.
+	 * Then check the connection count, if present.
+	 * Then check that at least some bytes of segment are within
+	 * receive window.  If segment begins before rcv_nxt,
+	 * drop leading data (and SYN); if nothing left, just ack.
+	 *
+	 *
+	 * If the RST bit is set, check the sequence number to see
+	 * if this is a valid reset segment.
+	 * RFC 793 page 37:
+	 *   In all states except SYN-SENT, all reset (RST) segments
+	 *   are validated by checking their SEQ-fields.  A reset is
+	 *   valid if its sequence number is in the window.
+	 * Note: this does not take into account delayed ACKs, so
+	 *   we should test against last_ack_sent instead of rcv_nxt.
+	 *   The sequence number in the reset segment is normally an
+	 *   echo of our outgoing acknowledgement numbers, but some hosts
+	 *   send a reset with the sequence number at the rightmost edge
+	 *   of our receive window, and we have to handle this case.
+	 * If we have multiple segments in flight, the initial reset
+	 * segment sequence numbers will be to the left of last_ack_sent,
+	 * but they will eventually catch up.
+	 * In any case, it never made sense to trim reset segments to
+	 * fit the receive window since RFC 1122 says:
+	 *   4.2.2.12  RST Segment: RFC-793 Section 3.4
+	 *
+	 *    A TCP SHOULD allow a received RST segment to include data.
+	 *
+	 *    DISCUSSION
+	 *         It has been suggested that a RST segment could contain
+	 *         ASCII text that encoded and explained the cause of the
+	 *         RST.  No standard has yet been established for such
+	 *         data.
+	 *
+	 * If the reset segment passes the sequence number test examine
+	 * the state:
+	 *    SYN_RECEIVED STATE:
+	 *	If passive open, return to LISTEN state.
+	 *	If active open, inform user that connection was refused.
+	 *    ESTABLISHED, FIN_WAIT_1, FIN_WAIT2, CLOSE_WAIT STATES:
+	 *	Inform user that connection was reset, and close tcb.
+	 *    CLOSING, LAST_ACK STATES:
+	 *	Close the tcb.
+	 *    TIME_WAIT STATE:
+	 *	Drop the segment - see Stevens, vol. 2, p. 964 and
+	 *      RFC 1337.
+	 */
+	if (thflags & TH_RST) {
+		if (SEQ_GEQ(th->th_seq, tp->last_ack_sent) &&
+		    SEQ_LT(th->th_seq, tp->last_ack_sent + tp->rcv_wnd)) {
+			switch (tp->t_state) {
+
+			case TCPS_SYN_RECEIVED:
+				so->so_error = ECONNREFUSED;
+				goto close;
+
+			case TCPS_ESTABLISHED:
+			case TCPS_FIN_WAIT_1:
+			case TCPS_FIN_WAIT_2:
+			case TCPS_CLOSE_WAIT:
+				so->so_error = ECONNRESET;
+			close:
+				tp->t_state = TCPS_CLOSED;
+				tcpstat.tcps_drops++;
+				tp = tcp_close(tp);
+				break;
+
+			case TCPS_CLOSING:
+			case TCPS_LAST_ACK:
+				tp = tcp_close(tp);
+				break;
+
+			case TCPS_TIME_WAIT:
+				break;
+			}
+		}
+		goto drop;
+	}
+
+	/*
+	 * RFC 1323 PAWS: If we have a timestamp reply on this segment
+	 * and it's less than ts_recent, drop it.
+	 */
+	if ((to.to_flag & TOF_TS) != 0 && tp->ts_recent &&
+	    TSTMP_LT(to.to_tsval, tp->ts_recent)) {
+
+		/* Check to see if ts_recent is over 24 days old.  */
+		if ((int)(ticks - tp->ts_recent_age) > TCP_PAWS_IDLE) {
+			/*
+			 * Invalidate ts_recent.  If this segment updates
+			 * ts_recent, the age will be reset later and ts_recent
+			 * will get a valid value.  If it does not, setting
+			 * ts_recent to zero will at least satisfy the
+			 * requirement that zero be placed in the timestamp
+			 * echo reply when ts_recent isn't valid.  The
+			 * age isn't reset until we get a valid ts_recent
+			 * because we don't want out-of-order segments to be
+			 * dropped when ts_recent is old.
+			 */
+			tp->ts_recent = 0;
+		} else {
+			tcpstat.tcps_rcvduppack++;
+			tcpstat.tcps_rcvdupbyte += tlen;
+			tcpstat.tcps_pawsdrop++;
+			goto dropafterack;
+		}
+	}
+
+	/*
+	 * T/TCP mechanism
+	 *   If T/TCP was negotiated and the segment doesn't have CC,
+	 *   or if its CC is wrong then drop the segment.
+	 *   RST segments do not have to comply with this.
+	 */
+	if ((tp->t_flags & (TF_REQ_CC|TF_RCVD_CC)) == (TF_REQ_CC|TF_RCVD_CC) &&
+	    ((to.to_flag & TOF_CC) == 0 || tp->cc_recv != to.to_cc))
+ 		goto dropafterack;
+
+	/*
+	 * In the SYN-RECEIVED state, validate that the packet belongs to
+	 * this connection before trimming the data to fit the receive
+	 * window.  Check the sequence number versus IRS since we know
+	 * the sequence numbers haven't wrapped.  This is a partial fix
+	 * for the "LAND" DoS attack.
+	 */
+	if (tp->t_state == TCPS_SYN_RECEIVED && SEQ_LT(th->th_seq, tp->irs)) {
+		rstreason = BANDLIM_RST_OPENPORT;
+		goto dropwithreset;
+	}
+
+	todrop = tp->rcv_nxt - th->th_seq;
+	if (todrop > 0) {
+		if (thflags & TH_SYN) {
+			thflags &= ~TH_SYN;
+			th->th_seq++;
+			if (th->th_urp > 1)
+				th->th_urp--;
+			else
+				thflags &= ~TH_URG;
+			todrop--;
+		}
+		/*
+		 * Following if statement from Stevens, vol. 2, p. 960.
+		 */
+		if (todrop > tlen
+		    || (todrop == tlen && (thflags & TH_FIN) == 0)) {
+			/*
+			 * Any valid FIN must be to the left of the window.
+			 * At this point the FIN must be a duplicate or out
+			 * of sequence; drop it.
+			 */
+			thflags &= ~TH_FIN;
+
+			/*
+			 * Send an ACK to resynchronize and drop any data.
+			 * But keep on processing for RST or ACK.
+			 */
+			tp->t_flags |= TF_ACKNOW;
+			todrop = tlen;
+			tcpstat.tcps_rcvduppack++;
+			tcpstat.tcps_rcvdupbyte += todrop;
+		} else {
+			tcpstat.tcps_rcvpartduppack++;
+			tcpstat.tcps_rcvpartdupbyte += todrop;
+		}
+		drop_hdrlen += todrop;	/* drop from the top afterwards */
+		th->th_seq += todrop;
+		tlen -= todrop;
+		if (th->th_urp > todrop)
+			th->th_urp -= todrop;
+		else {
+			thflags &= ~TH_URG;
+			th->th_urp = 0;
+		}
+	}
+
+	/*
+	 * If new data are received on a connection after the
+	 * user processes are gone, then RST the other end.
+	 */
+	if ((so->so_state & SS_NOFDREF) &&
+	    tp->t_state > TCPS_CLOSE_WAIT && tlen) {
+		tp = tcp_close(tp);
+		tcpstat.tcps_rcvafterclose++;
+		rstreason = BANDLIM_UNLIMITED;
+		goto dropwithreset;
+	}
+
+	/*
+	 * If segment ends after window, drop trailing data
+	 * (and PUSH and FIN); if nothing left, just ACK.
+	 */
+	todrop = (th->th_seq+tlen) - (tp->rcv_nxt+tp->rcv_wnd);
+	if (todrop > 0) {
+		tcpstat.tcps_rcvpackafterwin++;
+		if (todrop >= tlen) {
+			tcpstat.tcps_rcvbyteafterwin += tlen;
+			/*
+			 * If a new connection request is received
+			 * while in TIME_WAIT, drop the old connection
+			 * and start over if the sequence numbers
+			 * are above the previous ones.
+			 */
+			if (thflags & TH_SYN &&
+			    tp->t_state == TCPS_TIME_WAIT &&
+			    SEQ_GT(th->th_seq, tp->rcv_nxt)) {
+				iss = tcp_rndiss_next();
+				tp = tcp_close(tp);
+				goto findpcb;
+			}
+			/*
+			 * If window is closed can only take segments at
+			 * window edge, and have to drop data and PUSH from
+			 * incoming segments.  Continue processing, but
+			 * remember to ack.  Otherwise, drop segment
+			 * and ack.
+			 */
+			if (tp->rcv_wnd == 0 && th->th_seq == tp->rcv_nxt) {
+				tp->t_flags |= TF_ACKNOW;
+				tcpstat.tcps_rcvwinprobe++;
+			} else
+				goto dropafterack;
+		} else
+			tcpstat.tcps_rcvbyteafterwin += todrop;
+		m_adj(m, -todrop);
+		tlen -= todrop;
+		thflags &= ~(TH_PUSH|TH_FIN);
+	}
+
+	/*
+	 * If last ACK falls within this segment's sequence numbers,
+	 * record its timestamp.
+	 * NOTE that the test is modified according to the latest
+	 * proposal of the tcplw@cray.com list (Braden 1993/04/26).
+	 */
+	if ((to.to_flag & TOF_TS) != 0 &&
+	    SEQ_LEQ(th->th_seq, tp->last_ack_sent)) {
+		tp->ts_recent_age = ticks;
+		tp->ts_recent = to.to_tsval;
+	}
+
+	/*
+	 * If a SYN is in the window, then this is an
+	 * error and we send an RST and drop the connection.
+	 */
+	if (thflags & TH_SYN) {
+		tp = tcp_drop(tp, ECONNRESET);
+		rstreason = BANDLIM_UNLIMITED;
+		goto dropwithreset;
+	}
+
+	/*
+	 * If the ACK bit is off:  if in SYN-RECEIVED state or SENDSYN
+	 * flag is on (half-synchronized state), then queue data for
+	 * later processing; else drop segment and return.
+	 */
+	if ((thflags & TH_ACK) == 0) {
+		if (tp->t_state == TCPS_SYN_RECEIVED ||
+		    (tp->t_flags & TF_NEEDSYN))
+			goto step6;
+		else
+			goto drop;
+	}
+
+	/*
+	 * Ack processing.
+	 */
+	switch (tp->t_state) {
+
+	/*
+	 * In SYN_RECEIVED state, the ack ACKs our SYN, so enter
+	 * ESTABLISHED state and continue processing.
+	 * The ACK was checked above.
+	 */
+	case TCPS_SYN_RECEIVED:
+
+		tcpstat.tcps_connects++;
+		soisconnected(so);
+		/* Do window scaling? */
+		if ((tp->t_flags & (TF_RCVD_SCALE|TF_REQ_SCALE)) ==
+			(TF_RCVD_SCALE|TF_REQ_SCALE)) {
+			tp->snd_scale = tp->requested_s_scale;
+			tp->rcv_scale = tp->request_r_scale;
+		}
+		/*
+		 * Upon successful completion of 3-way handshake,
+		 * update cache.CC if it was undefined, pass any queued
+		 * data to the user, and advance state appropriately.
+		 */
+		if ((taop = tcp_gettaocache(inp)) != NULL &&
+		    taop->tao_cc == 0)
+			taop->tao_cc = tp->cc_recv;
+
+		/*
+		 * Make transitions:
+		 *      SYN-RECEIVED  -> ESTABLISHED
+		 *      SYN-RECEIVED* -> FIN-WAIT-1
+		 */
+		tp->t_starttime = ticks;
+		if (tp->t_flags & TF_NEEDFIN) {
+			tp->t_state = TCPS_FIN_WAIT_1;
+			tp->t_flags &= ~TF_NEEDFIN;
+		} else {
+			tp->t_state = TCPS_ESTABLISHED;
+			callout_reset(tp->tt_keep, tcp_keepidle, 
+				      tcp_timer_keep, tp);
+		}
+		/*
+		 * If segment contains data or FIN, will call tcp_reass()
+		 * later; if not, do so now to pass queued data to user.
+		 */
+		if (tlen == 0 && (thflags & TH_FIN) == 0)
+			(void) tcp_reass(tp, (struct tcphdr *)0, 0,
+			    (struct mbuf *)0);
+		tp->snd_wl1 = th->th_seq - 1;
+		/* fall into ... */
+
+	/*
+	 * In ESTABLISHED state: drop duplicate ACKs; ACK out of range
+	 * ACKs.  If the ack is in the range
+	 *	tp->snd_una < th->th_ack <= tp->snd_max
+	 * then advance tp->snd_una to th->th_ack and drop
+	 * data from the retransmission queue.  If this ACK reflects
+	 * more up to date window information we update our window information.
+	 */
+	case TCPS_ESTABLISHED:
+	case TCPS_FIN_WAIT_1:
+	case TCPS_FIN_WAIT_2:
+	case TCPS_CLOSE_WAIT:
+	case TCPS_CLOSING:
+	case TCPS_LAST_ACK:
+	case TCPS_TIME_WAIT:
+
+		if (SEQ_LEQ(th->th_ack, tp->snd_una)) {
+			if (tlen == 0 && tiwin == tp->snd_wnd) {
+				tcpstat.tcps_rcvdupack++;
+				/*
+				 * If we have outstanding data (other than
+				 * a window probe), this is a completely
+				 * duplicate ack (ie, window info didn't
+				 * change), the ack is the biggest we've
+				 * seen and we've seen exactly our rexmt
+				 * threshold of them, assume a packet
+				 * has been dropped and retransmit it.
+				 * Kludge snd_nxt & the congestion
+				 * window so we send only this one
+				 * packet.
+				 *
+				 * We know we're losing at the current
+				 * window size so do congestion avoidance
+				 * (set ssthresh to half the current window
+				 * and pull our congestion window back to
+				 * the new ssthresh).
+				 *
+				 * Dup acks mean that packets have left the
+				 * network (they're now cached at the receiver)
+				 * so bump cwnd by the amount in the receiver
+				 * to keep a constant cwnd packets in the
+				 * network.
+				 */
+				if (!callout_active(tp->tt_rexmt) ||
+				    th->th_ack != tp->snd_una)
+					tp->t_dupacks = 0;
+				else if (++tp->t_dupacks == tcprexmtthresh) {
+					tcp_seq onxt = tp->snd_nxt;
+					u_int win =
+					    min(tp->snd_wnd, tp->snd_cwnd) / 2 /
+						tp->t_maxseg;
+					if (tcp_do_newreno && SEQ_LT(th->th_ack,
+					    tp->snd_recover)) {
+						/* False retransmit, should not
+						 * cut window
+						 */
+						tp->snd_cwnd += tp->t_maxseg;
+						tp->t_dupacks = 0;
+						(void) tcp_output(tp);
+						goto drop;
+					}
+					if (win < 2)
+						win = 2;
+					tp->snd_ssthresh = win * tp->t_maxseg;
+					tp->snd_recover = tp->snd_max;
+					callout_stop(tp->tt_rexmt);
+					tp->t_rtttime = 0;
+					tp->snd_nxt = th->th_ack;
+					tp->snd_cwnd = tp->t_maxseg;
+					(void) tcp_output(tp);
+					tp->snd_cwnd = tp->snd_ssthresh +
+					       tp->t_maxseg * tp->t_dupacks;
+					if (SEQ_GT(onxt, tp->snd_nxt))
+						tp->snd_nxt = onxt;
+					goto drop;
+				} else if (tp->t_dupacks > tcprexmtthresh) {
+					tp->snd_cwnd += tp->t_maxseg;
+					(void) tcp_output(tp);
+					goto drop;
+				}
+			} else
+				tp->t_dupacks = 0;
+			break;
+		}
+		/*
+		 * If the congestion window was inflated to account
+		 * for the other side's cached packets, retract it.
+		 */
+		if (tcp_do_newreno == 0) {
+                        if (tp->t_dupacks >= tcprexmtthresh &&
+                                tp->snd_cwnd > tp->snd_ssthresh)
+                                tp->snd_cwnd = tp->snd_ssthresh;
+                        tp->t_dupacks = 0;
+                } else if (tp->t_dupacks >= tcprexmtthresh &&
+		    !tcp_newreno(tp, th)) {
+                        /*
+                         * Window inflation should have left us with approx.
+                         * snd_ssthresh outstanding data.  But in case we
+                         * would be inclined to send a burst, better to do
+                         * it via the slow start mechanism.
+                         */
+			if (SEQ_GT(th->th_ack + tp->snd_ssthresh, tp->snd_max))
+                                tp->snd_cwnd =
+				    tp->snd_max - th->th_ack + tp->t_maxseg;
+			else
+                        	tp->snd_cwnd = tp->snd_ssthresh;
+                        tp->t_dupacks = 0;
+                }
+		if (SEQ_GT(th->th_ack, tp->snd_max)) {
+			tcpstat.tcps_rcvacktoomuch++;
+			goto dropafterack;
+		}
+		/*
+		 *  If we reach this point, ACK is not a duplicate,
+		 *     i.e., it ACKs something we sent.
+		 */
+		if (tp->t_flags & TF_NEEDSYN) {
+			/*
+			 * T/TCP: Connection was half-synchronized, and our
+			 * SYN has been ACK'd (so connection is now fully
+			 * synchronized).  Go to non-starred state,
+			 * increment snd_una for ACK of SYN, and check if
+			 * we can do window scaling.
+			 */
+			tp->t_flags &= ~TF_NEEDSYN;
+			tp->snd_una++;
+			/* Do window scaling? */
+			if ((tp->t_flags & (TF_RCVD_SCALE|TF_REQ_SCALE)) ==
+				(TF_RCVD_SCALE|TF_REQ_SCALE)) {
+				tp->snd_scale = tp->requested_s_scale;
+				tp->rcv_scale = tp->request_r_scale;
+			}
+		}
+
+process_ACK:
+		acked = th->th_ack - tp->snd_una;
+		tcpstat.tcps_rcvackpack++;
+		tcpstat.tcps_rcvackbyte += acked;
+
+		/*
+		 * If we just performed our first retransmit, and the ACK
+		 * arrives within our recovery window, then it was a mistake
+		 * to do the retransmit in the first place.  Recover our
+		 * original cwnd and ssthresh, and proceed to transmit where
+		 * we left off.
+		 */
+		if (tp->t_rxtshift == 1 && ticks < tp->t_badrxtwin) {
+			tp->snd_cwnd = tp->snd_cwnd_prev;
+			tp->snd_ssthresh = tp->snd_ssthresh_prev;
+			tp->snd_nxt = tp->snd_max;
+			tp->t_badrxtwin = 0;	/* XXX probably not required */ 
+		}
+
+		/*
+		 * If we have a timestamp reply, update smoothed
+		 * round trip time.  If no timestamp is present but
+		 * transmit timer is running and timed sequence
+		 * number was acked, update smoothed round trip time.
+		 * Since we now have an rtt measurement, cancel the
+		 * timer backoff (cf., Phil Karn's retransmit alg.).
+		 * Recompute the initial retransmit timer.
+		 */
+		if (to.to_flag & TOF_TS)
+			tcp_xmit_timer(tp, ticks - to.to_tsecr + 1);
+		else if (tp->t_rtttime && SEQ_GT(th->th_ack, tp->t_rtseq))
+			tcp_xmit_timer(tp, ticks - tp->t_rtttime);
+
+		/*
+		 * If all outstanding data is acked, stop retransmit
+		 * timer and remember to restart (more output or persist).
+		 * If there is more data to be acked, restart retransmit
+		 * timer, using current (possibly backed-off) value.
+		 */
+		if (th->th_ack == tp->snd_max) {
+			callout_stop(tp->tt_rexmt);
+			needoutput = 1;
+		} else if (!callout_active(tp->tt_persist))
+			callout_reset(tp->tt_rexmt, tp->t_rxtcur,
+				      tcp_timer_rexmt, tp);
+
+		/*
+		 * If no data (only SYN) was ACK'd,
+		 *    skip rest of ACK processing.
+		 */
+		if (acked == 0)
+			goto step6;
+
+		/*
+		 * When new data is acked, open the congestion window.
+		 * If the window gives us less than ssthresh packets
+		 * in flight, open exponentially (maxseg per packet).
+		 * Otherwise open linearly: maxseg per window
+		 * (maxseg^2 / cwnd per packet).
+		 */
+		{
+		register u_int cw = tp->snd_cwnd;
+		register u_int incr = tp->t_maxseg;
+
+		if (cw > tp->snd_ssthresh)
+			incr = incr * incr / cw;
+		/*
+		 * If t_dupacks != 0 here, it indicates that we are still
+		 * in NewReno fast recovery mode, so we leave the congestion
+		 * window alone.
+		 */
+		if (tcp_do_newreno == 0 || tp->t_dupacks == 0)
+			tp->snd_cwnd = min(cw + incr,TCP_MAXWIN<<tp->snd_scale);
+		}
+		if (acked > so->so_snd.sb_cc) {
+			tp->snd_wnd -= so->so_snd.sb_cc;
+			sbdrop(&so->so_snd, (int)so->so_snd.sb_cc);
+			ourfinisacked = 1;
+		} else {
+			sbdrop(&so->so_snd, acked);
+			tp->snd_wnd -= acked;
+			ourfinisacked = 0;
+		}
+		sowwakeup(so);
+		tp->snd_una = th->th_ack;
+		if (SEQ_LT(tp->snd_nxt, tp->snd_una))
+			tp->snd_nxt = tp->snd_una;
+
+		switch (tp->t_state) {
+
+		/*
+		 * In FIN_WAIT_1 STATE in addition to the processing
+		 * for the ESTABLISHED state if our FIN is now acknowledged
+		 * then enter FIN_WAIT_2.
+		 */
+		case TCPS_FIN_WAIT_1:
+			if (ourfinisacked) {
+				/*
+				 * If we can't receive any more
+				 * data, then closing user can proceed.
+				 * Starting the timer is contrary to the
+				 * specification, but if we don't get a FIN
+				 * we'll hang forever.
+				 */
+				if (so->so_state & SS_CANTRCVMORE) {
+					soisdisconnected(so);
+					callout_reset(tp->tt_2msl, tcp_maxidle,
+						      tcp_timer_2msl, tp);
+				}
+				tp->t_state = TCPS_FIN_WAIT_2;
+			}
+			break;
+
+	 	/*
+		 * In CLOSING STATE in addition to the processing for
+		 * the ESTABLISHED state if the ACK acknowledges our FIN
+		 * then enter the TIME-WAIT state, otherwise ignore
+		 * the segment.
+		 */
+		case TCPS_CLOSING:
+			if (ourfinisacked) {
+				tp->t_state = TCPS_TIME_WAIT;
+				tcp_canceltimers(tp);
+				/* Shorten TIME_WAIT [RFC-1644, p.28] */
+				if (tp->cc_recv != 0 &&
+				    (ticks - tp->t_starttime) < tcp_msl)
+					callout_reset(tp->tt_2msl,
+						      tp->t_rxtcur *
+						      TCPTV_TWTRUNC,
+						      tcp_timer_2msl, tp);
+				else
+					callout_reset(tp->tt_2msl, 2 * tcp_msl,
+						      tcp_timer_2msl, tp);
+				soisdisconnected(so);
+			}
+			break;
+
+		/*
+		 * In LAST_ACK, we may still be waiting for data to drain
+		 * and/or to be acked, as well as for the ack of our FIN.
+		 * If our FIN is now acknowledged, delete the TCB,
+		 * enter the closed state and return.
+		 */
+		case TCPS_LAST_ACK:
+			if (ourfinisacked) {
+				tp = tcp_close(tp);
+				goto drop;
+			}
+			break;
+
+		/*
+		 * In TIME_WAIT state the only thing that should arrive
+		 * is a retransmission of the remote FIN.  Acknowledge
+		 * it and restart the finack timer.
+		 */
+		case TCPS_TIME_WAIT:
+			callout_reset(tp->tt_2msl, 2 * tcp_msl,
+				      tcp_timer_2msl, tp);
+			goto dropafterack;
+		}
+	}
+
+step6:
+	/*
+	 * Update window information.
+	 * Don't look at window if no ACK: TAC's send garbage on first SYN.
+	 */
+	if ((thflags & TH_ACK) &&
+	    (SEQ_LT(tp->snd_wl1, th->th_seq) ||
+	    (tp->snd_wl1 == th->th_seq && (SEQ_LT(tp->snd_wl2, th->th_ack) ||
+	     (tp->snd_wl2 == th->th_ack && tiwin > tp->snd_wnd))))) {
+		/* keep track of pure window updates */
+		if (tlen == 0 &&
+		    tp->snd_wl2 == th->th_ack && tiwin > tp->snd_wnd)
+			tcpstat.tcps_rcvwinupd++;
+		tp->snd_wnd = tiwin;
+		tp->snd_wl1 = th->th_seq;
+		tp->snd_wl2 = th->th_ack;
+		if (tp->snd_wnd > tp->max_sndwnd)
+			tp->max_sndwnd = tp->snd_wnd;
+		needoutput = 1;
+	}
+
+	/*
+	 * Process segments with URG.
+	 */
+	if ((thflags & TH_URG) && th->th_urp &&
+	    TCPS_HAVERCVDFIN(tp->t_state) == 0) {
+		/*
+		 * This is a kludge, but if we receive and accept
+		 * random urgent pointers, we'll crash in
+		 * soreceive.  It's hard to imagine someone
+		 * actually wanting to send this much urgent data.
+		 */
+		if (th->th_urp + so->so_rcv.sb_cc > sb_max) {
+			th->th_urp = 0;			/* XXX */
+			thflags &= ~TH_URG;		/* XXX */
+			goto dodata;			/* XXX */
+		}
+		/*
+		 * If this segment advances the known urgent pointer,
+		 * then mark the data stream.  This should not happen
+		 * in CLOSE_WAIT, CLOSING, LAST_ACK or TIME_WAIT STATES since
+		 * a FIN has been received from the remote side.
+		 * In these states we ignore the URG.
+		 *
+		 * According to RFC961 (Assigned Protocols),
+		 * the urgent pointer points to the last octet
+		 * of urgent data.  We continue, however,
+		 * to consider it to indicate the first octet
+		 * of data past the urgent section as the original
+		 * spec states (in one of two places).
+		 */
+		if (SEQ_GT(th->th_seq+th->th_urp, tp->rcv_up)) {
+			tp->rcv_up = th->th_seq + th->th_urp;
+			so->so_oobmark = so->so_rcv.sb_cc +
+			    (tp->rcv_up - tp->rcv_nxt) - 1;
+			if (so->so_oobmark == 0)
+				so->so_state |= SS_RCVATMARK;
+			sohasoutofband(so);
+			tp->t_oobflags &= ~(TCPOOB_HAVEDATA | TCPOOB_HADDATA);
+		}
+		/*
+		 * Remove out of band data so doesn't get presented to user.
+		 * This can happen independent of advancing the URG pointer,
+		 * but if two URG's are pending at once, some out-of-band
+		 * data may creep in... ick.
+		 */
+		if (th->th_urp <= (u_long)tlen
+#ifdef SO_OOBINLINE
+		     && (so->so_options & SO_OOBINLINE) == 0
+#endif
+		     )
+			tcp_pulloutofband(so, th, m,
+				drop_hdrlen);	/* hdr drop is delayed */
+	} else
+		/*
+		 * If no out of band data is expected,
+		 * pull receive urgent pointer along
+		 * with the receive window.
+		 */
+		if (SEQ_GT(tp->rcv_nxt, tp->rcv_up))
+			tp->rcv_up = tp->rcv_nxt;
+dodata:							/* XXX */
+
+	/*
+	 * Process the segment text, merging it into the TCP sequencing queue,
+	 * and arranging for acknowledgment of receipt if necessary.
+	 * This process logically involves adjusting tp->rcv_wnd as data
+	 * is presented to the user (this happens in tcp_usrreq.c,
+	 * case PRU_RCVD).  If a FIN has already been received on this
+	 * connection then we just ignore the text.
+	 */
+	if ((tlen || (thflags&TH_FIN)) &&
+	    TCPS_HAVERCVDFIN(tp->t_state) == 0) {
+		m_adj(m, drop_hdrlen);	/* delayed header drop */
+		TCP_REASS(tp, th, &tlen, m, so, thflags);
+		/*
+		 * Note the amount of data that peer has sent into
+		 * our window, in order to estimate the sender's
+		 * buffer size.
+		 */
+		len = so->so_rcv.sb_hiwat - (tp->rcv_adv - tp->rcv_nxt);
+	} else {
+		m_freem(m);
+		thflags &= ~TH_FIN;
+	}
+
+	/*
+	 * If FIN is received ACK the FIN and let the user know
+	 * that the connection is closing.
+	 */
+	if (thflags & TH_FIN) {
+		if (TCPS_HAVERCVDFIN(tp->t_state) == 0) {
+			socantrcvmore(so);
+			/*
+			 *  If connection is half-synchronized
+			 *  (ie NEEDSYN flag on) then delay ACK,
+			 *  so it may be piggybacked when SYN is sent.
+			 *  Otherwise, since we received a FIN then no
+			 *  more input can be expected, send ACK now.
+			 */
+			if (DELAY_ACK(tp) && (tp->t_flags & TF_NEEDSYN))
+                                callout_reset(tp->tt_delack, tcp_delacktime,  
+                                    tcp_timer_delack, tp);  
+			else
+				tp->t_flags |= TF_ACKNOW;
+			tp->rcv_nxt++;
+		}
+		switch (tp->t_state) {
+
+	 	/*
+		 * In SYN_RECEIVED and ESTABLISHED STATES
+		 * enter the CLOSE_WAIT state.
+		 */
+		case TCPS_SYN_RECEIVED:
+			tp->t_starttime = ticks;
+			/*FALLTHROUGH*/
+		case TCPS_ESTABLISHED:
+			tp->t_state = TCPS_CLOSE_WAIT;
+			break;
+
+	 	/*
+		 * If still in FIN_WAIT_1 STATE FIN has not been acked so
+		 * enter the CLOSING state.
+		 */
+		case TCPS_FIN_WAIT_1:
+			tp->t_state = TCPS_CLOSING;
+			break;
+
+	 	/*
+		 * In FIN_WAIT_2 state enter the TIME_WAIT state,
+		 * starting the time-wait timer, turning off the other
+		 * standard timers.
+		 */
+		case TCPS_FIN_WAIT_2:
+			tp->t_state = TCPS_TIME_WAIT;
+			tcp_canceltimers(tp);
+			/* Shorten TIME_WAIT [RFC-1644, p.28] */
+			if (tp->cc_recv != 0 &&
+			    (ticks - tp->t_starttime) < tcp_msl) {
+				callout_reset(tp->tt_2msl,
+					      tp->t_rxtcur * TCPTV_TWTRUNC,
+					      tcp_timer_2msl, tp);
+				/* For transaction client, force ACK now. */
+				tp->t_flags |= TF_ACKNOW;
+			}
+			else
+				callout_reset(tp->tt_2msl, 2 * tcp_msl,
+					      tcp_timer_2msl, tp);
+			soisdisconnected(so);
+			break;
+
+		/*
+		 * In TIME_WAIT state restart the 2 MSL time_wait timer.
+		 */
+		case TCPS_TIME_WAIT:
+			callout_reset(tp->tt_2msl, 2 * tcp_msl,
+				      tcp_timer_2msl, tp);
+			break;
+		}
+	}
+#ifdef TCPDEBUG
+	if (so->so_options & SO_DEBUG)
+		tcp_trace(TA_INPUT, ostate, tp, (void *)tcp_saveipgen,
+			  &tcp_savetcp, 0);
+#endif
+
+	/*
+	 * Return any desired output.
+	 */
+	if (needoutput || (tp->t_flags & TF_ACKNOW))
+		(void) tcp_output(tp);
+	return;
+
+dropafterack:
+	/*
+	 * Generate an ACK dropping incoming segment if it occupies
+	 * sequence space, where the ACK reflects our state.
+	 *
+	 * We can now skip the test for the RST flag since all
+	 * paths to this code happen after packets containing
+	 * RST have been dropped.
+	 *
+	 * In the SYN-RECEIVED state, don't send an ACK unless the
+	 * segment we received passes the SYN-RECEIVED ACK test.
+	 * If it fails send a RST.  This breaks the loop in the
+	 * "LAND" DoS attack, and also prevents an ACK storm
+	 * between two listening ports that have been sent forged
+	 * SYN segments, each with the source address of the other.
+	 */
+	if (tp->t_state == TCPS_SYN_RECEIVED && (thflags & TH_ACK) &&
+	    (SEQ_GT(tp->snd_una, th->th_ack) ||
+	     SEQ_GT(th->th_ack, tp->snd_max)) ) {
+		rstreason = BANDLIM_RST_OPENPORT;
+		goto dropwithreset;
+	}
+#ifdef TCPDEBUG
+	if (so->so_options & SO_DEBUG)
+		tcp_trace(TA_DROP, ostate, tp, (void *)tcp_saveipgen,
+			  &tcp_savetcp, 0);
+#endif
+	m_freem(m);
+	tp->t_flags |= TF_ACKNOW;
+	(void) tcp_output(tp);
+	return;
+
+dropwithreset:
+	/*
+	 * Generate a RST, dropping incoming segment.
+	 * Make ACK acceptable to originator of segment.
+	 * Don't bother to respond if destination was broadcast/multicast.
+	 */
+	if ((thflags & TH_RST) || m->m_flags & (M_BCAST|M_MCAST))
+		goto drop;
+#ifdef INET6
+	if (isipv6) {
+		if (IN6_IS_ADDR_MULTICAST(&ip6->ip6_dst) ||
+		    IN6_IS_ADDR_MULTICAST(&ip6->ip6_src))
+			goto drop;
+	} else
+#endif /* INET6 */
+	if (IN_MULTICAST(ntohl(ip->ip_dst.s_addr)) ||
+	    IN_MULTICAST(ntohl(ip->ip_src.s_addr)) ||
+	    ip->ip_src.s_addr == htonl(INADDR_BROADCAST))
+		goto drop;
+	/* IPv6 anycast check is done at tcp6_input() */
+
+	/*
+	 * Perform bandwidth limiting.
+	 */
+	if (badport_bandlim(rstreason) < 0)
+		goto drop;
+ 
+#ifdef TCPDEBUG
+	if (tp == 0 || (tp->t_inpcb->inp_socket->so_options & SO_DEBUG))
+		tcp_trace(TA_DROP, ostate, tp, (void *)tcp_saveipgen,
+			  &tcp_savetcp, 0);
+#endif
+	if (thflags & TH_ACK)
+		/* mtod() below is safe as long as hdr dropping is delayed */
+		tcp_respond(tp, mtod(m, void *), th, m, (tcp_seq)0, th->th_ack,
+			    TH_RST);
+	else {
+		if (thflags & TH_SYN)
+			tlen++;
+		/* mtod() below is safe as long as hdr dropping is delayed */
+		tcp_respond(tp, mtod(m, void *), th, m, th->th_seq+tlen,
+			    (tcp_seq)0, TH_RST|TH_ACK);
+	}
+	/* destroy temporarily created socket */
+	if (dropsocket)
+		(void) soabort(so);
+	return;
+
+drop:
+	/*
+	 * Drop space held by incoming segment and return.
+	 */
+#ifdef TCPDEBUG
+	if (tp == 0 || (tp->t_inpcb->inp_socket->so_options & SO_DEBUG))
+		tcp_trace(TA_DROP, ostate, tp, (void *)tcp_saveipgen,
+			  &tcp_savetcp, 0);
+#endif
+	m_freem(m);
+	/* destroy temporarily created socket */
+	if (dropsocket)
+		(void) soabort(so);
+	return;
+}
+
+static void
+tcp_dooptions(tp, cp, cnt, th, to)	/* parse the TCP options of one segment */
+	struct tcpcb *tp;	/* connection; flags/scale/ts_recent are updated */
+	u_char *cp;		/* first byte of the option area */
+	int cnt;		/* number of option bytes at cp */
+	struct tcphdr *th;	/* segment header; only th_flags is consulted */
+	struct tcpopt *to;	/* receives TOF_* flags and TS/CC values */
+{
+	u_short mss = 0;	/* MSS option value; stays 0 if none is seen */
+	int opt, optlen;	/* kind and total length of the current option */
+
+	for (; cnt > 0; cnt -= optlen, cp += optlen) {
+		opt = cp[0];
+		if (opt == TCPOPT_EOL)
+			break;		/* end of option list */
+		if (opt == TCPOPT_NOP)
+			optlen = 1;	/* NOP has no length byte */
+		else {
+			if (cnt < 2)	/* no room for a length byte */
+				break;
+			optlen = cp[1];
+			if (optlen < 2 || optlen > cnt)	/* bad length */
+				break;
+		}
+		switch (opt) {
+
+		default:
+			continue;	/* not handled here: skip it */
+
+		case TCPOPT_MAXSEG:
+			if (optlen != TCPOLEN_MAXSEG)
+				continue;
+			if (!(th->th_flags & TH_SYN))	/* SYN only */
+				continue;
+			bcopy((char *) cp + 2, (char *) &mss, sizeof(mss));
+			NTOHS(mss);	/* handed to tcp_mss() below */
+			break;
+
+		case TCPOPT_WINDOW:
+			if (optlen != TCPOLEN_WINDOW)
+				continue;
+			if (!(th->th_flags & TH_SYN))	/* SYN only */
+				continue;
+			tp->t_flags |= TF_RCVD_SCALE;
+			tp->requested_s_scale = min(cp[2], TCP_MAX_WINSHIFT);
+			break;
+
+		case TCPOPT_TIMESTAMP:
+			if (optlen != TCPOLEN_TIMESTAMP)
+				continue;
+			to->to_flag |= TOF_TS;
+			bcopy((char *)cp + 2,
+			    (char *)&to->to_tsval, sizeof(to->to_tsval));
+			NTOHL(to->to_tsval);
+			bcopy((char *)cp + 6,
+			    (char *)&to->to_tsecr, sizeof(to->to_tsecr));
+			NTOHL(to->to_tsecr);
+
+			/*
+			 * A timestamp received in a SYN makes
+			 * it ok to send timestamp requests and replies.
+			 */
+			if (th->th_flags & TH_SYN) {
+				tp->t_flags |= TF_RCVD_TSTMP;
+				tp->ts_recent = to->to_tsval;
+				tp->ts_recent_age = ticks;
+			}
+			break;
+		case TCPOPT_CC:
+			if (optlen != TCPOLEN_CC)
+				continue;
+			to->to_flag |= TOF_CC;
+			bcopy((char *)cp + 2,
+			    (char *)&to->to_cc, sizeof(to->to_cc));
+			NTOHL(to->to_cc);
+			/*
+			 * A CC or CC.new option received in a SYN makes
+			 * it ok to send CC in subsequent segments.
+			 */
+			if (th->th_flags & TH_SYN)
+				tp->t_flags |= TF_RCVD_CC;
+			break;
+		case TCPOPT_CCNEW:
+			if (optlen != TCPOLEN_CC)
+				continue;
+			if (!(th->th_flags & TH_SYN))	/* SYN only */
+				continue;
+			to->to_flag |= TOF_CCNEW;
+			bcopy((char *)cp + 2,
+			    (char *)&to->to_cc, sizeof(to->to_cc));
+			NTOHL(to->to_cc);
+			/*
+			 * A CC or CC.new option received in a SYN makes
+			 * it ok to send CC in subsequent segments.
+			 */
+			tp->t_flags |= TF_RCVD_CC;
+			break;
+		case TCPOPT_CCECHO:
+			if (optlen != TCPOLEN_CC)
+				continue;
+			if (!(th->th_flags & TH_SYN))	/* SYN only */
+				continue;
+			to->to_flag |= TOF_CCECHO;
+			bcopy((char *)cp + 2,
+			    (char *)&to->to_ccecho, sizeof(to->to_ccecho));
+			NTOHL(to->to_ccecho);
+			break;
+		}
+	}
+	if (th->th_flags & TH_SYN)
+		tcp_mss(tp, mss);	/* sets t_maxseg */
+}
+
+/*
+ * Pull the out-of-band byte out of a segment so it doesn't appear in the
+ * user's data queue; the byte is saved in tp->t_iobc instead.  It is still
+ * reflected in the segment length for sequencing purposes.  Panics if the
+ * byte cannot be located in the mbuf chain.
+ */
+static void
+tcp_pulloutofband(so, th, m, off)
+	struct socket *so;
+	struct tcphdr *th;
+	register struct mbuf *m;
+	int off;		/* header length whose drop was delayed */
+{
+	int cnt = off + th->th_urp - 1;	/* chain offset of the urgent byte */
+
+	while (cnt >= 0) {
+		if (m->m_len > cnt) {	/* the byte lives in this mbuf */
+			char *cp = mtod(m, caddr_t) + cnt;
+			struct tcpcb *tp = sototcpcb(so);
+
+			tp->t_iobc = *cp;
+			tp->t_oobflags |= TCPOOB_HAVEDATA;
+			bcopy(cp+1, cp, (unsigned)(m->m_len - cnt - 1));
+			m->m_len--;	/* following bytes were moved down by one */
+			if (m->m_flags & M_PKTHDR)
+				m->m_pkthdr.len--;
+			return;
+		}
+		cnt -= m->m_len;	/* skip this mbuf entirely */
+		m = m->m_next;
+		if (m == 0)
+			break;
+	}
+	panic("tcp_pulloutofband");
+}
+
+/*
+ * Fold a new round-trip time sample (rtt, in ticks) into the smoothed
+ * averages t_srtt/t_rttvar and recompute the retransmit timeout t_rxtcur.
+ */
+static void
+tcp_xmit_timer(tp, rtt)
+	register struct tcpcb *tp;
+	int rtt;		/* new sample; callers derive it from ticks */
+{
+	register int delta;
+
+	tcpstat.tcps_rttupdated++;
+	tp->t_rttupdated++;
+	if (tp->t_srtt != 0) {
+		/*
+		 * srtt is stored as fixed point with 5 bits after the
+		 * binary point (i.e., scaled by 32).  The following magic
+		 * is equivalent to the smoothing algorithm in rfc793 with
+		 * an alpha of .875 (srtt = rtt/8 + srtt*7/8 in fixed
+		 * point).  Adjust rtt to origin 0.
+		 */
+		delta = ((rtt - 1) << TCP_DELTA_SHIFT)
+			- (tp->t_srtt >> (TCP_RTT_SHIFT - TCP_DELTA_SHIFT));
+
+		if ((tp->t_srtt += delta) <= 0)
+			tp->t_srtt = 1;	/* 0 would mean "no estimate yet" */
+
+		/*
+		 * We accumulate a smoothed rtt variance (actually, a
+		 * smoothed mean difference), then set the retransmit
+		 * timer to smoothed rtt + 4 times the smoothed variance.
+		 * rttvar is stored as fixed point with 4 bits after the
+		 * binary point (scaled by 16).  The following is
+		 * equivalent to rfc793 smoothing with an alpha of .75
+		 * (rttvar = rttvar*3/4 + |delta| / 4).  This replaces
+		 * rfc793's wired-in beta.
+		 */
+		if (delta < 0)
+			delta = -delta;
+		delta -= tp->t_rttvar >> (TCP_RTTVAR_SHIFT - TCP_DELTA_SHIFT);
+		if ((tp->t_rttvar += delta) <= 0)
+			tp->t_rttvar = 1;
+	} else {
+		/*
+		 * No rtt measurement yet - use the unsmoothed rtt.
+		 * Set the variance to half the rtt (so our first
+		 * retransmit happens at 3*rtt).
+		 */
+		tp->t_srtt = rtt << TCP_RTT_SHIFT;
+		tp->t_rttvar = rtt << (TCP_RTTVAR_SHIFT - 1);
+	}
+	tp->t_rtttime = 0;	/* the caller tests this for "timing in progress" */
+	tp->t_rxtshift = 0;	/* a fresh measurement cancels the timer backoff */
+
+	/*
+	 * the retransmit should happen at rtt + 4 * rttvar.
+	 * Because of the way we do the smoothing, srtt and rttvar
+	 * will each average +1/2 tick of bias.  When we compute
+	 * the retransmit timer, we want 1/2 tick of rounding and
+	 * 1 extra tick because of +-1/2 tick uncertainty in the
+	 * firing of the timer.  The bias will give us exactly the
+	 * 1.5 tick we need.  But, because the bias is
+	 * statistical, we have to test that we don't drop below
+	 * the minimum feasible timer (which is 2 ticks).
+	 */
+	TCPT_RANGESET(tp->t_rxtcur, TCP_REXMTVAL(tp),
+		      max(tp->t_rttmin, rtt + 2), TCPTV_REXMTMAX);
+
+	/*
+	 * We received an ack for a packet that wasn't retransmitted;
+	 * it is probably safe to discard any error indications we've
+	 * received recently.  This isn't quite right, but close enough
+	 * for now (a route might have failed after we sent a segment,
+	 * and the return path might not be symmetrical).
+	 */
+	tp->t_softerror = 0;
+}
+
+/*
+ * Determine a reasonable value for maxseg size.
+ * If the route is known, check route for mtu.
+ * If none, use an mss that can be handled on the outgoing
+ * interface without forcing IP to fragment; if bigger than
+ * an mbuf cluster (MCLBYTES), round down to nearest multiple of MCLBYTES
+ * to utilize large mbufs.  If no route is found, route has no mtu,
+ * or the destination isn't local, use a default, hopefully conservative
+ * size (usually 512 or the default IP max size, but no more than the mtu
+ * of the interface), as we can't discover anything about intervening
+ * gateways or networks.  We also initialize the congestion/slow start
+ * window to be a single segment if the destination isn't local.
+ * While looking at the routing entry, we also initialize other path-dependent
+ * parameters from pre-set or cached values in the routing entry.
+ *
+ * Also take into account the space needed for options that we
+ * send regularly.  Make maxseg shorter by that amount to assure
+ * that we can send maxseg amount of data even when the options
+ * are present.  Store the upper limit of the length of options plus
+ * data in maxopd.
+ *
+ * NOTE that this routine is only called when we process an incoming
+ * segment, for outgoing segments only tcp_mssopt is called.
+ *
+ * In case of T/TCP, we call this routine during implicit connection
+ * setup as well (offer = -1), to initialize maxseg from the cached
+ * MSS of our peer.
+ */
+void
+tcp_mss(tp, offer)
+	struct tcpcb *tp;
+	int offer;	/* peer's MSS; 0 = none on the SYN, -1 = no SYN yet */
+{
+	register struct rtentry *rt;
+	struct ifnet *ifp;
+	register int rtt, mss;
+	u_long bufsize;
+	struct inpcb *inp;
+	struct socket *so;
+	struct rmxp_tao *taop;
+	int origoffer = offer;	/* offer is rewritten below; keep the original */
+#ifdef INET6
+	int isipv6;
+	int min_protoh;
+#endif
+
+	inp = tp->t_inpcb;
+#ifdef INET6
+	isipv6 = ((inp->inp_vflag & INP_IPV6) != 0) ? 1 : 0;
+	min_protoh = isipv6 ? sizeof (struct ip6_hdr) + sizeof (struct tcphdr)
+			    : sizeof (struct tcpiphdr);
+#else
+#define min_protoh  (sizeof (struct tcpiphdr))
+#endif
+#ifdef INET6
+	if (isipv6)
+		rt = tcp_rtlookup6(inp);
+	else
+#endif
+	rt = tcp_rtlookup(inp);
+	if (rt == NULL) {	/* no route: use the default MSS and stop */
+		tp->t_maxopd = tp->t_maxseg =
+#ifdef INET6
+		isipv6 ? tcp_v6mssdflt :
+#endif /* INET6 */
+		tcp_mssdflt;
+		return;
+	}
+	ifp = rt->rt_ifp;
+	so = inp->inp_socket;
+
+	taop = rmx_taop(rt->rt_rmx);
+	/*
+	 * Offer == -1 means that we didn't receive SYN yet,
+	 * use cached value in that case.
+	 */
+	if (offer == -1)
+		offer = taop->tao_mssopt;
+	/*
+	 * Offer == 0 means that there was no MSS on the SYN segment,
+	 * in this case we use tcp_mssdflt.
+	 */
+	if (offer == 0)
+		offer =
+#ifdef INET6
+			isipv6 ? tcp_v6mssdflt :
+#endif /* INET6 */
+			tcp_mssdflt;
+	else
+		/*
+		 * Sanity check: make sure that maxopd will be large
+		 * enough to allow some data on segments even if
+		 * all the option space is used (40 bytes).  Otherwise
+		 * funny things may happen in tcp_output.
+		 */
+		offer = max(offer, 64);
+	taop->tao_mssopt = offer;	/* cache the offer with the route */
+
+	/*
+	 * While we're here, check if there's an initial rtt
+	 * or rttvar.  Convert from the route-table units
+	 * to scaled multiples of the slow timeout timer.
+	 */
+	if (tp->t_srtt == 0 && (rtt = rt->rt_rmx.rmx_rtt)) {
+		/*
+		 * XXX the lock bit for RTT indicates that the value
+		 * is also a minimum value; this is subject to time.
+		 */
+		if (rt->rt_rmx.rmx_locks & RTV_RTT)
+			tp->t_rttmin = rtt / (RTM_RTTUNIT / hz);
+		tp->t_srtt = rtt / (RTM_RTTUNIT / (hz * TCP_RTT_SCALE));
+		tcpstat.tcps_usedrtt++;
+		if (rt->rt_rmx.rmx_rttvar) {
+			tp->t_rttvar = rt->rt_rmx.rmx_rttvar /
+			    (RTM_RTTUNIT / (hz * TCP_RTTVAR_SCALE));
+			tcpstat.tcps_usedrttvar++;
+		} else {
+			/* default variation is +- 1 rtt */
+			tp->t_rttvar =
+			    tp->t_srtt * TCP_RTTVAR_SCALE / TCP_RTT_SCALE;
+		}
+		TCPT_RANGESET(tp->t_rxtcur,
+			      ((tp->t_srtt >> 2) + tp->t_rttvar) >> 1,
+			      tp->t_rttmin, TCPTV_REXMTMAX);
+	}
+	/*
+	 * if there's an mtu associated with the route, use it
+	 * else, use the link mtu.
+	 */
+	if (rt->rt_rmx.rmx_mtu)
+		mss = rt->rt_rmx.rmx_mtu - min_protoh;
+	else
+	{
+		mss =
+#ifdef INET6
+			(isipv6 ? nd_ifinfo[rt->rt_ifp->if_index].linkmtu :
+#endif
+			 ifp->if_mtu
+#ifdef INET6
+			 )
+#endif
+			- min_protoh;
+#ifdef INET6
+		if (isipv6) {
+			if (!in6_localaddr(&inp->in6p_faddr))
+				mss = min(mss, tcp_v6mssdflt);
+		} else
+#endif
+		if (!in_localaddr(inp->inp_faddr))
+			mss = min(mss, tcp_mssdflt);
+	}
+	mss = min(mss, offer);	/* never exceed what the peer offered */
+	/*
+	 * maxopd stores the maximum length of data AND options
+	 * in a segment; maxseg is the amount of data in a normal
+	 * segment.  We need to store this value (maxopd) apart
+	 * from maxseg, because now every segment carries options
+	 * and thus we normally have somewhat less data in segments.
+	 */
+	tp->t_maxopd = mss;
+
+	/*
+	 * In case of T/TCP, origoffer==-1 indicates, that no segments
+	 * were received yet.  In this case we just guess, otherwise
+	 * we do the same as before T/TCP.
+	 */
+ 	if ((tp->t_flags & (TF_REQ_TSTMP|TF_NOOPT)) == TF_REQ_TSTMP &&
+	    (origoffer == -1 ||
+	     (tp->t_flags & TF_RCVD_TSTMP) == TF_RCVD_TSTMP))
+		mss -= TCPOLEN_TSTAMP_APPA;
+ 	if ((tp->t_flags & (TF_REQ_CC|TF_NOOPT)) == TF_REQ_CC &&
+	    (origoffer == -1 ||
+	     (tp->t_flags & TF_RCVD_CC) == TF_RCVD_CC))
+		mss -= TCPOLEN_CC_APPA;
+
+#if	(MCLBYTES & (MCLBYTES - 1)) == 0
+		if (mss > MCLBYTES)
+			mss &= ~(MCLBYTES-1);
+#else
+		if (mss > MCLBYTES)
+			mss = mss / MCLBYTES * MCLBYTES;
+#endif
+	/*
+	 * If there's a pipesize, change the socket buffer
+	 * to that size.  Make the socket buffers an integral
+	 * number of mss units; if the mss is larger than
+	 * the socket buffer, decrease the mss.
+	 */
+#ifdef RTV_SPIPE
+	if ((bufsize = rt->rt_rmx.rmx_sendpipe) == 0)
+#endif
+		bufsize = so->so_snd.sb_hiwat;
+	if (bufsize < mss)
+		mss = bufsize;
+	else {
+		bufsize = roundup(bufsize, mss);
+		if (bufsize > sb_max)
+			bufsize = sb_max;
+		(void)sbreserve(&so->so_snd, bufsize, so, NULL);
+	}
+	tp->t_maxseg = mss;
+
+#ifdef RTV_RPIPE
+	if ((bufsize = rt->rt_rmx.rmx_recvpipe) == 0)
+#endif
+		bufsize = so->so_rcv.sb_hiwat;
+	if (bufsize > mss) {
+		bufsize = roundup(bufsize, mss);
+		if (bufsize > sb_max)
+			bufsize = sb_max;
+		(void)sbreserve(&so->so_rcv, bufsize, so, NULL);
+	}
+
+	/*
+	 * Set the slow-start flight size depending on whether this
+	 * is a local network or not.
+	 */
+	if (
+#ifdef INET6
+	    (isipv6 && in6_localaddr(&inp->in6p_faddr)) ||
+	    (!isipv6 &&
+#endif
+	     in_localaddr(inp->inp_faddr)
+#ifdef INET6
+	     )
+#endif
+	    )
+		tp->snd_cwnd = mss * ss_fltsz_local;
+	else 
+		tp->snd_cwnd = mss * ss_fltsz;
+
+	if (rt->rt_rmx.rmx_ssthresh) {
+		/*
+		 * There's some sort of gateway or interface
+		 * buffer limit on the path.  Use this to set
+		 * the slow start threshold, but set the
+		 * threshold to no less than 2*mss.
+		 */
+		tp->snd_ssthresh = max(2 * mss, rt->rt_rmx.rmx_ssthresh);
+		tcpstat.tcps_usedssthresh++;
+	}
+}
+
+/*
+ * Determine the MSS option to send on an outgoing SYN.
+ */
+int
+tcp_mssopt(tp)
+	struct tcpcb *tp;	/* connection whose route is looked up */
+{
+	struct rtentry *rt;
+#ifdef INET6
+	int isipv6;
+	int min_protoh;
+#endif
+
+#ifdef INET6
+	isipv6 = ((tp->t_inpcb->inp_vflag & INP_IPV6) != 0) ? 1 : 0;
+	min_protoh = isipv6 ? sizeof (struct ip6_hdr) + sizeof (struct tcphdr)
+			    : sizeof (struct tcpiphdr);
+#else
+#define min_protoh  (sizeof (struct tcpiphdr))
+#endif
+#ifdef INET6
+	if (isipv6)
+		rt = tcp_rtlookup6(tp->t_inpcb);
+	else
+#endif /* INET6 */
+	rt = tcp_rtlookup(tp->t_inpcb);
+	if (rt == NULL)		/* no route: offer the default MSS */
+		return
+#ifdef INET6
+			isipv6 ? tcp_v6mssdflt :
+#endif /* INET6 */
+			tcp_mssdflt;
+
+	return rt->rt_ifp->if_mtu - min_protoh;	/* interface MTU less headers */
+}
+
+
+/*
+ * Checks for partial ack.  If partial ack arrives, force the retransmission
+ * of the next unacknowledged segment, do not clear tp->t_dupacks, and return
+ * 1.  By setting snd_nxt to th_ack, this forces retransmission timer to
+ * be started again.  If the ack advances at least to tp->snd_recover, return 0.
+ */
+static int
+tcp_newreno(tp, th)
+	struct tcpcb *tp;
+	struct tcphdr *th;
+{
+	if (SEQ_LT(th->th_ack, tp->snd_recover)) {
+		tcp_seq onxt = tp->snd_nxt;
+		u_long  ocwnd = tp->snd_cwnd;
+
+		callout_stop(tp->tt_rexmt);
+		tp->t_rtttime = 0;
+		tp->snd_nxt = th->th_ack;
+		/*
+		 * Set snd_cwnd to one segment beyond acknowledged offset
+		 * (tp->snd_una has not yet been updated at this point).
+		 */
+		tp->snd_cwnd = tp->t_maxseg + (th->th_ack - tp->snd_una);
+		(void) tcp_output(tp);
+		tp->snd_cwnd = ocwnd;
+		if (SEQ_GT(onxt, tp->snd_nxt))
+			tp->snd_nxt = onxt;
+		/* Partial deflation (snd_una still old); clamp, don't wrap. */
+		if (tp->snd_cwnd > th->th_ack - tp->snd_una)
+			tp->snd_cwnd -= th->th_ack - tp->snd_una;
+		else
+			tp->snd_cwnd = 0;
+		tp->snd_cwnd += tp->t_maxseg;
+		return (1);
+	}
+	return (0);
+}
diff --git a/sys/netinet/tcp_output.c b/sys/netinet/tcp_output.c
new file mode 100644
index 0000000..286b420
--- /dev/null
+++ b/sys/netinet/tcp_output.c
@@ -0,0 +1,943 @@
+/*
+ * Copyright (c) 1982, 1986, 1988, 1990, 1993, 1995
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)tcp_output.c	8.4 (Berkeley) 5/24/95
+ * $FreeBSD$
+ */
+
+#include "opt_inet6.h"
+#include "opt_ipsec.h"
+#include "opt_tcpdebug.h"
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/domain.h>
+#include <sys/kernel.h>
+#include <sys/lock.h>
+#include <sys/mbuf.h>
+#include <sys/mutex.h>
+#include <sys/protosw.h>
+#include <sys/socket.h>
+#include <sys/socketvar.h>
+#include <sys/sysctl.h>
+
+#include <net/route.h>
+
+#include <netinet/in.h>
+#include <netinet/in_systm.h>
+#include <netinet/ip.h>
+#include <netinet/in_pcb.h>
+#include <netinet/ip_var.h>
+#ifdef INET6
+#include <netinet6/in6_pcb.h>
+#include <netinet/ip6.h>
+#include <netinet6/ip6_var.h>
+#endif
+#include <netinet/tcp.h>
+#define	TCPOUTFLAGS
+#include <netinet/tcp_fsm.h>
+#include <netinet/tcp_seq.h>
+#include <netinet/tcp_timer.h>
+#include <netinet/tcp_var.h>
+#include <netinet/tcpip.h>
+#ifdef TCPDEBUG
+#include <netinet/tcp_debug.h>
+#endif
+
+#ifdef IPSEC
+#include <netinet6/ipsec.h>
+#endif /*IPSEC*/
+
+#include <machine/in_cksum.h>
+
+#ifdef notyet
+extern struct mbuf *m_copypack();
+#endif
+
+static int path_mtu_discovery = 1;	/* nonzero: tcp_output() may set IP_DF */
+SYSCTL_INT(_net_inet_tcp, OID_AUTO, path_mtu_discovery, CTLFLAG_RW,
+	&path_mtu_discovery, 1, "Enable Path MTU Discovery");
+
+int ss_fltsz = 1;		/* idle-restart cwnd in segments, non-local peer */
+SYSCTL_INT(_net_inet_tcp, OID_AUTO, slowstart_flightsize, CTLFLAG_RW,
+	&ss_fltsz, 1, "Slow start flight size");
+
+int ss_fltsz_local = TCP_MAXWIN;               /* something large; local-peer counterpart of ss_fltsz */
+SYSCTL_INT(_net_inet_tcp, OID_AUTO, local_slowstart_flightsize, CTLFLAG_RW,
+	&ss_fltsz_local, 1, "Slow start flight size for local networks");
+
+int     tcp_do_newreno = 1;	/* nonzero: tcp_output() honors its maxburst cap */
+SYSCTL_INT(_net_inet_tcp, OID_AUTO, newreno, CTLFLAG_RW, &tcp_do_newreno,
+        0, "Enable NewReno Algorithms");
+/*
+ * Tcp output routine: figure out what should be sent and send it.
+ */
+int				/* returns 0 or an ip{,6}_output() errno */
+tcp_output(tp)
+	register struct tcpcb *tp;
+{
+	register struct socket *so = tp->t_inpcb->inp_socket;
+	register long len, win;		/* win: send, then receive, window */
+	int off, flags, error;		/* off: snd_nxt's offset past snd_una */
+	register struct mbuf *m;	/* header mbuf of the outgoing segment */
+	struct ip *ip = NULL;
+	register struct ipovly *ipov = NULL;
+#ifdef INET6
+	struct ip6_hdr *ip6 = NULL;
+#endif /* INET6 */
+	register struct tcphdr *th;
+	u_char opt[TCP_MAXOLEN];	/* TCP options under construction */
+	unsigned ipoptlen, optlen, hdrlen;
+	int idle, sendalot;		/* sendalot: loop for another segment */
+	int maxburst = TCP_MAXBURST;	/* loop budget when tcp_do_newreno */
+	struct rmxp_tao *taop;
+	struct rmxp_tao tao_noncached;	/* zeroed stand-in if no TAO cache */
+#ifdef INET6
+	int isipv6;
+#endif
+
+#ifdef INET6
+	isipv6 = (tp->t_inpcb->inp_vflag & INP_IPV6) != 0;
+#endif
+
+	/*
+	 * Determine length of data that should be transmitted,
+	 * and flags that will be used.
+	 * If there is some data or critical controls (SYN, RST)
+	 * to send, then transmit; otherwise, investigate further.
+	 */
+	idle = (tp->snd_max == tp->snd_una);	/* no unacked data outstanding */
+	if (idle && (ticks - tp->t_rcvtime) >= tp->t_rxtcur) {
+		/*
+		 * We have been idle for "a while" and no acks are
+		 * expected to clock out any data we send --
+		 * slow start to get ack "clock" running again.
+		 *
+		 * Set the slow-start flight size depending on whether
+		 * this is a local network or not.
+		 */
+		if (
+#ifdef INET6
+		    (isipv6 && in6_localaddr(&tp->t_inpcb->in6p_faddr)) ||
+		    (!isipv6 &&
+#endif
+		     in_localaddr(tp->t_inpcb->inp_faddr)
+#ifdef INET6
+		     )
+#endif
+		    )
+			tp->snd_cwnd = tp->t_maxseg * ss_fltsz_local;
+		else     
+			tp->snd_cwnd = tp->t_maxseg * ss_fltsz;
+	}
+again:				/* re-entered from the end while sendalot is set */
+	sendalot = 0;
+	off = tp->snd_nxt - tp->snd_una;
+	win = min(tp->snd_wnd, tp->snd_cwnd);
+
+	flags = tcp_outflags[tp->t_state];
+	/*
+	 * Get standard flags, and add SYN or FIN if requested by 'hidden'
+	 * state flags.
+	 */
+	if (tp->t_flags & TF_NEEDFIN)
+		flags |= TH_FIN;
+	if (tp->t_flags & TF_NEEDSYN)
+		flags |= TH_SYN;
+
+	/*
+	 * If in persist timeout with window of 0, send 1 byte.
+	 * Otherwise, if window is small but nonzero
+	 * and timer expired, we will send what we can
+	 * and go to transmit state.
+	 */
+	if (tp->t_force) {
+		if (win == 0) {
+			/*
+			 * If we still have some data to send, then
+			 * clear the FIN bit.  Usually this would
+			 * happen below when it realizes that we
+			 * aren't sending all the data.  However,
+			 * if we have exactly 1 byte of unsent data,
+			 * then it won't clear the FIN bit below,
+			 * and if we are in persist state, we wind
+			 * up sending the packet without recording
+			 * that we sent the FIN bit.
+			 *
+			 * We can't just blindly clear the FIN bit,
+			 * because if we don't have any more data
+			 * to send then the probe will be the FIN
+			 * itself.
+			 */
+			if (off < so->so_snd.sb_cc)
+				flags &= ~TH_FIN;
+			win = 1;	/* pretend the window holds the 1-byte probe */
+		} else {
+			callout_stop(tp->tt_persist);
+			tp->t_rxtshift = 0;
+		}
+	}
+
+	len = (long)ulmin(so->so_snd.sb_cc, win) - off;
+
+	if ((taop = tcp_gettaocache(tp->t_inpcb)) == NULL) {
+		taop = &tao_noncached;
+		bzero(taop, sizeof(*taop));
+	}
+
+	/*
+	 * Lop off SYN bit if it has already been sent.  However, if this
+	 * is SYN-SENT state and if segment contains data and if we don't
+	 * know that foreign host supports TAO, suppress sending segment.
+	 */
+	if ((flags & TH_SYN) && SEQ_GT(tp->snd_nxt, tp->snd_una)) {
+		flags &= ~TH_SYN;
+		off--, len++;	/* the SYN took a sequence number, not a data byte */
+		if (len > 0 && tp->t_state == TCPS_SYN_SENT &&
+		    taop->tao_ccsent == 0)
+			return 0;
+	}
+
+	/*
+	 * Be careful not to send data and/or FIN on SYN segments
+	 * in cases when no CC option will be sent.
+	 * This measure is needed to prevent interoperability problems
+	 * with not fully conformant TCP implementations.
+	 */
+	if ((flags & TH_SYN) &&
+	    ((tp->t_flags & TF_NOOPT) || !(tp->t_flags & TF_REQ_CC) ||
+	     ((flags & TH_ACK) && !(tp->t_flags & TF_RCVD_CC)))) {
+		len = 0;
+		flags &= ~TH_FIN;
+	}
+
+	if (len < 0) {
+		/*
+		 * If FIN has been sent but not acked,
+		 * but we haven't been called to retransmit,
+		 * len will be -1.  Otherwise, window shrank
+		 * after we sent into it.  If window shrank to 0,
+		 * cancel pending retransmit, pull snd_nxt back
+		 * to (closed) window, and set the persist timer
+		 * if it isn't already going.  If the window didn't
+		 * close completely, just wait for an ACK.
+		 */
+		len = 0;
+		if (win == 0) {
+			callout_stop(tp->tt_rexmt);
+			tp->t_rxtshift = 0;
+			tp->snd_nxt = tp->snd_una;
+			if (!callout_active(tp->tt_persist))
+				tcp_setpersist(tp);
+		}
+	}
+	if (len > tp->t_maxseg) {
+		len = tp->t_maxseg;
+		sendalot = 1;
+	}
+	if (SEQ_LT(tp->snd_nxt + len, tp->snd_una + so->so_snd.sb_cc))
+		flags &= ~TH_FIN;
+
+	win = sbspace(&so->so_rcv);	/* win now means our receive window */
+
+	/*
+	 * Sender silly window avoidance.  Send now if we can send a
+	 * maximum-size segment; or if this segment carries all the
+	 * remaining data and the connection is idle or TF_NODELAY is set
+	 * (with neither TF_NOPUSH nor TF_MORETOCOME set); or if we are
+	 * forced.  Otherwise don't bother.  If peer's buffer is tiny,
+	 * then send when window is at least half open.
+	 * If retransmitting (possibly after persist timer forced us
+	 * to send into a small window), then must resend.
+	 */
+	if (len) {
+		if (len == tp->t_maxseg)
+			goto send;
+		if (!(tp->t_flags & TF_MORETOCOME) &&
+		    (idle || tp->t_flags & TF_NODELAY) &&
+		    (tp->t_flags & TF_NOPUSH) == 0 &&
+		    len + off >= so->so_snd.sb_cc)
+			goto send;
+		if (tp->t_force)
+			goto send;
+		if (len >= tp->max_sndwnd / 2 && tp->max_sndwnd > 0)
+			goto send;
+		if (SEQ_LT(tp->snd_nxt, tp->snd_max))
+			goto send;
+	}
+
+	/*
+	 * Compare available window to amount of window
+	 * known to peer (as advertised window less
+	 * next expected input).  If the difference is at least two
+	 * max size segments, or at least 50% of the maximum possible
+	 * window, then want to send a window update to peer.
+	 */
+	if (win > 0) {
+		/*
+		 * "adv" is the amount we can increase the window,
+		 * taking into account that we are limited by
+		 * TCP_MAXWIN << tp->rcv_scale.
+		 */
+		long adv = min(win, (long)TCP_MAXWIN << tp->rcv_scale) -
+			(tp->rcv_adv - tp->rcv_nxt);
+
+		if (adv >= (long) (2 * tp->t_maxseg))
+			goto send;
+		if (2 * adv >= (long) so->so_rcv.sb_hiwat)
+			goto send;
+	}
+
+	/*
+	 * Send if we owe peer an ACK.
+	 */
+	if (tp->t_flags & TF_ACKNOW)
+		goto send;
+	if ((flags & TH_RST) ||
+	    ((flags & TH_SYN) && (tp->t_flags & TF_NEEDSYN) == 0))
+		goto send;
+	if (SEQ_GT(tp->snd_up, tp->snd_una))
+		goto send;
+	/*
+	 * If our state indicates that FIN should be sent
+	 * and we have not yet done so, or we're retransmitting the FIN,
+	 * then we need to send.
+	 */
+	if (flags & TH_FIN &&
+	    ((tp->t_flags & TF_SENTFIN) == 0 || tp->snd_nxt == tp->snd_una))
+		goto send;
+
+	/*
+	 * TCP window updates are not reliable, rather a polling protocol
+	 * using ``persist'' packets is used to ensure receipt of window
+	 * updates.  The three ``states'' for the output side are:
+	 *	idle			not doing retransmits or persists
+	 *	persisting		to move a small or zero window
+	 *	(re)transmitting	and thereby not persisting
+	 *
+	 * callout_active(tp->tt_persist)
+	 *	is true when we are in persist state.
+	 * tp->t_force
+	 *	is set when we are called to send a persist packet.
+	 * callout_active(tp->tt_rexmt)
+	 *	is set when we are retransmitting
+	 * The output side is idle when both timers are zero.
+	 *
+	 * If send window is too small, there is data to transmit, and no
+	 * retransmit or persist is pending, then go to persist state.
+	 * If nothing happens soon, send when timer expires:
+	 * if window is nonzero, transmit what we can,
+	 * otherwise force out a byte.
+	 */
+	if (so->so_snd.sb_cc && !callout_active(tp->tt_rexmt) &&
+	    !callout_active(tp->tt_persist)) {
+		tp->t_rxtshift = 0;
+		tcp_setpersist(tp);
+	}
+
+	/*
+	 * No reason to send a segment, just return.
+	 */
+	return (0);
+
+send:
+	/*
+	 * Before ESTABLISHED, force sending of initial options
+	 * unless TCP set not to do any options.
+	 * NOTE: we assume that the IP/TCP header plus TCP options
+	 * always fit in a single mbuf, leaving room for a maximum
+	 * link header, i.e.
+	 *	max_linkhdr + sizeof (struct tcpiphdr) + optlen <= MHLEN
+	 */
+	optlen = 0;
+#ifdef INET6
+	if (isipv6)
+		hdrlen = sizeof (struct ip6_hdr) + sizeof (struct tcphdr);
+	else
+#endif
+	hdrlen = sizeof (struct tcpiphdr);
+	if (flags & TH_SYN) {
+		tp->snd_nxt = tp->iss;
+		if ((tp->t_flags & TF_NOOPT) == 0) {
+			u_short mss;
+
+			opt[0] = TCPOPT_MAXSEG;
+			opt[1] = TCPOLEN_MAXSEG;
+			mss = htons((u_short) tcp_mssopt(tp));
+			(void)memcpy(opt + 2, &mss, sizeof(mss));
+			optlen = TCPOLEN_MAXSEG;
+
+			if ((tp->t_flags & TF_REQ_SCALE) &&
+			    ((flags & TH_ACK) == 0 ||
+			    (tp->t_flags & TF_RCVD_SCALE))) {
+				*((u_int32_t *)(opt + optlen)) = htonl(
+					TCPOPT_NOP << 24 |
+					TCPOPT_WINDOW << 16 |
+					TCPOLEN_WINDOW << 8 |
+					tp->request_r_scale);
+				optlen += 4;
+			}
+		}
+ 	}
+
+ 	/*
+	 * Send a timestamp and echo-reply if this is a SYN and our side
+	 * wants to use timestamps (TF_REQ_TSTMP is set) or both our side
+	 * and our peer have sent timestamps in our SYN's.
+ 	 */
+ 	if ((tp->t_flags & (TF_REQ_TSTMP|TF_NOOPT)) == TF_REQ_TSTMP &&
+ 	    (flags & TH_RST) == 0 &&
+	    ((flags & TH_ACK) == 0 ||
+	     (tp->t_flags & TF_RCVD_TSTMP))) {
+		u_int32_t *lp = (u_int32_t *)(opt + optlen);
+
+ 		/* Form timestamp option as shown in appendix A of RFC 1323. */
+ 		*lp++ = htonl(TCPOPT_TSTAMP_HDR);
+ 		*lp++ = htonl(ticks);
+ 		*lp   = htonl(tp->ts_recent);
+ 		optlen += TCPOLEN_TSTAMP_APPA;
+ 	}
+
+ 	/*
+	 * Send `CC-family' options if our side wants to use them (TF_REQ_CC),
+	 * options are allowed (!TF_NOOPT) and it's not a RST.
+ 	 */
+ 	if ((tp->t_flags & (TF_REQ_CC|TF_NOOPT)) == TF_REQ_CC &&
+ 	     (flags & TH_RST) == 0) {
+		switch (flags & (TH_SYN|TH_ACK)) {
+		/*
+		 * This is a normal ACK, send CC if we received CC before
+		 * from our peer.
+		 */
+		case TH_ACK:
+			if (!(tp->t_flags & TF_RCVD_CC))
+				break;
+			/*FALLTHROUGH*/
+
+		/*
+		 * We can only get here in T/TCP's SYN_SENT* state, when
+		 * we're sending a non-SYN segment without waiting for
+		 * the ACK of our SYN.  A check above assures that we only
+		 * do this if our peer understands T/TCP.
+		 */
+		case 0:
+			opt[optlen++] = TCPOPT_NOP;
+			opt[optlen++] = TCPOPT_NOP;
+			opt[optlen++] = TCPOPT_CC;
+			opt[optlen++] = TCPOLEN_CC;
+			*(u_int32_t *)&opt[optlen] = htonl(tp->cc_send);
+
+			optlen += 4;
+			break;
+
+		/*
+		 * This is our initial SYN, check whether we have to use
+		 * CC or CC.new.
+		 */
+		case TH_SYN:
+			opt[optlen++] = TCPOPT_NOP;
+			opt[optlen++] = TCPOPT_NOP;
+			opt[optlen++] = tp->t_flags & TF_SENDCCNEW ?
+						TCPOPT_CCNEW : TCPOPT_CC;
+			opt[optlen++] = TCPOLEN_CC;
+			*(u_int32_t *)&opt[optlen] = htonl(tp->cc_send);
+ 			optlen += 4;
+			break;
+
+		/*
+		 * This is a SYN,ACK; send CC and CC.echo if we received
+		 * CC from our peer.
+		 */
+		case (TH_SYN|TH_ACK):
+			if (tp->t_flags & TF_RCVD_CC) {
+				opt[optlen++] = TCPOPT_NOP;
+				opt[optlen++] = TCPOPT_NOP;
+				opt[optlen++] = TCPOPT_CC;
+				opt[optlen++] = TCPOLEN_CC;
+				*(u_int32_t *)&opt[optlen] =
+					htonl(tp->cc_send);
+				optlen += 4;
+				opt[optlen++] = TCPOPT_NOP;
+				opt[optlen++] = TCPOPT_NOP;
+				opt[optlen++] = TCPOPT_CCECHO;
+				opt[optlen++] = TCPOLEN_CC;
+				*(u_int32_t *)&opt[optlen] =
+					htonl(tp->cc_recv);
+				optlen += 4;
+			}
+			break;
+		}
+ 	}
+
+ 	hdrlen += optlen;
+
+#ifdef INET6
+	if (isipv6)
+		ipoptlen = ip6_optlen(tp->t_inpcb);
+	else
+#endif
+      {
+	if (tp->t_inpcb->inp_options) {
+		ipoptlen = tp->t_inpcb->inp_options->m_len -
+				offsetof(struct ipoption, ipopt_list);
+	} else {
+		ipoptlen = 0;
+	}
+      }
+#ifdef IPSEC
+	ipoptlen += ipsec_hdrsiz_tcp(tp);
+#endif
+
+	/*
+	 * Adjust data length if insertion of options will
+	 * bump the packet length beyond the t_maxopd length.
+	 * Clear the FIN bit because we cut off the tail of
+	 * the segment.
+	 */
+	if (len + optlen + ipoptlen > tp->t_maxopd) {
+		/*
+		 * If there is still more to send, don't close the connection.
+		 */
+		flags &= ~TH_FIN;
+		len = tp->t_maxopd - optlen - ipoptlen;
+		sendalot = 1;
+	}
+
+/*#ifdef DIAGNOSTIC*/
+#ifdef INET6
+ 	if (max_linkhdr + hdrlen > MCLBYTES)
+		panic("tcphdr too big");
+#else
+ 	if (max_linkhdr + hdrlen > MHLEN)
+		panic("tcphdr too big");
+#endif
+/*#endif*/
+
+	/*
+	 * Grab a header mbuf, attaching a copy of data to
+	 * be transmitted, and initialize the header from
+	 * the template for sends on this connection.
+	 */
+	if (len) {
+		if (tp->t_force && len == 1)
+			tcpstat.tcps_sndprobe++;
+		else if (SEQ_LT(tp->snd_nxt, tp->snd_max)) {
+			tcpstat.tcps_sndrexmitpack++;
+			tcpstat.tcps_sndrexmitbyte += len;
+		} else {
+			tcpstat.tcps_sndpack++;
+			tcpstat.tcps_sndbyte += len;
+		}
+#ifdef notyet
+		if ((m = m_copypack(so->so_snd.sb_mb, off,
+		    (int)len, max_linkhdr + hdrlen)) == 0) {
+			error = ENOBUFS;
+			goto out;
+		}
+		/*
+		 * m_copypack left space for our hdr; use it.
+		 */
+		m->m_len += hdrlen;
+		m->m_data -= hdrlen;
+#else
+		MGETHDR(m, M_DONTWAIT, MT_HEADER);
+		if (m == NULL) {
+			error = ENOBUFS;
+			goto out;
+		}
+#ifdef INET6
+		if (MHLEN < hdrlen + max_linkhdr) {
+			MCLGET(m, M_DONTWAIT);
+			if ((m->m_flags & M_EXT) == 0) {
+				m_freem(m);
+				error = ENOBUFS;
+				goto out;
+			}
+		}
+#endif
+		m->m_data += max_linkhdr;
+		m->m_len = hdrlen;
+		if (len <= MHLEN - hdrlen - max_linkhdr) {
+			m_copydata(so->so_snd.sb_mb, off, (int) len,
+			    mtod(m, caddr_t) + hdrlen);
+			m->m_len += len;
+		} else {
+			m->m_next = m_copy(so->so_snd.sb_mb, off, (int) len);
+			if (m->m_next == 0) {
+				(void) m_free(m);
+				error = ENOBUFS;
+				goto out;
+			}
+		}
+#endif
+		/*
+		 * If we're sending everything we've got, set PUSH.
+		 * (This will keep happy those implementations which only
+		 * give data to the user when a buffer fills or
+		 * a PUSH comes in.)
+		 */
+		if (off + len == so->so_snd.sb_cc)
+			flags |= TH_PUSH;
+	} else {
+		if (tp->t_flags & TF_ACKNOW)
+			tcpstat.tcps_sndacks++;
+		else if (flags & (TH_SYN|TH_FIN|TH_RST))
+			tcpstat.tcps_sndctrl++;
+		else if (SEQ_GT(tp->snd_up, tp->snd_una))
+			tcpstat.tcps_sndurg++;
+		else
+			tcpstat.tcps_sndwinup++;
+
+		MGETHDR(m, M_DONTWAIT, MT_HEADER);
+		if (m == NULL) {
+			error = ENOBUFS;
+			goto out;
+		}
+#ifdef INET6
+		if (isipv6 && (MHLEN < hdrlen + max_linkhdr) &&
+		    MHLEN >= hdrlen) {
+			MH_ALIGN(m, hdrlen);
+		} else
+#endif
+		m->m_data += max_linkhdr;
+		m->m_len = hdrlen;
+	}
+	m->m_pkthdr.rcvif = (struct ifnet *)0;
+	if (tp->t_template == 0)
+		panic("tcp_output");
+#ifdef INET6
+	if (isipv6) {
+		ip6 = mtod(m, struct ip6_hdr *);
+		th = (struct tcphdr *)(ip6 + 1);
+		bcopy((caddr_t)tp->t_template->tt_ipgen, (caddr_t)ip6,
+		      sizeof(struct ip6_hdr));
+		bcopy((caddr_t)&tp->t_template->tt_t, (caddr_t)th,
+		      sizeof(struct tcphdr));
+	} else
+#endif /* INET6 */
+      {
+	ip = mtod(m, struct ip *);
+	ipov = (struct ipovly *)ip;
+	th = (struct tcphdr *)(ip + 1);
+	/* this picks up the pseudo header (w/o the length) */
+	bcopy((caddr_t)tp->t_template->tt_ipgen, (caddr_t)ip,
+	      sizeof(struct ip));
+	bcopy((caddr_t)&tp->t_template->tt_t, (caddr_t)th,
+	      sizeof(struct tcphdr));
+      }
+
+	/*
+	 * Fill in fields, remembering maximum advertised
+	 * window for use in delaying messages about window sizes.
+	 * If resending a FIN, be sure not to use a new sequence number.
+	 */
+	if (flags & TH_FIN && tp->t_flags & TF_SENTFIN &&
+	    tp->snd_nxt == tp->snd_max)
+		tp->snd_nxt--;
+	/*
+	 * If we are doing retransmissions, then snd_nxt will
+	 * not reflect the first unsent octet.  For ACK only
+	 * packets, we do not want the sequence number of the
+	 * retransmitted packet, we want the sequence number
+	 * of the next unsent octet.  So, if there is no data
+	 * (and no SYN or FIN), use snd_max instead of snd_nxt
+	 * when filling in ti_seq.  But if we are in persist
+	 * state, snd_max might reflect one byte beyond the
+	 * right edge of the window, so use snd_nxt in that
+	 * case, since we know we aren't doing a retransmission.
+	 * (retransmit and persist are mutually exclusive...)
+	 */
+	if (len || (flags & (TH_SYN|TH_FIN)) 
+	    || callout_active(tp->tt_persist))
+		th->th_seq = htonl(tp->snd_nxt);
+	else
+		th->th_seq = htonl(tp->snd_max);
+	th->th_ack = htonl(tp->rcv_nxt);
+	if (optlen) {
+		bcopy(opt, th + 1, optlen);
+		th->th_off = (sizeof (struct tcphdr) + optlen) >> 2;
+	}
+	th->th_flags = flags;
+	/*
+	 * Calculate receive window.  Don't shrink window,
+	 * but avoid silly window syndrome.
+	 */
+	if (win < (long)(so->so_rcv.sb_hiwat / 4) && win < (long)tp->t_maxseg)
+		win = 0;
+	if (win < (long)(tp->rcv_adv - tp->rcv_nxt))
+		win = (long)(tp->rcv_adv - tp->rcv_nxt);
+	if (win > (long)TCP_MAXWIN << tp->rcv_scale)
+		win = (long)TCP_MAXWIN << tp->rcv_scale;
+	th->th_win = htons((u_short) (win>>tp->rcv_scale));
+	if (SEQ_GT(tp->snd_up, tp->snd_nxt)) {
+		th->th_urp = htons((u_short)(tp->snd_up - tp->snd_nxt));
+		th->th_flags |= TH_URG;
+	} else
+		/*
+		 * If no urgent pointer to send, then we pull
+		 * the urgent pointer to the left edge of the send window
+		 * so that it doesn't drift into the send window on sequence
+		 * number wraparound.
+		 */
+		tp->snd_up = tp->snd_una;		/* drag it along */
+
+	/*
+	 * Put TCP length in extended header, and then
+	 * checksum extended header and data.
+	 */
+	m->m_pkthdr.len = hdrlen + len; /* in6_cksum() needs this */
+#ifdef INET6
+	if (isipv6)
+		/*
+		 * ip6_plen need not be filled in now; it will be filled
+		 * in by ip6_output.
+		 */
+		th->th_sum = in6_cksum(m, IPPROTO_TCP, sizeof(struct ip6_hdr),
+				       sizeof(struct tcphdr) + optlen + len);
+	else
+#endif /* INET6 */
+      {
+	m->m_pkthdr.csum_flags = CSUM_TCP;
+	m->m_pkthdr.csum_data = offsetof(struct tcphdr, th_sum);
+	if (len + optlen)
+		th->th_sum = in_addword(th->th_sum, 
+		    htons((u_short)(optlen + len)));
+
+	/* IP version must be set here for ipv4/ipv6 checking later */
+	KASSERT(ip->ip_v == IPVERSION,
+	    ("%s: IP version incorrect: %d", __FUNCTION__, ip->ip_v));
+      }
+
+	/*
+	 * In transmit state, time the transmission and arrange for
+	 * the retransmit.  In persist state, just set snd_max.
+	 */
+	if (tp->t_force == 0 || !callout_active(tp->tt_persist)) {
+		tcp_seq startseq = tp->snd_nxt;
+
+		/*
+		 * Advance snd_nxt over sequence space of this segment.
+		 */
+		if (flags & (TH_SYN|TH_FIN)) {
+			if (flags & TH_SYN)
+				tp->snd_nxt++;
+			if (flags & TH_FIN) {
+				tp->snd_nxt++;
+				tp->t_flags |= TF_SENTFIN;
+			}
+		}
+		tp->snd_nxt += len;
+		if (SEQ_GT(tp->snd_nxt, tp->snd_max)) {
+			tp->snd_max = tp->snd_nxt;
+			/*
+			 * Time this transmission if not a retransmission and
+			 * not currently timing anything.
+			 */
+			if (tp->t_rtttime == 0) {
+				tp->t_rtttime = ticks;
+				tp->t_rtseq = startseq;
+				tcpstat.tcps_segstimed++;
+			}
+		}
+
+		/*
+		 * Set retransmit timer if not currently set,
+		 * and not doing an ack or a keep-alive probe.
+		 * Initial value for retransmit timer is smoothed
+		 * round-trip time + 2 * round-trip time variance.
+		 * Initialize shift counter which is used for backoff
+		 * of retransmit time.
+		 */
+		if (!callout_active(tp->tt_rexmt) &&
+		    tp->snd_nxt != tp->snd_una) {
+			if (callout_active(tp->tt_persist)) {
+				callout_stop(tp->tt_persist);
+				tp->t_rxtshift = 0;
+			}
+			callout_reset(tp->tt_rexmt, tp->t_rxtcur,
+				      tcp_timer_rexmt, tp);
+		}
+	} else
+		if (SEQ_GT(tp->snd_nxt + len, tp->snd_max))
+			tp->snd_max = tp->snd_nxt + len;
+
+#ifdef TCPDEBUG
+	/*
+	 * Trace.
+	 */
+	if (so->so_options & SO_DEBUG)
+		tcp_trace(TA_OUTPUT, tp->t_state, tp, mtod(m, void *), th, 0);
+#endif
+
+	/*
+	 * Fill in IP length and desired time to live and
+	 * send to IP level.  There should be a better way
+	 * to handle ttl and tos; we could keep them in
+	 * the template, but need a way to checksum without them.
+	 */
+	/*
+	 * m->m_pkthdr.len should have been set before cksum calculation,
+	 * because in6_cksum() needs it.
+	 */
+#ifdef INET6
+	if (isipv6) {
+		/*
+		 * we separately set hoplimit for every segment, since the
+		 * user might want to change the value via setsockopt.
+		 * Also, desired default hop limit might be changed via
+		 * Neighbor Discovery.
+		 */
+		ip6->ip6_hlim = in6_selecthlim(tp->t_inpcb,
+					       tp->t_inpcb->in6p_route.ro_rt ?
+					       tp->t_inpcb->in6p_route.ro_rt->rt_ifp
+					       : NULL);
+
+		/* TODO: IPv6 IP6TOS_ECT bit on */
+#ifdef IPSEC
+		ipsec_setsocket(m, so);
+#endif /*IPSEC*/
+		error = ip6_output(m,
+			    tp->t_inpcb->in6p_outputopts,
+			    &tp->t_inpcb->in6p_route,
+			    (so->so_options & SO_DONTROUTE), NULL, NULL);
+	} else
+#endif /* INET6 */
+    {
+	struct rtentry *rt;
+	ip->ip_len = m->m_pkthdr.len;
+#ifdef INET6
+ 	if (INP_CHECK_SOCKAF(so, AF_INET6))
+ 		ip->ip_ttl = in6_selecthlim(tp->t_inpcb,
+ 					    tp->t_inpcb->in6p_route.ro_rt ?
+ 					    tp->t_inpcb->in6p_route.ro_rt->rt_ifp
+ 					    : NULL);
+ 	else
+#endif /* INET6 */
+	ip->ip_ttl = tp->t_inpcb->inp_ip_ttl;	/* XXX */
+	ip->ip_tos = tp->t_inpcb->inp_ip_tos;	/* XXX */
+	/*
+	 * See if we should do MTU discovery.  We do it only if the following
+	 * are true:
+	 *	1) we have a valid route to the destination
+	 *	2) the MTU is not locked (if it is, then discovery has been
+	 *	   disabled)
+	 */
+	if (path_mtu_discovery
+	    && (rt = tp->t_inpcb->inp_route.ro_rt)
+	    && rt->rt_flags & RTF_UP
+	    && !(rt->rt_rmx.rmx_locks & RTV_MTU)) {
+		ip->ip_off |= IP_DF;
+	}
+#ifdef IPSEC
+ 	ipsec_setsocket(m, so);
+#endif /*IPSEC*/
+	error = ip_output(m, tp->t_inpcb->inp_options, &tp->t_inpcb->inp_route,
+	    (so->so_options & SO_DONTROUTE), 0);
+    }
+	if (error) {
+
+		/*
+		 * We know that the packet was lost, so back out the
+		 * sequence number advance, if any.
+		 */
+		if (tp->t_force == 0 || !callout_active(tp->tt_persist)) {
+			/*
+			 * No need to check for TH_FIN here because
+			 * the TF_SENTFIN flag handles that case.
+			 */
+			if ((flags & TH_SYN) == 0)
+				tp->snd_nxt -= len;
+		}
+
+out:		/* mbuf allocation failures (ENOBUFS) jump straight here */
+		if (error == ENOBUFS) {
+	                if (!callout_active(tp->tt_rexmt) &&
+                            !callout_active(tp->tt_persist))
+	                        callout_reset(tp->tt_rexmt, tp->t_rxtcur,
+                                      tcp_timer_rexmt, tp);
+			tcp_quench(tp->t_inpcb, 0);
+			return (0);
+		}
+		if (error == EMSGSIZE) {
+			/*
+			 * ip_output() will have already fixed the route
+			 * for us.  tcp_mtudisc() will, as its last action,
+			 * initiate retransmission, so it is important to
+			 * not do so here.
+			 */
+			tcp_mtudisc(tp->t_inpcb, 0);
+			return 0;
+		}
+		if ((error == EHOSTUNREACH || error == ENETDOWN)
+		    && TCPS_HAVERCVDSYN(tp->t_state)) {
+			tp->t_softerror = error;
+			return (0);
+		}
+		return (error);
+	}
+	tcpstat.tcps_sndtotal++;
+
+	/*
+	 * Data sent (as far as we can tell).
+	 * If this advertises a larger window than any other segment,
+	 * then remember the size of the advertised window.
+	 * Any pending ACK has now been sent.
+	 */
+	if (win > 0 && SEQ_GT(tp->rcv_nxt+win, tp->rcv_adv))
+		tp->rcv_adv = tp->rcv_nxt + win;
+	tp->last_ack_sent = tp->rcv_nxt;
+	tp->t_flags &= ~TF_ACKNOW;
+	if (tcp_delack_enabled)
+		callout_stop(tp->tt_delack);
+	if (sendalot && (!tcp_do_newreno || --maxburst))
+		goto again;
+	return (0);
+}
+
+/* Arm (or re-arm) the persist timer, backing off by tp->t_rxtshift. */
+void
+tcp_setpersist(tp)
+	register struct tcpcb *tp;
+{
+	int base, timeout;
+
+	/* Persist and retransmit timers are mutually exclusive. */
+	if (callout_active(tp->tt_rexmt))
+		panic("tcp_setpersist: retransmit pending");
+	base = ((tp->t_srtt >> 2) + tp->t_rttvar) >> 1;
+	TCPT_RANGESET(timeout, base * tcp_backoff[tp->t_rxtshift],
+		      TCPTV_PERSMIN, TCPTV_PERSMAX);
+	callout_reset(tp->tt_persist, timeout, tcp_timer_persist, tp);
+	/* Back off further next time, up to the maximum shift. */
+	if (tp->t_rxtshift < TCP_MAXRXTSHIFT)
+		tp->t_rxtshift += 1;
+}
diff --git a/sys/netinet/tcp_reass.c b/sys/netinet/tcp_reass.c
new file mode 100644
index 0000000..bf578b7
--- /dev/null
+++ b/sys/netinet/tcp_reass.c
@@ -0,0 +1,2885 @@
+/*
+ * Copyright (c) 1982, 1986, 1988, 1990, 1993, 1994, 1995
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)tcp_input.c	8.12 (Berkeley) 5/24/95
+ * $FreeBSD$
+ */
+
+#include "opt_ipfw.h"		/* for ipfw_fwd		*/
+#include "opt_inet6.h"
+#include "opt_ipsec.h"
+#include "opt_tcpdebug.h"
+#include "opt_tcp_input.h"
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/kernel.h>
+#include <sys/sysctl.h>
+#include <sys/malloc.h>
+#include <sys/mbuf.h>
+#include <sys/proc.h>		/* for proc0 declaration */
+#include <sys/protosw.h>
+#include <sys/socket.h>
+#include <sys/socketvar.h>
+#include <sys/syslog.h>
+
+#include <machine/cpu.h>	/* before tcp_seq.h, for tcp_random18() */
+
+#include <net/if.h>
+#include <net/route.h>
+
+#include <netinet/in.h>
+#include <netinet/in_systm.h>
+#include <netinet/ip.h>
+#include <netinet/ip_icmp.h>	/* for ICMP_BANDLIM		*/
+#include <netinet/in_var.h>
+#include <netinet/icmp_var.h>	/* for ICMP_BANDLIM		*/
+#include <netinet/in_pcb.h>
+#include <netinet/ip_var.h>
+#ifdef INET6
+#include <netinet/ip6.h>
+#include <netinet/icmp6.h>
+#include <netinet6/nd6.h>
+#include <netinet6/ip6_var.h>
+#include <netinet6/in6_pcb.h>
+#endif
+#include <netinet/tcp.h>
+#include <netinet/tcp_fsm.h>
+#include <netinet/tcp_seq.h>
+#include <netinet/tcp_timer.h>
+#include <netinet/tcp_var.h>
+#ifdef INET6
+#include <netinet6/tcp6_var.h>
+#endif
+#include <netinet/tcpip.h>
+#ifdef TCPDEBUG
+#include <netinet/tcp_debug.h>
+
+u_char tcp_saveipgen[40]; /* the size must be of max ip header, now IPv6 */
+struct tcphdr tcp_savetcp;
+#endif /* TCPDEBUG */
+
+#ifdef IPSEC
+#include <netinet6/ipsec.h>
+#ifdef INET6
+#include <netinet6/ipsec6.h>
+#endif
+#include <netkey/key.h>
+#endif /*IPSEC*/
+
+#include <machine/in_cksum.h>
+
+MALLOC_DEFINE(M_TSEGQ, "tseg_qent", "TCP segment queue entry");
+
+static int	tcprexmtthresh = 3;	/* t_dupacks limit tested in tcp_input() */
+tcp_seq	tcp_iss;
+tcp_cc	tcp_ccgen;
+
+struct	tcpstat tcpstat;
+SYSCTL_STRUCT(_net_inet_tcp, TCPCTL_STATS, stats, CTLFLAG_RD, 
+    &tcpstat , tcpstat, "TCP statistics (struct tcpstat, netinet/tcp_var.h)");
+
+static int log_in_vain = 0;	/* no pcb found: 1 logs SYNs, 2 logs every segment */
+SYSCTL_INT(_net_inet_tcp, OID_AUTO, log_in_vain, CTLFLAG_RW, 
+    &log_in_vain, 0, "Log all incoming TCP connections");
+
+static int blackhole = 0;	/* no pcb found: 1 drops SYNs, other non-zero drops all */
+SYSCTL_INT(_net_inet_tcp, OID_AUTO, blackhole, CTLFLAG_RW,
+	&blackhole, 0, "Do not send RST when dropping refused connections");
+
+int tcp_delack_enabled = 1;	/* consulted by DELAY_ACK() */
+SYSCTL_INT(_net_inet_tcp, OID_AUTO, delayed_ack, CTLFLAG_RW, 
+    &tcp_delack_enabled, 0, 
+    "Delay ACK to try and piggyback it onto a data packet");
+
+int tcp_lq_overflow = 1;	/* flag TF_LQ_OVERFLOW on a connection dropped for room */
+SYSCTL_INT(_net_inet_tcp, OID_AUTO, tcp_lq_overflow, CTLFLAG_RW,
+    &tcp_lq_overflow, 0, 
+    "Listen Queue Overflow");
+
+#ifdef TCP_DROP_SYNFIN
+static int drop_synfin = 0;	/* non-zero: tcp_input() drops SYN+FIN segments */
+SYSCTL_INT(_net_inet_tcp, OID_AUTO, drop_synfin, CTLFLAG_RW,
+    &drop_synfin, 0, "Drop TCP packets with SYN+FIN set");
+#endif
+
+struct inpcbhead tcb;
+#define	tcb6	tcb  /* for KAME src sync over BSD*'s */
+struct inpcbinfo tcbinfo;	/* pcb table searched by the *_pcblookup_hash() calls */
+
+static void	 tcp_dooptions __P((struct tcpcb *,
+	    u_char *, int, struct tcphdr *, struct tcpopt *));
+static void	 tcp_pulloutofband __P((struct socket *,
+	    struct tcphdr *, struct mbuf *, int));
+static int	 tcp_reass __P((struct tcpcb *, struct tcphdr *, int *,
+				struct mbuf *));
+static void	 tcp_xmit_timer __P((struct tcpcb *, int));
+static int	 tcp_newreno __P((struct tcpcb *, struct tcphdr *));
+
+/* Upper-layer hint to ND6 Neighbor Unreachability Detection for tp's route. */
+#ifdef INET6
+#define ND6_HINT(tp) \
+do { \
+	if ((tp) && (tp)->t_inpcb && \
+	    ((tp)->t_inpcb->inp_vflag & INP_IPV6) != 0 && \
+	    (tp)->t_inpcb->in6p_route.ro_rt) \
+		nd6_nud_hint((tp)->t_inpcb->in6p_route.ro_rt, NULL, 0); \
+} while (0)
+#else	/* !INET6: the hint expands to nothing */
+#define ND6_HINT(tp)
+#endif	/* INET6 */
+
+/*
+ * True if this ack should be delayed (enabled, no delack timer pending).
+ */
+#define DELAY_ACK(tp) \
+	(tcp_delack_enabled && !callout_pending((tp)->tt_delack))
+
+/*
+ * Insert segment which includes th into reassembly queue of tcp with
+ * control block tp.  Return TH_FIN if reassembly now includes a segment
+ * with FIN.  The macro form does the common case inline (segment is the
+ * next to be received on an established connection, and the queue is
+ * empty), avoiding linkage into and removal from the queue and
+ * repetition of various conversions.  Set DELACK for segments received
+ * in order, but ack immediately when segments are out of order (so fast
+ * retransmit can work).  Macro arguments are evaluated more than once.
+ */
+#define	TCP_REASS(tp, th, tlenp, m, so, flags) { \
+	if ((th)->th_seq == (tp)->rcv_nxt && \
+	    LIST_EMPTY(&(tp)->t_segq) && \
+	    TCPS_HAVEESTABLISHED((tp)->t_state)) { \
+		if (DELAY_ACK((tp))) \
+			callout_reset((tp)->tt_delack, tcp_delacktime, \
+			    tcp_timer_delack, (tp)); \
+		else \
+			(tp)->t_flags |= TF_ACKNOW; \
+		(tp)->rcv_nxt += *(tlenp); \
+		(flags) = (th)->th_flags & TH_FIN; \
+		tcpstat.tcps_rcvpack++;\
+		tcpstat.tcps_rcvbyte += *(tlenp);\
+		ND6_HINT(tp); \
+		sbappend(&(so)->so_rcv, (m)); \
+		sorwakeup(so); \
+	} else { \
+		(flags) = tcp_reass((tp), (th), (tlenp), (m)); \
+		(tp)->t_flags |= TF_ACKNOW; \
+	} \
+}
+
+static int
+tcp_reass(tp, th, tlenp, m)
+	register struct tcpcb *tp;	/* connection owning the segment queue */
+	register struct tcphdr *th;	/* header of new segment, or 0 to only flush */
+	int *tlenp;			/* in/out: data length, reduced when trimmed */
+	struct mbuf *m;			/* segment data; queued or freed here */
+{
+	struct tseg_qent *q;		/* queue cursor */
+	struct tseg_qent *p = NULL;	/* last entry not starting after th */
+	struct tseg_qent *nq;		/* successor saved while unlinking q */
+	struct tseg_qent *te;		/* entry allocated for the new segment */
+	struct socket *so = tp->t_inpcb->inp_socket;
+	int flags;			/* TH_FIN bit of last segment presented */
+
+	/*
+	 * Call with th==0 after become established to
+	 * force pre-ESTABLISHED data up to user socket.
+	 */
+	if (th == 0)
+		goto present;
+
+	/* Allocate a new queue entry. If we can't, just drop the pkt. XXX */
+	MALLOC(te, struct tseg_qent *, sizeof (struct tseg_qent), M_TSEGQ,
+	       M_NOWAIT);
+	if (te == NULL) {
+		tcpstat.tcps_rcvmemdrop++;
+		m_freem(m);
+		return (0);
+	}
+
+	/*
+	 * Find a segment which begins after this one does.
+	 */
+	LIST_FOREACH(q, &tp->t_segq, tqe_q) {
+		if (SEQ_GT(q->tqe_th->th_seq, th->th_seq))
+			break;
+		p = q;
+	}
+
+	/*
+	 * If there is a preceding segment, it may provide some of
+	 * our data already.  If so, drop the data from the incoming
+	 * segment.  If it provides all of our data, drop us.
+	 */
+	if (p != NULL) {
+		register int i;
+		/* conversion to int (in i) handles seq wraparound */
+		i = p->tqe_th->th_seq + p->tqe_len - th->th_seq;
+		if (i > 0) {
+			if (i >= *tlenp) {	/* fully covered by p: duplicate */
+				tcpstat.tcps_rcvduppack++;
+				tcpstat.tcps_rcvdupbyte += *tlenp;
+				m_freem(m);
+				FREE(te, M_TSEGQ);
+				/*
+				 * Try to present any queued data
+				 * at the left window edge to the user.
+				 * This is needed after the 3-WHS
+				 * completes.
+				 */
+				goto present;	/* ??? */
+			}
+			m_adj(m, i);		/* trim the overlapping front */
+			*tlenp -= i;
+			th->th_seq += i;
+		}
+	}
+	tcpstat.tcps_rcvoopack++;
+	tcpstat.tcps_rcvoobyte += *tlenp;
+
+	/*
+	 * While we overlap succeeding segments trim them or,
+	 * if they are completely covered, dequeue them.
+	 */
+	while (q) {
+		register int i = (th->th_seq + *tlenp) - q->tqe_th->th_seq;
+		if (i <= 0)
+			break;
+		if (i < q->tqe_len) {		/* partial overlap: trim q, stop */
+			q->tqe_th->th_seq += i;
+			q->tqe_len -= i;
+			m_adj(q->tqe_m, i);
+			break;
+		}
+
+		nq = LIST_NEXT(q, tqe_q);	/* q fully covered: unlink, free */
+		LIST_REMOVE(q, tqe_q);
+		m_freem(q->tqe_m);
+		FREE(q, M_TSEGQ);
+		q = nq;
+	}
+
+	/* Insert the new segment queue entry into place. */
+	te->tqe_m = m;
+	te->tqe_th = th;
+	te->tqe_len = *tlenp;
+
+	if (p == NULL) {
+		LIST_INSERT_HEAD(&tp->t_segq, te, tqe_q);
+	} else {
+		LIST_INSERT_AFTER(p, te, tqe_q);
+	}
+
+present:
+	/*
+	 * Present data to user, advancing rcv_nxt through
+	 * completed sequence space.
+	 */
+	if (!TCPS_HAVEESTABLISHED(tp->t_state))
+		return (0);
+	q = LIST_FIRST(&tp->t_segq);
+	if (!q || q->tqe_th->th_seq != tp->rcv_nxt)	/* nothing in order yet */
+		return (0);
+	do {
+		tp->rcv_nxt += q->tqe_len;
+		flags = q->tqe_th->th_flags & TH_FIN;
+		nq = LIST_NEXT(q, tqe_q);
+		LIST_REMOVE(q, tqe_q);
+		if (so->so_state & SS_CANTRCVMORE)	/* discard instead of queueing */
+			m_freem(q->tqe_m);
+		else
+			sbappend(&so->so_rcv, q->tqe_m);
+		FREE(q, M_TSEGQ);
+		q = nq;
+	} while (q && q->tqe_th->th_seq == tp->rcv_nxt);
+	ND6_HINT(tp);
+	sorwakeup(so);
+	return (flags);		/* TH_FIN bit of the last segment handed up */
+}
+
+/*
+ * TCP input routine, follows pages 65-76 of the
+ * protocol specification dated September, 1981 very closely.
+ */
+#ifdef INET6
+int
+tcp6_input(mp, offp, proto)
+	struct mbuf **mp;	/* *mp is the received packet */
+	int *offp, proto;	/* *offp and proto are handed on to tcp_input() */
+{
+	register struct mbuf *m = *mp;
+
+	IP6_EXTHDR_CHECK(m, *offp, sizeof(struct tcphdr), IPPROTO_DONE);
+
+	/*
+	 * draft-itojun-ipv6-tcp-to-anycast: answer anycast-flagged segments
+	 * with an ICMPv6 error instead of TCP.  Better place to put this in?
+	 */
+	if (m->m_flags & M_ANYCAST6) {
+		struct ip6_hdr *ip6;
+
+		ip6 = mtod(m, struct ip6_hdr *);
+		icmp6_error(m, ICMP6_DST_UNREACH, ICMP6_DST_UNREACH_ADDR,
+			    (caddr_t)&ip6->ip6_dst - (caddr_t)ip6);	/* offset of ip6_dst */
+		return IPPROTO_DONE;
+	}
+
+	tcp_input(m, *offp, proto);
+	return IPPROTO_DONE;	/* every path returns IPPROTO_DONE */
+}
+#endif	/* INET6 */
+
+void
+tcp_input(m, off0, proto)
+	register struct mbuf *m;
+	int off0, proto;
+{
+	register struct tcphdr *th;
+	register struct ip *ip = NULL;
+	register struct ipovly *ipov;
+	register struct inpcb *inp;
+	u_char *optp = NULL;
+	int optlen = 0;
+	int len, tlen, off;
+	int drop_hdrlen;
+	register struct tcpcb *tp = 0;
+	register int thflags;
+	struct socket *so = 0;
+	int todrop, acked, ourfinisacked, needoutput = 0;
+	struct in_addr laddr;
+#ifdef INET6
+	struct in6_addr laddr6;
+#endif
+	int dropsocket = 0;
+	int iss = 0;
+	u_long tiwin;
+	struct tcpopt to;		/* options in this segment */
+	struct rmxp_tao *taop;		/* pointer to our TAO cache entry */
+	struct rmxp_tao	tao_noncached;	/* in case there's no cached entry */
+#ifdef TCPDEBUG
+	short ostate = 0;
+#endif
+#ifdef INET6
+	struct ip6_hdr *ip6 = NULL;
+	int isipv6;
+#endif /* INET6 */
+	int rstreason; /* For badport_bandlim accounting purposes */
+
+#ifdef INET6
+	isipv6 = (mtod(m, struct ip *)->ip_v == 6) ? 1 : 0;
+#endif
+	bzero((char *)&to, sizeof(to));
+
+	tcpstat.tcps_rcvtotal++;
+
+#ifdef INET6
+	if (isipv6) {
+		/* IP6_EXTHDR_CHECK() is already done at tcp6_input() */
+		ip6 = mtod(m, struct ip6_hdr *);
+		tlen = sizeof(*ip6) + ntohs(ip6->ip6_plen) - off0;
+		if (in6_cksum(m, IPPROTO_TCP, off0, tlen)) {
+			tcpstat.tcps_rcvbadsum++;
+			goto drop;
+		}
+		th = (struct tcphdr *)((caddr_t)ip6 + off0);
+	} else
+#endif /* INET6 */
+      {
+	/*
+	 * Get IP and TCP header together in first mbuf.
+	 * Note: IP leaves IP header in first mbuf.
+	 */
+	if (off0 > sizeof (struct ip)) {
+		ip_stripoptions(m, (struct mbuf *)0);
+		off0 = sizeof(struct ip);
+	}
+	if (m->m_len < sizeof (struct tcpiphdr)) {
+		if ((m = m_pullup(m, sizeof (struct tcpiphdr))) == 0) {
+			tcpstat.tcps_rcvshort++;
+			return;
+		}
+	}
+	ip = mtod(m, struct ip *);
+	ipov = (struct ipovly *)ip;
+	th = (struct tcphdr *)((caddr_t)ip + off0);
+	tlen = ip->ip_len;
+
+	if (m->m_pkthdr.csum_flags & CSUM_DATA_VALID) {
+		if (m->m_pkthdr.csum_flags & CSUM_PSEUDO_HDR)
+                	th->th_sum = m->m_pkthdr.csum_data;
+		else
+	                th->th_sum = in_pseudo(ip->ip_src.s_addr,
+			    ip->ip_dst.s_addr, htonl(m->m_pkthdr.csum_data +
+			    ip->ip_len + IPPROTO_TCP));
+		th->th_sum ^= 0xffff;
+	} else {
+		/*
+		 * Checksum extended TCP header and data.
+		 */
+		len = sizeof (struct ip) + tlen;
+		bzero(ipov->ih_x1, sizeof(ipov->ih_x1));
+		ipov->ih_len = (u_short)tlen;
+		HTONS(ipov->ih_len);
+		th->th_sum = in_cksum(m, len);
+	}
+	if (th->th_sum) {
+		tcpstat.tcps_rcvbadsum++;
+		goto drop;
+	}
+#ifdef INET6
+	/* Re-initialization for later version check */
+	ip->ip_v = IPVERSION;
+#endif
+      }
+
+	/*
+	 * Check that TCP offset makes sense,
+	 * pull out TCP options and adjust length.		XXX
+	 */
+	off = th->th_off << 2;
+	if (off < sizeof (struct tcphdr) || off > tlen) {
+		tcpstat.tcps_rcvbadoff++;
+		goto drop;
+	}
+	tlen -= off;	/* tlen is used instead of ti->ti_len */
+	if (off > sizeof (struct tcphdr)) {
+#ifdef INET6
+		if (isipv6) {
+			IP6_EXTHDR_CHECK(m, off0, off, );
+			ip6 = mtod(m, struct ip6_hdr *);
+			th = (struct tcphdr *)((caddr_t)ip6 + off0);
+		} else
+#endif /* INET6 */
+	      {
+		if (m->m_len < sizeof(struct ip) + off) {
+			if ((m = m_pullup(m, sizeof (struct ip) + off)) == 0) {
+				tcpstat.tcps_rcvshort++;
+				return;
+			}
+			ip = mtod(m, struct ip *);
+			ipov = (struct ipovly *)ip;
+			th = (struct tcphdr *)((caddr_t)ip + off0);
+		}
+	      }
+		optlen = off - sizeof (struct tcphdr);
+		optp = (u_char *)(th + 1);
+	}
+	thflags = th->th_flags;
+
+#ifdef TCP_DROP_SYNFIN
+	/*
+	 * If the drop_synfin option is enabled, drop all packets with
+	 * both the SYN and FIN bits set. This prevents e.g. nmap from
+	 * identifying the TCP/IP stack.
+	 *
+	 * This is a violation of the TCP specification.
+	 */
+	if (drop_synfin && (thflags & (TH_SYN|TH_FIN)) == (TH_SYN|TH_FIN))
+		goto drop;
+#endif
+
+	/*
+	 * Convert TCP protocol specific fields to host format.
+	 */
+	NTOHL(th->th_seq);
+	NTOHL(th->th_ack);
+	NTOHS(th->th_win);
+	NTOHS(th->th_urp);
+
+	/*
+	 * Delay dropping TCP, IP headers, IPv6 ext headers, and TCP options,
+	 * until after ip6_savecontrol() is called and before other functions
+	 * which don't want those proto headers.
+	 * Because ip6_savecontrol() is going to parse the mbuf to
+	 * search for data to be passed up to user-land, it wants mbuf
+	 * parameters to be unchanged.
+	 */
+	drop_hdrlen = off0 + off;
+
+	/*
+	 * Locate pcb for segment.
+	 */
+findpcb:
+#ifdef IPFIREWALL_FORWARD
+	if (ip_fw_fwd_addr != NULL
+#ifdef INET6
+	    && isipv6 == NULL /* IPv6 support is not yet */
+#endif /* INET6 */
+	    ) {
+		/*
+		 * Diverted. Pretend to be the destination.
+		 * already got one like this? 
+		 */
+		inp = in_pcblookup_hash(&tcbinfo, ip->ip_src, th->th_sport,
+			ip->ip_dst, th->th_dport, 0, m->m_pkthdr.rcvif);
+		if (!inp) {
+			/* 
+			 * No, then it's new. Try find the ambushing socket
+			 */
+			if (!ip_fw_fwd_addr->sin_port) {
+				inp = in_pcblookup_hash(&tcbinfo, ip->ip_src,
+				    th->th_sport, ip_fw_fwd_addr->sin_addr,
+				    th->th_dport, 1, m->m_pkthdr.rcvif);
+			} else {
+				inp = in_pcblookup_hash(&tcbinfo,
+				    ip->ip_src, th->th_sport,
+	    			    ip_fw_fwd_addr->sin_addr,
+				    ntohs(ip_fw_fwd_addr->sin_port), 1,
+				    m->m_pkthdr.rcvif);
+			}
+		}
+		ip_fw_fwd_addr = NULL;
+	} else
+#endif	/* IPFIREWALL_FORWARD */
+      {
+#ifdef INET6
+	if (isipv6)
+		inp = in6_pcblookup_hash(&tcbinfo, &ip6->ip6_src, th->th_sport,
+					 &ip6->ip6_dst, th->th_dport, 1,
+					 m->m_pkthdr.rcvif);
+	else
+#endif /* INET6 */
+	inp = in_pcblookup_hash(&tcbinfo, ip->ip_src, th->th_sport,
+	    ip->ip_dst, th->th_dport, 1, m->m_pkthdr.rcvif);
+      }
+
+#ifdef IPSEC
+#ifdef INET6
+	if (isipv6) {
+		if (inp != NULL && ipsec6_in_reject_so(m, inp->inp_socket)) {
+			ipsec6stat.in_polvio++;
+			goto drop;
+		}
+	} else
+#endif /* INET6 */
+	if (inp != NULL && ipsec4_in_reject_so(m, inp->inp_socket)) {
+		ipsecstat.in_polvio++;
+		goto drop;
+	}
+#endif /*IPSEC*/
+
+	/*
+	 * If the state is CLOSED (i.e., TCB does not exist) then
+	 * all data in the incoming segment is discarded.
+	 * If the TCB exists but is in CLOSED state, it is embryonic,
+	 * but should either do a listen or a connect soon.
+	 */
+	if (inp == NULL) {
+		if (log_in_vain) {
+#ifdef INET6
+			char dbuf[INET6_ADDRSTRLEN], sbuf[INET6_ADDRSTRLEN];
+#else /* INET6 */
+			char dbuf[4*sizeof "123"], sbuf[4*sizeof "123"];
+#endif /* INET6 */
+
+#ifdef INET6
+			if (isipv6) {
+				strcpy(dbuf, ip6_sprintf(&ip6->ip6_dst));
+				strcpy(sbuf, ip6_sprintf(&ip6->ip6_src));
+			} else
+#endif
+		      {
+			strcpy(dbuf, inet_ntoa(ip->ip_dst));
+			strcpy(sbuf, inet_ntoa(ip->ip_src));
+		      }
+			switch (log_in_vain) {
+			case 1:
+				if(thflags & TH_SYN)
+					log(LOG_INFO,
+			    		"Connection attempt to TCP %s:%d from %s:%d\n",
+			    		dbuf, ntohs(th->th_dport),
+					sbuf,
+					ntohs(th->th_sport));
+				break;
+			case 2:
+				log(LOG_INFO,
+			    	"Connection attempt to TCP %s:%d from %s:%d flags:0x%x\n",
+			    	dbuf, ntohs(th->th_dport), sbuf,
+			    	ntohs(th->th_sport), thflags);
+				break;
+			default:
+				break;
+			}
+		}
+		if (blackhole) { 
+			switch (blackhole) {
+			case 1:
+				if (thflags & TH_SYN)
+					goto drop;
+				break;
+			case 2:
+				goto drop;
+			default:
+				goto drop;
+			}
+		}
+		rstreason = BANDLIM_RST_CLOSEDPORT;
+		goto dropwithreset;
+	}
+	tp = intotcpcb(inp);
+	if (tp == 0) {
+		rstreason = BANDLIM_RST_CLOSEDPORT;
+		goto dropwithreset;
+	}
+	if (tp->t_state == TCPS_CLOSED)
+		goto drop;
+
+	/* Unscale the window into a 32-bit value. */
+	if ((thflags & TH_SYN) == 0)
+		tiwin = th->th_win << tp->snd_scale;
+	else
+		tiwin = th->th_win;
+
+#ifdef INET6
+	/* save packet options if user wanted */
+	if (isipv6 && inp->in6p_flags & INP_CONTROLOPTS) {
+		if (inp->in6p_options) {
+			m_freem(inp->in6p_options);
+			inp->in6p_options = 0;
+		}
+		ip6_savecontrol(inp, &inp->in6p_options, ip6, m);
+	}
+        /* else, should also do ip_srcroute() here? */
+#endif /* INET6 */
+
+	so = inp->inp_socket;
+	if (so->so_options & (SO_DEBUG|SO_ACCEPTCONN)) {
+#ifdef TCPDEBUG
+		if (so->so_options & SO_DEBUG) {
+			ostate = tp->t_state;
+#ifdef INET6
+			if (isipv6)
+				bcopy((char *)ip6, (char *)tcp_saveipgen,
+				      sizeof(*ip6));
+			else
+#endif /* INET6 */
+			bcopy((char *)ip, (char *)tcp_saveipgen, sizeof(*ip));
+			tcp_savetcp = *th;
+		}
+#endif
+		if (so->so_options & SO_ACCEPTCONN) {
+			register struct tcpcb *tp0 = tp;
+			struct socket *so2;
+#ifdef IPSEC
+			struct socket *oso;
+#endif
+#ifdef INET6
+			struct inpcb *oinp = sotoinpcb(so);
+#endif /* INET6 */
+
+#ifndef IPSEC
+			/*
+			 * Current IPsec implementation makes incorrect IPsec
+			 * cache if this check is done here.
+			 * So delay this until duplicated socket is created.
+			 */
+			if ((thflags & (TH_RST|TH_ACK|TH_SYN)) != TH_SYN) {
+				/*
+				 * Note: dropwithreset makes sure we don't
+				 * send a RST in response to a RST.
+				 */
+				if (thflags & TH_ACK) {
+					tcpstat.tcps_badsyn++;
+					rstreason = BANDLIM_RST_OPENPORT;
+					goto dropwithreset;
+				}
+				goto drop;
+			}
+#endif
+			so2 = sonewconn(so, 0);
+			if (so2 == 0) {
+				tcpstat.tcps_listendrop++;
+				so2 = sodropablereq(so);
+				if (so2) {
+					if (tcp_lq_overflow)
+						sototcpcb(so2)->t_flags |= 
+						    TF_LQ_OVERFLOW;
+					tcp_drop(sototcpcb(so2), ETIMEDOUT);
+					so2 = sonewconn(so, 0);
+				}
+				if (!so2)
+					goto drop;
+			}
+#ifdef IPSEC
+			oso = so;
+#endif
+			so = so2;
+			/*
+			 * This is ugly, but ....
+			 *
+			 * Mark socket as temporary until we're
+			 * committed to keeping it.  The code at
+			 * ``drop'' and ``dropwithreset'' check the
+			 * flag dropsocket to see if the temporary
+			 * socket created here should be discarded.
+			 * We mark the socket as discardable until
+			 * we're committed to it below in TCPS_LISTEN.
+			 */
+			dropsocket++;
+			inp = (struct inpcb *)so->so_pcb;
+#ifdef INET6
+			if (isipv6)
+				inp->in6p_laddr = ip6->ip6_dst;
+			else {
+				if ((inp->inp_flags & IN6P_BINDV6ONLY) == 0) {
+					inp->inp_vflag &= ~INP_IPV6;
+					inp->inp_vflag |= INP_IPV4;
+				}
+#endif /* INET6 */
+			inp->inp_laddr = ip->ip_dst;
+#ifdef INET6
+			}
+#endif /* INET6 */
+			inp->inp_lport = th->th_dport;
+			if (in_pcbinshash(inp) != 0) {
+				/*
+				 * Undo the assignments above if we failed to
+				 * put the PCB on the hash lists.
+				 */
+#ifdef INET6
+				if (isipv6)
+					inp->in6p_laddr = in6addr_any;
+				else
+#endif /* INET6 */
+				inp->inp_laddr.s_addr = INADDR_ANY;
+				inp->inp_lport = 0;
+				goto drop;
+			}
+#ifdef IPSEC
+			/*
+			 * To avoid creating incorrectly cached IPsec
+			 * association, this is need to be done here.
+			 *
+			 * Subject: (KAME-snap 748)
+			 * From: Wayne Knowles <w.knowles@niwa.cri.nz>
+			 * ftp://ftp.kame.net/pub/mail-list/snap-users/748
+			 */
+			if ((thflags & (TH_RST|TH_ACK|TH_SYN)) != TH_SYN) {
+				/*
+				 * Note: dropwithreset makes sure we don't
+				 * send a RST in response to a RST.
+				 */
+				if (thflags & TH_ACK) {
+					tcpstat.tcps_badsyn++;
+					rstreason = BANDLIM_RST_OPENPORT;
+					goto dropwithreset;
+				}
+				goto drop;
+			}
+#endif
+#ifdef INET6
+			if (isipv6) {
+				/*
+				 * inherit socket options from the listening
+				 * socket.
+				 */
+				inp->inp_flags |=
+					oinp->inp_flags & INP_CONTROLOPTS;
+				if (inp->inp_flags & INP_CONTROLOPTS) {
+					if (inp->in6p_options) {
+						m_freem(inp->in6p_options);
+						inp->in6p_options = 0;
+					}
+					ip6_savecontrol(inp,
+							&inp->in6p_options,
+							ip6, m);
+				}
+			} else
+#endif /* INET6 */
+			inp->inp_options = ip_srcroute();
+#ifdef IPSEC
+			/* copy old policy into new socket's */
+			if (ipsec_copy_policy(sotoinpcb(oso)->inp_sp,
+			                      inp->inp_sp))
+				printf("tcp_input: could not copy policy\n");
+#endif
+			tp = intotcpcb(inp);
+			tp->t_state = TCPS_LISTEN;
+			tp->t_flags |= tp0->t_flags & (TF_NOPUSH|TF_NOOPT);
+
+			/* Compute proper scaling value from buffer space */
+			while (tp->request_r_scale < TCP_MAX_WINSHIFT &&
+			   TCP_MAXWIN << tp->request_r_scale <
+			   so->so_rcv.sb_hiwat)
+				tp->request_r_scale++;
+		}
+	}
+
+	/*
+	 * Segment received on connection.
+	 * Reset idle time and keep-alive timer.
+	 */
+	tp->t_rcvtime = ticks;
+	if (TCPS_HAVEESTABLISHED(tp->t_state))
+		callout_reset(tp->tt_keep, tcp_keepidle, tcp_timer_keep, tp);
+
+	/*
+	 * Process options if not in LISTEN state,
+	 * else do it below (after getting remote address).
+	 */
+	if (tp->t_state != TCPS_LISTEN)
+		tcp_dooptions(tp, optp, optlen, th, &to);
+
+	/*
+	 * Header prediction: check for the two common cases
+	 * of a uni-directional data xfer.  If the packet has
+	 * no control flags, is in-sequence, the window didn't
+	 * change and we're not retransmitting, it's a
+	 * candidate.  If the length is zero and the ack moved
+	 * forward, we're the sender side of the xfer.  Just
+	 * free the data acked & wake any higher level process
+	 * that was blocked waiting for space.  If the length
+	 * is non-zero and the ack didn't move, we're the
+	 * receiver side.  If we're getting packets in-order
+	 * (the reassembly queue is empty), add the data to
+	 * the socket buffer and note that we need a delayed ack.
+	 * Make sure that the hidden state-flags are also off.
+	 * Since we check for TCPS_ESTABLISHED above, it can only
+	 * be TH_NEEDSYN.
+	 */
+	if (tp->t_state == TCPS_ESTABLISHED &&
+	    (thflags & (TH_SYN|TH_FIN|TH_RST|TH_URG|TH_ACK)) == TH_ACK &&
+	    ((tp->t_flags & (TF_NEEDSYN|TF_NEEDFIN)) == 0) &&
+	    ((to.to_flag & TOF_TS) == 0 ||
+	     TSTMP_GEQ(to.to_tsval, tp->ts_recent)) &&
+	    /*
+	     * Using the CC option is compulsory if once started:
+	     *   the segment is OK if no T/TCP was negotiated or
+	     *   if the segment has a CC option equal to CCrecv
+	     */
+	    ((tp->t_flags & (TF_REQ_CC|TF_RCVD_CC)) != (TF_REQ_CC|TF_RCVD_CC) ||
+	     ((to.to_flag & TOF_CC) != 0 && to.to_cc == tp->cc_recv)) &&
+	    th->th_seq == tp->rcv_nxt &&
+	    tiwin && tiwin == tp->snd_wnd &&
+	    tp->snd_nxt == tp->snd_max) {
+
+		/*
+		 * If last ACK falls within this segment's sequence numbers,
+		 * record the timestamp.
+		 * NOTE that the test is modified according to the latest
+		 * proposal of the tcplw@cray.com list (Braden 1993/04/26).
+		 */
+		if ((to.to_flag & TOF_TS) != 0 &&
+		   SEQ_LEQ(th->th_seq, tp->last_ack_sent)) {
+			tp->ts_recent_age = ticks;
+			tp->ts_recent = to.to_tsval;
+		}
+
+		if (tlen == 0) {
+			if (SEQ_GT(th->th_ack, tp->snd_una) &&
+			    SEQ_LEQ(th->th_ack, tp->snd_max) &&
+			    tp->snd_cwnd >= tp->snd_wnd &&
+			    tp->t_dupacks < tcprexmtthresh) {
+				/*
+				 * this is a pure ack for outstanding data.
+				 */
+				++tcpstat.tcps_predack;
+				/*
+				 * "bad retransmit" recovery
+				 */
+				if (tp->t_rxtshift == 1 &&
+				    ticks < tp->t_badrxtwin) {
+					tp->snd_cwnd = tp->snd_cwnd_prev;
+					tp->snd_ssthresh =
+					    tp->snd_ssthresh_prev;
+					tp->snd_nxt = tp->snd_max;
+					tp->t_badrxtwin = 0;
+				}
+				if ((to.to_flag & TOF_TS) != 0)
+					tcp_xmit_timer(tp,
+					    ticks - to.to_tsecr + 1);
+				else if (tp->t_rtttime &&
+					    SEQ_GT(th->th_ack, tp->t_rtseq))
+					tcp_xmit_timer(tp, ticks - tp->t_rtttime);
+				acked = th->th_ack - tp->snd_una;
+				tcpstat.tcps_rcvackpack++;
+				tcpstat.tcps_rcvackbyte += acked;
+				sbdrop(&so->so_snd, acked);
+				tp->snd_una = th->th_ack;
+				m_freem(m);
+				ND6_HINT(tp); /* some progress has been done */
+
+				/*
+				 * If all outstanding data are acked, stop
+				 * retransmit timer, otherwise restart timer
+				 * using current (possibly backed-off) value.
+				 * If process is waiting for space,
+				 * wakeup/selwakeup/signal.  If data
+				 * are ready to send, let tcp_output
+				 * decide between more output or persist.
+				 */
+				if (tp->snd_una == tp->snd_max)
+					callout_stop(tp->tt_rexmt);
+				else if (!callout_active(tp->tt_persist))
+					callout_reset(tp->tt_rexmt, 
+						      tp->t_rxtcur,
+						      tcp_timer_rexmt, tp);
+
+				sowwakeup(so);
+				if (so->so_snd.sb_cc)
+					(void) tcp_output(tp);
+				return;
+			}
+		} else if (th->th_ack == tp->snd_una &&
+		    LIST_EMPTY(&tp->t_segq) &&
+		    tlen <= sbspace(&so->so_rcv)) {
+			/*
+			 * this is a pure, in-sequence data packet
+			 * with nothing on the reassembly queue and
+			 * we have enough buffer space to take it.
+			 */
+			++tcpstat.tcps_preddat;
+			tp->rcv_nxt += tlen;
+			tcpstat.tcps_rcvpack++;
+			tcpstat.tcps_rcvbyte += tlen;
+			ND6_HINT(tp);	/* some progress has been done */
+			/*
+			 * Add data to socket buffer.
+			 */
+			m_adj(m, drop_hdrlen);	/* delayed header drop */
+			sbappend(&so->so_rcv, m);
+			sorwakeup(so);
+			if (DELAY_ACK(tp)) {
+	                        callout_reset(tp->tt_delack, tcp_delacktime,
+	                            tcp_timer_delack, tp);
+			} else {
+				tp->t_flags |= TF_ACKNOW;
+				tcp_output(tp);
+			}
+			return;
+		}
+	}
+
+	/*
+	 * Calculate amount of space in receive window,
+	 * and then do TCP input processing.
+	 * Receive window is amount of space in rcv queue,
+	 * but not less than advertised window.
+	 */
+	{ int win;
+
+	win = sbspace(&so->so_rcv);
+	if (win < 0)
+		win = 0;
+	tp->rcv_wnd = imax(win, (int)(tp->rcv_adv - tp->rcv_nxt));
+	}
+
+	switch (tp->t_state) {
+
+	/*
+	 * If the state is LISTEN then ignore segment if it contains an RST.
+	 * If the segment contains an ACK then it is bad and send a RST.
+	 * If it does not contain a SYN then it is not interesting; drop it.
+	 * If it is from this socket, drop it, it must be forged.
+	 * Don't bother responding if the destination was a broadcast.
+	 * Otherwise initialize tp->rcv_nxt, and tp->irs, select an initial
+	 * tp->iss, and send a segment:
+	 *     <SEQ=ISS><ACK=RCV_NXT><CTL=SYN,ACK>
+	 * Also initialize tp->snd_nxt to tp->iss+1 and tp->snd_una to tp->iss.
+	 * Fill in remote peer address fields if not previously specified.
+	 * Enter SYN_RECEIVED state, and process any other fields of this
+	 * segment in this state.
+	 */
+	case TCPS_LISTEN: {
+		register struct sockaddr_in *sin;
+#ifdef INET6
+		register struct sockaddr_in6 *sin6;
+#endif
+
+		if (thflags & TH_RST)
+			goto drop;
+		if (thflags & TH_ACK) {
+			rstreason = BANDLIM_RST_OPENPORT;
+			goto dropwithreset;
+		}
+		if ((thflags & TH_SYN) == 0)
+			goto drop;
+		if (th->th_dport == th->th_sport) {
+#ifdef INET6
+			if (isipv6) {
+				if (IN6_ARE_ADDR_EQUAL(&ip6->ip6_dst,
+						       &ip6->ip6_src))
+					goto drop;
+			} else
+#endif /* INET6 */
+			if (ip->ip_dst.s_addr == ip->ip_src.s_addr)
+				goto drop;
+		}
+		/*
+		 * RFC1122 4.2.3.10, p. 104: discard bcast/mcast SYN
+		 * in_broadcast() should never return true on a received
+		 * packet with M_BCAST not set.
+ 		 *
+ 		 * Packets with a multicast source address should also
+ 		 * be discarded.
+		 */
+		if (m->m_flags & (M_BCAST|M_MCAST))
+			goto drop;
+#ifdef INET6
+		if (isipv6) {
+			if (IN6_IS_ADDR_MULTICAST(&ip6->ip6_dst) ||
+			    IN6_IS_ADDR_MULTICAST(&ip6->ip6_src))
+				goto drop;
+		} else
+#endif
+		if (IN_MULTICAST(ntohl(ip->ip_dst.s_addr)) ||
+		    IN_MULTICAST(ntohl(ip->ip_src.s_addr)) ||
+		    ip->ip_src.s_addr == htonl(INADDR_BROADCAST))
+			goto drop;
+#ifdef INET6
+		if (isipv6) {
+			MALLOC(sin6, struct sockaddr_in6 *, sizeof *sin6,
+			       M_SONAME, M_NOWAIT | M_ZERO);
+			if (sin6 == NULL)
+				goto drop;
+			sin6->sin6_family = AF_INET6;
+			sin6->sin6_len = sizeof(*sin6);
+			sin6->sin6_addr = ip6->ip6_src;
+			sin6->sin6_port = th->th_sport;
+			laddr6 = inp->in6p_laddr;
+			if (IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_laddr))
+				inp->in6p_laddr = ip6->ip6_dst;
+			if (in6_pcbconnect(inp, (struct sockaddr *)sin6,
+					   &proc0)) {
+				inp->in6p_laddr = laddr6;
+				FREE(sin6, M_SONAME);
+				goto drop;
+			}
+			FREE(sin6, M_SONAME);
+		} else
+#endif
+	      {
+		MALLOC(sin, struct sockaddr_in *, sizeof *sin, M_SONAME,
+		       M_NOWAIT);
+		if (sin == NULL)
+			goto drop;
+		sin->sin_family = AF_INET;
+		sin->sin_len = sizeof(*sin);
+		sin->sin_addr = ip->ip_src;
+		sin->sin_port = th->th_sport;
+		bzero((caddr_t)sin->sin_zero, sizeof(sin->sin_zero));
+		laddr = inp->inp_laddr;
+		if (inp->inp_laddr.s_addr == INADDR_ANY)
+			inp->inp_laddr = ip->ip_dst;
+		if (in_pcbconnect(inp, (struct sockaddr *)sin, &proc0)) {
+			inp->inp_laddr = laddr;
+			FREE(sin, M_SONAME);
+			goto drop;
+		}
+		FREE(sin, M_SONAME);
+	      }
+		tp->t_template = tcp_template(tp);
+		if (tp->t_template == 0) {
+			tp = tcp_drop(tp, ENOBUFS);
+			dropsocket = 0;		/* socket is already gone */
+			goto drop;
+		}
+		if ((taop = tcp_gettaocache(inp)) == NULL) {
+			taop = &tao_noncached;
+			bzero(taop, sizeof(*taop));
+		}
+		tcp_dooptions(tp, optp, optlen, th, &to);
+		if (iss)
+			tp->iss = iss;
+		else {
+			tp->iss = tcp_rndiss_next();
+ 		}
+		tp->irs = th->th_seq;
+		tcp_sendseqinit(tp);
+		tcp_rcvseqinit(tp);
+		tp->snd_recover = tp->snd_una;
+		/*
+		 * Initialization of the tcpcb for transaction;
+		 *   set SND.WND = SEG.WND,
+		 *   initialize CCsend and CCrecv.
+		 */
+		tp->snd_wnd = tiwin;	/* initial send-window */
+		tp->cc_send = CC_INC(tcp_ccgen);
+		tp->cc_recv = to.to_cc;
+		/*
+		 * Perform TAO test on incoming CC (SEG.CC) option, if any.
+		 * - compare SEG.CC against cached CC from the same host,
+		 *	if any.
+		 * - if SEG.CC > cached value, SYN must be new and is accepted
+		 *	immediately: save new CC in the cache, mark the socket
+		 *	connected, enter ESTABLISHED state, turn on flag to
+		 *	send a SYN in the next segment.
+		 *	A virtual advertised window is set in rcv_adv to
+		 *	initialize SWS prevention.  Then enter normal segment
+		 *	processing: drop SYN, process data and FIN.
+		 * - otherwise do a normal 3-way handshake.
+		 */
+		if ((to.to_flag & TOF_CC) != 0) {
+		    if (((tp->t_flags & TF_NOPUSH) != 0) &&
+			taop->tao_cc != 0 && CC_GT(to.to_cc, taop->tao_cc)) {
+
+			taop->tao_cc = to.to_cc;
+			tp->t_starttime = ticks;
+			tp->t_state = TCPS_ESTABLISHED;
+
+			/*
+			 * If there is a FIN, or if there is data and the
+			 * connection is local, then delay SYN,ACK(SYN) in
+			 * the hope of piggy-backing it on a response
+			 * segment.  Otherwise must send ACK now in case
+			 * the other side is slow starting.
+			 */
+			if (DELAY_ACK(tp) && ((thflags & TH_FIN) ||
+			    (tlen != 0 &&
+#ifdef INET6
+			      ((isipv6 && in6_localaddr(&inp->in6p_faddr))
+			      ||
+			      (!isipv6 &&
+#endif
+			    in_localaddr(inp->inp_faddr)
+#ifdef INET6
+			       ))
+#endif
+			     ))) {
+                                callout_reset(tp->tt_delack, tcp_delacktime,  
+                                    tcp_timer_delack, tp);  
+				tp->t_flags |= TF_NEEDSYN;
+			} else 
+				tp->t_flags |= (TF_ACKNOW | TF_NEEDSYN);
+
+			/*
+			 * Limit the `virtual advertised window' to TCP_MAXWIN
+			 * here.  Even if we requested window scaling, it will
+			 * become effective only later when our SYN is acked.
+			 */
+			tp->rcv_adv += min(tp->rcv_wnd, TCP_MAXWIN);
+			tcpstat.tcps_connects++;
+			soisconnected(so);
+			callout_reset(tp->tt_keep, tcp_keepinit,
+				      tcp_timer_keep, tp);
+			dropsocket = 0;		/* committed to socket */
+			tcpstat.tcps_accepts++;
+			goto trimthenstep6;
+		    }
+		/* else do standard 3-way handshake */
+		} else {
+		    /*
+		     * No CC option, but maybe CC.NEW:
+		     *   invalidate cached value.
+		     */
+		     taop->tao_cc = 0;
+		}
+		/*
+		 * TAO test failed or there was no CC option,
+		 *    do a standard 3-way handshake.
+		 */
+		tp->t_flags |= TF_ACKNOW;
+		tp->t_state = TCPS_SYN_RECEIVED;
+		callout_reset(tp->tt_keep, tcp_keepinit, tcp_timer_keep, tp);
+		dropsocket = 0;		/* committed to socket */
+		tcpstat.tcps_accepts++;
+		goto trimthenstep6;
+		}
+
+	/*
+	 * If the state is SYN_RECEIVED:
+	 *	if seg contains an ACK, but not for our SYN/ACK, send a RST.
+	 */
+	case TCPS_SYN_RECEIVED:
+		if ((thflags & TH_ACK) &&
+		    (SEQ_LEQ(th->th_ack, tp->snd_una) ||
+		     SEQ_GT(th->th_ack, tp->snd_max))) {
+				rstreason = BANDLIM_RST_OPENPORT;
+				goto dropwithreset;
+		}
+		break;
+
+	/*
+	 * If the state is SYN_SENT:
+	 *	if seg contains an ACK, but not for our SYN, drop the input.
+	 *	if seg contains a RST, then drop the connection.
+	 *	if seg does not contain SYN, then drop it.
+	 * Otherwise this is an acceptable SYN segment
+	 *	initialize tp->rcv_nxt and tp->irs
+	 *	if seg contains ack then advance tp->snd_una
+	 *	if SYN has been acked change to ESTABLISHED else SYN_RCVD state
+	 *	arrange for segment to be acked (eventually)
+	 *	continue processing rest of data/controls, beginning with URG
+	 */
+	case TCPS_SYN_SENT:
+		if ((taop = tcp_gettaocache(inp)) == NULL) {
+			taop = &tao_noncached;
+			bzero(taop, sizeof(*taop));
+		}
+
+		if ((thflags & TH_ACK) &&
+		    (SEQ_LEQ(th->th_ack, tp->iss) ||
+		     SEQ_GT(th->th_ack, tp->snd_max))) {
+			/*
+			 * If we have a cached CCsent for the remote host,
+			 * hence we haven't just crashed and restarted,
+			 * do not send a RST.  This may be a retransmission
+			 * from the other side after our earlier ACK was lost.
+			 * Our new SYN, when it arrives, will serve as the
+			 * needed ACK.
+			 */
+			if (taop->tao_ccsent != 0)
+				goto drop;
+			else {
+				rstreason = BANDLIM_UNLIMITED;
+				goto dropwithreset;
+			}
+		}
+		if (thflags & TH_RST) {
+			if (thflags & TH_ACK)
+				tp = tcp_drop(tp, ECONNREFUSED);
+			goto drop;
+		}
+		if ((thflags & TH_SYN) == 0)
+			goto drop;
+		tp->snd_wnd = th->th_win;	/* initial send window */
+		tp->cc_recv = to.to_cc;		/* foreign CC */
+
+		tp->irs = th->th_seq;
+		tcp_rcvseqinit(tp);
+		if (thflags & TH_ACK) {
+			/*
+			 * Our SYN was acked.  If segment contains CC.ECHO
+			 * option, check it to make sure this segment really
+			 * matches our SYN.  If not, just drop it as old
+			 * duplicate, but send an RST if we're still playing
+			 * by the old rules.  If no CC.ECHO option, make sure
+			 * we don't get fooled into using T/TCP.
+			 */
+			if (to.to_flag & TOF_CCECHO) {
+				if (tp->cc_send != to.to_ccecho) {
+					if (taop->tao_ccsent != 0)
+						goto drop;
+					else {
+						rstreason = BANDLIM_UNLIMITED;
+						goto dropwithreset;
+					}
+				}
+			} else
+				tp->t_flags &= ~TF_RCVD_CC;
+			tcpstat.tcps_connects++;
+			soisconnected(so);
+			/* Do window scaling on this connection? */
+			if ((tp->t_flags & (TF_RCVD_SCALE|TF_REQ_SCALE)) ==
+				(TF_RCVD_SCALE|TF_REQ_SCALE)) {
+				tp->snd_scale = tp->requested_s_scale;
+				tp->rcv_scale = tp->request_r_scale;
+			}
+			/* Segment is acceptable, update cache if undefined. */
+			if (taop->tao_ccsent == 0)
+				taop->tao_ccsent = to.to_ccecho;
+
+			tp->rcv_adv += tp->rcv_wnd;
+			tp->snd_una++;		/* SYN is acked */
+			/*
+			 * If there's data, delay ACK; if there's also a FIN
+			 * ACKNOW will be turned on later.
+			 */
+			if (DELAY_ACK(tp) && tlen != 0)
+                                callout_reset(tp->tt_delack, tcp_delacktime,  
+                                    tcp_timer_delack, tp);  
+			else
+				tp->t_flags |= TF_ACKNOW;
+			/*
+			 * Received <SYN,ACK> in SYN_SENT[*] state.
+			 * Transitions:
+			 *	SYN_SENT  --> ESTABLISHED
+			 *	SYN_SENT* --> FIN_WAIT_1
+			 */
+			tp->t_starttime = ticks;
+			if (tp->t_flags & TF_NEEDFIN) {
+				tp->t_state = TCPS_FIN_WAIT_1;
+				tp->t_flags &= ~TF_NEEDFIN;
+				thflags &= ~TH_SYN;
+			} else {
+				tp->t_state = TCPS_ESTABLISHED;
+				callout_reset(tp->tt_keep, tcp_keepidle,
+					      tcp_timer_keep, tp);
+			}
+		} else {
+		/*
+		 *  Received initial SYN in SYN-SENT[*] state => simul-
+		 *  taneous open.  If segment contains CC option and there is
+		 *  a cached CC, apply TAO test; if it succeeds, connection is
+		 *  half-synchronized.  Otherwise, do 3-way handshake:
+		 *        SYN-SENT -> SYN-RECEIVED
+		 *        SYN-SENT* -> SYN-RECEIVED*
+		 *  If there was no CC option, clear cached CC value.
+		 */
+			tp->t_flags |= TF_ACKNOW;
+			callout_stop(tp->tt_rexmt);
+			if (to.to_flag & TOF_CC) {
+				if (taop->tao_cc != 0 &&
+				    CC_GT(to.to_cc, taop->tao_cc)) {
+					/*
+					 * update cache and make transition:
+					 *        SYN-SENT -> ESTABLISHED*
+					 *        SYN-SENT* -> FIN-WAIT-1*
+					 */
+					taop->tao_cc = to.to_cc;
+					tp->t_starttime = ticks;
+					if (tp->t_flags & TF_NEEDFIN) {
+						tp->t_state = TCPS_FIN_WAIT_1;
+						tp->t_flags &= ~TF_NEEDFIN;
+					} else {
+						tp->t_state = TCPS_ESTABLISHED;
+						callout_reset(tp->tt_keep,
+							      tcp_keepidle,
+							      tcp_timer_keep,
+							      tp);
+					}
+					tp->t_flags |= TF_NEEDSYN;
+				} else
+					tp->t_state = TCPS_SYN_RECEIVED;
+			} else {
+				/* CC.NEW or no option => invalidate cache */
+				taop->tao_cc = 0;
+				tp->t_state = TCPS_SYN_RECEIVED;
+			}
+		}
+
+trimthenstep6:
+		/*
+		 * Advance th->th_seq to correspond to first data byte.
+		 * If data, trim to stay within window,
+		 * dropping FIN if necessary.
+		 */
+		th->th_seq++;
+		if (tlen > tp->rcv_wnd) {
+			todrop = tlen - tp->rcv_wnd;
+			m_adj(m, -todrop);
+			tlen = tp->rcv_wnd;
+			thflags &= ~TH_FIN;
+			tcpstat.tcps_rcvpackafterwin++;
+			tcpstat.tcps_rcvbyteafterwin += todrop;
+		}
+		tp->snd_wl1 = th->th_seq - 1;
+		tp->rcv_up = th->th_seq;
+		/*
+		 *  Client side of transaction: already sent SYN and data.
+		 *  If the remote host used T/TCP to validate the SYN,
+		 *  our data will be ACK'd; if so, enter normal data segment
+		 *  processing in the middle of step 5, ack processing.
+		 *  Otherwise, goto step 6.
+		 */
+ 		if (thflags & TH_ACK)
+			goto process_ACK;
+		goto step6;
+	/*
+	 * If the state is LAST_ACK or CLOSING or TIME_WAIT:
+	 *	if segment contains a SYN and CC [not CC.NEW] option:
+	 *              if state == TIME_WAIT and connection duration > MSL,
+	 *                  drop packet and send RST;
+	 *
+	 *		if SEG.CC > CCrecv then is new SYN, and can implicitly
+	 *		    ack the FIN (and data) in retransmission queue.
+	 *                  Complete close and delete TCPCB.  Then reprocess
+	 *                  segment, hoping to find new TCPCB in LISTEN state;
+	 *
+	 *		else must be old SYN; drop it.
+	 *      else do normal processing.
+	 */
+	case TCPS_LAST_ACK:
+	case TCPS_CLOSING:
+	case TCPS_TIME_WAIT:
+		if ((thflags & TH_SYN) &&
+		    (to.to_flag & TOF_CC) && tp->cc_recv != 0) {
+			if (tp->t_state == TCPS_TIME_WAIT &&
+					(ticks - tp->t_starttime) > tcp_msl) {
+				rstreason = BANDLIM_UNLIMITED;
+				goto dropwithreset;
+			}
+			if (CC_GT(to.to_cc, tp->cc_recv)) {
+				tp = tcp_close(tp);
+				goto findpcb;
+			}
+			else
+				goto drop;
+		}
+ 		break;  /* continue normal processing */
+	}
+
+	/*
+	 * States other than LISTEN or SYN_SENT.
+	 * First check the RST flag and sequence number since reset segments
+	 * are exempt from the timestamp and connection count tests.  This
+	 * fixes a bug introduced by the Stevens, vol. 2, p. 960 bugfix
+	 * below which allowed reset segments in half the sequence space
+	 * to fall through and be processed (which gives forged reset
+	 * segments with a random sequence number a 50 percent chance of
+	 * killing a connection).
+	 * Then check timestamp, if present.
+	 * Then check the connection count, if present.
+	 * Then check that at least some bytes of segment are within
+	 * receive window.  If segment begins before rcv_nxt,
+	 * drop leading data (and SYN); if nothing left, just ack.
+	 *
+	 *
+	 * If the RST bit is set, check the sequence number to see
+	 * if this is a valid reset segment.
+	 * RFC 793 page 37:
+	 *   In all states except SYN-SENT, all reset (RST) segments
+	 *   are validated by checking their SEQ-fields.  A reset is
+	 *   valid if its sequence number is in the window.
+	 * Note: this does not take into account delayed ACKs, so
+	 *   we should test against last_ack_sent instead of rcv_nxt.
+	 *   The sequence number in the reset segment is normally an
+	 *   echo of our outgoing acknowledgement numbers, but some hosts
+	 *   send a reset with the sequence number at the rightmost edge
+	 *   of our receive window, and we have to handle this case.
+	 * If we have multiple segments in flight, the initial reset
+	 * segment sequence numbers will be to the left of last_ack_sent,
+	 * but they will eventually catch up.
+	 * In any case, it never made sense to trim reset segments to
+	 * fit the receive window since RFC 1122 says:
+	 *   4.2.2.12  RST Segment: RFC-793 Section 3.4
+	 *
+	 *    A TCP SHOULD allow a received RST segment to include data.
+	 *
+	 *    DISCUSSION
+	 *         It has been suggested that a RST segment could contain
+	 *         ASCII text that encoded and explained the cause of the
+	 *         RST.  No standard has yet been established for such
+	 *         data.
+	 *
+	 * If the reset segment passes the sequence number test examine
+	 * the state:
+	 *    SYN_RECEIVED STATE:
+	 *	If passive open, return to LISTEN state.
+	 *	If active open, inform user that connection was refused.
+	 *    ESTABLISHED, FIN_WAIT_1, FIN_WAIT2, CLOSE_WAIT STATES:
+	 *	Inform user that connection was reset, and close tcb.
+	 *    CLOSING, LAST_ACK STATES:
+	 *	Close the tcb.
+	 *    TIME_WAIT STATE:
+	 *	Drop the segment - see Stevens, vol. 2, p. 964 and
+	 *      RFC 1337.
+	 */
+	if (thflags & TH_RST) {
+		if (SEQ_GEQ(th->th_seq, tp->last_ack_sent) &&
+		    SEQ_LT(th->th_seq, tp->last_ack_sent + tp->rcv_wnd)) {
+			switch (tp->t_state) {
+
+			case TCPS_SYN_RECEIVED:
+				so->so_error = ECONNREFUSED;
+				goto close;
+
+			case TCPS_ESTABLISHED:
+			case TCPS_FIN_WAIT_1:
+			case TCPS_FIN_WAIT_2:
+			case TCPS_CLOSE_WAIT:
+				so->so_error = ECONNRESET;
+			close:
+				tp->t_state = TCPS_CLOSED;
+				tcpstat.tcps_drops++;
+				tp = tcp_close(tp);
+				break;
+
+			case TCPS_CLOSING:
+			case TCPS_LAST_ACK:
+				tp = tcp_close(tp);
+				break;
+
+			case TCPS_TIME_WAIT:
+				break;
+			}
+		}
+		goto drop;
+	}
+
+	/*
+	 * RFC 1323 PAWS: If we have a timestamp reply on this segment
+	 * and it's less than ts_recent, drop it.
+	 */
+	if ((to.to_flag & TOF_TS) != 0 && tp->ts_recent &&
+	    TSTMP_LT(to.to_tsval, tp->ts_recent)) {
+
+		/* Check to see if ts_recent is over 24 days old.  */
+		if ((int)(ticks - tp->ts_recent_age) > TCP_PAWS_IDLE) {
+			/*
+			 * Invalidate ts_recent.  If this segment updates
+			 * ts_recent, the age will be reset later and ts_recent
+			 * will get a valid value.  If it does not, setting
+			 * ts_recent to zero will at least satisfy the
+			 * requirement that zero be placed in the timestamp
+			 * echo reply when ts_recent isn't valid.  The
+			 * age isn't reset until we get a valid ts_recent
+			 * because we don't want out-of-order segments to be
+			 * dropped when ts_recent is old.
+			 */
+			tp->ts_recent = 0;
+		} else {
+			tcpstat.tcps_rcvduppack++;
+			tcpstat.tcps_rcvdupbyte += tlen;
+			tcpstat.tcps_pawsdrop++;
+			goto dropafterack;
+		}
+	}
+
+	/*
+	 * T/TCP mechanism
+	 *   If T/TCP was negotiated and the segment doesn't have CC,
+	 *   or if its CC is wrong then drop the segment.
+	 *   RST segments do not have to comply with this.
+	 */
+	if ((tp->t_flags & (TF_REQ_CC|TF_RCVD_CC)) == (TF_REQ_CC|TF_RCVD_CC) &&
+	    ((to.to_flag & TOF_CC) == 0 || tp->cc_recv != to.to_cc))
+ 		goto dropafterack;
+
+	/*
+	 * In the SYN-RECEIVED state, validate that the packet belongs to
+	 * this connection before trimming the data to fit the receive
+	 * window.  Check the sequence number versus IRS since we know
+	 * the sequence numbers haven't wrapped.  This is a partial fix
+	 * for the "LAND" DoS attack.
+	 */
+	if (tp->t_state == TCPS_SYN_RECEIVED && SEQ_LT(th->th_seq, tp->irs)) {
+		rstreason = BANDLIM_RST_OPENPORT;
+		goto dropwithreset;
+	}
+
+	todrop = tp->rcv_nxt - th->th_seq;
+	if (todrop > 0) {
+		if (thflags & TH_SYN) {
+			thflags &= ~TH_SYN;
+			th->th_seq++;
+			if (th->th_urp > 1)
+				th->th_urp--;
+			else
+				thflags &= ~TH_URG;
+			todrop--;
+		}
+		/*
+		 * Following if statement from Stevens, vol. 2, p. 960.
+		 */
+		if (todrop > tlen
+		    || (todrop == tlen && (thflags & TH_FIN) == 0)) {
+			/*
+			 * Any valid FIN must be to the left of the window.
+			 * At this point the FIN must be a duplicate or out
+			 * of sequence; drop it.
+			 */
+			thflags &= ~TH_FIN;
+
+			/*
+			 * Send an ACK to resynchronize and drop any data.
+			 * But keep on processing for RST or ACK.
+			 */
+			tp->t_flags |= TF_ACKNOW;
+			todrop = tlen;
+			tcpstat.tcps_rcvduppack++;
+			tcpstat.tcps_rcvdupbyte += todrop;
+		} else {
+			tcpstat.tcps_rcvpartduppack++;
+			tcpstat.tcps_rcvpartdupbyte += todrop;
+		}
+		drop_hdrlen += todrop;	/* drop from the top afterwards */
+		th->th_seq += todrop;
+		tlen -= todrop;
+		if (th->th_urp > todrop)
+			th->th_urp -= todrop;
+		else {
+			thflags &= ~TH_URG;
+			th->th_urp = 0;
+		}
+	}
+
+	/*
+	 * If new data are received on a connection after the
+	 * user processes are gone, then RST the other end.
+	 */
+	if ((so->so_state & SS_NOFDREF) &&
+	    tp->t_state > TCPS_CLOSE_WAIT && tlen) {
+		tp = tcp_close(tp);
+		tcpstat.tcps_rcvafterclose++;
+		rstreason = BANDLIM_UNLIMITED;
+		goto dropwithreset;
+	}
+
+	/*
+	 * If segment ends after window, drop trailing data
+	 * (and PUSH and FIN); if nothing left, just ACK.
+	 */
+	todrop = (th->th_seq+tlen) - (tp->rcv_nxt+tp->rcv_wnd);
+	if (todrop > 0) {
+		tcpstat.tcps_rcvpackafterwin++;
+		if (todrop >= tlen) {
+			tcpstat.tcps_rcvbyteafterwin += tlen;
+			/*
+			 * If a new connection request is received
+			 * while in TIME_WAIT, drop the old connection
+			 * and start over if the sequence numbers
+			 * are above the previous ones.
+			 */
+			if (thflags & TH_SYN &&
+			    tp->t_state == TCPS_TIME_WAIT &&
+			    SEQ_GT(th->th_seq, tp->rcv_nxt)) {
+				iss = tcp_rndiss_next();
+				tp = tcp_close(tp);
+				goto findpcb;
+			}
+			/*
+			 * If window is closed can only take segments at
+			 * window edge, and have to drop data and PUSH from
+			 * incoming segments.  Continue processing, but
+			 * remember to ack.  Otherwise, drop segment
+			 * and ack.
+			 */
+			if (tp->rcv_wnd == 0 && th->th_seq == tp->rcv_nxt) {
+				tp->t_flags |= TF_ACKNOW;
+				tcpstat.tcps_rcvwinprobe++;
+			} else
+				goto dropafterack;
+		} else
+			tcpstat.tcps_rcvbyteafterwin += todrop;
+		m_adj(m, -todrop);
+		tlen -= todrop;
+		thflags &= ~(TH_PUSH|TH_FIN);
+	}
+
+	/*
+	 * If last ACK falls within this segment's sequence numbers,
+	 * record its timestamp.
+	 * NOTE that the test is modified according to the latest
+	 * proposal of the tcplw@cray.com list (Braden 1993/04/26).
+	 */
+	if ((to.to_flag & TOF_TS) != 0 &&
+	    SEQ_LEQ(th->th_seq, tp->last_ack_sent)) {
+		tp->ts_recent_age = ticks;
+		tp->ts_recent = to.to_tsval;
+	}
+
+	/*
+	 * If a SYN is in the window, then this is an
+	 * error and we send an RST and drop the connection.
+	 */
+	if (thflags & TH_SYN) {
+		tp = tcp_drop(tp, ECONNRESET);
+		rstreason = BANDLIM_UNLIMITED;
+		goto dropwithreset;
+	}
+
+	/*
+	 * If the ACK bit is off:  if in SYN-RECEIVED state or SENDSYN
+	 * flag is on (half-synchronized state), then queue data for
+	 * later processing; else drop segment and return.
+	 */
+	if ((thflags & TH_ACK) == 0) {
+		if (tp->t_state == TCPS_SYN_RECEIVED ||
+		    (tp->t_flags & TF_NEEDSYN))
+			goto step6;
+		else
+			goto drop;
+	}
+
+	/*
+	 * Ack processing.
+	 */
+	switch (tp->t_state) {
+
+	/*
+	 * In SYN_RECEIVED state, the ack ACKs our SYN, so enter
+	 * ESTABLISHED state and continue processing.
+	 * The ACK was checked above.
+	 */
+	case TCPS_SYN_RECEIVED:
+
+		tcpstat.tcps_connects++;
+		soisconnected(so);
+		/* Do window scaling? */
+		if ((tp->t_flags & (TF_RCVD_SCALE|TF_REQ_SCALE)) ==
+			(TF_RCVD_SCALE|TF_REQ_SCALE)) {
+			tp->snd_scale = tp->requested_s_scale;
+			tp->rcv_scale = tp->request_r_scale;
+		}
+		/*
+		 * Upon successful completion of 3-way handshake,
+		 * update cache.CC if it was undefined, pass any queued
+		 * data to the user, and advance state appropriately.
+		 */
+		if ((taop = tcp_gettaocache(inp)) != NULL &&
+		    taop->tao_cc == 0)
+			taop->tao_cc = tp->cc_recv;
+
+		/*
+		 * Make transitions:
+		 *      SYN-RECEIVED  -> ESTABLISHED
+		 *      SYN-RECEIVED* -> FIN-WAIT-1
+		 */
+		tp->t_starttime = ticks;
+		if (tp->t_flags & TF_NEEDFIN) {
+			tp->t_state = TCPS_FIN_WAIT_1;
+			tp->t_flags &= ~TF_NEEDFIN;
+		} else {
+			tp->t_state = TCPS_ESTABLISHED;
+			callout_reset(tp->tt_keep, tcp_keepidle, 
+				      tcp_timer_keep, tp);
+		}
+		/*
+		 * If segment contains data or FIN, will call tcp_reass()
+		 * later; if not, do so now to pass queued data to user.
+		 */
+		if (tlen == 0 && (thflags & TH_FIN) == 0)
+			(void) tcp_reass(tp, (struct tcphdr *)0, 0,
+			    (struct mbuf *)0);
+		tp->snd_wl1 = th->th_seq - 1;
+		/* fall into ... */
+
+	/*
+	 * In ESTABLISHED state: drop duplicate ACKs; ACK out of range
+	 * ACKs.  If the ack is in the range
+	 *	tp->snd_una < th->th_ack <= tp->snd_max
+	 * then advance tp->snd_una to th->th_ack and drop
+	 * data from the retransmission queue.  If this ACK reflects
+	 * more up to date window information we update our window information.
+	 */
+	case TCPS_ESTABLISHED:
+	case TCPS_FIN_WAIT_1:
+	case TCPS_FIN_WAIT_2:
+	case TCPS_CLOSE_WAIT:
+	case TCPS_CLOSING:
+	case TCPS_LAST_ACK:
+	case TCPS_TIME_WAIT:
+
+		if (SEQ_LEQ(th->th_ack, tp->snd_una)) {
+			if (tlen == 0 && tiwin == tp->snd_wnd) {
+				tcpstat.tcps_rcvdupack++;
+				/*
+				 * If we have outstanding data (other than
+				 * a window probe), this is a completely
+				 * duplicate ack (ie, window info didn't
+				 * change), the ack is the biggest we've
+				 * seen and we've seen exactly our rexmt
+				 * threshold of them, assume a packet
+				 * has been dropped and retransmit it.
+				 * Kludge snd_nxt & the congestion
+				 * window so we send only this one
+				 * packet.
+				 *
+				 * We know we're losing at the current
+				 * window size so do congestion avoidance
+				 * (set ssthresh to half the current window
+				 * and pull our congestion window back to
+				 * the new ssthresh).
+				 *
+				 * Dup acks mean that packets have left the
+				 * network (they're now cached at the receiver)
+				 * so bump cwnd by the amount in the receiver
+				 * to keep a constant cwnd packets in the
+				 * network.
+				 */
+				if (!callout_active(tp->tt_rexmt) ||
+				    th->th_ack != tp->snd_una)
+					tp->t_dupacks = 0;
+				else if (++tp->t_dupacks == tcprexmtthresh) {
+					tcp_seq onxt = tp->snd_nxt;
+					u_int win =
+					    min(tp->snd_wnd, tp->snd_cwnd) / 2 /
+						tp->t_maxseg;
+					if (tcp_do_newreno && SEQ_LT(th->th_ack,
+					    tp->snd_recover)) {
+						/* False retransmit, should not
+						 * cut window
+						 */
+						tp->snd_cwnd += tp->t_maxseg;
+						tp->t_dupacks = 0;
+						(void) tcp_output(tp);
+						goto drop;
+					}
+					if (win < 2)
+						win = 2;
+					tp->snd_ssthresh = win * tp->t_maxseg;
+					tp->snd_recover = tp->snd_max;
+					callout_stop(tp->tt_rexmt);
+					tp->t_rtttime = 0;
+					tp->snd_nxt = th->th_ack;
+					tp->snd_cwnd = tp->t_maxseg;
+					(void) tcp_output(tp);
+					tp->snd_cwnd = tp->snd_ssthresh +
+					       tp->t_maxseg * tp->t_dupacks;
+					if (SEQ_GT(onxt, tp->snd_nxt))
+						tp->snd_nxt = onxt;
+					goto drop;
+				} else if (tp->t_dupacks > tcprexmtthresh) {
+					tp->snd_cwnd += tp->t_maxseg;
+					(void) tcp_output(tp);
+					goto drop;
+				}
+			} else
+				tp->t_dupacks = 0;
+			break;
+		}
+		/*
+		 * If the congestion window was inflated to account
+		 * for the other side's cached packets, retract it.
+		 */
+		if (tcp_do_newreno == 0) {
+                        if (tp->t_dupacks >= tcprexmtthresh &&
+                                tp->snd_cwnd > tp->snd_ssthresh)
+                                tp->snd_cwnd = tp->snd_ssthresh;
+                        tp->t_dupacks = 0;
+                } else if (tp->t_dupacks >= tcprexmtthresh &&
+		    !tcp_newreno(tp, th)) {
+                        /*
+                         * Window inflation should have left us with approx.
+                         * snd_ssthresh outstanding data.  But in case we
+                         * would be inclined to send a burst, better to do
+                         * it via the slow start mechanism.
+                         */
+			if (SEQ_GT(th->th_ack + tp->snd_ssthresh, tp->snd_max))
+                                tp->snd_cwnd =
+				    tp->snd_max - th->th_ack + tp->t_maxseg;
+			else
+                        	tp->snd_cwnd = tp->snd_ssthresh;
+                        tp->t_dupacks = 0;
+                }
+		if (SEQ_GT(th->th_ack, tp->snd_max)) {
+			tcpstat.tcps_rcvacktoomuch++;
+			goto dropafterack;
+		}
+		/*
+		 *  If we reach this point, ACK is not a duplicate,
+		 *     i.e., it ACKs something we sent.
+		 */
+		if (tp->t_flags & TF_NEEDSYN) {
+			/*
+			 * T/TCP: Connection was half-synchronized, and our
+			 * SYN has been ACK'd (so connection is now fully
+			 * synchronized).  Go to non-starred state,
+			 * increment snd_una for ACK of SYN, and check if
+			 * we can do window scaling.
+			 */
+			tp->t_flags &= ~TF_NEEDSYN;
+			tp->snd_una++;
+			/* Do window scaling? */
+			if ((tp->t_flags & (TF_RCVD_SCALE|TF_REQ_SCALE)) ==
+				(TF_RCVD_SCALE|TF_REQ_SCALE)) {
+				tp->snd_scale = tp->requested_s_scale;
+				tp->rcv_scale = tp->request_r_scale;
+			}
+		}
+
+process_ACK:
+		acked = th->th_ack - tp->snd_una;
+		tcpstat.tcps_rcvackpack++;
+		tcpstat.tcps_rcvackbyte += acked;
+
+		/*
+		 * If we just performed our first retransmit, and the ACK
+		 * arrives within our recovery window, then it was a mistake
+		 * to do the retransmit in the first place.  Recover our
+		 * original cwnd and ssthresh, and proceed to transmit where
+		 * we left off.
+		 */
+		if (tp->t_rxtshift == 1 && ticks < tp->t_badrxtwin) {
+			tp->snd_cwnd = tp->snd_cwnd_prev;
+			tp->snd_ssthresh = tp->snd_ssthresh_prev;
+			tp->snd_nxt = tp->snd_max;
+			tp->t_badrxtwin = 0;	/* XXX probably not required */ 
+		}
+
+		/*
+		 * If we have a timestamp reply, update smoothed
+		 * round trip time.  If no timestamp is present but
+		 * transmit timer is running and timed sequence
+		 * number was acked, update smoothed round trip time.
+		 * Since we now have an rtt measurement, cancel the
+		 * timer backoff (cf., Phil Karn's retransmit alg.).
+		 * Recompute the initial retransmit timer.
+		 */
+		if (to.to_flag & TOF_TS)
+			tcp_xmit_timer(tp, ticks - to.to_tsecr + 1);
+		else if (tp->t_rtttime && SEQ_GT(th->th_ack, tp->t_rtseq))
+			tcp_xmit_timer(tp, ticks - tp->t_rtttime);
+
+		/*
+		 * If all outstanding data is acked, stop retransmit
+		 * timer and remember to restart (more output or persist).
+		 * If there is more data to be acked, restart retransmit
+		 * timer, using current (possibly backed-off) value.
+		 */
+		if (th->th_ack == tp->snd_max) {
+			callout_stop(tp->tt_rexmt);
+			needoutput = 1;
+		} else if (!callout_active(tp->tt_persist))
+			callout_reset(tp->tt_rexmt, tp->t_rxtcur,
+				      tcp_timer_rexmt, tp);
+
+		/*
+		 * If no data (only SYN) was ACK'd,
+		 *    skip rest of ACK processing.
+		 */
+		if (acked == 0)
+			goto step6;
+
+		/*
+		 * When new data is acked, open the congestion window.
+		 * If the window gives us less than ssthresh packets
+		 * in flight, open exponentially (maxseg per packet).
+		 * Otherwise open linearly: maxseg per window
+		 * (maxseg^2 / cwnd per packet).
+		 */
+		{
+		register u_int cw = tp->snd_cwnd;
+		register u_int incr = tp->t_maxseg;
+
+		if (cw > tp->snd_ssthresh)
+			incr = incr * incr / cw;
+		/*
+		 * If t_dupacks != 0 here, it indicates that we are still
+		 * in NewReno fast recovery mode, so we leave the congestion
+		 * window alone.
+		 */
+		if (tcp_do_newreno == 0 || tp->t_dupacks == 0)
+			tp->snd_cwnd = min(cw + incr,TCP_MAXWIN<<tp->snd_scale);
+		}
+		if (acked > so->so_snd.sb_cc) {
+			tp->snd_wnd -= so->so_snd.sb_cc;
+			sbdrop(&so->so_snd, (int)so->so_snd.sb_cc);
+			ourfinisacked = 1;
+		} else {
+			sbdrop(&so->so_snd, acked);
+			tp->snd_wnd -= acked;
+			ourfinisacked = 0;
+		}
+		sowwakeup(so);
+		tp->snd_una = th->th_ack;
+		if (SEQ_LT(tp->snd_nxt, tp->snd_una))
+			tp->snd_nxt = tp->snd_una;
+
+		switch (tp->t_state) {
+
+		/*
+		 * In FIN_WAIT_1 STATE in addition to the processing
+		 * for the ESTABLISHED state if our FIN is now acknowledged
+		 * then enter FIN_WAIT_2.
+		 */
+		case TCPS_FIN_WAIT_1:
+			if (ourfinisacked) {
+				/*
+				 * If we can't receive any more
+				 * data, then closing user can proceed.
+				 * Starting the timer is contrary to the
+				 * specification, but if we don't get a FIN
+				 * we'll hang forever.
+				 */
+				if (so->so_state & SS_CANTRCVMORE) {
+					soisdisconnected(so);
+					callout_reset(tp->tt_2msl, tcp_maxidle,
+						      tcp_timer_2msl, tp);
+				}
+				tp->t_state = TCPS_FIN_WAIT_2;
+			}
+			break;
+
+	 	/*
+		 * In CLOSING STATE in addition to the processing for
+		 * the ESTABLISHED state if the ACK acknowledges our FIN
+		 * then enter the TIME-WAIT state, otherwise ignore
+		 * the segment.
+		 */
+		case TCPS_CLOSING:
+			if (ourfinisacked) {
+				tp->t_state = TCPS_TIME_WAIT;
+				tcp_canceltimers(tp);
+				/* Shorten TIME_WAIT [RFC-1644, p.28] */
+				if (tp->cc_recv != 0 &&
+				    (ticks - tp->t_starttime) < tcp_msl)
+					callout_reset(tp->tt_2msl,
+						      tp->t_rxtcur *
+						      TCPTV_TWTRUNC,
+						      tcp_timer_2msl, tp);
+				else
+					callout_reset(tp->tt_2msl, 2 * tcp_msl,
+						      tcp_timer_2msl, tp);
+				soisdisconnected(so);
+			}
+			break;
+
+		/*
+		 * In LAST_ACK, we may still be waiting for data to drain
+		 * and/or to be acked, as well as for the ack of our FIN.
+		 * If our FIN is now acknowledged, delete the TCB,
+		 * enter the closed state and return.
+		 */
+		case TCPS_LAST_ACK:
+			if (ourfinisacked) {
+				tp = tcp_close(tp);
+				goto drop;
+			}
+			break;
+
+		/*
+		 * In TIME_WAIT state the only thing that should arrive
+		 * is a retransmission of the remote FIN.  Acknowledge
+		 * it and restart the finack timer.
+		 */
+		case TCPS_TIME_WAIT:
+			callout_reset(tp->tt_2msl, 2 * tcp_msl,
+				      tcp_timer_2msl, tp);
+			goto dropafterack;
+		}
+	}
+
+step6:
+	/*
+	 * Update window information.
+	 * Don't look at window if no ACK: TAC's send garbage on first SYN.
+	 */
+	if ((thflags & TH_ACK) &&
+	    (SEQ_LT(tp->snd_wl1, th->th_seq) ||
+	    (tp->snd_wl1 == th->th_seq && (SEQ_LT(tp->snd_wl2, th->th_ack) ||
+	     (tp->snd_wl2 == th->th_ack && tiwin > tp->snd_wnd))))) {
+		/* keep track of pure window updates */
+		if (tlen == 0 &&
+		    tp->snd_wl2 == th->th_ack && tiwin > tp->snd_wnd)
+			tcpstat.tcps_rcvwinupd++;
+		tp->snd_wnd = tiwin;
+		tp->snd_wl1 = th->th_seq;
+		tp->snd_wl2 = th->th_ack;
+		if (tp->snd_wnd > tp->max_sndwnd)
+			tp->max_sndwnd = tp->snd_wnd;
+		needoutput = 1;
+	}
+
+	/*
+	 * Process segments with URG.
+	 */
+	if ((thflags & TH_URG) && th->th_urp &&
+	    TCPS_HAVERCVDFIN(tp->t_state) == 0) {
+		/*
+		 * This is a kludge, but if we receive and accept
+		 * random urgent pointers, we'll crash in
+		 * soreceive.  It's hard to imagine someone
+		 * actually wanting to send this much urgent data.
+		 */
+		if (th->th_urp + so->so_rcv.sb_cc > sb_max) {
+			th->th_urp = 0;			/* XXX */
+			thflags &= ~TH_URG;		/* XXX */
+			goto dodata;			/* XXX */
+		}
+		/*
+		 * If this segment advances the known urgent pointer,
+		 * then mark the data stream.  This should not happen
+		 * in CLOSE_WAIT, CLOSING, LAST_ACK or TIME_WAIT STATES since
+		 * a FIN has been received from the remote side.
+		 * In these states we ignore the URG.
+		 *
+		 * According to RFC961 (Assigned Protocols),
+		 * the urgent pointer points to the last octet
+		 * of urgent data.  We continue, however,
+		 * to consider it to indicate the first octet
+		 * of data past the urgent section as the original
+		 * spec states (in one of two places).
+		 */
+		if (SEQ_GT(th->th_seq+th->th_urp, tp->rcv_up)) {
+			tp->rcv_up = th->th_seq + th->th_urp;
+			so->so_oobmark = so->so_rcv.sb_cc +
+			    (tp->rcv_up - tp->rcv_nxt) - 1;
+			if (so->so_oobmark == 0)
+				so->so_state |= SS_RCVATMARK;
+			sohasoutofband(so);
+			tp->t_oobflags &= ~(TCPOOB_HAVEDATA | TCPOOB_HADDATA);
+		}
+		/*
+		 * Remove out of band data so doesn't get presented to user.
+		 * This can happen independent of advancing the URG pointer,
+		 * but if two URG's are pending at once, some out-of-band
+		 * data may creep in... ick.
+		 */
+		if (th->th_urp <= (u_long)tlen
+#ifdef SO_OOBINLINE
+		     && (so->so_options & SO_OOBINLINE) == 0
+#endif
+		     )
+			tcp_pulloutofband(so, th, m,
+				drop_hdrlen);	/* hdr drop is delayed */
+	} else
+		/*
+		 * If no out of band data is expected,
+		 * pull receive urgent pointer along
+		 * with the receive window.
+		 */
+		if (SEQ_GT(tp->rcv_nxt, tp->rcv_up))
+			tp->rcv_up = tp->rcv_nxt;
+dodata:							/* XXX */
+
+	/*
+	 * Process the segment text, merging it into the TCP sequencing queue,
+	 * and arranging for acknowledgment of receipt if necessary.
+	 * This process logically involves adjusting tp->rcv_wnd as data
+	 * is presented to the user (this happens in tcp_usrreq.c,
+	 * case PRU_RCVD).  If a FIN has already been received on this
+	 * connection then we just ignore the text.
+	 */
+	if ((tlen || (thflags&TH_FIN)) &&
+	    TCPS_HAVERCVDFIN(tp->t_state) == 0) {
+		m_adj(m, drop_hdrlen);	/* delayed header drop */
+		TCP_REASS(tp, th, &tlen, m, so, thflags);
+		/*
+		 * Note the amount of data that peer has sent into
+		 * our window, in order to estimate the sender's
+		 * buffer size.
+		 */
+		len = so->so_rcv.sb_hiwat - (tp->rcv_adv - tp->rcv_nxt);
+	} else {
+		m_freem(m);
+		thflags &= ~TH_FIN;
+	}
+
+	/*
+	 * If FIN is received ACK the FIN and let the user know
+	 * that the connection is closing.
+	 */
+	if (thflags & TH_FIN) {
+		if (TCPS_HAVERCVDFIN(tp->t_state) == 0) {
+			socantrcvmore(so);
+			/*
+			 *  If connection is half-synchronized
+			 *  (ie NEEDSYN flag on) then delay ACK,
+			 *  so it may be piggybacked when SYN is sent.
+			 *  Otherwise, since we received a FIN then no
+			 *  more input can be expected, send ACK now.
+			 */
+			if (DELAY_ACK(tp) && (tp->t_flags & TF_NEEDSYN))
+                                callout_reset(tp->tt_delack, tcp_delacktime,  
+                                    tcp_timer_delack, tp);  
+			else
+				tp->t_flags |= TF_ACKNOW;
+			tp->rcv_nxt++;
+		}
+		switch (tp->t_state) {
+
+	 	/*
+		 * In SYN_RECEIVED and ESTABLISHED STATES
+		 * enter the CLOSE_WAIT state.
+		 */
+		case TCPS_SYN_RECEIVED:
+			tp->t_starttime = ticks;
+			/*FALLTHROUGH*/
+		case TCPS_ESTABLISHED:
+			tp->t_state = TCPS_CLOSE_WAIT;
+			break;
+
+	 	/*
+		 * If still in FIN_WAIT_1 STATE FIN has not been acked so
+		 * enter the CLOSING state.
+		 */
+		case TCPS_FIN_WAIT_1:
+			tp->t_state = TCPS_CLOSING;
+			break;
+
+	 	/*
+		 * In FIN_WAIT_2 state enter the TIME_WAIT state,
+		 * starting the time-wait timer, turning off the other
+		 * standard timers.
+		 */
+		case TCPS_FIN_WAIT_2:
+			tp->t_state = TCPS_TIME_WAIT;
+			tcp_canceltimers(tp);
+			/* Shorten TIME_WAIT [RFC-1644, p.28] */
+			if (tp->cc_recv != 0 &&
+			    (ticks - tp->t_starttime) < tcp_msl) {
+				callout_reset(tp->tt_2msl,
+					      tp->t_rxtcur * TCPTV_TWTRUNC,
+					      tcp_timer_2msl, tp);
+				/* For transaction client, force ACK now. */
+				tp->t_flags |= TF_ACKNOW;
+			}
+			else
+				callout_reset(tp->tt_2msl, 2 * tcp_msl,
+					      tcp_timer_2msl, tp);
+			soisdisconnected(so);
+			break;
+
+		/*
+		 * In TIME_WAIT state restart the 2 MSL time_wait timer.
+		 */
+		case TCPS_TIME_WAIT:
+			callout_reset(tp->tt_2msl, 2 * tcp_msl,
+				      tcp_timer_2msl, tp);
+			break;
+		}
+	}
+#ifdef TCPDEBUG
+	if (so->so_options & SO_DEBUG)
+		tcp_trace(TA_INPUT, ostate, tp, (void *)tcp_saveipgen,
+			  &tcp_savetcp, 0);
+#endif
+
+	/*
+	 * Return any desired output.
+	 */
+	if (needoutput || (tp->t_flags & TF_ACKNOW))
+		(void) tcp_output(tp);
+	return;
+
+dropafterack:
+	/*
+	 * Generate an ACK dropping incoming segment if it occupies
+	 * sequence space, where the ACK reflects our state.
+	 *
+	 * We can now skip the test for the RST flag since all
+	 * paths to this code happen after packets containing
+	 * RST have been dropped.
+	 *
+	 * In the SYN-RECEIVED state, don't send an ACK unless the
+	 * segment we received passes the SYN-RECEIVED ACK test.
+	 * If it fails send a RST.  This breaks the loop in the
+	 * "LAND" DoS attack, and also prevents an ACK storm
+	 * between two listening ports that have been sent forged
+	 * SYN segments, each with the source address of the other.
+	 */
+	if (tp->t_state == TCPS_SYN_RECEIVED && (thflags & TH_ACK) &&
+	    (SEQ_GT(tp->snd_una, th->th_ack) ||
+	     SEQ_GT(th->th_ack, tp->snd_max)) ) {
+		rstreason = BANDLIM_RST_OPENPORT;
+		goto dropwithreset;
+	}
+#ifdef TCPDEBUG
+	if (so->so_options & SO_DEBUG)
+		tcp_trace(TA_DROP, ostate, tp, (void *)tcp_saveipgen,
+			  &tcp_savetcp, 0);
+#endif
+	m_freem(m);
+	tp->t_flags |= TF_ACKNOW;
+	(void) tcp_output(tp);
+	return;
+
+dropwithreset:
+	/*
+	 * Generate a RST, dropping incoming segment.
+	 * Make ACK acceptable to originator of segment.
+	 * Don't bother to respond if destination was broadcast/multicast.
+	 */
+	if ((thflags & TH_RST) || m->m_flags & (M_BCAST|M_MCAST))
+		goto drop;
+#ifdef INET6
+	if (isipv6) {
+		if (IN6_IS_ADDR_MULTICAST(&ip6->ip6_dst) ||
+		    IN6_IS_ADDR_MULTICAST(&ip6->ip6_src))
+			goto drop;
+	} else
+#endif /* INET6 */
+	if (IN_MULTICAST(ntohl(ip->ip_dst.s_addr)) ||
+	    IN_MULTICAST(ntohl(ip->ip_src.s_addr)) ||
+	    ip->ip_src.s_addr == htonl(INADDR_BROADCAST))
+		goto drop;
+	/* IPv6 anycast check is done at tcp6_input() */
+
+	/*
+	 * Perform bandwidth limiting.
+	 */
+	if (badport_bandlim(rstreason) < 0)
+		goto drop;
+ 
+#ifdef TCPDEBUG
+	if (tp == 0 || (tp->t_inpcb->inp_socket->so_options & SO_DEBUG))
+		tcp_trace(TA_DROP, ostate, tp, (void *)tcp_saveipgen,
+			  &tcp_savetcp, 0);
+#endif
+	if (thflags & TH_ACK)
+		/* mtod() below is safe as long as hdr dropping is delayed */
+		tcp_respond(tp, mtod(m, void *), th, m, (tcp_seq)0, th->th_ack,
+			    TH_RST);
+	else {
+		if (thflags & TH_SYN)
+			tlen++;
+		/* mtod() below is safe as long as hdr dropping is delayed */
+		tcp_respond(tp, mtod(m, void *), th, m, th->th_seq+tlen,
+			    (tcp_seq)0, TH_RST|TH_ACK);
+	}
+	/* destroy temporarily created socket */
+	if (dropsocket)
+		(void) soabort(so);
+	return;
+
+drop:
+	/*
+	 * Drop space held by incoming segment and return.
+	 */
+#ifdef TCPDEBUG
+	if (tp == 0 || (tp->t_inpcb->inp_socket->so_options & SO_DEBUG))
+		tcp_trace(TA_DROP, ostate, tp, (void *)tcp_saveipgen,
+			  &tcp_savetcp, 0);
+#endif
+	m_freem(m);
+	/* destroy temporarily created socket */
+	if (dropsocket)
+		(void) soabort(so);
+	return;
+}
+
+static void	/* Parse the TCP options carried by one received segment. */
+tcp_dooptions(tp, cp, cnt, th, to)
+	struct tcpcb *tp;	/* connection whose t_flags etc. get updated */
+	u_char *cp;		/* first byte of the option area */
+	int cnt;		/* number of option bytes left to parse */
+	struct tcphdr *th;	/* segment header; only th_flags is read */
+	struct tcpopt *to;	/* out: parsed timestamp/CC values, to_flag */
+{
+	u_short mss = 0;	/* stays 0 when no MSS option is accepted */
+	int opt, optlen;
+
+	for (; cnt > 0; cnt -= optlen, cp += optlen) {
+		opt = cp[0];
+		if (opt == TCPOPT_EOL)
+			break;
+		if (opt == TCPOPT_NOP)
+			optlen = 1;
+		else {
+			if (cnt < 2)	/* no room for a length byte */
+				break;
+			optlen = cp[1];
+			if (optlen < 2 || optlen > cnt)	/* bad length */
+				break;
+		}
+		switch (opt) {
+
+		default:
+			continue;	/* unknown option: step over it */
+
+		case TCPOPT_MAXSEG:
+			if (optlen != TCPOLEN_MAXSEG)
+				continue;
+			if (!(th->th_flags & TH_SYN))	/* SYN only */
+				continue;
+			bcopy((char *) cp + 2, (char *) &mss, sizeof(mss));
+			NTOHS(mss);
+			break;
+
+		case TCPOPT_WINDOW:
+			if (optlen != TCPOLEN_WINDOW)
+				continue;
+			if (!(th->th_flags & TH_SYN))	/* SYN only */
+				continue;
+			tp->t_flags |= TF_RCVD_SCALE;
+			tp->requested_s_scale = min(cp[2], TCP_MAX_WINSHIFT);
+			break;
+
+		case TCPOPT_TIMESTAMP:
+			if (optlen != TCPOLEN_TIMESTAMP)
+				continue;
+			to->to_flag |= TOF_TS;
+			bcopy((char *)cp + 2,
+			    (char *)&to->to_tsval, sizeof(to->to_tsval));
+			NTOHL(to->to_tsval);
+			bcopy((char *)cp + 6,
+			    (char *)&to->to_tsecr, sizeof(to->to_tsecr));
+			NTOHL(to->to_tsecr);
+
+			/*
+			 * A timestamp received in a SYN makes
+			 * it ok to send timestamp requests and replies.
+			 */
+			if (th->th_flags & TH_SYN) {
+				tp->t_flags |= TF_RCVD_TSTMP;
+				tp->ts_recent = to->to_tsval;
+				tp->ts_recent_age = ticks;
+			}
+			break;
+		case TCPOPT_CC:
+			if (optlen != TCPOLEN_CC)
+				continue;
+			to->to_flag |= TOF_CC;
+			bcopy((char *)cp + 2,
+			    (char *)&to->to_cc, sizeof(to->to_cc));
+			NTOHL(to->to_cc);
+			/*
+			 * A CC or CC.new option received in a SYN makes
+			 * it ok to send CC in subsequent segments.
+			 */
+			if (th->th_flags & TH_SYN)
+				tp->t_flags |= TF_RCVD_CC;
+			break;
+		case TCPOPT_CCNEW:
+			if (optlen != TCPOLEN_CC)
+				continue;
+			if (!(th->th_flags & TH_SYN))	/* SYN only */
+				continue;
+			to->to_flag |= TOF_CCNEW;
+			bcopy((char *)cp + 2,
+			    (char *)&to->to_cc, sizeof(to->to_cc));
+			NTOHL(to->to_cc);
+			/*
+			 * A CC or CC.new option received in a SYN makes
+			 * it ok to send CC in subsequent segments.
+			 */
+			tp->t_flags |= TF_RCVD_CC;
+			break;
+		case TCPOPT_CCECHO:
+			if (optlen != TCPOLEN_CC)
+				continue;
+			if (!(th->th_flags & TH_SYN))	/* SYN only */
+				continue;
+			to->to_flag |= TOF_CCECHO;
+			bcopy((char *)cp + 2,
+			    (char *)&to->to_ccecho, sizeof(to->to_ccecho));
+			NTOHL(to->to_ccecho);
+			break;
+		}
+	}
+	if (th->th_flags & TH_SYN)
+		tcp_mss(tp, mss);	/* sets t_maxseg */
+}
+
+/*
+ * Pull out of band byte out of a segment so
+ * it doesn't appear in the user's data queue.
+ * It is still reflected in the segment length for
+ * sequencing purposes.  Panics if the byte is not found in the chain.
+ */
+static void
+tcp_pulloutofband(so, th, m, off)
+	struct socket *so;
+	struct tcphdr *th;
+	register struct mbuf *m;
+	int off;		/* header length whose drop is delayed */
+{
+	struct mbuf *m0 = m;	/* first mbuf; holds m_pkthdr if any */
+	int cnt = off + th->th_urp - 1;
+
+	while (cnt >= 0) {
+		if (m->m_len > cnt) {
+			char *cp = mtod(m, caddr_t) + cnt;
+			struct tcpcb *tp = sototcpcb(so);
+
+			tp->t_iobc = *cp;
+			tp->t_oobflags |= TCPOOB_HAVEDATA;
+			bcopy(cp+1, cp, (unsigned)(m->m_len - cnt - 1));
+			m->m_len--;
+			if (m0->m_flags & M_PKTHDR)	/* not m: it moved */
+				m0->m_pkthdr.len--;
+			return;
+		}
+		cnt -= m->m_len;
+		if ((m = m->m_next) == 0)
+			break;
+	}
+	panic("tcp_pulloutofband");
+}
+
+/*
+ * Collect new round-trip time estimate and update the smoothed
+ * averages (t_srtt, t_rttvar) and the current timeout (t_rxtcur).
+ */
+static void
+tcp_xmit_timer(tp, rtt)
+	register struct tcpcb *tp;
+	int rtt;	/* new RTT sample; presumably in ticks -- TODO confirm */
+{
+	register int delta;
+
+	tcpstat.tcps_rttupdated++;
+	tp->t_rttupdated++;
+	if (tp->t_srtt != 0) {
+		/*
+		 * srtt is stored as fixed point with 5 bits after the binary
+		 * point (i.e., scaled by 32, not 8; TODO confirm against
+		 * TCP_RTT_SHIFT).  The following magic is equivalent to the
+		 * rfc793 smoothing algorithm with an alpha of .875 (srtt =
+		 * rtt/8 + srtt*7/8 in fixed point).  Adjust rtt to origin 0.
+		 */
+		delta = ((rtt - 1) << TCP_DELTA_SHIFT)
+			- (tp->t_srtt >> (TCP_RTT_SHIFT - TCP_DELTA_SHIFT));
+
+		if ((tp->t_srtt += delta) <= 0)
+			tp->t_srtt = 1;
+
+		/*
+		 * We accumulate a smoothed rtt variance (actually, a
+		 * smoothed mean difference), then set the retransmit
+		 * timer to smoothed rtt + 4 times the smoothed variance.
+		 * rttvar is stored as fixed point with 4 bits after the
+		 * binary point (scaled by 16).  The following is
+		 * equivalent to rfc793 smoothing with an alpha of .75
+		 * (rttvar = rttvar*3/4 + |delta| / 4).  This replaces
+		 * rfc793's wired-in beta.
+		 */
+		if (delta < 0)
+			delta = -delta;
+		delta -= tp->t_rttvar >> (TCP_RTTVAR_SHIFT - TCP_DELTA_SHIFT);
+		if ((tp->t_rttvar += delta) <= 0)
+			tp->t_rttvar = 1;
+	} else {
+		/*
+		 * No rtt measurement yet - use the unsmoothed rtt.
+		 * Set the variance to half the rtt (so our first
+		 * retransmit happens at 3*rtt).
+		 */
+		tp->t_srtt = rtt << TCP_RTT_SHIFT;
+		tp->t_rttvar = rtt << (TCP_RTTVAR_SHIFT - 1);
+	}
+	tp->t_rtttime = 0;
+	tp->t_rxtshift = 0;
+
+	/*
+	 * the retransmit should happen at rtt + 4 * rttvar.
+	 * Because of the way we do the smoothing, srtt and rttvar
+	 * will each average +1/2 tick of bias.  When we compute
+	 * the retransmit timer, we want 1/2 tick of rounding and
+	 * 1 extra tick because of +-1/2 tick uncertainty in the
+	 * firing of the timer.  The bias will give us exactly the
+	 * 1.5 tick we need.  But, because the bias is
+	 * statistical, we have to test that we don't drop below
+	 * the minimum feasible timer (which is 2 ticks).
+	 */
+	TCPT_RANGESET(tp->t_rxtcur, TCP_REXMTVAL(tp),
+		      max(tp->t_rttmin, rtt + 2), TCPTV_REXMTMAX);
+
+	/*
+	 * We received an ack for a packet that wasn't retransmitted;
+	 * it is probably safe to discard any error indications we've
+	 * received recently.  This isn't quite right, but close enough
+	 * for now (a route might have failed after we sent a segment,
+	 * and the return path might not be symmetrical).
+	 */
+	tp->t_softerror = 0;
+}
+
+/*
+ * Determine a reasonable value for maxseg size.
+ * If the route is known, check route for mtu.
+ * If none, use an mss that can be handled on the outgoing
+ * interface without forcing IP to fragment; if bigger than
+ * an mbuf cluster (MCLBYTES), round down to nearest multiple of MCLBYTES
+ * to utilize large mbufs.  If no route is found, route has no mtu,
+ * or the destination isn't local, use a default, hopefully conservative
+ * size (usually 512 or the default IP max size, but no more than the mtu
+ * of the interface), as we can't discover anything about intervening
+ * gateways or networks.  We also initialize the congestion/slow start
+ * window to be a single segment if the destination isn't local.
+ * While looking at the routing entry, we also initialize other path-dependent
+ * parameters from pre-set or cached values in the routing entry.
+ *
+ * Also take into account the space needed for options that we
+ * send regularly.  Make maxseg shorter by that amount to assure
+ * that we can send maxseg amount of data even when the options
+ * are present.  Store the upper limit of the length of options plus
+ * data in maxopd.
+ *
+ * NOTE that this routine is only called when we process an incoming
+ * segment, for outgoing segments only tcp_mssopt is called.
+ *
+ * In case of T/TCP, we call this routine during implicit connection
+ * setup as well (offer = -1), to initialize maxseg from the cached
+ * MSS of our peer.
+ */
+void
+tcp_mss(tp, offer)
+	struct tcpcb *tp;
+	int offer;		/* peer's MSS; 0 = none on SYN, -1 = no SYN yet */
+{
+	register struct rtentry *rt;
+	struct ifnet *ifp;
+	register int rtt, mss;
+	u_long bufsize;
+	struct inpcb *inp;
+	struct socket *so;
+	struct rmxp_tao *taop;
+	int origoffer = offer;	/* offer is rewritten below; keep the input */
+#ifdef INET6
+	int isipv6;
+	int min_protoh;
+#endif
+
+	inp = tp->t_inpcb;
+#ifdef INET6
+	isipv6 = ((inp->inp_vflag & INP_IPV6) != 0) ? 1 : 0;
+	min_protoh = isipv6 ? sizeof (struct ip6_hdr) + sizeof (struct tcphdr)
+			    : sizeof (struct tcpiphdr);
+#else
+#define min_protoh  (sizeof (struct tcpiphdr))
+#endif
+#ifdef INET6
+	if (isipv6)
+		rt = tcp_rtlookup6(inp);
+	else
+#endif
+	rt = tcp_rtlookup(inp);
+	if (rt == NULL) {
+		tp->t_maxopd = tp->t_maxseg =
+#ifdef INET6
+		isipv6 ? tcp_v6mssdflt :
+#endif /* INET6 */
+		tcp_mssdflt;
+		return;
+	}
+	ifp = rt->rt_ifp;
+	so = inp->inp_socket;
+
+	taop = rmx_taop(rt->rt_rmx);
+	/*
+	 * Offer == -1 means that we didn't receive SYN yet,
+	 * use cached value in that case.
+	 */
+	if (offer == -1)
+		offer = taop->tao_mssopt;
+	/*
+	 * Offer == 0 means that there was no MSS on the SYN segment,
+	 * in this case we use tcp_mssdflt.
+	 */
+	if (offer == 0)
+		offer =
+#ifdef INET6
+			isipv6 ? tcp_v6mssdflt :
+#endif /* INET6 */
+			tcp_mssdflt;
+	else
+		/*
+		 * Sanity check: make sure that maxopd will be large
+		 * enough to allow some data on segments even if all
+		 * the option space is used (40 bytes).  Otherwise
+		 * funny things may happen in tcp_output.
+		 */
+		offer = max(offer, 64);
+	taop->tao_mssopt = offer;
+
+	/*
+	 * While we're here, check if there's an initial rtt
+	 * or rttvar.  Convert from the route-table units
+	 * to scaled multiples of the slow timeout timer.
+	 */
+	if (tp->t_srtt == 0 && (rtt = rt->rt_rmx.rmx_rtt)) {
+		/*
+		 * XXX the lock bit for RTT indicates that the value
+		 * is also a minimum value; this is subject to time.
+		 */
+		if (rt->rt_rmx.rmx_locks & RTV_RTT)
+			tp->t_rttmin = rtt / (RTM_RTTUNIT / hz);
+		tp->t_srtt = rtt / (RTM_RTTUNIT / (hz * TCP_RTT_SCALE));
+		tcpstat.tcps_usedrtt++;
+		if (rt->rt_rmx.rmx_rttvar) {
+			tp->t_rttvar = rt->rt_rmx.rmx_rttvar /
+			    (RTM_RTTUNIT / (hz * TCP_RTTVAR_SCALE));
+			tcpstat.tcps_usedrttvar++;
+		} else {
+			/* default variation is +- 1 rtt */
+			tp->t_rttvar =
+			    tp->t_srtt * TCP_RTTVAR_SCALE / TCP_RTT_SCALE;
+		}
+		TCPT_RANGESET(tp->t_rxtcur,
+			      ((tp->t_srtt >> 2) + tp->t_rttvar) >> 1,
+			      tp->t_rttmin, TCPTV_REXMTMAX);
+	}
+	/*
+	 * if there's an mtu associated with the route, use it
+	 * else, use the link mtu.
+	 */
+	if (rt->rt_rmx.rmx_mtu)
+		mss = rt->rt_rmx.rmx_mtu - min_protoh;
+	else
+	{
+		mss =
+#ifdef INET6
+			(isipv6 ? nd_ifinfo[rt->rt_ifp->if_index].linkmtu :
+#endif
+			 ifp->if_mtu
+#ifdef INET6
+			 )
+#endif
+			- min_protoh;
+#ifdef INET6
+		if (isipv6) {
+			if (!in6_localaddr(&inp->in6p_faddr))
+				mss = min(mss, tcp_v6mssdflt);
+		} else
+#endif
+		if (!in_localaddr(inp->inp_faddr))
+			mss = min(mss, tcp_mssdflt);
+	}
+	mss = min(mss, offer);
+	/*
+	 * maxopd stores the maximum length of data AND options
+	 * in a segment; maxseg is the amount of data in a normal
+	 * segment.  We need to store this value (maxopd) apart
+	 * from maxseg, because now every segment carries options
+	 * and thus we normally have somewhat less data in segments.
+	 */
+	tp->t_maxopd = mss;
+
+	/*
+	 * In case of T/TCP, origoffer==-1 indicates, that no segments
+	 * were received yet.  In this case we just guess, otherwise
+	 * we do the same as before T/TCP.
+	 */
+ 	if ((tp->t_flags & (TF_REQ_TSTMP|TF_NOOPT)) == TF_REQ_TSTMP &&
+	    (origoffer == -1 ||
+	     (tp->t_flags & TF_RCVD_TSTMP) == TF_RCVD_TSTMP))
+		mss -= TCPOLEN_TSTAMP_APPA;
+ 	if ((tp->t_flags & (TF_REQ_CC|TF_NOOPT)) == TF_REQ_CC &&
+	    (origoffer == -1 ||
+	     (tp->t_flags & TF_RCVD_CC) == TF_RCVD_CC))
+		mss -= TCPOLEN_CC_APPA;
+
+#if	(MCLBYTES & (MCLBYTES - 1)) == 0	/* power of 2: mask */
+		if (mss > MCLBYTES)
+			mss &= ~(MCLBYTES-1);
+#else	/* otherwise round down to a cluster multiple by division */
+		if (mss > MCLBYTES)
+			mss = mss / MCLBYTES * MCLBYTES;
+#endif
+	/*
+	 * If there's a pipesize, change the socket buffer
+	 * to that size.  Make the socket buffers an integral
+	 * number of mss units; if the mss is larger than
+	 * the socket buffer, decrease the mss.
+	 */
+#ifdef RTV_SPIPE
+	if ((bufsize = rt->rt_rmx.rmx_sendpipe) == 0)
+#endif
+		bufsize = so->so_snd.sb_hiwat;
+	if (bufsize < mss)
+		mss = bufsize;
+	else {
+		bufsize = roundup(bufsize, mss);
+		if (bufsize > sb_max)
+			bufsize = sb_max;
+		(void)sbreserve(&so->so_snd, bufsize, so, NULL);
+	}
+	tp->t_maxseg = mss;
+
+#ifdef RTV_RPIPE
+	if ((bufsize = rt->rt_rmx.rmx_recvpipe) == 0)
+#endif
+		bufsize = so->so_rcv.sb_hiwat;
+	if (bufsize > mss) {
+		bufsize = roundup(bufsize, mss);
+		if (bufsize > sb_max)
+			bufsize = sb_max;
+		(void)sbreserve(&so->so_rcv, bufsize, so, NULL);
+	}
+
+	/*
+	 * Set the slow-start flight size depending on whether this
+	 * is a local network or not.
+	 */
+	if (
+#ifdef INET6
+	    (isipv6 && in6_localaddr(&inp->in6p_faddr)) ||
+	    (!isipv6 &&
+#endif
+	     in_localaddr(inp->inp_faddr)
+#ifdef INET6
+	     )
+#endif
+	    )
+		tp->snd_cwnd = mss * ss_fltsz_local;
+	else 
+		tp->snd_cwnd = mss * ss_fltsz;
+
+	if (rt->rt_rmx.rmx_ssthresh) {
+		/*
+		 * There's some sort of gateway or interface
+		 * buffer limit on the path.  Use this to set
+		 * the slow start threshold, but set the
+		 * threshold to no less than 2*mss.
+		 */
+		tp->snd_ssthresh = max(2 * mss, rt->rt_rmx.rmx_ssthresh);
+		tcpstat.tcps_usedssthresh++;
+	}
+}
+
+/*
+ * Compute the MSS value to advertise in the option of an outgoing SYN:
+ * the outgoing interface MTU less the minimal protocol headers, or the
+ * protocol default when no route to the peer is available.
+ */
+int
+tcp_mssopt(tp)
+	struct tcpcb *tp;
+{
+	struct rtentry *rt;
+#ifdef INET6
+	int isipv6;
+	int min_protoh;
+
+	isipv6 = ((tp->t_inpcb->inp_vflag & INP_IPV6) != 0) ? 1 : 0;
+	if (isipv6) {
+		min_protoh = sizeof (struct ip6_hdr) + sizeof (struct tcphdr);
+		rt = tcp_rtlookup6(tp->t_inpcb);
+	} else {
+		min_protoh = sizeof (struct tcpiphdr);
+		rt = tcp_rtlookup(tp->t_inpcb);
+	}
+	if (rt == NULL)
+		return (isipv6 ? tcp_v6mssdflt : tcp_mssdflt);
+#else /* !INET6 */
+#define min_protoh  (sizeof (struct tcpiphdr))
+
+	rt = tcp_rtlookup(tp->t_inpcb);
+	if (rt == NULL)
+		return (tcp_mssdflt);
+#endif /* INET6 */
+
+	/* interface MTU less the minimal IP + TCP headers */
+	return (rt->rt_ifp->if_mtu - min_protoh);
+}
+
+
+/*
+ * Checks for partial ack.  If partial ack arrives, force the retransmission
+ * of the next unacknowledged segment, do not clear tp->t_dupacks, and return
+ * 1.  By setting snd_nxt to th_ack, this forces retransmission timer to
+ * be started again.  If the ack advances at least to tp->snd_recover, return 0.
+ */
+static int
+tcp_newreno(tp, th)
+	struct tcpcb *tp;
+	struct tcphdr *th;
+{
+	if (SEQ_LT(th->th_ack, tp->snd_recover)) {
+		tcp_seq onxt = tp->snd_nxt;
+		u_long  ocwnd = tp->snd_cwnd;
+		u_long  acked = th->th_ack - tp->snd_una; /* snd_una is stale */
+
+		callout_stop(tp->tt_rexmt);
+		tp->t_rtttime = 0;
+		tp->snd_nxt = th->th_ack;
+		/* Open cwnd to one segment past the acked offset and send. */
+		tp->snd_cwnd = tp->t_maxseg + acked;
+		(void) tcp_output(tp);
+		tp->snd_cwnd = ocwnd;
+		if (SEQ_GT(onxt, tp->snd_nxt))
+			tp->snd_nxt = onxt;
+		/*
+		 * Partial window deflation, clamped so cwnd cannot wrap.
+		 */
+		if (tp->snd_cwnd > acked)
+			tp->snd_cwnd -= acked;
+		else
+			tp->snd_cwnd = 0;
+		tp->snd_cwnd += tp->t_maxseg;
+		return (1);
+	}
+	return (0);
+}
diff --git a/sys/netinet/tcp_seq.h b/sys/netinet/tcp_seq.h
new file mode 100644
index 0000000..9d4adc8
--- /dev/null
+++ b/sys/netinet/tcp_seq.h
@@ -0,0 +1,87 @@
+/*
+ * Copyright (c) 1982, 1986, 1993, 1995
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)tcp_seq.h	8.3 (Berkeley) 6/21/95
+ * $FreeBSD$
+ */
+
+#ifndef _NETINET_TCP_SEQ_H_
+#define _NETINET_TCP_SEQ_H_
+/*
+ * TCP sequence numbers are 32 bit integers operated
+ * on with modular arithmetic.  These macros can be
+ * used to compare such integers.
+ */
+#define	SEQ_LT(a,b)	((int)((a)-(b)) < 0)
+#define	SEQ_LEQ(a,b)	((int)((a)-(b)) <= 0)
+#define	SEQ_GT(a,b)	((int)((a)-(b)) > 0)
+#define	SEQ_GEQ(a,b)	((int)((a)-(b)) >= 0)
+
+/* for modulo comparisons of timestamps */
+#define TSTMP_LT(a,b)	((int)((a)-(b)) < 0)
+#define TSTMP_GEQ(a,b)	((int)((a)-(b)) >= 0)
+
+/*
+ * TCP connection counts are 32 bit integers operated
+ * on with modular arithmetic.  These macros can be
+ * used to compare such integers.
+ */
+#define	CC_LT(a,b)	((int)((a)-(b)) < 0)
+#define	CC_LEQ(a,b)	((int)((a)-(b)) <= 0)
+#define	CC_GT(a,b)	((int)((a)-(b)) > 0)
+#define	CC_GEQ(a,b)	((int)((a)-(b)) >= 0)
+
+/* Increment a CC, skipping 0 (special meaning); evaluates c repeatedly */
+#define CC_INC(c)	(++(c) == 0 ? ++(c) : (c))
+
+/*
+ * Macros to initialize tcp sequence numbers for
+ * send and receive from initial send and receive
+ * sequence numbers.  Each expands to one parenthesized expression.
+ */
+#define	tcp_rcvseqinit(tp) \
+	((tp)->rcv_adv = (tp)->rcv_nxt = (tp)->irs + 1)
+
+#define	tcp_sendseqinit(tp) \
+	((tp)->snd_una = (tp)->snd_nxt = (tp)->snd_max = (tp)->snd_up = \
+	    (tp)->iss)
+
+#define TCP_PAWS_IDLE	(24 * 24 * 60 * 60 * hz)
+					/* timestamp wrap-around time */
+
+#ifdef _KERNEL
+extern tcp_cc	tcp_ccgen;		/* global connection count */
+
+#else
+#define	TCP_ISSINCR	(250*1024)	/* increment for tcp_iss each second */
+#endif /* _KERNEL */
+#endif /* _NETINET_TCP_SEQ_H_ */
diff --git a/sys/netinet/tcp_subr.c b/sys/netinet/tcp_subr.c
new file mode 100644
index 0000000..4089551
--- /dev/null
+++ b/sys/netinet/tcp_subr.c
@@ -0,0 +1,1424 @@
+/*
+ * Copyright (c) 1982, 1986, 1988, 1990, 1993, 1995
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)tcp_subr.c	8.2 (Berkeley) 5/24/95
+ * $FreeBSD$
+ */
+
+#include "opt_compat.h"
+#include "opt_inet6.h"
+#include "opt_ipsec.h"
+#include "opt_tcpdebug.h"
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/callout.h>
+#include <sys/kernel.h>
+#include <sys/sysctl.h>
+#include <sys/malloc.h>
+#include <sys/mbuf.h>
+#ifdef INET6
+#include <sys/domain.h>
+#endif
+#include <sys/proc.h>
+#include <sys/socket.h>
+#include <sys/socketvar.h>
+#include <sys/protosw.h>
+#include <sys/random.h>
+
+#include <vm/vm_zone.h>
+
+#include <net/route.h>
+#include <net/if.h>
+
+#define _IP_VHL
+#include <netinet/in.h>
+#include <netinet/in_systm.h>
+#include <netinet/ip.h>
+#ifdef INET6
+#include <netinet/ip6.h>
+#endif
+#include <netinet/in_pcb.h>
+#ifdef INET6
+#include <netinet6/in6_pcb.h>
+#endif
+#include <netinet/in_var.h>
+#include <netinet/ip_var.h>
+#ifdef INET6
+#include <netinet6/ip6_var.h>
+#endif
+#include <netinet/tcp.h>
+#include <netinet/tcp_fsm.h>
+#include <netinet/tcp_seq.h>
+#include <netinet/tcp_timer.h>
+#include <netinet/tcp_var.h>
+#ifdef INET6
+#include <netinet6/tcp6_var.h>
+#endif
+#include <netinet/tcpip.h>
+#ifdef TCPDEBUG
+#include <netinet/tcp_debug.h>
+#endif
+#include <netinet6/ip6protosw.h>
+
+#ifdef IPSEC
+#include <netinet6/ipsec.h>
+#ifdef INET6
+#include <netinet6/ipsec6.h>
+#endif
+#endif /*IPSEC*/
+
+#include <machine/in_cksum.h>
+
+int 	tcp_mssdflt = TCP_MSS;	/* used by tcp_mss() when no route is found */
+SYSCTL_INT(_net_inet_tcp, TCPCTL_MSSDFLT, mssdflt, CTLFLAG_RW, 
+    &tcp_mssdflt , 0, "Default TCP Maximum Segment Size");
+
+#ifdef INET6
+int	tcp_v6mssdflt = TCP6_MSS;	/* IPv6 counterpart of tcp_mssdflt */
+SYSCTL_INT(_net_inet_tcp, TCPCTL_V6MSSDFLT, v6mssdflt,
+	CTLFLAG_RW, &tcp_v6mssdflt , 0,
+	"Default TCP Maximum Segment Size for IPv6");
+#endif
+
+#if 0	/* the rttdflt knob is compiled out */
+static int 	tcp_rttdflt = TCPTV_SRTTDFLT / PR_SLOWHZ;
+SYSCTL_INT(_net_inet_tcp, TCPCTL_RTTDFLT, rttdflt, CTLFLAG_RW, 
+    &tcp_rttdflt , 0, "Default maximum TCP Round Trip Time");
+#endif
+
+static int	tcp_do_rfc1323 = 1;
+SYSCTL_INT(_net_inet_tcp, TCPCTL_DO_RFC1323, rfc1323, CTLFLAG_RW, 
+    &tcp_do_rfc1323 , 0, "Enable rfc1323 (high performance TCP) extensions");
+
+static int	tcp_do_rfc1644 = 0;
+SYSCTL_INT(_net_inet_tcp, TCPCTL_DO_RFC1644, rfc1644, CTLFLAG_RW, 
+    &tcp_do_rfc1644 , 0, "Enable rfc1644 (TTCP) extensions");
+
+static int	tcp_tcbhashsize = 0;	/* filled in by tcp_init() */
+SYSCTL_INT(_net_inet_tcp, OID_AUTO, tcbhashsize, CTLFLAG_RD,
+     &tcp_tcbhashsize, 0, "Size of TCP control-block hashtable");
+
+static int	do_tcpdrain = 1;
+SYSCTL_INT(_net_inet_tcp, OID_AUTO, do_tcpdrain, CTLFLAG_RW, &do_tcpdrain, 0,
+     "Enable tcp_drain routine for extra help when low on mbufs");
+
+SYSCTL_INT(_net_inet_tcp, OID_AUTO, pcbcount, CTLFLAG_RD, 
+    &tcbinfo.ipi_count, 0, "Number of active PCBs");
+
+static int	icmp_may_rst = 1;
+SYSCTL_INT(_net_inet_tcp, OID_AUTO, icmp_may_rst, CTLFLAG_RW, &icmp_may_rst, 0, 
+    "Certain ICMP unreachable messages may abort connections in SYN_SENT");
+
+static void	tcp_cleartaocache __P((void));
+static void	tcp_notify __P((struct inpcb *, int));
+
+/*
+ * Target size of TCP PCB hash tables. Must be a power of two;
+ * tcp_init() warns and falls back to 512 when it is not.
+ * Note that this can be overridden by the kernel environment
+ * variable net.inet.tcp.tcbhashsize.
+ */
+#ifndef TCBHASHSIZE
+#define TCBHASHSIZE	512
+#endif
+
+/*
+ * This is the actual shape of what we allocate using the zone
+ * allocator.  Doing it this way allows us to protect both structures
+ * using the same generation count, and also eliminates the overhead
+ * of allocating tcpcbs separately.  By hiding the structure here,
+ * we avoid changing most of the rest of the code (although it needs
+ * to be changed, eventually, for greater efficiency).
+ */
+#define	ALIGNMENT	32		/* in bytes (it sizes a char array) */
+#define	ALIGNM1		(ALIGNMENT - 1)	/* mask used for rounding up */
+struct	inp_tp {
+	union {
+		struct	inpcb inp;
+		char	align[(sizeof(struct inpcb) + ALIGNM1) & ~ALIGNM1];
+	} inp_tp_u;	/* inpcb padded up to a multiple of ALIGNMENT */
+	struct	tcpcb tcb;
+	struct	callout inp_tp_rexmt, inp_tp_persist, inp_tp_keep, inp_tp_2msl;
+	struct	callout inp_tp_delack;	/* presumably back tp->tt_* -- confirm */
+};
+#undef ALIGNMENT
+#undef ALIGNM1
+
+/*
+ * One-time TCP start-up: timer defaults, PCB list, hashes and zone.
+ */
+void
+tcp_init()
+{
+	int nbuckets;
+
+	tcp_ccgen = 1;
+	tcp_cleartaocache();
+	/* timer defaults */
+	tcp_msl = TCPTV_MSL;
+	tcp_delacktime = TCPTV_DELACK;
+	tcp_keepinit = TCPTV_KEEP_INIT;
+	tcp_keepintvl = TCPTV_KEEPINTVL;
+	tcp_keepidle = TCPTV_KEEP_IDLE;
+	tcp_maxpersistidle = TCPTV_KEEP_IDLE;
+	/* PCB list and hash tables (size from the kernel environment) */
+	LIST_INIT(&tcb);
+	tcbinfo.listhead = &tcb;
+	TUNABLE_INT_FETCH("net.inet.tcp.tcbhashsize", TCBHASHSIZE, nbuckets);
+	if (!powerof2(nbuckets)) {
+		printf("WARNING: TCB hash size not a power of 2\n");
+		nbuckets = 512;		/* safe default */
+	}
+	tcp_tcbhashsize = nbuckets;
+	tcbinfo.hashbase = hashinit(nbuckets, M_PCB, &tcbinfo.hashmask);
+	tcbinfo.porthashbase =
+	    hashinit(nbuckets, M_PCB, &tcbinfo.porthashmask);
+	tcbinfo.ipi_zone = zinit("tcpcb", sizeof(struct inp_tp), maxsockets,
+	    ZONE_INTERRUPT, 0);
+#ifdef INET6
+#define TCP_MINPROTOHDR (sizeof(struct ip6_hdr) + sizeof(struct tcphdr))
+#else /* INET6 */
+#define TCP_MINPROTOHDR (sizeof(struct tcpiphdr))
+#endif /* INET6 */
+	if (max_protohdr < TCP_MINPROTOHDR)
+		max_protohdr = TCP_MINPROTOHDR;
+	if (max_linkhdr + TCP_MINPROTOHDR > MHLEN)
+		panic("tcp_init");
+#undef TCP_MINPROTOHDR
+}
+
+/*
+ * Create template to be used to send tcp packets on a connection.
+ * Call after host entry created.  Reuses tp->t_template if set, else
+ * allocates an mbuf (returns 0 if none is available), and fills in a
+ * skeletal tcp/ip header, minimizing the work needed at send time.
+ */
+struct tcptemp *
+tcp_template(tp)
+	struct tcpcb *tp;
+{
+	register struct inpcb *inp = tp->t_inpcb;
+	register struct mbuf *m;
+	register struct tcptemp *n;
+
+	if ((n = tp->t_template) == 0) {	/* no template yet */
+		m = m_get(M_DONTWAIT, MT_HEADER);
+		if (m == NULL)
+			return (0);
+		m->m_len = sizeof (struct tcptemp);
+		n = mtod(m, struct tcptemp *);
+	}
+#ifdef INET6
+	if ((inp->inp_vflag & INP_IPV6) != 0) {
+		register struct ip6_hdr *ip6;
+
+		ip6 = (struct ip6_hdr *)n->tt_ipgen;
+		ip6->ip6_flow = (ip6->ip6_flow & ~IPV6_FLOWINFO_MASK) |
+			(inp->in6p_flowinfo & IPV6_FLOWINFO_MASK);
+		ip6->ip6_vfc = (ip6->ip6_vfc & ~IPV6_VERSION_MASK) |
+			(IPV6_VERSION & IPV6_VERSION_MASK);
+		ip6->ip6_nxt = IPPROTO_TCP;
+		ip6->ip6_plen = sizeof(struct tcphdr);
+		ip6->ip6_src = inp->in6p_laddr;
+		ip6->ip6_dst = inp->in6p_faddr;
+		n->tt_t.th_sum = 0;
+	} else
+#endif
+      {
+	struct ip *ip = (struct ip *)n->tt_ipgen;
+
+	bzero(ip, sizeof(struct ip));		/* XXX overkill? */
+	ip->ip_vhl = IP_VHL_BORING;
+	ip->ip_p = IPPROTO_TCP;
+	ip->ip_src = inp->inp_laddr;
+	ip->ip_dst = inp->inp_faddr;
+	n->tt_t.th_sum = in_pseudo(ip->ip_src.s_addr, ip->ip_dst.s_addr,
+	    htons(sizeof(struct tcphdr) + IPPROTO_TCP));
+      }
+	/* TCP header fields shared by the IPv4 and IPv6 templates */
+	n->tt_t.th_sport = inp->inp_lport;
+	n->tt_t.th_dport = inp->inp_fport;
+	n->tt_t.th_seq = 0;
+	n->tt_t.th_ack = 0;
+	n->tt_t.th_x2 = 0;
+	n->tt_t.th_off = 5;
+	n->tt_t.th_flags = 0;
+	n->tt_t.th_win = 0;
+	n->tt_t.th_urp = 0;
+	return (n);
+}
+
+/*
+ * Send a single message to the TCP at address specified by
+ * the given TCP/IP header.  If m == 0, then we make a copy
+ * of the IP header at ipgen and the TCP header at th in a freshly
+ * allocated mbuf and send directly to the addressed host; in that
+ * case the flags argument is replaced by TH_ACK.
+ * This is used to force keep alive messages out using the TCP
+ * template for a connection tp->t_template.  If m is given,
+ * the addresses and ports in it are swapped in place so that the
+ * message goes back to the TCP which originated the segment, and
+ * any mbufs chained after m are freed.
+ *
+ * In any case the ack and sequence number of the transmitted
+ * segment are as specified by the parameters.
+ *
+ * tp may be NULL; a zero window is then advertised and a temporary
+ * route on the stack is used for the output call.
+ *
+ * NOTE: If m != NULL, then ipgen and th must point to *inside* the mbuf.
+ */
+void
+tcp_respond(tp, ipgen, th, m, ack, seq, flags)
+	struct tcpcb *tp;
+	void *ipgen;
+	register struct tcphdr *th;
+	register struct mbuf *m;
+	tcp_seq ack, seq;
+	int flags;
+{
+	register int tlen;
+	int win = 0;
+	struct route *ro = 0;
+	struct route sro;
+	struct ip *ip;
+	struct tcphdr *nth;
+#ifdef INET6
+	struct route_in6 *ro6 = 0;
+	struct route_in6 sro6;
+	struct ip6_hdr *ip6;
+	int isipv6;
+#endif /* INET6 */
+	int ipflags = 0;
+
+#ifdef INET6
+	/* The version nibble of the supplied header selects IPv4 or IPv6. */
+	isipv6 = IP_VHL_V(((struct ip *)ipgen)->ip_vhl) == 6;
+	ip6 = ipgen;
+#endif /* INET6 */
+	ip = ipgen;
+
+	if (tp) {
+		/*
+		 * Advertise the free receive buffer space, capped at
+		 * TCP_MAXWIN << rcv_scale; win stays 0 for a RST.
+		 */
+		if (!(flags & TH_RST)) {
+			win = sbspace(&tp->t_inpcb->inp_socket->so_rcv);
+			if (win > (long)TCP_MAXWIN << tp->rcv_scale)
+				win = (long)TCP_MAXWIN << tp->rcv_scale;
+		}
+		/* Use the route cached in the PCB. */
+#ifdef INET6
+		if (isipv6)
+			ro6 = &tp->t_inpcb->in6p_route;
+		else
+#endif /* INET6 */
+		ro = &tp->t_inpcb->inp_route;
+	} else {
+		/* No connection: use a zeroed on-stack route, freed below. */
+#ifdef INET6
+		if (isipv6) {
+			ro6 = &sro6;
+			bzero(ro6, sizeof *ro6);
+		} else
+#endif /* INET6 */
+	      {
+		ro = &sro;
+		bzero(ro, sizeof *ro);
+	      }
+	}
+	if (m == 0) {
+		/* Build the segment in a new mbuf from the supplied headers. */
+		m = m_gethdr(M_DONTWAIT, MT_HEADER);
+		if (m == NULL)
+			return;
+		tlen = 0;
+		m->m_data += max_linkhdr;	/* leave room for a link header */
+#ifdef INET6
+		if (isipv6) {
+			bcopy((caddr_t)ip6, mtod(m, caddr_t), 
+			      sizeof(struct ip6_hdr));
+			ip6 = mtod(m, struct ip6_hdr *);
+			nth = (struct tcphdr *)(ip6 + 1);
+		} else
+#endif /* INET6 */
+	      {
+		bcopy((caddr_t)ip, mtod(m, caddr_t), sizeof(struct ip));
+		ip = mtod(m, struct ip *);
+		nth = (struct tcphdr *)(ip + 1);
+	      }
+		bcopy((caddr_t)th, (caddr_t)nth, sizeof(struct tcphdr));
+		flags = TH_ACK;
+	} else {
+		/* Reuse the caller's mbuf: keep only its first buffer. */
+		m_freem(m->m_next);
+		m->m_next = 0;
+		m->m_data = (caddr_t)ipgen;
+		/* m_len is set later */
+		tlen = 0;
+#define xchg(a,b,type) { type t; t=a; a=b; b=t; }
+#ifdef INET6
+		if (isipv6) {
+			xchg(ip6->ip6_dst, ip6->ip6_src, struct in6_addr);
+			nth = (struct tcphdr *)(ip6 + 1);
+		} else
+#endif /* INET6 */
+	      {
+		xchg(ip->ip_dst.s_addr, ip->ip_src.s_addr, n_long);
+		nth = (struct tcphdr *)(ip + 1);
+	      }
+		if (th != nth) {
+			/*
+			 * this is usually a case when an extension header
+			 * exists between the IPv6 header and the
+			 * TCP header.
+			 */
+			nth->th_sport = th->th_sport;
+			nth->th_dport = th->th_dport;
+		}
+		xchg(nth->th_dport, nth->th_sport, n_short);
+#undef xchg
+	}
+	/* tlen is 0 here, so the segment is headers only. */
+#ifdef INET6
+	if (isipv6) {
+		ip6->ip6_plen = htons((u_short)(sizeof (struct tcphdr) +
+						tlen));
+		tlen += sizeof (struct ip6_hdr) + sizeof (struct tcphdr);
+	} else
+#endif
+      {
+	tlen += sizeof (struct tcpiphdr);
+	ip->ip_len = tlen;	/* stored as-is, without htons() */
+	ip->ip_ttl = ip_defttl;
+      }
+	m->m_len = tlen;
+	m->m_pkthdr.len = tlen;
+	m->m_pkthdr.rcvif = (struct ifnet *) 0;
+	nth->th_seq = htonl(seq);
+	nth->th_ack = htonl(ack);
+	nth->th_x2 = 0;
+	nth->th_off = sizeof (struct tcphdr) >> 2;
+	nth->th_flags = flags;
+	if (tp)
+		nth->th_win = htons((u_short) (win >> tp->rcv_scale));
+	else
+		nth->th_win = htons((u_short)win);
+	nth->th_urp = 0;
+#ifdef INET6
+	if (isipv6) {
+		/* IPv6: checksum is computed here over the whole segment. */
+		nth->th_sum = 0;
+		nth->th_sum = in6_cksum(m, IPPROTO_TCP,
+					sizeof(struct ip6_hdr),
+					tlen - sizeof(struct ip6_hdr));
+		ip6->ip6_hlim = in6_selecthlim(tp ? tp->t_inpcb : NULL,
+					       ro6 && ro6->ro_rt ?
+					       ro6->ro_rt->rt_ifp :
+					       NULL);
+	} else
+#endif /* INET6 */
+      {
+	/*
+	 * IPv4: th_sum gets the in_pseudo() value and the mbuf is
+	 * flagged CSUM_TCP with the offset of th_sum in csum_data.
+	 */
+        nth->th_sum = in_pseudo(ip->ip_src.s_addr, ip->ip_dst.s_addr,
+	    htons((u_short)(tlen - sizeof(struct ip) + ip->ip_p)));
+        m->m_pkthdr.csum_flags = CSUM_TCP;
+        m->m_pkthdr.csum_data = offsetof(struct tcphdr, th_sum);
+      }
+#ifdef TCPDEBUG
+	if (tp == NULL || (tp->t_inpcb->inp_socket->so_options & SO_DEBUG))
+		tcp_trace(TA_OUTPUT, 0, tp, mtod(m, void *), th, 0);
+#endif
+#ifdef IPSEC
+	ipsec_setsocket(m, tp ? tp->t_inpcb->inp_socket : NULL);
+#endif
+#ifdef INET6
+	if (isipv6) {
+		(void)ip6_output(m, NULL, ro6, ipflags, NULL, NULL);
+		/* Release a route that was attached to the temporary only. */
+		if (ro6 == &sro6 && ro6->ro_rt) {
+			RTFREE(ro6->ro_rt);
+			ro6->ro_rt = NULL;
+		}
+	} else
+#endif /* INET6 */
+      {
+	(void) ip_output(m, NULL, ro, ipflags, NULL);
+	/* Release a route that was attached to the temporary only. */
+	if (ro == &sro && ro->ro_rt) {
+		RTFREE(ro->ro_rt);
+		ro->ro_rt = NULL;
+	}
+      }
+}
+
+/*
+ * Set up the TCP control block that lives next to `inp'.
+ * The inpcb must have been allocated from the zone created in
+ * tcp_init(): it is treated here as the leading member of a
+ * struct inp_tp, whose tcpcb and callout storage are used.
+ * The tcpcb is zeroed, given an empty reassembly queue and default
+ * parameters, and linked to the inpcb in both directions.
+ */
+struct tcpcb *
+tcp_newtcpcb(inp)
+	struct inpcb *inp;
+{
+	struct inp_tp *it = (struct inp_tp *)inp;
+	register struct tcpcb *tp = &it->tcb;
+
+	bzero((char *) tp, sizeof(struct tcpcb));
+	LIST_INIT(&tp->t_segq);
+
+	/* Default segment size depends on the address family. */
+#ifdef INET6
+	if ((inp->inp_vflag & INP_IPV6) != 0)
+		tp->t_maxseg = tp->t_maxopd = tcp_v6mssdflt;
+	else
+#endif /* INET6 */
+		tp->t_maxseg = tp->t_maxopd = tcp_mssdflt;
+
+	/* Point each timer at its storage in the inp_tp, then initialize. */
+	tp->tt_rexmt = &it->inp_tp_rexmt;
+	callout_init(tp->tt_rexmt, 0);
+	tp->tt_persist = &it->inp_tp_persist;
+	callout_init(tp->tt_persist, 0);
+	tp->tt_keep = &it->inp_tp_keep;
+	callout_init(tp->tt_keep, 0);
+	tp->tt_2msl = &it->inp_tp_2msl;
+	callout_init(tp->tt_2msl, 0);
+	tp->tt_delack = &it->inp_tp_delack;
+	callout_init(tp->tt_delack, 0);
+
+	/* t_flags is zero after the bzero() above. */
+	if (tcp_do_rfc1323)
+		tp->t_flags |= (TF_REQ_SCALE|TF_REQ_TSTMP);
+	if (tcp_do_rfc1644)
+		tp->t_flags |= TF_REQ_CC;
+	tp->t_inpcb = inp;	/* XXX */
+
+	/*
+	 * srtt starts at TCPTV_SRTTBASE (0), which marks "no estimate
+	 * yet"; rttvar is chosen so that srtt + 4 * rttvar yields a
+	 * reasonable first retransmit timeout.
+	 */
+	tp->t_srtt = TCPTV_SRTTBASE;
+	tp->t_rttvar =
+	    ((TCPTV_RTOBASE - TCPTV_SRTTBASE) << TCP_RTTVAR_SHIFT) / 4;
+	tp->t_rttmin = TCPTV_MIN;
+	tp->t_rxtcur = TCPTV_RTOBASE;
+	tp->snd_cwnd = TCP_MAXWIN << TCP_MAX_WINSHIFT;
+	tp->snd_ssthresh = TCP_MAXWIN << TCP_MAX_WINSHIFT;
+	tp->t_rcvtime = ticks;
+
+	/*
+	 * The IPv4 TTL is set even for an IPv6 socket: such a socket may
+	 * be bound to the IPv6 wildcard address, which can match an
+	 * IPv4-mapped IPv6 address.
+	 */
+	inp->inp_ip_ttl = ip_defttl;
+	inp->inp_ppcb = (caddr_t)tp;
+	return (tp);		/* XXX */
+}
+
+/*
+ * Abort a TCP connection and report `errno' on its socket.
+ * When a SYN has been received from the peer, the state is forced to
+ * TCPS_CLOSED and tcp_output() is called (which sends the RST).
+ * A pending soft error replaces ETIMEDOUT.  Returns whatever
+ * tcp_close() returns.
+ */
+struct tcpcb *
+tcp_drop(tp, errno)
+	register struct tcpcb *tp;
+	int errno;
+{
+	struct socket *so = tp->t_inpcb->inp_socket;
+
+	if (!TCPS_HAVERCVDSYN(tp->t_state)) {
+		tcpstat.tcps_conndrops++;
+	} else {
+		tp->t_state = TCPS_CLOSED;
+		(void) tcp_output(tp);
+		tcpstat.tcps_drops++;
+	}
+
+	/* Prefer the recorded soft error over a plain timeout. */
+	if (tp->t_softerror && errno == ETIMEDOUT)
+		so->so_error = tp->t_softerror;
+	else
+		so->so_error = errno;
+	return (tcp_close(tp));
+}
+
+/*
+ * Close a TCP control block:
+ *	stop all of its timers
+ *	save rtt/rttvar/ssthresh estimates in the route, when usable
+ *	discard all space held by the tcp
+ *	discard internet protocol block
+ *	wake up any sleepers
+ * Always returns a null tcpcb pointer.
+ */
+struct tcpcb *
+tcp_close(tp)
+	register struct tcpcb *tp;
+{
+	register struct tseg_qent *q;
+	struct inpcb *inp = tp->t_inpcb;
+	struct socket *so = inp->inp_socket;
+#ifdef INET6
+	int isipv6 = (inp->inp_vflag & INP_IPV6) != 0;
+#endif /* INET6 */
+	register struct rtentry *rt;
+	int dosavessthresh;
+
+	/*
+	 * Make sure that all of our timers are stopped before we
+	 * delete the PCB.
+	 */
+	callout_stop(tp->tt_rexmt);
+	callout_stop(tp->tt_persist);
+	callout_stop(tp->tt_keep);
+	callout_stop(tp->tt_2msl);
+	callout_stop(tp->tt_delack);
+
+	/*
+	 * If we got enough samples through the srtt filter,
+	 * save the rtt and rttvar in the routing entry.
+	 * 'Enough' is arbitrarily defined as the 16 samples.
+	 * 16 samples is enough for the srtt filter to converge
+	 * to within 5% of the correct value; fewer samples and
+	 * we could save a very bogus rtt.
+	 *
+	 * Don't update the default route's characteristics and don't
+	 * update anything that the user "locked".
+	 */
+	if (tp->t_rttupdated >= 16) {
+		register u_long i = 0;
+		/*
+		 * No cached route, or a route whose key is the unspecified
+		 * address: skip straight to no_valid_rt.  Note that this
+		 * also skips the RTF_DELCLONE handling further down.
+		 */
+#ifdef INET6
+		if (isipv6) {
+			struct sockaddr_in6 *sin6;
+
+			if ((rt = inp->in6p_route.ro_rt) == NULL)
+				goto no_valid_rt;
+			sin6 = (struct sockaddr_in6 *)rt_key(rt);
+			if (IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr))
+				goto no_valid_rt;
+		}
+		else
+#endif /* INET6 */		
+		if ((rt = inp->inp_route.ro_rt) == NULL ||
+		    ((struct sockaddr_in *)rt_key(rt))->sin_addr.s_addr
+		    == INADDR_ANY)
+			goto no_valid_rt;
+
+		/* Smoothed RTT, unless the metric is locked. */
+		if ((rt->rt_rmx.rmx_locks & RTV_RTT) == 0) {
+			i = tp->t_srtt *
+			    (RTM_RTTUNIT / (hz * TCP_RTT_SCALE));
+			if (rt->rt_rmx.rmx_rtt && i)
+				/*
+				 * filter this update to half the old & half
+				 * the new values, converting scale.
+				 * See route.h and tcp_var.h for a
+				 * description of the scaling constants.
+				 */
+				rt->rt_rmx.rmx_rtt =
+				    (rt->rt_rmx.rmx_rtt + i) / 2;
+			else
+				rt->rt_rmx.rmx_rtt = i;
+			tcpstat.tcps_cachedrtt++;
+		}
+		/* RTT variance, same averaging, unless locked. */
+		if ((rt->rt_rmx.rmx_locks & RTV_RTTVAR) == 0) {
+			i = tp->t_rttvar *
+			    (RTM_RTTUNIT / (hz * TCP_RTTVAR_SCALE));
+			if (rt->rt_rmx.rmx_rttvar && i)
+				rt->rt_rmx.rmx_rttvar =
+				    (rt->rt_rmx.rmx_rttvar + i) / 2;
+			else
+				rt->rt_rmx.rmx_rttvar = i;
+			tcpstat.tcps_cachedrttvar++;
+		}
+		/*
+		 * The old comment here said:
+		 * update the pipelimit (ssthresh) if it has been updated
+		 * already or if a pipesize was specified & the threshhold
+		 * got below half the pipesize.  I.e., wait for bad news
+		 * before we start updating, then update on both good
+		 * and bad news.
+		 *
+		 * But we want to save the ssthresh even if no pipesize is
+		 * specified explicitly in the route, because such
+		 * connections still have an implicit pipesize specified
+		 * by the global tcp_sendspace.  In the absence of a reliable
+		 * way to calculate the pipesize, it will have to do.
+		 */
+		i = tp->snd_ssthresh;
+		/* "bad news": ssthresh fell below half the pipe size */
+		if (rt->rt_rmx.rmx_sendpipe != 0)
+			dosavessthresh = (i < rt->rt_rmx.rmx_sendpipe / 2);
+		else
+			dosavessthresh = (i < so->so_snd.sb_hiwat / 2);
+		if (((rt->rt_rmx.rmx_locks & RTV_SSTHRESH) == 0 &&
+		     i != 0 && rt->rt_rmx.rmx_ssthresh != 0)
+		    || dosavessthresh) {
+			/*
+			 * convert the limit from user data bytes to
+			 * packets then to packet data bytes.
+			 */
+			i = (i + tp->t_maxseg / 2) / tp->t_maxseg;
+			if (i < 2)
+				i = 2;
+			i *= (u_long)(tp->t_maxseg +
+#ifdef INET6
+				      (isipv6 ? sizeof (struct ip6_hdr) +
+					       sizeof (struct tcphdr) :
+#endif
+				       sizeof (struct tcpiphdr)
+#ifdef INET6
+				       )
+#endif
+				      );
+			if (rt->rt_rmx.rmx_ssthresh)
+				rt->rt_rmx.rmx_ssthresh =
+				    (rt->rt_rmx.rmx_ssthresh + i) / 2;
+			else
+				rt->rt_rmx.rmx_ssthresh = i;
+			tcpstat.tcps_cachedssthresh++;
+		}
+	}
+	/*
+	 * NOTE(review): this reads inp_route even for an IPv6 PCB, whereas
+	 * the code above uses in6p_route there -- confirm that both name
+	 * the same storage.
+	 */
+	rt = inp->inp_route.ro_rt;
+	if (rt) {
+		/*
+		 * mark route for deletion if no information is
+		 * cached.
+		 */
+		if ((tp->t_flags & TF_LQ_OVERFLOW) &&
+		    ((rt->rt_rmx.rmx_locks & RTV_RTT) == 0)){
+			if (rt->rt_rmx.rmx_rtt == 0)
+				rt->rt_flags |= RTF_DELCLONE;
+		}
+	}
+    no_valid_rt:
+	/* free the reassembly queue, if any */
+	while((q = LIST_FIRST(&tp->t_segq)) != NULL) {
+		LIST_REMOVE(q, tqe_q);
+		m_freem(q->tqe_m);
+		FREE(q, M_TSEGQ);
+	}
+	/* the template lives in its own mbuf; dtom() recovers that mbuf */
+	if (tp->t_template)
+		(void) m_free(dtom(tp->t_template));
+	inp->inp_ppcb = NULL;
+	soisdisconnected(so);
+#ifdef INET6
+	if (INP_CHECK_SOCKAF(so, AF_INET6))
+		in6_pcbdetach(inp);
+	else
+#endif /* INET6 */
+	in_pcbdetach(inp);
+	tcpstat.tcps_closed++;
+	return ((struct tcpcb *)0);
+}
+
+/*
+ * Release the mbufs held on the reassembly queues of all TCP PCBs.
+ * Does nothing unless do_tcpdrain is set.
+ *
+ * XXX: The "Net/3" implementation doesn't imply that the TCP
+ *      reassembly queue should be flushed, but in a situation
+ *      where we're really low on mbufs, this is potentially
+ *      useful.
+ */
+void
+tcp_drain()
+{
+	struct inpcb *inp;
+	struct tcpcb *tp;
+	struct tseg_qent *qe;
+
+	if (!do_tcpdrain)
+		return;
+
+	LIST_FOREACH(inp, tcbinfo.listhead, inp_list) {
+		tp = intotcpcb(inp);
+		if (tp == NULL)
+			continue;
+		/* Pop and free entries until the queue is empty. */
+		for (;;) {
+			qe = LIST_FIRST(&tp->t_segq);
+			if (qe == NULL)
+				break;
+			LIST_REMOVE(qe, tqe_q);
+			m_freem(qe->tqe_m);
+			FREE(qe, M_TSEGQ);
+		}
+	}
+}
+
+/*
+ * Notify a tcp user of an asynchronous error by recording it as the
+ * connection's soft error.
+ *
+ * The user is not woken up, since there currently is no mechanism for
+ * reporting soft errors (yet - a kqueue filter may be added); the old
+ * wakeup calls are kept under "#if 0" below.
+ */
+static void
+tcp_notify(inp, error)
+	struct inpcb *inp;
+	int error;
+{
+	struct tcpcb *tp = (struct tcpcb *)inp->inp_ppcb;
+	int unreach;
+
+	unreach = (error == EHOSTUNREACH || error == ENETUNREACH ||
+	    error == EHOSTDOWN);
+
+	/* Unreachable-type errors are ignored once we are hooked up. */
+	if (tp->t_state == TCPS_ESTABLISHED && unreach)
+		return;
+
+	/*
+	 * If the connection hasn't completed, has retransmitted several
+	 * times, and this is a second error, give up now.  This is better
+	 * than waiting a long time to establish a connection that can
+	 * never complete.  Otherwise just remember the error.
+	 */
+	if (tp->t_state < TCPS_ESTABLISHED && tp->t_rxtshift > 3 &&
+	    tp->t_softerror)
+		tcp_drop(tp, error);
+	else
+		tp->t_softerror = error;
+#if 0
+	wakeup((caddr_t) &so->so_timeo);
+	sorwakeup(so);
+	sowwakeup(so);
+#endif
+}
+
+/*
+ * Sysctl handler that exports the TCP PCB list.
+ *
+ * With no output buffer (req->oldptr == 0) only a size estimate is
+ * stored in req->oldidx.  Otherwise the output is: one struct xinpgen,
+ * one struct xtcpcb per exported PCB, and a closing struct xinpgen
+ * carrying the generation/count values current at the end, so that the
+ * caller can detect changes made while the request was processed.
+ * Any attempt to write (req->newptr != 0) yields EPERM.
+ *
+ * Every record is zeroed before it is filled in, so that neither
+ * structure padding nor members left unset (e.g. xt_socket for a PCB
+ * without socket) expose stale kernel stack contents to the caller.
+ */
+static int
+tcp_pcblist(SYSCTL_HANDLER_ARGS)
+{
+	int error, i, n, s;
+	struct inpcb *inp, **inp_list;
+	inp_gen_t gencnt;
+	struct xinpgen xig;
+
+	/*
+	 * The process of preparing the TCB list is too time-consuming and
+	 * resource-intensive to repeat twice on every request.
+	 */
+	if (req->oldptr == 0) {
+		n = tcbinfo.ipi_count;
+		/* leave 1/8 slack for PCBs created in the meantime */
+		req->oldidx = 2 * (sizeof xig)
+			+ (n + n/8) * sizeof(struct xtcpcb);
+		return 0;
+	}
+
+	if (req->newptr != 0)
+		return EPERM;
+
+	/*
+	 * OK, now we're committed to doing something.
+	 */
+	s = splnet();
+	gencnt = tcbinfo.ipi_gencnt;
+	n = tcbinfo.ipi_count;
+	splx(s);
+
+	bzero(&xig, sizeof xig);
+	xig.xig_len = sizeof xig;
+	xig.xig_count = n;
+	xig.xig_gen = gencnt;
+	xig.xig_sogen = so_gencnt;
+	error = SYSCTL_OUT(req, &xig, sizeof xig);
+	if (error)
+		return error;
+
+	inp_list = malloc(n * sizeof *inp_list, M_TEMP, M_WAITOK);
+	if (inp_list == 0)
+		return ENOMEM;
+
+	/*
+	 * Snapshot the PCBs that are no newer than the generation read
+	 * above and that prison_xinpcb() does not hide from the caller.
+	 */
+	s = splnet();
+	for (inp = LIST_FIRST(tcbinfo.listhead), i = 0; inp && i < n;
+	     inp = LIST_NEXT(inp, inp_list)) {
+		if (inp->inp_gencnt <= gencnt && !prison_xinpcb(req->p, inp))
+			inp_list[i++] = inp;
+	}
+	splx(s);
+	n = i;
+
+	/*
+	 * Copy the records out.  Stop at the first failure so that a
+	 * later successful copy cannot hide the error.
+	 */
+	error = 0;
+	for (i = 0; i < n && error == 0; i++) {
+		inp = inp_list[i];
+		if (inp->inp_gencnt <= gencnt) {
+			struct xtcpcb xt;
+			caddr_t inp_ppcb;
+
+			bzero(&xt, sizeof xt);
+			xt.xt_len = sizeof xt;
+			/* XXX should avoid extra copy */
+			bcopy(inp, &xt.xt_inp, sizeof *inp);
+			inp_ppcb = inp->inp_ppcb;
+			if (inp_ppcb != NULL)
+				bcopy(inp_ppcb, &xt.xt_tp, sizeof xt.xt_tp);
+			if (inp->inp_socket)
+				sotoxsocket(inp->inp_socket, &xt.xt_socket);
+			error = SYSCTL_OUT(req, &xt, sizeof xt);
+		}
+	}
+	if (!error) {
+		/*
+		 * Give the user an updated idea of our state.
+		 * If the generation differs from what we told
+		 * her before, she knows that something happened
+		 * while we were processing this request, and it
+		 * might be necessary to retry.
+		 */
+		s = splnet();
+		xig.xig_gen = tcbinfo.ipi_gencnt;
+		xig.xig_sogen = so_gencnt;
+		xig.xig_count = tcbinfo.ipi_count;
+		splx(s);
+		error = SYSCTL_OUT(req, &xig, sizeof xig);
+	}
+	free(inp_list, M_TEMP);
+	return error;
+}
+
+SYSCTL_PROC(_net_inet_tcp, TCPCTL_PCBLIST, pcblist, CTLFLAG_RD, 0, 0,
+	    tcp_pcblist, "S,xtcpcb", "List of active TCP connections");
+
+/*
+ * Sysctl handler: given two struct sockaddr_in (addrs[0] is passed as
+ * the local and addrs[1] as the foreign endpoint of the PCB lookup),
+ * return the uid and groups of the socket owning that TCP connection
+ * as a struct xucred.  Requires suser(); ENOENT if there is no such
+ * connection or it has no socket.
+ */
+static int
+tcp_getcred(SYSCTL_HANDLER_ARGS)
+{
+	struct xucred xuc;
+	struct sockaddr_in addrs[2];
+	struct inpcb *inp;
+	struct socket *so;
+	int error, s;
+
+	if ((error = suser(req->p)) != 0)
+		return (error);
+	if ((error = SYSCTL_IN(req, addrs, sizeof(addrs))) != 0)
+		return (error);
+
+	s = splnet();
+	inp = in_pcblookup_hash(&tcbinfo, addrs[1].sin_addr, addrs[1].sin_port,
+	    addrs[0].sin_addr, addrs[0].sin_port, 0, NULL);
+	if (inp != NULL && inp->inp_socket != NULL) {
+		so = inp->inp_socket;
+		bzero(&xuc, sizeof(xuc));
+		xuc.cr_uid = so->so_cred->cr_uid;
+		xuc.cr_ngroups = so->so_cred->cr_ngroups;
+		bcopy(so->so_cred->cr_groups, xuc.cr_groups,
+		    sizeof(xuc.cr_groups));
+		error = SYSCTL_OUT(req, &xuc, sizeof(struct xucred));
+	} else {
+		error = ENOENT;
+	}
+	splx(s);
+	return (error);
+}
+
+SYSCTL_PROC(_net_inet_tcp, OID_AUTO, getcred, CTLTYPE_OPAQUE|CTLFLAG_RW,
+    0, 0, tcp_getcred, "S,xucred", "Get the xucred of a TCP connection");
+
+#ifdef INET6
+/*
+ * IPv6 counterpart of tcp_getcred(): takes two struct sockaddr_in6 and
+ * returns the owning socket's credentials as a struct xucred.
+ * If addrs[0] is an IPv4-mapped address then addrs[1] must be one too
+ * (EINVAL otherwise) and the lookup is done in the IPv4 hash using the
+ * last four address bytes.
+ */
+static int
+tcp6_getcred(SYSCTL_HANDLER_ARGS)
+{
+	struct xucred xuc;
+	struct sockaddr_in6 addrs[2];
+	struct inpcb *inp;
+	struct socket *so;
+	int error, s, mapped = 0;
+
+	if ((error = suser(req->p)) != 0)
+		return (error);
+	if ((error = SYSCTL_IN(req, addrs, sizeof(addrs))) != 0)
+		return (error);
+
+	if (IN6_IS_ADDR_V4MAPPED(&addrs[0].sin6_addr)) {
+		if (!IN6_IS_ADDR_V4MAPPED(&addrs[1].sin6_addr))
+			return (EINVAL);
+		mapped = 1;
+	}
+
+	s = splnet();
+	if (mapped != 1) {
+		inp = in6_pcblookup_hash(&tcbinfo, &addrs[1].sin6_addr,
+		    addrs[1].sin6_port,
+		    &addrs[0].sin6_addr, addrs[0].sin6_port,
+		    0, NULL);
+	} else {
+		inp = in_pcblookup_hash(&tcbinfo,
+		    *(struct in_addr *)&addrs[1].sin6_addr.s6_addr[12],
+		    addrs[1].sin6_port,
+		    *(struct in_addr *)&addrs[0].sin6_addr.s6_addr[12],
+		    addrs[0].sin6_port,
+		    0, NULL);
+	}
+	if (inp != NULL && inp->inp_socket != NULL) {
+		so = inp->inp_socket;
+		bzero(&xuc, sizeof(xuc));
+		xuc.cr_uid = so->so_cred->cr_uid;
+		xuc.cr_ngroups = so->so_cred->cr_ngroups;
+		bcopy(so->so_cred->cr_groups, xuc.cr_groups,
+		    sizeof(xuc.cr_groups));
+		error = SYSCTL_OUT(req, &xuc, sizeof(struct xucred));
+	} else {
+		error = ENOENT;
+	}
+	splx(s);
+	return (error);
+}
+
+SYSCTL_PROC(_net_inet6_tcp6, OID_AUTO, getcred, CTLTYPE_OPAQUE|CTLFLAG_RW,
+	    0, 0,
+	    tcp6_getcred, "S,xucred", "Get the xucred of a TCP6 connection");
+#endif
+
+
+/*
+ * Handle a protocol control request for TCP over IPv4.
+ *
+ *	cmd	PRC_* request code
+ *	sa	address the request refers to; must be a non-wildcard
+ *		AF_INET address, otherwise the request is ignored
+ *	vip	IP header of the offending packet (followed by at least
+ *		the start of its TCP header), or NULL
+ *
+ * The request code selects the notify routine.  When the offending
+ * header is available, only the single matching connection is
+ * notified, and only if the sequence number in that header lies in
+ * [snd_una, snd_max).  Otherwise every PCB with that foreign address
+ * is notified.
+ */
+void
+tcp_ctlinput(cmd, sa, vip)
+	int cmd;
+	struct sockaddr *sa;
+	void *vip;
+{
+	struct ip *ip = vip;
+	struct tcphdr *th;
+	struct in_addr faddr;
+	struct inpcb *inp;
+	struct tcpcb *tp;
+	void (*notify) __P((struct inpcb *, int)) = tcp_notify;
+	tcp_seq icmp_seq;
+	int s;
+
+	/* Check the family before interpreting sa as a sockaddr_in. */
+	if (sa->sa_family != AF_INET)
+		return;
+	faddr = ((struct sockaddr_in *)sa)->sin_addr;
+	if (faddr.s_addr == INADDR_ANY)
+		return;
+
+	if (cmd == PRC_QUENCH)
+		notify = tcp_quench;
+	else if (icmp_may_rst && (cmd == PRC_UNREACH_ADMIN_PROHIB ||
+		cmd == PRC_UNREACH_PORT) && ip)
+		notify = tcp_drop_syn_sent;
+	else if (cmd == PRC_MSGSIZE)
+		notify = tcp_mtudisc;
+	else if (PRC_IS_REDIRECT(cmd)) {
+		ip = 0;
+		notify = in_rtchange;
+	} else if (cmd == PRC_HOSTDEAD)
+		ip = 0;
+	/*
+	 * Valid indices into inetctlerrmap[] are 0 .. PRC_NCMDS - 1, so
+	 * cmd == PRC_NCMDS must be rejected as well.
+	 */
+	else if ((unsigned)cmd >= PRC_NCMDS || inetctlerrmap[cmd] == 0)
+		return;
+	if (ip) {
+		s = splnet();
+		/* The TCP header follows the variable-length IP header. */
+		th = (struct tcphdr *)((caddr_t)ip 
+				       + (IP_VHL_HL(ip->ip_vhl) << 2));
+		inp = in_pcblookup_hash(&tcbinfo, faddr, th->th_dport,
+		    ip->ip_src, th->th_sport, 0, NULL);
+		if (inp != NULL && inp->inp_socket != NULL) {
+			/* th_seq is in network order; swap to host order */
+			icmp_seq = htonl(th->th_seq);
+			tp = intotcpcb(inp);
+			if (tp != NULL &&
+			    SEQ_GEQ(icmp_seq, tp->snd_una) &&
+			    SEQ_LT(icmp_seq, tp->snd_max))
+				(*notify)(inp, inetctlerrmap[cmd]);
+		}
+		splx(s);
+	} else
+		in_pcbnotifyall(&tcb, faddr, inetctlerrmap[cmd], notify);
+}
+
+#ifdef INET6
+/*
+ * Handle a protocol control request for TCP over IPv6.
+ *
+ *	cmd	PRC_* request code
+ *	sa	address the request refers to; must be a
+ *		struct sockaddr_in6, otherwise the request is ignored
+ *	d	NULL, or a struct ip6ctlparam supplying the mbuf, the IPv6
+ *		header and the offset of the TCP header in that mbuf
+ *
+ * With an IPv6 header available, the ports are taken from the TCP
+ * header at the given offset and passed to in6_pcbnotify() together
+ * with the source address; otherwise in6_pcbnotify() is called with
+ * zero ports and the zero address.
+ */
+void
+tcp6_ctlinput(cmd, sa, d)
+	int cmd;
+	struct sockaddr *sa;
+	void *d;
+{
+	register struct tcphdr *thp;
+	struct tcphdr th;
+	void (*notify) __P((struct inpcb *, int)) = tcp_notify;
+	struct sockaddr_in6 sa6;
+	struct ip6_hdr *ip6;
+	struct mbuf *m;
+	int off;
+
+	if (sa->sa_family != AF_INET6 ||
+	    sa->sa_len != sizeof(struct sockaddr_in6))
+		return;
+
+	if (cmd == PRC_QUENCH)
+		notify = tcp_quench;
+	else if (cmd == PRC_MSGSIZE)
+		notify = tcp_mtudisc;
+	/*
+	 * Valid indices into inet6ctlerrmap[] are 0 .. PRC_NCMDS - 1, so
+	 * cmd == PRC_NCMDS must be rejected as well.
+	 */
+	else if (!PRC_IS_REDIRECT(cmd) &&
+		 ((unsigned)cmd >= PRC_NCMDS || inet6ctlerrmap[cmd] == 0))
+		return;
+
+	/* if the parameter is from icmp6, decode it. */
+	if (d != NULL) {
+		struct ip6ctlparam *ip6cp = (struct ip6ctlparam *)d;
+		m = ip6cp->ip6c_m;
+		ip6 = ip6cp->ip6c_ip6;
+		off = ip6cp->ip6c_off;
+	} else {
+		m = NULL;
+		ip6 = NULL;
+		off = 0;	/* fool gcc */
+	}
+
+	/*
+	 * Translate addresses into internal form.
+	 * The check that sa is AF_INET6 is done at the top of this function.
+	 */
+	sa6 = *(struct sockaddr_in6 *)sa;
+	if (IN6_IS_ADDR_LINKLOCAL(&sa6.sin6_addr) != 0 && m != NULL &&
+	    m->m_pkthdr.rcvif != NULL)
+		sa6.sin6_addr.s6_addr16[1] = htons(m->m_pkthdr.rcvif->if_index);
+
+	if (ip6) {
+		/*
+		 * XXX: We assume that when IPV6 is non NULL,
+		 * M and OFF are valid.
+		 */
+		struct in6_addr s;
+
+		/* translate addresses into internal form */
+		memcpy(&s, &ip6->ip6_src, sizeof(s));
+		if (IN6_IS_ADDR_LINKLOCAL(&s) != 0 && m != NULL &&
+		    m->m_pkthdr.rcvif != NULL)
+			s.s6_addr16[1] = htons(m->m_pkthdr.rcvif->if_index);
+
+		/* check if we can safely examine src and dst ports */
+		if (m->m_pkthdr.len < off + sizeof(th))
+			return;
+
+		if (m->m_len < off + sizeof(th)) {
+			/*
+			 * this should be rare case
+			 * because now MINCLSIZE is "(MHLEN + 1)",
+			 * so we compromise on this copy...
+			 */
+			m_copydata(m, off, sizeof(th), (caddr_t)&th);
+			thp = &th;
+		} else
+			thp = (struct tcphdr *)(mtod(m, caddr_t) + off);
+		in6_pcbnotify(&tcb, (struct sockaddr *)&sa6, thp->th_dport,
+			      &s, thp->th_sport, cmd, notify);
+	} else
+		in6_pcbnotify(&tcb, (struct sockaddr *)&sa6, 0, &zeroin6_addr,
+			      0, cmd, notify);
+}
+#endif /* INET6 */
+
+/* State and parameters of the randomized initial send sequence generator. */
+#define TCP_RNDISS_ROUNDS	16	/* mixing rounds in tcp_rndiss_encrypt() */
+#define TCP_RNDISS_OUT	7200	/* added to tv_sec to set the next reseed time */
+#define TCP_RNDISS_MAX	30000	/* counter value at which a reseed is forced */
+
+u_int8_t tcp_rndiss_sbox[128];	/* refilled by read_random() on every init */
+u_int16_t tcp_rndiss_msb;	/* alternates between 0 and 0x8000 per init */
+u_int16_t tcp_rndiss_cnt;	/* values handed out since the last init */
+long tcp_rndiss_reseed;		/* tv_sec after which the next init is due */
+
+/*
+ * Scramble a 16-bit value through TCP_RNDISS_ROUNDS rounds.  Each round
+ * adds 0x79b9 to a running 16-bit sum, XORs in an entry of
+ * tcp_rndiss_sbox[] selected by the low 7 bits of (val ^ sum), shifted
+ * left by 7, and then recombines the low byte (shifted left by 7) with
+ * the high byte.
+ */
+u_int16_t
+tcp_rndiss_encrypt(val)
+	u_int16_t val;
+{
+	u_int16_t key = 0;
+	u_int16_t idx;
+	int round;
+
+	for (round = TCP_RNDISS_ROUNDS; round > 0; round--) {
+		key += 0x79b9;
+		idx = (val ^ key) & 0x7f;
+		val ^= ((u_int16_t)tcp_rndiss_sbox[idx]) << 7;
+		val = ((val & 0xff) << 7) | (val >> 8);
+	}
+
+	return val;
+}
+
+/*
+ * (Re)seed the ISS generator: refill the s-box from read_random(),
+ * schedule the next reseed TCP_RNDISS_OUT seconds from now, flip the
+ * most significant bit used for generated values and restart the
+ * counter.
+ */
+void
+tcp_rndiss_init()
+{
+	struct timeval now;
+
+	getmicrotime(&now);
+	read_random(tcp_rndiss_sbox, sizeof(tcp_rndiss_sbox));
+
+	tcp_rndiss_reseed = now.tv_sec + TCP_RNDISS_OUT;
+	if (tcp_rndiss_msb == 0x8000)
+		tcp_rndiss_msb = 0;
+	else
+		tcp_rndiss_msb = 0x8000;
+	tcp_rndiss_cnt = 0;
+}
+
+/*
+ * Return the next randomized initial send sequence number.
+ * The generator is reseeded first when TCP_RNDISS_MAX values have been
+ * handed out or the reseed time has passed.  The upper 16 bits are the
+ * scrambled counter combined with tcp_rndiss_msb; the lower bits are
+ * 15 random bits.
+ */
+tcp_seq
+tcp_rndiss_next()
+{
+	u_int16_t tmp;
+	tcp_seq hi;
+	struct timeval time;
+
+	getmicrotime(&time);
+
+	if (tcp_rndiss_cnt >= TCP_RNDISS_MAX ||
+	    time.tv_sec > tcp_rndiss_reseed)
+		tcp_rndiss_init();
+
+	read_random(&tmp, sizeof(tmp));
+
+	/*
+	 * Widen to tcp_seq before shifting: the 16-bit operands would
+	 * otherwise be promoted to int, and shifting a value with bit 15
+	 * set left by 16 overflows a signed int.
+	 */
+	hi = tcp_rndiss_encrypt(tcp_rndiss_cnt++) | tcp_rndiss_msb;
+
+	/* (tmp & 0x7fff) ensures a 32768 byte gap between ISS */
+	return ((hi << 16) | (tmp & 0x7fff));
+}
+
+
+/*
+ * Source quench notification: shrink the congestion window to a single
+ * segment.  It is opened again gradually as the connection proceeds.
+ * Nothing happens if the PCB has no tcpcb; the error argument is unused.
+ */
+void
+tcp_quench(inp, errno)
+	struct inpcb *inp;
+	int errno;
+{
+	struct tcpcb *tp;
+
+	tp = intotcpcb(inp);
+	if (tp == NULL)
+		return;
+	tp->snd_cwnd = tp->t_maxseg;
+}
+
+/*
+ * Notification routine selected for certain ICMP unreachable messages
+ * when the icmp_may_rst sysctl is enabled: drop the connection with the
+ * given error, but only while it is still in SYN-SENT state.
+ */
+void
+tcp_drop_syn_sent(inp, errno)
+	struct inpcb *inp;
+	int errno;
+{
+	struct tcpcb *tp;
+
+	tp = intotcpcb(inp);
+	if (tp == NULL || tp->t_state != TCPS_SYN_SENT)
+		return;
+	tcp_drop(tp, errno);
+}
+
+/*
+ * When `need fragmentation' ICMP is received, update our idea of the MSS
+ * based on the new value in the route.  Also nudge TCP to send something,
+ * since we know the packet we just sent was dropped.
+ * This duplicates some code in the tcp_mss() function in tcp_input.c.
+ *
+ * Does nothing if the PCB has no tcpcb.  If no route (or no route MTU)
+ * is available, the segment sizes fall back to the default MSS.  The
+ * segment size is only ever lowered here; the error argument is unused.
+ */
+void
+tcp_mtudisc(inp, errno)
+	struct inpcb *inp;
+	int errno;
+{
+	struct tcpcb *tp = intotcpcb(inp);
+	struct rtentry *rt;
+	struct rmxp_tao *taop;
+	struct socket *so = inp->inp_socket;
+	int offered;
+	int mss;
+#ifdef INET6
+	/*
+	 * Take the address family from inp itself: tp may be NULL here,
+	 * so it must not be dereferenced before the check below.
+	 */
+	int isipv6 = (inp->inp_vflag & INP_IPV6) != 0;
+#endif /* INET6 */
+
+	if (tp == NULL)
+		return;
+
+#ifdef INET6
+	if (isipv6)
+		rt = tcp_rtlookup6(inp);
+	else
+#endif /* INET6 */
+	rt = tcp_rtlookup(inp);
+	if (!rt || !rt->rt_rmx.rmx_mtu) {
+		tp->t_maxopd = tp->t_maxseg =
+#ifdef INET6
+			isipv6 ? tcp_v6mssdflt :
+#endif /* INET6 */
+			tcp_mssdflt;
+		return;
+	}
+	taop = rmx_taop(rt->rt_rmx);
+	offered = taop->tao_mssopt;
+	/* route MTU minus the fixed IP and TCP header sizes */
+	mss = rt->rt_rmx.rmx_mtu -
+#ifdef INET6
+		(isipv6 ?
+		 sizeof(struct ip6_hdr) + sizeof(struct tcphdr) :
+#endif /* INET6 */
+		 sizeof(struct tcpiphdr)
+#ifdef INET6
+		 )
+#endif /* INET6 */
+		;
+
+	if (offered)
+		mss = min(mss, offered);
+	/*
+	 * XXX - The above conditional probably violates the TCP
+	 * spec.  The problem is that, since we don't know the
+	 * other end's MSS, we are supposed to use a conservative
+	 * default.  But, if we do that, then MTU discovery will
+	 * never actually take place, because the conservative
+	 * default is much less than the MTUs typically seen
+	 * on the Internet today.  For the moment, we'll sweep
+	 * this under the carpet.
+	 *
+	 * The conservative default might not actually be a problem
+	 * if the only case this occurs is when sending an initial
+	 * SYN with options and data to a host we've never talked
+	 * to before.  Then, they will reply with an MSS value which
+	 * will get recorded and the new parameters should get
+	 * recomputed.  For Further Study.
+	 */
+	if (tp->t_maxopd <= mss)
+		return;		/* nothing to lower */
+	tp->t_maxopd = mss;
+
+	/* Leave room for the options that are in use on this connection. */
+	if ((tp->t_flags & (TF_REQ_TSTMP|TF_NOOPT)) == TF_REQ_TSTMP &&
+	    (tp->t_flags & TF_RCVD_TSTMP) == TF_RCVD_TSTMP)
+		mss -= TCPOLEN_TSTAMP_APPA;
+	if ((tp->t_flags & (TF_REQ_CC|TF_NOOPT)) == TF_REQ_CC &&
+	    (tp->t_flags & TF_RCVD_CC) == TF_RCVD_CC)
+		mss -= TCPOLEN_CC_APPA;
+	/* Above one cluster, round down to a multiple of MCLBYTES. */
+#if	(MCLBYTES & (MCLBYTES - 1)) == 0
+	if (mss > MCLBYTES)
+		mss &= ~(MCLBYTES-1);
+#else
+	if (mss > MCLBYTES)
+		mss = mss / MCLBYTES * MCLBYTES;
+#endif
+	if (so->so_snd.sb_hiwat < mss)
+		mss = so->so_snd.sb_hiwat;
+
+	tp->t_maxseg = mss;
+
+	/* Restart transmission from the first unacknowledged byte. */
+	tcpstat.tcps_mturesent++;
+	tp->t_rtttime = 0;
+	tp->snd_nxt = tp->snd_una;
+	tcp_output(tp);
+}
+
+/*
+ * Look up the routing entry to the peer of this inpcb.  A cached route
+ * that is up is returned directly.  Otherwise, if the foreign address
+ * is set, a new route is requested with rtalloc() and its result (NULL
+ * when none could be allocated) is returned; with no foreign address
+ * the cached pointer is returned unchanged.  This routine is called by
+ * TCP routines that access the rmx structure and by tcp_mss to get the
+ * interface MTU.
+ */
+struct rtentry *
+tcp_rtlookup(inp)
+	struct inpcb *inp;
+{
+	struct route *ro = &inp->inp_route;
+	struct rtentry *rt = ro->ro_rt;
+
+	/* Usable cached route. */
+	if (rt != NULL && (rt->rt_flags & RTF_UP))
+		return rt;
+
+	/* No peer address, so no destination to route to. */
+	if (inp->inp_faddr.s_addr == INADDR_ANY)
+		return rt;
+
+	/* No route yet, so try to acquire one */
+	ro->ro_dst.sa_family = AF_INET;
+	ro->ro_dst.sa_len = sizeof(ro->ro_dst);
+	((struct sockaddr_in *) &ro->ro_dst)->sin_addr = inp->inp_faddr;
+	rtalloc(ro);
+	return ro->ro_rt;
+}
+
+#ifdef INET6
+/*
+ * IPv6 counterpart of tcp_rtlookup(): return the cached IPv6 route of
+ * the inpcb if it is up; otherwise, when the foreign address is
+ * specified, request a new one with rtalloc() and return the result.
+ */
+struct rtentry *
+tcp_rtlookup6(inp)
+	struct inpcb *inp;
+{
+	struct route_in6 *ro6 = &inp->in6p_route;
+	struct rtentry *rt = ro6->ro_rt;
+
+	/* Usable cached route. */
+	if (rt != NULL && (rt->rt_flags & RTF_UP))
+		return rt;
+
+	/* No peer address, so no destination to route to. */
+	if (IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_faddr))
+		return rt;
+
+	/* No route yet, so try to acquire one */
+	ro6->ro_dst.sin6_family = AF_INET6;
+	ro6->ro_dst.sin6_len = sizeof(ro6->ro_dst);
+	ro6->ro_dst.sin6_addr = inp->in6p_faddr;
+	rtalloc((struct route *)ro6);
+	return ro6->ro_rt;
+}
+#endif /* INET6 */
+
+#ifdef IPSEC
+/*
+ * Compute the ESP/AH header size for TCP, including outer IP header.
+ * A scratch packet is built from the connection's header template and
+ * handed to ipsec4_hdrsiz()/ipsec6_hdrsiz().  Returns 0 when tp, its
+ * template or its inpcb is missing, or when no mbuf is available.
+ */
+size_t
+ipsec_hdrsiz_tcp(tp)
+	struct tcpcb *tp;
+{
+	struct inpcb *inp;
+	struct mbuf *m;
+	size_t hdrsiz;
+	struct ip *ip;
+#ifdef INET6
+	struct ip6_hdr *ip6;
+#endif /* INET6 */
+	struct tcphdr *th;
+
+	if (tp == NULL || tp->t_template == NULL)
+		return 0;
+	inp = tp->t_inpcb;
+	if (inp == NULL)
+		return 0;
+
+	MGETHDR(m, M_DONTWAIT, MT_DATA);
+	if (m == NULL)
+		return 0;
+
+#ifdef INET6
+	if ((inp->inp_vflag & INP_IPV6) != 0) {
+		/* IPv6 header followed by the TCP header. */
+		ip6 = mtod(m, struct ip6_hdr *);
+		th = (struct tcphdr *)(ip6 + 1);
+		m->m_len = sizeof(struct ip6_hdr) + sizeof(struct tcphdr);
+		m->m_pkthdr.len = m->m_len;
+		bcopy((caddr_t)tp->t_template->tt_ipgen, (caddr_t)ip6,
+		    sizeof(struct ip6_hdr));
+		bcopy((caddr_t)&tp->t_template->tt_t, (caddr_t)th,
+		    sizeof(struct tcphdr));
+		hdrsiz = ipsec6_hdrsiz(m, IPSEC_DIR_OUTBOUND, inp);
+	} else
+#endif /* INET6 */
+	{
+		/* IPv4 header followed by the TCP header. */
+		ip = mtod(m, struct ip *);
+		th = (struct tcphdr *)(ip + 1);
+		m->m_len = sizeof(struct tcpiphdr);
+		m->m_pkthdr.len = m->m_len;
+		bcopy((caddr_t)tp->t_template->tt_ipgen, (caddr_t)ip,
+		    sizeof(struct ip));
+		bcopy((caddr_t)&tp->t_template->tt_t, (caddr_t)th,
+		    sizeof(struct tcphdr));
+		hdrsiz = ipsec4_hdrsiz(m, IPSEC_DIR_OUTBOUND, inp);
+	}
+
+	m_free(m);
+	return hdrsiz;
+}
+#endif /*IPSEC*/
+
+/*
+ * Return a pointer to the cached information about the remote host,
+ * which is kept in the protocol specific part of the route metrics.
+ * Returns NULL unless the route to the peer exists, is up and is a
+ * host route.
+ */
+struct rmxp_tao *
+tcp_gettaocache(inp)
+	struct inpcb *inp;
+{
+	struct rtentry *rt;
+
+#ifdef INET6
+	if ((inp->inp_vflag & INP_IPV6) != 0)
+		rt = tcp_rtlookup6(inp);
+	else
+#endif /* INET6 */
+		rt = tcp_rtlookup(inp);
+
+	if (rt == NULL)
+		return NULL;
+	/* Both RTF_UP and RTF_HOST must be set. */
+	if (!(rt->rt_flags & RTF_UP) || !(rt->rt_flags & RTF_HOST))
+		return NULL;
+
+	return rmx_taop(rt->rt_rmx);
+}
+
+/*
+ * Clear all the TAO cache entries; called from tcp_init().
+ *
+ * XXX
+ * This is deliberately a no-op: the routing tables are assumed to be
+ * initialized at the same time as TCP, so no cache entries can be
+ * left over.
+ */
+static void
+tcp_cleartaocache()
+{
+	/* nothing to clear */
+}
diff --git a/sys/netinet/tcp_timer.c b/sys/netinet/tcp_timer.c
new file mode 100644
index 0000000..0a85cf8
--- /dev/null
+++ b/sys/netinet/tcp_timer.c
@@ -0,0 +1,463 @@
+/*
+ * Copyright (c) 1982, 1986, 1988, 1990, 1993, 1995
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)tcp_timer.c	8.2 (Berkeley) 5/24/95
+ * $FreeBSD$
+ */
+
+#include "opt_compat.h"
+#include "opt_inet6.h"
+#include "opt_tcpdebug.h"
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/kernel.h>
+#include <sys/sysctl.h>
+#include <sys/socket.h>
+#include <sys/socketvar.h>
+#include <sys/protosw.h>
+
+#include <machine/cpu.h>	/* before tcp_seq.h, for tcp_random18() */
+
+#include <net/route.h>
+
+#include <netinet/in.h>
+#include <netinet/in_systm.h>
+#include <netinet/in_pcb.h>
+#ifdef INET6
+#include <netinet6/in6_pcb.h>
+#endif
+#include <netinet/ip_var.h>
+#include <netinet/tcp.h>
+#include <netinet/tcp_fsm.h>
+#include <netinet/tcp_seq.h>
+#include <netinet/tcp_timer.h>
+#include <netinet/tcp_var.h>
+#include <netinet/tcpip.h>
+#ifdef TCPDEBUG
+#include <netinet/tcp_debug.h>
+#endif
+
+static int
+sysctl_msec_to_ticks(SYSCTL_HANDLER_ARGS)
+{
+	int error, s, tt;
+
+	/* The variable behind oid_arg1 holds ticks; userland sees msec. */
+	tt = *(int *)oidp->oid_arg1;
+	s = (int)((int64_t)tt * 1000 / hz);	/* tt * 1000 can overflow int */
+	error = sysctl_handle_int(oidp, &s, 0, req);
+	if (error || !req->newptr)	/* failed, or no new value supplied */
+		return (error);
+
+	/* Back to ticks, again in 64 bits; refuse less than one tick. */
+	tt = (int)((int64_t)s * hz / 1000);
+	if (tt < 1)
+		return (EINVAL);
+	*(int *)oidp->oid_arg1 = tt;
+	return (0);
+}
+
+int	tcp_keepinit;	/* time to establish connection; kept in ticks */
+SYSCTL_PROC(_net_inet_tcp, TCPCTL_KEEPINIT, keepinit, CTLTYPE_INT|CTLFLAG_RW,
+    &tcp_keepinit, 0, sysctl_msec_to_ticks, "I", "");
+
+int	tcp_keepidle;	/* time before keepalive probes begin; in ticks */
+SYSCTL_PROC(_net_inet_tcp, TCPCTL_KEEPIDLE, keepidle, CTLTYPE_INT|CTLFLAG_RW,
+    &tcp_keepidle, 0, sysctl_msec_to_ticks, "I", "");
+
+int	tcp_keepintvl;	/* time between keepalive probes; in ticks */
+SYSCTL_PROC(_net_inet_tcp, TCPCTL_KEEPINTVL, keepintvl, CTLTYPE_INT|CTLFLAG_RW,
+    &tcp_keepintvl, 0, sysctl_msec_to_ticks, "I", "");
+
+int	tcp_delacktime;	/* in ticks; the sysctl handler converts to msec */
+SYSCTL_PROC(_net_inet_tcp, TCPCTL_DELACKTIME, delacktime,
+    CTLTYPE_INT|CTLFLAG_RW, &tcp_delacktime, 0, sysctl_msec_to_ticks, "I",
+    "Time before a delayed ACK is sent");
+ 
+int	tcp_msl;	/* in ticks; the sysctl handler converts to msec */
+SYSCTL_PROC(_net_inet_tcp, OID_AUTO, msl, CTLTYPE_INT|CTLFLAG_RW,
+    &tcp_msl, 0, sysctl_msec_to_ticks, "I", "Maximum segment lifetime");
+
+static int	always_keepalive = 0;	/* tested in tcp_timer_keep() */
+SYSCTL_INT(_net_inet_tcp, OID_AUTO, always_keepalive, CTLFLAG_RW, 
+    &always_keepalive , 0, "Assume SO_KEEPALIVE on all TCP connections");
+
+static int	tcp_keepcnt = TCPTV_KEEPCNT;
+	/* max idle probes */
+int	tcp_maxpersistidle;
+	/* max idle time in persist */
+int	tcp_maxidle;	/* tcp_keepcnt * tcp_keepintvl; see tcp_slowtimo() */
+
+/*
+ * Tcp protocol slow timeout routine (nominally run every 500 ms).
+ * Here it only recomputes tcp_maxidle, at splnet, as the keepalive probe
+ * count times the probe interval; timer expiry is handled by callouts.
+ */
+void
+tcp_slowtimo()
+{
+	int s;
+
+	s = splnet();
+
+	tcp_maxidle = tcp_keepcnt * tcp_keepintvl;
+
+	splx(s);
+}
+
+/*
+ * Stop the 2MSL, persist, keepalive and retransmit callouts of TCP tp.
+ */
+void
+tcp_canceltimers(tp)
+	struct tcpcb *tp;
+{
+	callout_stop(tp->tt_2msl);
+	callout_stop(tp->tt_persist);
+	callout_stop(tp->tt_keep);
+	callout_stop(tp->tt_rexmt);	/* NOTE: tt_delack is left untouched */
+}
+
+int	tcp_syn_backoff[TCP_MAXRXTSHIFT + 1] =	/* used while in SYN_SENT */
+    { 1, 1, 1, 1, 1, 2, 4, 8, 16, 32, 64, 64, 64 };
+
+int	tcp_backoff[TCP_MAXRXTSHIFT + 1] =	/* indexed by tp->t_rxtshift */
+    { 1, 2, 4, 8, 16, 32, 64, 64, 64, 64, 64, 64, 64 };
+
+static int tcp_totbackoff = 511;	/* sum of tcp_backoff[] */
+
+/*
+ * TCP timer processing: delayed ACK callout, xtp is the struct tcpcb.
+ */
+void
+tcp_timer_delack(xtp)
+	void *xtp;
+{
+	struct tcpcb *tp = xtp;
+	int s;
+
+	s = splnet();
+	if (callout_pending(tp->tt_delack) || !callout_active(tp->tt_delack)) {
+		splx(s);
+		return;		/* timer was rescheduled or stopped */
+	}
+	callout_deactivate(tp->tt_delack);
+
+	tp->t_flags |= TF_ACKNOW;	/* make tcp_output() send the ACK now */
+	tcpstat.tcps_delack++;
+	(void) tcp_output(tp);
+	splx(s);
+}
+
+void
+tcp_timer_2msl(xtp)
+	void *xtp;		/* the struct tcpcb this callout belongs to */
+{
+	struct tcpcb *tp = xtp;
+	int s;
+#ifdef TCPDEBUG
+	int ostate;
+
+	ostate = tp->t_state;	/* state on entry, handed to tcp_trace() */
+#endif
+	s = splnet();
+	if (callout_pending(tp->tt_2msl) || !callout_active(tp->tt_2msl)) {
+		splx(s);
+		return;		/* timer was rescheduled or stopped */
+	}
+	callout_deactivate(tp->tt_2msl);
+	/*
+	 * 2 MSL timeout in shutdown went off.  If we're closed but
+	 * still waiting for peer to close and connection has been idle
+	 * too long, or if 2MSL time is up from TIME_WAIT, delete connection
+	 * control block.  Otherwise, check again in a bit.
+	 */
+	if (tp->t_state != TCPS_TIME_WAIT &&
+	    (ticks - tp->t_rcvtime) <= tcp_maxidle)
+		callout_reset(tp->tt_2msl, tcp_keepintvl,
+			      tcp_timer_2msl, tp);
+	else
+		tp = tcp_close(tp);	/* result is re-tested before tracing */
+
+#ifdef TCPDEBUG
+	if (tp && (tp->t_inpcb->inp_socket->so_options & SO_DEBUG))
+		tcp_trace(TA_USER, ostate, tp, (void *)0, (struct tcphdr *)0,
+			  PRU_SLOWTIMO);
+#endif
+	splx(s);
+}
+
+void
+tcp_timer_keep(xtp)
+	void *xtp;		/* the struct tcpcb this callout belongs to */
+{
+	struct tcpcb *tp = xtp;
+	int s;
+#ifdef TCPDEBUG
+	int ostate;
+
+	ostate = tp->t_state;	/* state on entry, handed to tcp_trace() */
+#endif
+	s = splnet();
+	if (callout_pending(tp->tt_keep) || !callout_active(tp->tt_keep)) {
+		splx(s);
+		return;		/* timer was rescheduled or stopped */
+	}
+	callout_deactivate(tp->tt_keep);
+	/*
+	 * Keep-alive timer went off; send something
+	 * or drop connection if idle for too long.
+	 */
+	tcpstat.tcps_keeptimeo++;
+	if (tp->t_state < TCPS_ESTABLISHED)
+		goto dropit;	/* still not established when timer fired */
+	if ((always_keepalive ||
+	     tp->t_inpcb->inp_socket->so_options & SO_KEEPALIVE) &&
+	    tp->t_state <= TCPS_CLOSING) {
+		if ((ticks - tp->t_rcvtime) >= tcp_keepidle + tcp_maxidle)
+			goto dropit;	/* idle past keepidle + maxidle */
+		/*
+		 * Send a packet designed to force a response
+		 * if the peer is up and reachable:
+		 * either an ACK if the connection is still alive,
+		 * or an RST if the peer has closed the connection
+		 * due to timeout or reboot.
+		 * Using sequence number tp->snd_una-1
+		 * causes the transmitted zero-length segment
+		 * to lie outside the receive window;
+		 * by the protocol spec, this requires the
+		 * correspondent TCP to respond.
+		 */
+		tcpstat.tcps_keepprobe++;
+		tcp_respond(tp, tp->t_template->tt_ipgen,
+			    &tp->t_template->tt_t, (struct mbuf *)NULL,
+			    tp->rcv_nxt, tp->snd_una - 1, 0);
+		callout_reset(tp->tt_keep, tcp_keepintvl, tcp_timer_keep, tp);
+	} else
+		callout_reset(tp->tt_keep, tcp_keepidle, tcp_timer_keep, tp);
+
+#ifdef TCPDEBUG
+	if (tp->t_inpcb->inp_socket->so_options & SO_DEBUG)
+		tcp_trace(TA_USER, ostate, tp, (void *)0, (struct tcphdr *)0,
+			  PRU_SLOWTIMO);
+#endif
+	splx(s);
+	return;
+
+dropit:
+	tcpstat.tcps_keepdrops++;
+	tp = tcp_drop(tp, ETIMEDOUT);	/* result is re-tested before tracing */
+
+#ifdef TCPDEBUG
+	if (tp && (tp->t_inpcb->inp_socket->so_options & SO_DEBUG))
+		tcp_trace(TA_USER, ostate, tp, (void *)0, (struct tcphdr *)0,
+			  PRU_SLOWTIMO);
+#endif
+	splx(s);
+}
+
+void
+tcp_timer_persist(xtp)
+	void *xtp;		/* the struct tcpcb this callout belongs to */
+{
+	struct tcpcb *tp = xtp;
+	int s;
+#ifdef TCPDEBUG
+	int ostate;
+
+	ostate = tp->t_state;	/* state on entry, handed to tcp_trace() */
+#endif
+	s = splnet();
+	if (callout_pending(tp->tt_persist) || !callout_active(tp->tt_persist)){
+		splx(s);
+		return;		/* timer was rescheduled or stopped */
+	}
+	callout_deactivate(tp->tt_persist);
+	/*
+	 * Persistence timer into zero window.
+	 * Force a byte to be output, if possible.
+	 */
+	tcpstat.tcps_persisttimeo++;
+	/*
+	 * Hack: if the peer is dead/unreachable, we do not
+	 * time out if the window is closed.  After a full
+	 * backoff, drop the connection if the idle time
+	 * (no responses to probes) reaches the maximum
+	 * backoff that we would use if retransmitting.
+	 */
+	if (tp->t_rxtshift == TCP_MAXRXTSHIFT &&
+	    ((ticks - tp->t_rcvtime) >= tcp_maxpersistidle ||
+	     (ticks - tp->t_rcvtime) >= TCP_REXMTVAL(tp) * tcp_totbackoff)) {
+		tcpstat.tcps_persistdrop++;
+		tp = tcp_drop(tp, ETIMEDOUT);	/* re-tested before tracing */
+		goto out;
+	}
+	tcp_setpersist(tp);
+	tp->t_force = 1;	/* set only around this tcp_output() call */
+	(void) tcp_output(tp);
+	tp->t_force = 0;
+
+out:
+#ifdef TCPDEBUG
+	if (tp && tp->t_inpcb->inp_socket->so_options & SO_DEBUG)
+		tcp_trace(TA_USER, ostate, tp, (void *)0, (struct tcphdr *)0,
+			  PRU_SLOWTIMO);
+#endif
+	splx(s);
+}
+
+void
+tcp_timer_rexmt(xtp)
+	void *xtp;		/* the struct tcpcb this callout belongs to */
+{
+	struct tcpcb *tp = xtp;
+	int s;
+	int rexmt;		/* backed-off retransmit interval */
+#ifdef TCPDEBUG
+	int ostate;
+
+	ostate = tp->t_state;	/* state on entry, handed to tcp_trace() */
+#endif
+	s = splnet();
+	if (callout_pending(tp->tt_rexmt) || !callout_active(tp->tt_rexmt)) {
+		splx(s);
+		return;		/* timer was rescheduled or stopped */
+	}
+	callout_deactivate(tp->tt_rexmt);
+	/*
+	 * Retransmission timer went off.  Message has not
+	 * been acked within retransmit interval.  Back off
+	 * to a longer retransmit interval and retransmit one segment.
+	 */
+	if (++tp->t_rxtshift > TCP_MAXRXTSHIFT) {
+		tp->t_rxtshift = TCP_MAXRXTSHIFT;
+		tcpstat.tcps_timeoutdrop++;
+		tp = tcp_drop(tp, tp->t_softerror ?
+			      tp->t_softerror : ETIMEDOUT);
+		goto out;
+	}
+	if (tp->t_rxtshift == 1) {
+		/*
+		 * first retransmit; record ssthresh and cwnd so they can
+		 * be recovered if this turns out to be a "bad" retransmit.
+		 * A retransmit is considered "bad" if an ACK for this
+		 * segment is received within RTT/2 interval; the assumption
+		 * here is that the ACK was already in flight.  See
+		 * "On Estimating End-to-End Network Path Properties" by
+		 * Allman and Paxson for more details.
+		 */
+		tp->snd_cwnd_prev = tp->snd_cwnd;
+		tp->snd_ssthresh_prev = tp->snd_ssthresh;
+		tp->t_badrxtwin = ticks + (tp->t_srtt >> (TCP_RTT_SHIFT + 1));
+	}
+	tcpstat.tcps_rexmttimeo++;
+	if (tp->t_state == TCPS_SYN_SENT)	/* SYN has its own table */
+		rexmt = TCP_REXMTVAL(tp) * tcp_syn_backoff[tp->t_rxtshift];
+	else
+		rexmt = TCP_REXMTVAL(tp) * tcp_backoff[tp->t_rxtshift];
+	TCPT_RANGESET(tp->t_rxtcur, rexmt,
+		      tp->t_rttmin, TCPTV_REXMTMAX);
+	/*
+	 * If losing, let the lower level know and try for
+	 * a better route.  Also, if we backed off this far,
+	 * our srtt estimate is probably bogus.  Clobber it
+	 * so we'll take the next rtt measurement as our srtt;
+	 * move the current srtt into rttvar to keep the current
+	 * retransmit times until then.
+	 */
+	if (tp->t_rxtshift > TCP_MAXRXTSHIFT / 4) {
+#ifdef INET6
+		if ((tp->t_inpcb->inp_vflag & INP_IPV6) != 0)
+			in6_losing(tp->t_inpcb);
+		else
+#endif
+		in_losing(tp->t_inpcb);
+		tp->t_rttvar += (tp->t_srtt >> TCP_RTT_SHIFT);
+		tp->t_srtt = 0;
+	}
+	tp->snd_nxt = tp->snd_una;	/* resend from the oldest unacked byte */
+	/*
+	 * Note:  We overload snd_recover to function also as the
+	 * snd_last variable described in RFC 2582
+	 */
+	tp->snd_recover = tp->snd_max;
+	/*
+	 * Force a segment to be sent.
+	 */
+	tp->t_flags |= TF_ACKNOW;
+	/*
+	 * If timing a segment in this window, stop the timer.
+	 */
+	tp->t_rtttime = 0;
+	/*
+	 * Close the congestion window down to one segment
+	 * (we'll open it by one segment for each ack we get).
+	 * Since we probably have a window's worth of unacked
+	 * data accumulated, this "slow start" keeps us from
+	 * dumping all that data as back-to-back packets (which
+	 * might overwhelm an intermediate gateway).
+	 *
+	 * There are two phases to the opening: Initially we
+	 * open by one mss on each ack.  This makes the window
+	 * size increase exponentially with time.  If the
+	 * window is larger than the path can handle, this
+	 * exponential growth results in dropped packet(s)
+	 * almost immediately.  To get more time between
+	 * drops but still "push" the network to take advantage
+	 * of improving conditions, we switch from exponential
+	 * to linear window opening at some threshold size.
+	 * For a threshold, we use half the current window
+	 * size, truncated to a multiple of the mss.
+	 *
+	 * (the minimum cwnd that will give us exponential
+	 * growth is 2 mss.  We don't allow the threshold
+	 * to go below this.)
+	 */
+	{
+		u_int win = min(tp->snd_wnd, tp->snd_cwnd) / 2 / tp->t_maxseg;
+		if (win < 2)
+			win = 2;
+		tp->snd_cwnd = tp->t_maxseg;
+		tp->snd_ssthresh = win * tp->t_maxseg;
+		tp->t_dupacks = 0;
+	}
+	(void) tcp_output(tp);
+
+out:
+#ifdef TCPDEBUG
+	if (tp && (tp->t_inpcb->inp_socket->so_options & SO_DEBUG))
+		tcp_trace(TA_USER, ostate, tp, (void *)0, (struct tcphdr *)0,
+			  PRU_SLOWTIMO);
+#endif
+	splx(s);
+}
diff --git a/sys/netinet/tcp_timer.h b/sys/netinet/tcp_timer.h
new file mode 100644
index 0000000..b2bcb43
--- /dev/null
+++ b/sys/netinet/tcp_timer.h
@@ -0,0 +1,139 @@
+/*
+ * Copyright (c) 1982, 1986, 1993
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)tcp_timer.h	8.1 (Berkeley) 6/10/93
+ * $FreeBSD$
+ */
+
+#ifndef _NETINET_TCP_TIMER_H_
+#define _NETINET_TCP_TIMER_H_
+
+/*
+ * The TCPT_REXMT timer is used to force retransmissions.
+ * The TCP has the TCPT_REXMT timer set whenever segments
+ * have been sent for which ACKs are expected but not yet
+ * received.  If an ACK is received which advances tp->snd_una,
+ * then the retransmit timer is cleared (if there are no more
+ * outstanding segments) or reset to the base value (if there
+ * are more ACKs expected).  Whenever the retransmit timer goes off,
+ * we retransmit one unacknowledged segment, and do a backoff
+ * on the retransmit timer.
+ *
+ * The TCPT_PERSIST timer is used to keep window size information
+ * flowing even if the window goes shut.  If all previous transmissions
+ * have been acknowledged (so that there are no retransmissions in progress),
+ * and the window is too small to bother sending anything, then we start
+ * the TCPT_PERSIST timer.  When it expires, if the window is nonzero,
+ * we go to transmit state.  Otherwise, at intervals send a single byte
+ * into the peer's window to force him to update our window information.
+ * We do this at most as often as TCPT_PERSMIN time intervals,
+ * but no more frequently than the current estimate of round-trip
+ * packet time.  The TCPT_PERSIST timer is cleared whenever we receive
+ * a window update from the peer.
+ *
+ * The TCPT_KEEP timer is used to keep connections alive.  If a
+ * connection is idle (no segments received) for TCPTV_KEEP_INIT amount of time,
+ * but not yet established, then we drop the connection.  Once the connection
+ * is established, if the connection is idle for TCPTV_KEEP_IDLE time
+ * (and keepalives have been enabled on the socket), we begin to probe
+ * the connection.  We force the peer to send us a segment by sending:
+ *	<SEQ=SND.UNA-1><ACK=RCV.NXT><CTL=ACK>
+ * This segment is (deliberately) outside the window, and should elicit
+ * an ack segment in response from the peer.  If, despite the TCPT_KEEP
+ * initiated segments we cannot elicit a response from a peer in TCPT_MAXIDLE
+ * amount of time probing, then we drop the connection.
+ */
+
+/*
+ * Time constants.
+ */
+#define	TCPTV_MSL	( 30*hz)		/* max seg lifetime (hah!) */
+#define	TCPTV_SRTTBASE	0			/* base roundtrip time;
+						   if 0, no idea yet */
+#define	TCPTV_RTOBASE	(  3*hz)		/* assumed RTO if no info */
+#define	TCPTV_SRTTDFLT	(  3*hz)		/* assumed RTT if no info */
+
+#define	TCPTV_PERSMIN	(  5*hz)		/* retransmit persistence */
+#define	TCPTV_PERSMAX	( 60*hz)		/* maximum persist interval */
+
+#define	TCPTV_KEEP_INIT	( 75*hz)		/* initial connect keepalive */
+#define	TCPTV_KEEP_IDLE	(120*60*hz)		/* dflt time before probing */
+#define	TCPTV_KEEPINTVL	( 75*hz)		/* default probe interval */
+#define	TCPTV_KEEPCNT	8			/* max probes before drop */
+
+#define	TCPTV_MIN	(  1*hz)		/* minimum allowable value */
+#define	TCPTV_REXMTMAX	( 64*hz)		/* max allowable REXMT value */
+
+#define TCPTV_TWTRUNC	8			/* RTO factor to truncate TW */
+
+#define	TCP_LINGERTIME	120			/* linger at most 2 minutes */
+
+#define	TCP_MAXRXTSHIFT	12			/* maximum retransmits */
+
+#define	TCPTV_DELACK	(hz / PR_FASTHZ / 2)	/* 100ms timeout */
+
+#ifdef	TCPTIMERS
+static char *tcptimers[] =
+    { "REXMT", "PERSIST", "KEEP", "2MSL" };
+#endif
+
+/*
+ * Set tv to value, clamped into [tvmin, tvmax]; compares are done as u_long.
+ */
+#define	TCPT_RANGESET(tv, value, tvmin, tvmax) do { \
+	(tv) = (value); \
+	if ((u_long)(tv) < (u_long)(tvmin)) \
+		(tv) = (tvmin); \
+	else if ((u_long)(tv) > (u_long)(tvmax)) \
+		(tv) = (tvmax); \
+} while(0)
+
+#ifdef _KERNEL
+extern int tcp_keepinit;		/* time to establish connection */
+extern int tcp_keepidle;		/* time before keepalive probes begin */
+extern int tcp_keepintvl;		/* time between keepalive probes */
+extern int tcp_maxidle;			/* time to drop after starting probes */
+extern int tcp_delacktime;		/* time before sending a delayed ACK */
+extern int tcp_maxpersistidle;		/* max idle time in persist */
+extern int tcp_msl;			/* maximum segment lifetime */
+extern int tcp_ttl;			/* time to live for TCP segs */
+extern int tcp_backoff[];		/* retransmit backoff multipliers */
+
+void	tcp_timer_2msl __P((void *xtp));	/* timer callouts; */
+void	tcp_timer_keep __P((void *xtp));	/* xtp is used as a */
+void	tcp_timer_persist __P((void *xtp));	/* struct tcpcb pointer */
+void	tcp_timer_rexmt __P((void *xtp));
+void	tcp_timer_delack __P((void *xtp));
+
+#endif /* _KERNEL */
+
+#endif /* !_NETINET_TCP_TIMER_H_ */
diff --git a/sys/netinet/tcp_timewait.c b/sys/netinet/tcp_timewait.c
new file mode 100644
index 0000000..4089551
--- /dev/null
+++ b/sys/netinet/tcp_timewait.c
@@ -0,0 +1,1424 @@
+/*
+ * Copyright (c) 1982, 1986, 1988, 1990, 1993, 1995
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)tcp_subr.c	8.2 (Berkeley) 5/24/95
+ * $FreeBSD$
+ */
+
+#include "opt_compat.h"
+#include "opt_inet6.h"
+#include "opt_ipsec.h"
+#include "opt_tcpdebug.h"
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/callout.h>
+#include <sys/kernel.h>
+#include <sys/sysctl.h>
+#include <sys/malloc.h>
+#include <sys/mbuf.h>
+#ifdef INET6
+#include <sys/domain.h>
+#endif
+#include <sys/proc.h>
+#include <sys/socket.h>
+#include <sys/socketvar.h>
+#include <sys/protosw.h>
+#include <sys/random.h>
+
+#include <vm/vm_zone.h>
+
+#include <net/route.h>
+#include <net/if.h>
+
+#define _IP_VHL
+#include <netinet/in.h>
+#include <netinet/in_systm.h>
+#include <netinet/ip.h>
+#ifdef INET6
+#include <netinet/ip6.h>
+#endif
+#include <netinet/in_pcb.h>
+#ifdef INET6
+#include <netinet6/in6_pcb.h>
+#endif
+#include <netinet/in_var.h>
+#include <netinet/ip_var.h>
+#ifdef INET6
+#include <netinet6/ip6_var.h>
+#endif
+#include <netinet/tcp.h>
+#include <netinet/tcp_fsm.h>
+#include <netinet/tcp_seq.h>
+#include <netinet/tcp_timer.h>
+#include <netinet/tcp_var.h>
+#ifdef INET6
+#include <netinet6/tcp6_var.h>
+#endif
+#include <netinet/tcpip.h>
+#ifdef TCPDEBUG
+#include <netinet/tcp_debug.h>
+#endif
+#include <netinet6/ip6protosw.h>
+
+#ifdef IPSEC
+#include <netinet6/ipsec.h>
+#ifdef INET6
+#include <netinet6/ipsec6.h>
+#endif
+#endif /*IPSEC*/
+
+#include <machine/in_cksum.h>
+
+int 	tcp_mssdflt = TCP_MSS;
+SYSCTL_INT(_net_inet_tcp, TCPCTL_MSSDFLT, mssdflt, CTLFLAG_RW, 
+    &tcp_mssdflt , 0, "Default TCP Maximum Segment Size");
+
+#ifdef INET6
+int	tcp_v6mssdflt = TCP6_MSS;
+SYSCTL_INT(_net_inet_tcp, TCPCTL_V6MSSDFLT, v6mssdflt,
+	CTLFLAG_RW, &tcp_v6mssdflt , 0,
+	"Default TCP Maximum Segment Size for IPv6");
+#endif
+
+#if 0	/* compiled out: the rttdflt knob is currently disabled */
+static int 	tcp_rttdflt = TCPTV_SRTTDFLT / PR_SLOWHZ;
+SYSCTL_INT(_net_inet_tcp, TCPCTL_RTTDFLT, rttdflt, CTLFLAG_RW, 
+    &tcp_rttdflt , 0, "Default maximum TCP Round Trip Time");
+#endif
+
+static int	tcp_do_rfc1323 = 1;
+SYSCTL_INT(_net_inet_tcp, TCPCTL_DO_RFC1323, rfc1323, CTLFLAG_RW, 
+    &tcp_do_rfc1323 , 0, "Enable rfc1323 (high performance TCP) extensions");
+
+static int	tcp_do_rfc1644 = 0;
+SYSCTL_INT(_net_inet_tcp, TCPCTL_DO_RFC1644, rfc1644, CTLFLAG_RW, 
+    &tcp_do_rfc1644 , 0, "Enable rfc1644 (TTCP) extensions");
+
+static int	tcp_tcbhashsize = 0;	/* set in tcp_init(); read-only sysctl */
+SYSCTL_INT(_net_inet_tcp, OID_AUTO, tcbhashsize, CTLFLAG_RD,
+     &tcp_tcbhashsize, 0, "Size of TCP control-block hashtable");
+
+static int	do_tcpdrain = 1;
+SYSCTL_INT(_net_inet_tcp, OID_AUTO, do_tcpdrain, CTLFLAG_RW, &do_tcpdrain, 0,
+     "Enable tcp_drain routine for extra help when low on mbufs");
+
+SYSCTL_INT(_net_inet_tcp, OID_AUTO, pcbcount, CTLFLAG_RD, 
+    &tcbinfo.ipi_count, 0, "Number of active PCBs");
+
+static int	icmp_may_rst = 1;
+SYSCTL_INT(_net_inet_tcp, OID_AUTO, icmp_may_rst, CTLFLAG_RW, &icmp_may_rst, 0, 
+    "Certain ICMP unreachable messages may abort connections in SYN_SENT");
+
+static void	tcp_cleartaocache __P((void));	/* called from tcp_init() */
+static void	tcp_notify __P((struct inpcb *, int));
+
+/*
+ * Target size of TCP PCB hash tables. Must be a power of two.
+ *
+ * Note that this can be overridden by the kernel environment
+ * variable net.inet.tcp.tcbhashsize
+ */
+#ifndef TCBHASHSIZE
+#define TCBHASHSIZE	512
+#endif
+
+/*
+ * This is the actual shape of what we allocate using the zone
+ * allocator.  Doing it this way allows us to protect both structures
+ * using the same generation count, and also eliminates the overhead
+ * of allocating tcpcbs separately.  By hiding the structure here,
+ * we avoid changing most of the rest of the code (although it needs
+ * to be changed, eventually, for greater efficiency).
+ */
+#define	ALIGNMENT	32
+#define	ALIGNM1		(ALIGNMENT - 1)
+struct	inp_tp {
+	union {
+		struct	inpcb inp;
+		/* sizeof(struct inpcb) rounded up to a multiple of ALIGNMENT */
+		char	align[(sizeof(struct inpcb) + ALIGNM1) & ~ALIGNM1];
+	} inp_tp_u;
+	struct	tcpcb tcb;	/* follows the padded inpcb */
+	struct	callout inp_tp_rexmt, inp_tp_persist, inp_tp_keep, inp_tp_2msl;
+	struct	callout inp_tp_delack;	/* one callout per TCP timer */
+};
+#undef ALIGNMENT
+#undef ALIGNM1
+
+/*
+ * Tcp initialization: timer defaults, PCB list/hash tables, PCB zone.
+ */
+void
+tcp_init()
+{
+	int hashsize;
+	
+	tcp_ccgen = 1;
+	tcp_cleartaocache();
+
+	tcp_delacktime = TCPTV_DELACK;
+	tcp_keepinit = TCPTV_KEEP_INIT;
+	tcp_keepidle = TCPTV_KEEP_IDLE;
+	tcp_keepintvl = TCPTV_KEEPINTVL;
+	tcp_maxpersistidle = TCPTV_KEEP_IDLE;	/* same default as keepidle */
+	tcp_msl = TCPTV_MSL;
+
+	LIST_INIT(&tcb);
+	tcbinfo.listhead = &tcb;
+	TUNABLE_INT_FETCH("net.inet.tcp.tcbhashsize", TCBHASHSIZE, hashsize);
+	if (!powerof2(hashsize)) {
+		printf("WARNING: TCB hash size not a power of 2\n");
+		hashsize = 512; /* safe default */
+	}
+	tcp_tcbhashsize = hashsize;	/* value reported by the sysctl */
+	tcbinfo.hashbase = hashinit(hashsize, M_PCB, &tcbinfo.hashmask);
+	tcbinfo.porthashbase = hashinit(hashsize, M_PCB,
+					&tcbinfo.porthashmask);
+	tcbinfo.ipi_zone = zinit("tcpcb", sizeof(struct inp_tp), maxsockets,
+				 ZONE_INTERRUPT, 0);	/* items are inp_tp */
+#ifdef INET6
+#define TCP_MINPROTOHDR (sizeof(struct ip6_hdr) + sizeof(struct tcphdr))
+#else /* INET6 */
+#define TCP_MINPROTOHDR (sizeof(struct tcpiphdr))
+#endif /* INET6 */
+	if (max_protohdr < TCP_MINPROTOHDR)
+		max_protohdr = TCP_MINPROTOHDR;
+	if (max_linkhdr + TCP_MINPROTOHDR > MHLEN)
+		panic("tcp_init");	/* link + TCP/IP headers exceed MHLEN */
+#undef TCP_MINPROTOHDR
+}
+
+/*
+ * Create template to be used to send tcp packets on a connection.
+ * Call after host entry created.  Reuses tp->t_template when it is
+ * set, otherwise allocates an mbuf (returning 0 if none is available),
+ * and fills in a skeletal tcp/ip header from the addresses in the inpcb.
+ */
+struct tcptemp *
+tcp_template(tp)
+	struct tcpcb *tp;
+{
+	register struct inpcb *inp = tp->t_inpcb;
+	register struct mbuf *m;
+	register struct tcptemp *n;
+
+	if ((n = tp->t_template) == 0) {
+		m = m_get(M_DONTWAIT, MT_HEADER);
+		if (m == NULL)
+			return (0);
+		m->m_len = sizeof (struct tcptemp);
+		n = mtod(m, struct tcptemp *);	/* template lives in the mbuf */
+	}
+#ifdef INET6
+	if ((inp->inp_vflag & INP_IPV6) != 0) {
+		register struct ip6_hdr *ip6;
+
+		ip6 = (struct ip6_hdr *)n->tt_ipgen;
+		ip6->ip6_flow = (ip6->ip6_flow & ~IPV6_FLOWINFO_MASK) |
+			(inp->in6p_flowinfo & IPV6_FLOWINFO_MASK);
+		ip6->ip6_vfc = (ip6->ip6_vfc & ~IPV6_VERSION_MASK) |
+			(IPV6_VERSION & IPV6_VERSION_MASK);
+		ip6->ip6_nxt = IPPROTO_TCP;
+		/* NOTE(review): stored without htons() here -- confirm */
+		ip6->ip6_plen = sizeof(struct tcphdr);
+		ip6->ip6_src = inp->in6p_laddr;
+		ip6->ip6_dst = inp->in6p_faddr;
+		n->tt_t.th_sum = 0;
+	} else
+#endif
+      {
+	struct ip *ip = (struct ip *)n->tt_ipgen;
+
+	bzero(ip, sizeof(struct ip));		/* XXX overkill? */
+	ip->ip_vhl = IP_VHL_BORING;
+	ip->ip_p = IPPROTO_TCP;
+	ip->ip_src = inp->inp_laddr;
+	ip->ip_dst = inp->inp_faddr;
+	n->tt_t.th_sum = in_pseudo(ip->ip_src.s_addr, ip->ip_dst.s_addr,
+	    htons(sizeof(struct tcphdr) + IPPROTO_TCP));
+      }
+	n->tt_t.th_sport = inp->inp_lport;
+	n->tt_t.th_dport = inp->inp_fport;
+	n->tt_t.th_seq = 0;
+	n->tt_t.th_ack = 0;
+	n->tt_t.th_x2 = 0;
+	n->tt_t.th_off = 5;	/* header length in 32-bit words, no options */
+	n->tt_t.th_flags = 0;
+	n->tt_t.th_win = 0;
+	n->tt_t.th_urp = 0;
+	return (n);	/* not stored in tp->t_template by this function */
+}
+
+/*
+ * Send a single message to the TCP at address specified by
+ * the given TCP/IP header.  If m == 0, then we make a copy
+ * of the IP header at ipgen and the TCP header at th in a new mbuf
+ * and send it with TH_ACK set.  This is used to force keep alive
+ * messages out using the TCP template for a connection
+ * tp->t_template.  If m != 0, the reply is built in place in m with
+ * addresses and ports swapped, mbufs chained after m are freed,
+ * and the given flags are used.
+ *
+ * In any case the ack and sequence number of the transmitted
+ * segment are as specified by the parameters.
+ *
+ * NOTE: If m != NULL, then ipgen must point to *inside* the mbuf.
+ */
+void
+tcp_respond(tp, ipgen, th, m, ack, seq, flags)
+	struct tcpcb *tp;
+	void *ipgen;
+	register struct tcphdr *th;
+	register struct mbuf *m;
+	tcp_seq ack, seq;
+	int flags;
+{
+	register int tlen;
+	int win = 0;		/* stays 0 for TH_RST or when tp is NULL */
+	struct route *ro = 0;
+	struct route sro;	/* scratch route used when tp is NULL */
+	struct ip *ip;
+	struct tcphdr *nth;	/* TCP header inside the outgoing mbuf */
+#ifdef INET6
+	struct route_in6 *ro6 = 0;
+	struct route_in6 sro6;
+	struct ip6_hdr *ip6;
+	int isipv6;
+#endif /* INET6 */
+	int ipflags = 0;
+
+#ifdef INET6
+	isipv6 = IP_VHL_V(((struct ip *)ipgen)->ip_vhl) == 6;
+	ip6 = ipgen;
+#endif /* INET6 */
+	ip = ipgen;
+
+	if (tp) {
+		if (!(flags & TH_RST)) {
+			win = sbspace(&tp->t_inpcb->inp_socket->so_rcv);
+			if (win > (long)TCP_MAXWIN << tp->rcv_scale)
+				win = (long)TCP_MAXWIN << tp->rcv_scale;
+		}
+#ifdef INET6
+		if (isipv6)
+			ro6 = &tp->t_inpcb->in6p_route;
+		else
+#endif /* INET6 */
+		ro = &tp->t_inpcb->inp_route;
+	} else {
+#ifdef INET6
+		if (isipv6) {
+			ro6 = &sro6;
+			bzero(ro6, sizeof *ro6);
+		} else
+#endif /* INET6 */
+	      {
+		ro = &sro;
+		bzero(ro, sizeof *ro);
+	      }
+	}
+	if (m == 0) {
+		m = m_gethdr(M_DONTWAIT, MT_HEADER);
+		if (m == NULL)
+			return;
+		tlen = 0;
+		m->m_data += max_linkhdr;	/* leave room for a link header */
+#ifdef INET6
+		if (isipv6) {
+			bcopy((caddr_t)ip6, mtod(m, caddr_t), 
+			      sizeof(struct ip6_hdr));
+			ip6 = mtod(m, struct ip6_hdr *);
+			nth = (struct tcphdr *)(ip6 + 1);
+		} else
+#endif /* INET6 */
+	      {
+		bcopy((caddr_t)ip, mtod(m, caddr_t), sizeof(struct ip));
+		ip = mtod(m, struct ip *);
+		nth = (struct tcphdr *)(ip + 1);
+	      }
+		bcopy((caddr_t)th, (caddr_t)nth, sizeof(struct tcphdr));
+		flags = TH_ACK;	/* the caller's flags are replaced here */
+	} else {
+		m_freem(m->m_next);
+		m->m_next = 0;
+		m->m_data = (caddr_t)ipgen;
+		/* m_len is set later */
+		tlen = 0;
+#define xchg(a,b,type) { type t; t=a; a=b; b=t; }
+#ifdef INET6
+		if (isipv6) {
+			xchg(ip6->ip6_dst, ip6->ip6_src, struct in6_addr);
+			nth = (struct tcphdr *)(ip6 + 1);
+		} else
+#endif /* INET6 */
+	      {
+		xchg(ip->ip_dst.s_addr, ip->ip_src.s_addr, n_long);
+		nth = (struct tcphdr *)(ip + 1);
+	      }
+		if (th != nth) {
+			/*
+			 * this is usually a case when an extension header
+			 * exists between the IPv6 header and the
+			 * TCP header.
+			 */
+			nth->th_sport = th->th_sport;
+			nth->th_dport = th->th_dport;
+		}
+		xchg(nth->th_dport, nth->th_sport, n_short);
+#undef xchg
+	}
+#ifdef INET6
+	if (isipv6) {
+		ip6->ip6_plen = htons((u_short)(sizeof (struct tcphdr) +
+						tlen));
+		tlen += sizeof (struct ip6_hdr) + sizeof (struct tcphdr);
+	} else
+#endif
+      {
+	tlen += sizeof (struct tcpiphdr);
+	ip->ip_len = tlen;	/* stored as is, no htons(), unlike ip6_plen */
+	ip->ip_ttl = ip_defttl;
+      }
+	m->m_len = tlen;
+	m->m_pkthdr.len = tlen;
+	m->m_pkthdr.rcvif = (struct ifnet *) 0;
+	nth->th_seq = htonl(seq);
+	nth->th_ack = htonl(ack);
+	nth->th_x2 = 0;
+	nth->th_off = sizeof (struct tcphdr) >> 2;
+	nth->th_flags = flags;
+	if (tp)
+		nth->th_win = htons((u_short) (win >> tp->rcv_scale));
+	else
+		nth->th_win = htons((u_short)win);
+	nth->th_urp = 0;
+#ifdef INET6
+	if (isipv6) {
+		nth->th_sum = 0;
+		nth->th_sum = in6_cksum(m, IPPROTO_TCP,
+					sizeof(struct ip6_hdr),
+					tlen - sizeof(struct ip6_hdr));
+		ip6->ip6_hlim = in6_selecthlim(tp ? tp->t_inpcb : NULL,
+					       ro6 && ro6->ro_rt ?
+					       ro6->ro_rt->rt_ifp :
+					       NULL);
+	} else
+#endif /* INET6 */
+      {
+        nth->th_sum = in_pseudo(ip->ip_src.s_addr, ip->ip_dst.s_addr,
+	    htons((u_short)(tlen - sizeof(struct ip) + ip->ip_p)));
+        m->m_pkthdr.csum_flags = CSUM_TCP;
+        m->m_pkthdr.csum_data = offsetof(struct tcphdr, th_sum);
+      }
+#ifdef TCPDEBUG
+	if (tp == NULL || (tp->t_inpcb->inp_socket->so_options & SO_DEBUG))
+		tcp_trace(TA_OUTPUT, 0, tp, mtod(m, void *), th, 0);
+#endif
+#ifdef IPSEC
+	ipsec_setsocket(m, tp ? tp->t_inpcb->inp_socket : NULL);
+#endif
+#ifdef INET6
+	if (isipv6) {
+		(void)ip6_output(m, NULL, ro6, ipflags, NULL, NULL);
+		if (ro6 == &sro6 && ro6->ro_rt) {
+			RTFREE(ro6->ro_rt);	/* drop the scratch route */
+			ro6->ro_rt = NULL;
+		}
+	} else
+#endif /* INET6 */
+      {
+	(void) ip_output(m, NULL, ro, ipflags, NULL);
+	if (ro == &sro && ro->ro_rt) {
+		RTFREE(ro->ro_rt);	/* drop the scratch route */
+		ro->ro_rt = NULL;
+	}
+      }
+}
+
+/*
+ * Initialise the TCP control block paired with `inp' and hook the two
+ * together.  `inp' is reinterpreted as a struct inp_tp, so it must have
+ * come from the zone allocator set up in tcp_init(); the tcpcb and its
+ * timer callouts are members of that same structure.  The tcpcb is
+ * zeroed, gets an empty reassembly queue and default MSS, RTT and
+ * window values, and is returned to the caller.
+ */
+struct tcpcb *
+tcp_newtcpcb(inp)
+	struct inpcb *inp;
+{
+	struct inp_tp *pair = (struct inp_tp *)inp;
+	register struct tcpcb *tp = &pair->tcb;
+#ifdef INET6
+	int isipv6 = (inp->inp_vflag & INP_IPV6) != 0;
+#endif /* INET6 */
+
+	bzero((char *) tp, sizeof(struct tcpcb));
+	LIST_INIT(&tp->t_segq);
+
+	/* Both segment-size fields start at the per-family default. */
+#ifdef INET6
+	if (isipv6)
+		tp->t_maxopd = tcp_v6mssdflt;
+	else
+#endif /* INET6 */
+		tp->t_maxopd = tcp_mssdflt;
+	tp->t_maxseg = tp->t_maxopd;
+
+	/* Point each timer at its embedded callout, then initialise it. */
+	tp->tt_rexmt = &pair->inp_tp_rexmt;
+	callout_init(tp->tt_rexmt, 0);
+	tp->tt_persist = &pair->inp_tp_persist;
+	callout_init(tp->tt_persist, 0);
+	tp->tt_keep = &pair->inp_tp_keep;
+	callout_init(tp->tt_keep, 0);
+	tp->tt_2msl = &pair->inp_tp_2msl;
+	callout_init(tp->tt_2msl, 0);
+	tp->tt_delack = &pair->inp_tp_delack;
+	callout_init(tp->tt_delack, 0);
+
+	/* t_flags is zero after the bzero() above, so OR-ing is enough. */
+	if (tcp_do_rfc1323)
+		tp->t_flags |= (TF_REQ_SCALE|TF_REQ_TSTMP);
+	if (tcp_do_rfc1644)
+		tp->t_flags |= TF_REQ_CC;
+	tp->t_inpcb = inp;	/* XXX */
+
+	/*
+	 * srtt starts at TCPTV_SRTTBASE (0), which marks "no rtt estimate
+	 * yet".  rttvar is chosen so that srtt + 4 * rttvar yields a
+	 * reasonable initial retransmit time.
+	 */
+	tp->t_srtt = TCPTV_SRTTBASE;
+	tp->t_rttvar = ((TCPTV_RTOBASE - TCPTV_SRTTBASE) << TCP_RTTVAR_SHIFT) / 4;
+	tp->t_rttmin = TCPTV_MIN;
+	tp->t_rxtcur = TCPTV_RTOBASE;
+
+	tp->snd_ssthresh = TCP_MAXWIN << TCP_MAX_WINSHIFT;
+	tp->snd_cwnd = TCP_MAXWIN << TCP_MAX_WINSHIFT;
+	tp->t_rcvtime = ticks;
+
+	/*
+	 * The IPv4 TTL is initialised for IPv6 sockets too: such a socket
+	 * may be bound to the IPv6 wildcard address, which may match an
+	 * IPv4-mapped IPv6 address.
+	 */
+	inp->inp_ip_ttl = ip_defttl;
+	inp->inp_ppcb = (caddr_t)tp;
+	return (tp);		/* XXX */
+}
+
+/*
+ * Abort a TCP connection and report `errno' on its socket.
+ * When a SYN has already been received the state is forced to CLOSED
+ * and tcp_output() is invoked (which sends the RST to the peer) before
+ * the control block is closed.  A timeout is reported as the pending
+ * soft error when one is recorded.  Returns whatever tcp_close() returns.
+ */
+struct tcpcb *
+tcp_drop(tp, errno)
+	register struct tcpcb *tp;
+	int errno;
+{
+	struct socket *so = tp->t_inpcb->inp_socket;
+
+	if (!TCPS_HAVERCVDSYN(tp->t_state)) {
+		/* Never got as far as receiving a SYN. */
+		tcpstat.tcps_conndrops++;
+	} else {
+		tp->t_state = TCPS_CLOSED;
+		(void) tcp_output(tp);
+		tcpstat.tcps_drops++;
+	}
+
+	/* Prefer the more specific soft error over a plain timeout. */
+	if (tp->t_softerror && errno == ETIMEDOUT)
+		errno = tp->t_softerror;
+	so->so_error = errno;
+
+	return (tcp_close(tp));
+}
+
+/*
+ * Close a TCP control block:
+ *	discard all space held by the tcp
+ *	discard internet protocol block
+ *	wake up any sleepers
+ *
+ * Before the teardown, the smoothed RTT, RTT variance and ssthresh of
+ * the connection are folded into the metrics of the cached route,
+ * provided enough RTT samples were collected.  Always returns a null
+ * tcpcb pointer.
+ */
+struct tcpcb *
+tcp_close(tp)
+	register struct tcpcb *tp;
+{
+	register struct tseg_qent *q;
+	struct inpcb *inp = tp->t_inpcb;
+	struct socket *so = inp->inp_socket;
+#ifdef INET6
+	int isipv6 = (inp->inp_vflag & INP_IPV6) != 0;
+#endif /* INET6 */
+	register struct rtentry *rt;
+	int dosavessthresh;
+
+	/*
+	 * Make sure that all of our timers are stopped before we
+	 * delete the PCB.
+	 */
+	callout_stop(tp->tt_rexmt);
+	callout_stop(tp->tt_persist);
+	callout_stop(tp->tt_keep);
+	callout_stop(tp->tt_2msl);
+	callout_stop(tp->tt_delack);
+
+	/*
+	 * If we got enough samples through the srtt filter,
+	 * save the rtt and rttvar in the routing entry.
+	 * 'Enough' is arbitrarily defined as the 16 samples.
+	 * 16 samples is enough for the srtt filter to converge
+	 * to within 5% of the correct value; fewer samples and
+	 * we could save a very bogus rtt.
+	 *
+	 * Don't update the default route's characteristics and don't
+	 * update anything that the user "locked".
+	 */
+	if (tp->t_rttupdated >= 16) {
+		register u_long i = 0;
+#ifdef INET6
+		if (isipv6) {
+			struct sockaddr_in6 *sin6;
+
+			/*
+			 * No cached route, or a route whose key is the
+			 * unspecified address: nothing to update.  Note
+			 * that the jump also bypasses the RTF_DELCLONE
+			 * check further down.
+			 */
+			if ((rt = inp->in6p_route.ro_rt) == NULL)
+				goto no_valid_rt;
+			sin6 = (struct sockaddr_in6 *)rt_key(rt);
+			if (IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr))
+				goto no_valid_rt;
+		}
+		else
+#endif /* INET6 */		
+		/* IPv4 counterpart of the test above (key == INADDR_ANY). */
+		if ((rt = inp->inp_route.ro_rt) == NULL ||
+		    ((struct sockaddr_in *)rt_key(rt))->sin_addr.s_addr
+		    == INADDR_ANY)
+			goto no_valid_rt;
+
+		/* Smoothed RTT, unless the metric is locked. */
+		if ((rt->rt_rmx.rmx_locks & RTV_RTT) == 0) {
+			i = tp->t_srtt *
+			    (RTM_RTTUNIT / (hz * TCP_RTT_SCALE));
+			if (rt->rt_rmx.rmx_rtt && i)
+				/*
+				 * filter this update to half the old & half
+				 * the new values, converting scale.
+				 * See route.h and tcp_var.h for a
+				 * description of the scaling constants.
+				 */
+				rt->rt_rmx.rmx_rtt =
+				    (rt->rt_rmx.rmx_rtt + i) / 2;
+			else
+				rt->rt_rmx.rmx_rtt = i;
+			tcpstat.tcps_cachedrtt++;
+		}
+		/* RTT variance, same averaging scheme as the RTT above. */
+		if ((rt->rt_rmx.rmx_locks & RTV_RTTVAR) == 0) {
+			i = tp->t_rttvar *
+			    (RTM_RTTUNIT / (hz * TCP_RTTVAR_SCALE));
+			if (rt->rt_rmx.rmx_rttvar && i)
+				rt->rt_rmx.rmx_rttvar =
+				    (rt->rt_rmx.rmx_rttvar + i) / 2;
+			else
+				rt->rt_rmx.rmx_rttvar = i;
+			tcpstat.tcps_cachedrttvar++;
+		}
+		/*
+		 * The old comment here said:
+		 * update the pipelimit (ssthresh) if it has been updated
+		 * already or if a pipesize was specified & the threshhold
+		 * got below half the pipesize.  I.e., wait for bad news
+		 * before we start updating, then update on both good
+		 * and bad news.
+		 *
+		 * But we want to save the ssthresh even if no pipesize is
+		 * specified explicitly in the route, because such
+		 * connections still have an implicit pipesize specified
+		 * by the global tcp_sendspace.  In the absence of a reliable
+		 * way to calculate the pipesize, it will have to do.
+		 */
+		i = tp->snd_ssthresh;
+		/*
+		 * "Bad news" means ssthresh fell below half the pipe size;
+		 * the socket send buffer high-water mark stands in when the
+		 * route carries no explicit sendpipe.
+		 */
+		if (rt->rt_rmx.rmx_sendpipe != 0)
+			dosavessthresh = (i < rt->rt_rmx.rmx_sendpipe / 2);
+		else
+			dosavessthresh = (i < so->so_snd.sb_hiwat / 2);
+		if (((rt->rt_rmx.rmx_locks & RTV_SSTHRESH) == 0 &&
+		     i != 0 && rt->rt_rmx.rmx_ssthresh != 0)
+		    || dosavessthresh) {
+			/*
+			 * convert the limit from user data bytes to
+			 * packets then to packet data bytes.
+			 */
+			i = (i + tp->t_maxseg / 2) / tp->t_maxseg;
+			/* Never cache less than two segments. */
+			if (i < 2)
+				i = 2;
+			/* Per-packet size = payload + IP and TCP headers. */
+			i *= (u_long)(tp->t_maxseg +
+#ifdef INET6
+				      (isipv6 ? sizeof (struct ip6_hdr) +
+					       sizeof (struct tcphdr) :
+#endif
+				       sizeof (struct tcpiphdr)
+#ifdef INET6
+				       )
+#endif
+				      );
+			if (rt->rt_rmx.rmx_ssthresh)
+				rt->rt_rmx.rmx_ssthresh =
+				    (rt->rt_rmx.rmx_ssthresh + i) / 2;
+			else
+				rt->rt_rmx.rmx_ssthresh = i;
+			tcpstat.tcps_cachedssthresh++;
+		}
+	}
+	/*
+	 * NOTE(review): the route is re-read through inp_route here even
+	 * for an IPv6 pcb, while the metrics update above went through
+	 * in6p_route -- confirm that the two members refer to the same
+	 * storage.
+	 */
+	rt = inp->inp_route.ro_rt;
+	if (rt) {
+		/* 
+		 * mark route for deletion if no information is
+		 * cached.
+		 */
+		if ((tp->t_flags & TF_LQ_OVERFLOW) &&
+		    ((rt->rt_rmx.rmx_locks & RTV_RTT) == 0)){
+			if (rt->rt_rmx.rmx_rtt == 0)
+				rt->rt_flags |= RTF_DELCLONE;
+		}
+	}
+    no_valid_rt:
+	/* free the reassembly queue, if any */
+	while((q = LIST_FIRST(&tp->t_segq)) != NULL) {
+		LIST_REMOVE(q, tqe_q);
+		m_freem(q->tqe_m);
+		FREE(q, M_TSEGQ);
+	}
+	/* The header template is released through the mbuf holding it. */
+	if (tp->t_template)
+		(void) m_free(dtom(tp->t_template));
+	/* Unhook the tcpcb from the pcb before the pcb itself is detached. */
+	inp->inp_ppcb = NULL;
+	soisdisconnected(so);
+#ifdef INET6
+	if (INP_CHECK_SOCKAF(so, AF_INET6))
+		in6_pcbdetach(inp);
+	else
+#endif /* INET6 */
+	in_pcbdetach(inp);
+	tcpstat.tcps_closed++;
+	return ((struct tcpcb *)0);
+}
+
+/*
+ * Release the memory held in TCP reassembly queues.
+ *
+ * When do_tcpdrain is set, walk every pcb on the tcbinfo list and, for
+ * each one that has a tcpcb attached, free all queued segments.
+ * XXX: The "Net/3" implementation doesn't imply that the TCP
+ *      reassembly queue should be flushed, but in a situation
+ *      where we're really low on mbufs, this is potentially
+ *      useful.
+ */
+void
+tcp_drain()
+{
+	struct inpcb *inp;
+	struct tcpcb *tp;
+	struct tseg_qent *seg;
+
+	if (!do_tcpdrain)
+		return;
+
+	LIST_FOREACH(inp, tcbinfo.listhead, inp_list) {
+		tp = intotcpcb(inp);
+		if (!tp)
+			continue;
+		/* Pop and free segments until the queue is empty. */
+		for (;;) {
+			seg = LIST_FIRST(&tp->t_segq);
+			if (seg == NULL)
+				break;
+			LIST_REMOVE(seg, tqe_q);
+			m_freem(seg->tqe_m);
+			FREE(seg, M_TSEGQ);
+		}
+	}
+}
+
+/*
+ * Record an asynchronous error reported for a TCP connection.
+ *
+ * The error is normally just stored as the soft error of the tcpcb.
+ * The user is not woken up, since there currently is no mechanism for
+ * reporting soft errors (yet - a kqueue filter may be added); the
+ * wakeup calls are kept below under #if 0.
+ */
+static void
+tcp_notify(inp, error)
+	struct inpcb *inp;
+	int error;
+{
+	struct tcpcb *tp = (struct tcpcb *)inp->inp_ppcb;
+	int unreachable;
+
+	unreachable = (error == EHOSTUNREACH || error == ENETUNREACH ||
+	    error == EHOSTDOWN);
+
+	/* Unreachable-type errors are ignored once we are hooked up. */
+	if (tp->t_state == TCPS_ESTABLISHED && unreachable)
+		return;
+
+	/*
+	 * If the connection hasn't completed, has retransmitted several
+	 * times, and this is not the first error, give up now.  This is
+	 * better than waiting a long time to establish a connection that
+	 * can never complete.
+	 */
+	if (tp->t_state < TCPS_ESTABLISHED && tp->t_rxtshift > 3 &&
+	    tp->t_softerror) {
+		tcp_drop(tp, error);
+	} else {
+		tp->t_softerror = error;
+	}
+#if 0
+	wakeup((caddr_t) &so->so_timeo);
+	sorwakeup(so);
+	sowwakeup(so);
+#endif
+}
+
+/*
+ * Sysctl handler that exports the list of TCP protocol control blocks.
+ *
+ * With a null old pointer only a size estimate is stored in
+ * req->oldidx.  Writes are rejected with EPERM.  Otherwise the output
+ * is a leading struct xinpgen, one struct xtcpcb per exported pcb, and
+ * a trailing struct xinpgen carrying the counters as they stand after
+ * the export, so the reader can detect concurrent changes.
+ *
+ * Returns 0 on success or the first error encountered.
+ */
+static int
+tcp_pcblist(SYSCTL_HANDLER_ARGS)
+{
+	int error, i, n, s;
+	struct inpcb *inp, **inp_list;
+	inp_gen_t gencnt;
+	struct xinpgen xig;
+
+	/*
+	 * The process of preparing the TCB list is too time-consuming and
+	 * resource-intensive to repeat twice on every request.
+	 */
+	if (req->oldptr == 0) {
+		/* Size estimate with 1/8 slack on the pcb count. */
+		n = tcbinfo.ipi_count;
+		req->oldidx = 2 * (sizeof xig)
+			+ (n + n/8) * sizeof(struct xtcpcb);
+		return 0;
+	}
+
+	if (req->newptr != 0)
+		return EPERM;
+
+	/*
+	 * OK, now we're committed to doing something.
+	 */
+	s = splnet();
+	gencnt = tcbinfo.ipi_gencnt;
+	n = tcbinfo.ipi_count;
+	splx(s);
+
+	xig.xig_len = sizeof xig;
+	xig.xig_count = n;
+	xig.xig_gen = gencnt;
+	xig.xig_sogen = so_gencnt;
+	error = SYSCTL_OUT(req, &xig, sizeof xig);
+	if (error)
+		return error;
+
+	inp_list = malloc(n * sizeof *inp_list, M_TEMP, M_WAITOK);
+	if (inp_list == 0)
+		return ENOMEM;
+	
+	/*
+	 * Snapshot the pointers of the pcbs that are no newer than the
+	 * generation recorded above and that prison_xinpcb() does not
+	 * filter out for the requesting process.
+	 */
+	s = splnet();
+	for (inp = LIST_FIRST(tcbinfo.listhead), i = 0; inp && i < n;
+	     inp = LIST_NEXT(inp, inp_list)) {
+		if (inp->inp_gencnt <= gencnt && !prison_xinpcb(req->p, inp))
+			inp_list[i++] = inp;
+	}
+	splx(s);
+	n = i;
+
+	/*
+	 * Copy the records out.  The loop ends on the first failure so
+	 * that a later iteration cannot overwrite the error.
+	 */
+	error = 0;
+	for (i = 0; i < n && error == 0; i++) {
+		inp = inp_list[i];
+		if (inp->inp_gencnt <= gencnt) {
+			struct xtcpcb xt;
+			caddr_t inp_ppcb;
+
+			/*
+			 * Clear the whole record first: padding, and the
+			 * members not filled in below (xt_tp without a
+			 * tcpcb, xt_socket without a socket), must not
+			 * carry stale kernel stack contents to userland.
+			 */
+			bzero(&xt, sizeof xt);
+			xt.xt_len = sizeof xt;
+			/* XXX should avoid extra copy */
+			bcopy(inp, &xt.xt_inp, sizeof *inp);
+			inp_ppcb = inp->inp_ppcb;
+			if (inp_ppcb != NULL)
+				bcopy(inp_ppcb, &xt.xt_tp, sizeof xt.xt_tp);
+			if (inp->inp_socket)
+				sotoxsocket(inp->inp_socket, &xt.xt_socket);
+			error = SYSCTL_OUT(req, &xt, sizeof xt);
+		}
+	}
+	if (!error) {
+		/*
+		 * Give the user an updated idea of our state.
+		 * If the generation differs from what we told
+		 * her before, she knows that something happened
+		 * while we were processing this request, and it
+		 * might be necessary to retry.
+		 */
+		s = splnet();
+		xig.xig_gen = tcbinfo.ipi_gencnt;
+		xig.xig_sogen = so_gencnt;
+		xig.xig_count = tcbinfo.ipi_count;
+		splx(s);
+		error = SYSCTL_OUT(req, &xig, sizeof xig);
+	}
+	free(inp_list, M_TEMP);
+	return error;
+}
+
+SYSCTL_PROC(_net_inet_tcp, TCPCTL_PCBLIST, pcblist, CTLFLAG_RD, 0, 0,
+	    tcp_pcblist, "S,xtcpcb", "List of active TCP connections");
+
+/*
+ * Sysctl handler returning the credentials of the socket that owns a
+ * given IPv4 TCP connection.  Only callers that pass suser() may use
+ * it.  The request supplies two struct sockaddr_in; addrs[1] is handed
+ * to in_pcblookup_hash() as the first address/port pair and addrs[0] as
+ * the second.  Returns ENOENT when no pcb with a socket matches.
+ */
+static int
+tcp_getcred(SYSCTL_HANDLER_ARGS)
+{
+	struct xucred xuc;
+	struct sockaddr_in addrs[2];
+	struct inpcb *inp;
+	struct socket *so;
+	int error, s;
+
+	if ((error = suser(req->p)) != 0)
+		return (error);
+	if ((error = SYSCTL_IN(req, addrs, sizeof(addrs))) != 0)
+		return (error);
+
+	s = splnet();
+	inp = in_pcblookup_hash(&tcbinfo, addrs[1].sin_addr, addrs[1].sin_port,
+	    addrs[0].sin_addr, addrs[0].sin_port, 0, NULL);
+	so = (inp != NULL) ? inp->inp_socket : NULL;
+	if (so == NULL) {
+		error = ENOENT;
+	} else {
+		/* Export uid and group list of the socket's credential. */
+		bzero(&xuc, sizeof(xuc));
+		xuc.cr_uid = so->so_cred->cr_uid;
+		xuc.cr_ngroups = so->so_cred->cr_ngroups;
+		bcopy(so->so_cred->cr_groups, xuc.cr_groups,
+		    sizeof(xuc.cr_groups));
+		error = SYSCTL_OUT(req, &xuc, sizeof(struct xucred));
+	}
+	splx(s);
+	return (error);
+}
+
+SYSCTL_PROC(_net_inet_tcp, OID_AUTO, getcred, CTLTYPE_OPAQUE|CTLFLAG_RW,
+    0, 0, tcp_getcred, "S,xucred", "Get the xucred of a TCP connection");
+
+#ifdef INET6
+/*
+ * IPv6 flavour of the getcred sysctl handler.  The request supplies two
+ * struct sockaddr_in6.  When addrs[0] is IPv4-mapped, addrs[1] must be
+ * mapped as well (EINVAL otherwise) and the lookup is done in the IPv4
+ * hash using the trailing four address bytes; in every other case the
+ * IPv6 hash is consulted.  Returns ENOENT when no pcb with a socket
+ * matches.
+ */
+static int
+tcp6_getcred(SYSCTL_HANDLER_ARGS)
+{
+	struct xucred xuc;
+	struct sockaddr_in6 addrs[2];
+	struct inpcb *inp;
+	struct socket *so;
+	int error, s, mapped = 0;
+
+	if ((error = suser(req->p)) != 0)
+		return (error);
+	if ((error = SYSCTL_IN(req, addrs, sizeof(addrs))) != 0)
+		return (error);
+
+	if (IN6_IS_ADDR_V4MAPPED(&addrs[0].sin6_addr)) {
+		if (!IN6_IS_ADDR_V4MAPPED(&addrs[1].sin6_addr))
+			return (EINVAL);
+		mapped = 1;
+	}
+
+	s = splnet();
+	if (mapped != 1) {
+		inp = in6_pcblookup_hash(&tcbinfo, &addrs[1].sin6_addr,
+				 addrs[1].sin6_port,
+				 &addrs[0].sin6_addr, addrs[0].sin6_port,
+				 0, NULL);
+	} else {
+		/* The IPv4 address sits in bytes 12..15 of a mapped one. */
+		inp = in_pcblookup_hash(&tcbinfo,
+			*(struct in_addr *)&addrs[1].sin6_addr.s6_addr[12],
+			addrs[1].sin6_port,
+			*(struct in_addr *)&addrs[0].sin6_addr.s6_addr[12],
+			addrs[0].sin6_port,
+			0, NULL);
+	}
+	so = (inp != NULL) ? inp->inp_socket : NULL;
+	if (so == NULL) {
+		error = ENOENT;
+	} else {
+		/* Export uid and group list of the socket's credential. */
+		bzero(&xuc, sizeof(xuc));
+		xuc.cr_uid = so->so_cred->cr_uid;
+		xuc.cr_ngroups = so->so_cred->cr_ngroups;
+		bcopy(so->so_cred->cr_groups, xuc.cr_groups,
+		    sizeof(xuc.cr_groups));
+		error = SYSCTL_OUT(req, &xuc, sizeof(struct xucred));
+	}
+	splx(s);
+	return (error);
+}
+
+SYSCTL_PROC(_net_inet6_tcp6, OID_AUTO, getcred, CTLTYPE_OPAQUE|CTLFLAG_RW,
+	    0, 0,
+	    tcp6_getcred, "S,xucred", "Get the xucred of a TCP6 connection");
+#endif
+
+
+/*
+ * Control-input entry point for TCP over IPv4.
+ *
+ * cmd:	PRC_* command code.
+ * sa:	address the command refers to; anything that is not a non-zero
+ *	AF_INET address is ignored.
+ * vip:	IP header quoted with the error, or NULL.
+ *
+ * The command selects a notify routine.  With a quoted header only the
+ * matching connection is notified, and only when the quoted sequence
+ * number lies inside its [snd_una, snd_max) window; without one, every
+ * connection to the address is notified.
+ */
+void
+tcp_ctlinput(cmd, sa, vip)
+	int cmd;
+	struct sockaddr *sa;
+	void *vip;
+{
+	struct ip *ip = vip;
+	struct tcphdr *th;
+	struct in_addr faddr;
+	struct inpcb *inp;
+	struct tcpcb *tp;
+	void (*notify) __P((struct inpcb *, int)) = tcp_notify;
+	tcp_seq icmp_seq;
+	int s;
+
+	/* Check the family before interpreting sa as a sockaddr_in. */
+	if (sa->sa_family != AF_INET)
+		return;
+	faddr = ((struct sockaddr_in *)sa)->sin_addr;
+	if (faddr.s_addr == INADDR_ANY)
+		return;
+
+	if (cmd == PRC_QUENCH)
+		notify = tcp_quench;
+	else if (icmp_may_rst && (cmd == PRC_UNREACH_ADMIN_PROHIB ||
+		cmd == PRC_UNREACH_PORT) && ip)
+		notify = tcp_drop_syn_sent;
+	else if (cmd == PRC_MSGSIZE)
+		notify = tcp_mtudisc;
+	else if (PRC_IS_REDIRECT(cmd)) {
+		/* Redirects apply to every connection to the address. */
+		ip = 0;
+		notify = in_rtchange;
+	} else if (cmd == PRC_HOSTDEAD)
+		ip = 0;
+	/*
+	 * Valid indices stop at PRC_NCMDS - 1, so PRC_NCMDS itself has to
+	 * be rejected before inetctlerrmap[] is consulted.
+	 */
+	else if ((unsigned)cmd >= PRC_NCMDS || inetctlerrmap[cmd] == 0)
+		return;
+	if (ip) {
+		s = splnet();
+		/* The quoted TCP header follows the quoted IP header. */
+		th = (struct tcphdr *)((caddr_t)ip 
+				       + (IP_VHL_HL(ip->ip_vhl) << 2));
+		inp = in_pcblookup_hash(&tcbinfo, faddr, th->th_dport,
+		    ip->ip_src, th->th_sport, 0, NULL);
+		if (inp != NULL && inp->inp_socket != NULL) {
+			/* Byte-swap the quoted sequence number. */
+			icmp_seq = htonl(th->th_seq);
+			tp = intotcpcb(inp);
+			/*
+			 * Only act when the quoted segment is one we have
+			 * sent and not yet seen acknowledged.  A pcb
+			 * without a tcpcb is skipped, as in the notify
+			 * routines themselves.
+			 */
+			if (tp != NULL &&
+			    SEQ_GEQ(icmp_seq, tp->snd_una) &&
+			    SEQ_LT(icmp_seq, tp->snd_max))
+				(*notify)(inp, inetctlerrmap[cmd]);
+		}
+		splx(s);
+	} else
+		in_pcbnotifyall(&tcb, faddr, inetctlerrmap[cmd], notify);
+}
+
+#ifdef INET6
+/*
+ * Control-input entry point for TCP over IPv6.
+ *
+ * cmd:	PRC_* command code.
+ * sa:	address the command refers to; must be a full-sized AF_INET6
+ *	sockaddr, otherwise the call is ignored.
+ * d:	struct ip6ctlparam supplied by icmp6, or NULL.
+ *
+ * With a quoted IPv6 header the ports are read from the quoted TCP
+ * header and only matching pcbs are notified; without one, the
+ * notification is sent with wildcard ports and source address.
+ */
+void
+tcp6_ctlinput(cmd, sa, d)
+	int cmd;
+	struct sockaddr *sa;
+	void *d;
+{
+	register struct tcphdr *thp;
+	struct tcphdr th;
+	void (*notify) __P((struct inpcb *, int)) = tcp_notify;
+	struct sockaddr_in6 sa6;
+	struct ip6_hdr *ip6;
+	struct mbuf *m;
+	int off;
+
+	if (sa->sa_family != AF_INET6 ||
+	    sa->sa_len != sizeof(struct sockaddr_in6))
+		return;
+
+	if (cmd == PRC_QUENCH)
+		notify = tcp_quench;
+	else if (cmd == PRC_MSGSIZE)
+		notify = tcp_mtudisc;
+	/*
+	 * Valid indices stop at PRC_NCMDS - 1, so PRC_NCMDS itself has to
+	 * be rejected before inet6ctlerrmap[] is consulted.
+	 */
+	else if (!PRC_IS_REDIRECT(cmd) &&
+		 ((unsigned)cmd >= PRC_NCMDS || inet6ctlerrmap[cmd] == 0))
+		return;
+
+	/* if the parameter is from icmp6, decode it. */
+	if (d != NULL) {
+		struct ip6ctlparam *ip6cp = (struct ip6ctlparam *)d;
+		m = ip6cp->ip6c_m;
+		ip6 = ip6cp->ip6c_ip6;
+		off = ip6cp->ip6c_off;
+	} else {
+		m = NULL;
+		ip6 = NULL;
+		off = 0;	/* fool gcc */
+	}
+
+	/*
+	 * Translate addresses into internal form.
+	 * The check that sa is AF_INET6 is done at the top of this function.
+	 */
+	sa6 = *(struct sockaddr_in6 *)sa;
+	if (IN6_IS_ADDR_LINKLOCAL(&sa6.sin6_addr) != 0 && m != NULL &&
+	    m->m_pkthdr.rcvif != NULL)
+		sa6.sin6_addr.s6_addr16[1] = htons(m->m_pkthdr.rcvif->if_index);
+
+	if (ip6) {
+		/*
+		 * XXX: IPV6 being non NULL is expected to imply that M and
+		 * OFF are valid; M is nevertheless verified below before
+		 * it is dereferenced.
+		 */
+		struct in6_addr s;
+
+		/* translate addresses into internal form */
+		memcpy(&s, &ip6->ip6_src, sizeof(s));
+		if (IN6_IS_ADDR_LINKLOCAL(&s) != 0 && m != NULL &&
+		    m->m_pkthdr.rcvif != NULL)
+			s.s6_addr16[1] = htons(m->m_pkthdr.rcvif->if_index);
+
+		/* without the packet there are no ports to examine */
+		if (m == NULL)
+			return;
+
+		/* check if we can safely examine src and dst ports */
+		if (m->m_pkthdr.len < off + sizeof(th))
+			return;
+
+		if (m->m_len < off + sizeof(th)) {
+			/*
+			 * this should be rare case
+			 * because now MINCLSIZE is "(MHLEN + 1)",
+			 * so we compromise on this copy...
+			 */
+			m_copydata(m, off, sizeof(th), (caddr_t)&th);
+			thp = &th;
+		} else
+			thp = (struct tcphdr *)(mtod(m, caddr_t) + off);
+		in6_pcbnotify(&tcb, (struct sockaddr *)&sa6, thp->th_dport,
+			      &s, thp->th_sport, cmd, notify);
+	} else
+		in6_pcbnotify(&tcb, (struct sockaddr *)&sa6, 0, &zeroin6_addr,
+			      0, cmd, notify);
+}
+#endif /* INET6 */
+
+/* Number of mixing rounds performed by tcp_rndiss_encrypt(). */
+#define TCP_RNDISS_ROUNDS	16
+/* Added to the current time (tv_sec) to schedule the next reseed. */
+#define TCP_RNDISS_OUT	7200
+/* tcp_rndiss_next() re-initialises once the counter reaches this. */
+#define TCP_RNDISS_MAX	30000
+
+/* Substitution table, refilled by read_random() in tcp_rndiss_init(). */
+u_int8_t tcp_rndiss_sbox[128];
+/* Either 0 or 0x8000; flipped on every tcp_rndiss_init(). */
+u_int16_t tcp_rndiss_msb;
+/* Values handed out since the last tcp_rndiss_init(). */
+u_int16_t tcp_rndiss_cnt;
+/* Time (tv_sec) after which tcp_rndiss_next() forces a re-init. */
+long tcp_rndiss_reseed;
+
+/*
+ * Scramble a 16-bit value.  Each of the TCP_RNDISS_ROUNDS rounds bumps
+ * a running key, XORs in a substitution-table entry selected by the low
+ * seven bits of (value ^ key), and then rearranges the bits of the
+ * value.  Returns the scrambled value.
+ */
+u_int16_t
+tcp_rndiss_encrypt(val)
+	u_int16_t val;
+{
+	u_int16_t key = 0;
+	int round;
+
+	for (round = TCP_RNDISS_ROUNDS; round > 0; round--) {
+		u_int16_t sub;
+
+		key += 0x79b9;
+		sub = tcp_rndiss_sbox[(val ^ key) & 0x7f];
+		val ^= sub << 7;
+		val = ((val & 0xff) << 7) | (val >> 8);
+	}
+
+	return val;
+}
+
+/*
+ * (Re)seed the ISS generator: refill the substitution table with random
+ * bytes, schedule the next reseed TCP_RNDISS_OUT past the current time,
+ * flip the MSB value between 0 and 0x8000 and restart the counter.
+ */
+void
+tcp_rndiss_init()
+{
+	struct timeval now;
+
+	getmicrotime(&now);
+	read_random(tcp_rndiss_sbox, sizeof(tcp_rndiss_sbox));
+
+	tcp_rndiss_reseed = now.tv_sec + TCP_RNDISS_OUT;
+	if (tcp_rndiss_msb == 0x8000)
+		tcp_rndiss_msb = 0;
+	else
+		tcp_rndiss_msb = 0x8000;
+	tcp_rndiss_cnt = 0;
+}
+
+/*
+ * Produce the next initial send sequence number.
+ *
+ * The generator is re-initialised first when TCP_RNDISS_MAX values have
+ * been issued or the reseed time has passed.  The upper 16 bits of the
+ * result are the scrambled counter combined with the current MSB value;
+ * the lower 16 bits are 15 random bits.
+ */
+tcp_seq
+tcp_rndiss_next()
+{
+	u_int16_t tmp;
+	struct timeval time;
+	tcp_seq upper;
+
+	getmicrotime(&time);
+
+	if (tcp_rndiss_cnt >= TCP_RNDISS_MAX ||
+	    time.tv_sec > tcp_rndiss_reseed)
+		tcp_rndiss_init();
+
+	read_random(&tmp, sizeof(tmp));
+
+	/*
+	 * Widen to tcp_seq before shifting: the 16-bit operands would
+	 * otherwise be promoted to int, and shifting a value with bit 15
+	 * set left by 16 overflows a signed int.
+	 */
+	upper = (tcp_seq)(tcp_rndiss_encrypt(tcp_rndiss_cnt++) |
+	    tcp_rndiss_msb);
+
+	/* (tmp & 0x7fff) ensures a 32768 byte gap between ISS */
+	return (upper << 16) | (tmp & 0x7fff);
+}
+
+
+/*
+ * Source quench handler: shrink the congestion window of the
+ * connection to a single segment.  It is opened again gradually as the
+ * connection proceeds.  Nothing happens when the pcb has no tcpcb; the
+ * error argument is not used.
+ */
+void
+tcp_quench(inp, errno)
+	struct inpcb *inp;
+	int errno;
+{
+	struct tcpcb *tp;
+
+	tp = intotcpcb(inp);
+	if (tp == NULL)
+		return;
+	tp->snd_cwnd = tp->t_maxseg;
+}
+
+/*
+ * Handler for the ICMP unreachable codes that may reset a connection
+ * attempt: the connection is dropped with the given error, but only
+ * while it is still in SYN-SENT state.  Use of this handler is
+ * controlled by the icmp_may_rst sysctl.
+ */
+void
+tcp_drop_syn_sent(inp, errno)
+	struct inpcb *inp;
+	int errno;
+{
+	struct tcpcb *tp;
+
+	tp = intotcpcb(inp);
+	if (tp == NULL)
+		return;
+	if (tp->t_state != TCPS_SYN_SENT)
+		return;
+	tcp_drop(tp, errno);
+}
+
+/*
+ * When `need fragmentation' ICMP is received, update our idea of the MSS
+ * based on the new value in the route.  Also nudge TCP to send something,
+ * since we know the packet we just sent was dropped.
+ * This duplicates some code in the tcp_mss() function in tcp_input.c.
+ *
+ * Nothing is done when the pcb has no tcpcb attached.  When the route
+ * (or its MTU) is unknown, the segment sizes fall back to the
+ * per-family default.  The error argument is not used.
+ */
+void
+tcp_mtudisc(inp, errno)
+	struct inpcb *inp;
+	int errno;
+{
+	struct tcpcb *tp = intotcpcb(inp);
+	struct rtentry *rt;
+	struct rmxp_tao *taop;
+	struct socket *so = inp->inp_socket;
+	int offered;
+	int mss;
+#ifdef INET6
+	/*
+	 * Read the address family from inp itself rather than through
+	 * tp->t_inpcb: tp may be NULL and is only checked below.
+	 */
+	int isipv6 = (inp->inp_vflag & INP_IPV6) != 0;
+#endif /* INET6 */
+
+	if (tp) {
+#ifdef INET6
+		if (isipv6)
+			rt = tcp_rtlookup6(inp);
+		else
+#endif /* INET6 */
+		rt = tcp_rtlookup(inp);
+		if (!rt || !rt->rt_rmx.rmx_mtu) {
+			/* No usable MTU: fall back to the default MSS. */
+			tp->t_maxopd = tp->t_maxseg =
+#ifdef INET6
+				isipv6 ? tcp_v6mssdflt :
+#endif /* INET6 */
+				tcp_mssdflt;
+			return;
+		}
+		taop = rmx_taop(rt->rt_rmx);
+		offered = taop->tao_mssopt;
+		/* Route MTU minus the fixed IP and TCP header sizes. */
+		mss = rt->rt_rmx.rmx_mtu -
+#ifdef INET6
+			(isipv6 ?
+			 sizeof(struct ip6_hdr) + sizeof(struct tcphdr) :
+#endif /* INET6 */
+			 sizeof(struct tcpiphdr)
+#ifdef INET6
+			 )
+#endif /* INET6 */
+			;
+
+		if (offered)
+			mss = min(mss, offered);
+		/*
+		 * XXX - The above conditional probably violates the TCP
+		 * spec.  The problem is that, since we don't know the
+		 * other end's MSS, we are supposed to use a conservative
+		 * default.  But, if we do that, then MTU discovery will
+		 * never actually take place, because the conservative
+		 * default is much less than the MTUs typically seen
+		 * on the Internet today.  For the moment, we'll sweep
+		 * this under the carpet.
+		 *
+		 * The conservative default might not actually be a problem
+		 * if the only case this occurs is when sending an initial
+		 * SYN with options and data to a host we've never talked
+		 * to before.  Then, they will reply with an MSS value which
+		 * will get recorded and the new parameters should get
+		 * recomputed.  For Further Study.
+		 */
+		/* Only ever shrink; a larger value changes nothing. */
+		if (tp->t_maxopd <= mss)
+			return;
+		tp->t_maxopd = mss;
+
+		/* Leave room for the options sent on every segment. */
+		if ((tp->t_flags & (TF_REQ_TSTMP|TF_NOOPT)) == TF_REQ_TSTMP &&
+		    (tp->t_flags & TF_RCVD_TSTMP) == TF_RCVD_TSTMP)
+			mss -= TCPOLEN_TSTAMP_APPA;
+		if ((tp->t_flags & (TF_REQ_CC|TF_NOOPT)) == TF_REQ_CC &&
+		    (tp->t_flags & TF_RCVD_CC) == TF_RCVD_CC)
+			mss -= TCPOLEN_CC_APPA;
+		/* Above one cluster, round down to a multiple of MCLBYTES. */
+#if	(MCLBYTES & (MCLBYTES - 1)) == 0
+		if (mss > MCLBYTES)
+			mss &= ~(MCLBYTES-1);
+#else
+		if (mss > MCLBYTES)
+			mss = mss / MCLBYTES * MCLBYTES;
+#endif
+		if (so->so_snd.sb_hiwat < mss)
+			mss = so->so_snd.sb_hiwat;
+
+		tp->t_maxseg = mss;
+
+		/* Rewind to the oldest unacked byte and send again. */
+		tcpstat.tcps_mturesent++;
+		tp->t_rtttime = 0;
+		tp->snd_nxt = tp->snd_una;
+		tcp_output(tp);
+	}
+}
+
+/*
+ * Look up the routing entry to the peer of this inpcb.  If no route
+ * is found and it cannot be allocated then return NULL.  This routine
+ * is called by TCP routines that access the rmx structure and by tcp_mss
+ * to get the interface MTU.
+ */
+struct rtentry *
+tcp_rtlookup(inp)
+	struct inpcb *inp;
+{
+	struct route *ro = &inp->inp_route;
+	struct rtentry *rt = ro->ro_rt;
+
+	/* A cached route that is up can be used as it is. */
+	if (rt != NULL && (rt->rt_flags & RTF_UP))
+		return rt;
+
+	/* Without a foreign address there is nothing to look up. */
+	if (inp->inp_faddr.s_addr == INADDR_ANY)
+		return rt;
+
+	/* No route yet, so try to acquire one */
+	ro->ro_dst.sa_family = AF_INET;
+	ro->ro_dst.sa_len = sizeof(ro->ro_dst);
+	((struct sockaddr_in *) &ro->ro_dst)->sin_addr = inp->inp_faddr;
+	rtalloc(ro);
+	return ro->ro_rt;
+}
+
+#ifdef INET6
+/*
+ * IPv6 counterpart of tcp_rtlookup(): return the route cached in the
+ * pcb when it is up, otherwise try to allocate one towards the foreign
+ * address, provided that address is specified.
+ */
+struct rtentry *
+tcp_rtlookup6(inp)
+	struct inpcb *inp;
+{
+	struct route_in6 *ro6 = &inp->in6p_route;
+	struct rtentry *rt = ro6->ro_rt;
+
+	/* A cached route that is up can be used as it is. */
+	if (rt != NULL && (rt->rt_flags & RTF_UP))
+		return rt;
+
+	/* Without a foreign address there is nothing to look up. */
+	if (IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_faddr))
+		return rt;
+
+	/* No route yet, so try to acquire one */
+	ro6->ro_dst.sin6_family = AF_INET6;
+	ro6->ro_dst.sin6_len = sizeof(ro6->ro_dst);
+	ro6->ro_dst.sin6_addr = inp->in6p_faddr;
+	rtalloc((struct route *)ro6);
+	return ro6->ro_rt;
+}
+#endif /* INET6 */
+
+#ifdef IPSEC
+/*
+ * Compute the ESP/AH header size for TCP, including the outer IP header.
+ *
+ * A scratch mbuf is filled with the IP and TCP headers taken from the
+ * connection's header template and passed to the per-family IPsec size
+ * routine.  Returns 0 when tp, its template or its pcb is missing, or
+ * when no mbuf can be obtained.
+ */
+size_t
+ipsec_hdrsiz_tcp(tp)
+	struct tcpcb *tp;
+{
+	struct inpcb *inp;
+	struct mbuf *m;
+	size_t hdrsiz;
+	struct ip *ip;
+#ifdef INET6
+	struct ip6_hdr *ip6;
+#endif /* INET6 */
+	struct tcphdr *th;
+
+	if (tp == NULL || tp->t_template == NULL)
+		return 0;
+	inp = tp->t_inpcb;
+	if (inp == NULL)
+		return 0;
+
+	MGETHDR(m, M_DONTWAIT, MT_DATA);
+	if (m == NULL)
+		return 0;
+
+#ifdef INET6
+	if ((inp->inp_vflag & INP_IPV6) != 0) {
+		ip6 = mtod(m, struct ip6_hdr *);
+		th = (struct tcphdr *)(ip6 + 1);
+		m->m_len = sizeof(struct ip6_hdr) + sizeof(struct tcphdr);
+		m->m_pkthdr.len = m->m_len;
+		bcopy((caddr_t)tp->t_template->tt_ipgen, (caddr_t)ip6,
+		      sizeof(struct ip6_hdr));
+		bcopy((caddr_t)&tp->t_template->tt_t, (caddr_t)th,
+		      sizeof(struct tcphdr));
+		hdrsiz = ipsec6_hdrsiz(m, IPSEC_DIR_OUTBOUND, inp);
+	} else
+#endif /* INET6 */
+	{
+		ip = mtod(m, struct ip *);
+		th = (struct tcphdr *)(ip + 1);
+		m->m_len = sizeof(struct tcpiphdr);
+		m->m_pkthdr.len = m->m_len;
+		bcopy((caddr_t)tp->t_template->tt_ipgen, (caddr_t)ip,
+		      sizeof(struct ip));
+		bcopy((caddr_t)&tp->t_template->tt_t, (caddr_t)th,
+		      sizeof(struct tcphdr));
+		hdrsiz = ipsec4_hdrsiz(m, IPSEC_DIR_OUTBOUND, inp);
+	}
+
+	/* The scratch mbuf was only needed for the size computation. */
+	m_free(m);
+	return hdrsiz;
+}
+#endif /*IPSEC*/
+
+/*
+ * Return a pointer to the cached information about the remote host,
+ * which is kept in the protocol specific part of the route metrics.
+ * NULL is returned unless the route to the peer exists, is up and is a
+ * host route.
+ */
+struct rmxp_tao *
+tcp_gettaocache(inp)
+	struct inpcb *inp;
+{
+	struct rtentry *rt;
+	int wanted = RTF_UP | RTF_HOST;
+
+#ifdef INET6
+	if ((inp->inp_vflag & INP_IPV6) != 0)
+		rt = tcp_rtlookup6(inp);
+	else
+#endif /* INET6 */
+		rt = tcp_rtlookup(inp);
+
+	/* Make sure this is a host route and is up. */
+	if (rt != NULL && (rt->rt_flags & wanted) == wanted)
+		return rmx_taop(rt->rt_rmx);
+
+	return NULL;
+}
+
+/*
+ * Clear all the TAO cache entries, called from tcp_init.
+ *
+ * XXX
+ * This routine is just an empty one, because we assume that the
+ * routing tables are initialized at the same time as TCP, so there is
+ * nothing left over in the cache.
+ */
+static void
+tcp_cleartaocache()
+{
+}
diff --git a/sys/netinet/tcp_usrreq.c b/sys/netinet/tcp_usrreq.c
new file mode 100644
index 0000000..25834d4
--- /dev/null
+++ b/sys/netinet/tcp_usrreq.c
@@ -0,0 +1,1151 @@
+/*
+ * Copyright (c) 1982, 1986, 1988, 1993
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	From: @(#)tcp_usrreq.c	8.2 (Berkeley) 1/3/94
+ * $FreeBSD$
+ */
+
+#include "opt_ipsec.h"
+#include "opt_inet6.h"
+#include "opt_tcpdebug.h"
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/kernel.h>
+#include <sys/sysctl.h>
+#include <sys/mbuf.h>
+#ifdef INET6
+#include <sys/domain.h>
+#endif /* INET6 */
+#include <sys/socket.h>
+#include <sys/socketvar.h>
+#include <sys/protosw.h>
+#include <sys/proc.h>
+#include <sys/jail.h>
+
+#include <net/if.h>
+#include <net/route.h>
+
+#include <netinet/in.h>
+#include <netinet/in_systm.h>
+#ifdef INET6
+#include <netinet/ip6.h>
+#endif
+#include <netinet/in_pcb.h>
+#ifdef INET6
+#include <netinet6/in6_pcb.h>
+#endif
+#include <netinet/in_var.h>
+#include <netinet/ip_var.h>
+#ifdef INET6
+#include <netinet6/ip6_var.h>
+#endif
+#include <netinet/tcp.h>
+#include <netinet/tcp_fsm.h>
+#include <netinet/tcp_seq.h>
+#include <netinet/tcp_timer.h>
+#include <netinet/tcp_var.h>
+#include <netinet/tcpip.h>
+#ifdef TCPDEBUG
+#include <netinet/tcp_debug.h>
+#endif
+
+#ifdef IPSEC
+#include <netinet6/ipsec.h>
+#endif /*IPSEC*/
+
+/*
+ * TCP protocol interface to socket abstraction.
+ */
+extern	char *tcpstates[];	/* XXX ??? */
+
+static int	tcp_attach __P((struct socket *, struct proc *));
+static int	tcp_connect __P((struct tcpcb *, struct sockaddr *, 
+				 struct proc *));
+#ifdef INET6
+static int	tcp6_connect __P((struct tcpcb *, struct sockaddr *,
+				 struct proc *));
+#endif /* INET6 */
+static struct tcpcb *
+		tcp_disconnect __P((struct tcpcb *));
+static struct tcpcb *
+		tcp_usrclosed __P((struct tcpcb *));
+
+#ifdef TCPDEBUG
+#define	TCPDEBUG0	int ostate = 0	/* declares the saved-state local */
+#define	TCPDEBUG1()	ostate = tp ? tp->t_state : 0	/* snapshot state; tp may be NULL */
+#define	TCPDEBUG2(req)	if (tp && (so->so_options & SO_DEBUG)) \
+				tcp_trace(TA_USER, ostate, tp, 0, 0, req) /* trace only for SO_DEBUG sockets */
+#else
+#define	TCPDEBUG0	/* all three expand to nothing without TCPDEBUG */
+#define	TCPDEBUG1()
+#define	TCPDEBUG2(req)
+#endif
+
+/*
+ * TCP attaches to socket via pru_attach(), reserving space,
+ * and an internet control block.  Returns EISCONN if one already exists.
+ */
+static int
+tcp_usr_attach(struct socket *so, int proto, struct proc *p)
+{
+	int s = splnet();	/* restored by splx(s) at "out" */
+	int error;
+	struct inpcb *inp = sotoinpcb(so);
+	struct tcpcb *tp = 0;
+	TCPDEBUG0;
+
+	TCPDEBUG1();
+	if (inp) {	/* socket already has a pcb */
+		error = EISCONN;
+		goto out;
+	}
+
+	error = tcp_attach(so, p);	/* proto is not consulted here */
+	if (error)
+		goto out;
+
+	if ((so->so_options & SO_LINGER) && so->so_linger == 0)	/* linger on, no time set */
+		so->so_linger = TCP_LINGERTIME;
+	tp = sototcpcb(so);	/* only consumed by TCPDEBUG2 below */
+out:
+	TCPDEBUG2(PRU_ATTACH);
+	splx(s);
+	return error;
+}
+
+/*
+ * pru_detach() detaches the TCP protocol from the socket.
+ * If the protocol state is non-embryonic, then can't
+ * do this directly: have to initiate a pru_disconnect(),
+ * which may finish later; embryonic TCB's can just
+ * be discarded here.
+ */
+static int
+tcp_usr_detach(struct socket *so)
+{
+	int s = splnet();
+	int error = 0;	/* never modified below: returns 0 once a pcb exists */
+	struct inpcb *inp = sotoinpcb(so);
+	struct tcpcb *tp;
+	TCPDEBUG0;
+
+	if (inp == 0) {	/* nothing attached to this socket */
+		splx(s);
+		return EINVAL;	/* XXX */
+	}
+	tp = intotcpcb(inp);
+	TCPDEBUG1();
+	tp = tcp_disconnect(tp);	/* result is only consumed by TCPDEBUG2 */
+
+	TCPDEBUG2(PRU_DETACH);
+	splx(s);
+	return error;
+}
+
+#define	COMMON_START()	TCPDEBUG0; /* expects locals s, inp and tp in scope */ \
+			do { \
+				     if (inp == 0) { /* no pcb: restore spl and fail */ \
+					     splx(s); \
+					     return EINVAL; \
+				     } \
+				     tp = intotcpcb(inp); \
+				     TCPDEBUG1(); \
+		     } while(0)
+			     
+#define COMMON_END(req)	out: TCPDEBUG2(req); splx(s); return error; goto out /* supplies "out:"; the goto follows the return and never runs */
+
+
+/*
+ * Give the socket an address.  AF_INET multicast addresses are refused.
+ */
+static int
+tcp_usr_bind(struct socket *so, struct sockaddr *nam, struct proc *p)
+{
+	int s = splnet();
+	int error = 0;
+	struct inpcb *inp = sotoinpcb(so);
+	struct tcpcb *tp;
+	struct sockaddr_in *sinp;
+
+	COMMON_START();	/* returns EINVAL if no pcb; sets tp */
+
+	/*
+	 * Must check for multicast addresses and disallow binding
+	 * to them.
+	 */
+	sinp = (struct sockaddr_in *)nam;
+	if (sinp->sin_family == AF_INET &&
+	    IN_MULTICAST(ntohl(sinp->sin_addr.s_addr))) {
+		error = EAFNOSUPPORT;
+		goto out;
+	}
+	error = in_pcbbind(inp, nam, p);
+	if (error)
+		goto out;	/* "out" is the very next statement (COMMON_END) */
+	COMMON_END(PRU_BIND);
+
+}
+
+#ifdef INET6
+static int
+tcp6_usr_bind(struct socket *so, struct sockaddr *nam, struct proc *p)
+{
+	int s = splnet();
+	int error = 0;
+	struct inpcb *inp = sotoinpcb(so);
+	struct tcpcb *tp;
+	struct sockaddr_in6 *sin6p;
+
+	COMMON_START();	/* returns EINVAL if no pcb; sets tp */
+
+	/*
+	 * Must check for multicast addresses and disallow binding
+	 * to them.
+	 */
+	sin6p = (struct sockaddr_in6 *)nam;
+	if (sin6p->sin6_family == AF_INET6 &&
+	    IN6_IS_ADDR_MULTICAST(&sin6p->sin6_addr)) {
+		error = EAFNOSUPPORT;
+		goto out;
+	}
+	inp->inp_vflag &= ~INP_IPV4;	/* start out flagged IPv6 only */
+	inp->inp_vflag |= INP_IPV6;
+	if ((inp->inp_flags & IN6P_BINDV6ONLY) == 0) {
+		/* Not restricted to v6: wildcard and v4-mapped binds involve IPv4. */
+		if (IN6_IS_ADDR_UNSPECIFIED(&sin6p->sin6_addr))
+			inp->inp_vflag |= INP_IPV4;	/* wildcard: both flags set */
+		else if (IN6_IS_ADDR_V4MAPPED(&sin6p->sin6_addr)) {
+			struct sockaddr_in sin;
+
+			in6_sin6_2_sin(&sin, sin6p);
+			inp->inp_vflag |= INP_IPV4;	/* v4-mapped: flag as IPv4 only */
+			inp->inp_vflag &= ~INP_IPV6;
+			error = in_pcbbind(inp, (struct sockaddr *)&sin, p);
+			goto out;	/* success or failure, the v4 bind is final */
+		}
+	}
+	error = in6_pcbbind(inp, nam, p);
+	if (error)
+		goto out;
+	COMMON_END(PRU_BIND);
+}
+#endif /* INET6 */
+
+/*
+ * Prepare to accept connections (binds a local port first if none is set).
+ */
+static int
+tcp_usr_listen(struct socket *so, struct proc *p)
+{
+	int s = splnet();
+	int error = 0;
+	struct inpcb *inp = sotoinpcb(so);
+	struct tcpcb *tp;
+
+	COMMON_START();
+	if (inp->inp_lport == 0)	/* no local port yet: bind with a null address */
+		error = in_pcbbind(inp, (struct sockaddr *)0, p);
+	if (error == 0)
+		tp->t_state = TCPS_LISTEN;
+	COMMON_END(PRU_LISTEN);
+}
+
+#ifdef INET6
+static int
+tcp6_usr_listen(struct socket *so, struct proc *p)
+{
+	int s = splnet();
+	int error = 0;
+	struct inpcb *inp = sotoinpcb(so);
+	struct tcpcb *tp;
+
+	COMMON_START();
+	if (inp->inp_lport == 0) {	/* no local port yet */
+		inp->inp_vflag &= ~INP_IPV4;
+		if ((inp->inp_flags & IN6P_BINDV6ONLY) == 0)	/* not v6-only: keep IPv4 flag */
+			inp->inp_vflag |= INP_IPV4;
+		error = in6_pcbbind(inp, (struct sockaddr *)0, p);
+	}
+	if (error == 0)
+		tp->t_state = TCPS_LISTEN;
+	COMMON_END(PRU_LISTEN);
+}
+#endif /* INET6 */
+
+/*
+ * Initiate connection to peer.
+ * Create a template for use in transmissions on this connection.
+ * Enter SYN_SENT state, and mark socket as connecting.
+ * Start keep-alive timer, and seed output sequence space.
+ * Send initial segment on connection.
+ */
+static int
+tcp_usr_connect(struct socket *so, struct sockaddr *nam, struct proc *p)
+{
+	int s = splnet();
+	int error = 0;
+	struct inpcb *inp = sotoinpcb(so);
+	struct tcpcb *tp;
+	struct sockaddr_in *sinp;
+
+	COMMON_START();
+
+	/*
+	 * Must disallow TCP ``connections'' to multicast addresses.
+	 */
+	sinp = (struct sockaddr_in *)nam;
+	if (sinp->sin_family == AF_INET
+	    && IN_MULTICAST(ntohl(sinp->sin_addr.s_addr))) {
+		error = EAFNOSUPPORT;
+		goto out;
+	}
+
+	if (p && jailed(p->p_ucred))	/* NOTE(review): presumably remaps the destination for a jail -- confirm */
+		prison_remote_ip(p->p_ucred, 0, &sinp->sin_addr.s_addr);
+
+	if ((error = tcp_connect(tp, nam, p)) != 0)
+		goto out;
+	error = tcp_output(tp);	/* send the initial segment */
+	COMMON_END(PRU_CONNECT);
+}
+
+#ifdef INET6
+static int
+tcp6_usr_connect(struct socket *so, struct sockaddr *nam, struct proc *p)
+{
+	int s = splnet();
+	int error = 0;
+	struct inpcb *inp = sotoinpcb(so);
+	struct tcpcb *tp;
+	struct sockaddr_in6 *sin6p;
+
+	COMMON_START();
+
+	/*
+	 * Must disallow TCP ``connections'' to multicast addresses.
+	 */
+	sin6p = (struct sockaddr_in6 *)nam;
+	if (sin6p->sin6_family == AF_INET6
+	    && IN6_IS_ADDR_MULTICAST(&sin6p->sin6_addr)) {
+		error = EAFNOSUPPORT;
+		goto out;
+	}
+
+	if ((inp->inp_flags & IN6P_BINDV6ONLY) == 0 &&
+	    IN6_IS_ADDR_V4MAPPED(&sin6p->sin6_addr)) {	/* v4-mapped peer: connect over IPv4 */
+		struct sockaddr_in sin;
+
+		in6_sin6_2_sin(&sin, sin6p);
+		inp->inp_vflag |= INP_IPV4;
+		inp->inp_vflag &= ~INP_IPV6;
+		if ((error = tcp_connect(tp, (struct sockaddr *)&sin, p)) != 0)
+			goto out;
+		error = tcp_output(tp);
+		goto out;
+	}
+	inp->inp_vflag &= ~INP_IPV4;	/* native IPv6 path */
+	inp->inp_vflag |= INP_IPV6;
+	if ((error = tcp6_connect(tp, nam, p)) != 0)
+		goto out;
+	error = tcp_output(tp);
+	COMMON_END(PRU_CONNECT);
+}
+#endif /* INET6 */
+
+/*
+ * Initiate disconnect from peer.
+ * If connection never passed embryonic stage, just drop;
+ * else if don't need to let data drain, then can just drop anyways,
+ * else have to begin TCP shutdown process: mark socket disconnecting,
+ * drain unread data, state switch to reflect user close, and
+ * send segment (e.g. FIN) to peer.  Socket will be really disconnected
+ * when peer sends FIN and acks ours.
+ *
+ * SHOULD IMPLEMENT LATER PRU_CONNECT VIA REALLOC TCPCB.
+ */
+static int
+tcp_usr_disconnect(struct socket *so)
+{
+	int s = splnet();
+	int error = 0;	/* never modified: returns 0 once a pcb exists */
+	struct inpcb *inp = sotoinpcb(so);
+	struct tcpcb *tp;
+
+	COMMON_START();
+	tp = tcp_disconnect(tp);	/* all the work happens in tcp_disconnect() */
+	COMMON_END(PRU_DISCONNECT);
+}
+
+/*
+ * Accept a connection.  Essentially all the work is
+ * done at higher levels; just return the address
+ * of the peer, storing through addr.
+ */
+static int
+tcp_usr_accept(struct socket *so, struct sockaddr **nam)
+{
+	int s = splnet();
+	int error = 0;
+	struct inpcb *inp = sotoinpcb(so);
+	struct tcpcb *tp = NULL;	/* stays NULL on the ECONNABORTED path */
+	TCPDEBUG0;
+
+	if (so->so_state & SS_ISDISCONNECTED) {	/* checked before the pcb test */
+		error = ECONNABORTED;
+		goto out;
+	}
+	if (inp == 0) {
+		splx(s);
+		return (EINVAL);
+	}
+	tp = intotcpcb(inp);
+	TCPDEBUG1();
+	in_setpeeraddr(so, nam);	/* its return value is not examined */
+	COMMON_END(PRU_ACCEPT);
+}
+
+#ifdef INET6
+static int
+tcp6_usr_accept(struct socket *so, struct sockaddr **nam)
+{
+	int s = splnet();
+	int error = 0;
+	struct inpcb *inp = sotoinpcb(so);
+	struct tcpcb *tp = NULL;	/* stays NULL on the ECONNABORTED path */
+	TCPDEBUG0;
+
+	if (so->so_state & SS_ISDISCONNECTED) {	/* checked before the pcb test */
+		error = ECONNABORTED;
+		goto out;
+	}
+	if (inp == 0) {
+		splx(s);
+		return (EINVAL);
+	}
+	tp = intotcpcb(inp);
+	TCPDEBUG1();
+	in6_mapped_peeraddr(so, nam);	/* its return value is not examined */
+	COMMON_END(PRU_ACCEPT);
+}
+#endif /* INET6 */
+/*
+ * Mark the connection as being incapable of further output.
+ */
+static int
+tcp_usr_shutdown(struct socket *so)
+{
+	int s = splnet();
+	int error = 0;
+	struct inpcb *inp = sotoinpcb(so);
+	struct tcpcb *tp;
+
+	COMMON_START();
+	socantsendmore(so);
+	tp = tcp_usrclosed(tp);	/* tp is replaced by the returned pointer */
+	if (tp)	/* only transmit if a tcpcb was returned */
+		error = tcp_output(tp);
+	COMMON_END(PRU_SHUTDOWN);
+}
+
+/*
+ * After a receive, possibly send window update to peer.
+ */
+static int
+tcp_usr_rcvd(struct socket *so, int flags)
+{
+	int s = splnet();
+	int error = 0;	/* never modified: returns 0 once a pcb exists */
+	struct inpcb *inp = sotoinpcb(so);
+	struct tcpcb *tp;
+
+	COMMON_START();
+	tcp_output(tp);	/* result discarded; flags is not consulted */
+	COMMON_END(PRU_RCVD);
+}
+
+/*
+ * Do a send by putting data in output queue and updating urgent
+ * marker if URG set.  Possibly send more data.  Unlike the other
+ * pru_*() routines, the mbuf chains are our responsibility.  We
+ * must either enqueue them or free them.  The other pru_* routines
+ * generally are caller-frees.
+ */
+static int
+tcp_usr_send(struct socket *so, int flags, struct mbuf *m, 
+	     struct sockaddr *nam, struct mbuf *control, struct proc *p)
+{
+	int s = splnet();
+	int error = 0;
+	struct inpcb *inp = sotoinpcb(so);
+	struct tcpcb *tp;
+#ifdef INET6
+	int isipv6;
+#endif
+	TCPDEBUG0;
+
+	if (inp == NULL) {
+		/*
+		 * OOPS! we lost a race, the TCP session got reset after
+		 * we checked SS_CANTSENDMORE, eg: while doing uiomove or a
+		 * network interrupt in the non-splnet() section of sosend().
+		 */
+		if (m)
+			m_freem(m);
+		if (control)
+			m_freem(control);
+		error = ECONNRESET;	/* XXX EPIPE? */
+		tp = NULL;	/* keeps TCPDEBUG1/TCPDEBUG2 from touching a tcpcb */
+		TCPDEBUG1();
+		goto out;
+	}
+#ifdef INET6
+	isipv6 = nam && nam->sa_family == AF_INET6;	/* picks tcp6_connect for an implied connect */
+#endif /* INET6 */
+	tp = intotcpcb(inp);
+	TCPDEBUG1();
+	if (control) {
+		/* TCP doesn't do control messages (rights, creds, etc) */
+		if (control->m_len) {
+			m_freem(control);
+			if (m)
+				m_freem(m);
+			error = EINVAL;
+			goto out;
+		}
+		m_freem(control);	/* empty control, just free it */
+	}
+	if(!(flags & PRUS_OOB)) {	/* ordinary (non-urgent) data */
+		sbappend(&so->so_snd, m);	/* m is queued on the send buffer from here on */
+		if (nam && tp->t_state < TCPS_SYN_SENT) {
+			/*
+			 * Do implied connect if not yet connected,
+			 * initialize window to default value, and
+			 * initialize maxseg/maxopd using peer's cached
+			 * MSS.
+			 */
+#ifdef INET6
+			if (isipv6)
+				error = tcp6_connect(tp, nam, p);
+			else
+#endif /* INET6 */
+			error = tcp_connect(tp, nam, p);
+			if (error)
+				goto out;	/* m stays queued in so_snd */
+			tp->snd_wnd = TTCP_CLIENT_SND_WND;
+			tcp_mss(tp, -1);
+		}
+
+		if (flags & PRUS_EOF) {
+			/*
+			 * Close the send side of the connection after
+			 * the data is sent.
+			 */
+			socantsendmore(so);
+			tp = tcp_usrclosed(tp);
+		}
+		if (tp != NULL) {	/* tp may have been replaced by tcp_usrclosed() */
+			if (flags & PRUS_MORETOCOME)
+				tp->t_flags |= TF_MORETOCOME;	/* set only around this tcp_output() */
+			error = tcp_output(tp);
+			if (flags & PRUS_MORETOCOME)
+				tp->t_flags &= ~TF_MORETOCOME;
+		}
+	} else {
+		if (sbspace(&so->so_snd) < -512) {	/* NOTE(review): presumably buffer overcommitted by >512 -- confirm */
+			m_freem(m);
+			error = ENOBUFS;
+			goto out;
+		}
+		/*
+		 * According to RFC961 (Assigned Protocols),
+		 * the urgent pointer points to the last octet
+		 * of urgent data.  We continue, however,
+		 * to consider it to indicate the first octet
+		 * of data past the urgent section.
+		 * Otherwise, snd_up should be one lower.
+		 */
+		sbappend(&so->so_snd, m);
+		if (nam && tp->t_state < TCPS_SYN_SENT) {
+			/*
+			 * Do implied connect if not yet connected,
+			 * initialize window to default value, and
+			 * initialize maxseg/maxopd using peer's cached
+			 * MSS.
+			 */
+#ifdef INET6
+			if (isipv6)
+				error = tcp6_connect(tp, nam, p);
+			else
+#endif /* INET6 */
+			error = tcp_connect(tp, nam, p);
+			if (error)
+				goto out;	/* m stays queued in so_snd */
+			tp->snd_wnd = TTCP_CLIENT_SND_WND;
+			tcp_mss(tp, -1);
+		}
+		tp->snd_up = tp->snd_una + so->so_snd.sb_cc;	/* urgent pointer = end of queued data */
+		tp->t_force = 1;	/* set only around this tcp_output() */
+		error = tcp_output(tp);
+		tp->t_force = 0;
+	}
+	COMMON_END((flags & PRUS_OOB) ? PRU_SENDOOB : 
+		   ((flags & PRUS_EOF) ? PRU_SEND_EOF : PRU_SEND));
+}
+
+/*
+ * Abort the TCP: drop the connection with ECONNABORTED.
+ */
+static int
+tcp_usr_abort(struct socket *so)
+{
+	int s = splnet();
+	int error = 0;	/* never modified: returns 0 once a pcb exists */
+	struct inpcb *inp = sotoinpcb(so);
+	struct tcpcb *tp;
+
+	COMMON_START();
+	tp = tcp_drop(tp, ECONNABORTED);	/* result is only consumed by TCPDEBUG2 */
+	COMMON_END(PRU_ABORT);
+}
+
+/*
+ * Receive out-of-band data: hands back the single byte in tp->t_iobc.
+ */
+static int
+tcp_usr_rcvoob(struct socket *so, struct mbuf *m, int flags)
+{
+	int s = splnet();
+	int error = 0;
+	struct inpcb *inp = sotoinpcb(so);
+	struct tcpcb *tp;
+
+	COMMON_START();
+	if ((so->so_oobmark == 0 &&
+	     (so->so_state & SS_RCVATMARK) == 0) ||
+	    so->so_options & SO_OOBINLINE ||
+	    tp->t_oobflags & TCPOOB_HADDATA) {	/* no mark, inline mode, or already consumed */
+		error = EINVAL;
+		goto out;
+	}
+	if ((tp->t_oobflags & TCPOOB_HAVEDATA) == 0) {	/* nothing stored yet */
+		error = EWOULDBLOCK;
+		goto out;
+	}
+	m->m_len = 1;	/* exactly one byte is returned in the caller's mbuf */
+	*mtod(m, caddr_t) = tp->t_iobc;
+	if ((flags & MSG_PEEK) == 0)	/* HAVEDATA set, HADDATA clear here, so XOR swaps them */
+		tp->t_oobflags ^= (TCPOOB_HAVEDATA | TCPOOB_HADDATA);
+	COMMON_END(PRU_RCVOOB);
+}
+
+/* xxx - should be const; positional initializer for the IPv4 TCP handlers */
+struct pr_usrreqs tcp_usrreqs = {
+	tcp_usr_abort, tcp_usr_accept, tcp_usr_attach, tcp_usr_bind,
+	tcp_usr_connect, pru_connect2_notsupp, in_control, tcp_usr_detach,
+	tcp_usr_disconnect, tcp_usr_listen, in_setpeeraddr, tcp_usr_rcvd,
+	tcp_usr_rcvoob, tcp_usr_send, pru_sense_null, tcp_usr_shutdown,
+	in_setsockaddr, sosend, soreceive, sopoll
+};
+
+#ifdef INET6
+struct pr_usrreqs tcp6_usrreqs = {	/* IPv6 table: same slots, tcp6_/in6_ variants where they exist */
+	tcp_usr_abort, tcp6_usr_accept, tcp_usr_attach, tcp6_usr_bind,
+	tcp6_usr_connect, pru_connect2_notsupp, in6_control, tcp_usr_detach,
+	tcp_usr_disconnect, tcp6_usr_listen, in6_mapped_peeraddr, tcp_usr_rcvd,
+	tcp_usr_rcvoob, tcp_usr_send, pru_sense_null, tcp_usr_shutdown,
+	in6_mapped_sockaddr, sosend, soreceive, sopoll
+};
+#endif /* INET6 */
+
+/*
+ * Common subroutine to open a TCP connection to remote host specified
+ * by the struct sockaddr_in that nam points to.  Call in_pcbbind to assign
+ * a local port number if needed.  Call in_pcbladdr to do the routing and to
+ * choose a local host address (interface).  If there is an existing incarnation
+ * of the same connection in TIME-WAIT state and if the remote host was
+ * sending CC options and if the connection duration was < MSL, then
+ * truncate the previous TIME-WAIT state and proceed.
+ * Initialize connection parameters and enter SYN-SENT state.
+ */
+static int
+tcp_connect(tp, nam, p)
+	register struct tcpcb *tp;
+	struct sockaddr *nam;
+	struct proc *p;
+{
+	struct inpcb *inp = tp->t_inpcb, *oinp;
+	struct socket *so = inp->inp_socket;
+	struct tcpcb *otp;
+	struct sockaddr_in *sin = (struct sockaddr_in *)nam;
+	struct sockaddr_in *ifaddr;
+	struct rmxp_tao *taop;
+	struct rmxp_tao tao_noncached;
+	int error;
+
+	if (inp->inp_lport == 0) {	/* no local port yet */
+		error = in_pcbbind(inp, (struct sockaddr *)0, p);
+		if (error)
+			return error;
+	}
+
+	/*
+	 * Cannot simply call in_pcbconnect, because there might be an
+	 * earlier incarnation of this same connection still in
+	 * TIME_WAIT state, creating an ADDRINUSE error.
+	 */
+	error = in_pcbladdr(inp, nam, &ifaddr);
+	if (error)
+		return error;
+	oinp = in_pcblookup_hash(inp->inp_pcbinfo,
+	    sin->sin_addr, sin->sin_port,
+	    inp->inp_laddr.s_addr != INADDR_ANY ? inp->inp_laddr
+						: ifaddr->sin_addr,
+	    inp->inp_lport,  0, NULL);
+	if (oinp) {	/* another pcb already uses this 4-tuple */
+		if (oinp != inp && (otp = intotcpcb(oinp)) != NULL &&
+		otp->t_state == TCPS_TIME_WAIT &&
+		    (ticks - otp->t_starttime) < tcp_msl &&
+		    (otp->t_flags & TF_RCVD_CC))
+			otp = tcp_close(otp);	/* truncate the old TIME_WAIT; result unused */
+		else
+			return EADDRINUSE;
+	}
+	if (inp->inp_laddr.s_addr == INADDR_ANY)
+		inp->inp_laddr = ifaddr->sin_addr;	/* adopt the address in_pcbladdr chose */
+	inp->inp_faddr = sin->sin_addr;
+	inp->inp_fport = sin->sin_port;
+	in_pcbrehash(inp);
+
+	tp->t_template = tcp_template(tp);
+	if (tp->t_template == 0) {
+		in_pcbdisconnect(inp);	/* undo the foreign address set above */
+		return ENOBUFS;
+	}
+
+	/* Compute window scaling to request.  */
+	while (tp->request_r_scale < TCP_MAX_WINSHIFT &&
+	    (TCP_MAXWIN << tp->request_r_scale) < so->so_rcv.sb_hiwat)
+		tp->request_r_scale++;
+
+	soisconnecting(so);
+	tcpstat.tcps_connattempt++;
+	tp->t_state = TCPS_SYN_SENT;
+	callout_reset(tp->tt_keep, tcp_keepinit, tcp_timer_keep, tp);
+	tp->iss = tcp_rndiss_next();
+	tcp_sendseqinit(tp);
+
+	/*
+	 * Generate a CC value for this connection and
+	 * check whether CC or CCnew should be used.
+	 */
+	if ((taop = tcp_gettaocache(tp->t_inpcb)) == NULL) {
+		taop = &tao_noncached;	/* no cache entry: use a zeroed local stand-in */
+		bzero(taop, sizeof(*taop));
+	}
+
+	tp->cc_send = CC_INC(tcp_ccgen);
+	if (taop->tao_ccsent != 0 &&
+	    CC_GEQ(tp->cc_send, taop->tao_ccsent)) {
+		taop->tao_ccsent = tp->cc_send;
+	} else {
+		taop->tao_ccsent = 0;
+		tp->t_flags |= TF_SENDCCNEW;	/* fall back to CCnew */
+	}
+
+	return 0;
+}
+
+#ifdef INET6
+static int
+tcp6_connect(tp, nam, p)	/* IPv6 counterpart of tcp_connect() */
+	register struct tcpcb *tp;
+	struct sockaddr *nam;
+	struct proc *p;
+{
+	struct inpcb *inp = tp->t_inpcb, *oinp;
+	struct socket *so = inp->inp_socket;
+	struct tcpcb *otp;
+	struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)nam;
+	struct in6_addr *addr6;
+	struct rmxp_tao *taop;
+	struct rmxp_tao tao_noncached;
+	int error;
+
+	if (inp->inp_lport == 0) {	/* no local port yet */
+		error = in6_pcbbind(inp, (struct sockaddr *)0, p);
+		if (error)
+			return error;
+	}
+
+	/*
+	 * Cannot simply call in_pcbconnect, because there might be an
+	 * earlier incarnation of this same connection still in
+	 * TIME_WAIT state, creating an ADDRINUSE error.
+	 */
+	error = in6_pcbladdr(inp, nam, &addr6);
+	if (error)
+		return error;
+	oinp = in6_pcblookup_hash(inp->inp_pcbinfo,
+				  &sin6->sin6_addr, sin6->sin6_port,
+				  IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_laddr)
+				  ? addr6
+				  : &inp->in6p_laddr,
+				  inp->inp_lport,  0, NULL);
+	if (oinp) {	/* another pcb already uses this 4-tuple */
+		if (oinp != inp && (otp = intotcpcb(oinp)) != NULL &&
+		    otp->t_state == TCPS_TIME_WAIT &&
+		    (ticks - otp->t_starttime) < tcp_msl &&
+		    (otp->t_flags & TF_RCVD_CC))
+			otp = tcp_close(otp);	/* truncate the old TIME_WAIT; result unused */
+		else
+			return EADDRINUSE;
+	}
+	if (IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_laddr))
+		inp->in6p_laddr = *addr6;	/* adopt the address in6_pcbladdr chose */
+	inp->in6p_faddr = sin6->sin6_addr;
+	inp->inp_fport = sin6->sin6_port;
+	if ((sin6->sin6_flowinfo & IPV6_FLOWINFO_MASK) != 0)	/* integer test: compare with 0, not NULL */
+		inp->in6p_flowinfo = sin6->sin6_flowinfo;
+	in_pcbrehash(inp);
+
+	tp->t_template = tcp_template(tp);
+	if (tp->t_template == 0) {
+		in6_pcbdisconnect(inp);	/* undo the foreign address set above */
+		return ENOBUFS;
+	}
+
+	/* Compute window scaling to request.  */
+	while (tp->request_r_scale < TCP_MAX_WINSHIFT &&
+	    (TCP_MAXWIN << tp->request_r_scale) < so->so_rcv.sb_hiwat)
+		tp->request_r_scale++;
+
+	soisconnecting(so);
+	tcpstat.tcps_connattempt++;
+	tp->t_state = TCPS_SYN_SENT;
+	callout_reset(tp->tt_keep, tcp_keepinit, tcp_timer_keep, tp);
+	tp->iss = tcp_rndiss_next();
+	tcp_sendseqinit(tp);
+
+	/*
+	 * Generate a CC value for this connection and
+	 * check whether CC or CCnew should be used.
+	 */
+	if ((taop = tcp_gettaocache(tp->t_inpcb)) == NULL) {
+		taop = &tao_noncached;	/* no cache entry: use a zeroed local stand-in */
+		bzero(taop, sizeof(*taop));
+	}
+
+	tp->cc_send = CC_INC(tcp_ccgen);
+	if (taop->tao_ccsent != 0 &&
+	    CC_GEQ(tp->cc_send, taop->tao_ccsent)) {
+		taop->tao_ccsent = tp->cc_send;
+	} else {
+		taop->tao_ccsent = 0;
+		tp->t_flags |= TF_SENDCCNEW;	/* fall back to CCnew */
+	}
+
+	return 0;
+}
+#endif /* INET6 */
+
+/*
+ * The new sockopt interface makes it possible for us to block in the
+ * copyin/out step (if we take a page fault).  Taking a page fault at
+ * splnet() is probably a Bad Thing.  (Since sockets and pcbs both now
+ * use TSM, there probably isn't any need for this function to run at
+ * splnet() any more.  This needs more examination.)
+ */
+int
+tcp_ctloutput(so, sopt)
+	struct socket *so;
+	struct sockopt *sopt;
+{
+	int	error, opt, optval, s;
+	struct	inpcb *inp;
+	struct	tcpcb *tp;
+
+	error = 0;
+	s = splnet();		/* XXX */
+	inp = sotoinpcb(so);
+	if (inp == NULL) {
+		splx(s);
+		return (ECONNRESET);
+	}
+	if (sopt->sopt_level != IPPROTO_TCP) {	/* not a TCP-level option: hand to the IP layer */
+#ifdef INET6
+		if (INP_CHECK_SOCKAF(so, AF_INET6))
+			error = ip6_ctloutput(so, sopt);
+		else
+#endif /* INET6 */
+		error = ip_ctloutput(so, sopt);
+		splx(s);
+		return (error);
+	}
+	tp = intotcpcb(inp);
+
+	switch (sopt->sopt_dir) {	/* no default: other directions return 0 untouched */
+	case SOPT_SET:
+		switch (sopt->sopt_name) {
+		case TCP_NODELAY:
+		case TCP_NOOPT:
+			error = sooptcopyin(sopt, &optval, sizeof optval,
+					    sizeof optval);
+			if (error)
+				break;
+
+			switch (sopt->sopt_name) {	/* map the option name to its t_flags bit */
+			case TCP_NODELAY:
+				opt = TF_NODELAY;
+				break;
+			case TCP_NOOPT:
+				opt = TF_NOOPT;
+				break;
+			default:
+				opt = 0; /* dead code to fool gcc */
+				break;
+			}
+
+			if (optval)
+				tp->t_flags |= opt;
+			else
+				tp->t_flags &= ~opt;
+			break;
+
+		case TCP_NOPUSH:
+			error = sooptcopyin(sopt, &optval, sizeof optval,
+					    sizeof optval);
+			if (error)
+				break;
+
+			if (optval)
+				tp->t_flags |= TF_NOPUSH;
+			else {
+				tp->t_flags &= ~TF_NOPUSH;
+				error = tcp_output(tp);	/* clearing NOPUSH triggers an output attempt */
+			}
+			break;
+
+		case TCP_MAXSEG:
+			error = sooptcopyin(sopt, &optval, sizeof optval,
+					    sizeof optval);
+			if (error)
+				break;
+
+			if (optval > 0 && optval <= tp->t_maxseg)	/* may only keep or shrink t_maxseg */
+				tp->t_maxseg = optval;
+			else
+				error = EINVAL;
+			break;
+
+		default:
+			error = ENOPROTOOPT;
+			break;
+		}
+		break;
+
+	case SOPT_GET:
+		switch (sopt->sopt_name) {
+		case TCP_NODELAY:
+			optval = tp->t_flags & TF_NODELAY;	/* raw flag bit, not normalized to 0/1 */
+			break;
+		case TCP_MAXSEG:
+			optval = tp->t_maxseg;
+			break;
+		case TCP_NOOPT:
+			optval = tp->t_flags & TF_NOOPT;	/* raw flag bit */
+			break;
+		case TCP_NOPUSH:
+			optval = tp->t_flags & TF_NOPUSH;	/* raw flag bit */
+			break;
+		default:
+			error = ENOPROTOOPT;	/* optval is left unset; copyout is skipped */
+			break;
+		}
+		if (error == 0)
+			error = sooptcopyout(sopt, &optval, sizeof optval);
+		break;
+	}
+	splx(s);
+	return (error);
+}
+
+/*
+ * tcp_sendspace and tcp_recvspace are the default send and receive window
+ * sizes, respectively.  These are obsolescent (this information should
+ * be set by the route).  NOTE(review): u_long exported via SYSCTL_INT.
+ */
+u_long	tcp_sendspace = 1024*16;
+SYSCTL_INT(_net_inet_tcp, TCPCTL_SENDSPACE, sendspace, CTLFLAG_RW, 
+    &tcp_sendspace , 0, "Maximum outgoing TCP datagram size");
+u_long	tcp_recvspace = 1024*16;
+SYSCTL_INT(_net_inet_tcp, TCPCTL_RECVSPACE, recvspace, CTLFLAG_RW, 
+    &tcp_recvspace , 0, "Maximum incoming TCP datagram size");
+
+/*
+ * Attach TCP protocol to socket, allocating
+ * internet protocol control block, tcp control block,
+ * buffer space; the new tcpcb is left in the CLOSED state.
+ */
+static int
+tcp_attach(so, p)
+	struct socket *so;
+	struct proc *p;
+{
+	register struct tcpcb *tp;
+	struct inpcb *inp;
+	int error;
+#ifdef INET6
+	int isipv6 = INP_CHECK_SOCKAF(so, AF_INET6) != 0;	/* truth test: compare with 0, not NULL */
+#endif
+
+	if (so->so_snd.sb_hiwat == 0 || so->so_rcv.sb_hiwat == 0) {	/* no buffer space reserved yet */
+		error = soreserve(so, tcp_sendspace, tcp_recvspace);
+		if (error)
+			return (error);
+	}
+	error = in_pcballoc(so, &tcbinfo, p);
+	if (error)
+		return (error);
+	inp = sotoinpcb(so);
+#ifdef IPSEC
+	error = ipsec_init_policy(so, &inp->inp_sp);
+	if (error) {	/* undo the pcb allocation */
+#ifdef INET6
+		if (isipv6)
+			in6_pcbdetach(inp);
+		else
+#endif
+		in_pcbdetach(inp);
+		return (error);
+	}
+#endif /*IPSEC*/
+#ifdef INET6
+	if (isipv6) {
+		inp->inp_vflag |= INP_IPV6;
+		inp->in6p_hops = -1;	/* use kernel default */
+	}
+	else
+#endif
+	inp->inp_vflag |= INP_IPV4;
+	tp = tcp_newtcpcb(inp);
+	if (tp == 0) {
+		int nofd = so->so_state & SS_NOFDREF;	/* XXX */
+
+		so->so_state &= ~SS_NOFDREF;	/* don't free the socket yet */
+#ifdef INET6
+		if (isipv6)
+			in6_pcbdetach(inp);
+		else
+#endif
+		in_pcbdetach(inp);
+		so->so_state |= nofd;	/* put the saved SS_NOFDREF bit back */
+		return (ENOBUFS);
+	}
+	tp->t_state = TCPS_CLOSED;
+	return (0);
+}
+
+/*
+ * Initiate (or continue) disconnect.
+ * If embryonic state, just send reset (once).
+ * If in ``let data drain'' option and linger null, just drop.
+ * Otherwise (hard), mark socket disconnecting and drop
+ * current input data; switch states based on user close, and
+ * send segment to peer (with FIN).
+ */
+static struct tcpcb *
+tcp_disconnect(tp)
+	register struct tcpcb *tp;
+{
+	struct socket *so = tp->t_inpcb->inp_socket;
+
+	if (tp->t_state < TCPS_ESTABLISHED)	/* embryonic: close outright */
+		tp = tcp_close(tp);
+	else if ((so->so_options & SO_LINGER) && so->so_linger == 0)	/* linger on with zero time */
+		tp = tcp_drop(tp, 0);
+	else {
+		soisdisconnecting(so);
+		sbflush(&so->so_rcv);	/* discard unread input */
+		tp = tcp_usrclosed(tp);
+		if (tp)
+			(void) tcp_output(tp);	/* output errors are deliberately ignored */
+	}
+	return (tp);	/* whatever the helper above returned */
+}
+
+/*
+ * User issued close, and wish to trail through shutdown states:
+ * if never received SYN, just forget it.  If got a SYN from peer,
+ * but haven't sent FIN, then go to FIN_WAIT_1 state to send peer a FIN.
+ * If already got a FIN from peer, then almost done; go to LAST_ACK
+ * state.  In all other cases, have already sent FIN to peer (e.g.
+ * after PRU_SHUTDOWN), and just have to play tedious game waiting
+ * for peer to send FIN or not respond to keep-alives, etc.
+ * We can let the user exit from the close as soon as the FIN is acked.
+ */
+static struct tcpcb *
+tcp_usrclosed(tp)
+	register struct tcpcb *tp;
+{
+
+	switch (tp->t_state) {	/* states not listed are left unchanged */
+
+	case TCPS_CLOSED:
+	case TCPS_LISTEN:
+		tp->t_state = TCPS_CLOSED;
+		tp = tcp_close(tp);	/* tp is replaced by tcp_close()'s result */
+		break;
+
+	case TCPS_SYN_SENT:
+	case TCPS_SYN_RECEIVED:
+		tp->t_flags |= TF_NEEDFIN;	/* state unchanged; only the flag is set */
+		break;
+
+	case TCPS_ESTABLISHED:
+		tp->t_state = TCPS_FIN_WAIT_1;
+		break;
+
+	case TCPS_CLOSE_WAIT:
+		tp->t_state = TCPS_LAST_ACK;
+		break;
+	}
+	if (tp && tp->t_state >= TCPS_FIN_WAIT_2) {	/* tp may be NULL after tcp_close() */
+		soisdisconnected(tp->t_inpcb->inp_socket);
+		/* To prevent the connection hanging in FIN_WAIT_2 forever. */
+		if (tp->t_state == TCPS_FIN_WAIT_2)
+			callout_reset(tp->tt_2msl, tcp_maxidle,
+				      tcp_timer_2msl, tp);
+	}
+	return (tp);
+}
+
diff --git a/sys/netinet/tcp_var.h b/sys/netinet/tcp_var.h
new file mode 100644
index 0000000..ecac451
--- /dev/null
+++ b/sys/netinet/tcp_var.h
@@ -0,0 +1,419 @@
+/*
+ * Copyright (c) 1982, 1986, 1993, 1994, 1995
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)tcp_var.h	8.4 (Berkeley) 5/24/95
+ * $FreeBSD$
+ */
+
+#ifndef _NETINET_TCP_VAR_H_
+#define _NETINET_TCP_VAR_H_
+/*
+ * Kernel variables for tcp.
+ */
+
+/*
+ * TCP segment queue entry.  struct tcpcb keeps a list of these in its
+ * t_segq member.
+ */
+struct tseg_qent {
+	LIST_ENTRY(tseg_qent) tqe_q;	/* list linkage */
+	int	tqe_len;		/* TCP segment data length */
+	struct	tcphdr *tqe_th;		/* a pointer to tcp header */
+	struct	mbuf	*tqe_m;		/* mbuf that contains the packet */
+};
+/* List head type for tseg_qent entries (the type of tcpcb's t_segq). */
+LIST_HEAD(tsegqe_head, tseg_qent);
+#ifdef MALLOC_DECLARE
+MALLOC_DECLARE(M_TSEGQ);
+#endif
+
+/*
+ * Skeletal packet header: room for an IP header followed by a TCP header.
+ * struct tcpcb refers to one through t_template.
+ */
+struct tcptemp {
+	/* sized for the largest IP header kept here, currently IPv6 */
+	u_char	tt_ipgen[40]; /* the size must be of max ip header, now IPv6 */
+	struct	tcphdr tt_t;	/* TCP header */
+};
+
+#define tcp6cb		tcpcb  /* for KAME src sync over BSD*'s */
+
+/*
+ * Tcp control block, one per tcp connection; fields:
+ * Organized for 16 byte cacheline efficiency.
+ * The TF_* and TCPOOB_* constants are defined next to the members
+ * (t_flags, t_oobflags) whose bits they name.
+ */
+struct tcpcb {
+	struct	tsegqe_head t_segq;	/* list of tseg_qent entries */
+	int	t_dupacks;		/* consecutive dup acks recd */
+	struct	tcptemp	*t_template;	/* skeletal packet for transmit */
+
+	struct	callout *tt_rexmt;	/* retransmit timer */
+	struct	callout *tt_persist;	/* retransmit persistence */
+	struct	callout *tt_keep;	/* keepalive */
+	struct	callout *tt_2msl;	/* 2*msl TIME_WAIT timer; also armed
+					 * in FIN_WAIT_2 by tcp_usrclosed()
+					 */
+	struct	callout *tt_delack;	/* delayed ACK timer */
+
+	struct	inpcb *t_inpcb;		/* back pointer to internet pcb */
+	int	t_state;		/* state of this connection */
+	u_int	t_flags;		/* TF_* bits, defined below */
+#define	TF_ACKNOW	0x00001		/* ack peer immediately */
+#define	TF_DELACK	0x00002		/* ack, but try to delay it */
+#define	TF_NODELAY	0x00004		/* don't delay packets to coalesce */
+#define	TF_NOOPT	0x00008		/* don't use tcp options */
+#define	TF_SENTFIN	0x00010		/* have sent FIN */
+#define	TF_REQ_SCALE	0x00020		/* have/will request window scaling */
+#define	TF_RCVD_SCALE	0x00040		/* other side has requested scaling */
+#define	TF_REQ_TSTMP	0x00080		/* have/will request timestamps */
+#define	TF_RCVD_TSTMP	0x00100		/* a timestamp was received in SYN */
+#define	TF_SACK_PERMIT	0x00200		/* other side said I could SACK */
+#define	TF_NEEDSYN	0x00400		/* send SYN (implicit state) */
+#define	TF_NEEDFIN	0x00800		/* send FIN (implicit state) */
+#define	TF_NOPUSH	0x01000		/* don't push */
+#define	TF_REQ_CC	0x02000		/* have/will request CC */
+#define	TF_RCVD_CC	0x04000		/* a CC was received in SYN */
+#define	TF_SENDCCNEW	0x08000		/* send CCnew instead of CC in SYN */
+#define	TF_MORETOCOME	0x10000		/* More data to be appended to sock */
+#define	TF_LQ_OVERFLOW	0x20000		/* listen queue overflow */
+	int	t_force;		/* 1 if forcing out a byte */
+
+	tcp_seq	snd_una;		/* send unacknowledged */
+	tcp_seq	snd_max;		/* highest sequence number sent;
+					 * used to recognize retransmits
+					 */
+	tcp_seq	snd_nxt;		/* send next */
+	tcp_seq	snd_up;			/* send urgent pointer */
+
+	tcp_seq	snd_wl1;		/* window update seg seq number */
+	tcp_seq	snd_wl2;		/* window update seg ack number */
+	tcp_seq	iss;			/* initial send sequence number */
+	tcp_seq	irs;			/* initial receive sequence number */
+
+	tcp_seq	rcv_nxt;		/* receive next */
+	tcp_seq	rcv_adv;		/* advertised window */
+	u_long	rcv_wnd;		/* receive window */
+	tcp_seq	rcv_up;			/* receive urgent pointer */
+
+	u_long	snd_wnd;		/* send window */
+	u_long	snd_cwnd;		/* congestion-controlled window */
+	u_long	snd_ssthresh;		/* snd_cwnd size threshold for
+					 * slow start exponential to
+					 * linear switch
+					 */
+	tcp_seq	snd_recover;		/* for use in fast recovery */
+
+	u_int	t_maxopd;		/* mss plus options */
+
+	u_long	t_rcvtime;		/* inactivity time */
+	u_long	t_starttime;		/* time connection was established */
+	int	t_rtttime;		/* round trip time */
+	tcp_seq	t_rtseq;		/* sequence number being timed */
+
+	int	t_rxtcur;		/* current retransmit value (ticks) */
+	u_int	t_maxseg;		/* maximum segment size */
+	int	t_srtt;			/* smoothed round-trip time */
+	int	t_rttvar;		/* variance in round-trip time */
+
+	int	t_rxtshift;		/* log(2) of rexmt exp. backoff */
+	u_int	t_rttmin;		/* minimum rtt allowed */
+	u_long	t_rttupdated;		/* number of times rtt sampled */
+	u_long	max_sndwnd;		/* largest window peer has offered */
+
+	int	t_softerror;		/* possible error not yet reported */
+/* out-of-band data */
+	char	t_oobflags;		/* have some */
+	char	t_iobc;			/* input character */
+#define	TCPOOB_HAVEDATA	0x01
+#define	TCPOOB_HADDATA	0x02
+/* RFC 1323 variables */
+	u_char	snd_scale;		/* window scaling for send window */
+	u_char	rcv_scale;		/* window scaling for recv window */
+	u_char	request_r_scale;	/* pending window scaling */
+	/*
+	 * NOTE(review): undocumented; presumably the send-side counterpart
+	 * of request_r_scale -- confirm against its users.
+	 */
+	u_char	requested_s_scale;
+	u_long	ts_recent;		/* timestamp echo data */
+
+	u_long	ts_recent_age;		/* when last updated */
+	/* NOTE(review): undocumented; by its name, the last ACK sent -- confirm */
+	tcp_seq	last_ack_sent;
+/* RFC 1644 variables */
+	tcp_cc	cc_send;		/* send connection count */
+	tcp_cc	cc_recv;		/* receive connection count */
+/* experimental */
+	u_long	snd_cwnd_prev;		/* cwnd prior to retransmit */
+	u_long	snd_ssthresh_prev;	/* ssthresh prior to retransmit */
+	u_long	t_badrxtwin;		/* window for retransmit recovery */
+};
+
+/*
+ * Structure to hold TCP options that are only used during segment
+ * processing (in tcp_input), but not held in the tcpcb.
+ * It's basically used to reduce the number of parameters
+ * to tcp_dooptions.
+ */
+struct tcpopt {
+	u_long	to_flag;		/* which options are present */
+#define TOF_TS		0x0001		/* timestamp */
+#define TOF_CC		0x0002		/* CC and CCnew are exclusive */
+#define TOF_CCNEW	0x0004
+#define	TOF_CCECHO	0x0008
+	/*
+	 * NOTE(review): the two fields below are undocumented; presumably the
+	 * timestamp value and timestamp echo reply that go with TOF_TS --
+	 * confirm against tcp_dooptions().
+	 */
+	u_long	to_tsval;
+	u_long	to_tsecr;
+	tcp_cc	to_cc;		/* holds CC or CCnew */
+	tcp_cc	to_ccecho;	/* presumably the CC echo for TOF_CCECHO -- confirm */
+};
+
+/*
+ * The TAO cache entry which is stored in the protocol family specific
+ * portion of the route metrics.
+ */
+struct rmxp_tao {
+	tcp_cc	tao_cc;			/* latest CC in valid SYN */
+	tcp_cc	tao_ccsent;		/* latest CC sent to peer */
+	u_short	tao_mssopt;		/* peer's cached MSS */
+#ifdef notyet
+	u_short	tao_flags;		/* cache status flags */
+#define	TAOF_DONT	0x0001		/* peer doesn't understand rfc1644 */
+#define	TAOF_OK		0x0002		/* peer does understand rfc1644 */
+#define	TAOF_UNDEF	0		/* we don't know yet */
+#endif /* notyet */
+};
+/*
+ * View the rmx_filler member of the route metrics structure `r' as the
+ * TAO cache entry.  `r' is the structure itself, not a pointer to it.
+ */
+#define rmx_taop(r)	((struct rmxp_tao *)(r).rmx_filler)
+
+/*
+ * Conversions to the tcpcb: intotcpcb() reads it from the inp_ppcb member
+ * of an inpcb pointer, sototcpcb() goes from a socket via sotoinpcb().
+ */
+#define	intotcpcb(ip)	((struct tcpcb *)(ip)->inp_ppcb)
+#define	sototcpcb(so)	(intotcpcb(sotoinpcb(so)))
+
+/*
+ * The smoothed round-trip time and estimated variance
+ * are stored as fixed point numbers scaled by the values below.
+ * For convenience, these scales are also used in smoothing the average
+ * (smoothed = (1/scale)sample + ((scale-1)/scale)smoothed).
+ * With these scales, srtt has 5 bits to the right of the binary point
+ * (TCP_RTT_SHIFT) and rttvar has 4 bits (TCP_RTTVAR_SHIFT).
+ *
+ * NOTE(review): this comment used to quote 3 and 2 fractional bits with
+ * "ALPHA" values of 0.875 and 0.75.  By the formula above those figures
+ * correspond to scales of 8 and 4, not the 32 and 16 defined here; confirm
+ * the smoothing gains actually applied in tcp_input.c.
+ */
+#define	TCP_RTT_SCALE		32	/* multiplier for srtt; 5 bits frac. */
+#define	TCP_RTT_SHIFT		5	/* shift for srtt; 5 bits frac. */
+#define	TCP_RTTVAR_SCALE	16	/* multiplier for rttvar; 4 bits */
+#define	TCP_RTTVAR_SHIFT	4	/* shift for rttvar; 4 bits */
+#define	TCP_DELTA_SHIFT		2	/* see tcp_input.c */
+
+/*
+ * The initial retransmission should happen at rtt + 4 * rttvar.
+ * Because of the way we do the smoothing, srtt and rttvar
+ * will each average +1/2 tick of bias.  When we compute
+ * the retransmit timer, we want 1/2 tick of rounding and
+ * 1 extra tick because of +-1/2 tick uncertainty in the
+ * firing of the timer.  The bias will give us exactly the
+ * 1.5 tick we need.  But, because the bias is
+ * statistical, we have to test that we don't drop below
+ * the minimum feasible timer (which is 2 ticks).
+ * This version of the macro adapted from a paper by Lawrence
+ * Brakmo and Larry Peterson which outlines a problem caused
+ * by insufficient precision in the original implementation,
+ * which results in inappropriately large RTO values for very
+ * fast networks.
+ *
+ * In terms of the scaled fields: t_srtt is shifted right by
+ * (TCP_RTT_SHIFT - TCP_DELTA_SHIFT) bits, t_rttvar is added, and the
+ * sum is shifted right by TCP_DELTA_SHIFT; with the shift values
+ * defined above that is t_srtt/32 + t_rttvar/4.  The result is bounded
+ * below by t_rttmin.  The argument is expanded more than once, so it
+ * must be free of side effects.
+ */
+#define	TCP_REXMTVAL(tp) \
+	max((tp)->t_rttmin, (((tp)->t_srtt >> (TCP_RTT_SHIFT - TCP_DELTA_SHIFT))  \
+	  + (tp)->t_rttvar) >> TCP_DELTA_SHIFT)
+
+/*
+ * TCP statistics.
+ * Many of these should be kept per connection,
+ * but that's inconvenient at the moment.
+ */
+struct	tcpstat {
+	/* connection life cycle and timers */
+	u_long	tcps_connattempt;	/* connections initiated */
+	u_long	tcps_accepts;		/* connections accepted */
+	u_long	tcps_connects;		/* connections established */
+	u_long	tcps_drops;		/* connections dropped */
+	u_long	tcps_conndrops;		/* embryonic connections dropped */
+	u_long	tcps_closed;		/* conn. closed (includes drops) */
+	u_long	tcps_segstimed;		/* segs where we tried to get rtt */
+	u_long	tcps_rttupdated;	/* times we succeeded */
+	u_long	tcps_delack;		/* delayed acks sent */
+	u_long	tcps_timeoutdrop;	/* conn. dropped in rxmt timeout */
+	u_long	tcps_rexmttimeo;	/* retransmit timeouts */
+	u_long	tcps_persisttimeo;	/* persist timeouts */
+	u_long	tcps_keeptimeo;		/* keepalive timeouts */
+	u_long	tcps_keepprobe;		/* keepalive probes sent */
+	u_long	tcps_keepdrops;		/* connections dropped in keepalive */
+
+	/* output side */
+	u_long	tcps_sndtotal;		/* total packets sent */
+	u_long	tcps_sndpack;		/* data packets sent */
+	u_long	tcps_sndbyte;		/* data bytes sent */
+	u_long	tcps_sndrexmitpack;	/* data packets retransmitted */
+	u_long	tcps_sndrexmitbyte;	/* data bytes retransmitted */
+	u_long	tcps_sndacks;		/* ack-only packets sent */
+	u_long	tcps_sndprobe;		/* window probes sent */
+	u_long	tcps_sndurg;		/* packets sent with URG only */
+	u_long	tcps_sndwinup;		/* window update-only packets sent */
+	u_long	tcps_sndctrl;		/* control (SYN|FIN|RST) packets sent */
+
+	/* input side */
+	u_long	tcps_rcvtotal;		/* total packets received */
+	u_long	tcps_rcvpack;		/* packets received in sequence */
+	u_long	tcps_rcvbyte;		/* bytes received in sequence */
+	u_long	tcps_rcvbadsum;		/* packets received with cksum errs */
+	u_long	tcps_rcvbadoff;		/* packets received with bad offset */
+	u_long	tcps_rcvmemdrop;	/* packets dropped for lack of memory */
+	u_long	tcps_rcvshort;		/* packets received too short */
+	u_long	tcps_rcvduppack;	/* duplicate-only packets received */
+	u_long	tcps_rcvdupbyte;	/* duplicate-only bytes received */
+	u_long	tcps_rcvpartduppack;	/* packets with some duplicate data */
+	u_long	tcps_rcvpartdupbyte;	/* dup. bytes in part-dup. packets */
+	u_long	tcps_rcvoopack;		/* out-of-order packets received */
+	u_long	tcps_rcvoobyte;		/* out-of-order bytes received */
+	u_long	tcps_rcvpackafterwin;	/* packets with data after window */
+	u_long	tcps_rcvbyteafterwin;	/* bytes rcvd after window */
+	u_long	tcps_rcvafterclose;	/* packets rcvd after "close" */
+	u_long	tcps_rcvwinprobe;	/* rcvd window probe packets */
+	u_long	tcps_rcvdupack;		/* rcvd duplicate acks */
+	u_long	tcps_rcvacktoomuch;	/* rcvd acks for unsent data */
+	u_long	tcps_rcvackpack;	/* rcvd ack packets */
+	u_long	tcps_rcvackbyte;	/* bytes acked by rcvd acks */
+	u_long	tcps_rcvwinupd;		/* rcvd window update packets */
+	u_long	tcps_pawsdrop;		/* segments dropped due to PAWS */
+	u_long	tcps_predack;		/* times hdr predict ok for acks */
+	u_long	tcps_preddat;		/* times hdr predict ok for data pkts */
+	/* NOTE(review): undocumented; presumably pcb cache lookup misses -- confirm */
+	u_long	tcps_pcbcachemiss;
+	u_long	tcps_cachedrtt;		/* times cached RTT in route updated */
+	u_long	tcps_cachedrttvar;	/* times cached rttvar updated */
+	u_long	tcps_cachedssthresh;	/* times cached ssthresh updated */
+	u_long	tcps_usedrtt;		/* times RTT initialized from route */
+	u_long	tcps_usedrttvar;	/* times RTTVAR initialized from rt */
+	u_long	tcps_usedssthresh;	/* times ssthresh initialized from rt*/
+	u_long	tcps_persistdrop;	/* timeout in persist state */
+	u_long	tcps_badsyn;		/* bogus SYN, e.g. premature ACK */
+	u_long	tcps_mturesent;		/* resends due to MTU discovery */
+	u_long	tcps_listendrop;	/* listen queue overflows */
+};
+
+/*
+ * TCB structure exported to user-land via sysctl(3).
+ * Evil hack: declare only if in_pcb.h and sys/socketvar.h have been
+ * included.  Not all of our clients do.
+ */
+#if defined(_NETINET_IN_PCB_H_) && defined(_SYS_SOCKETVAR_H_)
+/*
+ * One exported record: copies of the inpcb, the tcpcb and the xsocket,
+ * embedded by value.
+ */
+struct	xtcpcb {
+	/* NOTE(review): presumably the size of this record -- confirm with the sysctl handler */
+	size_t	xt_len;
+	struct	inpcb	xt_inp;
+	struct	tcpcb	xt_tp;
+	struct	xsocket	xt_socket;
+	/* presumably present only to force u_quad_t alignment -- confirm */
+	u_quad_t	xt_alignment_hack;
+};
+#endif
+
+/*
+ * Names for TCP sysctl objects.
+ * TCPCTL_MAXID is one more than the highest id in use.  The entries of
+ * TCPCTL_NAMES below are listed in id order, with slot 0 unused, so the
+ * two lists have to be kept in step when an id is added.
+ */
+#define	TCPCTL_DO_RFC1323	1	/* use RFC-1323 extensions */
+#define	TCPCTL_DO_RFC1644	2	/* use RFC-1644 extensions */
+#define	TCPCTL_MSSDFLT		3	/* MSS default */
+#define TCPCTL_STATS		4	/* statistics (read-only) */
+#define	TCPCTL_RTTDFLT		5	/* default RTT estimate */
+#define	TCPCTL_KEEPIDLE		6	/* keepalive idle timer */
+#define	TCPCTL_KEEPINTVL	7	/* interval to send keepalives */
+#define	TCPCTL_SENDSPACE	8	/* send buffer space */
+#define	TCPCTL_RECVSPACE	9	/* receive buffer space */
+#define	TCPCTL_KEEPINIT		10	/* timeout for establishing syn */
+#define	TCPCTL_PCBLIST		11	/* list of all outstanding PCBs */
+#define	TCPCTL_DELACKTIME	12	/* time before sending delayed ACK */
+#define	TCPCTL_V6MSSDFLT	13	/* MSS default for IPv6 */
+#define	TCPCTL_MAXID		14
+
+/* Initializer of { name, type } pairs indexed by the TCPCTL_* ids above. */
+#define TCPCTL_NAMES { \
+	{ 0, 0 }, \
+	{ "rfc1323", CTLTYPE_INT }, \
+	{ "rfc1644", CTLTYPE_INT }, \
+	{ "mssdflt", CTLTYPE_INT }, \
+	{ "stats", CTLTYPE_STRUCT }, \
+	{ "rttdflt", CTLTYPE_INT }, \
+	{ "keepidle", CTLTYPE_INT }, \
+	{ "keepintvl", CTLTYPE_INT }, \
+	{ "sendspace", CTLTYPE_INT }, \
+	{ "recvspace", CTLTYPE_INT }, \
+	{ "keepinit", CTLTYPE_INT }, \
+	{ "pcblist", CTLTYPE_STRUCT }, \
+	{ "delacktime", CTLTYPE_INT }, \
+	{ "v6mssdflt", CTLTYPE_INT }, \
+}
+
+
+#ifdef _KERNEL
+#ifdef SYSCTL_DECL
+SYSCTL_DECL(_net_inet_tcp);
+#endif
+
+/* Global TCP state. */
+extern	struct inpcbhead tcb;		/* head of queue of active tcpcb's */
+extern	struct inpcbinfo tcbinfo;
+extern	struct tcpstat tcpstat;	/* tcp statistics */
+extern	int tcp_mssdflt;	/* XXX */
+extern	int tcp_delack_enabled;
+extern	int tcp_do_newreno;
+extern	int ss_fltsz;
+extern	int ss_fltsz_local;
+
+/*
+ * Kernel-internal TCP entry points.  tcp_close() and tcp_drop() return
+ * the tcpcb pointer the caller should continue with; callers check that
+ * result for null before using it.
+ */
+void	 tcp_canceltimers __P((struct tcpcb *));
+struct tcpcb *
+	 tcp_close __P((struct tcpcb *));
+void	 tcp_ctlinput __P((int, struct sockaddr *, void *));
+int	 tcp_ctloutput __P((struct socket *, struct sockopt *));
+struct tcpcb *
+	 tcp_drop __P((struct tcpcb *, int));
+void	 tcp_drain __P((void));
+void	 tcp_fasttimo __P((void));
+struct rmxp_tao *
+	 tcp_gettaocache __P((struct inpcb *));
+void	 tcp_init __P((void));
+void	 tcp_input __P((struct mbuf *, int, int));
+void	 tcp_mss __P((struct tcpcb *, int));
+int	 tcp_mssopt __P((struct tcpcb *));
+void	 tcp_drop_syn_sent __P((struct inpcb *, int));
+void	 tcp_mtudisc __P((struct inpcb *, int));
+struct tcpcb *
+	 tcp_newtcpcb __P((struct inpcb *));
+int	 tcp_output __P((struct tcpcb *));
+void	 tcp_quench __P((struct inpcb *, int));
+void	 tcp_respond __P((struct tcpcb *, void *,
+	    struct tcphdr *, struct mbuf *, tcp_seq, tcp_seq, int));
+struct rtentry *
+	 tcp_rtlookup __P((struct inpcb *));
+void	 tcp_setpersist __P((struct tcpcb *));
+void	 tcp_slowtimo __P((void));
+struct tcptemp *
+	 tcp_template __P((struct tcpcb *));
+struct tcpcb *
+	 tcp_timers __P((struct tcpcb *, int));
+void	 tcp_trace __P((int, int, struct tcpcb *, void *, struct tcphdr *,
+			int));
+
+extern	struct pr_usrreqs tcp_usrreqs;
+extern	u_long tcp_sendspace;
+extern	u_long tcp_recvspace;
+/*
+ * NOTE(review): presumably helpers that generate the initial send
+ * sequence number (iss) -- confirm in the implementing file.
+ */
+void	tcp_rndiss_init __P((void));
+tcp_seq	tcp_rndiss_next __P((void));
+u_int16_t
+	tcp_rndiss_encrypt __P((u_int16_t));
+
+#endif /* _KERNEL */
+
+#endif /* _NETINET_TCP_VAR_H_ */
diff --git a/sys/netinet/tcpip.h b/sys/netinet/tcpip.h
new file mode 100644
index 0000000..92189b9
--- /dev/null
+++ b/sys/netinet/tcpip.h
@@ -0,0 +1,63 @@
+/*
+ * Copyright (c) 1982, 1986, 1993
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)tcpip.h	8.1 (Berkeley) 6/10/93
+ * $FreeBSD$
+ */
+
+#ifndef _NETINET_TCPIP_H_
+#define _NETINET_TCPIP_H_
+
+/*
+ * Tcp+ip header, after ip options removed.
+ */
+struct tcpiphdr {
+	struct 	ipovly ti_i;		/* overlaid ip structure */
+	struct	tcphdr ti_t;		/* tcp header */
+};
+/*
+ * Shorthand names for the members of the two embedded headers: the first
+ * group reaches into ti_i (struct ipovly), the second into ti_t
+ * (struct tcphdr).
+ */
+#define	ti_x1		ti_i.ih_x1
+#define	ti_pr		ti_i.ih_pr
+#define	ti_len		ti_i.ih_len
+#define	ti_src		ti_i.ih_src
+#define	ti_dst		ti_i.ih_dst
+#define	ti_sport	ti_t.th_sport
+#define	ti_dport	ti_t.th_dport
+#define	ti_seq		ti_t.th_seq
+#define	ti_ack		ti_t.th_ack
+#define	ti_x2		ti_t.th_x2
+#define	ti_off		ti_t.th_off
+#define	ti_flags	ti_t.th_flags
+#define	ti_win		ti_t.th_win
+#define	ti_sum		ti_t.th_sum
+#define	ti_urp		ti_t.th_urp
+
+#endif
diff --git a/sys/netinet/udp.h b/sys/netinet/udp.h
new file mode 100644
index 0000000..635267f
--- /dev/null
+++ b/sys/netinet/udp.h
@@ -0,0 +1,51 @@
+/*
+ * Copyright (c) 1982, 1986, 1993
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)udp.h	8.1 (Berkeley) 6/10/93
+ * $FreeBSD$
+ */
+
+#ifndef _NETINET_UDP_H_
+#define _NETINET_UDP_H_
+
+/*
+ * Udp protocol header.
+ * Per RFC 768, August, 1980.
+ * udp_input() converts uh_ulen with ntohs() before using it, i.e. the
+ * length is kept in network byte order in a received header.
+ */
+struct udphdr {
+	u_short	uh_sport;		/* source port */
+	u_short	uh_dport;		/* destination port */
+	u_short	uh_ulen;		/* udp length */
+	u_short	uh_sum;			/* udp checksum */
+};
+
+#endif
diff --git a/sys/netinet/udp_usrreq.c b/sys/netinet/udp_usrreq.c
new file mode 100644
index 0000000..350a384
--- /dev/null
+++ b/sys/netinet/udp_usrreq.c
@@ -0,0 +1,933 @@
+/*
+ * Copyright (c) 1982, 1986, 1988, 1990, 1993, 1995
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)udp_usrreq.c	8.6 (Berkeley) 5/23/95
+ * $FreeBSD$
+ */
+
+#include "opt_ipsec.h"
+#include "opt_inet6.h"
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/kernel.h>
+#include <sys/malloc.h>
+#include <sys/mbuf.h>
+#include <sys/domain.h>
+#include <sys/proc.h>
+#include <sys/protosw.h>
+#include <sys/socket.h>
+#include <sys/socketvar.h>
+#include <sys/sysctl.h>
+#include <sys/syslog.h>
+#include <sys/jail.h>
+
+#include <vm/vm_zone.h>
+
+#include <net/if.h>
+#include <net/route.h>
+
+#include <netinet/in.h>
+#include <netinet/in_systm.h>
+#include <netinet/ip.h>
+#ifdef INET6
+#include <netinet/ip6.h>
+#endif
+#include <netinet/in_pcb.h>
+#include <netinet/in_var.h>
+#include <netinet/ip_var.h>
+#ifdef INET6
+#include <netinet6/ip6_var.h>
+#endif
+#include <netinet/ip_icmp.h>
+#include <netinet/icmp_var.h>
+#include <netinet/udp.h>
+#include <netinet/udp_var.h>
+
+#ifdef IPSEC
+#include <netinet6/ipsec.h>
+#endif /*IPSEC*/
+
+#include <machine/in_cksum.h>
+
+/*
+ * UDP protocol implementation.
+ * Per RFC 768, August, 1980.
+ */
+/*
+ * Run-time knobs registered under the _net_inet_udp sysctl node.
+ *
+ * udpcksum: defaults to 1, or to 0 when COMPAT_42 is defined.
+ * NOTE(review): confirm where it is consulted (presumably on output).
+ */
+#ifndef	COMPAT_42
+static int	udpcksum = 1;
+#else
+static int	udpcksum = 0;		/* XXX */
+#endif
+SYSCTL_INT(_net_inet_udp, UDPCTL_CHECKSUM, checksum, CTLFLAG_RW,
+		&udpcksum, 0, "");
+
+/*
+ * When non-zero, udp_input() logs a message for a datagram that reaches
+ * the unicast pcb lookup and matches no pcb.
+ */
+int	log_in_vain = 0;
+SYSCTL_INT(_net_inet_udp, OID_AUTO, log_in_vain, CTLFLAG_RW, 
+    &log_in_vain, 0, "Log all incoming UDP packets");
+
+/*
+ * When non-zero, udp_input() discards a datagram that matches no pcb
+ * instead of answering with an ICMP port unreachable.
+ */
+static int	blackhole = 0;
+SYSCTL_INT(_net_inet_udp, OID_AUTO, blackhole, CTLFLAG_RW,
+	&blackhole, 0, "Do not send port unreachables for refused connects");
+
+/* List of UDP pcbs; udp_init() initializes it and udp_input() walks it. */
+struct	inpcbhead udb;		/* from udp_var.h */
+#define	udb6	udb  /* for KAME src sync over BSD*'s */
+/* Lookup state (list head, hash tables, zone) filled in by udp_init(). */
+struct	inpcbinfo udbinfo;
+
+/* Size handed to hashinit() for both hash tables; may be predefined. */
+#ifndef UDBHASHSIZE
+#define UDBHASHSIZE 16
+#endif
+
+/* UDP counters, exported read-only through sysctl. */
+struct	udpstat udpstat;	/* from udp_var.h */
+SYSCTL_STRUCT(_net_inet_udp, UDPCTL_STATS, stats, CTLFLAG_RD,
+    &udpstat, udpstat, "UDP statistics (struct udpstat, netinet/udp_var.h)");
+
+/*
+ * Scratch source address: udp_input() overwrites sin_port and sin_addr
+ * with the sender of the datagram it is delivering.
+ */
+static struct	sockaddr_in udp_in = { sizeof(udp_in), AF_INET };
+#ifdef INET6
+/*
+ * IPv6 counterparts of the scratch state.  udp_input() clears both
+ * *_init_done bits before it walks the pcb list for a multicast or
+ * broadcast datagram.
+ */
+struct udp_in6 {
+	struct sockaddr_in6	uin6_sin;
+	u_char			uin6_init_done : 1;
+} udp_in6 = {
+	{ sizeof(udp_in6.uin6_sin), AF_INET6 },
+	0
+};
+struct udp_ip6 {
+	struct ip6_hdr		uip6_ip6;
+	u_char			uip6_init_done : 1;
+} udp_ip6;
+#endif /* INET6 */
+
+/* Forward declarations of helpers that are local to this file. */
+static void udp_append __P((struct inpcb *last, struct ip *ip,
+			    struct mbuf *n, int off));
+#ifdef INET6
+static void ip_2_ip6_hdr __P((struct ip6_hdr *ip6, struct ip *ip));
+#endif
+
+static int udp_detach __P((struct socket *so));
+static	int udp_output __P((struct inpcb *, struct mbuf *, struct sockaddr *,
+			    struct mbuf *, struct proc *));
+
+/*
+ * Set up the global UDP pcb bookkeeping: the pcb list, the two hash
+ * tables and the allocation zone recorded in udbinfo.
+ */
+void
+udp_init()
+{
+	struct inpcbinfo *info = &udbinfo;
+
+	/* Start from an empty pcb list and record it in udbinfo. */
+	LIST_INIT(&udb);
+	info->listhead = &udb;
+
+	/* Both hash tables are created from UDBHASHSIZE. */
+	info->hashbase = hashinit(UDBHASHSIZE, M_PCB, &info->hashmask);
+	info->porthashbase = hashinit(UDBHASHSIZE, M_PCB,
+	    &info->porthashmask);
+
+	/* Zone "udpcb" for objects of sizeof(struct inpcb). */
+	info->ipi_zone = zinit("udpcb", sizeof(struct inpcb), maxsockets,
+	    ZONE_INTERRUPT, 0);
+}
+
+void
+udp_input(m, off, proto)
+	register struct mbuf *m;
+	int off, proto;
+{
+	int iphlen = off;
+	register struct ip *ip;
+	register struct udphdr *uh;
+	register struct inpcb *inp;
+	struct mbuf *opts = 0;
+	int len;
+	struct ip save_ip;
+	struct sockaddr *append_sa;
+
+	udpstat.udps_ipackets++;
+
+	/*
+	 * Strip IP options, if any; should skip this,
+	 * make available to user, and use on returned packets,
+	 * but we don't yet have a way to check the checksum
+	 * with options still present.
+	 */
+	if (iphlen > sizeof (struct ip)) {
+		ip_stripoptions(m, (struct mbuf *)0);
+		iphlen = sizeof(struct ip);
+	}
+
+	/*
+	 * Get IP and UDP header together in first mbuf.
+	 */
+	ip = mtod(m, struct ip *);
+	if (m->m_len < iphlen + sizeof(struct udphdr)) {
+		if ((m = m_pullup(m, iphlen + sizeof(struct udphdr))) == 0) {
+			udpstat.udps_hdrops++;
+			return;
+		}
+		ip = mtod(m, struct ip *);
+	}
+	uh = (struct udphdr *)((caddr_t)ip + iphlen);
+
+	/* destination port of 0 is illegal, based on RFC768. */
+	if (uh->uh_dport == 0)
+		goto bad;
+
+	/*
+	 * Make mbuf data length reflect UDP length.
+	 * If not enough data to reflect UDP length, drop.
+	 */
+	len = ntohs((u_short)uh->uh_ulen);
+	if (ip->ip_len != len) {
+		if (len > ip->ip_len || len < sizeof(struct udphdr)) {
+			udpstat.udps_badlen++;
+			goto bad;
+		}
+		m_adj(m, len - ip->ip_len);
+		/* ip->ip_len = len; */
+	}
+	/*
+	 * Save a copy of the IP header in case we want restore it
+	 * for sending an ICMP error message in response.
+	 */
+	if (!blackhole)
+		save_ip = *ip;
+
+	/*
+	 * Checksum extended UDP header and data.
+	 */
+	if (uh->uh_sum) {
+		if (m->m_pkthdr.csum_flags & CSUM_DATA_VALID) {
+			if (m->m_pkthdr.csum_flags & CSUM_PSEUDO_HDR)
+				uh->uh_sum = m->m_pkthdr.csum_data;
+			else
+	                	uh->uh_sum = in_pseudo(ip->ip_src.s_addr,
+				    ip->ip_dst.s_addr, htonl((u_short)len +
+				    m->m_pkthdr.csum_data + IPPROTO_UDP));
+			uh->uh_sum ^= 0xffff;
+		} else {
+			bzero(((struct ipovly *)ip)->ih_x1, 9);
+			((struct ipovly *)ip)->ih_len = uh->uh_ulen;
+			uh->uh_sum = in_cksum(m, len + sizeof (struct ip));
+		}
+		if (uh->uh_sum) {
+			udpstat.udps_badsum++;
+			m_freem(m);
+			return;
+		}
+	} else
+		udpstat.udps_nosum++;
+
+	if (IN_MULTICAST(ntohl(ip->ip_dst.s_addr)) ||
+	    in_broadcast(ip->ip_dst, m->m_pkthdr.rcvif)) {
+		struct inpcb *last;
+		/*
+		 * Deliver a multicast or broadcast datagram to *all* sockets
+		 * for which the local and remote addresses and ports match
+		 * those of the incoming datagram.  This allows more than
+		 * one process to receive multi/broadcasts on the same port.
+		 * (This really ought to be done for unicast datagrams as
+		 * well, but that would cause problems with existing
+		 * applications that open both address-specific sockets and
+		 * a wildcard socket listening to the same port -- they would
+		 * end up receiving duplicates of every unicast datagram.
+		 * Those applications open the multiple sockets to overcome an
+		 * inadequacy of the UDP socket interface, but for backwards
+		 * compatibility we avoid the problem here rather than
+		 * fixing the interface.  Maybe 4.5BSD will remedy this?)
+		 */
+
+		/*
+		 * Construct sockaddr format source address.
+		 */
+		udp_in.sin_port = uh->uh_sport;
+		udp_in.sin_addr = ip->ip_src;
+		/*
+		 * Locate pcb(s) for datagram.
+		 * (Algorithm copied from raw_intr().)
+		 */
+		last = NULL;
+#ifdef INET6
+		udp_in6.uin6_init_done = udp_ip6.uip6_init_done = 0;
+#endif
+		LIST_FOREACH(inp, &udb, inp_list) {
+#ifdef INET6
+			if ((inp->inp_vflag & INP_IPV4) == 0)
+				continue;
+#endif
+			if (inp->inp_lport != uh->uh_dport)
+				continue;
+			if (inp->inp_laddr.s_addr != INADDR_ANY) {
+				if (inp->inp_laddr.s_addr !=
+				    ip->ip_dst.s_addr)
+					continue;
+			}
+			if (inp->inp_faddr.s_addr != INADDR_ANY) {
+				if (inp->inp_faddr.s_addr !=
+				    ip->ip_src.s_addr ||
+				    inp->inp_fport != uh->uh_sport)
+					continue;
+			}
+
+			if (last != NULL) {
+				struct mbuf *n;
+
+#ifdef IPSEC
+				/* check AH/ESP integrity. */
+				if (ipsec4_in_reject_so(m, last->inp_socket))
+					ipsecstat.in_polvio++;
+					/* do not inject data to pcb */
+				else
+#endif /*IPSEC*/
+				if ((n = m_copy(m, 0, M_COPYALL)) != NULL)
+					udp_append(last, ip, n,
+						   iphlen +
+						   sizeof(struct udphdr));
+			}
+			last = inp;
+			/*
+			 * Don't look for additional matches if this one does
+			 * not have either the SO_REUSEPORT or SO_REUSEADDR
+			 * socket options set.  This heuristic avoids searching
+			 * through all pcbs in the common case of a non-shared
+			 * port.  It assumes that an application will never
+			 * clear these options after setting them.
+			 */
+			if ((last->inp_socket->so_options&(SO_REUSEPORT|SO_REUSEADDR)) == 0)
+				break;
+		}
+
+		if (last == NULL) {
+			/*
+			 * No matching pcb found; discard datagram.
+			 * (No need to send an ICMP Port Unreachable
+			 * for a broadcast or multicast datagram.)
+			 */
+			udpstat.udps_noportbcast++;
+			goto bad;
+		}
+#ifdef IPSEC
+		/* check AH/ESP integrity. */
+		if (ipsec4_in_reject_so(m, last->inp_socket)) {
+			ipsecstat.in_polvio++;
+			goto bad;
+		}
+#endif /*IPSEC*/
+		udp_append(last, ip, m, iphlen + sizeof(struct udphdr));
+		return;
+	}
+	/*
+	 * Locate pcb for datagram.
+	 */
+	inp = in_pcblookup_hash(&udbinfo, ip->ip_src, uh->uh_sport,
+	    ip->ip_dst, uh->uh_dport, 1, m->m_pkthdr.rcvif);
+	if (inp == NULL) {
+		if (log_in_vain) {
+			char buf[4*sizeof "123"];
+
+			strcpy(buf, inet_ntoa(ip->ip_dst));
+			log(LOG_INFO,
+			    "Connection attempt to UDP %s:%d from %s:%d\n",
+			    buf, ntohs(uh->uh_dport), inet_ntoa(ip->ip_src),
+			    ntohs(uh->uh_sport));
+		}
+		udpstat.udps_noport++;
+		if (m->m_flags & (M_BCAST | M_MCAST)) {
+			udpstat.udps_noportbcast++;
+			goto bad;
+		}
+		if (badport_bandlim(BANDLIM_ICMP_UNREACH) < 0)
+			goto bad;
+		if (blackhole)
+			goto bad;
+		*ip = save_ip;
+		ip->ip_len += iphlen;
+		icmp_error(m, ICMP_UNREACH, ICMP_UNREACH_PORT, 0, 0);
+		return;
+	}
+#ifdef IPSEC
+	if (ipsec4_in_reject_so(m, inp->inp_socket)) {
+		ipsecstat.in_polvio++;
+		goto bad;
+	}
+#endif /*IPSEC*/
+
+	/*
+	 * Construct sockaddr format source address.
+	 * Stuff source address and datagram in user buffer.
+	 */
+	udp_in.sin_port = uh->uh_sport;
+	udp_in.sin_addr = ip->ip_src;
+	if (inp->inp_flags & INP_CONTROLOPTS
+	    || inp->inp_socket->so_options & SO_TIMESTAMP) {
+#ifdef INET6
+		if (inp->inp_vflag & INP_IPV6) {
+			int savedflags;
+
+			ip_2_ip6_hdr(&udp_ip6.uip6_ip6, ip);
+			savedflags = inp->inp_flags;
+			inp->inp_flags &= ~INP_UNMAPPABLEOPTS;
+			ip6_savecontrol(inp, &opts, &udp_ip6.uip6_ip6, m);
+			inp->inp_flags = savedflags;
+		} else
+#endif
+		ip_savecontrol(inp, &opts, ip, m);
+	}
+	iphlen += sizeof(struct udphdr);
+	m_adj(m, iphlen);
+#ifdef INET6
+	if (inp->inp_vflag & INP_IPV6) {
+		in6_sin_2_v4mapsin6(&udp_in, &udp_in6.uin6_sin);
+		append_sa = (struct sockaddr *)&udp_in6;
+	} else
+#endif
+	append_sa = (struct sockaddr *)&udp_in;
+	if (sbappendaddr(&inp->inp_socket->so_rcv, append_sa, m, opts) == 0) {
+		udpstat.udps_fullsock++;
+		goto bad;
+	}
+	sorwakeup(inp->inp_socket);
+	return;
+bad:
+	m_freem(m);
+	if (opts)
+		m_freem(opts);
+	return;
+}
+
+#ifdef INET6
+/* Fill *ip6 with an IPv6-format rendition of the IPv4 header *ip. */
+static void
+ip_2_ip6_hdr(struct ip6_hdr *ip6, struct ip *ip)
+{
+	bzero(ip6, sizeof(*ip6));
+
+	ip6->ip6_vfc = IPV6_VERSION;
+	ip6->ip6_plen = ip->ip_len;
+	ip6->ip6_nxt = ip->ip_p;
+	ip6->ip6_hlim = ip->ip_ttl;
+	/* Both addresses: marker in word 2, the IPv4 address in word 3. */
+	ip6->ip6_dst.s6_addr32[2] = IPV6_ADDR_INT32_SMP;
+	ip6->ip6_src.s6_addr32[2] = IPV6_ADDR_INT32_SMP;
+	ip6->ip6_src.s6_addr32[3] = ip->ip_src.s_addr;
+	ip6->ip6_dst.s6_addr32[3] = ip->ip_dst.s_addr;
+}
+#endif
+
+/*
+ * subroutine of udp_input(), mainly for source code readability.
+ * caller must properly init udp_ip6 and udp_in6 beforehand.
+ */
+static void
+udp_append(struct inpcb *last, struct ip *ip, struct mbuf *n, int off)
+{
+	struct mbuf *opts = NULL;
+	struct sockaddr *append_sa;
+
+	/* Collect control data only when one of these flags is set. */
+	if ((last->inp_flags & INP_CONTROLOPTS) ||
+	    (last->inp_socket->so_options & SO_TIMESTAMP)) {
+#ifdef INET6
+		if (last->inp_vflag & INP_IPV6) {
+			int flags_orig;
+
+			/* Convert the IPv4 header only if not done yet. */
+			if (!udp_ip6.uip6_init_done) {
+				ip_2_ip6_hdr(&udp_ip6.uip6_ip6, ip);
+				udp_ip6.uip6_init_done = 1;
+			}
+			flags_orig = last->inp_flags;
+			last->inp_flags &= ~INP_UNMAPPABLEOPTS;
+			ip6_savecontrol(last, &opts, &udp_ip6.uip6_ip6, n);
+			last->inp_flags = flags_orig;
+		} else
+#endif
+		ip_savecontrol(last, &opts, ip, n);
+	}
+#ifdef INET6
+	if (last->inp_vflag & INP_IPV6) {
+		if (!udp_in6.uin6_init_done) {
+			in6_sin_2_v4mapsin6(&udp_in, &udp_in6.uin6_sin);
+			udp_in6.uin6_init_done = 1;
+		}
+		append_sa = (struct sockaddr *)&udp_in6.uin6_sin;
+	} else
+#endif
+	append_sa = (struct sockaddr *)&udp_in;
+	m_adj(n, off);
+	if (sbappendaddr(&last->inp_socket->so_rcv, append_sa, n, opts) != 0) {
+		sorwakeup(last->inp_socket);
+		return;
+	}
+	/* Append failed: free the data and control chains, count the drop. */
+	m_freem(n);
+	if (opts)
+		m_freem(opts);
+	udpstat.udps_fullsock++;
+}
+
+/*
+ * Notify a udp user of an asynchronous error;
+ * just wake up so that he can collect error status.
+ */
+void
+udp_notify(struct inpcb *inp, int errno)
+{
+	struct socket *so = inp->inp_socket;
+
+	so->so_error = errno;	/* latch the error for the user to collect */
+	sorwakeup(so);
+	sowwakeup(so);
+}
+
+void
+udp_ctlinput(int cmd, struct sockaddr *sa, void *vip)
+{
+	void (*notify) __P((struct inpcb *, int)) = udp_notify;
+	struct in_addr faddr;
+	struct inpcb *inp;
+	struct udphdr *uh;
+	struct ip *ip = vip;
+	int s;
+
+	faddr = ((struct sockaddr_in *)sa)->sin_addr;
+	if (sa->sa_family != AF_INET || faddr.s_addr == INADDR_ANY)
+		return;
+
+	if (PRC_IS_REDIRECT(cmd)) {
+		notify = in_rtchange;
+		ip = NULL;
+	} else if (cmd == PRC_HOSTDEAD) {
+		ip = NULL;
+	} else if ((unsigned)cmd >= PRC_NCMDS || inetctlerrmap[cmd] == 0) {
+		return;
+	}
+	if (ip == NULL) {
+		/* No header to match a pcb with: use in_pcbnotifyall(). */
+		in_pcbnotifyall(&udb, faddr, inetctlerrmap[cmd], notify);
+		return;
+	}
+	s = splnet();
+	uh = (struct udphdr *)((caddr_t)ip + (ip->ip_hl << 2));
+	inp = in_pcblookup_hash(&udbinfo, faddr, uh->uh_dport,
+	    ip->ip_src, uh->uh_sport, 0, NULL);
+	if (inp != NULL && inp->inp_socket != NULL)
+		(*notify)(inp, inetctlerrmap[cmd]);
+	splx(s);
+}
+
+static int
+udp_pcblist(SYSCTL_HANDLER_ARGS)
+{
+	int error, i, n, s;
+	struct inpcb *inp, **inp_list;
+	inp_gen_t gencnt;
+	struct xinpgen xig;
+
+	/*
+	 * Size probe (no output buffer): building the PCB list is too costly
+	 * to do twice per request, so just report an estimate with slack.
+	 */
+	if (req->oldptr == 0) {
+		n = udbinfo.ipi_count;
+		req->oldidx = 2 * (sizeof xig)
+			+ (n + n/8) * sizeof(struct xinpcb);
+		return 0;
+	}
+
+	if (req->newptr != 0)
+		return EPERM;
+
+	/* OK, now we're committed to doing something. */
+	s = splnet();
+	gencnt = udbinfo.ipi_gencnt;
+	n = udbinfo.ipi_count;
+	splx(s);
+
+	/* Zero first so no uninitialized stack bytes are copied out. */
+	bzero(&xig, sizeof xig);
+	xig.xig_len = sizeof xig;
+	xig.xig_count = n;
+	xig.xig_gen = gencnt;
+	xig.xig_sogen = so_gencnt;
+	error = SYSCTL_OUT(req, &xig, sizeof xig);
+	if (error)
+		return error;
+
+	inp_list = malloc(n * sizeof *inp_list, M_TEMP, M_WAITOK);
+	if (inp_list == 0)
+		return ENOMEM;
+
+	s = splnet();
+	for (inp = LIST_FIRST(udbinfo.listhead), i = 0; inp && i < n;
+	     inp = LIST_NEXT(inp, inp_list)) {
+		if (inp->inp_gencnt <= gencnt && !prison_xinpcb(req->p, inp))
+			inp_list[i++] = inp;
+	}
+	splx(s);
+	n = i;
+
+	/* Stop at the first copy-out failure so it is not overwritten. */
+	for (i = 0, error = 0; i < n && error == 0; i++) {
+		inp = inp_list[i];
+		if (inp->inp_gencnt <= gencnt) {
+			struct xinpcb xi;
+			/* Zero it: xi_socket stays unset if inp_socket is NULL. */
+			bzero(&xi, sizeof xi);
+			xi.xi_len = sizeof xi;
+			/* XXX should avoid extra copy */
+			bcopy(inp, &xi.xi_inp, sizeof *inp);
+			if (inp->inp_socket)
+				sotoxsocket(inp->inp_socket, &xi.xi_socket);
+			error = SYSCTL_OUT(req, &xi, sizeof xi);
+		}
+	}
+	if (!error) {
+		/*
+		 * Give the user an updated idea of our state.  If the
+		 * generation differs from what we reported before, something
+		 * changed during this request and a retry may be necessary.
+		 */
+		s = splnet();
+		xig.xig_gen = udbinfo.ipi_gencnt;
+		xig.xig_sogen = so_gencnt;
+		xig.xig_count = udbinfo.ipi_count;
+		splx(s);
+		error = SYSCTL_OUT(req, &xig, sizeof xig);
+	}
+	free(inp_list, M_TEMP);
+	return error;
+}
+
+SYSCTL_PROC(_net_inet_udp, UDPCTL_PCBLIST, pcblist, CTLFLAG_RD, 0, 0,
+	    udp_pcblist, "S,xinpcb", "List of active UDP sockets");
+
+static int
+udp_getcred(SYSCTL_HANDLER_ARGS)
+{
+	struct sockaddr_in addrs[2];
+	struct inpcb *inp;
+	struct socket *so;
+	struct xucred xuc;
+	int error, s;
+
+	/* Superuser only; the request supplies two sockaddr_in endpoints. */
+	if ((error = suser(req->p)) != 0)
+		return (error);
+	if ((error = SYSCTL_IN(req, addrs, sizeof(addrs))) != 0)
+		return (error);
+	s = splnet();
+	inp = in_pcblookup_hash(&udbinfo, addrs[1].sin_addr, addrs[1].sin_port,
+				addrs[0].sin_addr, addrs[0].sin_port, 1, NULL);
+	so = (inp != NULL) ? inp->inp_socket : NULL;
+	if (so != NULL) {
+		bzero(&xuc, sizeof(xuc));
+		xuc.cr_uid = so->so_cred->cr_uid;
+		xuc.cr_ngroups = so->so_cred->cr_ngroups;
+		bcopy(so->so_cred->cr_groups, xuc.cr_groups,
+		    sizeof(xuc.cr_groups));
+		error = SYSCTL_OUT(req, &xuc, sizeof(struct xucred));
+	} else {
+		error = ENOENT;
+	}
+	splx(s);
+	return (error);
+}
+
+SYSCTL_PROC(_net_inet_udp, OID_AUTO, getcred, CTLTYPE_OPAQUE|CTLFLAG_RW,
+    0, 0, udp_getcred, "S,xucred", "Get the xucred of a UDP connection");
+
+/*
+ * Send the datagram in chain 'm' from pcb 'inp'.  If 'addr' is given, the
+ * pcb is connected to it only for the duration of the send (EISCONN if it
+ * is already connected); otherwise the pcb must already be connected
+ * (ENOTCONN).  'control' is freed unused.  'm' is either handed to
+ * ip_output() or freed here.  Returns 0 or an errno value.
+ */
+static int
+udp_output(struct inpcb *inp, struct mbuf *m, struct sockaddr *addr,
+    struct mbuf *control, struct proc *p)
+{
+	register struct udpiphdr *ui;
+	register int len = m->m_pkthdr.len;
+	struct in_addr laddr;
+	struct sockaddr_in *sin;
+	int s = 0, error = 0;
+
+	if (control)
+		m_freem(control);		/* XXX */
+
+	if (len + sizeof(struct udpiphdr) > IP_MAXPACKET) {
+		error = EMSGSIZE;
+		goto release;
+	}
+
+	if (addr) {
+		sin = (struct sockaddr_in *)addr;
+		if (p && jailed(p->p_ucred))
+			prison_remote_ip(p->p_ucred, 0, &sin->sin_addr.s_addr);
+		laddr = inp->inp_laddr;
+		if (inp->inp_faddr.s_addr != INADDR_ANY) {
+			error = EISCONN;
+			goto release;
+		}
+		/* Must block input while temporarily connected. */
+		s = splnet();
+		error = in_pcbconnect(inp, addr, p);
+		if (error) {
+			splx(s);
+			goto release;
+		}
+	} else {
+		if (inp->inp_faddr.s_addr == INADDR_ANY) {
+			error = ENOTCONN;
+			goto release;
+		}
+	}
+	/* Get room in front of the data for the UDP and IP headers. */
+	M_PREPEND(m, sizeof(struct udpiphdr), M_DONTWAIT);
+	if (m == 0) {
+		error = ENOBUFS;
+		if (addr) {
+			/* Undo the temporary connect, as the success path does. */
+			in_pcbdisconnect(inp);
+			inp->inp_laddr = laddr;	/* XXX rehash? */
+			splx(s);
+		}
+		goto release;
+	}
+
+	/*
+	 * Fill in mbuf with extended UDP header
+	 * and addresses and length put into network format.
+	 */
+	ui = mtod(m, struct udpiphdr *);
+	bzero(ui->ui_x1, sizeof(ui->ui_x1));	/* XXX still needed? */
+	ui->ui_pr = IPPROTO_UDP;
+	ui->ui_src = inp->inp_laddr;
+	ui->ui_dst = inp->inp_faddr;
+	ui->ui_sport = inp->inp_lport;
+	ui->ui_dport = inp->inp_fport;
+	ui->ui_ulen = htons((u_short)len + sizeof(struct udphdr));
+
+	/* Set up checksum and output datagram. */
+	if (udpcksum) {
+		ui->ui_sum = in_pseudo(ui->ui_src.s_addr, ui->ui_dst.s_addr,
+		    htons((u_short)len + sizeof(struct udphdr) + IPPROTO_UDP));
+		m->m_pkthdr.csum_flags = CSUM_UDP;
+		m->m_pkthdr.csum_data = offsetof(struct udphdr, uh_sum);
+	} else {
+		ui->ui_sum = 0;
+	}
+	((struct ip *)ui)->ip_len = sizeof (struct udpiphdr) + len;
+	((struct ip *)ui)->ip_ttl = inp->inp_ip_ttl;	/* XXX */
+	((struct ip *)ui)->ip_tos = inp->inp_ip_tos;	/* XXX */
+	udpstat.udps_opackets++;
+
+#ifdef IPSEC
+	ipsec_setsocket(m, inp->inp_socket);
+#endif /*IPSEC*/
+	error = ip_output(m, inp->inp_options, &inp->inp_route,
+	    (inp->inp_socket->so_options & (SO_DONTROUTE | SO_BROADCAST)),
+	    inp->inp_moptions);
+
+	if (addr) {
+		in_pcbdisconnect(inp);
+		inp->inp_laddr = laddr;	/* XXX rehash? */
+		splx(s);
+	}
+	return (error);
+
+release:
+	m_freem(m);
+	return (error);
+}
+
+u_long	udp_sendspace = 9216;		/* really max datagram size */
+		/* NOTE(review): u_long exported via SYSCTL_INT; confirm sizes. */
+SYSCTL_INT(_net_inet_udp, UDPCTL_MAXDGRAM, maxdgram, CTLFLAG_RW,
+    &udp_sendspace, 0, "Maximum outgoing UDP datagram size");
+
+u_long	udp_recvspace = 40 * (1024 +
+#ifdef INET6
+				      sizeof(struct sockaddr_in6)
+#else
+				      sizeof(struct sockaddr_in)
+#endif
+				      );	/* 40 1K datagrams */
+SYSCTL_INT(_net_inet_udp, UDPCTL_RECVSPACE, recvspace, CTLFLAG_RW,
+    &udp_recvspace, 0, "Maximum space for incoming UDP datagrams");
+
+/* Abort: mark the socket disconnected, then detach its pcb at splnet. */
+static int
+udp_abort(struct socket *so)
+{
+	struct inpcb *inp = sotoinpcb(so);
+	int s;
+
+	if (inp == NULL)
+		return EINVAL;	/* ??? possible? panic instead? */
+	soisdisconnected(so);
+	s = splnet();
+	in_pcbdetach(inp);
+	splx(s);
+	return 0;
+}
+
+/* Attach: reserve buffer space and allocate an IPv4 pcb for the socket. */
+static int
+udp_attach(struct socket *so, int proto, struct proc *p)
+{
+	struct inpcb *inp;
+	int error, s;
+
+	if (sotoinpcb(so) != NULL)
+		return EINVAL;
+
+	if ((error = soreserve(so, udp_sendspace, udp_recvspace)) != 0)
+		return error;
+	s = splnet();
+	error = in_pcballoc(so, &udbinfo, p);
+	splx(s);
+	if (error != 0)
+		return error;
+
+	/* Pick up the pcb now expected to hang off so->so_pcb. */
+	inp = (struct inpcb *)so->so_pcb;
+	inp->inp_ip_ttl = ip_defttl;
+	inp->inp_vflag |= INP_IPV4;
+#ifdef IPSEC
+	error = ipsec_init_policy(so, &inp->inp_sp);
+	if (error != 0) {
+		in_pcbdetach(inp);
+		return error;
+	}
+#endif /*IPSEC*/
+	return 0;
+}
+
+/* Bind: pass the local address request to in_pcbbind() at splnet. */
+static int
+udp_bind(struct socket *so, struct sockaddr *nam, struct proc *p)
+{
+	struct inpcb *inp = sotoinpcb(so);
+	int error, s;
+
+	if (inp == NULL)
+		return EINVAL;
+	s = splnet();
+	error = in_pcbbind(inp, nam, p);
+	splx(s);
+	return error;
+}
+
+/* Connect: set the foreign address of a not-yet-connected UDP socket. */
+static int
+udp_connect(struct socket *so, struct sockaddr *nam, struct proc *p)
+{
+	struct sockaddr_in *sin = (struct sockaddr_in *)nam;
+	struct inpcb *inp = sotoinpcb(so);
+	int error, s;
+
+	if (inp == NULL)
+		return EINVAL;
+	if (inp->inp_faddr.s_addr != INADDR_ANY)
+		return EISCONN;
+	s = splnet();
+	if (p != NULL && jailed(p->p_ucred))
+		prison_remote_ip(p->p_ucred, 0, &sin->sin_addr.s_addr);
+	error = in_pcbconnect(inp, nam, p);
+	splx(s);
+	if (error != 0)
+		return error;
+	soisconnected(so);
+	return 0;
+}
+
+/* Detach: release the socket's pcb via in_pcbdetach() at splnet. */
+static int
+udp_detach(struct socket *so)
+{
+	struct inpcb *inp = sotoinpcb(so);
+	int s;
+
+	if (inp == NULL)
+		return EINVAL;
+	s = splnet();
+	in_pcbdetach(inp);
+	splx(s);
+	return 0;
+}
+
+/* Disconnect: undo a connect and reset the local address to "any". */
+static int
+udp_disconnect(struct socket *so)
+{
+	struct inpcb *inp = sotoinpcb(so);
+	int s;
+
+	if (inp == NULL)
+		return EINVAL;
+	if (inp->inp_faddr.s_addr == INADDR_ANY)
+		return ENOTCONN;
+
+	s = splnet();
+	in_pcbdisconnect(inp);
+	inp->inp_laddr.s_addr = INADDR_ANY;
+	splx(s);
+	so->so_state &= ~SS_ISCONNECTED;		/* XXX */
+	return 0;
+}
+
+static int
+udp_send(struct socket *so, int flags, struct mbuf *m, struct sockaddr *addr,
+	    struct mbuf *control, struct proc *p)
+{
+	struct inpcb *inp = sotoinpcb(so);
+	if (inp == 0) {		/* no pcb: m and control are still ours to free */
+		if (control)
+			m_freem(control);
+		m_freem(m);
+		return EINVAL;
+	}
+	return udp_output(inp, m, addr, control, p);
+}
+
+/* Shutdown: flag the socket via socantsendmore(); the pcb is untouched. */
+int
+udp_shutdown(struct socket *so)
+{
+	struct inpcb *inp = sotoinpcb(so);
+
+	if (inp == NULL)
+		return EINVAL;
+	socantsendmore(so);
+	return 0;
+}
+
+struct pr_usrreqs udp_usrreqs = {	/* positional: entry order matters */
+	udp_abort, pru_accept_notsupp, udp_attach, udp_bind, udp_connect, 
+	pru_connect2_notsupp, in_control, udp_detach, udp_disconnect, 
+	pru_listen_notsupp, in_setpeeraddr, pru_rcvd_notsupp, 
+	pru_rcvoob_notsupp, udp_send, pru_sense_null, udp_shutdown,
+	in_setsockaddr, sosend, soreceive, sopoll
+};
diff --git a/sys/netinet/udp_var.h b/sys/netinet/udp_var.h
new file mode 100644
index 0000000..b0b2667
--- /dev/null
+++ b/sys/netinet/udp_var.h
@@ -0,0 +1,114 @@
+/*
+ * Copyright (c) 1982, 1986, 1989, 1993
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)udp_var.h	8.1 (Berkeley) 6/10/93
+ * $FreeBSD$
+ */
+
+#ifndef _NETINET_UDP_VAR_H_
+#define _NETINET_UDP_VAR_H_
+
+/*
+ * UDP kernel structures and variables.  udpiphdr = IP overlay + UDP header.
+ */
+struct	udpiphdr {
+	struct 	ipovly ui_i;		/* overlaid ip structure */
+	struct	udphdr ui_u;		/* udp header */
+};
+#define	ui_x1		ui_i.ih_x1	/* ui_* : shorthand for the members */
+#define	ui_pr		ui_i.ih_pr
+#define	ui_len		ui_i.ih_len
+#define	ui_src		ui_i.ih_src
+#define	ui_dst		ui_i.ih_dst
+#define	ui_sport	ui_u.uh_sport
+#define	ui_dport	ui_u.uh_dport
+#define	ui_ulen		ui_u.uh_ulen
+#define	ui_sum		ui_u.uh_sum
+
+struct	udpstat {			/* UDP statistics counters */
+				/* input statistics: */
+	u_long	udps_ipackets;		/* total input packets */
+	u_long	udps_hdrops;		/* packet shorter than header */
+	u_long	udps_badsum;		/* checksum error */
+	u_long	udps_nosum;		/* no checksum */
+	u_long	udps_badlen;		/* data length larger than packet */
+	u_long	udps_noport;		/* no socket on port */
+	u_long	udps_noportbcast;	/* of above, arrived as broadcast */
+	u_long	udps_fullsock;		/* not delivered, input socket full */
+	u_long	udpps_pcbcachemiss;	/* input packets missing pcb cache */
+	u_long	udpps_pcbhashmiss;	/* input packets not for hashed pcb */
+				/* output statistics: */
+	u_long	udps_opackets;		/* total output packets */
+	u_long	udps_fastout;		/* output packets on fast path */
+	/* of no socket on port (udps_noport), arrived as multicast */
+	u_long	udps_noportmcast;
+};
+
+/*
+ * Names for UDP sysctl objects; UDPCTL_NAMES lists them in id order.
+ */
+#define	UDPCTL_CHECKSUM		1	/* checksum UDP packets */
+#define UDPCTL_STATS		2	/* statistics (read-only) */
+#define	UDPCTL_MAXDGRAM		3	/* max datagram size */
+#define	UDPCTL_RECVSPACE	4	/* default receive buffer space */
+#define	UDPCTL_PCBLIST		5	/* list of PCBs for UDP sockets */
+#define UDPCTL_MAXID		6
+
+#define UDPCTL_NAMES { \
+	{ 0, 0 }, \
+	{ "checksum", CTLTYPE_INT }, \
+	{ "stats", CTLTYPE_STRUCT }, \
+	{ "maxdgram", CTLTYPE_INT }, \
+	{ "recvspace", CTLTYPE_INT }, \
+	{ "pcblist", CTLTYPE_STRUCT }, \
+}
+
+#ifdef _KERNEL
+SYSCTL_DECL(_net_inet_udp);
+
+extern struct	pr_usrreqs udp_usrreqs;
+extern struct	inpcbhead udb;
+extern struct	inpcbinfo udbinfo;
+extern u_long	udp_sendspace;
+extern u_long	udp_recvspace;
+extern struct	udpstat udpstat;
+extern int	log_in_vain;	/* if set, log datagrams that match no pcb */
+
+void	udp_ctlinput __P((int, struct sockaddr *, void *));
+void	udp_init __P((void));
+void	udp_input __P((struct mbuf *, int, int));
+
+void	udp_notify __P((struct inpcb *inp, int errno));
+int	udp_shutdown __P((struct socket *so));
+#endif /* _KERNEL */
+
+#endif /* !_NETINET_UDP_VAR_H_ */